Skip to content

Commit

Permalink
Merge branch 'main' into cli-with-main-script-to-print-version
Browse files Browse the repository at this point in the history
  • Loading branch information
Kevin-Chen0 authored Nov 22, 2022
2 parents c42e9c5 + 71f8b26 commit dc47a97
Show file tree
Hide file tree
Showing 10 changed files with 165 additions and 232 deletions.
51 changes: 29 additions & 22 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,30 +84,37 @@ cd neural_prophet
pip install .
```

## Model features
* Autocorrelation modelling through AR-Net
* Piecewise linear trend with optional automatic changepoint detection
* Fourier term Seasonality at different periods such as yearly, daily, weekly, hourly.
* Lagged regressors (measured features, e.g. temperature sensor)
* Future regressors (in advance known features, e.g. temperature forecast)
* Country holidays & recurring special events
* Sparsity of coefficients through regularization
* Plotting for forecast components, model coefficients as well as final predictions
* Automatic selection of training related hyperparameters
* Support for panel data by building global forecasting models.

### Coming up soon
For details, please view the [Development Timeline](notes/development_timeline.md).

The next versions of NeuralProphet are expected to cover a set of new exciting features:

## Features
### Model components
* Autoregression: Autocorrelation modelling - linear or NN (AR-Net)
* Trend: Piecewise linear trend with optional automatic changepoint detection
* Seasonality: Fourier terms at different periods such as yearly, daily, weekly, hourly.
* Lagged regressors: Lagged observations (e.g temperature sensor) - linear or NN
* Future regressors: In advance known features (e.g. temperature forecast) - linear
* Events: Country holidays & recurring custom events


### Framework features
* Multiple time series: Fit a global/glocal model with (partially) shared model parameters
* Uncertainty: Estimate values of specific quantiles - Quantile Regression
* Regularize modelling components
* Plotting of forecast components, model coefficients and more
* Time series crossvalidation utility
* Model checkpointing and validation


### Coming soon<sup>:tm:</sup>

* Cross-relation of lagged regressors
* Cross-relation and non-linear modelling of future regressors
* Static features / Time series featurization
* Logistic growth for trend component.
* Uncertainty estimation of predicted values
* Incorporate time series featurization for improved forecast accuracy.
* Model bias modelling/correction with secondary model
* Multimodal dynamics: unsupervised automatic modality-specific forecast.
* Model bias modelling / correction with secondary model
* Multimodal seasonality

For a list of past changes, please refer to the [releases page](https://github.com/ourownstory/neural_prophet/releases).

For a complete list of all past and near-future changes, please refer to the [changelogs](notes/changelogs.md).
The vision for future development can be seen at [Development Timeline](notes/development_timeline.md) (partially outdated).

## Cite
Please cite [NeuralProphet](https://arxiv.org/abs/2111.15397) in your publications if it helps your research:
Expand Down
54 changes: 17 additions & 37 deletions neuralprophet/df_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,9 @@ class ShiftScale:

def prep_or_copy_df(df):
"""Copy df if it contains the ID column. Creates ID column with '__df__' if it is a df with a single time series.
Converts a dict to the right df format (it will be deprecated soon).
Parameters
----------
df : pd.DataFrame, dict (deprecated)
df : pd.DataFrame
df or dict containing data
Returns
-------
Expand All @@ -37,12 +36,9 @@ def prep_or_copy_df(df):
whether the ID col was present
bool
        whether it is a single time series
bool
        wheter a dict was received  # [sic in old revision] → "whether"
"""
received_ID_col = False
received_single_time_series = True
received_dict = False
if isinstance(df, pd.DataFrame):
new_df = df.copy(deep=True)
if "ID" in df.columns:
Expand All @@ -56,27 +52,18 @@ def prep_or_copy_df(df):
else:
new_df["ID"] = "__df__"
log.debug("Received df with single time series")
elif isinstance(df, dict):
if len(df) > 1:
received_single_time_series = False
received_dict = True
log.warning("dict as input are deprecated. Please, use dataframes with ‘ID’ column instead")
new_df = pd.DataFrame()
for df_name, df_i in df.items():
df_i["ID"] = df_name
new_df = pd.concat((new_df, df_i.copy(deep=True)), ignore_index=True)
elif df is None:
raise ValueError("df is None")
else:
raise ValueError("Please, insert valid df type (i.e. pd.DataFrame, dict)")
raise ValueError("Please, insert valid df type (pd.DataFrame)")

# list of IDs
id_list = list(new_df.ID.unique())

return new_df, received_ID_col, received_single_time_series, received_dict, id_list
return new_df, received_ID_col, received_single_time_series, id_list


def return_df_in_original_format(df, received_ID_col=False, received_single_time_series=True, received_dict=False):
def return_df_in_original_format(df, received_ID_col=False, received_single_time_series=True):
"""Return dataframe in the original format.
Parameters
Expand All @@ -87,22 +74,16 @@ def return_df_in_original_format(df, received_ID_col=False, received_single_time
whether the ID col was present
received_single_time_series: bool
            whether it is a single time series
received_dict: bool
            wheter data originated from a dict  # [sic in old revision] → "whether"
Returns
-------
pd.Dataframe, dict (deprecated)
pd.Dataframe
original input format
"""
if received_dict:
new_df = {df_name: df_i.loc[:, df.columns != "ID"].copy(deep=True) for (df_name, df_i) in df.groupby("ID")}
log.info("Returning dict")
else:
new_df = df.copy(deep=True)
if not received_ID_col and received_single_time_series:
assert len(new_df["ID"].unique()) == 1
new_df.drop("ID", axis=1, inplace=True)
log.info("Returning df with no ID column")
new_df = df.copy(deep=True)
if not received_ID_col and received_single_time_series:
assert len(new_df["ID"].unique()) == 1
new_df.drop("ID", axis=1, inplace=True)
log.info("Returning df with no ID column")
return new_df


Expand Down Expand Up @@ -305,7 +286,7 @@ def init_data_params(
ShiftScale entries containing ``shift`` and ``scale`` parameters for each column
"""
# Compute Global data params
df, _, _, _, _ = prep_or_copy_df(df)
df, _, _, _ = prep_or_copy_df(df)
df_merged = df.copy(deep=True).drop("ID", axis=1)
global_data_params = data_params_definition(
df_merged, normalize, config_lagged_regressors, config_regressors, config_events
Expand Down Expand Up @@ -512,7 +493,7 @@ def check_dataframe(df, check_y=True, covariates=None, regressors=None, events=N
pd.DataFrame or dict
checked dataframe
"""
df, _, _, _, _ = prep_or_copy_df(df)
df, _, _, _ = prep_or_copy_df(df)
checked_df = pd.DataFrame()
regressors_to_remove = []
for df_name, df_i in df.groupby("ID"):
Expand Down Expand Up @@ -666,7 +647,7 @@ def _crossvalidation_with_time_threshold(df, n_lags, n_forecasts, k, fold_pct, f
min_train = total_samples - samples_fold - (k - 1) * (samples_fold - samples_overlap)
assert min_train >= samples_fold
folds = []
df_fold, _, _, _, _ = prep_or_copy_df(df)
df_fold, _, _, _ = prep_or_copy_df(df)
for i in range(k, 0, -1):
threshold_time_stamp = find_time_threshold(df_fold, n_lags, n_forecasts, samples_fold, inputs_overbleed=True)
df_train, df_val = split_considering_timestamp(
Expand Down Expand Up @@ -725,7 +706,7 @@ def crossvalidation_split_df(
validation data
"""
df, _, _, _, _ = prep_or_copy_df(df)
df, _, _, _ = prep_or_copy_df(df)
if len(df["ID"].unique()) == 1:
for df_name, df_i in df.groupby("ID"):
folds = _crossvalidation_split_df(df_i, n_lags, n_forecasts, k, fold_pct, fold_overlap_pct)
Expand Down Expand Up @@ -783,7 +764,7 @@ def double_crossvalidation_split_df(df, n_lags, n_forecasts, k, valid_pct, test_
tuple of k tuples [(folds_val, folds_test), …]
elements same as :meth:`crossvalidation_split_df` returns
"""
df, _, _, _, _ = prep_or_copy_df(df)
df, _, _, _ = prep_or_copy_df(df)
if len(df["ID"].unique()) > 1:
raise NotImplementedError("double_crossvalidation_split_df not implemented for df with many time series")
fold_pct_test = float(test_pct) / k
Expand Down Expand Up @@ -943,7 +924,7 @@ def split_df(df, n_lags, n_forecasts, valid_p=0.2, inputs_overbleed=True, local_
pd.DataFrame, dict
validation data
"""
df, _, _, _, _ = prep_or_copy_df(df)
df, _, _, _ = prep_or_copy_df(df)
df_train = pd.DataFrame()
df_val = pd.DataFrame()
if local_split:
Expand Down Expand Up @@ -1330,7 +1311,7 @@ def infer_frequency(df, freq, n_lags, min_freq_percentage=0.7):
Valid frequency tag according to major frequency.
"""
df, _, _, _, _ = prep_or_copy_df(df)
df, _, _, _ = prep_or_copy_df(df)
freq_df = list()
for df_name, df_i in df.groupby("ID"):
freq_df.append(_infer_frequency(df_i, freq, min_freq_percentage))
Expand Down Expand Up @@ -1374,7 +1355,6 @@ def create_dict_for_events_or_regressors(df, other_df, other_df_name): # Not su
received_ID_col,
_,
_,
_,
) = prep_or_copy_df(other_df)
# if other_df does not contain ID, create dictionary with original ID with the same other_df for each ID
if not received_ID_col:
Expand Down
Loading

0 comments on commit dc47a97

Please sign in to comment.