Skip to content

Commit

Permalink
Merge branch 'main' into cli-with-main-script-to-print-version
Browse files Browse the repository at this point in the history
  • Loading branch information
Kevin-Chen0 authored Nov 22, 2022
2 parents c42e9c5 + 71f8b26 commit dc47a97
Show file tree
Hide file tree
Showing 10 changed files with 165 additions and 232 deletions.
51 changes: 29 additions & 22 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,30 +84,37 @@ cd neural_prophet
pip install .
```

## Model features
* Autocorrelation modelling through AR-Net
* Piecewise linear trend with optional automatic changepoint detection
* Fourier term Seasonality at different periods such as yearly, daily, weekly, hourly.
* Lagged regressors (measured features, e.g. temperature sensor)
* Future regressors (in advance known features, e.g. temperature forecast)
* Country holidays & recurring special events
* Sparsity of coefficients through regularization
* Plotting for forecast components, model coefficients as well as final predictions
* Automatic selection of training related hyperparameters
* Support for panel data by building global forecasting models.

### Coming up soon
For details, please view the [Development Timeline](notes/development_timeline.md).

The next versions of NeuralProphet are expected to cover a set of new exciting features:

## Features
### Model components
* Autoregression: Autocorrelation modelling - linear or NN (AR-Net)
* Trend: Piecewise linear trend with optional automatic changepoint detection
* Seasonality: Fourier terms at different periods such as yearly, daily, weekly, hourly.
* Lagged regressors: Lagged observations (e.g temperature sensor) - linear or NN
* Future regressors: In advance known features (e.g. temperature forecast) - linear
* Events: Country holidays & recurring custom events


### Framework features
* Multiple time series: Fit a global/glocal model with (partially) shared model parameters
* Uncertainty: Estimate values of specific quantiles - Quantile Regression
* Regularize modelling components
* Plotting of forecast components, model coefficients and more
* Time series crossvalidation utility
* Model checkpointing and validation


### Coming soon<sup>:tm:</sup>

* Cross-relation of lagged regressors
* Cross-relation and non-linear modelling of future regressors
* Static features / Time series featurization
* Logistic growth for trend component.
* Uncertainty estimation of predicted values
* Incorporate time series featurization for improved forecast accuracy.
* Model bias modelling/correction with secondary model
* Multimodal dynamics: unsupervised automatic modality-specific forecast.
* Model bias modelling / correction with secondary model
* Multimodal seasonality

For a list of past changes, please refer to the [releases page](https://github.com/ourownstory/neural_prophet/releases).

For a complete list of all past and near-future changes, please refer to the [changelogs](notes/changelogs.md).
The vision for future development can be seen at [Development Timeline](notes/development_timeline.md) (partially outdated).

## Cite
Please cite [NeuralProphet](https://arxiv.org/abs/2111.15397) in your publications if it helps your research:
Expand Down
54 changes: 17 additions & 37 deletions neuralprophet/df_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,9 @@ class ShiftScale:

def prep_or_copy_df(df):
"""Copy df if it contains the ID column. Creates ID column with '__df__' if it is a df with a single time series.
Converts a dict to the right df format (it will be deprecated soon).
Parameters
----------
df : pd.DataFrame, dict (deprecated)
df : pd.DataFrame
df or dict containing data
Returns
-------
Expand All @@ -37,12 +36,9 @@ def prep_or_copy_df(df):
whether the ID col was present
bool
        whether it is a single time series
bool
        wheter a dict was received  # [sic in old revision] → "whether"
"""
received_ID_col = False
received_single_time_series = True
received_dict = False
if isinstance(df, pd.DataFrame):
new_df = df.copy(deep=True)
if "ID" in df.columns:
Expand All @@ -56,27 +52,18 @@ def prep_or_copy_df(df):
else:
new_df["ID"] = "__df__"
log.debug("Received df with single time series")
elif isinstance(df, dict):
if len(df) > 1:
received_single_time_series = False
received_dict = True
log.warning("dict as input are deprecated. Please, use dataframes with ‘ID’ column instead")
new_df = pd.DataFrame()
for df_name, df_i in df.items():
df_i["ID"] = df_name
new_df = pd.concat((new_df, df_i.copy(deep=True)), ignore_index=True)
elif df is None:
raise ValueError("df is None")
else:
raise ValueError("Please, insert valid df type (i.e. pd.DataFrame, dict)")
raise ValueError("Please, insert valid df type (pd.DataFrame)")

# list of IDs
id_list = list(new_df.ID.unique())

return new_df, received_ID_col, received_single_time_series, received_dict, id_list
return new_df, received_ID_col, received_single_time_series, id_list


def return_df_in_original_format(df, received_ID_col=False, received_single_time_series=True, received_dict=False):
def return_df_in_original_format(df, received_ID_col=False, received_single_time_series=True):
"""Return dataframe in the original format.
Parameters
Expand All @@ -87,22 +74,16 @@ def return_df_in_original_format(df, received_ID_col=False, received_single_time
whether the ID col was present
received_single_time_series: bool
            whether it is a single time series
received_dict: bool
            wheter data originated from a dict  # [sic in old revision] → "whether"
Returns
-------
pd.Dataframe, dict (deprecated)
pd.Dataframe
original input format
"""
if received_dict:
new_df = {df_name: df_i.loc[:, df.columns != "ID"].copy(deep=True) for (df_name, df_i) in df.groupby("ID")}
log.info("Returning dict")
else:
new_df = df.copy(deep=True)
if not received_ID_col and received_single_time_series:
assert len(new_df["ID"].unique()) == 1
new_df.drop("ID", axis=1, inplace=True)
log.info("Returning df with no ID column")
new_df = df.copy(deep=True)
if not received_ID_col and received_single_time_series:
assert len(new_df["ID"].unique()) == 1
new_df.drop("ID", axis=1, inplace=True)
log.info("Returning df with no ID column")
return new_df


Expand Down Expand Up @@ -305,7 +286,7 @@ def init_data_params(
ShiftScale entries containing ``shift`` and ``scale`` parameters for each column
"""
# Compute Global data params
df, _, _, _, _ = prep_or_copy_df(df)
df, _, _, _ = prep_or_copy_df(df)
df_merged = df.copy(deep=True).drop("ID", axis=1)
global_data_params = data_params_definition(
df_merged, normalize, config_lagged_regressors, config_regressors, config_events
Expand Down Expand Up @@ -512,7 +493,7 @@ def check_dataframe(df, check_y=True, covariates=None, regressors=None, events=N
pd.DataFrame or dict
checked dataframe
"""
df, _, _, _, _ = prep_or_copy_df(df)
df, _, _, _ = prep_or_copy_df(df)
checked_df = pd.DataFrame()
regressors_to_remove = []
for df_name, df_i in df.groupby("ID"):
Expand Down Expand Up @@ -666,7 +647,7 @@ def _crossvalidation_with_time_threshold(df, n_lags, n_forecasts, k, fold_pct, f
min_train = total_samples - samples_fold - (k - 1) * (samples_fold - samples_overlap)
assert min_train >= samples_fold
folds = []
df_fold, _, _, _, _ = prep_or_copy_df(df)
df_fold, _, _, _ = prep_or_copy_df(df)
for i in range(k, 0, -1):
threshold_time_stamp = find_time_threshold(df_fold, n_lags, n_forecasts, samples_fold, inputs_overbleed=True)
df_train, df_val = split_considering_timestamp(
Expand Down Expand Up @@ -725,7 +706,7 @@ def crossvalidation_split_df(
validation data
"""
df, _, _, _, _ = prep_or_copy_df(df)
df, _, _, _ = prep_or_copy_df(df)
if len(df["ID"].unique()) == 1:
for df_name, df_i in df.groupby("ID"):
folds = _crossvalidation_split_df(df_i, n_lags, n_forecasts, k, fold_pct, fold_overlap_pct)
Expand Down Expand Up @@ -783,7 +764,7 @@ def double_crossvalidation_split_df(df, n_lags, n_forecasts, k, valid_pct, test_
tuple of k tuples [(folds_val, folds_test), …]
elements same as :meth:`crossvalidation_split_df` returns
"""
df, _, _, _, _ = prep_or_copy_df(df)
df, _, _, _ = prep_or_copy_df(df)
if len(df["ID"].unique()) > 1:
raise NotImplementedError("double_crossvalidation_split_df not implemented for df with many time series")
fold_pct_test = float(test_pct) / k
Expand Down Expand Up @@ -943,7 +924,7 @@ def split_df(df, n_lags, n_forecasts, valid_p=0.2, inputs_overbleed=True, local_
pd.DataFrame, dict
validation data
"""
df, _, _, _, _ = prep_or_copy_df(df)
df, _, _, _ = prep_or_copy_df(df)
df_train = pd.DataFrame()
df_val = pd.DataFrame()
if local_split:
Expand Down Expand Up @@ -1330,7 +1311,7 @@ def infer_frequency(df, freq, n_lags, min_freq_percentage=0.7):
Valid frequency tag according to major frequency.
"""
df, _, _, _, _ = prep_or_copy_df(df)
df, _, _, _ = prep_or_copy_df(df)
freq_df = list()
for df_name, df_i in df.groupby("ID"):
freq_df.append(_infer_frequency(df_i, freq, min_freq_percentage))
Expand Down Expand Up @@ -1374,7 +1355,6 @@ def create_dict_for_events_or_regressors(df, other_df, other_df_name): # Not su
received_ID_col,
_,
_,
_,
) = prep_or_copy_df(other_df)
# if other_df does not contain ID, create dictionary with original ID with the same other_df for each ID
if not received_ID_col:
Expand Down
Loading

0 comments on commit dc47a97

Please sign in to comment.