[major] lagged regressor with interaction modeling (shared NN) #903

Merged
merged 55 commits into from
Apr 26, 2023
Changes from all commits

Commits (55)
8dcccd4
initial version of model attribution
karl-richter Oct 20, 2022
8cc087c
consider quantiles in model interpretation
karl-richter Oct 21, 2022
58feb4c
simplified captum integration
karl-richter Oct 21, 2022
cf50f06
Merge branch 'main' into feature/deep_model_interpretation
karl-richter Oct 21, 2022
4248087
remove captum import error
karl-richter Oct 21, 2022
4d44611
Merge branch 'main' into feature/deep_model_interpretation
karl-richter Oct 21, 2022
42a1d38
Merge branch 'main' into feature/deep_model_interpretation
karl-richter Oct 25, 2022
4a4cd6d
addressed PR comments
karl-richter Oct 25, 2022
fbe7305
Merge branch 'main' into feature/deep_model_interpretation
karl-richter Oct 25, 2022
561fd9b
initial shared covar net
karl-richter Oct 25, 2022
8f962dd
support model weights for shared covar net
karl-richter Oct 25, 2022
12a02f3
simplified covar net definition
karl-richter Oct 25, 2022
d261158
docs: added docstring to covar weights function
karl-richter Oct 25, 2022
b302ddb
reversed change on test file
karl-richter Oct 25, 2022
ae4aa96
refactored compute components in shared covar net
karl-richter Oct 25, 2022
791b395
simplified weight calculation if shallow covar net
karl-richter Oct 25, 2022
ad886a1
added attribution-based component forecasting
karl-richter Oct 26, 2022
ffb851d
refactored the compute components method
karl-richter Oct 26, 2022
3f9d3ea
updated notebook with shared regressor net
karl-richter Oct 26, 2022
bdad926
support custom calculation of model attributions
karl-richter Oct 26, 2022
c8e21bc
Merge branch 'main' into feature/deep_model_interpretation
karl-richter Nov 2, 2022
e905386
Merge branch 'feature/deep_model_interpretation' into feature/regress…
karl-richter Nov 17, 2022
256f53c
Merge branch 'main' into feature/deep_model_interpretation
karl-richter Nov 17, 2022
a843cf2
isort + flake8
karl-richter Nov 17, 2022
699b8d2
removed todos
karl-richter Nov 17, 2022
b10eaf8
removed todos
karl-richter Nov 17, 2022
2f4008c
Merge branch 'feature/deep_model_interpretation' into feature/regress…
karl-richter Nov 17, 2022
3179abf
fixing computing components
alfonsogarciadecorral Nov 30, 2022
b53cec4
refactored storage of covar_weights
karl-richter Nov 30, 2022
a095eb1
added docs
karl-richter Nov 30, 2022
33222dc
fixed pytests
karl-richter Dec 7, 2022
e9891dc
Merge branch 'main' into feature/regressors_shared_net
karl-richter Dec 7, 2022
1d17833
Merge branch 'main' into feature/regressors_shared_net
karl-richter Dec 13, 2022
7db0b13
added alternative attribution method
karl-richter Dec 13, 2022
c39a670
Merge remote-tracking branch 'origin/feature/regressors_shared_net' i…
karl-richter Dec 13, 2022
1a12fa4
removed alternative attribution method
karl-richter Dec 20, 2022
dc201b6
Merge branch 'main' into feature/regressors_shared_net
karl-richter Dec 20, 2022
514e3b8
reduce pandas warning
karl-richter Dec 21, 2022
3872484
Merge branch 'main' into feature/regressors_shared_net
karl-richter Dec 22, 2022
3d3343b
Update plot_model_parameters_matplotlib.py
karl-richter Dec 22, 2022
b40d87e
Merge branch 'main' into feature/regressors_shared_net
karl-richter Dec 27, 2022
b4670f7
Merge branch 'main' into feature/regressors_shared_net
ourownstory Jan 30, 2023
d117e12
Merged main
karl-richter Feb 8, 2023
343b89d
Merge branch 'main' into feature/regressors_shared_net
karl-richter Mar 8, 2023
46bdef1
Merge branch 'main' into feature/regressors_shared_net
karl-richter Mar 8, 2023
e604974
log scale on metrics plots
karl-richter Mar 8, 2023
c2a063a
log scale on metrics plots
karl-richter Mar 8, 2023
d128626
covar_net and ar_net initialised through networks arrays
alfonsogarciadecorral Apr 16, 2023
9f38c8c
covar_net and ar_net initialised through networks arrays
alfonsogarciadecorral Apr 16, 2023
8beaf66
(ar_net_layers_array, covar_net_layers_array) renamed to (ar_layers, …
alfonsogarciadecorral Apr 21, 2023
0a9126f
documentation updated
alfonsogarciadecorral Apr 21, 2023
3bede80
Tutorials updated
alfonsogarciadecorral Apr 21, 2023
2bdb0ad
merging main
alfonsogarciadecorral Apr 21, 2023
fde1b1a
Minor adaptations: Docstr and Typing
judussoari Apr 25, 2023
6fd76a1
Merge branch 'main' into feature/regressors_shared_net
ourownstory Apr 26, 2023
19 changes: 7 additions & 12 deletions docs/source/guides/hyperparameter-selection.md
@@ -31,17 +31,12 @@ The default loss function is the 'Huber' loss, which is considered to be robust to outliers.
However, you are free to choose the standard `MSE` or any other PyTorch `torch.nn.modules.loss` loss function.
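As an illustration, a minimal sketch (assuming the constructor's `loss_func` argument, which accepts a loss name or a PyTorch loss instance):

```python
import torch
from neuralprophet import NeuralProphet

# Swap the default Huber loss for MSE...
m = NeuralProphet(loss_func="MSE")

# ...or pass any torch.nn.modules.loss instance directly.
m_l1 = NeuralProphet(loss_func=torch.nn.L1Loss())
```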

## Increasing Depth of the Model
`num_hidden_layers` defines the number of hidden layers of the FFNNs used in the overall model. This includes the
AR-Net and the FFNN of the lagged regressors. The default is 0, meaning that the FFNNs will have only one final layer
of size `n_forecasts`. Adding more layers results in increased complexity and also increased computational time, consequently.
However, the added number of hidden layers can help build more complex relationships especially useful for the lagged
regressors. To tradeoff between the computational complexity and the improved accuracy the `num_hidden_layers` is recommended
to be set in between 1-2. Nevertheless, in most cases a good enough performance can be achieved by having no hidden layers at all.

`d_hidden` is the number of units in the hidden layers. This is only considered if `num_hidden_layers` is specified,
otherwise ignored. The default value for `d_hidden` if not specified is (`n_lags` + `n_forecasts`). If tuned manually, the recommended
practice is to set a value in between `n_lags` and `n_forecasts` for `d_hidden`. It is also important to note that with the current
implementation, NeuralProphet sets the same `d_hidden` for the all the hidden layers.
`ar_layers` defines the number and sizes of the hidden layers of the AR-Net in the overall model. It is an array in which each element is the size of the corresponding hidden layer. The default is an empty array, meaning that the AR-Net has only one final layer of size `n_forecasts`. Adding more layers increases the model's complexity and, consequently, its computational cost. However, additional hidden layers can help capture more complex relationships. To trade off computational cost against accuracy, it is recommended to set `ar_layers` to an array with 1-2 elements. Nevertheless, in most cases, good enough performance can be achieved with no hidden layers at all.

`lagged_reg_layers` defines the number and sizes of the hidden layers of the lagged regressors' FFNN in the overall model. It is an array in which each element is the size of the corresponding hidden layer. The default is an empty array, meaning that the FFNN of the lagged regressors has only one final layer of size `n_forecasts`. Adding more layers increases the model's complexity and, consequently, its computational cost. However, additional hidden layers can help capture more complex relationships, which is especially useful for the lagged regressors. To trade off computational cost against accuracy, it is recommended to set `lagged_reg_layers` to an array with 1-2 elements. Nevertheless, in most cases, good enough performance can be achieved with no hidden layers at all.

Please note that the previous `num_hidden_layers` and `d_hidden` arguments are now deprecated: the ar_net and covar_net architectures are now configured through `ar_layers` and `lagged_reg_layers`. If tuned manually, the recommended practice is to set the hidden layer sizes to values between `n_lags` and `n_forecasts`. It is also important to note that with the current implementation, NeuralProphet allows you to specify different sizes for the hidden layers in both ar_net and covar_net.
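As an illustration, a model that previously used `num_hidden_layers=2` with `d_hidden=16` would now be configured roughly as follows (a minimal sketch; the layer sizes are arbitrary example values):

```python
from neuralprophet import NeuralProphet

# Two hidden layers of size 16 for the AR-Net and two for the
# lagged-regressor FFNN (covar_net); empty lists (the default)
# mean no hidden layers.
m = NeuralProphet(
    n_forecasts=3,
    n_lags=5,
    ar_layers=[16, 16],
    lagged_reg_layers=[16, 16],
)
```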


## Data Preprocessing Related Parameters

@@ -83,7 +78,7 @@ distorted by such components, they can explicitly turn them off by setting the respective components to `False`.
`yearly_seasonality`, `weekly_seasonality` and `daily_seasonality` can also be set to number of Fourier terms of the respective seasonalities.
The defaults are 6 for yearly, 4 for weekly and 6 for daily. Users can set this to any number they want. If the number of terms is 6 for yearly, that
effectively makes the total number of Fourier terms for the yearly seasonality 12 (6*2), to accommodate both sine and cosine terms.
Increasing the number of Fourier terms can make the model capable of capturing quite complex seasonal patterns. However, similar to the `num_hidden_layers`,
Increasing the number of Fourier terms can make the model capable of capturing quite complex seasonal patterns. However, similar to the `ar_layers`,
this too results in added model complexity. Users can get some insights about the optimal number of Fourier terms by looking at the final component
plots. The default `seasonality_mode` is additive. This means that no heteroscedasticity is expected in the series in terms of the seasonality.
However, if the series contains clear variance, where the seasonal fluctuations become larger proportional to the trend, the `seasonality_mode` can be set to multiplicative.
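For instance, a series whose seasonal swings grow with the trend might be configured as below (an illustrative sketch; the Fourier orders shown are the documented defaults):

```python
from neuralprophet import NeuralProphet

# 6 yearly Fourier terms effectively yield 12 (6 sine + 6 cosine).
# Multiplicative mode lets seasonal fluctuations scale with the trend.
m = NeuralProphet(
    yearly_seasonality=6,
    weekly_seasonality=4,
    seasonality_mode="multiplicative",
)
```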
6 changes: 3 additions & 3 deletions docs/source/tutorials/auto-regression.md
@@ -29,14 +29,14 @@ below.

![plot-param-1](../images/plot_param_ar_1.png){: style="height:600px"}

You can see the relevance of each of the lags when modelling the autocorrelation. You can also specify the `num_hidden_layers`
You can see the relevance of each of the lags when modelling the autocorrelation. You can also specify the `ar_layers`
for the AR-Net, in order to increase the complexity of the AR-Net.

```python
m = NeuralProphet(
n_forecasts=3,
n_lags=5,
num_hidden_layers=2,
ar_layers=[32, 32],
yearly_seasonality=False,
weekly_seasonality=False,
daily_seasonality=False
@@ -53,7 +53,7 @@ like below. For more details on setting a value for `ar_sparsity`, refer to the
m = NeuralProphet(
n_forecasts=3,
n_lags=5,
num_hidden_layers=2,
ar_layers=[32, 32],
ar_sparsity=0.01,
yearly_seasonality=False,
weekly_seasonality=False,
6 changes: 3 additions & 3 deletions docs/zh/自回归.md
@@ -24,13 +24,13 @@ m = NeuralProphet(

![plot-param-1](http://neuralprophet.com/images/plot_param_ar_1.png)

When modelling the autocorrelation, you can see the relevance of each lag. You can also specify `num_hidden_layers` for the AR-Net, in order to increase the complexity of the AR-Net.
When modelling the autocorrelation, you can see the relevance of each lag. You can also specify `ar_layers` for the AR-Net, in order to increase the complexity of the AR-Net.

```python
m = NeuralProphet(
n_forecasts=3,
n_lags=5,
num_hidden_layers=2,
ar_layers=[32,32],
yearly_seasonality=False,
weekly_seasonality=False,
daily_seasonality=False
@@ -45,7 +45,7 @@ m = NeuralProphet(
m = NeuralProphet(
n_forecasts=3,
n_lags=5,
num_hidden_layers=2,
ar_layers=[32,32],
ar_sparsity=0.01,
yearly_seasonality=False,
weekly_seasonality=False,
12 changes: 7 additions & 5 deletions docs/zh/超参数选取.md
@@ -17,8 +17,7 @@ NeuralProphet has a number of hyperparameters that need to be specified by the user. If not specified, default values will be used.
| `seasonality_reg` | None |
| `n_forecasts` | 1 |
| `n_lags` | 0 |
| `num_hidden_layers` | 0 |
| `d_hidden` | None |
| `ar_layers` | [] |
| `ar_sparsity` | None |
| `learning_rate` | None |
| `epochs` | None |
@@ -48,9 +47,12 @@ NeuralProphet is fitted with stochastic gradient descent -- more precisely, with an AdamW optimizer and a One-Cycle learning rate policy.

## Increasing the Depth of the Model

`num_hidden_layers` defines the number of hidden layers of the FFNNs used in the overall model. This includes the AR-Net and the FFNN of the lagged regressors. The default is 0, meaning that the FFNNs have only one final layer of size `n_forecasts`. Adding more layers increases the complexity and, with it, the computation time. However, additional hidden layers can help build more complex relationships, which is especially useful for the lagged regressors. To trade off computational complexity against improved accuracy, it is recommended to set `num_hidden_layers` between 1 and 2. In most cases, however, sufficiently good performance can be achieved with no hidden layers at all.
`ar_layers` defines the number and sizes of the hidden layers of the AR-Net in the overall model. It is an array in which each element is the size of the corresponding hidden layer. The default is an empty array, meaning that the AR-Net has only one final layer of size `n_forecasts`. Adding more layers increases the complexity and the computation time. However, additional hidden layers can help build more complex relationships. To balance computational complexity against improved accuracy, it is recommended to set `ar_layers` to an array with 1-2 elements. In most cases, however, sufficiently good performance can be achieved with no hidden layers at all.

`lagged_reg_layers` defines the number and sizes of the hidden layers of the lagged regressors' FFNN in the overall model. It is an array in which each element is the size of the corresponding hidden layer. The default is an empty array, meaning that the FFNN of the lagged regressors has only one final layer of size `n_forecasts`. Adding more layers increases the complexity and the computation time. However, additional hidden layers can help build more complex relationships, which is especially useful for the lagged regressors. To balance computational complexity against improved accuracy, it is recommended to set `lagged_reg_layers` to an array with 1-2 elements. In most cases, however, sufficiently good performance can be achieved with no hidden layers at all.

Please note that the previous `num_hidden_layers` and `d_hidden` arguments are now deprecated. The ar_net and covar_net architectures are now configured through `ar_layers` and `lagged_reg_layers`. If tuned manually, it is recommended to set the sizes of the hidden layers to values between `n_lags` and `n_forecasts`. It is also important to note that the current NeuralProphet implementation allows you to specify different sizes for the hidden layers in ar_net and covar_net.

`d_hidden` is the number of units in the hidden layers. It is only considered if `num_hidden_layers` is specified, and ignored otherwise. If not specified, the default value of `d_hidden` is (`n_lags` + `n_forecasts`). If tuned manually, the recommended practice is to set `d_hidden` to a value between `n_lags` and `n_forecasts`. Note also that in the current implementation, NeuralProphet uses the same `d_hidden` for all hidden layers.

## Data Preprocessing Related Parameters

@@ -74,7 +76,7 @@

## Seasonality Related Parameters

`yearly_seasonality`, `weekly_seasonality` and `daily_seasonality` specify the seasonal components to be modelled. For example, if you use temperature data, you can probably select daily and yearly; data such as the number of subway passengers is more likely to have a weekly seasonality. Setting these seasonalities to the default `auto` mode lets NeuralProphet decide which to include based on how much data is available. For example, yearly seasonality is not considered if less than two years of data are available; likewise, weekly seasonality is not considered with less than two weeks of data, and so on. However, if users are certain that the series does not include yearly, weekly or daily seasonality, and the model should therefore not be distorted by such components, they can explicitly turn them off by setting the respective component to `False`. In addition, the parameters `yearly_seasonality`, `weekly_seasonality` and `daily_seasonality` can also be set to the number of Fourier terms of the respective seasonality. The defaults are 6 for yearly, 4 for weekly and 6 for daily, and users can set them to any number they want. If the number of yearly terms is 6, the effective total number of Fourier terms for the yearly seasonality is 12 (6*2), to accommodate both sine and cosine terms. Increasing the number of Fourier terms can make the model capable of capturing quite complex seasonal patterns. However, similar to `num_hidden_layers`, this also adds to the model complexity. Users can get some insight into the optimal number of Fourier terms by looking at the final component plots. The default `seasonality_mode` is additive, which means that no heteroscedasticity is expected in the series in terms of the seasonality. However, if the series contains clear variance, where the seasonal fluctuations become larger proportional to the trend, the `seasonality_mode` can be set to multiplicative.
`yearly_seasonality`, `weekly_seasonality` and `daily_seasonality` specify the seasonal components to be modelled. For example, if you use temperature data, you can probably select daily and yearly; data such as the number of subway passengers is more likely to have a weekly seasonality. Setting these seasonalities to the default `auto` mode lets NeuralProphet decide which to include based on how much data is available. For example, yearly seasonality is not considered if less than two years of data are available; likewise, weekly seasonality is not considered with less than two weeks of data, and so on. However, if users are certain that the series does not include yearly, weekly or daily seasonality, and the model should therefore not be distorted by such components, they can explicitly turn them off by setting the respective component to `False`. In addition, the parameters `yearly_seasonality`, `weekly_seasonality` and `daily_seasonality` can also be set to the number of Fourier terms of the respective seasonality. The defaults are 6 for yearly, 4 for weekly and 6 for daily, and users can set them to any number they want. If the number of yearly terms is 6, the effective total number of Fourier terms for the yearly seasonality is 12 (6*2), to accommodate both sine and cosine terms. Increasing the number of Fourier terms can make the model capable of capturing quite complex seasonal patterns. However, similar to `ar_layers`, this also adds to the model complexity. Users can get some insight into the optimal number of Fourier terms by looking at the final component plots. The default `seasonality_mode` is additive, which means that no heteroscedasticity is expected in the series in terms of the seasonality. However, if the series contains clear variance, where the seasonal fluctuations become larger proportional to the trend, the `seasonality_mode` can be set to multiplicative.

## Regularization Related Parameters

7 changes: 3 additions & 4 deletions neuralprophet/configure.py
@@ -21,8 +21,7 @@

@dataclass
class Model:
num_hidden_layers: int
d_hidden: Optional[int]
lagged_reg_layers: Optional[List[int]]


@dataclass
@@ -345,6 +344,7 @@ def append(self, name, period, resolution, arg, condition_name):
class AR:
n_lags: int
ar_reg: Optional[float] = None
ar_layers: Optional[List[int]] = None

def __post_init__(self):
if self.ar_reg is not None and self.ar_reg > 0:
@@ -383,8 +383,7 @@ class LaggedRegressor:
as_scalar: bool
normalize: Union[bool, str]
n_lags: int
num_hidden_layers: Optional[int]
d_hidden: Optional[int]
lagged_reg_layers: Optional[List[int]]

def __post_init__(self):
if self.reg_lambda is not None:
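For orientation, the reworked config dataclasses from this diff could be instantiated roughly like this (a sketch based only on the fields visible above; NeuralProphet normally constructs these objects internally):

```python
from neuralprophet import configure

# AR config: ar_layers lists the hidden-layer sizes of the AR-Net.
ar_config = configure.AR(n_lags=5, ar_reg=0.1, ar_layers=[32, 32])

# Model config now carries the covar_net (lagged-regressor) layers.
model_config = configure.Model(lagged_reg_layers=[16, 16])
```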
53 changes: 23 additions & 30 deletions neuralprophet/forecaster.py
@@ -168,16 +168,18 @@ class NeuralProphet:
Large values (~1-100) will limit the number of nonzero coefficients dramatically.
Small values (~0.001-1.0) will allow more non-zero coefficients.
default: 0 no regularization of coefficients.
ar_layers : list of int, optional
array of hidden layer dimensions of the AR-Net. Specifies number of hidden layers (number of entries)
and layer dimension (list entry).

COMMENT
Model Config
COMMENT
n_forecasts : int
Number of steps ahead of prediction time step to forecast.
num_hidden_layers : int, optional
number of hidden layer to include in AR-Net (defaults to 0)
d_hidden : int, optional
dimension of hidden layers of the AR-Net. Ignored if ``num_hidden_layers`` == 0.
lagged_reg_layers : list of int, optional
array of hidden layer dimensions of the Covar-Net. Specifies number of hidden layers (number of entries)
and layer dimension (list entry).

COMMENT
Train Config
@@ -344,9 +346,9 @@ def __init__(
season_global_local: np_types.SeasonGlobalLocalMode = "global",
n_forecasts: int = 1,
n_lags: int = 0,
num_hidden_layers: int = 0,
d_hidden: Optional[int] = None,
ar_layers: Optional[list] = [],
ar_reg: Optional[float] = None,
lagged_reg_layers: Optional[list] = [],
learning_rate: Optional[float] = None,
epochs: Optional[int] = None,
batch_size: Optional[int] = None,
@@ -414,18 +416,12 @@ self.metrics = utils_metrics.get_metrics(collect_metrics)
self.metrics = utils_metrics.get_metrics(collect_metrics)

# AR
self.config_ar = configure.AR(
n_lags=n_lags,
ar_reg=ar_reg,
)
self.config_ar = configure.AR(n_lags=n_lags, ar_reg=ar_reg, ar_layers=ar_layers)
self.n_lags = self.config_ar.n_lags
self.max_lags = self.n_lags

# Model
self.config_model = configure.Model(
num_hidden_layers=num_hidden_layers,
d_hidden=d_hidden,
)
self.config_model = configure.Model(lagged_reg_layers=lagged_reg_layers)

# Trend
self.config_trend = configure.Trend(
@@ -480,8 +476,6 @@ def add_lagged_regressor(
self,
names: Union[str, List[str]],
n_lags: Union[int, np_types.Literal["auto", "scalar"]] = "auto",
num_hidden_layers: Optional[int] = None,
d_hidden: Optional[int] = None,
regularization: Optional[float] = None,
normalize: Union[bool, str] = "auto",
):
@@ -497,21 +491,14 @@
previous regressors time steps to use as input in the predictor (covar order)
if ``auto``, time steps will be equivalent to the AR order (default)
if ``scalar``, all the regressors will only use last known value as input
num_hidden_layers : int
number of hidden layers to include in Lagged-Regressor-Net (defaults to same configuration as AR-Net)
d_hidden : int
dimension of hidden layers of the Lagged-Regressor-Net. Ignored if ``num_hidden_layers`` == 0.
regularization : float
optional scale for regularization strength
normalize : bool
optional, specify whether this regressor will be normalized prior to fitting.
if ``auto``, binary regressors will not be normalized.
"""
if num_hidden_layers is None:
num_hidden_layers = self.config_model.num_hidden_layers
lagged_reg_layers = self.config_model.lagged_reg_layers

if d_hidden is None:
d_hidden = self.config_model.d_hidden
if n_lags == 0 or n_lags is None:
n_lags = 0
log.warning(
@@ -552,8 +539,7 @@ def add_lagged_regressor(
normalize=normalize,
as_scalar=only_last_value,
n_lags=n_lags,
num_hidden_layers=num_hidden_layers,
d_hidden=d_hidden,
lagged_reg_layers=lagged_reg_layers,
)
return self

@@ -2462,8 +2448,8 @@ def _init_model(self):
n_forecasts=self.n_forecasts,
n_lags=self.n_lags,
max_lags=self.max_lags,
num_hidden_layers=self.config_model.num_hidden_layers,
d_hidden=self.config_model.d_hidden,
ar_layers=self.config_ar.ar_layers,
lagged_reg_layers=self.config_model.lagged_reg_layers,
metrics=self.metrics,
id_list=self.id_list,
num_trends_modelled=self.num_trends_modelled,
@@ -2519,7 +2505,12 @@ def _init_train_loader(self, df, num_workers=0):
# Determine the max_number of epochs
self.config_train.set_auto_batch_epoch(n_data=len(dataset))

loader = DataLoader(dataset, batch_size=self.config_train.batch_size, shuffle=True, num_workers=num_workers)
loader = DataLoader(
dataset,
batch_size=self.config_train.batch_size,
shuffle=True,
num_workers=num_workers,
)

return loader

@@ -2748,7 +2739,9 @@ def _predict_raw(self, df, df_name, include_components=False, prediction_frequency
dates = df["ds"].iloc[self.max_lags :]

# Pass the include_components flag to the model
self.model.set_compute_components(include_components)
if include_components:
self.model.set_compute_components(include_components)
self.model.set_covar_weights(self.model.get_covar_weights())
# Compute the predictions and components (if requested)
result = self.trainer.predict(self.model, loader)
# Extract the prediction and components
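End to end, the new configuration flow might look like the sketch below (the regressor name is hypothetical; note that `lagged_reg_layers` is set once on the model and shared by all lagged regressors, since `add_lagged_regressor` no longer accepts per-regressor `num_hidden_layers`/`d_hidden`):

```python
from neuralprophet import NeuralProphet

# The covar_net layers are configured on the model itself.
m = NeuralProphet(
    n_forecasts=3,
    n_lags=5,
    lagged_reg_layers=[32, 32],
)
# "temperature" is an illustrative column name in the training df.
m = m.add_lagged_regressor(names="temperature")
```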