Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Uncertainty: Conformal Prediction V1.1 - add Conformal class to conformal_prediction.py and rename file to conformal.py #1074

Merged
merged 15 commits into from
Jan 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 151 additions & 0 deletions neuralprophet/conformal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
from dataclasses import dataclass
from typing import Optional

import matplotlib
import numpy as np
import pandas as pd

from neuralprophet.plot_forecast_matplotlib import plot_nonconformity_scores
from neuralprophet.plot_forecast_plotly import plot_nonconformity_scores as plot_nonconformity_scores_plotly


@dataclass
class Conformal:
    """Conformal prediction dataclass

    Parameters
    ----------
        alpha : float
            user-specified significance level of the prediction interval
        method : str
            name of conformal prediction technique used

            Options
                * ``naive``: Naive or Absolute Residual
                * ``cqr``: Conformalized Quantile Regression
        quantiles : list, optional
            list of quantiles for quantile regression uncertainty estimate;
            defaults to ``None`` when no quantiles are supplied

    """

    alpha: float
    method: str
    # ``None`` is a legitimate value (no quantiles given), so the annotation must allow it.
    quantiles: Optional[list] = None
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
quantiles: list = None
quantiles: Optional[list] = None

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why can this be None?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If None, NeuralProphet will automatically set it to [] behind the scenes. Then, if 0.5 is not in the input quantiles list, NP will automatically add it to the front of the list. Therefore, the default None will become [0.5].


def predict(self, df, df_cal):
    """Apply a given conformal prediction technique to get the uncertainty prediction intervals (or q-hat) for test dataframe.

    The nonconformity scores and the resulting q-hat are stored on the instance as
    ``self.noncon_scores`` and ``self.q_hat``. The interval columns are written into
    ``df`` itself, which is also returned.

    Parameters
    ----------
        df : pd.DataFrame
            test dataframe
        df_cal : pd.DataFrame
            calibration dataframe

    Returns
    -------
        pd.DataFrame
            test dataframe with uncertainty prediction intervals

    Raises
    ------
        ValueError
            if ``self.method`` is neither ``naive`` nor ``cqr``

    """
    # Validate up front: otherwise an unknown method would be scored as "naive"
    # and ``df`` would already be modified by the time the error is raised.
    if self.method not in ("naive", "cqr"):
        raise ValueError(
            f"Unknown conformal prediction method '{self.method}'. Please input either 'naive' or 'cqr'."
        )

    # conformalize
    self.noncon_scores = self._get_nonconformity_scores(df_cal)
    self.q_hat = self._get_q_hat(df_cal)
    df["qhat1"] = self.q_hat

    if self.method == "naive":
        df["yhat1 - qhat1"] = df["yhat1"] - self.q_hat
        df["yhat1 + qhat1"] = df["yhat1"] + self.q_hat
    else:  # self.method == "cqr"
        # Shift both the highest and the lowest quantile forecast by -/+ q-hat
        # (upper quantile first, to keep the column order stable).
        for quantile in (max(self.quantiles), min(self.quantiles)):
            quantile_col = f"yhat1 {str(quantile * 100)}%"
            df[f"{quantile_col} - qhat1"] = df[quantile_col] - self.q_hat
            df[f"{quantile_col} + qhat1"] = df[quantile_col] + self.q_hat

    return df

def _get_nonconformity_scores(self, df_cal):
    """Get the nonconformity scores using the given conformal prediction technique.

    For ``cqr`` the score of a row is the larger of (lowest quantile forecast - y) and
    (y - highest quantile forecast). For any other method the score is the absolute
    residual ``|y - yhat1|``. Rows whose score is missing are dropped and the remaining
    scores are sorted in ascending order.

    Parameters
    ----------
        df_cal : pd.DataFrame
            calibration dataframe

    Returns
    -------
        np.ndarray
            nonconformity scores from the calibration datapoints

    """
    if self.method == "cqr":
        # CQR nonconformity scoring function
        quantile_hi = str(max(self.quantiles) * 100)
        quantile_lo = str(min(self.quantiles) * 100)
        below_lo = (df_cal[f"yhat1 {quantile_lo}%"] - df_cal["y"]).values
        above_hi = (df_cal["y"] - df_cal[f"yhat1 {quantile_hi}%"]).values
        # np.maximum propagates NaN, so a row missing either quantile forecast (or y)
        # gets a NaN score and is removed below instead of yielding a bogus finite score.
        noncon_scores = np.maximum(below_lo, above_hi)
    else:  # self.method == "naive"
        # Naive nonconformity scoring function
        noncon_scores = abs(df_cal["y"] - df_cal["yhat1"]).values
    # Remove NaN values (boolean indexing also yields a fresh array, safe to sort in place)
    noncon_scores = noncon_scores[~pd.isnull(noncon_scores)]
    # Sort
    noncon_scores.sort()

    return noncon_scores

def _get_q_hat(self, df_cal):
    """Get the q_hat that is derived from the nonconformity scores.

    Reads ``self.noncon_scores`` (expected to be sorted in ascending order) and
    ``self.alpha``, and picks the score sitting ``int(n * alpha)`` positions from the
    top, where ``n`` is the number of scores.

    Parameters
    ----------
        df_cal : pd.DataFrame
            calibration dataframe (not used by the computation; kept for the interface)

    Returns
    -------
        float
            q_hat value, or the one-sided prediction interval width

    Raises
    ------
        IndexError
            if there are no nonconformity scores, or the computed index exceeds their number

    """
    # Get the q-hat index and value
    # With few scores or a small alpha, int(n * alpha) is 0 and ``scores[-0]`` would
    # select the SMALLEST score; clamp to 1 so the largest score is used instead.
    q_hat_idx = max(1, int(len(self.noncon_scores) * self.alpha))
    q_hat = self.noncon_scores[-q_hat_idx]

    return q_hat

def plot(self, plotting_backend):
    """Plot the nonconformity scores together with alpha and q-hat using the chosen backend.

    Any backend other than the two options below results in no plot being produced.
    The figure is only shown when matplotlib reports an interactive session.

    Parameters
    ----------
        plotting_backend : str
            specifies the plotting backend for the nonconformity scores plot, if any

            Options
                * ``matplotlib``: Use matplotlib backend for plotting
                * ``plotly``: Use the plotly backend for plotting

    """
    # Label passed to the plot: "CQR"-style names are upper-cased, others title-cased
    if "cqr" in self.method.lower():
        method_label = self.method.upper()
    else:
        method_label = self.method.title()

    if plotting_backend == "plotly":
        plot_func = plot_nonconformity_scores_plotly
    elif plotting_backend == "matplotlib":
        plot_func = plot_nonconformity_scores
    else:
        # unsupported backend: nothing to draw
        return

    fig = plot_func(self.noncon_scores, self.alpha, self.q_hat, method_label)
    if matplotlib.is_interactive():
        fig.show()
109 changes: 0 additions & 109 deletions neuralprophet/conformal_prediction.py

This file was deleted.

30 changes: 12 additions & 18 deletions neuralprophet/forecaster.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from torch.utils.data import DataLoader

from neuralprophet import configure, df_utils, np_types, time_dataset, time_net, utils, utils_metrics
from neuralprophet.conformal_prediction import conformalize
from neuralprophet.conformal import Conformal
from neuralprophet.logger import MetricsLogger
from neuralprophet.plot_forecast_matplotlib import plot, plot_components
from neuralprophet.plot_forecast_plotly import plot as plot_plotly
Expand Down Expand Up @@ -3132,24 +3132,18 @@ def conformal_predict(self, df, calibration_df, alpha, method="naive", plotting_
kwargs : dict
additional predict parameters for test df
"""
# conformalize
# get predictions for calibration dataframe
df_cal = self.predict(calibration_df)
# get predictions for test dataframe
df = self.predict(df, **kwargs)
# initiate Conformal instance
c = Conformal(alpha=alpha, method=method, quantiles=self.config_train.quantiles)
# call Conformal's predict to output test df with conformal prediction intervals
df = c.predict(df=df, df_cal=df_cal)
# plot one-sided prediction interval width with q
if isinstance(plotting_backend, str) and plotting_backend == "default":
plotting_backend = "matplotlib"
q_hats = conformalize(df_cal, alpha, method, self.config_train.quantiles, plotting_backend)
# predict
df = self.predict(df, **kwargs)
df["qhat1"] = q_hats[0]
if method == "naive":
df["yhat1 - qhat1"] = df["yhat1"] - q_hats[0]
df["yhat1 + qhat1"] = df["yhat1"] + q_hats[0]
elif method == "cqr":
quantile_hi = str(max(self.config_train.quantiles) * 100)
quantile_lo = str(min(self.config_train.quantiles) * 100)
df[f"yhat1 {quantile_hi}% - qhat1"] = df[f"yhat1 {quantile_hi}%"] - q_hats[0]
df[f"yhat1 {quantile_hi}% + qhat1"] = df[f"yhat1 {quantile_hi}%"] + q_hats[0]
df[f"yhat1 {quantile_lo}% - qhat1"] = df[f"yhat1 {quantile_lo}%"] - q_hats[0]
df[f"yhat1 {quantile_lo}% + qhat1"] = df[f"yhat1 {quantile_lo}%"] + q_hats[0]
else:
raise ValueError(f"Unknown conformal prediction method '{method}'. Please input either 'naive' or 'cqr'.")
if plotting_backend:
c.plot(plotting_backend)

return df
Loading