Time Series Forecasting with Uncertainty

Compare ConformalForecaster, QuantileForestForecaster, and AdaptiveConformalForecaster on real weather data
Published

May 11, 2026

Time Series Forecasting with Uncertainty

Time series data demands temporal splitting — random splits leak future information. This notebook compares three forecasting approaches:

  1. ConformalForecaster — distribution-free conformal intervals around any regressor
  2. QuantileForestForecaster — native quantile predictions from a quantile forest
  3. AdaptiveConformalForecaster — adapts interval width under distribution shift

Setup

Code
import polars as pl
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import GradientBoostingRegressor

from uncertainty_flow import (
    ConformalForecaster,
    QuantileForestForecaster,
    coverage_score,
    winkler_score,
)
from uncertainty_flow.utils import select_validation_plan
from uncertainty_flow.wrappers import AdaptiveConformalForecaster

Configuration

Code
# Column to forecast and how far ahead each forecast reaches.
target_col = "T (degC)"  # air temperature in degrees Celsius
horizon = 6  # number of steps ahead per prediction

Data & Temporal Splitting

Code
# Load the preprocessed weather dataset; drop rows with any nulls up front
# so downstream models never see missing values.
weather = pl.read_parquet("../data/weather.parquet").drop_nulls()
print(f"Rows: {weather.height:,} | Columns: {weather.width}")
weather.head(3)
Rows: 36,887 | Columns: 22
shape: (3, 22)
p (mbar) T (degC) Tpot (K) Tdew (degC) rh (%) VPmax (mbar) VPact (mbar) VPdef (mbar) sh (g/kg) H2OC (mmol/mol) rho (g/m**3) wv (m/s) max. wv (m/s) wd (deg) rain (mm) raining (s) SWDR (W/m²) PAR (µmol/m²/s) max. PAR (µmol/m²/s) Tlog (degC) OT timestamp_idx
f64 f64 f64 f64 f64 f64 f64 f64 f64 f64 f64 f64 f64 f64 f64 f64 f64 f64 f64 f64 f64 i64
2.1142588 -1.45998 -1.607936 -1.052596 0.9901279 -1.14118 -0.979132 -0.838497 -0.998513 -0.999107 1.9404055 -0.017801 -0.832381 0.5555713 -0.093506 -0.22105 -0.672767 -0.679493 -0.588296 -1.424133 0.044395 0
2.0991955 -1.454798 -1.602882 -1.069612 0.942141 -1.138714 -0.990506 -0.828332 -1.009228 -1.008072 1.9327866 -0.029125 -1.125141 0.354339 -0.093506 -0.22105 -0.672767 -0.679493 -0.588296 -1.416612 0.044134 1
2.0876048 -1.457389 -1.604146 -1.0758 0.9368093 -1.139947 -0.992781 -0.826638 -1.009228 -1.012554 1.931738 -0.02567 -0.878606 0.242798 -0.093506 -0.22105 -0.672767 -0.679493 -0.588296 -1.405331 0.043092 2
Code
# Keep only the target series (univariate setup); the models build their own
# autoregressive lag features from it.
# NOTE(review): all exogenous weather columns are discarded here — confirm
# a univariate setup is intended rather than an accidental drop of features.
df_ts = weather.select([target_col])
Code
# task_type="time_series" makes the plan split temporally (train = past,
# test = future) rather than randomly, avoiding look-ahead leakage.
plan = select_validation_plan(df_ts, task_type="time_series", holdout_fraction=0.15, random_state=42)
train_df, test_df = plan.outer_split
print(f"Strategy: {plan.metadata.strategy_name} | Train: {train_df.height:,} | Test: {test_df.height:,}")
Strategy: temporal_holdout | Train: 31,354 | Test: 5,533

Model 1: ConformalForecaster

Wraps a regressor with conformal prediction bands. Coverage is guaranteed under exchangeability. Requires targets, horizon, and lags to auto-generate time features.

Code
# Conformal prediction bands wrapped around a gradient-boosted point model.
cf = ConformalForecaster(
    base_model=GradientBoostingRegressor(random_state=42),
    targets=target_col,             # column to forecast
    horizon=horizon,                # steps ahead
    lags=[1, 2, 3, 6, 12, 24],      # autoregressive lag features auto-generated from the target
    copula_family="independent",    # presumably joins per-horizon intervals independently — TODO confirm
    auto_tune=False,                # keep hyperparameters fixed for a fair comparison
    random_state=42,
)
cf.fit(train_df)
pred_cf = cf.predict(test_df)

Model 2: QuantileForestForecaster

Directly predicts quantiles — no conformal wrapper needed. Coverage is empirical (not guaranteed).

Code
# Native quantile forest — predicts quantiles directly, no conformal wrapper.
# FIX: the original call omitted `lags`, so no autoregressive features were
# generated and the internal RandomForestRegressor received an empty (0, 0)
# feature matrix ("Found array with 0 sample(s)" in the traceback). Use the
# same lag set as ConformalForecaster so both models see identical inputs.
# TODO(review): confirm QuantileForestForecaster accepts a `lags` argument
# like ConformalForecaster does.
qf = QuantileForestForecaster(
    targets=target_col,
    horizon=horizon,
    lags=[1, 2, 3, 6, 12, 24],  # autoregressive lag features (was missing)
    n_estimators=100,
    calibration_size=0.2,       # fraction of training data held out for calibration
    auto_tune=False,
    random_state=42,
)
qf.fit(train_df)
pred_qf = qf.predict(test_df)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[7], line 9
      5     calibration_size=0.2,
      6     auto_tune=False,
      7     random_state=42,
      8 )
----> 9 qf.fit(train_df)
     10 pred_qf = qf.predict(test_df)

File ~/Desktop/personal/uncertainty_flow/uncertainty_flow/models/quantile_forest.py:228, in QuantileForestForecaster.fit(self, data, target, **kwargs)
    220     error_invalid_data("Target vector contains NaN or Inf values")
    222 rf = RandomForestRegressor(
    223     n_estimators=self.n_estimators,
    224     min_samples_leaf=self.min_samples_leaf,
    225     max_depth=self.max_depth,
    226     random_state=self.random_state,
    227 )
--> 228 rf.fit(x_train, y_train)
    229 self._models[target] = rf
    231 self._leaf_distributions[target] = self._extract_leaf_distributions(
    232     rf, x_train, y_train, self._quantile_levels_
    233 )

File ~/Desktop/personal/uncertainty_flow/.venv/lib/python3.13/site-packages/sklearn/base.py:1336, in _fit_context.<locals>.decorator.<locals>.wrapper(estimator, *args, **kwargs)
   1329     estimator._validate_params()
   1331 with config_context(
   1332     skip_parameter_validation=(
   1333         prefer_skip_nested_validation or global_skip_validation
   1334     )
   1335 ):
-> 1336     return fit_method(estimator, *args, **kwargs)

File ~/Desktop/personal/uncertainty_flow/.venv/lib/python3.13/site-packages/sklearn/ensemble/_forest.py:359, in BaseForest.fit(self, X, y, sample_weight)
    356 if issparse(y):
    357     raise ValueError("sparse multilabel-indicator for y is not supported.")
--> 359 X, y = validate_data(
    360     self,
    361     X,
    362     y,
    363     multi_output=True,
    364     accept_sparse="csc",
    365     dtype=DTYPE,
    366     ensure_all_finite=False,
    367 )
    368 # _compute_missing_values_in_feature_mask checks if X has missing values and
    369 # will raise an error if the underlying tree base estimator can't handle missing
    370 # values. Only the criterion is required to determine if the tree supports
    371 # missing values.
    372 estimator = type(self.estimator)(criterion=self.criterion)

File ~/Desktop/personal/uncertainty_flow/.venv/lib/python3.13/site-packages/sklearn/utils/validation.py:2919, in validate_data(_estimator, X, y, reset, validate_separately, skip_check_array, **check_params)
   2917         y = check_array(y, input_name="y", **check_y_params)
   2918     else:
-> 2919         X, y = check_X_y(X, y, **check_params)
   2920     out = X, y
   2922 if not no_val_X and check_params.get("ensure_2d", True):

File ~/Desktop/personal/uncertainty_flow/.venv/lib/python3.13/site-packages/sklearn/utils/validation.py:1314, in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_writeable, ensure_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator)
   1309         estimator_name = _check_estimator_name(estimator)
   1310     raise ValueError(
   1311         f"{estimator_name} requires y to be passed, but the target y is None"
   1312     )
-> 1314 X = check_array(
   1315     X,
   1316     accept_sparse=accept_sparse,
   1317     accept_large_sparse=accept_large_sparse,
   1318     dtype=dtype,
   1319     order=order,
   1320     copy=copy,
   1321     force_writeable=force_writeable,
   1322     ensure_all_finite=ensure_all_finite,
   1323     ensure_2d=ensure_2d,
   1324     allow_nd=allow_nd,
   1325     ensure_min_samples=ensure_min_samples,
   1326     ensure_min_features=ensure_min_features,
   1327     estimator=estimator,
   1328     input_name="X",
   1329 )
   1331 y = _check_y(y, multi_output=multi_output, y_numeric=y_numeric, estimator=estimator)
   1333 check_consistent_length(X, y)

File ~/Desktop/personal/uncertainty_flow/.venv/lib/python3.13/site-packages/sklearn/utils/validation.py:1097, in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_writeable, ensure_all_finite, ensure_non_negative, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)
   1095     n_samples = _num_samples(array)
   1096     if n_samples < ensure_min_samples:
-> 1097         raise ValueError(
   1098             "Found array with %d sample(s) (shape=%s) while a"
   1099             " minimum of %d is required%s."
   1100             % (n_samples, array.shape, ensure_min_samples, context)
   1101         )
   1103 if ensure_min_features > 0 and array.ndim == 2:
   1104     n_features = array.shape[1]

ValueError: Found array with 0 sample(s) (shape=(0, 0)) while a minimum of 1 is required by RandomForestRegressor.

Model 3: AdaptiveConformalForecaster

Adapts interval width after each observation using the Gibbs & Candès (2021) ACI rule. Ideal when the data distribution shifts over time.

Code
# Redundant re-import — AdaptiveConformalForecaster was already imported in
# the setup cell.
from uncertainty_flow.wrappers import AdaptiveConformalForecaster

# Wrap the fitted ConformalForecaster with the ACI alpha-update rule.
aci = AdaptiveConformalForecaster(
    model=cf,       # already-fitted base forecaster
    alpha=0.1,      # target miscoverage (90% intervals)
    gamma=0.01,     # ACI step size for the alpha update
)
# NOTE(review): this raises a broadcast error (shapes (100,) vs (76,)).
# The wrapped model drops the first max(lags)=24 rows when building lag
# features, so it returns 76 predictions for 100 input rows, while the
# wrapper keeps all 100 targets. The fix belongs inside
# AdaptiveConformalForecaster.fit — it should trim y_true to the rows the
# model actually predicted before computing residuals.
aci.fit(test_df.head(100), target=target_col)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[8], line 8
      4     model=cf,
      5     alpha=0.1,
      6     gamma=0.01,
      7 )
----> 8 aci.fit(test_df.head(100), target=target_col)

File ~/Desktop/personal/uncertainty_flow/uncertainty_flow/wrappers/adaptive_conformal.py:133, in AdaptiveConformalForecaster.fit(self, data, target, **kwargs)
    130     median_df = pred.median()
    131     point_preds = median_df[target_str].to_numpy()
--> 133 residuals = np.abs(y_true - point_preds)
    134 self._scores = residuals.tolist()
    135 self._alpha_t = self._initial_alpha

ValueError: operands could not be broadcast together with shapes (100,) (76,) 
Code
y_test = test_df[target_col].to_numpy()
n_aci = min(200, len(test_df))

alphas = []
for i in range(n_aci):
    row = test_df.slice(i, 1)
    _ = aci.predict(row)
    alphas.append(aci.current_alpha)
    if i < len(y_test):
        aci.update(float(y_test[i]))
---------------------------------------------------------------------------
ModelNotFittedError                       Traceback (most recent call last)
Cell In[9], line 7
      3 
      4 alphas = []
      5 for i in range(n_aci):
      6     row = test_df.slice(i, 1)
----> 7     _ = aci.predict(row)
      8     alphas.append(aci.current_alpha)
      9     if i < len(y_test):
     10         aci.update(float(y_test[i]))

File ~/Desktop/personal/uncertainty_flow/uncertainty_flow/wrappers/adaptive_conformal.py:156, in AdaptiveConformalForecaster.predict(self, data, steps)
    144 """
    145 Generate adaptive prediction intervals.
    146 
   (...)    153     DistributionPrediction with intervals reflecting current alpha_t.
    154 """
    155 if not self._fitted:
--> 156     error_model_not_fitted("AdaptiveConformalForecaster")
    158 data = materialize_lazyframe(data)
    159 pred = self.model.predict(data)

File ~/Desktop/personal/uncertainty_flow/uncertainty_flow/utils/exceptions.py:112, in error_model_not_fitted(model_name)
    103 def error_model_not_fitted(model_name: str = "Model") -> NoReturn:
    104     """Raise ModelNotFittedError.
    105 
    106     Args:
   (...)    110         ModelNotFittedError: Always
    111     """
--> 112     raise ModelNotFittedError(model_name)

ModelNotFittedError: AdaptiveConformalForecaster not fitted. Call .fit() first. [UF-E002]

Side-by-Side Comparison

Forecast Plot

Code
n_plot = min(200, len(test_df))
x = np.arange(n_plot)
y_true_plot = test_df[target_col].to_numpy()[:n_plot]

int_cf = pred_cf.interval(0.9)
n_preds = min(n_plot, int_cf.height)

fig, axes = plt.subplots(2, 1, figsize=(14, 8), sharex=True)

lower = int_cf["lower"].to_numpy()[:n_preds]
upper = int_cf["upper"].to_numpy()[:n_preds]
median = (lower + upper) / 2

axes[0].fill_between(x[:n_preds], lower, upper, alpha=0.25, color="#4C78A8", label="90% interval")
axes[0].plot(x[:n_preds], median, color="#4C78A8", linewidth=1, label="Median")
axes[0].scatter(x[:n_preds], y_true_plot[:n_preds], s=3, color="#E45756", alpha=0.5, label="Actual")
axes[0].set_ylabel("Temperature")
axes[0].set_title("ConformalForecaster")
axes[0].legend(loc="upper right", fontsize=8)

axes[1].plot(x[:len(alphas)], alphas, color="#54A24B")
axes[1].set_ylabel("Adaptive alpha")
axes[1].set_xlabel("Time step")
axes[1].set_title("AdaptiveConformalForecaster — alpha evolution")
axes[1].axhline(y=0.1, color="gray", linestyle="--", alpha=0.5, label="Initial alpha=0.1")
axes[1].legend(fontsize=8)

plt.tight_layout()
plt.show()

Metrics Table

Code
y_true_all = test_df[target_col]
intervals_cf = pred_cf.interval(0.9)

rows = [{
    "Model": "ConformalForecaster",
    "Coverage_90": coverage_score(
        y_true_all[:intervals_cf.height],
        intervals_cf["lower"],
        intervals_cf["upper"],
    ),
    "Winkler_90": winkler_score(
        y_true_all[:intervals_cf.height],
        intervals_cf["lower"],
        intervals_cf["upper"],
        0.9,
    ),
    "CRPS": pred_cf.crps(y_true_all[:intervals_cf.height]),
}]

try:
    intervals_qf = pred_qf.interval(0.9)
    rows.append({
        "Model": "QuantileForestForecaster",
        "Coverage_90": coverage_score(
            y_true_all[:intervals_qf.height],
            intervals_qf["lower"],
            intervals_qf["upper"],
        ),
        "Winkler_90": winkler_score(
            y_true_all[:intervals_qf.height],
            intervals_qf["lower"],
            intervals_qf["upper"],
            0.9,
        ),
        "CRPS": pred_qf.crps(y_true_all[:intervals_qf.height]),
    })
except NameError:
    pass

metrics_df = pl.DataFrame(rows)
metrics_df
shape: (1, 4)
Model Coverage_90 Winkler_90 CRPS
str f64 f64 f64
"ConformalForecaster" 0.096025 6.817163 0.347335

Metrics Bar Chart

Code
# One bar chart per metric, with the models side by side in each panel.
metric_cols = ["Coverage_90", "Winkler_90", "CRPS"]
fig, axes = plt.subplots(1, 3, figsize=(12, 4))

# Model labels and colors are the same in every panel — compute them once.
model_names = metrics_df["Model"].to_numpy()
labels = [name.replace("Forecaster", "") for name in model_names]
palette = ["#4C78A8", "#E45756"]

for panel, metric in zip(axes, metric_cols):
    panel.bar(labels, metrics_df[metric].to_numpy(), color=palette)
    panel.set_title(metric)
    panel.tick_params(axis="x", rotation=15)

plt.tight_layout()
plt.show()

Key Takeaways

Model Coverage Best for
ConformalForecaster Guaranteed (under exchangeability) Stationary series, new users
QuantileForestForecaster Empirical When you need sharp intervals, can validate coverage separately
AdaptiveConformalForecaster Adaptive (Gibbs & Candès 2021) Distribution shift, streaming data