Skip to content

Decomposition

uncertainty_flow.decomposition

Uncertainty decomposition into aleatoric and epistemic components.

EnsembleDecomposition

Decompose uncertainty into aleatoric and epistemic components by refitting.

This workflow fits a bootstrap ensemble from `train_data` using `model_factory`, then evaluates all refit members on the same prediction frame:

- Aleatoric: mean interval width across refit members
- Epistemic: variance of ensemble point predictions across refit members

Parameters

- `model_factory` : `Callable[[], BaseUncertaintyModel]` — Callable returning a fresh model instance for each bootstrap refit.
- `train_data` : `PolarsInput` — Training data used to refit bootstrap ensemble members.
- `target` : `str | list[str]`, optional — Optional target passed into `fit()` for models that require it.
- `confidence` : `float`, default=0.9 — Confidence level for interval width calculation.
- `n_bootstrap` : `int`, default=5 — Number of bootstrap refits.
- `random_state` : `int`, optional — Random seed for reproducibility.

Source code in uncertainty_flow/decomposition/ensemble.py
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
class EnsembleDecomposition:
    """
    Decompose uncertainty into aleatoric and epistemic components by refitting.

    This workflow fits a bootstrap ensemble from `train_data` using `model_factory`,
    then evaluates all refit members on the same prediction frame:
    - Aleatoric: mean interval width across refit members
    - Epistemic: variance of ensemble point predictions across refit members

    Parameters
    ----------
    model_factory : Callable[[], BaseUncertaintyModel]
        Callable returning a fresh model instance for each bootstrap refit
    train_data : PolarsInput
        Training data used to refit bootstrap ensemble members
    target : str | list[str], optional
        Optional target passed into `fit()` for models that require it
    confidence : float, default=0.9
        Confidence level for interval width calculation
    n_bootstrap : int, default=5
        Number of bootstrap refits
    random_state : int, optional
        Random seed for reproducibility
    """

    def __init__(
        self,
        model_factory: Callable[[], "BaseUncertaintyModel"],
        train_data: "PolarsInput",
        target: "TargetSpec" | None = None,
        confidence: float = 0.9,
        n_bootstrap: int = 5,
        random_state: int | None = None,
    ):
        if not callable(model_factory):
            raise InvalidDataError("model_factory must be callable")
        if n_bootstrap < 1:
            raise InvalidDataError(f"n_bootstrap must be at least 1, got {n_bootstrap}")
        if not (0 < confidence < 1):
            raise InvalidDataError(f"confidence must be in (0, 1), got {confidence}")
        if train_data is None:
            raise InvalidDataError("train_data is required")

        self.model_factory = model_factory
        self.train_data = materialize_lazyframe(train_data)
        if self.train_data.is_empty():
            raise InvalidDataError("train_data must contain at least one row")

        self.target = target
        self.confidence = confidence
        self.n_bootstrap = n_bootstrap
        self.random_state = random_state
        self._rng = np.random.default_rng(random_state)
        self._ensemble_models: list["BaseUncertaintyModel"] | None = None

    def _fit_ensemble(self) -> None:
        """Fit bootstrap-refit ensemble members once and reuse them."""
        if self._ensemble_models is not None:
            return

        n_rows = self.train_data.height
        ensemble_models: list["BaseUncertaintyModel"] = []

        for _ in range(self.n_bootstrap):
            bootstrap_indices = self._rng.choice(n_rows, size=n_rows, replace=True)
            bootstrap_data = self.train_data[bootstrap_indices]
            model = self.model_factory()

            if hasattr(model, "random_state") and self.random_state is not None:
                bootstrap_seed = int(self._rng.integers(0, np.iinfo(np.int32).max))
                try:
                    setattr(model, "random_state", bootstrap_seed)
                except Exception:
                    pass

            if self.target is None:
                model.fit(bootstrap_data)
            else:
                model.fit(bootstrap_data, target=self.target)
            ensemble_models.append(model)

        self._ensemble_models = ensemble_models

    def _predict_ensemble(self, data: pl.DataFrame) -> tuple[np.ndarray, np.ndarray]:
        """Return stacked point predictions and interval widths."""
        self._fit_ensemble()
        if self._ensemble_models is None:
            raise RuntimeError("Internal error: ensemble models not fitted")

        point_predictions = []
        interval_widths = []
        for model in self._ensemble_models:
            prediction = model.predict(data)
            point_predictions.append(_point_prediction_matrix(prediction))
            interval_widths.append(_interval_width_matrix(prediction, self.confidence))

        return np.stack(point_predictions, axis=0), np.stack(interval_widths, axis=0)

    def decompose(
        self,
        data: pl.DataFrame,
    ) -> dict[str, float]:
        """
        Decompose prediction uncertainty on an evaluation frame.

        Returns
        -------
        dict[str, float]
            Dictionary with `aleatoric`, `epistemic`, and `total`
        """
        if data.height == 0:
            raise InvalidDataError("Cannot decompose uncertainty on empty DataFrame")

        point_stack, width_stack = self._predict_ensemble(data)
        aleatoric_by_sample = width_stack.mean(axis=(0, 2))
        epistemic_by_sample = point_stack.var(axis=0).mean(axis=1)

        aleatoric = float(aleatoric_by_sample.mean())
        epistemic = float(epistemic_by_sample.mean())
        return {
            "aleatoric": aleatoric,
            "epistemic": epistemic,
            "total": aleatoric + epistemic,
        }

    def decompose_by_sample(
        self,
        data: pl.DataFrame,
    ) -> pl.DataFrame:
        """
        Decompose uncertainty for each sample in an evaluation frame.

        Returns
        -------
        pl.DataFrame
            DataFrame with columns `aleatoric`, `epistemic`, and `total`
        """
        if data.height == 0:
            raise InvalidDataError("Cannot decompose uncertainty on empty DataFrame")

        point_stack, width_stack = self._predict_ensemble(data)
        aleatoric = width_stack.mean(axis=(0, 2))
        epistemic = point_stack.var(axis=0).mean(axis=1)

        return pl.DataFrame(
            {
                "aleatoric": aleatoric,
                "epistemic": epistemic,
                "total": aleatoric + epistemic,
            }
        )

    def summary(self) -> dict[str, object]:
        """Return configuration summary for the refit ensemble workflow."""
        return {
            "confidence": self.confidence,
            "n_bootstrap": self.n_bootstrap,
            "random_state": self.random_state,
            "target": self.target,
            "refit_based": True,
        }

decompose(data)

Decompose prediction uncertainty on an evaluation frame.

Returns

dict[str, float] Dictionary with aleatoric, epistemic, and total

Source code in uncertainty_flow/decomposition/ensemble.py
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
def decompose(
    self,
    data: pl.DataFrame,
) -> dict[str, float]:
    """
    Summarize aleatoric and epistemic uncertainty over an evaluation frame.

    The per-sample interval widths and prediction variances obtained from the
    refit ensemble are averaged into one scalar each.

    Returns
    -------
    dict[str, float]
        Dictionary with `aleatoric`, `epistemic`, and `total`

    Raises
    ------
    InvalidDataError
        If `data` has no rows.
    """
    if data.height == 0:
        raise InvalidDataError("Cannot decompose uncertainty on empty DataFrame")

    points, widths = self._predict_ensemble(data)

    # Widths: average over members (axis 0) and axis 2, leaving one value per sample.
    width_per_sample = widths.mean(axis=(0, 2))
    # Points: variance across members (axis 0), then average over the trailing axis.
    variance_per_sample = points.var(axis=0).mean(axis=1)

    result = {
        "aleatoric": float(width_per_sample.mean()),
        "epistemic": float(variance_per_sample.mean()),
    }
    result["total"] = result["aleatoric"] + result["epistemic"]
    return result

decompose_by_sample(data)

Decompose uncertainty for each sample in an evaluation frame.

Returns

pl.DataFrame DataFrame with columns aleatoric, epistemic, and total

Source code in uncertainty_flow/decomposition/ensemble.py
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
def decompose_by_sample(
    self,
    data: pl.DataFrame,
) -> pl.DataFrame:
    """
    Report aleatoric and epistemic uncertainty for every row of an evaluation frame.

    Returns
    -------
    pl.DataFrame
        DataFrame with columns `aleatoric`, `epistemic`, and `total`

    Raises
    ------
    InvalidDataError
        If `data` has no rows.
    """
    if data.height == 0:
        raise InvalidDataError("Cannot decompose uncertainty on empty DataFrame")

    points, widths = self._predict_ensemble(data)

    columns = {
        # Mean interval width over members (axis 0) and axis 2.
        "aleatoric": widths.mean(axis=(0, 2)),
        # Variance of point predictions across members, averaged over the trailing axis.
        "epistemic": points.var(axis=0).mean(axis=1),
    }
    columns["total"] = columns["aleatoric"] + columns["epistemic"]
    return pl.DataFrame(columns)

summary()

Return configuration summary for the refit ensemble workflow.

Source code in uncertainty_flow/decomposition/ensemble.py
198
199
200
201
202
203
204
205
206
def summary(self) -> dict[str, object]:
    """Describe how this refit-based decomposition is configured.

    Returns the stored `confidence`, `n_bootstrap`, `random_state`, and
    `target` settings plus a constant `refit_based` flag.
    """
    config: dict[str, object] = {
        field: getattr(self, field)
        for field in ("confidence", "n_bootstrap", "random_state", "target")
    }
    config["refit_based"] = True
    return config