From 5da55ffc4b123a8a76b1caf944e25dc2ce298ee7 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Thu, 11 Jun 2026 00:04:18 +0000 Subject: [PATCH 1/6] chore(release): 8.0.0-rc.2 [skip ci] ## [8.0.0-rc.2](https://github.com/sequential-parameter-optimization/spotforecast2/compare/v8.0.0-rc.1...v8.0.0-rc.2) (2026-06-11) ### Features * **deps:** require spotforecast2-safe >=22.0.0 ([13893ad](https://github.com/sequential-parameter-optimization/spotforecast2/commit/13893ad9173ef6988fad50614c633676dbaea9d3)) --- CHANGELOG.md | 6 ++++++ pyproject.toml | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ff7b8d93..c176a978 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## [8.0.0-rc.2](https://github.com/sequential-parameter-optimization/spotforecast2/compare/v8.0.0-rc.1...v8.0.0-rc.2) (2026-06-11) + +### Features + +* **deps:** require spotforecast2-safe >=22.0.0 ([13893ad](https://github.com/sequential-parameter-optimization/spotforecast2/commit/13893ad9173ef6988fad50614c633676dbaea9d3)) + ## [8.0.0-rc.1](https://github.com/sequential-parameter-optimization/spotforecast2/compare/v7.1.0...v8.0.0-rc.1) (2026-06-10) ### ⚠ BREAKING CHANGES diff --git a/pyproject.toml b/pyproject.toml index 8264d3c2..fdcca4b6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "spotforecast2" -version = "8.0.0-rc.1" +version = "8.0.0-rc.2" description = "Forecasting with spot" readme = "README.md" license = { text = "AGPL-3.0-or-later" } From 0bbc0c6586502f5f0466fa4d24a92a8bdfcec00f Mon Sep 17 00:00:00 2001 From: bartzbeielstein <32470350+bartzbeielstein@users.noreply.github.com> Date: Fri, 12 Jun 2026 02:49:26 +0200 Subject: [PATCH 2/6] feat(plots): operational diagnostics plots (ACF, importance-by-family, SHAP summary, forecast-vs-reference) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ports five matplotlib helpers from bart26k-lecture/scripts/team4_4zones_submit.py into a reusable, stateless public API in spotforecast2.plots.diagnostics: - feature_family(name) — maps LGBMRegressor feature names to six diagnostic families - plot_acf_with_lags(acf, key_lags, conf) — bar chart with confidence band and annotated key lags - plot_feature_importance_by_family(names, importances, top_n) — top-N barh coloured by family - plot_shap_summary(estimator, X, max_samples) — subsampled TreeExplainer bar summary - plot_forecast_vs_reference(forecast, reference, ...) — generalised forecast-vs-ENTSO-E line plot All functions return matplotlib.figure.Figure; the caller saves/closes. No plt.show(), no matplotlib.use(), no global state mutation. Overlap MAD is logged at INFO via the module logger (preserved from the original). 44 new tests in tests/test_plots_diagnostics.py; 1207 tests pass total. Registered in _quarto.yml sidebar + quartodoc contents (plots.diagnostics). Co-Authored-By: Claude Fable 5 --- _quarto.yml | 5 +- src/spotforecast2/plots/__init__.py | 12 + src/spotforecast2/plots/diagnostics.py | 381 +++++++++++++++++++++++++ tests/test_plots_diagnostics.py | 302 ++++++++++++++++++++ 4 files changed, 699 insertions(+), 1 deletion(-) create mode 100644 src/spotforecast2/plots/diagnostics.py create mode 100644 tests/test_plots_diagnostics.py diff --git a/_quarto.yml b/_quarto.yml index 63015e8e..ec490fa3 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -116,6 +116,8 @@ website: file: docs/reference/model_selection.utils_metrics.qmd - section: "Plots" contents: + - text: "diagnostics" + file: docs/reference/plots.diagnostics.qmd - text: "outlier_plots" file: docs/reference/plots.outlier_plots.qmd - text: "plotter" @@ -215,8 +217,9 @@ quartodoc: sections: - title: "Plots" - desc: "Outlier, prediction, and time-series visualization tools." + desc: "Outlier, prediction, time-series, and operational diagnostic visualization tools." contents: + - plots.diagnostics - plots.outlier_plots - plots.plotter - plots.time_series_visualization diff --git a/src/spotforecast2/plots/__init__.py b/src/spotforecast2/plots/__init__.py index ba0923de..5b545892 100644 --- a/src/spotforecast2/plots/__init__.py +++ b/src/spotforecast2/plots/__init__.py @@ -1,6 +1,13 @@ # SPDX-FileCopyrightText: 2026 bartzbeielstein # SPDX-License-Identifier: AGPL-3.0-or-later +from .diagnostics import ( + feature_family, + plot_acf_with_lags, + plot_feature_importance_by_family, + plot_forecast_vs_reference, + plot_shap_summary, +) from .distribution import plot_distribution, plot_distributions from .outlier_plots import ( visualize_outliers_hist, @@ -13,9 +20,14 @@ ) __all__ = [ + "feature_family", + "plot_acf_with_lags", "plot_distribution", "plot_distributions", + "plot_feature_importance_by_family", + "plot_forecast_vs_reference", "plot_periodogram", + "plot_shap_summary", "visualize_outliers_hist", "visualize_outliers_plotly_scatter", "visualize_ts_plotly", diff --git a/src/spotforecast2/plots/diagnostics.py b/src/spotforecast2/plots/diagnostics.py new file mode 100644 index 00000000..9839ab53 --- /dev/null +++ b/src/spotforecast2/plots/diagnostics.py @@ -0,0 +1,381 @@ +# SPDX-FileCopyrightText: 2026 bartzbeielstein +# SPDX-License-Identifier: AGPL-3.0-or-later + +"""Operational diagnostic plots for energy-load forecasting pipelines. + +Ports five matplotlib helpers from the chapter-14 team4 operational script +(`bart26k-lecture/scripts/team4_4zones_submit.py`) into a reusable, stateless +API. All functions return a `matplotlib.figure.Figure`; the caller is +responsible for saving and closing it. No `plt.show()` is called and the +backend is never changed here (set `matplotlib.use("Agg")` before importing +`pyplot` in headless environments). +""" + +from __future__ import annotations + +import logging +from typing import Sequence + +import matplotlib.pyplot as plt +import pandas as pd +from matplotlib.figure import Figure + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Family colour map — identical to the chapter-14 script so PNG diagnostics +# look the same whether run from the script or via this module. +# --------------------------------------------------------------------------- +_FAMILY_COLOR: dict[str, str] = { + "lag": "#B22222", + "weather/other": "#E07B00", + "weather_window": "#F0A33C", + "cyclical/RBF": "#1F4E79", + "holiday": "#2E8B57", + "polynomial": "#7B5EA7", +} + + +# --------------------------------------------------------------------------- +# Public helpers +# --------------------------------------------------------------------------- + + +def feature_family(name: str) -> str: + """Map a feature name to its diagnostic family label. + + This is the public, importable version of the `family_of` helper used + inside the chapter-14 team4 operational script. The mapping is + intentionally coarse — it covers the feature names that `ConfigEntsoe` + and `ForecasterRecursive` generate and is used exclusively for colour + grouping in `plot_feature_importance_by_family`. + + Args: + name: Feature name as returned by `LGBMRegressor.feature_name_`. + + Returns: + One of: `"holiday"`, `"polynomial"`, `"weather_window"`, + `"cyclical/RBF"`, `"lag"`, `"weather/other"`. + + Examples: + ```{python} + from spotforecast2.plots.diagnostics import feature_family + + print(feature_family("holiday_DE")) + print(feature_family("brueckentag_NW")) + print(feature_family("poly_hour_2")) + print(feature_family("window_mean_72")) + print(feature_family("sin_hour")) + print(feature_family("lag_1")) + print(feature_family("wind_speed_10m")) + ``` + """ + c = name.lower() + if "holiday" in c or "brueckentag" in c: + return "holiday" + if "poly" in c: + return "polynomial" + if "window" in c: + return "weather_window" + if any(k in c for k in ("sin", "cos", "rbf")): + return "cyclical/RBF" + if c.startswith("lag"): + return "lag" + return "weather/other" + + +def plot_acf_with_lags( + acf: pd.DataFrame, + key_lags: Sequence[int], + conf: float, +) -> Figure: + """Bar chart of autocorrelation values with annotated key lags. + + Ports `_plot_acf` from the chapter-14 team4 script. The `acf` DataFrame + is the output of + `spotforecast2.stats.autocorrelation.calculate_lag_autocorrelation` and + must contain at minimum the columns `"lag"` and `"autocorrelation"`. + + Confidence-band lines at `+conf` / `-conf` are drawn as dashed grey + horizontal lines. Each lag in `key_lags` that is present in `acf["lag"]` + gets an orange arrow annotation. Lags not found in the frame are silently + skipped. + + Args: + acf: DataFrame with columns `"lag"` (int) and `"autocorrelation"` + (float), as returned by `calculate_lag_autocorrelation`. + key_lags: Sequence of lag values to annotate (e.g. the PACF-selected + lags from the pipeline). + conf: Half-width of the confidence band, typically + `1.96 / sqrt(n_obs)`. + + Returns: + A `matplotlib.figure.Figure`. + + Examples: + ```{python} + import numpy as np + import pandas as pd + import matplotlib + matplotlib.use("Agg") + from spotforecast2.plots.diagnostics import plot_acf_with_lags + + rng = np.random.default_rng(0) + n = 100 + acf = pd.DataFrame({"lag": range(n), "autocorrelation": rng.uniform(-0.3, 0.3, n)}) + fig = plot_acf_with_lags(acf, key_lags=[1, 24, 48], conf=0.1) + print(type(fig).__name__) + ``` + """ + fig, ax = plt.subplots(figsize=(8.5, 3.0)) + ax.bar(acf["lag"], acf["autocorrelation"], width=0.9, color="#1F4E79") + ax.axhline(conf, color="#999999", lw=0.8, ls="--") + ax.axhline(-conf, color="#999999", lw=0.8, ls="--") + n_annotated = 0 + for lag in key_lags: + row = acf[acf["lag"] == lag] + if row.empty: + continue + val = float(row["autocorrelation"].iloc[0]) + ax.annotate( + f"lag {lag}", + xy=(lag, val), + xytext=(lag, val + 0.08), + ha="center", + fontsize=8, + color="#E07B00", + arrowprops=dict(arrowstyle="->", color="#E07B00", lw=0.8), + ) + n_annotated += 1 + ax.set_xlabel("lag (hours)") + ax.set_ylabel("autocorrelation") + ax.set_title("ACF of Actual Load (annotated: data-selected key_lags)") + ax.grid(True, color="#E5E5E5", linewidth=0.5) + for spine in ("top", "right"): + ax.spines[spine].set_visible(False) + fig.tight_layout() + return fig + + +def plot_feature_importance_by_family( + names: Sequence[str], + importances: Sequence[float], + *, + top_n: int = 20, +) -> Figure: + """Horizontal bar chart of the top-N feature importances, coloured by family. + + Ports `_plot_importance` from the chapter-14 team4 script. Feature + families are determined by `feature_family`; the colour palette is the + same as in the script so diagnostics look identical. + + Args: + names: Feature names (e.g. `fc.estimator.feature_name_`). + importances: Corresponding importance scores (e.g. + `fc.estimator.feature_importances_`). + top_n: Number of top features to display. Defaults to 20. + + Returns: + A `matplotlib.figure.Figure`. + + Examples: + ```{python} + import matplotlib + matplotlib.use("Agg") + from spotforecast2.plots.diagnostics import plot_feature_importance_by_family + + names = ["lag_1", "lag_24", "holiday_DE", "wind_speed_10m", + "poly_hour_2", "window_mean_72", "sin_hour"] + scores = [100, 80, 60, 55, 40, 35, 20] + fig = plot_feature_importance_by_family(names, scores, top_n=5) + print(type(fig).__name__) + ``` + """ + ranking = sorted( + zip(names, importances), key=lambda kv: kv[1], reverse=True + )[:top_n] + labels = [n for n, _ in ranking][::-1] + values = [v for _, v in ranking][::-1] + colors = [_FAMILY_COLOR.get(feature_family(n), "#888888") for n in labels] + + fig, ax = plt.subplots(figsize=(8.5, 5.5)) + ax.barh(labels, values, color=colors) + ax.set_xlabel("split count (feature importance)") + ax.set_title( + f"Top-{top_n} feature importances (coloured by family; lags in red)" + ) + ax.grid(True, axis="x", color="#E5E5E5", linewidth=0.5) + handles = [ + plt.Rectangle((0, 0), 1, 1, color=c) for c in _FAMILY_COLOR.values() + ] + ax.legend(handles, _FAMILY_COLOR.keys(), fontsize=7, loc="lower right") + for spine in ("top", "right"): + ax.spines[spine].set_visible(False) + fig.tight_layout() + return fig + + +def plot_shap_summary( + estimator: object, + X: pd.DataFrame, + *, + max_samples: int = 2000, +) -> Figure: + """SHAP bar-summary plot for a tree-based estimator. + + Ports `_plot_shap` from the chapter-14 team4 script. `X` is subsampled + to at most `max_samples` rows (uniform stride) before computing SHAP + values so the call stays fast even for large training matrices. + + The function uses `shap.TreeExplainer` and + `shap.summary_plot(plot_type="bar", show=False)`, then captures the + current matplotlib figure via `plt.gcf()`. + + Args: + estimator: A fitted tree-based estimator supported by + `shap.TreeExplainer` (e.g. `LGBMRegressor`). + X: Feature matrix; typically the design matrix returned by + `ForecasterRecursive.create_train_X_y`. + max_samples: Maximum number of rows to pass to the SHAP explainer. + Defaults to 2000. + + Returns: + A `matplotlib.figure.Figure`. + + Examples: + ```{python} + #| eval: false + import matplotlib + matplotlib.use("Agg") + import numpy as np + import pandas as pd + from lightgbm import LGBMRegressor + from spotforecast2.plots.diagnostics import plot_shap_summary + + rng = np.random.default_rng(0) + X = pd.DataFrame(rng.standard_normal((200, 5)), + columns=[f"f{i}" for i in range(5)]) + y = X["f0"] + rng.standard_normal(200) * 0.1 + est = LGBMRegressor(n_estimators=20, verbose=-1) + est.fit(X, y) + fig = plot_shap_summary(est, X, max_samples=100) + print(type(fig).__name__) + ``` + """ + import shap + + step = max(1, len(X) // max_samples) + X_sample = X.iloc[::step] + explainer = shap.TreeExplainer(estimator) + sv = explainer.shap_values(X_sample) + shap.summary_plot(sv, X_sample, plot_type="bar", show=False) + fig = plt.gcf() + fig.tight_layout() + return fig + + +def plot_forecast_vs_reference( + forecast: pd.Series, + reference: pd.Series, + *, + forecast_label: str = "forecast", + reference_label: str = "reference", + unit_scale: float = 1e-3, + unit: str = "GW", +) -> Figure: + """Line plot comparing a forecast against an optional reference series. + + Ports `_plot_vs_entsoe` from the chapter-14 team4 script into a general, + label-parametrised form. The reference is reindexed to `forecast.index`; + only the overlapping (non-NaN) timestamps are plotted. If there is no + overlap the reference line is omitted and an INFO message is logged — the + function still returns a valid single-line figure rather than raising. + + Both series are scaled by `unit_scale` before plotting (default `1e-3` + converts MW to GW). + + The overlap MAD (mean absolute deviation between `forecast` and + `reference` over shared timestamps) is logged at INFO level when an + overlap exists. This mirrors the original script's behaviour and is + useful for post-hoc sanity checks in operator logs. + + Args: + forecast: Point forecast series with a `DatetimeIndex`. + reference: Reference series (e.g. ENTSO-E day-ahead forecast). + Will be reindexed to `forecast.index`; NaN rows after reindexing + are treated as "no overlap" for that timestamp. + forecast_label: Legend label for the forecast line. + reference_label: Legend label for the reference line. + unit_scale: Multiplicative scale applied to both series before + plotting. Defaults to `1e-3` (MW → GW). + unit: Unit string used in the y-axis label. + + Returns: + A `matplotlib.figure.Figure`. + + Examples: + ```{python} + import matplotlib + matplotlib.use("Agg") + import numpy as np + import pandas as pd + from spotforecast2.plots.diagnostics import plot_forecast_vs_reference + + idx = pd.date_range("2024-01-15", periods=24, freq="h", tz="UTC") + rng = np.random.default_rng(42) + forecast = pd.Series(40_000 + rng.standard_normal(24) * 1000, index=idx) + reference = pd.Series(41_000 + rng.standard_normal(24) * 800, index=idx) + + fig = plot_forecast_vs_reference( + forecast, reference, + forecast_label="team_4 forecast", + reference_label="ENTSO-E day-ahead", + ) + print(type(fig).__name__) + ``` + """ + ref_aligned = reference.reindex(forecast.index) + overlap = ref_aligned.dropna().index + + fig, ax = plt.subplots(figsize=(8.5, 3.5)) + ax.plot( + forecast.index, + forecast.values * unit_scale, + marker="o", + ms=3, + color="#1F4E79", + label=forecast_label, + ) + if len(overlap) > 0: + ax.plot( + ref_aligned.index, + ref_aligned.values * unit_scale, + marker="x", + ms=4, + color="#E07B00", + label=reference_label, + ) + mad = float((forecast.loc[overlap] - ref_aligned.loc[overlap]).abs().mean()) + logger.info( + "mean |%s - %s| over %d overlap hours: %.0f MW", + forecast_label, + reference_label, + len(overlap), + mad / unit_scale, + ) + else: + logger.info( + "%s not available for forecast period; plotting %s only.", + reference_label, + forecast_label, + ) + ax.set_xlabel("Time (UTC)") + ax.set_ylabel(f"Load [{unit}]") + ax.set_title(f"{forecast_label} vs. {reference_label} — target day") + ax.grid(True, color="#E5E5E5", linewidth=0.5) + ax.legend(fontsize=8) + for spine in ("top", "right"): + ax.spines[spine].set_visible(False) + fig.autofmt_xdate() + fig.tight_layout() + return fig diff --git a/tests/test_plots_diagnostics.py b/tests/test_plots_diagnostics.py new file mode 100644 index 00000000..a0833a5f --- /dev/null +++ b/tests/test_plots_diagnostics.py @@ -0,0 +1,302 @@ +# SPDX-FileCopyrightText: 2026 bartzbeielstein +# SPDX-License-Identifier: AGPL-3.0-or-later + +"""Tests for spotforecast2.plots.diagnostics. + +Headless matplotlib backend must be set BEFORE pyplot is imported. The +`matplotlib.use()` call at module level (before any pyplot import) satisfies +that requirement. +""" + +import matplotlib + +matplotlib.use("Agg", force=True) + +import matplotlib.pyplot as plt # noqa: E402 — must be after matplotlib.use +import numpy as np # noqa: E402 +import pandas as pd # noqa: E402 +import pytest # noqa: E402 +from matplotlib.figure import Figure # noqa: E402 + +from spotforecast2.plots.diagnostics import ( # noqa: E402 + feature_family, + plot_acf_with_lags, + plot_feature_importance_by_family, + plot_forecast_vs_reference, + plot_shap_summary, +) + + +# --------------------------------------------------------------------------- +# Teardown +# --------------------------------------------------------------------------- + + +@pytest.fixture(autouse=True) +def close_all_figures(): + """Close every matplotlib figure after each test to avoid resource leaks.""" + yield + plt.close("all") + + +# --------------------------------------------------------------------------- +# feature_family — exhaustive family-to-name mapping table +# --------------------------------------------------------------------------- + +_FAMILY_CASES = [ + # holiday family + ("holiday_DE", "holiday"), + ("is_holiday", "holiday"), + ("HOLIDAY_NW", "holiday"), + ("brueckentag_NW", "holiday"), + ("BRUECKENTAG_DE", "holiday"), + ("day_after_holiday", "holiday"), + ("day_before_holiday", "holiday"), + # polynomial family + ("poly_hour_2", "polynomial"), + ("poly_dayofweek_3", "polynomial"), + ("POLY_month", "polynomial"), + # weather_window family + ("window_mean_72", "weather_window"), + ("roll_window_168", "weather_window"), + ("WINDOW_std_24", "weather_window"), + # cyclical/RBF family + ("sin_hour", "cyclical/RBF"), + ("cos_hour", "cyclical/RBF"), + ("rbf_hour_0", "cyclical/RBF"), + ("SIN_dayofyear", "cyclical/RBF"), + ("COS_month", "cyclical/RBF"), + # lag family + ("lag_1", "lag"), + ("lag_24", "lag"), + ("lag_168", "lag"), + # weather/other (catch-all) + ("wind_speed_10m", "weather/other"), + ("temperature_2m", "weather/other"), + ("entsoe_forecasted_load", "weather/other"), + ("is_workday", "weather/other"), + ("day_type", "weather/other"), + ("solar_zenith", "weather/other"), +] + + +@pytest.mark.parametrize("name,expected", _FAMILY_CASES) +def test_feature_family_mapping(name, expected): + """Every name-to-family pair in the exhaustive table resolves correctly.""" + assert feature_family(name) == expected + + +def test_feature_family_case_insensitive_for_holiday(): + """Holiday detection is case-insensitive (lowercased internally).""" + assert feature_family("Holiday_NW") == "holiday" + + +def test_feature_family_lag_prefix_only(): + """Only names that START with 'lag' (after lowercasing) map to lag family.""" + assert feature_family("lag_1") == "lag" + # 'flag' contains 'lag' but does NOT start with it + assert feature_family("flag_col") == "weather/other" + + +# --------------------------------------------------------------------------- +# plot_acf_with_lags +# --------------------------------------------------------------------------- + + +def _make_acf(n: int = 60, seed: int = 0) -> pd.DataFrame: + rng = np.random.default_rng(seed) + return pd.DataFrame( + {"lag": np.arange(n), "autocorrelation": rng.uniform(-0.4, 0.4, n)} + ) + + +def test_plot_acf_returns_figure(): + acf = _make_acf(60) + fig = plot_acf_with_lags(acf, key_lags=[1, 24, 48], conf=0.1) + assert isinstance(fig, Figure) + + +def test_plot_acf_annotation_count_equals_present_key_lags(): + """Annotations are drawn only for key_lags that actually appear in acf.""" + acf = _make_acf(50) # lags 0..49 + # 3 of 4 lags are present in the frame (lag 100 is not) + key_lags = [1, 24, 48, 100] + expected_annotations = sum(1 for lag in key_lags if lag in acf["lag"].values) + fig = plot_acf_with_lags(acf, key_lags=key_lags, conf=0.1) + ax = fig.axes[0] + n_annotations = len(ax.texts) + assert n_annotations == expected_annotations + + +def test_plot_acf_empty_key_lags_no_annotations(): + acf = _make_acf(30) + fig = plot_acf_with_lags(acf, key_lags=[], conf=0.1) + ax = fig.axes[0] + assert len(ax.texts) == 0 + + +def test_plot_acf_all_key_lags_missing_no_annotations(): + """key_lags entirely outside the acf range produces zero annotations.""" + acf = _make_acf(10) # lags 0..9 + fig = plot_acf_with_lags(acf, key_lags=[50, 100, 168], conf=0.1) + ax = fig.axes[0] + assert len(ax.texts) == 0 + + +# --------------------------------------------------------------------------- +# plot_feature_importance_by_family +# --------------------------------------------------------------------------- + + +def test_importance_plot_returns_figure(): + names = ["lag_1", "lag_24", "holiday_DE", "wind_speed"] + scores = [100, 80, 60, 40] + fig = plot_feature_importance_by_family(names, scores) + assert isinstance(fig, Figure) + + +def test_importance_plot_top_n_respected(): + """The bar chart contains at most top_n bars.""" + rng = np.random.default_rng(1) + n_features = 30 + names = [f"feature_{i}" for i in range(n_features)] + scores = rng.integers(1, 100, n_features).tolist() + top_n = 10 + fig = plot_feature_importance_by_family(names, scores, top_n=top_n) + ax = fig.axes[0] + # barh produces one patch per bar + assert len(ax.patches) == top_n + + +def test_importance_plot_family_colors_distinct(): + """Known feature names from different families get different bar colors.""" + names = ["lag_1", "holiday_DE", "poly_hour_2", "window_mean_72", + "sin_hour", "wind_speed"] + scores = [100, 90, 80, 70, 60, 50] + fig = plot_feature_importance_by_family(names, scores) + ax = fig.axes[0] + # Each bar's facecolor should be non-grey (grey = unknown family) + face_colors = [p.get_facecolor() for p in ax.patches] + # At minimum the lag bar (red #B22222) and holiday (green #2E8B57) differ + assert len(set(str(c) for c in face_colors)) > 1 + + +def test_importance_plot_top_n_larger_than_features(): + """top_n larger than available features still works (no error).""" + names = ["lag_1", "lag_24"] + scores = [50, 30] + fig = plot_feature_importance_by_family(names, scores, top_n=20) + assert isinstance(fig, Figure) + + +# --------------------------------------------------------------------------- +# plot_shap_summary +# --------------------------------------------------------------------------- + + +def test_shap_summary_returns_figure(): + """Train a tiny LGBMRegressor and verify plot_shap_summary returns Figure.""" + pytest.importorskip("lightgbm") + pytest.importorskip("shap") + from lightgbm import LGBMRegressor + + rng = np.random.default_rng(7) + X = pd.DataFrame( + rng.standard_normal((120, 4)), columns=["f0", "f1", "f2", "f3"] + ) + y = X["f0"] * 2 + rng.standard_normal(120) * 0.1 + est = LGBMRegressor(n_estimators=10, verbose=-1, random_state=0) + est.fit(X, y) + + fig = plot_shap_summary(est, X, max_samples=50) + assert isinstance(fig, Figure) + + +def test_shap_summary_subsamples_max_samples(): + """When X has more rows than max_samples, SHAP only sees a subset. + + We cannot directly inspect the SHAP call, but we verify the function + completes without error and returns a Figure. The subsampling logic + (stride = max(1, len(X) // max_samples)) is verified by the fast runtime. + """ + pytest.importorskip("lightgbm") + pytest.importorskip("shap") + from lightgbm import LGBMRegressor + + rng = np.random.default_rng(9) + X = pd.DataFrame( + rng.standard_normal((500, 3)), columns=["a", "b", "c"] + ) + y = X["a"] + rng.standard_normal(500) * 0.2 + est = LGBMRegressor(n_estimators=10, verbose=-1, random_state=0) + est.fit(X, y) + + fig = plot_shap_summary(est, X, max_samples=50) + assert isinstance(fig, Figure) + + +# --------------------------------------------------------------------------- +# plot_forecast_vs_reference +# --------------------------------------------------------------------------- + + +def _make_forecast(n: int = 24, seed: int = 0) -> pd.Series: + rng = np.random.default_rng(seed) + idx = pd.date_range("2024-01-15", periods=n, freq="h", tz="UTC") + return pd.Series(40_000 + rng.standard_normal(n) * 1000, index=idx) + + +def test_forecast_vs_reference_returns_figure_with_overlap(): + forecast = _make_forecast(24) + reference = _make_forecast(24, seed=1) # same index -> full overlap + fig = plot_forecast_vs_reference( + forecast, reference, + forecast_label="team_4", reference_label="ENTSO-E" + ) + assert isinstance(fig, Figure) + + +def test_forecast_vs_reference_two_lines_when_overlap(): + """Two lines (forecast + reference) should appear when overlap exists.""" + forecast = _make_forecast(24) + reference = _make_forecast(24, seed=2) + fig = plot_forecast_vs_reference(forecast, reference) + ax = fig.axes[0] + assert len(ax.lines) == 2 + + +def test_forecast_vs_reference_one_line_when_empty_overlap(): + """When reference has no overlap with forecast, only the forecast line is drawn.""" + forecast = _make_forecast(24) + # Reference on a completely different index (no overlap) + idx_other = pd.date_range("2025-06-01", periods=24, freq="h", tz="UTC") + rng = np.random.default_rng(3) + reference = pd.Series(40_000 + rng.standard_normal(24) * 500, index=idx_other) + fig = plot_forecast_vs_reference(forecast, reference) + ax = fig.axes[0] + assert len(ax.lines) == 1 + + +def test_forecast_vs_reference_no_exception_on_empty_overlap(): + """Empty overlap must not raise — the function returns a valid figure.""" + forecast = _make_forecast(24) + empty_ref = pd.Series(dtype=float) + # Should not raise + fig = plot_forecast_vs_reference(forecast, empty_ref) + assert isinstance(fig, Figure) + + +def test_forecast_vs_reference_unit_scale_applied(caplog): + """unit_scale=1 leaves values unscaled; verify axis range is ~MW not GW.""" + import logging + + forecast = _make_forecast(24) + reference = _make_forecast(24, seed=5) + with caplog.at_level(logging.INFO, logger="spotforecast2.plots.diagnostics"): + fig = plot_forecast_vs_reference( + forecast, reference, unit_scale=1.0, unit="MW" + ) + ax = fig.axes[0] + # y-axis upper limit should be in the MW range (>1000), not GW (<100) + ymin, ymax = ax.get_ylim() + assert ymax > 1000 From c8079b2a10fbfb97e333c89948eb234fc2381e9d Mon Sep 17 00:00:00 2001 From: bartzbeielstein <32470350+bartzbeielstein@users.noreply.github.com> Date: Fri, 12 Jun 2026 02:50:19 +0200 Subject: [PATCH 3/6] feat(stats): PACF lag selection + search-space boundary report - stats/autocorrelation.py: add select_pacf_lags(), ported from bart26k-lecture/scripts/team4_4zones_submit.py (select_key_lags). Computes PACF up to n_lags, returns top_k significant lags sorted ascending; falls back to caller-supplied list or raises ValueError on degenerate series. - model_selection/boundary.py (new): three search-space boundary helpers promoted from bart26k-lecture/14_team_4_submission.qmd and team4_4zones_submit.py (KB entry 2026-06-08-hyperparameter-boundary- management). report_boundary_positions() logs and returns flagged dims; boundary_report() returns a DataFrame; suggest_bounds() returns a widened search space ready for a round-2 run_task_spotoptim() call. - tests/test_stats_select_pacf_lags.py: 15 tests (AR(1), AR(24), degenerate/constant, fallback, determinism, top_k bounds). - tests/test_model_selection_boundary.py: 27 tests (linear/log10 dims, prefix stripping, bool skip, categorical skip, missing key, logger injection, suggest_bounds widening for float/log/int). - _quarto.yml: register stats.select_pacf_lags and model_selection.boundary in sidebar and quartodoc sections. Co-Authored-By: Claude Fable 5 --- _quarto.yml | 8 +- src/spotforecast2/model_selection/__init__.py | 4 + src/spotforecast2/model_selection/boundary.py | 376 ++++++++++++++++++ src/spotforecast2/stats/__init__.py | 3 +- src/spotforecast2/stats/autocorrelation.py | 131 ++++++ tests/test_model_selection_boundary.py | 284 +++++++++++++ tests/test_stats_select_pacf_lags.py | 159 ++++++++ uv.lock | 2 +- 8 files changed, 964 insertions(+), 3 deletions(-) create mode 100644 src/spotforecast2/model_selection/boundary.py create mode 100644 tests/test_model_selection_boundary.py create mode 100644 tests/test_stats_select_pacf_lags.py diff --git a/_quarto.yml b/_quarto.yml index 63015e8e..298f21f1 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -106,6 +106,8 @@ website: contents: - text: "bayesian_search" file: docs/reference/model_selection.bayesian_search.qmd + - text: "boundary" + file: docs/reference/model_selection.boundary.qmd - text: "grid_search" file: docs/reference/model_selection.grid_search.qmd - text: "random_search" @@ -126,6 +128,8 @@ website: contents: - text: "autocorrelation" file: docs/reference/stats.autocorrelation.qmd + - text: "select_pacf_lags" + file: docs/reference/stats.select_pacf_lags.qmd - section: "Tasks" contents: - text: "task_demo" @@ -222,9 +226,10 @@ quartodoc: - plots.time_series_visualization - title: "Model Selection" - desc: "Search algorithms and cross-validation tools." + desc: "Search algorithms, cross-validation tools, and boundary management." contents: - model_selection.bayesian_search + - model_selection.boundary - model_selection.grid_search - model_selection.random_search - model_selection.spotoptim_search @@ -256,6 +261,7 @@ quartodoc: desc: "Statistical analysis tools." contents: - stats.autocorrelation + - stats.select_pacf_lags - title: "Tasks" desc: "Demonstration and predefined forecasting tasks." diff --git a/src/spotforecast2/model_selection/__init__.py b/src/spotforecast2/model_selection/__init__.py index 5f133488..69102ef1 100644 --- a/src/spotforecast2/model_selection/__init__.py +++ b/src/spotforecast2/model_selection/__init__.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: AGPL-3.0-or-later from .bayesian_search import bayesian_search_forecaster +from .boundary import boundary_report, report_boundary_positions, suggest_bounds from .grid_search import grid_search_forecaster from .random_search import random_search_forecaster from .spotoptim_search import build_warm_start_x0, spotoptim_search_forecaster @@ -12,4 +13,7 @@ "bayesian_search_forecaster", "spotoptim_search_forecaster", "build_warm_start_x0", + "report_boundary_positions", + "boundary_report", + "suggest_bounds", ] diff --git a/src/spotforecast2/model_selection/boundary.py b/src/spotforecast2/model_selection/boundary.py new file mode 100644 index 00000000..3cd46835 --- /dev/null +++ b/src/spotforecast2/model_selection/boundary.py @@ -0,0 +1,376 @@ +# SPDX-FileCopyrightText: 2026 bartzbeielstein +# SPDX-License-Identifier: AGPL-3.0-or-later + +"""Search-space boundary management helpers for hyperparameter tuning. + +After a SpotOptim (or any optimizer) run, the tuned optimum may press against +a search-space boundary — meaning the optimizer wanted to go further but was +constrained. These helpers make that visible and actionable. + +Motivation: KB entry 2026-06-08-hyperparameter-boundary-management documents an +operational case where ``reg_alpha`` pinned at its old ceiling (98.9 % of the +linear range), inflating L1 regularization and flattening the live forecast. +Widening that bound and re-running resolved the issue. The helpers below +systematize that diagnostic loop. + +Three functions are provided: + +- `report_boundary_positions` — logs each dimension's position and returns a + list of flagged names. Decoupled from ``MultiTask``: the caller extracts + params (e.g. ``estimator.get_params()``). Primary operational diagnostic. +- `boundary_report` — returns a ``pd.DataFrame`` with one row per numeric + dimension: position, scale, flag. Companion to `suggest_bounds`. +- `suggest_bounds` — returns a copy of the search space with flagged bounds + widened on the pressed side; pass it straight back to + ``run_task_spotoptim(search_space=...)``. + +Ported from: + +- ``report_boundary_positions``: ``bart26k-lecture/scripts/team4_4zones_submit.py`` + (``report_boundary_positions`` function, section ``@sec-team4-boundary-diagnostics``). +- ``boundary_report`` / ``suggest_bounds``: ``bart26k-lecture/14_team_4_submission.qmd`` + (cell ``team4-boundary-helpers``, section ``@sec-team4-boundary-management``). +""" + +from __future__ import annotations + +import logging +import math +from collections.abc import Mapping +from typing import Any + +import pandas as pd + +_logger = logging.getLogger(__name__) + + +def report_boundary_positions( + params: Mapping[str, float | int], + search_space: Mapping[str, Any], + *, + warn_frac: float = 0.10, + logger: logging.Logger | None = None, +) -> list[str]: + """Log where each tuned value sits inside its numeric search-space interval. + + For each entry in `search_space` that is a 2- or 3-tuple numeric dimension + ``(low, high)`` or ``(low, high, "log10")``, the function: + + - strips the ``"estimator__"`` prefix from the key when looking up the + corresponding entry in `params`; + - skips non-numeric or boolean values; + - computes the position in the dimension's own scale (log10 for log dims, + guarding against ``val <= 0`` or ``low <= 0``); + - flags ``"> upper"`` when ``pos > 1 - warn_frac`` and ``"< lower"`` when + ``pos < warn_frac``; + - logs each dimension at INFO level in a columnar format; + - returns the list of flagged strings (e.g. ``["reg_alpha > upper"]``). + + Categorical dimensions (list-valued entries) and unreadable entries are + skipped. The function never raises — it is a diagnostic and returns an + empty list on any unexpected error. + + Args: + params: Flat dict of parameter names to numeric values, as returned by + ``estimator.get_params()`` or equivalent. Keys should NOT carry the + ``"estimator__"`` prefix (it is stripped from `search_space` keys, + not from `params` keys). + search_space: Dict mapping search-space keys (potentially with + ``"estimator__"`` prefix) to dimension specs: ``(low, high)``, + ``(low, high, "log10")``, or a list of categories. + warn_frac: Fraction of the range (in the dimension's own scale) that + defines the "near-boundary" zone at each end. Default is 0.10. + logger: Logger to use for INFO/WARNING messages. Defaults to the + module-level ``logging.getLogger(__name__)`` logger. + + Returns: + List of flagged dimension strings, e.g. ``["reg_alpha > upper", + "learning_rate < lower"]``. Empty if all dimensions are interior. + + Examples: + Interior optimum — no flags returned: + + ```{python} + from spotforecast2.model_selection.boundary import report_boundary_positions + + params = {"num_leaves": 300, "learning_rate": 0.05} + space = { + "estimator__num_leaves": (8, 1024), + "estimator__learning_rate": (0.005, 0.3, "log10"), + } + flagged = report_boundary_positions(params, space) + print("flagged:", flagged) + assert flagged == [] + ``` + + Near-upper-boundary — flag is returned: + + ```{python} + from spotforecast2.model_selection.boundary import report_boundary_positions + + params = {"reg_alpha": 9.9} + space = {"estimator__reg_alpha": (0.001, 10.0)} + flagged = report_boundary_positions(params, space) + print("flagged:", flagged) + assert flagged == ["reg_alpha > upper"] + ``` + + """ + log = logger if logger is not None else _logger + flagged: list[str] = [] + + for name, spec in search_space.items(): + if not (isinstance(spec, tuple) and len(spec) in (2, 3)): + continue # categorical dim (e.g. "lags") + key = name.split("estimator__", 1)[-1] + try: + val = params.get(key) # type: ignore[arg-type] + if val is None: + continue + if not isinstance(val, (int, float)) or isinstance(val, bool): + continue + low, high = float(spec[0]), float(spec[1]) + is_log = len(spec) == 3 and spec[2] == "log10" + if is_log and (val <= 0 or low <= 0): + continue + if is_log: + pos = (math.log10(val) - math.log10(low)) / ( + math.log10(high) - math.log10(low) + ) + else: + pos = (float(val) - low) / (high - low) + flag = ( + "> upper" + if pos > 1 - warn_frac + else ("< lower" if pos < warn_frac else "") + ) + log.info( + " bound %-18s = %-11.5g in [%g, %g]%s pos=%.2f%s", + key, + val, + low, + high, + " log10" if is_log else "", + pos, + f" <-- {flag}" if flag else "", + ) + if flag: + flagged.append(f"{key} {flag}") + except Exception as exc: # noqa: BLE001 + log.warning("boundary check: skipping %r (unreadable entry: %s)", name, exc) + + if flagged: + log.warning( + "boundary check: %d tuned dim(s) near a bound (%s) -- consider " + "widening that side and re-running.", + len(flagged), + ", ".join(flagged), + ) + else: + log.info("boundary check: all tuned dimensions sit interior.") + + return flagged + + +def boundary_report( + best_params: Mapping[str, float | int], + search_space: Mapping[str, Any], + *, + warn_frac: float = 0.10, +) -> pd.DataFrame: + """Tabulate each tuned value's position inside its search-space bound. + + Returns a DataFrame sorted by descending position, with one row per + numeric dimension. Categorical dimensions are skipped. ``flag`` is one of + ``"> upper"``, ``"< lower"``, or ``""`` (interior). + + This function uses the `search_space` keys as-is (including any + ``"estimator__"`` prefix) to look up matching entries in `best_params`. + The returned ``param`` column strips the ``"estimator__"`` prefix for + readability. + + Ported from ``bart26k-lecture/14_team_4_submission.qmd``, cell + ``team4-boundary-helpers``. + + Args: + best_params: Flat dict of parameter names to values, keyed with the + same names as `search_space` (including any ``"estimator__"`` + prefix). + search_space: Dict mapping parameter names to dimension specs: + ``(low, high)``, ``(low, high, "log10")``, or a list of + categories (skipped). + warn_frac: Fraction of the range (in the dimension's own scale) + defining the "near-boundary" zone. Default is 0.10. + + Returns: + DataFrame with columns ``param``, ``low``, ``high``, ``value``, + ``scale``, ``position``, ``flag``, sorted by ``position`` descending. + + Examples: + Report on a near-upper-boundary value: + + ```{python} + from spotforecast2.model_selection.boundary import boundary_report + + best = { + "estimator__reg_alpha": 9.89, + "estimator__learning_rate": 0.069, + } + space = { + "estimator__reg_alpha": (0.001, 10.0), + "estimator__learning_rate": (0.005, 0.3, "log10"), + } + df = boundary_report(best, space) + print(df.to_string(index=False)) + assert "reg_alpha" in df["param"].values + flagged = df[df["flag"] == "> upper"]["param"].tolist() + assert "reg_alpha" in flagged + ``` + + """ + rows = [] + for name, spec in search_space.items(): + if not (isinstance(spec, tuple) and len(spec) in (2, 3)): + continue # categorical dim + low, high = float(spec[0]), float(spec[1]) + is_log = len(spec) == 3 and spec[2] == "log10" + val = best_params.get(name) # type: ignore[arg-type] + if val is None: + continue + if is_log: + if val <= 0 or low <= 0: + continue + pos = (math.log10(val) - math.log10(low)) / ( + math.log10(high) - math.log10(low) + ) + else: + pos = (float(val) - low) / (high - low) + flag = ( + "> upper" + if pos > 1 - warn_frac + else ("< lower" if pos < warn_frac else "") + ) + rows.append( + { + "param": name.replace("estimator__", ""), + "low": low, + "high": high, + "value": val, + "scale": "log10" if is_log else "linear", + "position": round(pos, 3), + "flag": flag, + } + ) + df = pd.DataFrame(rows) + if df.empty: + return pd.DataFrame(columns=["param", "low", "high", "value", "scale", "position", "flag"]) + return df.sort_values("position", ascending=False).reset_index(drop=True) + + +def suggest_bounds( + best_params: Mapping[str, float | int], + search_space: Mapping[str, Any], + *, + warn_frac: float = 0.10, + widen_factor: float = 10.0, +) -> dict[str, Any]: + """Return a copy of `search_space` with flagged bounds widened. + + Upper-pinned dimensions grow upward (``high * widen_factor`` for float/log, + ``high + (high - low)`` for integer); lower-pinned dimensions grow downward + (``low / widen_factor`` for float/log, ``max(1, low - (high - low))`` for + integer). Interior and categorical dimensions are copied unchanged. + + Pass the result straight back to + ``run_task_spotoptim(search_space=suggest_bounds(...))``. + + Ported from ``bart26k-lecture/14_team_4_submission.qmd``, cell + ``team4-boundary-helpers`` (parameter ``widen`` renamed to ``widen_factor`` + for clarity). + + Args: + best_params: Flat dict of parameter names to values, keyed with the + same names as `search_space` (including any ``"estimator__"`` + prefix). + search_space: Dict mapping parameter names to dimension specs: + ``(low, high)``, ``(low, high, "log10")``, or a list of + categories (returned unchanged). + warn_frac: Fraction of the range (in the dimension's own scale) + defining the "near-boundary" zone. Default is 0.10. + widen_factor: Multiplicative factor for widening float/log bounds. + Integer bounds use an additive span instead. Default is 10.0. + + Returns: + New search-space dict with the same keys as `search_space` but with + boundary-pressed bounds extended on the pressed side. + + Examples: + Upper-pinned float bound is multiplied by widen_factor: + + ```{python} + from spotforecast2.model_selection.boundary import suggest_bounds + + best = {"estimator__reg_alpha": 9.89} + space = {"estimator__reg_alpha": (0.001, 10.0)} + new_space = suggest_bounds(best, space, widen_factor=10.0) + print(new_space) + assert new_space["estimator__reg_alpha"][1] == 100.0 + ``` + + Log-scale upper-pinned bound is also multiplied: + + ```{python} + from spotforecast2.model_selection.boundary import suggest_bounds + + best = {"estimator__reg_alpha": 9.89} + space = {"estimator__reg_alpha": (0.001, 10.0, "log10")} + new_space = suggest_bounds(best, space, widen_factor=10.0) + print(new_space) + assert new_space["estimator__reg_alpha"][1] == 100.0 + ``` + + Integer bound grows additively: + + ```{python} + from spotforecast2.model_selection.boundary import suggest_bounds + + best = {"estimator__n_estimators": 4950} + space = {"estimator__n_estimators": (100, 5000)} + new_space = suggest_bounds(best, space, widen_factor=10.0) + print(new_space) + assert new_space["estimator__n_estimators"][1] == 5000 + (5000 - 100) + ``` + + Interior dimension is returned unchanged: + + ```{python} + from spotforecast2.model_selection.boundary import suggest_bounds + + best = {"estimator__num_leaves": 300} + space = {"estimator__num_leaves": (8, 1024)} + new_space = suggest_bounds(best, space) + assert new_space["estimator__num_leaves"] == (8, 1024) + ``` + + """ + report = boundary_report(best_params, search_space, warn_frac=warn_frac) + flagged = { + row["param"]: row["flag"] + for _, row in report.iterrows() + if row["flag"] + } + out: dict[str, Any] = {} + for name, spec in search_space.items(): + if not (isinstance(spec, tuple) and len(spec) in (2, 3)): + out[name] = spec + continue + low, high, rest = spec[0], spec[1], spec[2:] + is_int = isinstance(low, int) and isinstance(high, int) and not rest + short_name = name.replace("estimator__", "") + flag = flagged.get(short_name, "") + if flag == "> upper": + high = int(high + (high - low)) if is_int else high * widen_factor + elif flag == "< lower": + low = max(1, int(low - (high - low))) if is_int else low / widen_factor + out[name] = (low, high, *rest) + return out diff --git a/src/spotforecast2/stats/__init__.py b/src/spotforecast2/stats/__init__.py index 5ae619be..9ee8a065 100644 --- a/src/spotforecast2/stats/__init__.py +++ b/src/spotforecast2/stats/__init__.py @@ -13,10 +13,11 @@ compute_periodogram, ) -from .autocorrelation import calculate_lag_autocorrelation +from .autocorrelation import calculate_lag_autocorrelation, select_pacf_lags __all__ = [ "augmented_dickey_fuller", "calculate_lag_autocorrelation", "compute_periodogram", + "select_pacf_lags", ] diff --git a/src/spotforecast2/stats/autocorrelation.py b/src/spotforecast2/stats/autocorrelation.py index 241c2d95..e89a6ed4 100644 --- a/src/spotforecast2/stats/autocorrelation.py +++ b/src/spotforecast2/stats/autocorrelation.py @@ -3,11 +3,14 @@ from __future__ import annotations +import logging from importlib.util import find_spec import numpy as np import pandas as pd +_logger = logging.getLogger(__name__) + def calculate_lag_autocorrelation( data: pd.Series | pd.DataFrame, @@ -192,3 +195,131 @@ def calculate_lag_autocorrelation( ) return results + + +def select_pacf_lags( + series: pd.Series, + *, + n_lags: int = 200, + top_k: int = 8, + fallback: list[int] | None = None, +) -> list[int]: + """Select the most informative lags using the partial autocorrelation function. + + Computes the PACF up to `n_lags` via `calculate_lag_autocorrelation`, then + returns the `top_k` lags whose `|PACF|` exceeds the 95 % significance band + (1.96 / sqrt(N)), sorted in ascending order. + + This is a pure-compute helper ported from the operational team-4 pipeline + (`select_key_lags` in ``bart26k-lecture/scripts/team4_4zones_submit.py``). + No plotting, no side effects beyond an optional DEBUG log line. + + Args: + series: The time series from which to estimate lags. Must contain at + least `2 * n_lags + 1` observations for statsmodels PACF to run + without truncating `n_lags`. + n_lags: Number of lags passed to `calculate_lag_autocorrelation`. + Default is 200. + top_k: Maximum number of lags to return (the `top_k` significant lags + ranked by descending `|PACF|`). Default is 8. + fallback: Lag list returned when no lag exceeds the significance band + (degenerate series: too short, nearly constant, or `n_lags` too + large). If `None`, a `ValueError` is raised instead. + + Returns: + Sorted list of lag integers (ascending). Length is at most `top_k`; + may be shorter if fewer than `top_k` significant lags exist. + + Raises: + ValueError: If no lag exceeds the significance band and `fallback` is + `None`. Pass `fallback=[1, 2, 24, 168]` (or any operator-chosen + constant) to suppress the error and use a safe default instead. + + Examples: + Select significant lags from a synthetic AR(1) series: + + ```{python} + import numpy as np + import pandas as pd + from spotforecast2.stats.autocorrelation import select_pacf_lags + + rng = np.random.default_rng(42) + n = 24 * 120 # 120 days of hourly data + ar = np.zeros(n) + for t in range(1, n): + ar[t] = 0.7 * ar[t - 1] + rng.standard_normal() + series = pd.Series(ar) + lags = select_pacf_lags(series, n_lags=50, top_k=8) + print("selected lags:", lags) + assert isinstance(lags, list) + assert all(isinstance(x, int) for x in lags) + assert lags == sorted(lags) + assert len(lags) <= 8 + assert 1 in lags, "lag-1 AR component should be selected" + ``` + + Select significant lags from an AR(24) process — lag 24 dominates: + + ```{python} + import numpy as np + import pandas as pd + from spotforecast2.stats.autocorrelation import select_pacf_lags + + rng = np.random.default_rng(42) + n = 24 * 200 + ar = np.zeros(n) + for t in range(24, n): + ar[t] = 0.8 * ar[t - 24] + rng.standard_normal() + series = pd.Series(ar) + lags = select_pacf_lags(series, n_lags=50, top_k=8) + print("selected lags:", lags) + assert 24 in lags, f"lag 24 expected in {lags}" + ``` + + Degenerate (constant) series — fallback is returned when provided: + + ```{python} + import pandas as pd + from spotforecast2.stats.autocorrelation import select_pacf_lags + + series = pd.Series([1.0] * 50) + result = select_pacf_lags(series, n_lags=10, fallback=[1, 2, 24]) + print("fallback lags:", result) + assert result == [1, 2, 24] + ``` + + Degenerate series with no fallback raises ValueError: + + ```{python} + import pytest + import pandas as pd + from spotforecast2.stats.autocorrelation import select_pacf_lags + + series = pd.Series([1.0] * 50) + with pytest.raises(ValueError, match="no significant"): + select_pacf_lags(series, n_lags=10, fallback=None) + ``` + + """ + acf_df = calculate_lag_autocorrelation(series, n_lags=n_lags) + conf = 1.96 / np.sqrt(len(series)) + significant = acf_df[acf_df["partial_autocorrelation_abs"] > conf] + top = significant.nlargest(top_k, "partial_autocorrelation_abs") + key_lags = sorted(top["lag"].astype(int).tolist()) + _logger.debug( + "select_pacf_lags: N=%d band=+/-%.4f significant=%d top_%d=%s", + len(series), + conf, + len(significant), + top_k, + key_lags, + ) + if not key_lags: + if fallback is not None: + return list(fallback) + raise ValueError( + f"select_pacf_lags: no significant PACF lags found " + f"(N={len(series)}, band={conf:.4f}, n_lags={n_lags}). " + "Pass fallback= to use a safe default instead of raising." + ) + return key_lags diff --git a/tests/test_model_selection_boundary.py b/tests/test_model_selection_boundary.py new file mode 100644 index 00000000..8318d483 --- /dev/null +++ b/tests/test_model_selection_boundary.py @@ -0,0 +1,284 @@ +# SPDX-FileCopyrightText: 2026 bartzbeielstein +# SPDX-License-Identifier: AGPL-3.0-or-later + +"""Tests for spotforecast2.model_selection.boundary.""" + +from __future__ import annotations + +import logging + +from spotforecast2.model_selection.boundary import ( + boundary_report, + report_boundary_positions, + suggest_bounds, +) + + +# --------------------------------------------------------------------------- +# Shared fixtures / helpers +# --------------------------------------------------------------------------- + +LINEAR_SPACE = { + "estimator__num_leaves": (8, 1024), + "estimator__n_estimators": (100, 5000), + "estimator__bagging_fraction": (0.5, 1.0), + "estimator__reg_alpha": (0.001, 10.0), + "lags": ["[1, 2, 24]", "[1, 2, 24, 168]"], # categorical, must be skipped +} + +LOG_SPACE = { + "estimator__learning_rate": (0.005, 0.3, "log10"), + "estimator__reg_alpha": (0.001, 100.0, "log10"), +} + +MIXED_SPACE = {**LINEAR_SPACE, **LOG_SPACE} + + +# --------------------------------------------------------------------------- +# report_boundary_positions +# --------------------------------------------------------------------------- + +class TestReportBoundaryPositions: + def test_interior_no_flag(self): + """An optimum well inside all bounds returns an empty list.""" + params = { + "num_leaves": 300, + "n_estimators": 2000, + "bagging_fraction": 0.75, + "reg_alpha": 1.0, + "learning_rate": 0.05, + } + flagged = report_boundary_positions(params, MIXED_SPACE) + assert flagged == [] + + def test_near_upper_linear_flagged(self): + """A value at 98 % of the linear range is flagged as '> upper'.""" + # reg_alpha = 9.8 in (0.001, 10.0): pos = (9.8 - 0.001) / (10.0 - 0.001) ≈ 0.980 + params = {"reg_alpha": 9.8} + space = {"estimator__reg_alpha": (0.001, 10.0)} + flagged = report_boundary_positions(params, space) + assert flagged == ["reg_alpha > upper"] + + def test_near_lower_linear_flagged(self): + """A value at 2 % of the linear range is flagged as '< lower'.""" + # n_estimators = 190 in (100, 5000): pos = (190 - 100) / (5000 - 100) ≈ 0.018 + params = {"n_estimators": 190} + space = {"estimator__n_estimators": (100, 5000)} + flagged = report_boundary_positions(params, space) + assert flagged == ["n_estimators < lower"] + + def test_log10_dim_near_lower(self): + """In log10 space, a value near the lower decade boundary is flagged.""" + # learning_rate = 0.006 in (0.005, 0.3) log10: + # pos = (log10(0.006) - log10(0.005)) / (log10(0.3) - log10(0.005)) + import math + low, high, val = 0.005, 0.3, 0.006 + pos = (math.log10(val) - math.log10(low)) / (math.log10(high) - math.log10(low)) + assert pos < 0.10 # confirm this is a near-lower case + params = {"learning_rate": val} + space = {"estimator__learning_rate": (low, high, "log10")} + flagged = report_boundary_positions(params, space) + assert flagged == ["learning_rate < lower"] + + def test_estimator_prefix_stripped(self): + """The 'estimator__' prefix is stripped before looking up params.""" + params = {"reg_alpha": 9.9} + space = {"estimator__reg_alpha": (0.001, 10.0)} + flagged = report_boundary_positions(params, space) + assert any("reg_alpha" in f for f in flagged) + + def test_categorical_lags_skipped(self): + """List-valued entries in the search space are silently skipped.""" + params = {} + space = {"lags": ["[1, 2, 24]", "[1, 24, 168]"]} + flagged = report_boundary_positions(params, space) + assert flagged == [] + + def test_bool_param_skipped(self): + """Boolean values are skipped (isinstance(True, int) is True in Python).""" + params = {"verbose": True} + space = {"estimator__verbose": (0, 1)} + flagged = report_boundary_positions(params, space) + assert flagged == [] + + def test_missing_param_key_skipped_with_warning(self, caplog): + """A param key not found in params is skipped; no exception is raised.""" + params = {} # empty — key is missing + space = {"estimator__num_leaves": (8, 1024)} + with caplog.at_level(logging.DEBUG): + flagged = report_boundary_positions(params, space) + assert flagged == [] + # No warning should be emitted for a simply missing key (it is None, + # so we skip via the `val is None` check, not the except branch) + + def test_returned_list_exact_contents(self): + """The returned list contains exactly the flagged strings in order.""" + params = {"reg_alpha": 9.9, "n_estimators": 150} + space = { + "estimator__reg_alpha": (0.001, 10.0), + "estimator__n_estimators": (100, 5000), + } + flagged = report_boundary_positions(params, space) + # reg_alpha is near upper; n_estimators 150 in (100, 5000) = pos 0.01 → < lower + assert "reg_alpha > upper" in flagged + assert "n_estimators < lower" in flagged + + def test_logger_injection(self, caplog): + """A custom logger passed via the logger= argument is used for messages.""" + custom_logger = logging.getLogger("test_custom_boundary_logger") + params = {"num_leaves": 300} + space = {"estimator__num_leaves": (8, 1024)} + with caplog.at_level(logging.INFO, logger="test_custom_boundary_logger"): + report_boundary_positions(params, space, logger=custom_logger) + # The custom logger should have emitted at least one INFO message + assert any( + r.name == "test_custom_boundary_logger" for r in caplog.records + ) + + def test_multiple_flags(self): + """Multiple near-boundary dims all appear in the returned list.""" + params = {"reg_alpha": 9.9, "learning_rate": 0.29} + space = { + "estimator__reg_alpha": (0.001, 10.0), + "estimator__learning_rate": (0.005, 0.3, "log10"), + } + flagged = report_boundary_positions(params, space) + assert len(flagged) == 2 + + def test_log_invalid_val_skipped(self): + """A zero or negative value in a log10 dim is silently skipped.""" + params = {"reg_alpha": 0.0} + space = {"estimator__reg_alpha": (0.001, 10.0, "log10")} + flagged = report_boundary_positions(params, space) + assert flagged == [] + + +# --------------------------------------------------------------------------- +# boundary_report (DataFrame form) +# --------------------------------------------------------------------------- + +class TestBoundaryReport: + def test_returns_dataframe(self): + best = {"estimator__reg_alpha": 9.89, "estimator__learning_rate": 0.069} + space = { + "estimator__reg_alpha": (0.001, 10.0), + "estimator__learning_rate": (0.005, 0.3, "log10"), + } + df = boundary_report(best, space) + assert set(df.columns) == {"param", "low", "high", "value", "scale", "position", "flag"} + + def test_flagged_near_upper(self): + best = {"estimator__reg_alpha": 9.89} + space = {"estimator__reg_alpha": (0.001, 10.0)} + df = boundary_report(best, space) + row = df[df["param"] == "reg_alpha"].iloc[0] + assert row["flag"] == "> upper" + assert row["scale"] == "linear" + + def test_log10_scale(self): + best = {"estimator__learning_rate": 0.069} + space = {"estimator__learning_rate": (0.005, 0.3, "log10")} + df = boundary_report(best, space) + row = df[df["param"] == "learning_rate"].iloc[0] + assert row["scale"] == "log10" + assert 0.0 < row["position"] < 1.0 + + def test_categorical_skipped(self): + best = {} + space = {"lags": ["[1, 2, 24]"]} + df = boundary_report(best, space) + assert df.empty + + def test_sorted_descending_position(self): + best = { + "estimator__reg_alpha": 9.89, # near upper → high position + "estimator__num_leaves": 300, # interior + } + space = { + "estimator__reg_alpha": (0.001, 10.0), + "estimator__num_leaves": (8, 1024), + } + df = boundary_report(best, space) + assert df["position"].iloc[0] >= df["position"].iloc[-1] + + def test_prefix_stripped_in_param_column(self): + best = {"estimator__num_leaves": 512} + space = {"estimator__num_leaves": (8, 1024)} + df = boundary_report(best, space) + assert "num_leaves" in df["param"].values + assert "estimator__num_leaves" not in df["param"].values + + +# --------------------------------------------------------------------------- +# suggest_bounds +# --------------------------------------------------------------------------- + +class TestSuggestBounds: + def test_interior_unchanged(self): + best = {"estimator__num_leaves": 300} + space = {"estimator__num_leaves": (8, 1024)} + new_space = suggest_bounds(best, space) + assert new_space["estimator__num_leaves"] == (8, 1024) + + def test_upper_pinned_float_multiplied(self): + """Upper-pinned float bound: high * widen_factor.""" + best = {"estimator__reg_alpha": 9.89} + space = {"estimator__reg_alpha": (0.001, 10.0)} + new_space = suggest_bounds(best, space, widen_factor=10.0) + assert new_space["estimator__reg_alpha"][1] == 100.0 + + def test_upper_pinned_log_multiplied(self): + """Upper-pinned log10 bound: high * widen_factor.""" + best = {"estimator__reg_alpha": 9.89} + space = {"estimator__reg_alpha": (0.001, 10.0, "log10")} + new_space = suggest_bounds(best, space, widen_factor=10.0) + assert new_space["estimator__reg_alpha"][1] == 100.0 + assert new_space["estimator__reg_alpha"][2] == "log10" + + def test_upper_pinned_int_additive(self): + """Upper-pinned integer bound: high + (high - low).""" + best = {"estimator__n_estimators": 4950} + space = {"estimator__n_estimators": (100, 5000)} + new_space = suggest_bounds(best, space, widen_factor=10.0) + assert new_space["estimator__n_estimators"][1] == 5000 + (5000 - 100) + + def test_lower_pinned_float_divided(self): + """Lower-pinned float bound: low / widen_factor.""" + best = {"estimator__learning_rate": 0.006} + space = {"estimator__learning_rate": (0.005, 0.3, "log10")} + new_space = suggest_bounds(best, space, widen_factor=10.0) + assert abs(new_space["estimator__learning_rate"][0] - 0.0005) < 1e-10 + + def test_categorical_unchanged(self): + """Categorical (list) entries pass through unchanged.""" + best = {} + space = {"lags": ["[1, 2, 24]", "[1, 24, 168]"]} + new_space = suggest_bounds(best, space) + assert new_space["lags"] == space["lags"] + + def test_all_keys_preserved(self): + """The returned dict has exactly the same keys as search_space.""" + best = {"num_leaves": 512, "learning_rate": 0.05} + space = { + "estimator__num_leaves": (8, 1024), + "estimator__learning_rate": (0.005, 0.3, "log10"), + "lags": ["[1, 2, 24]"], + } + new_space = suggest_bounds(best, space) + assert set(new_space.keys()) == set(space.keys()) + + def test_widen_factor_parameter(self): + """Different widen_factor values produce different bounds.""" + best = {"estimator__reg_alpha": 9.89} + space = {"estimator__reg_alpha": (0.001, 10.0)} + new5 = suggest_bounds(best, space, widen_factor=5.0) + new10 = suggest_bounds(best, space, widen_factor=10.0) + assert new10["estimator__reg_alpha"][1] > new5["estimator__reg_alpha"][1] + + def test_lower_pinned_int_additive_floor_1(self): + """Lower-pinned integer bound floors at 1: max(1, low - (high - low)).""" + best = {"estimator__n_estimators": 105} # near lower end of (100, 5000) + space = {"estimator__n_estimators": (100, 5000)} + new_space = suggest_bounds(best, space, widen_factor=10.0) + # new low = max(1, 100 - (5000 - 100)) = max(1, -4800) = 1 + assert new_space["estimator__n_estimators"][0] >= 1 diff --git a/tests/test_stats_select_pacf_lags.py b/tests/test_stats_select_pacf_lags.py new file mode 100644 index 00000000..95abc872 --- /dev/null +++ b/tests/test_stats_select_pacf_lags.py @@ -0,0 +1,159 @@ +# SPDX-FileCopyrightText: 2026 bartzbeielstein +# SPDX-License-Identifier: AGPL-3.0-or-later + +"""Tests for spotforecast2.stats.autocorrelation.select_pacf_lags.""" + +from __future__ import annotations + +import numpy as np +import pandas as pd +import pytest + +from spotforecast2.stats.autocorrelation import select_pacf_lags + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _make_daily_ar_series(n_days: int = 120, seed: int = 42) -> pd.Series: + """AR(1) with moderate coefficient for general lag selection tests.""" + rng = np.random.default_rng(seed) + n = n_days * 24 + ar = np.zeros(n) + for t in range(1, n): + ar[t] = 0.7 * ar[t - 1] + rng.standard_normal() + return pd.Series(ar) + + +def _make_ar24_series(n_days: int = 200, seed: int = 42) -> pd.Series: + """AR(24) process — lag 24 dominates the PACF (strong daily periodicity).""" + rng = np.random.default_rng(seed) + n = n_days * 24 + ar = np.zeros(n) + for t in range(24, n): + ar[t] = 0.8 * ar[t - 24] + rng.standard_normal() + return pd.Series(ar) + + +# --------------------------------------------------------------------------- +# Happy-path tests +# --------------------------------------------------------------------------- + +class TestSelectPacfLagsHappyPath: + def test_returns_list_of_ints(self): + series = _make_daily_ar_series() + lags = select_pacf_lags(series, n_lags=50, top_k=8) + assert isinstance(lags, list) + assert all(isinstance(x, int) for x in lags) + + def test_sorted_ascending(self): + series = _make_daily_ar_series() + lags = select_pacf_lags(series, n_lags=50, top_k=8) + assert lags == sorted(lags) + + def test_top_k_respected(self): + series = _make_daily_ar_series() + for k in (1, 4, 8): + lags = select_pacf_lags(series, n_lags=50, top_k=k) + assert len(lags) <= k + + def test_lag_1_selected_ar1_component(self): + """AR(1) coefficient 0.7 should produce a strongly significant lag-1 PACF.""" + series = _make_daily_ar_series() + lags = select_pacf_lags(series, n_lags=50, top_k=8) + assert 1 in lags, f"expected lag 1 in {lags}" + + def test_daily_lag_24_in_top_k(self): + """An AR(24) process must have lag 24 in the top-k selected lags.""" + series = _make_ar24_series(n_days=200) + lags = select_pacf_lags(series, n_lags=50, top_k=8) + assert 24 in lags, f"expected lag 24 in {lags} (AR(24) process)" + + def test_determinism(self): + """Same series, same parameters → same result every call.""" + series = _make_daily_ar_series(seed=0) + r1 = select_pacf_lags(series, n_lags=50, top_k=6) + r2 = select_pacf_lags(series, n_lags=50, top_k=6) + assert r1 == r2 + + def test_n_lags_limits_search(self): + """Returned lags must not exceed n_lags.""" + series = _make_daily_ar_series() + n_lags = 30 + lags = select_pacf_lags(series, n_lags=n_lags, top_k=8) + assert all(lag <= n_lags for lag in lags) + + +# --------------------------------------------------------------------------- +# Degenerate / edge-case tests +# --------------------------------------------------------------------------- + +class TestSelectPacfLagsDegenerate: + def test_constant_series_fallback_returned(self): + series = pd.Series([1.0] * 50) + result = select_pacf_lags(series, n_lags=10, fallback=[1, 2, 24]) + assert result == [1, 2, 24] + + def test_constant_series_no_fallback_raises(self): + series = pd.Series([1.0] * 50) + with pytest.raises(ValueError, match="no significant"): + select_pacf_lags(series, n_lags=10, fallback=None) + + def test_very_short_series_fallback(self): + """Series shorter than 2*n_lags+1; PACF falls back on statsmodels truncation. + + No lag should pass the wide confidence band for a near-white-noise short series. + We do not mandate whether ValueError or fallback is triggered; only that either + the fallback list is returned or a ValueError is raised. + """ + rng = np.random.default_rng(7) + series = pd.Series(rng.standard_normal(20)) + try: + result = select_pacf_lags(series, n_lags=5, top_k=2, fallback=[1, 2]) + # if no significant lags, fallback should be returned + assert isinstance(result, list) + except ValueError: + pass # also acceptable + + def test_fallback_list_copied(self): + """The returned list must be a copy, not the same object as fallback.""" + series = pd.Series([0.0] * 50) + fb = [1, 2, 24] + result = select_pacf_lags(series, n_lags=10, fallback=fb) + assert result is not fb + + def test_fallback_none_is_default(self): + """fallback=None is the default; constant series raises ValueError.""" + series = pd.Series([3.14] * 50) + with pytest.raises(ValueError): + select_pacf_lags(series, n_lags=10) + + +# --------------------------------------------------------------------------- +# Parameter variation +# --------------------------------------------------------------------------- + +class TestSelectPacfLagsParams: + def test_default_n_lags_200_top_k_8(self): + """Default parameters work on a long series without error.""" + series = _make_daily_ar_series(n_days=500) + lags = select_pacf_lags(series) + assert len(lags) <= 8 + assert lags == sorted(lags) + + def test_top_k_1(self): + series = _make_daily_ar_series() + lags = select_pacf_lags(series, n_lags=50, top_k=1) + assert len(lags) == 1 + + def test_large_top_k_bounded_by_significant(self): + """If fewer than top_k significant lags exist, return only those.""" + series = _make_daily_ar_series() + # Use a large top_k; returned list must still be <= n_sig + lags_k3 = select_pacf_lags(series, n_lags=5, top_k=3) + lags_k100 = select_pacf_lags(series, n_lags=5, top_k=100) + # Both share the same significant set (for n_lags=5); the larger top_k + # cannot return MORE lags, only the same. + assert set(lags_k3).issubset(set(lags_k100)) + assert len(lags_k100) >= len(lags_k3) diff --git a/uv.lock b/uv.lock index ddac7dee..6e344191 100644 --- a/uv.lock +++ b/uv.lock @@ -3491,7 +3491,7 @@ wheels = [ [[package]] name = "spotforecast2" -version = "8.0.0rc1" +version = "8.0.0" source = { editable = "." } dependencies = [ { name = "astral" }, From 5a7857c1d9ac012a47e975354381dc0af7ba43fc Mon Sep 17 00:00:00 2001 From: bartzbeielstein <32470350+bartzbeielstein@users.noreply.github.com> Date: Fri, 12 Jun 2026 03:13:17 +0200 Subject: [PATCH 4/6] fix(plots): apply code-review fixes to diagnostics.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit FIX 1 (blocker): plot_forecast_vs_reference MAD log now uses `mad * unit_scale` (not `mad / unit_scale`) so the value is reported in the display unit (GW) rather than inflated by 1000×. Log format updated to include the unit label. Strengthened test asserts logged value is 1.0 GW for a constant 1000 MW offset. FIX 2: plot_shap_summary docstring notes the figure is harvested via plt.gcf() (shap API limitation), the function is not thread-safe, and callers must close the returned figure before other pyplot work. FIX 3: plot_shap_summary docstring replaces "at most max_samples rows" with the precise stride description. FIX 4: Remove feature_family from plots/__init__.py re-exports and __all__; it is a string classifier, not a plot function. Remains public at spotforecast2.plots.diagnostics.feature_family. Tests already used the full module path — no changes needed there. FIX 5: Remove dead n_annotated variable in plot_acf_with_lags. FIX 6: Remove per-example matplotlib.use("Agg") calls from the four docstring Examples; examples still avoid plt.show(). FIX 7: Add two feature_family precedence cases to the parametrized table: "poly_window_24" -> "polynomial" and "sin_window" -> "weather_window", pinning holiday > poly > window > cyclical > lag. Co-Authored-By: Claude Fable 5 --- src/spotforecast2/plots/__init__.py | 2 -- src/spotforecast2/plots/diagnostics.py | 26 ++++++---------- tests/test_plots_diagnostics.py | 42 ++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 18 deletions(-) diff --git a/src/spotforecast2/plots/__init__.py b/src/spotforecast2/plots/__init__.py index 5b545892..7f14046a 100644 --- a/src/spotforecast2/plots/__init__.py +++ b/src/spotforecast2/plots/__init__.py @@ -2,7 +2,6 @@ # SPDX-License-Identifier: AGPL-3.0-or-later from .diagnostics import ( - feature_family, plot_acf_with_lags, plot_feature_importance_by_family, plot_forecast_vs_reference, @@ -20,7 +19,6 @@ ) __all__ = [ - "feature_family", "plot_acf_with_lags", "plot_distribution", "plot_distributions", diff --git a/src/spotforecast2/plots/diagnostics.py b/src/spotforecast2/plots/diagnostics.py index 9839ab53..f5d68b34 100644 --- a/src/spotforecast2/plots/diagnostics.py +++ b/src/spotforecast2/plots/diagnostics.py @@ -116,8 +116,6 @@ def plot_acf_with_lags( ```{python} import numpy as np import pandas as pd - import matplotlib - matplotlib.use("Agg") from spotforecast2.plots.diagnostics import plot_acf_with_lags rng = np.random.default_rng(0) @@ -131,7 +129,6 @@ def plot_acf_with_lags( ax.bar(acf["lag"], acf["autocorrelation"], width=0.9, color="#1F4E79") ax.axhline(conf, color="#999999", lw=0.8, ls="--") ax.axhline(-conf, color="#999999", lw=0.8, ls="--") - n_annotated = 0 for lag in key_lags: row = acf[acf["lag"] == lag] if row.empty: @@ -146,7 +143,6 @@ def plot_acf_with_lags( color="#E07B00", arrowprops=dict(arrowstyle="->", color="#E07B00", lw=0.8), ) - n_annotated += 1 ax.set_xlabel("lag (hours)") ax.set_ylabel("autocorrelation") ax.set_title("ACF of Actual Load (annotated: data-selected key_lags)") @@ -180,8 +176,6 @@ def plot_feature_importance_by_family( Examples: ```{python} - import matplotlib - matplotlib.use("Agg") from spotforecast2.plots.diagnostics import plot_feature_importance_by_family names = ["lag_1", "lag_24", "holiday_DE", "wind_speed_10m", @@ -224,20 +218,23 @@ def plot_shap_summary( """SHAP bar-summary plot for a tree-based estimator. Ports `_plot_shap` from the chapter-14 team4 script. `X` is subsampled - to at most `max_samples` rows (uniform stride) before computing SHAP + to approximately `max_samples` rows (uniform stride `len(X) // max_samples`; + lengths just above `max_samples` are passed in full) before computing SHAP values so the call stays fast even for large training matrices. The function uses `shap.TreeExplainer` and `shap.summary_plot(plot_type="bar", show=False)`, then captures the - current matplotlib figure via `plt.gcf()`. + current matplotlib figure via `plt.gcf()`. Because the figure is harvested + from the global pyplot state this function is **not thread-safe**. Callers + must close the returned figure (e.g. `plt.close(fig)`) before performing + other pyplot work. Args: estimator: A fitted tree-based estimator supported by `shap.TreeExplainer` (e.g. `LGBMRegressor`). X: Feature matrix; typically the design matrix returned by `ForecasterRecursive.create_train_X_y`. - max_samples: Maximum number of rows to pass to the SHAP explainer. - Defaults to 2000. + max_samples: Row budget passed to the SHAP explainer. Defaults to 2000. Returns: A `matplotlib.figure.Figure`. @@ -245,8 +242,6 @@ def plot_shap_summary( Examples: ```{python} #| eval: false - import matplotlib - matplotlib.use("Agg") import numpy as np import pandas as pd from lightgbm import LGBMRegressor @@ -315,8 +310,6 @@ def plot_forecast_vs_reference( Examples: ```{python} - import matplotlib - matplotlib.use("Agg") import numpy as np import pandas as pd from spotforecast2.plots.diagnostics import plot_forecast_vs_reference @@ -357,11 +350,12 @@ def plot_forecast_vs_reference( ) mad = float((forecast.loc[overlap] - ref_aligned.loc[overlap]).abs().mean()) logger.info( - "mean |%s - %s| over %d overlap hours: %.0f MW", + "mean |%s - %s| over %d overlap hours: %.1f %s", forecast_label, reference_label, len(overlap), - mad / unit_scale, + mad * unit_scale, + unit, ) else: logger.info( diff --git a/tests/test_plots_diagnostics.py b/tests/test_plots_diagnostics.py index a0833a5f..d8b4fdb5 100644 --- a/tests/test_plots_diagnostics.py +++ b/tests/test_plots_diagnostics.py @@ -56,10 +56,14 @@ def close_all_figures(): ("poly_hour_2", "polynomial"), ("poly_dayofweek_3", "polynomial"), ("POLY_month", "polynomial"), + # precedence: poly beats window + ("poly_window_24", "polynomial"), # weather_window family ("window_mean_72", "weather_window"), ("roll_window_168", "weather_window"), ("WINDOW_std_24", "weather_window"), + # precedence: window beats cyclical + ("sin_window", "weather_window"), # cyclical/RBF family ("sin_hour", "cyclical/RBF"), ("cos_hour", "cyclical/RBF"), @@ -300,3 +304,41 @@ def test_forecast_vs_reference_unit_scale_applied(caplog): # y-axis upper limit should be in the MW range (>1000), not GW (<100) ymin, ymax = ax.get_ylim() assert ymax > 1000 + + +def test_forecast_vs_reference_mad_logged_in_display_unit(caplog): + """MAD is logged in the display unit (GW), not in raw MW. + + With a constant offset of 1000 MW and unit_scale=1e-3, the logged value + must be 1.0 GW. The old bug (mad / unit_scale) would have logged 1000000. + """ + import logging + import re + + idx = pd.date_range("2024-01-15", periods=24, freq="h", tz="UTC") + forecast = pd.Series(np.full(24, 40_000.0), index=idx) + # Exact constant offset of 1000 MW → MAD = 1000 MW = 1.0 GW + reference = pd.Series(np.full(24, 41_000.0), index=idx) + + with caplog.at_level(logging.INFO, logger="spotforecast2.plots.diagnostics"): + plot_forecast_vs_reference( + forecast, + reference, + forecast_label="fc", + reference_label="ref", + unit_scale=1e-3, + unit="GW", + ) + + # Find the MAD log message and extract the numeric value + mad_messages = [r for r in caplog.records if "overlap hours" in r.message] + assert mad_messages, "Expected MAD log message not found in caplog" + msg = mad_messages[0].message + # Extract the number immediately before " GW" + match = re.search(r"([\d.]+)\s+GW", msg) + assert match, f"Could not parse GW value from log message: {msg!r}" + logged_value = float(match.group(1)) + assert logged_value == pytest.approx(1.0, abs=0.05), ( + f"Logged MAD should be 1.0 GW, got {logged_value}. " + f"Full message: {msg!r}" + ) From 62af0dddc84de20cbf4145dbf961852d404a3643 Mon Sep 17 00:00:00 2001 From: bartzbeielstein <32470350+bartzbeielstein@users.noreply.github.com> Date: Fri, 12 Jun 2026 03:13:41 +0200 Subject: [PATCH 5/6] fix(stats,model_selection): code-review fixes for PACF lag selection and boundary helpers FIX 1 (_quarto.yml): remove invalid `stats.select_pacf_lags` quartodoc contents entry and matching sidebar item; select_pacf_lags is a function inside stats/autocorrelation, already rendered as a section of stats.autocorrelation.qmd. Delete stale docs/reference/stats.select_pacf_lags.qmd. FIX 2 (boundary.py): expand module docstring with a prominent paragraph contrasting the two key-prefix conventions (report_boundary_positions strips "estimator__", boundary_report/suggest_bounds use full keys). Add logger.warning in boundary_report when result is empty but search_space contains numeric tuple dims, suggesting a key-convention mismatch. FIX 3 (uv.lock): restore uv.lock to develop's version; branch-specific lock hunk dropped. FIX 4 (test_model_selection_boundary.py): change test_all_keys_preserved to use "estimator__"-prefixed best_params so the flagged/widened code path is actually exercised; add asserts that the near-upper-boundary integer dim was widened and the interior log dim is unchanged. FIX 5 (test_stats_select_pacf_lags.py): remove dead `except ValueError: pass` arm from test_very_short_series_fallback (never raises with fallback given); add separate test_very_short_series_no_fallback_raises asserting ValueError on constant series without fallback. FIX 6 (boundary.py): add `if isinstance(val, bool): continue` guard in boundary_report mirroring report_boundary_positions; note boolean skip in docstring. FIX 7 (autocorrelation.py): coerce fallback elements to int via `[int(x) for x in fallback]`; note the coercion in the fallback param docstring. Co-Authored-By: Claude Fable 5 --- _quarto.yml | 3 -- src/spotforecast2/model_selection/boundary.py | 39 ++++++++++++++++++- src/spotforecast2/stats/autocorrelation.py | 5 ++- tests/test_model_selection_boundary.py | 16 +++++++- tests/test_stats_select_pacf_lags.py | 25 +++++++----- uv.lock | 2 +- 6 files changed, 70 insertions(+), 20 deletions(-) diff --git a/_quarto.yml b/_quarto.yml index 298f21f1..e596b288 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -128,8 +128,6 @@ website: contents: - text: "autocorrelation" file: docs/reference/stats.autocorrelation.qmd - - text: "select_pacf_lags" - file: docs/reference/stats.select_pacf_lags.qmd - section: "Tasks" contents: - text: "task_demo" @@ -261,7 +259,6 @@ quartodoc: desc: "Statistical analysis tools." contents: - stats.autocorrelation - - stats.select_pacf_lags - title: "Tasks" desc: "Demonstration and predefined forecasting tasks." diff --git a/src/spotforecast2/model_selection/boundary.py b/src/spotforecast2/model_selection/boundary.py index 3cd46835..bd40373e 100644 --- a/src/spotforecast2/model_selection/boundary.py +++ b/src/spotforecast2/model_selection/boundary.py @@ -24,6 +24,25 @@ widened on the pressed side; pass it straight back to ``run_task_spotoptim(search_space=...)``. +**Key convention difference — prefix handling:** + +``report_boundary_positions`` strips the ``"estimator__"`` prefix from each +search-space key before looking up the value in ``params``. The ``params`` +dict is expected to come from ``estimator.get_params()`` (scikit-learn style), +which returns UN-prefixed keys such as ``"reg_alpha"`` — not +``"estimator__reg_alpha"``. + +``boundary_report`` and ``suggest_bounds`` look up ``best_params`` using the +search-space key **as-is**, including any ``"estimator__"`` prefix. The +``best_params`` dict is expected to come from a SpotOptim result, which stores +FULL search-space keys (e.g. ``"estimator__reg_alpha"``). + +Mixing conventions (passing ``get_params()``-style keys to ``boundary_report``, +or SpotOptim result keys to ``report_boundary_positions``) will silently produce +an empty result because no key matches. If ``boundary_report`` returns an empty +DataFrame for a space with numeric dimensions, a key-convention mismatch is the +most likely cause. + Ported from: - ``report_boundary_positions``: ``bart26k-lecture/scripts/team4_4zones_submit.py`` @@ -181,8 +200,8 @@ def boundary_report( """Tabulate each tuned value's position inside its search-space bound. Returns a DataFrame sorted by descending position, with one row per - numeric dimension. Categorical dimensions are skipped. ``flag`` is one of - ``"> upper"``, ``"< lower"``, or ``""`` (interior). + numeric dimension. Categorical and boolean-valued dimensions are skipped. + ``flag`` is one of ``"> upper"``, ``"< lower"``, or ``""`` (interior). This function uses the `search_space` keys as-is (including any ``"estimator__"`` prefix) to look up matching entries in `best_params`. @@ -237,6 +256,8 @@ def boundary_report( val = best_params.get(name) # type: ignore[arg-type] if val is None: continue + if isinstance(val, bool): + continue # bool is a subclass of int; skip to avoid false positions if is_log: if val <= 0 or low <= 0: continue @@ -263,6 +284,20 @@ def boundary_report( ) df = pd.DataFrame(rows) if df.empty: + numeric_dims = sum( + 1 + for spec in search_space.values() + if isinstance(spec, tuple) and len(spec) in (2, 3) + ) + if numeric_dims > 0: + _logger.warning( + "boundary_report: result is empty but search_space has %d numeric " + "dimension(s). Check key-convention: best_params must use the FULL " + "search-space keys (e.g. 'estimator__reg_alpha'), not the unprefixed " + "get_params() style ('reg_alpha'). " + "Use report_boundary_positions() instead if you have unprefixed keys.", + numeric_dims, + ) return pd.DataFrame(columns=["param", "low", "high", "value", "scale", "position", "flag"]) return df.sort_values("position", ascending=False).reset_index(drop=True) diff --git a/src/spotforecast2/stats/autocorrelation.py b/src/spotforecast2/stats/autocorrelation.py index e89a6ed4..9b4027c3 100644 --- a/src/spotforecast2/stats/autocorrelation.py +++ b/src/spotforecast2/stats/autocorrelation.py @@ -224,7 +224,8 @@ def select_pacf_lags( ranked by descending `|PACF|`). Default is 8. fallback: Lag list returned when no lag exceeds the significance band (degenerate series: too short, nearly constant, or `n_lags` too - large). If `None`, a `ValueError` is raised instead. + large). Elements are coerced to `int` before returning. If `None`, + a `ValueError` is raised instead. Returns: Sorted list of lag integers (ascending). Length is at most `top_k`; @@ -316,7 +317,7 @@ def select_pacf_lags( ) if not key_lags: if fallback is not None: - return list(fallback) + return [int(x) for x in fallback] raise ValueError( f"select_pacf_lags: no significant PACF lags found " f"(N={len(series)}, band={conf:.4f}, n_lags={n_lags}). " diff --git a/tests/test_model_selection_boundary.py b/tests/test_model_selection_boundary.py index 8318d483..1386bdc3 100644 --- a/tests/test_model_selection_boundary.py +++ b/tests/test_model_selection_boundary.py @@ -257,8 +257,16 @@ def test_categorical_unchanged(self): assert new_space["lags"] == space["lags"] def test_all_keys_preserved(self): - """The returned dict has exactly the same keys as search_space.""" - best = {"num_leaves": 512, "learning_rate": 0.05} + """The returned dict has exactly the same keys as search_space. + + Uses prefixed keys (SpotOptim-result style) so the flagged/widened path + is exercised for at least one dimension. + """ + # num_leaves=1000 is near upper of (8, 1024) — should be widened + best = { + "estimator__num_leaves": 1000, + "estimator__learning_rate": 0.05, + } space = { "estimator__num_leaves": (8, 1024), "estimator__learning_rate": (0.005, 0.3, "log10"), @@ -266,6 +274,10 @@ def test_all_keys_preserved(self): } new_space = suggest_bounds(best, space) assert set(new_space.keys()) == set(space.keys()) + # The near-upper-boundary integer dim must have been widened upward + assert new_space["estimator__num_leaves"][1] > 1024 + # Interior log10 dim is unchanged + assert new_space["estimator__learning_rate"] == space["estimator__learning_rate"] def test_widen_factor_parameter(self): """Different widen_factor values produce different bounds.""" diff --git a/tests/test_stats_select_pacf_lags.py b/tests/test_stats_select_pacf_lags.py index 95abc872..7cf687d3 100644 --- a/tests/test_stats_select_pacf_lags.py +++ b/tests/test_stats_select_pacf_lags.py @@ -101,20 +101,25 @@ def test_constant_series_no_fallback_raises(self): select_pacf_lags(series, n_lags=10, fallback=None) def test_very_short_series_fallback(self): - """Series shorter than 2*n_lags+1; PACF falls back on statsmodels truncation. + """Series shorter than 2*n_lags+1; statsmodels truncates n_lags internally. - No lag should pass the wide confidence band for a near-white-noise short series. - We do not mandate whether ValueError or fallback is triggered; only that either - the fallback list is returned or a ValueError is raised. + With fallback provided the function must return either the fallback list + (no significant lags found) or a valid list of ints (some lags pass the + wide confidence band for a very short series). It must never raise. """ rng = np.random.default_rng(7) series = pd.Series(rng.standard_normal(20)) - try: - result = select_pacf_lags(series, n_lags=5, top_k=2, fallback=[1, 2]) - # if no significant lags, fallback should be returned - assert isinstance(result, list) - except ValueError: - pass # also acceptable + result = select_pacf_lags(series, n_lags=5, top_k=2, fallback=[1, 2]) + assert isinstance(result, list) + assert all(isinstance(x, int) for x in result) + + def test_very_short_series_no_fallback_raises(self): + """Same short series without fallback: ValueError is raised when no + significant lags are found (constant-like after statsmodels truncation). + """ + series = pd.Series([1.0] * 12) # constant → no significant lags, no fallback + with pytest.raises(ValueError, match="no significant"): + select_pacf_lags(series, n_lags=5, top_k=2, fallback=None) def test_fallback_list_copied(self): """The returned list must be a copy, not the same object as fallback.""" diff --git a/uv.lock b/uv.lock index 6e344191..ddac7dee 100644 --- a/uv.lock +++ b/uv.lock @@ -3491,7 +3491,7 @@ wheels = [ [[package]] name = "spotforecast2" -version = "8.0.0" +version = "8.0.0rc1" source = { editable = "." } dependencies = [ { name = "astral" }, From 6eacc37ed1df960bff6b289c0facbef19263de91 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Fri, 12 Jun 2026 01:33:40 +0000 Subject: [PATCH 6/6] chore(release): 8.1.0-rc.1 [skip ci] ## [8.1.0-rc.1](https://github.com/sequential-parameter-optimization/spotforecast2/compare/v8.0.0...v8.1.0-rc.1) (2026-06-12) ### Features * **plots:** operational diagnostics plots (ACF, importance-by-family, SHAP summary, forecast-vs-reference) ([0bbc0c6](https://github.com/sequential-parameter-optimization/spotforecast2/commit/0bbc0c6586502f5f0466fa4d24a92a8bdfcec00f)) * **stats:** PACF lag selection + search-space boundary report ([c8079b2](https://github.com/sequential-parameter-optimization/spotforecast2/commit/c8079b2a10fbfb97e333c89948eb234fc2381e9d)) ### Bug Fixes * **plots:** apply code-review fixes to diagnostics.py ([5a7857c](https://github.com/sequential-parameter-optimization/spotforecast2/commit/5a7857c1d9ac012a47e975354381dc0af7ba43fc)) * **stats,model_selection:** code-review fixes for PACF lag selection and boundary helpers ([62af0dd](https://github.com/sequential-parameter-optimization/spotforecast2/commit/62af0dddc84de20cbf4145dbf961852d404a3643)) --- CHANGELOG.md | 12 ++++++++++++ pyproject.toml | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8595eaba..7e306160 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,15 @@ +## [8.1.0-rc.1](https://github.com/sequential-parameter-optimization/spotforecast2/compare/v8.0.0...v8.1.0-rc.1) (2026-06-12) + +### Features + +* **plots:** operational diagnostics plots (ACF, importance-by-family, SHAP summary, forecast-vs-reference) ([0bbc0c6](https://github.com/sequential-parameter-optimization/spotforecast2/commit/0bbc0c6586502f5f0466fa4d24a92a8bdfcec00f)) +* **stats:** PACF lag selection + search-space boundary report ([c8079b2](https://github.com/sequential-parameter-optimization/spotforecast2/commit/c8079b2a10fbfb97e333c89948eb234fc2381e9d)) + +### Bug Fixes + +* **plots:** apply code-review fixes to diagnostics.py ([5a7857c](https://github.com/sequential-parameter-optimization/spotforecast2/commit/5a7857c1d9ac012a47e975354381dc0af7ba43fc)) +* **stats,model_selection:** code-review fixes for PACF lag selection and boundary helpers ([62af0dd](https://github.com/sequential-parameter-optimization/spotforecast2/commit/62af0dddc84de20cbf4145dbf961852d404a3643)) + ## [8.0.0](https://github.com/sequential-parameter-optimization/spotforecast2/compare/v7.1.0...v8.0.0) (2026-06-11) ### ⚠ BREAKING CHANGES diff --git a/pyproject.toml b/pyproject.toml index 697539ef..c4fbca15 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "spotforecast2" -version = "8.0.0" +version = "8.1.0-rc.1" description = "Forecasting with spot" readme = "README.md" license = { text = "AGPL-3.0-or-later" }