From 5da55ffc4b123a8a76b1caf944e25dc2ce298ee7 Mon Sep 17 00:00:00 2001
From: semantic-release-bot <semantic-release-bot@martynus.net>
Date: Thu, 11 Jun 2026 00:04:18 +0000
Subject: [PATCH 1/6] chore(release): 8.0.0-rc.2 [skip ci]

## [8.0.0-rc.2](https://github.com/sequential-parameter-optimization/spotforecast2/compare/v8.0.0-rc.1...v8.0.0-rc.2) (2026-06-11)

### Features

* **deps:** require spotforecast2-safe >=22.0.0 ([13893ad](https://github.com/sequential-parameter-optimization/spotforecast2/commit/13893ad9173ef6988fad50614c633676dbaea9d3))
---
 CHANGELOG.md   | 6 ++++++
 pyproject.toml | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ff7b8d93..c176a978 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,9 @@
+## [8.0.0-rc.2](https://github.com/sequential-parameter-optimization/spotforecast2/compare/v8.0.0-rc.1...v8.0.0-rc.2) (2026-06-11)
+
+### Features
+
+* **deps:** require spotforecast2-safe >=22.0.0 ([13893ad](https://github.com/sequential-parameter-optimization/spotforecast2/commit/13893ad9173ef6988fad50614c633676dbaea9d3))
+
 ## [8.0.0-rc.1](https://github.com/sequential-parameter-optimization/spotforecast2/compare/v7.1.0...v8.0.0-rc.1) (2026-06-10)
 
 ### ⚠ BREAKING CHANGES
diff --git a/pyproject.toml b/pyproject.toml
index 8264d3c2..fdcca4b6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "spotforecast2"
-version = "8.0.0-rc.1"
+version = "8.0.0-rc.2"
 description = "Forecasting with spot"
 readme = "README.md"
 license = { text = "AGPL-3.0-or-later" }

From 0bbc0c6586502f5f0466fa4d24a92a8bdfcec00f Mon Sep 17 00:00:00 2001
From: bartzbeielstein <32470350+bartzbeielstein@users.noreply.github.com>
Date: Fri, 12 Jun 2026 02:49:26 +0200
Subject: [PATCH 2/6] feat(plots): operational diagnostics plots (ACF,
 importance-by-family, SHAP summary, forecast-vs-reference)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ports five matplotlib helpers from bart26k-lecture/scripts/team4_4zones_submit.py
into a reusable, stateless public API in spotforecast2.plots.diagnostics:

- feature_family(name) — maps LGBMRegressor feature names to six diagnostic families
- plot_acf_with_lags(acf, key_lags, conf) — bar chart with confidence band and annotated key lags
- plot_feature_importance_by_family(names, importances, top_n) — top-N barh coloured by family
- plot_shap_summary(estimator, X, max_samples) — subsampled TreeExplainer bar summary
- plot_forecast_vs_reference(forecast, reference, ...) — generalised forecast-vs-ENTSO-E line plot

All functions return matplotlib.figure.Figure; the caller saves/closes.
No plt.show(), no matplotlib.use(), no global state mutation.
Overlap MAD is logged at INFO via the module logger (preserved from the original).
44 new tests in tests/test_plots_diagnostics.py; 1207 tests pass total.
Registered in _quarto.yml sidebar + quartodoc contents (plots.diagnostics).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 _quarto.yml                            |   5 +-
 src/spotforecast2/plots/__init__.py    |  12 +
 src/spotforecast2/plots/diagnostics.py | 381 +++++++++++++++++++++++++
 tests/test_plots_diagnostics.py        | 302 ++++++++++++++++++++
 4 files changed, 699 insertions(+), 1 deletion(-)
 create mode 100644 src/spotforecast2/plots/diagnostics.py
 create mode 100644 tests/test_plots_diagnostics.py

diff --git a/_quarto.yml b/_quarto.yml
index 63015e8e..ec490fa3 100644
--- a/_quarto.yml
+++ b/_quarto.yml
@@ -116,6 +116,8 @@ website:
                 file: docs/reference/model_selection.utils_metrics.qmd
           - section: "Plots"
             contents:
+              - text: "diagnostics"
+                file: docs/reference/plots.diagnostics.qmd
               - text: "outlier_plots"
                 file: docs/reference/plots.outlier_plots.qmd
               - text: "plotter"
@@ -215,8 +217,9 @@ quartodoc:
 
   sections:
     - title: "Plots"
-      desc: "Outlier, prediction, and time-series visualization tools."
+      desc: "Outlier, prediction, time-series, and operational diagnostic visualization tools."
       contents:
+        - plots.diagnostics
         - plots.outlier_plots
         - plots.plotter
         - plots.time_series_visualization
diff --git a/src/spotforecast2/plots/__init__.py b/src/spotforecast2/plots/__init__.py
index ba0923de..5b545892 100644
--- a/src/spotforecast2/plots/__init__.py
+++ b/src/spotforecast2/plots/__init__.py
@@ -1,6 +1,13 @@
 # SPDX-FileCopyrightText: 2026 bartzbeielstein
 # SPDX-License-Identifier: AGPL-3.0-or-later
 
+from .diagnostics import (
+    feature_family,
+    plot_acf_with_lags,
+    plot_feature_importance_by_family,
+    plot_forecast_vs_reference,
+    plot_shap_summary,
+)
 from .distribution import plot_distribution, plot_distributions
 from .outlier_plots import (
     visualize_outliers_hist,
@@ -13,9 +20,14 @@
 )
 
 __all__ = [
+    "feature_family",
+    "plot_acf_with_lags",
     "plot_distribution",
     "plot_distributions",
+    "plot_feature_importance_by_family",
+    "plot_forecast_vs_reference",
     "plot_periodogram",
+    "plot_shap_summary",
     "visualize_outliers_hist",
     "visualize_outliers_plotly_scatter",
     "visualize_ts_plotly",
diff --git a/src/spotforecast2/plots/diagnostics.py b/src/spotforecast2/plots/diagnostics.py
new file mode 100644
index 00000000..9839ab53
--- /dev/null
+++ b/src/spotforecast2/plots/diagnostics.py
@@ -0,0 +1,381 @@
+# SPDX-FileCopyrightText: 2026 bartzbeielstein
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+"""Operational diagnostic plots for energy-load forecasting pipelines.
+
+Ports five matplotlib helpers from the chapter-14 team4 operational script
+(`bart26k-lecture/scripts/team4_4zones_submit.py`) into a reusable, stateless
+API.  All functions return a `matplotlib.figure.Figure`; the caller is
+responsible for saving and closing it.  No `plt.show()` is called and the
+backend is never changed here (set `matplotlib.use("Agg")` before importing
+`pyplot` in headless environments).
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Sequence
+
+import matplotlib.pyplot as plt
+import pandas as pd
+from matplotlib.figure import Figure
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Family colour map — identical to the chapter-14 script so PNG diagnostics
+# look the same whether run from the script or via this module.
+# ---------------------------------------------------------------------------
+_FAMILY_COLOR: dict[str, str] = {
+    "lag": "#B22222",
+    "weather/other": "#E07B00",
+    "weather_window": "#F0A33C",
+    "cyclical/RBF": "#1F4E79",
+    "holiday": "#2E8B57",
+    "polynomial": "#7B5EA7",
+}
+
+
+# ---------------------------------------------------------------------------
+# Public helpers
+# ---------------------------------------------------------------------------
+
+
+def feature_family(name: str) -> str:
+    """Map a feature name to its diagnostic family label.
+
+    This is the public, importable version of the `family_of` helper used
+    inside the chapter-14 team4 operational script.  The mapping is
+    intentionally coarse — it covers the feature names that `ConfigEntsoe`
+    and `ForecasterRecursive` generate and is used exclusively for colour
+    grouping in `plot_feature_importance_by_family`.
+
+    Args:
+        name: Feature name as returned by `LGBMRegressor.feature_name_`.
+
+    Returns:
+        One of: `"holiday"`, `"polynomial"`, `"weather_window"`,
+        `"cyclical/RBF"`, `"lag"`, `"weather/other"`.
+
+    Examples:
+        ```{python}
+        from spotforecast2.plots.diagnostics import feature_family
+
+        print(feature_family("holiday_DE"))
+        print(feature_family("brueckentag_NW"))
+        print(feature_family("poly_hour_2"))
+        print(feature_family("window_mean_72"))
+        print(feature_family("sin_hour"))
+        print(feature_family("lag_1"))
+        print(feature_family("wind_speed_10m"))
+        ```
+    """
+    c = name.lower()
+    if "holiday" in c or "brueckentag" in c:
+        return "holiday"
+    if "poly" in c:
+        return "polynomial"
+    if "window" in c:
+        return "weather_window"
+    if any(k in c for k in ("sin", "cos", "rbf")):
+        return "cyclical/RBF"
+    if c.startswith("lag"):
+        return "lag"
+    return "weather/other"
+
+
+def plot_acf_with_lags(
+    acf: pd.DataFrame,
+    key_lags: Sequence[int],
+    conf: float,
+) -> Figure:
+    """Bar chart of autocorrelation values with annotated key lags.
+
+    Ports `_plot_acf` from the chapter-14 team4 script.  The `acf` DataFrame
+    is the output of
+    `spotforecast2.stats.autocorrelation.calculate_lag_autocorrelation` and
+    must contain at minimum the columns `"lag"` and `"autocorrelation"`.
+
+    Confidence-band lines at `+conf` / `-conf` are drawn as dashed grey
+    horizontal lines.  Each lag in `key_lags` that is present in `acf["lag"]`
+    gets an orange arrow annotation.  Lags not found in the frame are silently
+    skipped.
+
+    Args:
+        acf: DataFrame with columns `"lag"` (int) and `"autocorrelation"`
+            (float), as returned by `calculate_lag_autocorrelation`.
+        key_lags: Sequence of lag values to annotate (e.g. the PACF-selected
+            lags from the pipeline).
+        conf: Half-width of the confidence band, typically
+            `1.96 / sqrt(n_obs)`.
+
+    Returns:
+        A `matplotlib.figure.Figure`.
+
+    Examples:
+        ```{python}
+        import numpy as np
+        import pandas as pd
+        import matplotlib
+        matplotlib.use("Agg")
+        from spotforecast2.plots.diagnostics import plot_acf_with_lags
+
+        rng = np.random.default_rng(0)
+        n = 100
+        acf = pd.DataFrame({"lag": range(n), "autocorrelation": rng.uniform(-0.3, 0.3, n)})
+        fig = plot_acf_with_lags(acf, key_lags=[1, 24, 48], conf=0.1)
+        print(type(fig).__name__)
+        ```
+    """
+    fig, ax = plt.subplots(figsize=(8.5, 3.0))
+    ax.bar(acf["lag"], acf["autocorrelation"], width=0.9, color="#1F4E79")
+    ax.axhline(conf, color="#999999", lw=0.8, ls="--")
+    ax.axhline(-conf, color="#999999", lw=0.8, ls="--")
+    n_annotated = 0
+    for lag in key_lags:
+        row = acf[acf["lag"] == lag]
+        if row.empty:
+            continue
+        val = float(row["autocorrelation"].iloc[0])
+        ax.annotate(
+            f"lag {lag}",
+            xy=(lag, val),
+            xytext=(lag, val + 0.08),
+            ha="center",
+            fontsize=8,
+            color="#E07B00",
+            arrowprops=dict(arrowstyle="->", color="#E07B00", lw=0.8),
+        )
+        n_annotated += 1
+    ax.set_xlabel("lag (hours)")
+    ax.set_ylabel("autocorrelation")
+    ax.set_title("ACF of Actual Load (annotated: data-selected key_lags)")
+    ax.grid(True, color="#E5E5E5", linewidth=0.5)
+    for spine in ("top", "right"):
+        ax.spines[spine].set_visible(False)
+    fig.tight_layout()
+    return fig
+
+
+def plot_feature_importance_by_family(
+    names: Sequence[str],
+    importances: Sequence[float],
+    *,
+    top_n: int = 20,
+) -> Figure:
+    """Horizontal bar chart of the top-N feature importances, coloured by family.
+
+    Ports `_plot_importance` from the chapter-14 team4 script.  Feature
+    families are determined by `feature_family`; the colour palette is the
+    same as in the script so diagnostics look identical.
+
+    Args:
+        names: Feature names (e.g. `fc.estimator.feature_name_`).
+        importances: Corresponding importance scores (e.g.
+            `fc.estimator.feature_importances_`).
+        top_n: Number of top features to display.  Defaults to 20.
+
+    Returns:
+        A `matplotlib.figure.Figure`.
+
+    Examples:
+        ```{python}
+        import matplotlib
+        matplotlib.use("Agg")
+        from spotforecast2.plots.diagnostics import plot_feature_importance_by_family
+
+        names = ["lag_1", "lag_24", "holiday_DE", "wind_speed_10m",
+                 "poly_hour_2", "window_mean_72", "sin_hour"]
+        scores = [100, 80, 60, 55, 40, 35, 20]
+        fig = plot_feature_importance_by_family(names, scores, top_n=5)
+        print(type(fig).__name__)
+        ```
+    """
+    ranking = sorted(
+        zip(names, importances), key=lambda kv: kv[1], reverse=True
+    )[:top_n]
+    labels = [n for n, _ in ranking][::-1]
+    values = [v for _, v in ranking][::-1]
+    colors = [_FAMILY_COLOR.get(feature_family(n), "#888888") for n in labels]
+
+    fig, ax = plt.subplots(figsize=(8.5, 5.5))
+    ax.barh(labels, values, color=colors)
+    ax.set_xlabel("split count (feature importance)")
+    ax.set_title(
+        f"Top-{top_n} feature importances (coloured by family; lags in red)"
+    )
+    ax.grid(True, axis="x", color="#E5E5E5", linewidth=0.5)
+    handles = [
+        plt.Rectangle((0, 0), 1, 1, color=c) for c in _FAMILY_COLOR.values()
+    ]
+    ax.legend(handles, _FAMILY_COLOR.keys(), fontsize=7, loc="lower right")
+    for spine in ("top", "right"):
+        ax.spines[spine].set_visible(False)
+    fig.tight_layout()
+    return fig
+
+
+def plot_shap_summary(
+    estimator: object,
+    X: pd.DataFrame,
+    *,
+    max_samples: int = 2000,
+) -> Figure:
+    """SHAP bar-summary plot for a tree-based estimator.
+
+    Ports `_plot_shap` from the chapter-14 team4 script.  `X` is subsampled
+    to at most `max_samples` rows (uniform stride) before computing SHAP
+    values so the call stays fast even for large training matrices.
+
+    The function uses `shap.TreeExplainer` and
+    `shap.summary_plot(plot_type="bar", show=False)`, then captures the
+    current matplotlib figure via `plt.gcf()`.
+
+    Args:
+        estimator: A fitted tree-based estimator supported by
+            `shap.TreeExplainer` (e.g. `LGBMRegressor`).
+        X: Feature matrix; typically the design matrix returned by
+            `ForecasterRecursive.create_train_X_y`.
+        max_samples: Maximum number of rows to pass to the SHAP explainer.
+            Defaults to 2000.
+
+    Returns:
+        A `matplotlib.figure.Figure`.
+
+    Examples:
+        ```{python}
+        #| eval: false
+        import matplotlib
+        matplotlib.use("Agg")
+        import numpy as np
+        import pandas as pd
+        from lightgbm import LGBMRegressor
+        from spotforecast2.plots.diagnostics import plot_shap_summary
+
+        rng = np.random.default_rng(0)
+        X = pd.DataFrame(rng.standard_normal((200, 5)),
+                         columns=[f"f{i}" for i in range(5)])
+        y = X["f0"] + rng.standard_normal(200) * 0.1
+        est = LGBMRegressor(n_estimators=20, verbose=-1)
+        est.fit(X, y)
+        fig = plot_shap_summary(est, X, max_samples=100)
+        print(type(fig).__name__)
+        ```
+    """
+    import shap
+
+    step = max(1, len(X) // max_samples)
+    X_sample = X.iloc[::step]
+    explainer = shap.TreeExplainer(estimator)
+    sv = explainer.shap_values(X_sample)
+    shap.summary_plot(sv, X_sample, plot_type="bar", show=False)
+    fig = plt.gcf()
+    fig.tight_layout()
+    return fig
+
+
+def plot_forecast_vs_reference(
+    forecast: pd.Series,
+    reference: pd.Series,
+    *,
+    forecast_label: str = "forecast",
+    reference_label: str = "reference",
+    unit_scale: float = 1e-3,
+    unit: str = "GW",
+) -> Figure:
+    """Line plot comparing a forecast against an optional reference series.
+
+    Ports `_plot_vs_entsoe` from the chapter-14 team4 script into a general,
+    label-parametrised form.  The reference is reindexed to `forecast.index`;
+    only the overlapping (non-NaN) timestamps are plotted.  If there is no
+    overlap the reference line is omitted and an INFO message is logged — the
+    function still returns a valid single-line figure rather than raising.
+
+    Both series are scaled by `unit_scale` before plotting (default `1e-3`
+    converts MW to GW).
+
+    The overlap MAD (mean absolute deviation between `forecast` and
+    `reference` over shared timestamps) is logged at INFO level when an
+    overlap exists.  This mirrors the original script's behaviour and is
+    useful for post-hoc sanity checks in operator logs.
+
+    Args:
+        forecast: Point forecast series with a `DatetimeIndex`.
+        reference: Reference series (e.g. ENTSO-E day-ahead forecast).
+            Will be reindexed to `forecast.index`; NaN rows after reindexing
+            are treated as "no overlap" for that timestamp.
+        forecast_label: Legend label for the forecast line.
+        reference_label: Legend label for the reference line.
+        unit_scale: Multiplicative scale applied to both series before
+            plotting.  Defaults to `1e-3` (MW → GW).
+        unit: Unit string used in the y-axis label.
+
+    Returns:
+        A `matplotlib.figure.Figure`.
+
+    Examples:
+        ```{python}
+        import matplotlib
+        matplotlib.use("Agg")
+        import numpy as np
+        import pandas as pd
+        from spotforecast2.plots.diagnostics import plot_forecast_vs_reference
+
+        idx = pd.date_range("2024-01-15", periods=24, freq="h", tz="UTC")
+        rng = np.random.default_rng(42)
+        forecast = pd.Series(40_000 + rng.standard_normal(24) * 1000, index=idx)
+        reference = pd.Series(41_000 + rng.standard_normal(24) * 800, index=idx)
+
+        fig = plot_forecast_vs_reference(
+            forecast, reference,
+            forecast_label="team_4 forecast",
+            reference_label="ENTSO-E day-ahead",
+        )
+        print(type(fig).__name__)
+        ```
+    """
+    ref_aligned = reference.reindex(forecast.index)
+    overlap = ref_aligned.dropna().index
+
+    fig, ax = plt.subplots(figsize=(8.5, 3.5))
+    ax.plot(
+        forecast.index,
+        forecast.values * unit_scale,
+        marker="o",
+        ms=3,
+        color="#1F4E79",
+        label=forecast_label,
+    )
+    if len(overlap) > 0:
+        ax.plot(
+            ref_aligned.index,
+            ref_aligned.values * unit_scale,
+            marker="x",
+            ms=4,
+            color="#E07B00",
+            label=reference_label,
+        )
+        mad = float((forecast.loc[overlap] - ref_aligned.loc[overlap]).abs().mean())
+        logger.info(
+            "mean |%s - %s| over %d overlap hours: %.0f MW",
+            forecast_label,
+            reference_label,
+            len(overlap),
+            mad / unit_scale,
+        )
+    else:
+        logger.info(
+            "%s not available for forecast period; plotting %s only.",
+            reference_label,
+            forecast_label,
+        )
+    ax.set_xlabel("Time (UTC)")
+    ax.set_ylabel(f"Load [{unit}]")
+    ax.set_title(f"{forecast_label} vs. {reference_label} — target day")
+    ax.grid(True, color="#E5E5E5", linewidth=0.5)
+    ax.legend(fontsize=8)
+    for spine in ("top", "right"):
+        ax.spines[spine].set_visible(False)
+    fig.autofmt_xdate()
+    fig.tight_layout()
+    return fig
diff --git a/tests/test_plots_diagnostics.py b/tests/test_plots_diagnostics.py
new file mode 100644
index 00000000..a0833a5f
--- /dev/null
+++ b/tests/test_plots_diagnostics.py
@@ -0,0 +1,302 @@
+# SPDX-FileCopyrightText: 2026 bartzbeielstein
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+"""Tests for spotforecast2.plots.diagnostics.
+
+Headless matplotlib backend must be set BEFORE pyplot is imported.  The
+`matplotlib.use()` call at module level (before any pyplot import) satisfies
+that requirement.
+"""
+
+import matplotlib
+
+matplotlib.use("Agg", force=True)
+
+import matplotlib.pyplot as plt  # noqa: E402 — must be after matplotlib.use
+import numpy as np  # noqa: E402
+import pandas as pd  # noqa: E402
+import pytest  # noqa: E402
+from matplotlib.figure import Figure  # noqa: E402
+
+from spotforecast2.plots.diagnostics import (  # noqa: E402
+    feature_family,
+    plot_acf_with_lags,
+    plot_feature_importance_by_family,
+    plot_forecast_vs_reference,
+    plot_shap_summary,
+)
+
+
+# ---------------------------------------------------------------------------
+# Teardown
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(autouse=True)
+def close_all_figures():
+    """Close every matplotlib figure after each test to avoid resource leaks."""
+    yield
+    plt.close("all")
+
+
+# ---------------------------------------------------------------------------
+# feature_family — exhaustive family-to-name mapping table
+# ---------------------------------------------------------------------------
+
+_FAMILY_CASES = [
+    # holiday family
+    ("holiday_DE", "holiday"),
+    ("is_holiday", "holiday"),
+    ("HOLIDAY_NW", "holiday"),
+    ("brueckentag_NW", "holiday"),
+    ("BRUECKENTAG_DE", "holiday"),
+    ("day_after_holiday", "holiday"),
+    ("day_before_holiday", "holiday"),
+    # polynomial family
+    ("poly_hour_2", "polynomial"),
+    ("poly_dayofweek_3", "polynomial"),
+    ("POLY_month", "polynomial"),
+    # weather_window family
+    ("window_mean_72", "weather_window"),
+    ("roll_window_168", "weather_window"),
+    ("WINDOW_std_24", "weather_window"),
+    # cyclical/RBF family
+    ("sin_hour", "cyclical/RBF"),
+    ("cos_hour", "cyclical/RBF"),
+    ("rbf_hour_0", "cyclical/RBF"),
+    ("SIN_dayofyear", "cyclical/RBF"),
+    ("COS_month", "cyclical/RBF"),
+    # lag family
+    ("lag_1", "lag"),
+    ("lag_24", "lag"),
+    ("lag_168", "lag"),
+    # weather/other (catch-all)
+    ("wind_speed_10m", "weather/other"),
+    ("temperature_2m", "weather/other"),
+    ("entsoe_forecasted_load", "weather/other"),
+    ("is_workday", "weather/other"),
+    ("day_type", "weather/other"),
+    ("solar_zenith", "weather/other"),
+]
+
+
+@pytest.mark.parametrize("name,expected", _FAMILY_CASES)
+def test_feature_family_mapping(name, expected):
+    """Every name-to-family pair in the exhaustive table resolves correctly."""
+    assert feature_family(name) == expected
+
+
+def test_feature_family_case_insensitive_for_holiday():
+    """Holiday detection is case-insensitive (lowercased internally)."""
+    assert feature_family("Holiday_NW") == "holiday"
+
+
+def test_feature_family_lag_prefix_only():
+    """Only names that START with 'lag' (after lowercasing) map to lag family."""
+    assert feature_family("lag_1") == "lag"
+    # 'flag' contains 'lag' but does NOT start with it
+    assert feature_family("flag_col") == "weather/other"
+
+
+# ---------------------------------------------------------------------------
+# plot_acf_with_lags
+# ---------------------------------------------------------------------------
+
+
+def _make_acf(n: int = 60, seed: int = 0) -> pd.DataFrame:
+    rng = np.random.default_rng(seed)
+    return pd.DataFrame(
+        {"lag": np.arange(n), "autocorrelation": rng.uniform(-0.4, 0.4, n)}
+    )
+
+
+def test_plot_acf_returns_figure():
+    acf = _make_acf(60)
+    fig = plot_acf_with_lags(acf, key_lags=[1, 24, 48], conf=0.1)
+    assert isinstance(fig, Figure)
+
+
+def test_plot_acf_annotation_count_equals_present_key_lags():
+    """Annotations are drawn only for key_lags that actually appear in acf."""
+    acf = _make_acf(50)  # lags 0..49
+    # 3 of 4 lags are present in the frame (lag 100 is not)
+    key_lags = [1, 24, 48, 100]
+    expected_annotations = sum(1 for lag in key_lags if lag in acf["lag"].values)
+    fig = plot_acf_with_lags(acf, key_lags=key_lags, conf=0.1)
+    ax = fig.axes[0]
+    n_annotations = len(ax.texts)
+    assert n_annotations == expected_annotations
+
+
+def test_plot_acf_empty_key_lags_no_annotations():
+    acf = _make_acf(30)
+    fig = plot_acf_with_lags(acf, key_lags=[], conf=0.1)
+    ax = fig.axes[0]
+    assert len(ax.texts) == 0
+
+
+def test_plot_acf_all_key_lags_missing_no_annotations():
+    """key_lags entirely outside the acf range produces zero annotations."""
+    acf = _make_acf(10)  # lags 0..9
+    fig = plot_acf_with_lags(acf, key_lags=[50, 100, 168], conf=0.1)
+    ax = fig.axes[0]
+    assert len(ax.texts) == 0
+
+
+# ---------------------------------------------------------------------------
+# plot_feature_importance_by_family
+# ---------------------------------------------------------------------------
+
+
+def test_importance_plot_returns_figure():
+    names = ["lag_1", "lag_24", "holiday_DE", "wind_speed"]
+    scores = [100, 80, 60, 40]
+    fig = plot_feature_importance_by_family(names, scores)
+    assert isinstance(fig, Figure)
+
+
+def test_importance_plot_top_n_respected():
+    """The bar chart contains at most top_n bars."""
+    rng = np.random.default_rng(1)
+    n_features = 30
+    names = [f"feature_{i}" for i in range(n_features)]
+    scores = rng.integers(1, 100, n_features).tolist()
+    top_n = 10
+    fig = plot_feature_importance_by_family(names, scores, top_n=top_n)
+    ax = fig.axes[0]
+    # barh produces one patch per bar
+    assert len(ax.patches) == top_n
+
+
+def test_importance_plot_family_colors_distinct():
+    """Known feature names from different families get different bar colors."""
+    names = ["lag_1", "holiday_DE", "poly_hour_2", "window_mean_72",
+             "sin_hour", "wind_speed"]
+    scores = [100, 90, 80, 70, 60, 50]
+    fig = plot_feature_importance_by_family(names, scores)
+    ax = fig.axes[0]
+    # Each bar's facecolor should be non-grey (grey = unknown family)
+    face_colors = [p.get_facecolor() for p in ax.patches]
+    # At minimum the lag bar (red #B22222) and holiday (green #2E8B57) differ
+    assert len(set(str(c) for c in face_colors)) > 1
+
+
+def test_importance_plot_top_n_larger_than_features():
+    """top_n larger than available features still works (no error)."""
+    names = ["lag_1", "lag_24"]
+    scores = [50, 30]
+    fig = plot_feature_importance_by_family(names, scores, top_n=20)
+    assert isinstance(fig, Figure)
+
+
+# ---------------------------------------------------------------------------
+# plot_shap_summary
+# ---------------------------------------------------------------------------
+
+
+def test_shap_summary_returns_figure():
+    """Train a tiny LGBMRegressor and verify plot_shap_summary returns Figure."""
+    pytest.importorskip("lightgbm")
+    pytest.importorskip("shap")
+    from lightgbm import LGBMRegressor
+
+    rng = np.random.default_rng(7)
+    X = pd.DataFrame(
+        rng.standard_normal((120, 4)), columns=["f0", "f1", "f2", "f3"]
+    )
+    y = X["f0"] * 2 + rng.standard_normal(120) * 0.1
+    est = LGBMRegressor(n_estimators=10, verbose=-1, random_state=0)
+    est.fit(X, y)
+
+    fig = plot_shap_summary(est, X, max_samples=50)
+    assert isinstance(fig, Figure)
+
+
+def test_shap_summary_subsamples_max_samples():
+    """When X has more rows than max_samples, SHAP only sees a subset.
+
+    We cannot directly inspect the SHAP call, but we verify the function
+    completes without error and returns a Figure.  The subsampling logic
+    (stride = max(1, len(X) // max_samples)) is verified by the fast runtime.
+    """
+    pytest.importorskip("lightgbm")
+    pytest.importorskip("shap")
+    from lightgbm import LGBMRegressor
+
+    rng = np.random.default_rng(9)
+    X = pd.DataFrame(
+        rng.standard_normal((500, 3)), columns=["a", "b", "c"]
+    )
+    y = X["a"] + rng.standard_normal(500) * 0.2
+    est = LGBMRegressor(n_estimators=10, verbose=-1, random_state=0)
+    est.fit(X, y)
+
+    fig = plot_shap_summary(est, X, max_samples=50)
+    assert isinstance(fig, Figure)
+
+
+# ---------------------------------------------------------------------------
+# plot_forecast_vs_reference
+# ---------------------------------------------------------------------------
+
+
+def _make_forecast(n: int = 24, seed: int = 0) -> pd.Series:
+    rng = np.random.default_rng(seed)
+    idx = pd.date_range("2024-01-15", periods=n, freq="h", tz="UTC")
+    return pd.Series(40_000 + rng.standard_normal(n) * 1000, index=idx)
+
+
+def test_forecast_vs_reference_returns_figure_with_overlap():
+    forecast = _make_forecast(24)
+    reference = _make_forecast(24, seed=1)  # same index -> full overlap
+    fig = plot_forecast_vs_reference(
+        forecast, reference,
+        forecast_label="team_4", reference_label="ENTSO-E"
+    )
+    assert isinstance(fig, Figure)
+
+
+def test_forecast_vs_reference_two_lines_when_overlap():
+    """Two lines (forecast + reference) should appear when overlap exists."""
+    forecast = _make_forecast(24)
+    reference = _make_forecast(24, seed=2)
+    fig = plot_forecast_vs_reference(forecast, reference)
+    ax = fig.axes[0]
+    assert len(ax.lines) == 2
+
+
+def test_forecast_vs_reference_one_line_when_empty_overlap():
+    """When reference has no overlap with forecast, only the forecast line is drawn."""
+    forecast = _make_forecast(24)
+    # Reference on a completely different index (no overlap)
+    idx_other = pd.date_range("2025-06-01", periods=24, freq="h", tz="UTC")
+    rng = np.random.default_rng(3)
+    reference = pd.Series(40_000 + rng.standard_normal(24) * 500, index=idx_other)
+    fig = plot_forecast_vs_reference(forecast, reference)
+    ax = fig.axes[0]
+    assert len(ax.lines) == 1
+
+
+def test_forecast_vs_reference_no_exception_on_empty_overlap():
+    """Empty overlap must not raise — the function returns a valid figure."""
+    forecast = _make_forecast(24)
+    empty_ref = pd.Series(dtype=float)
+    # Should not raise
+    fig = plot_forecast_vs_reference(forecast, empty_ref)
+    assert isinstance(fig, Figure)
+
+
+def test_forecast_vs_reference_unit_scale_applied(caplog):
+    """unit_scale=1 leaves values unscaled; verify axis range is ~MW not GW."""
+    import logging
+
+    forecast = _make_forecast(24)
+    reference = _make_forecast(24, seed=5)
+    with caplog.at_level(logging.INFO, logger="spotforecast2.plots.diagnostics"):
+        fig = plot_forecast_vs_reference(
+            forecast, reference, unit_scale=1.0, unit="MW"
+        )
+    ax = fig.axes[0]
+    # y-axis upper limit should be in the MW range (>1000), not GW (<100)
+    ymin, ymax = ax.get_ylim()
+    assert ymax > 1000

From c8079b2a10fbfb97e333c89948eb234fc2381e9d Mon Sep 17 00:00:00 2001
From: bartzbeielstein <32470350+bartzbeielstein@users.noreply.github.com>
Date: Fri, 12 Jun 2026 02:50:19 +0200
Subject: [PATCH 3/6] feat(stats): PACF lag selection + search-space boundary
 report

- stats/autocorrelation.py: add select_pacf_lags(), ported from
  bart26k-lecture/scripts/team4_4zones_submit.py (select_key_lags).
  Computes PACF up to n_lags, returns top_k significant lags sorted
  ascending; falls back to caller-supplied list or raises ValueError
  on degenerate series.

- model_selection/boundary.py (new): three search-space boundary helpers
  promoted from bart26k-lecture/14_team_4_submission.qmd and
  team4_4zones_submit.py (KB entry 2026-06-08-hyperparameter-boundary-
  management). report_boundary_positions() logs and returns flagged dims;
  boundary_report() returns a DataFrame; suggest_bounds() returns a widened
  search space ready for a round-2 run_task_spotoptim() call.

- tests/test_stats_select_pacf_lags.py: 15 tests (AR(1), AR(24),
  degenerate/constant, fallback, determinism, top_k bounds).

- tests/test_model_selection_boundary.py: 27 tests (linear/log10 dims,
  prefix stripping, bool skip, categorical skip, missing key, logger
  injection, suggest_bounds widening for float/log/int).

- _quarto.yml: register stats.select_pacf_lags and
  model_selection.boundary in sidebar and quartodoc sections.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 _quarto.yml                                   |   8 +-
 src/spotforecast2/model_selection/__init__.py |   4 +
 src/spotforecast2/model_selection/boundary.py | 376 ++++++++++++++++++
 src/spotforecast2/stats/__init__.py           |   3 +-
 src/spotforecast2/stats/autocorrelation.py    | 131 ++++++
 tests/test_model_selection_boundary.py        | 284 +++++++++++++
 tests/test_stats_select_pacf_lags.py          | 159 ++++++++
 uv.lock                                       |   2 +-
 8 files changed, 964 insertions(+), 3 deletions(-)
 create mode 100644 src/spotforecast2/model_selection/boundary.py
 create mode 100644 tests/test_model_selection_boundary.py
 create mode 100644 tests/test_stats_select_pacf_lags.py

diff --git a/_quarto.yml b/_quarto.yml
index 63015e8e..298f21f1 100644
--- a/_quarto.yml
+++ b/_quarto.yml
@@ -106,6 +106,8 @@ website:
             contents:
               - text: "bayesian_search"
                 file: docs/reference/model_selection.bayesian_search.qmd
+              - text: "boundary"
+                file: docs/reference/model_selection.boundary.qmd
               - text: "grid_search"
                 file: docs/reference/model_selection.grid_search.qmd
               - text: "random_search"
@@ -126,6 +128,8 @@ website:
             contents:
               - text: "autocorrelation"
                 file: docs/reference/stats.autocorrelation.qmd
+              - text: "select_pacf_lags"
+                file: docs/reference/stats.select_pacf_lags.qmd
           - section: "Tasks"
             contents:
               - text: "task_demo"
@@ -222,9 +226,10 @@ quartodoc:
         - plots.time_series_visualization
 
     - title: "Model Selection"
-      desc: "Search algorithms and cross-validation tools."
+      desc: "Search algorithms, cross-validation tools, and boundary management."
       contents:
         - model_selection.bayesian_search
+        - model_selection.boundary
         - model_selection.grid_search
         - model_selection.random_search
         - model_selection.spotoptim_search
@@ -256,6 +261,7 @@ quartodoc:
       desc: "Statistical analysis tools."
       contents:
         - stats.autocorrelation
+        - stats.select_pacf_lags
 
     - title: "Tasks"
       desc: "Demonstration and predefined forecasting tasks."
diff --git a/src/spotforecast2/model_selection/__init__.py b/src/spotforecast2/model_selection/__init__.py
index 5f133488..69102ef1 100644
--- a/src/spotforecast2/model_selection/__init__.py
+++ b/src/spotforecast2/model_selection/__init__.py
@@ -2,6 +2,7 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 
 from .bayesian_search import bayesian_search_forecaster
+from .boundary import boundary_report, report_boundary_positions, suggest_bounds
 from .grid_search import grid_search_forecaster
 from .random_search import random_search_forecaster
 from .spotoptim_search import build_warm_start_x0, spotoptim_search_forecaster
@@ -12,4 +13,7 @@
     "bayesian_search_forecaster",
     "spotoptim_search_forecaster",
     "build_warm_start_x0",
+    "report_boundary_positions",
+    "boundary_report",
+    "suggest_bounds",
 ]
diff --git a/src/spotforecast2/model_selection/boundary.py b/src/spotforecast2/model_selection/boundary.py
new file mode 100644
index 00000000..3cd46835
--- /dev/null
+++ b/src/spotforecast2/model_selection/boundary.py
@@ -0,0 +1,376 @@
+# SPDX-FileCopyrightText: 2026 bartzbeielstein
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+"""Search-space boundary management helpers for hyperparameter tuning.
+
+After a SpotOptim (or any optimizer) run, the tuned optimum may press against
+a search-space boundary — meaning the optimizer wanted to go further but was
+constrained. These helpers make that visible and actionable.
+
+Motivation: KB entry 2026-06-08-hyperparameter-boundary-management documents an
+operational case where ``reg_alpha`` pinned at its old ceiling (98.9 % of the
+linear range), inflating L1 regularization and flattening the live forecast.
+Widening that bound and re-running resolved the issue. The helpers below
+systematize that diagnostic loop.
+
+Three functions are provided:
+
+- `report_boundary_positions` — logs each dimension's position and returns a
+  list of flagged names. Decoupled from ``MultiTask``: the caller extracts
+  params (e.g. ``estimator.get_params()``). Primary operational diagnostic.
+- `boundary_report` — returns a ``pd.DataFrame`` with one row per numeric
+  dimension: position, scale, flag. Companion to `suggest_bounds`.
+- `suggest_bounds` — returns a copy of the search space with flagged bounds
+  widened on the pressed side; pass it straight back to
+  ``run_task_spotoptim(search_space=...)``.
+
+Ported from:
+
+- ``report_boundary_positions``: ``bart26k-lecture/scripts/team4_4zones_submit.py``
+  (``report_boundary_positions`` function, section ``@sec-team4-boundary-diagnostics``).
+- ``boundary_report`` / ``suggest_bounds``: ``bart26k-lecture/14_team_4_submission.qmd``
+  (cell ``team4-boundary-helpers``, section ``@sec-team4-boundary-management``).
+"""
+
+from __future__ import annotations
+
+import logging
+import math
+from collections.abc import Mapping
+from typing import Any
+
+import pandas as pd
+
+_logger = logging.getLogger(__name__)
+
+
+def report_boundary_positions(
+    params: Mapping[str, float | int],
+    search_space: Mapping[str, Any],
+    *,
+    warn_frac: float = 0.10,
+    logger: logging.Logger | None = None,
+) -> list[str]:
+    """Log where each tuned value sits inside its numeric search-space interval.
+
+    For each entry in `search_space` that is a 2- or 3-tuple numeric dimension
+    ``(low, high)`` or ``(low, high, "log10")``, the function:
+
+    - strips the ``"estimator__"`` prefix from the key when looking up the
+      corresponding entry in `params`;
+    - skips non-numeric or boolean values;
+    - computes the position in the dimension's own scale (log10 for log dims,
+      guarding against ``val <= 0`` or ``low <= 0``);
+    - flags ``"> upper"`` when ``pos > 1 - warn_frac`` and ``"< lower"`` when
+      ``pos < warn_frac``;
+    - logs each dimension at INFO level in a columnar format;
+    - returns the list of flagged strings (e.g. ``["reg_alpha > upper"]``).
+
+    Categorical dimensions (list-valued entries) and unreadable entries are
+    skipped. The function never raises — it is a diagnostic and returns an
+    empty list on any unexpected error.
+
+    Args:
+        params: Flat dict of parameter names to numeric values, as returned by
+            ``estimator.get_params()`` or equivalent. Keys should NOT carry the
+            ``"estimator__"`` prefix (it is stripped from `search_space` keys,
+            not from `params` keys).
+        search_space: Dict mapping search-space keys (potentially with
+            ``"estimator__"`` prefix) to dimension specs: ``(low, high)``,
+            ``(low, high, "log10")``, or a list of categories.
+        warn_frac: Fraction of the range (in the dimension's own scale) that
+            defines the "near-boundary" zone at each end. Default is 0.10.
+        logger: Logger to use for INFO/WARNING messages. Defaults to the
+            module-level ``logging.getLogger(__name__)`` logger.
+
+    Returns:
+        List of flagged dimension strings, e.g. ``["reg_alpha > upper",
+        "learning_rate < lower"]``. Empty if all dimensions are interior.
+
+    Examples:
+        Interior optimum — no flags returned:
+
+        ```{python}
+        from spotforecast2.model_selection.boundary import report_boundary_positions
+
+        params = {"num_leaves": 300, "learning_rate": 0.05}
+        space = {
+            "estimator__num_leaves": (8, 1024),
+            "estimator__learning_rate": (0.005, 0.3, "log10"),
+        }
+        flagged = report_boundary_positions(params, space)
+        print("flagged:", flagged)
+        assert flagged == []
+        ```
+
+        Near-upper-boundary — flag is returned:
+
+        ```{python}
+        from spotforecast2.model_selection.boundary import report_boundary_positions
+
+        params = {"reg_alpha": 9.9}
+        space = {"estimator__reg_alpha": (0.001, 10.0)}
+        flagged = report_boundary_positions(params, space)
+        print("flagged:", flagged)
+        assert flagged == ["reg_alpha > upper"]
+        ```
+
+    """
+    log = logger if logger is not None else _logger
+    flagged: list[str] = []
+
+    for name, spec in search_space.items():
+        if not (isinstance(spec, tuple) and len(spec) in (2, 3)):
+            continue  # categorical dim (e.g. "lags")
+        key = name.split("estimator__", 1)[-1]
+        try:
+            val = params.get(key)  # type: ignore[arg-type]
+            if val is None:
+                continue
+            if not isinstance(val, (int, float)) or isinstance(val, bool):
+                continue
+            low, high = float(spec[0]), float(spec[1])
+            is_log = len(spec) == 3 and spec[2] == "log10"
+            if is_log and (val <= 0 or low <= 0):
+                continue
+            if is_log:
+                pos = (math.log10(val) - math.log10(low)) / (
+                    math.log10(high) - math.log10(low)
+                )
+            else:
+                pos = (float(val) - low) / (high - low)
+            flag = (
+                "> upper"
+                if pos > 1 - warn_frac
+                else ("< lower" if pos < warn_frac else "")
+            )
+            log.info(
+                "  bound %-18s = %-11.5g in [%g, %g]%s  pos=%.2f%s",
+                key,
+                val,
+                low,
+                high,
+                " log10" if is_log else "",
+                pos,
+                f"  <-- {flag}" if flag else "",
+            )
+            if flag:
+                flagged.append(f"{key} {flag}")
+        except Exception as exc:  # noqa: BLE001
+            log.warning("boundary check: skipping %r (unreadable entry: %s)", name, exc)
+
+    if flagged:
+        log.warning(
+            "boundary check: %d tuned dim(s) near a bound (%s) -- consider "
+            "widening that side and re-running.",
+            len(flagged),
+            ", ".join(flagged),
+        )
+    else:
+        log.info("boundary check: all tuned dimensions sit interior.")
+
+    return flagged
+
+
+def boundary_report(
+    best_params: Mapping[str, float | int],
+    search_space: Mapping[str, Any],
+    *,
+    warn_frac: float = 0.10,
+) -> pd.DataFrame:
+    """Tabulate each tuned value's position inside its search-space bound.
+
+    Returns a DataFrame sorted by descending position, with one row per
+    numeric dimension. Categorical dimensions are skipped. ``flag`` is one of
+    ``"> upper"``, ``"< lower"``, or ``""`` (interior).
+
+    This function uses the `search_space` keys as-is (including any
+    ``"estimator__"`` prefix) to look up matching entries in `best_params`.
+    The returned ``param`` column strips the ``"estimator__"`` prefix for
+    readability.
+
+    Ported from ``bart26k-lecture/14_team_4_submission.qmd``, cell
+    ``team4-boundary-helpers``.
+
+    Args:
+        best_params: Flat dict of parameter names to values, keyed with the
+            same names as `search_space` (including any ``"estimator__"``
+            prefix).
+        search_space: Dict mapping parameter names to dimension specs:
+            ``(low, high)``, ``(low, high, "log10")``, or a list of
+            categories (skipped).
+        warn_frac: Fraction of the range (in the dimension's own scale)
+            defining the "near-boundary" zone. Default is 0.10.
+
+    Returns:
+        DataFrame with columns ``param``, ``low``, ``high``, ``value``,
+        ``scale``, ``position``, ``flag``, sorted by ``position`` descending.
+
+    Examples:
+        Report on a near-upper-boundary value:
+
+        ```{python}
+        from spotforecast2.model_selection.boundary import boundary_report
+
+        best = {
+            "estimator__reg_alpha": 9.89,
+            "estimator__learning_rate": 0.069,
+        }
+        space = {
+            "estimator__reg_alpha": (0.001, 10.0),
+            "estimator__learning_rate": (0.005, 0.3, "log10"),
+        }
+        df = boundary_report(best, space)
+        print(df.to_string(index=False))
+        assert "reg_alpha" in df["param"].values
+        flagged = df[df["flag"] == "> upper"]["param"].tolist()
+        assert "reg_alpha" in flagged
+        ```
+
+    """
+    rows = []
+    for name, spec in search_space.items():
+        if not (isinstance(spec, tuple) and len(spec) in (2, 3)):
+            continue  # categorical dim
+        low, high = float(spec[0]), float(spec[1])
+        is_log = len(spec) == 3 and spec[2] == "log10"
+        val = best_params.get(name)  # type: ignore[arg-type]
+        if val is None:
+            continue
+        if is_log:
+            if val <= 0 or low <= 0:
+                continue
+            pos = (math.log10(val) - math.log10(low)) / (
+                math.log10(high) - math.log10(low)
+            )
+        else:
+            pos = (float(val) - low) / (high - low)
+        flag = (
+            "> upper"
+            if pos > 1 - warn_frac
+            else ("< lower" if pos < warn_frac else "")
+        )
+        rows.append(
+            {
+                "param": name.replace("estimator__", ""),
+                "low": low,
+                "high": high,
+                "value": val,
+                "scale": "log10" if is_log else "linear",
+                "position": round(pos, 3),
+                "flag": flag,
+            }
+        )
+    df = pd.DataFrame(rows)
+    if df.empty:
+        return pd.DataFrame(columns=["param", "low", "high", "value", "scale", "position", "flag"])
+    return df.sort_values("position", ascending=False).reset_index(drop=True)
+
+
+def suggest_bounds(
+    best_params: Mapping[str, float | int],
+    search_space: Mapping[str, Any],
+    *,
+    warn_frac: float = 0.10,
+    widen_factor: float = 10.0,
+) -> dict[str, Any]:
+    """Return a copy of `search_space` with flagged bounds widened.
+
+    Upper-pinned dimensions grow upward (``high * widen_factor`` for float/log,
+    ``high + (high - low)`` for integer); lower-pinned dimensions grow downward
+    (``low / widen_factor`` for float/log, ``max(1, low - (high - low))`` for
+    integer). Interior and categorical dimensions are copied unchanged.
+
+    Pass the result straight back to
+    ``run_task_spotoptim(search_space=suggest_bounds(...))``.
+
+    Ported from ``bart26k-lecture/14_team_4_submission.qmd``, cell
+    ``team4-boundary-helpers`` (parameter ``widen`` renamed to ``widen_factor``
+    for clarity).
+
+    Args:
+        best_params: Flat dict of parameter names to values, keyed with the
+            same names as `search_space` (including any ``"estimator__"``
+            prefix).
+        search_space: Dict mapping parameter names to dimension specs:
+            ``(low, high)``, ``(low, high, "log10")``, or a list of
+            categories (returned unchanged).
+        warn_frac: Fraction of the range (in the dimension's own scale)
+            defining the "near-boundary" zone. Default is 0.10.
+        widen_factor: Multiplicative factor for widening float/log bounds.
+            Integer bounds use an additive span instead. Default is 10.0.
+
+    Returns:
+        New search-space dict with the same keys as `search_space` but with
+        boundary-pressed bounds extended on the pressed side.
+
+    Examples:
+        Upper-pinned float bound is multiplied by widen_factor:
+
+        ```{python}
+        from spotforecast2.model_selection.boundary import suggest_bounds
+
+        best = {"estimator__reg_alpha": 9.89}
+        space = {"estimator__reg_alpha": (0.001, 10.0)}
+        new_space = suggest_bounds(best, space, widen_factor=10.0)
+        print(new_space)
+        assert new_space["estimator__reg_alpha"][1] == 100.0
+        ```
+
+        Log-scale upper-pinned bound is also multiplied:
+
+        ```{python}
+        from spotforecast2.model_selection.boundary import suggest_bounds
+
+        best = {"estimator__reg_alpha": 9.89}
+        space = {"estimator__reg_alpha": (0.001, 10.0, "log10")}
+        new_space = suggest_bounds(best, space, widen_factor=10.0)
+        print(new_space)
+        assert new_space["estimator__reg_alpha"][1] == 100.0
+        ```
+
+        Integer bound grows additively:
+
+        ```{python}
+        from spotforecast2.model_selection.boundary import suggest_bounds
+
+        best = {"estimator__n_estimators": 4950}
+        space = {"estimator__n_estimators": (100, 5000)}
+        new_space = suggest_bounds(best, space, widen_factor=10.0)
+        print(new_space)
+        assert new_space["estimator__n_estimators"][1] == 5000 + (5000 - 100)
+        ```
+
+        Interior dimension is returned unchanged:
+
+        ```{python}
+        from spotforecast2.model_selection.boundary import suggest_bounds
+
+        best = {"estimator__num_leaves": 300}
+        space = {"estimator__num_leaves": (8, 1024)}
+        new_space = suggest_bounds(best, space)
+        assert new_space["estimator__num_leaves"] == (8, 1024)
+        ```
+
+    """
+    report = boundary_report(best_params, search_space, warn_frac=warn_frac)
+    flagged = {
+        row["param"]: row["flag"]
+        for _, row in report.iterrows()
+        if row["flag"]
+    }
+    out: dict[str, Any] = {}
+    for name, spec in search_space.items():
+        if not (isinstance(spec, tuple) and len(spec) in (2, 3)):
+            out[name] = spec
+            continue
+        low, high, rest = spec[0], spec[1], spec[2:]
+        is_int = isinstance(low, int) and isinstance(high, int) and not rest
+        short_name = name.replace("estimator__", "")
+        flag = flagged.get(short_name, "")
+        if flag == "> upper":
+            high = int(high + (high - low)) if is_int else high * widen_factor
+        elif flag == "< lower":
+            low = max(1, int(low - (high - low))) if is_int else low / widen_factor
+        out[name] = (low, high, *rest)
+    return out
diff --git a/src/spotforecast2/stats/__init__.py b/src/spotforecast2/stats/__init__.py
index 5ae619be..9ee8a065 100644
--- a/src/spotforecast2/stats/__init__.py
+++ b/src/spotforecast2/stats/__init__.py
@@ -13,10 +13,11 @@
     compute_periodogram,
 )
 
-from .autocorrelation import calculate_lag_autocorrelation
+from .autocorrelation import calculate_lag_autocorrelation, select_pacf_lags
 
 __all__ = [
     "augmented_dickey_fuller",
     "calculate_lag_autocorrelation",
     "compute_periodogram",
+    "select_pacf_lags",
 ]
diff --git a/src/spotforecast2/stats/autocorrelation.py b/src/spotforecast2/stats/autocorrelation.py
index 241c2d95..e89a6ed4 100644
--- a/src/spotforecast2/stats/autocorrelation.py
+++ b/src/spotforecast2/stats/autocorrelation.py
@@ -3,11 +3,14 @@
 
 from __future__ import annotations
 
+import logging
 from importlib.util import find_spec
 
 import numpy as np
 import pandas as pd
 
+_logger = logging.getLogger(__name__)
+
 
 def calculate_lag_autocorrelation(
     data: pd.Series | pd.DataFrame,
@@ -192,3 +195,131 @@ def calculate_lag_autocorrelation(
         )
 
     return results
+
+
+def select_pacf_lags(
+    series: pd.Series,
+    *,
+    n_lags: int = 200,
+    top_k: int = 8,
+    fallback: list[int] | None = None,
+) -> list[int]:
+    """Select the most informative lags using the partial autocorrelation function.
+
+    Computes the PACF up to `n_lags` via `calculate_lag_autocorrelation`, then
+    returns the `top_k` lags whose `|PACF|` exceeds the 95 % significance band
+    (1.96 / sqrt(N)), sorted in ascending order.
+
+    This is a pure-compute helper ported from the operational team-4 pipeline
+    (`select_key_lags` in ``bart26k-lecture/scripts/team4_4zones_submit.py``).
+    No plotting, no side effects beyond an optional DEBUG log line.
+
+    Args:
+        series: The time series from which to estimate lags. Must contain at
+            least `2 * n_lags + 1` observations for statsmodels PACF to run
+            without truncating `n_lags`.
+        n_lags: Number of lags passed to `calculate_lag_autocorrelation`.
+            Default is 200.
+        top_k: Maximum number of lags to return (the `top_k` significant lags
+            ranked by descending `|PACF|`). Default is 8.
+        fallback: Lag list returned when no lag exceeds the significance band
+            (degenerate series: too short, nearly constant, or `n_lags` too
+            large). If `None`, a `ValueError` is raised instead.
+
+    Returns:
+        Sorted list of lag integers (ascending). Length is at most `top_k`;
+        may be shorter if fewer than `top_k` significant lags exist.
+
+    Raises:
+        ValueError: If no lag exceeds the significance band and `fallback` is
+            `None`. Pass `fallback=[1, 2, 24, 168]` (or any operator-chosen
+            constant) to suppress the error and use a safe default instead.
+
+    Examples:
+        Select significant lags from a synthetic AR(1) series:
+
+        ```{python}
+        import numpy as np
+        import pandas as pd
+        from spotforecast2.stats.autocorrelation import select_pacf_lags
+
+        rng = np.random.default_rng(42)
+        n = 24 * 120  # 120 days of hourly data
+        ar = np.zeros(n)
+        for t in range(1, n):
+            ar[t] = 0.7 * ar[t - 1] + rng.standard_normal()
+        series = pd.Series(ar)
+        lags = select_pacf_lags(series, n_lags=50, top_k=8)
+        print("selected lags:", lags)
+        assert isinstance(lags, list)
+        assert all(isinstance(x, int) for x in lags)
+        assert lags == sorted(lags)
+        assert len(lags) <= 8
+        assert 1 in lags, "lag-1 AR component should be selected"
+        ```
+
+        Select significant lags from an AR(24) process — lag 24 dominates:
+
+        ```{python}
+        import numpy as np
+        import pandas as pd
+        from spotforecast2.stats.autocorrelation import select_pacf_lags
+
+        rng = np.random.default_rng(42)
+        n = 24 * 200
+        ar = np.zeros(n)
+        for t in range(24, n):
+            ar[t] = 0.8 * ar[t - 24] + rng.standard_normal()
+        series = pd.Series(ar)
+        lags = select_pacf_lags(series, n_lags=50, top_k=8)
+        print("selected lags:", lags)
+        assert 24 in lags, f"lag 24 expected in {lags}"
+        ```
+
+        Degenerate (constant) series — fallback is returned when provided:
+
+        ```{python}
+        import pandas as pd
+        from spotforecast2.stats.autocorrelation import select_pacf_lags
+
+        series = pd.Series([1.0] * 50)
+        result = select_pacf_lags(series, n_lags=10, fallback=[1, 2, 24])
+        print("fallback lags:", result)
+        assert result == [1, 2, 24]
+        ```
+
+        Degenerate series with no fallback raises ValueError:
+
+        ```{python}
+        import pytest
+        import pandas as pd
+        from spotforecast2.stats.autocorrelation import select_pacf_lags
+
+        series = pd.Series([1.0] * 50)
+        with pytest.raises(ValueError, match="no significant"):
+            select_pacf_lags(series, n_lags=10, fallback=None)
+        ```
+
+    """
+    acf_df = calculate_lag_autocorrelation(series, n_lags=n_lags)
+    conf = 1.96 / np.sqrt(len(series))
+    significant = acf_df[acf_df["partial_autocorrelation_abs"] > conf]
+    top = significant.nlargest(top_k, "partial_autocorrelation_abs")
+    key_lags = sorted(top["lag"].astype(int).tolist())
+    _logger.debug(
+        "select_pacf_lags: N=%d band=+/-%.4f significant=%d top_%d=%s",
+        len(series),
+        conf,
+        len(significant),
+        top_k,
+        key_lags,
+    )
+    if not key_lags:
+        if fallback is not None:
+            return list(fallback)
+        raise ValueError(
+            f"select_pacf_lags: no significant PACF lags found "
+            f"(N={len(series)}, band={conf:.4f}, n_lags={n_lags}). "
+            "Pass fallback=<list> to use a safe default instead of raising."
+        )
+    return key_lags
diff --git a/tests/test_model_selection_boundary.py b/tests/test_model_selection_boundary.py
new file mode 100644
index 00000000..8318d483
--- /dev/null
+++ b/tests/test_model_selection_boundary.py
@@ -0,0 +1,284 @@
+# SPDX-FileCopyrightText: 2026 bartzbeielstein
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+"""Tests for spotforecast2.model_selection.boundary."""
+
+from __future__ import annotations
+
+import logging
+
+from spotforecast2.model_selection.boundary import (
+    boundary_report,
+    report_boundary_positions,
+    suggest_bounds,
+)
+
+
+# ---------------------------------------------------------------------------
+# Shared fixtures / helpers
+# ---------------------------------------------------------------------------
+
+LINEAR_SPACE = {
+    "estimator__num_leaves": (8, 1024),
+    "estimator__n_estimators": (100, 5000),
+    "estimator__bagging_fraction": (0.5, 1.0),
+    "estimator__reg_alpha": (0.001, 10.0),
+    "lags": ["[1, 2, 24]", "[1, 2, 24, 168]"],  # categorical, must be skipped
+}
+
+LOG_SPACE = {
+    "estimator__learning_rate": (0.005, 0.3, "log10"),
+    "estimator__reg_alpha": (0.001, 100.0, "log10"),
+}
+
+MIXED_SPACE = {**LINEAR_SPACE, **LOG_SPACE}
+
+
+# ---------------------------------------------------------------------------
+# report_boundary_positions
+# ---------------------------------------------------------------------------
+
+class TestReportBoundaryPositions:
+    def test_interior_no_flag(self):
+        """An optimum well inside all bounds returns an empty list."""
+        params = {
+            "num_leaves": 300,
+            "n_estimators": 2000,
+            "bagging_fraction": 0.75,
+            "reg_alpha": 1.0,
+            "learning_rate": 0.05,
+        }
+        flagged = report_boundary_positions(params, MIXED_SPACE)
+        assert flagged == []
+
+    def test_near_upper_linear_flagged(self):
+        """A value at 98 % of the linear range is flagged as '> upper'."""
+        # reg_alpha = 9.8 in (0.001, 10.0): pos = (9.8 - 0.001) / (10.0 - 0.001) ≈ 0.980
+        params = {"reg_alpha": 9.8}
+        space = {"estimator__reg_alpha": (0.001, 10.0)}
+        flagged = report_boundary_positions(params, space)
+        assert flagged == ["reg_alpha > upper"]
+
+    def test_near_lower_linear_flagged(self):
+        """A value at 2 % of the linear range is flagged as '< lower'."""
+        # n_estimators = 190 in (100, 5000): pos = (190 - 100) / (5000 - 100) ≈ 0.018
+        params = {"n_estimators": 190}
+        space = {"estimator__n_estimators": (100, 5000)}
+        flagged = report_boundary_positions(params, space)
+        assert flagged == ["n_estimators < lower"]
+
+    def test_log10_dim_near_lower(self):
+        """In log10 space, a value near the lower decade boundary is flagged."""
+        # learning_rate = 0.006 in (0.005, 0.3) log10:
+        # pos = (log10(0.006) - log10(0.005)) / (log10(0.3) - log10(0.005))
+        import math
+        low, high, val = 0.005, 0.3, 0.006
+        pos = (math.log10(val) - math.log10(low)) / (math.log10(high) - math.log10(low))
+        assert pos < 0.10  # confirm this is a near-lower case
+        params = {"learning_rate": val}
+        space = {"estimator__learning_rate": (low, high, "log10")}
+        flagged = report_boundary_positions(params, space)
+        assert flagged == ["learning_rate < lower"]
+
+    def test_estimator_prefix_stripped(self):
+        """The 'estimator__' prefix is stripped before looking up params."""
+        params = {"reg_alpha": 9.9}
+        space = {"estimator__reg_alpha": (0.001, 10.0)}
+        flagged = report_boundary_positions(params, space)
+        assert any("reg_alpha" in f for f in flagged)
+
+    def test_categorical_lags_skipped(self):
+        """List-valued entries in the search space are silently skipped."""
+        params = {}
+        space = {"lags": ["[1, 2, 24]", "[1, 24, 168]"]}
+        flagged = report_boundary_positions(params, space)
+        assert flagged == []
+
+    def test_bool_param_skipped(self):
+        """Boolean values are skipped (isinstance(True, int) is True in Python)."""
+        params = {"verbose": True}
+        space = {"estimator__verbose": (0, 1)}
+        flagged = report_boundary_positions(params, space)
+        assert flagged == []
+
+    def test_missing_param_key_skipped_with_warning(self, caplog):
+        """A param key not found in params is skipped; no exception is raised."""
+        params = {}  # empty — key is missing
+        space = {"estimator__num_leaves": (8, 1024)}
+        with caplog.at_level(logging.DEBUG):
+            flagged = report_boundary_positions(params, space)
+        assert flagged == []
+        # No warning should be emitted for a simply missing key (it is None,
+        # so we skip via the `val is None` check, not the except branch)
+
+    def test_returned_list_exact_contents(self):
+        """The returned list contains exactly the flagged strings in order."""
+        params = {"reg_alpha": 9.9, "n_estimators": 150}
+        space = {
+            "estimator__reg_alpha": (0.001, 10.0),
+            "estimator__n_estimators": (100, 5000),
+        }
+        flagged = report_boundary_positions(params, space)
+        # reg_alpha is near upper; n_estimators 150 in (100, 5000) = pos 0.01 → < lower
+        assert "reg_alpha > upper" in flagged
+        assert "n_estimators < lower" in flagged
+
+    def test_logger_injection(self, caplog):
+        """A custom logger passed via the logger= argument is used for messages."""
+        custom_logger = logging.getLogger("test_custom_boundary_logger")
+        params = {"num_leaves": 300}
+        space = {"estimator__num_leaves": (8, 1024)}
+        with caplog.at_level(logging.INFO, logger="test_custom_boundary_logger"):
+            report_boundary_positions(params, space, logger=custom_logger)
+        # The custom logger should have emitted at least one INFO message
+        assert any(
+            r.name == "test_custom_boundary_logger" for r in caplog.records
+        )
+
+    def test_multiple_flags(self):
+        """Multiple near-boundary dims all appear in the returned list."""
+        params = {"reg_alpha": 9.9, "learning_rate": 0.29}
+        space = {
+            "estimator__reg_alpha": (0.001, 10.0),
+            "estimator__learning_rate": (0.005, 0.3, "log10"),
+        }
+        flagged = report_boundary_positions(params, space)
+        assert len(flagged) == 2
+
+    def test_log_invalid_val_skipped(self):
+        """A zero or negative value in a log10 dim is silently skipped."""
+        params = {"reg_alpha": 0.0}
+        space = {"estimator__reg_alpha": (0.001, 10.0, "log10")}
+        flagged = report_boundary_positions(params, space)
+        assert flagged == []
+
+
+# ---------------------------------------------------------------------------
+# boundary_report (DataFrame form)
+# ---------------------------------------------------------------------------
+
+class TestBoundaryReport:
+    def test_returns_dataframe(self):
+        best = {"estimator__reg_alpha": 9.89, "estimator__learning_rate": 0.069}
+        space = {
+            "estimator__reg_alpha": (0.001, 10.0),
+            "estimator__learning_rate": (0.005, 0.3, "log10"),
+        }
+        df = boundary_report(best, space)
+        assert set(df.columns) == {"param", "low", "high", "value", "scale", "position", "flag"}
+
+    def test_flagged_near_upper(self):
+        best = {"estimator__reg_alpha": 9.89}
+        space = {"estimator__reg_alpha": (0.001, 10.0)}
+        df = boundary_report(best, space)
+        row = df[df["param"] == "reg_alpha"].iloc[0]
+        assert row["flag"] == "> upper"
+        assert row["scale"] == "linear"
+
+    def test_log10_scale(self):
+        best = {"estimator__learning_rate": 0.069}
+        space = {"estimator__learning_rate": (0.005, 0.3, "log10")}
+        df = boundary_report(best, space)
+        row = df[df["param"] == "learning_rate"].iloc[0]
+        assert row["scale"] == "log10"
+        assert 0.0 < row["position"] < 1.0
+
+    def test_categorical_skipped(self):
+        best = {}
+        space = {"lags": ["[1, 2, 24]"]}
+        df = boundary_report(best, space)
+        assert df.empty
+
+    def test_sorted_descending_position(self):
+        best = {
+            "estimator__reg_alpha": 9.89,   # near upper → high position
+            "estimator__num_leaves": 300,    # interior
+        }
+        space = {
+            "estimator__reg_alpha": (0.001, 10.0),
+            "estimator__num_leaves": (8, 1024),
+        }
+        df = boundary_report(best, space)
+        assert df["position"].iloc[0] >= df["position"].iloc[-1]
+
+    def test_prefix_stripped_in_param_column(self):
+        best = {"estimator__num_leaves": 512}
+        space = {"estimator__num_leaves": (8, 1024)}
+        df = boundary_report(best, space)
+        assert "num_leaves" in df["param"].values
+        assert "estimator__num_leaves" not in df["param"].values
+
+
+# ---------------------------------------------------------------------------
+# suggest_bounds
+# ---------------------------------------------------------------------------
+
+class TestSuggestBounds:
+    def test_interior_unchanged(self):
+        best = {"estimator__num_leaves": 300}
+        space = {"estimator__num_leaves": (8, 1024)}
+        new_space = suggest_bounds(best, space)
+        assert new_space["estimator__num_leaves"] == (8, 1024)
+
+    def test_upper_pinned_float_multiplied(self):
+        """Upper-pinned float bound: high * widen_factor."""
+        best = {"estimator__reg_alpha": 9.89}
+        space = {"estimator__reg_alpha": (0.001, 10.0)}
+        new_space = suggest_bounds(best, space, widen_factor=10.0)
+        assert new_space["estimator__reg_alpha"][1] == 100.0
+
+    def test_upper_pinned_log_multiplied(self):
+        """Upper-pinned log10 bound: high * widen_factor."""
+        best = {"estimator__reg_alpha": 9.89}
+        space = {"estimator__reg_alpha": (0.001, 10.0, "log10")}
+        new_space = suggest_bounds(best, space, widen_factor=10.0)
+        assert new_space["estimator__reg_alpha"][1] == 100.0
+        assert new_space["estimator__reg_alpha"][2] == "log10"
+
+    def test_upper_pinned_int_additive(self):
+        """Upper-pinned integer bound: high + (high - low)."""
+        best = {"estimator__n_estimators": 4950}
+        space = {"estimator__n_estimators": (100, 5000)}
+        new_space = suggest_bounds(best, space, widen_factor=10.0)
+        assert new_space["estimator__n_estimators"][1] == 5000 + (5000 - 100)
+
+    def test_lower_pinned_float_divided(self):
+        """Lower-pinned float bound: low / widen_factor."""
+        best = {"estimator__learning_rate": 0.006}
+        space = {"estimator__learning_rate": (0.005, 0.3, "log10")}
+        new_space = suggest_bounds(best, space, widen_factor=10.0)
+        assert abs(new_space["estimator__learning_rate"][0] - 0.0005) < 1e-10
+
+    def test_categorical_unchanged(self):
+        """Categorical (list) entries pass through unchanged."""
+        best = {}
+        space = {"lags": ["[1, 2, 24]", "[1, 24, 168]"]}
+        new_space = suggest_bounds(best, space)
+        assert new_space["lags"] == space["lags"]
+
+    def test_all_keys_preserved(self):
+        """The returned dict has exactly the same keys as search_space."""
+        best = {"num_leaves": 512, "learning_rate": 0.05}
+        space = {
+            "estimator__num_leaves": (8, 1024),
+            "estimator__learning_rate": (0.005, 0.3, "log10"),
+            "lags": ["[1, 2, 24]"],
+        }
+        new_space = suggest_bounds(best, space)
+        assert set(new_space.keys()) == set(space.keys())
+
+    def test_widen_factor_parameter(self):
+        """Different widen_factor values produce different bounds."""
+        best = {"estimator__reg_alpha": 9.89}
+        space = {"estimator__reg_alpha": (0.001, 10.0)}
+        new5 = suggest_bounds(best, space, widen_factor=5.0)
+        new10 = suggest_bounds(best, space, widen_factor=10.0)
+        assert new10["estimator__reg_alpha"][1] > new5["estimator__reg_alpha"][1]
+
+    def test_lower_pinned_int_additive_floor_1(self):
+        """Lower-pinned integer bound floors at 1: max(1, low - (high - low))."""
+        best = {"estimator__n_estimators": 105}  # near lower end of (100, 5000)
+        space = {"estimator__n_estimators": (100, 5000)}
+        new_space = suggest_bounds(best, space, widen_factor=10.0)
+        # new low = max(1, 100 - (5000 - 100)) = max(1, -4800) = 1
+        assert new_space["estimator__n_estimators"][0] >= 1
diff --git a/tests/test_stats_select_pacf_lags.py b/tests/test_stats_select_pacf_lags.py
new file mode 100644
index 00000000..95abc872
--- /dev/null
+++ b/tests/test_stats_select_pacf_lags.py
@@ -0,0 +1,159 @@
+# SPDX-FileCopyrightText: 2026 bartzbeielstein
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+"""Tests for spotforecast2.stats.autocorrelation.select_pacf_lags."""
+
+from __future__ import annotations
+
+import numpy as np
+import pandas as pd
+import pytest
+
+from spotforecast2.stats.autocorrelation import select_pacf_lags
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_daily_ar_series(n_days: int = 120, seed: int = 42) -> pd.Series:
+    """AR(1) with moderate coefficient for general lag selection tests."""
+    rng = np.random.default_rng(seed)
+    n = n_days * 24
+    ar = np.zeros(n)
+    for t in range(1, n):
+        ar[t] = 0.7 * ar[t - 1] + rng.standard_normal()
+    return pd.Series(ar)
+
+
+def _make_ar24_series(n_days: int = 200, seed: int = 42) -> pd.Series:
+    """AR(24) process — lag 24 dominates the PACF (strong daily periodicity)."""
+    rng = np.random.default_rng(seed)
+    n = n_days * 24
+    ar = np.zeros(n)
+    for t in range(24, n):
+        ar[t] = 0.8 * ar[t - 24] + rng.standard_normal()
+    return pd.Series(ar)
+
+
+# ---------------------------------------------------------------------------
+# Happy-path tests
+# ---------------------------------------------------------------------------
+
+class TestSelectPacfLagsHappyPath:
+    def test_returns_list_of_ints(self):
+        series = _make_daily_ar_series()
+        lags = select_pacf_lags(series, n_lags=50, top_k=8)
+        assert isinstance(lags, list)
+        assert all(isinstance(x, int) for x in lags)
+
+    def test_sorted_ascending(self):
+        series = _make_daily_ar_series()
+        lags = select_pacf_lags(series, n_lags=50, top_k=8)
+        assert lags == sorted(lags)
+
+    def test_top_k_respected(self):
+        series = _make_daily_ar_series()
+        for k in (1, 4, 8):
+            lags = select_pacf_lags(series, n_lags=50, top_k=k)
+            assert len(lags) <= k
+
+    def test_lag_1_selected_ar1_component(self):
+        """AR(1) coefficient 0.7 should produce a strongly significant lag-1 PACF."""
+        series = _make_daily_ar_series()
+        lags = select_pacf_lags(series, n_lags=50, top_k=8)
+        assert 1 in lags, f"expected lag 1 in {lags}"
+
+    def test_daily_lag_24_in_top_k(self):
+        """An AR(24) process must have lag 24 in the top-k selected lags."""
+        series = _make_ar24_series(n_days=200)
+        lags = select_pacf_lags(series, n_lags=50, top_k=8)
+        assert 24 in lags, f"expected lag 24 in {lags} (AR(24) process)"
+
+    def test_determinism(self):
+        """Same series, same parameters → same result every call."""
+        series = _make_daily_ar_series(seed=0)
+        r1 = select_pacf_lags(series, n_lags=50, top_k=6)
+        r2 = select_pacf_lags(series, n_lags=50, top_k=6)
+        assert r1 == r2
+
+    def test_n_lags_limits_search(self):
+        """Returned lags must not exceed n_lags."""
+        series = _make_daily_ar_series()
+        n_lags = 30
+        lags = select_pacf_lags(series, n_lags=n_lags, top_k=8)
+        assert all(lag <= n_lags for lag in lags)
+
+
+# ---------------------------------------------------------------------------
+# Degenerate / edge-case tests
+# ---------------------------------------------------------------------------
+
+class TestSelectPacfLagsDegenerate:
+    def test_constant_series_fallback_returned(self):
+        series = pd.Series([1.0] * 50)
+        result = select_pacf_lags(series, n_lags=10, fallback=[1, 2, 24])
+        assert result == [1, 2, 24]
+
+    def test_constant_series_no_fallback_raises(self):
+        series = pd.Series([1.0] * 50)
+        with pytest.raises(ValueError, match="no significant"):
+            select_pacf_lags(series, n_lags=10, fallback=None)
+
+    def test_very_short_series_fallback(self):
+        """Series shorter than 2*n_lags+1; PACF falls back on statsmodels truncation.
+
+        No lag should pass the wide confidence band for a near-white-noise short series.
+        We do not mandate whether ValueError or fallback is triggered; only that either
+        the fallback list is returned or a ValueError is raised.
+        """
+        rng = np.random.default_rng(7)
+        series = pd.Series(rng.standard_normal(20))
+        try:
+            result = select_pacf_lags(series, n_lags=5, top_k=2, fallback=[1, 2])
+            # if no significant lags, fallback should be returned
+            assert isinstance(result, list)
+        except ValueError:
+            pass  # also acceptable
+
+    def test_fallback_list_copied(self):
+        """The returned list must be a copy, not the same object as fallback."""
+        series = pd.Series([0.0] * 50)
+        fb = [1, 2, 24]
+        result = select_pacf_lags(series, n_lags=10, fallback=fb)
+        assert result is not fb
+
+    def test_fallback_none_is_default(self):
+        """fallback=None is the default; constant series raises ValueError."""
+        series = pd.Series([3.14] * 50)
+        with pytest.raises(ValueError):
+            select_pacf_lags(series, n_lags=10)
+
+
+# ---------------------------------------------------------------------------
+# Parameter variation
+# ---------------------------------------------------------------------------
+
+class TestSelectPacfLagsParams:
+    def test_default_n_lags_200_top_k_8(self):
+        """Default parameters work on a long series without error."""
+        series = _make_daily_ar_series(n_days=500)
+        lags = select_pacf_lags(series)
+        assert len(lags) <= 8
+        assert lags == sorted(lags)
+
+    def test_top_k_1(self):
+        series = _make_daily_ar_series()
+        lags = select_pacf_lags(series, n_lags=50, top_k=1)
+        assert len(lags) == 1
+
+    def test_large_top_k_bounded_by_significant(self):
+        """If fewer than top_k significant lags exist, return only those."""
+        series = _make_daily_ar_series()
+        # Use a large top_k; returned list must still be <= n_sig
+        lags_k3 = select_pacf_lags(series, n_lags=5, top_k=3)
+        lags_k100 = select_pacf_lags(series, n_lags=5, top_k=100)
+        # Both share the same significant set (for n_lags=5); the larger top_k
+        # cannot return MORE lags, only the same.
+        assert set(lags_k3).issubset(set(lags_k100))
+        assert len(lags_k100) >= len(lags_k3)
diff --git a/uv.lock b/uv.lock
index ddac7dee..6e344191 100644
--- a/uv.lock
+++ b/uv.lock
@@ -3491,7 +3491,7 @@ wheels = [
 
 [[package]]
 name = "spotforecast2"
-version = "8.0.0rc1"
+version = "8.0.0"
 source = { editable = "." }
 dependencies = [
     { name = "astral" },

From 5a7857c1d9ac012a47e975354381dc0af7ba43fc Mon Sep 17 00:00:00 2001
From: bartzbeielstein <32470350+bartzbeielstein@users.noreply.github.com>
Date: Fri, 12 Jun 2026 03:13:17 +0200
Subject: [PATCH 4/6] fix(plots): apply code-review fixes to diagnostics.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

FIX 1 (blocker): plot_forecast_vs_reference MAD log now uses
`mad * unit_scale` (not `mad / unit_scale`) so the value is
reported in the display unit (GW) rather than inflated by 1000×.
Log format updated to include the unit label. Strengthened test
asserts logged value is 1.0 GW for a constant 1000 MW offset.

FIX 2: plot_shap_summary docstring notes the figure is harvested
via plt.gcf() (shap API limitation), the function is not
thread-safe, and callers must close the returned figure before
other pyplot work.

FIX 3: plot_shap_summary docstring replaces "at most max_samples
rows" with the precise stride description.

FIX 4: Remove feature_family from plots/__init__.py re-exports
and __all__; it is a string classifier, not a plot function.
Remains public at spotforecast2.plots.diagnostics.feature_family.
Tests already used the full module path — no changes needed there.

FIX 5: Remove dead n_annotated variable in plot_acf_with_lags.

FIX 6: Remove per-example matplotlib.use("Agg") calls from the
four docstring Examples; examples still avoid plt.show().

FIX 7: Add two feature_family precedence cases to the parametrized
table: "poly_window_24" -> "polynomial" and "sin_window" ->
"weather_window", pinning holiday > poly > window > cyclical > lag.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 src/spotforecast2/plots/__init__.py    |  2 --
 src/spotforecast2/plots/diagnostics.py | 26 ++++++----------
 tests/test_plots_diagnostics.py        | 42 ++++++++++++++++++++++++++
 3 files changed, 52 insertions(+), 18 deletions(-)

diff --git a/src/spotforecast2/plots/__init__.py b/src/spotforecast2/plots/__init__.py
index 5b545892..7f14046a 100644
--- a/src/spotforecast2/plots/__init__.py
+++ b/src/spotforecast2/plots/__init__.py
@@ -2,7 +2,6 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 
 from .diagnostics import (
-    feature_family,
     plot_acf_with_lags,
     plot_feature_importance_by_family,
     plot_forecast_vs_reference,
@@ -20,7 +19,6 @@
 )
 
 __all__ = [
-    "feature_family",
     "plot_acf_with_lags",
     "plot_distribution",
     "plot_distributions",
diff --git a/src/spotforecast2/plots/diagnostics.py b/src/spotforecast2/plots/diagnostics.py
index 9839ab53..f5d68b34 100644
--- a/src/spotforecast2/plots/diagnostics.py
+++ b/src/spotforecast2/plots/diagnostics.py
@@ -116,8 +116,6 @@ def plot_acf_with_lags(
         ```{python}
         import numpy as np
         import pandas as pd
-        import matplotlib
-        matplotlib.use("Agg")
         from spotforecast2.plots.diagnostics import plot_acf_with_lags
 
         rng = np.random.default_rng(0)
@@ -131,7 +129,6 @@ def plot_acf_with_lags(
     ax.bar(acf["lag"], acf["autocorrelation"], width=0.9, color="#1F4E79")
     ax.axhline(conf, color="#999999", lw=0.8, ls="--")
     ax.axhline(-conf, color="#999999", lw=0.8, ls="--")
-    n_annotated = 0
     for lag in key_lags:
         row = acf[acf["lag"] == lag]
         if row.empty:
@@ -146,7 +143,6 @@ def plot_acf_with_lags(
             color="#E07B00",
             arrowprops=dict(arrowstyle="->", color="#E07B00", lw=0.8),
         )
-        n_annotated += 1
     ax.set_xlabel("lag (hours)")
     ax.set_ylabel("autocorrelation")
     ax.set_title("ACF of Actual Load (annotated: data-selected key_lags)")
@@ -180,8 +176,6 @@ def plot_feature_importance_by_family(
 
     Examples:
         ```{python}
-        import matplotlib
-        matplotlib.use("Agg")
         from spotforecast2.plots.diagnostics import plot_feature_importance_by_family
 
         names = ["lag_1", "lag_24", "holiday_DE", "wind_speed_10m",
@@ -224,20 +218,23 @@ def plot_shap_summary(
     """SHAP bar-summary plot for a tree-based estimator.
 
     Ports `_plot_shap` from the chapter-14 team4 script.  `X` is subsampled
-    to at most `max_samples` rows (uniform stride) before computing SHAP
+    to approximately `max_samples` rows (uniform stride `len(X) // max_samples`;
+    lengths just above `max_samples` are passed in full) before computing SHAP
     values so the call stays fast even for large training matrices.
 
     The function uses `shap.TreeExplainer` and
     `shap.summary_plot(plot_type="bar", show=False)`, then captures the
-    current matplotlib figure via `plt.gcf()`.
+    current matplotlib figure via `plt.gcf()`.  Because the figure is harvested
+    from the global pyplot state this function is **not thread-safe**.  Callers
+    must close the returned figure (e.g. `plt.close(fig)`) before performing
+    other pyplot work.
 
     Args:
         estimator: A fitted tree-based estimator supported by
             `shap.TreeExplainer` (e.g. `LGBMRegressor`).
         X: Feature matrix; typically the design matrix returned by
             `ForecasterRecursive.create_train_X_y`.
-        max_samples: Maximum number of rows to pass to the SHAP explainer.
-            Defaults to 2000.
+        max_samples: Row budget passed to the SHAP explainer.  Defaults to 2000.
 
     Returns:
         A `matplotlib.figure.Figure`.
@@ -245,8 +242,6 @@ def plot_shap_summary(
     Examples:
         ```{python}
         #| eval: false
-        import matplotlib
-        matplotlib.use("Agg")
         import numpy as np
         import pandas as pd
         from lightgbm import LGBMRegressor
@@ -315,8 +310,6 @@ def plot_forecast_vs_reference(
 
     Examples:
         ```{python}
-        import matplotlib
-        matplotlib.use("Agg")
         import numpy as np
         import pandas as pd
         from spotforecast2.plots.diagnostics import plot_forecast_vs_reference
@@ -357,11 +350,12 @@ def plot_forecast_vs_reference(
         )
         mad = float((forecast.loc[overlap] - ref_aligned.loc[overlap]).abs().mean())
         logger.info(
-            "mean |%s - %s| over %d overlap hours: %.0f MW",
+            "mean |%s - %s| over %d overlap hours: %.1f %s",
             forecast_label,
             reference_label,
             len(overlap),
-            mad / unit_scale,
+            mad * unit_scale,
+            unit,
         )
     else:
         logger.info(
diff --git a/tests/test_plots_diagnostics.py b/tests/test_plots_diagnostics.py
index a0833a5f..d8b4fdb5 100644
--- a/tests/test_plots_diagnostics.py
+++ b/tests/test_plots_diagnostics.py
@@ -56,10 +56,14 @@ def close_all_figures():
     ("poly_hour_2", "polynomial"),
     ("poly_dayofweek_3", "polynomial"),
     ("POLY_month", "polynomial"),
+    # precedence: poly beats window
+    ("poly_window_24", "polynomial"),
     # weather_window family
     ("window_mean_72", "weather_window"),
     ("roll_window_168", "weather_window"),
     ("WINDOW_std_24", "weather_window"),
+    # precedence: window beats cyclical
+    ("sin_window", "weather_window"),
     # cyclical/RBF family
     ("sin_hour", "cyclical/RBF"),
     ("cos_hour", "cyclical/RBF"),
@@ -300,3 +304,41 @@ def test_forecast_vs_reference_unit_scale_applied(caplog):
     # y-axis upper limit should be in the MW range (>1000), not GW (<100)
     ymin, ymax = ax.get_ylim()
     assert ymax > 1000
+
+
+def test_forecast_vs_reference_mad_logged_in_display_unit(caplog):
+    """MAD is logged in the display unit (GW), not in raw MW.
+
+    With a constant offset of 1000 MW and unit_scale=1e-3, the logged value
+    must be 1.0 GW.  The old bug (mad / unit_scale) would have logged 1000000.
+    """
+    import logging
+    import re
+
+    idx = pd.date_range("2024-01-15", periods=24, freq="h", tz="UTC")
+    forecast = pd.Series(np.full(24, 40_000.0), index=idx)
+    # Exact constant offset of 1000 MW → MAD = 1000 MW = 1.0 GW
+    reference = pd.Series(np.full(24, 41_000.0), index=idx)
+
+    with caplog.at_level(logging.INFO, logger="spotforecast2.plots.diagnostics"):
+        plot_forecast_vs_reference(
+            forecast,
+            reference,
+            forecast_label="fc",
+            reference_label="ref",
+            unit_scale=1e-3,
+            unit="GW",
+        )
+
+    # Find the MAD log message and extract the numeric value
+    mad_messages = [r for r in caplog.records if "overlap hours" in r.message]
+    assert mad_messages, "Expected MAD log message not found in caplog"
+    msg = mad_messages[0].message
+    # Extract the number immediately before " GW"
+    match = re.search(r"([\d.]+)\s+GW", msg)
+    assert match, f"Could not parse GW value from log message: {msg!r}"
+    logged_value = float(match.group(1))
+    assert logged_value == pytest.approx(1.0, abs=0.05), (
+        f"Logged MAD should be 1.0 GW, got {logged_value}. "
+        f"Full message: {msg!r}"
+    )

From 62af0dddc84de20cbf4145dbf961852d404a3643 Mon Sep 17 00:00:00 2001
From: bartzbeielstein <32470350+bartzbeielstein@users.noreply.github.com>
Date: Fri, 12 Jun 2026 03:13:41 +0200
Subject: [PATCH 5/6] fix(stats,model_selection): code-review fixes for PACF
 lag selection and boundary helpers

FIX 1 (_quarto.yml): remove invalid `stats.select_pacf_lags` quartodoc contents
entry and matching sidebar item; select_pacf_lags is a function inside
stats/autocorrelation, already rendered as a section of stats.autocorrelation.qmd.
Delete stale docs/reference/stats.select_pacf_lags.qmd.

FIX 2 (boundary.py): expand module docstring with a prominent paragraph
contrasting the two key-prefix conventions (report_boundary_positions strips
"estimator__", boundary_report/suggest_bounds use full keys). Add
logger.warning in boundary_report when result is empty but search_space
contains numeric tuple dims, suggesting a key-convention mismatch.

FIX 3 (uv.lock): restore uv.lock to develop's version; branch-specific lock
hunk dropped.

FIX 4 (test_model_selection_boundary.py): change test_all_keys_preserved to
use "estimator__"-prefixed best_params so the flagged/widened code path is
actually exercised; add asserts that the near-upper-boundary integer dim was
widened and the interior log dim is unchanged.

FIX 5 (test_stats_select_pacf_lags.py): remove dead `except ValueError: pass`
arm from test_very_short_series_fallback (never raises with fallback given);
add separate test_very_short_series_no_fallback_raises asserting ValueError on
constant series without fallback.

FIX 6 (boundary.py): add `if isinstance(val, bool): continue` guard in
boundary_report mirroring report_boundary_positions; note boolean skip in
docstring.

FIX 7 (autocorrelation.py): coerce fallback elements to int via
`[int(x) for x in fallback]`; note the coercion in the fallback param
docstring.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 _quarto.yml                                   |  3 --
 src/spotforecast2/model_selection/boundary.py | 39 ++++++++++++++++++-
 src/spotforecast2/stats/autocorrelation.py    |  5 ++-
 tests/test_model_selection_boundary.py        | 16 +++++++-
 tests/test_stats_select_pacf_lags.py          | 25 +++++++-----
 uv.lock                                       |  2 +-
 6 files changed, 70 insertions(+), 20 deletions(-)

diff --git a/_quarto.yml b/_quarto.yml
index 298f21f1..e596b288 100644
--- a/_quarto.yml
+++ b/_quarto.yml
@@ -128,8 +128,6 @@ website:
             contents:
               - text: "autocorrelation"
                 file: docs/reference/stats.autocorrelation.qmd
-              - text: "select_pacf_lags"
-                file: docs/reference/stats.select_pacf_lags.qmd
           - section: "Tasks"
             contents:
               - text: "task_demo"
@@ -261,7 +259,6 @@ quartodoc:
       desc: "Statistical analysis tools."
       contents:
         - stats.autocorrelation
-        - stats.select_pacf_lags
 
     - title: "Tasks"
       desc: "Demonstration and predefined forecasting tasks."
diff --git a/src/spotforecast2/model_selection/boundary.py b/src/spotforecast2/model_selection/boundary.py
index 3cd46835..bd40373e 100644
--- a/src/spotforecast2/model_selection/boundary.py
+++ b/src/spotforecast2/model_selection/boundary.py
@@ -24,6 +24,25 @@
   widened on the pressed side; pass it straight back to
   ``run_task_spotoptim(search_space=...)``.
 
+**Key convention difference — prefix handling:**
+
+``report_boundary_positions`` strips the ``"estimator__"`` prefix from each
+search-space key before looking up the value in ``params``.  The ``params``
+dict is expected to come from ``estimator.get_params()`` (scikit-learn style),
+which returns UN-prefixed keys such as ``"reg_alpha"`` — not
+``"estimator__reg_alpha"``.
+
+``boundary_report`` and ``suggest_bounds`` look up ``best_params`` using the
+search-space key **as-is**, including any ``"estimator__"`` prefix.  The
+``best_params`` dict is expected to come from a SpotOptim result, which stores
+FULL search-space keys (e.g. ``"estimator__reg_alpha"``).
+
+Mixing conventions (passing ``get_params()``-style keys to ``boundary_report``,
+or SpotOptim result keys to ``report_boundary_positions``) will silently produce
+an empty result because no key matches.  If ``boundary_report`` returns an empty
+DataFrame for a space with numeric dimensions, a key-convention mismatch is the
+most likely cause.
+
 Ported from:
 
 - ``report_boundary_positions``: ``bart26k-lecture/scripts/team4_4zones_submit.py``
@@ -181,8 +200,8 @@ def boundary_report(
     """Tabulate each tuned value's position inside its search-space bound.
 
     Returns a DataFrame sorted by descending position, with one row per
-    numeric dimension. Categorical dimensions are skipped. ``flag`` is one of
-    ``"> upper"``, ``"< lower"``, or ``""`` (interior).
+    numeric dimension. Categorical and boolean-valued dimensions are skipped.
+    ``flag`` is one of ``"> upper"``, ``"< lower"``, or ``""`` (interior).
 
     This function uses the `search_space` keys as-is (including any
     ``"estimator__"`` prefix) to look up matching entries in `best_params`.
@@ -237,6 +256,8 @@ def boundary_report(
         val = best_params.get(name)  # type: ignore[arg-type]
         if val is None:
             continue
+        if isinstance(val, bool):
+            continue  # bool is a subclass of int; skip to avoid false positions
         if is_log:
             if val <= 0 or low <= 0:
                 continue
@@ -263,6 +284,20 @@ def boundary_report(
         )
     df = pd.DataFrame(rows)
     if df.empty:
+        numeric_dims = sum(
+            1
+            for spec in search_space.values()
+            if isinstance(spec, tuple) and len(spec) in (2, 3)
+        )
+        if numeric_dims > 0:
+            _logger.warning(
+                "boundary_report: result is empty but search_space has %d numeric "
+                "dimension(s). Check key-convention: best_params must use the FULL "
+                "search-space keys (e.g. 'estimator__reg_alpha'), not the unprefixed "
+                "get_params() style ('reg_alpha'). "
+                "Use report_boundary_positions() instead if you have unprefixed keys.",
+                numeric_dims,
+            )
         return pd.DataFrame(columns=["param", "low", "high", "value", "scale", "position", "flag"])
     return df.sort_values("position", ascending=False).reset_index(drop=True)
 
diff --git a/src/spotforecast2/stats/autocorrelation.py b/src/spotforecast2/stats/autocorrelation.py
index e89a6ed4..9b4027c3 100644
--- a/src/spotforecast2/stats/autocorrelation.py
+++ b/src/spotforecast2/stats/autocorrelation.py
@@ -224,7 +224,8 @@ def select_pacf_lags(
             ranked by descending `|PACF|`). Default is 8.
         fallback: Lag list returned when no lag exceeds the significance band
             (degenerate series: too short, nearly constant, or `n_lags` too
-            large). If `None`, a `ValueError` is raised instead.
+            large). Elements are coerced to `int` before returning. If `None`,
+            a `ValueError` is raised instead.
 
     Returns:
         Sorted list of lag integers (ascending). Length is at most `top_k`;
@@ -316,7 +317,7 @@ def select_pacf_lags(
     )
     if not key_lags:
         if fallback is not None:
-            return list(fallback)
+            return [int(x) for x in fallback]
         raise ValueError(
             f"select_pacf_lags: no significant PACF lags found "
             f"(N={len(series)}, band={conf:.4f}, n_lags={n_lags}). "
diff --git a/tests/test_model_selection_boundary.py b/tests/test_model_selection_boundary.py
index 8318d483..1386bdc3 100644
--- a/tests/test_model_selection_boundary.py
+++ b/tests/test_model_selection_boundary.py
@@ -257,8 +257,16 @@ def test_categorical_unchanged(self):
         assert new_space["lags"] == space["lags"]
 
     def test_all_keys_preserved(self):
-        """The returned dict has exactly the same keys as search_space."""
-        best = {"num_leaves": 512, "learning_rate": 0.05}
+        """The returned dict has exactly the same keys as search_space.
+
+        Uses prefixed keys (SpotOptim-result style) so the flagged/widened path
+        is exercised for at least one dimension.
+        """
+        # num_leaves=1000 is near upper of (8, 1024) — should be widened
+        best = {
+            "estimator__num_leaves": 1000,
+            "estimator__learning_rate": 0.05,
+        }
         space = {
             "estimator__num_leaves": (8, 1024),
             "estimator__learning_rate": (0.005, 0.3, "log10"),
@@ -266,6 +274,10 @@ def test_all_keys_preserved(self):
         }
         new_space = suggest_bounds(best, space)
         assert set(new_space.keys()) == set(space.keys())
+        # The near-upper-boundary integer dim must have been widened upward
+        assert new_space["estimator__num_leaves"][1] > 1024
+        # Interior log10 dim is unchanged
+        assert new_space["estimator__learning_rate"] == space["estimator__learning_rate"]
 
     def test_widen_factor_parameter(self):
         """Different widen_factor values produce different bounds."""
diff --git a/tests/test_stats_select_pacf_lags.py b/tests/test_stats_select_pacf_lags.py
index 95abc872..7cf687d3 100644
--- a/tests/test_stats_select_pacf_lags.py
+++ b/tests/test_stats_select_pacf_lags.py
@@ -101,20 +101,25 @@ def test_constant_series_no_fallback_raises(self):
             select_pacf_lags(series, n_lags=10, fallback=None)
 
     def test_very_short_series_fallback(self):
-        """Series shorter than 2*n_lags+1; PACF falls back on statsmodels truncation.
+        """Series shorter than 2*n_lags+1; statsmodels truncates n_lags internally.
 
-        No lag should pass the wide confidence band for a near-white-noise short series.
-        We do not mandate whether ValueError or fallback is triggered; only that either
-        the fallback list is returned or a ValueError is raised.
+        With fallback provided the function must return either the fallback list
+        (no significant lags found) or a valid list of ints (some lags pass the
+        wide confidence band for a very short series). It must never raise.
         """
         rng = np.random.default_rng(7)
         series = pd.Series(rng.standard_normal(20))
-        try:
-            result = select_pacf_lags(series, n_lags=5, top_k=2, fallback=[1, 2])
-            # if no significant lags, fallback should be returned
-            assert isinstance(result, list)
-        except ValueError:
-            pass  # also acceptable
+        result = select_pacf_lags(series, n_lags=5, top_k=2, fallback=[1, 2])
+        assert isinstance(result, list)
+        assert all(isinstance(x, int) for x in result)
+
+    def test_very_short_series_no_fallback_raises(self):
+        """Same short series without fallback: ValueError is raised when no
+        significant lags are found (constant-like after statsmodels truncation).
+        """
+        series = pd.Series([1.0] * 12)  # constant → no significant lags, no fallback
+        with pytest.raises(ValueError, match="no significant"):
+            select_pacf_lags(series, n_lags=5, top_k=2, fallback=None)
 
     def test_fallback_list_copied(self):
         """The returned list must be a copy, not the same object as fallback."""
diff --git a/uv.lock b/uv.lock
index 6e344191..ddac7dee 100644
--- a/uv.lock
+++ b/uv.lock
@@ -3491,7 +3491,7 @@ wheels = [
 
 [[package]]
 name = "spotforecast2"
-version = "8.0.0"
+version = "8.0.0rc1"
 source = { editable = "." }
 dependencies = [
     { name = "astral" },

From 6eacc37ed1df960bff6b289c0facbef19263de91 Mon Sep 17 00:00:00 2001
From: semantic-release-bot <semantic-release-bot@martynus.net>
Date: Fri, 12 Jun 2026 01:33:40 +0000
Subject: [PATCH 6/6] chore(release): 8.1.0-rc.1 [skip ci]

## [8.1.0-rc.1](https://github.com/sequential-parameter-optimization/spotforecast2/compare/v8.0.0...v8.1.0-rc.1) (2026-06-12)

### Features

* **plots:** operational diagnostics plots (ACF, importance-by-family, SHAP summary, forecast-vs-reference) ([0bbc0c6](https://github.com/sequential-parameter-optimization/spotforecast2/commit/0bbc0c6586502f5f0466fa4d24a92a8bdfcec00f))
* **stats:** PACF lag selection + search-space boundary report ([c8079b2](https://github.com/sequential-parameter-optimization/spotforecast2/commit/c8079b2a10fbfb97e333c89948eb234fc2381e9d))

### Bug Fixes

* **plots:** apply code-review fixes to diagnostics.py ([5a7857c](https://github.com/sequential-parameter-optimization/spotforecast2/commit/5a7857c1d9ac012a47e975354381dc0af7ba43fc))
* **stats,model_selection:** code-review fixes for PACF lag selection and boundary helpers ([62af0dd](https://github.com/sequential-parameter-optimization/spotforecast2/commit/62af0dddc84de20cbf4145dbf961852d404a3643))
---
 CHANGELOG.md   | 12 ++++++++++++
 pyproject.toml |  2 +-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8595eaba..7e306160 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,15 @@
+## [8.1.0-rc.1](https://github.com/sequential-parameter-optimization/spotforecast2/compare/v8.0.0...v8.1.0-rc.1) (2026-06-12)
+
+### Features
+
+* **plots:** operational diagnostics plots (ACF, importance-by-family, SHAP summary, forecast-vs-reference) ([0bbc0c6](https://github.com/sequential-parameter-optimization/spotforecast2/commit/0bbc0c6586502f5f0466fa4d24a92a8bdfcec00f))
+* **stats:** PACF lag selection + search-space boundary report ([c8079b2](https://github.com/sequential-parameter-optimization/spotforecast2/commit/c8079b2a10fbfb97e333c89948eb234fc2381e9d))
+
+### Bug Fixes
+
+* **plots:** apply code-review fixes to diagnostics.py ([5a7857c](https://github.com/sequential-parameter-optimization/spotforecast2/commit/5a7857c1d9ac012a47e975354381dc0af7ba43fc))
+* **stats,model_selection:** code-review fixes for PACF lag selection and boundary helpers ([62af0dd](https://github.com/sequential-parameter-optimization/spotforecast2/commit/62af0dddc84de20cbf4145dbf961852d404a3643))
+
 ## [8.0.0](https://github.com/sequential-parameter-optimization/spotforecast2/compare/v7.1.0...v8.0.0) (2026-06-11)
 
 ### ⚠ BREAKING CHANGES
diff --git a/pyproject.toml b/pyproject.toml
index 697539ef..c4fbca15 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "spotforecast2"
-version = "8.0.0"
+version = "8.1.0-rc.1"
 description = "Forecasting with spot"
 readme = "README.md"
 license = { text = "AGPL-3.0-or-later" }