Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ dependencies = [
"ruff>=0.15.6",
"scikit-learn>=1.8.0",
"shap>=0.49.1",
"spotforecast2-safe>=16.3.0,<17",
"spotforecast2-safe>=18.0.0,<19",
"spotoptim>=0.12.3",
"tqdm>=4.67.2",
]
Expand Down
19 changes: 9 additions & 10 deletions src/spotforecast2/multitask/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,8 @@
agg_predictor,
)

from spotforecast2.plots.plotter import (
make_plot,
plot_with_outliers as _plot_with_outliers,
)
from spotforecast2.plots.plotter import make_plot
from spotforecast2.plots.plotter import plot_with_outliers as _plot_with_outliers

__all__ = [
"SafeBaseTask",
Expand Down Expand Up @@ -63,6 +61,7 @@ def plot_with_outliers(self) -> None:
df_pipeline=self.df_pipeline, # type: ignore[attr-defined]
df_pipeline_original=self.df_pipeline_original, # type: ignore[attr-defined]
config=self.config, # type: ignore[attr-defined]
targets=self.run_state.targets, # type: ignore[attr-defined]
)

def _show_prediction_figure(
Expand Down Expand Up @@ -100,7 +99,7 @@ def _show_prediction_figure_agg(
agg_pkg,
title=(
f"Aggregated Forecast: Weighted Combination of "
f"Targets {self.config.targets} ({task_name})" # type: ignore[attr-defined]
f"Targets {self.run_state.targets} ({task_name})" # type: ignore[attr-defined]
),
save=False,
)
Expand Down Expand Up @@ -161,24 +160,24 @@ def _aggregate_and_show(
Returns:
Aggregated prediction package dict.
"""
if len(self.config.targets) == 1:
target = self.config.targets[0]
if len(self.run_state.targets) == 1:
target = self.run_state.targets[0]
agg_pkg = results[target]
self.agg_results[task_name] = agg_pkg
return agg_pkg

if self.config.agg_weights is not None:
active_weights = self.config.agg_weights[: len(self.config.targets)]
active_weights = self.config.agg_weights[: len(self.run_state.targets)]
else:
n = len(self.config.targets)
n = len(self.run_state.targets)
active_weights = [1.0 / n] * n
self.logger.info(
"No agg_weights configured — using equal weights (1/%d each).", n
)

agg_pkg = self.agg_predictor(
results=results,
targets=self.config.targets,
targets=self.run_state.targets,
weights=active_weights,
)
self.agg_results[task_name] = agg_pkg
Expand Down
34 changes: 23 additions & 11 deletions src/spotforecast2/plots/plotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,7 +623,10 @@ def plot_actual_vs_predicted(


def plot_with_outliers(
df_pipeline: pd.DataFrame, df_pipeline_original: pd.DataFrame, config: Any
df_pipeline: pd.DataFrame,
df_pipeline_original: pd.DataFrame,
config: Any,
targets: Optional[list[str]] = None,
) -> None:
"""Interactive time series plot with outliers and optional bounds.

Expand All @@ -637,15 +640,26 @@ def plot_with_outliers(
The plot title includes the percentage of outliers detected for each
target variable.

The resolved list of target column names must be passed explicitly via
*targets*. Callers inside the pipeline (e.g. ``PlottingMixin``) obtain
this list from ``task.run_state.targets`` after ``prepare_data`` has run.
A ``SimpleNamespace``/dict-like *config* that carries its own ``targets``
attribute is still accepted for backwards-compatible standalone usage —
the explicit *targets* argument takes precedence when provided.

Args:
df_pipeline (pd.DataFrame): The processed DataFrame from the pipeline,
which may contain NaN values where outliers have been detected and
removed.
df_pipeline_original (pd.DataFrame): The original DataFrame before
outlier removal.
config: Configuration object containing ``targets`` (list of column
names) and optionally ``bounds`` (list of ``(lower, upper)``
tuples, one per target, in the same order as ``targets``).
config: Configuration object carrying ``bounds`` (optional list of
``(lower, upper)`` tuples, one per target, in the same order as
*targets*). ``config.targets`` is used as a fallback when the
*targets* argument is ``None`` (legacy path).
targets: Resolved list of target column names. When ``None`` the
function falls back to ``config.targets`` (legacy callers that
pass a ``SimpleNamespace`` with ``targets`` set).

Returns:
None. Displays one interactive Plotly figure per target variable.
Expand All @@ -667,17 +681,15 @@ def plot_with_outliers(
data.loc[dates[20], "target2"] = 150 # Outlier in target2
df_pipeline = data.copy()
df_pipeline.loc[[dates[10], dates[20]], ["target1", "target2"]] = np.nan
# Config with bounds
config = SimpleNamespace(
targets=["target1", "target2"],
bounds=[(-10, 200), (0, 100)],
)
plot_with_outliers(df_pipeline, data, config)
# Config with bounds; targets passed explicitly
config = SimpleNamespace(bounds=[(-10, 200), (0, 100)])
plot_with_outliers(df_pipeline, data, config, targets=["target1", "target2"])
```
"""
bounds = getattr(config, "bounds", None)
_targets = targets if targets is not None else config.targets

for i, target in enumerate(config.targets):
for i, target in enumerate(_targets):
fig = go.Figure()

# Plot Regular Data (lightgrey)
Expand Down
8 changes: 4 additions & 4 deletions tests/test_agg_predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ class TestAggregateAndShowAlwaysAggregates:

def _make_task_with_targets(self, targets):
task = MultiTask()
task.config.targets = targets
task.run_state.targets = targets
return task

def test_returns_dict_without_agg_weights(self):
Expand Down Expand Up @@ -356,13 +356,13 @@ def _inject_pipeline(task, n_rows=300):
df_test = df.iloc[-24:].copy().reset_index().rename(columns={"index": "DateTime"})
task.df_pipeline = df
task.df_test = df_test
task.config.targets = ["A", "B"]
task.run_state.targets = ["A", "B"]
task.config.agg_weights = [0.5, 0.5]
task.data_with_exog = None
task.exo_pred = None
task.exog_feature_names = []
task.config.end_train_ts = idx[-25]
task.config.start_train_ts = idx[0]
task.run_state.end_train_ts = idx[-25]
task.run_state.start_train_ts = idx[0]
return task


Expand Down
33 changes: 18 additions & 15 deletions tests/test_consumer_contract_team4.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@
from pathlib import Path

import pytest
from spotforecast2_safe.configurator.config_entsoe import ConfigEntsoe

from spotforecast2.multitask import MultiTask
from spotforecast2_safe.configurator.config_entsoe import ConfigEntsoe

DEFAULT_QMD = Path.home() / "workspace" / "bart26k-lecture" / "14_team_4_submission.qmd"
QMD_PATH = Path(os.environ.get("TEAM4_QMD", DEFAULT_QMD))
Expand Down Expand Up @@ -134,9 +134,9 @@ def test_spotforecast_imports_resolve(trees):
continue
mod = importlib.import_module(node.module)
for alias in node.names:
assert hasattr(mod, alias.name), (
f"{node.module} no longer exports {alias.name!r}"
)
assert hasattr(
mod, alias.name
), f"{node.module} no longer exports {alias.name!r}"
checked += 1
elif isinstance(node, ast.Import):
for alias in node.names:
Expand Down Expand Up @@ -173,9 +173,9 @@ def test_config_attribute_surface(trees):
):
if not hasattr(cfg, node.attr):
missing.add(node.attr)
assert not missing, (
f"qmd uses config attributes missing on ConfigEntsoe: {sorted(missing)}"
)
assert (
not missing
), f"qmd uses config attributes missing on ConfigEntsoe: {sorted(missing)}"


def test_multitask_call_and_pipeline_methods(trees):
Expand All @@ -189,9 +189,9 @@ def test_multitask_call_and_pipeline_methods(trees):
mt_params = set(inspect.signature(MultiTask).parameters)
for kw in call.keywords:
if kw.arg is not None and kw.arg not in mt_params:
assert kw.arg in ConfigEntsoe._PARAM_NAMES, (
f"MultiTask override {kw.arg!r} is not a ConfigEntsoe field"
)
assert (
kw.arg in ConfigEntsoe._PARAM_NAMES
), f"MultiTask override {kw.arg!r} is not a ConfigEntsoe field"

mt_vars = _multitask_var_names(trees)
assert mt_vars, "qmd no longer assigns a MultiTask instance"
Expand All @@ -209,8 +209,11 @@ def test_multitask_call_and_pipeline_methods(trees):
name = call.func.attr
method = getattr(MultiTask, name, None)
assert method is not None, f"MultiTask lost method {name!r} used by the qmd"
_bind(method, ast.Call( # account for the bound `self` slot
func=call.func,
args=[ast.Constant(value=None)] + list(call.args),
keywords=call.keywords,
))
_bind(
method,
ast.Call( # account for the bound `self` slot
func=call.func,
args=[ast.Constant(value=None)] + list(call.args),
keywords=call.keywords,
),
)
8 changes: 4 additions & 4 deletions tests/test_cv_block_size.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
be present on the config class — the override is exercised via the same
``getattr`` fallback that ``cv_ts`` uses, by assigning the attribute on the
constructed config object (mirroring how the existing cv_ts tests set
``config.end_train_ts`` and ``config.train_size``).
``run_state.end_train_ts`` and ``config.train_size``).
"""

import math
Expand All @@ -41,7 +41,7 @@


def _make_task(tmp_path: Path, *, val_days: int = 7, **kwargs) -> LazyTask:
"""Return a LazyTask with ``end_train_ts`` pinned to ``_END_TRAIN``.
"""Return a LazyTask with ``run_state.end_train_ts`` pinned to ``_END_TRAIN``.

Mirrors the helper in ``test_cv_ts_sklearn.py``: ``delta_val`` is computed
explicitly from ``val_days * number_folds`` rather than derived inside
Expand All @@ -51,7 +51,7 @@ def _make_task(tmp_path: Path, *, val_days: int = 7, **kwargs) -> LazyTask:
overrides = dict(kwargs, delta_val=pd.Timedelta(days=val_days * number_folds))
overrides.setdefault("data_frame_name", "test_data")
t = LazyTask(cache_home=tmp_path, **overrides)
t.config.end_train_ts = _END_TRAIN
t.run_state.end_train_ts = _END_TRAIN
return t


Expand All @@ -65,7 +65,7 @@ def _skl_cv(
task: LazyTask, y_train: pd.Series, test_size: int
) -> SklearnTimeSeriesSplit:
"""Build the sklearn TimeSeriesSplit equivalent for *task* with *test_size*."""
end_cv = task.config.end_train_ts - task.config.delta_val
end_cv = task.run_state.end_train_ts - task.config.delta_val
n_train_cv = len(y_train.loc[:end_cv])
max_train_size = n_train_cv if task.config.train_size is not None else None
return SklearnTimeSeriesSplit(
Expand Down
6 changes: 3 additions & 3 deletions tests/test_cv_ts.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@


def _make_task(tmp_path: Path, *, val_days: int = 7, **kwargs) -> LazyTask:
"""Return a LazyTask whose ``config.end_train_ts`` is set without loading data.
"""Return a LazyTask whose ``run_state.end_train_ts`` is set without loading data.

``val_days`` is pinned to 7 so that ``delta_val = 7 * number_folds`` days,
keeping the test series length requirements manageable. After the
Expand All @@ -51,7 +51,7 @@ def _make_task(tmp_path: Path, *, val_days: int = 7, **kwargs) -> LazyTask:
overrides = dict(kwargs, delta_val=pd.Timedelta(days=val_days * number_folds))
overrides.setdefault("data_frame_name", "test_data")
t = LazyTask(cache_home=tmp_path, **overrides)
t.config.end_train_ts = _END_TRAIN
t.run_state.end_train_ts = _END_TRAIN
return t


Expand Down Expand Up @@ -110,7 +110,7 @@ def test_allow_incomplete_fold_is_true(self, task, y_train):

class TestCvTsInitialTrainSize:
def test_initial_train_size_matches_slice(self, task, y_train):
end_cv = task.config.end_train_ts - task.config.delta_val
end_cv = task.run_state.end_train_ts - task.config.delta_val
expected = len(y_train.loc[:end_cv])
cv = task.cv_ts(y_train)
assert cv.initial_train_size == expected
Expand Down
16 changes: 8 additions & 8 deletions tests/test_cv_ts_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@


def _make_task(tmp_path, *, val_days: int = 7, **kwargs) -> LazyTask:
"""Return a LazyTask with ``end_train_ts`` set to ``_END_TRAIN``.
"""Return a LazyTask with ``run_state.end_train_ts`` set to ``_END_TRAIN``.

After the config-object refactor ``delta_val`` is no longer derived from
``val_days * number_folds`` inside ``BaseTask``; the helper computes it
Expand All @@ -44,7 +44,7 @@ def _make_task(tmp_path, *, val_days: int = 7, **kwargs) -> LazyTask:
overrides = dict(kwargs, delta_val=pd.Timedelta(days=val_days * number_folds))
overrides.setdefault("data_frame_name", "test_data")
t = LazyTask(cache_home=tmp_path, **overrides)
t.config.end_train_ts = _END_TRAIN
t.run_state.end_train_ts = _END_TRAIN
return t


Expand All @@ -56,7 +56,7 @@ def _make_y_train(end: pd.Timestamp = _END_TRAIN, n: int = _N) -> pd.Series:

def _skl_cv(task: LazyTask, y_train: pd.Series) -> SklearnTimeSeriesSplit:
"""Build the equivalent sklearn TimeSeriesSplit for *task*."""
end_cv = task.config.end_train_ts - task.config.delta_val
end_cv = task.run_state.end_train_ts - task.config.delta_val
n_train_cv = len(y_train.loc[:end_cv])
max_train_size = n_train_cv if task.config.train_size is not None else None
return SklearnTimeSeriesSplit(
Expand Down Expand Up @@ -230,7 +230,7 @@ def test_fixed_train_size_flag_false_when_train_size_none(self, tmp_path, y_trai

def test_max_train_size_equals_n_train_cv(self, task, y_train):
"""When ``train_size`` is set, ``max_train_size`` equals ``n_train_cv``."""
end_cv = task.config.end_train_ts - task.config.delta_val
end_cv = task.run_state.end_train_ts - task.config.delta_val
n_train_cv = len(y_train.loc[:end_cv])
# Each fold's training set should be at most n_train_cv observations.
for train_idx, _ in _skl_cv(task, y_train).split(y_train):
Expand All @@ -248,8 +248,8 @@ def test_more_folds_same_initial_train_size_fixed_window(self, tmp_path, y_train
task_5 = _make_task(tmp_path, number_folds=5)
task_10 = _make_task(tmp_path, number_folds=10)
# Both use the same n_train_cv derived from end_cv
end_cv_5 = task_5.config.end_train_ts - task_5.config.delta_val
end_cv_10 = task_10.config.end_train_ts - task_10.config.delta_val
end_cv_5 = task_5.run_state.end_train_ts - task_5.config.delta_val
end_cv_10 = task_10.run_state.end_train_ts - task_10.config.delta_val
n5 = len(y_train.loc[:end_cv_5])
n10 = len(y_train.loc[:end_cv_10])
# With fixed window: initial_train_size == max_train_size == n_train_cv
Expand All @@ -260,8 +260,8 @@ def test_more_folds_consumes_more_validation_data(self, tmp_path, y_train):
"""Larger number_folds enlarges delta_val, shrinking n_train_cv."""
task_5 = _make_task(tmp_path, number_folds=5)
task_15 = _make_task(tmp_path, number_folds=15)
end_cv_5 = task_5.config.end_train_ts - task_5.config.delta_val
end_cv_15 = task_15.config.end_train_ts - task_15.config.delta_val
end_cv_5 = task_5.run_state.end_train_ts - task_5.config.delta_val
end_cv_15 = task_15.run_state.end_train_ts - task_15.config.delta_val
n5 = len(y_train.loc[:end_cv_5])
n15 = len(y_train.loc[:end_cv_15])
assert (
Expand Down
4 changes: 2 additions & 2 deletions tests/test_docs_task_consistency.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ def test_run_strategy_saves_models_when_auto_save_enabled(self):
from spotforecast2.multitask.base import BaseTask

task = BaseTask(predict_size=24, auto_save_models=True)
task.config.targets = ["t1"]
task.run_state.targets = ["t1"]
strategy = MagicMock(name="strategy")
strategy.prepare_forecaster.return_value = MagicMock(name="prepared")

Expand Down Expand Up @@ -382,7 +382,7 @@ def test_run_strategy_skips_save_models_when_disabled(self):
from spotforecast2.multitask.base import BaseTask

task = BaseTask(predict_size=24, auto_save_models=False)
task.config.targets = ["t1"]
task.run_state.targets = ["t1"]
strategy = MagicMock(name="strategy")
strategy.prepare_forecaster.return_value = MagicMock(name="prepared")

Expand Down
8 changes: 4 additions & 4 deletions tests/test_exog_providers_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@ def _make_task(**config_kwargs) -> MultiTask:
mt = MultiTask(cfg)
idx = pd.date_range("2024-01-01", periods=96, freq="h", tz="UTC")
mt.df_pipeline = pd.DataFrame({"target_0": np.arange(96.0)}, index=idx)
mt.config.targets = ["target_0"]
mt.config.data_start = idx[0]
mt.config.data_end = idx[-1]
mt.config.cov_end = idx[-1]
mt.run_state.targets = ["target_0"]
mt.run_state.data_start = idx[0]
mt.run_state.data_end = idx[-1]
mt.run_state.cov_end = idx[-1]
return mt


Expand Down
6 changes: 3 additions & 3 deletions tests/test_exogenous_failure_scenarios.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@ def _make_task_ready(
mt = MultiTask(cfg)
idx = pd.date_range("2024-01-01", periods=48, freq="h", tz="UTC")
mt.df_pipeline = pd.DataFrame({"target_0": range(48)}, index=idx)
mt.config.targets = ["target_0"]
mt.config.data_start = idx[0]
mt.config.cov_end = idx[-1]
mt.run_state.targets = ["target_0"]
mt.run_state.data_start = idx[0]
mt.run_state.cov_end = idx[-1]
return mt


Expand Down
Loading
Loading