From 0ee7a1ff895c1ff54b6be7673fbd209f0bf5ab7f Mon Sep 17 00:00:00 2001 From: bartzbeielstein <32470350+bartzbeielstein@users.noreply.github.com> Date: Sun, 7 Jun 2026 19:06:45 +0200 Subject: [PATCH 1/2] feat(multitask)!: drop config.task write and get_target_data config-window fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove two pieces of deprecated bridging code that were retained for backward compatibility during the RunState extraction (v18.0.0) but are now confirmed to have zero callers: 1. `get_target_data` config-window fallback (`src/spotforecast2_safe/manager/features.py`): the function formerly tried `getattr(config, "start_train_ts"/"end_train_ts", None)` when the explicit parameters were `None`, emitting `DeprecationWarning`. The fallback blocks (~lines 825-866 pre-removal) are deleted. When either explicit parameter is `None` the function now raises `ValueError` unconditionally. The `warnings` import is removed as it is no longer used. Docstring updated: the Args sections for `start_train_ts`/`end_train_ts` no longer mention the deprecated fallback; they state the parameter is keyword-only, required, and `None` → raises `ValueError`. The paragraph mentioning the config fallback for backward compatibility is replaced with a note that both parameters are required. The function signature now enforces this at the language level: everything after `config` is keyword-only (bare `*` separator), and `start_train_ts`/`end_train_ts` have no default. The runtime `if ... is None: raise ValueError` guards are kept so that callers who pass `None` explicitly get a clear error message rather than a `TypeError`. 2. `config.task` write in `MultiTaskBase.__init__` (`src/spotforecast2_safe/multitask/base.py`, line 361 pre-removal): the assignment `config.task = self.TASK` and its comment ("Propagate the task identifier so config-aware helpers know the mode.") are removed. 
`ConfigMulti.task` retains the value set by the caller at construction or via `set_params`/direct assignment and is no longer mutated as a side effect of task-class instantiation. Also: revert ruff-collapsed mi_sample_size f-string at features.py ~line 495 back to the original two-line form (cosmetic, out of scope of the breaking change); add parametrized regression test `test_none_window_timestamp_raises_value_error` to `tests/test_manager_features.py` (covers both `start_train_ts` and `end_train_ts`); update stale fixture comment in same file. BREAKING CHANGE: `get_target_data` no longer accepts `None` for `start_train_ts` or `end_train_ts` and will no longer fall back to `config.start_train_ts` / `config.end_train_ts` — callers must pass explicit `pd.Timestamp` values (e.g. from `task.run_state.start_train_ts` / `task.run_state.end_train_ts`). Both parameters are now keyword-only (bare `*` separator after `config`); positional passing of `data_with_exog`, `exog_feature_names`, or `exo_pred` is also no longer accepted. Additionally, constructing any `MultiTask` subclass no longer mutates `config.task`; code that relied on `config.task` reflecting the active task type after task instantiation must read `task.TASK` from the task object instead. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/spotforecast2_safe/manager/features.py | 80 +++++++--------------- src/spotforecast2_safe/multitask/base.py | 2 - tests/test_manager_features.py | 17 ++++- 3 files changed, 38 insertions(+), 61 deletions(-) diff --git a/src/spotforecast2_safe/manager/features.py b/src/spotforecast2_safe/manager/features.py index 5e1b1260..e4f0a26b 100644 --- a/src/spotforecast2_safe/manager/features.py +++ b/src/spotforecast2_safe/manager/features.py @@ -26,7 +26,6 @@ from __future__ import annotations -import warnings from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union import pandas as pd @@ -673,11 +672,12 @@ def get_target_data( target: str, df_pipeline: pd.DataFrame, config: "ConfigMulti", + *, data_with_exog: Optional[pd.DataFrame] = None, exog_feature_names: Optional[List[str]] = None, exo_pred: Optional[pd.DataFrame] = None, - start_train_ts: Optional[pd.Timestamp] = None, - end_train_ts: Optional[pd.Timestamp] = None, + start_train_ts: pd.Timestamp, + end_train_ts: pd.Timestamp, ) -> Tuple[pd.Series, Optional[pd.DataFrame], Optional[pd.DataFrame]]: """Extract the training series and exogenous slices for one target column. @@ -693,9 +693,8 @@ def get_target_data( The training-window timestamps are supplied as explicit parameters so that this helper stays decoupled from ``RunState`` (ADR - ``adr-multitask-configmulti-merge``, step 5). When either is ``None`` - the function falls back to the corresponding attribute on *config* for - backward compatibility with existing direct callers. + ``adr-multitask-configmulti-merge``, step 5). Both parameters are + required; passing ``None`` raises ``ValueError``. Args: target: Name of the target column to extract from *df_pipeline*. @@ -703,8 +702,7 @@ def get_target_data( containing all target columns produced by the preprocessing pipeline. config: Pipeline configuration object. ``use_exogenous_features`` - must be set. 
``start_train_ts`` / ``end_train_ts`` are only read - from *config* when the explicit parameters are not supplied. + must be set. data_with_exog: Merged DataFrame of target and exogenous columns covering at least the training window. Required when ``config.use_exogenous_features`` is ``True``. @@ -716,17 +714,13 @@ def get_target_data( Required when *data_with_exog* is not ``None``. Pass ``None`` (default) when exogenous features are disabled. start_train_ts: Inclusive start of the training window (tz-aware - ``pd.Timestamp``). **Required** — pass + ``pd.Timestamp``). **Keyword-only, required** — pass ``task.run_state.start_train_ts`` after the pipeline has been - prepared. Omitting this argument raises ``ValueError``; reading - the value from ``config.start_train_ts`` is deprecated and - emits a ``DeprecationWarning``. + prepared. Passing ``None`` raises ``ValueError``. end_train_ts: Inclusive end of the training window (tz-aware - ``pd.Timestamp``). **Required** — pass + ``pd.Timestamp``). **Keyword-only, required** — pass ``task.run_state.end_train_ts`` after the pipeline has been - prepared. Omitting this argument raises ``ValueError``; reading - the value from ``config.end_train_ts`` is deprecated and - emits a ``DeprecationWarning``. + prepared. Passing ``None`` raises ``ValueError``. Returns: Tuple[pd.Series, Optional[pd.DataFrame], Optional[pd.DataFrame]]: @@ -822,48 +816,22 @@ def get_target_data( print(f"exog_train dtype: {exog_train.dtypes.iloc[0]}") ``` """ - # Resolve training window: explicit params take precedence over config attrs. - # The config fallback is deprecated (RunState extraction, v18.0.0); pass - # start_train_ts/end_train_ts explicitly instead. 
- if start_train_ts is not None: - _start = start_train_ts - else: - _fallback_start = getattr(config, "start_train_ts", None) - if _fallback_start is None: - raise ValueError( - "start_train_ts is required: pass it as an explicit keyword " - "argument (start_train_ts=task.run_state.start_train_ts). " - "The config.start_train_ts fallback has been removed." - ) - warnings.warn( - "Passing the training window via config.start_train_ts is " - "deprecated (RunState extraction, v18.0.0). Pass " - "start_train_ts explicitly; the config fallback will be " - "removed in the next major release.", - DeprecationWarning, - stacklevel=2, + # Training window must be supplied explicitly (RunState extraction, v18.0.0). + if start_train_ts is None: + raise ValueError( + "start_train_ts is required: pass it as an explicit keyword " + "argument (start_train_ts=task.run_state.start_train_ts). " + "The config.start_train_ts fallback has been removed." ) - _start = _fallback_start - - if end_train_ts is not None: - _end = end_train_ts - else: - _fallback_end = getattr(config, "end_train_ts", None) - if _fallback_end is None: - raise ValueError( - "end_train_ts is required: pass it as an explicit keyword " - "argument (end_train_ts=task.run_state.end_train_ts). " - "The config.end_train_ts fallback has been removed." - ) - warnings.warn( - "Passing the training window via config.end_train_ts is " - "deprecated (RunState extraction, v18.0.0). Pass " - "end_train_ts explicitly; the config fallback will be " - "removed in the next major release.", - DeprecationWarning, - stacklevel=2, + _start = start_train_ts + + if end_train_ts is None: + raise ValueError( + "end_train_ts is required: pass it as an explicit keyword " + "argument (end_train_ts=task.run_state.end_train_ts). " + "The config.end_train_ts fallback has been removed." 
) - _end = _fallback_end + _end = end_train_ts y_train = df_pipeline[target].loc[_start:_end].squeeze() diff --git a/src/spotforecast2_safe/multitask/base.py b/src/spotforecast2_safe/multitask/base.py index 4a7c9580..b0ff705d 100644 --- a/src/spotforecast2_safe/multitask/base.py +++ b/src/spotforecast2_safe/multitask/base.py @@ -357,8 +357,6 @@ def __init__( # config value when explicitly supplied. if cache_home is not None: config.cache_home = cache_home - # Propagate the task identifier so config-aware helpers know the mode. - config.task = self.TASK self.config = config # Call-time data and per-instance state diff --git a/tests/test_manager_features.py b/tests/test_manager_features.py index 4f2071a2..ef08680e 100644 --- a/tests/test_manager_features.py +++ b/tests/test_manager_features.py @@ -777,9 +777,9 @@ def df_pipeline(pipeline_idx): def train_window(pipeline_idx): """Explicit training-window dict covering the full 168-hour index. - Pass as ``**train_window`` to every ``get_target_data`` call so the - function never falls back to the deprecated ``config.start_train_ts`` / - ``config.end_train_ts`` attributes (RunState extraction, v18.0.0). + Pass as ``**train_window`` to every ``get_target_data`` call. The + function raises ``ValueError`` when these are ``None``; the config + fallback has been removed (19.0.0 breaking change). 
""" return { "start_train_ts": pipeline_idx[0], @@ -913,6 +913,17 @@ def test_missing_target_raises(self, df_pipeline, base_config, train_window): with pytest.raises(KeyError): get_target_data("nonexistent", df_pipeline, base_config, **train_window) + @pytest.mark.parametrize("missing", ["start_train_ts", "end_train_ts"]) + def test_none_window_timestamp_raises_value_error( + self, df_pipeline, base_config, train_window, missing + ): + """Passing None for either window timestamp raises ValueError (config + fallback removed in 19.0.0).""" + kwargs = dict(train_window) + kwargs[missing] = None + with pytest.raises(ValueError, match=missing): + get_target_data("load", df_pipeline, base_config, **kwargs) + # ============================================================================= # TestGetTargetDataWithExog From 1ddbfa470b219baf17eb640744da6484dec4d3e4 Mon Sep 17 00:00:00 2001 From: bartzbeielstein <32470350+bartzbeielstein@users.noreply.github.com> Date: Sun, 7 Jun 2026 20:28:09 +0200 Subject: [PATCH 2/2] feat(multitask)!: remove the RunState mirror shim MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Derived pipeline fields are no longer mirrored onto the config object after pipeline runs. Reading `config.data_start`, `config.data_end`, `config.cov_start`, `config.cov_end`, `config.start_download`, `config.end_download`, `config.end_train_ts`, or `config.start_train_ts` now raises AttributeError — these values exist only on `task.run_state`. No DeprecationWarning is emitted by the pipeline anymore. Changes: - Delete `_set_derived` method and its "Derived-state helpers" banner. - Delete `_run_state_deprecation_warned` instance flag. - Replace all 8 `_set_derived(...)` call sites with direct `self.run_state. = value` assignments. - Remove `warnings.catch_warnings()`/`simplefilter("ignore", DeprecationWarning)` wrappers from 5 docstring `{python}` examples in `base.py` and 2 in `defaults.py`; dedent the wrapped statements. 
- Remove dead `import warnings` (module-level) from `base.py`. - Harden `RunState` against silent typo assignments: changed to `@dataclass(slots=True)` so any write to an undeclared attribute raises `AttributeError` immediately. - Rewrite `TestMirrorShim` → `TestNoConfigMirror`: asserts all 8 derived fields (including `end_train_ts`/`start_train_ts`) are NOT set on config after `prepare_data()` + `_setup_training_window()`. - Rewrite `TestDeprecationWarning` → `TestNoDeprecationWarning`: asserts zero DeprecationWarnings emitted during the pipeline. - Remove `warnings.catch_warnings()` suppression wrappers from all other test classes in `test_run_state.py` (they existed only to silence the shim warning). - Update `tests/multitask/test_prepare_data_clamp.py` and `tests/multitask/test_prepare_data_target_corruption.py` to read `task.run_state.data_end` / `task.run_state.cov_end` instead of `task.config.data_end` / `task.config.cov_end`. - Fix stale "mirror" prose in `base.py` comments. BREAKING CHANGE: The following 8 fields no longer exist on the config object after pipeline runs and will raise AttributeError if accessed there: `start_download`, `end_download`, `data_start`, `data_end`, `cov_start`, `cov_end`, `end_train_ts`, `start_train_ts`. Read all of them from `task.run_state` instead. `config.targets` still holds the user-supplied input (unchanged); the resolved target list is available as `task.run_state.targets`. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../execute-results/html.json | 4 +- .../execute-results/html.json | 4 +- .../execute-results/html.json | 4 +- .../manager.features.get_target_data.qmd | 30 ++--- docs/reference/multitask.base.BaseTask.qmd | 27 +--- .../multitask.defaults.DefaultsTask.qmd | 9 +- src/spotforecast2_safe/multitask/base.py | 85 +++---------- src/spotforecast2_safe/multitask/defaults.py | 18 +-- src/spotforecast2_safe/multitask/run_state.py | 2 +- tests/multitask/test_prepare_data_clamp.py | 8 +- .../test_prepare_data_target_corruption.py | 8 +- tests/test_run_state.py | 117 +++++++++--------- 12 files changed, 121 insertions(+), 195 deletions(-) diff --git a/_freeze/docs/reference/manager.features.get_target_data/execute-results/html.json b/_freeze/docs/reference/manager.features.get_target_data/execute-results/html.json index 71f223fc..d308afe2 100644 --- a/_freeze/docs/reference/manager.features.get_target_data/execute-results/html.json +++ b/_freeze/docs/reference/manager.features.get_target_data/execute-results/html.json @@ -1,8 +1,8 @@ { - "hash": "1f02f1dd8ca9dd2eb15c1b8d07b833bb", + "hash": "21aeedf16b5a2b156b9649bf82ad5b19", "result": { "engine": "jupyter", - "markdown": "---\ntitle: manager.features.get_target_data\n---\n\n\n\n```python\nmanager.features.get_target_data(\n target,\n df_pipeline,\n config,\n data_with_exog=None,\n exog_feature_names=None,\n exo_pred=None,\n start_train_ts=None,\n end_train_ts=None,\n)\n```\n\nExtract the training series and exogenous slices for one target column.\n\nClips the target column of *df_pipeline* to the training window defined by\n*start_train_ts* and *end_train_ts*. 
When exogenous features are enabled\n(``config.use_exogenous_features is True``) and *data_with_exog* is\nprovided, the matching exogenous training slice and forecast-horizon slice\nare also returned; otherwise both are ``None``.\n\nThis function is the canonical way to extract per-target data from the\nshared pipeline state so that outlier removal, imputation, and feature\nengineering are applied consistently across all forecasting tasks.\n\nThe training-window timestamps are supplied as explicit parameters so that\nthis helper stays decoupled from ``RunState`` (ADR\n``adr-multitask-configmulti-merge``, step 5). When either is ``None``\nthe function falls back to the corresponding attribute on *config* for\nbackward compatibility with existing direct callers.\n\n## Parameters {.doc-section .doc-section-parameters}\n\n| Name | Type | Description | Default |\n|--------------------|---------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------|\n| target | [str](`str`) | Name of the target column to extract from *df_pipeline*. | _required_ |\n| df_pipeline | [pd](`pandas`).[DataFrame](`pandas.DataFrame`) | DataFrame with a tz-aware `DatetimeIndex` containing all target columns produced by the preprocessing pipeline. | _required_ |\n| config | \\'ConfigMulti\\' | Pipeline configuration object. ``use_exogenous_features`` must be set. ``start_train_ts`` / ``end_train_ts`` are only read from *config* when the explicit parameters are not supplied. 
| _required_ |\n| data_with_exog | [Optional](`typing.Optional`)\\[[pd](`pandas`).[DataFrame](`pandas.DataFrame`)\\] | Merged DataFrame of target and exogenous columns covering at least the training window. Required when ``config.use_exogenous_features`` is ``True``. Pass ``None`` (default) to skip exogenous slicing. | `None` |\n| exog_feature_names | [Optional](`typing.Optional`)\\[[List](`typing.List`)\\[[str](`str`)\\]\\] | Column names to select from *data_with_exog* and *exo_pred*. Required when *data_with_exog* is not ``None``. Pass ``None`` (default) when exogenous features are disabled. | `None` |\n| exo_pred | [Optional](`typing.Optional`)\\[[pd](`pandas`).[DataFrame](`pandas.DataFrame`)\\] | Exogenous feature DataFrame covering the forecast horizon. Required when *data_with_exog* is not ``None``. Pass ``None`` (default) when exogenous features are disabled. | `None` |\n| start_train_ts | [Optional](`typing.Optional`)\\[[pd](`pandas`).[Timestamp](`pandas.Timestamp`)\\] | Inclusive start of the training window (tz-aware ``pd.Timestamp``). **Required** — pass ``task.run_state.start_train_ts`` after the pipeline has been prepared. Omitting this argument raises ``ValueError``; reading the value from ``config.start_train_ts`` is deprecated and emits a ``DeprecationWarning``. | `None` |\n| end_train_ts | [Optional](`typing.Optional`)\\[[pd](`pandas`).[Timestamp](`pandas.Timestamp`)\\] | Inclusive end of the training window (tz-aware ``pd.Timestamp``). **Required** — pass ``task.run_state.end_train_ts`` after the pipeline has been prepared. Omitting this argument raises ``ValueError``; reading the value from ``config.end_train_ts`` is deprecated and emits a ``DeprecationWarning``. 
| `None` |\n\n## Returns {.doc-section .doc-section-returns}\n\n| Name | Type | Description |\n|--------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n| | [pd](`pandas`).[Series](`pandas.Series`) | Tuple[pd.Series, Optional[pd.DataFrame], Optional[pd.DataFrame]]: |\n| | [Optional](`typing.Optional`)\\[[pd](`pandas`).[DataFrame](`pandas.DataFrame`)\\] | A three-tuple ``(y_train, exog_train, exog_future)`` where: |\n| | [Optional](`typing.Optional`)\\[[pd](`pandas`).[DataFrame](`pandas.DataFrame`)\\] | - **y_train** — 1-D Series with the target values over the training window, squeezed to a plain `Series`. |\n| | [Tuple](`typing.Tuple`)\\[[pd](`pandas`).[Series](`pandas.Series`), [Optional](`typing.Optional`)\\[[pd](`pandas`).[DataFrame](`pandas.DataFrame`)\\], [Optional](`typing.Optional`)\\[[pd](`pandas`).[DataFrame](`pandas.DataFrame`)\\]\\] | - **exog_train** — DataFrame of selected exogenous features over the training window, cast to ``float32``. ``None`` when exogenous features are disabled or *data_with_exog* is ``None``. |\n| | [Tuple](`typing.Tuple`)\\[[pd](`pandas`).[Series](`pandas.Series`), [Optional](`typing.Optional`)\\[[pd](`pandas`).[DataFrame](`pandas.DataFrame`)\\], [Optional](`typing.Optional`)\\[[pd](`pandas`).[DataFrame](`pandas.DataFrame`)\\]\\] | - **exog_future** — DataFrame of selected exogenous features covering the forecast horizon, cast to ``float32``. ``None`` when exogenous features are disabled or *exo_pred* is ``None``. 
|\n\n## Examples {.doc-section .doc-section-examples}\n\nExtract training data for a single target without exogenous features:\n\n\n::: {#ba66f4cf .cell execution_count=1}\n``` {.python .cell-code}\nimport pandas as pd\nimport numpy as np\nfrom spotforecast2_safe.manager.features import get_target_data\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nidx = pd.date_range(\"2024-01-01\", periods=168, freq=\"h\", tz=\"UTC\")\ndf_pipeline = pd.DataFrame({\"load\": np.random.default_rng(0).normal(100, 10, 168)}, index=idx)\n\nconfig = ConfigMulti(\n targets=[\"load\"],\n use_exogenous_features=False,\n)\nstart_ts = pd.Timestamp(\"2024-01-01 00:00\", tz=\"UTC\")\nend_ts = pd.Timestamp(\"2024-01-07 23:00\", tz=\"UTC\")\n\ny_train, exog_train, exog_future = get_target_data(\n target=\"load\",\n df_pipeline=df_pipeline,\n config=config,\n start_train_ts=start_ts,\n end_train_ts=end_ts,\n)\nprint(f\"y_train length: {len(y_train)}\")\nprint(f\"exog_train: {exog_train}\")\nprint(f\"exog_future: {exog_future}\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\ny_train length: 168\nexog_train: None\nexog_future: None\n```\n:::\n:::\n\n\nExtract training data with exogenous features enabled:\n\n::: {#69c04eed .cell execution_count=2}\n``` {.python .cell-code}\nimport pandas as pd\nimport numpy as np\nfrom spotforecast2_safe.manager.features import get_target_data\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nrng = np.random.default_rng(1)\nidx_train = pd.date_range(\"2024-01-01\", periods=168, freq=\"h\", tz=\"UTC\")\nidx_future = pd.date_range(\"2024-01-08\", periods=24, freq=\"h\", tz=\"UTC\")\n\ndf_pipeline = pd.DataFrame({\"load\": rng.normal(100, 10, 168)}, index=idx_train)\n\ndata_with_exog = pd.DataFrame(\n {\n \"load\": df_pipeline[\"load\"],\n \"hour_sin\": np.sin(2 * np.pi * idx_train.hour / 24),\n \"hour_cos\": np.cos(2 * np.pi * idx_train.hour / 24),\n },\n index=idx_train,\n)\nexo_pred = pd.DataFrame(\n {\n 
\"hour_sin\": np.sin(2 * np.pi * idx_future.hour / 24),\n \"hour_cos\": np.cos(2 * np.pi * idx_future.hour / 24),\n },\n index=idx_future,\n)\n\nstart_ts = pd.Timestamp(\"2024-01-01 00:00\", tz=\"UTC\")\nend_ts = pd.Timestamp(\"2024-01-07 23:00\", tz=\"UTC\")\nconfig = ConfigMulti(targets=[\"load\"], use_exogenous_features=True)\n\ny_train, exog_train, exog_future = get_target_data(\n target=\"load\",\n df_pipeline=df_pipeline,\n config=config,\n data_with_exog=data_with_exog,\n exog_feature_names=[\"hour_sin\", \"hour_cos\"],\n exo_pred=exo_pred,\n start_train_ts=start_ts,\n end_train_ts=end_ts,\n)\nprint(f\"y_train length: {len(y_train)}\")\nprint(f\"exog_train shape: {exog_train.shape}\")\nprint(f\"exog_future shape: {exog_future.shape}\")\nprint(f\"exog_train dtype: {exog_train.dtypes.iloc[0]}\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\ny_train length: 168\nexog_train shape: (168, 2)\nexog_future shape: (24, 2)\nexog_train dtype: float32\n```\n:::\n:::\n\n\n", + "markdown": "---\ntitle: manager.features.get_target_data\n---\n\n\n\n```python\nmanager.features.get_target_data(\n target,\n df_pipeline,\n config,\n *,\n data_with_exog=None,\n exog_feature_names=None,\n exo_pred=None,\n start_train_ts,\n end_train_ts,\n)\n```\n\nExtract the training series and exogenous slices for one target column.\n\nClips the target column of *df_pipeline* to the training window defined by\n*start_train_ts* and *end_train_ts*. 
When exogenous features are enabled\n(``config.use_exogenous_features is True``) and *data_with_exog* is\nprovided, the matching exogenous training slice and forecast-horizon slice\nare also returned; otherwise both are ``None``.\n\nThis function is the canonical way to extract per-target data from the\nshared pipeline state so that outlier removal, imputation, and feature\nengineering are applied consistently across all forecasting tasks.\n\nThe training-window timestamps are supplied as explicit parameters so that\nthis helper stays decoupled from ``RunState`` (ADR\n``adr-multitask-configmulti-merge``, step 5). Both parameters are\nrequired; passing ``None`` raises ``ValueError``.\n\n## Parameters {.doc-section .doc-section-parameters}\n\n| Name | Type | Description | Default |\n|--------------------|---------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------|\n| target | [str](`str`) | Name of the target column to extract from *df_pipeline*. | _required_ |\n| df_pipeline | [pd](`pandas`).[DataFrame](`pandas.DataFrame`) | DataFrame with a tz-aware `DatetimeIndex` containing all target columns produced by the preprocessing pipeline. | _required_ |\n| config | \\'ConfigMulti\\' | Pipeline configuration object. ``use_exogenous_features`` must be set. | _required_ |\n| data_with_exog | [Optional](`typing.Optional`)\\[[pd](`pandas`).[DataFrame](`pandas.DataFrame`)\\] | Merged DataFrame of target and exogenous columns covering at least the training window. Required when ``config.use_exogenous_features`` is ``True``. Pass ``None`` (default) to skip exogenous slicing. 
| `None` |\n| exog_feature_names | [Optional](`typing.Optional`)\\[[List](`typing.List`)\\[[str](`str`)\\]\\] | Column names to select from *data_with_exog* and *exo_pred*. Required when *data_with_exog* is not ``None``. Pass ``None`` (default) when exogenous features are disabled. | `None` |\n| exo_pred | [Optional](`typing.Optional`)\\[[pd](`pandas`).[DataFrame](`pandas.DataFrame`)\\] | Exogenous feature DataFrame covering the forecast horizon. Required when *data_with_exog* is not ``None``. Pass ``None`` (default) when exogenous features are disabled. | `None` |\n| start_train_ts | [pd](`pandas`).[Timestamp](`pandas.Timestamp`) | Inclusive start of the training window (tz-aware ``pd.Timestamp``). **Keyword-only, required** — pass ``task.run_state.start_train_ts`` after the pipeline has been prepared. Passing ``None`` raises ``ValueError``. | _required_ |\n| end_train_ts | [pd](`pandas`).[Timestamp](`pandas.Timestamp`) | Inclusive end of the training window (tz-aware ``pd.Timestamp``). **Keyword-only, required** — pass ``task.run_state.end_train_ts`` after the pipeline has been prepared. Passing ``None`` raises ``ValueError``. 
| _required_ |\n\n## Returns {.doc-section .doc-section-returns}\n\n| Name | Type | Description |\n|--------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n| | [pd](`pandas`).[Series](`pandas.Series`) | Tuple[pd.Series, Optional[pd.DataFrame], Optional[pd.DataFrame]]: |\n| | [Optional](`typing.Optional`)\\[[pd](`pandas`).[DataFrame](`pandas.DataFrame`)\\] | A three-tuple ``(y_train, exog_train, exog_future)`` where: |\n| | [Optional](`typing.Optional`)\\[[pd](`pandas`).[DataFrame](`pandas.DataFrame`)\\] | - **y_train** — 1-D Series with the target values over the training window, squeezed to a plain `Series`. |\n| | [Tuple](`typing.Tuple`)\\[[pd](`pandas`).[Series](`pandas.Series`), [Optional](`typing.Optional`)\\[[pd](`pandas`).[DataFrame](`pandas.DataFrame`)\\], [Optional](`typing.Optional`)\\[[pd](`pandas`).[DataFrame](`pandas.DataFrame`)\\]\\] | - **exog_train** — DataFrame of selected exogenous features over the training window, cast to ``float32``. ``None`` when exogenous features are disabled or *data_with_exog* is ``None``. |\n| | [Tuple](`typing.Tuple`)\\[[pd](`pandas`).[Series](`pandas.Series`), [Optional](`typing.Optional`)\\[[pd](`pandas`).[DataFrame](`pandas.DataFrame`)\\], [Optional](`typing.Optional`)\\[[pd](`pandas`).[DataFrame](`pandas.DataFrame`)\\]\\] | - **exog_future** — DataFrame of selected exogenous features covering the forecast horizon, cast to ``float32``. ``None`` when exogenous features are disabled or *exo_pred* is ``None``. 
|\n\n## Examples {.doc-section .doc-section-examples}\n\nExtract training data for a single target without exogenous features:\n\n\n::: {#e497f432 .cell execution_count=1}\n``` {.python .cell-code}\nimport pandas as pd\nimport numpy as np\nfrom spotforecast2_safe.manager.features import get_target_data\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nidx = pd.date_range(\"2024-01-01\", periods=168, freq=\"h\", tz=\"UTC\")\ndf_pipeline = pd.DataFrame({\"load\": np.random.default_rng(0).normal(100, 10, 168)}, index=idx)\n\nconfig = ConfigMulti(\n targets=[\"load\"],\n use_exogenous_features=False,\n)\nstart_ts = pd.Timestamp(\"2024-01-01 00:00\", tz=\"UTC\")\nend_ts = pd.Timestamp(\"2024-01-07 23:00\", tz=\"UTC\")\n\ny_train, exog_train, exog_future = get_target_data(\n target=\"load\",\n df_pipeline=df_pipeline,\n config=config,\n start_train_ts=start_ts,\n end_train_ts=end_ts,\n)\nprint(f\"y_train length: {len(y_train)}\")\nprint(f\"exog_train: {exog_train}\")\nprint(f\"exog_future: {exog_future}\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\ny_train length: 168\nexog_train: None\nexog_future: None\n```\n:::\n:::\n\n\nExtract training data with exogenous features enabled:\n\n::: {#9872f650 .cell execution_count=2}\n``` {.python .cell-code}\nimport pandas as pd\nimport numpy as np\nfrom spotforecast2_safe.manager.features import get_target_data\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nrng = np.random.default_rng(1)\nidx_train = pd.date_range(\"2024-01-01\", periods=168, freq=\"h\", tz=\"UTC\")\nidx_future = pd.date_range(\"2024-01-08\", periods=24, freq=\"h\", tz=\"UTC\")\n\ndf_pipeline = pd.DataFrame({\"load\": rng.normal(100, 10, 168)}, index=idx_train)\n\ndata_with_exog = pd.DataFrame(\n {\n \"load\": df_pipeline[\"load\"],\n \"hour_sin\": np.sin(2 * np.pi * idx_train.hour / 24),\n \"hour_cos\": np.cos(2 * np.pi * idx_train.hour / 24),\n },\n index=idx_train,\n)\nexo_pred = pd.DataFrame(\n {\n 
\"hour_sin\": np.sin(2 * np.pi * idx_future.hour / 24),\n \"hour_cos\": np.cos(2 * np.pi * idx_future.hour / 24),\n },\n index=idx_future,\n)\n\nstart_ts = pd.Timestamp(\"2024-01-01 00:00\", tz=\"UTC\")\nend_ts = pd.Timestamp(\"2024-01-07 23:00\", tz=\"UTC\")\nconfig = ConfigMulti(targets=[\"load\"], use_exogenous_features=True)\n\ny_train, exog_train, exog_future = get_target_data(\n target=\"load\",\n df_pipeline=df_pipeline,\n config=config,\n data_with_exog=data_with_exog,\n exog_feature_names=[\"hour_sin\", \"hour_cos\"],\n exo_pred=exo_pred,\n start_train_ts=start_ts,\n end_train_ts=end_ts,\n)\nprint(f\"y_train length: {len(y_train)}\")\nprint(f\"exog_train shape: {exog_train.shape}\")\nprint(f\"exog_future shape: {exog_future.shape}\")\nprint(f\"exog_train dtype: {exog_train.dtypes.iloc[0]}\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\ny_train length: 168\nexog_train shape: (168, 2)\nexog_future shape: (24, 2)\nexog_train dtype: float32\n```\n:::\n:::\n\n\n", "supporting": [ "manager.features.get_target_data_files/figure-html" ], diff --git a/_freeze/docs/reference/multitask.base.BaseTask/execute-results/html.json b/_freeze/docs/reference/multitask.base.BaseTask/execute-results/html.json index 168f3873..4c5e5ab8 100644 --- a/_freeze/docs/reference/multitask.base.BaseTask/execute-results/html.json +++ b/_freeze/docs/reference/multitask.base.BaseTask/execute-results/html.json @@ -1,8 +1,8 @@ { - "hash": "bbfd7a488ea47156a35fcf0d419ba55e", + "hash": "25d10dc1682aae1b5342eabd5adefdba", "result": { "engine": "jupyter", - "markdown": "---\ntitle: multitask.base.BaseTask\n---\n\n\n\n```python\nmultitask.base.BaseTask(\n config=None,\n *,\n dataframe=None,\n data_test=None,\n cache_home=None,\n log_level=logging.INFO,\n **overrides,\n)\n```\n\nShared base for all multi-target forecasting pipeline tasks.\n\n``BaseTask`` encapsulates the data-preparation pipeline (steps 1-7)\nand all helper methods shared across the task modes (lazy, defaults,\npredict, 
clean). Subclasses implement the run method with task-specific\ntraining or prediction logic.\n\nThe constructor takes a single ``config`` object satisfying the\n``PipelineConfig`` protocol — typically a ``ConfigMulti``. All pipeline\nparameters (forecast horizon, training window, outlier policy, weather/\nholiday hooks, cross-validation fold count, persistence policy, ...) live\non that object. Only genuinely call-time state (the dataframes, the cache\ndirectory override, the logging level) is passed as separate kwargs.\nExtra ``**overrides`` are forwarded to ``config.set_params`` and mutate\nthe passed-in config in place.\n\nPlotting is not available in ``spotforecast2-safe``. The\n``_show_prediction_figure`` and ``_show_prediction_figure_agg`` hook\nmethods are no-ops; override them in a subclass or use the\n``spotforecast2`` sibling package for interactive visualisation.\n\n## Parameters {.doc-section .doc-section-parameters}\n\n| Name | Type | Description | Default |\n|-------------|-------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------|----------------|\n| config | [Optional](`typing.Optional`)\\[[PipelineConfig](`spotforecast2_safe.multitask.base.PipelineConfig`)\\] | A ``PipelineConfig``-conforming object owning every pipeline parameter. ``ConfigMulti`` satisfies the protocol. | `None` |\n| dataframe | [Optional](`typing.Optional`)\\[[pd](`pandas`).[DataFrame](`pandas.DataFrame`)\\] | Pre-loaded input DataFrame with training data. Must contain a datetime column matching ``config.index_name`` plus at least one numeric target column. | `None` |\n| data_test | [Optional](`typing.Optional`)\\[[pd](`pandas`).[DataFrame](`pandas.DataFrame`)\\] | Pre-loaded test DataFrame (ground truth for the forecast horizon). Optional. 
| `None` |\n| cache_home | [Optional](`typing.Optional`)\\[[Path](`pathlib.Path`)\\] | Cache directory override. When not ``None``, replaces ``config.cache_home`` for this task instance. | `None` |\n| log_level | [int](`int`) | Logging level for the pipeline logger. | `logging.INFO` |\n| **overrides | [Any](`typing.Any`) | Forwarded to ``config.set_params(**overrides)`` — a convenience for one-line tweaks without building a fresh config. Mutates the caller's config object. | `{}` |\n\n## Attributes {.doc-section .doc-section-attributes}\n\n| Name | Type | Description |\n|--------------------|----------------------------------------------------------------------|--------------------------------------------------------|\n| config | [PipelineConfig](`spotforecast2_safe.multitask.base.PipelineConfig`) | Centralised pipeline configuration. |\n| df_pipeline | [pd](`pandas`).[DataFrame](`pandas.DataFrame`) | Pipeline DataFrame after preparation. |\n| df_test | [pd](`pandas`).[DataFrame](`pandas.DataFrame`) | Test DataFrame (ground truth). |\n| weight_func | [Optional](`typing.Optional`)\\[[Any](`typing.Any`)\\] | Sample-weight function from imputation. |\n| exogenous_features | [pd](`pandas`).[DataFrame](`pandas.DataFrame`) | Combined exogenous feature matrix. |\n| exog_feature_names | [List](`typing.List`)\\[[str](`str`)\\] | Selected exogenous feature names. |\n| data_with_exog | [pd](`pandas`).[DataFrame](`pandas.DataFrame`) | Merged target + exogenous data. |\n| exo_pred | [pd](`pandas`).[DataFrame](`pandas.DataFrame`) | Exogenous covariates for the forecast horizon. |\n| results | [Dict](`typing.Dict`)\\[[str](`str`), [Dict](`typing.Dict`)\\] | Per-task mapping of target name to prediction package. |\n| agg_results | [Dict](`typing.Dict`) | Mapping of task name to aggregated prediction package. 
|\n\n## Examples {.doc-section .doc-section-examples}\n\n\n::: {#b09e8314 .cell execution_count=1}\n``` {.python .cell-code}\nimport tempfile\nimport numpy as np\nimport pandas as pd\nfrom spotforecast2_safe.multitask.base import BaseTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nrng = np.random.default_rng(0)\nidx = pd.date_range(\"2023-01-01\", periods=24 * 7, freq=\"h\", tz=\"UTC\")\ndf = pd.DataFrame({\"load\": rng.normal(500, 30, len(idx))}, index=idx)\ndf.index.name = \"DateTime\"\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(\n predict_size=6,\n use_exogenous_features=False,\n use_outlier_detection=False,\n cache_home=tmp,\n auto_save_models=False,\n verbose=False,\n )\n task = BaseTask(cfg, dataframe=df)\n print(f\"Task mode: {task.TASK}\")\n print(f\"Config predict_size: {task.config.predict_size}\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nTask mode: lazy\nConfig predict_size: 6\n```\n:::\n:::\n\n\n## Methods\n\n| Name | Description |\n| --- | --- |\n| [agg_predictor](#spotforecast2_safe.multitask.base.BaseTask.agg_predictor) | Aggregate per-target prediction packages into a weighted forecast. |\n| [build_exogenous_features](#spotforecast2_safe.multitask.base.BaseTask.build_exogenous_features) | Build, combine, encode, and merge exogenous feature covariates. |\n| [create_forecaster](#spotforecast2_safe.multitask.base.BaseTask.create_forecaster) | Create a fresh forecaster for the given target. |\n| [cv_ts](#spotforecast2_safe.multitask.base.BaseTask.cv_ts) | Build a ``TimeSeriesFold`` for cross-validation. |\n| [detect_outliers](#spotforecast2_safe.multitask.base.BaseTask.detect_outliers) | Apply hard-bound filtering and IsolationForest outlier detection. |\n| [impute](#spotforecast2_safe.multitask.base.BaseTask.impute) | Fill missing values using the configured imputation strategy. 
|\n| [load_models](#spotforecast2_safe.multitask.base.BaseTask.load_models) | Load the most recent fitted models from the cache directory. |\n| [load_tuning_results](#spotforecast2_safe.multitask.base.BaseTask.load_tuning_results) | Load the most recent tuning results for a target from cache. |\n| [log_summary](#spotforecast2_safe.multitask.base.BaseTask.log_summary) | Log a summary of the current pipeline configuration. |\n| [plot_with_outliers](#spotforecast2_safe.multitask.base.BaseTask.plot_with_outliers) | Visualise original vs. cleaned data with outlier markers. |\n| [prepare_data](#spotforecast2_safe.multitask.base.BaseTask.prepare_data) | Load, resample, validate, and configure the pipeline data. |\n| [run](#spotforecast2_safe.multitask.base.BaseTask.run) | Execute the task-specific training / prediction pipeline. |\n| [save_models](#spotforecast2_safe.multitask.base.BaseTask.save_models) | Save fitted forecaster models to the cache directory. |\n| [save_tuning_results](#spotforecast2_safe.multitask.base.BaseTask.save_tuning_results) | Save tuning results (best parameters and lags) to a JSON file. 
|\n\n### agg_predictor { #spotforecast2_safe.multitask.base.BaseTask.agg_predictor }\n\n```python\nmultitask.base.BaseTask.agg_predictor(results, targets, weights)\n```\n\nAggregate per-target prediction packages into a weighted forecast.\n\nDelegates to the module-level ``agg_predictor`` function.\nAvailable as an instance method so that subclasses can override the\naggregation strategy when needed.\n\n#### Parameters {.doc-section .doc-section-parameters}\n\n| Name | Type | Description | Default |\n|---------|---------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------|------------|\n| results | [Dict](`typing.Dict`)\\[[str](`str`), [Dict](`typing.Dict`)\\[[str](`str`), [Any](`typing.Any`)\\]\\] | Mapping of target name to prediction package (as returned by ``build_prediction_package``). | _required_ |\n| targets | [List](`typing.List`)\\[[str](`str`)\\] | Ordered list of target names to include. | _required_ |\n| weights | [List](`typing.List`)\\[[float](`float`)\\] | Per-target aggregation weights aligned with ``targets``. | _required_ |\n\n#### Returns {.doc-section .doc-section-returns}\n\n| Name | Type | Description |\n|--------|------------------------------------------------------------|-------------------------------------|\n| | [Dict](`typing.Dict`)\\[[str](`str`), [Any](`typing.Any`)\\] | Aggregated prediction package dict. 
|\n\n#### Examples {.doc-section .doc-section-examples}\n\n::: {#42448f71 .cell execution_count=2}\n``` {.python .cell-code}\nimport tempfile\nimport numpy as np\nimport pandas as pd\nfrom spotforecast2_safe.multitask import LazyTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nrng = np.random.default_rng(0)\nidx_train = pd.date_range(\"2023-01-01\", periods=48, freq=\"h\", tz=\"UTC\")\nidx_future = pd.date_range(\"2023-01-03\", periods=6, freq=\"h\", tz=\"UTC\")\n\ndef _pkg(train_val, future_val):\n return {\n \"train_actual\": pd.Series(np.full(48, train_val), index=idx_train),\n \"train_pred\": pd.Series(np.full(48, train_val * 0.99), index=idx_train),\n \"future_pred\": pd.Series(np.full(6, future_val), index=idx_future),\n \"future_actual\": pd.Series(dtype=\"float64\"),\n }\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(cache_home=tmp, verbose=False)\n task = LazyTask(cfg)\n results = {\"wind\": _pkg(100.0, 110.0), \"solar\": _pkg(200.0, 210.0)}\n agg = task.agg_predictor(results, [\"wind\", \"solar\"], [0.4, 0.6])\n print(f\"Weighted future_pred: {agg['future_pred'].iloc[0]:.1f}\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nWeighted future_pred: 170.0\n```\n:::\n:::\n\n\n### build_exogenous_features { #spotforecast2_safe.multitask.base.BaseTask.build_exogenous_features }\n\n```python\nmultitask.base.BaseTask.build_exogenous_features()\n```\n\nBuild, combine, encode, and merge exogenous feature covariates.\n\nThese are steps 4-7 of the pipeline (run after ``prepare_data``,\n``detect_outliers``, and ``impute``). It assembles the full\nexogenous-covariate matrix that the forecaster consumes, then merges\nit onto the target data. The orchestration proceeds in order:\n\n* 4a — Weather, via ``get_weather_features`` (Open-Meteo). 
The\n response is parquet-cached only when ``config.cache_home`` is set.\n Fetch failures are handled per ``config.on_weather_failure``:\n ``\"raise\"`` re-raises ``WeatherFetchError``; ``\"skip\"`` logs a\n warning and continues with an empty weather frame (fail-safe).\n* 4b — Calendar features, via ``get_calendar_features``.\n* 4c — Day/night (solar) features, via ``get_day_night_features``\n (computed with ``astral`` from ``config.latitude`` /\n ``config.longitude``).\n* 4d — Holiday features, via ``get_holiday_features`` for\n ``config.country_code`` / ``config.state``.\n* 5 — The four frames are concatenated along the columns and any\n residual gaps are back- then forward-filled. Provider-based\n exogenous columns are then appended via\n ``build_providers_from_config`` (requires ``spotforecast2-safe``\n >= 15.7.0). The active providers are governed by the config flags\n ``include_covid_infection_rate``,\n ``include_entsoe_forecast_load``,\n ``include_entsoe_renewable_forecast``,\n ``include_entsoe_net_load``, and\n ``include_entsoe_day_ahead_price``. Cyclical (sine/cosine)\n encoding is then applied via ``apply_cyclical_encoding``, and\n degree-``config.poly_features_degree`` interaction terms are added\n via ``create_interaction_features``. 
When the degree is at least\n 2, the polynomial columns are ranked by mutual information with the\n primary target and capped to ``config.max_poly_features`` via\n ``select_top_poly_features``.\n* 6 — The training feature set is chosen via\n ``select_exogenous_features``, with provider columns appended\n (order-preserving, de-duplicated).\n* 7 — Targets and covariates are merged via\n ``merge_data_and_covariates`` into ``self.data_with_exog`` and the\n forecast-horizon covariates ``self.exo_pred``.\n\nWhen ``config.use_exogenous_features`` is ``False`` the method is a\nno-op and returns ``self`` immediately, leaving the pipeline\ntarget-only.\n\n#### Attributes {.doc-section .doc-section-attributes}\n\n| Name | Type | Description |\n|--------------------|------------------------------------------------|-------------------------------------------------------------------------------------------------|\n| weather_aligned | [pd](`pandas`).[DataFrame](`pandas.DataFrame`) | Weather frame aligned to the pipeline index, reused by the interaction and selection steps. |\n| exogenous_features | [pd](`pandas`).[DataFrame](`pandas.DataFrame`) | Full combined, encoded, and capped exogenous feature matrix. |\n| exog_feature_names | [List](`typing.List`)\\[[str](`str`)\\] | Names of the exogenous features selected for training (including provider columns). |\n| data_with_exog | [pd](`pandas`).[DataFrame](`pandas.DataFrame`) | Target data merged with the selected exogenous covariates. |\n| exo_pred | [pd](`pandas`).[DataFrame](`pandas.DataFrame`) | Exogenous covariates spanning the forecast horizon, supplied to the forecaster at predict time. |\n\n#### Returns {.doc-section .doc-section-returns}\n\n| Name | Type | Description |\n|--------|----------------------------------------------------------|---------------------------------|\n| | [BaseTask](`spotforecast2_safe.multitask.base.BaseTask`) | ``self`` (for method chaining). 
|\n\n#### Raises {.doc-section .doc-section-raises}\n\n| Name | Type | Description |\n|--------|---------------------------------------------------------------------|-----------------------------------------------------------------------------|\n| | [RuntimeError](`RuntimeError`) | If ``prepare_data`` has not been called. |\n| | [WeatherFetchError](`spotforecast2_safe.weather.WeatherFetchError`) | If the Open-Meteo fetch fails and ``config.on_weather_failure == \"raise\"``. |\n\n#### Examples {.doc-section .doc-section-examples}\n\nWith exogenous features disabled the method is a no-op, so the\nexample below runs without any network access and leaves the\npipeline target-only.\n\n::: {#8fc07b55 .cell execution_count=3}\n``` {.python .cell-code}\nimport tempfile\nimport pandas as pd\nimport numpy as np\nfrom spotforecast2_safe.multitask import MultiTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nrng = np.random.default_rng(0)\nidx = pd.date_range(\"2023-01-01\", periods=24 * 14, freq=\"h\", tz=\"UTC\")\ndf = pd.DataFrame({\"a\": rng.normal(100, 10, len(idx))}, index=idx)\ndf.index.name = \"DateTime\"\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(\n predict_size=6,\n use_exogenous_features=False,\n use_outlier_detection=False,\n cache_home=tmp,\n )\n mt = MultiTask(cfg, dataframe=df)\n mt.prepare_data().detect_outliers().impute().build_exogenous_features()\n print(f\"Exogenous features used: {mt.config.use_exogenous_features}\")\n print(f\"Selected exog feature names: {mt.exog_feature_names}\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nExogenous features used: False\nSelected exog feature names: []\n```\n:::\n\n::: {.cell-output .cell-output-stderr}\n```\n/var/folders/dw/pvtj6mt91znd0hftcztqb0k00000gn/T/ipykernel_13789/3578559446.py:20: DeprecationWarning: Derived pipeline fields (start_download, end_download, data_start, data_end, cov_start, cov_end, end_train_ts, start_train_ts) have moved to 
task.run_state. Reading them from the config is deprecated and will stop working in the next major release. config.targets continues to hold the user input unchanged; read the resolved list from task.run_state.targets.\n mt.prepare_data().detect_outliers().impute().build_exogenous_features()\n```\n:::\n:::\n\n\n### create_forecaster { #spotforecast2_safe.multitask.base.BaseTask.create_forecaster }\n\n```python\nmultitask.base.BaseTask.create_forecaster(target=None)\n```\n\nCreate a fresh forecaster for the given target.\n\nDelegates to ``config.forecaster_factory`` when set; otherwise falls\nback to ``default_lgbm_forecaster_factory``. This factory hook lets\ncallers swap the estimator without subclassing ``BaseTask``.\n\n#### Parameters {.doc-section .doc-section-parameters}\n\n| Name | Type | Description | Default |\n|--------|-----------------------------------------------|------------------------------------------------------------------------------------------------------------|-----------|\n| target | [Optional](`typing.Optional`)\\[[str](`str`)\\] | Optional target column name. Forwarded to the factory so that custom factories can specialise per target. | `None` |\n\n#### Returns {.doc-section .doc-section-returns}\n\n| Name | Type | Description |\n|--------|---------------------|--------------------------------------|\n| | [Any](`typing.Any`) | A new, unfitted forecaster instance. 
|\n\n#### Examples {.doc-section .doc-section-examples}\n\n::: {#a7331d65 .cell execution_count=4}\n``` {.python .cell-code}\nimport tempfile\nfrom pathlib import Path\nfrom spotforecast2_safe.multitask import LazyTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(\n predict_size=6,\n use_exogenous_features=False,\n cache_home=Path(tmp),\n )\n task = LazyTask(cfg)\n forecaster = task.create_forecaster()\nprint(f\"Type: {type(forecaster).__name__}\")\nprint(f\"Lags: {forecaster.lags}\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nType: ForecasterRecursive\nLags: [ 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]\n```\n:::\n:::\n\n\n### cv_ts { #spotforecast2_safe.multitask.base.BaseTask.cv_ts }\n\n```python\nmultitask.base.BaseTask.cv_ts(y_train)\n```\n\nBuild a ``TimeSeriesFold`` for cross-validation.\n\nConstructs the cross-validation splitter used by all tuning tasks.\nInternally uses ``sklearn.model_selection.TimeSeriesSplit`` to\ncompute split boundaries that respect temporal ordering and avoid\ndata leakage between folds.\n\nThe validation boundary is determined by ``run_state.end_train_ts`` minus\n``config.delta_val``. When ``config.train_size`` is set, the sklearn\nsplitter uses a sliding fixed-size training window\n(``max_train_size``); otherwise an expanding window is used.\n\n#### Parameters {.doc-section .doc-section-parameters}\n\n| Name | Type | Description | Default |\n|---------|------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------|\n| y_train | [pd](`pandas`).[Series](`pandas.Series`) | Training time series for the current target. 
Used both to determine the validation boundary and as the sequence passed to ``TimeSeriesSplit.split`` to derive ``initial_train_size``. | _required_ |\n\n#### Returns {.doc-section .doc-section-returns}\n\n| Name | Type | Description |\n|--------|----------------------------------------------------------------------------|----------------------------------------------------------------|\n| | [TimeSeriesFold](`spotforecast2_safe.splitter.split_ts_cv.TimeSeriesFold`) | A configured ``TimeSeriesFold`` instance ready to be passed to |\n| | [TimeSeriesFold](`spotforecast2_safe.splitter.split_ts_cv.TimeSeriesFold`) | a model-selection function. |\n\n#### Examples {.doc-section .doc-section-examples}\n\n::: {#ea39abf1 .cell execution_count=5}\n``` {.python .cell-code}\nimport tempfile\nimport warnings\nimport numpy as np\nimport pandas as pd\nfrom spotforecast2_safe.multitask import MultiTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nrng = np.random.default_rng(0)\nidx = pd.date_range(\"2023-01-01\", periods=24 * 14, freq=\"h\", tz=\"UTC\")\ndf = pd.DataFrame({\"a\": rng.normal(100, 10, len(idx))}, index=idx)\ndf.index.name = \"DateTime\"\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(\n predict_size=6,\n use_exogenous_features=False,\n use_outlier_detection=False,\n cache_home=tmp,\n number_folds=2,\n auto_save_models=False,\n verbose=False,\n )\n mt = MultiTask(cfg, dataframe=df)\n with warnings.catch_warnings():\n warnings.simplefilter(\"ignore\", DeprecationWarning)\n mt.prepare_data().detect_outliers().impute().build_exogenous_features()\n y_train = mt.df_pipeline[\"a\"]\n cv = mt.cv_ts(y_train)\n print(f\"TimeSeriesFold steps: {cv.steps}\")\n print(f\"initial_train_size: {cv.initial_train_size}\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nTimeSeriesFold steps: 6\ninitial_train_size: 324\n```\n:::\n:::\n\n\n### detect_outliers { #spotforecast2_safe.multitask.base.BaseTask.detect_outliers 
}\n\n```python\nmultitask.base.BaseTask.detect_outliers()\n```\n\nApply hard-bound filtering and IsolationForest outlier detection.\n\nHard bounds from ``config.bounds`` are applied to the pipeline data\n(out-of-bound values are removed and later filled by ``impute()``).\nIsolationForest detection (``config.use_outlier_detection``) is\nadvisory: detected outliers are logged per column but not removed.\n\n#### Returns {.doc-section .doc-section-returns}\n\n| Name | Type | Description |\n|--------|----------------------------------------------------------|---------------------------------|\n| | [BaseTask](`spotforecast2_safe.multitask.base.BaseTask`) | ``self`` (for method chaining). |\n\n#### Raises {.doc-section .doc-section-raises}\n\n| Name | Type | Description |\n|--------|--------------------------------|-------------------------------------------------|\n| | [RuntimeError](`RuntimeError`) | If method ``prepare_data`` has not been called. |\n\n#### Examples {.doc-section .doc-section-examples}\n\n::: {#a8f6f4ab .cell execution_count=6}\n``` {.python .cell-code}\nimport tempfile\nimport warnings\nimport numpy as np\nimport pandas as pd\nfrom spotforecast2_safe.multitask import MultiTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nrng = np.random.default_rng(0)\nidx = pd.date_range(\"2023-01-01\", periods=24 * 14, freq=\"h\", tz=\"UTC\")\ndf = pd.DataFrame({\"a\": rng.normal(100, 10, len(idx))}, index=idx)\ndf.index.name = \"DateTime\"\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(\n predict_size=6,\n use_exogenous_features=False,\n use_outlier_detection=False,\n cache_home=tmp,\n auto_save_models=False,\n verbose=False,\n )\n mt = MultiTask(cfg, dataframe=df)\n with warnings.catch_warnings():\n warnings.simplefilter(\"ignore\", DeprecationWarning)\n mt.prepare_data()\n mt.detect_outliers()\n print(f\"Pipeline shape: {mt.df_pipeline.shape}\")\n assert mt.df_pipeline_original is not None\n```\n\n::: {.cell-output 
.cell-output-stdout}\n```\nPipeline shape: (336, 1)\n```\n:::\n:::\n\n\n### impute { #spotforecast2_safe.multitask.base.BaseTask.impute }\n\n```python\nmultitask.base.BaseTask.impute()\n```\n\nFill missing values using the configured imputation strategy.\n\n#### Returns {.doc-section .doc-section-returns}\n\n| Name | Type | Description |\n|--------|----------------------------------------------------------|---------------------------------|\n| | [BaseTask](`spotforecast2_safe.multitask.base.BaseTask`) | ``self`` (for method chaining). |\n\n#### Raises {.doc-section .doc-section-raises}\n\n| Name | Type | Description |\n|--------|--------------------------------|-------------------------------------------------|\n| | [RuntimeError](`RuntimeError`) | If method ``prepare_data`` has not been called. |\n\n#### Examples {.doc-section .doc-section-examples}\n\n::: {#e91926ef .cell execution_count=7}\n``` {.python .cell-code}\nimport tempfile\nimport warnings\nimport numpy as np\nimport pandas as pd\nfrom spotforecast2_safe.multitask import MultiTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nrng = np.random.default_rng(0)\nidx = pd.date_range(\"2023-01-01\", periods=24 * 14, freq=\"h\", tz=\"UTC\")\nvalues = rng.normal(100, 10, len(idx))\nvalues[10:13] = float(\"nan\") # inject a few gaps\ndf = pd.DataFrame({\"a\": values}, index=idx)\ndf.index.name = \"DateTime\"\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(\n predict_size=6,\n use_exogenous_features=False,\n use_outlier_detection=False,\n cache_home=tmp,\n auto_save_models=False,\n verbose=False,\n )\n mt = MultiTask(cfg, dataframe=df)\n with warnings.catch_warnings():\n warnings.simplefilter(\"ignore\", DeprecationWarning)\n mt.prepare_data().detect_outliers().impute()\n missing = mt.df_pipeline[\"a\"].isna().sum()\n print(f\"Missing values after imputation: {missing}\")\n assert missing == 0\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nMissing values after 
imputation: 0\n```\n:::\n:::\n\n\n### load_models { #spotforecast2_safe.multitask.base.BaseTask.load_models }\n\n```python\nmultitask.base.BaseTask.load_models(\n task_name=None,\n target=None,\n max_age_days=None,\n)\n```\n\nLoad the most recent fitted models from the cache directory.\n\nScans ``/models//`` for ``.joblib``\nfiles matching the current ``data_frame_name``. Optionally\nfilters by ``task_name``, ``target``, and ``max_age_days``.\n\n#### Parameters {.doc-section .doc-section-parameters}\n\n| Name | Type | Description | Default |\n|--------------|---------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------|-----------|\n| task_name | [Optional](`typing.Optional`)\\[[str](`str`)\\] | If given, only load models from this task (``\"lazy\"``, ``\"defaults\"``, ``\"optuna\"``, or ``\"spotoptim\"``). ``None`` accepts any task. | `None` |\n| target | [Optional](`typing.Optional`)\\[[str](`str`)\\] | If given, only load the model for this target column. ``None`` loads the most recent model for every target found. | `None` |\n| max_age_days | [Optional](`typing.Optional`)\\[[float](`float`)\\] | Maximum age in days. Models older than this are ignored. ``None`` accepts any age. | `None` |\n\n#### Returns {.doc-section .doc-section-returns}\n\n| Name | Type | Description |\n|--------|------------------------------------------------------------|-----------------------------------------------------------|\n| | [Dict](`typing.Dict`)\\[[str](`str`), [Any](`typing.Any`)\\] | Mapping ``{target: forecaster}`` of loaded model objects. |\n| | [Dict](`typing.Dict`)\\[[str](`str`), [Any](`typing.Any`)\\] | Empty dict if no matching models were found. 
|\n\n#### Examples {.doc-section .doc-section-examples}\n\n::: {#1cb2e2bf .cell execution_count=8}\n``` {.python .cell-code}\nimport tempfile\nfrom pathlib import Path\nfrom spotforecast2_safe.multitask import LazyTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(\n data_frame_name=\"demo\",\n cache_home=Path(tmp),\n verbose=False,\n )\n task = LazyTask(cfg)\n # Save a dummy object, then load it back.\n dummy_forecaster = {\"lags\": [1, 2, 24]}\n task.save_models(\n task_name=\"lazy\",\n forecasters={\"load\": dummy_forecaster},\n )\n loaded = task.load_models(task_name=\"lazy\")\n print(f\"Loaded targets: {list(loaded.keys())}\")\n assert loaded[\"load\"][\"lags\"] == [1, 2, 24]\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nLoaded targets: ['load']\n```\n:::\n:::\n\n\n### load_tuning_results { #spotforecast2_safe.multitask.base.BaseTask.load_tuning_results }\n\n```python\nmultitask.base.BaseTask.load_tuning_results(\n target,\n task_name=None,\n max_age_days=None,\n)\n```\n\nLoad the most recent tuning results for a target from cache.\n\nScans ``/tuning_results/`` for files matching the\ncurrent ``data_frame_name`` and ``target``. Optionally filters by\n``task_name`` and discards results older than ``max_age_days``.\n\n#### Parameters {.doc-section .doc-section-parameters}\n\n| Name | Type | Description | Default |\n|--------------|---------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------|------------|\n| target | [str](`str`) | Name of the forecast target column. | _required_ |\n| task_name | [Optional](`typing.Optional`)\\[[str](`str`)\\] | If given, only consider results from this tuning algorithm (e.g. ``\"optuna\"`` or ``\"spotoptim\"``). ``None`` accepts any algorithm. 
| `None` |\n| max_age_days | [Optional](`typing.Optional`)\\[[float](`float`)\\] | Maximum age in days. Results older than this are ignored. ``None`` accepts any age. | `None` |\n\n#### Returns {.doc-section .doc-section-returns}\n\n| Name | Type | Description |\n|--------|---------------------------------------------------------------------------------------------|-----------------------------------------------------------|\n| | [Optional](`typing.Optional`)\\[[Dict](`typing.Dict`)\\[[str](`str`), [Any](`typing.Any`)\\]\\] | A dictionary with keys ``best_params``, ``best_lags``, |\n| | [Optional](`typing.Optional`)\\[[Dict](`typing.Dict`)\\[[str](`str`), [Any](`typing.Any`)\\]\\] | ``task_name``, ``target``, ``data_frame_name``, and |\n| | [Optional](`typing.Optional`)\\[[Dict](`typing.Dict`)\\[[str](`str`), [Any](`typing.Any`)\\]\\] | ``timestamp``; or ``None`` if no matching file was found. |\n\n#### Examples {.doc-section .doc-section-examples}\n\n::: {#eebd84f3 .cell execution_count=9}\n``` {.python .cell-code}\nimport tempfile\nfrom pathlib import Path\nfrom spotforecast2_safe.multitask import LazyTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(data_frame_name=\"demo10\", cache_home=Path(tmp))\n task = LazyTask(cfg)\n task.save_tuning_results(\n target=\"target_0\",\n task_name=\"optuna\",\n best_params={\"n_estimators\": 100},\n best_lags=24,\n )\n result = task.load_tuning_results(target=\"target_0\")\n print(result[\"best_params\"])\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n{'n_estimators': 100}\n```\n:::\n:::\n\n\n### log_summary { #spotforecast2_safe.multitask.base.BaseTask.log_summary }\n\n```python\nmultitask.base.BaseTask.log_summary()\n```\n\nLog a summary of the current pipeline configuration.\n\n#### Examples {.doc-section .doc-section-examples}\n\n::: {#4bf1f55e .cell execution_count=10}\n``` {.python .cell-code}\nimport tempfile\nimport 
warnings\nimport numpy as np\nimport pandas as pd\nfrom spotforecast2_safe.multitask import MultiTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nrng = np.random.default_rng(0)\nidx = pd.date_range(\"2023-01-01\", periods=24 * 14, freq=\"h\", tz=\"UTC\")\ndf = pd.DataFrame({\"a\": rng.normal(100, 10, len(idx))}, index=idx)\ndf.index.name = \"DateTime\"\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(\n predict_size=6,\n use_exogenous_features=False,\n use_outlier_detection=False,\n cache_home=tmp,\n auto_save_models=False,\n verbose=False,\n )\n mt = MultiTask(cfg, dataframe=df)\n with warnings.catch_warnings():\n warnings.simplefilter(\"ignore\", DeprecationWarning)\n mt.prepare_data().detect_outliers().impute().build_exogenous_features()\n # log_summary writes to the pipeline logger; call it to confirm\n # it runs without error.\n mt.log_summary()\n print(\"log_summary completed without error\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nlog_summary completed without error\n```\n:::\n:::\n\n\n### plot_with_outliers { #spotforecast2_safe.multitask.base.BaseTask.plot_with_outliers }\n\n```python\nmultitask.base.BaseTask.plot_with_outliers()\n```\n\nVisualise original vs. cleaned data with outlier markers.\n\n#### Raises {.doc-section .doc-section-raises}\n\n| Name | Type | Description |\n|--------|----------------------------------------------|---------------------------------------------------------------------------------------------------------------------|\n| | [RuntimeError](`RuntimeError`) | If method ``detect_outliers`` has not been called. |\n| | [NotImplementedError](`NotImplementedError`) | Always — plotting is not available in ``spotforecast2-safe``. Use the ``spotforecast2`` package for visualisation. 
|\n\n#### Examples {.doc-section .doc-section-examples}\n\n::: {#7571920f .cell execution_count=11}\n``` {.python .cell-code}\nimport tempfile\nimport warnings\nimport numpy as np\nimport pandas as pd\nfrom spotforecast2_safe.multitask import MultiTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nrng = np.random.default_rng(0)\nidx = pd.date_range(\"2023-01-01\", periods=24 * 14, freq=\"h\", tz=\"UTC\")\ndf = pd.DataFrame({\"a\": rng.normal(100, 10, len(idx))}, index=idx)\ndf.index.name = \"DateTime\"\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(\n predict_size=6,\n use_exogenous_features=False,\n use_outlier_detection=False,\n cache_home=tmp,\n auto_save_models=False,\n verbose=False,\n )\n mt = MultiTask(cfg, dataframe=df)\n with warnings.catch_warnings():\n warnings.simplefilter(\"ignore\", DeprecationWarning)\n mt.prepare_data().detect_outliers()\n try:\n mt.plot_with_outliers()\n except NotImplementedError as exc:\n print(f\"Plotting unavailable in spotforecast2-safe: {exc}\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nPlotting unavailable in spotforecast2-safe: Plotting is not available in spotforecast2-safe (no plotly/matplotlib). Use the spotforecast2 package for visualisation.\n```\n:::\n:::\n\n\n### prepare_data { #spotforecast2_safe.multitask.base.BaseTask.prepare_data }\n\n```python\nmultitask.base.BaseTask.prepare_data(demo_data=None, df_test=None)\n```\n\nLoad, resample, validate, and configure the pipeline data.\n\nUses the following precedence for the training data:\n\n1. ``demo_data`` argument (if provided).\n2. ``self._dataframe`` set via the constructor.\n\nSimilarly for test data:\n\n1. ``df_test`` argument (if provided).\n2. ``self.data_test`` set via the constructor.\n3. 
``self.config.test_data_loader(self.config)`` if set.\n\n#### Parameters {.doc-section .doc-section-parameters}\n\n| Name | Type | Description | Default |\n|-----------|---------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------|-----------|\n| demo_data | [Optional](`typing.Optional`)\\[[pd](`pandas`).[DataFrame](`pandas.DataFrame`)\\] | Pre-loaded input DataFrame. When ``None``, the constructor ``dataframe`` is used. | `None` |\n| df_test | [Optional](`typing.Optional`)\\[[pd](`pandas`).[DataFrame](`pandas.DataFrame`)\\] | Pre-loaded test DataFrame. When ``None``, the constructor ``data_test`` is used, then ``config.test_data_loader``. | `None` |\n\n#### Returns {.doc-section .doc-section-returns}\n\n| Name | Type | Description |\n|--------|----------------------------------------------------------|---------------------------------|\n| | [BaseTask](`spotforecast2_safe.multitask.base.BaseTask`) | ``self`` (for method chaining). |\n\n#### Raises {.doc-section .doc-section-raises}\n\n| Name | Type | Description |\n|--------|----------------------------|----------------------------------------------------------------------------------|\n| | [ValueError](`ValueError`) | If no data source is available (no ``demo_data``, no constructor ``dataframe``). 
|\n\n#### Examples {.doc-section .doc-section-examples}\n\n::: {#d518bf13 .cell execution_count=12}\n``` {.python .cell-code}\nimport tempfile\nimport pandas as pd\nimport numpy as np\nfrom spotforecast2_safe.multitask import MultiTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nrng = np.random.default_rng(0)\nidx = pd.date_range(\"2023-01-01\", periods=24 * 14, freq=\"h\", tz=\"UTC\")\ndf = pd.DataFrame({\"a\": rng.normal(100, 10, len(idx))}, index=idx)\ndf.index.name = \"DateTime\"\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(\n predict_size=6,\n use_exogenous_features=False,\n use_outlier_detection=False,\n cache_home=tmp,\n )\n mt = MultiTask(cfg, dataframe=df)\n mt.prepare_data()\n print(f\"Pipeline shape: {mt.df_pipeline.shape}\")\n print(f\"Targets: {mt.run_state.targets}\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nPipeline shape: (336, 1)\nTargets: ['a']\n```\n:::\n\n::: {.cell-output .cell-output-stderr}\n```\n/var/folders/dw/pvtj6mt91znd0hftcztqb0k00000gn/T/ipykernel_13789/2442723251.py:20: DeprecationWarning: Derived pipeline fields (start_download, end_download, data_start, data_end, cov_start, cov_end, end_train_ts, start_train_ts) have moved to task.run_state. Reading them from the config is deprecated and will stop working in the next major release. 
config.targets continues to hold the user input unchanged; read the resolved list from task.run_state.targets.\n mt.prepare_data()\n```\n:::\n:::\n\n\n### run { #spotforecast2_safe.multitask.base.BaseTask.run }\n\n```python\nmultitask.base.BaseTask.run(\n show=False,\n task=None,\n task_name=None,\n use_tuned_params=True,\n max_age_days=None,\n search_space=None,\n dry_run=False,\n cache_home=None,\n **kwargs,\n)\n```\n\nExecute the task-specific training / prediction pipeline.\n\nSubclasses must override this method.\n\n#### Parameters {.doc-section .doc-section-parameters}\n\n| Name | Type | Description | Default |\n|------------------|---------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------|\n| show | [bool](`bool`) | If ``True``, invoke the visualisation hooks (no-ops in this package; meaningful only in ``spotforecast2``). | `False` |\n| task | [Optional](`typing.Optional`)\\[[str](`str`)\\] | Task mode override (used by ``MultiTask``). | `None` |\n| task_name | [Optional](`typing.Optional`)\\[[str](`str`)\\] | Restrict model loading to a specific source task (used by ``PredictTask``). | `None` |\n| use_tuned_params | [bool](`bool`) | Load cached tuning results when available (used by ``LazyTask``). | `True` |\n| max_age_days | [Optional](`typing.Optional`)\\[[float](`float`)\\] | Maximum age in days for cached results (used by ``LazyTask`` and ``PredictTask``). Freshness is judged against the wall-clock timestamp embedded in the cache filename, so the check is machine-local. | `None` |\n| search_space | [Optional](`typing.Optional`)\\[[Any](`typing.Any`)\\] | Hyperparameter search-space definition (accepted for API compatibility; not used in this package). 
| `None` |\n| dry_run | [bool](`bool`) | Report what would be deleted without removing anything (used by ``CleanTask``). | `False` |\n| cache_home | [Optional](`typing.Optional`)\\[[Path](`pathlib.Path`)\\] | Override the cache directory (used by ``CleanTask``). | `None` |\n| **kwargs | [Any](`typing.Any`) | Additional task-specific arguments. | `{}` |\n\n#### Returns {.doc-section .doc-section-returns}\n\n| Name | Type | Description |\n|--------|------------------------------------------------------------|---------------------------------------------|\n| | [Dict](`typing.Dict`)\\[[str](`str`), [Any](`typing.Any`)\\] | Aggregated prediction package for the task. |\n\n#### Raises {.doc-section .doc-section-raises}\n\n| Name | Type | Description |\n|--------|----------------------------------------------|------------------------------------------|\n| | [NotImplementedError](`NotImplementedError`) | Always, unless overridden by a subclass. |\n\n#### Examples {.doc-section .doc-section-examples}\n\n::: {#a23fe6f2 .cell execution_count=13}\n``` {.python .cell-code}\nimport tempfile\nfrom pathlib import Path\nfrom spotforecast2_safe.multitask.base import BaseTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\n# BaseTask.run is abstract and always raises NotImplementedError.\n# Concrete subclasses (LazyTask, DefaultsTask, PredictTask, CleanTask)\n# provide the real implementation.\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(cache_home=Path(tmp), verbose=False)\n task = BaseTask(cfg)\n try:\n task.run()\n except NotImplementedError as exc:\n print(f\"Expected: {exc}\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nExpected: BaseTask must implement run(). 
Use LazyTask, DefaultsTask, PredictTask, or CleanTask.\n```\n:::\n:::\n\n\n### save_models { #spotforecast2_safe.multitask.base.BaseTask.save_models }\n\n```python\nmultitask.base.BaseTask.save_models(task_name, forecasters=None)\n```\n\nSave fitted forecaster models to the cache directory.\n\nEach model is serialised with ``joblib`` (compress=3) into\n``/models//`` using a datetime-stamped\nfilename so that multiple snapshots can coexist.\n\nFilename format::\n\n ___.joblib\n\nIf ``forecasters`` is ``None`` the method collects fitted models\nfrom ``self.results[task_name]``, where each prediction package is\nexpected to contain a ``\"forecaster\"`` key.\n\n#### Parameters {.doc-section .doc-section-parameters}\n\n| Name | Type | Description | Default |\n|-------------|---------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------|\n| task_name | [str](`str`) | Task identifier (``\"lazy\"``, ``\"defaults\"``). The names ``\"optuna\"`` and ``\"spotoptim\"`` are also accepted so that model caches produced by the ``spotforecast2`` sibling package can be saved and loaded; no tuning is performed in this package. | _required_ |\n| forecasters | [Optional](`typing.Optional`)\\[[Dict](`typing.Dict`)\\[[str](`str`), [Any](`typing.Any`)\\]\\] | Optional mapping ``{target: fitted_forecaster}``. When ``None``, models are taken from the prediction packages stored in ``self.results``. 
| `None` |\n\n#### Returns {.doc-section .doc-section-returns}\n\n| Name | Type | Description |\n|--------|---------------------------------------------------------------|-------------------------------------------------------|\n| | [Dict](`typing.Dict`)\\[[str](`str`), [Path](`pathlib.Path`)\\] | Mapping ``{target: Path}`` of saved model file paths. |\n\n#### Raises {.doc-section .doc-section-raises}\n\n| Name | Type | Description |\n|--------|--------------------------------|-------------------------------------------------------------------------------------------|\n| | [ValueError](`ValueError`) | If ``task_name`` is not one of ``\"lazy\"``, ``\"defaults\"``, ``\"optuna\"``, ``\"spotoptim\"``. |\n| | [RuntimeError](`RuntimeError`) | If no fitted models are available for the requested task. |\n\n#### Examples {.doc-section .doc-section-examples}\n\n::: {#0055eaf0 .cell execution_count=14}\n``` {.python .cell-code}\nimport tempfile\nfrom pathlib import Path\nfrom spotforecast2_safe.multitask import LazyTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(\n data_frame_name=\"demo\",\n cache_home=Path(tmp),\n verbose=False,\n )\n task = LazyTask(cfg)\n # Supply a tiny in-memory object as a stand-in for a fitted forecaster.\n dummy_forecaster = object()\n saved = task.save_models(\n task_name=\"lazy\",\n forecasters={\"load\": dummy_forecaster},\n )\n print(f\"Saved targets: {list(saved.keys())}\")\n assert saved[\"load\"].suffix == \".joblib\"\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nSaved targets: ['load']\n```\n:::\n:::\n\n\n### save_tuning_results { #spotforecast2_safe.multitask.base.BaseTask.save_tuning_results }\n\n```python\nmultitask.base.BaseTask.save_tuning_results(\n target,\n task_name,\n best_params,\n best_lags,\n)\n```\n\nSave tuning results (best parameters and lags) to a JSON file.\n\nThe file is stored under ``/tuning_results/`` with 
a\ndatetime-stamped filename so that loaders can determine freshness.\n\nFilename format::\n\n <data_frame_name>_<target>_<task_name>_<timestamp>.json\n\n#### Parameters {.doc-section .doc-section-parameters}\n\n| Name | Type | Description | Default |\n|-------------|------------------------------------------------------------|-------------------------------------------------------------------|------------|\n| target | [str](`str`) | Name of the forecast target column. | _required_ |\n| task_name | [str](`str`) | Tuning algorithm identifier (e.g. ``\"optuna\"``, ``\"spotoptim\"``). | _required_ |\n| best_params | [Dict](`typing.Dict`)\\[[str](`str`), [Any](`typing.Any`)\\] | Best hyperparameters discovered during tuning. | _required_ |\n| best_lags | [Any](`typing.Any`) | Best lag configuration (int, list, or nested list). | _required_ |\n\n#### Returns {.doc-section .doc-section-returns}\n\n| Name | Type | Description |\n|--------|------------------------|------------------------------|\n| | [Path](`pathlib.Path`) | Path to the saved JSON file. 
|\n\n#### Examples {.doc-section .doc-section-examples}\n\n::: {#b48d249a .cell execution_count=15}\n``` {.python .cell-code}\nimport tempfile\nfrom pathlib import Path\nfrom spotforecast2_safe.multitask import LazyTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(data_frame_name=\"demo10\", cache_home=Path(tmp))\n task = LazyTask(cfg)\n path = task.save_tuning_results(\n target=\"target_0\",\n task_name=\"optuna\",\n best_params={\"n_estimators\": 100, \"learning_rate\": 0.05},\n best_lags=[1, 2, 24],\n )\n print(path.name[:10])\n```\n\n::: {.cell-output .cell-output-stdout}\n```\ndemo10_tar\n```\n:::\n:::\n\n\n", + "markdown": "---\ntitle: multitask.base.BaseTask\n---\n\n\n\n```python\nmultitask.base.BaseTask(\n config=None,\n *,\n dataframe=None,\n data_test=None,\n cache_home=None,\n log_level=logging.INFO,\n **overrides,\n)\n```\n\nShared base for all multi-target forecasting pipeline tasks.\n\n``BaseTask`` encapsulates the data-preparation pipeline (steps 1-7)\nand all helper methods shared across the task modes (lazy, defaults,\npredict, clean). Subclasses implement the run method with task-specific\ntraining or prediction logic.\n\nThe constructor takes a single ``config`` object satisfying the\n``PipelineConfig`` protocol — typically a ``ConfigMulti``. All pipeline\nparameters (forecast horizon, training window, outlier policy, weather/\nholiday hooks, cross-validation fold count, persistence policy, ...) live\non that object. Only genuinely call-time state (the dataframes, the cache\ndirectory override, the logging level) is passed as separate kwargs.\nExtra ``**overrides`` are forwarded to ``config.set_params`` and mutate\nthe passed-in config in place.\n\nPlotting is not available in ``spotforecast2-safe``. 
The\n``_show_prediction_figure`` and ``_show_prediction_figure_agg`` hook\nmethods are no-ops; override them in a subclass or use the\n``spotforecast2`` sibling package for interactive visualisation.\n\n## Parameters {.doc-section .doc-section-parameters}\n\n| Name | Type | Description | Default |\n|-------------|-------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------|----------------|\n| config | [Optional](`typing.Optional`)\\[[PipelineConfig](`spotforecast2_safe.multitask.base.PipelineConfig`)\\] | A ``PipelineConfig``-conforming object owning every pipeline parameter. ``ConfigMulti`` satisfies the protocol. | `None` |\n| dataframe | [Optional](`typing.Optional`)\\[[pd](`pandas`).[DataFrame](`pandas.DataFrame`)\\] | Pre-loaded input DataFrame with training data. Must contain a datetime column matching ``config.index_name`` plus at least one numeric target column. | `None` |\n| data_test | [Optional](`typing.Optional`)\\[[pd](`pandas`).[DataFrame](`pandas.DataFrame`)\\] | Pre-loaded test DataFrame (ground truth for the forecast horizon). Optional. | `None` |\n| cache_home | [Optional](`typing.Optional`)\\[[Path](`pathlib.Path`)\\] | Cache directory override. When not ``None``, replaces ``config.cache_home`` for this task instance. | `None` |\n| log_level | [int](`int`) | Logging level for the pipeline logger. | `logging.INFO` |\n| **overrides | [Any](`typing.Any`) | Forwarded to ``config.set_params(**overrides)`` — a convenience for one-line tweaks without building a fresh config. Mutates the caller's config object. 
| `{}` |\n\n## Attributes {.doc-section .doc-section-attributes}\n\n| Name | Type | Description |\n|--------------------|----------------------------------------------------------------------|--------------------------------------------------------|\n| config | [PipelineConfig](`spotforecast2_safe.multitask.base.PipelineConfig`) | Centralised pipeline configuration. |\n| df_pipeline | [pd](`pandas`).[DataFrame](`pandas.DataFrame`) | Pipeline DataFrame after preparation. |\n| df_test | [pd](`pandas`).[DataFrame](`pandas.DataFrame`) | Test DataFrame (ground truth). |\n| weight_func | [Optional](`typing.Optional`)\\[[Any](`typing.Any`)\\] | Sample-weight function from imputation. |\n| exogenous_features | [pd](`pandas`).[DataFrame](`pandas.DataFrame`) | Combined exogenous feature matrix. |\n| exog_feature_names | [List](`typing.List`)\\[[str](`str`)\\] | Selected exogenous feature names. |\n| data_with_exog | [pd](`pandas`).[DataFrame](`pandas.DataFrame`) | Merged target + exogenous data. |\n| exo_pred | [pd](`pandas`).[DataFrame](`pandas.DataFrame`) | Exogenous covariates for the forecast horizon. |\n| results | [Dict](`typing.Dict`)\\[[str](`str`), [Dict](`typing.Dict`)\\] | Per-task mapping of target name to prediction package. |\n| agg_results | [Dict](`typing.Dict`) | Mapping of task name to aggregated prediction package. 
|\n\n## Examples {.doc-section .doc-section-examples}\n\n\n::: {#e686c3d0 .cell execution_count=1}\n``` {.python .cell-code}\nimport tempfile\nimport numpy as np\nimport pandas as pd\nfrom spotforecast2_safe.multitask.base import BaseTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nrng = np.random.default_rng(0)\nidx = pd.date_range(\"2023-01-01\", periods=24 * 7, freq=\"h\", tz=\"UTC\")\ndf = pd.DataFrame({\"load\": rng.normal(500, 30, len(idx))}, index=idx)\ndf.index.name = \"DateTime\"\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(\n predict_size=6,\n use_exogenous_features=False,\n use_outlier_detection=False,\n cache_home=tmp,\n auto_save_models=False,\n verbose=False,\n )\n task = BaseTask(cfg, dataframe=df)\n print(f\"Task mode: {task.TASK}\")\n print(f\"Config predict_size: {task.config.predict_size}\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nTask mode: lazy\nConfig predict_size: 6\n```\n:::\n:::\n\n\n## Methods\n\n| Name | Description |\n| --- | --- |\n| [agg_predictor](#spotforecast2_safe.multitask.base.BaseTask.agg_predictor) | Aggregate per-target prediction packages into a weighted forecast. |\n| [build_exogenous_features](#spotforecast2_safe.multitask.base.BaseTask.build_exogenous_features) | Build, combine, encode, and merge exogenous feature covariates. |\n| [create_forecaster](#spotforecast2_safe.multitask.base.BaseTask.create_forecaster) | Create a fresh forecaster for the given target. |\n| [cv_ts](#spotforecast2_safe.multitask.base.BaseTask.cv_ts) | Build a ``TimeSeriesFold`` for cross-validation. |\n| [detect_outliers](#spotforecast2_safe.multitask.base.BaseTask.detect_outliers) | Apply hard-bound filtering and IsolationForest outlier detection. |\n| [impute](#spotforecast2_safe.multitask.base.BaseTask.impute) | Fill missing values using the configured imputation strategy. 
|\n| [load_models](#spotforecast2_safe.multitask.base.BaseTask.load_models) | Load the most recent fitted models from the cache directory. |\n| [load_tuning_results](#spotforecast2_safe.multitask.base.BaseTask.load_tuning_results) | Load the most recent tuning results for a target from cache. |\n| [log_summary](#spotforecast2_safe.multitask.base.BaseTask.log_summary) | Log a summary of the current pipeline configuration. |\n| [plot_with_outliers](#spotforecast2_safe.multitask.base.BaseTask.plot_with_outliers) | Visualise original vs. cleaned data with outlier markers. |\n| [prepare_data](#spotforecast2_safe.multitask.base.BaseTask.prepare_data) | Load, resample, validate, and configure the pipeline data. |\n| [run](#spotforecast2_safe.multitask.base.BaseTask.run) | Execute the task-specific training / prediction pipeline. |\n| [save_models](#spotforecast2_safe.multitask.base.BaseTask.save_models) | Save fitted forecaster models to the cache directory. |\n| [save_tuning_results](#spotforecast2_safe.multitask.base.BaseTask.save_tuning_results) | Save tuning results (best parameters and lags) to a JSON file. 
|\n\n### agg_predictor { #spotforecast2_safe.multitask.base.BaseTask.agg_predictor }\n\n```python\nmultitask.base.BaseTask.agg_predictor(results, targets, weights)\n```\n\nAggregate per-target prediction packages into a weighted forecast.\n\nDelegates to the module-level ``agg_predictor`` function.\nAvailable as an instance method so that subclasses can override the\naggregation strategy when needed.\n\n#### Parameters {.doc-section .doc-section-parameters}\n\n| Name | Type | Description | Default |\n|---------|---------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------|------------|\n| results | [Dict](`typing.Dict`)\\[[str](`str`), [Dict](`typing.Dict`)\\[[str](`str`), [Any](`typing.Any`)\\]\\] | Mapping of target name to prediction package (as returned by ``build_prediction_package``). | _required_ |\n| targets | [List](`typing.List`)\\[[str](`str`)\\] | Ordered list of target names to include. | _required_ |\n| weights | [List](`typing.List`)\\[[float](`float`)\\] | Per-target aggregation weights aligned with ``targets``. | _required_ |\n\n#### Returns {.doc-section .doc-section-returns}\n\n| Name | Type | Description |\n|--------|------------------------------------------------------------|-------------------------------------|\n| | [Dict](`typing.Dict`)\\[[str](`str`), [Any](`typing.Any`)\\] | Aggregated prediction package dict. 
|\n\n#### Examples {.doc-section .doc-section-examples}\n\n::: {#5a996f74 .cell execution_count=2}\n``` {.python .cell-code}\nimport tempfile\nimport numpy as np\nimport pandas as pd\nfrom spotforecast2_safe.multitask import LazyTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nrng = np.random.default_rng(0)\nidx_train = pd.date_range(\"2023-01-01\", periods=48, freq=\"h\", tz=\"UTC\")\nidx_future = pd.date_range(\"2023-01-03\", periods=6, freq=\"h\", tz=\"UTC\")\n\ndef _pkg(train_val, future_val):\n return {\n \"train_actual\": pd.Series(np.full(48, train_val), index=idx_train),\n \"train_pred\": pd.Series(np.full(48, train_val * 0.99), index=idx_train),\n \"future_pred\": pd.Series(np.full(6, future_val), index=idx_future),\n \"future_actual\": pd.Series(dtype=\"float64\"),\n }\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(cache_home=tmp, verbose=False)\n task = LazyTask(cfg)\n results = {\"wind\": _pkg(100.0, 110.0), \"solar\": _pkg(200.0, 210.0)}\n agg = task.agg_predictor(results, [\"wind\", \"solar\"], [0.4, 0.6])\n print(f\"Weighted future_pred: {agg['future_pred'].iloc[0]:.1f}\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nWeighted future_pred: 170.0\n```\n:::\n:::\n\n\n### build_exogenous_features { #spotforecast2_safe.multitask.base.BaseTask.build_exogenous_features }\n\n```python\nmultitask.base.BaseTask.build_exogenous_features()\n```\n\nBuild, combine, encode, and merge exogenous feature covariates.\n\nThese are steps 4-7 of the pipeline (run after ``prepare_data``,\n``detect_outliers``, and ``impute``). It assembles the full\nexogenous-covariate matrix that the forecaster consumes, then merges\nit onto the target data. The orchestration proceeds in order:\n\n* 4a — Weather, via ``get_weather_features`` (Open-Meteo). 
The\n response is parquet-cached only when ``config.cache_home`` is set.\n Fetch failures are handled per ``config.on_weather_failure``:\n ``\"raise\"`` re-raises ``WeatherFetchError``; ``\"skip\"`` logs a\n warning and continues with an empty weather frame (fail-safe).\n* 4b — Calendar features, via ``get_calendar_features``.\n* 4c — Day/night (solar) features, via ``get_day_night_features``\n (computed with ``astral`` from ``config.latitude`` /\n ``config.longitude``).\n* 4d — Holiday features, via ``get_holiday_features`` for\n ``config.country_code`` / ``config.state``.\n* 5 — The four frames are concatenated along the columns and any\n residual gaps are back- then forward-filled. Provider-based\n exogenous columns are then appended via\n ``build_providers_from_config`` (requires ``spotforecast2-safe``\n >= 15.7.0). The active providers are governed by the config flags\n ``include_covid_infection_rate``,\n ``include_entsoe_forecast_load``,\n ``include_entsoe_renewable_forecast``,\n ``include_entsoe_net_load``, and\n ``include_entsoe_day_ahead_price``. Cyclical (sine/cosine)\n encoding is then applied via ``apply_cyclical_encoding``, and\n degree-``config.poly_features_degree`` interaction terms are added\n via ``create_interaction_features``. 
When the degree is at least\n 2, the polynomial columns are ranked by mutual information with the\n primary target and capped to ``config.max_poly_features`` via\n ``select_top_poly_features``.\n* 6 — The training feature set is chosen via\n ``select_exogenous_features``, with provider columns appended\n (order-preserving, de-duplicated).\n* 7 — Targets and covariates are merged via\n ``merge_data_and_covariates`` into ``self.data_with_exog`` and the\n forecast-horizon covariates ``self.exo_pred``.\n\nWhen ``config.use_exogenous_features`` is ``False`` the method is a\nno-op and returns ``self`` immediately, leaving the pipeline\ntarget-only.\n\n#### Attributes {.doc-section .doc-section-attributes}\n\n| Name | Type | Description |\n|--------------------|------------------------------------------------|-------------------------------------------------------------------------------------------------|\n| weather_aligned | [pd](`pandas`).[DataFrame](`pandas.DataFrame`) | Weather frame aligned to the pipeline index, reused by the interaction and selection steps. |\n| exogenous_features | [pd](`pandas`).[DataFrame](`pandas.DataFrame`) | Full combined, encoded, and capped exogenous feature matrix. |\n| exog_feature_names | [List](`typing.List`)\\[[str](`str`)\\] | Names of the exogenous features selected for training (including provider columns). |\n| data_with_exog | [pd](`pandas`).[DataFrame](`pandas.DataFrame`) | Target data merged with the selected exogenous covariates. |\n| exo_pred | [pd](`pandas`).[DataFrame](`pandas.DataFrame`) | Exogenous covariates spanning the forecast horizon, supplied to the forecaster at predict time. |\n\n#### Returns {.doc-section .doc-section-returns}\n\n| Name | Type | Description |\n|--------|----------------------------------------------------------|---------------------------------|\n| | [BaseTask](`spotforecast2_safe.multitask.base.BaseTask`) | ``self`` (for method chaining). 
|\n\n#### Raises {.doc-section .doc-section-raises}\n\n| Name | Type | Description |\n|--------|---------------------------------------------------------------------|-----------------------------------------------------------------------------|\n| | [RuntimeError](`RuntimeError`) | If ``prepare_data`` has not been called. |\n| | [WeatherFetchError](`spotforecast2_safe.weather.WeatherFetchError`) | If the Open-Meteo fetch fails and ``config.on_weather_failure == \"raise\"``. |\n\n#### Examples {.doc-section .doc-section-examples}\n\nWith exogenous features disabled the method is a no-op, so the\nexample below runs without any network access and leaves the\npipeline target-only.\n\n::: {#11e8ae02 .cell execution_count=3}\n``` {.python .cell-code}\nimport tempfile\nimport pandas as pd\nimport numpy as np\nfrom spotforecast2_safe.multitask import MultiTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nrng = np.random.default_rng(0)\nidx = pd.date_range(\"2023-01-01\", periods=24 * 14, freq=\"h\", tz=\"UTC\")\ndf = pd.DataFrame({\"a\": rng.normal(100, 10, len(idx))}, index=idx)\ndf.index.name = \"DateTime\"\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(\n predict_size=6,\n use_exogenous_features=False,\n use_outlier_detection=False,\n cache_home=tmp,\n )\n mt = MultiTask(cfg, dataframe=df)\n mt.prepare_data().detect_outliers().impute().build_exogenous_features()\n print(f\"Exogenous features used: {mt.config.use_exogenous_features}\")\n print(f\"Selected exog feature names: {mt.exog_feature_names}\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nExogenous features used: False\nSelected exog feature names: []\n```\n:::\n:::\n\n\n### create_forecaster { #spotforecast2_safe.multitask.base.BaseTask.create_forecaster }\n\n```python\nmultitask.base.BaseTask.create_forecaster(target=None)\n```\n\nCreate a fresh forecaster for the given target.\n\nDelegates to ``config.forecaster_factory`` when set; otherwise falls\nback to 
``default_lgbm_forecaster_factory``. This factory hook lets\ncallers swap the estimator without subclassing ``BaseTask``.\n\n#### Parameters {.doc-section .doc-section-parameters}\n\n| Name | Type | Description | Default |\n|--------|-----------------------------------------------|------------------------------------------------------------------------------------------------------------|-----------|\n| target | [Optional](`typing.Optional`)\\[[str](`str`)\\] | Optional target column name. Forwarded to the factory so that custom factories can specialise per target. | `None` |\n\n#### Returns {.doc-section .doc-section-returns}\n\n| Name | Type | Description |\n|--------|---------------------|--------------------------------------|\n| | [Any](`typing.Any`) | A new, unfitted forecaster instance. |\n\n#### Examples {.doc-section .doc-section-examples}\n\n::: {#f3df1cc5 .cell execution_count=4}\n``` {.python .cell-code}\nimport tempfile\nfrom pathlib import Path\nfrom spotforecast2_safe.multitask import LazyTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(\n predict_size=6,\n use_exogenous_features=False,\n cache_home=Path(tmp),\n )\n task = LazyTask(cfg)\n forecaster = task.create_forecaster()\nprint(f\"Type: {type(forecaster).__name__}\")\nprint(f\"Lags: {forecaster.lags}\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nType: ForecasterRecursive\nLags: [ 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]\n```\n:::\n:::\n\n\n### cv_ts { #spotforecast2_safe.multitask.base.BaseTask.cv_ts }\n\n```python\nmultitask.base.BaseTask.cv_ts(y_train)\n```\n\nBuild a ``TimeSeriesFold`` for cross-validation.\n\nConstructs the cross-validation splitter used by all tuning tasks.\nInternally uses ``sklearn.model_selection.TimeSeriesSplit`` to\ncompute split boundaries that respect temporal ordering and avoid\ndata leakage between folds.\n\nThe validation boundary is determined 
by ``run_state.end_train_ts`` minus\n``config.delta_val``. When ``config.train_size`` is set, the sklearn\nsplitter uses a sliding fixed-size training window\n(``max_train_size``); otherwise an expanding window is used.\n\n#### Parameters {.doc-section .doc-section-parameters}\n\n| Name | Type | Description | Default |\n|---------|------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------|\n| y_train | [pd](`pandas`).[Series](`pandas.Series`) | Training time series for the current target. Used both to determine the validation boundary and as the sequence passed to ``TimeSeriesSplit.split`` to derive ``initial_train_size``. | _required_ |\n\n#### Returns {.doc-section .doc-section-returns}\n\n| Name | Type | Description |\n|--------|----------------------------------------------------------------------------|----------------------------------------------------------------|\n| | [TimeSeriesFold](`spotforecast2_safe.splitter.split_ts_cv.TimeSeriesFold`) | A configured ``TimeSeriesFold`` instance ready to be passed to |\n| | [TimeSeriesFold](`spotforecast2_safe.splitter.split_ts_cv.TimeSeriesFold`) | a model-selection function. 
|\n\n#### Examples {.doc-section .doc-section-examples}\n\n::: {#3e08117f .cell execution_count=5}\n``` {.python .cell-code}\nimport tempfile\nimport numpy as np\nimport pandas as pd\nfrom spotforecast2_safe.multitask import MultiTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nrng = np.random.default_rng(0)\nidx = pd.date_range(\"2023-01-01\", periods=24 * 14, freq=\"h\", tz=\"UTC\")\ndf = pd.DataFrame({\"a\": rng.normal(100, 10, len(idx))}, index=idx)\ndf.index.name = \"DateTime\"\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(\n predict_size=6,\n use_exogenous_features=False,\n use_outlier_detection=False,\n cache_home=tmp,\n number_folds=2,\n auto_save_models=False,\n verbose=False,\n )\n mt = MultiTask(cfg, dataframe=df)\n mt.prepare_data().detect_outliers().impute().build_exogenous_features()\n y_train = mt.df_pipeline[\"a\"]\n cv = mt.cv_ts(y_train)\n print(f\"TimeSeriesFold steps: {cv.steps}\")\n print(f\"initial_train_size: {cv.initial_train_size}\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nTimeSeriesFold steps: 6\ninitial_train_size: 324\n```\n:::\n:::\n\n\n### detect_outliers { #spotforecast2_safe.multitask.base.BaseTask.detect_outliers }\n\n```python\nmultitask.base.BaseTask.detect_outliers()\n```\n\nApply hard-bound filtering and IsolationForest outlier detection.\n\nHard bounds from ``config.bounds`` are applied to the pipeline data\n(out-of-bound values are removed and later filled by ``impute()``).\nIsolationForest detection (``config.use_outlier_detection``) is\nadvisory: detected outliers are logged per column but not removed.\n\n#### Returns {.doc-section .doc-section-returns}\n\n| Name | Type | Description |\n|--------|----------------------------------------------------------|---------------------------------|\n| | [BaseTask](`spotforecast2_safe.multitask.base.BaseTask`) | ``self`` (for method chaining). 
|\n\n#### Raises {.doc-section .doc-section-raises}\n\n| Name | Type | Description |\n|--------|--------------------------------|-------------------------------------------------|\n| | [RuntimeError](`RuntimeError`) | If method ``prepare_data`` has not been called. |\n\n#### Examples {.doc-section .doc-section-examples}\n\n::: {#168e8951 .cell execution_count=6}\n``` {.python .cell-code}\nimport tempfile\nimport numpy as np\nimport pandas as pd\nfrom spotforecast2_safe.multitask import MultiTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nrng = np.random.default_rng(0)\nidx = pd.date_range(\"2023-01-01\", periods=24 * 14, freq=\"h\", tz=\"UTC\")\ndf = pd.DataFrame({\"a\": rng.normal(100, 10, len(idx))}, index=idx)\ndf.index.name = \"DateTime\"\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(\n predict_size=6,\n use_exogenous_features=False,\n use_outlier_detection=False,\n cache_home=tmp,\n auto_save_models=False,\n verbose=False,\n )\n mt = MultiTask(cfg, dataframe=df)\n mt.prepare_data()\n mt.detect_outliers()\n print(f\"Pipeline shape: {mt.df_pipeline.shape}\")\n assert mt.df_pipeline_original is not None\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nPipeline shape: (336, 1)\n```\n:::\n:::\n\n\n### impute { #spotforecast2_safe.multitask.base.BaseTask.impute }\n\n```python\nmultitask.base.BaseTask.impute()\n```\n\nFill missing values using the configured imputation strategy.\n\n#### Returns {.doc-section .doc-section-returns}\n\n| Name | Type | Description |\n|--------|----------------------------------------------------------|---------------------------------|\n| | [BaseTask](`spotforecast2_safe.multitask.base.BaseTask`) | ``self`` (for method chaining). 
|\n\n#### Raises {.doc-section .doc-section-raises}\n\n| Name | Type | Description |\n|--------|--------------------------------|-------------------------------------------------|\n| | [RuntimeError](`RuntimeError`) | If method ``prepare_data`` has not been called. |\n\n#### Examples {.doc-section .doc-section-examples}\n\n::: {#559a6dd2 .cell execution_count=7}\n``` {.python .cell-code}\nimport tempfile\nimport numpy as np\nimport pandas as pd\nfrom spotforecast2_safe.multitask import MultiTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nrng = np.random.default_rng(0)\nidx = pd.date_range(\"2023-01-01\", periods=24 * 14, freq=\"h\", tz=\"UTC\")\nvalues = rng.normal(100, 10, len(idx))\nvalues[10:13] = float(\"nan\") # inject a few gaps\ndf = pd.DataFrame({\"a\": values}, index=idx)\ndf.index.name = \"DateTime\"\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(\n predict_size=6,\n use_exogenous_features=False,\n use_outlier_detection=False,\n cache_home=tmp,\n auto_save_models=False,\n verbose=False,\n )\n mt = MultiTask(cfg, dataframe=df)\n mt.prepare_data().detect_outliers().impute()\n missing = mt.df_pipeline[\"a\"].isna().sum()\n print(f\"Missing values after imputation: {missing}\")\n assert missing == 0\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nMissing values after imputation: 0\n```\n:::\n:::\n\n\n### load_models { #spotforecast2_safe.multitask.base.BaseTask.load_models }\n\n```python\nmultitask.base.BaseTask.load_models(\n task_name=None,\n target=None,\n max_age_days=None,\n)\n```\n\nLoad the most recent fitted models from the cache directory.\n\nScans ``/models//`` for ``.joblib``\nfiles matching the current ``data_frame_name``. 
Optionally\nfilters by ``task_name``, ``target``, and ``max_age_days``.\n\n#### Parameters {.doc-section .doc-section-parameters}\n\n| Name | Type | Description | Default |\n|--------------|---------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------|-----------|\n| task_name | [Optional](`typing.Optional`)\\[[str](`str`)\\] | If given, only load models from this task (``\"lazy\"``, ``\"defaults\"``, ``\"optuna\"``, or ``\"spotoptim\"``). ``None`` accepts any task. | `None` |\n| target | [Optional](`typing.Optional`)\\[[str](`str`)\\] | If given, only load the model for this target column. ``None`` loads the most recent model for every target found. | `None` |\n| max_age_days | [Optional](`typing.Optional`)\\[[float](`float`)\\] | Maximum age in days. Models older than this are ignored. ``None`` accepts any age. | `None` |\n\n#### Returns {.doc-section .doc-section-returns}\n\n| Name | Type | Description |\n|--------|------------------------------------------------------------|-----------------------------------------------------------|\n| | [Dict](`typing.Dict`)\\[[str](`str`), [Any](`typing.Any`)\\] | Mapping ``{target: forecaster}`` of loaded model objects. |\n| | [Dict](`typing.Dict`)\\[[str](`str`), [Any](`typing.Any`)\\] | Empty dict if no matching models were found. 
|\n\n#### Examples {.doc-section .doc-section-examples}\n\n::: {#97daebbb .cell execution_count=8}\n``` {.python .cell-code}\nimport tempfile\nfrom pathlib import Path\nfrom spotforecast2_safe.multitask import LazyTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(\n data_frame_name=\"demo\",\n cache_home=Path(tmp),\n verbose=False,\n )\n task = LazyTask(cfg)\n # Save a dummy object, then load it back.\n dummy_forecaster = {\"lags\": [1, 2, 24]}\n task.save_models(\n task_name=\"lazy\",\n forecasters={\"load\": dummy_forecaster},\n )\n loaded = task.load_models(task_name=\"lazy\")\n print(f\"Loaded targets: {list(loaded.keys())}\")\n assert loaded[\"load\"][\"lags\"] == [1, 2, 24]\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nLoaded targets: ['load']\n```\n:::\n:::\n\n\n### load_tuning_results { #spotforecast2_safe.multitask.base.BaseTask.load_tuning_results }\n\n```python\nmultitask.base.BaseTask.load_tuning_results(\n target,\n task_name=None,\n max_age_days=None,\n)\n```\n\nLoad the most recent tuning results for a target from cache.\n\nScans ``/tuning_results/`` for files matching the\ncurrent ``data_frame_name`` and ``target``. Optionally filters by\n``task_name`` and discards results older than ``max_age_days``.\n\n#### Parameters {.doc-section .doc-section-parameters}\n\n| Name | Type | Description | Default |\n|--------------|---------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------|------------|\n| target | [str](`str`) | Name of the forecast target column. | _required_ |\n| task_name | [Optional](`typing.Optional`)\\[[str](`str`)\\] | If given, only consider results from this tuning algorithm (e.g. ``\"optuna\"`` or ``\"spotoptim\"``). ``None`` accepts any algorithm. 
| `None` |\n| max_age_days | [Optional](`typing.Optional`)\\[[float](`float`)\\] | Maximum age in days. Results older than this are ignored. ``None`` accepts any age. | `None` |\n\n#### Returns {.doc-section .doc-section-returns}\n\n| Name | Type | Description |\n|--------|---------------------------------------------------------------------------------------------|-----------------------------------------------------------|\n| | [Optional](`typing.Optional`)\\[[Dict](`typing.Dict`)\\[[str](`str`), [Any](`typing.Any`)\\]\\] | A dictionary with keys ``best_params``, ``best_lags``, |\n| | [Optional](`typing.Optional`)\\[[Dict](`typing.Dict`)\\[[str](`str`), [Any](`typing.Any`)\\]\\] | ``task_name``, ``target``, ``data_frame_name``, and |\n| | [Optional](`typing.Optional`)\\[[Dict](`typing.Dict`)\\[[str](`str`), [Any](`typing.Any`)\\]\\] | ``timestamp``; or ``None`` if no matching file was found. |\n\n#### Examples {.doc-section .doc-section-examples}\n\n::: {#e3a55e96 .cell execution_count=9}\n``` {.python .cell-code}\nimport tempfile\nfrom pathlib import Path\nfrom spotforecast2_safe.multitask import LazyTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(data_frame_name=\"demo10\", cache_home=Path(tmp))\n task = LazyTask(cfg)\n task.save_tuning_results(\n target=\"target_0\",\n task_name=\"optuna\",\n best_params={\"n_estimators\": 100},\n best_lags=24,\n )\n result = task.load_tuning_results(target=\"target_0\")\n print(result[\"best_params\"])\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n{'n_estimators': 100}\n```\n:::\n:::\n\n\n### log_summary { #spotforecast2_safe.multitask.base.BaseTask.log_summary }\n\n```python\nmultitask.base.BaseTask.log_summary()\n```\n\nLog a summary of the current pipeline configuration.\n\n#### Examples {.doc-section .doc-section-examples}\n\n::: {#dae810e6 .cell execution_count=10}\n``` {.python .cell-code}\nimport tempfile\nimport numpy 
as np\nimport pandas as pd\nfrom spotforecast2_safe.multitask import MultiTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nrng = np.random.default_rng(0)\nidx = pd.date_range(\"2023-01-01\", periods=24 * 14, freq=\"h\", tz=\"UTC\")\ndf = pd.DataFrame({\"a\": rng.normal(100, 10, len(idx))}, index=idx)\ndf.index.name = \"DateTime\"\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(\n predict_size=6,\n use_exogenous_features=False,\n use_outlier_detection=False,\n cache_home=tmp,\n auto_save_models=False,\n verbose=False,\n )\n mt = MultiTask(cfg, dataframe=df)\n mt.prepare_data().detect_outliers().impute().build_exogenous_features()\n # log_summary writes to the pipeline logger; call it to confirm\n # it runs without error.\n mt.log_summary()\n print(\"log_summary completed without error\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nlog_summary completed without error\n```\n:::\n:::\n\n\n### plot_with_outliers { #spotforecast2_safe.multitask.base.BaseTask.plot_with_outliers }\n\n```python\nmultitask.base.BaseTask.plot_with_outliers()\n```\n\nVisualise original vs. cleaned data with outlier markers.\n\n#### Raises {.doc-section .doc-section-raises}\n\n| Name | Type | Description |\n|--------|----------------------------------------------|---------------------------------------------------------------------------------------------------------------------|\n| | [RuntimeError](`RuntimeError`) | If method ``detect_outliers`` has not been called. |\n| | [NotImplementedError](`NotImplementedError`) | Always — plotting is not available in ``spotforecast2-safe``. Use the ``spotforecast2`` package for visualisation. 
|\n\n#### Examples {.doc-section .doc-section-examples}\n\n::: {#a4d98db0 .cell execution_count=11}\n``` {.python .cell-code}\nimport tempfile\nimport numpy as np\nimport pandas as pd\nfrom spotforecast2_safe.multitask import MultiTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nrng = np.random.default_rng(0)\nidx = pd.date_range(\"2023-01-01\", periods=24 * 14, freq=\"h\", tz=\"UTC\")\ndf = pd.DataFrame({\"a\": rng.normal(100, 10, len(idx))}, index=idx)\ndf.index.name = \"DateTime\"\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(\n predict_size=6,\n use_exogenous_features=False,\n use_outlier_detection=False,\n cache_home=tmp,\n auto_save_models=False,\n verbose=False,\n )\n mt = MultiTask(cfg, dataframe=df)\n mt.prepare_data().detect_outliers()\n try:\n mt.plot_with_outliers()\n except NotImplementedError as exc:\n print(f\"Plotting unavailable in spotforecast2-safe: {exc}\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nPlotting unavailable in spotforecast2-safe: Plotting is not available in spotforecast2-safe (no plotly/matplotlib). Use the spotforecast2 package for visualisation.\n```\n:::\n:::\n\n\n### prepare_data { #spotforecast2_safe.multitask.base.BaseTask.prepare_data }\n\n```python\nmultitask.base.BaseTask.prepare_data(demo_data=None, df_test=None)\n```\n\nLoad, resample, validate, and configure the pipeline data.\n\nUses the following precedence for the training data:\n\n1. ``demo_data`` argument (if provided).\n2. ``self._dataframe`` set via the constructor.\n\nSimilarly for test data:\n\n1. ``df_test`` argument (if provided).\n2. ``self.data_test`` set via the constructor.\n3. 
``self.config.test_data_loader(self.config)`` if set.\n\n#### Parameters {.doc-section .doc-section-parameters}\n\n| Name | Type | Description | Default |\n|-----------|---------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------|-----------|\n| demo_data | [Optional](`typing.Optional`)\\[[pd](`pandas`).[DataFrame](`pandas.DataFrame`)\\] | Pre-loaded input DataFrame. When ``None``, the constructor ``dataframe`` is used. | `None` |\n| df_test | [Optional](`typing.Optional`)\\[[pd](`pandas`).[DataFrame](`pandas.DataFrame`)\\] | Pre-loaded test DataFrame. When ``None``, the constructor ``data_test`` is used, then ``config.test_data_loader``. | `None` |\n\n#### Returns {.doc-section .doc-section-returns}\n\n| Name | Type | Description |\n|--------|----------------------------------------------------------|---------------------------------|\n| | [BaseTask](`spotforecast2_safe.multitask.base.BaseTask`) | ``self`` (for method chaining). |\n\n#### Raises {.doc-section .doc-section-raises}\n\n| Name | Type | Description |\n|--------|----------------------------|----------------------------------------------------------------------------------|\n| | [ValueError](`ValueError`) | If no data source is available (no ``demo_data``, no constructor ``dataframe``). 
|\n\n#### Examples {.doc-section .doc-section-examples}\n\n::: {#4c639271 .cell execution_count=12}\n``` {.python .cell-code}\nimport tempfile\nimport pandas as pd\nimport numpy as np\nfrom spotforecast2_safe.multitask import MultiTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nrng = np.random.default_rng(0)\nidx = pd.date_range(\"2023-01-01\", periods=24 * 14, freq=\"h\", tz=\"UTC\")\ndf = pd.DataFrame({\"a\": rng.normal(100, 10, len(idx))}, index=idx)\ndf.index.name = \"DateTime\"\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(\n predict_size=6,\n use_exogenous_features=False,\n use_outlier_detection=False,\n cache_home=tmp,\n )\n mt = MultiTask(cfg, dataframe=df)\n mt.prepare_data()\n print(f\"Pipeline shape: {mt.df_pipeline.shape}\")\n print(f\"Targets: {mt.run_state.targets}\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nPipeline shape: (336, 1)\nTargets: ['a']\n```\n:::\n:::\n\n\n### run { #spotforecast2_safe.multitask.base.BaseTask.run }\n\n```python\nmultitask.base.BaseTask.run(\n show=False,\n task=None,\n task_name=None,\n use_tuned_params=True,\n max_age_days=None,\n search_space=None,\n dry_run=False,\n cache_home=None,\n **kwargs,\n)\n```\n\nExecute the task-specific training / prediction pipeline.\n\nSubclasses must override this method.\n\n#### Parameters {.doc-section .doc-section-parameters}\n\n| Name | Type | Description | Default |\n|------------------|---------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------|\n| show | [bool](`bool`) | If ``True``, invoke the visualisation hooks (no-ops in this package; meaningful only in ``spotforecast2``). | `False` |\n| task | [Optional](`typing.Optional`)\\[[str](`str`)\\] | Task mode override (used by ``MultiTask``). 
| `None` |\n| task_name | [Optional](`typing.Optional`)\\[[str](`str`)\\] | Restrict model loading to a specific source task (used by ``PredictTask``). | `None` |\n| use_tuned_params | [bool](`bool`) | Load cached tuning results when available (used by ``LazyTask``). | `True` |\n| max_age_days | [Optional](`typing.Optional`)\\[[float](`float`)\\] | Maximum age in days for cached results (used by ``LazyTask`` and ``PredictTask``). Freshness is judged against the wall-clock timestamp embedded in the cache filename, so the check is machine-local. | `None` |\n| search_space | [Optional](`typing.Optional`)\\[[Any](`typing.Any`)\\] | Hyperparameter search-space definition (accepted for API compatibility; not used in this package). | `None` |\n| dry_run | [bool](`bool`) | Report what would be deleted without removing anything (used by ``CleanTask``). | `False` |\n| cache_home | [Optional](`typing.Optional`)\\[[Path](`pathlib.Path`)\\] | Override the cache directory (used by ``CleanTask``). | `None` |\n| **kwargs | [Any](`typing.Any`) | Additional task-specific arguments. | `{}` |\n\n#### Returns {.doc-section .doc-section-returns}\n\n| Name | Type | Description |\n|--------|------------------------------------------------------------|---------------------------------------------|\n| | [Dict](`typing.Dict`)\\[[str](`str`), [Any](`typing.Any`)\\] | Aggregated prediction package for the task. |\n\n#### Raises {.doc-section .doc-section-raises}\n\n| Name | Type | Description |\n|--------|----------------------------------------------|------------------------------------------|\n| | [NotImplementedError](`NotImplementedError`) | Always, unless overridden by a subclass. 
|\n\n#### Examples {.doc-section .doc-section-examples}\n\n::: {#01b0778d .cell execution_count=13}\n``` {.python .cell-code}\nimport tempfile\nfrom pathlib import Path\nfrom spotforecast2_safe.multitask.base import BaseTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\n# BaseTask.run is abstract and always raises NotImplementedError.\n# Concrete subclasses (LazyTask, DefaultsTask, PredictTask, CleanTask)\n# provide the real implementation.\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(cache_home=Path(tmp), verbose=False)\n task = BaseTask(cfg)\n try:\n task.run()\n except NotImplementedError as exc:\n print(f\"Expected: {exc}\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nExpected: BaseTask must implement run(). Use LazyTask, DefaultsTask, PredictTask, or CleanTask.\n```\n:::\n:::\n\n\n### save_models { #spotforecast2_safe.multitask.base.BaseTask.save_models }\n\n```python\nmultitask.base.BaseTask.save_models(task_name, forecasters=None)\n```\n\nSave fitted forecaster models to the cache directory.\n\nEach model is serialised with ``joblib`` (compress=3) into\n``/models//`` using a datetime-stamped\nfilename so that multiple snapshots can coexist.\n\nFilename format::\n\n ___.joblib\n\nIf ``forecasters`` is ``None`` the method collects fitted models\nfrom ``self.results[task_name]``, where each prediction package is\nexpected to contain a ``\"forecaster\"`` key.\n\n#### Parameters {.doc-section .doc-section-parameters}\n\n| Name | Type | Description | Default |\n|-------------|---------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------|\n| task_name | [str](`str`) | Task identifier (``\"lazy\"``, ``\"defaults\"``). 
The names ``\"optuna\"`` and ``\"spotoptim\"`` are also accepted so that model caches produced by the ``spotforecast2`` sibling package can be saved and loaded; no tuning is performed in this package. | _required_ |\n| forecasters | [Optional](`typing.Optional`)\\[[Dict](`typing.Dict`)\\[[str](`str`), [Any](`typing.Any`)\\]\\] | Optional mapping ``{target: fitted_forecaster}``. When ``None``, models are taken from the prediction packages stored in ``self.results``. | `None` |\n\n#### Returns {.doc-section .doc-section-returns}\n\n| Name | Type | Description |\n|--------|---------------------------------------------------------------|-------------------------------------------------------|\n| | [Dict](`typing.Dict`)\\[[str](`str`), [Path](`pathlib.Path`)\\] | Mapping ``{target: Path}`` of saved model file paths. |\n\n#### Raises {.doc-section .doc-section-raises}\n\n| Name | Type | Description |\n|--------|--------------------------------|-------------------------------------------------------------------------------------------|\n| | [ValueError](`ValueError`) | If ``task_name`` is not one of ``\"lazy\"``, ``\"defaults\"``, ``\"optuna\"``, ``\"spotoptim\"``. |\n| | [RuntimeError](`RuntimeError`) | If no fitted models are available for the requested task. 
|\n\n#### Examples {.doc-section .doc-section-examples}\n\n::: {#8c650908 .cell execution_count=14}\n``` {.python .cell-code}\nimport tempfile\nfrom pathlib import Path\nfrom spotforecast2_safe.multitask import LazyTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(\n data_frame_name=\"demo\",\n cache_home=Path(tmp),\n verbose=False,\n )\n task = LazyTask(cfg)\n # Supply a tiny in-memory object as a stand-in for a fitted forecaster.\n dummy_forecaster = object()\n saved = task.save_models(\n task_name=\"lazy\",\n forecasters={\"load\": dummy_forecaster},\n )\n print(f\"Saved targets: {list(saved.keys())}\")\n assert saved[\"load\"].suffix == \".joblib\"\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nSaved targets: ['load']\n```\n:::\n:::\n\n\n### save_tuning_results { #spotforecast2_safe.multitask.base.BaseTask.save_tuning_results }\n\n```python\nmultitask.base.BaseTask.save_tuning_results(\n target,\n task_name,\n best_params,\n best_lags,\n)\n```\n\nSave tuning results (best parameters and lags) to a JSON file.\n\nThe file is stored under ``/tuning_results/`` with a\ndatetime-stamped filename so that loaders can determine freshness.\n\nFilename format::\n\n ___.json\n\n#### Parameters {.doc-section .doc-section-parameters}\n\n| Name | Type | Description | Default |\n|-------------|------------------------------------------------------------|-------------------------------------------------------------------|------------|\n| target | [str](`str`) | Name of the forecast target column. | _required_ |\n| task_name | [str](`str`) | Tuning algorithm identifier (e.g. ``\"optuna\"``, ``\"spotoptim\"``). | _required_ |\n| best_params | [Dict](`typing.Dict`)\\[[str](`str`), [Any](`typing.Any`)\\] | Best hyperparameters discovered during tuning. | _required_ |\n| best_lags | [Any](`typing.Any`) | Best lag configuration (int, list, or nested list). 
| _required_ |\n\n#### Returns {.doc-section .doc-section-returns}\n\n| Name | Type | Description |\n|--------|------------------------|------------------------------|\n| | [Path](`pathlib.Path`) | Path to the saved JSON file. |\n\n#### Examples {.doc-section .doc-section-examples}\n\n::: {#d5d26f68 .cell execution_count=15}\n``` {.python .cell-code}\nimport tempfile\nfrom pathlib import Path\nfrom spotforecast2_safe.multitask import LazyTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(data_frame_name=\"demo10\", cache_home=Path(tmp))\n task = LazyTask(cfg)\n path = task.save_tuning_results(\n target=\"target_0\",\n task_name=\"optuna\",\n best_params={\"n_estimators\": 100, \"learning_rate\": 0.05},\n best_lags=[1, 2, 24],\n )\n print(path.name[:10])\n```\n\n::: {.cell-output .cell-output-stdout}\n```\ndemo10_tar\n```\n:::\n:::\n\n\n", "supporting": [ "multitask.base.BaseTask_files/figure-html" ], diff --git a/_freeze/docs/reference/multitask.defaults.DefaultsTask/execute-results/html.json b/_freeze/docs/reference/multitask.defaults.DefaultsTask/execute-results/html.json index dcb2d5f3..c7116f36 100644 --- a/_freeze/docs/reference/multitask.defaults.DefaultsTask/execute-results/html.json +++ b/_freeze/docs/reference/multitask.defaults.DefaultsTask/execute-results/html.json @@ -1,8 +1,8 @@ { - "hash": "30931594ac51bdd2e08ac48f0fdeb89e", + "hash": "59a92755f2d22d8d2f65aff5974d179e", "result": { "engine": "jupyter", - "markdown": "---\ntitle: multitask.defaults.DefaultsTask\n---\n\n\n\n```python\nmultitask.defaults.DefaultsTask(\n config=None,\n *,\n dataframe=None,\n data_test=None,\n cache_home=None,\n log_level=logging.INFO,\n **overrides,\n)\n```\n\nTask 2 — Defaults fitting (no tuning, no cached params).\n\nCreates an unfitted forecaster per target via ``config.forecaster_factory``\n(or the package default) and fits with whatever parameters that factory\nchooses. 
Unlike ``LazyTask``, never reads the tuning-result cache.\n\n## Examples {.doc-section .doc-section-examples}\n\n\n::: {#00891dc1 .cell execution_count=1}\n``` {.python .cell-code}\nimport tempfile\nfrom pathlib import Path\nfrom spotforecast2_safe.multitask import DefaultsTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(data_frame_name=\"demo10\", predict_size=24, cache_home=Path(tmp))\n task = DefaultsTask(cfg)\n print(f\"Task: {task.TASK}\")\n print(f\"Predict size: {task.config.predict_size}\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nTask: defaults\nPredict size: 24\n```\n:::\n:::\n\n\n## Methods\n\n| Name | Description |\n| --- | --- |\n| [run](#spotforecast2_safe.multitask.defaults.DefaultsTask.run) | Run defaults fitting for all targets. |\n\n### run { #spotforecast2_safe.multitask.defaults.DefaultsTask.run }\n\n```python\nmultitask.defaults.DefaultsTask.run(show=False, **kwargs)\n```\n\nRun defaults fitting for all targets.\n\n#### Parameters {.doc-section .doc-section-parameters}\n\n| Name | Type | Description | Default |\n|----------|---------------------|------------------------------------------------------------------------------------------------------------|-----------|\n| show | [bool](`bool`) | If ``True``, invoke the visualisation hooks. | `False` |\n| **kwargs | [Any](`typing.Any`) | Forwarded for compatibility with ``BaseTask.run``; ``DefaultsTask`` does not consume any extra parameters. | `{}` |\n\n#### Returns {.doc-section .doc-section-returns}\n\n| Name | Type | Description |\n|--------|------------------------------------------------------------|----------------------------------------------------------------|\n| | [Dict](`typing.Dict`)\\[[str](`str`), [Any](`typing.Any`)\\] | Aggregated prediction package. 
Per-target packages are stored |\n| | [Dict](`typing.Dict`)\\[[str](`str`), [Any](`typing.Any`)\\] | on ``self.results[\"defaults\"]``. |\n\n#### Examples {.doc-section .doc-section-examples}\n\n::: {#c2229c1d .cell execution_count=2}\n``` {.python .cell-code}\nimport tempfile\nimport warnings\nimport numpy as np\nimport pandas as pd\nfrom pathlib import Path\nfrom spotforecast2_safe.multitask.defaults import DefaultsTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nrng = np.random.default_rng(0)\nidx = pd.date_range(\"2023-01-01\", periods=24 * 14, freq=\"h\", tz=\"UTC\")\ndf = pd.DataFrame({\"load\": rng.normal(100, 10, len(idx))}, index=idx)\ndf.index.name = \"DateTime\"\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(\n predict_size=6,\n use_exogenous_features=False,\n use_outlier_detection=False,\n auto_save_models=False,\n number_folds=2,\n cache_home=Path(tmp),\n verbose=False,\n )\n with warnings.catch_warnings():\n warnings.simplefilter(\"ignore\", DeprecationWarning)\n task = DefaultsTask(cfg, dataframe=df)\n task.prepare_data().detect_outliers().impute().build_exogenous_features()\n result = task.run()\n\nprint(f\"Future predictions: {len(result['future_pred'])} steps\")\nassert \"defaults\" in task.results\nassert isinstance(result[\"future_pred\"], pd.Series)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nFuture predictions: 6 steps\n```\n:::\n:::\n\n\n", + "markdown": "---\ntitle: multitask.defaults.DefaultsTask\n---\n\n\n\n```python\nmultitask.defaults.DefaultsTask(\n config=None,\n *,\n dataframe=None,\n data_test=None,\n cache_home=None,\n log_level=logging.INFO,\n **overrides,\n)\n```\n\nTask 2 — Defaults fitting (no tuning, no cached params).\n\nCreates an unfitted forecaster per target via ``config.forecaster_factory``\n(or the package default) and fits with whatever parameters that factory\nchooses. 
Unlike ``LazyTask``, never reads the tuning-result cache.\n\n## Examples {.doc-section .doc-section-examples}\n\n\n::: {#c3da1d24 .cell execution_count=1}\n``` {.python .cell-code}\nimport tempfile\nfrom pathlib import Path\nfrom spotforecast2_safe.multitask import DefaultsTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(data_frame_name=\"demo10\", predict_size=24, cache_home=Path(tmp))\n task = DefaultsTask(cfg)\n print(f\"Task: {task.TASK}\")\n print(f\"Predict size: {task.config.predict_size}\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nTask: defaults\nPredict size: 24\n```\n:::\n:::\n\n\n## Methods\n\n| Name | Description |\n| --- | --- |\n| [run](#spotforecast2_safe.multitask.defaults.DefaultsTask.run) | Run defaults fitting for all targets. |\n\n### run { #spotforecast2_safe.multitask.defaults.DefaultsTask.run }\n\n```python\nmultitask.defaults.DefaultsTask.run(show=False, **kwargs)\n```\n\nRun defaults fitting for all targets.\n\n#### Parameters {.doc-section .doc-section-parameters}\n\n| Name | Type | Description | Default |\n|----------|---------------------|------------------------------------------------------------------------------------------------------------|-----------|\n| show | [bool](`bool`) | If ``True``, invoke the visualisation hooks. | `False` |\n| **kwargs | [Any](`typing.Any`) | Forwarded for compatibility with ``BaseTask.run``; ``DefaultsTask`` does not consume any extra parameters. | `{}` |\n\n#### Returns {.doc-section .doc-section-returns}\n\n| Name | Type | Description |\n|--------|------------------------------------------------------------|----------------------------------------------------------------|\n| | [Dict](`typing.Dict`)\\[[str](`str`), [Any](`typing.Any`)\\] | Aggregated prediction package. 
Per-target packages are stored |\n| | [Dict](`typing.Dict`)\\[[str](`str`), [Any](`typing.Any`)\\] | on ``self.results[\"defaults\"]``. |\n\n#### Examples {.doc-section .doc-section-examples}\n\n::: {#f4d150b3 .cell execution_count=2}\n``` {.python .cell-code}\nimport tempfile\nimport numpy as np\nimport pandas as pd\nfrom pathlib import Path\nfrom spotforecast2_safe.multitask.defaults import DefaultsTask\nfrom spotforecast2_safe.configurator.config_multi import ConfigMulti\n\nrng = np.random.default_rng(0)\nidx = pd.date_range(\"2023-01-01\", periods=24 * 14, freq=\"h\", tz=\"UTC\")\ndf = pd.DataFrame({\"load\": rng.normal(100, 10, len(idx))}, index=idx)\ndf.index.name = \"DateTime\"\n\nwith tempfile.TemporaryDirectory() as tmp:\n cfg = ConfigMulti(\n predict_size=6,\n use_exogenous_features=False,\n use_outlier_detection=False,\n auto_save_models=False,\n number_folds=2,\n cache_home=Path(tmp),\n verbose=False,\n )\n task = DefaultsTask(cfg, dataframe=df)\n task.prepare_data().detect_outliers().impute().build_exogenous_features()\n result = task.run()\n\nprint(f\"Future predictions: {len(result['future_pred'])} steps\")\nassert \"defaults\" in task.results\nassert isinstance(result[\"future_pred\"], pd.Series)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nFuture predictions: 6 steps\n```\n:::\n:::\n\n\n", "supporting": [ "multitask.defaults.DefaultsTask_files/figure-html" ], diff --git a/docs/reference/manager.features.get_target_data.qmd b/docs/reference/manager.features.get_target_data.qmd index 0e2a63cb..367e176b 100644 --- a/docs/reference/manager.features.get_target_data.qmd +++ b/docs/reference/manager.features.get_target_data.qmd @@ -5,11 +5,12 @@ manager.features.get_target_data( target, df_pipeline, config, + *, data_with_exog=None, exog_feature_names=None, exo_pred=None, - start_train_ts=None, - end_train_ts=None, + start_train_ts, + end_train_ts, ) ``` @@ -27,22 +28,21 @@ engineering are applied consistently across all forecasting tasks. 
The training-window timestamps are supplied as explicit parameters so that this helper stays decoupled from ``RunState`` (ADR -``adr-multitask-configmulti-merge``, step 5). When either is ``None`` -the function falls back to the corresponding attribute on *config* for -backward compatibility with existing direct callers. +``adr-multitask-configmulti-merge``, step 5). Both parameters are +required; passing ``None`` raises ``ValueError``. ## Parameters {.doc-section .doc-section-parameters} -| Name | Type | Description | Default | -|--------------------|---------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------| -| target | [str](`str`) | Name of the target column to extract from *df_pipeline*. | _required_ | -| df_pipeline | [pd](`pandas`).[DataFrame](`pandas.DataFrame`) | DataFrame with a tz-aware `DatetimeIndex` containing all target columns produced by the preprocessing pipeline. | _required_ | -| config | \'ConfigMulti\' | Pipeline configuration object. ``use_exogenous_features`` must be set. ``start_train_ts`` / ``end_train_ts`` are only read from *config* when the explicit parameters are not supplied. | _required_ | -| data_with_exog | [Optional](`typing.Optional`)\[[pd](`pandas`).[DataFrame](`pandas.DataFrame`)\] | Merged DataFrame of target and exogenous columns covering at least the training window. Required when ``config.use_exogenous_features`` is ``True``. Pass ``None`` (default) to skip exogenous slicing. | `None` | -| exog_feature_names | [Optional](`typing.Optional`)\[[List](`typing.List`)\[[str](`str`)\]\] | Column names to select from *data_with_exog* and *exo_pred*. 
Required when *data_with_exog* is not ``None``. Pass ``None`` (default) when exogenous features are disabled. | `None` | -| exo_pred | [Optional](`typing.Optional`)\[[pd](`pandas`).[DataFrame](`pandas.DataFrame`)\] | Exogenous feature DataFrame covering the forecast horizon. Required when *data_with_exog* is not ``None``. Pass ``None`` (default) when exogenous features are disabled. | `None` | -| start_train_ts | [Optional](`typing.Optional`)\[[pd](`pandas`).[Timestamp](`pandas.Timestamp`)\] | Inclusive start of the training window (tz-aware ``pd.Timestamp``). **Required** — pass ``task.run_state.start_train_ts`` after the pipeline has been prepared. Omitting this argument raises ``ValueError``; reading the value from ``config.start_train_ts`` is deprecated and emits a ``DeprecationWarning``. | `None` | -| end_train_ts | [Optional](`typing.Optional`)\[[pd](`pandas`).[Timestamp](`pandas.Timestamp`)\] | Inclusive end of the training window (tz-aware ``pd.Timestamp``). **Required** — pass ``task.run_state.end_train_ts`` after the pipeline has been prepared. Omitting this argument raises ``ValueError``; reading the value from ``config.end_train_ts`` is deprecated and emits a ``DeprecationWarning``. | `None` | +| Name | Type | Description | Default | +|--------------------|---------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------| +| target | [str](`str`) | Name of the target column to extract from *df_pipeline*. | _required_ | +| df_pipeline | [pd](`pandas`).[DataFrame](`pandas.DataFrame`) | DataFrame with a tz-aware `DatetimeIndex` containing all target columns produced by the preprocessing pipeline. | _required_ | +| config | \'ConfigMulti\' | Pipeline configuration object. ``use_exogenous_features`` must be set. 
| _required_ | +| data_with_exog | [Optional](`typing.Optional`)\[[pd](`pandas`).[DataFrame](`pandas.DataFrame`)\] | Merged DataFrame of target and exogenous columns covering at least the training window. Required when ``config.use_exogenous_features`` is ``True``. Pass ``None`` (default) to skip exogenous slicing. | `None` | +| exog_feature_names | [Optional](`typing.Optional`)\[[List](`typing.List`)\[[str](`str`)\]\] | Column names to select from *data_with_exog* and *exo_pred*. Required when *data_with_exog* is not ``None``. Pass ``None`` (default) when exogenous features are disabled. | `None` | +| exo_pred | [Optional](`typing.Optional`)\[[pd](`pandas`).[DataFrame](`pandas.DataFrame`)\] | Exogenous feature DataFrame covering the forecast horizon. Required when *data_with_exog* is not ``None``. Pass ``None`` (default) when exogenous features are disabled. | `None` | +| start_train_ts | [pd](`pandas`).[Timestamp](`pandas.Timestamp`) | Inclusive start of the training window (tz-aware ``pd.Timestamp``). **Keyword-only, required** — pass ``task.run_state.start_train_ts`` after the pipeline has been prepared. Passing ``None`` raises ``ValueError``. | _required_ | +| end_train_ts | [pd](`pandas`).[Timestamp](`pandas.Timestamp`) | Inclusive end of the training window (tz-aware ``pd.Timestamp``). **Keyword-only, required** — pass ``task.run_state.end_train_ts`` after the pipeline has been prepared. Passing ``None`` raises ``ValueError``. 
| _required_ | ## Returns {.doc-section .doc-section-returns} diff --git a/docs/reference/multitask.base.BaseTask.qmd b/docs/reference/multitask.base.BaseTask.qmd index 317f3917..f05d8476 100644 --- a/docs/reference/multitask.base.BaseTask.qmd +++ b/docs/reference/multitask.base.BaseTask.qmd @@ -345,7 +345,6 @@ splitter uses a sliding fixed-size training window ```{python} import tempfile -import warnings import numpy as np import pandas as pd from spotforecast2_safe.multitask import MultiTask @@ -367,9 +366,7 @@ with tempfile.TemporaryDirectory() as tmp: verbose=False, ) mt = MultiTask(cfg, dataframe=df) - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - mt.prepare_data().detect_outliers().impute().build_exogenous_features() + mt.prepare_data().detect_outliers().impute().build_exogenous_features() y_train = mt.df_pipeline["a"] cv = mt.cv_ts(y_train) print(f"TimeSeriesFold steps: {cv.steps}") @@ -405,7 +402,6 @@ advisory: detected outliers are logged per column but not removed. ```{python} import tempfile -import warnings import numpy as np import pandas as pd from spotforecast2_safe.multitask import MultiTask @@ -426,10 +422,8 @@ with tempfile.TemporaryDirectory() as tmp: verbose=False, ) mt = MultiTask(cfg, dataframe=df) - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - mt.prepare_data() - mt.detect_outliers() + mt.prepare_data() + mt.detect_outliers() print(f"Pipeline shape: {mt.df_pipeline.shape}") assert mt.df_pipeline_original is not None ``` @@ -458,7 +452,6 @@ Fill missing values using the configured imputation strategy. 
```{python} import tempfile -import warnings import numpy as np import pandas as pd from spotforecast2_safe.multitask import MultiTask @@ -481,9 +474,7 @@ with tempfile.TemporaryDirectory() as tmp: verbose=False, ) mt = MultiTask(cfg, dataframe=df) - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - mt.prepare_data().detect_outliers().impute() + mt.prepare_data().detect_outliers().impute() missing = mt.df_pipeline["a"].isna().sum() print(f"Missing values after imputation: {missing}") assert missing == 0 @@ -611,7 +602,6 @@ Log a summary of the current pipeline configuration. ```{python} import tempfile -import warnings import numpy as np import pandas as pd from spotforecast2_safe.multitask import MultiTask @@ -632,9 +622,7 @@ with tempfile.TemporaryDirectory() as tmp: verbose=False, ) mt = MultiTask(cfg, dataframe=df) - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - mt.prepare_data().detect_outliers().impute().build_exogenous_features() + mt.prepare_data().detect_outliers().impute().build_exogenous_features() # log_summary writes to the pipeline logger; call it to confirm # it runs without error. mt.log_summary() @@ -660,7 +648,6 @@ Visualise original vs. cleaned data with outlier markers. 
```{python} import tempfile -import warnings import numpy as np import pandas as pd from spotforecast2_safe.multitask import MultiTask @@ -681,9 +668,7 @@ with tempfile.TemporaryDirectory() as tmp: verbose=False, ) mt = MultiTask(cfg, dataframe=df) - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - mt.prepare_data().detect_outliers() + mt.prepare_data().detect_outliers() try: mt.plot_with_outliers() except NotImplementedError as exc: diff --git a/docs/reference/multitask.defaults.DefaultsTask.qmd b/docs/reference/multitask.defaults.DefaultsTask.qmd index 856c0532..2300e9cf 100644 --- a/docs/reference/multitask.defaults.DefaultsTask.qmd +++ b/docs/reference/multitask.defaults.DefaultsTask.qmd @@ -65,7 +65,6 @@ Run defaults fitting for all targets. ```{python} import tempfile -import warnings import numpy as np import pandas as pd from pathlib import Path @@ -87,11 +86,9 @@ with tempfile.TemporaryDirectory() as tmp: cache_home=Path(tmp), verbose=False, ) - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - task = DefaultsTask(cfg, dataframe=df) - task.prepare_data().detect_outliers().impute().build_exogenous_features() - result = task.run() + task = DefaultsTask(cfg, dataframe=df) + task.prepare_data().detect_outliers().impute().build_exogenous_features() + result = task.run() print(f"Future predictions: {len(result['future_pred'])} steps") assert "defaults" in task.results diff --git a/src/spotforecast2_safe/multitask/base.py b/src/spotforecast2_safe/multitask/base.py index b0ff705d..8640be22 100644 --- a/src/spotforecast2_safe/multitask/base.py +++ b/src/spotforecast2_safe/multitask/base.py @@ -18,7 +18,6 @@ import json import logging -import warnings from datetime import datetime, timezone from pathlib import Path from typing import Any, Dict, List, Literal, Optional, Protocol @@ -371,7 +370,6 @@ def __init__( # Task-owned runtime-derived state (see RunState / ADR 
adr-multitask-configmulti-merge) self.run_state = RunState() - self._run_state_deprecation_warned: bool = False self._data_last_ts_utc: Optional[pd.Timestamp] = None # Pipeline state (populated by methods) @@ -411,43 +409,6 @@ def _attach_file_handler(self) -> None: ) self.logger.addHandler(handler) - # ------------------------------------------------------------------ - # Derived-state helpers (ADR adr-multitask-configmulti-merge, step 7) - # ------------------------------------------------------------------ - - def _set_derived(self, field: str, value: Any) -> None: - """Write a derived pipeline value to ``run_state`` and mirror it onto - ``config`` (one-minor-cycle shim with ``DeprecationWarning``). - - The derived fields were historically stored directly on the config - object. They now live on ``self.run_state``. During this transition - cycle the value is also mirrored via ``setattr`` so that any legacy - reader that reads ``config.`` continues to get a valid value. - A single ``DeprecationWarning`` is emitted per task instance. - - .. deprecated:: - Reading derived pipeline fields from the config is deprecated and - will stop working in the next major release. Read them from - ``task.run_state`` instead. - """ - setattr(self.run_state, field, value) - if not self._run_state_deprecation_warned: - warnings.warn( - "Derived pipeline fields (start_download, end_download, " - "data_start, data_end, cov_start, cov_end, end_train_ts, " - "start_train_ts) have moved to task.run_state. " - "Reading them from the config is deprecated and will stop " - "working in the next major release. " - "config.targets continues to hold the user input unchanged; " - "read the resolved list from task.run_state.targets.", - DeprecationWarning, - # stacklevel=3: caller → prepare_data/_setup_training_window - # → _set_derived. Adjust if call depth changes. 
- stacklevel=3, - ) - self._run_state_deprecation_warned = True - setattr(self.config, field, value) - # ------------------------------------------------------------------ # Step 1 — Data Preparation # ------------------------------------------------------------------ @@ -537,8 +498,8 @@ def prepare_data( first_ts = pd.Timestamp(demo_data[self.config.index_name].iloc[0]) last_ts = pd.Timestamp(demo_data[self.config.index_name].iloc[-1]) - self._set_derived("start_download", first_ts.strftime("%Y%m%d%H%M")) - self._set_derived("end_download", last_ts.strftime("%Y%m%d%H%M")) + self.run_state.start_download = first_ts.strftime("%Y%m%d%H%M") + self.run_state.end_download = last_ts.strftime("%Y%m%d%H%M") # Store the effective last data timestamp for later use in # _setup_training_window to clamp end_train_ts (the clamp is no @@ -577,7 +538,7 @@ def prepare_data( _tc_dev_ref = getattr(self.config, "target_qc_deviation_ref", None) _tc_dev_slots = getattr(self.config, "target_qc_deviation_slots", 2) - # Derive the effective cutoff for the anchor-zone check: mirror the + # Derive the effective cutoff for the anchor-zone check: replicate the # end_train_default / last_ts logic above (ADR §2 step 1). _tc_cutoff: "pd.Timestamp | None" = None if _tc_window is not None: @@ -748,14 +709,13 @@ def prepare_data( _data_end = pd.to_datetime(_data_end_str, utc=True) _cov_start = pd.to_datetime(_cov_start_str, utc=True) _cov_end = pd.to_datetime(_cov_end_str, utc=True) - self._set_derived("data_start", _data_start) - self._set_derived("data_end", _data_end) - self._set_derived("cov_start", _cov_start) - self._set_derived("cov_end", _cov_end) + self.run_state.data_start = _data_start + self.run_state.data_end = _data_end + self.run_state.cov_start = _cov_start + self.run_state.cov_end = _cov_end # Write the resolved target list to run_state only. # config.targets must remain unchanged (user input). - # The mirror shim is NOT applied for targets (it would overwrite user input). 
self.run_state.targets = _working_targets self.df_pipeline = df_pipeline @@ -783,7 +743,6 @@ def detect_outliers(self) -> "BaseTask": Examples: ```{python} import tempfile - import warnings import numpy as np import pandas as pd from spotforecast2_safe.multitask import MultiTask @@ -804,10 +763,8 @@ def detect_outliers(self) -> "BaseTask": verbose=False, ) mt = MultiTask(cfg, dataframe=df) - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - mt.prepare_data() - mt.detect_outliers() + mt.prepare_data() + mt.detect_outliers() print(f"Pipeline shape: {mt.df_pipeline.shape}") assert mt.df_pipeline_original is not None ``` @@ -857,7 +814,6 @@ def plot_with_outliers(self) -> None: Examples: ```{python} import tempfile - import warnings import numpy as np import pandas as pd from spotforecast2_safe.multitask import MultiTask @@ -878,9 +834,7 @@ def plot_with_outliers(self) -> None: verbose=False, ) mt = MultiTask(cfg, dataframe=df) - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - mt.prepare_data().detect_outliers() + mt.prepare_data().detect_outliers() try: mt.plot_with_outliers() except NotImplementedError as exc: @@ -911,7 +865,6 @@ def impute(self) -> "BaseTask": Examples: ```{python} import tempfile - import warnings import numpy as np import pandas as pd from spotforecast2_safe.multitask import MultiTask @@ -934,9 +887,7 @@ def impute(self) -> "BaseTask": verbose=False, ) mt = MultiTask(cfg, dataframe=df) - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - mt.prepare_data().detect_outliers().impute() + mt.prepare_data().detect_outliers().impute() missing = mt.df_pipeline["a"].isna().sum() print(f"Missing values after imputation: {missing}") assert missing == 0 @@ -1373,8 +1324,8 @@ def _setup_training_window(self) -> None: _start_train = effective_end - self.config.train_size _start_train = max(_start_train, self.df_pipeline.index.min()) - 
self._set_derived("end_train_ts", effective_end) - self._set_derived("start_train_ts", _start_train) + self.run_state.end_train_ts = effective_end + self.run_state.start_train_ts = _start_train self.logger.info( "Training window: %s to %s", self.run_state.start_train_ts, @@ -1410,7 +1361,6 @@ def cv_ts(self, y_train: pd.Series) -> TimeSeriesFold: Examples: ```{python} import tempfile - import warnings import numpy as np import pandas as pd from spotforecast2_safe.multitask import MultiTask @@ -1432,9 +1382,7 @@ def cv_ts(self, y_train: pd.Series) -> TimeSeriesFold: verbose=False, ) mt = MultiTask(cfg, dataframe=df) - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - mt.prepare_data().detect_outliers().impute().build_exogenous_features() + mt.prepare_data().detect_outliers().impute().build_exogenous_features() y_train = mt.df_pipeline["a"] cv = mt.cv_ts(y_train) print(f"TimeSeriesFold steps: {cv.steps}") @@ -2192,7 +2140,6 @@ def log_summary(self) -> None: Examples: ```{python} import tempfile - import warnings import numpy as np import pandas as pd from spotforecast2_safe.multitask import MultiTask @@ -2213,9 +2160,7 @@ def log_summary(self) -> None: verbose=False, ) mt = MultiTask(cfg, dataframe=df) - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - mt.prepare_data().detect_outliers().impute().build_exogenous_features() + mt.prepare_data().detect_outliers().impute().build_exogenous_features() # log_summary writes to the pipeline logger; call it to confirm # it runs without error. 
mt.log_summary() diff --git a/src/spotforecast2_safe/multitask/defaults.py b/src/spotforecast2_safe/multitask/defaults.py index 9e526c22..349ba4f9 100644 --- a/src/spotforecast2_safe/multitask/defaults.py +++ b/src/spotforecast2_safe/multitask/defaults.py @@ -40,7 +40,6 @@ def execute_defaults( Examples: ```{python} import tempfile - import warnings import numpy as np import pandas as pd from pathlib import Path @@ -62,11 +61,9 @@ def execute_defaults( cache_home=Path(tmp), verbose=False, ) - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - task = DefaultsTask(cfg, dataframe=df) - task.prepare_data().detect_outliers().impute().build_exogenous_features() - result = execute_defaults(task) + task = DefaultsTask(cfg, dataframe=df) + task.prepare_data().detect_outliers().impute().build_exogenous_features() + result = execute_defaults(task) print(f"Future predictions: {len(result['future_pred'])} steps") assert isinstance(result["future_pred"], pd.Series) @@ -125,7 +122,6 @@ def run( Examples: ```{python} import tempfile - import warnings import numpy as np import pandas as pd from pathlib import Path @@ -147,11 +143,9 @@ def run( cache_home=Path(tmp), verbose=False, ) - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - task = DefaultsTask(cfg, dataframe=df) - task.prepare_data().detect_outliers().impute().build_exogenous_features() - result = task.run() + task = DefaultsTask(cfg, dataframe=df) + task.prepare_data().detect_outliers().impute().build_exogenous_features() + result = task.run() print(f"Future predictions: {len(result['future_pred'])} steps") assert "defaults" in task.results diff --git a/src/spotforecast2_safe/multitask/run_state.py b/src/spotforecast2_safe/multitask/run_state.py index 11c5e660..6ddc7ee3 100644 --- a/src/spotforecast2_safe/multitask/run_state.py +++ b/src/spotforecast2_safe/multitask/run_state.py @@ -35,7 +35,7 @@ import pandas as pd -@dataclass 
+@dataclass(slots=True) class RunState: """Runtime-derived window geometry for one pipeline execution. diff --git a/tests/multitask/test_prepare_data_clamp.py b/tests/multitask/test_prepare_data_clamp.py index 1ade50dc..bfec03d5 100644 --- a/tests/multitask/test_prepare_data_clamp.py +++ b/tests/multitask/test_prepare_data_clamp.py @@ -69,7 +69,7 @@ def test_data_end_anchored_on_last_observed_target(self, frontier_df, tmp_path): ) task.prepare_data() last_target = frontier_df["Actual Load"].dropna().index.max() - assert pd.to_datetime(task.config.data_end, utc=True) == last_target + assert pd.to_datetime(task.run_state.data_end, utc=True) == last_target def test_cov_end_is_data_end_plus_predict_size(self, frontier_df, tmp_path): task = LazyTask( @@ -77,8 +77,8 @@ def test_cov_end_is_data_end_plus_predict_size(self, frontier_df, tmp_path): dataframe=frontier_df, ) task.prepare_data() - data_end = pd.to_datetime(task.config.data_end, utc=True) - cov_end = pd.to_datetime(task.config.cov_end, utc=True) + data_end = pd.to_datetime(task.run_state.data_end, utc=True) + cov_end = pd.to_datetime(task.run_state.cov_end, utc=True) assert cov_end == data_end + pd.Timedelta(hours=PREDICT_SIZE) def test_clamp_emits_warning_log(self, frontier_df, tmp_path, caplog): @@ -102,7 +102,7 @@ def test_aligned_targets_are_not_clamped(self, frontier_df, tmp_path): ) task.prepare_data() assert task.df_pipeline.index.max() == aligned.index.max() - assert pd.to_datetime(task.config.data_end, utc=True) == aligned.index.max() + assert pd.to_datetime(task.run_state.data_end, utc=True) == aligned.index.max() def test_all_columns_as_targets_keeps_extent(self, frontier_df, tmp_path): """With targets=None every column is a target; the exog-like column is diff --git a/tests/multitask/test_prepare_data_target_corruption.py b/tests/multitask/test_prepare_data_target_corruption.py index cb5e0f14..9700c112 100644 --- a/tests/multitask/test_prepare_data_target_corruption.py +++ 
b/tests/multitask/test_prepare_data_target_corruption.py @@ -157,7 +157,7 @@ def test_truncate_retracts_data_end(self, tmp_path): assert report.fired assert report.action == "truncate" - data_end = pd.to_datetime(task.config.data_end, utc=True) + data_end = pd.to_datetime(task.run_state.data_end, utc=True) first_flagged = report.first_flagged_hour assert ( data_end < first_flagged @@ -194,7 +194,7 @@ def test_truncate_absolute_end_invariant(self, tmp_path): ) task.prepare_data() - data_end_post = pd.to_datetime(task.config.data_end, utc=True) + data_end_post = pd.to_datetime(task.run_state.data_end, utc=True) lhs = data_end_post + task.config.predict_size * pd.Timedelta(hours=1) rhs = untruncated_data_end + PREDICT_SIZE * pd.Timedelta(hours=1) assert lhs == rhs, ( @@ -234,7 +234,7 @@ def test_truncate_absolute_end_invariant_mid_hour_last_slot(self, tmp_path): ) task.prepare_data() - data_end_post = pd.to_datetime(task.config.data_end, utc=True) + data_end_post = pd.to_datetime(task.run_state.data_end, utc=True) lhs = data_end_post + task.config.predict_size * pd.Timedelta(hours=1) rhs = untruncated_data_end + PREDICT_SIZE * pd.Timedelta(hours=1) assert lhs == rhs, f"Mid-hour invariant violated: {lhs} != {rhs}" @@ -265,7 +265,7 @@ def test_truncate_absolute_end_invariant_early_end_train_default(self, tmp_path) f"{PREDICT_SIZE + _EXPECTED_BUMP}, got {task.config.predict_size}" ) - data_end_post = pd.to_datetime(task.config.data_end, utc=True) + data_end_post = pd.to_datetime(task.run_state.data_end, utc=True) lhs = data_end_post + task.config.predict_size * pd.Timedelta(hours=1) rhs = untruncated_data_end + PREDICT_SIZE * pd.Timedelta(hours=1) assert lhs == rhs, ( diff --git a/tests/test_run_state.py b/tests/test_run_state.py index 3a01044b..dc7b59be 100644 --- a/tests/test_run_state.py +++ b/tests/test_run_state.py @@ -8,12 +8,12 @@ - After prepare_data() the 6 window fields + targets are populated on run_state. 
- After _setup_training_window() the 2 training-window fields are populated. - Derived fields are NOT declared params on config (_PARAM_NAMES, get_params()). -- Mirror shim: config.data_start etc. are accessible via the shim after prepare_data. +- Derived fields are NOT accessible on config after pipeline runs (no mirror shim). - set_params() raises ValueError for derived fields. - Clamp semantics: user end_train_default beyond data extent → end_train_ts clamps to data end; an explicitly earlier cutoff is honoured unchanged. - config.targets is unchanged by the pipeline (user input preserved). -- DeprecationWarning emitted once per task instance on first _set_derived call. +- No DeprecationWarning is emitted by the pipeline. """ import warnings @@ -106,9 +106,7 @@ class TestRunStateAfterPrepareData: @pytest.fixture(autouse=True) def setup(self, tmp_path): self.task = _make_task(tmp_path) - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - self.task.prepare_data() + self.task.prepare_data() def test_start_download_populated(self): assert self.task.run_state.start_download is not None @@ -158,10 +156,8 @@ class TestRunStateAfterTrainingWindow: @pytest.fixture(autouse=True) def setup(self, tmp_path): self.task = _make_task(tmp_path) - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - self.task.prepare_data() - self.task._setup_training_window() + self.task.prepare_data() + self.task._setup_training_window() def test_end_train_ts_is_timestamp(self): assert isinstance(self.task.run_state.end_train_ts, pd.Timestamp) @@ -219,66 +215,87 @@ def test_set_params_raises(self, field): # --------------------------------------------------------------------------- -# Mirror shim: derived values are also accessible via config +# No config mirror: derived values are NOT accessible via config # --------------------------------------------------------------------------- -class TestMirrorShim: - """The 
one-cycle shim must mirror derived values onto config via setattr.""" +class TestNoConfigMirror: + """After pipeline runs, derived fields must NOT be set on config (shim removed).""" + + # All 8 derived fields — including the two set only by _setup_training_window. + DERIVED = [ + "start_download", + "end_download", + "data_start", + "data_end", + "cov_start", + "cov_end", + "end_train_ts", + "start_train_ts", + ] @pytest.fixture(autouse=True) def setup(self, tmp_path): self.task = _make_task(tmp_path) - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - self.task.prepare_data() - - def test_data_start_on_config_via_shim(self): - assert self.task.config.data_start == self.task.run_state.data_start + self.task.prepare_data() + # Also run _setup_training_window so end_train_ts / start_train_ts are + # populated on run_state — the assertion checks they are still absent + # from config even after both pipeline phases complete. + self.task._setup_training_window() - def test_data_end_on_config_via_shim(self): - assert self.task.config.data_end == self.task.run_state.data_end + @pytest.mark.parametrize("field", DERIVED) + def test_derived_field_not_on_config(self, field): + """After prepare_data() + _setup_training_window(), config must NOT carry any derived field.""" + assert not hasattr(self.task.config, field), ( + f"config.{field} is set after the pipeline ran; " + "the mirror shim has been removed — derived fields live only on run_state." 
+ ) - def test_cov_end_on_config_via_shim(self): - assert self.task.config.cov_end == self.task.run_state.cov_end + def test_run_state_carries_derived_values(self): + """run_state must carry all populated derived values.""" + assert self.task.run_state.data_start is not None + assert self.task.run_state.data_end is not None + assert self.task.run_state.cov_end is not None + assert self.task.run_state.start_download is not None + assert self.task.run_state.end_train_ts is not None + assert self.task.run_state.start_train_ts is not None - def test_targets_not_mirrored_to_config(self): - """targets is intentionally NOT mirrored onto config (would overwrite user input).""" + def test_targets_not_on_config_after_pipeline(self): + """targets is not mirrored onto config — config.targets keeps user input (None).""" # config.targets must remain as the user passed it (None here) assert self.task.config.targets is None # run_state.targets holds the resolved list assert self.task.run_state.targets == ["load"] - def test_start_download_on_config_via_shim(self): - assert self.task.config.start_download == self.task.run_state.start_download - # --------------------------------------------------------------------------- -# DeprecationWarning emitted exactly once per task instance +# No DeprecationWarning emitted by the pipeline # --------------------------------------------------------------------------- -class TestDeprecationWarning: - """The shim must emit one DeprecationWarning per task instance.""" +class TestNoDeprecationWarning: + """The pipeline must not emit any DeprecationWarning after shim removal.""" - def test_deprecation_warning_emitted(self, tmp_path): + def test_no_deprecation_warning_on_prepare_data(self, tmp_path): task = _make_task(tmp_path) with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") task.prepare_data() dep_warnings = [x for x in w if issubclass(x.category, DeprecationWarning)] - assert len(dep_warnings) >= 1 + assert 
len(dep_warnings) == 0, ( + f"prepare_data() emitted {len(dep_warnings)} DeprecationWarning(s); " + "the mirror shim has been removed so no such warnings should be emitted." + ) - def test_deprecation_warning_only_once(self, tmp_path): - """Only one DeprecationWarning per task instance (not per field).""" + def test_no_deprecation_warning_on_setup_training_window(self, tmp_path): + """_setup_training_window must not emit DeprecationWarning either.""" task = _make_task(tmp_path) with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") task.prepare_data() task._setup_training_window() dep_warnings = [x for x in w if issubclass(x.category, DeprecationWarning)] - # Exactly one warning for the whole task's lifecycle. - assert len(dep_warnings) == 1 + assert len(dep_warnings) == 0 # --------------------------------------------------------------------------- @@ -292,10 +309,8 @@ class TestClampSemantics: def test_stale_end_train_default_clamped_to_data_end(self, tmp_path): """When end_train_default is far in the future, end_train_ts == data_end.""" task = _make_task(tmp_path, end_train_default="2099-12-31 00:00+00:00") - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - task.prepare_data() - task._setup_training_window() + task.prepare_data() + task._setup_training_window() # The clamp should prevent end_train_ts from exceeding data_end. 
assert task.run_state.end_train_ts <= task.run_state.data_end @@ -303,10 +318,8 @@ def test_earlier_explicit_cutoff_honoured(self, tmp_path): """An explicit end_train_default earlier than the data is honoured.""" # Data covers 2023-01; set cutoff to early 2023-01 task = _make_task(tmp_path, end_train_default="2023-01-07 00:00+00:00") - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - task.prepare_data() - task._setup_training_window() + task.prepare_data() + task._setup_training_window() expected = pd.Timestamp("2023-01-07 00:00+00:00") assert task.run_state.end_train_ts == expected @@ -314,9 +327,7 @@ def test_config_end_train_default_not_mutated(self, tmp_path): """prepare_data must never mutate config.end_train_default.""" original = "2099-12-31 00:00+00:00" task = _make_task(tmp_path, end_train_default=original) - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - task.prepare_data() + task.prepare_data() assert task.config.end_train_default == original @@ -331,24 +342,18 @@ class TestConfigTargetsPreserved: def test_none_targets_preserved(self, tmp_path): """When user passes no targets, config.targets remains None.""" task = _make_task(tmp_path, targets=None) - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - task.prepare_data() + task.prepare_data() assert task.config.targets is None def test_explicit_targets_preserved(self, tmp_path): """When user passes explicit targets, config.targets is unchanged.""" task = _make_task(tmp_path, targets=["load"]) - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - task.prepare_data() + task.prepare_data() assert task.config.targets == ["load"] def test_run_state_targets_has_resolved_list(self, tmp_path): """The resolved list (after column reconciliation) lives on run_state.""" task = _make_task(tmp_path, targets=None) - with warnings.catch_warnings(): - 
warnings.simplefilter("ignore", DeprecationWarning) - task.prepare_data() + task.prepare_data() assert task.run_state.targets is not None assert len(task.run_state.targets) > 0