diff --git a/src/spotforecast2/model_selection/bayesian_search.py b/src/spotforecast2/model_selection/bayesian_search.py index 1170694b..96c41b03 100644 --- a/src/spotforecast2/model_selection/bayesian_search.py +++ b/src/spotforecast2/model_selection/bayesian_search.py @@ -127,6 +127,46 @@ def bayesian_search_forecaster( TypeError: If cv is not an instance of TimeSeriesFold or OneStepAheadFold. ValueError: If metric list contains duplicate metric names. + Examples: + ```{python} + import numpy as np + import pandas as pd + from sklearn.linear_model import Ridge + from spotforecast2_safe.forecaster.recursive import ForecasterRecursive + from spotforecast2_safe.splitter import TimeSeriesFold + from spotforecast2.model_selection.bayesian_search import bayesian_search_forecaster + + rng = np.random.default_rng(0) + y = pd.Series(rng.standard_normal(40), name="y") + + forecaster = ForecasterRecursive(estimator=Ridge(), lags=2) + cv = TimeSeriesFold(steps=2, initial_train_size=25, refit=False) + + def search_space(trial): + return { + "estimator__alpha": trial.suggest_float("estimator__alpha", 0.01, 10.0), + } + + results, best_trial = bayesian_search_forecaster( + forecaster=forecaster, + y=y, + cv=cv, + search_space=search_space, + metric="mean_squared_error", + n_trials=3, + random_state=0, + return_best=False, + verbose=False, + show_progress=False, + suppress_warnings=True, + ) + + print(results.shape) + print(results.columns.tolist()) + assert results.shape[0] == 3 + assert "mean_squared_error" in results.columns + assert "estimator__alpha" in results.columns + ``` """ if return_best and exog is not None and (len(exog) != len(y)): diff --git a/src/spotforecast2/model_selection/grid_search.py b/src/spotforecast2/model_selection/grid_search.py index a0fafebc..11655c7b 100644 --- a/src/spotforecast2/model_selection/grid_search.py +++ b/src/spotforecast2/model_selection/grid_search.py @@ -305,6 +305,45 @@ def grid_search_forecaster( ) -> pd.DataFrame: """ Exhaustive 
grid search over parameter values for a Forecaster. + + Examples: + ```{python} + import warnings + import numpy as np + import pandas as pd + from sklearn.linear_model import Ridge + from spotforecast2_safe.forecaster.recursive import ForecasterRecursive + from spotforecast2_safe.splitter import TimeSeriesFold + from spotforecast2.model_selection.grid_search import grid_search_forecaster + + rng = np.random.default_rng(0) + idx = pd.date_range("2020-01-01", periods=120, freq="h") + y = pd.Series(rng.normal(0, 1, 120), index=idx) + + forecaster = ForecasterRecursive(estimator=Ridge(), lags=3) + cv = TimeSeriesFold(steps=3, initial_train_size=90, refit=False) + param_grid = {"alpha": [0.1, 1.0]} + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + results = grid_search_forecaster( + forecaster=forecaster, + y=y, + cv=cv, + param_grid=param_grid, + metric="mean_absolute_error", + lags_grid=[3, 5], + return_best=True, + n_jobs=1, + verbose=False, + show_progress=False, + suppress_warnings=True, + ) + + print(results[["lags_label", "params", "mean_absolute_error"]].head()) + assert results.shape == (4, 5) + assert "mean_absolute_error" in results.columns + ``` """ param_grid = list(ParameterGrid(param_grid)) diff --git a/src/spotforecast2/multitask/base.py b/src/spotforecast2/multitask/base.py index 67c923dd..1cc3c6a1 100644 --- a/src/spotforecast2/multitask/base.py +++ b/src/spotforecast2/multitask/base.py @@ -49,6 +49,41 @@ class PlottingMixin: plot_with_outliers: Display original vs. cleaned data with outlier markers. _show_prediction_figure: Show an interactive per-target prediction figure. _show_prediction_figure_agg: Show an interactive aggregated prediction figure. 
+ + Examples: + ```{python} + import tempfile + import numpy as np + import pandas as pd + from spotforecast2_safe.configurator.config_multi import ConfigMulti + from spotforecast2.multitask import LazyTask + from spotforecast2.multitask.base import PlottingMixin + + rng = np.random.default_rng(0) + idx = pd.date_range("2023-01-01", periods=24 * 14, freq="h", tz="UTC") + df = pd.DataFrame({"load": rng.normal(100, 10, len(idx))}, index=idx) + df.index.name = "DateTime" + + with tempfile.TemporaryDirectory() as tmp: + cfg = ConfigMulti( + predict_size=6, + use_exogenous_features=False, + use_outlier_detection=False, + auto_save_models=False, + cache_home=tmp, + ) + task = LazyTask(cfg, dataframe=df) + + # LazyTask inherits from BaseTask(PlottingMixin, SafeBaseTask), so + # PlottingMixin.plot_with_outliers overrides the safe-base no-op stub. + print("PlottingMixin in MRO:", PlottingMixin in type(task).__mro__) + print( + "plot_with_outliers wired to PlottingMixin:", + type(task).plot_with_outliers is PlottingMixin.plot_with_outliers, + ) + assert PlottingMixin in type(task).__mro__ + assert type(task).plot_with_outliers is PlottingMixin.plot_with_outliers + ``` """ def plot_with_outliers(self) -> None: @@ -56,6 +91,33 @@ def plot_with_outliers(self) -> None: Raises: RuntimeError: If ``detect_outliers`` has not been called. 
+ + Examples: + ```{python} + import tempfile + import numpy as np + import pandas as pd + from spotforecast2_safe.configurator.config_multi import ConfigMulti + from spotforecast2.multitask import LazyTask + + rng = np.random.default_rng(0) + idx = pd.date_range("2023-01-01", periods=24 * 14, freq="h", tz="UTC") + df = pd.DataFrame({"load": rng.normal(100, 10, len(idx))}, index=idx) + df.index.name = "DateTime" + + with tempfile.TemporaryDirectory() as tmp: + cfg = ConfigMulti( + predict_size=6, + use_exogenous_features=False, + use_outlier_detection=False, + bounds=[(50, 150)], + auto_save_models=False, + cache_home=tmp, + ) + task = LazyTask(cfg, dataframe=df) + task.prepare_data().detect_outliers() + task.plot_with_outliers() + ``` """ if self.df_pipeline_original is None: # type: ignore[attr-defined] raise RuntimeError("Call detect_outliers() before plot_with_outliers().") @@ -125,10 +187,43 @@ class BaseTask(PlottingMixin, SafeBaseTask): Visualisation additions over the safe base: plot_with_outliers: Renders original vs. cleaned data with outlier - markers via ``spotforecast2.plots.plotter.plot_with_outliers``. - _show_prediction_figure: Calls ``make_plot`` and shows the figure + markers via `spotforecast2.plots.plotter.plot_with_outliers`. + _show_prediction_figure: Calls `make_plot` and shows the figure interactively. _show_prediction_figure_agg: Same for the aggregated prediction. 
+ + Examples: + ```{python} + import tempfile + import numpy as np + import pandas as pd + from spotforecast2_safe.configurator.config_multi import ConfigMulti + from spotforecast2.multitask.base import BaseTask, PlottingMixin + + rng = np.random.default_rng(0) + idx = pd.date_range("2023-01-01", periods=24 * 14, freq="h", tz="UTC") + df = pd.DataFrame({"load": rng.normal(100, 10, len(idx))}, index=idx) + df.index.name = "DateTime" + + with tempfile.TemporaryDirectory() as tmp: + cfg = ConfigMulti( + predict_size=6, + use_exogenous_features=False, + use_outlier_detection=False, + auto_save_models=False, + cache_home=tmp, + ) + task = BaseTask(cfg, dataframe=df) + # Data-preparation pipeline (steps 1-3) + task.prepare_data().detect_outliers().impute() + + print("Pipeline shape:", task.df_pipeline.shape) + print("Targets:", task.config.targets) + # PlottingMixin is in the MRO — visualisation hooks are live Plotly calls. + print("PlottingMixin in MRO:", PlottingMixin in type(task).__mro__) + assert task.df_pipeline.shape[1] == 1 + assert PlottingMixin in type(task).__mro__ + ``` """ # ``_show_prediction_figure`` and ``_show_prediction_figure_agg`` are @@ -223,6 +318,40 @@ def run( # noqa: PLR0913 Raises: NotImplementedError: Always, unless overridden by a subclass. + + Examples: + `BaseTask.run` is abstract — it raises `NotImplementedError` to + enforce that every concrete task subclass provides its own + implementation. Use `LazyTask`, `OptunaTask`, `SpotOptimTask`, + `PredictTask`, or `CleanTask` for live pipelines. 
+ + ```{python} + import tempfile + import numpy as np + import pandas as pd + from spotforecast2_safe.configurator.config_multi import ConfigMulti + from spotforecast2.multitask.base import BaseTask + from spotforecast2.multitask import LazyTask + + rng = np.random.default_rng(0) + idx = pd.date_range("2023-01-01", periods=24 * 14, freq="h", tz="UTC") + df = pd.DataFrame({"load": rng.normal(100, 10, len(idx))}, index=idx) + df.index.name = "DateTime" + + # BaseTask.run raises NotImplementedError — use a concrete subclass. + with tempfile.TemporaryDirectory() as tmp: + cfg = ConfigMulti(cache_home=tmp) + base = BaseTask(cfg) + try: + base.run() + except NotImplementedError as exc: + print("BaseTask.run() raised NotImplementedError (expected).") + print(str(exc)[:60]) + + # LazyTask overrides run() with lazy fitting logic. + print("LazyTask.run is overridden:", LazyTask.run is not BaseTask.run) + assert LazyTask.run is not BaseTask.run + ``` """ raise NotImplementedError( f"{self.__class__.__name__} must implement run(). " diff --git a/src/spotforecast2/multitask/defaults.py b/src/spotforecast2/multitask/defaults.py index f03c3b03..5080dd2d 100644 --- a/src/spotforecast2/multitask/defaults.py +++ b/src/spotforecast2/multitask/defaults.py @@ -52,6 +52,33 @@ def run( Returns: Aggregated prediction package. Per-target packages are stored on ``self.results["defaults"]``. 
+ + Examples: + ```{python} + from unittest.mock import MagicMock, patch + from spotforecast2.multitask import DefaultsTask + + task = DefaultsTask(predict_size=24, auto_save_models=False) + task.config.targets = ["t1"] + + sentinel = {"future_pred": MagicMock(name="predictions")} + with ( + patch.object(task, "_ensure_pipeline_ready"), + patch.object( + task, + "_get_target_data", + return_value=(MagicMock(), MagicMock(), MagicMock()), + ), + patch.object(task, "create_forecaster"), + patch.object(task, "_train_and_predict_target", return_value=sentinel), + patch.object(task, "_aggregate_and_show", return_value=sentinel), + ): + result = task.run(show=False) + + assert "future_pred" in result + print(f"task.TASK: {task.TASK!r}") + print(f"result keys: {list(result.keys())}") + ``` """ del kwargs # DefaultsTask has no tuning- or cache-related parameters return execute_defaults(self, show=show) diff --git a/src/spotforecast2/multitask/lazy.py b/src/spotforecast2/multitask/lazy.py index 038daff7..389ebce5 100644 --- a/src/spotforecast2/multitask/lazy.py +++ b/src/spotforecast2/multitask/lazy.py @@ -61,6 +61,54 @@ def run( Returns: Aggregated prediction package. Per-target packages are stored on ``self.results["lazy"]``. 
+ + Examples: + ```{python} + import tempfile + import numpy as np + import pandas as pd + from lightgbm import LGBMRegressor + from spotforecast2.multitask import LazyTask + from spotforecast2_safe.configurator.config_multi import ConfigMulti + from spotforecast2_safe.forecaster.recursive import ForecasterRecursive + from spotforecast2_safe.preprocessing import RollingFeatures + + rng = np.random.default_rng(0) + n = 24 * 14 # two weeks of hourly data + idx = pd.date_range("2023-01-01", periods=n, freq="h", tz="UTC") + idx.name = "DateTime" + df = pd.DataFrame({"load": rng.normal(100, 10, n)}, index=idx) + + def _fast_factory(config, *, weight_func=None, target=None): + return ForecasterRecursive( + estimator=LGBMRegressor( + n_estimators=10, + random_state=config.random_state, + verbose=-1, + ), + lags=6, + window_features=RollingFeatures(stats=["mean"], window_sizes=6), + weight_func=weight_func, + ) + + with tempfile.TemporaryDirectory() as tmp: + cfg = ConfigMulti( + predict_size=6, + use_exogenous_features=False, + use_outlier_detection=False, + auto_save_models=False, + number_folds=2, + random_state=42, + forecaster_factory=_fast_factory, + cache_home=tmp, + ) + task = LazyTask(cfg, dataframe=df) + task.prepare_data().detect_outliers().impute().build_exogenous_features() + result = task.run(show=False, use_tuned_params=False) + + print(f"Future predictions: {len(result['future_pred'])} steps") + assert len(result["future_pred"]) == 6 + ``` """ return execute_lazy( self, diff --git a/src/spotforecast2/multitask/multi.py b/src/spotforecast2/multitask/multi.py index 38b51d0d..a4c0aa9a 100644 --- a/src/spotforecast2/multitask/multi.py +++ b/src/spotforecast2/multitask/multi.py @@ -128,6 +128,39 @@ def run_task_lazy(self, show: bool = True) -> Dict[str, Any]: Returns: Aggregated prediction package. Per-target results in ``self.results["lazy"]``. 
+ + Examples: + ```{python} + import warnings + import tempfile + warnings.filterwarnings("ignore") + from spotforecast2_safe.data.fetch_data import fetch_data, get_package_data_home + from spotforecast2_safe.configurator.config_multi import ConfigMulti + from spotforecast2.multitask import MultiTask + + data_home = get_package_data_home() + df = fetch_data(filename=str(data_home / "demo10.csv")).iloc[:500] + + config = ConfigMulti( + predict_size=12, + targets=["A"], + lags_consider=[1, 2, 3], + window_size=4, + number_folds=2, + use_exogenous_features=False, + use_outlier_detection=False, + auto_save_models=False, + verbose=False, + ) + config.cache_home = tempfile.mkdtemp() + + mt = MultiTask(config, task="lazy", dataframe=df, show_progress=False) + mt.prepare_data() + mt.impute() + result = mt.run_task_lazy(show=False) + print("Result keys:", list(result.keys())[:4]) + assert "future_pred" in result + ``` """ return execute_lazy(self, show=show) @@ -144,6 +177,39 @@ def run_task_defaults(self, show: bool = True) -> Dict[str, Any]: Returns: Aggregated prediction package. Per-target results in ``self.results["defaults"]``. 
+ + Examples: + ```{python} + import warnings + import tempfile + warnings.filterwarnings("ignore") + from spotforecast2_safe.data.fetch_data import fetch_data, get_package_data_home + from spotforecast2_safe.configurator.config_multi import ConfigMulti + from spotforecast2.multitask import MultiTask + + data_home = get_package_data_home() + df = fetch_data(filename=str(data_home / "demo10.csv")).iloc[:500] + + config = ConfigMulti( + predict_size=12, + targets=["A"], + lags_consider=[1, 2, 3], + window_size=4, + number_folds=2, + use_exogenous_features=False, + use_outlier_detection=False, + auto_save_models=False, + verbose=False, + ) + config.cache_home = tempfile.mkdtemp() + + mt = MultiTask(config, task="defaults", dataframe=df, show_progress=False) + mt.prepare_data() + mt.impute() + result = mt.run_task_defaults(show=False) + print("Result keys:", list(result.keys())[:4]) + assert "future_pred" in result + ``` """ return execute_defaults(self, show=show) @@ -162,6 +228,40 @@ def run_task_optuna( Returns: Aggregated prediction package. Per-target results in ``self.results["optuna"]``. 
+ + Examples: + ```{python} + import warnings + import tempfile + warnings.filterwarnings("ignore") + from spotforecast2_safe.data.fetch_data import fetch_data, get_package_data_home + from spotforecast2_safe.configurator.config_multi import ConfigMulti + from spotforecast2.multitask import MultiTask + + data_home = get_package_data_home() + df = fetch_data(filename=str(data_home / "demo10.csv")).iloc[:500] + + config = ConfigMulti( + predict_size=12, + targets=["A"], + lags_consider=[1, 2, 3], + window_size=4, + number_folds=2, + use_exogenous_features=False, + use_outlier_detection=False, + auto_save_models=False, + verbose=False, + n_trials_optuna=2, + ) + config.cache_home = tempfile.mkdtemp() + + mt = MultiTask(config, task="optuna", dataframe=df, show_progress=False) + mt.prepare_data() + mt.impute() + result = mt.run_task_optuna(show=False) + print("Result keys:", list(result.keys())[:4]) + assert "future_pred" in result + ``` """ return execute_optuna(self, show=show, search_space=search_space) @@ -179,6 +279,41 @@ def run_task_spotoptim( Returns: Aggregated prediction package. Per-target results in ``self.results["spotoptim"]``. 
+ + Examples: + ```{python} + import warnings + import tempfile + warnings.filterwarnings("ignore") + from spotforecast2_safe.data.fetch_data import fetch_data, get_package_data_home + from spotforecast2_safe.configurator.config_multi import ConfigMulti + from spotforecast2.multitask import MultiTask + + data_home = get_package_data_home() + df = fetch_data(filename=str(data_home / "demo10.csv")).iloc[:500] + + config = ConfigMulti( + predict_size=12, + targets=["A"], + lags_consider=[1, 2, 3], + window_size=4, + number_folds=2, + use_exogenous_features=False, + use_outlier_detection=False, + auto_save_models=False, + verbose=False, + n_trials_spotoptim=2, + n_initial_spotoptim=1, + ) + config.cache_home = tempfile.mkdtemp() + + mt = MultiTask(config, task="spotoptim", dataframe=df, show_progress=False) + mt.prepare_data() + mt.impute() + result = mt.run_task_spotoptim(show=False) + print("Result keys:", list(result.keys())[:4]) + assert "future_pred" in result + ``` """ return execute_spotoptim(self, show=show, search_space=search_space) @@ -208,6 +343,58 @@ def run_task_predict( Raises: RuntimeError: If no saved models are found. + + Examples: + ```{python} + import warnings + import tempfile + warnings.filterwarnings("ignore") + from spotforecast2_safe.data.fetch_data import fetch_data, get_package_data_home + from spotforecast2_safe.configurator.config_multi import ConfigMulti + from spotforecast2.multitask import MultiTask + + data_home = get_package_data_home() + df = fetch_data(filename=str(data_home / "demo10.csv")).iloc[:500] + cache_dir = tempfile.mkdtemp() + + # First train and save a model with the lazy task. 
+ config_train = ConfigMulti( + predict_size=12, + targets=["A"], + lags_consider=[1, 2, 3], + window_size=4, + number_folds=2, + use_exogenous_features=False, + use_outlier_detection=False, + auto_save_models=True, + verbose=False, + ) + config_train.cache_home = cache_dir + mt_train = MultiTask(config_train, task="lazy", dataframe=df, show_progress=False) + mt_train.prepare_data() + mt_train.impute() + mt_train.run_task_lazy(show=False) + + # Then load and predict without re-training. + config_pred = ConfigMulti( + predict_size=12, + targets=["A"], + lags_consider=[1, 2, 3], + window_size=4, + number_folds=2, + use_exogenous_features=False, + use_outlier_detection=False, + auto_save_models=False, + verbose=False, + ) + config_pred.cache_home = cache_dir + mt_pred = MultiTask(config_pred, task="predict", dataframe=df, show_progress=False) + mt_pred.prepare_data() + mt_pred.impute() + result = mt_pred.run_task_predict(show=False, task_name="lazy") + print("Result keys:", list(result.keys())[:4]) + assert "future_pred" in result + ``` """ return execute_predict( self, show=show, task_name=task_name, max_age_days=max_age_days @@ -235,6 +422,39 @@ def run_task_clean( Raises: RuntimeError: If the cache directory cannot be removed. 
+ + Examples: + ```{python} + import warnings + import tempfile + warnings.filterwarnings("ignore") + from spotforecast2_safe.data.fetch_data import fetch_data, get_package_data_home + from spotforecast2_safe.configurator.config_multi import ConfigMulti + from spotforecast2.multitask import MultiTask + + data_home = get_package_data_home() + df = fetch_data(filename=str(data_home / "demo10.csv")).iloc[:500] + cache_dir = tempfile.mkdtemp() + + config = ConfigMulti( + predict_size=12, + targets=["A"], + lags_consider=[1, 2, 3], + window_size=4, + number_folds=2, + use_exogenous_features=False, + use_outlier_detection=False, + auto_save_models=False, + verbose=False, + ) + config.cache_home = cache_dir + + # dry_run=True reports what would be removed without deleting. + mt = MultiTask(config, task="clean", dataframe=df, show_progress=False) + result = mt.run_task_clean(dry_run=True) + print("status:", result["status"]) + assert result["status"] == "dry_run" + ``` """ return execute_clean(self, cache_home=cache_home, dry_run=dry_run) @@ -263,6 +483,40 @@ def run( ``"optuna"``, ``"spotoptim"``, ``"predict"``, ``"clean"``. RuntimeError: If method ``prepare_data`` has not been called (for training and prediction tasks). + + Examples: + ```{python} + import warnings + import tempfile + warnings.filterwarnings("ignore") + from spotforecast2_safe.data.fetch_data import fetch_data, get_package_data_home + from spotforecast2_safe.configurator.config_multi import ConfigMulti + from spotforecast2.multitask import MultiTask + + data_home = get_package_data_home() + df = fetch_data(filename=str(data_home / "demo10.csv")).iloc[:500] + + config = ConfigMulti( + predict_size=12, + targets=["A"], + lags_consider=[1, 2, 3], + window_size=4, + number_folds=2, + use_exogenous_features=False, + use_outlier_detection=False, + auto_save_models=False, + verbose=False, + ) + config.cache_home = tempfile.mkdtemp() + + # run() dispatches to run_task_lazy when task="lazy". 
+ mt = MultiTask(config, task="lazy", dataframe=df, show_progress=False) + mt.prepare_data() + mt.impute() + result = mt.run(task="lazy", show=False) + print("Result keys:", list(result.keys())[:4]) + assert "future_pred" in result + ``` """ task = task or self.TASK dispatch = { diff --git a/src/spotforecast2/multitask/optuna.py b/src/spotforecast2/multitask/optuna.py index b2a80149..96ab3724 100644 --- a/src/spotforecast2/multitask/optuna.py +++ b/src/spotforecast2/multitask/optuna.py @@ -40,6 +40,36 @@ def execute_optuna( Per-target packages are stored on ``task.results["optuna"]``. When ``task.config.auto_save_models`` is ``True`` (the default), fitted models are saved to disk so PredictTask can load them directly. + + Examples: + ```{python} + import warnings + from spotforecast2_safe.data.fetch_data import fetch_data, get_package_data_home + from spotforecast2.multitask import OptunaTask + from spotforecast2.multitask.optuna import execute_optuna + + data_home = get_package_data_home() + df = fetch_data(filename=str(data_home / "demo10.csv")) + tiny_df = df.iloc[:500][["A"]] + + task = OptunaTask( + n_trials_optuna=2, + predict_size=24, + auto_save_models=False, + lags_consider=[1, 2, 24], + number_folds=2, + verbose=False, + ) + task.prepare_data(demo_data=tiny_df) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + result = execute_optuna(task, show=False) + + assert isinstance(result, dict) + assert "future_pred" in result + print("execute_optuna result keys:", sorted(result.keys())) + ``` """ strategy = OptunaStrategy(search_space=search_space) return task._run_strategy( @@ -90,5 +120,34 @@ def run( Returns: Aggregated prediction package. Per-target packages are stored on ``self.results["optuna"]``. 
+ + Examples: + ```{python} + import warnings + from spotforecast2_safe.data.fetch_data import fetch_data, get_package_data_home + from spotforecast2.multitask import OptunaTask + + data_home = get_package_data_home() + df = fetch_data(filename=str(data_home / "demo10.csv")) + tiny_df = df.iloc[:500][["A"]] + + task = OptunaTask( + n_trials_optuna=2, + predict_size=24, + auto_save_models=False, + lags_consider=[1, 2, 24], + number_folds=2, + verbose=False, + ) + task.prepare_data(demo_data=tiny_df) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + result = task.run(show=False) + + assert "future_pred" in result + assert result.get("validation_passed") is True + print("OptunaTask.run result keys:", sorted(result.keys())) + ``` """ return execute_optuna(self, show=show, search_space=search_space) diff --git a/src/spotforecast2/multitask/predict.py b/src/spotforecast2/multitask/predict.py index b15a0236..f2f7ae0f 100644 --- a/src/spotforecast2/multitask/predict.py +++ b/src/spotforecast2/multitask/predict.py @@ -66,6 +66,52 @@ def run( Raises: RuntimeError: If no saved models are found in the cache directory, or if a target has no matching model. + + Examples: + ```{python} + import tempfile + from pathlib import Path + from spotforecast2_safe.data.fetch_data import fetch_data, get_package_data_home + from spotforecast2.multitask import LazyTask, PredictTask + + demo_df = fetch_data(filename=str(get_package_data_home() / "demo10.csv")) + + with tempfile.TemporaryDirectory() as tmp: + # Train and persist a model for a single target. + lazy = LazyTask( + data_frame_name="demo10", + cache_home=Path(tmp), + predict_size=24, + targets=["A"], + use_exogenous_features=False, + ) + lazy.prepare_data(demo_data=demo_df) + lazy.detect_outliers() + lazy.impute() + lazy.build_exogenous_features() + lazy.run(show=False) + lazy.save_models(task_name="lazy") + + # Load the saved model and produce predictions. 
+ pred = PredictTask( + data_frame_name="demo10", + cache_home=Path(tmp), + predict_size=24, + targets=["A"], + use_exogenous_features=False, + ) + pred.prepare_data(demo_data=demo_df) + pred.detect_outliers() + pred.impute() + pred.build_exogenous_features() + result = pred.run(show=False, task_name="lazy") + + pkg = pred.results["predict"]["A"] + print(f"future_pred length: {len(pkg['future_pred'])}") + print(f"result keys: {sorted(result.keys())}") + assert len(pkg["future_pred"]) == 24 + assert "future_pred" in result + ``` """ return execute_predict( self, diff --git a/src/spotforecast2/multitask/search_spaces.py b/src/spotforecast2/multitask/search_spaces.py index cfd1aa07..168b62f7 100644 --- a/src/spotforecast2/multitask/search_spaces.py +++ b/src/spotforecast2/multitask/search_spaces.py @@ -80,7 +80,7 @@ def _default_spotoptim_search_space() -> Dict[str, Any]: """Built-in SpotOptim search space for LightGBM. Estimator hyperparameters carry the ``estimator__`` prefix; see the docstring - of :func:`_default_optuna_search_space` for the rationale. + of `_default_optuna_search_space` for the rationale. """ return { "estimator__num_leaves": (8, 256), @@ -107,7 +107,7 @@ def search_space_lgbm(trial: Any) -> Dict[str, Any]: Consumed by ``ForecasterRecursiveModelFull.tune`` via the ``SEARCH_SPACES`` registry below. Estimator keys use the ``estimator__`` - prefix; see :func:`_default_optuna_search_space` for the rationale. + prefix; see `_default_optuna_search_space` for the rationale. Args: trial: An ``optuna.trial.Trial`` instance. @@ -115,6 +115,24 @@ def search_space_lgbm(trial: Any) -> Dict[str, Any]: Returns: Mapping of hyperparameter name to suggested value for the current trial. 
+ + Examples: + ```{python} + import optuna + from spotforecast2.multitask.search_spaces import search_space_lgbm + + optuna.logging.set_verbosity(optuna.logging.WARNING) + study = optuna.create_study( + direction="minimize", + sampler=optuna.samplers.TPESampler(seed=42), + ) + trial = study.ask() + params = search_space_lgbm(trial) + print("Keys:", list(params.keys())) + assert "estimator__num_leaves" in params + assert "lags" in params + assert isinstance(params["estimator__learning_rate"], float) + ``` """ return { "estimator__num_leaves": trial.suggest_int("estimator__num_leaves", 8, 256), @@ -144,7 +162,7 @@ def search_space_xgb(trial: Any) -> Dict[str, Any]: Consumed by ``ForecasterRecursiveModelFull.tune`` via the ``SEARCH_SPACES`` registry below. Estimator keys use the ``estimator__`` - prefix; see :func:`_default_optuna_search_space` for the rationale. + prefix; see `_default_optuna_search_space` for the rationale. Args: trial: An ``optuna.trial.Trial`` instance. @@ -152,6 +170,24 @@ def search_space_xgb(trial: Any) -> Dict[str, Any]: Returns: Mapping of hyperparameter name to suggested value for the current trial. 
+ + Examples: + ```{python} + import optuna + from spotforecast2.multitask.search_spaces import search_space_xgb + + optuna.logging.set_verbosity(optuna.logging.WARNING) + study = optuna.create_study( + direction="minimize", + sampler=optuna.samplers.TPESampler(seed=42), + ) + trial = study.ask() + params = search_space_xgb(trial) + print("Keys:", list(params.keys())) + assert "estimator__max_depth" in params + assert "lags" in params + assert isinstance(params["estimator__learning_rate"], float) + ``` """ return { "estimator__max_depth": trial.suggest_int("estimator__max_depth", 2, 10), diff --git a/src/spotforecast2/multitask/spotoptim.py b/src/spotforecast2/multitask/spotoptim.py index 9e083afd..d73f9668 100644 --- a/src/spotforecast2/multitask/spotoptim.py +++ b/src/spotforecast2/multitask/spotoptim.py @@ -30,7 +30,7 @@ def execute_spotoptim( Thin wrapper around ``BaseTask._run_strategy`` using ``SpotOptimStrategy``. Args: - task: A class `BaseTask` (or subclass) instance with prepared data. + task: A `BaseTask` (or subclass) instance with prepared data. show: If ``True``, display prediction figures. search_space: Dictionary defining the SpotOptim search space. ``None`` uses the built-in default. @@ -40,6 +40,30 @@ def execute_spotoptim( Per-target packages are stored on ``task.results["spotoptim"]``. When ``task.config.auto_save_models`` is ``True`` (the default), fitted models are saved to disk so PredictTask can load them directly. + + Examples: + ```{python} + # Demonstrate the strategy wiring that execute_spotoptim sets up. + # A full run requires prepared data; here we inspect the strategy object + # and the search space it would use. 
+ from spotforecast2.multitask.strategies import SpotOptimStrategy + from spotforecast2.multitask.search_spaces import _default_spotoptim_search_space + + strategy = SpotOptimStrategy() + print(f"Strategy name: {strategy.name}") + assert strategy.search_space is None # uses built-in default when None + + default_space = _default_spotoptim_search_space() + print(f"Search space keys: {list(default_space.keys())[:4]}") + assert "lags" in default_space + assert "estimator__num_leaves" in default_space + + # Custom search space can be injected: + custom_space = {"lags": ["24", "48"], "estimator__num_leaves": (8, 64)} + strategy_custom = SpotOptimStrategy(search_space=custom_space) + assert strategy_custom.search_space is custom_space + print(f"Custom space lags options: {strategy_custom.search_space['lags']}") + ``` """ strategy = SpotOptimStrategy(search_space=search_space) return task._run_strategy( @@ -90,5 +114,25 @@ def run( Returns: Aggregated prediction package. Per-target packages are stored on ``self.results["spotoptim"]``. + + Examples: + ```{python} + # Construct the task and verify configuration before running. + # A full run requires prepared data (prepare_data, impute, etc.); + # this example demonstrates construction and config inspection. 
+ from spotforecast2.multitask.spotoptim import SpotOptimTask + + task = SpotOptimTask( + n_trials_spotoptim=5, + n_initial_spotoptim=3, + predict_size=24, + auto_save_models=False, + ) + print(f"Task type: {task.TASK}") + print(f"Trials: {task.config.n_trials_spotoptim}") + print(f"Initial evaluations: {task.config.n_initial_spotoptim}") + assert task.config.n_trials_spotoptim == 5 + assert task.config.auto_save_models is False + ``` """ return execute_spotoptim(self, show=show, search_space=search_space) diff --git a/src/spotforecast2/multitask/strategies.py b/src/spotforecast2/multitask/strategies.py index 197daecf..0931c496 100644 --- a/src/spotforecast2/multitask/strategies.py +++ b/src/spotforecast2/multitask/strategies.py @@ -39,7 +39,26 @@ class OptunaStrategy: - """Approach 3 — Optuna Bayesian tuning, then apply best params.""" + """Approach 3 — Optuna Bayesian tuning, then apply best params. + + Examples: + ```{python} + from spotforecast2.multitask.strategies import OptunaStrategy + + strategy = OptunaStrategy() + print(f"name: {strategy.name}") + assert strategy.name == "optuna" + assert strategy.search_space is None + + # Custom search space can be injected at construction time. + def my_space(trial): + return {"estimator__n_estimators": trial.suggest_int("estimator__n_estimators", 10, 50)} + + custom = OptunaStrategy(search_space=my_space) + assert custom.search_space is my_space + print(f"custom search_space: {custom.search_space.__name__}") + ``` + """ name = "optuna" @@ -54,6 +73,77 @@ def prepare_forecaster( y_train: pd.Series, exog_train: Optional[pd.DataFrame] = None, ) -> Any: + """Run Optuna search and return a forecaster initialised with the best params. + + Args: + task: A `BaseTask` (or compatible) instance that supplies ``cv_ts``, + ``config``, ``logger``, ``save_tuning_results``, and + ``create_forecaster``. + target: Target column name; forwarded to ``task.create_forecaster`` + and ``task.save_tuning_results``. 
+ forecaster: An unfitted forecaster instance used as the search + template. + y_train: Training time series for the current target. + exog_train: Optional exogenous features aligned with ``y_train``. + + Returns: + A fresh, unfitted forecaster with ``set_params`` and ``set_lags`` + applied from the best trial found by Optuna. + + Examples: + ```{python} + import types, logging, warnings + import numpy as np + import pandas as pd + from lightgbm import LGBMRegressor + from spotforecast2_safe.forecaster.recursive import ForecasterRecursive + from spotforecast2_safe.splitter import TimeSeriesFold + from spotforecast2.multitask.strategies import OptunaStrategy + + rng = np.random.default_rng(0) + n = 300 + idx = pd.date_range("2023-01-01", periods=n, freq="h", tz="UTC") + y_train = pd.Series(rng.normal(0, 1, n), index=idx, name="A") + + forecaster = ForecasterRecursive( + estimator=LGBMRegressor(n_estimators=10, verbose=-1), lags=3 + ) + cv = TimeSeriesFold( + steps=24, initial_train_size=200, refit=False, + gap=0, allow_incomplete_fold=True, + ) + + def tiny_space(trial): + return { + "estimator__n_estimators": trial.suggest_int( + "estimator__n_estimators", 10, 30 + ), + "lags": trial.suggest_categorical("lags", [3, 5, 6]), + } + + cfg = types.SimpleNamespace( + n_trials_optuna=1, random_state=0, verbose=False + ) + task = types.SimpleNamespace( + config=cfg, + logger=logging.getLogger("example"), + cv_ts=lambda y: cv, + create_forecaster=lambda target=None: ForecasterRecursive( + estimator=LGBMRegressor(n_estimators=10, verbose=-1), lags=3 + ), + save_tuning_results=lambda **kw: None, + _show_progress=False, + ) + + strategy = OptunaStrategy(search_space=tiny_space) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + tuned = strategy.prepare_forecaster(task, "A", forecaster, y_train) + + print(f"tuned type: {type(tuned).__name__}") + assert isinstance(tuned, ForecasterRecursive) + ``` + """ from spotforecast2.model_selection import 
bayesian_search_forecaster search_space = self.search_space or _default_optuna_search_space @@ -89,7 +179,27 @@ def prepare_forecaster( class SpotOptimStrategy: - """Approach 4 — SpotOptim surrogate-model tuning, then apply best params.""" + """Approach 4 — SpotOptim surrogate-model tuning, then apply best params. + + Examples: + ```{python} + from spotforecast2.multitask.strategies import SpotOptimStrategy + + strategy = SpotOptimStrategy() + print(f"name: {strategy.name}") + assert strategy.name == "spotoptim" + assert strategy.search_space is None + + # A custom search space restricts the region explored by the surrogate model. + custom_space = { + "estimator__n_estimators": (10, 50), + "lags": ["3", "5", "6"], + } + custom = SpotOptimStrategy(search_space=custom_space) + assert custom.search_space is custom_space + print(f"custom search_space keys: {list(custom.search_space.keys())}") + ``` + """ name = "spotoptim" @@ -104,6 +214,81 @@ def prepare_forecaster( y_train: pd.Series, exog_train: Optional[pd.DataFrame] = None, ) -> Any: + """Run SpotOptim surrogate search and return a forecaster with best params. + + Args: + task: A `BaseTask` (or compatible) instance that supplies ``cv_ts``, + ``config``, ``logger``, ``save_tuning_results``, and + ``create_forecaster``. The config must expose + ``n_trials_spotoptim``, ``n_initial_spotoptim``, + ``random_state``, ``warm_start_lags``, and optionally + ``lags_consider`` and ``n_jobs_spotoptim``. + target: Target column name; forwarded to ``task.create_forecaster`` + and ``task.save_tuning_results``. + forecaster: An unfitted forecaster instance used as the search + template. + y_train: Training time series for the current target. + exog_train: Optional exogenous features aligned with ``y_train``. + + Returns: + A fresh, unfitted forecaster with ``set_params`` and ``set_lags`` + applied from the best configuration found by the SpotOptim surrogate.
+ + Examples: + ```{python} + import types, logging, warnings + import numpy as np + import pandas as pd + from lightgbm import LGBMRegressor + from spotforecast2_safe.forecaster.recursive import ForecasterRecursive + from spotforecast2_safe.splitter import TimeSeriesFold + from spotforecast2.multitask.strategies import SpotOptimStrategy + + rng = np.random.default_rng(0) + n = 300 + idx = pd.date_range("2023-01-01", periods=n, freq="h", tz="UTC") + y_train = pd.Series(rng.normal(0, 1, n), index=idx, name="A") + + forecaster = ForecasterRecursive( + estimator=LGBMRegressor(n_estimators=10, verbose=-1), lags=3 + ) + cv = TimeSeriesFold( + steps=24, initial_train_size=200, refit=False, + gap=0, allow_incomplete_fold=True, + ) + + tiny_space = { + "estimator__n_estimators": (10, 30), + "lags": ["3", "5", "6"], + } + + cfg = types.SimpleNamespace( + n_trials_spotoptim=5, + n_initial_spotoptim=3, + random_state=0, + warm_start_lags=False, + n_jobs_spotoptim=None, + ) + task = types.SimpleNamespace( + config=cfg, + logger=logging.getLogger("example"), + cv_ts=lambda y: cv, + create_forecaster=lambda target=None: ForecasterRecursive( + estimator=LGBMRegressor(n_estimators=10, verbose=-1), lags=3 + ), + save_tuning_results=lambda **kw: None, + _show_progress=False, + ) + + strategy = SpotOptimStrategy(search_space=tiny_space) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + tuned = strategy.prepare_forecaster(task, "A", forecaster, y_train) + + print(f"tuned type: {type(tuned).__name__}") + assert isinstance(tuned, ForecasterRecursive) + ``` + """ from spotforecast2.model_selection import ( build_warm_start_x0, spotoptim_search_forecaster, diff --git a/src/spotforecast2/tasks/task_entsoe.py b/src/spotforecast2/tasks/task_entsoe.py index 740232f0..d989a3d2 100644 --- a/src/spotforecast2/tasks/task_entsoe.py +++ b/src/spotforecast2/tasks/task_entsoe.py @@ -61,8 +61,8 @@ def entsoe_data_loader(config: ConfigEntsoe) -> pd.DataFrame: """Read the merged 
interim ENTSO-E CSV that ``config.data_filename`` points at. Args: - config: A ``ConfigEntsoe`` with ``data_filename`` set. Relative paths - are resolved against ``spotforecast2_safe.data.fetch_data.get_data_home``. + config: A `ConfigEntsoe` with ``data_filename`` set. Relative paths + are resolved against `spotforecast2_safe.data.fetch_data.get_data_home`. Returns: DataFrame indexed by the ENTSO-E timestamp column (``Time (UTC)``) @@ -71,6 +71,34 @@ def entsoe_data_loader(config: ConfigEntsoe) -> pd.DataFrame: Raises: FileNotFoundError: If the merged CSV does not exist. Run ``spotforecast2-entsoe download`` and ``merge`` first. + + Examples: + ```{python} + import os + import tempfile + + import pandas as pd + from spotforecast2_safe.configurator import ConfigEntsoe + + from spotforecast2.tasks.task_entsoe import entsoe_data_loader + + # Build a tiny synthetic interim CSV in a temp directory. + tmp = tempfile.mkdtemp() + csv_path = os.path.join(tmp, "energy_load.csv") + idx = pd.date_range( + "2025-01-01", periods=48, freq="h", tz="UTC", name="Time (UTC)" + ) + pd.DataFrame({"Actual Load": range(48)}, index=idx).to_csv(csv_path) + + # Absolute path bypasses get_data_home; loader returns the full frame. + config = ConfigEntsoe() + config.data_filename = csv_path + df = entsoe_data_loader(config) + + print(df.shape) + assert df.shape == (48, 1) + assert df.index.name == "Time (UTC)" + ``` """ path = Path(config.data_filename) if not path.is_absolute(): @@ -100,7 +128,7 @@ def entsoe_test_data_loader(config: ConfigEntsoe) -> pd.DataFrame: actually predicting, rather than always "yesterday in wall-clock UTC". Args: - config: A ``ConfigEntsoe`` with ``data_filename``, ``end_train_default``, + config: A `ConfigEntsoe` with ``data_filename``, ``end_train_default``, and ``predict_size`` set; the merged interim CSV must already contain data covering the forecast horizon (run ``spotforecast2-entsoe download`` first). 
@@ -108,6 +136,37 @@ def entsoe_test_data_loader(config: ConfigEntsoe) -> pd.DataFrame: Returns: DataFrame indexed by ``Time (UTC)`` with the rows the forecast will be scored against. + + Examples: + ```{python} + import os + import tempfile + + import pandas as pd + from spotforecast2_safe.configurator import ConfigEntsoe + + from spotforecast2.tasks.task_entsoe import entsoe_test_data_loader + + # Synthetic interim CSV spanning the forecast window. + tmp = tempfile.mkdtemp() + csv_path = os.path.join(tmp, "energy_load.csv") + idx = pd.date_range( + "2025-12-29 00:00", periods=120, freq="h", tz="UTC", name="Time (UTC)" + ) + pd.DataFrame({"Actual Load": range(120)}, index=idx).to_csv(csv_path) + + config = ConfigEntsoe() + config.data_filename = csv_path + config.end_train_default = "2025-12-31 00:00+00:00" + config.predict_size = 24 + + test_df = entsoe_test_data_loader(config) + + # The slice covers exactly predict_size hourly steps after end_train. + print(test_df.shape) + assert test_df.shape == (24, 1) + assert test_df.index[0] == pd.Timestamp("2025-12-31 01:00", tz="UTC") + ``` """ df = entsoe_data_loader(config) end_train = pd.Timestamp(config.end_train_default) @@ -136,9 +195,26 @@ def entsoe_lgbm_factory( Args: config: Any object exposing ``random_state``, ``lags_consider``, and - ``window_size`` (typically ``ConfigEntsoe``). + ``window_size`` (typically `ConfigEntsoe`). weight_func: Per-sample weight function from the imputation step. target: Ignored; accepted for factory-signature compatibility. 
+ + Examples: + ```{python} + from spotforecast2_safe.configurator import ConfigEntsoe + from spotforecast2_safe.forecaster.recursive import ForecasterRecursive + + from spotforecast2.tasks.task_entsoe import entsoe_lgbm_factory + + config = ConfigEntsoe() + forecaster = entsoe_lgbm_factory(config, weight_func=None, target="Actual Load") + + print(type(forecaster).__name__) + assert isinstance(forecaster, ForecasterRecursive) + # The lags array is derived from lags_consider[-1] = 23. + assert len(forecaster.lags) == config.lags_consider[-1] + print("lags:", forecaster.lags) + ``` """ del target return ForecasterRecursive( @@ -157,7 +233,35 @@ def entsoe_xgb_factory( weight_func: Optional[Any] = None, target: Optional[str] = None, ) -> ForecasterRecursive: - """XGBoost ForecasterRecursive for the ENTSO-E pipeline.""" + """XGBoost ForecasterRecursive for the ENTSO-E pipeline. + + Mirrors `entsoe_lgbm_factory()` but uses an `XGBRegressor` estimator. + Kept as a named helper so the XGBoost variant is explicit at the + configuration site. + + Args: + config: Any object exposing ``random_state``, ``lags_consider``, and + ``window_size`` (typically `ConfigEntsoe`). + weight_func: Per-sample weight function from the imputation step. + target: Ignored; accepted for factory-signature compatibility. 
+ + Examples: + ```{python} + from spotforecast2_safe.configurator import ConfigEntsoe + from spotforecast2_safe.forecaster.recursive import ForecasterRecursive + from xgboost import XGBRegressor + + from spotforecast2.tasks.task_entsoe import entsoe_xgb_factory + + config = ConfigEntsoe() + forecaster = entsoe_xgb_factory(config, weight_func=None, target="Actual Load") + + print(type(forecaster).__name__) + assert isinstance(forecaster, ForecasterRecursive) + assert isinstance(forecaster.estimator, XGBRegressor) + print("lags:", forecaster.lags) + ``` + """ del target return ForecasterRecursive( estimator=XGBRegressor(random_state=config.random_state, verbosity=0), @@ -258,7 +362,24 @@ def _run_entsoe_pipeline( def main() -> None: - """Entry point for the ``spotforecast2-entsoe`` console script.""" + """Entry point for the ``spotforecast2-entsoe`` console script. + + Parses ``sys.argv`` and dispatches to one of four subcommands: + ``download``, ``merge``, ``train``, or ``predict``. Calling with no + subcommand prints the top-level help and returns. + + Examples: + ```{python} + import sys + + from spotforecast2.tasks.task_entsoe import main + + # With no subcommand, main() prints the usage summary and returns + # without error — useful for verifying the CLI is wired correctly. + sys.argv = ["spotforecast2-entsoe"] + main() # prints usage and returns normally + ``` + """ parser = argparse.ArgumentParser(description="spotforecast2 ENTSO-E pipeline") subparsers = parser.add_subparsers(dest="subcommand") diff --git a/src/spotforecast2/tasks/task_n_to_1.py b/src/spotforecast2/tasks/task_n_to_1.py index 9fbef439..d1896455 100644 --- a/src/spotforecast2/tasks/task_n_to_1.py +++ b/src/spotforecast2/tasks/task_n_to_1.py @@ -10,6 +10,54 @@ def main(): + """Run the N-to-1 baseline forecasting pipeline with automatic data acquisition. 
+ + Fetches time-series data from the default source (no explicit DataFrame + supplied), applies outlier detection, imputation, and equivalent-date + forecasting via `n2n_predict`, then aggregates the per-target predictions + into a single combined series via `agg_predict`. + + This function is the CLI entry point registered as + `spotforecast-n2o1` in `pyproject.toml`. It requires either the target CSV + file in the data home directory or a network connection to fetch it + automatically. + + Examples: + ```{python} + # Demonstrate the n2n_predict + agg_predict pipeline that main() wires + # together, using a small synthetic DataFrame instead of the live data + # source that main() fetches automatically. + import numpy as np + import pandas as pd + from spotforecast2_safe.processing.agg_predict import agg_predict + from spotforecast2_safe.processing.n2n_predict import n2n_predict + + rng = np.random.default_rng(0) + dates = pd.date_range("2020-01-01", periods=500, freq="h", tz="UTC") + data = pd.DataFrame( + rng.standard_normal((500, 2)), + index=dates, + columns=["solar", "wind"], + ) + + predictions, forecasters = n2n_predict( + data=data, + columns=["solar", "wind"], + forecast_horizon=3, + contamination=0.01, + window_size=24, + verbose=False, + show_progress=False, + ) + print("Predictions shape:", predictions.shape) + assert predictions.shape == (3, 2) + assert set(predictions.columns) == {"solar", "wind"} + + combined = agg_predict(predictions, weights=[1.0, -1.0]) + print("Combined prediction:", combined.tolist()) + assert len(combined) == 3 + ``` + """ FORECAST_HORIZON = 24 CONTAMINATION = 0.01 WINDOW_SIZE = 72 diff --git a/src/spotforecast2/tasks/task_n_to_1_dataframe.py b/src/spotforecast2/tasks/task_n_to_1_dataframe.py index 840fba0a..b8f66d3a 100644 --- a/src/spotforecast2/tasks/task_n_to_1_dataframe.py +++ b/src/spotforecast2/tasks/task_n_to_1_dataframe.py @@ -10,7 +10,33 @@ warnings.simplefilter("ignore") -def main(): +def main() -> None: +
"""Execute the complete N-to-1 baseline forecasting pipeline with default parameters. + + This is the entry point when running the script directly. It fetches data + from the user's data home directory, runs the equivalent-date baseline + forecasting pipeline via `n2n_predict`, and aggregates the multi-output + predictions into a single combined series with `agg_predict`. + + The default configuration: + - Reads ``data_in.csv`` from `get_data_home()` (user-specific path) + - Forecasts 24 steps ahead + - Applies 1% contamination for outlier detection + - Uses a 72-step rolling window + - Aggregates with predefined weights + + Returns: + None. Results are printed to stdout. + + Examples: + ```{python} + #| eval: false + # main() reads data_in.csv from the user's data home directory; not reproducible without that file. + from spotforecast2.tasks.task_n_to_1_dataframe import main + + main() + ``` + """ FORECAST_HORIZON = 24 CONTAMINATION = 0.01 WINDOW_SIZE = 72 diff --git a/src/spotforecast2/warnings/exceptions.py b/src/spotforecast2/warnings/exceptions.py index 6fdcc407..2064c694 100644 --- a/src/spotforecast2/warnings/exceptions.py +++ b/src/spotforecast2/warnings/exceptions.py @@ -98,6 +98,18 @@ def format_warning_handler( Returns: None + + Examples: + ```{python} + import warnings + from spotforecast2_safe.exceptions import MissingValuesWarning + from spotforecast2.warnings.exceptions import format_warning_handler + + # Construct the warning instance the same way Python's warnings machinery does + msg = MissingValuesWarning("Missing values detected in column 'price'.") + # Call the handler directly (bypasses warnings.warn overhead) + format_warning_handler(msg, MissingValuesWarning, "example.py", 42) + ``` """ if isinstance(message, tuple(warn_skforecast_categories)): @@ -148,6 +160,19 @@ def rich_warning_handler( Returns: None + + Examples: + ```{python} + import warnings + from spotforecast2_safe.exceptions import MissingValuesWarning + from 
spotforecast2.warnings.exceptions import rich_warning_handler + + # Construct the warning instance the same way Python's warnings machinery does + msg = MissingValuesWarning("Missing values detected in column 'price'.") + # Call the handler directly; falls back to format_warning_handler when + # the optional 'rich' package is not installed + rich_warning_handler(msg, MissingValuesWarning, "example.py", 42) + ``` """ if isinstance(message, tuple(warn_skforecast_categories)):