From 264543dd0ff1f9cdcd3459cd72a86bddb56a7717 Mon Sep 17 00:00:00 2001 From: bartzbeielstein <32470350+bartzbeielstein@users.noreply.github.com> Date: Sun, 31 May 2026 23:43:36 +0200 Subject: [PATCH] fix(steady-state): store evaluated points in natural scale `update_storage_steady` stored points in internal (transformed) scale, unlike the sequential `update_storage` which inverse-transforms first. With a `log10`-transformed variable this left `X_` in log space; the surrogate refit then re-applied `transform_X`, taking `log10` of a negative value -> NaN -> `ValueError: Input X contains NaN` in the GP fit. Factor variables were likewise stored as numeric codes rather than the natural-scale representation the sequential path produces. Apply `inverse_transform_X` in `update_storage_steady`, mirroring `spotoptim.core.storage.update_storage`, so `X_` and `best_x_` hold user-facing natural-scale values for every `n_jobs`. This unblocks parallel optimization of any search space containing a transformed hyperparameter (e.g. spotforecast2's `learning_rate`/`alpha`). Add regression tests: a parallel run over a `log10` + factor search space no longer crashes and stores natural-scale `X_`, and the seeded initial design matches the sequential path. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/spotoptim/optimizer/steady_state.py | 14 +++- tests/test_steady_state_inverse_transform.py | 84 ++++++++++++++++++++ 2 files changed, 97 insertions(+), 1 deletion(-) create mode 100644 tests/test_steady_state_inverse_transform.py diff --git a/src/spotoptim/optimizer/steady_state.py b/src/spotoptim/optimizer/steady_state.py index 0f3aaf02..0f179c45 100644 --- a/src/spotoptim/optimizer/steady_state.py +++ b/src/spotoptim/optimizer/steady_state.py @@ -27,12 +27,24 @@ def update_storage_steady(optimizer: SpotOptimProtocol, x, y): """Helper to safely append single point (for steady state). 
+ The evaluated point arrives in internal (transformed, reduced) scale -- the + representation produced by ``get_initial_design`` and + ``suggest_next_infill_point``. It is converted to natural scale via + ``inverse_transform_X`` before storage, mirroring the sequential + ``update_storage`` path (:mod:`spotoptim.core.storage`) so that ``X_`` and + ``best_x_`` hold user-facing original-scale values regardless of ``n_jobs``. + Without this conversion a transformed variable (e.g. ``log10``) is stored in + transformed space and then re-transformed when the surrogate is refit, + producing ``NaN`` and crashing the Gaussian-process fit. + Args: optimizer: SpotOptim instance. - x (ndarray): New point(s) in original scale. + x (ndarray): New point(s) in internal scale, shape (n_features,) or + (N, n_features). y (float or ndarray): Corresponding function value(s). """ x = np.atleast_2d(x) + x = optimizer.inverse_transform_X(x) if optimizer.X_ is None: optimizer.X_ = x optimizer.y_ = np.array([y]) diff --git a/tests/test_steady_state_inverse_transform.py b/tests/test_steady_state_inverse_transform.py new file mode 100644 index 00000000..48373ff1 --- /dev/null +++ b/tests/test_steady_state_inverse_transform.py @@ -0,0 +1,84 @@ +# SPDX-FileCopyrightText: 2026 bartzbeielstein +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +"""Regression tests for natural-scale storage in the steady-state path. + +The steady-state (``n_jobs > 1``) loop must convert evaluated points to natural +scale via ``inverse_transform_X`` before storing them in ``X_`` -- mirroring the +sequential ``update_storage`` path. Before the fix, points were stored in +transformed scale, so a ``log10`` variable was re-transformed when the surrogate +was refit, yielding ``log10`` of a negative number => ``NaN`` => a crash in the +Gaussian-process fit (``ValueError: Input X contains NaN``). 
+"""
+
+import numpy as np
+
+from spotoptim import SpotOptim
+
+
+def _obj(X):
+    """Return column 0 of each row as a float array; any other column is ignored."""
+    import numpy as np  # NOTE(review): repeats the module-level import -- confirm need
+
+    X = np.atleast_2d(X)
+    return np.array([float(np.asarray(row[0], dtype=float)) for row in X])
+
+
+def test_steady_state_log10_does_not_crash_and_stores_natural_scale():
+    bounds = [(0.001, 0.1), ["A", "B", "C"]]
+    var_type = ["float", "factor"]
+    var_trans = ["log10", None]  # log10 on the float column, none on the factor
+
+    opt = SpotOptim(
+        fun=_obj,
+        bounds=bounds,
+        var_type=var_type,
+        var_trans=var_trans,
+        n_initial=5,
+        max_iter=10,
+        seed=7,
+        n_jobs=2,  # n_jobs > 1 exercises the steady-state path
+        verbose=False,
+    )
+
+    # Must not raise "ValueError: Input X contains NaN".
+    opt.optimize()
+
+    col0 = opt.X_[:, 0].astype(float)
+    # Natural scale: every stored value lies within the original [0.001, 0.1]
+    # bound, never in the log10-internal range [-3, -1].
+    assert np.all(col0 >= 0.001 - 1e-9)
+    assert np.all(col0 <= 0.1 + 1e-9)
+    assert np.all(np.isfinite(col0))
+    assert np.all(np.isfinite(opt.y_))
+
+
+def test_steady_state_matches_sequential_initial_design_scale():
+    """Sequential and steady-state must store the seeded initial design in the
+    same (natural) scale -- the first ``n_initial`` values agree up to ordering."""
+    bounds = [(0.001, 0.1)]
+    var_trans = ["log10"]
+
+    def build(n_jobs):
+        opt = SpotOptim(
+            fun=_obj,
+            bounds=bounds,
+            var_trans=var_trans,
+            n_initial=6,
+            max_iter=8,
+            seed=11,
+            n_jobs=n_jobs,
+            verbose=False,
+        )
+        opt.optimize()
+        return np.asarray(opt.X_[:, 0], dtype=float)
+
+    seq = build(1)  # sequential path
+    par = build(2)  # steady-state path
+    # Same seed on both paths; the leading values are sorted before comparison,
+    # so this checks equal values in natural scale, not equal storage order.
+    n = min(6, len(seq), len(par))
+    assert np.allclose(np.sort(seq[:n]), np.sort(par[:n]), rtol=1e-6, atol=1e-9)
+    # And both lie in the natural bound.
+    assert np.all(par >= 0.001 - 1e-9) and np.all(par <= 0.1 + 1e-9)