0.14.43

bartzbeielstein · bartzbeielstein · commit 2b1d17b8002d · 2024-07-13T18:49:34.000+02:00
remove_nan(self.X, self.y, stop_on_zero_return)
diff --git a/pyproject.toml b/pyproject.toml
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "spotpython"
-version = "0.14.42"
+version = "0.14.43"
 authors = [
   { name="T. Bartz-Beielstein", email="tbb@bartzundbartz.de" }
 ]
diff --git a/src/spotPython/hyperparameters/values.py b/src/spotPython/hyperparameters/values.py
@@ -1804,7 +1804,11 @@ def set_hyperparameter(fun_control, key, values):
         >>> set_hyperparameter(fun_control, "step", [0.2, 5.0])
         >>> set_hyperparameter(fun_control, "use_aggregation", [False, True])
         >>> set_hyperparameter(fun_control, "leaf_model", ["LinearRegression", "Perceptron"])
+        >>> set_hyperparameter(fun_control, "leaf_model", "LinearRegression")
     """
+    # if values is only a string  and not a list of strings, convert it to a list
+    if isinstance(values, str):
+        values = [values]
     if isinstance(values, list):
         if all(isinstance(v, int) for v in values):
             _set_int_hyperparameter_values(fun_control, key, values[0], values[1])
diff --git a/src/spotPython/plot/xy.py b/src/spotPython/plot/xy.py
@@ -0,0 +1,50 @@
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+def plot_y_vs_X(X, y, nrows=5, ncols=2, figsize=(30, 20), ylabel="y", feature_names=None):
+    """
+    Plots y versus each feature in X.
+
+    Args:
+        X (ndarray):
+            2D array of input features.
+        y (ndarray):
+            1D array of target values.
+        nrows (int, optional):
+            Number of rows in the subplot grid. Defaults to 5.
+        ncols (int, optional):
+            Number of columns in the subplot grid. Defaults to 2.
+        figsize (tuple, optional):
+            Size of the entire figure. Defaults to (30, 20).
+        ylabel (str, optional):
+            Label for the y-axis. Defaults to 'y'.
+        feature_names (list of str, optional):
+            List of feature names. Defaults to None. If None, generates feature names as x0, x1, etc.
+
+    Example:
+        >>> from sklearn.datasets import load_diabetes
+        >>> from spotPython.plot.xy import plot_y_vs_X
+        >>> data = load_diabetes()
+        >>> X, y = data.data, data.target
+        >>> plot_y_vs_X(X, y, nrows=5, ncols=2, figsize=(20, 15))
+    """
+    if feature_names is None:
+        feature_names = [f"x{i}" for i in range(X.shape[1])]
+
+    fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize)
+
+    for i, (ax, col) in enumerate(zip(axs.flat, feature_names)):
+        x = X[:, i]
+        pf = np.polyfit(x, y, 1)
+        p = np.poly1d(pf)
+
+        ax.plot(x, y, "o")
+        ax.plot(x, p(x), "r--")
+
+        ax.set_title(col + " " + ylabel)
+        ax.set_xlabel(col)
+        ax.set_ylabel(ylabel)
+
+    plt.tight_layout()
+    plt.show()
diff --git a/src/spotPython/spot/spot.py b/src/spotPython/spot/spot.py
@@ -932,7 +932,7 @@ def initialize_design(self, X_start=None) -> None:
                 writer.add_hparams(config, {"spot_y": y_j})
                 writer.flush()
         #
-        self.X, self.y = remove_nan(self.X, self.y)
+        self.X, self.y = remove_nan(self.X, self.y, stop_on_zero_return=True)
         logger.debug("In Spot() initialize_design(), final X val, after remove nan: self.X: %s", self.X)
         logger.debug("In Spot() initialize_design(), final y val, after remove nan: self.y: %s", self.y)
 
@@ -1063,7 +1063,7 @@ def update_design(self) -> None:
         )
         # (S-18): Evaluating New Solutions:
         y0 = self.fun(X=X_all, fun_control=self.fun_control)
-        X0, y0 = remove_nan(X0, y0)
+        X0, y0 = remove_nan(X0, y0, stop_on_zero_return=False)
         # Append New Solutions:
         self.X = np.append(self.X, X0, axis=0)
         self.y = np.append(self.y, y0)
diff --git a/src/spotPython/utils/file.py b/src/spotPython/utils/file.py
@@ -5,6 +5,7 @@
 import json
 import sys
 import importlib
+from spotPython.hyperparameters.values import get_tuned_architecture
 
 # from torch.utils.tensorboard import SummaryWriter
 
@@ -178,3 +179,26 @@ def load_core_model_from_file(coremodel, dirname="userModel"):
     module = importlib.import_module(coremodel)
     core_model = getattr(module, coremodel)
     return core_model
+
+
+def get_experiment_from_PREFIX(PREFIX) -> tuple:
+    """
+    Setup the experiment based on the PREFIX provided and return the relevant configuration
+    and control objects.
+
+    Args:
+        PREFIX (str): The prefix for the experiment filename.
+
+    Returns:
+        tuple:
+            A tuple containing config, spot_tuner, fun_control, design_control, surrogate_control,
+            and optimizer_control.
+
+    Example:
+        >>> config, _, _, _, _, _ = get_experiment_from_PREFIX("100")
+
+    """
+    experiment_name = get_experiment_filename(PREFIX)
+    spot_tuner, fun_control, design_control, surrogate_control, optimizer_control = load_experiment(experiment_name)
+    config = get_tuned_architecture(spot_tuner, fun_control)
+    return config, spot_tuner, fun_control, design_control, surrogate_control, optimizer_control
diff --git a/src/spotPython/utils/repair.py b/src/spotPython/utils/repair.py
@@ -27,19 +27,23 @@ def repair_non_numeric(X: np.ndarray, var_type: List[str]) -> np.ndarray:
     return X
 
 
-def remove_nan(X: np.ndarray, y: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
-    """
-    Remove rows from X and y where y contains NaN values and issue a warning
-    if the dimension of the returned y array is smaller than the dimension of the original y array.
-    Issues a ValueError if the dimension of the returned y array is less than 2.
+def remove_nan(X: np.ndarray, y: np.ndarray, stop_on_zero_return: bool = False) -> Tuple[np.ndarray, np.ndarray]:
+    """Remove rows from X and y where y contains NaN values and issue a warning
+        if the dimension of the returned y array is smaller than the dimension of the original y array.
+        Issues a ValueError if the dimension of the returned y array is less than 2.
 
     Args:
-        X (numpy.ndarray): X array
-        y (numpy.ndarray): y array
+        X (numpy.ndarray):
+            X array
+        y (numpy.ndarray):
+            y array
+        stop_on_zero_return (bool):
+            whether to stop if the returned dimension is less than 1.
+            Default is False.
 
     Returns:
         Tuple[numpy.ndarray, np.ndarray]:
-        X and y arrays with rows containing NaN values in y removed.
+            X and y arrays with rows containing NaN values in y removed.
 
     Examples:
         >>> import numpy as np
@@ -70,7 +74,7 @@ def remove_nan(X: np.ndarray, y: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
         )
         warnings.warn("\n!!! Check whether to continue with the reduced dimension is useful.")
     # throw an error if the returned dimension is smaller than one
-    if returned_dim < 1:
+    if returned_dim < 1 and stop_on_zero_return:
         raise ValueError("!!!! The dimension of the returned y array is less than 1. Check the input data.")
 
     return X_cleaned, y_cleaned
diff --git a/test/test_get_experiment_from_prefix.py b/test/test_get_experiment_from_prefix.py
@@ -0,0 +1,20 @@
+from spotPython.utils.file import get_experiment_from_PREFIX
+import pytest
+from unittest.mock import patch
+
+
+def test_get_experiment_from_PREFIX_invalid_prefix():
+    PREFIX = "invalid"
+
+    with patch(
+        "spotPython.utils.file.get_experiment_filename", return_value=None
+    ) as mock_get_experiment_filename, patch(
+        "spotPython.utils.file.load_experiment", side_effect=FileNotFoundError("Experiment not found")
+    ) as mock_load_experiment:
+        with pytest.raises(FileNotFoundError, match="Experiment not found"):
+            get_experiment_from_PREFIX(PREFIX)
+
+        # Ensure the filename function was called
+        mock_get_experiment_filename.assert_called_once_with(PREFIX)
+        # Ensure the load experiment function was called
+        mock_load_experiment.assert_called_once()
diff --git a/test/test_remove_nan.py b/test/test_remove_nan.py
@@ -1,6 +1,15 @@
 import numpy as np
 from spotPython.utils.repair import remove_nan
 import pytest
+import warnings
+
+
+def test_remove_nan_dimension_error():
+    # Case that should raise ValueError
+    X = np.array([[1, 2]])
+    y = np.array([np.nan])
+    with pytest.raises(ValueError):
+        X_cleaned, y_cleaned = remove_nan(X, y, stop_on_zero_return=True)
 
 
 def test_remove_nan_no_nan():
@@ -30,9 +39,44 @@ def test_remove_nan_dimension_warning():
     assert y_cleaned.shape[0] < y.shape[0], "Expected dimension reduction did not trigger a warning."
 
 
-def test_remove_nan_dimension_error():
-    # Case that should raise ValueError
-    X = np.array([[1, 2]])
-    y = np.array([np.nan])
-    with pytest.raises(ValueError):
+def test_remove_nan_basic():
+    X = np.array([[1, 2], [3, 4], [5, 6]])
+    y = np.array([1, np.nan, 2])
+    X_cleaned, y_cleaned = remove_nan(X, y)
+    np.testing.assert_array_equal(X_cleaned, np.array([[1, 2], [5, 6]]))
+    np.testing.assert_array_equal(y_cleaned, np.array([1, 2]))
+
+
+def test_remove_nan_warning():
+    X = np.array([[1, 2], [3, 4], [5, 6]])
+    y = np.array([1, np.nan, 2])
+    with warnings.catch_warnings(record=True) as w:
+        warnings.simplefilter("always")
         X_cleaned, y_cleaned = remove_nan(X, y)
+        assert len(w) == 2
+        assert issubclass(w[-1].category, UserWarning)
+        assert "smaller than the original dimension" in str(w[0].message)
+        assert "Check whether to continue with the reduced dimension is useful" in str(w[1].message)
+
+
+def test_remove_nan_value_error():
+    X = np.array([[1, 2], [3, 4], [5, 6]])
+    y = np.array([np.nan, np.nan, np.nan])
+    with pytest.raises(ValueError):
+        remove_nan(X, y, stop_on_zero_return=True)
+
+
+def test_no_nan():
+    X = np.array([[1, 2], [3, 4], [5, 6]])
+    y = np.array([1, 2, 3])
+    X_cleaned, y_cleaned = remove_nan(X, y)
+    np.testing.assert_array_equal(X_cleaned, X)
+    np.testing.assert_array_equal(y_cleaned, y)
+
+
+def test_remove_nan_empty_X():
+    X = np.array([[], [], []])
+    y = np.array([1, np.nan, 2])
+    X_cleaned, y_cleaned = remove_nan(X, y)
+    np.testing.assert_array_equal(X_cleaned, np.array([[], []]))
+    np.testing.assert_array_equal(y_cleaned, np.array([1, 2]))
diff --git a/test/test_set_hyperparameter.py b/test/test_set_hyperparameter.py
@@ -29,13 +29,22 @@ def test_set_hyperparameter_boolean():
 
 def test_set_hyperparameter_factor():
     fun_control = {
-        "core_model_hyper_dict": {"leaf_model": {"type": "factor", "default": "LinearRegression", "upper": 1}}
+        "core_model_hyper_dict": {"leaf_model": {"type": "factor", "default": "LinearRegression", "upper": 2}}
     }
     set_hyperparameter(fun_control, "leaf_model", ["LinearRegression", "Perceptron"])
     assert fun_control["core_model_hyper_dict"]["leaf_model"]["levels"] == ["LinearRegression", "Perceptron"]
     assert fun_control["core_model_hyper_dict"]["leaf_model"]["upper"] == 1
 
 
+def test_set_hyperparameter_single_string():
+    fun_control = {
+        "core_model_hyper_dict": {"leaf_model": {"type": "factor", "default": "LinearRegression", "upper": 0}}
+    }
+    set_hyperparameter(fun_control, "leaf_model", "LinearRegression")
+    assert fun_control["core_model_hyper_dict"]["leaf_model"]["levels"] == ["LinearRegression"]
+    assert fun_control["core_model_hyper_dict"]["leaf_model"]["upper"] == 0
+
+
 def test_set_hyperparameter_invalid_type():
     fun_control = {"core_model_hyper_dict": {"n_estimators": {"type": "int", "default": 10, "lower": 2, "upper": 1000}}}
     with pytest.raises(ValueError):
@@ -45,4 +54,4 @@ def test_set_hyperparameter_invalid_type():
 def test_set_hyperparameter_invalid_values_type():
     fun_control = {"core_model_hyper_dict": {"n_estimators": {"type": "int", "default": 10, "lower": 2, "upper": 1000}}}
     with pytest.raises(TypeError):
-        set_hyperparameter(fun_control, "n_estimators", "2, 5")
+        set_hyperparameter(fun_control, "n_estimators", 2, 5)

Original file line number	Diff line number	Diff line change
`@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"`
`7`	`7`
`8`	`8`	`[project]`
`9`	`9`	`name = "spotpython"`
`10`		`-version = "0.14.42"`
	`10`	`+version = "0.14.43"`
`11`	`11`	`authors = [`
`12`	`12`	`{ name="T. Bartz-Beielstein", email="tbb@bartzundbartz.de" }`
`13`	`13`	`]`