0.29.9

bartzbeielstein · bartzbeielstein · commit d1ad54d23bc9 · 2025-04-13T23:23:02.000+02:00
diff --git a/pyproject.toml b/pyproject.toml
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "spotpython"
-version = "0.29.8"
+version = "0.29.9"
 authors = [
   { name="T. Bartz-Beielstein", email="tbb@bartzundbartz.de" }
 ]
diff --git a/src/spotpython/utils/effects.py b/src/spotpython/utils/effects.py
@@ -300,7 +300,7 @@ def screening_plot(X, fun, xi, p, labels, bounds=None, show=True) -> None:
         plt.show()
 
 
-def plot_all_partial_dependence(df, df_target, model="GradientBoostingRegressor", nrows=5, ncols=6, figsize=(20, 15)) -> None:
+def plot_all_partial_dependence(df, df_target, model="GradientBoostingRegressor", nrows=5, ncols=6, figsize=(20, 15), title="") -> None:
     """
     Generates Partial Dependence Plots (PDPs) for every feature in a DataFrame against a target variable,
     arranged in a grid.
@@ -313,6 +313,7 @@ def plot_all_partial_dependence(df, df_target, model="GradientBoostingRegressor"
         nrows (int, optional): Number of rows in the grid of subplots. Defaults to 5.
         ncols (int, optional): Number of columns in the grid of subplots. Defaults to 6.
         figsize (tuple, optional): Figure size (width, height) in inches. Defaults to (20, 15).
+        title (str, optional): Title set on every subplot (the same string is used for each feature). Defaults to "".
 
     Returns:
         None
@@ -361,7 +362,7 @@ def plot_all_partial_dependence(df, df_target, model="GradientBoostingRegressor"
     for i, feature in enumerate(features):
         ax = axes[i]  # Select the axis for the current feature
         PartialDependenceDisplay.from_estimator(gb_model, X_train, [feature], ax=ax)
-        ax.set_title(feature)  # Set the title of the subplot to the feature name
+        ax.set_title(title)  # Set the title of the subplot to the given title
 
     # Remove empty subplots if the number of features is less than nrows * ncols
     for i in range(len(features), nrows * ncols):
diff --git a/src/spotpython/utils/sampling.py b/src/spotpython/utils/sampling.py
@@ -620,7 +620,7 @@ def mmlhs(X_start: np.ndarray, population: int, iterations: int, q: Optional[flo
     return X_best
 
 
-def bestlh(n: int, k: int, population: int, iterations: int, p=1, plot=False, verbosity=0, edges=0,  q_list = [1, 2, 5, 10, 20, 50, 100]) -> np.ndarray:
+def bestlh(n: int, k: int, population: int, iterations: int, p=1, plot=False, verbosity=0, edges=0, q_list=[1, 2, 5, 10, 20, 50, 100]) -> np.ndarray:
     """
     Generates an optimized Latin hypercube by evolving the Morris-Mitchell
     criterion across multiple exponents (q values) and selecting the best plan.
@@ -680,7 +680,6 @@ def bestlh(n: int, k: int, population: int, iterations: int, p=1, plot=False, ve
         raise ValueError("Latin hypercubes are not defined for dim k < 2")
 
     # A list of exponents (q) to optimize
-   
 
     # Start with a random Latin hypercube
     X_start = rlh(n, k, edges=edges)
diff --git a/src/spotpython/utils/stats.py b/src/spotpython/utils/stats.py
@@ -532,7 +532,7 @@ def vif(X, sorted=True) -> pd.DataFrame:
 
 def condition_index(df) -> pd.DataFrame:
     """
-    Calculates the Condition Index for each feature in a DataFrame to assess multicollinearity.
+    Calculates the Condition Index for a DataFrame to assess multicollinearity.
 
     The Condition Index is computed based on the eigenvalues of the covariance matrix
     of the standardized data. High condition indices suggest potential multicollinearity issues.
@@ -542,9 +542,9 @@ def condition_index(df) -> pd.DataFrame:
 
     Returns:
         pandas.DataFrame: A DataFrame with the following columns:
-            - 'Feature': The name of the feature.
+            - 'Index': The index of the eigenvalue.
             - 'Eigenvalue': The eigenvalue of the covariance matrix.
-            - 'Condition Index': The Condition Index for the feature.
+            - 'Condition Index': The Condition Index for the eigenvalue.
 
     Examples:
         >>> from spotpython.utils.stats import condition_index
@@ -555,28 +555,27 @@ def condition_index(df) -> pd.DataFrame:
         ...     'x3': [1, 3, 5, 7, 9]
         ... })
         >>> condition_index(data)
-           Feature  Eigenvalue  Condition Index
-        0      x1    1.140000         1.000000
-        1      x2    0.000000              inf
-        2      x3    0.002857        20.000000
+           Index  Eigenvalue  Condition Index
+        0      0    1.140000         1.000000
+        1      1    0.000000              inf
+        2      2    0.002857        20.000000
     """
-    # Standardisieren der Daten
+    # Center the data (subtract the column means)
     X = df.values
     X_centered = X - np.mean(X, axis=0)
 
-    # Berechnung der Kovarianzmatrix
+    # Compute the covariance matrix
     covariance_matrix = np.cov(X_centered, rowvar=False)
 
-    # Berechnung der Eigenwerte der Kovarianzmatrix
+    # Compute the eigenvalues of the covariance matrix
     eigenvalues, _ = np.linalg.eigh(covariance_matrix)
 
-    # Berechnung des Condition Index
-    # Condition Index ist die Wurzel des Verhältnisses des größten Eigenwertes zum jeweiligen Eigenwert
+    # Condition index = sqrt(max eigenvalue / eigenvalue); non-positive eigenvalues map to inf to avoid division by zero
     max_eigenvalue = max(eigenvalues)
-    condition_indices = np.sqrt(max_eigenvalue / eigenvalues)
+    condition_indices = np.array([np.sqrt(max_eigenvalue / ev) if ev > 0 else np.inf for ev in eigenvalues])
 
-    # Erstellen eines DataFrames zur Anzeige der Ergebnisse
-    condition_index_df = pd.DataFrame({"Feature": df.columns, "Eigenvalue": eigenvalues, "Condition Index": condition_indices})
+    # Create a DataFrame for the results
+    condition_index_df = pd.DataFrame({"Index": range(len(eigenvalues)), "Eigenvalue": eigenvalues, "Condition Index": condition_indices})
 
     return condition_index_df
 

Original file line number	Diff line number	Diff line change
`@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"`
`7`	`7`
`8`	`8`	`[project]`
`9`	`9`	`name = "spotpython"`
`10`		`-version = "0.29.8"`
	`10`	`+version = "0.29.9"`
`11`	`11`	`authors = [`
`12`	`12`	`{ name="T. Bartz-Beielstein", email="tbb@bartzundbartz.de" }`
`13`	`13`	`]`