Skip to content

Commit d1ad54d

Browse files
0.29.9
1 parent 7292a60 commit d1ad54d

4 files changed

Lines changed: 19 additions & 20 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
77

88
[project]
99
name = "spotpython"
10-
version = "0.29.8"
10+
version = "0.29.9"
1111
authors = [
1212
{ name="T. Bartz-Beielstein", email="tbb@bartzundbartz.de" }
1313
]

src/spotpython/utils/effects.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,7 @@ def screening_plot(X, fun, xi, p, labels, bounds=None, show=True) -> None:
300300
plt.show()
301301

302302

303-
def plot_all_partial_dependence(df, df_target, model="GradientBoostingRegressor", nrows=5, ncols=6, figsize=(20, 15)) -> None:
303+
def plot_all_partial_dependence(df, df_target, model="GradientBoostingRegressor", nrows=5, ncols=6, figsize=(20, 15), title="") -> None:
304304
"""
305305
Generates Partial Dependence Plots (PDPs) for every feature in a DataFrame against a target variable,
306306
arranged in a grid.
@@ -313,6 +313,7 @@ def plot_all_partial_dependence(df, df_target, model="GradientBoostingRegressor"
313313
nrows (int, optional): Number of rows in the grid of subplots. Defaults to 5.
314314
ncols (int, optional): Number of columns in the grid of subplots. Defaults to 6.
315315
figsize (tuple, optional): Figure size (width, height) in inches. Defaults to (20, 15).
316+
title (str, optional): Title for the subplots. Defaults to "".
316317
317318
Returns:
318319
None
@@ -361,7 +362,7 @@ def plot_all_partial_dependence(df, df_target, model="GradientBoostingRegressor"
361362
for i, feature in enumerate(features):
362363
ax = axes[i] # Select the axis for the current feature
363364
PartialDependenceDisplay.from_estimator(gb_model, X_train, [feature], ax=ax)
364-
ax.set_title(feature) # Set the title of the subplot to the feature name
365+
ax.set_title(title) # Set the title of the subplot to the caller-supplied title
365366

366367
# Remove empty subplots if the number of features is less than nrows * ncols
367368
for i in range(len(features), nrows * ncols):

src/spotpython/utils/sampling.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -620,7 +620,7 @@ def mmlhs(X_start: np.ndarray, population: int, iterations: int, q: Optional[flo
620620
return X_best
621621

622622

623-
def bestlh(n: int, k: int, population: int, iterations: int, p=1, plot=False, verbosity=0, edges=0, q_list = [1, 2, 5, 10, 20, 50, 100]) -> np.ndarray:
623+
def bestlh(n: int, k: int, population: int, iterations: int, p=1, plot=False, verbosity=0, edges=0, q_list=[1, 2, 5, 10, 20, 50, 100]) -> np.ndarray:
624624
"""
625625
Generates an optimized Latin hypercube by evolving the Morris-Mitchell
626626
criterion across multiple exponents (q values) and selecting the best plan.
@@ -680,7 +680,6 @@ def bestlh(n: int, k: int, population: int, iterations: int, p=1, plot=False, ve
680680
raise ValueError("Latin hypercubes are not defined for dim k < 2")
681681

682682
# The exponents (q) to optimize are passed in via the q_list parameter
683-
684683

685684
# Start with a random Latin hypercube
686685
X_start = rlh(n, k, edges=edges)

src/spotpython/utils/stats.py

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -532,7 +532,7 @@ def vif(X, sorted=True) -> pd.DataFrame:
532532

533533
def condition_index(df) -> pd.DataFrame:
534534
"""
535-
Calculates the Condition Index for each feature in a DataFrame to assess multicollinearity.
535+
Calculates the Condition Index for a DataFrame to assess multicollinearity.
536536
537537
The Condition Index is computed based on the eigenvalues of the covariance matrix
538538
of the mean-centered data. High condition indices suggest potential multicollinearity issues.
@@ -542,9 +542,9 @@ def condition_index(df) -> pd.DataFrame:
542542
543543
Returns:
544544
pandas.DataFrame: A DataFrame with the following columns:
545-
- 'Feature': The name of the feature.
545+
- 'Index': The index of the eigenvalue.
546546
- 'Eigenvalue': The eigenvalue of the covariance matrix.
547-
- 'Condition Index': The Condition Index for the feature.
547+
- 'Condition Index': The Condition Index for the eigenvalue.
548548
549549
Examples:
550550
>>> from spotpython.utils.stats import condition_index
@@ -555,28 +555,27 @@ def condition_index(df) -> pd.DataFrame:
555555
... 'x3': [1, 3, 5, 7, 9]
556556
... })
557557
>>> condition_index(data)
558-
Feature Eigenvalue Condition Index
559-
0 x1 1.140000 1.000000
560-
1 x2 0.000000 inf
561-
2 x3 0.002857 20.000000
558+
Index Eigenvalue Condition Index
559+
0 0 1.140000 1.000000
560+
1 1 0.000000 inf
561+
2 2 0.002857 20.000000
562562
"""
563-
# Standardisieren der Daten
563+
# Center the data (subtract the column means)
564564
X = df.values
565565
X_centered = X - np.mean(X, axis=0)
566566

567-
# Berechnung der Kovarianzmatrix
567+
# Compute the covariance matrix
568568
covariance_matrix = np.cov(X_centered, rowvar=False)
569569

570-
# Berechnung der Eigenwerte der Kovarianzmatrix
570+
# Compute the eigenvalues of the covariance matrix
571571
eigenvalues, _ = np.linalg.eigh(covariance_matrix)
572572

573-
# Berechnung des Condition Index
574-
# Condition Index ist die Wurzel des Verhältnisses des größten Eigenwertes zum jeweiligen Eigenwert
573+
# Handle division by zero for eigenvalues
575574
max_eigenvalue = max(eigenvalues)
576-
condition_indices = np.sqrt(max_eigenvalue / eigenvalues)
575+
condition_indices = np.array([np.sqrt(max_eigenvalue / ev) if ev > 0 else np.inf for ev in eigenvalues])
577576

578-
# Erstellen eines DataFrames zur Anzeige der Ergebnisse
579-
condition_index_df = pd.DataFrame({"Feature": df.columns, "Eigenvalue": eigenvalues, "Condition Index": condition_indices})
577+
# Create a DataFrame for the results
578+
condition_index_df = pd.DataFrame({"Index": range(len(eigenvalues)), "Eigenvalue": eigenvalues, "Condition Index": condition_indices})
580579

581580
return condition_index_df
582581

0 commit comments

Comments
 (0)