@@ -532,7 +532,7 @@ def vif(X, sorted=True) -> pd.DataFrame:
532532
533533def condition_index (df ) -> pd .DataFrame :
534534 """
535- Calculates the Condition Index for each feature in a DataFrame to assess multicollinearity.
535+ Calculates the Condition Index for a DataFrame to assess multicollinearity.
536536
537537 The Condition Index is computed based on the eigenvalues of the covariance matrix
538538 of the mean-centered data (column means are subtracted; no scaling to unit variance is applied). High condition indices suggest potential multicollinearity issues.
@@ -542,9 +542,9 @@ def condition_index(df) -> pd.DataFrame:
542542
543543 Returns:
544544 pandas.DataFrame: A DataFrame with the following columns:
545- - 'Feature ': The name of the feature .
545+ - 'Index ': The index of the eigenvalue .
546546 - 'Eigenvalue': The eigenvalue of the covariance matrix.
547- - 'Condition Index': The Condition Index for the feature .
547+ - 'Condition Index': The Condition Index for the eigenvalue .
548548
549549 Examples:
550550 >>> from spotpython.utils.stats import condition_index
@@ -555,28 +555,27 @@ def condition_index(df) -> pd.DataFrame:
555555 ... 'x3': [1, 3, 5, 7, 9]
556556 ... })
557557 >>> condition_index(data)
558- Feature Eigenvalue Condition Index
559- 0 x1 1.140000 1.000000
560- 1 x2 0.000000 inf
561- 2 x3 0.002857 20.000000
558+ Index Eigenvalue Condition Index
559+ 0 0 1.140000 1.000000
560+ 1 1 0.000000 inf
561+ 2 2 0.002857 20.000000
562562 """
563- # Standardisieren der Daten
563+ # Center the data by subtracting the column means (no scaling to unit variance is applied)
564564 X = df .values
565565 X_centered = X - np .mean (X , axis = 0 )
566566
567- # Berechnung der Kovarianzmatrix
567+ # Compute the covariance matrix
568568 covariance_matrix = np .cov (X_centered , rowvar = False )
569569
570- # Berechnung der Eigenwerte der Kovarianzmatrix
570+ # Compute the eigenvalues of the covariance matrix
571571 eigenvalues , _ = np .linalg .eigh (covariance_matrix )
572572
573- # Berechnung des Condition Index
574- # Condition Index ist die Wurzel des Verhältnisses des größten Eigenwertes zum jeweiligen Eigenwert
573+ # Condition index = sqrt(largest eigenvalue / eigenvalue); non-positive eigenvalues map to inf to avoid division by zero
575574 max_eigenvalue = max (eigenvalues )
576- condition_indices = np .sqrt (max_eigenvalue / eigenvalues )
575+ condition_indices = np .array ([ np . sqrt (max_eigenvalue / ev ) if ev > 0 else np . inf for ev in eigenvalues ] )
577576
578- # Erstellen eines DataFrames zur Anzeige der Ergebnisse
579- condition_index_df = pd .DataFrame ({"Feature " : df . columns , "Eigenvalue" : eigenvalues , "Condition Index" : condition_indices })
577+ # Create a DataFrame for the results
578+ condition_index_df = pd .DataFrame ({"Index " : range ( len ( eigenvalues )) , "Eigenvalue" : eigenvalues , "Condition Index" : condition_indices })
580579
581580 return condition_index_df
582581
0 commit comments