0.24.28

bartzbeielstein · bartzbeielstein · commit 1f753eef6d4f · 2025-02-13T11:22:54.000+01:00
Remove the `name` argument from check_identical_columns_and_rows and check_identical_columns_and_rows_with_tol
diff --git a/pyproject.toml b/pyproject.toml
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "spotpython"
-version = "0.24.27"
+version = "0.24.28"
 authors = [
   { name="T. Bartz-Beielstein", email="tbb@bartzundbartz.de" }
 ]
diff --git a/src/spotpython/utils/compare.py b/src/spotpython/utils/compare.py
@@ -59,7 +59,7 @@ def find_equal_in_lists(a: List[int], b: List[int]) -> List[int]:
     return equal
 
 
-def check_identical_columns_and_rows(df, name, remove=False, verbosity=1) -> pd.DataFrame:
+def check_identical_columns_and_rows(df, remove=False, verbosity=1) -> pd.DataFrame:
     """
     Checks for exact identical columns and rows in the DataFrame.
 
@@ -69,7 +69,6 @@ def check_identical_columns_and_rows(df, name, remove=False, verbosity=1) -> pd.
 
     Args:
         df (pd.DataFrame): The DataFrame to check.
-        name (str): Name of the DataFrame for reporting.
         remove (bool): Whether to remove duplicate columns/rows.
         verbosity (int): Level of verbosity; 0 for no output, 1 for standard messages.
 
@@ -85,7 +84,6 @@ def check_identical_columns_and_rows(df, name, remove=False, verbosity=1) -> pd.
     # Check for exact identical columns
     col_mask = df.T.duplicated(keep="first")
     if col_mask.any() and verbosity > 0:
-        print(f"\nExact identical columns in {name}:")
         print(list(df.columns[col_mask]))
 
     if remove:
@@ -94,7 +92,6 @@ def check_identical_columns_and_rows(df, name, remove=False, verbosity=1) -> pd.
     # Check for exact identical rows
     row_mask = df.duplicated(keep="first")
     if row_mask.any() and verbosity > 0:
-        print(f"\nExact identical rows in {name}:")
         print(list(df.index[row_mask]))
 
     if remove:
@@ -103,13 +100,12 @@ def check_identical_columns_and_rows(df, name, remove=False, verbosity=1) -> pd.
     return df
 
 
-def check_identical_columns_and_rows_with_tol(df, name, tolerance, remove=False, verbosity=1) -> pd.DataFrame:
+def check_identical_columns_and_rows_with_tol(df, tolerance, remove=False, verbosity=1) -> pd.DataFrame:
     """
     Checks for identical columns and rows within a given tolerance.
 
     Args:
         df (pd.DataFrame): The DataFrame to check.
-        name (str): Name of the DataFrame for reporting.
         tolerance (float): The tolerance for checking equivalence.
         remove (bool): Whether to remove duplicates found within the tolerance.
         verbosity (int): Level of verbosity; 0 for no output, 1 for standard messages.
@@ -136,7 +132,6 @@ def is_identical_with_tolerance(series1, series2, tol):
                 identical_columns.append((df.columns[i], df.columns[j]))
 
     if identical_columns and verbosity > 0:
-        print(f"\nIdentical columns within tolerance in {name}:")
         for col_pair in identical_columns:
             print(col_pair)
 
@@ -151,7 +146,6 @@ def is_identical_with_tolerance(series1, series2, tol):
                 identical_rows.append((df.index[i], df.index[j]))
 
     if identical_rows and verbosity > 0:
-        print(f"\nIdentical rows within tolerance in {name}:")
         for row_pair in identical_rows:
             print(row_pair)
 
diff --git a/test/test_check_identical_columns_and_rows.py b/test/test_check_identical_columns_and_rows.py
@@ -16,11 +16,11 @@ def test_check_exact_identical_columns_and_rows():
     })
 
     # Exact duplicates - should identify and remove B
-    result_df = check_identical_columns_and_rows(df1, "Test DataFrame 1", remove=True)
+    result_df = check_identical_columns_and_rows(df1, remove=True)
     assert list(result_df.columns) == ["A", "C"], "Failed to remove duplicate columns accurately"
 
     # No duplicates - should not remove any columns
-    result_df = check_identical_columns_and_rows(df2, "Test DataFrame 2", remove=True)
+    result_df = check_identical_columns_and_rows(df2, remove=True)
     assert list(result_df.columns) == ["X", "Y"], "Incorrectly removed columns when there were none to remove"
 
 def test_check_identical_columns_and_rows_with_tol():
@@ -32,11 +32,11 @@ def test_check_identical_columns_and_rows_with_tol():
     })
 
     # Within-tolerance duplicates - should identify and remove B
-    result_df = check_identical_columns_and_rows_with_tol(df1, "Test DataFrame 1", tolerance=0.05, remove=True)
+    result_df = check_identical_columns_and_rows_with_tol(df1, tolerance=0.05, remove=True)
     assert list(result_df.columns) == ["A", "C"], "Failed to remove near-duplicate columns accurately"
 
     # No near duplicates within a small tolerance
-    result_df = check_identical_columns_and_rows_with_tol(df1, "Test DataFrame 1", tolerance=0.001, remove=True)
+    result_df = check_identical_columns_and_rows_with_tol(df1, tolerance=0.001, remove=True)
     assert list(result_df.columns) == ["A", "B", "C"], "Incorrectly removed columns when they are not near duplicates"
 
 def test_check_exact_identical_columns_and_rows_remove_true():
@@ -46,7 +46,7 @@ def test_check_exact_identical_columns_and_rows_remove_true():
         "C": [4, 5, 6]
     })
     
-    result_df = check_identical_columns_and_rows(df1, "Test DataFrame 1", remove=True)
+    result_df = check_identical_columns_and_rows(df1, remove=True)
     assert list(result_df.columns) == ["A", "C"], "Failed to remove duplicate columns accurately"
 
 def test_check_exact_identical_columns_and_rows_remove_false():
@@ -57,7 +57,7 @@ def test_check_exact_identical_columns_and_rows_remove_false():
     })
     
     # Check without removing duplicates
-    result_df = check_identical_columns_and_rows(df1, "Test DataFrame 1", remove=False)
+    result_df = check_identical_columns_and_rows(df1, remove=False)
     assert list(result_df.columns) == ["A", "B", "C"], "Incorrectly identified or removed columns when remove=False"
 
 def test_check_identical_columns_and_rows_with_tol_remove_true():
@@ -67,7 +67,7 @@ def test_check_identical_columns_and_rows_with_tol_remove_true():
         "C": [4.00, 5.00, 6.00]
     })
 
-    result_df = check_identical_columns_and_rows_with_tol(df1, "Test DataFrame 1", tolerance=0.05, remove=True)
+    result_df = check_identical_columns_and_rows_with_tol(df1, tolerance=0.05, remove=True)
     assert list(result_df.columns) == ["A", "C"], "Failed to remove near-duplicate columns accurately with tolerance"
 
 def test_check_identical_columns_and_rows_with_tol_remove_false():
@@ -78,7 +78,7 @@ def test_check_identical_columns_and_rows_with_tol_remove_false():
     })
 
     # Check without removing duplicates
-    result_df = check_identical_columns_and_rows_with_tol(df1, "Test DataFrame 1", tolerance=0.05, remove=False)
+    result_df = check_identical_columns_and_rows_with_tol(df1, tolerance=0.05, remove=False)
     assert list(result_df.columns) == ["A", "B", "C"], "Incorrectly identified or removed columns when remove=False"
 
 def test_with_no_duplicates():
@@ -87,10 +87,10 @@ def test_with_no_duplicates():
         "Y": [4, 5, 6],
         "Z": [7, 8, 9]
     })
-    result_df = check_identical_columns_and_rows(df, "Test DataFrame with No Duplicates", remove=True)
+    result_df = check_identical_columns_and_rows(df, remove=True)
     assert list(result_df.columns) == ["X", "Y", "Z"], "Incorrectly removed columns in a no-duplicates scenario"
 
-    result_df_with_tol = check_identical_columns_and_rows_with_tol(df, "Test DataFrame with No Duplicates", tolerance=0.1, remove=True)
+    result_df_with_tol = check_identical_columns_and_rows_with_tol(df, tolerance=0.1, remove=True)
     assert list(result_df_with_tol.columns) == ["X", "Y", "Z"], "Incorrectly removed columns in a no-duplicates scenario with tolerance"
 
 

Original file line number	Diff line number	Diff line change
`@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"`
`7`	`7`
`8`	`8`	`[project]`
`9`	`9`	`name = "spotpython"`
`10`		`-version = "0.24.27"`
	`10`	`+version = "0.24.28"`
`11`	`11`	`authors = [`
`12`	`12`	`{ name="T. Bartz-Beielstein", email="tbb@bartzundbartz.de" }`
`13`	`13`	`]`