Skip to content

Commit 1f753ee

Browse files
0.24.28
name arg removed
1 parent c3e7104 commit 1f753ee

3 files changed

Lines changed: 13 additions & 19 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
77

88
[project]
99
name = "spotpython"
10-
version = "0.24.27"
10+
version = "0.24.28"
1111
authors = [
1212
{ name="T. Bartz-Beielstein", email="tbb@bartzundbartz.de" }
1313
]

src/spotpython/utils/compare.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def find_equal_in_lists(a: List[int], b: List[int]) -> List[int]:
5959
return equal
6060

6161

62-
def check_identical_columns_and_rows(df, name, remove=False, verbosity=1) -> pd.DataFrame:
62+
def check_identical_columns_and_rows(df, remove=False, verbosity=1) -> pd.DataFrame:
6363
"""
6464
Checks for exact identical columns and rows in the DataFrame.
6565
@@ -69,7 +69,6 @@ def check_identical_columns_and_rows(df, name, remove=False, verbosity=1) -> pd.
6969
7070
Args:
7171
df (pd.DataFrame): The DataFrame to check.
72-
name (str): Name of the DataFrame for reporting.
7372
remove (bool): Whether to remove duplicate columns/rows.
7473
verbosity (int): Level of verbosity; 0 for no output, 1 for standard messages.
7574
@@ -85,7 +84,6 @@ def check_identical_columns_and_rows(df, name, remove=False, verbosity=1) -> pd.
8584
# Check for exact identical columns
8685
col_mask = df.T.duplicated(keep="first")
8786
if col_mask.any() and verbosity > 0:
88-
print(f"\nExact identical columns in {name}:")
8987
print(list(df.columns[col_mask]))
9088

9189
if remove:
@@ -94,7 +92,6 @@ def check_identical_columns_and_rows(df, name, remove=False, verbosity=1) -> pd.
9492
# Check for exact identical rows
9593
row_mask = df.duplicated(keep="first")
9694
if row_mask.any() and verbosity > 0:
97-
print(f"\nExact identical rows in {name}:")
9895
print(list(df.index[row_mask]))
9996

10097
if remove:
@@ -103,13 +100,12 @@ def check_identical_columns_and_rows(df, name, remove=False, verbosity=1) -> pd.
103100
return df
104101

105102

106-
def check_identical_columns_and_rows_with_tol(df, name, tolerance, remove=False, verbosity=1) -> pd.DataFrame:
103+
def check_identical_columns_and_rows_with_tol(df, tolerance, remove=False, verbosity=1) -> pd.DataFrame:
107104
"""
108105
Checks for identical columns and rows within a given tolerance.
109106
110107
Args:
111108
df (pd.DataFrame): The DataFrame to check.
112-
name (str): Name of the DataFrame for reporting.
113109
tolerance (float): The tolerance for checking equivalence.
114110
remove (bool): Whether to remove duplicates found within the tolerance.
115111
verbosity (int): Level of verbosity; 0 for no output, 1 for standard messages.
@@ -136,7 +132,6 @@ def is_identical_with_tolerance(series1, series2, tol):
136132
identical_columns.append((df.columns[i], df.columns[j]))
137133

138134
if identical_columns and verbosity > 0:
139-
print(f"\nIdentical columns within tolerance in {name}:")
140135
for col_pair in identical_columns:
141136
print(col_pair)
142137

@@ -151,7 +146,6 @@ def is_identical_with_tolerance(series1, series2, tol):
151146
identical_rows.append((df.index[i], df.index[j]))
152147

153148
if identical_rows and verbosity > 0:
154-
print(f"\nIdentical rows within tolerance in {name}:")
155149
for row_pair in identical_rows:
156150
print(row_pair)
157151

test/test_check_identical_columns_and_rows.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@ def test_check_exact_identical_columns_and_rows():
1616
})
1717

1818
# Exact duplicates - should identify and remove B
19-
result_df = check_identical_columns_and_rows(df1, "Test DataFrame 1", remove=True)
19+
result_df = check_identical_columns_and_rows(df1, remove=True)
2020
assert list(result_df.columns) == ["A", "C"], "Failed to remove duplicate columns accurately"
2121

2222
# No duplicates - should not remove any columns
23-
result_df = check_identical_columns_and_rows(df2, "Test DataFrame 2", remove=True)
23+
result_df = check_identical_columns_and_rows(df2, remove=True)
2424
assert list(result_df.columns) == ["X", "Y"], "Incorrectly removed columns when there were none to remove"
2525

2626
def test_check_identical_columns_and_rows_with_tol():
@@ -32,11 +32,11 @@ def test_check_identical_columns_and_rows_with_tol():
3232
})
3333

3434
# Within-tolerance duplicates - should identify and remove B
35-
result_df = check_identical_columns_and_rows_with_tol(df1, "Test DataFrame 1", tolerance=0.05, remove=True)
35+
result_df = check_identical_columns_and_rows_with_tol(df1, tolerance=0.05, remove=True)
3636
assert list(result_df.columns) == ["A", "C"], "Failed to remove near-duplicate columns accurately"
3737

3838
# No near duplicates within a small tolerance
39-
result_df = check_identical_columns_and_rows_with_tol(df1, "Test DataFrame 1", tolerance=0.001, remove=True)
39+
result_df = check_identical_columns_and_rows_with_tol(df1, tolerance=0.001, remove=True)
4040
assert list(result_df.columns) == ["A", "B", "C"], "Incorrectly removed columns when they are not near duplicates"
4141

4242
def test_check_exact_identical_columns_and_rows_remove_true():
@@ -46,7 +46,7 @@ def test_check_exact_identical_columns_and_rows_remove_true():
4646
"C": [4, 5, 6]
4747
})
4848

49-
result_df = check_identical_columns_and_rows(df1, "Test DataFrame 1", remove=True)
49+
result_df = check_identical_columns_and_rows(df1, remove=True)
5050
assert list(result_df.columns) == ["A", "C"], "Failed to remove duplicate columns accurately"
5151

5252
def test_check_exact_identical_columns_and_rows_remove_false():
@@ -57,7 +57,7 @@ def test_check_exact_identical_columns_and_rows_remove_false():
5757
})
5858

5959
# Check without removing duplicates
60-
result_df = check_identical_columns_and_rows(df1, "Test DataFrame 1", remove=False)
60+
result_df = check_identical_columns_and_rows(df1, remove=False)
6161
assert list(result_df.columns) == ["A", "B", "C"], "Incorrectly identified or removed columns when remove=False"
6262

6363
def test_check_identical_columns_and_rows_with_tol_remove_true():
@@ -67,7 +67,7 @@ def test_check_identical_columns_and_rows_with_tol_remove_true():
6767
"C": [4.00, 5.00, 6.00]
6868
})
6969

70-
result_df = check_identical_columns_and_rows_with_tol(df1, "Test DataFrame 1", tolerance=0.05, remove=True)
70+
result_df = check_identical_columns_and_rows_with_tol(df1, tolerance=0.05, remove=True)
7171
assert list(result_df.columns) == ["A", "C"], "Failed to remove near-duplicate columns accurately with tolerance"
7272

7373
def test_check_identical_columns_and_rows_with_tol_remove_false():
@@ -78,7 +78,7 @@ def test_check_identical_columns_and_rows_with_tol_remove_false():
7878
})
7979

8080
# Check without removing duplicates
81-
result_df = check_identical_columns_and_rows_with_tol(df1, "Test DataFrame 1", tolerance=0.05, remove=False)
81+
result_df = check_identical_columns_and_rows_with_tol(df1, tolerance=0.05, remove=False)
8282
assert list(result_df.columns) == ["A", "B", "C"], "Incorrectly identified or removed columns when remove=False"
8383

8484
def test_with_no_duplicates():
@@ -87,10 +87,10 @@ def test_with_no_duplicates():
8787
"Y": [4, 5, 6],
8888
"Z": [7, 8, 9]
8989
})
90-
result_df = check_identical_columns_and_rows(df, "Test DataFrame with No Duplicates", remove=True)
90+
result_df = check_identical_columns_and_rows(df, remove=True)
9191
assert list(result_df.columns) == ["X", "Y", "Z"], "Incorrectly removed columns in a no-duplicates scenario"
9292

93-
result_df_with_tol = check_identical_columns_and_rows_with_tol(df, "Test DataFrame with No Duplicates", tolerance=0.1, remove=True)
93+
result_df_with_tol = check_identical_columns_and_rows_with_tol(df, tolerance=0.1, remove=True)
9494
assert list(result_df_with_tol.columns) == ["X", "Y", "Z"], "Incorrectly removed columns in a no-duplicates scenario with tolerance"
9595

9696

0 commit comments

Comments
 (0)