Skip to content

Commit 0a826f2

Browse files
v0.2.36
combine_features moved to add_logical_columns
1 parent e21d27c commit 0a826f2

3 files changed

Lines changed: 33 additions & 143 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
77

88
[project]
99
name = "spotPython"
10-
version = "0.2.35"
10+
version = "0.2.36"
1111
authors = [
1212
{ name="T. Bartz-Beielstein", email="tbb@bartzundbartz.de" }
1313
]

src/spotPython/data/vbdp.py

Lines changed: 1 addition & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
import itertools
2-
import pandas as pd
1+
# Purpose: Functions for the VBDP project
32

43

54
def cluster_features(X):
@@ -63,102 +62,3 @@ def affinity_propagation_features(X):
6362
print("Estimated number of clusters: %d" % n_clusters_)
6463
X["cluster"] = af.labels_
6564
return X
66-
67-
68-
def combine_features(X):
69-
"""Combines all features in a dataframe with each other using bitwise operations
70-
71-
Args:
72-
X (pd.DataFrame): dataframe with features
73-
Returns:
74-
X (pd.DataFrame): dataframe with new features
75-
Examples:
76-
>>> df = pd.DataFrame({"a": [True, False, True], "b": [True, True, False], "c": [False, False, True]})
77-
>>> df
78-
a b c
79-
0 True True False
80-
1 False True False
81-
2 True False True
82-
>>> combine_features(df)
83-
a b c a_and_b a_or_b a_xor_b a_and_c a_or_c a_xor_c b_and_c b_or_c b_xor_c
84-
0 True True False True True False False True True False True True
85-
1 False True False False True True False False False False True True
86-
2 True False True False True True True True False False True True
87-
"""
88-
new_cols = []
89-
# Iterate over all pairs of columns
90-
for col1, col2 in itertools.combinations(X.columns, 2):
91-
# Create new columns for the bitwise AND, OR and XOR operations
92-
and_col = X[[col1, col2]].apply(lambda x: x[col1] & x[col2], axis=1)
93-
or_col = X[[col1, col2]].apply(lambda x: x[col1] | x[col2], axis=1)
94-
xor_col = X[[col1, col2]].apply(lambda x: x[col1] ^ x[col2], axis=1)
95-
new_cols.extend([and_col, or_col, xor_col])
96-
# Join all the new columns at once
97-
X = pd.concat([X] + new_cols, axis=1)
98-
return X
99-
100-
101-
def symptom_features(X, y):
102-
"""Generate new features based on the joint symptoms of a disease
103-
Args:
104-
X (pd.DataFrame): dataframe with features
105-
y (pd.Series): series with target values
106-
"""
107-
# Combine X and y into one dataframe
108-
Xy = pd.concat([X, y], axis=1)
109-
# Add names to the columns: x1, x2, ..., xn, y
110-
Xy.columns = ["x" + str(i) for i in range(1, X.shape[1] + 1)] + ["y"]
111-
# full train data with X and y values
112-
marginals = Xy.groupby("y").mean()
113-
top_2_symptoms = {}
114-
bot_2_symptoms = {}
115-
# for feature generation
116-
combinations = []
117-
for i in range(marginals.shape[0]):
118-
symptoms = marginals.iloc[i]
119-
# for b in True, False:
120-
sorted = symptoms.sort_values(ascending=False)
121-
top_1 = sorted.keys()[0]
122-
top_1_per = sorted.values[0]
123-
top_2 = sorted.keys()[1]
124-
top_2_per = sorted.values[1]
125-
126-
bot_1 = sorted.keys()[-1]
127-
bot_1_per = sorted.values[-1]
128-
bot_2 = sorted.keys()[-2]
129-
bot_2_per = sorted.values[-2]
130-
131-
name = marginals.index[i]
132-
dic = {top_1: top_1_per, top_2: top_2_per}
133-
dic_bot = {bot_1: bot_1_per, bot_2: bot_2_per}
134-
top_2_symptoms[name] = dic
135-
bot_2_symptoms[name] = dic_bot
136-
combinations.append(((top_1, top_2), (bot_1, bot_2)))
137-
Xy_mod = Xy.copy()
138-
convert = Xy.drop(columns=["y"]).columns.values
139-
for val in convert:
140-
Xy_mod[val] = Xy_mod[val].astype("int")
141-
for group in combinations:
142-
for comb in group:
143-
col1, col2 = comb
144-
new_columns = pd.DataFrame(
145-
{
146-
f"{col1}_and_{col2}": Xy_mod[col1] & Xy_mod[col2],
147-
f"{col1}_or_{col2}": Xy_mod[col1] | Xy_mod[col2],
148-
f"{col1}_xor_{col2}": Xy_mod[col1] ^ Xy_mod[col2],
149-
}
150-
)
151-
Xy_mod = pd.concat([Xy_mod, new_columns], axis=1)
152-
# removing duplicate features
153-
Xy_mod = Xy_mod.loc[:, ~Xy_mod.columns.duplicated()].copy()
154-
print(f"Number of features: {Xy_mod.shape[1]}")
155-
print(f"Number of samples: {Xy_mod.shape[0]}")
156-
# remove the column y from the Xy_mod data frame
157-
X_mod = Xy_mod.drop(columns=["y"])
158-
# print the column names
159-
print(f"Column names: {Xy_mod.columns.values}")
160-
# X_new = add_logical_columns(X_mod, 2)
161-
X_new = combine_features(X_mod)
162-
print(f"Number of features: {X_new.shape[1]}")
163-
print(f"Number of samples: {X_new.shape[0]}")
164-
return X_new, top_2_symptoms, bot_2_symptoms

src/spotPython/utils/convert.py

Lines changed: 31 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -71,47 +71,37 @@ def series_to_array(series):
7171
return series.to_numpy()
7272

7373

74-
def add_logical_columns(df, arity):
75-
"""Adds logical columns to a DataFrame.
74+
def add_logical_columns(X, arity=2, operations=["and", "or", "xor"]):
75+
"""Combines all features in a dataframe with each other using bitwise operations
76+
7677
Args:
77-
df (pandas.DataFrame): The input DataFrame.
78-
arity (int): The arity of the logical columns.
78+
X (pd.DataFrame): dataframe with features
79+
arity (int): the number of columns to combine at once
80+
operations (list of str): the operations to apply. Possible values are 'and', 'or' and 'xor'
7981
Returns:
80-
pandas.DataFrame: The output DataFrame.
81-
Example:
82-
>>> from spotPython.utils.convert import add_logical_columns
83-
>>> import pandas as pd
84-
>>> df = pd.DataFrame({'A': [True, False, True], 'B': [False, True, True], 'C': [True, True, False]})
85-
>>> result = add_logical_columns(df, 2)
86-
>>> print(result)
87-
A B C and_A_B or_A_B xor_A_B and_A_C or_A_C xor_A_C and_B_C or_B_C xor_B_C
88-
0 True False True False True True True True False False True True
89-
1 False True True False True True False True True False True True
90-
2 True True False True True False False True True True True False
91-
"""
92-
# Create a copy of the input DataFrame to avoid modifying it
93-
result = df.copy()
94-
95-
# Create empty DataFrames for the additional columns
96-
and_df = pd.DataFrame(index=df.index)
97-
or_df = pd.DataFrame(index=df.index)
98-
xor_df = pd.DataFrame(index=df.index)
82+
X (pd.DataFrame): dataframe with new features
83+
Examples:
84+
>>> X = pd.DataFrame({"a": [True, False, True], "b": [True, True, False], "c": [False, False, True]})
85+
>>> add_logical_columns(X)
86+
a b c a_and_b a_and_c b_and_c a_or_b a_or_c b_or_c a_xor_b a_xor_c b_xor_c
87+
0 True True False True False False True True True False True True
88+
1 False True False False False False True False True True False True
89+
2 True False True False True False True True True True False True
9990
100-
# Get all combinations of columns with the specified arity
101-
column_combinations = list(combinations(df.columns, arity))
102-
103-
# Apply the logical_and, logical_or and logical_xor functions to all combinations of columns
104-
for cols in column_combinations:
105-
col_name = "_".join(cols)
106-
and_df[f"and_{col_name}"] = result[cols[0]]
107-
or_df[f"or_{col_name}"] = result[cols[0]]
108-
xor_df[f"xor_{col_name}"] = result[cols[0]]
109-
for col in cols[1:]:
110-
and_df[f"and_{col_name}"] &= result[col]
111-
or_df[f"or_{col_name}"] |= result[col]
112-
xor_df[f"xor_{col_name}"] ^= result[col]
113-
114-
# Concatenate the input DataFrame with the additional columns
115-
result = pd.concat([result, and_df, or_df, xor_df], axis=1)
116-
117-
return result
91+
"""
92+
new_cols = []
93+
# Iterate over all combinations of columns of the given arity
94+
for cols in combinations(X.columns, arity):
95+
# Create new columns for the specified operations
96+
if "and" in operations:
97+
and_col = X[list(cols)].apply(lambda x: x.all(), axis=1)
98+
new_cols.append(and_col)
99+
if "or" in operations:
100+
or_col = X[list(cols)].apply(lambda x: x.any(), axis=1)
101+
new_cols.append(or_col)
102+
if "xor" in operations:
103+
xor_col = X[list(cols)].apply(lambda x: x.sum() % 2 == 1, axis=1)
104+
new_cols.append(xor_col)
105+
# Join all the new columns at once
106+
X = pd.concat([X] + new_cols, axis=1)
107+
return X

0 commit comments

Comments
 (0)