11# Purpose: Functions for the VBDP project
22
33
4- def cluster_features (X ):
5- """Clusters the features of a dataframe based on similarity
4+ import pandas as pd
5+ from sklearn .cluster import AffinityPropagation
6+ from sklearn .metrics .pairwise import manhattan_distances
7+
8+
9+ def cluster_features (X : pd .DataFrame ) -> pd .DataFrame :
10+ """Clusters the features of a dataframe based on similarity.
11+
12+ This function takes a dataframe with features and groups its columns by keywords found in the column names (e.g. "pain", "inflammation").
13+ The resulting dataframe contains the original features as well as new features (c_0, c_1, ...) representing the clusters.
614
715 Args:
8- X (pd.DataFrame): dataframe with features
16+ X (pd.DataFrame): A dataframe with features.
17+
918 Returns:
10- X (pd.DataFrame): dataframe with new features
19+ (pd.DataFrame): A dataframe with the original features and new cluster features.
20+
1121 Examples:
1222 >>> df = pd.DataFrame({"a": [True, False, True], "b": [True, True, False], "c": [False, False, True]})
1323 >>> df
@@ -16,10 +26,10 @@ def cluster_features(X):
1626 1 False True False
1727 2 True False True
1828 >>> cluster_features(df)
19- a b c cluster
20- 0 True True False 0
21- 1 False True False 1
22- 2 True False True 2
29+ a b c c_0 c_1 c_2 c_3
30+ 0 True True False 0 0 0 0
31+ 1 False True False 0 0 0 0
32+ 2 True False True 0 0 0 0
2333 """
2434 c_0 = X .columns [X .columns .str .contains ("pain" )]
2535 c_1 = X .columns [X .columns .str .contains ("inflammation" )]
@@ -32,13 +42,21 @@ def cluster_features(X):
3242 return X
3343
3444
35- def affinity_propagation_features (X ):
36- """Clusters the features of a dataframe using Affinity Propagation
45+ def affinity_propagation_features (X : pd .DataFrame ) -> pd .DataFrame :
46+ """Clusters the features of a dataframe using Affinity Propagation.
47+
48+ This function takes a dataframe with features and clusters them using the
49+ Affinity Propagation algorithm. The resulting dataframe contains the original
50+ features as well as a new feature representing the cluster labels.
3751
3852 Args:
39- X (pd.DataFrame): dataframe with features
53+ X (pd.DataFrame):
54+ A dataframe with features.
55+
4056 Returns:
41- X (pd.DataFrame): dataframe with new features
57+ (pd.DataFrame):
58+ A dataframe with the original features and a new cluster feature.
59+
4260 Examples:
4361 >>> df = pd.DataFrame({"a": [True, False, True], "b": [True, True, False], "c": [False, False, True]})
4462 >>> df
@@ -47,14 +65,12 @@ def affinity_propagation_features(X):
4765 1 False True False
4866 2 True False True
4967 >>> affinity_propagation_features(df)
68+ Estimated number of clusters: 3
5069 a b c cluster
5170 0 True True False 0
5271 1 False True False 1
5372 2 True False True 2
5473 """
55- from sklearn .cluster import AffinityPropagation
56- from sklearn .metrics .pairwise import manhattan_distances
57-
5874 D = manhattan_distances (X )
5975 af = AffinityPropagation (random_state = 0 , affinity = "precomputed" ).fit (D )
6076 cluster_centers_indices = af .cluster_centers_indices_
0 commit comments