Skip to content

Commit 3bed52d

Browse files
v0.6.3
documentation
1 parent 6211826 commit 3bed52d

21 files changed

Lines changed: 1207 additions & 256 deletions

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
77

88
[project]
99
name = "spotPython"
10-
version = "0.6.2"
10+
version = "0.6.3"
1111
authors = [
1212
{ name="T. Bartz-Beielstein", email="tbb@bartzundbartz.de" }
1313
]

src/spotPython/data/base.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,7 @@ class SyntheticDataset(Dataset):
271271
sparse (bool): Whether the dataset is sparse or not.
272272
273273
Returns:
274-
SyntheticDataset: A synthetic dataset object.
274+
(SyntheticDataset): A synthetic dataset object.
275275
276276
Examples:
277277
>>> from sklearn.datasets import make_classification
@@ -379,7 +379,7 @@ class FileConfig(Config):
379379
desc (dict): Extra config parameters to pass as keyword arguments.
380380
381381
Returns:
382-
FileConfig: A FileConfig object.
382+
(FileConfig): A FileConfig object.
383383
384384
Examples:
385385
>>> config = FileConfig(filename="config.json", directory="/path/to/directory")
@@ -436,7 +436,7 @@ class FileDataset(Dataset):
436436
desc (dict): Extra dataset parameters to pass as keyword arguments.
437437
438438
Returns:
439-
FileDataset: A FileDataset object.
439+
(FileDataset): A FileDataset object.
440440
441441
Examples:
442442
>>> dataset = FileDataset(filename="dataset.csv", directory="/path/to/directory")
@@ -494,7 +494,7 @@ class RemoteDataset(FileDataset):
494494
unpack (bool): Whether to unpack the download or not. Defaults to True.
495495
filename (str):
496496
An optional name to give to the file if the file is unpacked. Defaults to None.
497-
desc: Extra dataset parameters to pass as keyword arguments.
497+
desc (dict): Extra dataset parameters to pass as keyword arguments.
498498
499499
Examples:
500500
@@ -507,7 +507,7 @@ class RemoteDataset(FileDataset):
507507
508508
"""
509509

510-
def __init__(self, url: str, size: int, unpack: bool = True, filename: str = None, **desc):
510+
def __init__(self, url: str, size: int, unpack: bool = True, filename: str = None, **desc: dict):
511511
if filename is None:
512512
filename = path.basename(url)
513513

@@ -621,7 +621,8 @@ class GenericFileDataset(Dataset):
621621
parse_dates (list): A list of columns to parse as dates. Defaults to None.
622622
directory (str):
623623
The directory where the file is contained. Defaults to the location of the `datasets` module.
624-
desc: Extra dataset parameters to pass as keyword arguments.
624+
desc (dict): Extra dataset parameters to pass as keyword arguments.
625+
625626
626627
Examples:
627628
@@ -630,7 +631,11 @@ class GenericFileDataset(Dataset):
630631
>>> for x, y in dataset:
631632
... print(x, y)
632633
... break
633-
({'sepal_length': 5.1, 'sepal_width': 3.5, 'petal_length': 1.4, 'petal_width': 0.2}, 'setosa')
634+
({'sepal_length': 5.1,
635+
'sepal_width': 3.5,
636+
'petal_length': 1.4,
637+
'petal_width': 0.2},
638+
'setosa')
634639
635640
"""
636641

@@ -641,7 +646,7 @@ def __init__(
641646
converters: dict = None,
642647
parse_dates: list = None,
643648
directory: str = None,
644-
**desc,
649+
**desc: dict,
645650
):
646651
super().__init__(**desc)
647652
self.filename = filename
Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,39 @@
11
import json
2-
from . import base
2+
from spotPython.data import base
33

44

55
class LightHyperDict(base.FileConfig):
6-
"""Lightning hyperparameter dictionary."""
6+
"""Lightning hyperparameter dictionary.
7+
8+
This class extends the FileConfig class to provide a dictionary for storing hyperparameters.
9+
10+
Attributes:
11+
filename (str):
12+
The name of the file where the hyperparameters are stored.
13+
"""
714

815
def __init__(self):
16+
"""Initialize the LightHyperDict object.
17+
18+
Examples:
19+
>>> lhd = LightHyperDict()
20+
"""
921
super().__init__(
1022
filename="light_hyper_dict.json",
1123
)
1224

13-
def load(self):
25+
def load(self) -> dict:
26+
"""Load the hyperparameters from the file.
27+
28+
Returns:
29+
(dict): A dictionary containing the hyperparameters.
30+
31+
Examples:
32+
>>> lhd = LightHyperDict()
33+
>>> hyperparams = lhd.load()
34+
>>> print(hyperparams)
35+
{'learning_rate': 0.001, 'batch_size': 32, 'epochs': 10}
36+
"""
1437
with open(self.path, "r") as f:
1538
d = json.load(f)
1639
return d
Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,49 @@
11
import json
2-
from . import base
2+
from spotPython.data import base
33

44

55
class SklearnHyperDict(base.FileConfig):
6-
"""River hyperparameter dictionary."""
6+
"""Scikit-learn hyperparameter dictionary.
7+
8+
This class extends the FileConfig class to provide a dictionary for storing hyperparameters.
9+
10+
Attributes:
11+
filename (str): The name of the file where the hyperparameters are stored.
12+
"""
713

814
def __init__(self):
15+
"""Initialize the SklearnHyperDict object.
16+
17+
Examples:
18+
>>> shd = SklearnHyperDict()
19+
"""
920
super().__init__(
1021
filename="sklearn_hyper_dict.json",
1122
)
1223

13-
def load(self):
24+
def load(self) -> dict:
25+
"""Load the hyperparameters from the file.
26+
27+
Returns:
28+
(dict): A dictionary containing the hyperparameters.
29+
Examples:
30+
>>> shd = SklearnHyperDict()
31+
>>> hyperparams = shd.load()
32+
>>> print(hyperparams)
33+
{'learning_rate': 0.001, 'batch_size': 32, 'epochs': 10}
34+
"""
1435
with open(self.path, "r") as f:
1536
d = json.load(f)
1637
return d
38+
39+
40+
# Example usage
41+
if __name__ == "__main__":
42+
# Create a SklearnHyperDict object
43+
shd = SklearnHyperDict()
44+
45+
# Load the hyperparameters from the file
46+
hyperparams = shd.load()
47+
48+
# Print the hyperparameters
49+
print(hyperparams)
Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,48 @@
11
import json
2-
from . import base
2+
from spotPython.data import base
33

44

55
class TorchHyperDict(base.FileConfig):
6-
"""Torch hyperparameter dictionary."""
6+
"""PyTorch hyperparameter dictionary.
7+
8+
This class extends the FileConfig class to provide a dictionary for storing hyperparameters.
9+
10+
Attributes:
11+
filename (str): The name of the file where the hyperparameters are stored.
12+
"""
713

814
def __init__(self):
15+
"""Initialize the TorchHyperDict object.
16+
Examples:
17+
>>> thd = TorchHyperDict()
18+
"""
919
super().__init__(
1020
filename="torch_hyper_dict.json",
1121
)
1222

13-
def load(self):
23+
def load(self) -> dict:
24+
"""Load the hyperparameters from the file.
25+
26+
Returns:
27+
(dict): A dictionary containing the hyperparameters.
28+
Examples:
29+
>>> thd = TorchHyperDict()
30+
>>> hyperparams = thd.load()
31+
>>> print(hyperparams)
32+
{'learning_rate': 0.001, 'batch_size': 32, 'epochs': 10}
33+
"""
1434
with open(self.path, "r") as f:
1535
d = json.load(f)
1636
return d
37+
38+
39+
# Example usage
40+
if __name__ == "__main__":
41+
# Create a TorchHyperDict object
42+
thd = TorchHyperDict()
43+
44+
# Load the hyperparameters from the file
45+
hyperparams = thd.load()
46+
47+
# Print the hyperparameters
48+
print(hyperparams)

src/spotPython/data/torchdata.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,42 @@
11
from torchvision import datasets
22
import torchvision.transforms as transforms
3+
from typing import Tuple
34

45

5-
def load_data_cifar10(data_dir="./data"):
6+
def load_data_cifar10(data_dir: str = "./data") -> Tuple[datasets.CIFAR10, datasets.CIFAR10]:
7+
"""Load the CIFAR-10 dataset.
8+
This function loads the CIFAR-10 dataset using the torchvision library.
9+
The data is split into a training set and a test set.
10+
11+
Args:
12+
data_dir (str):
13+
The directory where the data is stored. Defaults to "./data".
14+
15+
Returns:
16+
(Tuple[datasets.CIFAR10, datasets.CIFAR10]):
17+
A tuple containing the training set and the test set.
18+
Examples:
19+
>>> trainset, testset = load_data_cifar10()
20+
>>> print(f"Training set size: {len(trainset)}")
21+
Training set size: 50000
22+
>>> print(f"Test set size: {len(testset)}")
23+
Test set size: 10000
24+
25+
"""
626
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
727

828
trainset = datasets.CIFAR10(root=data_dir, train=True, download=True, transform=transform)
929

1030
testset = datasets.CIFAR10(root=data_dir, train=False, download=True, transform=transform)
1131

1232
return trainset, testset
33+
34+
35+
# Example usage
36+
if __name__ == "__main__":
37+
# Load the CIFAR-10 dataset
38+
trainset, testset = load_data_cifar10()
39+
40+
# Print the size of the training set and the test set
41+
print(f"Training set size: {len(trainset)}")
42+
print(f"Test set size: {len(testset)}")

src/spotPython/data/vbdp.py

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,23 @@
11
# Purpose: Functions for the VBDP project
22

33

4-
def cluster_features(X):
5-
"""Clusters the features of a dataframe based on similarity
4+
import pandas as pd
5+
from sklearn.cluster import AffinityPropagation
6+
from sklearn.metrics.pairwise import manhattan_distances
7+
8+
9+
def cluster_features(X: pd.DataFrame) -> pd.DataFrame:
10+
"""Clusters the features of a dataframe based on similarity.
11+
12+
This function takes a dataframe with features and clusters them based on similarity.
13+
The resulting dataframe contains the original features as well as new features representing the clusters.
614
715
Args:
8-
X (pd.DataFrame): dataframe with features
16+
X (pd.DataFrame): A dataframe with features.
17+
918
Returns:
10-
X (pd.DataFrame): dataframe with new features
19+
(pd.DataFrame): A dataframe with the original features and new cluster features.
20+
1121
Examples:
1222
>>> df = pd.DataFrame({"a": [True, False, True], "b": [True, True, False], "c": [False, False, True]})
1323
>>> df
@@ -16,10 +26,10 @@ def cluster_features(X):
1626
1 False True False
1727
2 True False True
1828
>>> cluster_features(df)
19-
a b c cluster
20-
0 True True False 0
21-
1 False True False 1
22-
2 True False True 2
29+
a b c c_0 c_1 c_2 c_3
30+
0 True True False 0 0 0 0
31+
1 False True False 0 0 0 0
32+
2 True False True 0 0 0 0
2333
"""
2434
c_0 = X.columns[X.columns.str.contains("pain")]
2535
c_1 = X.columns[X.columns.str.contains("inflammation")]
@@ -32,13 +42,21 @@ def cluster_features(X):
3242
return X
3343

3444

35-
def affinity_propagation_features(X):
36-
"""Clusters the features of a dataframe using Affinity Propagation
45+
def affinity_propagation_features(X: pd.DataFrame) -> pd.DataFrame:
46+
"""Clusters the features of a dataframe using Affinity Propagation.
47+
48+
This function takes a dataframe with features and clusters them using the
49+
Affinity Propagation algorithm. The resulting dataframe contains the original
50+
features as well as a new feature representing the cluster labels.
3751
3852
Args:
39-
X (pd.DataFrame): dataframe with features
53+
X (pd.DataFrame):
54+
A dataframe with features.
55+
4056
Returns:
41-
X (pd.DataFrame): dataframe with new features
57+
(pd.DataFrame):
58+
A dataframe with the original features and a new cluster feature.
59+
4260
Examples:
4361
>>> df = pd.DataFrame({"a": [True, False, True], "b": [True, True, False], "c": [False, False, True]})
4462
>>> df
@@ -47,14 +65,12 @@ def affinity_propagation_features(X):
4765
1 False True False
4866
2 True False True
4967
>>> affinity_propagation_features(df)
68+
Estimated number of clusters: 3
5069
a b c cluster
5170
0 True True False 0
5271
1 False True False 1
5372
2 True False True 2
5473
"""
55-
from sklearn.cluster import AffinityPropagation
56-
from sklearn.metrics.pairwise import manhattan_distances
57-
5874
D = manhattan_distances(X)
5975
af = AffinityPropagation(random_state=0, affinity="precomputed").fit(D)
6076
cluster_centers_indices = af.cluster_centers_indices_

0 commit comments

Comments
 (0)