Skip to content

Commit ccad22e

Browse files
0.14.54
passing of boolean enabled
1 parent 0bc0486 commit ccad22e

7 files changed

Lines changed: 122 additions & 12 deletions

File tree

notebooks/00_spotPython_tests.ipynb

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4444,6 +4444,82 @@
44444444
"data.to_csv('moon.csv', index=False)"
44454445
]
44464446
},
4447+
{
4448+
"cell_type": "markdown",
4449+
"metadata": {},
4450+
"source": [
4451+
"# Sklearn Data Sets"
4452+
]
4453+
},
4454+
{
4455+
"cell_type": "code",
4456+
"execution_count": 6,
4457+
"metadata": {},
4458+
"outputs": [],
4459+
"source": [
4460+
"from sklearn.datasets import make_classification\n",
4461+
"import pandas as pd\n",
4462+
"import numpy as np\n",
4463+
"X, y = make_classification(n_samples=1000, n_features=20, n_informative=2, n_redundant=2, n_repeated=0, n_classes=2, n_clusters_per_class=2, weights=None, flip_y=0.01, class_sep=1.0, hypercube=True, shift=0.0, scale=1.0, shuffle=True, random_state=None)\n",
4464+
"# combine the training and test data and save to a csv file\n",
4465+
"data = pd.DataFrame(np.hstack((X, y.reshape(-1, 1))))\n",
4466+
"data.columns = [f\"x{i}\" for i in range(1, 21)] + [\"y\"]\n",
4467+
"data.to_csv('binary_classification.csv', index=False)\n",
4468+
"\n"
4469+
]
4470+
},
4471+
{
4472+
"cell_type": "code",
4473+
"execution_count": 8,
4474+
"metadata": {},
4475+
"outputs": [],
4476+
"source": [
4477+
"from sklearn.datasets import make_classification\n",
4478+
"import pandas as pd\n",
4479+
"import numpy as np\n",
4480+
"X, y = make_classification(n_samples=1000, n_features=20, n_informative=9, n_redundant=2, n_repeated=0, n_classes=10, n_clusters_per_class=2, weights=None, flip_y=0.01, class_sep=1.0, hypercube=True, shift=0.0, scale=1.0, shuffle=True, random_state=None)\n",
4481+
"# combine the training and test data and save to a csv file\n",
4482+
"data = pd.DataFrame(np.hstack((X, y.reshape(-1, 1))))\n",
4483+
"data.columns = [f\"x{i}\" for i in range(1, 21)] + [\"y\"]\n",
4484+
"data.to_csv('multiple_classification.csv', index=False)"
4485+
]
4486+
},
4487+
{
4488+
"cell_type": "code",
4489+
"execution_count": 10,
4490+
"metadata": {},
4491+
"outputs": [],
4492+
"source": [
4493+
"from sklearn.datasets import make_regression\n",
4494+
"import pandas as pd\n",
4495+
"import numpy as np\n",
4496+
"X, y = make_regression(n_samples=1000, n_features=20, n_informative=10, n_targets=1, bias=0.0, effective_rank=None, tail_strength=0.5, noise=0.0, shuffle=True, coef=False, random_state=None)\n",
4497+
"# combine the training and test data and save to a csv file\n",
4498+
"data = pd.DataFrame(np.hstack((X, y.reshape(-1, 1))))\n",
4499+
"data.columns = [f\"x{i}\" for i in range(1, 21)] + [\"y\"]\n",
4500+
"data.to_csv('regression.csv', index=False)\n"
4501+
]
4502+
},
4503+
{
4504+
"cell_type": "code",
4505+
"execution_count": 11,
4506+
"metadata": {},
4507+
"outputs": [
4508+
{
4509+
"data": {
4510+
"text/plain": [
4511+
"True"
4512+
]
4513+
},
4514+
"execution_count": 11,
4515+
"metadata": {},
4516+
"output_type": "execute_result"
4517+
}
4518+
],
4519+
"source": [
4520+
"bool(1)"
4521+
]
4522+
},
44474523
{
44484524
"cell_type": "code",
44494525
"execution_count": null,

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
77

88
[project]
99
name = "spotpython"
10-
version = "0.14.52"
10+
version = "0.14.54"
1111
authors = [
1212
{ name="T. Bartz-Beielstein", email="tbb@bartzundbartz.de" }
1313
]

src/spotPython/data/lightcrossvalidationdatamodule.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
import torch
77

88

9-
109
class LightCrossValidationDataModule(L.LightningDataModule):
1110
"""
1211
A LightningDataModule for handling cross-validation data splits.
@@ -89,7 +88,7 @@ def setup(self, stage: Optional[str] = None) -> None:
8988
print(f"Train Dataset Size: {len(self.data_train)}")
9089
self.data_val = Subset(dataset_full, val_indexes)
9190
print(f"Val Dataset Size: {len(self.data_val)}")
92-
91+
9392
if self.scaler is not None:
9493
# Fit the scaler on training data and transform both train and val data
9594
scaler_train_data = torch.stack([self.data_train[i][0] for i in range(len(self.data_train))]).squeeze(1)

src/spotPython/fun/hypersklearn.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ def fun_sklearn(self, X: np.ndarray, fun_control: dict = None) -> np.ndarray:
144144
df_eval, _ = evaluate_model_oob(model, self.fun_control)
145145
elif eval_type == "train_cv":
146146
df_eval, _ = evaluate_cv(model, self.fun_control)
147-
else: # eval_type == "train_hold_out":
147+
else: # None or "evaluate_hold_out":
148148
df_eval, _ = evaluate_hold_out(model, self.fun_control)
149149
except Exception as err:
150150
print(f"Error in fun_sklearn(). Call to evaluate_model failed. {err=}, {type(err)=}")

src/spotPython/hyperparameters/values.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,9 @@ def get_dict_with_levels_and_types(fun_control: Dict[str, Any], v: Dict[str, Any
265265
c = d[key]["levels"][value]
266266
k = class_for_name(mdl, c)
267267
new_dict[key] = k()
268+
# bool() introduced to convert 0 and 1 to False and True in v0.14.54
269+
elif d[key]["core_model_parameter_type"] == "bool":
270+
new_dict[key] = bool(d[key]["levels"][value])
268271
else:
269272
new_dict[key] = d[key]["levels"][value]
270273
else:
@@ -1821,6 +1824,24 @@ def get_prep_model(prepmodel_name) -> object:
18211824
return prepmodel
18221825

18231826

1827+
def get_sklearn_scaler(scaler_name) -> object:
    """
    Get the sklearn scaler class from its name.

    Args:
        scaler_name (str):
            The name of an attribute of `sklearn.preprocessing`,
            e.g., "StandardScaler". Both `None` and the string "None"
            mean that no scaler is requested.

    Returns:
        (object):
            The attribute of `sklearn.preprocessing` with the given name
            (the scaler class itself, not an instance), or `None` if no
            scaler is requested.

    Raises:
        AttributeError:
            If `sklearn.preprocessing` has no attribute named `scaler_name`.
    """
    # fun_control_init() uses None as the default for scaler_name, whereas
    # string-based configurations pass "None". Accept both spellings, so that
    # the default does not end up in getattr(), which rejects non-string names.
    if scaler_name is None or scaler_name == "None":
        return None
    return getattr(sklearn.preprocessing, scaler_name)
1843+
1844+
18241845
def get_metric_sklearn(metric_name) -> object:
18251846
"""
18261847
Returns the sklearn metric from the metric name.

src/spotPython/sklearn/traintest.py

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,12 @@
66

77

88
def evaluate_model(model, fun_control):
9-
# pprint.pprint(fun_control)
109
try:
1110
X_train, y_train = get_Xy_from_df(fun_control["train"], fun_control["target_column"])
1211
X_test, y_test = get_Xy_from_df(fun_control["test"], fun_control["target_column"])
12+
if fun_control["scaler"] is not None:
13+
X_train = fun_control["scaler"]().fit_transform(X_train)
14+
X_test = fun_control["scaler"]().transform(X_test)
1315
model.fit(X_train, y_train)
1416
if fun_control["predict_proba"]:
1517
df_preds = model.predict_proba(X_test)
@@ -24,30 +26,38 @@ def evaluate_model(model, fun_control):
2426

2527

2628
def evaluate_hold_out(model, fun_control):
    """
    Evaluate a model on a hold-out split of the training data.

    The data frame `fun_control["train"]` is split with `train_test_split()`
    using a fixed `random_state=42`. If a scaler is configured, it is fitted
    on the training part only and the same fitted instance is applied to the
    hold-out part. The model is then fitted, and its predictions on the
    hold-out part are scored with `fun_control["metric_sklearn"]`.

    Args:
        model (object):
            Estimator that provides `fit()` and `predict()` or
            `predict_proba()`.
        fun_control (dict):
            Control dictionary. The following keys are read: "train",
            "target_column", "test_size", "scaler", "predict_proba", "task",
            "metric_sklearn", and "metric_params".

    Returns:
        (tuple):
            `(df_eval, df_preds)`, i.e., the metric value and the predictions.
            If splitting, fitting, or predicting fails, an error message is
            printed and `np.nan` is returned for every value that could not
            be computed.
    """
    train_df = fun_control["train"]
    target_column = fun_control["target_column"]
    # Both results default to NaN, so that every failure path can return a
    # well-defined tuple instead of hitting an unbound local variable.
    df_eval = np.nan
    df_preds = np.nan
    try:
        X_train, X_test, y_train, y_test = train_test_split(
            train_df.drop(target_column, axis=1),
            train_df[target_column],
            random_state=42,
            test_size=fun_control["test_size"],
            # stratify=train_df[target_column],
        )
    except Exception as err:
        print(f"Error in evaluate_hold_out(). Call to train_test_split() failed. {err=}, {type(err)=}")
        # Without a split, there is nothing to fit or to predict.
        return df_eval, df_preds
    scaler = None
    try:
        if fun_control["scaler"] is not None:
            # fun_control["scaler"] is called to create a new scaler instance.
            scaler = fun_control["scaler"]()
            X_train = scaler.fit_transform(X_train)
        model.fit(X_train, y_train)
    except Exception as err:
        print(f"Error in evaluate_hold_out(). Call to fit() failed. {err=}, {type(err)=}")
        # An unfitted model (or scaler) cannot be used for predictions.
        return df_eval, df_preds
    try:
        # convert to numpy array, see https://github.com/scikit-learn/scikit-learn/pull/26772
        X_test = np.array(X_test)
        if scaler is not None:
            # Re-use the scaler that was fitted on the training part.
            X_test = scaler.transform(X_test)
        y_test = np.array(y_test)
        if fun_control["predict_proba"] or fun_control["task"] == "classification":
            df_preds = model.predict_proba(X_test)
        else:
            df_preds = model.predict(X_test)
        df_eval = fun_control["metric_sklearn"](y_test, df_preds, **fun_control["metric_params"])
    except Exception as err:
        print(f"Error in evaluate_hold_out(). Call to predict() failed. {err=}, {type(err)=}")
        # Predictions that were already computed are kept; only the metric is reset.
        df_eval = np.nan
    return df_eval, df_preds

src/spotPython/utils/init.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ def fun_control_init(
6464
prep_model_name=None,
6565
progress_file=None,
6666
scaler=None,
67+
scaler_name=None,
6768
scenario=None,
6869
seed=123,
6970
show_models=False,
@@ -198,6 +199,8 @@ def fun_control_init(
198199
scaler (object):
199200
The scaler object, e.g., the TorchStandard scaler from spot.utils.scaler.py.
200201
Default is None.
202+
scaler_name (str):
203+
The name of the scaler object. Default is None.
201204
scenario (str):
202205
The scenario to use. Default is None. Can be "river", "sklearn", or "lightning".
203206
seed (int):
@@ -416,6 +419,7 @@ def fun_control_init(
416419
"progress_file": progress_file,
417420
"save_model": False,
418421
"scaler": scaler,
422+
"scaler_name": scaler_name,
419423
"scenario": scenario,
420424
"seed": seed,
421425
"show_batch_interval": 1_000_000,

0 commit comments

Comments
 (0)