Skip to content

Commit 576f225

Browse files
0.27.11 pdp
1 parent e434bc7 commit 576f225

2 files changed

Lines changed: 75 additions & 1 deletion

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
77

88
[project]
99
name = "spotpython"
10-
version = "0.27.10"
10+
version = "0.27.11"
1111
authors = [
1212
{ name="T. Bartz-Beielstein", email="tbb@bartzundbartz.de" }
1313
]

src/spotpython/utils/effects.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
import numpy as np
22
import matplotlib.pyplot as plt
33
import pandas as pd
4+
from sklearn.ensemble import GradientBoostingRegressor
5+
from sklearn.inspection import PartialDependenceDisplay
6+
from sklearn.model_selection import train_test_split
47

58

69
def randorient(k, p, xi):
@@ -138,3 +141,74 @@ def screening(X, fun, xi, p, labels, range=None, print=False) -> pd.DataFrame:
138141
plt.gca().tick_params(labelsize=10)
139142
plt.grid(True)
140143
plt.show()
144+
145+
146+
def plot_all_partial_dependence(df, df_target, model="GradientBoostingRegressor", nrows=5, ncols=6, figsize=(20, 15)) -> None:
    """
    Plot a partial dependence plot (PDP) for every feature of a DataFrame, arranged in a grid.

    The data are split 80/20 with a fixed seed (``random_state=42``), the selected
    regressor is fitted on the training part, and one PDP per column of ``df`` is
    drawn from that training part. Unused grid cells are removed and the figure is
    shown with ``plt.show()``.

    Args:
        df (pd.DataFrame): DataFrame containing the features. Every column gets its own PDP.
        df_target (pd.Series): Series containing the target variable.
        model (str, optional): Name of the model class to use. Supported values are
            "GradientBoostingRegressor", "RandomForestRegressor" and "DecisionTreeRegressor".
            Defaults to "GradientBoostingRegressor".
        nrows (int, optional): Number of rows in the grid of subplots. If
            ``nrows * ncols`` is smaller than the number of features, the number of
            rows is increased so that every feature fits. Defaults to 5.
        ncols (int, optional): Number of columns in the grid of subplots. Defaults to 6.
        figsize (tuple, optional): Figure size (width, height) in inches. Defaults to (20, 15).

    Returns:
        None

    Raises:
        ValueError: If ``model`` is not one of the supported model names.

    Examples:
        >>> from spotpython.utils.effects import plot_all_partial_dependence
        >>> from sklearn.datasets import load_diabetes
        >>> import pandas as pd
        >>> data = load_diabetes()
        >>> df = pd.DataFrame(data.data, columns=data.feature_names)
        >>> df_target = pd.Series(data.target, name="target")
        >>> plot_all_partial_dependence(df, df_target, model="GradientBoostingRegressor", nrows=5, ncols=6, figsize=(20, 15))

    """
    # Instantiate the model first so that an unsupported name fails before any work is done.
    if model == "GradientBoostingRegressor":
        estimator = GradientBoostingRegressor(random_state=42)
    elif model == "RandomForestRegressor":
        from sklearn.ensemble import RandomForestRegressor

        estimator = RandomForestRegressor(random_state=42)
    elif model == "DecisionTreeRegressor":
        from sklearn.tree import DecisionTreeRegressor

        estimator = DecisionTreeRegressor(random_state=42)
    else:
        raise ValueError(f"Unsupported model: {model}")

    features = list(df.columns)
    n_features = len(features)

    # Grow the grid by rows when it is too small; otherwise indexing the axes
    # below would fail with an IndexError once the grid is exhausted.
    if ncols > 0 and nrows * ncols < n_features:
        nrows = -(-n_features // ncols)  # ceiling division

    # Only the training part is used: the model is fitted on it and the PDPs are computed from it.
    X_train, _, y_train, _ = train_test_split(df, df_target, test_size=0.2, random_state=42)
    estimator.fit(X_train, y_train)

    # squeeze=False guarantees a 2D array of axes, even for a 1x1 grid.
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize, squeeze=False)
    axes = axes.ravel()

    # One PDP per feature, titled with the feature name.
    for ax, feature in zip(axes, features):
        PartialDependenceDisplay.from_estimator(estimator, X_train, [feature], ax=ax)
        ax.set_title(feature)

    # Remove the grid cells that are left over after all features have been plotted.
    for unused_ax in axes[n_features:]:
        fig.delaxes(unused_ax)

    plt.tight_layout()  # Adjust subplot parameters for a tight layout
    plt.show()

0 commit comments

Comments
 (0)