|
| 1 | +import numpy as np |
| 2 | +from sklearn.base import BaseEstimator, RegressorMixin |
| 3 | +from sklearn.cluster import KMeans |
| 4 | +from sklearn.neighbors import NearestNeighbors |
| 5 | +from spotpython.gp.gp_sep import GPsep |
| 6 | +import copy |
| 7 | + |
| 8 | + |
| 9 | +class TreeGP(BaseEstimator, RegressorMixin): |
| 10 | + """A tree-based Gaussian Process model that partitions the input space |
| 11 | + and fits local GP models on each partition. |
| 12 | +
|
| 13 | + This model divides the input space using clustering, trains a separate |
| 14 | + GPsep model on each cluster, and combines their predictions to cover |
| 15 | + the whole input space. |
| 16 | +
|
| 17 | + Attributes: |
| 18 | + n_clusters: Number of clusters to partition the input space. |
| 19 | + min_points: Minimum number of points required in each cluster. |
| 20 | + weighting: Method for combining predictions ('hard' or 'distance'). |
| 21 | + distance_power: Power parameter for distance-based weighting. |
| 22 | + gp_params: Parameters passed to each local GPsep model. |
| 23 | + cluster_model: The clustering algorithm used for partitioning. |
| 24 | + local_models: List of GPsep models for each partition. |
| 25 | + cluster_centers: Centers of each cluster. |
| 26 | + X_bounds: The bounds of the input space from training data. |
| 27 | + y_bounds: The bounds of the output space from training data. |
| 28 | + """ |
| 29 | + |
| 30 | + def __init__(self, n_clusters=3, min_points=10, weighting="distance", distance_power=2, gp_params=None): |
| 31 | + """Initialize the TreeGP model with partitioning and GP parameters. |
| 32 | +
|
| 33 | + Args: |
| 34 | + n_clusters: Number of clusters to create. Defaults to 3. |
| 35 | + min_points: Minimum points required per cluster. If a cluster has |
| 36 | + fewer points, it will be merged with nearest cluster. |
| 37 | + Defaults to 10. |
| 38 | + weighting: Method for combining predictions ('hard' or 'distance'). |
| 39 | + Defaults to 'distance'. |
| 40 | + distance_power: Power parameter for distance-based weighting. |
| 41 | + Higher values make the weighting more local. Defaults to 2. |
| 42 | + gp_params: Dictionary of parameters to pass to each GPsep model. |
| 43 | + Defaults to None. |
| 44 | + """ |
| 45 | + self.n_clusters = n_clusters |
| 46 | + self.min_points = min_points |
| 47 | + self.weighting = weighting |
| 48 | + self.distance_power = distance_power |
| 49 | + self.gp_params = gp_params or {} |
| 50 | + |
| 51 | + # These will be set during fitting |
| 52 | + self.cluster_model = None |
| 53 | + self.local_models = [] |
| 54 | + self.cluster_centers = None |
| 55 | + self.X_bounds = None |
| 56 | + self.y_bounds = None |
| 57 | + self.cluster_indices = None |
| 58 | + |
| 59 | + def get_params(self, deep=True): |
| 60 | + """Get parameters for this estimator. |
| 61 | +
|
| 62 | + Args: |
| 63 | + deep: If True, will return the parameters for this estimator and |
| 64 | + contained subobjects that are estimators. Defaults to True. |
| 65 | +
|
| 66 | + Returns: |
| 67 | + dict: Parameter names mapped to their values. |
| 68 | + """ |
| 69 | + params = { |
| 70 | + "n_clusters": self.n_clusters, |
| 71 | + "min_points": self.min_points, |
| 72 | + "weighting": self.weighting, |
| 73 | + "distance_power": self.distance_power, |
| 74 | + "gp_params": self.gp_params, |
| 75 | + } |
| 76 | + |
| 77 | + if deep and self.gp_params: |
| 78 | + gp_params_copy = copy.deepcopy(self.gp_params) |
| 79 | + params["gp_params"] = gp_params_copy |
| 80 | + |
| 81 | + return params |
| 82 | + |
| 83 | + def set_params(self, **parameters): |
| 84 | + """Set the parameters of this estimator. |
| 85 | +
|
| 86 | + Args: |
| 87 | + **parameters: Estimator parameters as keyword arguments. |
| 88 | +
|
| 89 | + Returns: |
| 90 | + self: Estimator instance. |
| 91 | + """ |
| 92 | + for parameter, value in parameters.items(): |
| 93 | + if parameter == "gp_params" and isinstance(value, dict): |
| 94 | + self.gp_params = copy.deepcopy(value) |
| 95 | + else: |
| 96 | + setattr(self, parameter, value) |
| 97 | + return self |
| 98 | + |
| 99 | + def fit(self, X, y): |
| 100 | + """Fit the TreeGP model by partitioning the input space and |
| 101 | + fitting local GP models to each partition. |
| 102 | +
|
| 103 | + Args: |
| 104 | + X: Training input samples of shape (n_samples, n_features). |
| 105 | + y: Target values of shape (n_samples,). |
| 106 | +
|
| 107 | + Returns: |
| 108 | + self: Returns self. |
| 109 | +
|
| 110 | + Raises: |
| 111 | + ValueError: If the number of samples is less than n_clusters or min_points. |
| 112 | + """ |
| 113 | + # Convert pandas objects to numpy arrays if necessary |
| 114 | + if hasattr(X, "to_numpy"): |
| 115 | + X = X.to_numpy() |
| 116 | + if hasattr(y, "to_numpy"): |
| 117 | + y = y.to_numpy() |
| 118 | + |
| 119 | + y = y.reshape(-1) |
| 120 | + n_samples = X.shape[0] |
| 121 | + |
| 122 | + if n_samples < max(self.n_clusters, self.min_points): |
| 123 | + raise ValueError(f"Number of samples ({n_samples}) must be at least " f"max(n_clusters, min_points) = {max(self.n_clusters, self.min_points)}") |
| 124 | + |
| 125 | + # Store data bounds for later normalization |
| 126 | + self.X_bounds = (np.min(X, axis=0), np.max(X, axis=0)) |
| 127 | + self.y_bounds = (np.min(y), np.max(y)) |
| 128 | + |
| 129 | + # Partition the input space using KMeans clustering |
| 130 | + self.cluster_model = KMeans(n_clusters=self.n_clusters, random_state=42, n_init="auto") |
| 131 | + |
| 132 | + cluster_labels = self.cluster_model.fit_predict(X) |
| 133 | + self.cluster_centers = self.cluster_model.cluster_centers_ |
| 134 | + |
| 135 | + # Handle clusters with too few points by merging them |
| 136 | + valid_clusters = [] |
| 137 | + clusters_to_merge = [] |
| 138 | + |
| 139 | + for i in range(self.n_clusters): |
| 140 | + cluster_size = np.sum(cluster_labels == i) |
| 141 | + if cluster_size >= self.min_points: |
| 142 | + valid_clusters.append(i) |
| 143 | + else: |
| 144 | + clusters_to_merge.append(i) |
| 145 | + |
| 146 | + # If there are clusters to merge, reassign their points to nearest valid cluster |
| 147 | + if clusters_to_merge: |
| 148 | + if not valid_clusters: |
| 149 | + # If no valid clusters, just use a single GPsep model |
| 150 | + print("No valid clusters with enough points. Falling back to single model.") |
| 151 | + self.n_clusters = 1 |
| 152 | + self.cluster_centers = np.mean(X, axis=0, keepdims=True) |
| 153 | + cluster_labels = np.zeros(n_samples, dtype=int) |
| 154 | + else: |
| 155 | + # Reassign points from small clusters to nearest valid cluster |
| 156 | + nn = NearestNeighbors(n_neighbors=1).fit(self.cluster_centers[valid_clusters]) |
| 157 | + |
| 158 | + for i in clusters_to_merge: |
| 159 | + # Find points in this cluster |
| 160 | + mask = cluster_labels == i |
| 161 | + if np.any(mask): |
| 162 | + # Find nearest valid cluster for each point |
| 163 | + _, indices = nn.kneighbors(X[mask]) |
| 164 | + # Reassign to nearest valid cluster |
| 165 | + cluster_labels[mask] = [valid_clusters[idx[0]] for idx in indices] |
| 166 | + |
| 167 | + # Save cluster assignments |
| 168 | + self.cluster_indices = [np.where(cluster_labels == i)[0] for i in range(self.n_clusters)] |
| 169 | + |
| 170 | + # Fit local GPsep models for each cluster |
| 171 | + self.local_models = [] |
| 172 | + |
| 173 | + for i in range(self.n_clusters): |
| 174 | + idx = self.cluster_indices[i] |
| 175 | + if len(idx) > 0: # Make sure we have points in this cluster |
| 176 | + # Create and fit a GPsep model for this cluster |
| 177 | + gp = GPsep(**self.gp_params) |
| 178 | + gp.fit(X[idx], y[idx]) |
| 179 | + self.local_models.append(gp) |
| 180 | + else: |
| 181 | + # This shouldn't happen after our merging step, but just in case |
| 182 | + self.local_models.append(None) |
| 183 | + |
| 184 | + return self |
| 185 | + |
| 186 | + def predict(self, X, return_std=False, **kwargs): |
| 187 | + """Predict using the TreeGP model. |
| 188 | +
|
| 189 | + Args: |
| 190 | + X: Query points of shape (n_samples, n_features). |
| 191 | + return_std: Whether to return standard deviations. Defaults to False. |
| 192 | + **kwargs: Additional keyword arguments passed to the local models' predict method. |
| 193 | +
|
| 194 | + Returns: |
| 195 | + If return_std is False, returns predicted values. |
| 196 | + If return_std is True, returns (predicted_values, standard_deviations). |
| 197 | + """ |
| 198 | + if hasattr(X, "to_numpy"): |
| 199 | + X = X.to_numpy() |
| 200 | + |
| 201 | + n_samples = X.shape[0] |
| 202 | + y_pred = np.zeros(n_samples) |
| 203 | + |
| 204 | + if return_std: |
| 205 | + y_std = np.zeros(n_samples) |
| 206 | + |
| 207 | + # Get distances to cluster centers for weighting |
| 208 | + distances = np.zeros((n_samples, self.n_clusters)) |
| 209 | + |
| 210 | + for i in range(self.n_clusters): |
| 211 | + # Calculate Euclidean distance to each cluster center |
| 212 | + diff = X - self.cluster_centers[i] |
| 213 | + distances[:, i] = np.sqrt(np.sum(diff**2, axis=1)) |
| 214 | + |
| 215 | + if self.weighting == "hard": |
| 216 | + # Hard assignment: use closest cluster's model for prediction |
| 217 | + cluster_assignments = np.argmin(distances, axis=1) |
| 218 | + |
| 219 | + for i in range(self.n_clusters): |
| 220 | + mask = cluster_assignments == i |
| 221 | + if np.any(mask) and self.local_models[i] is not None: |
| 222 | + if return_std: |
| 223 | + # Use return_full=True to get the full prediction dictionary |
| 224 | + preds = self.local_models[i].predict(X[mask], return_full=True) |
| 225 | + y_pred[mask] = preds["mean"] |
| 226 | + |
| 227 | + # Extract standard deviations from the covariance matrix |
| 228 | + if "Sigma" in preds: |
| 229 | + # If full covariance matrix is returned, extract diagonal |
| 230 | + if len(preds["Sigma"].shape) == 2: |
| 231 | + y_std[mask] = np.sqrt(np.diag(preds["Sigma"])) |
| 232 | + else: |
| 233 | + # If already diagonal elements, just use directly |
| 234 | + y_std[mask] = np.sqrt(preds["Sigma"]) |
| 235 | + elif "s2" in preds: |
| 236 | + # Some implementations return variances as "s2" |
| 237 | + y_std[mask] = np.sqrt(preds["s2"]) |
| 238 | + else: |
| 239 | + y_pred[mask] = self.local_models[i].predict(X[mask], return_full=False) |
| 240 | + |
| 241 | + else: # distance weighting |
| 242 | + # Compute weights from distances (closer = higher weight) |
| 243 | + weights = 1.0 / np.maximum(distances**self.distance_power, 1e-10) |
| 244 | + # Normalize weights |
| 245 | + weights_sum = np.sum(weights, axis=1, keepdims=True) |
| 246 | + weights = weights / np.maximum(weights_sum, 1e-10) |
| 247 | + |
| 248 | + # Get predictions from each model and combine with weights |
| 249 | + all_preds = np.zeros((n_samples, self.n_clusters)) |
| 250 | + all_var = np.zeros((n_samples, self.n_clusters)) # Store variances instead of std devs |
| 251 | + |
| 252 | + for i in range(self.n_clusters): |
| 253 | + if self.local_models[i] is not None: |
| 254 | + if return_std: |
| 255 | + # Use return_full=True to get the full prediction dictionary |
| 256 | + preds = self.local_models[i].predict(X, return_full=True) |
| 257 | + all_preds[:, i] = preds["mean"] |
| 258 | + |
| 259 | + # Extract variances from the covariance matrix |
| 260 | + if "Sigma" in preds: |
| 261 | + # If full covariance matrix is returned, extract diagonal |
| 262 | + if len(preds["Sigma"].shape) == 2: |
| 263 | + all_var[:, i] = np.diag(preds["Sigma"]) |
| 264 | + else: |
| 265 | + # If already diagonal elements, use directly |
| 266 | + all_var[:, i] = preds["Sigma"] |
| 267 | + elif "s2" in preds: |
| 268 | + all_var[:, i] = preds["s2"] |
| 269 | + else: |
| 270 | + all_preds[:, i] = self.local_models[i].predict(X) |
| 271 | + |
| 272 | + # Combine predictions using weights |
| 273 | + y_pred = np.sum(all_preds * weights, axis=1) |
| 274 | + |
| 275 | + if return_std: |
| 276 | + # Combine variances (with appropriate weighting) |
| 277 | + y_var = np.sum(all_var * (weights**2), axis=1) |
| 278 | + y_std = np.sqrt(y_var) |
| 279 | + |
| 280 | + if return_std: |
| 281 | + return y_pred, y_std |
| 282 | + else: |
| 283 | + return y_pred |
| 284 | + |
| 285 | + def score(self, X, y, **kwargs): |
| 286 | + """Calculate the coefficient of determination R^2 of the prediction. |
| 287 | +
|
| 288 | + Args: |
| 289 | + X: Test input samples. |
| 290 | + y: True values for X. |
| 291 | + **kwargs: Additional arguments passed to predict(). |
| 292 | +
|
| 293 | + Returns: |
| 294 | + float: R^2 score. |
| 295 | + """ |
| 296 | + from sklearn.metrics import r2_score |
| 297 | + |
| 298 | + return r2_score(y, self.predict(X, **kwargs)) |
| 299 | + |
| 300 | + |
| 301 | +# Helper function to create and fit a TreeGP model in a single call |
| 302 | +def newTreeGP(X, y, n_clusters=3, **kwargs): |
| 303 | + """ |
| 304 | + Create and fit a TreeGP model in a single function call. |
| 305 | +
|
| 306 | + Args: |
| 307 | + X: Training input samples. |
| 308 | + y: Target values. |
| 309 | + n_clusters: Number of clusters to partition the data. Defaults to 3. |
| 310 | + **kwargs: Additional parameters passed to TreeGP constructor. |
| 311 | +
|
| 312 | + Returns: |
| 313 | + TreeGP: The fitted TreeGP model. |
| 314 | + """ |
| 315 | + model = TreeGP(n_clusters=n_clusters, **kwargs) |
| 316 | + return model.fit(X, y) |
0 commit comments