Skip to content

Commit 22807f3

Browse files
0.33.16
documentation
1 parent d9f13db commit 22807f3

3 files changed

Lines changed: 73 additions & 5 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,3 +343,4 @@ notebooks/00_spotPython_tests.pdf
343343
notebooks/00_spotPython_tests.tex
344344
notebooks/00_spotPython_tests_files/libs/bootstrap/bootstrap-973236bd072d72a04ee9cd82dcc9cb29.min.css
345345
notebooks/00_spotPython_tests_files/libs/quarto-html/quarto-syntax-highlighting-549806ee2085284f45b00abea8c6df48.css
346+
/Literature.d

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
77

88
[project]
99
name = "spotpython"
10-
version = "0.33.15"
10+
version = "0.33.16"
1111
authors = [
1212
{ name="T. Bartz-Beielstein", email="tbb@bartzundbartz.de" }
1313
]

src/spotpython/surrogate/kriging.py

Lines changed: 71 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,11 @@ def fit(self, X: np.ndarray, y: np.ndarray, bounds: Optional[List[Tuple[float, f
364364
Fits the Kriging model to training data X and y. This method is compatible
365365
with scikit-learn and uses differential evolution to optimize the hyperparameters
366366
(log(theta)).
367+
Fitting pipeline (Forrester, Ch. 2–3):
368+
- Set masks and θ dimensionality (isotropic → n_theta=1, else n_theta=k).
369+
- Assemble bounds on [log10 θ] (+ [log10 λ] for regression/reinterpolation; + p if enabled).
370+
- Maximize the concentrated likelihood (Ch. 3) to get [log10 θ, log10 λ, p].
371+
- Build R and store U, μ-related quantities implicitly via U (used at prediction).
367372
368373
Args:
369374
X (np.ndarray):
@@ -402,6 +407,8 @@ def fit(self, X: np.ndarray, y: np.ndarray, bounds: Optional[List[Tuple[float, f
402407
# Calculate and store min and max of X
403408
self.min_X = np.min(self.X_, axis=0)
404409
self.max_X = np.max(self.X_, axis=0)
410+
411+
# Bounds: [θ] for interpolation; [θ, λ] for regression/reinterpolation; [+ p] if optim_p
405412
if bounds is None:
406413
if self.method == "interpolation":
407414
bounds = [(self.min_theta, self.max_theta)] * self.k
@@ -414,9 +421,11 @@ def fit(self, X: np.ndarray, y: np.ndarray, bounds: Optional[List[Tuple[float, f
414421
n_p = self.n_p if hasattr(self, "n_p") else self.k
415422
bounds += [(self.min_p, self.max_p)] * n_p
416423

424+
# ML optimization (Forrester, Ch. 3)
417425
self.logtheta_loglambda_p_, _ = self.max_likelihood(bounds)
418426

419427
# store theta and Lambda in log scale
428+
# Store parameters on log10 scale; convert to linear only where needed
420429
self.theta = self.logtheta_loglambda_p_[: self.n_theta]
421430
if (self.method == "regression") or (self.method == "reinterpolation"):
422431
# select the first n_theta values from logtheta_loglambda_p_:
@@ -431,6 +440,7 @@ def fit(self, X: np.ndarray, y: np.ndarray, bounds: Optional[List[Tuple[float, f
431440
raise ValueError("method must be one of 'interpolation', 'regression', or 'reinterpolation'")
432441

433442
# Once logtheta_lambda is found, compute the final correlation matrix
443+
# Finalize: compute R and its Cholesky at the found hyperparameters
434444
self.negLnLike, self.Psi_, self.U_ = self.likelihood(self.logtheta_loglambda_p_)
435445

436446
# Update log with the current values
@@ -441,6 +451,10 @@ def predict(self, X: np.ndarray, return_std=False, return_val: str = "y") -> np.
441451
"""
442452
Predicts the Kriging response at a set of points X. This method is compatible
443453
with scikit-learn and returns predictions for the input points.
454+
Batch prediction wrapper around _pred:
455+
- Shapes normalized so X is (n_samples, k).
456+
- Forrester (Ch. 3/6): returns f̂(x), and optionally s(x) and EI
457+
(EI returned as −log10(EI) internally for stability).
444458
445459
Args:
446460
X (np.ndarray):
@@ -513,7 +527,13 @@ def build_Psi(self) -> None:
513527
correlation matrix as described in [Forr08a, p.57].
514528
515529
Notes:
516-
Returns only the upper triangle, as the matrix is symmetric and the diagonal will be handled later.
530+
- Correlation follows the stationary Gaussian kernel used in Kriging:
531+
R = exp(-D), with D a weighted distance. See Forrester et al. (2008),
532+
Ch. 2, correlation modelling.
533+
- The code builds D as a sum of per-dimension distance contributions
534+
scaled by 10**theta (theta is stored in log10), then applies exp(-D).
535+
- Returns only the upper triangle; the symmetric and diagonal parts
536+
are handled by the caller.
517537
518538
Attributes:
519539
Psi (np.matrix): Correlation matrix Psi. Shape (n,n).
@@ -537,25 +557,30 @@ def build_Psi(self) -> None:
537557
"""
538558
try:
539559
n, k = self.X_.shape
560+
# weights: 10**theta; isotropic expands to k values
540561
theta10 = self._get_theta10_from_logtheta()
541562

542563
# Initialize the Psi matrix
543564
Psi = np.zeros((n, n), dtype=np.float64)
544565

545566
# Calculate the distance matrix using ordered variables
567+
# Ordered/numeric part: squared Euclidean with weights (Forrester, Ch. 2)
546568
if self.ordered_mask.any():
547569
X_ordered = self.X_[:, self.ordered_mask]
548570
D_ordered = squareform(pdist(X_ordered, metric="sqeuclidean", w=theta10[self.ordered_mask]))
549571
Psi += D_ordered
550572

573+
# Factorial part: categorical metric wrapped by exp(-D) (still stationary envelope)
551574
# Add the contribution of factor variables to the distance matrix
552575
if self.factor_mask.any():
553576
X_factor = self.X_[:, self.factor_mask]
554577
D_factor = squareform(pdist(X_factor, metric=self.metric_factorial, w=theta10[self.factor_mask]))
555578
Psi += D_factor
556579

557-
# Calculate correlation from distance
580+
# Correlation from distance: R = exp(-D) (Forrester, Ch. 2)
558581
Psi = np.exp(-Psi)
582+
583+
# Diagnostics (not in Forrester): inf/cond for stability
559584
# Check for infinite values
560585
self.inf_Psi = np.isinf(Psi).any()
561586
# Calculate condition number
@@ -572,6 +597,10 @@ def likelihood(self, x: np.ndarray) -> Tuple[float, np.ndarray, np.ndarray]:
572597
"""
573598
Computes the negative of the concentrated log-likelihood for a given set
574599
of log(theta) parameters. Returns the negative log-likelihood, the correlation matrix Psi, and its Cholesky factor U.
600+
Negative concentrated log-likelihood (Forrester, Ch. 3):
601+
- Given R (here Psi with diagonal and nugget), μ = (1^T R^{-1} y)/(1^T R^{-1} 1),
602+
σ^2 = (r^T R^{-1} r)/n with r = y - 1·μ,
603+
- Concentrated −log L = (n/2) log(σ^2) + (1/2) log |R| (constants omitted).
575604
576605
Args:
577606
x (np.ndarray):
@@ -601,16 +630,18 @@ def likelihood(self, x: np.ndarray) -> Tuple[float, np.ndarray, np.ndarray]:
601630
X = self.X_
602631
y = self.y_.flatten()
603632

633+
# First entries are log10(theta); optionally followed by log10(lambda) and p
604634
self.theta = x[: self.n_theta]
605635

606636
if (self.method == "regression") or (self.method == "reinterpolation"):
607637
lambda_ = x[self.n_theta : self.n_theta + 1]
608-
# lambda is in log scale, so transform it back:
638+
# nugget on linear scale; lambda is in log scale, so transform it back:
609639
lambda_ = 10.0**lambda_
610640
if self.optim_p:
611641
self.p_val = x[self.n_theta + 1 : self.n_theta + 1 + self.n_p]
612642
elif self.method == "interpolation":
613643
# use the original, untransformed eps:
644+
# small “nugget” for interpolation (Forrester, sec. on interpolation)
614645
lambda_ = self.eps
615646
if self.optim_p:
616647
self.p_val = x[self.n_theta : self.n_theta + self.n_p]
@@ -620,34 +651,45 @@ def likelihood(self, x: np.ndarray) -> Tuple[float, np.ndarray, np.ndarray]:
620651
n = X.shape[0]
621652
one = np.ones(n)
622653

623-
# Build the correlation matrix Psi
654+
# Build the correlation matrix Psi.
655+
# Build correlation R = upper + upper^T + I + λ I (Forrester, Ch. 3)
624656
Psi_upper_triangle = self.build_Psi()
625657
Psi = Psi_upper_triangle + Psi_upper_triangle.T + np.eye(n) + np.eye(n) * lambda_
626658

659+
# Cholesky factorization R = U U^T
627660
try:
628661
U = np.linalg.cholesky(Psi)
629662
except LinAlgError:
663+
# Penalize ill-conditioning (implementation detail)
630664
return self.penalty, Psi, None
631665

666+
# log |R| via Cholesky (Forrester, Ch. 3)
632667
LnDetPsi = 2.0 * np.sum(np.log(np.abs(np.diag(U))))
633668

669+
# R^{-1}y and R^{-1}1 via triangular solves
634670
temp_y = np.linalg.solve(U, y)
635671
temp_one = np.linalg.solve(U, one)
636672
vy = np.linalg.solve(U.T, temp_y)
637673
vone = np.linalg.solve(U.T, temp_one)
638674

675+
# μ̂ (ordinary Kriging constant mean) (Forrester, Ch. 3)
639676
mu = (one @ vy) / (one @ vone)
677+
678+
# σ̂^2 (concentrated variance) (Forrester, Ch. 3)
640679
resid = y - one * mu
641680
tresid = np.linalg.solve(U, resid)
642681
tresid = np.linalg.solve(U.T, tresid)
643682
SigmaSqr = (resid @ tresid) / n
644683

684+
# Concentrated −log L (Forrester, Ch. 3)
645685
negLnLike = (n / 2.0) * np.log(SigmaSqr) + 0.5 * LnDetPsi
646686
return negLnLike, Psi, U
647687

648688
def build_psi_vec(self, x: np.ndarray) -> None:
649689
"""
650690
Build the psi vector required for predictive methods.
691+
ψ(x) := [exp(-D(x, x_i))]_{i=1..n}, i.e., correlation between a new x and the training sites
692+
using the same D as for R (Forrester, Ch. 2).
651693
652694
Args:
653695
x (ndarray): Point to calculate the psi vector for.
@@ -677,16 +719,19 @@ def build_psi_vec(self, x: np.ndarray) -> None:
677719
D = np.zeros(n)
678720

679721
# Compute ordered distance contributions
722+
# Ordered contributions to D(x, X) (weighted squared Euclidean)
680723
if self.ordered_mask.any():
681724
X_ordered = self.X_[:, self.ordered_mask]
682725
x_ordered = x[self.ordered_mask]
683726
D += cdist(x_ordered.reshape(1, -1), X_ordered, metric="sqeuclidean", w=theta10[self.ordered_mask]).ravel()
684727
# Compute factor distance contributions
728+
# Factorial contributions (categorical distance)
685729
if self.factor_mask.any():
686730
X_factor = self.X_[:, self.factor_mask]
687731
x_factor = x[self.factor_mask]
688732
D += cdist(x_factor.reshape(1, -1), X_factor, metric=self.metric_factorial, w=theta10[self.factor_mask]).ravel()
689733

734+
# Forrester, Ch. 2: R = exp(-D)
690735
psi = np.exp(-D)
691736
return psi
692737

@@ -697,6 +742,13 @@ def _pred(self, x: np.ndarray) -> float:
697742
"""
698743
Computes a single-point Kriging prediction using the correlation matrix
699744
information. Internal helper method.
745+
Ordinary Kriging predictor (Forrester, Ch. 3):
746+
- f̂(x) = μ̂ + ψ(x)^T R^{-1} (y − 1 μ̂)
747+
- s^2(x) ~ σ̂^2 [1 + λ − ψ(x)^T R^{-1} ψ(x)]
748+
Note: The classic OK variance includes a mean-uncertainty term
749+
(1 − 1^T R^{-1} ψ)^2 / (1^T R^{-1} 1). This implementation uses a
750+
simplified form common in engineering Kriging codes.
751+
Expected improvement (EI) (Forrester, Ch. 6) computed for minimization.
700752
701753
Args:
702754
x (np.ndarray): 1D array of length k for the point at which to predict.
@@ -708,6 +760,7 @@ def _pred(self, x: np.ndarray) -> float:
708760
"""
709761
y = self.y_.flatten()
710762

763+
# Load θ and λ (λ transformed to linear scale where needed)
711764
if (self.method == "regression") or (self.method == "reinterpolation"):
712765
self.theta = self.logtheta_loglambda_p_[: self.n_theta]
713766
lambda_ = self.logtheta_loglambda_p_[self.n_theta : self.n_theta + 1]
@@ -729,28 +782,36 @@ def _pred(self, x: np.ndarray) -> float:
729782
one = np.ones(n)
730783

731784
# Compute mu
785+
# μ̂ via backsolves (Forrester, Ch. 3)
732786
y_tilde = np.linalg.solve(U, y)
733787
y_tilde = np.linalg.solve(U.T, y_tilde)
734788
one_tilde = np.linalg.solve(U, one)
735789
one_tilde = np.linalg.solve(U.T, one_tilde)
736790
mu = (one @ y_tilde) / (one @ one_tilde)
737791

792+
# Residual backsolve for R^{-1}(y − 1 μ̂)
738793
resid = y - one * mu
739794
resid_tilde = np.linalg.solve(U, resid)
740795
resid_tilde = np.linalg.solve(U.T, resid_tilde)
741796

742797
psi = self.build_psi_vec(x)
743798

744799
# Compute SigmaSqr and SSqr
800+
# σ̂^2 (Forrester, Ch. 3)
801+
# Note: identical to likelihood computation to maintain consistency
802+
# (up to small numerical differences).
745803
if (self.method == "interpolation") or (self.method == "regression"):
746804
SigmaSqr = (resid @ resid_tilde) / n
747805
# Compute SSqr
806+
# s^2(x) ≈ σ̂^2 [1 + λ − ψ^T R^{-1} ψ]
748807
psi_tilde = np.linalg.solve(U, psi)
749808
psi_tilde = np.linalg.solve(U.T, psi_tilde)
750809
# Eq. (3.1) in [forr08a] without lambda:
810+
# cf. Forrester (Ch. 3) variance structure; simplified here
751811
SSqr = SigmaSqr * (1 + lambda_ - psi @ psi_tilde)
752812
else:
753813
# method is "reinterpolation"
814+
# Reinterpolation: variance with nugget removed and ε added in R for the variance solve
754815
Psi_adjusted = self.Psi_ - np.eye(n) * lambda_ + np.eye(n) * self.eps
755816
SigmaSqr = (resid @ np.linalg.solve(U.T, np.linalg.solve(U, Psi_adjusted @ resid_tilde))) / n
756817
# Compute Uint (Cholesky factor of the adjusted Psi matrix)
@@ -765,9 +826,12 @@ def _pred(self, x: np.ndarray) -> float:
765826
s = np.abs(SSqr) ** 0.5
766827

767828
# Final prediction
829+
# f̂(x) (Forrester, Ch. 3)
768830
f = mu + psi @ resid_tilde
769831

770832
# Compute ExpImp
833+
# EI (minimization), following standard closed form (Forrester, Ch. 6).
834+
# Implementation uses erf for Φ and returns −log10(EI) for numerical stability.
771835
if self.return_ei:
772836
yBest = np.min(y)
773837
EITermOne = (yBest - f) * (0.5 + 0.5 * erf((1 / np.sqrt(2)) * ((yBest - f) / s)))
@@ -781,6 +845,8 @@ def max_likelihood(self, bounds: List[Tuple[float, float]]) -> Tuple[np.ndarray,
781845
"""
782846
Maximizes the Kriging likelihood function using differential evolution
783847
over the range of log(theta) specified by bounds.
848+
Hyperparameter estimation via maximum likelihood (Forrester, Ch. 3).
849+
Objective is the concentrated −log L above, optimized with differential evolution.
784850
785851
Args:
786852
bounds (List[Tuple[float, float]]): Sequence of (low, high) bounds for log(theta).
@@ -807,6 +873,7 @@ def objective(logtheta_loglambda_p_: np.ndarray) -> float:
807873
neg_ln_like, _, _ = self.likelihood(logtheta_loglambda_p_)
808874
return neg_ln_like
809875

876+
# Use keyword args for easier test patching
810877
result = differential_evolution(func=objective, bounds=bounds, seed=self.seed)
811878
return result.x, result.fun
812879

0 commit comments

Comments
 (0)