Skip to content

Commit 96fceb0

Browse files
0.26.14
Forrester Sampling Plans
1 parent 923144f commit 96fceb0

4 files changed

Lines changed: 558 additions & 5 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
77

88
[project]
99
name = "spotpython"
10-
version = "0.26.13"
10+
version = "0.26.14"
1111
authors = [
1212
{ name="T. Bartz-Beielstein", email="tbb@bartzundbartz.de" }
1313
]

src/spotpython/gp/gp_sep.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -324,10 +324,6 @@ def __init__(
324324
Initialize the GP model with data and hyperparameters.
325325
326326
Args:
327-
X (np.ndarray):
328-
Input data matrix of shape (n, m). If pandas DataFrame, will be converted to numpy array.
329-
y (np.ndarray):
330-
Output data vector of length n. If pandas Series, will be converted to numpy array.
331327
d (np.ndarray):
332328
Length-scale parameters.
333329
g (float):
@@ -348,6 +344,8 @@ def __init__(
348344
Whether to automatically optimize hyperparameters using MLE. Default is True.
349345
max_points (int):
350346
Maximum number of points to use for the model building. Default is None, which means all points are used.
347+
seed (int):
348+
Random seed for reproducibility. Default is 123.
351349
"""
352350
# Hyperparameters (do not store training data)
353351
self.d = d
@@ -462,6 +460,14 @@ def fit(self, X: np.ndarray, y: np.ndarray, d=None, g=None, dK: bool = True, aut
462460
463461
Raises:
464462
ValueError: If X has no rows or if X and y dimensions mismatch.
463+
464+
Examples:
465+
>>> from spotpython.gp.gp_sep import GPsep
466+
>>> import numpy as np
467+
>>> X = np.array([[1, 2], [3, 4], [5, 6]])
468+
>>> y = np.array([1, 2, 3])
469+
>>> model = GPsep()
470+
>>> model.fit(X, y)
465471
"""
466472
# if X or y are pandas dataframes or series, convert them to numpy arrays
467473
if hasattr(X, "to_numpy"):

src/spotpython/utils/sampling.py

Lines changed: 277 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,277 @@
1+
import numpy as np
2+
from typing import Tuple, Optional
3+
4+
5+
def fullfactorial(q, Edges=1) -> np.ndarray:
6+
"""Generates a full factorial sampling plan in the unit cube.
7+
8+
Args:
9+
q (list or np.ndarray):
10+
A list or array containing the number of points along each dimension (k-vector).
11+
Edges (int, optional):
12+
Determines spacing of points. If `Edges=1`, points are equally spaced from edge to edge (default).
13+
Otherwise, points will be in the centers of n = q[0]*q[1]*...*q[k-1] bins filling the unit cube.
14+
15+
Returns:
16+
(np.ndarray): Full factorial sampling plan as an array of shape (n, k), where n is the total number of points and k is the number of dimensions.
17+
18+
Raises:
19+
ValueError: If any dimension in `q` is less than 2.
20+
21+
Example:
22+
>>> q = [3, 4]
23+
>>> X = fullfactorial(q, Edges=1)
24+
>>> print(X)
25+
"""
26+
q = np.array(q)
27+
if np.min(q) < 2:
28+
raise ValueError("You must have at least two points per dimension.")
29+
30+
# Total number of points in the sampling plan
31+
n = np.prod(q)
32+
33+
# Number of dimensions
34+
k = len(q)
35+
36+
# Pre-allocate memory for the sampling plan
37+
X = np.zeros((n, k))
38+
39+
# Additional phantom element
40+
q = np.append(q, 1)
41+
42+
for j in range(k):
43+
if Edges == 1:
44+
one_d_slice = np.linspace(0, 1, q[j])
45+
else:
46+
one_d_slice = np.linspace(1 / (2 * q[j]), 1, q[j]) - 1 / (2 * q[j])
47+
48+
column = np.array([])
49+
50+
while len(column) < n:
51+
for ll in range(q[j]):
52+
column = np.append(column, np.ones(np.prod(q[j + 1 : k])) * one_d_slice[ll])
53+
54+
X[:, j] = column
55+
56+
return X
57+
58+
59+
def rlh(n: int, k: int, edges: int = 0) -> np.ndarray:
60+
"""
61+
Generates a random Latin hypercube within the [0,1]^k hypercube.
62+
63+
Args:
64+
n (int): Desired number of points.
65+
k (int): Number of design variables (dimensions).
66+
edges (int, optional):
67+
If 1, places centers of the extreme bins at the domain edges ([0,1]).
68+
Otherwise, bins are fully contained within the domain, i.e. midpoints.
69+
Defaults to 0.
70+
71+
Returns:
72+
np.ndarray: A Latin hypercube sampling plan of n points in k dimensions,
73+
with each coordinate in the range [0,1].
74+
75+
Example:
76+
>>> import numpy as np
77+
>>> # Generate a 2D Latin hypercube with 5 points and edges=0
78+
>>> X = rlh(n=5, k=2, edges=0)
79+
>>> print(X)
80+
# Example output (values vary due to randomness):
81+
# [[0.1 0.5 ]
82+
# [0.7 0.1 ]
83+
# [0.9 0.7 ]
84+
# [0.3 0.9 ]
85+
# [0.5 0.3 ]]
86+
87+
"""
88+
# Initialize array
89+
X = np.zeros((n, k), dtype=float)
90+
91+
# Fill with random permutations
92+
for i in range(k):
93+
X[:, i] = np.random.permutation(n)
94+
95+
# Adjust normalization based on the edges flag
96+
if edges == 1:
97+
# [X=0..n-1] -> [0..1]
98+
X = X / (n - 1)
99+
else:
100+
# Points at true midpoints
101+
# [X=0..n-1] -> [0.5/n..(n-0.5)/n]
102+
X = (X + 0.5) / n
103+
104+
return X
105+
106+
107+
def jd(X: np.ndarray, p: float = 1.0) -> Tuple[np.ndarray, np.ndarray]:
108+
"""
109+
Computes and counts the distinct p-norm distances between all pairs of points in X.
110+
It returns:
111+
1) A list of distinct distances (sorted), and
112+
2) A corresponding multiplicity array that indicates how often each distance occurs.
113+
114+
Args:
115+
X (np.ndarray):
116+
A 2D array of shape (n, d) representing n points in d-dimensional space.
117+
p (float, optional):
118+
The distance norm to use. p=1 uses the Manhattan (L1) norm, while p=2 uses the
119+
Euclidean (L2) norm. Defaults to 1.0 (Manhattan norm).
120+
121+
Returns:
122+
(np.ndarray, np.ndarray):
123+
A tuple (J, distinct_d), where:
124+
- distinct_d is a 1D float array of unique, sorted distances between points.
125+
- J is a 1D integer array that provides the multiplicity (occurrence count)
126+
of each distance in distinct_d.
127+
128+
Example:
129+
>>> import numpy as np
130+
>>> from your_module import jd
131+
>>> # A small 3-point set in 2D
132+
>>> X = np.array([[0.0, 0.0],
133+
... [1.0, 1.0],
134+
... [2.0, 2.0]])
135+
>>> J, distinct_d = jd(X, p=2.0)
136+
>>> print("Distinct distances:", distinct_d)
137+
>>> print("Occurrences:", J)
138+
# Possible output (using Euclidean norm):
139+
# Distinct distances: [1.41421356 2.82842712]
140+
# Occurrences: [1 1]
141+
# Explanation: Distances are sqrt(2) between consecutive points and 2*sqrt(2) for the farthest pair.
142+
"""
143+
n = X.shape[0]
144+
145+
# Allocate enough space for all pairwise distances
146+
# (n*(n-1))/2 pairs for an n-point set
147+
pair_count = n * (n - 1) // 2
148+
d = np.zeros(pair_count, dtype=float)
149+
150+
# Fill the distance array
151+
idx = 0
152+
for i in range(n - 1):
153+
for j in range(i + 1, n):
154+
# Compute the p-norm distance
155+
d[idx] = np.linalg.norm(X[i] - X[j], ord=p)
156+
idx += 1
157+
158+
# Find unique distances and their multiplicities
159+
distinct_d = np.unique(d)
160+
J = np.zeros_like(distinct_d, dtype=int)
161+
for i, val in enumerate(distinct_d):
162+
J[i] = np.sum(d == val)
163+
164+
return J, distinct_d
165+
166+
167+
def mm(X1: np.ndarray, X2: np.ndarray, p: Optional[float] = 1.0) -> int:
168+
"""
169+
Determines which of two sampling plans has better space-filling properties
170+
according to the Morris-Mitchell criterion.
171+
172+
Args:
173+
X1 (np.ndarray): A 2D array representing the first sampling plan.
174+
X2 (np.ndarray): A 2D array representing the second sampling plan.
175+
p (float, optional): The distance metric. p=1 uses Manhattan (L1) distance,
176+
while p=2 uses Euclidean (L2). Defaults to 1.0.
177+
178+
Returns:
179+
int:
180+
- 0 if both plans are identical or equally space-filling
181+
- 1 if X1 is more space-filling
182+
- 2 if X2 is more space-filling
183+
184+
Example:
185+
>>> import numpy as np
186+
>>> from your_module import mm
187+
>>> # Create two 3-point sampling plans in 2D
188+
>>> X1 = np.array([[0.0, 0.0],
189+
... [0.5, 0.5],
190+
... [0.0, 1.0]])
191+
>>> X2 = np.array([[0.1, 0.1],
192+
... [0.4, 0.6],
193+
... [0.1, 0.9]])
194+
>>> # Compare which plan has better space-filling (Morris-Mitchell)
195+
>>> better = mm(X1, X2, p=2.0)
196+
>>> print(better)
197+
# Prints either 0, 1, or 2 depending on which plan is more space-filling.
198+
"""
199+
# Quick check if the sorted sets of points are identical
200+
# (mimicking MATLAB's sortrows check)
201+
X1_sorted = X1[np.lexsort(np.rot90(X1))]
202+
X2_sorted = X2[np.lexsort(np.rot90(X2))]
203+
if np.array_equal(X1_sorted, X2_sorted):
204+
return 0 # Identical sampling plans
205+
206+
# Compute distance multiplicities for each plan
207+
J1, d1 = jd(X1, p)
208+
J2, d2 = jd(X2, p)
209+
m1, m2 = len(d1), len(d2)
210+
211+
# Construct V1 and V2: alternate distance and negative multiplicity
212+
V1 = np.zeros(2 * m1)
213+
V1[0::2] = d1
214+
V1[1::2] = -J1
215+
216+
V2 = np.zeros(2 * m2)
217+
V2[0::2] = d2
218+
V2[1::2] = -J2
219+
220+
# Trim the longer vector to match the size of the shorter
221+
m = min(m1, m2)
222+
V1 = V1[:m]
223+
V2 = V2[:m]
224+
225+
# Compare element-by-element:
226+
# c[i] = 1 if V1[i] > V2[i], 2 if V1[i] < V2[i], 0 otherwise.
227+
c = (V1 > V2).astype(int) + 2 * (V1 < V2).astype(int)
228+
229+
if np.sum(c) == 0:
230+
# Equally space-filling
231+
return 0
232+
else:
233+
# The first non-zero entry indicates which plan is better
234+
idx = np.argmax(c != 0)
235+
return c[idx]
236+
237+
238+
def mmphi(X: np.ndarray, q: Optional[float] = 2.0, p: Optional[float] = 1.0) -> float:
239+
"""
240+
Calculates the Morris-Mitchell sampling plan quality criterion.
241+
242+
Args:
243+
X (np.ndarray):
244+
A 2D array representing the sampling plan, where each row is a point in
245+
d-dimensional space (shape: (n, d)).
246+
q (float, optional):
247+
Exponent used in the computation of the metric. Defaults to 2.0.
248+
p (float, optional):
249+
The distance norm to use. For example, p=1 is Manhattan (L1),
250+
p=2 is Euclidean (L2). Defaults to 1.0.
251+
252+
Returns:
253+
float:
254+
The space-fillingness metric Phiq. Larger values typically indicate a more
255+
space-filling plan according to the Morris-Mitchell criterion.
256+
257+
Example:
258+
>>> import numpy as np
259+
>>> from your_module import mmphi
260+
>>> # Simple 3-point sampling plan in 2D
261+
>>> X = np.array([
262+
... [0.0, 0.0],
263+
... [0.5, 0.5],
264+
... [1.0, 1.0]
265+
... ])
266+
>>> # Calculate the space-fillingness metric with q=2, using Euclidean distances (p=2)
267+
>>> quality = mmphi(X, q=2, p=2)
268+
>>> print(quality)
269+
# This value indicates how well points are spread out, with higher being better.
270+
"""
271+
# Compute the distance multiplicities: J, and unique distances: d
272+
J, d = jd(X, p)
273+
274+
# Summation of J[i] * d[i]^(-q), then raised to 1/q
275+
# This follows the Morris-Mitchell definition.
276+
Phiq = np.sum(J * (d ** (-q))) ** (1.0 / q)
277+
return Phiq

0 commit comments

Comments
 (0)