Skip to content

Commit f146c87

Browse files
0.29.20
1 parent 3d4bd55 commit f146c87

3 files changed

Lines changed: 271 additions & 3 deletions

File tree

notebooks/00_spotPython_tests.ipynb

Lines changed: 152 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13477,12 +13477,161 @@
1347713477
"randorient(k, p, xi, seed=1)"
1347813478
]
1347913479
},
13480+
{
13481+
"cell_type": "markdown",
13482+
"metadata": {},
13483+
"source": [
13484+
"# Morris-Mitchell\n"
13485+
]
13486+
},
1348013487
{
1348113488
"cell_type": "code",
13482-
"execution_count": null,
13489+
"execution_count": 2,
1348313490
"metadata": {},
13484-
"outputs": [],
13485-
"source": []
13491+
"outputs": [
13492+
{
13493+
"name": "stdout",
13494+
"output_type": "stream",
13495+
"text": [
13496+
"Design | Original mmphi (Extensive) | New mmphi_intensive (Intensive)\n",
13497+
"----------------------------------------------------------------------------\n",
13498+
"LHS (N=20) | 41.3303 | 2.9984 \n",
13499+
"Random (N=20) | 68.7818 | 4.9900 \n",
13500+
"LHS (N=50) | 136.7957 | 3.9084 \n",
13501+
"Random (N=50) | 179.4802 | 5.1280 \n"
13502+
]
13503+
}
13504+
],
13505+
"source": [
13506+
"# --- Experimental Setup ---\n",
13507+
"from spotpython.utils.sampling import jd, mmphi, mmphi_intensive\n",
13508+
"from scipy.stats import qmc\n",
13509+
"import numpy as np\n",
13510+
"\n",
13511+
"N_DIM = 2\n",
13512+
"Q_EXP = 2.0\n",
13513+
"P_NORM = 2.0 # Euclidean distance\n",
13514+
"\n",
13515+
"# We will compare two high-quality LHS designs of different sizes\n",
13516+
"# with two lower-quality random designs of the same sizes.\n",
13517+
"lhs_design_20 = qmc.LatinHypercube(d=N_DIM, seed=1).random(n=20)\n",
13518+
"random_design_20 = np.random.default_rng(1).random(size=(20, N_DIM))\n",
13519+
"\n",
13520+
"lhs_design_50 = qmc.LatinHypercube(d=N_DIM, seed=2).random(n=50)\n",
13521+
"random_design_50 = np.random.default_rng(2).random(size=(50, N_DIM))\n",
13522+
"\n",
13523+
"designs_to_test = {\n",
13524+
" \"LHS (N=20)\": lhs_design_20,\n",
13525+
" \"Random (N=20)\": random_design_20,\n",
13526+
" \"LHS (N=50)\": lhs_design_50,\n",
13527+
" \"Random (N=50)\": random_design_50,\n",
13528+
"}\n",
13529+
"\n",
13530+
"results = []\n",
13531+
"for label, design in designs_to_test.items():\n",
13532+
" raw_score = mmphi(design, q=Q_EXP, p=P_NORM)\n",
13533+
" intensive_score = mmphi_intensive(design, q=Q_EXP, p=P_NORM)\n",
13534+
" results.append({\n",
13535+
" \"Design\": label,\n",
13536+
" \"Original mmphi (Extensive)\": f\"{raw_score:.4f}\",\n",
13537+
" \"New mmphi_intensive (Intensive)\": f\"{intensive_score:.4f}\",\n",
13538+
" })\n",
13539+
" \n",
13540+
"# --- Print Results Table ---\n",
13541+
"headers = list(results[0].keys())\n",
13542+
"widths = {h: max(len(h), max(len(row[h]) for row in results)) for h in headers}\n",
13543+
"\n",
13544+
"header_line = \" | \".join(h.ljust(widths[h]) for h in headers)\n",
13545+
"print(header_line)\n",
13546+
"print(\"-\" * len(header_line))\n",
13547+
"\n",
13548+
"for row in results:\n",
13549+
" row_line = \" | \".join(row[h].ljust(widths[h]) for h in headers)\n",
13550+
" print(row_line)"
13551+
]
13552+
},
13553+
{
13554+
"cell_type": "code",
13555+
"execution_count": 7,
13556+
"metadata": {},
13557+
"outputs": [
13558+
{
13559+
"name": "stdout",
13560+
"output_type": "stream",
13561+
"text": [
13562+
"Running comparative analysis...\n",
13563+
"\n",
13564+
"Design | Original mmphi | Intensive mmphi\n",
13565+
"-------------------------------------------------------------\n",
13566+
"Good Design (LHS, N=20) | 497.4601 | 4.7058 \n",
13567+
"Poor Design (Random, N=20) | 587.2249 | 5.5550 \n",
13568+
"Good Design (LHS, N=50) | 515.4561 | 4.8436 \n",
13569+
"Poor Design (Random, N=50) | 589.4369 | 5.5388 \n",
13570+
"\n",
13571+
"--- Interpretation ---\n",
13572+
"Notice how 'Original mmphi' scores are not comparable between N=20 and N=50 designs.\n",
13573+
"The 'Intensive mmphi' scores, however, are comparable. The two 'Good' LHS designs have\n",
13574+
"similar low scores, which are clearly better (lower) than the scores for the 'Poor' random designs.\n"
13575+
]
13576+
}
13577+
],
13578+
"source": [
13579+
"# --- Helper function for design generation ---\n",
13580+
"def generate_design(n_points: int, n_dim: int, seed: int, design_type: str) -> np.ndarray:\n",
13581+
" if design_type == 'lhs':\n",
13582+
" sampler = qmc.LatinHypercube(d=n_dim, seed=seed)\n",
13583+
" return sampler.random(n=n_points)\n",
13584+
" elif design_type == 'random':\n",
13585+
" rng = np.random.default_rng(seed)\n",
13586+
" return rng.random(size=(n_points, n_dim))\n",
13587+
" else:\n",
13588+
" raise ValueError(\"Unknown design type\")\n",
13589+
"\n",
13590+
"# --- Analysis Parameters ---\n",
13591+
"N_DIM = 2\n",
13592+
"Q_EXP = 2.0\n",
13593+
"P_NORM = 2.0 # Euclidean distance\n",
13594+
"\n",
13595+
"designs_to_compare = {\n",
13596+
" \"Good Design (LHS, N=20)\": generate_design(150, N_DIM, 42, 'lhs'),\n",
13597+
" \"Poor Design (Random, N=20)\": generate_design(150, N_DIM, 42, 'random'),\n",
13598+
" \"Good Design (LHS, N=50)\": generate_design(151, N_DIM, 42, 'lhs'),\n",
13599+
" \"Poor Design (Random, N=50)\": generate_design(151, N_DIM, 42, 'random')\n",
13600+
"}\n",
13601+
"\n",
13602+
"results = []\n",
13603+
"\n",
13604+
"print(\"Running comparative analysis...\\n\")\n",
13605+
"for label, design in designs_to_compare.items():\n",
13606+
" # Calculate original (extensive) metric\n",
13607+
" original_score = mmphi(design, q=Q_EXP, p=P_NORM)\n",
13608+
" \n",
13609+
" # Calculate new (intensive) metric\n",
13610+
" intensive_score = mmphi_intensive(design, q=Q_EXP, p=P_NORM)\n",
13611+
" \n",
13612+
" results.append({\n",
13613+
" 'Design': label,\n",
13614+
" 'Original mmphi': f\"{original_score:.4f}\",\n",
13615+
" 'Intensive mmphi': f\"{intensive_score:.4f}\"\n",
13616+
" })\n",
13617+
"\n",
13618+
"# --- Print Results Table ---\n",
13619+
"headers = list(results[0].keys())\n",
13620+
"widths = {h: max(len(h), max(len(row[h]) for row in results)) for h in headers}\n",
13621+
"\n",
13622+
"header_line = \" | \".join(h.ljust(widths[h]) for h in headers)\n",
13623+
"print(header_line)\n",
13624+
"print(\"-\" * len(header_line))\n",
13625+
"\n",
13626+
"for row in results:\n",
13627+
" row_line = \" | \".join(row[h].ljust(widths[h]) for h in headers)\n",
13628+
" print(row_line)\n",
13629+
"\n",
13630+
"print(\"\\n--- Interpretation ---\")\n",
13631+
"print(\"Notice how 'Original mmphi' scores are not comparable between N=20 and N=50 designs.\")\n",
13632+
"print(\"The 'Intensive mmphi' scores, however, are comparable. The two 'Good' LHS designs have\")\n",
13633+
"print(\"similar low scores, which are clearly better (lower) than the scores for the 'Poor' random designs.\")\n"
13634+
]
1348613635
},
1348713636
{
1348813637
"cell_type": "code",

src/spotpython/utils/sampling.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -881,3 +881,73 @@ def subset(X: np.ndarray, ns: int) -> Tuple[np.ndarray, np.ndarray]:
881881
Xs[j, :] = orig_point
882882

883883
return Xs, Xr
884+
885+
886+
def mmphi_intensive(X: np.ndarray, q: Optional[float] = 2.0, p: Optional[float] = 2.0) -> float:
887+
"""
888+
Calculates a size-invariant Morris-Mitchell criterion.
889+
890+
This "intensive" version of the criterion allows for the comparison of
891+
sampling plans with different sample sizes by normalizing for the number
892+
of point pairs. A smaller value indicates a better (more space-filling)
893+
design.
894+
895+
Args:
896+
X (np.ndarray):
897+
A 2D array representing the sampling plan (shape: (n, d)).
898+
q (float, optional):
899+
The exponent used in the computation of the metric. Defaults to 2.0.
900+
p (float, optional):
901+
The distance norm to use (e.g., p=1 for Manhattan, p=2 for Euclidean).
902+
Defaults to 2.0.
903+
904+
Returns:
905+
float:
906+
The size-invariant space-fillingness metric. Smaller is better.
907+
908+
Examples:
909+
>>> import numpy as np
910+
>>> from spotpython.utils.sampling import mmphi_intensive
911+
>>> # Create a simple 3-point sampling plan in 2D
912+
>>> X = np.array([
913+
... [0.0, 0.0],
914+
... [0.5, 0.5],
915+
... [1.0, 1.0]
916+
... ])
917+
>>> # Calculate the intensive space-fillingness metric with q=2, using Euclidean distances (p=2)
918+
>>> quality = mmphi_intensive(X, q=2, p=2)
919+
>>> print(quality)
920+
"""
921+
# Ensure there are no duplicate points
922+
if X.shape[0] != len(np.unique(X, axis=0)):
923+
X = np.unique(X, axis=0)
924+
925+
n_points = X.shape[0]
926+
927+
# The criterion is not well-defined for fewer than 2 points.
928+
if n_points < 2:
929+
return np.inf
930+
931+
# Get the unique distances and their multiplicities
932+
J, d = jd(X, p=p)
933+
934+
# If all points are identical, the design is infinitely bad.
935+
if d.size == 0:
936+
return np.inf
937+
938+
# Calculate the number of unique pairs of points
939+
M = n_points * (n_points - 1) / 2
940+
941+
try:
942+
# Calculate the sum term of the original mmphi
943+
sum_term = np.sum(J * (d ** (-q)))
944+
# Normalize the sum by M before taking the final root
945+
intensive_phiq = (sum_term / M) ** (1.0 / q)
946+
except ZeroDivisionError:
947+
return np.inf
948+
except FloatingPointError:
949+
return np.inf
950+
except Exception:
951+
return np.inf
952+
953+
return intensive_phiq
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import numpy as np
2+
import pytest
3+
from spotpython.utils.sampling import mmphi_intensive
4+
from spotpython.utils import sampling
5+
from scipy.spatial.distance import pdist
6+
from spotpython.utils import sampling
7+
8+
class DummyJD:
9+
"""Helper to monkeypatch jd for controlled testing."""
10+
def __init__(self, J, d):
11+
self.J = J
12+
self.d = d
13+
def __call__(self, X, p=2.0):
14+
return self.J, self.d
15+
16+
def test_mmphi_intensive_basic(monkeypatch):
17+
# Use a simple 2D square, distances are all 1 or sqrt(2)
18+
X = np.array([
19+
[0, 0],
20+
[1, 0],
21+
[0, 1],
22+
[1, 1]
23+
])
24+
# Patch jd to use real distances for this test
25+
orig_jd = getattr(sampling, "jd", None)
26+
def real_jd(X, p=2.0):
27+
dists = pdist(X, metric="minkowski", p=p)
28+
# Count unique distances and their multiplicities
29+
vals, counts = np.unique(np.round(dists, 8), return_counts=True)
30+
return counts, vals
31+
monkeypatch.setattr(sampling, "jd", real_jd)
32+
val = mmphi_intensive(X, q=2.0, p=2.0)
33+
assert np.isscalar(val)
34+
assert val > 0
35+
if orig_jd:
36+
monkeypatch.setattr(sampling, "jd", orig_jd)
37+
38+
def test_mmphi_intensive_duplicates(monkeypatch):
39+
# All points identical: should return np.inf
40+
X = np.ones((4, 2))
41+
monkeypatch.setattr(sampling, "jd", lambda X, p=2.0: (np.array([]), np.array([])))
42+
val = mmphi_intensive(X)
43+
assert val == np.inf
44+
45+
def test_mmphi_intensive_too_few_points():
46+
# Only one point: should return np.inf
47+
X = np.array([[0.5, 0.5]])
48+
val = mmphi_intensive(X)
49+
assert val == np.inf

0 commit comments

Comments
 (0)