0.29.20

bartzbeielstein · bartzbeielstein · commit f146c87b33d9 · 2025-06-12T22:19:23.000+02:00
diff --git a/notebooks/00_spotPython_tests.ipynb b/notebooks/00_spotPython_tests.ipynb
@@ -13477,12 +13477,161 @@
     "randorient(k, p, xi, seed=1)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Morris-Mitchell\n"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Design        | Original mmphi (Extensive) | New mmphi_intensive (Intensive)\n",
+      "----------------------------------------------------------------------------\n",
+      "LHS (N=20)    | 41.3303                    | 2.9984                         \n",
+      "Random (N=20) | 68.7818                    | 4.9900                         \n",
+      "LHS (N=50)    | 136.7957                   | 3.9084                         \n",
+      "Random (N=50) | 179.4802                   | 5.1280                         \n"
+     ]
+    }
+   ],
+   "source": [
+    "# --- Experimental Setup ---\n",
+    "from spotpython.utils.sampling import jd, mmphi, mmphi_intensive\n",
+    "from scipy.stats import qmc\n",
+    "import numpy as np\n",
+    "\n",
+    "N_DIM = 2\n",
+    "Q_EXP = 2.0\n",
+    "P_NORM = 2.0 # Euclidean distance\n",
+    "\n",
+    "# We will compare two high-quality LHS designs of different sizes\n",
+    "# with two lower-quality random designs of the same sizes.\n",
+    "lhs_design_20 = qmc.LatinHypercube(d=N_DIM, seed=1).random(n=20)\n",
+    "random_design_20 = np.random.default_rng(1).random(size=(20, N_DIM))\n",
+    "\n",
+    "lhs_design_50 = qmc.LatinHypercube(d=N_DIM, seed=2).random(n=50)\n",
+    "random_design_50 = np.random.default_rng(2).random(size=(50, N_DIM))\n",
+    "\n",
+    "designs_to_test = {\n",
+    "    \"LHS (N=20)\": lhs_design_20,\n",
+    "    \"Random (N=20)\": random_design_20,\n",
+    "    \"LHS (N=50)\": lhs_design_50,\n",
+    "    \"Random (N=50)\": random_design_50,\n",
+    "}\n",
+    "\n",
+    "results = []\n",
+    "for label, design in designs_to_test.items():\n",
+    "    raw_score = mmphi(design, q=Q_EXP, p=P_NORM)\n",
+    "    intensive_score = mmphi_intensive(design, q=Q_EXP, p=P_NORM)\n",
+    "    results.append({\n",
+    "        \"Design\": label,\n",
+    "        \"Original mmphi (Extensive)\": f\"{raw_score:.4f}\",\n",
+    "        \"New mmphi_intensive (Intensive)\": f\"{intensive_score:.4f}\",\n",
+    "    })\n",
+    "    \n",
+    "# --- Print Results Table ---\n",
+    "headers = list(results[0].keys())\n",
+    "widths = {h: max(len(h), max(len(row[h]) for row in results)) for h in headers}\n",
+    "\n",
+    "header_line = \" | \".join(h.ljust(widths[h]) for h in headers)\n",
+    "print(header_line)\n",
+    "print(\"-\" * len(header_line))\n",
+    "\n",
+    "for row in results:\n",
+    "    row_line = \" | \".join(row[h].ljust(widths[h]) for h in headers)\n",
+    "    print(row_line)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Running comparative analysis...\n",
+      "\n",
+      "Design                     | Original mmphi | Intensive mmphi\n",
+      "-------------------------------------------------------------\n",
+      "Good Design (LHS, N=20)    | 497.4601       | 4.7058         \n",
+      "Poor Design (Random, N=20) | 587.2249       | 5.5550         \n",
+      "Good Design (LHS, N=50)    | 515.4561       | 4.8436         \n",
+      "Poor Design (Random, N=50) | 589.4369       | 5.5388         \n",
+      "\n",
+      "--- Interpretation ---\n",
+      "Notice how 'Original mmphi' scores are not comparable between N=20 and N=50 designs.\n",
+      "The 'Intensive mmphi' scores, however, are comparable. The two 'Good' LHS designs have\n",
+      "similar low scores, which are clearly better (lower) than the scores for the 'Poor' random designs.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# --- Helper function for design generation ---\n",
+    "def generate_design(n_points: int, n_dim: int, seed: int, design_type: str) -> np.ndarray:\n",
+    "    if design_type == 'lhs':\n",
+    "        sampler = qmc.LatinHypercube(d=n_dim, seed=seed)\n",
+    "        return sampler.random(n=n_points)\n",
+    "    elif design_type == 'random':\n",
+    "        rng = np.random.default_rng(seed)\n",
+    "        return rng.random(size=(n_points, n_dim))\n",
+    "    else:\n",
+    "        raise ValueError(\"Unknown design type\")\n",
+    "\n",
+    "# --- Analysis Parameters ---\n",
+    "N_DIM = 2\n",
+    "Q_EXP = 2.0\n",
+    "P_NORM = 2.0 # Euclidean distance\n",
+    "\n",
+    "designs_to_compare = {\n",
+    "    \"Good Design (LHS, N=20)\":   generate_design(150, N_DIM, 42, 'lhs'),\n",
+    "    \"Poor Design (Random, N=20)\": generate_design(150, N_DIM, 42, 'random'),\n",
+    "    \"Good Design (LHS, N=50)\":   generate_design(151, N_DIM, 42, 'lhs'),\n",
+    "    \"Poor Design (Random, N=50)\": generate_design(151, N_DIM, 42, 'random')\n",
+    "}\n",
+    "\n",
+    "results = []\n",
+    "\n",
+    "print(\"Running comparative analysis...\\n\")\n",
+    "for label, design in designs_to_compare.items():\n",
+    "    # Calculate original (extensive) metric\n",
+    "    original_score = mmphi(design, q=Q_EXP, p=P_NORM)\n",
+    "    \n",
+    "    # Calculate new (intensive) metric\n",
+    "    intensive_score = mmphi_intensive(design, q=Q_EXP, p=P_NORM)\n",
+    "    \n",
+    "    results.append({\n",
+    "        'Design': label,\n",
+    "        'Original mmphi': f\"{original_score:.4f}\",\n",
+    "        'Intensive mmphi': f\"{intensive_score:.4f}\"\n",
+    "    })\n",
+    "\n",
+    "# --- Print Results Table ---\n",
+    "headers = list(results[0].keys())\n",
+    "widths = {h: max(len(h), max(len(row[h]) for row in results)) for h in headers}\n",
+    "\n",
+    "header_line = \" | \".join(h.ljust(widths[h]) for h in headers)\n",
+    "print(header_line)\n",
+    "print(\"-\" * len(header_line))\n",
+    "\n",
+    "for row in results:\n",
+    "    row_line = \" | \".join(row[h].ljust(widths[h]) for h in headers)\n",
+    "    print(row_line)\n",
+    "\n",
+    "print(\"\\n--- Interpretation ---\")\n",
+    "print(\"Notice how 'Original mmphi' scores are not comparable between N=20 and N=50 designs.\")\n",
+    "print(\"The 'Intensive mmphi' scores, however, are comparable. The two 'Good' LHS designs have\")\n",
+    "print(\"similar low scores, which are clearly better (lower) than the scores for the 'Poor' random designs.\")\n"
+   ]
   },
   {
    "cell_type": "code",
diff --git a/src/spotpython/utils/sampling.py b/src/spotpython/utils/sampling.py
@@ -881,3 +881,73 @@ def subset(X: np.ndarray, ns: int) -> Tuple[np.ndarray, np.ndarray]:
             Xs[j, :] = orig_point
 
     return Xs, Xr
+
+
+def mmphi_intensive(X: np.ndarray, q: Optional[float] = 2.0, p: Optional[float] = 2.0) -> float:
+    """
+    Calculates a size-invariant Morris-Mitchell criterion.
+
+    This "intensive" version of the criterion allows for the comparison of
+    sampling plans with different sample sizes by normalizing for the number
+    of point pairs. A smaller value indicates a better (more space-filling)
+    design.
+
+    Args:
+        X (np.ndarray):
+            A 2D array representing the sampling plan (shape: (n, d)).
+        q (float, optional):
+            The exponent used in the computation of the metric. Defaults to 2.0.
+        p (float, optional):
+            The distance norm to use (e.g., p=1 for Manhattan, p=2 for Euclidean).
+            Defaults to 2.0.
+
+    Returns:
+        float:
+            The size-invariant space-fillingness metric. Smaller is better.
+
+    Examples:
+        >>> import numpy as np
+        >>> from spotpython.utils.sampling import mmphi_intensive
+        >>> # Create a simple 3-point sampling plan in 2D
+        >>> X = np.array([
+        ...     [0.0, 0.0],
+        ...     [0.5, 0.5],
+        ...     [1.0, 1.0]
+        ... ])
+        >>> # Calculate the intensive space-fillingness metric with q=2, using Euclidean distances (p=2)
+        >>> quality = mmphi_intensive(X, q=2, p=2)
+        >>> print(quality)
+    """
+    # Ensure there are no duplicate points
+    if X.shape[0] != len(np.unique(X, axis=0)):
+        X = np.unique(X, axis=0)
+
+    n_points = X.shape[0]
+
+    # The criterion is not well-defined for fewer than 2 points.
+    if n_points < 2:
+        return np.inf
+
+    # Get the unique distances and their multiplicities
+    J, d = jd(X, p=p)
+
+    # If all points are identical, the design is infinitely bad.
+    if d.size == 0:
+        return np.inf
+
+    # Calculate the number of unique pairs of points
+    M = n_points * (n_points - 1) / 2
+
+    try:
+        # Calculate the sum term of the original mmphi
+        sum_term = np.sum(J * (d ** (-q)))
+        # Normalize the sum by M before taking the final root
+        intensive_phiq = (sum_term / M) ** (1.0 / q)
+    except ZeroDivisionError:
+        return np.inf
+    except FloatingPointError:
+        return np.inf
+    except Exception:
+        return np.inf
+
+    return intensive_phiq
diff --git a/test/test_sampling_mmphi_intensive.py b/test/test_sampling_mmphi_intensive.py
@@ -0,0 +1,49 @@
+import numpy as np
+import pytest
+from spotpython.utils.sampling import mmphi_intensive
+from spotpython.utils import sampling
+from scipy.spatial.distance import pdist
+from spotpython.utils import sampling
+
+class DummyJD:
+    """Helper to monkeypatch jd for controlled testing."""
+    def __init__(self, J, d):
+        self.J = J
+        self.d = d
+    def __call__(self, X, p=2.0):
+        return self.J, self.d
+
+def test_mmphi_intensive_basic(monkeypatch):
+    # Use a simple 2D square, distances are all 1 or sqrt(2)
+    X = np.array([
+        [0, 0],
+        [1, 0],
+        [0, 1],
+        [1, 1]
+    ])
+    # Patch jd to use real distances for this test
+    orig_jd = getattr(sampling, "jd", None)
+    def real_jd(X, p=2.0):
+        dists = pdist(X, metric="minkowski", p=p)
+        # Count unique distances and their multiplicities
+        vals, counts = np.unique(np.round(dists, 8), return_counts=True)
+        return counts, vals
+    monkeypatch.setattr(sampling, "jd", real_jd)
+    val = mmphi_intensive(X, q=2.0, p=2.0)
+    assert np.isscalar(val)
+    assert val > 0
+    if orig_jd:
+        monkeypatch.setattr(sampling, "jd", orig_jd)
+
+def test_mmphi_intensive_duplicates(monkeypatch):
+    # All points identical: should return np.inf
+    X = np.ones((4, 2))
+    monkeypatch.setattr(sampling, "jd", lambda X, p=2.0: (np.array([]), np.array([])))
+    val = mmphi_intensive(X)
+    assert val == np.inf
+
+def test_mmphi_intensive_too_few_points():
+    # Only one point: should return np.inf
+    X = np.array([[0.5, 0.5]])
+    val = mmphi_intensive(X)
+    assert val == np.inf