From 8c908af116846334eaab6b2c35a65e08041737fc Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Wed, 27 May 2026 23:04:37 +0200 Subject: [PATCH 01/68] docs: update benchmark readme --- benchmarks/README.md | 114 ++++++++++++++++++------------------------- 1 file changed, 48 insertions(+), 66 deletions(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index 22ac73ce..c2d1df04 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -1,94 +1,76 @@ # Internal Performance Benchmarks -Measures linopy's own performance (build time, LP write speed, memory usage) across problem sizes using [pytest-benchmark](https://pytest-benchmark.readthedocs.io/) and [pytest-memray](https://pytest-memray.readthedocs.io/). Use these to check whether a code change introduces a regression or improvement. +This suite benchmarks the **linopy part end-to-end** in two phases: -> **Note:** The `benchmark/` directory (singular) contains *external* benchmarks comparing linopy against other modeling frameworks. This directory (`benchmarks/`) is for *internal* performance tracking only. +1. **Build**: construct the linopy model. +2. **Solver handoff**: convert a built model into solver-consumable form. -## Setup - -```bash -pip install -e ".[benchmarks]" -``` - -## Running benchmarks +> **Note:** `benchmark/` (singular) is for external framework comparisons. `benchmarks/` is only for internal linopy performance tracking. -```bash -# Quick smoke test (small sizes only) -pytest benchmarks/ --quick - -# Full timing benchmarks -pytest benchmarks/test_build.py benchmarks/test_lp_write.py benchmarks/test_matrices.py +## What is covered -# Run a specific model -pytest benchmarks/test_build.py -k basic -``` +- **Build** (`benchmarks/test_build.py`): variable creation, expression construction, constraints, objective. 
+- **Solver handoff**: + - canonical in-memory (`benchmarks/test_matrices.py`) via `A`, `b`, `c`, bounds, labels (**required**), + - file handoff (`benchmarks/test_lp_write.py`) via LP serialization (**optional**), + - direct API handoff (e.g. `to_highspy`) when enabled (**optional**, solver-specific). -## Comparing timing between branches +## What is not covered -```bash -# Save baseline results on master -git checkout master -pytest benchmarks/test_build.py --benchmark-save=master +- Solver algorithm performance (optimize/solve runtime). +- Cross-solver ranking. +- Nonlinear/quadratic benchmark suites. -# Switch to feature branch and compare -git checkout my-feature -pytest benchmarks/test_build.py --benchmark-save=my-feature --benchmark-compare=0001_master +## Models -# Compare saved results without re-running -pytest-benchmark compare 0001_master 0002_my-feature --columns=median,iqr -``` +Core models: -Results are stored in `.benchmarks/` (gitignored). +- `basic` +- `knapsack` +- `expression_arithmetic` +- `sparse_network` -## Memory benchmarks +Extended (optional dependency): -`memory.py` runs each test in a separate process with pytest-memray to get accurate per-test peak memory (including C/numpy allocations). Results are saved as JSON and can be compared across branches. +- `pypsa_scigrid` -By default, only the build phase (`test_build.py`) is measured. Unlike timing benchmarks where `benchmark()` isolates the measured function, memray tracks all allocations within a test — including model construction in setup. This means LP write and matrix tests would report build + phase memory combined, making the phase-specific contribution impossible to isolate. Since model construction dominates memory usage, measuring build alone gives the most actionable numbers. 
+## Setup ```bash -# Save baseline on master -git checkout master -python benchmarks/memory.py save master +pip install -e ".[benchmarks]" +``` -# Save feature branch -git checkout my-feature -python benchmarks/memory.py save my-feature +## Run benchmarks -# Compare -python benchmarks/memory.py compare master my-feature +```bash +# Quick smoke run +pytest benchmarks/ --quick -# Quick mode (smaller sizes, faster) -python benchmarks/memory.py save master --quick +# Full timing run (build + handoff) +pytest benchmarks/test_build.py benchmarks/test_matrices.py benchmarks/test_lp_write.py -# Measure a specific phase (includes build overhead) -python benchmarks/memory.py save master --test-path benchmarks/test_lp_write.py +# Single model +pytest benchmarks/test_build.py -k basic ``` -Results are stored in `.benchmarks/memory/` (gitignored). Requires Linux or macOS (memray is not available on Windows). - -> **Note:** Small tests (~5 MiB) are near the import-overhead floor and may show noise of ~1 MiB between runs. Focus on larger tests for meaningful memory comparisons. Do not combine `--memray` with timing benchmarks — memray adds ~2x overhead that invalidates timing results. +## Metrics -## Models +- **Time**: pytest-benchmark median runtime (IQR for stability). +- **Memory**: pytest-memray peak RSS (MiB), primarily tracked for Build. -| Model | Description | Sizes | -|-------|-------------|-------| -| `basic` | Dense N*N model, 2*N^2 vars/cons | 10 — 1600 | -| `knapsack` | N binary variables, 1 constraint | 100 — 1M | -| `expression_arithmetic` | Broadcasting, scaling, summation across dims | 10 — 1000 | -| `sparse_network` | Ring network with mismatched bus/line coords | 10 — 1000 | -| `pypsa_scigrid` | Real power system (requires `pypsa`) | 10 — 200 snapshots | +## Results and history -## Phases +- Raw outputs live in `.benchmarks/` (gitignored). +- Store comparison snapshots as JSON and compare to a rolling `master` baseline. 
-| Phase | File | What it measures | -|-------|------|------------------| -| Build | `test_build.py` | Model construction (add_variables, add_constraints, add_objective) | -| LP write | `test_lp_write.py` | Writing the model to an LP file | -| Matrices | `test_matrices.py` | Generating sparse matrices (A, b, c, bounds) from the model | +```bash +# Timing snapshot +pytest benchmarks/test_build.py benchmarks/test_matrices.py benchmarks/test_lp_write.py \ + --benchmark-json ".benchmarks/timing-$(date +%Y%m%d-%H%M%S).json" -## Adding a new model +# Memory snapshot (Build by default) +python benchmarks/memory.py save "$(git rev-parse --short HEAD)" -1. Create `benchmarks/models/my_model.py` with a `build_my_model(n)` function and a `SIZES` list -2. Add parametrized tests in the relevant `test_*.py` files -3. Add a quick threshold in `conftest.py` +# Compare memory snapshots +python benchmarks/memory.py compare +``` From 413f1c6adbf146809748efad3cea418578d32738 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Thu, 28 May 2026 08:37:30 +0200 Subject: [PATCH 02/68] benchmarks: reusable model registry, new model types, new phases, CI smoke Refactors the internal benchmark suite around a reusable ModelSpec / REGISTRY pattern so adding a model is one self-registering file with metadata (features, applicable phases, sizes, optional deps). Other tests and scripts can import it via `from benchmarks import REGISTRY`. New model specs cover gaps in the existing coverage: - milp: general (non-binary) integers (capacitated facility location) - qp: continuous quadratic objective (diagonal portfolio) - sos: SOS1 multi-mode generation (Model.add_sos_constraints) - piecewise: piecewise-linear fuel cost (Model.add_piecewise_formulation) - masked: sparse-route transportation using mask= on add_variables SOS and piecewise specs gate their own registration on API availability, so the suite stays runnable on older linopy. 
New phase tests: - test_solver_handoff.py: parametrizes lp.io.to_highspy/to_gurobipy/ to_mosek/to_xpress across applicable models, skipping per-solver when the solver isn't installed. Uses stable lp.io wrappers (not the new Solver.from_name API) for backward compatibility. - test_netcdf.py: separate to_netcdf / read_netcdf benchmarks. CI: new benchmark-smoke.yml runs the suite under --quick --benchmark-disable on PRs, so refactors that break a model spec get caught early. Timings stay off CI (~35s smoke locally, no regression tracking). Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/benchmark-smoke.yml | 41 ++++++ benchmarks/README.md | 115 ++++++++++----- benchmarks/__init__.py | 84 ++++++++++- benchmarks/conftest.py | 24 ++- benchmarks/models/__init__.py | 32 +++- benchmarks/models/basic.py | 16 +- benchmarks/models/expression_arithmetic.py | 14 +- benchmarks/models/knapsack.py | 17 ++- benchmarks/models/masked.py | 90 ++++++++++++ benchmarks/models/milp.py | 79 ++++++++++ benchmarks/models/piecewise.py | 92 ++++++++++++ benchmarks/models/pypsa_scigrid.py | 18 ++- benchmarks/models/qp.py | 65 ++++++++ benchmarks/models/sos.py | 99 +++++++++++++ benchmarks/models/sparse_network.py | 14 +- benchmarks/registry.py | 163 +++++++++++++++++++++ benchmarks/test_build.py | 51 +------ benchmarks/test_lp_write.py | 59 +------- benchmarks/test_matrices.py | 39 ++--- benchmarks/test_netcdf.py | 34 +++++ benchmarks/test_solver_handoff.py | 61 ++++++++ 21 files changed, 1027 insertions(+), 180 deletions(-) create mode 100644 .github/workflows/benchmark-smoke.yml create mode 100644 benchmarks/models/masked.py create mode 100644 benchmarks/models/milp.py create mode 100644 benchmarks/models/piecewise.py create mode 100644 benchmarks/models/qp.py create mode 100644 benchmarks/models/sos.py create mode 100644 benchmarks/registry.py create mode 100644 benchmarks/test_netcdf.py create mode 100644 benchmarks/test_solver_handoff.py diff --git 
a/.github/workflows/benchmark-smoke.yml b/.github/workflows/benchmark-smoke.yml new file mode 100644 index 00000000..c6b37028 --- /dev/null +++ b/.github/workflows/benchmark-smoke.yml @@ -0,0 +1,41 @@ +name: Benchmark smoke + +# Runs the internal benchmark suite under --quick --benchmark-disable so every +# model spec is built and every phase fires at least once, but no timings are +# recorded. The goal is "did refactor X break a model spec?" — not regression +# tracking, which is done out-of-CI on dedicated hardware. + +on: + push: + branches: [ master ] + pull_request: + branches: [ '*' ] + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + smoke: + name: Benchmark smoke (quick) + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 # setuptools_scm + + - name: Set up Python 3.12 + uses: actions/setup-python@v6 + with: + python-version: "3.12" + + - name: Install package and benchmark dependencies + run: | + python -m pip install uv + # [dev] for pytest + netcdf4; [benchmarks] for pytest-benchmark + pypsa. + uv pip install --system -e ".[dev,benchmarks]" + + - name: Run benchmark smoke + run: | + pytest benchmarks/ --quick --benchmark-disable -q diff --git a/benchmarks/README.md b/benchmarks/README.md index c2d1df04..59cc0594 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -1,71 +1,120 @@ # Internal Performance Benchmarks -This suite benchmarks the **linopy part end-to-end** in two phases: +This suite benchmarks the **linopy part end-to-end** across three phases: -1. **Build**: construct the linopy model. -2. **Solver handoff**: convert a built model into solver-consumable form. +1. **Build** — construct the linopy model. +2. **Solver handoff** — convert a built model into solver-consumable form + (in-memory matrices, LP file, native solver instance, netCDF). +3. **Persistence round-trip** — `to_netcdf` / `read_netcdf`. 
-> **Note:** `benchmark/` (singular) is for external framework comparisons. `benchmarks/` is only for internal linopy performance tracking. +> **Note:** `benchmark/` (singular) is for external framework comparisons. +> `benchmarks/` is only for internal linopy performance tracking. ## What is covered -- **Build** (`benchmarks/test_build.py`): variable creation, expression construction, constraints, objective. -- **Solver handoff**: - - canonical in-memory (`benchmarks/test_matrices.py`) via `A`, `b`, `c`, bounds, labels (**required**), - - file handoff (`benchmarks/test_lp_write.py`) via LP serialization (**optional**), - - direct API handoff (e.g. `to_highspy`) when enabled (**optional**, solver-specific). +| Phase | Test file | Notes | +| --------------------- | ------------------------------- | -------------------------------------------------- | +| Build | `test_build.py` | variables / expressions / constraints / objective | +| Matrices | `test_matrices.py` | `A`, `b`, `c`, bounds, labels, `Q` for QP | +| LP write | `test_lp_write.py` | `model.to_file(...)` | +| netCDF write/read | `test_netcdf.py` | `to_netcdf` / `read_netcdf` | +| Solver handoff | `test_solver_handoff.py` | `lp.io.to_highspy / to_gurobipy / to_mosek / to_xpress` — skipped per-solver when not installed | +| PyPSA carbon handoff | `test_pypsa_carbon_management.py` | `set_names=True/False`, `freeze_constraints=True/False` | -## What is not covered - -- Solver algorithm performance (optimize/solve runtime). -- Cross-solver ranking. -- Nonlinear/quadratic benchmark suites. +What we *don't* cover: solver algorithm performance (`Solver.solve()` +runtime), cross-solver ranking, nonlinear / general-quadratic constraint +suites. ## Models -Core models: +The suite is driven by a **reusable model registry**. 
Each model file under +`benchmarks/models/` exposes a `build_<name>(size) -> linopy.Model` callable +and a module-level `SPEC` describing features, applicable phases, default +sizes, and optional dependencies. + +| Name | Features | Typical use | +| ----------------------- | ------------------- | --------------------------------------------------- | +| `basic` | continuous | dense LP scaling | +| `knapsack` | binary | MIP binary-section path | +| `expression_arithmetic` | continuous | stresses `+`, `*`, `sum`, broadcasting | +| `sparse_network` | continuous | mismatched-coordinate / sparse coefficient handling | +| `milp` | integer | general-integer (non-binary) MIP path | +| `qp` | quadratic | continuous QP / `matrices.Q` path | +| `sos` *(linopy ≥ recent)* | sos | `Model.add_sos_constraints` + LP SOS section | +| `piecewise` *(linopy ≥ recent)* | piecewise | `Model.add_piecewise_formulation` | +| `masked` | masked | `mask=` on `add_variables` / `add_constraints` | +| `pypsa_scigrid` *(optional)* | continuous | real PyPSA model | + +The `sos` and `piecewise` specs are skipped automatically if the underlying +APIs aren't present in the installed linopy. + +### Reusing the registry outside the suite + +The registry is a plain importable object — use it from any test, script, +or profiling session: + +```python +from benchmarks import REGISTRY -- `basic` -- `knapsack` -- `expression_arithmetic` -- `sparse_network` +# Look up by name +model = REGISTRY["basic"].build(100) -Extended (optional dependency): +# Iterate (e.g. parametrize your own test) +for spec in REGISTRY.values(): + m = spec.build(spec.sizes[0]) + ... -- `pypsa_scigrid` +# Filter by feature or phase +from benchmarks import filter_by, QUADRATIC, TO_GUROBIPY + +qp_specs = filter_by(has_feature=QUADRATIC) +gurobi_specs = filter_by(has_phase=TO_GUROBIPY) +``` + +To add a new model, drop a file under `benchmarks/models/`, expose a +`build_<name>(size)`, and call `register(ModelSpec(...))`. 
Import it from +`benchmarks/models/__init__.py` so the registration fires. ## Setup ```bash -pip install -e ".[benchmarks]" +uv sync --extra dev --extra solvers +source .venv/bin/activate ``` ## Run benchmarks ```bash -# Quick smoke run -pytest benchmarks/ --quick +# Quick smoke run (small sizes only, no timing) +pytest benchmarks/ --quick --benchmark-disable + +# Full timing run +pytest benchmarks/ --benchmark-only + +# A single phase +pytest benchmarks/test_build.py -# Full timing run (build + handoff) -pytest benchmarks/test_build.py benchmarks/test_matrices.py benchmarks/test_lp_write.py +# A single model across all phases +pytest benchmarks/ -k basic -# Single model -pytest benchmarks/test_build.py -k basic +# A single (phase, model) pair +pytest benchmarks/test_lp_write.py -k "knapsack and n=1000" ``` ## Metrics -- **Time**: pytest-benchmark median runtime (IQR for stability). -- **Memory**: pytest-memray peak RSS (MiB), primarily tracked for Build. +- **Time** — pytest-benchmark median runtime (IQR for stability). +- **Memory** — pytest-memray peak RSS (MiB), tracked for Build only because + later phases include build allocations and make attribution unreliable. ## Results and history -- Raw outputs live in `.benchmarks/` (gitignored). -- Store comparison snapshots as JSON and compare to a rolling `master` baseline. +Raw outputs live in `.benchmarks/` (gitignored). 
Store comparison snapshots +as JSON and compare to a rolling `master` baseline: ```bash # Timing snapshot -pytest benchmarks/test_build.py benchmarks/test_matrices.py benchmarks/test_lp_write.py \ +pytest benchmarks/ \ --benchmark-json ".benchmarks/timing-$(date +%Y%m%d-%H%M%S).json" # Memory snapshot (Build by default) diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py index 6bf202cc..ceaaff77 100644 --- a/benchmarks/__init__.py +++ b/benchmarks/__init__.py @@ -1 +1,83 @@ -"""Linopy benchmark suite — run with ``pytest benchmarks/`` (use ``--quick`` for smaller sizes).""" +""" +Linopy benchmark suite. + +Run with ``pytest benchmarks/`` (use ``--quick`` for smaller sizes). + +This package also exposes a **reusable model registry** for any test, profiling +session, or example that wants ready-made linopy models of varying sizes and +features. Each entry exposes a ``build(size) -> linopy.Model`` callable plus +metadata:: + + from benchmarks import REGISTRY, QUADRATIC + + # Look up by name + model = REGISTRY["basic"].build(100) + + # Iterate / filter + for spec in REGISTRY.values(): + m = spec.build(spec.sizes[0]) + ... + + from benchmarks import filter_by + qp_specs = filter_by(has_feature=QUADRATIC) +""" + +# Importing the models package triggers each module's ``register(...)`` call. 
+from benchmarks import models # noqa: F401, E402 +from benchmarks.registry import ( # noqa: F401 — re-export + ALL_FEATURES, + ALL_PHASES, + BINARY, + BUILD, + CONTINUOUS, + DEFAULT_PHASES, + INTEGER, + LP_WRITE, + MASKED, + MATRICES, + NETCDF, + PIECEWISE, + QUADRATIC, + REGISTRY, + SOLVER_BUILD, + SOS, + TO_GUROBIPY, + TO_HIGHSPY, + TO_MOSEK, + TO_XPRESS, + ModelSpec, + filter_by, + get, + iter_params, + param_ids, + register, +) + +__all__ = [ + "ALL_FEATURES", + "ALL_PHASES", + "BINARY", + "BUILD", + "CONTINUOUS", + "DEFAULT_PHASES", + "INTEGER", + "LP_WRITE", + "MASKED", + "MATRICES", + "ModelSpec", + "NETCDF", + "PIECEWISE", + "QUADRATIC", + "REGISTRY", + "SOLVER_BUILD", + "SOS", + "TO_GUROBIPY", + "TO_HIGHSPY", + "TO_MOSEK", + "TO_XPRESS", + "filter_by", + "get", + "iter_params", + "param_ids", + "register", +] diff --git a/benchmarks/conftest.py b/benchmarks/conftest.py index 6f9a9467..f5c31df2 100644 --- a/benchmarks/conftest.py +++ b/benchmarks/conftest.py @@ -4,13 +4,7 @@ import pytest -QUICK_THRESHOLD = { - "basic": 100, - "knapsack": 10_000, - "pypsa_scigrid": 50, - "expression_arithmetic": 100, - "sparse_network": 100, -} +from benchmarks.registry import ModelSpec def pytest_addoption(parser): @@ -22,9 +16,13 @@ def pytest_addoption(parser): ) -def skip_if_quick(request, model: str, size: int): - """Skip large sizes when --quick is passed.""" - if request.config.getoption("--quick"): - threshold = QUICK_THRESHOLD.get(model, float("inf")) - if size > threshold: - pytest.skip(f"--quick: skipping {model} size {size}") +def maybe_skip(request: pytest.FixtureRequest, spec: ModelSpec, size: int) -> None: + """ + Apply ``--quick`` size cap and ``spec.requires`` importorskips. + + Centralised so every phase test stays a one-liner. 
+ """ + for mod in spec.requires: + pytest.importorskip(mod) + if request.config.getoption("--quick") and size > spec.quick_threshold: + pytest.skip(f"--quick: skipping {spec.name} size {size}") diff --git a/benchmarks/models/__init__.py b/benchmarks/models/__init__.py index fcff9caf..8e5b9ca2 100644 --- a/benchmarks/models/__init__.py +++ b/benchmarks/models/__init__.py @@ -1,4 +1,10 @@ -"""Model builders for benchmarks.""" +""" +Model builders for benchmarks. + +Importing this package registers every model in :data:`benchmarks.registry.REGISTRY`. +Each module exposes a ``build_<name>(size) -> linopy.Model`` callable and a +module-level ``SPEC`` :class:`~benchmarks.registry.ModelSpec`. +""" from benchmarks.models.basic import SIZES as BASIC_SIZES from benchmarks.models.basic import build_basic @@ -6,6 +12,18 @@ from benchmarks.models.expression_arithmetic import build_expression_arithmetic from benchmarks.models.knapsack import SIZES as KNAPSACK_SIZES from benchmarks.models.knapsack import build_knapsack +from benchmarks.models.masked import SIZES as MASKED_SIZES +from benchmarks.models.masked import build_masked +from benchmarks.models.milp import SIZES as MILP_SIZES +from benchmarks.models.milp import build_milp +from benchmarks.models.piecewise import SIZES as PIECEWISE_SIZES +from benchmarks.models.piecewise import build_piecewise +from benchmarks.models.pypsa_scigrid import SIZES as PYPSA_SIZES +from benchmarks.models.pypsa_scigrid import build_pypsa_scigrid +from benchmarks.models.qp import SIZES as QP_SIZES +from benchmarks.models.qp import build_qp +from benchmarks.models.sos import SIZES as SOS_SIZES +from benchmarks.models.sos import build_sos from benchmarks.models.sparse_network import SIZES as SPARSE_SIZES from benchmarks.models.sparse_network import build_sparse_network @@ -13,9 +31,21 @@ "BASIC_SIZES", "EXPR_SIZES", "KNAPSACK_SIZES", + "MASKED_SIZES", + "MILP_SIZES", + "PIECEWISE_SIZES", + "PYPSA_SIZES", + "QP_SIZES", + "SOS_SIZES", "SPARSE_SIZES", 
"build_basic", "build_expression_arithmetic", "build_knapsack", + "build_masked", + "build_milp", + "build_piecewise", + "build_pypsa_scigrid", + "build_qp", + "build_sos", "build_sparse_network", ] diff --git a/benchmarks/models/basic.py b/benchmarks/models/basic.py index 2aea49d9..4f1205a7 100644 --- a/benchmarks/models/basic.py +++ b/benchmarks/models/basic.py @@ -1,10 +1,11 @@ -"""Basic benchmark model: 2*N^2 variables and constraints.""" +"""Basic benchmark model: 2*N^2 variables and constraints (continuous LP).""" from __future__ import annotations import linopy +from benchmarks.registry import CONTINUOUS, ModelSpec, register -SIZES = [10, 50, 100, 250, 500, 1000, 1600] +SIZES = (10, 50, 100, 250, 500, 1000, 1600) def build_basic(n: int) -> linopy.Model: @@ -16,3 +17,14 @@ def build_basic(n: int) -> linopy.Model: m.add_constraints(x - y >= -5, name="lower") m.add_objective(x.sum() + 2 * y.sum()) return m + + +SPEC = register( + ModelSpec( + name="basic", + build=build_basic, + sizes=SIZES, + features=frozenset({CONTINUOUS}), + quick_threshold=100, + ) +) diff --git a/benchmarks/models/expression_arithmetic.py b/benchmarks/models/expression_arithmetic.py index 339c651d..795fce59 100644 --- a/benchmarks/models/expression_arithmetic.py +++ b/benchmarks/models/expression_arithmetic.py @@ -5,8 +5,9 @@ import numpy as np import linopy +from benchmarks.registry import CONTINUOUS, ModelSpec, register -SIZES = [10, 50, 100, 250, 500, 1000] +SIZES = (10, 50, 100, 250, 500, 1000) def build_expression_arithmetic(n: int) -> linopy.Model: @@ -28,3 +29,14 @@ def build_expression_arithmetic(n: int) -> linopy.Model: m.add_constraints(expr1.sum("j") >= -10, name="row_sum") m.add_objective(combined.sum()) return m + + +SPEC = register( + ModelSpec( + name="expression_arithmetic", + build=build_expression_arithmetic, + sizes=SIZES, + features=frozenset({CONTINUOUS}), + quick_threshold=100, + ) +) diff --git a/benchmarks/models/knapsack.py b/benchmarks/models/knapsack.py index 
83ce7394..20aa35ec 100644 --- a/benchmarks/models/knapsack.py +++ b/benchmarks/models/knapsack.py @@ -1,12 +1,13 @@ -"""Knapsack benchmark model: N binary variables, 1 constraint.""" +"""Knapsack benchmark model: N binary variables, 1 constraint (MILP, binary).""" from __future__ import annotations import numpy as np import linopy +from benchmarks.registry import BINARY, DEFAULT_PHASES, ModelSpec, register -SIZES = [100, 1_000, 10_000, 100_000, 1_000_000] +SIZES = (100, 1_000, 10_000, 100_000, 1_000_000) def build_knapsack(n: int) -> linopy.Model: @@ -21,3 +22,15 @@ def build_knapsack(n: int) -> linopy.Model: m.add_constraints((x * weights).sum() <= capacity, name="capacity") m.add_objective(-(x * values).sum()) return m + + +SPEC = register( + ModelSpec( + name="knapsack", + build=build_knapsack, + sizes=SIZES, + features=frozenset({BINARY}), + phases=DEFAULT_PHASES, # HiGHS handles binary; matrices handles MILP + quick_threshold=10_000, + ) +) diff --git a/benchmarks/models/masked.py b/benchmarks/models/masked.py new file mode 100644 index 00000000..190b4031 --- /dev/null +++ b/benchmarks/models/masked.py @@ -0,0 +1,90 @@ +""" +Masked-variables benchmark: transportation with sparse allowed routes. + +A standard transportation LP, but only a sparse subset of (origin, dest) pairs +are valid routes. The ``mask=`` keyword on ``add_variables`` skips the rest, +keeping the variable count sub-quadratic. + +Decision variables: + x[origin, dest] >= 0 continuous, only created for allowed routes + +Constraints: + sum_dest x[o, .] <= supply[o] + sum_orig x[., d] == demand[d] + +Objective: + minimize sum cost[o, d] * x[o, d] + +The mask is dense at small sizes and sparser at large sizes, mimicking +real-world transport networks where each origin only serves a fixed +fan-out regardless of total node count. 
+""" + +from __future__ import annotations + +import numpy as np +import xarray as xr + +import linopy +from benchmarks.registry import ( + CONTINUOUS, + DEFAULT_PHASES, + MASKED, + ModelSpec, + register, +) + +SIZES = (10, 50, 100, 500, 1000) + + +def build_masked(n: int) -> linopy.Model: + rng = np.random.default_rng(42) + origins = np.arange(n) + dests = np.arange(n) + + # Each origin serves at most ~min(20, n) destinations. + fan_out = min(20, n) + mask_np = np.zeros((n, n), dtype=bool) + for o in range(n): + # Deterministic fan-out so size determines connectivity. + targets = rng.choice(n, size=fan_out, replace=False) + mask_np[o, targets] = True + + mask = xr.DataArray(mask_np, coords=[("origin", origins), ("dest", dests)]) + cost = xr.DataArray( + rng.uniform(1, 10, size=(n, n)), + coords=[("origin", origins), ("dest", dests)], + ) + + # Supply scaled so the problem stays feasible at any size: + # each origin can ship up to ``demand_per_dest * n`` units. + demand_per_dest = 5.0 + supply_per_origin = demand_per_dest * n # plenty of slack + supply = xr.DataArray(np.full(n, supply_per_origin), coords=[("origin", origins)]) + demand = xr.DataArray(np.full(n, demand_per_dest), coords=[("dest", dests)]) + + m = linopy.Model() + x = m.add_variables( + lower=0, + coords=[("origin", origins), ("dest", dests)], + mask=mask, + name="x", + ) + + m.add_constraints(x.sum("dest") <= supply, name="supply", mask=mask.any("dest")) + m.add_constraints(x.sum("origin") == demand, name="demand", mask=mask.any("origin")) + + m.add_objective((cost * x).sum()) + return m + + +SPEC = register( + ModelSpec( + name="masked", + build=build_masked, + sizes=SIZES, + features=frozenset({CONTINUOUS, MASKED}), + phases=DEFAULT_PHASES, + quick_threshold=100, + ) +) diff --git a/benchmarks/models/milp.py b/benchmarks/models/milp.py new file mode 100644 index 00000000..bdad39ab --- /dev/null +++ b/benchmarks/models/milp.py @@ -0,0 +1,79 @@ +""" +MILP benchmark: capacitated facility 
location with general integers. + +Decision variables: + y_f in {0,1,...,K} integer "modules" to open at facility f + x_{f,c} >= 0 continuous flow from facility f to customer c + +Constraints: + sum_c x_{f,c} <= cap * y_f (capacity per facility) + sum_f x_{f,c} == d_c (demand at each customer) + +Objective: + minimize sum_{f,c} t_{f,c} * x_{f,c} + sum_f f_f * y_f + +The general-integer ``y`` exercises the matrix accessor's MIP integer-section +path and the LP-writer's general-integer block — neither the binary knapsack +nor the continuous LPs hit those paths. +""" + +from __future__ import annotations + +import numpy as np + +import linopy +from benchmarks.registry import ( + CONTINUOUS, + DEFAULT_PHASES, + INTEGER, + ModelSpec, + register, +) + +SIZES = (10, 25, 50, 100, 200) + + +def build_milp(n: int) -> linopy.Model: + rng = np.random.default_rng(42) + facilities = np.arange(n) + customers = np.arange(n) + + cap = 100.0 # capacity per module + Y_MAX = 5 # max modules per facility + transport = rng.uniform(1, 20, size=(n, n)) # per-unit shipping cost + fixed = rng.uniform(50, 200, size=n) # cost per facility module + demand = rng.uniform(20, 80, size=n) # demand at each customer + + m = linopy.Model() + y = m.add_variables( + lower=0, + upper=Y_MAX, + coords=[facilities], + dims=["facility"], + integer=True, + name="y", + ) + x = m.add_variables( + lower=0, + coords=[facilities, customers], + dims=["facility", "customer"], + name="x", + ) + + m.add_constraints(x.sum("customer") - cap * y <= 0, name="capacity") + m.add_constraints(x.sum("facility") == demand, name="demand") + + m.add_objective((transport * x).sum() + (fixed * y).sum()) + return m + + +SPEC = register( + ModelSpec( + name="milp", + build=build_milp, + sizes=SIZES, + features=frozenset({INTEGER, CONTINUOUS}), + phases=DEFAULT_PHASES, + quick_threshold=25, + ) +) diff --git a/benchmarks/models/piecewise.py b/benchmarks/models/piecewise.py new file mode 100644 index 00000000..38cfef84 --- /dev/null 
+++ b/benchmarks/models/piecewise.py @@ -0,0 +1,92 @@ +""" +Piecewise-linear benchmark: generation with piecewise fuel-cost curves. + +Each generator has a piecewise fuel cost curve pinned via +``add_piecewise_formulation``. The default ``method="auto"`` picks an +SOS2 or incremental expansion, generating auxiliary variables and +constraints — that overhead is what we want to measure. + +Decision variables: + power[gen] in [0, 100] (continuous) + fuel[gen] in [0, inf) (continuous, pinned to piecewise curve) + +Constraints: + sum_gen power[gen] >= demand + piecewise: fuel[gen] = f(power[gen]) for each gen + +Objective: + minimize sum_gen fuel[gen] +""" + +from __future__ import annotations + +import warnings + +import linopy +from benchmarks.registry import ( + CONTINUOUS, + DEFAULT_PHASES, + PIECEWISE, + ModelSpec, + register, +) + +SIZES = (10, 100, 1_000, 5_000) + +_API_AVAILABLE = hasattr(linopy.Model, "add_piecewise_formulation") and hasattr( + linopy, "EvolvingAPIWarning" +) + + +def build_piecewise(n_gens: int) -> linopy.Model: + # Shared breakpoints, broadcast across generators. + x_pts = [0.0, 30.0, 60.0, 100.0] + y_pts = [0.0, 36.0, 84.0, 170.0] # convex-ish fuel curve + + m = linopy.Model() + power = m.add_variables( + lower=0, + upper=100, + coords=[range(n_gens)], + dims=["gen"], + name="power", + ) + fuel = m.add_variables( + lower=0, + coords=[range(n_gens)], + dims=["gen"], + name="fuel", + ) + + demand = 0.5 * n_gens * x_pts[-1] + m.add_constraints(power.sum() >= demand, name="demand") + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=linopy.EvolvingAPIWarning) + m.add_piecewise_formulation( + (power, x_pts), + (fuel, y_pts), + ) + + m.add_objective(fuel.sum()) + return m + + +# ``add_piecewise_formulation`` is a recent (still-evolving) API. Skip +# registration silently on older linopy so the rest of the suite stays usable. 
+if _API_AVAILABLE: + SPEC = register( + ModelSpec( + name="piecewise", + build=build_piecewise, + sizes=SIZES, + features=frozenset({CONTINUOUS, PIECEWISE}), + # Monotonic breakpoints + ``method="auto"`` → incremental + # reformulation (pure MILP with binaries), which every supported + # solver handles. + phases=DEFAULT_PHASES, + quick_threshold=100, + ) + ) +else: + SPEC = None diff --git a/benchmarks/models/pypsa_scigrid.py b/benchmarks/models/pypsa_scigrid.py index 2fcce217..29c3e450 100644 --- a/benchmarks/models/pypsa_scigrid.py +++ b/benchmarks/models/pypsa_scigrid.py @@ -1,13 +1,15 @@ -"""PyPSA SciGrid-DE benchmark model.""" +"""PyPSA SciGrid-DE benchmark model (requires pypsa).""" from __future__ import annotations from typing import TYPE_CHECKING +from benchmarks.registry import CONTINUOUS, ModelSpec, register + if TYPE_CHECKING: import linopy -SIZES = [10, 50, 100, 200] +SIZES = (10, 50, 100, 200) def build_pypsa_scigrid(snapshots: int = 100) -> linopy.Model: @@ -18,3 +20,15 @@ def build_pypsa_scigrid(snapshots: int = 100) -> linopy.Model: n.set_snapshots(n.snapshots[:snapshots]) n.optimize.create_model() return n.model + + +SPEC = register( + ModelSpec( + name="pypsa_scigrid", + build=build_pypsa_scigrid, + sizes=SIZES, + features=frozenset({CONTINUOUS}), + quick_threshold=50, + requires=("pypsa",), + ) +) diff --git a/benchmarks/models/qp.py b/benchmarks/models/qp.py new file mode 100644 index 00000000..6e6517d2 --- /dev/null +++ b/benchmarks/models/qp.py @@ -0,0 +1,65 @@ +""" +QP benchmark: continuous quadratic objective on a portfolio-style model. + +Decision variables: + x_i >= 0 (weight on asset i, continuous) + +Constraints: + sum_i x_i == 1 + x_i <= 0.3 (no asset > 30% of portfolio) + +Objective: + minimize sum_i q_i * x_i^2 - sum_i r_i * x_i + +A pure diagonal quadratic — enough to exercise the QP build / write / matrix +paths without paying for cross-terms. 
Cross-term coupling needs single-term +factors on both sides (see ``LinearExpression._multiply_by_linear_expression``), +which is awkward to set up cleanly via the public API. +""" + +from __future__ import annotations + +import numpy as np + +import linopy +from benchmarks.registry import ( + CONTINUOUS, + DEFAULT_PHASES, + QUADRATIC, + ModelSpec, + register, +) + +SIZES = (10, 100, 1_000, 5_000, 20_000) + + +def build_qp(n_assets: int) -> linopy.Model: + rng = np.random.default_rng(42) + q = rng.uniform(0.5, 2.0, size=n_assets) + r = rng.uniform(0.05, 0.15, size=n_assets) + + m = linopy.Model() + x = m.add_variables( + lower=0, + upper=0.3, + coords=[range(n_assets)], + dims=["asset"], + name="x", + ) + + m.add_constraints(x.sum() == 1, name="budget") + + m.add_objective((q * x**2).sum() - (r * x).sum()) + return m + + +SPEC = register( + ModelSpec( + name="qp", + build=build_qp, + sizes=SIZES, + features=frozenset({CONTINUOUS, QUADRATIC}), + phases=DEFAULT_PHASES, + quick_threshold=100, + ) +) diff --git a/benchmarks/models/sos.py b/benchmarks/models/sos.py new file mode 100644 index 00000000..31cca2ea --- /dev/null +++ b/benchmarks/models/sos.py @@ -0,0 +1,99 @@ +""" +SOS1 benchmark: multi-mode generation with at-most-one-mode-per-generator. + +Each generator has ``n_modes`` operating modes (different cap/cost tradeoff). +SOS1 over the ``mode`` dimension enforces that each generator picks at most +one mode. + +Decision variables: + y[gen, mode] >= 0 continuous output per (generator, mode) + +Constraints: + y[gen, mode] <= cap[mode] + sum_{gen,mode} y >= demand_total + SOS1 over "mode" for each gen + +This benchmark exercises ``Model.add_sos_constraints`` (commits be6d3a3 / +8aa8d0c) and the LP-writer's SOS section. In linopy, native SOS support is +declared by Gurobi / Cplex / Xpress only (see ``SolverFeature.SOS_CONSTRAINTS``). +HiGHS and Mosek would need ``apply_sos_reformulation()`` first. 
+""" + +from __future__ import annotations + +import numpy as np +import xarray as xr + +import linopy +from benchmarks.registry import ( + BUILD, + CONTINUOUS, + LP_WRITE, + MATRICES, + NETCDF, + SOLVER_BUILD, + SOS, + TO_GUROBIPY, + TO_XPRESS, + ModelSpec, + register, +) + +SIZES = (10, 100, 1_000, 10_000) + +_N_MODES = 5 +_API_AVAILABLE = hasattr(linopy.Model, "add_sos_constraints") + + +def build_sos(n_gens: int) -> linopy.Model: + modes = np.arange(_N_MODES) + cap = xr.DataArray(np.linspace(20.0, 100.0, _N_MODES), coords=[("mode", modes)]) + cost = xr.DataArray(np.linspace(1.0, 8.0, _N_MODES), coords=[("mode", modes)]) + + m = linopy.Model() + y = m.add_variables( + lower=0, + upper=float(cap.max()), + coords=[range(n_gens), modes], + dims=["gen", "mode"], + name="y", + ) + + m.add_constraints(y <= cap, name="mode_cap") + demand_total = 0.4 * n_gens * float(cap.max()) + m.add_constraints(y.sum() >= demand_total, name="demand") + + m.add_sos_constraints(y, sos_type=1, sos_dim="mode") + + m.add_objective((cost * y).sum()) + return m + + +# ``add_sos_constraints`` is a recent API. On older linopy we silently skip +# registering this model — the rest of the suite stays usable. +if _API_AVAILABLE: + SPEC = register( + ModelSpec( + name="sos", + build=build_sos, + sizes=SIZES, + features=frozenset({CONTINUOUS, SOS}), + # HiGHS / Mosek lack native SOS in linopy — would need + # ``reformulate_sos=True``, which mutates the model and defeats + # the benchmark. Only solvers with native SOS appear here. 
+ phases=frozenset( + { + BUILD, + MATRICES, + LP_WRITE, + NETCDF, + SOLVER_BUILD, + TO_GUROBIPY, + TO_XPRESS, + } + ), + quick_threshold=100, + ) + ) +else: + SPEC = None diff --git a/benchmarks/models/sparse_network.py b/benchmarks/models/sparse_network.py index afc6be06..a750ae90 100644 --- a/benchmarks/models/sparse_network.py +++ b/benchmarks/models/sparse_network.py @@ -7,8 +7,9 @@ import xarray as xr import linopy +from benchmarks.registry import CONTINUOUS, ModelSpec, register -SIZES = [10, 50, 100, 250, 500, 1000] +SIZES = (10, 50, 100, 250, 500, 1000) def build_sparse_network(n_buses: int) -> linopy.Model: @@ -48,3 +49,14 @@ def build_sparse_network(n_buses: int) -> linopy.Model: m.add_objective(gen.sum()) return m + + +SPEC = register( + ModelSpec( + name="sparse_network", + build=build_sparse_network, + sizes=SIZES, + features=frozenset({CONTINUOUS}), + quick_threshold=100, + ) +) diff --git a/benchmarks/registry.py b/benchmarks/registry.py new file mode 100644 index 00000000..07e6c322 --- /dev/null +++ b/benchmarks/registry.py @@ -0,0 +1,163 @@ +""" +Reusable registry of benchmark models. + +A :class:`ModelSpec` captures everything needed to drive a model through the +benchmark suite *and* to use it from any other test or script: + +- ``build(size) -> linopy.Model`` the actual builder +- ``sizes`` canonical sizes the model has been tuned for +- ``features`` what kinds of variables / constraints it uses +- ``phases`` which benchmark phases apply (lp_write, to_highspy, ...) 
+- ``quick_threshold`` max size to keep under ``pytest --quick`` +- ``requires`` extra modules to ``pytest.importorskip`` + +Pattern for downstream use:: + + from benchmarks import REGISTRY + model = REGISTRY["basic"].build(100) + + # Or pick a subset by feature/phase: + from benchmarks import filter_by, QUADRATIC + qp_specs = filter_by(has_feature=QUADRATIC) +""" + +from __future__ import annotations + +from collections.abc import Callable, Iterator +from dataclasses import dataclass + +import linopy + +# --- Feature tags ----------------------------------------------------------- + +CONTINUOUS = "continuous" +BINARY = "binary" +INTEGER = "integer" +QUADRATIC = "quadratic" +SOS = "sos" +PIECEWISE = "piecewise" +MASKED = "masked" + +ALL_FEATURES = frozenset( + {CONTINUOUS, BINARY, INTEGER, QUADRATIC, SOS, PIECEWISE, MASKED} +) + +# --- Phase tags ------------------------------------------------------------- + +BUILD = "build" +MATRICES = "matrices" +LP_WRITE = "lp_write" +NETCDF = "netcdf" +SOLVER_BUILD = "solver_build" # generic Solver.from_name(..., io_api="direct") +TO_HIGHSPY = "to_highspy" +TO_GUROBIPY = "to_gurobipy" +TO_MOSEK = "to_mosek" +TO_XPRESS = "to_xpress" + +ALL_PHASES = frozenset( + { + BUILD, + MATRICES, + LP_WRITE, + NETCDF, + SOLVER_BUILD, + TO_HIGHSPY, + TO_GUROBIPY, + TO_MOSEK, + TO_XPRESS, + } +) + +# Phases every "well-behaved LP / MILP" can do. Models with features the +# default solvers can't ingest natively (e.g. native SOS for HiGHS) override +# this with a narrower set. +DEFAULT_PHASES = frozenset( + { + BUILD, + MATRICES, + LP_WRITE, + NETCDF, + SOLVER_BUILD, + TO_HIGHSPY, + TO_GUROBIPY, + TO_MOSEK, + TO_XPRESS, + } +) + + +@dataclass(frozen=True) +class ModelSpec: + """Declarative description of one benchmark model.""" + + name: str + build: Callable[[int], linopy.Model] + sizes: tuple[int, ...] 
+ features: frozenset[str] = frozenset({CONTINUOUS}) + phases: frozenset[str] = DEFAULT_PHASES + quick_threshold: int = 10**9 + requires: tuple[str, ...] = () + + def applies_to(self, phase: str) -> bool: + return phase in self.phases + + def has_feature(self, feature: str) -> bool: + return feature in self.features + + +REGISTRY: dict[str, ModelSpec] = {} + + +def register(spec: ModelSpec) -> ModelSpec: + """Add ``spec`` to the global registry. Returns the spec for chaining.""" + if spec.name in REGISTRY: + raise ValueError(f"model {spec.name!r} already registered") + unknown_features = spec.features - ALL_FEATURES + if unknown_features: + raise ValueError( + f"model {spec.name!r}: unknown features {sorted(unknown_features)}" + ) + unknown_phases = spec.phases - ALL_PHASES + if unknown_phases: + raise ValueError( + f"model {spec.name!r}: unknown phases {sorted(unknown_phases)}" + ) + REGISTRY[spec.name] = spec + return spec + + +def get(name: str) -> ModelSpec: + return REGISTRY[name] + + +def filter_by( + *, + has_feature: str | None = None, + has_phase: str | None = None, +) -> list[ModelSpec]: + out = [] + for spec in REGISTRY.values(): + if has_feature is not None and not spec.has_feature(has_feature): + continue + if has_phase is not None and not spec.applies_to(has_phase): + continue + out.append(spec) + return out + + +def iter_params(phase: str) -> list[tuple[ModelSpec, int]]: + """Pytest parametrize helper — flatten (spec, size) pairs for one phase.""" + return [ + (spec, size) + for spec in REGISTRY.values() + if spec.applies_to(phase) + for size in spec.sizes + ] + + +def param_ids(params: list[tuple[ModelSpec, int]]) -> list[str]: + return [f"{spec.name}-n={size}" for spec, size in params] + + +def __iter__() -> Iterator[ModelSpec]: # pragma: no cover - convenience + return iter(REGISTRY.values()) diff --git a/benchmarks/test_build.py b/benchmarks/test_build.py index f657715e..98559b3c 100644 --- a/benchmarks/test_build.py +++ 
b/benchmarks/test_build.py @@ -4,50 +4,13 @@ import pytest -from benchmarks.conftest import skip_if_quick -from benchmarks.models import ( - BASIC_SIZES, - EXPR_SIZES, - KNAPSACK_SIZES, - SPARSE_SIZES, - build_basic, - build_expression_arithmetic, - build_knapsack, - build_sparse_network, -) -from benchmarks.models.pypsa_scigrid import SIZES as PYPSA_SIZES +from benchmarks.conftest import maybe_skip +from benchmarks.registry import BUILD, iter_params, param_ids +_PARAMS = iter_params(BUILD) -@pytest.mark.parametrize("n", BASIC_SIZES, ids=[f"n={n}" for n in BASIC_SIZES]) -def test_build_basic(benchmark, n, request): - skip_if_quick(request, "basic", n) - benchmark(build_basic, n) - -@pytest.mark.parametrize("n", KNAPSACK_SIZES, ids=[f"n={n}" for n in KNAPSACK_SIZES]) -def test_build_knapsack(benchmark, n, request): - skip_if_quick(request, "knapsack", n) - benchmark(build_knapsack, n) - - -@pytest.mark.parametrize("n", EXPR_SIZES, ids=[f"n={n}" for n in EXPR_SIZES]) -def test_build_expression_arithmetic(benchmark, n, request): - skip_if_quick(request, "expression_arithmetic", n) - benchmark(build_expression_arithmetic, n) - - -@pytest.mark.parametrize("n", SPARSE_SIZES, ids=[f"n={n}" for n in SPARSE_SIZES]) -def test_build_sparse_network(benchmark, n, request): - skip_if_quick(request, "sparse_network", n) - benchmark(build_sparse_network, n) - - -@pytest.mark.parametrize( - "snapshots", PYPSA_SIZES, ids=[f"snapshots={s}" for s in PYPSA_SIZES] -) -def test_build_pypsa_scigrid(benchmark, snapshots, request): - pytest.importorskip("pypsa") - skip_if_quick(request, "pypsa_scigrid", snapshots) - from benchmarks.models.pypsa_scigrid import build_pypsa_scigrid - - benchmark(build_pypsa_scigrid, snapshots) +@pytest.mark.parametrize("spec,size", _PARAMS, ids=param_ids(_PARAMS)) +def test_build(benchmark, spec, size, request): + maybe_skip(request, spec, size) + benchmark(spec.build, size) diff --git a/benchmarks/test_lp_write.py b/benchmarks/test_lp_write.py index 
6442ccd6..ea3e04d7 100644 --- a/benchmarks/test_lp_write.py +++ b/benchmarks/test_lp_write.py @@ -4,60 +4,15 @@ import pytest -from benchmarks.conftest import skip_if_quick -from benchmarks.models import ( - BASIC_SIZES, - EXPR_SIZES, - KNAPSACK_SIZES, - SPARSE_SIZES, - build_basic, - build_expression_arithmetic, - build_knapsack, - build_sparse_network, -) -from benchmarks.models.pypsa_scigrid import SIZES as PYPSA_SIZES +from benchmarks.conftest import maybe_skip +from benchmarks.registry import LP_WRITE, iter_params, param_ids +_PARAMS = iter_params(LP_WRITE) -@pytest.mark.parametrize("n", BASIC_SIZES, ids=[f"n={n}" for n in BASIC_SIZES]) -def test_lp_write_basic(benchmark, n, request, tmp_path): - skip_if_quick(request, "basic", n) - m = build_basic(n) - lp_file = tmp_path / "model.lp" - benchmark(m.to_file, lp_file, progress=False) - - -@pytest.mark.parametrize("n", KNAPSACK_SIZES, ids=[f"n={n}" for n in KNAPSACK_SIZES]) -def test_lp_write_knapsack(benchmark, n, request, tmp_path): - skip_if_quick(request, "knapsack", n) - m = build_knapsack(n) - lp_file = tmp_path / "model.lp" - benchmark(m.to_file, lp_file, progress=False) - - -@pytest.mark.parametrize("n", EXPR_SIZES, ids=[f"n={n}" for n in EXPR_SIZES]) -def test_lp_write_expression_arithmetic(benchmark, n, request, tmp_path): - skip_if_quick(request, "expression_arithmetic", n) - m = build_expression_arithmetic(n) - lp_file = tmp_path / "model.lp" - benchmark(m.to_file, lp_file, progress=False) - - -@pytest.mark.parametrize("n", SPARSE_SIZES, ids=[f"n={n}" for n in SPARSE_SIZES]) -def test_lp_write_sparse_network(benchmark, n, request, tmp_path): - skip_if_quick(request, "sparse_network", n) - m = build_sparse_network(n) - lp_file = tmp_path / "model.lp" - benchmark(m.to_file, lp_file, progress=False) - - -@pytest.mark.parametrize( - "snapshots", PYPSA_SIZES, ids=[f"snapshots={s}" for s in PYPSA_SIZES] -) -def test_lp_write_pypsa_scigrid(benchmark, snapshots, request, tmp_path): - 
pytest.importorskip("pypsa") - skip_if_quick(request, "pypsa_scigrid", snapshots) - from benchmarks.models.pypsa_scigrid import build_pypsa_scigrid - m = build_pypsa_scigrid(snapshots) +@pytest.mark.parametrize("spec,size", _PARAMS, ids=param_ids(_PARAMS)) +def test_lp_write(benchmark, spec, size, request, tmp_path): + maybe_skip(request, spec, size) + m = spec.build(size) lp_file = tmp_path / "model.lp" benchmark(m.to_file, lp_file, progress=False) diff --git a/benchmarks/test_matrices.py b/benchmarks/test_matrices.py index 352844fb..bd36a467 100644 --- a/benchmarks/test_matrices.py +++ b/benchmarks/test_matrices.py @@ -4,19 +4,14 @@ import pytest -from benchmarks.conftest import skip_if_quick -from benchmarks.models import ( - BASIC_SIZES, - EXPR_SIZES, - SPARSE_SIZES, - build_basic, - build_expression_arithmetic, - build_sparse_network, -) +from benchmarks.conftest import maybe_skip +from benchmarks.registry import MATRICES, iter_params, param_ids + +_PARAMS = iter_params(MATRICES) def _access_matrices(m): - """Access all matrix properties to force computation.""" + """Touch every matrix property to force computation.""" matrices = m.matrices _ = matrices.A _ = matrices.b @@ -26,24 +21,12 @@ def _access_matrices(m): _ = matrices.sense _ = matrices.vlabels _ = matrices.clabels + if m.is_quadratic: + _ = matrices.Q # exercise the QP path when present -@pytest.mark.parametrize("n", BASIC_SIZES, ids=[f"n={n}" for n in BASIC_SIZES]) -def test_matrices_basic(benchmark, n, request): - skip_if_quick(request, "basic", n) - m = build_basic(n) - benchmark(_access_matrices, m) - - -@pytest.mark.parametrize("n", EXPR_SIZES, ids=[f"n={n}" for n in EXPR_SIZES]) -def test_matrices_expression_arithmetic(benchmark, n, request): - skip_if_quick(request, "expression_arithmetic", n) - m = build_expression_arithmetic(n) - benchmark(_access_matrices, m) - - -@pytest.mark.parametrize("n", SPARSE_SIZES, ids=[f"n={n}" for n in SPARSE_SIZES]) -def test_matrices_sparse_network(benchmark, 
n, request): - skip_if_quick(request, "sparse_network", n) - m = build_sparse_network(n) +@pytest.mark.parametrize("spec,size", _PARAMS, ids=param_ids(_PARAMS)) +def test_matrices(benchmark, spec, size, request): + maybe_skip(request, spec, size) + m = spec.build(size) benchmark(_access_matrices, m) diff --git a/benchmarks/test_netcdf.py b/benchmarks/test_netcdf.py new file mode 100644 index 00000000..f26ae0fc --- /dev/null +++ b/benchmarks/test_netcdf.py @@ -0,0 +1,34 @@ +""" +Benchmarks for the netCDF persistence round-trip. + +We track ``to_netcdf`` and ``read_netcdf`` separately because the cost split +matters in practice: distributed workflows tend to do many reads of a single +written artifact. +""" + +from __future__ import annotations + +import pytest + +from benchmarks.conftest import maybe_skip +from benchmarks.registry import NETCDF, iter_params, param_ids +from linopy import read_netcdf + +_PARAMS = iter_params(NETCDF) + + +@pytest.mark.parametrize("spec,size", _PARAMS, ids=param_ids(_PARAMS)) +def test_netcdf_write(benchmark, spec, size, request, tmp_path): + maybe_skip(request, spec, size) + m = spec.build(size) + out = tmp_path / "model.nc" + benchmark(m.to_netcdf, out) + + +@pytest.mark.parametrize("spec,size", _PARAMS, ids=param_ids(_PARAMS)) +def test_netcdf_read(benchmark, spec, size, request, tmp_path): + maybe_skip(request, spec, size) + m = spec.build(size) + out = tmp_path / "model.nc" + m.to_netcdf(out) + benchmark(read_netcdf, out) diff --git a/benchmarks/test_solver_handoff.py b/benchmarks/test_solver_handoff.py new file mode 100644 index 00000000..c7d649fe --- /dev/null +++ b/benchmarks/test_solver_handoff.py @@ -0,0 +1,61 @@ +""" +Benchmarks for solver handoff (model -> native solver instance). + +Times each ``linopy.io.to_<solver>`` wrapper.
These wrappers delegate to the +same direct-API build path as the new stateful Solver API +(``Solver.from_name(name, model, io_api="direct")``), so the numbers serve +double duty: regression tracking for the wrappers, *and* for the underlying +``Solver._build_direct`` paths. They've also been available for many releases +— using them keeps the suite runnable on older linopy versions. + +The actual ``Solver.solve()`` runtime (i.e. solver-side algorithm time) is +intentionally not benchmarked. +""" + +from __future__ import annotations + +import pytest + +import linopy.io as lio +from benchmarks.conftest import maybe_skip +from benchmarks.registry import ( + TO_GUROBIPY, + TO_HIGHSPY, + TO_MOSEK, + TO_XPRESS, + iter_params, +) +from linopy.solvers import available_solvers + +# (solver_name, phase tag, wrapper function) +_SOLVER_PHASES = [ + ("highs", TO_HIGHSPY, lio.to_highspy), + ("gurobi", TO_GUROBIPY, lio.to_gurobipy), + ("mosek", TO_MOSEK, lio.to_mosek), + ("xpress", TO_XPRESS, lio.to_xpress), +] + + +def _make_params(): + out = [] + for solver_name, phase, wrapper in _SOLVER_PHASES: + for spec, size in iter_params(phase): + out.append( + pytest.param( + solver_name, + wrapper, + spec, + size, + id=f"{solver_name}-{spec.name}-n={size}", + ) + ) + return out + + +@pytest.mark.parametrize("solver_name,wrapper,spec,size", _make_params()) +def test_solver_handoff(benchmark, solver_name, wrapper, spec, size, request): + if solver_name not in available_solvers: + pytest.skip(f"{solver_name} not installed") + maybe_skip(request, spec, size) + model = spec.build(size) + benchmark(wrapper, model) From a6cc83bb8fdc4d68cf05992e3a694a99562e2f3c Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Thu, 28 May 2026 10:33:30 +0200 Subject: [PATCH 03/68] benchmarks: add --long flag, gate super-long sizes by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The default ``pytest benchmarks/`` run 
now skips the slowest 1-2 sizes per spec (e.g. knapsack at 1M, basic at 1600, pypsa_scigrid at >50) so a full timing pass completes in ~2 minutes instead of 20-45. ModelSpec grows a ``long_threshold`` mirror of ``quick_threshold``: - ``--quick`` → ``size <= quick_threshold`` (CI smoke) - default → ``size <= long_threshold`` (medium-cost regression) - ``--long`` → no cap (full sweep) Verified locally: - --quick: 227 passed / 230 skipped / 35s - default: 333 passed / 124 skipped / 45s - --long : 457 passed / 0 skipped / 2m13s Co-Authored-By: Claude Opus 4.7 (1M context) --- benchmarks/README.md | 14 ++++++++- benchmarks/conftest.py | 34 ++++++++++++++++++---- benchmarks/models/basic.py | 1 + benchmarks/models/expression_arithmetic.py | 1 + benchmarks/models/knapsack.py | 1 + benchmarks/models/masked.py | 1 + benchmarks/models/milp.py | 1 + benchmarks/models/piecewise.py | 1 + benchmarks/models/pypsa_scigrid.py | 1 + benchmarks/models/qp.py | 1 + benchmarks/models/sos.py | 1 + benchmarks/models/sparse_network.py | 1 + benchmarks/registry.py | 13 ++++++++- 13 files changed, 64 insertions(+), 7 deletions(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index 59cc0594..8d2e3fd1 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -84,13 +84,25 @@ source .venv/bin/activate ## Run benchmarks +The suite has three size tiers, each spec declaring its own `quick_threshold` +and `long_threshold`: + +| Flag | Sizes included | Typical use | +| ---------- | ------------------------------------- | ---------------------------------------- | +| `--quick` | `size <= quick_threshold` | CI smoke, fast local sanity check | +| _(none)_ | `size <= long_threshold` | Default: medium-cost regression timing | +| `--long` | all sizes | Full sweep (the slow stuff — many min) | + ```bash # Quick smoke run (small sizes only, no timing) pytest benchmarks/ --quick --benchmark-disable -# Full timing run +# Default timing run (skips the super-long sizes) pytest benchmarks/ 
--benchmark-only +# Full sweep — every size on every model +pytest benchmarks/ --benchmark-only --long + # A single phase pytest benchmarks/test_build.py diff --git a/benchmarks/conftest.py b/benchmarks/conftest.py index f5c31df2..de416167 100644 --- a/benchmarks/conftest.py +++ b/benchmarks/conftest.py @@ -12,17 +12,41 @@ def pytest_addoption(parser): "--quick", action="store_true", default=False, - help="Use smaller problem sizes for quick benchmarking", + help="Use smaller problem sizes for quick benchmarking (CI smoke).", + ) + parser.addoption( + "--long", + action="store_true", + default=False, + help=( + "Include the slowest sizes (above each spec's long_threshold). " + "Default runs skip them." + ), ) def maybe_skip(request: pytest.FixtureRequest, spec: ModelSpec, size: int) -> None: """ - Apply ``--quick`` size cap and ``spec.requires`` importorskips. + Apply size-tier skips and ``spec.requires`` importorskips. - Centralised so every phase test stays a one-liner. + Tiers (most restrictive first): + + - ``--quick`` → skip ``size > quick_threshold`` + - default (no flag) → skip ``size > long_threshold`` + - ``--long`` → no size cap + + If both ``--quick`` and ``--long`` are passed, ``--quick`` wins (the more + restrictive mode is honoured). 
""" for mod in spec.requires: pytest.importorskip(mod) - if request.config.getoption("--quick") and size > spec.quick_threshold: - pytest.skip(f"--quick: skipping {spec.name} size {size}") + + quick = request.config.getoption("--quick") + long_ = request.config.getoption("--long") + + if quick: + if size > spec.quick_threshold: + pytest.skip(f"--quick: skipping {spec.name} size {size}") + elif not long_: + if size > spec.long_threshold: + pytest.skip(f"long size needs --long: skipping {spec.name} size {size}") diff --git a/benchmarks/models/basic.py b/benchmarks/models/basic.py index 4f1205a7..3f044a17 100644 --- a/benchmarks/models/basic.py +++ b/benchmarks/models/basic.py @@ -26,5 +26,6 @@ def build_basic(n: int) -> linopy.Model: sizes=SIZES, features=frozenset({CONTINUOUS}), quick_threshold=100, + long_threshold=500, ) ) diff --git a/benchmarks/models/expression_arithmetic.py b/benchmarks/models/expression_arithmetic.py index 795fce59..43bbd21b 100644 --- a/benchmarks/models/expression_arithmetic.py +++ b/benchmarks/models/expression_arithmetic.py @@ -38,5 +38,6 @@ def build_expression_arithmetic(n: int) -> linopy.Model: sizes=SIZES, features=frozenset({CONTINUOUS}), quick_threshold=100, + long_threshold=500, ) ) diff --git a/benchmarks/models/knapsack.py b/benchmarks/models/knapsack.py index 20aa35ec..5491a6b9 100644 --- a/benchmarks/models/knapsack.py +++ b/benchmarks/models/knapsack.py @@ -32,5 +32,6 @@ def build_knapsack(n: int) -> linopy.Model: features=frozenset({BINARY}), phases=DEFAULT_PHASES, # HiGHS handles binary; matrices handles MILP quick_threshold=10_000, + long_threshold=10_000, ) ) diff --git a/benchmarks/models/masked.py b/benchmarks/models/masked.py index 190b4031..366e3ff8 100644 --- a/benchmarks/models/masked.py +++ b/benchmarks/models/masked.py @@ -86,5 +86,6 @@ def build_masked(n: int) -> linopy.Model: features=frozenset({CONTINUOUS, MASKED}), phases=DEFAULT_PHASES, quick_threshold=100, + long_threshold=500, ) ) diff --git 
a/benchmarks/models/milp.py b/benchmarks/models/milp.py index bdad39ab..7c181f30 100644 --- a/benchmarks/models/milp.py +++ b/benchmarks/models/milp.py @@ -75,5 +75,6 @@ def build_milp(n: int) -> linopy.Model: features=frozenset({INTEGER, CONTINUOUS}), phases=DEFAULT_PHASES, quick_threshold=25, + long_threshold=100, ) ) diff --git a/benchmarks/models/piecewise.py b/benchmarks/models/piecewise.py index 38cfef84..389e4669 100644 --- a/benchmarks/models/piecewise.py +++ b/benchmarks/models/piecewise.py @@ -86,6 +86,7 @@ def build_piecewise(n_gens: int) -> linopy.Model: # solver handles. phases=DEFAULT_PHASES, quick_threshold=100, + long_threshold=1_000, ) ) else: diff --git a/benchmarks/models/pypsa_scigrid.py b/benchmarks/models/pypsa_scigrid.py index 29c3e450..a1428d23 100644 --- a/benchmarks/models/pypsa_scigrid.py +++ b/benchmarks/models/pypsa_scigrid.py @@ -29,6 +29,7 @@ def build_pypsa_scigrid(snapshots: int = 100) -> linopy.Model: sizes=SIZES, features=frozenset({CONTINUOUS}), quick_threshold=50, + long_threshold=50, requires=("pypsa",), ) ) diff --git a/benchmarks/models/qp.py b/benchmarks/models/qp.py index 6e6517d2..4df793da 100644 --- a/benchmarks/models/qp.py +++ b/benchmarks/models/qp.py @@ -61,5 +61,6 @@ def build_qp(n_assets: int) -> linopy.Model: features=frozenset({CONTINUOUS, QUADRATIC}), phases=DEFAULT_PHASES, quick_threshold=100, + long_threshold=1_000, ) ) diff --git a/benchmarks/models/sos.py b/benchmarks/models/sos.py index 31cca2ea..e16f5bef 100644 --- a/benchmarks/models/sos.py +++ b/benchmarks/models/sos.py @@ -93,6 +93,7 @@ def build_sos(n_gens: int) -> linopy.Model: } ), quick_threshold=100, + long_threshold=1_000, ) ) else: diff --git a/benchmarks/models/sparse_network.py b/benchmarks/models/sparse_network.py index a750ae90..47417032 100644 --- a/benchmarks/models/sparse_network.py +++ b/benchmarks/models/sparse_network.py @@ -58,5 +58,6 @@ def build_sparse_network(n_buses: int) -> linopy.Model: sizes=SIZES, 
features=frozenset({CONTINUOUS}), quick_threshold=100, + long_threshold=500, ) ) diff --git a/benchmarks/registry.py b/benchmarks/registry.py index 07e6c322..228152b8 100644 --- a/benchmarks/registry.py +++ b/benchmarks/registry.py @@ -88,7 +88,17 @@ @dataclass(frozen=True) class ModelSpec: - """Declarative description of one benchmark model.""" + """ + Declarative description of one benchmark model. + + Three size tiers gate the cost of a default ``pytest benchmarks/`` run: + + - ``size <= quick_threshold``: included under ``--quick`` (smoke / CI). + - ``size <= long_threshold``: included by default (medium-cost regression). + - ``size > long_threshold``: only included under ``--long`` (full sweep). + + Without explicit values, both thresholds default to "no cap". + """ name: str build: Callable[[int], linopy.Model] @@ -96,6 +106,7 @@ class ModelSpec: features: frozenset[str] = frozenset({CONTINUOUS}) phases: frozenset[str] = DEFAULT_PHASES quick_threshold: int = 10**9 + long_threshold: int = 10**9 requires: tuple[str, ...] = () def applies_to(self, phase: str) -> bool: From 300abb5e39a259c1bdf3d7c1cde5e613986739fd Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Thu, 28 May 2026 10:41:54 +0200 Subject: [PATCH 04/68] =?UTF-8?q?benchmarks:=20make=20--quick=20truly=20qu?= =?UTF-8?q?ick=20(35s=20=E2=86=92=2018s)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Drop pypsa_scigrid from --quick entirely (quick_threshold=0). PyPSA import + example loading dominates the smoke wall-clock; the model still runs in default and --long modes. - Lower every other spec's quick_threshold to its smallest size, so --quick exercises one size per model across all phases. The default tier (which uses long_threshold) still gives broad regression coverage. 
Verified locally: - --quick: 85 passed / 372 skipped / 18.5s (was 35s) - default: 333 passed / 124 skipped / 44.8s (unchanged) - --long : 457 passed / 0 skipped / 2m11s (unchanged) Co-Authored-By: Claude Opus 4.7 (1M context) --- benchmarks/models/basic.py | 2 +- benchmarks/models/expression_arithmetic.py | 2 +- benchmarks/models/knapsack.py | 2 +- benchmarks/models/masked.py | 2 +- benchmarks/models/milp.py | 2 +- benchmarks/models/piecewise.py | 2 +- benchmarks/models/pypsa_scigrid.py | 5 ++++- benchmarks/models/qp.py | 2 +- benchmarks/models/sos.py | 2 +- benchmarks/models/sparse_network.py | 2 +- 10 files changed, 13 insertions(+), 10 deletions(-) diff --git a/benchmarks/models/basic.py b/benchmarks/models/basic.py index 3f044a17..6959e188 100644 --- a/benchmarks/models/basic.py +++ b/benchmarks/models/basic.py @@ -25,7 +25,7 @@ def build_basic(n: int) -> linopy.Model: build=build_basic, sizes=SIZES, features=frozenset({CONTINUOUS}), - quick_threshold=100, + quick_threshold=10, long_threshold=500, ) ) diff --git a/benchmarks/models/expression_arithmetic.py b/benchmarks/models/expression_arithmetic.py index 43bbd21b..80590951 100644 --- a/benchmarks/models/expression_arithmetic.py +++ b/benchmarks/models/expression_arithmetic.py @@ -37,7 +37,7 @@ def build_expression_arithmetic(n: int) -> linopy.Model: build=build_expression_arithmetic, sizes=SIZES, features=frozenset({CONTINUOUS}), - quick_threshold=100, + quick_threshold=10, long_threshold=500, ) ) diff --git a/benchmarks/models/knapsack.py b/benchmarks/models/knapsack.py index 5491a6b9..7860f285 100644 --- a/benchmarks/models/knapsack.py +++ b/benchmarks/models/knapsack.py @@ -31,7 +31,7 @@ def build_knapsack(n: int) -> linopy.Model: sizes=SIZES, features=frozenset({BINARY}), phases=DEFAULT_PHASES, # HiGHS handles binary; matrices handles MILP - quick_threshold=10_000, + quick_threshold=100, long_threshold=10_000, ) ) diff --git a/benchmarks/models/masked.py b/benchmarks/models/masked.py index 
366e3ff8..fccac137 100644 --- a/benchmarks/models/masked.py +++ b/benchmarks/models/masked.py @@ -85,7 +85,7 @@ def build_masked(n: int) -> linopy.Model: sizes=SIZES, features=frozenset({CONTINUOUS, MASKED}), phases=DEFAULT_PHASES, - quick_threshold=100, + quick_threshold=10, long_threshold=500, ) ) diff --git a/benchmarks/models/milp.py b/benchmarks/models/milp.py index 7c181f30..e762f207 100644 --- a/benchmarks/models/milp.py +++ b/benchmarks/models/milp.py @@ -74,7 +74,7 @@ def build_milp(n: int) -> linopy.Model: sizes=SIZES, features=frozenset({INTEGER, CONTINUOUS}), phases=DEFAULT_PHASES, - quick_threshold=25, + quick_threshold=10, long_threshold=100, ) ) diff --git a/benchmarks/models/piecewise.py b/benchmarks/models/piecewise.py index 389e4669..0fb393bd 100644 --- a/benchmarks/models/piecewise.py +++ b/benchmarks/models/piecewise.py @@ -85,7 +85,7 @@ def build_piecewise(n_gens: int) -> linopy.Model: # reformulation (pure MILP with binaries), which every supported # solver handles. phases=DEFAULT_PHASES, - quick_threshold=100, + quick_threshold=10, long_threshold=1_000, ) ) diff --git a/benchmarks/models/pypsa_scigrid.py b/benchmarks/models/pypsa_scigrid.py index a1428d23..41d8836b 100644 --- a/benchmarks/models/pypsa_scigrid.py +++ b/benchmarks/models/pypsa_scigrid.py @@ -28,7 +28,10 @@ def build_pypsa_scigrid(snapshots: int = 100) -> linopy.Model: build=build_pypsa_scigrid, sizes=SIZES, features=frozenset({CONTINUOUS}), - quick_threshold=50, + # quick_threshold=0 keeps pypsa_scigrid out of --quick entirely — + # PyPSA import + example loading dominates the smoke wall-clock + # otherwise. It still runs in default and --long modes. 
+ quick_threshold=0, long_threshold=50, requires=("pypsa",), ) diff --git a/benchmarks/models/qp.py b/benchmarks/models/qp.py index 4df793da..a040df45 100644 --- a/benchmarks/models/qp.py +++ b/benchmarks/models/qp.py @@ -60,7 +60,7 @@ def build_qp(n_assets: int) -> linopy.Model: sizes=SIZES, features=frozenset({CONTINUOUS, QUADRATIC}), phases=DEFAULT_PHASES, - quick_threshold=100, + quick_threshold=10, long_threshold=1_000, ) ) diff --git a/benchmarks/models/sos.py b/benchmarks/models/sos.py index e16f5bef..c5ac1d36 100644 --- a/benchmarks/models/sos.py +++ b/benchmarks/models/sos.py @@ -92,7 +92,7 @@ def build_sos(n_gens: int) -> linopy.Model: TO_XPRESS, } ), - quick_threshold=100, + quick_threshold=10, long_threshold=1_000, ) ) diff --git a/benchmarks/models/sparse_network.py b/benchmarks/models/sparse_network.py index 47417032..7ac71db1 100644 --- a/benchmarks/models/sparse_network.py +++ b/benchmarks/models/sparse_network.py @@ -57,7 +57,7 @@ def build_sparse_network(n_buses: int) -> linopy.Model: build=build_sparse_network, sizes=SIZES, features=frozenset({CONTINUOUS}), - quick_threshold=100, + quick_threshold=10, long_threshold=500, ) ) From c725c6857842a05212f3753f2ec02c2ece5b96fd Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Thu, 28 May 2026 10:57:24 +0200 Subject: [PATCH 05/68] benchmarks: add registry-usage notebook + execute in CI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit benchmarks/notebooks/registry_usage.py is the canonical walkthrough for the model registry. Authored in jupytext percent format so it triples as: - runnable Python script (CI executes it on every PR) - notebook in JupyterLab / VSCode with the jupytext extension - readable doc on GitHub (markdown cells render directly) Covers: import, lookup by name, iterate, filter_by feature/phase, parametrize-your-own-pytest pattern, one-off tracemalloc profiling, and the three CLI tiers. 
CI: benchmark-smoke.yml gains an "Execute registry-usage notebook" step right after the pytest smoke — so doc rot fails the build instead of hiding until someone next opens the file. README: new "Worked walkthrough" subsection points at the notebook. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/benchmark-smoke.yml | 6 + benchmarks/README.md | 13 ++ benchmarks/notebooks/registry_usage.py | 204 +++++++++++++++++++++++++ 3 files changed, 223 insertions(+) create mode 100644 benchmarks/notebooks/registry_usage.py diff --git a/.github/workflows/benchmark-smoke.yml b/.github/workflows/benchmark-smoke.yml index c6b37028..eeb6d2c5 100644 --- a/.github/workflows/benchmark-smoke.yml +++ b/.github/workflows/benchmark-smoke.yml @@ -39,3 +39,9 @@ jobs: - name: Run benchmark smoke run: | pytest benchmarks/ --quick --benchmark-disable -q + + - name: Execute registry-usage notebook + # Catches doc rot — the notebook is the canonical "how to use the + # registry" walkthrough and must stay runnable end-to-end. + run: | + python benchmarks/notebooks/registry_usage.py diff --git a/benchmarks/README.md b/benchmarks/README.md index 8d2e3fd1..19df4572 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -75,6 +75,19 @@ To add a new model, drop a file under `benchmarks/models/`, expose a `build_(size)`, and call `register(ModelSpec(...))`. Import it from `benchmarks/models/__init__.py` so the registration fires. +### Worked walkthrough + +[`notebooks/registry_usage.py`](notebooks/registry_usage.py) is a jupytext +percent-format notebook that runs through every pattern above end-to-end. +Three ways to consume it: + +- **Script:** `python benchmarks/notebooks/registry_usage.py` — every cell + executes top-to-bottom; CI runs it this way on every PR so the examples + can't silently rot. +- **Notebook:** open the `.py` file in JupyterLab or VSCode with the jupytext + extension installed and it renders as a notebook. 
+- **Read:** the `# %% [markdown]` blocks render fine on GitHub directly. + ## Setup ```bash diff --git a/benchmarks/notebooks/registry_usage.py b/benchmarks/notebooks/registry_usage.py new file mode 100644 index 00000000..e999970b --- /dev/null +++ b/benchmarks/notebooks/registry_usage.py @@ -0,0 +1,204 @@ +# --- +# jupyter: +# jupytext: +# formats: py:percent +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.16.0 +# kernelspec: +# display_name: Python 3 +# language: python +# name: python3 +# --- + +# %% [markdown] +# # Benchmark model registry — usage guide +# +# This file is the canonical walkthrough for the benchmark **model registry**. +# It's authored in [jupytext](https://jupytext.readthedocs.io/) percent format, +# which means: +# +# - **Run as a script:** `python benchmarks/notebooks/registry_usage.py` — every +# pattern below executes end-to-end. CI runs it this way on every PR, so the +# examples can't silently rot. +# - **Open as a notebook:** in JupyterLab or VSCode with the jupytext extension, +# this file appears as a notebook with markdown + code cells. +# - **Lint:** `ruff check` works because it's plain Python. +# +# The registry lives in `benchmarks/registry.py`. Each model file under +# `benchmarks/models/` self-registers a `ModelSpec` on import, so just touching +# the `benchmarks` package populates `REGISTRY`. + +# %% [markdown] +# ## 1. Import the registry +# +# Single entry point: `from benchmarks import REGISTRY` plus the feature / phase +# constants you need for filtering. + +# %% +# Put the repo root on sys.path so the file runs from anywhere +# (e.g. ``python benchmarks/notebooks/registry_usage.py``). 
+import sys +from pathlib import Path + +_REPO_ROOT = Path(__file__).resolve().parents[2] +if str(_REPO_ROOT) not in sys.path: + sys.path.insert(0, str(_REPO_ROOT)) + +from benchmarks import ( # noqa: E402 + INTEGER, + QUADRATIC, + REGISTRY, + TO_GUROBIPY, + filter_by, + get, +) + +print(f"{len(REGISTRY)} models registered: {sorted(REGISTRY)}") + +# %% [markdown] +# ## 2. Look up one model by name +# +# `REGISTRY[name]` returns a `ModelSpec` (frozen dataclass). `.build(size)` +# constructs and returns a `linopy.Model`. + +# %% +spec = REGISTRY["basic"] +print(f"name: {spec.name}") +print(f"sizes: {spec.sizes}") +print(f"features: {sorted(spec.features)}") +print(f"quick_threshold: {spec.quick_threshold}") +print(f"long_threshold: {spec.long_threshold}") + +m = spec.build(50) +print( + f"\nbuilt at n=50: {len(m.variables)} variable arrays, " + f"{len(m.constraints)} constraint arrays" +) + +# %% [markdown] +# `get("name")` is an equivalent functional accessor — handy when you don't +# want to import `REGISTRY` directly. + +# %% +assert get("basic") is REGISTRY["basic"] + +# %% [markdown] +# ## 3. Iterate the whole registry +# +# Useful when you want to sweep your own test or profiling logic across every +# model — e.g. checking that a refactor didn't break any spec. + +# %% +print(f"{'name':<25} {'features':<35} {'sizes':<20}") +print("-" * 80) +for name, spec in REGISTRY.items(): + feats = ",".join(sorted(spec.features)) + sizes = f"{spec.sizes[0]}..{spec.sizes[-1]}" + print(f"{name:<25} {feats:<35} {sizes:<20}") + +# %% [markdown] +# ## 4. Filter by feature +# +# `filter_by(has_feature=...)` returns specs that advertise that feature. The +# feature tag constants (`CONTINUOUS`, `BINARY`, `INTEGER`, `QUADRATIC`, `SOS`, +# `PIECEWISE`, `MASKED`) are exported from `benchmarks`. 
+ +# %% +qp_specs = filter_by(has_feature=QUADRATIC) +print("Quadratic models:", [s.name for s in qp_specs]) + +mip_specs = filter_by(has_feature=INTEGER) +print("Integer models: ", [s.name for s in mip_specs]) + +# %% [markdown] +# ## 5. Filter by phase +# +# Each spec declares which **phases** apply — `BUILD`, `MATRICES`, `LP_WRITE`, +# `NETCDF`, `SOLVER_BUILD`, plus per-solver `TO_HIGHSPY` / `TO_GUROBIPY` / +# `TO_MOSEK` / `TO_XPRESS`. Use `has_phase=` to narrow to solver-compatible +# models, e.g. when writing a Gurobi-specific regression test. + +# %% +gurobi_specs = filter_by(has_phase=TO_GUROBIPY) +print(f"{len(gurobi_specs)} models declare TO_GUROBIPY:") +for s in gurobi_specs: + print(f" - {s.name}") + +# %% [markdown] +# ## 6. Reuse pattern — parametrize your own pytest +# +# The pattern the suite itself uses (see `benchmarks/test_build.py` etc.) — +# `iter_params(phase)` returns `(spec, size)` pairs for the given phase, and +# `param_ids(...)` builds stable test IDs for `pytest.mark.parametrize`: +# +# ```python +# import pytest +# from benchmarks import BUILD, iter_params, param_ids +# +# _PARAMS = iter_params(BUILD) +# +# @pytest.mark.parametrize("spec,size", _PARAMS, ids=param_ids(_PARAMS)) +# def test_my_invariant(spec, size): +# m = spec.build(size) +# # ... assertion that should hold for every model +# ``` + +# %% [markdown] +# ## 7. Reuse pattern — one-off profiling +# +# Grab a single model at a chosen size, measure something, throw it away. +# `tracemalloc` works well for in-process peak-RSS spot checks (use +# `benchmarks/memory.py` + pytest-memray for the real metric). + +# %% +import tracemalloc # noqa: E402 + +tracemalloc.start() +m = REGISTRY["sparse_network"].build(100) +_current, peak = tracemalloc.get_traced_memory() +tracemalloc.stop() + +print(f"sparse_network n=100: built, peak allocation ≈ {peak / 1e6:.1f} MB") +print( + f" {m.variables.nvars} scalar variables, {m.constraints.ncons} scalar constraints" +) + +# %% [markdown] +# ## 8. 
Running the benchmark suite +# +# Three size tiers, configured per-spec via `quick_threshold` and +# `long_threshold`: +# +# | Flag | Sizes included | Use case | +# | ----------- | ------------------------- | ------------------------------------- | +# | `--quick` | `size <= quick_threshold` | CI smoke (~18s, one size per model) | +# | _(none)_ | `size <= long_threshold` | Local regression run (~45s) | +# | `--long` | all sizes | Full sweep (~2 min, slow stuff) | +# +# ```bash +# # Quickest smoke +# pytest benchmarks/ --quick --benchmark-disable +# +# # Default timing +# pytest benchmarks/ --benchmark-only +# +# # Full sweep with the slow sizes +# pytest benchmarks/ --benchmark-only --long +# +# # Pick a single (phase, model) pair +# pytest benchmarks/test_lp_write.py -k "knapsack and n=1000" +# ``` + +# %% [markdown] +# ## 9. Adding a new model +# +# 1. Drop `benchmarks/models/.py` with a `build_(size) -> Model`. +# 2. Build a `ModelSpec` and call `register(...)` at module scope. Declare +# realistic `quick_threshold` / `long_threshold` so the smoke stays fast. +# 3. Add an import in `benchmarks/models/__init__.py` so registration fires. +# +# That's it — every phase test picks the spec up automatically through +# `iter_params(phase)`. From 99483f8543d43ad4d762a332d4ef41ad574d5a56 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Thu, 28 May 2026 11:53:51 +0200 Subject: [PATCH 06/68] benchmarks: switch walkthrough to .ipynb, add reprs to ModelSpec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace jupytext-style ``registry_usage.py`` with a proper ``registry_usage.ipynb`` — matches the repo convention (examples/*.ipynb, nbsphinx, nbstripout). CI executes it via ``jupyter nbconvert --execute``. - Add ``__repr__`` (one-line summary) and ``_repr_html_`` (attribute table) to ModelSpec. Visible in pytest -v output, in interactive Python, and as rich HTML in Jupyter cells. 
- Notebook simplified to lean on the new reprs: explicit-attribute prints in sections 2-5 replaced by bare expression evaluations. - README points at the .ipynb and notes the "launch jupyter from repo root" convention. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/benchmark-smoke.yml | 5 +- benchmarks/README.md | 20 +- benchmarks/notebooks/registry_usage.ipynb | 329 ++++++++++++++++++++++ benchmarks/notebooks/registry_usage.py | 204 -------------- benchmarks/registry.py | 32 ++- 5 files changed, 374 insertions(+), 216 deletions(-) create mode 100644 benchmarks/notebooks/registry_usage.ipynb delete mode 100644 benchmarks/notebooks/registry_usage.py diff --git a/.github/workflows/benchmark-smoke.yml b/.github/workflows/benchmark-smoke.yml index eeb6d2c5..8cffb97d 100644 --- a/.github/workflows/benchmark-smoke.yml +++ b/.github/workflows/benchmark-smoke.yml @@ -44,4 +44,7 @@ jobs: # Catches doc rot — the notebook is the canonical "how to use the # registry" walkthrough and must stay runnable end-to-end. run: | - python benchmarks/notebooks/registry_usage.py + jupyter nbconvert --to notebook --execute \ + --ExecutePreprocessor.timeout=300 \ + --output executed.ipynb \ + benchmarks/notebooks/registry_usage.ipynb diff --git a/benchmarks/README.md b/benchmarks/README.md index 19df4572..39a22d46 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -77,16 +77,16 @@ To add a new model, drop a file under `benchmarks/models/`, expose a ### Worked walkthrough -[`notebooks/registry_usage.py`](notebooks/registry_usage.py) is a jupytext -percent-format notebook that runs through every pattern above end-to-end. -Three ways to consume it: - -- **Script:** `python benchmarks/notebooks/registry_usage.py` — every cell - executes top-to-bottom; CI runs it this way on every PR so the examples - can't silently rot. -- **Notebook:** open the `.py` file in JupyterLab or VSCode with the jupytext - extension installed and it renders as a notebook. 
-- **Read:** the `# %% [markdown]` blocks render fine on GitHub directly. +[`notebooks/registry_usage.ipynb`](notebooks/registry_usage.ipynb) is the +canonical walkthrough — it runs through every pattern above end-to-end. +GitHub renders it inline. CI executes it on every PR via `jupyter nbconvert +--execute`, so the examples can't silently rot. + +Open it locally with JupyterLab launched from the repo root: + +```bash +jupyter lab benchmarks/notebooks/registry_usage.ipynb +``` ## Setup diff --git a/benchmarks/notebooks/registry_usage.ipynb b/benchmarks/notebooks/registry_usage.ipynb new file mode 100644 index 00000000..17511e5d --- /dev/null +++ b/benchmarks/notebooks/registry_usage.ipynb @@ -0,0 +1,329 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": {}, + "source": [ + "# Benchmark model registry — usage guide\n", + "\n", + "This notebook is the canonical walkthrough for the benchmark\n", + "**model registry**. CI executes it end-to-end on every PR\n", + "(`jupyter nbconvert --execute`), so the examples can't silently rot.\n", + "\n", + "Launch jupyter from the repo root so `from benchmarks import ...` resolves\n", + "(same convention as `examples/*.ipynb`).\n", + "\n", + "The registry lives in `benchmarks/registry.py`. Each model file under\n", + "`benchmarks/models/` self-registers a `ModelSpec` on import, so just touching\n", + "the `benchmarks` package populates `REGISTRY`." + ] + }, + { + "cell_type": "markdown", + "id": "1", + "metadata": {}, + "source": [ + "## 1. Import the registry\n", + "\n", + "Single entry point: `from benchmarks import REGISTRY` plus the feature / phase\n", + "constants you need for filtering." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": {}, + "outputs": [], + "source": [ + "# The benchmark suite isn't shipped in the linopy wheel — it lives in-tree.\n", + "# Find the repo root by walking up from cwd and put it on sys.path so the\n", + "# import resolves whether jupyter was launched from the repo root, the\n", + "# notebooks directory, or anywhere in between.\n", + "import sys\n", + "from pathlib import Path\n", + "\n", + "_p = Path.cwd()\n", + "while _p != _p.parent:\n", + " if (_p / \"benchmarks\" / \"registry.py\").exists():\n", + " if str(_p) not in sys.path:\n", + " sys.path.insert(0, str(_p))\n", + " break\n", + " _p = _p.parent\n", + "\n", + "from benchmarks import ( # noqa: E402\n", + " INTEGER,\n", + " QUADRATIC,\n", + " REGISTRY,\n", + " TO_GUROBIPY,\n", + " filter_by,\n", + " get,\n", + ")\n", + "\n", + "print(f\"{len(REGISTRY)} models registered: {sorted(REGISTRY)}\")" + ] + }, + { + "cell_type": "markdown", + "id": "3", + "metadata": {}, + "source": [ + "## 2. Look up one model by name\n", + "\n", + "`REGISTRY[name]` returns a `ModelSpec` (frozen dataclass). Evaluating it\n", + "renders a full attribute table in Jupyter; `__repr__` gives a one-line\n", + "summary in scripts or `pytest -v` output." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4", + "metadata": {}, + "outputs": [], + "source": [ + "spec = REGISTRY[\"basic\"]\n", + "spec" + ] + }, + { + "cell_type": "markdown", + "id": "5", + "metadata": {}, + "source": [ + "`.build(size)` constructs and returns a `linopy.Model`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [ + "spec.build(50)" + ] + }, + { + "cell_type": "markdown", + "id": "7", + "metadata": {}, + "source": [ + "`get(\"name\")` is an equivalent functional accessor — handy when you don't\n", + "want to import `REGISTRY` directly." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8", + "metadata": {}, + "outputs": [], + "source": [ + "assert get(\"basic\") is REGISTRY[\"basic\"]" + ] + }, + { + "cell_type": "markdown", + "id": "9", + "metadata": {}, + "source": [ + "## 3. Iterate the whole registry\n", + "\n", + "Useful when you want to sweep your own test or profiling logic across every\n", + "model — e.g. checking that a refactor didn't break any spec. Each spec's\n", + "`__repr__` carries enough info for an at-a-glance overview." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10", + "metadata": {}, + "outputs": [], + "source": [ + "list(REGISTRY.values())" + ] + }, + { + "cell_type": "markdown", + "id": "11", + "metadata": {}, + "source": [ + "## 4. Filter by feature\n", + "\n", + "`filter_by(has_feature=...)` returns specs that advertise that feature. The\n", + "feature tag constants (`CONTINUOUS`, `BINARY`, `INTEGER`, `QUADRATIC`, `SOS`,\n", + "`PIECEWISE`, `MASKED`) are exported from `benchmarks`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12", + "metadata": {}, + "outputs": [], + "source": [ + "filter_by(has_feature=QUADRATIC)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13", + "metadata": {}, + "outputs": [], + "source": [ + "filter_by(has_feature=INTEGER)" + ] + }, + { + "cell_type": "markdown", + "id": "14", + "metadata": {}, + "source": [ + "## 5. Filter by phase\n", + "\n", + "Each spec declares which **phases** apply — `BUILD`, `MATRICES`, `LP_WRITE`,\n", + "`NETCDF`, `SOLVER_BUILD`, plus per-solver `TO_HIGHSPY` / `TO_GUROBIPY` /\n", + "`TO_MOSEK` / `TO_XPRESS`. Use `has_phase=` to narrow to solver-compatible\n", + "models, e.g. when writing a Gurobi-specific regression test." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15", + "metadata": {}, + "outputs": [], + "source": [ + "filter_by(has_phase=TO_GUROBIPY)" + ] + }, + { + "cell_type": "markdown", + "id": "16", + "metadata": {}, + "source": [ + "## 6. Reuse pattern — parametrize your own pytest\n", + "\n", + "The pattern the suite itself uses (see `benchmarks/test_build.py` etc.) —\n", + "`iter_params(phase)` returns `(spec, size)` pairs for the given phase, and\n", + "`param_ids(...)` builds stable test IDs for `pytest.mark.parametrize`:\n", + "\n", + "```python\n", + "import pytest\n", + "from benchmarks import BUILD, iter_params, param_ids\n", + "\n", + "_PARAMS = iter_params(BUILD)\n", + "\n", + "@pytest.mark.parametrize(\"spec,size\", _PARAMS, ids=param_ids(_PARAMS))\n", + "def test_my_invariant(spec, size):\n", + " m = spec.build(size)\n", + " # ... assertion that should hold for every model\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "17", + "metadata": {}, + "source": [ + "## 7. Reuse pattern — one-off profiling\n", + "\n", + "Grab a single model at a chosen size, measure something, throw it away.\n", + "`tracemalloc` works well for in-process peak-RSS spot checks (use\n", + "`benchmarks/memory.py` + pytest-memray for the real metric)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18", + "metadata": {}, + "outputs": [], + "source": [ + "import tracemalloc # noqa: E402\n", + "\n", + "tracemalloc.start()\n", + "m = REGISTRY[\"sparse_network\"].build(100)\n", + "_current, peak = tracemalloc.get_traced_memory()\n", + "tracemalloc.stop()\n", + "\n", + "print(f\"sparse_network n=100: built, peak allocation ≈ {peak / 1e6:.1f} MB\")\n", + "print(\n", + " f\" {m.variables.nvars} scalar variables, {m.constraints.ncons} scalar constraints\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "19", + "metadata": {}, + "source": [ + "## 8. 
Running the benchmark suite\n", + "\n", + "Three size tiers, configured per-spec via `quick_threshold` and\n", + "`long_threshold`:\n", + "\n", + "| Flag | Sizes included | Use case |\n", + "| ----------- | ------------------------- | ------------------------------------- |\n", + "| `--quick` | `size <= quick_threshold` | CI smoke (~18s, one size per model) |\n", + "| _(none)_ | `size <= long_threshold` | Local regression run (~45s) |\n", + "| `--long` | all sizes | Full sweep (~2 min, slow stuff) |\n", + "\n", + "```bash\n", + "# Quickest smoke\n", + "pytest benchmarks/ --quick --benchmark-disable\n", + "\n", + "# Default timing\n", + "pytest benchmarks/ --benchmark-only\n", + "\n", + "# Full sweep with the slow sizes\n", + "pytest benchmarks/ --benchmark-only --long\n", + "\n", + "# Pick a single (phase, model) pair\n", + "pytest benchmarks/test_lp_write.py -k \"knapsack and n=1000\"\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "20", + "metadata": {}, + "source": [ + "## 9. Adding a new model\n", + "\n", + "1. Drop `benchmarks/models/<name>.py` with a `build_<name>(size) -> Model`.\n", + "2. Build a `ModelSpec` and call `register(...)` at module scope. Declare\n", + " realistic `quick_threshold` / `long_threshold` so the smoke stays fast.\n", + "3. Add an import in `benchmarks/models/__init__.py` so registration fires.\n", + "\n", + "That's it — every phase test picks the spec up automatically through\n", + "`iter_params(phase)`."
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/benchmarks/notebooks/registry_usage.py b/benchmarks/notebooks/registry_usage.py deleted file mode 100644 index e999970b..00000000 --- a/benchmarks/notebooks/registry_usage.py +++ /dev/null @@ -1,204 +0,0 @@ -# --- -# jupyter: -# jupytext: -# formats: py:percent -# text_representation: -# extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.16.0 -# kernelspec: -# display_name: Python 3 -# language: python -# name: python3 -# --- - -# %% [markdown] -# # Benchmark model registry — usage guide -# -# This file is the canonical walkthrough for the benchmark **model registry**. -# It's authored in [jupytext](https://jupytext.readthedocs.io/) percent format, -# which means: -# -# - **Run as a script:** `python benchmarks/notebooks/registry_usage.py` — every -# pattern below executes end-to-end. CI runs it this way on every PR, so the -# examples can't silently rot. -# - **Open as a notebook:** in JupyterLab or VSCode with the jupytext extension, -# this file appears as a notebook with markdown + code cells. -# - **Lint:** `ruff check` works because it's plain Python. -# -# The registry lives in `benchmarks/registry.py`. Each model file under -# `benchmarks/models/` self-registers a `ModelSpec` on import, so just touching -# the `benchmarks` package populates `REGISTRY`. - -# %% [markdown] -# ## 1. Import the registry -# -# Single entry point: `from benchmarks import REGISTRY` plus the feature / phase -# constants you need for filtering. 
- -# %% -# Put the repo root on sys.path so the file runs from anywhere -# (e.g. ``python benchmarks/notebooks/registry_usage.py``). -import sys -from pathlib import Path - -_REPO_ROOT = Path(__file__).resolve().parents[2] -if str(_REPO_ROOT) not in sys.path: - sys.path.insert(0, str(_REPO_ROOT)) - -from benchmarks import ( # noqa: E402 - INTEGER, - QUADRATIC, - REGISTRY, - TO_GUROBIPY, - filter_by, - get, -) - -print(f"{len(REGISTRY)} models registered: {sorted(REGISTRY)}") - -# %% [markdown] -# ## 2. Look up one model by name -# -# `REGISTRY[name]` returns a `ModelSpec` (frozen dataclass). `.build(size)` -# constructs and returns a `linopy.Model`. - -# %% -spec = REGISTRY["basic"] -print(f"name: {spec.name}") -print(f"sizes: {spec.sizes}") -print(f"features: {sorted(spec.features)}") -print(f"quick_threshold: {spec.quick_threshold}") -print(f"long_threshold: {spec.long_threshold}") - -m = spec.build(50) -print( - f"\nbuilt at n=50: {len(m.variables)} variable arrays, " - f"{len(m.constraints)} constraint arrays" -) - -# %% [markdown] -# `get("name")` is an equivalent functional accessor — handy when you don't -# want to import `REGISTRY` directly. - -# %% -assert get("basic") is REGISTRY["basic"] - -# %% [markdown] -# ## 3. Iterate the whole registry -# -# Useful when you want to sweep your own test or profiling logic across every -# model — e.g. checking that a refactor didn't break any spec. - -# %% -print(f"{'name':<25} {'features':<35} {'sizes':<20}") -print("-" * 80) -for name, spec in REGISTRY.items(): - feats = ",".join(sorted(spec.features)) - sizes = f"{spec.sizes[0]}..{spec.sizes[-1]}" - print(f"{name:<25} {feats:<35} {sizes:<20}") - -# %% [markdown] -# ## 4. Filter by feature -# -# `filter_by(has_feature=...)` returns specs that advertise that feature. The -# feature tag constants (`CONTINUOUS`, `BINARY`, `INTEGER`, `QUADRATIC`, `SOS`, -# `PIECEWISE`, `MASKED`) are exported from `benchmarks`. 
- -# %% -qp_specs = filter_by(has_feature=QUADRATIC) -print("Quadratic models:", [s.name for s in qp_specs]) - -mip_specs = filter_by(has_feature=INTEGER) -print("Integer models: ", [s.name for s in mip_specs]) - -# %% [markdown] -# ## 5. Filter by phase -# -# Each spec declares which **phases** apply — `BUILD`, `MATRICES`, `LP_WRITE`, -# `NETCDF`, `SOLVER_BUILD`, plus per-solver `TO_HIGHSPY` / `TO_GUROBIPY` / -# `TO_MOSEK` / `TO_XPRESS`. Use `has_phase=` to narrow to solver-compatible -# models, e.g. when writing a Gurobi-specific regression test. - -# %% -gurobi_specs = filter_by(has_phase=TO_GUROBIPY) -print(f"{len(gurobi_specs)} models declare TO_GUROBIPY:") -for s in gurobi_specs: - print(f" - {s.name}") - -# %% [markdown] -# ## 6. Reuse pattern — parametrize your own pytest -# -# The pattern the suite itself uses (see `benchmarks/test_build.py` etc.) — -# `iter_params(phase)` returns `(spec, size)` pairs for the given phase, and -# `param_ids(...)` builds stable test IDs for `pytest.mark.parametrize`: -# -# ```python -# import pytest -# from benchmarks import BUILD, iter_params, param_ids -# -# _PARAMS = iter_params(BUILD) -# -# @pytest.mark.parametrize("spec,size", _PARAMS, ids=param_ids(_PARAMS)) -# def test_my_invariant(spec, size): -# m = spec.build(size) -# # ... assertion that should hold for every model -# ``` - -# %% [markdown] -# ## 7. Reuse pattern — one-off profiling -# -# Grab a single model at a chosen size, measure something, throw it away. -# `tracemalloc` works well for in-process peak-RSS spot checks (use -# `benchmarks/memory.py` + pytest-memray for the real metric). - -# %% -import tracemalloc # noqa: E402 - -tracemalloc.start() -m = REGISTRY["sparse_network"].build(100) -_current, peak = tracemalloc.get_traced_memory() -tracemalloc.stop() - -print(f"sparse_network n=100: built, peak allocation ≈ {peak / 1e6:.1f} MB") -print( - f" {m.variables.nvars} scalar variables, {m.constraints.ncons} scalar constraints" -) - -# %% [markdown] -# ## 8. 
Running the benchmark suite -# -# Three size tiers, configured per-spec via `quick_threshold` and -# `long_threshold`: -# -# | Flag | Sizes included | Use case | -# | ----------- | ------------------------- | ------------------------------------- | -# | `--quick` | `size <= quick_threshold` | CI smoke (~18s, one size per model) | -# | _(none)_ | `size <= long_threshold` | Local regression run (~45s) | -# | `--long` | all sizes | Full sweep (~2 min, slow stuff) | -# -# ```bash -# # Quickest smoke -# pytest benchmarks/ --quick --benchmark-disable -# -# # Default timing -# pytest benchmarks/ --benchmark-only -# -# # Full sweep with the slow sizes -# pytest benchmarks/ --benchmark-only --long -# -# # Pick a single (phase, model) pair -# pytest benchmarks/test_lp_write.py -k "knapsack and n=1000" -# ``` - -# %% [markdown] -# ## 9. Adding a new model -# -# 1. Drop `benchmarks/models/.py` with a `build_(size) -> Model`. -# 2. Build a `ModelSpec` and call `register(...)` at module scope. Declare -# realistic `quick_threshold` / `long_threshold` so the smoke stays fast. -# 3. Add an import in `benchmarks/models/__init__.py` so registration fires. -# -# That's it — every phase test picks the spec up automatically through -# `iter_params(phase)`. diff --git a/benchmarks/registry.py b/benchmarks/registry.py index 228152b8..dbee2281 100644 --- a/benchmarks/registry.py +++ b/benchmarks/registry.py @@ -86,7 +86,7 @@ ) -@dataclass(frozen=True) +@dataclass(frozen=True, repr=False) class ModelSpec: """ Declarative description of one benchmark model. 
@@ -115,6 +115,36 @@ def applies_to(self, phase: str) -> bool: def has_feature(self, feature: str) -> bool: return feature in self.features + def __repr__(self) -> str: + feats = ",".join(sorted(self.features)) + size_range = ( + f"{self.sizes[0]}..{self.sizes[-1]}" + if len(self.sizes) > 1 + else str(self.sizes[0]) + ) + return f"ModelSpec({self.name!r}, features={{{feats}}}, sizes={size_range})" + + def _repr_html_(self) -> str: + # Rich rendering for Jupyter — a compact two-column table. + rows = [ + ("name", self.name), + ("features", ", ".join(sorted(self.features))), + ("sizes", ", ".join(str(s) for s in self.sizes)), + ("phases", ", ".join(sorted(self.phases))), + ("quick_threshold", self.quick_threshold), + ("long_threshold", self.long_threshold), + ("requires", ", ".join(self.requires) or "—"), + ] + body = "".join( + f"{k}" + f"{v}" + for k, v in rows + ) + return ( + f"ModelSpec {self.name}" + f"{body}
" + ) + REGISTRY: dict[str, ModelSpec] = {} From 751aa78e15546a32b9737126eb64c774eb6d6fff Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Thu, 28 May 2026 12:48:21 +0200 Subject: [PATCH 07/68] benchmarks: typer-based CLI as the single entry point MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds ``python -m benchmarks `` with typer subcommands: - list / show / filter — introspect the registry - smoke — pytest --quick --benchmark-disable (CI) - run [--long --phase --model --filter --json] — pytest --benchmark-only with knobs - notebook — execute the registry-usage notebook - memory save/compare — replaces the argparse main in memory.py Modern typer style throughout: Annotated[...] for every parameter, Literal[...] for the --phase choice, function docstrings for command help. ``--help`` is auto-generated and is the source of truth — README and the notebook just point at it instead of duplicating the menu. CI smoke now calls ``python -m benchmarks smoke`` and ``python -m benchmarks notebook``. memory.py keeps its save/compare functions but loses the argparse layer. typer added to the [benchmarks] extra. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/benchmark-smoke.yml | 7 +- benchmarks/README.md | 47 ++-- benchmarks/__main__.py | 5 + benchmarks/cli.py | 319 ++++++++++++++++++++++ benchmarks/memory.py | 54 +--- benchmarks/notebooks/registry_usage.ipynb | 39 +-- pyproject.toml | 1 + 7 files changed, 366 insertions(+), 106 deletions(-) create mode 100644 benchmarks/__main__.py create mode 100644 benchmarks/cli.py diff --git a/.github/workflows/benchmark-smoke.yml b/.github/workflows/benchmark-smoke.yml index 8cffb97d..25fee396 100644 --- a/.github/workflows/benchmark-smoke.yml +++ b/.github/workflows/benchmark-smoke.yml @@ -38,13 +38,10 @@ jobs: - name: Run benchmark smoke run: | - pytest benchmarks/ --quick --benchmark-disable -q + python -m benchmarks smoke - name: Execute registry-usage notebook # Catches doc rot — the notebook is the canonical "how to use the # registry" walkthrough and must stay runnable end-to-end. run: | - jupyter nbconvert --to notebook --execute \ - --ExecutePreprocessor.timeout=300 \ - --output executed.ipynb \ - benchmarks/notebooks/registry_usage.ipynb + python -m benchmarks notebook diff --git a/benchmarks/README.md b/benchmarks/README.md index 39a22d46..8fdced0f 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -91,39 +91,44 @@ jupyter lab benchmarks/notebooks/registry_usage.ipynb ## Setup ```bash -uv sync --extra dev --extra solvers +uv sync --extra dev --extra benchmarks source .venv/bin/activate ``` ## Run benchmarks -The suite has three size tiers, each spec declaring its own `quick_threshold` -and `long_threshold`: - -| Flag | Sizes included | Typical use | -| ---------- | ------------------------------------- | ---------------------------------------- | -| `--quick` | `size <= quick_threshold` | CI smoke, fast local sanity check | -| _(none)_ | `size <= long_threshold` | Default: medium-cost regression timing | -| `--long` | all sizes | Full sweep (the slow stuff — many min) | +Everything is 
exposed through a single typer-based CLI. The CLI's +`--help` is the source of truth — run it for the full menu: ```bash -# Quick smoke run (small sizes only, no timing) -pytest benchmarks/ --quick --benchmark-disable +python -m benchmarks --help +python -m benchmarks <command> --help +``` + +Pytest still works directly for power users (`pytest benchmarks/ ...`). -# Default timing run (skips the super-long sizes) -pytest benchmarks/ --benchmark-only +### Size tiers -# Full sweep — every size on every model -pytest benchmarks/ --benchmark-only --long +Each spec declares its own `quick_threshold` and `long_threshold`: -# A single phase -pytest benchmarks/test_build.py +| Mode | Sizes included | Typical use | +| ----------------- | ------------------------- | -------------------------------------- | +| `smoke` | `size <= quick_threshold` | CI smoke, fast local sanity check | +| `run` | `size <= long_threshold` | Default: medium-cost regression timing | +| `run --long` | all sizes | Full sweep (the slow stuff — many min) | + +### Quick reference + +```bash +# Fastest sanity check (~18s, what CI runs) +python -m benchmarks smoke -# A single model across all phases -pytest benchmarks/ -k basic +# Default timing run +python -m benchmarks run -# A single (phase, model) pair -pytest benchmarks/test_lp_write.py -k "knapsack and n=1000" +# Save / compare memory snapshots +python -m benchmarks memory save "$(git rev-parse --short HEAD)" +python -m benchmarks memory compare master my-feature ``` ## Metrics diff --git a/benchmarks/__main__.py b/benchmarks/__main__.py new file mode 100644 index 00000000..34a28439 --- /dev/null +++ b/benchmarks/__main__.py @@ -0,0 +1,5 @@ +"""Allow ``python -m benchmarks <command>``.""" + +from benchmarks.cli import app + +app() diff --git a/benchmarks/cli.py b/benchmarks/cli.py new file mode 100644 index 00000000..255b4caa --- /dev/null +++ b/benchmarks/cli.py @@ -0,0 +1,319 @@ +""" +linopy benchmark CLI — one entry point for the suite.
+ +Run with:: + + python -m benchmarks <command> [options] + +The CLI is a thin layer over pytest for the timing / smoke commands, plus +direct dispatch for registry introspection and memory snapshots. +""" + +from __future__ import annotations + +import subprocess +import sys +import tempfile +from pathlib import Path +from typing import Annotated, Literal + +import typer + +from benchmarks import ( + REGISTRY, + filter_by, + get, +) +from benchmarks.memory import compare as memory_compare +from benchmarks.memory import save as memory_save + +app = typer.Typer( + help=( + "Linopy internal benchmark suite — a thin layer over pytest plus " + "registry introspection and memory snapshots." + ), + no_args_is_help=True, + rich_markup_mode="rich", +) + +memory_app = typer.Typer( + help="Peak-RSS memory snapshots (pytest-memray under the hood).", + no_args_is_help=True, +) +app.add_typer(memory_app, name="memory") + + +PhaseName = Literal["build", "matrices", "lp_write", "netcdf", "solver_handoff"] + +_PHASE_TEST_FILE: dict[PhaseName, str] = { + "build": "benchmarks/test_build.py", + "matrices": "benchmarks/test_matrices.py", + "lp_write": "benchmarks/test_lp_write.py", + "netcdf": "benchmarks/test_netcdf.py", + "solver_handoff": "benchmarks/test_solver_handoff.py", +} + + +# --- Introspection commands ------------------------------------------------ + + +@app.command("list") +def list_( + details: Annotated[ + bool, + typer.Option("--details", "-d", help="Show features and size range."), + ] = False, +) -> None: + """ + List the registered model specs. + + By default emits one name per line — suitable for piping into other + tools. Pass ``--details`` for a small table that also shows the + feature tags and the size range.
+ """ + if not details: + for name in sorted(REGISTRY): + typer.echo(name) + return + + rows = [ + ( + spec.name, + ",".join(sorted(spec.features)), + f"{spec.sizes[0]}..{spec.sizes[-1]}", + ) + for spec in REGISTRY.values() + ] + name_w = max(len(r[0]) for r in rows) + feat_w = max(len(r[1]) for r in rows) + typer.echo(f"{'name':<{name_w}} {'features':<{feat_w}} sizes") + typer.echo("-" * (name_w + feat_w + 20)) + for name, feats, sizes in rows: + typer.echo(f"{name:<{name_w}} {feats:<{feat_w}} {sizes}") + + +@app.command() +def show( + name: Annotated[str, typer.Argument(help="Spec name (see ``list``).")], +) -> None: + """ + Print full attributes of one model spec. + + Output includes sizes, feature tags, applicable phases, the quick / + long size thresholds, and any optional ``requires=`` dependencies the + spec advertises. + """ + try: + spec = get(name) + except KeyError as exc: + typer.secho(f"unknown model: {name!r}", fg=typer.colors.RED, err=True) + typer.echo(f"available: {', '.join(sorted(REGISTRY))}", err=True) + raise typer.Exit(code=2) from exc + typer.echo(repr(spec)) + typer.echo(f" sizes: {spec.sizes}") + typer.echo(f" features: {sorted(spec.features)}") + typer.echo(f" phases: {sorted(spec.phases)}") + typer.echo(f" quick_threshold: {spec.quick_threshold}") + typer.echo(f" long_threshold: {spec.long_threshold}") + if spec.requires: + typer.echo(f" requires: {list(spec.requires)}") + + +@app.command("filter") +def filter_( + feature: Annotated[ + str | None, + typer.Option(help="Feature tag, e.g. 'quadratic', 'integer', 'sos'."), + ] = None, + phase: Annotated[ + str | None, + typer.Option(help="Phase tag, e.g. 'to_gurobipy', 'lp_write'."), + ] = None, +) -> None: + """ + Filter specs by feature or phase tag. + + Both filters can be combined; the result is the intersection. + At least one of ``--feature`` / ``--phase`` must be supplied. 
+ """ + if feature is None and phase is None: + typer.secho("pass --feature and/or --phase", fg=typer.colors.RED, err=True) + raise typer.Exit(code=2) + matches = filter_by(has_feature=feature, has_phase=phase) + for spec in matches: + typer.echo(repr(spec)) + + +# --- Execution commands ---------------------------------------------------- + + +def _run_pytest(args: list[str]) -> None: + """Invoke pytest as a subprocess and propagate its exit code.""" + cmd = [sys.executable, "-m", "pytest", *args] + typer.secho(f"$ {' '.join(cmd)}", fg=typer.colors.BRIGHT_BLACK) + result = subprocess.run(cmd, check=False) + if result.returncode != 0: + raise typer.Exit(code=result.returncode) + + +@app.command() +def smoke( + extra: Annotated[ + list[str] | None, + typer.Argument(help="Extra args forwarded to pytest verbatim."), + ] = None, +) -> None: + """ + Quick smoke run — what CI uses on every PR. + + Equivalent to ``pytest benchmarks/ --quick --benchmark-disable -q``. + Every model builds at one size and every phase fires once, no timings + recorded. Typical wall-clock: ~20s. 
+ """ + args = ["benchmarks/", "--quick", "--benchmark-disable", "-q"] + if extra: + args.extend(extra) + _run_pytest(args) + + +@app.command() +def run( + long: Annotated[ + bool, + typer.Option( + "--long", + help="Include the slowest sizes (above each spec's long_threshold).", + ), + ] = False, + phase: Annotated[ + PhaseName | None, + typer.Option(help="Restrict to one phase's test file."), + ] = None, + model: Annotated[ + str | None, + typer.Option(help="Restrict to one model (passed as pytest ``-k``)."), + ] = None, + filter_expr: Annotated[ + str | None, + typer.Option( + "--filter", + "-k", + help="Arbitrary pytest ``-k`` expression (AND-ed with ``--model``).", + ), + ] = None, + json_out: Annotated[ + Path | None, + typer.Option("--json", help="Save pytest-benchmark JSON to this path."), + ] = None, + extra: Annotated[ + list[str] | None, + typer.Argument(help="Extra args forwarded to pytest verbatim."), + ] = None, +) -> None: + """ + Default timing run. Records timings with pytest-benchmark. + + Without ``--long``, sizes above each spec's ``long_threshold`` are + skipped — keeps the wall-clock around 45s instead of several minutes. + Add ``--long`` for the full sweep including the heaviest sizes + (knapsack at 1M, basic at 1600, pypsa_scigrid at >50). + + To skip timing entirely (e.g. just verifying everything runs at a + bigger size), use ``smoke`` instead, or pass ``--benchmark-disable`` + through ``extra``. + """ + args: list[str] = [] + args.append(_PHASE_TEST_FILE[phase] if phase is not None else "benchmarks/") + if long: + args.append("--long") + args.append("--benchmark-only") + if json_out is not None: + args.extend(["--benchmark-json", str(json_out)]) + + k_parts = [p for p in (model, filter_expr) if p] + if k_parts: + args.extend(["-k", " and ".join(k_parts)]) + + if extra: + args.extend(extra) + _run_pytest(args) + + +@app.command() +def notebook() -> None: + """ + Execute the registry-usage notebook end-to-end. 
+ + Used by CI to catch doc rot — if any cell raises, the workflow fails. + The executed copy is written to a tempdir and discarded, so the + in-tree notebook stays output-free (nbstripout doesn't have to chase + a populated file). + """ + nb = Path("benchmarks/notebooks/registry_usage.ipynb") + if not nb.exists(): + typer.secho(f"notebook not found: {nb}", fg=typer.colors.RED, err=True) + raise typer.Exit(code=1) + with tempfile.TemporaryDirectory() as tmp: + cmd = [ + sys.executable, + "-m", + "jupyter", + "nbconvert", + "--to", + "notebook", + "--execute", + "--ExecutePreprocessor.timeout=300", + "--output-dir", + tmp, + "--output", + "executed.ipynb", + str(nb), + ] + typer.secho(f"$ {' '.join(cmd)}", fg=typer.colors.BRIGHT_BLACK) + result = subprocess.run(cmd, check=False) + if result.returncode != 0: + raise typer.Exit(code=result.returncode) + + +# --- Memory subcommands ---------------------------------------------------- + + +@memory_app.command("save") +def memory_save_cmd( + label: Annotated[ + str, typer.Argument(help="Label to attach to this snapshot, e.g. a git sha.") + ], + quick: Annotated[ + bool, typer.Option("--quick", help="Use smaller problem sizes.") + ] = False, + test_path: Annotated[ + list[str] | None, + typer.Option("--test-path", help="Test file(s) to run; defaults to build."), + ] = None, +) -> None: + """ + Run the build phase under pytest-memray and save peak RSS to JSON. + + Results land in ``.benchmarks/memory/