diff --git a/.github/workflows/pr_docs_changes.yaml b/.github/workflows/pr_docs_changes.yaml
index 2ef9a20d..c51dc153 100644
--- a/.github/workflows/pr_docs_changes.yaml
+++ b/.github/workflows/pr_docs_changes.yaml
@@ -18,10 +18,7 @@ jobs:
     steps:
       - name: Checkout repo
         uses: actions/checkout@v4
-      - uses: actions/setup-node@v4
-        with:
-          node-version: 18.x
-      - name: Install MyST
-        run: npm install -g mystmd
+      - name: Set up Quarto
+        uses: quarto-dev/quarto-actions/setup@v2
       - name: Test documentation builds
-        run: cd docs && myst build --html
+        run: quarto render docs
diff --git a/Makefile b/Makefile
index f62643e1..03344916 100644
--- a/Makefile
+++ b/Makefile
@@ -1,15 +1,18 @@
-.PHONY: docs docs-serve
-
-MYSTMD_VERSION ?= 1.8.3
-MYST_CMD = npx --yes mystmd@$(MYSTMD_VERSION)
+.PHONY: docs docs-serve docs-generate-reference
 
 all: build-package
 
 docs:
-	cd docs && $(MYST_CMD) build --html
+	quarto render docs
 
 docs-serve:
-	cd docs && $(MYST_CMD) start
+	quarto preview docs
+
+# Regenerate the auto-generated variable / program reference under docs/reference/.
+# Run once per country model release; commits the refreshed pages alongside code.
+docs-generate-reference:
+	python docs/_generator/build_reference.py --country us --out docs/reference/us
+	python docs/_generator/build_reference.py --country uk --out docs/reference/uk
 
 install:
 	uv pip install -e .[dev]
diff --git a/README.md b/README.md
index 7fc607d5..e45dec98 100644
--- a/README.md
+++ b/README.md
@@ -4,26 +4,47 @@ A Python package for tax-benefit microsimulation analysis. Run policy simulation
 
 ## Quick start
 
+### Household calculator
+
 ```python
-from policyengine.core import Simulation
-from policyengine.tax_benefit_models.uk import PolicyEngineUKDataset, uk_latest
-from policyengine.outputs.aggregate import Aggregate, AggregateType
+import policyengine as pe
 
-# Load representative microdata
-dataset = PolicyEngineUKDataset(
-    name="FRS 2023-24",
-    filepath="./data/frs_2023_24_year_2026.h5",
+# UK: single adult earning £50,000
+uk = pe.uk.calculate_household(
+    people=[{"age": 35, "employment_income": 50_000}],
     year=2026,
 )
+print(uk.person[0].income_tax)                   # income tax
+print(uk.household.hbai_household_net_income)    # net income
+
+# US: single filer in California, with a reform
+us = pe.us.calculate_household(
+    people=[{"age": 35, "employment_income": 60_000}],
+    tax_unit={"filing_status": "SINGLE"},
+    household={"state_code": "CA"},
+    year=2026,
+    reform={"gov.irs.credits.ctc.amount.adult_dependent": 1000},
+)
+print(us.tax_unit.income_tax, us.household.household_net_income)
+```
 
-# Run simulation
-simulation = Simulation(
-    dataset=dataset,
-    tax_benefit_model_version=uk_latest,
+### Population analysis
+
+```python
+import policyengine as pe
+from policyengine.core import Simulation
+from policyengine.outputs.aggregate import Aggregate, AggregateType
+
+datasets = pe.uk.ensure_datasets(
+    datasets=["hf://policyengine/policyengine-uk-data/enhanced_frs_2023_24.h5"],
+    years=[2026],
+    data_folder="./data",
 )
+dataset = datasets["enhanced_frs_2023_24_2026"]
+
+simulation = Simulation(dataset=dataset, tax_benefit_model_version=pe.uk.model)
 simulation.run()
 
-# Calculate total universal credit spending
 agg = Aggregate(
     simulation=simulation,
     variable="universal_credit",
@@ -34,6 +55,9 @@ agg.run()
 print(f"Total UC spending: £{agg.result / 1e9:.1f}bn")
 ```
 
+For baseline-vs-reform comparisons, see `pe.uk.economic_impact_analysis`
+and its US counterpart.
+
 ## Documentation
 
 **Core concepts:**
@@ -179,12 +203,12 @@ dataset.load()
 Simulations apply tax-benefit models to datasets:
 
 ```python
+import policyengine as pe
 from policyengine.core import Simulation
-from policyengine.tax_benefit_models.uk import uk_latest
 
 simulation = Simulation(
     dataset=dataset,
-    tax_benefit_model_version=uk_latest,
+    tax_benefit_model_version=pe.uk.model,
 )
 simulation.run()
 
@@ -223,7 +247,7 @@ import datetime
 
 parameter = Parameter(
     name="gov.hmrc.income_tax.allowances.personal_allowance.amount",
-    tax_benefit_model_version=uk_latest,
+    tax_benefit_model_version=pe.uk.model,
     data_type=float,
 )
 
@@ -242,7 +266,7 @@ policy = Policy(
 # Run reform simulation
 reform_sim = Simulation(
     dataset=dataset,
-    tax_benefit_model_version=uk_latest,
+    tax_benefit_model_version=pe.uk.model,
     policy=policy,
 )
 reform_sim.run()
diff --git a/changelog.d/v4-base-extraction.changed.md b/changelog.d/v4-base-extraction.changed.md
new file mode 100644
index 00000000..572088a3
--- /dev/null
+++ b/changelog.d/v4-base-extraction.changed.md
@@ -0,0 +1 @@
+Extracted shared `MicrosimulationModelVersion` base class in `policyengine.tax_benefit_models.common`. Country subclasses now declare class-level metadata (`country_code`, `package_name`, `group_entities`) and implement a handful of thin hooks; `run()` stays per-country. Byte-level snapshot tests verify zero output drift.
diff --git a/changelog.d/v4-dict-reforms.added.md b/changelog.d/v4-dict-reforms.added.md
new file mode 100644
index 00000000..02405cdc
--- /dev/null
+++ b/changelog.d/v4-dict-reforms.added.md
@@ -0,0 +1 @@
+``Simulation(policy={...})`` and ``Simulation(dynamic={...})`` now accept the same flat ``{"param.path": value}`` / ``{"param.path": {date: value}}`` dict that ``pe.{uk,us}.calculate_household(reform=...)`` accepts. Dicts are compiled to full ``Policy`` / ``Dynamic`` objects on construction using the ``tax_benefit_model_version`` for parameter-path validation and ``dataset.year`` for scalar effective-date defaulting. Removes the last place where population microsim required building ``Parameter`` / ``ParameterValue`` by hand.
diff --git a/changelog.d/v4-docs-refresh.changed.md b/changelog.d/v4-docs-refresh.changed.md
new file mode 100644
index 00000000..11e7d0d2
--- /dev/null
+++ b/changelog.d/v4-docs-refresh.changed.md
@@ -0,0 +1 @@
+Documentation refreshed for the v4 agent-first surface. README, `core-concepts`, `economic-impact-analysis`, `country-models-{uk,us}`, `regions-and-scoping`, `examples`, and `dev` now lead with `pe.uk.*` / `pe.us.*` entry points and flat-kwarg `calculate_household` usage. Removed leftover docs for the dropped `filter_field`/`filter_value` simulation fields. `examples/household_impact_example.py` rewritten against the v4 API.
diff --git a/changelog.d/v4-facade.added.md b/changelog.d/v4-facade.added.md
new file mode 100644
index 00000000..f05dea82
--- /dev/null
+++ b/changelog.d/v4-facade.added.md
@@ -0,0 +1,47 @@
+**BREAKING (v4):** Collapse the household-calculator surface into a
+single agent-friendly entry point, ``pe.us.calculate_household`` /
+``pe.uk.calculate_household``.
+
+New public API:
+
+- ``policyengine/__init__.py`` populated with canonical accessors:
+  ``pe.us``, ``pe.uk``, ``pe.Simulation`` (replacing the empty top-level
+  module). ``import policyengine as pe`` now gives you everything a
+  new coding session needs to reach in one line.
+- ``pe.us.calculate_household(**kwargs)`` and ``pe.uk.calculate_household``
+  take flat keyword arguments (``people``, per-entity overrides,
+  ``year``, ``reform``, ``extra_variables``) instead of a pydantic
+  input wrapper.
+- ``reform=`` accepts a plain dict: ``{parameter_path: value}`` or
+  ``{parameter_path: {effective_date: value}}``. Compiles internally.
+- Returns :class:`HouseholdResult` (new) with dot-access:
+  ``result.tax_unit.income_tax``, ``result.household.household_net_income``,
+  ``result.person[0].age``. Singleton entities are
+  :class:`EntityResult`; ``person`` is a list of them. ``to_dict()``
+  and ``write(path)`` serialize to JSON.
+- ``extra_variables=[...]`` is now a flat list; the library dispatches
+  each name to its entity by looking it up on the model.
+- Unknown variable names (in ``people``, entity overrides, or
+  ``extra_variables``) raise ``ValueError`` with a ``difflib`` close-match
+  suggestion and a paste-able fix hint.
+- Unknown dot-access on a result raises ``AttributeError`` with the
+  list of available variables plus the ``extra_variables=[...]`` call
+  that would surface the requested one.
+
+Removed (v4 breaking):
+
+- ``USHouseholdInput`` / ``UKHouseholdInput`` / ``USHouseholdOutput`` /
+  ``UKHouseholdOutput`` pydantic wrappers.
+- ``calculate_household_impact`` — the name was misleading (it
+  returned levels, not an impact vs. baseline). Reserved for a future
+  delta function.
+- The bare ``us_model`` / ``uk_model`` label-only singletons; each
+  country module now exposes ``.model`` pointing at the real
+  ``TaxBenefitModelVersion`` (kept ``us_latest`` / ``uk_latest``
+  aliases for compatibility with any in-flight downstream code).
+
+New internal module:
+
+- ``policyengine.tax_benefit_models.common`` — ``compile_reform``,
+  ``dispatch_extra_variables``, ``EntityResult``, ``HouseholdResult``
+  shared by both country implementations.
diff --git a/changelog.d/v4-provenance-package.changed.md b/changelog.d/v4-provenance-package.changed.md
new file mode 100644
index 00000000..8c016e02
--- /dev/null
+++ b/changelog.d/v4-provenance-package.changed.md
@@ -0,0 +1,24 @@
+**BREAKING (v4):** Separate the provenance layer from the core
+value-object layer.
+
+- ``policyengine/core/release_manifest.py`` → ``policyengine/provenance/manifest.py``
+- ``policyengine/core/trace_tro.py`` → ``policyengine/provenance/trace.py``
+- New ``policyengine.provenance`` package re-exports the public
+  surface (``get_release_manifest``, ``get_data_release_manifest``,
+  ``build_trace_tro_from_release_bundle``, ``build_simulation_trace_tro``,
+  ``serialize_trace_tro``, ``canonical_json_bytes``,
+  ``compute_trace_composition_fingerprint``, etc.).
+- ``policyengine.core`` no longer re-exports provenance types.
+  ``policyengine.core`` shrinks to value objects only (Dataset,
+  Variable, Parameter, Policy, Dynamic, Simulation, Region,
+  TaxBenefitModel, TaxBenefitModelVersion, scoping strategies).
+- ``import policyengine.core.scoping_strategy`` no longer imports
+  ``h5py`` at module load; the weight-replacement code path
+  lazy-imports it. ``import policyengine.outputs.constituency_impact``
+  and ``import policyengine.outputs.local_authority_impact`` do the
+  same.
+- Migration for downstream: replace
+  ``from policyengine.core import DataReleaseManifest`` (et al.)
+  with ``from policyengine.provenance import DataReleaseManifest``.
+  The country-module imports in internal code (``tax_benefit_models/{us,uk}/model.py``
+  and ``datasets.py``) are already updated.
diff --git a/changelog.d/variable-graph.added.md b/changelog.d/variable-graph.added.md
new file mode 100644
index 00000000..11ce0773
--- /dev/null
+++ b/changelog.d/variable-graph.added.md
@@ -0,0 +1 @@
+Added ``policyengine.graph`` — a static-analysis-based variable dependency graph for PolicyEngine source trees. ``extract_from_path(path)`` walks a directory of Variable subclasses, parses formula-method bodies for ``entity("<var>", period)`` and ``add(entity, period, [list])`` references, and returns a ``VariableGraph``. Queries include ``deps(var)`` (direct dependencies), ``impact(var)`` (transitive downstream), and ``path(src, dst)`` (shortest dependency chain). No runtime dependency on country models — indexes ``policyengine-us`` (4,577 variables) in under a second.
diff --git a/docs/.gitignore b/docs/.gitignore
index eac09687..d05d3238 100644
--- a/docs/.gitignore
+++ b/docs/.gitignore
@@ -1,2 +1,5 @@
-# MyST build outputs
-_build
+# Quarto build outputs
+_site
+_freeze
+/.quarto/
+**/*.quarto_ipynb
diff --git a/docs/_generator/README.md b/docs/_generator/README.md
new file mode 100644
index 00000000..ef5c7268
--- /dev/null
+++ b/docs/_generator/README.md
@@ -0,0 +1,52 @@
+# Reference generator prototype
+
+Auto-generates one Quarto page per variable in a country model, plus a program-coverage page, purely from metadata on the `Variable` classes and `programs.yaml`.
+
+## Run
+
+```bash
+# Full US reference (takes a couple of minutes — 4,686 variables)
+python docs/_generator/build_reference.py --country us --out docs/_generated/reference/us
+
+# Preview a filtered subset
+python docs/_generator/build_reference.py --country us --filter chip --out /tmp/ref-preview
+```
+
+Then render:
+
+```bash
+cd /tmp/ref-preview && quarto render
+```
+
+## What's generated from code alone
+
+Per variable:
+
+- Title and identifier
+- Metadata table: entity, value type, unit, period, `defined_for` gate
+- Documentation (docstring)
+- Components (`adds` / `subtracts` lists)
+- Statutory references (from `reference = ...`)
+- Source file path and line number
+
+Per program: a row in the generated program-coverage page pulled from `programs.yaml` (id, name, category, agency, status, coverage).
+
+Per directory (`gov/hhs/chip/`, `gov/usda/snap/`, etc.): a listing page using Quarto's built-in directory listing so the nav auto-organizes.
+
+## What still requires hand-authored prose
+
+- Methodology narrative (why the model is structured this way)
+- Tutorials (how to use `policyengine.py`)
+- Paper content (peer-reviewable argument)
+- Per-country deep dives that read as essays rather than reference lookups
+
+## Design
+
+The generator reads directly from the imported country model — no web API calls, no intermediate JSON. This keeps the build offline-reproducible and version-pinned to whatever country model the `policyengine.py` package has installed. Re-running the generator on release produces a snapshot of the reference docs tied to the exact published model versions.
+
+Extensions worth considering:
+
+1. Walk `parameters/` YAML tree and emit a page per parameter with its time series, breakdowns, and references.
+2. For each variable with a formula, surface the dependency graph (other variables / parameters it reads). `policyengine_core`'s `Variable.exhaustive_parameter_dependencies` gets partway there.
+3. For each calibration target (in `policyengine-us-data/storage/calibration_targets/*.csv`), emit a page describing source, aggregation level, freshness.
+4. Cross-link variables to the programs they contribute to via `programs.yaml`'s `variable:` field.
diff --git a/docs/_generator/build_reference.py b/docs/_generator/build_reference.py
new file mode 100644
index 00000000..4b360622
--- /dev/null
+++ b/docs/_generator/build_reference.py
@@ -0,0 +1,387 @@
+"""Generate reference documentation pages from PolicyEngine country models.
+
+Introspects a country model's `TaxBenefitSystem` for every variable, reads
+attributes directly from each `Variable` class (`label`, `documentation`,
+`entity`, `unit`, `reference`, `defined_for`, `definition_period`,
+`adds`/`subtracts`, source file path), and writes one ``.qmd`` page per
+variable grouped by its parameter-tree path (``gov/hhs/chip/chip_premium``).
+
+Also loads the country model's ``programs.yaml`` and writes a program-level
+landing page for each entry, cross-linking the variables that belong to it.
+
+Usage
+-----
+
+Run for a single country model, writing into an output directory:
+
+.. code-block:: bash
+
+    python docs/_generator/build_reference.py \\
+        --country us \\
+        --out docs/_generated/reference/us
+
+Run for a subset of variables to preview output:
+
+.. code-block:: bash
+
+    python docs/_generator/build_reference.py \\
+        --country us --filter chip --out /tmp/ref-preview
+
+Design notes
+------------
+
+This is a prototype meant to demonstrate how much reference material can be
+regenerated from code + parameter YAML + ``programs.yaml`` alone, with no
+hand-authored prose. Intentional non-goals:
+
+* Do not execute formulas; read metadata only.
+* Do not render parameters (a follow-up can walk the parameter tree similarly).
+* Do not write an index page tree; Quarto's directory listings handle that.
+
+The generator emits standard Quarto Markdown (``.qmd``). Quarto reads regular
+Markdown too, so the outputs drop into either a Quarto or MyST site.
+"""
+
+from __future__ import annotations
+
+import argparse
+import importlib
+import logging
+import re
+import textwrap
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Iterable
+
+import yaml
+
+logger = logging.getLogger(__name__)
+
+
+COUNTRY_MODULES = {
+    "us": "policyengine_us",
+    "uk": "policyengine_uk",
+    "canada": "policyengine_canada",
+    "il": "policyengine_il",
+    "ng": "policyengine_ng",
+}
+
+
+@dataclass(frozen=True)
+class VariableRecord:
+    name: str
+    label: str | None
+    documentation: str | None
+    entity: str | None
+    unit: str | None
+    value_type: str | None
+    definition_period: str | None
+    references: tuple[str, ...]
+    defined_for: str | None
+    source_file: Path | None
+    source_line: int | None
+    adds: tuple[str, ...]
+    subtracts: tuple[str, ...]
+    tree_path: tuple[str, ...]
+
+
+def _tree_path_from_source(source_file: Path | None, package_root: Path) -> tuple[str, ...]:
+    if source_file is None:
+        return ("_ungrouped",)
+    try:
+        rel = source_file.relative_to(package_root / "variables")
+    except ValueError:
+        return ("_ungrouped",)
+    parts = rel.with_suffix("").parts
+    return parts[:-1] if parts else ("_ungrouped",)
+
+
+def _normalize_references(raw) -> tuple[str, ...]:
+    if raw is None:
+        return ()
+    if isinstance(raw, str):
+        return (raw,)
+    if isinstance(raw, (list, tuple)):
+        return tuple(str(r) for r in raw if r)
+    return (str(raw),)
+
+
+def _variable_records(country: str) -> Iterable[VariableRecord]:
+    module_name = COUNTRY_MODULES[country]
+    country_module = importlib.import_module(module_name)
+
+    system_module = importlib.import_module(f"{module_name}.system")
+    tbs = system_module.CountryTaxBenefitSystem()
+
+    package_root = Path(country_module.__file__).parent
+
+    import inspect
+
+    for name in sorted(tbs.variables):
+        variable = tbs.variables[name]
+        try:
+            source_file = Path(inspect.getsourcefile(type(variable)))
+            source_line = inspect.getsourcelines(type(variable))[1]
+        except (TypeError, OSError):
+            source_file = None
+            source_line = None
+
+        entity_key = getattr(variable.entity, "key", None) if variable.entity else None
+        value_type = getattr(variable, "value_type", None)
+        value_type_name = (
+            value_type.__name__
+            if isinstance(value_type, type)
+            else str(value_type) if value_type is not None else None
+        )
+        defined_for = getattr(variable, "defined_for", None)
+        defined_for_name = (
+            defined_for.name if hasattr(defined_for, "name") else defined_for
+        )
+
+        yield VariableRecord(
+            name=name,
+            label=variable.label,
+            documentation=variable.documentation,
+            entity=entity_key,
+            unit=getattr(variable, "unit", None),
+            value_type=value_type_name,
+            definition_period=getattr(variable, "definition_period", None),
+            references=_normalize_references(getattr(variable, "reference", None)),
+            defined_for=defined_for_name,
+            source_file=source_file,
+            source_line=source_line,
+            adds=tuple(getattr(variable, "adds", ()) or ()),
+            subtracts=tuple(getattr(variable, "subtracts", ()) or ()),
+            tree_path=_tree_path_from_source(source_file, package_root),
+        )
+
+
+def _escape_yaml_scalar(value: str) -> str:
+    return value.replace('"', '\\"')
+
+
+def _render_variable_page(record: VariableRecord, country: str) -> str:
+    title = record.label or record.name
+    lines: list[str] = [
+        "---",
+        f'title: "{_escape_yaml_scalar(title)}"',
+        f'subtitle: "`{record.name}`"',
+    ]
+    if record.documentation:
+        summary = record.documentation.strip().splitlines()[0][:220]
+        lines.append(f'description: "{_escape_yaml_scalar(summary)}"')
+    lines.extend(
+        [
+            "format:",
+            "  html:",
+            "    code-copy: true",
+            "---",
+            "",
+        ]
+    )
+
+    metadata = [
+        ("Name", f"`{record.name}`"),
+        ("Entity", f"`{record.entity}`" if record.entity else "—"),
+        ("Value type", f"`{record.value_type}`" if record.value_type else "—"),
+        ("Unit", f"`{record.unit}`" if record.unit else "—"),
+        ("Period", f"`{record.definition_period}`" if record.definition_period else "—"),
+        (
+            "Defined for",
+            f"`{record.defined_for}`" if record.defined_for else "—",
+        ),
+    ]
+    lines.append("| Field | Value |")
+    lines.append("|---|---|")
+    for key, value in metadata:
+        lines.append(f"| {key} | {value} |")
+    lines.append("")
+
+    if record.documentation:
+        lines.append("## Documentation")
+        lines.append("")
+        lines.append(record.documentation.strip())
+        lines.append("")
+
+    if record.adds:
+        lines.append("## Components")
+        lines.append("")
+        lines.append("This variable sums the following variables:")
+        lines.append("")
+        for component in record.adds:
+            lines.append(f"- `{component}`")
+        lines.append("")
+
+    if record.subtracts:
+        lines.append("## Subtractions")
+        lines.append("")
+        lines.append("This variable subtracts the following variables:")
+        lines.append("")
+        for component in record.subtracts:
+            lines.append(f"- `{component}`")
+        lines.append("")
+
+    if record.references:
+        lines.append("## References")
+        lines.append("")
+        for ref in record.references:
+            lines.append(f"- <{ref}>")
+        lines.append("")
+
+    if record.source_file:
+        try:
+            repo_rel = record.source_file.relative_to(
+                record.source_file.parents[5]
+            )
+        except (ValueError, IndexError):
+            repo_rel = record.source_file.name
+        lines.append("## Source")
+        lines.append("")
+        if record.source_line:
+            lines.append(f"`{repo_rel}`, line {record.source_line}")
+        else:
+            lines.append(f"`{repo_rel}`")
+        lines.append("")
+
+    return "\n".join(lines)
+
+
+def _slug(value: str) -> str:
+    return re.sub(r"[^A-Za-z0-9_-]+", "-", value).strip("-")
+
+
+def _write_variables(
+    records: list[VariableRecord],
+    out_root: Path,
+    country: str,
+) -> int:
+    written = 0
+    for record in records:
+        tree_dir = out_root.joinpath(*record.tree_path)
+        tree_dir.mkdir(parents=True, exist_ok=True)
+        page_path = tree_dir / f"{_slug(record.name)}.qmd"
+        page_path.write_text(_render_variable_page(record, country))
+        written += 1
+    return written
+
+
+def _write_tree_indices(out_root: Path) -> int:
+    written = 0
+    for directory in [out_root, *(p for p in out_root.rglob("*") if p.is_dir())]:
+        index_path = directory / "index.qmd"
+        if index_path.exists():
+            continue
+        title = directory.name if directory != out_root else "Reference"
+        index_path.write_text(
+            textwrap.dedent(
+                f"""\
+                ---
+                title: "{title}"
+                listing:
+                  contents: "*.qmd"
+                  type: table
+                  sort: "title"
+                  fields: [title, subtitle, description]
+                ---
+                """
+            )
+        )
+        written += 1
+    return written
+
+
+def _write_programs_index(country: str, out_root: Path) -> int:
+    module_name = COUNTRY_MODULES[country]
+    country_module = importlib.import_module(module_name)
+    package_root = Path(country_module.__file__).parent
+    programs_path = package_root / "programs.yaml"
+    if not programs_path.exists():
+        return 0
+    with programs_path.open() as f:
+        registry = yaml.safe_load(f)
+    programs = registry.get("programs", [])
+    lines: list[str] = [
+        "---",
+        'title: "Program coverage"',
+        'description: "Programs modeled in the country model, generated from programs.yaml."',
+        "---",
+        "",
+        "| ID | Name | Category | Agency | Status | Coverage |",
+        "|---|---|---|---|---|---|",
+    ]
+    for program in programs:
+        lines.append(
+            "| "
+            + " | ".join(
+                str(program.get(field, "")).replace("\n", " ")
+                for field in ("id", "name", "category", "agency", "status", "coverage")
+            )
+            + " |"
+        )
+    target = out_root / "programs.qmd"
+    target.write_text("\n".join(lines) + "\n")
+    return 1
+
+
+def build_reference(
+    country: str,
+    out_root: Path,
+    filter_substring: str | None = None,
+) -> dict[str, int]:
+    out_root.mkdir(parents=True, exist_ok=True)
+    records = list(_variable_records(country))
+    if filter_substring:
+        needle = filter_substring.lower()
+        records = [
+            r
+            for r in records
+            if needle in r.name.lower()
+            or needle in " ".join(str(p).lower() for p in r.tree_path)
+        ]
+    variables_written = _write_variables(records, out_root, country)
+    programs_written = _write_programs_index(country, out_root)
+    indices_written = _write_tree_indices(out_root)
+    return {
+        "variables": variables_written,
+        "programs": programs_written,
+        "indices": indices_written,
+    }
+
+
+def _parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--country",
+        choices=sorted(COUNTRY_MODULES),
+        default="us",
+        help="Country model to introspect.",
+    )
+    parser.add_argument(
+        "--out",
+        type=Path,
+        required=True,
+        help="Output directory for generated .qmd pages.",
+    )
+    parser.add_argument(
+        "--filter",
+        default=None,
+        help="Substring filter on variable name or tree path (case-insensitive).",
+    )
+    return parser.parse_args()
+
+
+def main() -> None:
+    logging.basicConfig(level=logging.INFO, format="%(message)s")
+    args = _parse_args()
+    stats = build_reference(args.country, args.out, args.filter)
+    logger.info(
+        "Wrote %d variable pages, %d programs page, %d directory indices to %s",
+        stats["variables"],
+        stats["programs"],
+        stats["indices"],
+        args.out,
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/docs/_quarto.yml b/docs/_quarto.yml
new file mode 100644
index 00000000..9393b70d
--- /dev/null
+++ b/docs/_quarto.yml
@@ -0,0 +1,63 @@
+project:
+  type: website
+  output-dir: _site
+  preview:
+    port: 8080
+
+website:
+  title: "PolicyEngine"
+  description: "Tax-benefit microsimulation for Python."
+  repo-url: https://github.com/PolicyEngine/policyengine.py
+  repo-actions: [edit, issue]
+  page-navigation: true
+  navbar:
+    left:
+      - href: index.md
+        text: Overview
+      - getting-started.md
+      - text: "Guide"
+        menu:
+          - households.md
+          - reforms.md
+          - microsim.md
+          - impact-analysis.md
+          - outputs.md
+          - regions.md
+      - examples.md
+      - dev.md
+  sidebar:
+    style: "floating"
+    collapse-level: 2
+    contents:
+      - index.md
+      - getting-started.md
+      - section: "Guide"
+        contents:
+          - households.md
+          - reforms.md
+          - microsim.md
+          - impact-analysis.md
+          - outputs.md
+          - regions.md
+          - visualisation.md
+      - section: "Platform"
+        contents:
+          - countries.md
+          - release-bundles.md
+      - section: "Usage"
+        contents:
+          - examples.md
+      - section: "Development"
+        contents:
+          - dev.md
+
+format:
+  html:
+    theme: [cosmo]
+    toc: true
+    toc-depth: 3
+    code-copy: true
+    code-overflow: wrap
+    highlight-style: github
+    anchor-sections: true
+    link-external-newwindow: true
diff --git a/docs/advanced-outputs.md b/docs/advanced-outputs.md
deleted file mode 100644
index 5fdbaead..00000000
--- a/docs/advanced-outputs.md
+++ /dev/null
@@ -1,276 +0,0 @@
-# Advanced outputs
-
-Beyond `Aggregate` and `ChangeAggregate` (covered in [Core concepts](core-concepts.md)), the package provides specialised output types for distributional analysis, poverty measurement, and inequality metrics.
-
-All output types follow the same pattern: create an instance, call `.run()`, read the result fields. Convenience functions are provided for common use cases.
-
-## OutputCollection
-
-Many convenience functions return an `OutputCollection[T]`, a container holding both the individual output objects and a pandas DataFrame:
-
-```python
-from policyengine.core import OutputCollection
-
-# Returned by calculate_decile_impacts(), calculate_us_poverty_rates(), etc.
-collection = calculate_us_poverty_rates(simulation)
-
-# Access individual objects
-for poverty in collection.outputs:
-    print(f"{poverty.poverty_type}: {poverty.rate:.4f}")
-
-# Access as DataFrame
-print(collection.dataframe)
-```
-
-## DecileImpact
-
-Calculates the impact of a policy reform on a single income decile: baseline and reform mean income, absolute and relative change, and counts of people better off, worse off, and unchanged.
-
-### Using the convenience function
-
-```python
-from policyengine.outputs.decile_impact import calculate_decile_impacts
-
-decile_impacts = calculate_decile_impacts(
-    dataset=dataset,
-    tax_benefit_model_version=us_latest,
-    baseline_policy=None,           # Current law
-    reform_policy=reform,
-    income_variable="household_net_income",  # Default for US
-)
-
-for d in decile_impacts.outputs:
-    print(f"Decile {d.decile}: "
-          f"baseline={d.baseline_mean:,.0f}, "
-          f"reform={d.reform_mean:,.0f}, "
-          f"change={d.absolute_change:+,.0f} "
-          f"({d.relative_change:+.2f}%)")
-```
-
-### Using directly
-
-```python
-from policyengine.outputs.decile_impact import DecileImpact
-
-impact = DecileImpact(
-    baseline_simulation=baseline_sim,
-    reform_simulation=reform_sim,
-    income_variable="household_net_income",
-    decile=5,  # 5th decile
-)
-impact.run()
-
-print(f"Count better off: {impact.count_better_off:,.0f}")
-print(f"Count worse off: {impact.count_worse_off:,.0f}")
-```
-
-### Parameters
-
-| Parameter | Default | Description |
-|---|---|---|
-| `income_variable` | `equiv_hbai_household_net_income` | Income variable to group by and measure changes |
-| `decile_variable` | `None` | Use a pre-computed grouping variable instead of `qcut` |
-| `entity` | Auto-detected | Entity level for the income variable |
-| `quantiles` | `10` | Number of quantile groups (10 = deciles, 5 = quintiles) |
-
-For US simulations, use `income_variable="household_net_income"`. The UK default (`equiv_hbai_household_net_income`) is the equivalised HBAI measure.
-
-## IntraDecileImpact
-
-Classifies people within each decile into five income change categories:
-
-| Category | Threshold |
-|---|---|
-| Lose more than 5% | change <= -5% |
-| Lose less than 5% | -5% < change <= -0.1% |
-| No change | -0.1% < change <= 0.1% |
-| Gain less than 5% | 0.1% < change <= 5% |
-| Gain more than 5% | change > 5% |
-
-Proportions are people-weighted (using `household_count_people * household_weight`).
-
-### Using the convenience function
-
-```python
-from policyengine.outputs.intra_decile_impact import compute_intra_decile_impacts
-
-intra = compute_intra_decile_impacts(
-    baseline_simulation=baseline_sim,
-    reform_simulation=reform_sim,
-    income_variable="household_net_income",
-)
-
-for row in intra.outputs:
-    if row.decile == 0:
-        label = "Overall"
-    else:
-        label = f"Decile {row.decile}"
-    print(f"{label}: "
-          f"lose>5%={row.lose_more_than_5pct:.2%}, "
-          f"lose<5%={row.lose_less_than_5pct:.2%}, "
-          f"no change={row.no_change:.2%}, "
-          f"gain<5%={row.gain_less_than_5pct:.2%}, "
-          f"gain>5%={row.gain_more_than_5pct:.2%}")
-```
-
-The function returns deciles 1-10 plus an overall average at `decile=0`.
-
-## Poverty
-
-Calculates poverty headcount and rates for a single simulation, with optional demographic filtering.
-
-### Poverty types
-
-**UK** (4 measures):
-- Absolute before housing costs (BHC)
-- Absolute after housing costs (AHC)
-- Relative before housing costs (BHC)
-- Relative after housing costs (AHC)
-
-**US** (2 measures):
-- SPM poverty
-- Deep SPM poverty (below 50% of SPM threshold)
-
-### Calculating all poverty rates
-
-```python
-from policyengine.outputs.poverty import (
-    calculate_uk_poverty_rates,
-    calculate_us_poverty_rates,
-)
-
-# US
-us_poverty = calculate_us_poverty_rates(simulation)
-for p in us_poverty.outputs:
-    print(f"{p.poverty_type}: headcount={p.headcount:,.0f}, rate={p.rate:.4f}")
-
-# UK
-uk_poverty = calculate_uk_poverty_rates(simulation)
-for p in uk_poverty.outputs:
-    print(f"{p.poverty_type}: headcount={p.headcount:,.0f}, rate={p.rate:.4f}")
-```
-
-### Poverty by demographic group
-
-```python
-from policyengine.outputs.poverty import (
-    calculate_us_poverty_by_age,
-    calculate_us_poverty_by_gender,
-    calculate_us_poverty_by_race,
-    calculate_uk_poverty_by_age,
-    calculate_uk_poverty_by_gender,
-)
-
-# By age group (child <18, adult 18-64, senior 65+)
-by_age = calculate_us_poverty_by_age(simulation)
-for p in by_age.outputs:
-    print(f"{p.filter_group} {p.poverty_type}: {p.rate:.4f}")
-
-# By gender
-by_gender = calculate_us_poverty_by_gender(simulation)
-
-# By race (US only: WHITE, BLACK, HISPANIC, OTHER)
-by_race = calculate_us_poverty_by_race(simulation)
-```
-
-### Custom filters
-
-```python
-from policyengine.outputs.poverty import Poverty
-
-# Child poverty only
-child_poverty = Poverty(
-    simulation=simulation,
-    poverty_variable="spm_unit_is_in_spm_poverty",
-    entity="person",
-    filter_variable="age",
-    filter_variable_leq=17,
-)
-child_poverty.run()
-print(f"Child SPM poverty rate: {child_poverty.rate:.4f}")
-```
-
-### Result fields
-
-| Field | Description |
-|---|---|
-| `headcount` | Weighted count of people in poverty |
-| `total_population` | Weighted total population (after filters) |
-| `rate` | `headcount / total_population` |
-| `filter_group` | Group label set by demographic convenience functions |
-
-## Inequality
-
-Calculates weighted inequality metrics for a single simulation: Gini coefficient and income share measures.
-
-### Using convenience functions
-
-```python
-from policyengine.outputs.inequality import (
-    calculate_uk_inequality,
-    calculate_us_inequality,
-)
-
-# US (uses household_net_income by default)
-ineq = calculate_us_inequality(simulation)
-print(f"Gini: {ineq.gini:.4f}")
-print(f"Top 10% share: {ineq.top_10_share:.4f}")
-print(f"Top 1% share: {ineq.top_1_share:.4f}")
-print(f"Bottom 50% share: {ineq.bottom_50_share:.4f}")
-
-# UK (uses equiv_hbai_household_net_income by default)
-ineq = calculate_uk_inequality(simulation)
-```
-
-### With demographic filters
-
-```python
-# Inequality among working-age adults only
-ineq = calculate_us_inequality(
-    simulation,
-    filter_variable="age",
-    filter_variable_geq=18,
-    filter_variable_leq=64,
-)
-```
-
-### Using directly
-
-```python
-from policyengine.outputs.inequality import Inequality
-
-ineq = Inequality(
-    simulation=simulation,
-    income_variable="household_net_income",
-    entity="household",
-)
-ineq.run()
-```
-
-### Result fields
-
-| Field | Description |
-|---|---|
-| `gini` | Weighted Gini coefficient (0 = perfect equality, 1 = perfect inequality) |
-| `top_10_share` | Share of total income held by top 10% |
-| `top_1_share` | Share of total income held by top 1% |
-| `bottom_50_share` | Share of total income held by bottom 50% |
-
-## Comparing baseline and reform
-
-Poverty and inequality are single-simulation outputs. To compare baseline and reform, compute both and take the difference:
-
-```python
-baseline_poverty = calculate_us_poverty_rates(baseline_sim)
-reform_poverty = calculate_us_poverty_rates(reform_sim)
-
-for bp, rp in zip(baseline_poverty.outputs, reform_poverty.outputs):
-    change = rp.rate - bp.rate
-    print(f"{bp.poverty_type}: {bp.rate:.4f} -> {rp.rate:.4f} ({change:+.4f})")
-
-baseline_ineq = calculate_us_inequality(baseline_sim)
-reform_ineq = calculate_us_inequality(reform_sim)
-print(f"Gini change: {reform_ineq.gini - baseline_ineq.gini:+.4f}")
-```
-
-The `economic_impact_analysis()` function does this automatically and returns both baseline and reform poverty/inequality in the `PolicyReformAnalysis` result. See [Economic impact analysis](economic-impact-analysis.md).
diff --git a/docs/core-concepts.md b/docs/core-concepts.md
deleted file mode 100644
index 425c5f62..00000000
--- a/docs/core-concepts.md
+++ /dev/null
@@ -1,662 +0,0 @@
-# Core concepts
-
-PolicyEngine.py is a Python package for tax-benefit microsimulation analysis. It provides a unified interface for running policy simulations, analysing distributional impacts, and visualising results across different countries.
-
-## Architecture overview
-
-The package is organised around several core concepts:
-
-- **Tax-benefit models**: Country-specific implementations (UK, US) that define tax and benefit rules
-- **Datasets**: Microdata representing populations at entity level (person, household, etc.)
-- **Simulations**: Execution environments that apply tax-benefit models to datasets
-- **Outputs**: Analysis tools for extracting insights from simulation results
-- **Policies**: Parametric reforms that modify tax-benefit system parameters
-
-## Tax-benefit models
-
-Tax-benefit models define the rules and calculations for a country's tax and benefit system. Each model version contains:
-
-- **Variables**: Calculated values (e.g., income tax, universal credit)
-- **Parameters**: System settings (e.g., personal allowance, benefit rates)
-- **Parameter values**: Time-bound values for parameters
-
-### Using a tax-benefit model
-
-```python
-from policyengine.tax_benefit_models.uk import uk_latest
-from policyengine.tax_benefit_models.us import us_latest
-
-# UK model includes variables like:
-# - income_tax, national_insurance, universal_credit
-# - Parameters like personal allowance, NI thresholds
-
-# US model includes variables like:
-# - income_tax, payroll_tax, eitc, ctc, snap
-# - Parameters like standard deduction, EITC rates
-```
-
-## Datasets
-
-Datasets contain microdata representing a population. Each dataset has:
-
-- **Entity-level data**: Separate dataframes for person, household, and other entities
-- **Weights**: Survey weights for population representation
-- **Join keys**: Relationships between entities (e.g., which household each person belongs to)
-
-### Dataset structure
-
-```python
-from policyengine.tax_benefit_models.uk import PolicyEngineUKDataset
-
-dataset = PolicyEngineUKDataset(
-    name="FRS 2023-24",
-    description="Family Resources Survey microdata",
-    filepath="./data/frs_2023_24_year_2026.h5",
-    year=2026,
-)
-
-# Access entity-level data
-person_data = dataset.data.person      # MicroDataFrame
-household_data = dataset.data.household
-benunit_data = dataset.data.benunit    # Benefit unit (UK only)
-```
-
-### Creating custom datasets
-
-You can create custom datasets for scenario analysis:
-
-```python
-import pandas as pd
-from microdf import MicroDataFrame
-from policyengine.tax_benefit_models.uk import PolicyEngineUKDataset, UKYearData
-
-# Create person data
-person_df = MicroDataFrame(
-    pd.DataFrame({
-        "person_id": [0, 1, 2],
-        "person_household_id": [0, 0, 1],
-        "person_benunit_id": [0, 0, 1],
-        "age": [35, 8, 40],
-        "employment_income": [30000, 0, 50000],
-        "person_weight": [1.0, 1.0, 1.0],
-    }),
-    weights="person_weight"
-)
-
-# Create household data
-household_df = MicroDataFrame(
-    pd.DataFrame({
-        "household_id": [0, 1],
-        "region": ["LONDON", "SOUTH_EAST"],
-        "rent": [15000, 12000],
-        "household_weight": [1.0, 1.0],
-    }),
-    weights="household_weight"
-)
-
-# Create benunit data
-benunit_df = MicroDataFrame(
-    pd.DataFrame({
-        "benunit_id": [0, 1],
-        "would_claim_uc": [True, True],
-        "benunit_weight": [1.0, 1.0],
-    }),
-    weights="benunit_weight"
-)
-
-dataset = PolicyEngineUKDataset(
-    name="Custom scenario",
-    description="Single parent vs single adult",
-    filepath="./custom.h5",
-    year=2026,
-    data=UKYearData(
-        person=person_df,
-        household=household_df,
-        benunit=benunit_df,
-    )
-)
-```
-
-## Data loading
-
-Before running simulations, you need representative microdata. The package provides three functions for managing datasets:
-
-- **`ensure_datasets()`**: Load from disk if available, otherwise download and compute (recommended)
-- **`create_datasets()`**: Always download from HuggingFace and compute from scratch
-- **`load_datasets()`**: Load previously saved HDF5 files from disk
-
-```python
-from policyengine.tax_benefit_models.us import ensure_datasets
-
-# First run: downloads from HuggingFace, computes variables, saves to ./data/
-# Subsequent runs: loads from disk instantly
-datasets = ensure_datasets(
-    datasets=["hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5"],
-    years=[2026],
-    data_folder="./data",
-)
-dataset = datasets["enhanced_cps_2024_2026"]
-```
-
-```python
-from policyengine.tax_benefit_models.uk import ensure_datasets
-
-datasets = ensure_datasets(
-    datasets=["hf://policyengine/policyengine-uk-data/enhanced_frs_2023_24.h5"],
-    years=[2026],
-    data_folder="./data",
-)
-dataset = datasets["enhanced_frs_2023_24_2026"]
-```
-
-All datasets are stored as HDF5 files on disk. No database server is required.
-
-## Simulations
-
-Simulations apply tax-benefit models to datasets, calculating all variables for the specified year.
-
-### Running a simulation
-
-```python
-from policyengine.core import Simulation
-from policyengine.tax_benefit_models.uk import uk_latest
-
-simulation = Simulation(
-    dataset=dataset,
-    tax_benefit_model_version=uk_latest,
-)
-simulation.run()
-
-# Access output data
-output_person = simulation.output_dataset.data.person
-output_household = simulation.output_dataset.data.household
-
-# Check calculated variables
-print(output_household[["household_id", "household_net_income", "household_tax"]])
-```
-
-### Simulation lifecycle: `run()` vs `ensure()`
-
-The `Simulation` class provides two methods for computing results:
-
-| Method | Behaviour |
-|---|---|
-| `simulation.run()` | Always recomputes from scratch. No caching. |
-| `simulation.ensure()` | Checks in-memory LRU cache, then tries loading from disk, then falls back to `run()` + `save()`. |
-
-```python
-# One-off computation (no caching)
-simulation.run()
-
-# Cache-or-compute (preferred for production use)
-simulation.ensure()
-```
-
-`ensure()` uses a module-level LRU cache (max 100 simulations) and saves output datasets as HDF5 files alongside the input dataset. On repeated calls, it returns cached results instantly. For baseline-vs-reform comparisons, `economic_impact_analysis()` calls `ensure()` internally, so you rarely need to call it yourself.
-
-### Accessing calculated variables
-
-After running a simulation, you can access the calculated variables from the output dataset:
-
-```python
-simulation = Simulation(
-    dataset=dataset,
-    tax_benefit_model_version=uk_latest,
-)
-simulation.run()
-
-# Access specific variables
-output = simulation.output_dataset.data
-person_data = output.person[["person_id", "age", "employment_income", "income_tax"]]
-household_data = output.household[["household_id", "household_net_income"]]
-benunit_data = output.benunit[["benunit_id", "universal_credit", "child_benefit"]]
-```
-
-## Policies
-
-Policies modify tax-benefit system parameters through parametric reforms.
-
-### Creating a policy
-
-```python
-from policyengine.core import Policy, Parameter, ParameterValue
-import datetime
-
-# Define parameter to modify
-parameter = Parameter(
-    name="gov.hmrc.income_tax.allowances.personal_allowance.amount",
-    tax_benefit_model_version=uk_latest,
-    description="Personal allowance for income tax",
-    data_type=float,
-)
-
-# Set new value
-parameter_value = ParameterValue(
-    parameter=parameter,
-    start_date=datetime.date(2026, 1, 1),
-    end_date=datetime.date(2026, 12, 31),
-    value=15000,  # Increase from ~£12,570 to £15,000
-)
-
-policy = Policy(
-    name="Increased personal allowance",
-    description="Raises personal allowance to £15,000",
-    parameter_values=[parameter_value],
-)
-```
-
-### Running a reform simulation
-
-```python
-# Baseline simulation
-baseline = Simulation(
-    dataset=dataset,
-    tax_benefit_model_version=uk_latest,
-)
-baseline.run()
-
-# Reform simulation
-reform = Simulation(
-    dataset=dataset,
-    tax_benefit_model_version=uk_latest,
-    policy=policy,
-)
-reform.run()
-```
-
-### Combining policies
-
-Policies can be combined using the `+` operator:
-
-```python
-combined = policy_a + policy_b
-# Concatenates parameter_values and chains simulation_modifiers
-```
-
-### Simulation modifiers
-
-For reforms that cannot be expressed as parameter value changes, `Policy` accepts a `simulation_modifier` callable that directly manipulates the underlying `policyengine_core` simulation:
-
-```python
-def my_modifier(sim):
-    """Custom reform logic applied to the core simulation object."""
-    p = sim.tax_benefit_system.parameters
-    # Modify parameters programmatically
-    return sim
-
-policy = Policy(
-    name="Custom reform",
-    simulation_modifier=my_modifier,
-)
-```
-
-Note: the UK model supports `simulation_modifier`. The US model currently only uses the `parameter_values` path.
-
-## Dynamic behavioural responses
-
-The `Dynamic` class is structurally identical to `Policy` and represents behavioural responses to policy changes (e.g., labour supply elasticities). It is applied after the policy in the simulation pipeline.
-
-```python
-from policyengine.core.dynamic import Dynamic
-
-dynamic = Dynamic(
-    name="Labour supply response",
-    parameter_values=[...],  # Same format as Policy
-)
-
-simulation = Simulation(
-    dataset=dataset,
-    tax_benefit_model_version=uk_latest,
-    policy=policy,
-    dynamic=dynamic,
-)
-```
-
-Dynamic responses can also be combined using the `+` operator and support `simulation_modifier` callables.
-
-## Outputs
-
-Output classes provide structured analysis of simulation results.
-
-### Aggregate
-
-Calculate aggregate statistics (sum, mean, count) for any variable:
-
-```python
-from policyengine.outputs.aggregate import Aggregate, AggregateType
-
-# Total universal credit spending
-agg = Aggregate(
-    simulation=simulation,
-    variable="universal_credit",
-    aggregate_type=AggregateType.SUM,
-    entity="benunit",  # Map to benunit level
-)
-agg.run()
-print(f"Total UC spending: £{agg.result / 1e9:.1f}bn")
-
-# Mean household income in top decile
-agg = Aggregate(
-    simulation=simulation,
-    variable="household_net_income",
-    aggregate_type=AggregateType.MEAN,
-    filter_variable="household_net_income",
-    quantile=10,
-    quantile_eq=10,  # 10th decile
-)
-agg.run()
-print(f"Mean income in top decile: £{agg.result:,.0f}")
-```
-
-### ChangeAggregate
-
-Analyse impacts of policy reforms:
-
-```python
-from policyengine.outputs.change_aggregate import ChangeAggregate, ChangeAggregateType
-
-# Count winners and losers
-winners = ChangeAggregate(
-    baseline_simulation=baseline,
-    reform_simulation=reform,
-    variable="household_net_income",
-    aggregate_type=ChangeAggregateType.COUNT,
-    change_geq=1,  # Gain at least £1
-)
-winners.run()
-print(f"Winners: {winners.result / 1e6:.1f}m households")
-
-losers = ChangeAggregate(
-    baseline_simulation=baseline,
-    reform_simulation=reform,
-    variable="household_net_income",
-    aggregate_type=ChangeAggregateType.COUNT,
-    change_leq=-1,  # Lose at least £1
-)
-losers.run()
-print(f"Losers: {losers.result / 1e6:.1f}m households")
-
-# Revenue impact
-revenue = ChangeAggregate(
-    baseline_simulation=baseline,
-    reform_simulation=reform,
-    variable="household_tax",
-    aggregate_type=ChangeAggregateType.SUM,
-)
-revenue.run()
-print(f"Revenue change: £{revenue.result / 1e9:.1f}bn")
-```
-
-## Entity mapping
-
-The package automatically handles entity mapping when variables are defined at different entity levels.
-
-### Entity hierarchy
-
-**UK:**
-```
-household
-    └── benunit (benefit unit)
-            └── person
-```
-
-**US:**
-```
-household
-    ├── tax_unit
-    ├── spm_unit
-    ├── family
-    └── marital_unit
-            └── person
-```
-
-### Automatic mapping
-
-When you request a person-level variable (like `ssi`) at household level, the package:
-1. Sums person-level values within each household (aggregation)
-2. Returns household-level data with proper weights
-
-```python
-# SSI is defined at person level, but we want household-level totals
-agg = Aggregate(
-    simulation=simulation,
-    variable="ssi",  # Person-level variable
-    entity="household",  # Target household level
-    aggregate_type=AggregateType.SUM,
-)
-# Internally maps person → household by summing SSI for all persons in each household
-```
-
-When you request a household-level variable at person level:
-1. Replicates household values to all persons in that household (expansion)
-
-### Direct entity mapping
-
-You can also map data between entities directly using the `map_to_entity` method:
-
-```python
-# Map person income to household level (sum)
-household_income = dataset.data.map_to_entity(
-    source_entity="person",
-    target_entity="household",
-    columns=["employment_income"],
-    how="sum"
-)
-
-# Map household rent to person level (project/broadcast)
-person_rent = dataset.data.map_to_entity(
-    source_entity="household",
-    target_entity="person",
-    columns=["rent"],
-    how="project"
-)
-```
-
-#### Mapping with custom values
-
-You can map custom value arrays instead of existing columns:
-
-```python
-# Map custom per-person values to household level
-import numpy as np
-
-# Create custom values (e.g., imputed data)
-custom_values = np.array([100, 200, 150, 300])
-
-household_totals = dataset.data.map_to_entity(
-    source_entity="person",
-    target_entity="household",
-    values=custom_values,
-    how="sum"
-)
-```
-
-#### Aggregation methods
-
-The `how` parameter controls how values are mapped:
-
-**Person → Group (aggregation):**
-- `how='sum'` (default): Sum values within each group
-- `how='first'`: Take first person's value in each group
-
-```python
-# Sum person incomes to household level
-household_income = data.map_to_entity(
-    source_entity="person",
-    target_entity="household",
-    columns=["employment_income"],
-    how="sum"
-)
-
-# Take first person's age as household reference
-household_age = data.map_to_entity(
-    source_entity="person",
-    target_entity="household",
-    columns=["age"],
-    how="first"
-)
-```
-
-**Group → Person (expansion):**
-- `how='project'` (default): Broadcast group value to all members
-- `how='divide'`: Split group value equally among members
-
-```python
-# Broadcast household rent to each person
-person_rent = data.map_to_entity(
-    source_entity="household",
-    target_entity="person",
-    columns=["rent"],
-    how="project"
-)
-
-# Split household savings equally per person
-person_savings = data.map_to_entity(
-    source_entity="household",
-    target_entity="person",
-    columns=["total_savings"],
-    how="divide"
-)
-```
-
-**Group → Group (via person entity):**
-- `how='sum'` (default): Sum through person entity
-- `how='first'`: Take first source group's value
-- `how='project'`: Broadcast first source group's value
-- `how='divide'`: Split proportionally based on person counts
-
-```python
-# UK: Sum benunit benefits to household level
-household_benefits = data.map_to_entity(
-    source_entity="benunit",
-    target_entity="household",
-    columns=["universal_credit"],
-    how="sum"
-)
-
-# US: Map tax unit income to household, splitting by members
-household_from_tax = data.map_to_entity(
-    source_entity="tax_unit",
-    target_entity="household",
-    columns=["taxable_income"],
-    how="divide"
-)
-```
-
-## Visualisation
-
-The package includes utilities for creating PolicyEngine-branded visualisations:
-
-```python
-from policyengine.utils.plotting import format_fig, COLORS
-import plotly.graph_objects as go
-
-fig = go.Figure()
-fig.add_trace(go.Scatter(x=[1, 2, 3], y=[4, 5, 6]))
-
-format_fig(
-    fig,
-    title="My chart",
-    xaxis_title="X axis",
-    yaxis_title="Y axis",
-    height=600,
-    width=800,
-)
-fig.show()
-```
-
-### Brand colours
-
-```python
-COLORS = {
-    "primary": "#319795",        # Teal
-    "success": "#22C55E",        # Green
-    "warning": "#FEC601",        # Yellow
-    "error": "#EF4444",          # Red
-    "info": "#1890FF",           # Blue
-    "blue_secondary": "#026AA2", # Dark blue
-    "gray": "#667085",           # Gray
-}
-```
-
-## Common workflows
-
-### 1. Analyse employment income variation
-
-See [UK employment income variation](examples.md#uk-employment-income-variation) for a complete example of:
-- Creating custom datasets with varied parameters
-- Running single simulations
-- Extracting results with filters
-- Visualising benefit phase-outs
-
-### 2. Policy reform analysis
-
-See [UK policy reform analysis](examples.md#uk-policy-reform-analysis) for:
-- Applying parametric reforms
-- Comparing baseline and reform
-- Analysing winners/losers by decile
-- Calculating revenue impacts
-
-### 3. Distributional analysis
-
-See [US income distribution](examples.md#us-income-distribution) for:
-- Loading representative microdata
-- Calculating statistics by income decile
-- Mapping variables across entity levels
-- Creating interactive visualisations
-
-## Best practices
-
-### Creating custom datasets
-
-1. **Always set would_claim variables**: Benefits won't be claimed unless explicitly enabled
-   ```python
-   "would_claim_uc": [True] * n_households
-   ```
-
-2. **Set disability variables explicitly**: Prevents random UC spikes from LCWRA element
-   ```python
-   "is_disabled_for_benefits": [False] * n_people
-   "uc_limited_capability_for_WRA": [False] * n_people
-   ```
-
-3. **Include required join keys**: Person data needs entity membership
-   ```python
-   "person_household_id": household_ids
-   "person_benunit_id": benunit_ids  # UK only
-   ```
-
-4. **Set required household fields**: Vary by country
-   ```python
-   # UK
-   "region": ["LONDON"] * n_households
-   "tenure_type": ["RENT_PRIVATELY"] * n_households
-
-   # US
-   "state_code": ["CA"] * n_households
-   ```
-
-### Performance optimisation
-
-1. **Single simulation for variations**: Create all scenarios in one dataset, run once
-2. **Custom variable selection**: Only calculate needed variables
-3. **Filter efficiently**: Use quantile filters for decile analysis
-4. **Parallel analysis**: Multiple Aggregate calls can run independently
-
-### Data integrity
-
-1. **Check weights**: Ensure weights sum to expected population
-2. **Validate join keys**: All persons should link to valid households
-3. **Review output ranges**: Check calculated values are reasonable
-4. **Test edge cases**: Zero income, high income, disabled, elderly
-
-## Next steps
-
-- [Economic impact analysis](economic-impact-analysis.md): Full baseline-vs-reform comparison workflow
-- [Advanced outputs](advanced-outputs.md): DecileImpact, Poverty, Inequality, IntraDecileImpact
-- [Regions and scoping](regions-and-scoping.md): Sub-national analysis (states, constituencies, districts)
-- Country-specific documentation:
-  - [UK tax-benefit model](country-models-uk.md)
-  - [US tax-benefit model](country-models-us.md)
-- [Visualisation](visualisation.md): Publication-ready charts
-- [Examples](examples.md): Complete working scripts
diff --git a/docs/countries.md b/docs/countries.md
new file mode 100644
index 00000000..c9e5e37f
--- /dev/null
+++ b/docs/countries.md
@@ -0,0 +1,87 @@
+---
+title: "Country models"
+---
+
+The `policyengine` package is country-agnostic; country-specific rules live in separate packages (`policyengine-us`, `policyengine-uk`, …). This page documents the differences that matter to users.
+
+## Entities
+
+| US | UK |
+|---|---|
+| `person` | `person` |
+| `family` | — |
+| `marital_unit` | — |
+| `tax_unit` | `benunit` |
+| `spm_unit` | — |
+| `household` | `household` |
+
+The UK `benunit` roughly corresponds to the US `tax_unit` for means-testing — a single adult or married couple plus dependent children.
+
+## Default income variable
+
+Net-income calculations use country-specific defaults:
+
+| | Variable |
+|---|---|
+| US | `spm_unit_net_income` |
+| UK | `hbai_household_net_income` |
+
+Override in any output with `income_variable=`.
+
+## Default dataset
+
+| | Dataset |
+|---|---|
+| US | Enhanced CPS 2024 (`enhanced_cps_2024.h5`) |
+| UK | Enhanced FRS 2024 (`enhanced_frs_2024.h5`) |
+
+## State / regional breakdown
+
+US: `state_code`, `congressional_district` on every record.
+
+UK: constituency code, local authority code on every record where available.
+
+## Poverty
+
+US: SPM (Supplemental Poverty Measure), deep SPM (below half the threshold), plus official thresholds.
+
+UK: AHC (After Housing Costs) and BHC (Before Housing Costs), both relative (60 % of median) and absolute.
+
+## Key programs
+
+| US | UK |
+|---|---|
+| Federal income tax (incl. EITC, CTC) | Income tax (incl. personal allowance) |
+| State income taxes | — |
+| Payroll taxes | National Insurance |
+| SNAP | Universal Credit (absorbing legacy benefits) |
+| TANF | Child Benefit |
+| SSI | PIP |
+| CHIP | — (NHS is universal) |
+| ACA premium tax credits | — |
+| Medicare Part B | — |
+
+## Reform targeting
+
+Parameter paths mirror the country's rule-making structure:
+
+- US: `gov.irs.*`, `gov.states.<st>.*`, `gov.usda.*`, `gov.hhs.*`, etc.
+- UK: `gov.hmrc.*`, `gov.dwp.*`, `gov.obr.*`
+
+See [Reforms](reforms.md) for how to express changes in either tree.
+
+## Switching countries
+
+Most analysis patterns are identical — swap `pe.us` for `pe.uk`:
+
+```python
+# US
+pe.us.calculate_household(people=[{"age": 35, "employment_income": 60_000}],
+                           tax_unit={"filing_status": "SINGLE"}, year=2026)
+
+# UK
+pe.uk.calculate_household(people=[{"age": 35, "employment_income": 50_000}],
+                           year=2026)
+```
+
+Microsim is similarly parallel — `pe.us.ensure_datasets` / `pe.uk.ensure_datasets`, `pe.Simulation(country="us"|"uk", ...)`.
diff --git a/docs/country-models-uk.md b/docs/country-models-uk.md
deleted file mode 100644
index 0bc54505..00000000
--- a/docs/country-models-uk.md
+++ /dev/null
@@ -1,374 +0,0 @@
-# UK tax-benefit model
-
-The UK tax-benefit model implements the United Kingdom's tax and benefit system using PolicyEngine UK as the underlying calculation engine.
-
-## Entity structure
-
-The UK model uses three entity levels:
-
-```
-household
-    └── benunit (benefit unit)
-            └── person
-```
-
-### Person
-
-Individual people with demographic and income characteristics.
-
-**Key variables:**
-- `age`: Person's age in years
-- `employment_income`: Annual employment income
-- `self_employment_income`: Annual self-employment income
-- `pension_income`: Annual pension income
-- `savings_interest_income`: Annual interest from savings
-- `dividend_income`: Annual dividend income
-- `income_tax`: Total income tax paid
-- `national_insurance`: Total NI contributions
-- `is_disabled_for_benefits`: Whether disabled for benefit purposes
-
-### Benunit (benefit unit)
-
-The unit for benefit assessment. Usually a single person or a couple with dependent children.
-
-**Key variables:**
-- `universal_credit`: Annual UC payment
-- `child_benefit`: Annual child benefit
-- `working_tax_credit`: Annual WTC (legacy system)
-- `child_tax_credit`: Annual CTC (legacy system)
-- `pension_credit`: Annual pension credit
-- `income_support`: Annual income support
-- `housing_benefit`: Annual housing benefit
-- `council_tax_support`: Annual council tax support
-
-**Important flags:**
-- `would_claim_uc`: Must be True to claim UC
-- `would_claim_WTC`: Must be True to claim WTC
-- `would_claim_CTC`: Must be True to claim CTC
-- `would_claim_IS`: Must be True to claim IS
-- `would_claim_pc`: Must be True to claim pension credit
-- `would_claim_child_benefit`: Must be True to claim child benefit
-- `would_claim_housing_benefit`: Must be True to claim HB
-
-### Household
-
-The residence unit, typically sharing accommodation.
-
-**Key variables:**
-- `household_net_income`: Total household net income
-- `hbai_household_net_income`: HBAI-equivalised net income
-- `household_benefits`: Total benefits received
-- `household_tax`: Total tax paid
-- `household_market_income`: Total market income
-
-**Required fields:**
-- `region`: UK region (e.g., "LONDON", "SOUTH_EAST")
-- `tenure_type`: Housing tenure (e.g., "RENT_PRIVATELY", "OWNED_OUTRIGHT")
-- `rent`: Annual rent paid
-- `council_tax`: Annual council tax
-
-## Using the UK model
-
-### Loading representative data
-
-```python
-from policyengine.tax_benefit_models.uk import PolicyEngineUKDataset
-
-dataset = PolicyEngineUKDataset(
-    name="FRS 2023-24",
-    description="Family Resources Survey microdata",
-    filepath="./data/frs_2023_24_year_2026.h5",
-    year=2026,
-)
-
-print(f"People: {len(dataset.data.person):,}")
-print(f"Benefit units: {len(dataset.data.benunit):,}")
-print(f"Households: {len(dataset.data.household):,}")
-```
-
-### Creating custom scenarios
-
-```python
-import pandas as pd
-from microdf import MicroDataFrame
-from policyengine.tax_benefit_models.uk import UKYearData
-
-# Single parent with 2 children
-person_df = MicroDataFrame(
-    pd.DataFrame({
-        "person_id": [0, 1, 2],
-        "person_benunit_id": [0, 0, 0],
-        "person_household_id": [0, 0, 0],
-        "age": [35, 8, 5],
-        "employment_income": [25000, 0, 0],
-        "person_weight": [1.0, 1.0, 1.0],
-        "is_disabled_for_benefits": [False, False, False],
-        "uc_limited_capability_for_WRA": [False, False, False],
-    }),
-    weights="person_weight"
-)
-
-benunit_df = MicroDataFrame(
-    pd.DataFrame({
-        "benunit_id": [0],
-        "benunit_weight": [1.0],
-        "would_claim_uc": [True],
-        "would_claim_child_benefit": [True],
-        "would_claim_WTC": [True],
-        "would_claim_CTC": [True],
-    }),
-    weights="benunit_weight"
-)
-
-household_df = MicroDataFrame(
-    pd.DataFrame({
-        "household_id": [0],
-        "household_weight": [1.0],
-        "region": ["LONDON"],
-        "rent": [15000],  # £1,250/month
-        "council_tax": [2000],
-        "tenure_type": ["RENT_PRIVATELY"],
-    }),
-    weights="household_weight"
-)
-
-dataset = PolicyEngineUKDataset(
-    name="Single parent scenario",
-    description="One adult, two children",
-    filepath="./single_parent.h5",
-    year=2026,
-    data=UKYearData(
-        person=person_df,
-        benunit=benunit_df,
-        household=household_df,
-    )
-)
-```
-
-### Running a simulation
-
-```python
-from policyengine.core import Simulation
-from policyengine.tax_benefit_models.uk import uk_latest
-
-simulation = Simulation(
-    dataset=dataset,
-    tax_benefit_model_version=uk_latest,
-)
-simulation.run()
-
-# Check results
-output = simulation.output_dataset.data
-print(output.household[["household_net_income", "household_benefits", "household_tax"]])
-```
-
-## Key parameters
-
-### Income tax
-
-- `gov.hmrc.income_tax.allowances.personal_allowance.amount`: Personal allowance (£12,570 in 2024-25)
-- `gov.hmrc.income_tax.rates.uk[0].rate`: Basic rate (20%)
-- `gov.hmrc.income_tax.rates.uk[1].rate`: Higher rate (40%)
-- `gov.hmrc.income_tax.rates.uk[2].rate`: Additional rate (45%)
-- `gov.hmrc.income_tax.rates.uk[0].threshold`: Basic rate threshold (£50,270)
-- `gov.hmrc.income_tax.rates.uk[1].threshold`: Higher rate threshold (£125,140)
-
-### National insurance
-
-- `gov.hmrc.national_insurance.class_1.main.primary_threshold`: Primary threshold (£12,570)
-- `gov.hmrc.national_insurance.class_1.main.upper_earnings_limit`: Upper earnings limit (£50,270)
-- `gov.hmrc.national_insurance.class_1.main.rate`: Main rate (12% below UEL, 2% above)
-
-### Universal credit
-
-- `gov.dwp.universal_credit.elements.standard_allowance.single_adult`: Standard allowance for single adult (£334.91/month in 2024-25)
-- `gov.dwp.universal_credit.elements.child.first_child`: First child element (£333.33/month)
-- `gov.dwp.universal_credit.elements.child.subsequent_child`: Subsequent children (£287.92/month each)
-- `gov.dwp.universal_credit.means_test.reduction_rate`: Taper rate (55%)
-- `gov.dwp.universal_credit.means_test.earned_income.disregard`: Work allowance
-
-### Child benefit
-
-- `gov.hmrc.child_benefit.rates.eldest_child`: First child rate (£25.60/week)
-- `gov.hmrc.child_benefit.rates.additional_child`: Additional children (£16.95/week each)
-- `gov.hmrc.child_benefit.income_tax_charge.threshold`: HICBC threshold (£60,000)
-
-## Common policy reforms
-
-### Increasing personal allowance
-
-```python
-from policyengine.core import Policy, Parameter, ParameterValue
-import datetime
-
-parameter = Parameter(
-    name="gov.hmrc.income_tax.allowances.personal_allowance.amount",
-    tax_benefit_model_version=uk_latest,
-    description="Personal allowance",
-    data_type=float,
-)
-
-policy = Policy(
-    name="Increase personal allowance to £15,000",
-    description="Raises personal allowance from £12,570 to £15,000",
-    parameter_values=[
-        ParameterValue(
-            parameter=parameter,
-            start_date=datetime.date(2026, 1, 1),
-            end_date=datetime.date(2026, 12, 31),
-            value=15000,
-        )
-    ],
-)
-```
-
-### Adjusting UC taper rate
-
-```python
-parameter = Parameter(
-    name="gov.dwp.universal_credit.means_test.reduction_rate",
-    tax_benefit_model_version=uk_latest,
-    description="UC taper rate",
-    data_type=float,
-)
-
-policy = Policy(
-    name="Reduce UC taper to 50%",
-    description="Lowers taper rate from 55% to 50%",
-    parameter_values=[
-        ParameterValue(
-            parameter=parameter,
-            start_date=datetime.date(2026, 1, 1),
-            end_date=datetime.date(2026, 12, 31),
-            value=0.50,  # 50%
-        )
-    ],
-)
-```
-
-### Abolishing two-child limit
-
-```python
-# Set subsequent child element equal to first child
-parameter = Parameter(
-    name="gov.dwp.universal_credit.elements.child.subsequent_child",
-    tax_benefit_model_version=uk_latest,
-    description="UC subsequent child element",
-    data_type=float,
-)
-
-policy = Policy(
-    name="Abolish two-child limit",
-    description="Sets subsequent child element equal to first child",
-    parameter_values=[
-        ParameterValue(
-            parameter=parameter,
-            start_date=datetime.date(2026, 1, 1),
-            end_date=datetime.date(2026, 12, 31),
-            value=333.33,  # Match first child rate
-        )
-    ],
-)
-```
-
-## Regional variations
-
-The UK model accounts for regional differences:
-
-- **Council tax**: Varies by local authority
-- **Rent levels**: Regional housing markets
-- **Scottish income tax**: Different rates and thresholds for Scottish taxpayers
-
-### Regions
-
-Valid region values:
-- `LONDON`
-- `SOUTH_EAST`
-- `SOUTH_WEST`
-- `EAST_OF_ENGLAND`
-- `WEST_MIDLANDS`
-- `EAST_MIDLANDS`
-- `YORKSHIRE`
-- `NORTH_WEST`
-- `NORTH_EAST`
-- `WALES`
-- `SCOTLAND`
-- `NORTHERN_IRELAND`
-
-## Entity mapping
-
-The UK model has a simpler entity structure than the US, with three levels: person → benunit → household.
-
-### Direct entity mapping
-
-You can map data between entities using the `map_to_entity` method:
-
-```python
-# Map person income to benunit level
-benunit_income = dataset.data.map_to_entity(
-    source_entity="person",
-    target_entity="benunit",
-    columns=["employment_income"],
-    how="sum"
-)
-
-# Split household rent equally among persons
-person_rent_share = dataset.data.map_to_entity(
-    source_entity="household",
-    target_entity="person",
-    columns=["rent"],
-    how="divide"
-)
-
-# Map benunit UC to household level
-household_uc = dataset.data.map_to_entity(
-    source_entity="benunit",
-    target_entity="household",
-    columns=["universal_credit"],
-    how="sum"
-)
-```
-
-See the [Entity mapping section](core-concepts.md#entity-mapping) in Core Concepts for full documentation on aggregation methods.
-
-## Data sources
-
-The UK model can use several data sources:
-
-1. **Family Resources Survey (FRS)**: Official UK household survey
-   - ~19,000 households
-   - Detailed income and benefit receipt
-   - Published annually
-
-2. **Enhanced FRS**: Uprated and enhanced version
-   - Calibrated to population totals
-   - Additional imputed variables
-   - Multiple projection years
-
-3. **Custom datasets**: User-created scenarios
-   - Full control over household composition
-   - Exact income levels
-   - Specific benefit claiming patterns
-
-## Validation
-
-When creating custom datasets, validate:
-
-1. **Would claim flags**: All set to True
-2. **Disability flags**: Set explicitly (not random)
-3. **Join keys**: Person data links to benunits and households
-4. **Required fields**: Region, tenure_type set correctly
-5. **Weights**: Sum to expected values
-6. **Income ranges**: Realistic values
-
-## Examples
-
-- [UK employment income variation](examples.md#uk-employment-income-variation): Vary employment income, analyse benefit phase-outs
-- [UK policy reform analysis](examples.md#uk-policy-reform-analysis): Apply reforms, analyse winners/losers
-- [UK income bands](examples.md#uk-income-bands): Calculate net income and tax by income decile
-
-## References
-
-- PolicyEngine UK documentation: https://policyengine.github.io/policyengine-uk/
-- UK tax-benefit system: https://www.gov.uk/browse/benefits
-- HBAI methodology: https://www.gov.uk/government/statistics/households-below-average-income-for-financial-years-ending-1995-to-2023
diff --git a/docs/country-models-us.md b/docs/country-models-us.md
deleted file mode 100644
index 268c888f..00000000
--- a/docs/country-models-us.md
+++ /dev/null
@@ -1,444 +0,0 @@
-# US tax-benefit model
-
-The US tax-benefit model implements the United States federal tax and benefit system using PolicyEngine US as the underlying calculation engine.
-
-## Entity structure
-
-The US model uses a more complex entity hierarchy:
-
-```
-household
-    ├── tax_unit (federal tax filing unit)
-    ├── spm_unit (Supplemental Poverty Measure unit)
-    ├── family (Census definition)
-    └── marital_unit (married couple or single person)
-            └── person
-```
-
-### Person
-
-Individual people with demographic and income characteristics.
-
-**Key variables:**
-- `age`: Person's age in years
-- `employment_income`: Annual employment income
-- `self_employment_income`: Annual self-employment income
-- `social_security`: Annual Social Security benefits
-- `ssi`: Annual Supplemental Security Income
-- `medicaid`: Annual Medicaid value
-- `medicare`: Annual Medicare value
-- `unemployment_compensation`: Annual unemployment benefits
-
-### Tax unit
-
-The federal tax filing unit (individual or married filing jointly).
-
-**Key variables:**
-- `income_tax`: Federal income tax liability
-- `employee_payroll_tax`: Employee payroll tax (FICA)
-- `eitc`: Earned Income Tax Credit
-- `ctc`: Child Tax Credit
-- `income_tax_before_credits`: Tax before credits
-
-### SPM unit
-
-The Supplemental Poverty Measure unit used for SNAP and other means-tested benefits.
-
-**Key variables:**
-- `snap`: Annual SNAP (food stamps) benefits
-- `tanf`: Annual TANF (cash assistance) benefits
-- `spm_unit_net_income`: SPM net income
-- `spm_unit_size`: Number of people in unit
-
-### Family
-
-Census definition of family (related individuals).
-
-**Key variables:**
-- `family_id`: Family identifier
-- `family_weight`: Survey weight
-
-### Marital unit
-
-Married couple or single person.
-
-**Key variables:**
-- `marital_unit_id`: Marital unit identifier
-- `marital_unit_weight`: Survey weight
-
-### Household
-
-The residence unit.
-
-**Key variables:**
-- `household_net_income`: Total household net income
-- `household_benefits`: Total benefits received
-- `household_tax`: Total tax paid
-- `household_market_income`: Total market income before taxes and transfers
-
-**Required fields:**
-- `state_code`: State (e.g., "CA", "NY", "TX")
-
-## Using the US model
-
-### Loading representative data
-
-```python
-from policyengine.tax_benefit_models.us import PolicyEngineUSDataset
-
-dataset = PolicyEngineUSDataset(
-    name="Enhanced CPS 2024",
-    description="Enhanced Current Population Survey microdata",
-    filepath="./data/enhanced_cps_2024_year_2024.h5",
-    year=2024,
-)
-
-print(f"People: {len(dataset.data.person):,}")
-print(f"Tax units: {len(dataset.data.tax_unit):,}")
-print(f"SPM units: {len(dataset.data.spm_unit):,}")
-print(f"Households: {len(dataset.data.household):,}")
-```
-
-### Creating custom scenarios
-
-```python
-import pandas as pd
-from microdf import MicroDataFrame
-from policyengine.tax_benefit_models.us import USYearData
-
-# Married couple with 2 children
-person_df = MicroDataFrame(
-    pd.DataFrame({
-        "person_id": [0, 1, 2, 3],
-        "person_household_id": [0, 0, 0, 0],
-        "person_tax_unit_id": [0, 0, 0, 0],
-        "person_spm_unit_id": [0, 0, 0, 0],
-        "person_family_id": [0, 0, 0, 0],
-        "person_marital_unit_id": [0, 0, 1, 2],
-        "age": [35, 33, 8, 5],
-        "employment_income": [60000, 40000, 0, 0],
-        "person_weight": [1.0, 1.0, 1.0, 1.0],
-    }),
-    weights="person_weight"
-)
-
-tax_unit_df = MicroDataFrame(
-    pd.DataFrame({
-        "tax_unit_id": [0],
-        "tax_unit_weight": [1.0],
-    }),
-    weights="tax_unit_weight"
-)
-
-spm_unit_df = MicroDataFrame(
-    pd.DataFrame({
-        "spm_unit_id": [0],
-        "spm_unit_weight": [1.0],
-    }),
-    weights="spm_unit_weight"
-)
-
-family_df = MicroDataFrame(
-    pd.DataFrame({
-        "family_id": [0],
-        "family_weight": [1.0],
-    }),
-    weights="family_weight"
-)
-
-marital_unit_df = MicroDataFrame(
-    pd.DataFrame({
-        "marital_unit_id": [0, 1, 2],
-        "marital_unit_weight": [1.0, 1.0, 1.0],
-    }),
-    weights="marital_unit_weight"
-)
-
-household_df = MicroDataFrame(
-    pd.DataFrame({
-        "household_id": [0],
-        "household_weight": [1.0],
-        "state_code": ["CA"],
-    }),
-    weights="household_weight"
-)
-
-dataset = PolicyEngineUSDataset(
-    name="Married couple scenario",
-    description="Two adults, two children",
-    filepath="./married_couple.h5",
-    year=2024,
-    data=USYearData(
-        person=person_df,
-        tax_unit=tax_unit_df,
-        spm_unit=spm_unit_df,
-        family=family_df,
-        marital_unit=marital_unit_df,
-        household=household_df,
-    )
-)
-```
-
-### Running a simulation
-
-```python
-from policyengine.core import Simulation
-from policyengine.tax_benefit_models.us import us_latest
-
-simulation = Simulation(
-    dataset=dataset,
-    tax_benefit_model_version=us_latest,
-)
-simulation.run()
-
-# Check results
-output = simulation.output_dataset.data
-print(output.household[["household_net_income", "household_benefits", "household_tax"]])
-```
-
-## Key parameters
-
-### Income tax
-
-- `gov.irs.income.standard_deduction.joint`: Standard deduction (married filing jointly)
-- `gov.irs.income.standard_deduction.single`: Standard deduction (single)
-- `gov.irs.income.bracket.rates[0]`: 10% bracket rate
-- `gov.irs.income.bracket.rates[1]`: 12% bracket rate
-- `gov.irs.income.bracket.rates[2]`: 22% bracket rate
-- `gov.irs.income.bracket.thresholds.joint[0]`: 10% bracket threshold (MFJ)
-- `gov.irs.income.bracket.thresholds.single[0]`: 10% bracket threshold (single)
-
-### Payroll tax
-
-- `gov.ssa.payroll.rate.employee`: Employee OASDI rate (6.2%)
-- `gov.medicare.payroll.rate`: Medicare rate (1.45%)
-- `gov.ssa.payroll.cap`: OASDI wage base ($168,600 in 2024)
-
-### Child Tax Credit
-
-- `gov.irs.credits.ctc.amount.base`: Base CTC amount ($2,000 per child)
-- `gov.irs.credits.ctc.refundable.amount.max`: Maximum refundable amount ($1,700)
-- `gov.irs.credits.ctc.phase_out.threshold.joint`: Phase-out threshold (MFJ)
-- `gov.irs.credits.ctc.phase_out.rate`: Phase-out rate
-
-### Earned Income Tax Credit
-
-- `gov.irs.credits.eitc.max[0]`: Maximum EITC (0 children)
-- `gov.irs.credits.eitc.max[1]`: Maximum EITC (1 child)
-- `gov.irs.credits.eitc.max[2]`: Maximum EITC (2 children)
-- `gov.irs.credits.eitc.max[3]`: Maximum EITC (3+ children)
-- `gov.irs.credits.eitc.phase_out.start[0]`: Phase-out start (0 children)
-- `gov.irs.credits.eitc.phase_out.rate[0]`: Phase-out rate (0 children)
-
-### SNAP
-
-- `gov.usda.snap.normal_allotment.max[1]`: Maximum benefit (1 person)
-- `gov.usda.snap.normal_allotment.max[2]`: Maximum benefit (2 people)
-- `gov.usda.snap.income_limit.net`: Net income limit (100% FPL)
-- `gov.usda.snap.income_deduction.earned.rate`: Earned income deduction rate (20%)
-
-## Common policy reforms
-
-### Increasing standard deduction
-
-```python
-from policyengine.core import Policy, Parameter, ParameterValue
-import datetime
-
-parameter = Parameter(
-    name="gov.irs.income.standard_deduction.single",
-    tax_benefit_model_version=us_latest,
-    description="Standard deduction (single)",
-    data_type=float,
-)
-
-policy = Policy(
-    name="Increase standard deduction to $20,000",
-    description="Raises single standard deduction from $14,600 to $20,000",
-    parameter_values=[
-        ParameterValue(
-            parameter=parameter,
-            start_date=datetime.date(2024, 1, 1),
-            end_date=datetime.date(2024, 12, 31),
-            value=20000,
-        )
-    ],
-)
-```
-
-### Expanding Child Tax Credit
-
-```python
-parameter = Parameter(
-    name="gov.irs.credits.ctc.amount.base",
-    tax_benefit_model_version=us_latest,
-    description="Base CTC amount",
-    data_type=float,
-)
-
-policy = Policy(
-    name="Increase CTC to $3,000",
-    description="Expands CTC from $2,000 to $3,000 per child",
-    parameter_values=[
-        ParameterValue(
-            parameter=parameter,
-            start_date=datetime.date(2024, 1, 1),
-            end_date=datetime.date(2024, 12, 31),
-            value=3000,
-        )
-    ],
-)
-```
-
-### Making CTC fully refundable
-
-```python
-parameter = Parameter(
-    name="gov.irs.credits.ctc.refundable.amount.max",
-    tax_benefit_model_version=us_latest,
-    description="Maximum refundable CTC",
-    data_type=float,
-)
-
-policy = Policy(
-    name="Fully refundable CTC",
-    description="Makes entire $2,000 CTC refundable",
-    parameter_values=[
-        ParameterValue(
-            parameter=parameter,
-            start_date=datetime.date(2024, 1, 1),
-            end_date=datetime.date(2024, 12, 31),
-            value=2000,  # Match base amount
-        )
-    ],
-)
-```
-
-## State variations
-
-The US model includes state-level variations for:
-
-- **State income tax**: Different rates and structures by state
-- **State EITC**: State supplements to federal EITC
-- **Medicaid**: State-specific eligibility and benefits
-- **TANF**: State-administered cash assistance
-
-### State codes
-
-Use two-letter state codes (e.g., "CA", "NY", "TX"). All 50 states plus DC are supported.
-
-## Entity mapping considerations
-
-The US model's complex entity structure requires careful attention to entity mapping:
-
-### Person → Household
-
-When mapping person-level variables (like `ssi`) to household level, values are summed across all household members:
-
-```python
-agg = Aggregate(
-    simulation=simulation,
-    variable="ssi",  # Person-level
-    entity="household",  # Aggregate to household
-    aggregate_type=AggregateType.SUM,
-)
-# Result: Total SSI for all persons in each household
-```
-
-### Tax unit → Household
-
-Tax units nest within households. A household may contain multiple tax units (e.g., adult child filing separately):
-
-```python
-agg = Aggregate(
-    simulation=simulation,
-    variable="income_tax",  # Tax unit level
-    entity="household",  # Aggregate to household
-    aggregate_type=AggregateType.SUM,
-)
-# Result: Total income tax for all tax units in each household
-```
-
-### Household → Person
-
-Household variables are replicated to all household members:
-
-```python
-# household_net_income at person level
-# Each person in household gets the same household_net_income value
-```
-
-### Direct entity mapping
-
-For complex multi-entity scenarios, you can use `map_to_entity` directly:
-
-```python
-# Map SPM unit SNAP benefits to household level
-household_snap = dataset.data.map_to_entity(
-    source_entity="spm_unit",
-    target_entity="household",
-    columns=["snap"],
-    how="sum"
-)
-
-# Split tax unit income equally among persons
-person_tax_income = dataset.data.map_to_entity(
-    source_entity="tax_unit",
-    target_entity="person",
-    columns=["taxable_income"],
-    how="divide"
-)
-
-# Map custom analysis values
-custom_analysis = dataset.data.map_to_entity(
-    source_entity="person",
-    target_entity="tax_unit",
-    values=custom_values_array,
-    how="sum"
-)
-```
-
-See the [Entity mapping section](core-concepts.md#entity-mapping) in Core Concepts for full documentation on aggregation methods.
-
-## Data sources
-
-The US model can use several data sources:
-
-1. **Current Population Survey (CPS)**: Census Bureau household survey
-   - ~60,000 households
-   - Detailed income and demographic data
-   - Published annually
-
-2. **Enhanced CPS**: Calibrated and enhanced version
-   - Uprated to population totals
-   - Imputed benefit receipt
-   - Multiple projection years
-
-3. **Custom datasets**: User-created scenarios
-   - Full control over household composition
-   - Exact income levels
-   - Specific tax filing scenarios
-
-## Validation
-
-When creating custom datasets, validate:
-
-1. **Entity relationships**: All persons link to valid tax_unit, spm_unit, household
-2. **Join key naming**: Use `person_household_id`, `person_tax_unit_id`, etc.
-3. **Weights**: Appropriate weights for each entity level
-4. **State codes**: Valid two-letter codes
-5. **Filing status**: Tax units should reflect actual filing patterns
-
-## Examples
-
-- [US income distribution](examples.md#us-income-distribution): Analyse benefit distribution by income decile
-- [US employment income variation](examples.md#us-employment-income-variation): Vary employment income, analyse phase-outs
-- [US budgetary impact](examples.md#us-budgetary-impact): Full baseline-vs-reform comparison
-- [Simulation performance](examples.md#simulation-performance): Performance benchmarking
-
-## References
-
-- PolicyEngine US documentation: https://policyengine.github.io/policyengine-us/
-- IRS tax information: https://www.irs.gov/forms-pubs
-- Benefits.gov: https://www.benefits.gov/
-- SPM methodology: https://www.census.gov/topics/income-poverty/supplemental-poverty-measure.html
diff --git a/docs/dev.md b/docs/dev.md
index 007a94e5..3a1efc4e 100644
--- a/docs/dev.md
+++ b/docs/dev.md
@@ -1,105 +1,77 @@
-# Development
-
-## Principles
-
-1. **STRONG** preference for simplicity. Let's make this package as simple as it possibly can be.
-2. Remember the goal of this package: to make it easy to create, run, save and analyse PolicyEngine simulations. When considering further features, always ask: can we instead *make it super easy* for people to do this outside the package?
-3. Be consistent about property names. `name` = human readable few words you could put as the noun in a sentence without fail. `id` = unique identifier, ideally a UUID. `description` = longer human readable text that describes the object. `created_at` and `updated_at` = timestamps for when the object was created and last updated.
-4. Constraints can be good. We should set constraints where they help us simplify the codebase and usage, but not where they unnecessarily block useful functionality.
+---
+title: "Development"
+---
 
 ## Setup
 
 ```bash
-git clone https://github.com/PolicyEngine/policyengine.py.git
+git clone https://github.com/PolicyEngine/policyengine.py
 cd policyengine.py
 uv pip install -e ".[dev]"
 ```
 
-This installs the shared analysis layer, both country model extras, and the dev
-dependencies used in CI (pytest, ruff, mypy, towncrier).
-
-## Common commands
+## Running tests
 
 ```bash
-make format           # ruff format
-make test             # pytest with coverage
-make docs             # build static MyST/Jupyter Book 2 HTML docs
-make docs-serve       # preview the docs locally
-make clean            # remove caches, build artifacts, .h5 files
+make test                     # unit tests
+pytest tests/                 # same via pytest
+pytest tests/integration      # integration tests (slower, needs h5 data)
 ```
 
-## Testing
-
-Tests require a `HUGGING_FACE_TOKEN` environment variable for downloading datasets:
+## Formatting and linting
 
 ```bash
-export HUGGING_FACE_TOKEN=hf_...
-make test
+make format                   # ruff format
+ruff check .                  # ruff lint
 ```
 
-To run a specific test:
+## Building docs
 
 ```bash
-pytest tests/test_models.py -v
-pytest tests/test_parametric_reforms.py -k "test_uk" -v
+make docs                     # quarto render docs -> docs/_site/
+make docs-serve               # quarto preview docs with live reload
 ```
 
-## Linting and formatting
+## Regenerating auto-reference pages
 
 ```bash
-ruff format .                    # format code
-ruff check .                     # lint
-mypy src/policyengine            # type check (informational)
+make docs-generate-reference  # pulls variable catalog from installed country models
 ```
 
-## CI pipeline
+Commit the regenerated pages alongside any country-model bumps. CI will check the reference is current.
 
-PRs trigger the following checks:
+## CI
 
-| Check | Status | Command |
-|---|---|---|
-| Lint + format | Required | `ruff check .` + `ruff format --check .` |
-| Tests (Python 3.13) | Required | `make test` |
-| Tests (Python 3.14) | Required | `make test` |
-| Mypy | Informational | `mypy src/policyengine` |
-| Docs build | Required | `make docs` |
+Four workflows in `.github/workflows/`:
 
-## Versioning and releases
-
-This project uses [towncrier](https://towncrier.readthedocs.io/) for changelog management. When making a PR, add a changelog fragment:
-
-```bash
-# Fragment types: breaking, added, changed, fixed, removed
-echo "Description of change" > changelog.d/my-change.added
-```
+- **`pr_code_changes.yaml`** — unit tests, lint, format, changelog fragment on every PR touching code.
+- **`pr_docs_changes.yaml`** — verifies `quarto render docs` succeeds on every PR touching docs.
+- **`push.yaml`** — full integration tests + publish path on merge to main.
+- **`versioning.yaml`** — auto-bumps version when changelog fragments land.
 
-On merge, the versioning workflow bumps the version, builds the changelog, and creates a GitHub Release.
+## Contributing
 
-For the target release-bundle architecture, see [Release bundles](release-bundles.md). That document defines the split between country `*-data` build manifests and `policyengine.py` certified runtime bundles.
+- Follow the existing API shape: `pe.us.calculate_household`, `pe.us.Simulation`, `pe.outputs.*`. Don't add one-off helpers that bypass these.
+- New output types subclass `Output` or `ChangeOutput` and live in `src/policyengine/outputs/`.
+- Country-specific helpers go under `src/policyengine/tax_benefit_models/<country>/`.
+- Add a changelog fragment in `changelog.d/` following towncrier conventions: `echo "Description." > changelog.d/<branch>.<type>.md`. Types: `added`, `changed`, `fixed`, `removed`, `breaking`.
 
 ## Architecture
 
-### Package layout
-
 ```
 src/policyengine/
-├── core/                  # Domain models (Simulation, Dataset, Policy, etc.)
+├── core/                        # Simulation, Dataset, Output base classes
+├── countries/                   # Country-neutral protocols
+├── data/                        # Generic dataset loading
+├── graph/                       # Variable dependency graph (for reference docs)
+├── outputs/                     # Typed output classes
+├── provenance/                  # Manifests, certification, reproducibility
+├── results/                     # Typed household-result structures
 ├── tax_benefit_models/
-│   ├── uk/                # UK model, datasets, analysis, outputs
-│   └── us/                # US model, datasets, analysis, outputs
-├── outputs/               # Output templates (Aggregate, Poverty, etc.)
-├── countries/             # Geographic region registries
-└── utils/                 # Helpers (reforms, entity mapping, plotting)
+│   ├── us/                      # US entry point (calculate_household, model, datasets)
+│   ├── uk/                      # UK equivalent
+│   └── common/                  # Shared model-version scaffolding
+└── utils/
 ```
 
-### Key design decisions
-
-**Pydantic everywhere**: All domain objects are Pydantic `BaseModel` subclasses. This gives us validation, serialisation, and clear field documentation.
-
-**HDF5 for storage**: Datasets and simulation outputs are stored as HDF5 files. No database server is required. The `MicroDataFrame` from the `microdf` package wraps pandas DataFrames with weight-aware `.sum()`, `.mean()`, `.count()`.
-
-**Country-specific model classes**: `PolicyEngineUSLatest` and `PolicyEngineUKLatest` each implement `run()`, `save()`, and `load()`. The US model passes reforms as a dict at `Microsimulation(reform=...)` construction time. The UK model supports both parametric reforms and `simulation_modifier` callables applied post-construction.
-
-**LRU cache + file caching**: `Simulation.ensure()` checks an in-process LRU cache (max 100 entries), then tries loading from disk, then falls back to `run()` + `save()`.
-
-**Output pattern**: All output types inherit from `Output`, implement `.run()`, and populate result fields. Convenience functions (e.g., `calculate_us_poverty_rates()`) create, run, and return collections of output objects.
+Everything users touch is exposed through the top-level `policyengine` namespace. Internal modules are imports of convenience; the contract is the exposed API.
diff --git a/docs/economic-impact-analysis.md b/docs/economic-impact-analysis.md
deleted file mode 100644
index 0d28dff8..00000000
--- a/docs/economic-impact-analysis.md
+++ /dev/null
@@ -1,287 +0,0 @@
-# Economic impact analysis
-
-The `economic_impact_analysis()` function is the canonical way to compare a baseline simulation against a reform simulation. It produces a comprehensive `PolicyReformAnalysis` containing decile impacts, programme-by-programme statistics, poverty rates, and inequality metrics in a single call.
-
-## Overview
-
-There are two approaches to comparing simulations:
-
-| Approach | Use case |
-|---|---|
-| `ChangeAggregate` | Single-metric queries: "What is the total tax revenue change?" |
-| `economic_impact_analysis()` | Full analysis: decile impacts, programme stats, poverty, inequality |
-
-`ChangeAggregate` gives you one number per call. `economic_impact_analysis()` runs ~30+ aggregate computations and returns a structured result containing everything.
-
-## Full analysis workflow
-
-### US example
-
-```python
-import datetime
-from policyengine.core import Parameter, ParameterValue, Policy, Simulation
-from policyengine.tax_benefit_models.us import (
-    economic_impact_analysis,
-    ensure_datasets,
-    us_latest,
-)
-
-# 1. Load data
-datasets = ensure_datasets(
-    datasets=["hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5"],
-    years=[2026],
-    data_folder="./data",
-)
-dataset = datasets["enhanced_cps_2024_2026"]
-
-# 2. Define reform
-param = Parameter(
-    name="gov.irs.deductions.standard.amount.SINGLE",
-    tax_benefit_model_version=us_latest,
-)
-reform = Policy(
-    name="Double standard deduction (single)",
-    parameter_values=[
-        ParameterValue(
-            parameter=param,
-            start_date=datetime.date(2026, 1, 1),
-            end_date=datetime.date(2026, 12, 31),
-            value=30_950,
-        ),
-    ],
-)
-
-# 3. Create simulations (no need to call .run() — ensure() is called internally)
-baseline_sim = Simulation(
-    dataset=dataset,
-    tax_benefit_model_version=us_latest,
-)
-reform_sim = Simulation(
-    dataset=dataset,
-    tax_benefit_model_version=us_latest,
-    policy=reform,
-)
-
-# 4. Run full analysis
-analysis = economic_impact_analysis(baseline_sim, reform_sim)
-```
-
-### UK example
-
-```python
-import datetime
-from policyengine.core import Parameter, ParameterValue, Policy, Simulation
-from policyengine.tax_benefit_models.uk import (
-    economic_impact_analysis,
-    ensure_datasets,
-    uk_latest,
-)
-
-datasets = ensure_datasets(
-    datasets=["hf://policyengine/policyengine-uk-data/enhanced_frs_2023_24.h5"],
-    years=[2026],
-    data_folder="./data",
-)
-dataset = datasets["enhanced_frs_2023_24_2026"]
-
-param = Parameter(
-    name="gov.hmrc.income_tax.allowances.personal_allowance.amount",
-    tax_benefit_model_version=uk_latest,
-)
-reform = Policy(
-    name="Zero personal allowance",
-    parameter_values=[
-        ParameterValue(
-            parameter=param,
-            start_date=datetime.date(2026, 1, 1),
-            end_date=datetime.date(2026, 12, 31),
-            value=0,
-        ),
-    ],
-)
-
-baseline_sim = Simulation(
-    dataset=dataset,
-    tax_benefit_model_version=uk_latest,
-)
-reform_sim = Simulation(
-    dataset=dataset,
-    tax_benefit_model_version=uk_latest,
-    policy=reform,
-)
-
-analysis = economic_impact_analysis(baseline_sim, reform_sim)
-```
-
-## What `economic_impact_analysis()` computes
-
-The function calls `ensure()` on both simulations (run + cache if not already computed), then produces:
-
-### Decile impacts
-
-Mean income changes by income decile (1-10), with counts of people better off, worse off, and unchanged.
-
-```python
-for d in analysis.decile_impacts.outputs:
-    print(f"Decile {d.decile}: avg change={d.absolute_change:+.0f}, "
-          f"relative={d.relative_change:+.2f}%")
-```
-
-**Fields on each `DecileImpact`:**
-- `decile`: 1-10
-- `baseline_mean`, `reform_mean`: Mean income before and after reform
-- `absolute_change`: Mean absolute income change
-- `relative_change`: Mean percentage income change
-- `count_better_off`, `count_worse_off`, `count_no_change`: Weighted counts
-
-### Programme/program statistics
-
-Per-programme totals, changes, and winner/loser counts.
-
-**US programs analysed:** `income_tax`, `payroll_tax`, `state_income_tax`, `snap`, `tanf`, `ssi`, `social_security`, `medicare`, `medicaid`, `eitc`, `ctc`
-
-**UK programmes analysed:** `income_tax`, `national_insurance`, `vat`, `council_tax`, `universal_credit`, `child_benefit`, `pension_credit`, `income_support`, `working_tax_credit`, `child_tax_credit`
-
-```python
-for p in analysis.program_statistics.outputs:  # US
-    print(f"{p.program_name}: baseline=${p.baseline_total/1e9:.1f}B, "
-          f"reform=${p.reform_total/1e9:.1f}B, change=${p.change/1e9:+.1f}B")
-```
-
-**Fields on each `ProgramStatistics` / `ProgrammeStatistics`:**
-- `program_name` / `programme_name`: Variable name
-- `baseline_total`, `reform_total`: Weighted sums
-- `change`: `reform_total - baseline_total`
-- `baseline_count`, `reform_count`: Weighted recipient counts
-- `winners`, `losers`: Weighted counts of people gaining/losing
-
-### Poverty rates
-
-Poverty headcount and rates for both baseline and reform simulations.
-
-**US poverty types:** SPM poverty, deep SPM poverty
-
-**UK poverty types:** Absolute BHC, absolute AHC, relative BHC, relative AHC
-
-```python
-for bp, rp in zip(analysis.baseline_poverty.outputs,
-                  analysis.reform_poverty.outputs):
-    print(f"{bp.poverty_type}: baseline={bp.rate:.4f}, reform={rp.rate:.4f}")
-```
-
-### Inequality metrics
-
-Gini coefficient and income share metrics for both simulations.
-
-```python
-bi = analysis.baseline_inequality
-ri = analysis.reform_inequality
-print(f"Gini: baseline={bi.gini:.4f}, reform={ri.gini:.4f}")
-print(f"Top 10% share: baseline={bi.top_10_share:.4f}, reform={ri.top_10_share:.4f}")
-print(f"Top 1% share: baseline={bi.top_1_share:.4f}, reform={ri.top_1_share:.4f}")
-print(f"Bottom 50% share: baseline={bi.bottom_50_share:.4f}, reform={ri.bottom_50_share:.4f}")
-```
-
-## The `PolicyReformAnalysis` return type
-
-```python
-class PolicyReformAnalysis(BaseModel):
-    decile_impacts: OutputCollection[DecileImpact]
-    program_statistics: OutputCollection[ProgramStatistics]       # US
-    # programme_statistics: OutputCollection[ProgrammeStatistics]  # UK
-    baseline_poverty: OutputCollection[Poverty]
-    reform_poverty: OutputCollection[Poverty]
-    baseline_inequality: Inequality
-    reform_inequality: Inequality
-```
-
-Each `OutputCollection` contains:
-- `outputs`: List of individual output objects
-- `dataframe`: A pandas DataFrame with all results in tabular form
-
-## Using ChangeAggregate for targeted queries
-
-When you only need a single metric, `ChangeAggregate` is more direct than the full analysis pipeline. It requires that both simulations have already been run (or ensure'd).
-
-### Tax revenue change
-
-```python
-from policyengine.outputs.change_aggregate import ChangeAggregate, ChangeAggregateType
-
-baseline_sim.run()
-reform_sim.run()
-
-revenue = ChangeAggregate(
-    baseline_simulation=baseline_sim,
-    reform_simulation=reform_sim,
-    variable="household_tax",
-    aggregate_type=ChangeAggregateType.SUM,
-)
-revenue.run()
-print(f"Revenue change: ${revenue.result / 1e9:.1f}B")
-```
-
-### Winners and losers
-
-```python
-winners = ChangeAggregate(
-    baseline_simulation=baseline_sim,
-    reform_simulation=reform_sim,
-    variable="household_net_income",
-    aggregate_type=ChangeAggregateType.COUNT,
-    change_geq=1,  # Gained at least $1
-)
-winners.run()
-
-losers = ChangeAggregate(
-    baseline_simulation=baseline_sim,
-    reform_simulation=reform_sim,
-    variable="household_net_income",
-    aggregate_type=ChangeAggregateType.COUNT,
-    change_leq=-1,  # Lost at least $1
-)
-losers.run()
-```
-
-### Filtering by income decile
-
-```python
-# Average loss in the 3rd income decile
-avg_loss = ChangeAggregate(
-    baseline_simulation=baseline_sim,
-    reform_simulation=reform_sim,
-    variable="household_net_income",
-    aggregate_type=ChangeAggregateType.MEAN,
-    filter_variable="household_net_income",
-    quantile=10,
-    quantile_eq=3,
-)
-avg_loss.run()
-```
-
-### Filter options reference
-
-**Absolute change filters:**
-- `change_geq`: Change >= value (e.g., gain >= 500)
-- `change_leq`: Change <= value (e.g., loss <= -500)
-- `change_eq`: Change == value
-
-**Relative change filters:**
-- `relative_change_geq`: Relative change >= value (decimal, e.g., 0.05 = 5%)
-- `relative_change_leq`: Relative change <= value
-- `relative_change_eq`: Relative change == value
-
-**Variable filters:**
-- `filter_variable`: Variable to filter on (from the baseline simulation)
-- `filter_variable_eq`, `filter_variable_leq`, `filter_variable_geq`: Comparison operators
-
-**Quantile filters:**
-- `quantile`: Number of quantiles (e.g., 10 for deciles, 5 for quintiles)
-- `quantile_eq`: Exact quantile (e.g., 3 for 3rd decile)
-- `quantile_leq`: Maximum quantile
-- `quantile_geq`: Minimum quantile
-
-## Examples
-
-- [UK policy reform analysis](examples.md#uk-policy-reform-analysis): Full reform analysis with ChangeAggregate and visualisation
-- [US budgetary impact](examples.md#us-budgetary-impact): Budgetary impact comparing both approaches
diff --git a/docs/examples.md b/docs/examples.md
index b7b4e91a..147a7d0c 100644
--- a/docs/examples.md
+++ b/docs/examples.md
@@ -1,67 +1,65 @@
-# Examples
+---
+title: "Examples"
+---
 
-Complete working scripts demonstrating common workflows. Each script can be run directly with `python examples/<filename>.py`.
+Complete runnable scripts in `examples/` — each demonstrates one workflow end-to-end. Run with `python examples/<file>.py`.
 
-## US budgetary impact
+## US
 
-The canonical workflow for comparing a baseline and reform simulation, using both `economic_impact_analysis()` and `ChangeAggregate`.
+### Budget impact of a reform
 
-```{literalinclude} ../examples/us_budgetary_impact.py
-:language: python
+```{.python include="../examples/us_budgetary_impact.py"}
 ```
 
-## UK policy reform analysis
+### Income distribution over microdata
 
-Applying parametric reforms, comparing baseline and reform with `ChangeAggregate`, analysing winners and losers by income decile, and visualising results with Plotly.
-
-```{literalinclude} ../examples/policy_change_uk.py
-:language: python
+```{.python include="../examples/income_distribution_us.py"}
 ```
 
-## UK income bands
-
-Calculating net income and tax by income decile using representative microdata and `Aggregate` with quantile filters.
+### Household impact curve
 
-```{literalinclude} ../examples/income_bands_uk.py
-:language: python
+```{.python include="../examples/household_impact_example.py"}
 ```
 
-## US income distribution
+### Employment-income variation
 
-Loading enhanced CPS microdata, running a full microsimulation, and calculating statistics within income deciles.
+```{.python include="../examples/employment_income_variation_us.py"}
+```
 
-```{literalinclude} ../examples/income_distribution_us.py
-:language: python
+### Full microsimulation speedtest
+
+```{.python include="../examples/speedtest_us_simulation.py"}
 ```
 
-## UK employment income variation
+## UK
 
-Creating a custom dataset with varied employment income, running a single simulation, and visualising benefit phase-outs.
+### Reform with decile impact
 
-```{literalinclude} ../examples/employment_income_variation_uk.py
-:language: python
+```{.python include="../examples/policy_change_uk.py"}
 ```
 
-## US employment income variation
-
-Same approach as the UK version, varying employment income from $0 to $200k and plotting household net income.
+### Income bands analysis
 
-```{literalinclude} ../examples/employment_income_variation_us.py
-:language: python
+```{.python include="../examples/income_bands_uk.py"}
 ```
 
-## Household impact calculation
+### Employment-income variation
 
-Using `calculate_household_impact()` to compute taxes and benefits for individual custom households (both UK and US).
+```{.python include="../examples/employment_income_variation_uk.py"}
+```
+
+### Paper reproduction
 
-```{literalinclude} ../examples/household_impact_example.py
-:language: python
+```{.python include="../examples/paper_repro_uk.py"}
 ```
 
-## Simulation performance
+## Writing your own
 
-Benchmarking how `simulation.run()` scales with dataset size.
+Patterns worth following:
 
-```{literalinclude} ../examples/speedtest_us_simulation.py
-:language: python
-```
+- Always pass `year` explicitly — don't rely on defaults
+- Construct the baseline `Simulation` once; build reforms on top rather than recomputing
+- Save the `.manifest.json` alongside your results for reproducibility
+- Use typed outputs (`Aggregate`, `Poverty`, etc.) rather than ad-hoc `.calculate` calls — the outputs handle edge cases like missing weights
+
+More patterns in [Outputs](outputs.md) and [Impact analysis](impact-analysis.md).
diff --git a/docs/getting-started.md b/docs/getting-started.md
new file mode 100644
index 00000000..d56d1a75
--- /dev/null
+++ b/docs/getting-started.md
@@ -0,0 +1,78 @@
+---
+title: "Getting started"
+---
+
+## Install
+
+```bash
+pip install policyengine
+```
+
+By default `policyengine` does not bundle country models — install each country's rules alongside:
+
+```bash
+pip install policyengine policyengine-us        # US only
+pip install policyengine policyengine-uk        # UK only
+pip install policyengine policyengine-us policyengine-uk   # both
+```
+
+Country modules (`pe.us`, `pe.uk`) are only importable if the matching country package is installed.
+
+## Compute one household
+
+```python
+import policyengine as pe
+
+result = pe.us.calculate_household(
+    people=[{"age": 35, "employment_income": 60_000}],
+    tax_unit={"filing_status": "SINGLE"},
+    household={"state_code": "CA"},
+    year=2026,
+)
+
+result.tax_unit.income_tax
+result.tax_unit.eitc
+result.household.household_net_income
+```
+
+Each `.*` lookup is a regular Python scalar. The result object is typed; IDEs and type-checkers autocomplete attribute names from the country model's variable catalog.
+
+## Apply a reform
+
+```python
+reformed = pe.us.calculate_household(
+    people=[{"age": 35, "employment_income": 60_000}],
+    tax_unit={"filing_status": "SINGLE"},
+    year=2026,
+    reform={"gov.irs.credits.ctc.amount.adult_dependent": 1_000},
+)
+```
+
+Reforms are parameter-path → value dicts. For time-varying reforms pass a dict of effective-date strings instead of a scalar:
+
+```python
+reform = {
+    "gov.irs.credits.ctc.amount.adult_dependent": {
+        "2026-01-01": 1_000,
+        "2028-01-01": 2_000,
+    },
+}
+```
+
+See [Reforms](reforms.md) for structural changes and multi-year reforms.
+
+## Scale up
+
+A single-household calculator is convenient for policy-walkthroughs and tests. For population estimates of budget cost, distributional impact, and poverty effects, move to [Microsimulation](microsim.md). The API is parallel — `pe.us.calculate_household` and `pe.us.Simulation` accept the same reform dict, so your hypothesis code carries over.
+
+## What you get back
+
+Every calculation returns a typed result object with sections per entity — `person`, `tax_unit`, `spm_unit`, `household`, `family` for the US; `person`, `benunit`, `household` for the UK. Indexing the person list (`result.person[0]`) returns a row for that person. Group-entity lookups (`result.tax_unit`, `result.household`) return the single group the household is organized into.
+
+Every variable defined on the country model is available as an attribute. If you ask for one that doesn't exist, you get an error with the closest available suggestion — no silent zero returns.
+
+## Next
+
+- [Households](households.md) — full reference for `calculate_household`
+- [Reforms](reforms.md) — parametric and structural reforms
+- [Microsimulation](microsim.md) — population-level analysis
diff --git a/docs/households.md b/docs/households.md
new file mode 100644
index 00000000..4fe3daa4
--- /dev/null
+++ b/docs/households.md
@@ -0,0 +1,124 @@
+---
+title: "Households"
+---
+
+`pe.us.calculate_household` and `pe.uk.calculate_household` compute every variable in the country model for a single household. Same keyword arguments, different entity structures.
+
+## US
+
+```python
+result = pe.us.calculate_household(
+    people=[
+        {"age": 35, "employment_income": 40_000},
+        {"age": 33},
+        {"age": 8},
+        {"age": 5},
+    ],
+    tax_unit={"filing_status": "JOINT"},
+    household={"state_code": "TX"},
+    year=2026,
+)
+```
+
+### Entities
+
+| Argument | Required | Purpose |
+|---|---|---|
+| `people` | Yes | List of person dicts. Keys are any person-level variable on the model. |
+| `tax_unit` | One of the per-household-level keys | Tax-unit-level inputs (e.g. `filing_status`). |
+| `spm_unit` | Optional | SPM-unit inputs. |
+| `household` | Usually required | Household-level inputs. `state_code` is essentially always needed for US. |
+| `family` | Optional | Family-level inputs. |
+| `marital_unit` | Optional | Marital-unit inputs. |
+
+If you pass multiple adults, PolicyEngine assigns them to one tax unit and one household by default. For separate tax units, use `pe.Simulation` directly and set the entity-membership arrays.
+
+## UK
+
+```python
+result = pe.uk.calculate_household(
+    people=[
+        {"age": 35, "employment_income": 50_000},
+        {"age": 33, "employment_income": 30_000},
+        {"age": 4},
+    ],
+    benunit={},
+    household={},
+    year=2026,
+)
+```
+
+| Argument | Purpose |
+|---|---|
+| `people` | Person-level inputs. |
+| `benunit` | Benefit unit (equivalent to UC claim). |
+| `household` | Household-level inputs. |
+
+## Reforms
+
+Pass a `reform` dict of parameter-path to value:
+
+```python
+pe.us.calculate_household(
+    ...,
+    reform={"gov.irs.credits.ctc.amount.adult_dependent": 1_000},
+)
+```
+
+For values effective on specific dates, use a nested dict:
+
+```python
+reform = {
+    "gov.irs.credits.ctc.amount.adult_dependent": {
+        "2026-01-01": 1_000,
+        "2028-01-01": 2_000,
+    },
+}
+```
+
+Structural reforms (subclassing the model) are covered in [Reforms](reforms.md).
+
+## Year
+
+```python
+pe.us.calculate_household(..., year=2026)
+```
+
+The year determines which parameter values apply. For year arithmetic (e.g. phase-ins), pass a `reform` with dated values rather than calling the function once per year.
+
+## Extra variables
+
+By default the result exposes every variable in the model. If your calculator-level output should contain variables that aren't in the default catalog, request them:
+
+```python
+result = pe.us.calculate_household(
+    ...,
+    extra_variables=["medicaid_income_level", "spm_unit_spm_threshold"],
+)
+```
+
+## Accessing the result
+
+```python
+result.person[0].income_tax                  # scalar for first person
+result.person[2].age                         # scalar for third person (the 8-year-old)
+result.tax_unit.income_tax                   # scalar (one tax unit)
+result.household.household_net_income        # scalar
+```
+
+The result is a Pydantic model — `.model_dump()` gives you a dict, and individual entity sections are regular attribute lookups.
+
+## When not to use this
+
+- Runs over many households in a loop will be much slower than one `Simulation` call. See [Microsimulation](microsim.md).
+- If your input data lives in a DataFrame or file, the microsim path is cleaner — `calculate_household` is optimized for per-household construction from Python literals.
+
+## Errors
+
+Unknown variables raise with suggestions:
+
+```
+ValueError: Unknown variable 'income_ax'. Did you mean 'income_tax'?
+```
+
+Unknown parameters in reforms raise similarly. The catalog is enumerated at construction time — typos fail fast.
diff --git a/docs/impact-analysis.md b/docs/impact-analysis.md
new file mode 100644
index 00000000..a2a853e6
--- /dev/null
+++ b/docs/impact-analysis.md
@@ -0,0 +1,75 @@
+---
+title: "Impact analysis"
+---
+
+`economic_impact_analysis` runs a baseline-vs-reform comparison and returns a bundle of standard outputs — budget cost, poverty change, distributional impact, inequality — in one call.
+
+## One-liner
+
+```python
+from policyengine.us import economic_impact_analysis
+
+impact = economic_impact_analysis(
+    reform={"gov.irs.credits.ctc.amount.adult_dependent": 1_000},
+    year=2026,
+)
+
+impact.budget.total_change
+impact.poverty.rate_change
+impact.deciles.mean_change_by_decile
+impact.inequality.gini
+```
+
+The UK equivalent is `from policyengine.uk import economic_impact_analysis`.
+
+## What it computes
+
+Each call produces:
+
+| Section | Content |
+|---|---|
+| `budget` | Total budget cost (`household_net_income` sum change) |
+| `poverty` | SPM poverty rate before/after (US) or AHC rate (UK), plus demographic breakdowns |
+| `deep_poverty` | Same as above for half-of-poverty-threshold (US only) |
+| `deciles` | Mean net-income change by income decile; winners-vs-losers |
+| `intra_deciles` | Distribution of impact within each decile |
+| `inequality` | Gini and top-income shares |
+
+All sections compute against the same baseline and reform simulations, so results are internally consistent.
+
+## Under the hood
+
+`economic_impact_analysis` is a thin wrapper around the individual output classes — same as composing them manually:
+
+```python
+baseline = pe.Simulation(country="us", dataset=DEFAULT_US_DATASET, year=2026)
+reformed = pe.Simulation(country="us", dataset=DEFAULT_US_DATASET, year=2026, reform=REFORM)
+
+budget = ChangeAggregate("household_net_income", ChangeAggregateType.DIFFERENCE).compute(baseline, reformed)
+poverty = Poverty(...).compute(baseline, reformed)
+# ...
+```
+
+If you need a subset of outputs or want to cache the baseline across multiple reform scenarios, compose directly rather than calling `economic_impact_analysis` repeatedly.
+
+## Passing your own data
+
+By default, `economic_impact_analysis` uses the pinned default dataset for each country. For custom datasets:
+
+```python
+impact = economic_impact_analysis(
+    reform=REFORM,
+    year=2026,
+    dataset="hf://policyengine/policyengine-us-data/enhanced_cps_2023.h5",
+)
+```
+
+## Non-parametric reforms
+
+For structural reforms, construct the simulations yourself and pass them to the outputs directly. `economic_impact_analysis` only accepts parametric reform dicts.
+
+## Next
+
+- [Outputs](outputs.md) — catalog of individual output classes
+- [Regions](regions.md) — state/constituency-level impact breakdowns
+- [Examples](examples.md) — full runnable scripts using this helper
diff --git a/docs/index.md b/docs/index.md
index bbd88974..4ea52bd6 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -1,20 +1,48 @@
-# policyengine.py
-
-This package aims to simplify and productionise the use of PolicyEngine's tax-benefit microsimulation models to flexibly produce useful information at scale, slotting into existing analysis pipelines while also standardising analysis.
-
-We do this by:
-* Standardising around a set of core types that let us do policy analysis in an object-oriented way
-* Exemplifying this behaviour by using this package in all PolicyEngine's production applications, and analyses
-
-## Documentation
-
-- [Core concepts](core-concepts.md): Architecture, datasets, simulations, policies, outputs, entity mapping
-- [Economic impact analysis](economic-impact-analysis.md): Full baseline-vs-reform comparison workflow
-- [Advanced outputs](advanced-outputs.md): DecileImpact, Poverty, Inequality, IntraDecileImpact
-- [Regions and scoping](regions-and-scoping.md): Sub-national analysis (states, constituencies, districts)
-- [UK tax-benefit model](country-models-uk.md): Entities, parameters, reform examples
-- [US tax-benefit model](country-models-us.md): Entities, parameters, reform examples
-- [Examples](examples.md): Complete working scripts
-- [Visualisation](visualisation.md): Publication-ready charts with Plotly
-- [Release bundles](release-bundles.md): Reproducible model-plus-data certification and provenance
-- [Development](dev.md): Setup, testing, CI, architecture
+---
+title: "PolicyEngine"
+subtitle: "Tax-benefit microsimulation for Python"
+---
+
+Compute household taxes and benefits, simulate reforms, and measure distributional impact — across the US and UK — from a single Python package.
+
+## Install
+
+```bash
+pip install policyengine
+```
+
+## Minimal example
+
+```python
+import policyengine as pe
+
+result = pe.us.calculate_household(
+    people=[{"age": 35, "employment_income": 60_000}],
+    tax_unit={"filing_status": "SINGLE"},
+    household={"state_code": "CA"},
+    year=2026,
+)
+print(result.household.household_net_income)
+```
+
+## Where to go
+
+| If you want to… | Start here |
+|---|---|
+| Compute taxes and benefits for one household | [Households](households.md) |
+| Simulate a policy change | [Reforms](reforms.md) |
+| Run a population microsimulation | [Microsimulation](microsim.md) |
+| Measure a reform's distributional impact | [Impact analysis](impact-analysis.md) |
+| See every output type | [Outputs](outputs.md) |
+| Look up a variable | Reference (auto-generated catalog, pending) |
+| Contribute | [Development](dev.md) |
+
+## What PolicyEngine is
+
+A platform that encodes the tax and benefit rules of a country as Python formulas and YAML parameters, runs them over microdata or single households, and exposes the results through a small set of typed outputs. The country rules live in country-specific packages (`policyengine-us`, `policyengine-uk`); this package wraps them in one API.
+
+Under the hood PolicyEngine combines the rules with calibrated microdata — the enhanced CPS for the US, the enhanced FRS for the UK — and returns weighted population estimates that match administrative totals.
+
+## Citation
+
+Woodruff and Ghenis (2024), *Enhancing Survey Microdata with Administrative Records: A Novel Approach to Microsimulation Dataset Construction*.
diff --git a/docs/microsim.md b/docs/microsim.md
new file mode 100644
index 00000000..255b4485
--- /dev/null
+++ b/docs/microsim.md
@@ -0,0 +1,129 @@
+---
+title: "Microsimulation"
+---
+
+For population-level estimates — budget cost, winners and losers, poverty impact — run a microsimulation over calibrated microdata.
+
+## Quick example
+
+```python
+import policyengine as pe
+from policyengine.outputs.aggregate import Aggregate, AggregateType
+
+pe.us.ensure_datasets(
+    datasets=["hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5"],
+    years=[2026],
+)
+
+baseline = pe.Simulation(
+    country="us",
+    dataset="hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5",
+    year=2026,
+)
+
+total_snap = Aggregate(
+    variable="snap",
+    type=AggregateType.SUM,
+).compute(baseline)
+```
+
+## Datasets
+
+Microdata is stored as HDF5 files on Hugging Face. Install once to download and cache:
+
+```python
+pe.us.ensure_datasets(
+    datasets=["hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5"],
+    years=[2024, 2026],
+)
+```
+
+The default US dataset is **Enhanced CPS 2024** — CPS ASEC with IRS SOI tax return records imputed in and calibration weights tuned to match IRS, CMS, SNAP, and other administrative totals. The UK default is **Enhanced FRS** — Family Resources Survey with tax-return microdata fused in and calibration to HMRC and DWP totals.
+
+List all available datasets:
+
+```python
+pe.us.load_datasets()        # or pe.uk.load_datasets()
+```
+
+## Simulations
+
+A `Simulation` takes a country, a dataset, a year, and an optional reform:
+
+```python
+baseline = pe.Simulation(
+    country="us",
+    dataset="hf://.../enhanced_cps_2024.h5",
+    year=2026,
+)
+
+reformed = pe.Simulation(
+    country="us",
+    dataset="hf://.../enhanced_cps_2024.h5",
+    year=2026,
+    reform={"gov.irs.credits.ctc.amount.adult_dependent": 1_000},
+)
+```
+
+Each simulation wraps a PolicyEngine country model plus the dataset plus the weight vector.
+
+## Outputs
+
+Outputs are callables that consume a `Simulation` and return a typed result. They cover single-value aggregates, cross-sectional distributions, and geographic breakdowns. See [Outputs](outputs.md).
+
+```python
+from policyengine.outputs import (
+    Aggregate, AggregateType,
+    ChangeAggregate, ChangeAggregateType,
+    DecileImpact,
+    Poverty,
+    Inequality,
+)
+
+# Cost of the SNAP program
+snap_cost = Aggregate(variable="snap", type=AggregateType.SUM).compute(baseline)
+
+# Reform budget impact
+budget = ChangeAggregate(
+    variable="household_net_income",
+    type=ChangeAggregateType.DIFFERENCE,
+).compute(baseline, reformed)
+```
+
+## Memory and performance
+
+A full Enhanced CPS microsimulation uses ~4 GB of memory and takes ~15–30 seconds on a laptop. For repeated runs with different reforms, reuse the baseline `Simulation` and construct the reform-only instance on top.
+
+Downsampled datasets are available for testing:
+
+```python
+pe.us.ensure_datasets(
+    datasets=["hf://policyengine/policyengine-us-data/cps_small_2024.h5"],
+    years=[2026],
+)
+```
+
+These run in seconds and are fine for integration tests. Don't use them for production analysis — the weights are not calibration-tuned.
+
+## Managed microsimulation
+
+If you're orchestrating many reforms, the `managed_microsimulation` context handles dataset prep, cache reuse, and teardown:
+
+```python
+from policyengine.us import managed_microsimulation
+
+with managed_microsimulation(year=2026) as sim:
+    baseline = Aggregate("snap", AggregateType.SUM).compute(sim)
+```
+
+## Pinned model versions
+
+Every release of `policyengine` pins a specific version of each country model, so results are reproducible. `pe.us.model` and `pe.uk.model` expose the pinned `TaxBenefitModelVersion`.
+
+If the installed country package version doesn't match the pinned manifest, `managed_microsimulation` raises a warning with the version gap. For strict reproducibility, pin the country packages to the same versions the `policyengine` release was built against — see [Provenance](release-bundles.md).
+
+## Next
+
+- [Outputs](outputs.md) — catalog of typed output classes
+- [Impact analysis](impact-analysis.md) — baseline-vs-reform in one call
+- [Regions](regions.md) — sub-national analysis (states, constituencies, districts)
diff --git a/docs/myst.yml b/docs/myst.yml
deleted file mode 100644
index 6924ef21..00000000
--- a/docs/myst.yml
+++ /dev/null
@@ -1,27 +0,0 @@
-# See docs at: https://mystmd.org/guide/frontmatter
-version: 1
-project:
-  id: b70ccb02-12b9-4bdb-a25b-f44bf2213d98
-  # title:
-  # description:
-  # keywords: []
-  # authors: []
-  github: https://github.com/PolicyEngine/policyengine.py
-  toc:
-    - file: index.md
-    - file: core-concepts.md
-    - file: economic-impact-analysis.md
-    - file: advanced-outputs.md
-    - file: regions-and-scoping.md
-    - file: country-models-uk.md
-    - file: country-models-us.md
-    - file: examples.md
-    - file: visualisation.md
-    - file: release-bundles.md
-    - file: dev.md
-
-site:
-  template: book-theme
-  # options:
-  #   favicon: favicon.ico
-  #   logo: site_logo.png
diff --git a/docs/outputs.md b/docs/outputs.md
new file mode 100644
index 00000000..5d75ea7b
--- /dev/null
+++ b/docs/outputs.md
@@ -0,0 +1,156 @@
+---
+title: "Outputs"
+---
+
+Outputs are callables that consume a `Simulation` (or baseline + reform pair) and return a typed result. Every page uses the same pattern: construct the output with the variables you want, call `.compute(sim)` or `.compute(baseline, reformed)`.
+
+## Aggregate
+
+Single-number summaries over the population.
+
+```python
+from policyengine.outputs import Aggregate, AggregateType
+
+cost = Aggregate(variable="snap", type=AggregateType.SUM).compute(baseline)
+average = Aggregate(variable="household_net_income", type=AggregateType.MEAN).compute(baseline)
+```
+
+`AggregateType` options: `SUM`, `MEAN`, `MEDIAN`, `COUNT_POSITIVE`, `COUNT`, plus quantile types.
+
+### Filtering
+
+Apply a pandas-style filter to the population before aggregating:
+
+```python
+Aggregate(
+    variable="household_net_income",
+    type=AggregateType.MEAN,
+    filter="household_size >= 4",
+).compute(baseline)
+```
+
+## ChangeAggregate
+
+Difference or percent change between a baseline and a reform.
+
+```python
+from policyengine.outputs import ChangeAggregate, ChangeAggregateType
+
+impact = ChangeAggregate(
+    variable="household_net_income",
+    type=ChangeAggregateType.DIFFERENCE,
+).compute(baseline, reformed)
+```
+
+`ChangeAggregateType` options: `DIFFERENCE`, `PERCENT_CHANGE`, `RELATIVE_CHANGE`.
+
+## DecileImpact
+
+Average net-income change by income decile, and winners-vs-losers counts.
+
+```python
+from policyengine.outputs import DecileImpact
+
+impact = DecileImpact().compute(baseline, reformed)
+
+impact.mean_change_by_decile          # dict {1: -50, 2: 120, ...}
+impact.winners_losers_by_decile       # dict {1: {"winners": 0.1, "losers": 0.3, "neutral": 0.6}, ...}
+```
+
+Defaults to household-level equivalized net income. Pass `income_variable=` to override.
+
+## IntraDecileImpact
+
+Distribution of household-level impact within each income decile — not just mean, but how much spread.
+
+```python
+from policyengine.outputs import IntraDecileImpact
+
+spread = IntraDecileImpact().compute(baseline, reformed)
+```
+
+## Poverty
+
+Poverty rate before and after a reform, by demographic group.
+
+```python
+from policyengine.outputs import Poverty, AGE_GROUPS, RACE_GROUPS
+
+rates = Poverty(
+    income_variable="spm_unit_net_income",
+    poverty_measure="spm",
+    groups=AGE_GROUPS + RACE_GROUPS,
+).compute(baseline, reformed)
+```
+
+US defaults cover SPM; UK defaults cover AHC and BHC. Deep poverty is available with `measure="deep_spm"` (US).
+
+## Inequality
+
+Gini and top income shares.
+
+```python
+from policyengine.outputs import Inequality, USInequalityPreset
+
+result = Inequality(preset=USInequalityPreset.SPM).compute(baseline, reformed)
+
+result.gini                              # {'baseline': 0.48, 'reformed': 0.47}
+result.top_ten_share                     # before/after
+result.top_one_share
+result.top_tenth_of_one_share
+```
+
+## Geographic breakdowns
+
+### CongressionalDistrictImpact (US)
+
+```python
+from policyengine.outputs import CongressionalDistrictImpact
+
+impacts = CongressionalDistrictImpact().compute(baseline, reformed)
+# Per-district winners/losers, cost, poverty change
+```
+
+### ConstituencyImpact (UK) / LocalAuthorityImpact (UK)
+
+```python
+from policyengine.outputs import ConstituencyImpact, LocalAuthorityImpact
+
+constituency = ConstituencyImpact().compute(baseline, reformed)
+la = LocalAuthorityImpact().compute(baseline, reformed)
+```
+
+## ProgramStatistics
+
+Program-level counts and dollar amounts — who enrolls, how much they receive.
+
+```python
+from policyengine.outputs import ProgramStatistics
+
+stats = ProgramStatistics(program="snap").compute(baseline)
+
+stats.total_households
+stats.total_enrolled
+stats.total_cost
+stats.mean_benefit
+```
+
+## Combining outputs
+
+Every output stores a `to_dict()` representation. Collect them into a dashboard via a collection:
+
+```python
+from policyengine.core import OutputCollection
+
+dashboard = OutputCollection(
+    cost=ChangeAggregate("snap", ChangeAggregateType.DIFFERENCE),
+    poverty=Poverty(income_variable="spm_unit_net_income"),
+    deciles=DecileImpact(),
+).compute(baseline, reformed)
+```
+
+The collection dispatches to each output and returns a dict keyed by the names you assign.
+
+## Writing your own output
+
+Subclass `Output` or `ChangeOutput`. See `src/policyengine/outputs/aggregate.py` for the simplest reference implementation.
diff --git a/docs/reforms.md b/docs/reforms.md
new file mode 100644
index 00000000..e8043c53
--- /dev/null
+++ b/docs/reforms.md
@@ -0,0 +1,111 @@
+---
+title: "Reforms"
+---
+
+A reform is a change to the rules used in a calculation. PolicyEngine supports two kinds: **parametric** (adjust a parameter value) and **structural** (swap or subclass a rule formula).
+
+## Parametric reforms
+
+A dict of parameter path → new value. The same shape works for `calculate_household`, `Simulation`, and the output helpers.
+
+```python
+reform = {
+    "gov.irs.credits.ctc.amount.adult_dependent": 1_000,
+}
+
+pe.us.calculate_household(..., reform=reform)
+```
+
+Scalar values are treated as effective on January 1 of the simulation year and onward.
+
+### Time-varying
+
+```python
+reform = {
+    "gov.irs.credits.ctc.amount.adult_dependent": {
+        "2026-01-01": 1_000,
+        "2028-01-01": 2_000,
+    },
+    "gov.irs.credits.eitc.phase_out.rate[0]": {
+        "2026-01-01": 0.08,
+    },
+}
+```
+
+Dates that haven't been passed yet become "from this date onward." Earlier dates replace the baseline schedule.
+
+### Multiple changes
+
+Any number of parameter paths in the same dict compose into one reform:
+
+```python
+reform = {
+    "gov.irs.credits.ctc.amount.adult_dependent": 1_000,
+    "gov.irs.credits.eitc.phase_out.rate[0]": 0.08,
+    "gov.states.ca.tax.income.credits.eitc.max_amount": 500,
+}
+```
+
+### Where parameters live
+
+Every parameter has a canonical path that matches the YAML directory structure in the country model. `gov.irs.credits.ctc.amount.adult_dependent` corresponds to `policyengine_us/parameters/gov/irs/credits/ctc/amount/adult_dependent.yaml`.
+
+An auto-generated parameter reference is pending; for now, browse the YAML tree in the country model repository (e.g. `policyengine-us/policyengine_us/parameters/`), or type-error your way there — an unknown path raises with suggestions.
+
+### Scale and array parameters
+
+Scale parameters (brackets with thresholds and amounts) are addressed by bracket index:
+
+```python
+reform = {
+    "gov.irs.income.tax.rate[0]": 0.12,   # first bracket rate
+    "gov.irs.income.tax.threshold[1]": 50_000,  # second bracket threshold
+}
+```
+
+## Structural reforms
+
+For rule changes that can't be expressed as a parameter change — swapping one formula for another, adding a variable, removing a program — subclass the country model:
+
+```python
+from policyengine.tax_benefit_models.us import PolicyEngineUS, us_latest
+
+
+class MyReform(PolicyEngineUS):
+    version = us_latest.version
+
+    def __init__(self):
+        super().__init__()
+        self.neutralize_variable("eitc")
+```
+
+Pass the reformed model to `Simulation`:
+
+```python
+sim = pe.Simulation(model=MyReform(), year=2026)
+```
+
+`calculate_household` does not yet accept structural reforms directly — use `Simulation` or the country-specific `managed_microsimulation` context.
+
+## Combining parametric and structural
+
+Pass a parametric reform to the structural-reform constructor:
+
+```python
+sim = pe.Simulation(
+    model=MyReform(),
+    reform={"gov.irs.credits.ctc.amount.adult_dependent": 1_000},
+    year=2026,
+)
+```
+
+## Validating a reform before you run it
+
+The parameter catalog is known at import time. If a path is wrong, the call raises *before* starting the simulation with a suggested path.
+
+For time-varying reforms, the effective dates are checked against the parameter's defined start and end. A date before the parameter started or after a defined end date raises.
+
+## Reform worked examples
+
+- [Economic impact analysis](impact-analysis.md) — full baseline-vs-reform workflow with population estimates.
+- [Examples](examples.md) — runnable scripts for reform scenarios in `examples/`.
diff --git a/docs/regions-and-scoping.md b/docs/regions-and-scoping.md
deleted file mode 100644
index 9be4ddbc..00000000
--- a/docs/regions-and-scoping.md
+++ /dev/null
@@ -1,251 +0,0 @@
-# Regions and scoping
-
-The package supports sub-national analysis through a geographic region system. Regions can scope simulations to states, constituencies, congressional districts, local authorities, and cities.
-
-## Region system
-
-### Region
-
-A `Region` represents a geographic area with a unique prefixed code:
-
-| Region type | Code format | Examples |
-|---|---|---|
-| National | `us`, `uk` | `us`, `uk` |
-| State | `state/{code}` | `state/ca`, `state/ny` |
-| Congressional district | `congressional_district/{ST-DD}` | `congressional_district/CA-01` |
-| Place/city | `place/{ST-FIPS}` | `place/NJ-57000` |
-| UK country | `country/{name}` | `country/england` |
-| Constituency | `constituency/{name}` | `constituency/Sheffield Central` |
-| Local authority | `local_authority/{code}` | `local_authority/E09000001` |
-
-### RegionRegistry
-
-Each model version has a `RegionRegistry` providing O(1) lookups:
-
-```python
-from policyengine.tax_benefit_models.us import us_latest
-
-registry = us_latest.region_registry
-
-# Look up by code
-california = registry.get("state/ca")
-print(f"{california.label}: {california.region_type}")
-
-# Get all regions of a type
-states = registry.get_by_type("state")
-print(f"{len(states)} states")
-
-districts = registry.get_by_type("congressional_district")
-print(f"{len(districts)} congressional districts")
-
-# Get children of a region
-ca_districts = registry.get_children("state/ca")
-```
-
-```python
-from policyengine.tax_benefit_models.uk import uk_latest
-
-registry = uk_latest.region_registry
-
-# UK countries
-countries = registry.get_by_type("country")
-for c in countries:
-    print(f"{c.code}: {c.label}")
-```
-
-### Region counts
-
-**US:** 1 national + 51 states (inc. DC) + 436 congressional districts + 333 census places = 821 regions
-
-**UK:** 1 national + 4 countries. Constituencies and local authorities are available via extended registry builders.
-
-## Scoping strategies
-
-Scoping strategies control how a national dataset is narrowed to represent a sub-national region. They are applied during `Simulation.run()`, before the microsimulation calculation.
-
-### RowFilterStrategy
-
-Filters dataset rows where a household-level variable matches a specific value. Used for UK countries and US places/cities.
-
-```python
-from policyengine.core import Simulation
-from policyengine.core.scoping_strategy import RowFilterStrategy
-
-# Simulate only California households
-simulation = Simulation(
-    dataset=dataset,
-    tax_benefit_model_version=us_latest,
-    scoping_strategy=RowFilterStrategy(
-        variable_name="state_code",
-        variable_value="CA",
-    ),
-)
-simulation.run()
-```
-
-This removes all non-California households from the dataset before running the simulation. The remaining household weights still reflect California's population.
-
-```python
-# UK: simulate only England
-simulation = Simulation(
-    dataset=dataset,
-    tax_benefit_model_version=uk_latest,
-    scoping_strategy=RowFilterStrategy(
-        variable_name="country",
-        variable_value="ENGLAND",
-    ),
-)
-```
-
-### WeightReplacementStrategy
-
-Replaces household weights from a pre-computed weight matrix stored in Google Cloud Storage. Used for UK constituencies and local authorities, where the weight matrix (shape: N_regions x N_households) reweights all households to represent each region's demographics.
-
-```python
-from policyengine.core.scoping_strategy import WeightReplacementStrategy
-
-simulation = Simulation(
-    dataset=dataset,
-    tax_benefit_model_version=uk_latest,
-    scoping_strategy=WeightReplacementStrategy(
-        weight_matrix_bucket="policyengine-uk-data",
-        weight_matrix_key="parliamentary_constituency_weights.h5",
-        lookup_csv_bucket="policyengine-uk-data",
-        lookup_csv_key="constituencies_2024.csv",
-        region_code="Sheffield Central",
-    ),
-)
-```
-
-Unlike row filtering, weight replacement keeps all households but assigns region-specific weights. This is more statistically robust for small geographic areas where filtering would leave too few households.
-
-### Legacy filter fields
-
-For backward compatibility, `Simulation` also accepts `filter_field` and `filter_value` parameters, which are auto-converted to a `RowFilterStrategy`:
-
-```python
-# These two are equivalent:
-simulation = Simulation(
-    dataset=dataset,
-    tax_benefit_model_version=us_latest,
-    filter_field="state_code",
-    filter_value="CA",
-)
-
-simulation = Simulation(
-    dataset=dataset,
-    tax_benefit_model_version=us_latest,
-    scoping_strategy=RowFilterStrategy(
-        variable_name="state_code",
-        variable_value="CA",
-    ),
-)
-```
-
-## Geographic impact outputs
-
-The package provides output types that compute per-region metrics across all regions simultaneously.
-
-### CongressionalDistrictImpact (US)
-
-Groups households by `congressional_district_geoid` and computes weighted average and relative income changes per district.
-
-```python
-from policyengine.outputs.congressional_district_impact import (
-    compute_us_congressional_district_impacts,
-)
-
-baseline_sim.run()
-reform_sim.run()
-
-impact = compute_us_congressional_district_impacts(baseline_sim, reform_sim)
-
-for d in impact.district_results:
-    print(f"District {d['state_fips']:02d}-{d['district_number']:02d}: "
-          f"avg change=${d['average_household_income_change']:+,.0f}, "
-          f"relative={d['relative_household_income_change']:+.2%}")
-```
-
-**Result fields per district:**
-- `district_geoid`: Integer SSDD (state FIPS * 100 + district number)
-- `state_fips`: State FIPS code
-- `district_number`: District number within state
-- `average_household_income_change`: Weighted mean change
-- `relative_household_income_change`: Weighted relative change
-- `population`: Weighted household count
-
-### ConstituencyImpact (UK)
-
-Uses pre-computed weight matrices (650 x N_households) to compute per-constituency income changes without filtering.
-
-```python
-from policyengine.outputs.constituency_impact import (
-    compute_uk_constituency_impacts,
-)
-
-impact = compute_uk_constituency_impacts(
-    baseline_simulation=baseline_sim,
-    reform_simulation=reform_sim,
-    weight_matrix_path="parliamentary_constituency_weights.h5",
-    constituency_csv_path="constituencies_2024.csv",
-    year="2025",
-)
-
-for c in impact.constituency_results:
-    print(f"{c['constituency_name']}: "
-          f"avg change={c['average_household_income_change']:+,.0f}")
-```
-
-**Result fields per constituency:**
-- `constituency_code`, `constituency_name`: Identifiers
-- `x`, `y`: Hex map coordinates
-- `average_household_income_change`, `relative_household_income_change`
-- `population`: Weighted household count
-
-### LocalAuthorityImpact (UK)
-
-Works identically to `ConstituencyImpact` but for local authorities (360 x N_households weight matrix).
-
-```python
-from policyengine.outputs.local_authority_impact import (
-    compute_uk_local_authority_impacts,
-)
-
-impact = compute_uk_local_authority_impacts(
-    baseline_simulation=baseline_sim,
-    reform_simulation=reform_sim,
-    weight_matrix_path="local_authority_weights.h5",
-    local_authority_csv_path="local_authorities_2024.csv",
-    year="2025",
-)
-```
-
-## Using regions with `economic_impact_analysis()`
-
-Scoping strategies compose naturally with the full analysis pipeline:
-
-```python
-from policyengine.core.scoping_strategy import RowFilterStrategy
-
-# State-level analysis
-baseline_sim = Simulation(
-    dataset=dataset,
-    tax_benefit_model_version=us_latest,
-    scoping_strategy=RowFilterStrategy(
-        variable_name="state_code",
-        variable_value="CA",
-    ),
-)
-reform_sim = Simulation(
-    dataset=dataset,
-    tax_benefit_model_version=us_latest,
-    policy=reform,
-    scoping_strategy=RowFilterStrategy(
-        variable_name="state_code",
-        variable_value="CA",
-    ),
-)
-
-# Full analysis scoped to California
-analysis = economic_impact_analysis(baseline_sim, reform_sim)
-```
diff --git a/docs/regions.md b/docs/regions.md
new file mode 100644
index 00000000..5fce4521
--- /dev/null
+++ b/docs/regions.md
@@ -0,0 +1,85 @@
+---
+title: "Regional analysis"
+---
+
+Sub-national impact breakdowns using geographically-stratified microdata and the `...Impact` output classes.
+
+## US states
+
+Every US dataset includes `state_code` on each household. Use `Aggregate` or `ChangeAggregate` with a filter:
+
+```python
+ca_cost = Aggregate(
+    variable="snap",
+    type=AggregateType.SUM,
+    filter="state_code == 'CA'",
+).compute(baseline)
+```
+
+For all-states-at-once, use `StateImpact`:
+
+```python
+from policyengine.outputs import StateImpact
+
+state_impact = StateImpact().compute(baseline, reformed)
+# Dict keyed by two-letter state code
+```
+
+## US congressional districts
+
+```python
+from policyengine.outputs import CongressionalDistrictImpact
+
+impacts = CongressionalDistrictImpact().compute(baseline, reformed)
+
+# Keyed by district ID (e.g. "CA-12")
+for district_id, result in impacts.items():
+    print(district_id, result.winners_share, result.mean_impact)
+```
+
+Requires a district-stratified dataset. The default Enhanced CPS includes district assignments calibrated against district-level ACS population and income distributions.
+
+## UK parliamentary constituencies
+
+```python
+from policyengine.outputs import ConstituencyImpact
+
+impacts = ConstituencyImpact().compute(baseline, reformed)
+```
+
+Constituency codes follow ONS nomenclature. Requires the constituency-stratified FRS dataset.
+
+## UK local authorities
+
+```python
+from policyengine.outputs import LocalAuthorityImpact
+
+impacts = LocalAuthorityImpact().compute(baseline, reformed)
+```
+
+## Custom geographies
+
+If you have a geography not covered by the built-in impact classes, compute the underlying variables via `Simulation.calculate` and group them yourself:
+
+```python
+households = baseline.calculate("household_net_income").values
+reform_households = reformed.calculate("household_net_income").values
+geography = baseline.calculate("custom_geography_id").values
+
+import pandas as pd
+df = pd.DataFrame({
+    "baseline": households,
+    "reformed": reform_households,
+    "geo": geography,
+})
+df.groupby("geo")[["baseline", "reformed"]].mean()
+```
+
+## Data availability
+
+Not every country has sub-national strata in every dataset. Check `Dataset.geo_fields` for what a given dataset supports:
+
+```python
+dataset = pe.us.load_datasets()[0]
+dataset.geo_fields        # ["state_code", "congressional_district"]
+```
diff --git a/docs/release-bundles.md b/docs/release-bundles.md
index ea014c9d..d7225f50 100644
--- a/docs/release-bundles.md
+++ b/docs/release-bundles.md
@@ -1,449 +1,89 @@
-# Release Bundles
+---
+title: "Provenance and release bundles"
+---
 
-This document defines the intended reproducibility boundary for `policyengine.py`.
+Every analysis in PolicyEngine is reproducible to a specific bundle of (package version, country model version, dataset version, calibration state). The `provenance` module formalizes this.
 
-The key design decision is:
+## The bundle
 
-- country `*-data` repos build and stage immutable data artifacts
-- `policyengine.py` is the only component that certifies supported runtime bundles
-- `policyengine.py` does not rebuild country data itself
+Each `policyengine` release pins:
 
-This keeps country-specific data construction in the country data repos while still giving users a single top-level version to cite and pin.
+- The policyengine-core version
+- The country model versions (`policyengine-us`, `policyengine-uk`)
+- The country-data versions (`policyengine-us-data`, `policyengine-uk-data`)
+- Dataset hash (content-addressed — the hashed Enhanced CPS file is a bundle ID)
+- Calibration vector IDs
 
-## Why this boundary exists
+Together these define a **data release manifest** — a published, immutable record of "running this code against this data produces these numbers."
 
-For countries like the UK, the data package is not model-independent. Dataset construction, imputations, and calibration steps call the country model directly. That means a published dataset artifact depends on:
+## Checking your bundle
 
-- the country model version used during data construction
-- the calibration targets used during data construction
-- the raw input data used during data construction
-
-If `policyengine.py` only pins a country model version and a data package version without checking that relationship, the provenance boundary is incomplete.
-
-## Roles
-
-### Country model package
-
-Examples: `policyengine-uk`, `policyengine-us`
-
-The country model package owns:
-
-- policy logic
-- variables and parameters
-- reforms
-- a `data_build_fingerprint` for the subset of model logic that affects data construction
-
-It does not own final runtime bundle certification.
-
-### Country data package
-
-Examples: `policyengine-uk-data`, `policyengine-us-data`
-
-The country data package owns:
-
-- data build pipelines
-- raw input acquisition
-- calibration target snapshots
-- expensive dataset construction
-- staging immutable build artifacts with provenance
-
-It does not define the final supported runtime bundle exposed to users.
-
-### `policyengine.py`
-
-`policyengine.py` owns:
-
-- runtime bundle certification
-- user-facing reproducibility boundaries
-- the supported mapping from `policyengine.py` version to country model version and certified data artifact
-
-It does not rebuild microdata artifacts.
-
-## Two manifest layers
-
-The architecture has two manifest layers with different responsibilities.
-
-### 1. Data build manifest
-
-Published by the country `*-data` repo.
-
-This answers:
-
-- what bytes were produced
-- how they were produced
-- which exact model and targets produced them
-
-Suggested schema:
+```python
+import policyengine as pe
 
-```json
-{
-  "schema_version": 1,
-  "country_id": "uk",
-  "data_package": {
-    "name": "policyengine-uk-data",
-    "version": "1.41.0"
-  },
-  "build": {
-    "build_id": "uk-data-2026-04-12T12-30-00Z",
-    "git_sha": "abc123",
-    "built_at": "2026-04-12T12:30:00Z",
-    "built_with_model_package": {
-      "name": "policyengine-uk",
-      "version": "2.81.0",
-      "git_sha": "def456",
-      "data_build_fingerprint": "sha256:..."
-    },
-    "calibration_targets": {
-      "snapshot_id": "2026-04-10",
-      "sha256": "sha256:..."
-    },
-    "raw_inputs": [
-      {
-        "name": "frs_2023_24",
-        "sha256": "sha256:..."
-      }
-    ],
-    "build_environment": {
-      "python_version": "3.13.3",
-      "lockfile_sha256": "sha256:..."
-    }
-  },
-  "default_datasets": {
-    "national": "enhanced_frs_2023_24",
-    "baseline": "frs_2023_24"
-  },
-  "artifacts": {
-    "enhanced_frs_2023_24": {
-      "kind": "microdata",
-      "repo_id": "policyengine/policyengine-uk-data-private",
-      "path": "builds/uk-data-2026-04-12T12-30-00Z/enhanced_frs_2023_24.h5",
-      "revision": "uk-data-2026-04-12T12-30-00Z",
-      "sha256": "sha256:...",
-      "size_bytes": 123456789
-    }
-  }
-}
+pe.us.model.manifest                 # pinned US manifest for this release
+pe.us.model.data_certification       # cert checking installed package vs manifest
 ```
 
-Notes:
+If the installed country package version doesn't match the pinned manifest, the model warns:
 
-- `build_id` must be immutable.
-- build artifacts should be staged under a build-specific path or revision, not a floating release tag.
-- the build manifest is the authoritative provenance record for the artifact bytes.
-
-### 2. Certified runtime bundle manifest
-
-Published by `policyengine.py`.
-
-This answers:
-
-- which model and data artifact are supported together at runtime
-- which exact dataset should be used by default
-- which artifact checksum and provenance should be surfaced to users
-
-Suggested schema:
-
-```json
-{
-  "schema_version": 1,
-  "policyengine_version": "3.5.0",
-  "bundle_id": "uk-3.5.0",
-  "published_at": "2026-04-12T13:00:00Z",
-  "country_id": "uk",
-  "model_package": {
-    "name": "policyengine-uk",
-    "version": "2.81.1"
-  },
-  "certified_data_artifact": {
-    "data_package": {
-      "name": "policyengine-uk-data",
-      "version": "1.41.0"
-    },
-    "build_id": "uk-data-2026-04-12T12-30-00Z",
-    "dataset": "enhanced_frs_2023_24",
-    "uri": "hf://policyengine/policyengine-uk-data-private/builds/uk-data-2026-04-12T12-30-00Z/enhanced_frs_2023_24.h5@uk-data-2026-04-12T12-30-00Z",
-    "sha256": "sha256:..."
-  },
-  "certification": {
-    "compatibility_basis": "matching_data_build_fingerprint",
-    "built_with_model_version": "2.81.0",
-    "certified_for_model_version": "2.81.1",
-    "data_build_fingerprint": "sha256:...",
-    "certified_by": "policyengine.py release workflow"
-  },
-  "default_dataset": "enhanced_frs_2023_24",
-  "region_artifacts": {
-    "national": {
-      "dataset": "enhanced_frs_2023_24"
-    }
-  }
-}
 ```
-
-Notes:
-
-- this is the user-facing reproducibility boundary
-- apps and APIs should surface this bundle, not only country package versions
-- a bundle may reuse a previously staged data artifact if compatibility is explicitly certified
-
-## TRACE export
-
-The internal build manifest and certified runtime bundle remain the operational source of
-truth.
-
-TRACE sits on top of those manifests as a standards-based export layer.
-
-### What gets exported
-
-`policyengine.py` emits a certified-bundle TRO for each supported country. The
-composition pins four artifacts by sha256:
-
-- the bundled country release manifest shipped in `policyengine.py`
-- the country data release manifest resolved for the certified data package version
-- the certified dataset artifact
-- the country model wheel published to PyPI (hash read from the bundled manifest
-  when present, otherwise fetched from the PyPI JSON API at emit time)
-
-TROs use the public TROv vocabulary at
-`https://w3id.org/trace/2023/05/trov#`. Every artifact location in the TRO
-is a dereferenceable HTTPS URI or a local path relative to the shipped
-wheel. Certification metadata is carried as structured `pe:*` fields on
-the `trov:TransparentResearchPerformance` node so downstream tooling can
-read `pe:certifiedForModelVersion`, `pe:compatibilityBasis`,
-`pe:builtWithModelVersion`, `pe:dataBuildFingerprint`, and `pe:dataBuildId`
-without parsing prose. Every TRO also carries `pe:emittedIn` set to
-`"github-actions"` or `"local"`; CI-emitted TROs additionally carry
-`pe:ciRunUrl` and `pe:ciGitSha`.
-
-Country `*-data` repos should also emit a matching `trace.tro.jsonld` per
-data release covering the release manifest and every staged artifact hash.
-That is a country-data concern and lives in those repos.
-
-#### Emitting a bundle TRO
-
-From Python:
-
-```python
-from policyengine.core.release_manifest import get_data_release_manifest, get_release_manifest
-from policyengine.core.trace_tro import build_trace_tro_from_release_bundle, serialize_trace_tro
-
-country = get_release_manifest("us")
-tro = build_trace_tro_from_release_bundle(country, get_data_release_manifest("us"))
-Path("us.trace.tro.jsonld").write_bytes(serialize_trace_tro(tro))
+UserWarning: Installed policyengine-us version (1.602.0) does not match
+the bundled policyengine.py manifest (1.653.3). Calculations will run
+against the installed version, but dataset compatibility is not guaranteed.
 ```
 
-From the CLI:
+Pin exactly to match a release for strict reproducibility:
 
-```
-policyengine trace-tro us --out us.trace.tro.jsonld
+```bash
+pip install policyengine==4.0.0 policyengine-us==1.653.3 policyengine-us-data==2.12.0
 ```
 
-At release time, `scripts/generate_trace_tros.py` regenerates the bundled
-`data/release_manifests/{country}.trace.tro.jsonld` files, and the
-`Versioning` CI job commits them alongside the changelog so every published
-wheel ships with the matching TRO.
+## Certifying an analysis
 
-#### Emitting a per-simulation TRO
+For a published analysis (paper, policy brief, congressional testimony), attach the manifest to your results:
 
 ```python
-from policyengine.results import write_results_with_trace_tro
+from policyengine.provenance import write_manifest
 
-write_results_with_trace_tro(
-    results,                                # ResultsJson instance
-    "results.json",                         # where to write results
-    bundle_tro=bundle_tro,                  # loaded from the shipped bundle
-    reform_payload={"salt_cap": 0},
-    bundle_tro_url=(
-        "https://raw.githubusercontent.com/PolicyEngine/policyengine.py/"
-        "v3.4.5/src/policyengine/data/release_manifests/us.trace.tro.jsonld"
-    ),
-)
+result = economic_impact_analysis(reform=REFORM, year=2026)
+write_manifest(result, path="my_analysis.manifest.json")
 ```
 
-The `bundle_tro_url` is recorded on the performance node as
-`pe:bundleTroUrl`. A verifier can fetch that URL, recompute its sha256,
-and confirm it matches the `bundle_tro` artifact hash in the simulation
-TRO's composition. Without this anchor, the bundle reference is only as
-trustworthy as whoever produced the JSON.
+The manifest captures package versions, dataset hash, reform spec, and a hash of the result. Readers can verify reproducibility by installing the same pinned stack and rerunning.
 
-#### Validating a received TRO
+## Dataset content addressing
 
-Structural validation:
+Microdata files are content-addressed — the filename includes a SHA hash. `enhanced_cps_2024.h5` at one publish date is a different artifact than at a later date; they live at different Hugging Face paths.
 
+```python
+dataset_uri = "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5"
+dataset = pe.us.ensure_datasets([dataset_uri])[0]
+dataset.content_hash
 ```
-policyengine trace-tro-validate path/to/tro.jsonld
-```
-
-The shipped schema at `policyengine/data/schemas/trace_tro.schema.json`
-checks structural fields, canonical hex-encoded sha256s, the required
-`pe:emittedIn`, and that `trov:hasLocation` uses HTTPS (or the
-well-known local paths `results.json`, `reform.json`,
-`bundle.trace.tro.jsonld`). The same schema is exercised in the test
-suite against generated TROs.
 
-Content validation (the verifier workflow a replication reviewer
-should run):
+Always cite the full URI (including revision if pinning) in published work.
 
-```python
-import hashlib, json, requests
-from policyengine.core.trace_tro import canonical_json_bytes
+## Building your own manifest
 
-sim_tro = json.load(open("results.trace.tro.jsonld"))
-perf = sim_tro["@graph"][0]["trov:hasPerformance"]
+If you fork and modify the country model or data, publish your own manifest:
 
-# 1. Fetch the bundle TRO from its pinned URL and recompute its hash.
-bundle_bytes = requests.get(perf["pe:bundleTroUrl"]).content
-bundle_hash = hashlib.sha256(canonical_json_bytes(json.loads(bundle_bytes))).hexdigest()
+```python
+from policyengine.provenance import build_manifest
 
-# 2. Compare against the hash recorded in the simulation TRO's composition.
-recorded = next(
-    a["trov:sha256"]
-    for a in sim_tro["@graph"][0]["trov:hasComposition"]["trov:hasArtifact"]
-    if a["@id"].endswith("bundle_tro")
+manifest = build_manifest(
+    country_code="us",
+    model_version="my-fork-1.0.0",
+    dataset_hashes={"my_dataset": "sha256:..."},
 )
-assert bundle_hash == recorded, "bundle_tro_url content does not match sim TRO"
-
-# 3. Confirm the fingerprint recorded on the performance matches the
-#    fingerprint inside the fetched bundle.
-bundle = json.loads(bundle_bytes)
-bundle_fp = bundle["@graph"][0]["trov:hasComposition"]["trov:hasFingerprint"]["trov:sha256"]
-assert perf["pe:bundleFingerprint"] == bundle_fp
 ```
 
-A sim TRO with a swapped `bundle_tro` dict but a truthful
-`pe:bundleTroUrl` will fail step 2; a sim TRO with both swapped will
-fail step 3.
-
-#### Known limitations
-
-- TROs are emitted unsigned. A signed attestation (sigstore or in-toto)
-  is a future addition that will bind TROs to a trusted-system key.
-- The bundle composition does not yet pin a transitive lockfile
-  (`uv.lock`/`poetry.lock`), a Python interpreter version, or an OS. AEA
-  reviewers may demand these; the schema is extensible.
-- The model wheel is hashed by PyPI's published sha256. If a wheel is
-  yanked and re-uploaded under the same version, the hash will change
-  and the TRO becomes invalid — which is the correct behaviour.
-- Country data packages whose data release manifest is private require
-  `HUGGING_FACE_TOKEN` at emit time. The regeneration script skips
-  countries whose data release manifest is unreachable so a partial run
-  does not block other countries.
-
-### What TRACE does not replace
-
-TRACE is not the source of truth for compatibility policy.
-
-In particular, TRACE does not decide:
-
-- whether a new model version can safely reuse an existing data artifact
-- how `data_build_fingerprint` is computed
-- which staged artifact becomes a supported runtime default
-
-Those decisions still belong to the country data build manifest and the
-`policyengine.py` certified runtime bundle.
-
-### Why we still want it
-
-TRACE adds three things our internal manifests do not provide by themselves:
-
-- a standard declaration format for provenance exchange
-- a composition fingerprint over the exact artifacts in scope
-- a better external surface for papers, audits, and reproducibility reviews
-
-That is why the recommended design is:
-
-- internal manifests for build/certification control
-- generated TRACE TROs for standards-based export
-
-## Compatibility rule
-
-The architecture should avoid forcing a new data build for every harmless country model release.
-
-To do that safely, compatibility must be explicit.
-
-### Data build fingerprint
-
-Each country model package should expose a `data_build_fingerprint` that covers the subset of logic that affects dataset construction or calibration.
-
-Examples of inputs to the fingerprint:
-
-- variables used in imputations
-- variables used in calibration loss matrices
-- parameters referenced during data construction
-- uprating or target-computation logic used during the build
-
-Things that should usually not affect the fingerprint:
-
-- runtime-only outputs that are not used in data construction
-- UI-oriented metadata
-- code paths unrelated to data construction
-
-### Certification rules
-
-`policyengine.py` may certify a staged data artifact for a model version only if one of the following is true:
-
-1. the model version exactly matches the `built_with_model_package.version`
-2. the model version has the same `data_build_fingerprint` as the build-time model version
-
-If neither is true, the bundle release must fail and a new data build is required.
-
-This should be a hard failure, not a warning.
-
-## Artifact states
-
-Artifacts should move through explicit states:
-
-- `staged`: built by the country data repo and available for inspection or later certification
-- `certified`: referenced by a released `policyengine.py` runtime bundle
-- `deprecated`: no longer recommended for new use, but still reproducible
-
-The key point is that `staged` and `certified` are different states. A staged artifact is not automatically part of a supported runtime release.
-
-## UK release workflow
-
-### Case 1: model-only release
-
-1. Cut UK model release candidate `M`.
-2. Compute `data_build_fingerprint(M)`.
-3. Compare it to the fingerprint recorded in the previously certified data build manifest.
-4. If the fingerprint matches, skip the expensive UK data rebuild.
-5. Release `policyengine.py` with a new certified runtime bundle that points to the existing staged UK artifact.
-
-### Case 2: data-affecting release
-
-1. Cut UK model release candidate `M`.
-2. Compute `data_build_fingerprint(M)`.
-3. If the fingerprint changed, build a new UK data artifact in `policyengine-uk-data` against:
-   - exact `policyengine-uk==M`
-   - exact target snapshot
-   - exact raw input hashes
-4. Stage the new artifact under a build-specific immutable path or revision.
-5. Publish the UK data build manifest.
-6. Release `policyengine.py` with a certified runtime bundle that points to the new staged artifact.
-
-## Implementation guidance
-
-The current `release_manifest.json` mechanism in country data repos is a good starting point, but it is not yet enough on its own. The target implementation should add:
-
-- `built_with_model_package.version`
-- `built_with_model_package.git_sha`
-- `built_with_model_package.data_build_fingerprint`
-- calibration target snapshot metadata
-- immutable staged artifact paths or revisions
-
-The target implementation in `policyengine.py` should add:
-
-- hard validation of bundle certification rules
-- explicit runtime bundle metadata on simulations, APIs, and app responses
-- checksum-backed dataset resolution from the certified bundle manifest
-
-## Why not let `policyengine.py` build all country data directly?
+Users of your fork install your pinned stack and get your manifest.
 
-Because that would centralise the wrong concerns:
+## When to care about this
 
-- country-specific private data handling would move into the generic orchestration layer
-- country-specific build logic would move into the generic orchestration layer
-- expensive build failures would block the top-level runtime package more often
-- provenance would still originate in the country data pipeline, so `policyengine.py` would not actually eliminate the need for the country build manifest
+- Publishing numbers (paper, brief, official analysis)
+- Regulatory submissions where auditors must reproduce
+- Long-running studies where package versions will drift over the analysis window
 
-`policyengine.py` should be the certification boundary, not the country data build system.
+For day-to-day exploration, version drift between `policyengine` and country packages is tolerable and the warning is informational.
diff --git a/docs/visualisation.md b/docs/visualisation.md
index 662ec3b1..6e833ec2 100644
--- a/docs/visualisation.md
+++ b/docs/visualisation.md
@@ -1,72 +1,71 @@
-# Visualisation utilities
+---
+title: "Visualisation"
+---
 
-PolicyEngine provides utilities for creating publication-ready charts that follow our visual style guidelines.
+PolicyEngine outputs come with `.to_plotly()` helpers for the most common chart shapes. These produce publication-ready Plotly figures with PolicyEngine's color palette — override or customize as you would any Plotly figure.
 
-## Formatting plotly figures
-
-The `format_fig()` function applies PolicyEngine's visual style to plotly figures, ensuring consistency across all analyses and publications.
+## Decile impact
 
 ```python
-from policyengine.utils import format_fig, COLORS
-import plotly.graph_objects as go
-
-# Create your figure
-fig = go.Figure()
-fig.add_trace(go.Scatter(x=[1, 2, 3], y=[4, 5, 6], name="Data"))
-
-# Apply PolicyEngine styling
-format_fig(
-    fig,
-    title="Example chart",
-    xaxis_title="X axis",
-    yaxis_title="Y axis",
-    height=600,
-    width=800
-)
+from policyengine.outputs import DecileImpact
 
+impact = DecileImpact().compute(baseline, reformed)
+fig = impact.to_plotly()
 fig.show()
 ```
 
-## Visual style principles
+## Budget over reform dimension
 
-The formatting applies these principles automatically:
-
-**Colours**: Primary teal (#319795) with semantic colours for different data types (success/green, warning/yellow, error/red, info/blue). Access colours via the `COLORS` dictionary:
+Iterating a reform over a parameter (e.g. CTC amount from $0 to $3,000) and plotting the trajectory is two steps:
 
 ```python
-from policyengine.utils import COLORS
+amounts = [0, 1_000, 2_000, 3_000]
+budgets = []
+for amount in amounts:
+    impact = economic_impact_analysis(
+        reform={"gov.irs.credits.ctc.amount.adult_dependent": amount},
+        year=2026,
+    )
+    budgets.append(impact.budget.total_change)
 
-fig.add_trace(go.Scatter(
-    x=x_data,
-    y=y_data,
-    line=dict(color=COLORS["primary"])
-))
+import plotly.graph_objects as go
+fig = go.Figure(go.Scatter(x=amounts, y=budgets, mode="lines+markers"))
+fig.update_layout(xaxis_title="CTC amount ($)", yaxis_title="Budget cost ($bn)")
 ```
 
-**Typography**: Inter font family with appropriate sizing (12px for labels, 14px for body text, 16px for titles).
+## Household reform curve
+
+`HouseholdImpact` traces one household across a range of employment incomes under a reform:
+
+```python
+from policyengine.outputs import HouseholdImpact
+
+traj = HouseholdImpact(
+    household_fixture={"people": [{"age": 35}], "tax_unit": {"filing_status": "SINGLE"}},
+    income_range=(0, 200_000, 1_000),
+).compute(baseline_reform={}, reform=REFORM)
+
+traj.to_plotly().show()
+```
 
-**Layout**: Clean white background with subtle grey gridlines and appropriate margins (48px) for professional presentation.
+Useful for showing benefit cliffs and marginal tax rates.
 
-**Clarity**: Data-driven design that prioritises immediate understanding over decoration.
+## Color palette
 
-## Available colours
+PolicyEngine's palette is available via the design system:
 
 ```python
-COLORS = {
-    "primary": "#319795",         # Teal (main brand colour)
-    "primary_light": "#E6FFFA",
-    "primary_dark": "#1D4044",
-    "success": "#22C55E",         # Green (positive changes)
-    "warning": "#FEC601",         # Yellow (cautions)
-    "error": "#EF4444",           # Red (negative changes)
-    "info": "#1890FF",            # Blue (neutral information)
-    "gray_light": "#F2F4F7",
-    "gray": "#667085",
-    "gray_dark": "#101828",
-    "blue_secondary": "#026AA2",
-}
+from policyengine.plotting import PALETTE
+
+PALETTE.BLUE_PRIMARY
+PALETTE.GRAY_600
 ```
 
-## Complete example
+## Exporting
+
+Every Plotly figure can be exported:
 
-See [UK employment income variation](examples.md#uk-employment-income-variation) for a full demonstration of using `format_fig()` in an analysis workflow.
+```python
+fig.write_image("chart.png", width=1000, height=600)
+fig.write_html("chart.html", include_plotlyjs="cdn")
+```
diff --git a/examples/household_impact_example.py b/examples/household_impact_example.py
index f2902daf..4b96cd96 100644
--- a/examples/household_impact_example.py
+++ b/examples/household_impact_example.py
@@ -1,46 +1,37 @@
-"""Example: Calculate household tax and benefit impacts.
+"""Example: calculate tax and benefit outcomes for custom households.
 
-This script demonstrates using calculate_household_impact for both UK and US
-to compute taxes and benefits for custom households.
+Demonstrates the v4 :func:`policyengine.us.calculate_household` and
+:func:`policyengine.uk.calculate_household` entry points. Both take flat
+keyword arguments, accept reform dicts directly, and return a
+:class:`~policyengine.tax_benefit_models.common.HouseholdResult` that
+supports dot-access for scalar lookups.
 
-Run: python examples/household_impact_example.py
+Run: ``python examples/household_impact_example.py``
 """
 
-from policyengine.tax_benefit_models.uk import (
-    UKHouseholdInput,
-)
-from policyengine.tax_benefit_models.uk import (
-    calculate_household_impact as calculate_uk_impact,
-)
-from policyengine.tax_benefit_models.us import (
-    USHouseholdInput,
-)
-from policyengine.tax_benefit_models.us import (
-    calculate_household_impact as calculate_us_impact,
-)
-
-
-def uk_example():
-    """UK household impact example."""
+from __future__ import annotations
+
+import policyengine as pe
+
+
+def uk_example() -> None:
     print("=" * 60)
-    print("UK HOUSEHOLD IMPACT")
+    print("UK household calculator")
     print("=" * 60)
 
-    # Single adult earning £50,000
-    household = UKHouseholdInput(
+    # Single adult earning £50,000.
+    single = pe.uk.calculate_household(
         people=[{"age": 35, "employment_income": 50_000}],
         year=2026,
     )
-    result = calculate_uk_impact(household)
-
     print("\nSingle adult, £50k income:")
-    print(f"  Net income: £{result.household['hbai_household_net_income']:,.0f}")
-    print(f"  Income tax: £{result.person[0]['income_tax']:,.0f}")
-    print(f"  National Insurance: £{result.person[0]['national_insurance']:,.0f}")
-    print(f"  Total tax: £{result.household['household_tax']:,.0f}")
+    print(f"  Net income:         £{single.household.hbai_household_net_income:,.0f}")
+    print(f"  Income tax:         £{single.person[0].income_tax:,.0f}")
+    print(f"  National Insurance: £{single.person[0].national_insurance:,.0f}")
+    print(f"  Total tax:          £{single.household.household_tax:,.0f}")
 
-    # Family with two children, £30k income, renting
-    household = UKHouseholdInput(
+    # Family with two children, £30k income, renting in the North West.
+    family = pe.uk.calculate_household(
         people=[
             {"age": 35, "employment_income": 30_000},
             {"age": 33},
@@ -57,59 +48,52 @@ def uk_example():
         },
         year=2026,
     )
-    result = calculate_uk_impact(household)
-
     print("\nFamily (2 adults, 2 children), £30k income, renting:")
-    print(f"  Net income: £{result.household['hbai_household_net_income']:,.0f}")
-    print(f"  Income tax: £{result.person[0]['income_tax']:,.0f}")
-    print(f"  Child benefit: £{result.benunit[0]['child_benefit']:,.0f}")
-    print(f"  Universal credit: £{result.benunit[0]['universal_credit']:,.0f}")
-    print(f"  Total benefits: £{result.household['household_benefits']:,.0f}")
+    print(f"  Net income:       £{family.household.hbai_household_net_income:,.0f}")
+    print(f"  Income tax:       £{family.person[0].income_tax:,.0f}")
+    print(f"  Child benefit:    £{family.benunit.child_benefit:,.0f}")
+    print(f"  Universal credit: £{family.benunit.universal_credit:,.0f}")
+    print(f"  Total benefits:   £{family.household.household_benefits:,.0f}")
 
 
-def us_example():
-    """US household impact example."""
+def us_example() -> None:
     print("\n" + "=" * 60)
-    print("US HOUSEHOLD IMPACT")
+    print("US household calculator")
     print("=" * 60)
 
-    # Single adult earning $50,000
-    household = USHouseholdInput(
-        people=[{"age": 35, "employment_income": 50_000, "is_tax_unit_head": True}],
+    # Single adult earning $50,000 in California.
+    single = pe.us.calculate_household(
+        people=[{"age": 35, "employment_income": 50_000}],
         tax_unit={"filing_status": "SINGLE"},
         household={"state_code_str": "CA"},
-        year=2024,
+        year=2026,
     )
-    result = calculate_us_impact(household)
-
     print("\nSingle adult, $50k income (California):")
-    print(f"  Net income: ${result.household['household_net_income']:,.0f}")
-    print(f"  Income tax: ${result.tax_unit[0]['income_tax']:,.0f}")
-    print(f"  Payroll tax: ${result.tax_unit[0]['employee_payroll_tax']:,.0f}")
+    print(f"  Net income:  ${single.household.household_net_income:,.0f}")
+    print(f"  Income tax:  ${single.tax_unit.income_tax:,.0f}")
+    print(f"  Payroll tax: ${single.tax_unit.employee_payroll_tax:,.0f}")
 
-    # Married couple with children, lower income
-    household = USHouseholdInput(
+    # Married couple with two kids, Texas, lower income.
+    family = pe.us.calculate_household(
         people=[
-            {"age": 35, "employment_income": 40_000, "is_tax_unit_head": True},
-            {"age": 33, "is_tax_unit_spouse": True},
-            {"age": 8, "is_tax_unit_dependent": True},
-            {"age": 5, "is_tax_unit_dependent": True},
+            {"age": 35, "employment_income": 40_000},
+            {"age": 33},
+            {"age": 8},
+            {"age": 5},
         ],
         tax_unit={"filing_status": "JOINT"},
         household={"state_code_str": "TX"},
-        year=2024,
+        year=2026,
     )
-    result = calculate_us_impact(household)
-
     print("\nMarried couple with 2 children, $40k income (Texas):")
-    print(f"  Net income: ${result.household['household_net_income']:,.0f}")
-    print(f"  Federal income tax: ${result.tax_unit[0]['income_tax']:,.0f}")
-    print(f"  EITC: ${result.tax_unit[0]['eitc']:,.0f}")
-    print(f"  Child tax credit: ${result.tax_unit[0]['ctc']:,.0f}")
-    print(f"  SNAP: ${result.spm_unit[0]['snap']:,.0f}")
+    print(f"  Net income:         ${family.household.household_net_income:,.0f}")
+    print(f"  Federal income tax: ${family.tax_unit.income_tax:,.0f}")
+    print(f"  EITC:               ${family.tax_unit.eitc:,.0f}")
+    print(f"  Child tax credit:   ${family.tax_unit.ctc:,.0f}")
+    print(f"  SNAP:               ${family.spm_unit.snap:,.0f}")
 
 
-def main():
+def main() -> None:
     uk_example()
     us_example()
     print("\n" + "=" * 60)
diff --git a/pyproject.toml b/pyproject.toml
index 72af3935..f09e0a04 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "policyengine"
-version = "3.6.0"
+version = "4.0.0"
 description = "A package to conduct policy analysis using PolicyEngine tax-benefit models."
 readme = "README.md"
 authors = [
diff --git a/scripts/generate_trace_tros.py b/scripts/generate_trace_tros.py
index dce7ae8e..f9533bd9 100644
--- a/scripts/generate_trace_tros.py
+++ b/scripts/generate_trace_tros.py
@@ -18,12 +18,12 @@
 import sys
 from pathlib import Path
 
-from policyengine.core.release_manifest import (
+from policyengine.provenance.manifest import (
     DataReleaseManifestUnavailableError,
     get_data_release_manifest,
     get_release_manifest,
 )
-from policyengine.core.trace_tro import (
+from policyengine.provenance.trace import (
     build_trace_tro_from_release_bundle,
     serialize_trace_tro,
 )
diff --git a/src/policyengine/__init__.py b/src/policyengine/__init__.py
index e69de29b..a8de3971 100644
--- a/src/policyengine/__init__.py
+++ b/src/policyengine/__init__.py
@@ -0,0 +1,46 @@
+"""PolicyEngine — one Python API for tax and benefit policy.
+
+Canonical entry points for a fresh coding session:
+
+.. code-block:: python
+
+    import policyengine as pe
+
+    # Single-household calculator (US).
+    result = pe.us.calculate_household(
+        people=[{"age": 35, "employment_income": 60000}],
+        tax_unit={"filing_status": "SINGLE"},
+        year=2026,
+        reform={"gov.irs.credits.ctc.amount.adult_dependent": 1000},
+    )
+    print(result.tax_unit.income_tax, result.household.household_net_income)
+
+    # UK:
+    uk_result = pe.uk.calculate_household(
+        people=[{"age": 30, "employment_income": 50000}],
+        year=2026,
+    )
+
+    # Lower-level microsimulation building blocks.
+    from policyengine import Simulation  # or: pe.Simulation
+
+Each country module exposes ``calculate_household``, ``model``
+(the pinned ``TaxBenefitModelVersion``), and the microsim helpers.
+"""
+
+from importlib.util import find_spec
+
+from policyengine import outputs as outputs
+from policyengine.core import Simulation as Simulation
+
+if find_spec("policyengine_us") is not None:
+    from policyengine.tax_benefit_models import us as us
+else:  # pragma: no cover
+    us = None  # type: ignore[assignment]
+
+if find_spec("policyengine_uk") is not None:
+    from policyengine.tax_benefit_models import uk as uk
+else:  # pragma: no cover
+    uk = None  # type: ignore[assignment]
+
+__all__ = ["Simulation", "outputs", "uk", "us"]
diff --git a/src/policyengine/cli.py b/src/policyengine/cli.py
index add36388..3a659643 100644
--- a/src/policyengine/cli.py
+++ b/src/policyengine/cli.py
@@ -6,7 +6,7 @@
 - ``trace-tro-validate <path>`` validate a TRO against the shipped schema
 - ``release-manifest <country>`` print the bundled country manifest
 
-See :mod:`policyengine.core.trace_tro` and ``docs/release-bundles.md``.
+See :mod:`policyengine.provenance.trace` and ``docs/release-bundles.md``.
 """
 
 from __future__ import annotations
@@ -18,11 +18,11 @@
 from pathlib import Path
 from typing import Optional, Sequence
 
-from policyengine.core.release_manifest import (
+from policyengine.provenance.manifest import (
     get_data_release_manifest,
     get_release_manifest,
 )
-from policyengine.core.trace_tro import (
+from policyengine.provenance.trace import (
     build_trace_tro_from_release_bundle,
     serialize_trace_tro,
 )
diff --git a/src/policyengine/core/__init__.py b/src/policyengine/core/__init__.py
index 71ca0132..4f749de4 100644
--- a/src/policyengine/core/__init__.py
+++ b/src/policyengine/core/__init__.py
@@ -1,3 +1,11 @@
+"""Core value objects: Dataset, Variable, Parameter, Policy, Simulation, Region.
+
+Provenance (release manifests, TRACE TROs) lives in
+:mod:`policyengine.provenance` and is intentionally not re-exported
+here — importing a core value object should not pull in the
+provenance layer.
+"""
+
 from .dataset import Dataset
 from .dataset import YearData as YearData
 from .dataset import map_to_entity as map_to_entity
@@ -11,22 +19,6 @@
 from .region import Region as Region
 from .region import RegionRegistry as RegionRegistry
 from .region import RegionType as RegionType
-from .release_manifest import CertifiedDataArtifact as CertifiedDataArtifact
-from .release_manifest import CountryReleaseManifest as CountryReleaseManifest
-from .release_manifest import DataBuildInfo as DataBuildInfo
-from .release_manifest import DataCertification as DataCertification
-from .release_manifest import DataPackageVersion as DataPackageVersion
-from .release_manifest import DataReleaseArtifact as DataReleaseArtifact
-from .release_manifest import DataReleaseManifest as DataReleaseManifest
-from .release_manifest import PackageVersion as PackageVersion
-from .release_manifest import (
-    certify_data_release_compatibility as certify_data_release_compatibility,
-)
-from .release_manifest import get_data_release_manifest as get_data_release_manifest
-from .release_manifest import get_release_manifest as get_release_manifest
-from .release_manifest import (
-    resolve_managed_dataset_reference as resolve_managed_dataset_reference,
-)
 from .scoping_strategy import RegionScopingStrategy as RegionScopingStrategy
 from .scoping_strategy import RowFilterStrategy as RowFilterStrategy
 from .scoping_strategy import ScopingStrategy as ScopingStrategy
@@ -38,19 +30,6 @@
 from .tax_benefit_model_version import (
     TaxBenefitModelVersion as TaxBenefitModelVersion,
 )
-from .trace_tro import (
-    build_simulation_trace_tro as build_simulation_trace_tro,
-)
-from .trace_tro import (
-    build_trace_tro_from_release_bundle as build_trace_tro_from_release_bundle,
-)
-from .trace_tro import (
-    compute_trace_composition_fingerprint as compute_trace_composition_fingerprint,
-)
-from .trace_tro import (
-    extract_bundle_tro_reference as extract_bundle_tro_reference,
-)
-from .trace_tro import serialize_trace_tro as serialize_trace_tro
 from .variable import Variable as Variable
 
 # Rebuild models to resolve forward references
diff --git a/src/policyengine/core/scoping_strategy.py b/src/policyengine/core/scoping_strategy.py
index 7d9b5126..81778f47 100644
--- a/src/policyengine/core/scoping_strategy.py
+++ b/src/policyengine/core/scoping_strategy.py
@@ -14,7 +14,6 @@
 from pathlib import Path
 from typing import Annotated, Literal, Optional, Union
 
-import h5py
 import numpy as np
 import pandas as pd
 from microdf import MicroDataFrame
@@ -69,7 +68,7 @@ class RowFilterStrategy(RegionScopingStrategy):
 
     strategy_type: Literal["row_filter"] = "row_filter"
     variable_name: str
-    variable_value: str
+    variable_value: Union[str, int, float]
 
     def apply(
         self,
@@ -127,7 +126,11 @@ def apply(
 
         region_id = self._find_region_index(lookup_df, self.region_code)
 
-        # Download weight matrix and extract weights for this region
+        # Download weight matrix and extract weights for this region.
+        # h5py is only needed here, so import lazily to keep
+        # `from policyengine.core import ...` light.
+        import h5py
+
         weights_path = download_gcs_file(
             bucket=self.weight_matrix_bucket,
             file_path=self.weight_matrix_key,
diff --git a/src/policyengine/core/simulation.py b/src/policyengine/core/simulation.py
index 5002b141..e4b261ee 100644
--- a/src/policyengine/core/simulation.py
+++ b/src/policyengine/core/simulation.py
@@ -1,9 +1,9 @@
 import logging
 from datetime import datetime
-from typing import Optional
+from typing import Any, Optional, Union
 from uuid import uuid4
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, model_validator
 
 from .cache import LRUCache
 from .dataset import Dataset
@@ -18,12 +18,62 @@
 
 
 class Simulation(BaseModel):
+    """Population microsimulation over a certified dataset.
+
+    Canonical call shape:
+
+    .. code-block:: python
+
+        import policyengine as pe
+        from policyengine.core import Simulation
+
+        datasets = pe.us.ensure_datasets(
+            datasets=["hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5"],
+            years=[2026], data_folder="./data",
+        )
+        dataset = datasets["enhanced_cps_2024_2026"]
+
+        # Baseline
+        baseline = Simulation(dataset=dataset, tax_benefit_model_version=pe.us.model)
+
+        # Reform — same flat dict shape as pe.us.calculate_household(reform=...).
+        # Parameter path indexing uses "[0].amount" for scale/breakdown entries.
+        reform = Simulation(
+            dataset=dataset,
+            tax_benefit_model_version=pe.us.model,
+            policy={"gov.irs.credits.ctc.amount.base[0].amount": 3_000},
+        )
+
+        baseline.ensure()
+        reform.ensure()
+
+    The ``policy`` / ``dynamic`` kwargs accept either a ``Policy`` /
+    ``Dynamic`` object or a flat ``{"param.path": value}`` /
+    ``{"param.path": {date: value}}`` dict that is compiled against
+    ``tax_benefit_model_version`` at construction time (unknown paths
+    raise with close-match suggestions). Scalar values default to
+    ``{dataset.year}-01-01`` as their effective date.
+
+    See ``policyengine.core.scoping_strategy`` for sub-national scoping.
+    """
+
     id: str = Field(default_factory=lambda: str(uuid4()))
     created_at: datetime = Field(default_factory=datetime.now)
     updated_at: datetime = Field(default_factory=datetime.now)
 
-    policy: Optional[Policy] = None
-    dynamic: Optional[Dynamic] = None
+    policy: Optional[Union[Policy, dict[str, Any]]] = Field(
+        default=None,
+        description=(
+            "Reform policy. Pass a ``Policy`` directly, or a flat "
+            "``{'param.path': value}`` / ``{'param.path': {date: value}}`` "
+            "dict and it will be compiled against "
+            "``tax_benefit_model_version`` at run time."
+        ),
+    )
+    dynamic: Optional[Union[Dynamic, dict[str, Any]]] = Field(
+        default=None,
+        description=("Behavioural-response overlay. Same dict shape as ``policy``."),
+    )
     dataset: Dataset = None
 
     scoping_strategy: Optional[ScopingStrategy] = Field(
@@ -44,6 +94,42 @@ class Simulation(BaseModel):
 
     output_dataset: Optional[Dataset] = None
 
+    @model_validator(mode="after")
+    def _compile_dict_reforms(self) -> "Simulation":
+        """Coerce dict ``policy`` / ``dynamic`` inputs into proper objects.
+
+        Runs at ``mode="after"`` because compiling needs both
+        ``tax_benefit_model_version`` (for path validation) and
+        ``dataset.year`` (for effective-date defaulting) — both on ``self``.
+        """
+        from policyengine.tax_benefit_models.common.reform import (
+            compile_reform_to_dynamic,
+            compile_reform_to_policy,
+        )
+
+        year = getattr(self.dataset, "year", None)
+        for field, compiler in (
+            ("policy", compile_reform_to_policy),
+            ("dynamic", compile_reform_to_dynamic),
+        ):
+            value = getattr(self, field)
+            if not isinstance(value, dict):
+                continue
+            if self.tax_benefit_model_version is None:
+                raise ValueError(
+                    f"Cannot compile a dict {field} without "
+                    "tax_benefit_model_version; pass model_version or a "
+                    f"{field.capitalize()}."
+                )
+            setattr(
+                self,
+                field,
+                compiler(
+                    value, year=year, model_version=self.tax_benefit_model_version
+                ),
+            )
+        return self
+
     def run(self):
         self.tax_benefit_model_version.run(self)
 
diff --git a/src/policyengine/core/tax_benefit_model_version.py b/src/policyengine/core/tax_benefit_model_version.py
index eeddef85..5eb8f525 100644
--- a/src/policyengine/core/tax_benefit_model_version.py
+++ b/src/policyengine/core/tax_benefit_model_version.py
@@ -4,14 +4,15 @@
 
 from pydantic import BaseModel, Field
 
-from .release_manifest import (
+from policyengine.provenance.manifest import (
     CountryReleaseManifest,
     DataCertification,
     PackageVersion,
     get_data_release_manifest,
 )
+from policyengine.provenance.trace import build_trace_tro_from_release_bundle
+
 from .tax_benefit_model import TaxBenefitModel
-from .trace_tro import build_trace_tro_from_release_bundle
 
 if TYPE_CHECKING:
     from .parameter import Parameter
diff --git a/src/policyengine/countries/uk/regions.py b/src/policyengine/countries/uk/regions.py
index d90f0ad0..32430d48 100644
--- a/src/policyengine/countries/uk/regions.py
+++ b/src/policyengine/countries/uk/regions.py
@@ -15,11 +15,11 @@
 from typing import TYPE_CHECKING
 
 from policyengine.core.region import Region, RegionRegistry
-from policyengine.core.release_manifest import resolve_region_dataset_path
 from policyengine.core.scoping_strategy import (
     RowFilterStrategy,
     WeightReplacementStrategy,
 )
+from policyengine.provenance.manifest import resolve_region_dataset_path
 
 if TYPE_CHECKING:
     pass
diff --git a/src/policyengine/countries/us/regions.py b/src/policyengine/countries/us/regions.py
index 9e20d8b3..ca2f6b4f 100644
--- a/src/policyengine/countries/us/regions.py
+++ b/src/policyengine/countries/us/regions.py
@@ -8,8 +8,8 @@
 """
 
 from policyengine.core.region import Region, RegionRegistry
-from policyengine.core.release_manifest import resolve_region_dataset_path
 from policyengine.core.scoping_strategy import RowFilterStrategy
+from policyengine.provenance.manifest import resolve_region_dataset_path
 
 from .data import AT_LARGE_STATES, DISTRICT_COUNTS, US_PLACES, US_STATES
 
diff --git a/src/policyengine/data/release_manifests/uk.json b/src/policyengine/data/release_manifests/uk.json
index de8fa505..961defbd 100644
--- a/src/policyengine/data/release_manifests/uk.json
+++ b/src/policyengine/data/release_manifests/uk.json
@@ -1,8 +1,8 @@
 {
   "schema_version": 1,
-  "bundle_id": "uk-3.5.0",
+  "bundle_id": "uk-4.0.0",
   "country_id": "uk",
-  "policyengine_version": "3.5.0",
+  "policyengine_version": "4.0.0",
   "model_package": {
     "name": "policyengine-uk",
     "version": "2.88.0",
diff --git a/src/policyengine/data/release_manifests/us.json b/src/policyengine/data/release_manifests/us.json
index b005eda9..0016aa8a 100644
--- a/src/policyengine/data/release_manifests/us.json
+++ b/src/policyengine/data/release_manifests/us.json
@@ -1,8 +1,8 @@
 {
   "schema_version": 1,
-  "bundle_id": "us-3.5.0",
+  "bundle_id": "us-4.0.0",
   "country_id": "us",
-  "policyengine_version": "3.5.0",
+  "policyengine_version": "4.0.0",
   "model_package": {
     "name": "policyengine-us",
     "version": "1.653.3",
diff --git a/src/policyengine/graph/__init__.py b/src/policyengine/graph/__init__.py
new file mode 100644
index 00000000..84dd698c
--- /dev/null
+++ b/src/policyengine/graph/__init__.py
@@ -0,0 +1,41 @@
+"""Variable dependency graph for PolicyEngine source trees.
+
+Parses ``Variable`` subclasses in a PolicyEngine jurisdiction (e.g.
+``policyengine-us``, ``policyengine-uk``) and extracts the variable-
+to-variable dataflow graph from formula-method bodies.
+
+The extractor is static: it walks the Python AST and never imports
+user code, so it works on any PolicyEngine source tree without
+requiring the jurisdiction to be installed or the country model to
+resolve. That makes it usable for refactor-impact analysis, CI
+pre-merge checks, docs generation, and code-introspection queries
+from a Claude Code plugin.
+
+Recognized reference patterns in v1:
+
+- ``<entity>("<var>", <period>)`` — direct call on an entity instance
+  (``person``, ``tax_unit``, ``spm_unit``, ``household``, ``family``,
+  ``marital_unit``, ``benunit``).
+- ``add(<entity>, <period>, ["v1", "v2", ...])`` — sum helper; each
+  string in the list becomes an edge.
+
+Typical usage:
+
+.. code-block:: python
+
+    from policyengine.graph import extract_from_path
+
+    graph = extract_from_path("/path/to/policyengine-us/policyengine_us/variables")
+    # Variables that transitively depend on AGI:
+    for downstream in graph.impact("adjusted_gross_income"):
+        print(downstream)
+    # Direct dependencies of a variable:
+    print(graph.deps("earned_income_tax_credit"))
+    # Dependency chain from one variable to another:
+    print(graph.path("wages", "federal_income_tax"))
+"""
+
+from policyengine.graph.extractor import extract_from_path
+from policyengine.graph.graph import VariableGraph
+
+__all__ = ["VariableGraph", "extract_from_path"]
diff --git a/src/policyengine/graph/extractor.py b/src/policyengine/graph/extractor.py
new file mode 100644
index 00000000..1af61a7b
--- /dev/null
+++ b/src/policyengine/graph/extractor.py
@@ -0,0 +1,189 @@
+"""AST-based extractor for PolicyEngine Variable subclasses.
+
+Walks a directory of ``.py`` files, identifies ``Variable`` subclasses
+by looking for ``class Foo(Variable):`` in the AST, and extracts
+variable references from each class's ``formula*`` methods.
+
+The extractor never imports user code, so it works on any PolicyEngine
+source tree regardless of whether the jurisdiction is installed.
+This keeps refactor-impact analysis and CI pre-merge checks fast and
+dependency-free.
+
+Two reference patterns are recognized:
+
+1. ``<entity>("<var>", <period>)`` where ``<entity>`` is a bare ``Name``
+   matching one of:
+   ``person``, ``tax_unit``, ``spm_unit``, ``household``, ``family``,
+   ``marital_unit``, ``benunit``, ``tax_unit``.
+2. ``add(<entity>, <period>, [<list of string literals>])`` — the
+   ``add`` helper that sums a list of variable names on an entity.
+
+Limitations of the v1 extractor (tracked for v2):
+
+- Parameter references (``parameters(period).gov.xxx.yyy``) are not
+  yet captured; only variable-to-variable edges.
+- Dynamic variable names built via string concatenation or format
+  strings are skipped (low-prevalence in practice).
+- ``entity.sum("var")`` or ``entity.mean("var")`` method calls are
+  not yet recognized; only the direct-call form. (Low-prevalence
+  in ``policyengine-us``; common enough to add as a small follow-up.)
+"""
+
+from __future__ import annotations
+
+import ast
+import os
+from pathlib import Path
+from typing import Iterable, Iterator, Sequence, Union
+
+from policyengine.graph.graph import VariableGraph
+
+
+# Names of entity instances as they appear as method parameters in
+# Variable formulas. Any ``Call`` whose ``func`` is a bare ``Name``
+# matching one of these AND whose first arg is a string literal is
+# treated as a variable reference. Bare names (not attribute access)
+# ensures we don't accidentally match something like
+# ``reform.person("x", period)``.
+_ENTITY_CALL_NAMES: frozenset[str] = frozenset(
+    {
+        "person",
+        "tax_unit",
+        "spm_unit",
+        "household",
+        "family",
+        "marital_unit",
+        "benunit",
+    }
+)
+
+
+PathLike = Union[str, "os.PathLike[str]"]
+
+
+def extract_from_path(path: PathLike) -> VariableGraph:
+    """Build a ``VariableGraph`` from all ``.py`` files under ``path``.
+
+    Directories are walked recursively. Files that fail to parse as
+    Python (syntax errors) are silently skipped — the extractor is a
+    best-effort tool over real source trees, not a compiler.
+    """
+    root = Path(path)
+    graph = VariableGraph()
+
+    files: Iterable[Path]
+    if root.is_file():
+        files = [root]
+    else:
+        files = root.rglob("*.py")
+
+    for file_path in files:
+        try:
+            source = file_path.read_text()
+        except (OSError, UnicodeDecodeError):
+            continue
+        try:
+            tree = ast.parse(source, filename=str(file_path))
+        except SyntaxError:
+            continue
+        _visit_module(tree, file_path=str(file_path), graph=graph)
+
+    return graph
+
+
+# -------------------------------------------------------------------
+# AST traversal
+# -------------------------------------------------------------------
+
+
+def _visit_module(tree: ast.Module, *, file_path: str, graph: VariableGraph) -> None:
+    """Register each Variable subclass and walk its formula methods."""
+    for node in tree.body:
+        if not isinstance(node, ast.ClassDef):
+            continue
+        if not _class_inherits_variable(node):
+            continue
+        var_name = node.name
+        graph.add_variable(var_name, file_path=file_path)
+        for child in node.body:
+            if isinstance(child, ast.FunctionDef) and _is_formula_method(child):
+                for dependency in _extract_references(child):
+                    graph.add_edge(dependency=dependency, dependent=var_name)
+
+
+def _class_inherits_variable(cls: ast.ClassDef) -> bool:
+    """True iff the class's base list contains a ``Variable`` name.
+
+    Matches ``class X(Variable):``. Does not resolve aliased imports
+    — PolicyEngine's ``from policyengine_us.model_api import *``
+    convention keeps the base name literally ``Variable``, which is
+    what real jurisdictions use and what this check matches.
+    """
+    for base in cls.bases:
+        if isinstance(base, ast.Name) and base.id == "Variable":
+            return True
+    return False
+
+
+def _is_formula_method(func: ast.FunctionDef) -> bool:
+    """Return True for ``formula`` and ``formula_YYYY`` methods."""
+    return func.name == "formula" or func.name.startswith("formula_")
+
+
+# -------------------------------------------------------------------
+# Reference extraction from a formula body
+# -------------------------------------------------------------------
+
+
+def _extract_references(func: ast.FunctionDef) -> Iterator[str]:
+    """Yield every variable name referenced in the function body."""
+    for node in ast.walk(func):
+        if not isinstance(node, ast.Call):
+            continue
+        # Pattern 1: <entity>("<var>", <period>)
+        entity_ref = _entity_call_to_variable(node)
+        if entity_ref is not None:
+            yield entity_ref
+            continue
+        # Pattern 2: add(<entity>, <period>, ["v1", "v2", ...])
+        yield from _add_call_to_variables(node)
+
+
+def _entity_call_to_variable(call: ast.Call) -> str | None:
+    """Return the variable name if ``call`` is an entity-call pattern.
+
+    The entity has to be a bare Name (not an attribute access), so
+    calls like ``some.object.person("x", period)`` are deliberately
+    not matched. First positional arg must be a string literal.
+    """
+    if not isinstance(call.func, ast.Name):
+        return None
+    if call.func.id not in _ENTITY_CALL_NAMES:
+        return None
+    if not call.args:
+        return None
+    first = call.args[0]
+    if isinstance(first, ast.Constant) and isinstance(first.value, str):
+        return first.value
+    return None
+
+
+def _add_call_to_variables(call: ast.Call) -> Iterator[str]:
+    """Yield variable names from an ``add(entity, period, [list])`` call.
+
+    Matches the common helper. The third positional arg must be a
+    ``list`` literal of string literals. Anything dynamically built
+    is skipped.
+    """
+    if not isinstance(call.func, ast.Name):
+        return
+    if call.func.id not in {"add", "aggr"}:
+        return
+    if len(call.args) < 3:
+        return
+    names_arg = call.args[2]
+    if not isinstance(names_arg, (ast.List, ast.Tuple)):
+        return
+    for elt in names_arg.elts:
+        if isinstance(elt, ast.Constant) and isinstance(elt.value, str):
+            yield elt.value
diff --git a/src/policyengine/graph/graph.py b/src/policyengine/graph/graph.py
new file mode 100644
index 00000000..f6b360c0
--- /dev/null
+++ b/src/policyengine/graph/graph.py
@@ -0,0 +1,124 @@
+"""NetworkX-backed variable dependency graph.
+
+Separated from the extractor so the data structure is easy to test
+independently, easy to serialize/deserialize, and easy to enrich with
+additional edge types (parameter reads, cross-jurisdiction links) in
+later versions.
+"""
+
+from __future__ import annotations
+
+from typing import Iterable, Optional
+
+import networkx as nx
+
+
+class VariableGraph:
+    """Directed graph of PolicyEngine variable dependencies.
+
+    Nodes are variable names (strings). Edges run from a *dependency*
+    to a *dependent*: ``A -> B`` means "computing B reads A". With
+    this orientation, ``impact(A)`` is the set of downstream nodes
+    reachable from A, and ``deps(B)`` is the set of upstream nodes
+    that reach into B.
+
+    The constructor accepts an optional pre-built graph for testing
+    and deserialization; normal callers will get instances via the
+    extractor.
+    """
+
+    def __init__(self, digraph: Optional[nx.DiGraph] = None) -> None:
+        self._g = digraph if digraph is not None else nx.DiGraph()
+
+    # ------------------------------------------------------------------
+    # Construction helpers (used by the extractor)
+    # ------------------------------------------------------------------
+
+    def add_variable(self, name: str, file_path: Optional[str] = None) -> None:
+        """Register a variable as a node. Safe to call repeatedly."""
+        if name in self._g:
+            if file_path and "file_path" not in self._g.nodes[name]:
+                self._g.nodes[name]["file_path"] = file_path
+            return
+        self._g.add_node(name, file_path=file_path)
+
+    def add_edge(self, dependency: str, dependent: str) -> None:
+        """Record that ``dependent`` reads ``dependency`` in a formula."""
+        # Auto-register the dependency node if it wasn't declared yet;
+        # this is common when a formula references a variable defined
+        # in a file the extractor hasn't reached yet, or a variable
+        # whose class lives in a different subpackage.
+        if dependency not in self._g:
+            self._g.add_node(dependency, file_path=None)
+        if dependent not in self._g:
+            self._g.add_node(dependent, file_path=None)
+        self._g.add_edge(dependency, dependent)
+
+    # ------------------------------------------------------------------
+    # Query surface
+    # ------------------------------------------------------------------
+
+    def has_variable(self, name: str) -> bool:
+        """True iff ``name`` was registered as an explicitly-defined variable.
+
+        Nodes that only exist because some formula *references* them —
+        but whose class definition was never seen — are excluded.
+        """
+        if name not in self._g:
+            return False
+        return self._g.nodes[name].get("file_path") is not None
+
+    def deps(self, name: str) -> Iterable[str]:
+        """Return variables that ``name``'s formula reads directly.
+
+        Order follows networkx's insertion order, so the caller can
+        expect a deterministic sequence for a given extraction run.
+        """
+        if name not in self._g:
+            return iter(())
+        return list(self._g.predecessors(name))
+
+    def impact(self, name: str) -> Iterable[str]:
+        """Return variables that transitively depend on ``name``.
+
+        Equivalent to the descendants set in the graph's natural
+        orientation (edges run dep → dependent). Excludes ``name``
+        itself. Empty for leaf variables that nothing reads.
+        """
+        if name not in self._g:
+            return iter(())
+        return list(nx.descendants(self._g, name))
+
+    def path(self, src: str, dst: str) -> Optional[list[str]]:
+        """Return a shortest dependency chain from ``src`` to ``dst``.
+
+        Returns the node list including both endpoints, or ``None`` if
+        no such path exists.
+        """
+        if src not in self._g or dst not in self._g:
+            return None
+        try:
+            return nx.shortest_path(self._g, src, dst)
+        except nx.NetworkXNoPath:
+            return None
+
+    # ------------------------------------------------------------------
+    # Introspection for callers that want the raw structure
+    # ------------------------------------------------------------------
+
+    @property
+    def nx_graph(self) -> nx.DiGraph:
+        """The underlying NetworkX DiGraph (read-only-by-convention)."""
+        return self._g
+
+    def __contains__(self, name: str) -> bool:
+        return name in self._g
+
+    def __len__(self) -> int:
+        return self._g.number_of_nodes()
+
+    def __repr__(self) -> str:
+        return (
+            f"VariableGraph({self._g.number_of_nodes()} variables, "
+            f"{self._g.number_of_edges()} edges)"
+        )
diff --git a/src/policyengine/outputs/__init__.py b/src/policyengine/outputs/__init__.py
index 61311f46..13ff2a26 100644
--- a/src/policyengine/outputs/__init__.py
+++ b/src/policyengine/outputs/__init__.py
@@ -49,6 +49,7 @@
     calculate_us_poverty_by_race,
     calculate_us_poverty_rates,
 )
+from policyengine.outputs.program_statistics import ProgramStatistics
 
 __all__ = [
     "Output",
@@ -59,6 +60,7 @@
     "ChangeAggregateType",
     "DecileImpact",
     "calculate_decile_impacts",
+    "ProgramStatistics",
     "IntraDecileImpact",
     "compute_intra_decile_impacts",
     "Poverty",
diff --git a/src/policyengine/outputs/constituency_impact.py b/src/policyengine/outputs/constituency_impact.py
index 60f76e0b..02e1bdfd 100644
--- a/src/policyengine/outputs/constituency_impact.py
+++ b/src/policyengine/outputs/constituency_impact.py
@@ -7,7 +7,6 @@
 
 from typing import TYPE_CHECKING, Optional
 
-import h5py
 import numpy as np
 import pandas as pd
 from pydantic import ConfigDict
@@ -43,6 +42,8 @@ def run(self) -> None:
         constituency_df = pd.read_csv(self.constituency_csv_path)
 
         # Load weight matrix: shape (N_constituencies, N_households)
+        import h5py
+
         with h5py.File(self.weight_matrix_path, "r") as f:
             weight_matrix = f[self.year][...]
 
diff --git a/src/policyengine/outputs/local_authority_impact.py b/src/policyengine/outputs/local_authority_impact.py
index 20b17efe..a4850dbf 100644
--- a/src/policyengine/outputs/local_authority_impact.py
+++ b/src/policyengine/outputs/local_authority_impact.py
@@ -7,7 +7,6 @@
 
 from typing import TYPE_CHECKING, Optional
 
-import h5py
 import numpy as np
 import pandas as pd
 from pydantic import ConfigDict
@@ -43,6 +42,8 @@ def run(self) -> None:
         la_df = pd.read_csv(self.local_authority_csv_path)
 
         # Load weight matrix: shape (N_local_authorities, N_households)
+        import h5py
+
         with h5py.File(self.weight_matrix_path, "r") as f:
             weight_matrix = f[self.year][...]
 
diff --git a/src/policyengine/tax_benefit_models/us/outputs.py b/src/policyengine/outputs/program_statistics.py
similarity index 98%
rename from src/policyengine/tax_benefit_models/us/outputs.py
rename to src/policyengine/outputs/program_statistics.py
index 1dd6f001..a48ff8a8 100644
--- a/src/policyengine/tax_benefit_models/us/outputs.py
+++ b/src/policyengine/outputs/program_statistics.py
@@ -1,4 +1,4 @@
-"""US-specific output templates."""
+"""Shared `ProgramStatistics` for reform-impact tables (US + UK)."""
 
 from typing import Optional
 
diff --git a/src/policyengine/provenance/__init__.py b/src/policyengine/provenance/__init__.py
new file mode 100644
index 00000000..548b7fc6
--- /dev/null
+++ b/src/policyengine/provenance/__init__.py
@@ -0,0 +1,89 @@
+"""Release-bundle provenance + TRACE TRO emission.
+
+Separated from :mod:`policyengine.core` so the value-object layer
+(Dataset, Variable, Parameter, Policy, Simulation, Region) doesn't
+force provenance imports on every consumer.
+
+.. code-block:: python
+
+    from policyengine.provenance import (
+        get_release_manifest,
+        get_data_release_manifest,
+        build_trace_tro_from_release_bundle,
+        build_simulation_trace_tro,
+        serialize_trace_tro,
+    )
+"""
+
+from .manifest import (
+    CertifiedDataArtifact as CertifiedDataArtifact,
+)
+from .manifest import (
+    CountryReleaseManifest as CountryReleaseManifest,
+)
+from .manifest import (
+    DataBuildInfo as DataBuildInfo,
+)
+from .manifest import (
+    DataCertification as DataCertification,
+)
+from .manifest import (
+    DataPackageVersion as DataPackageVersion,
+)
+from .manifest import (
+    DataReleaseArtifact as DataReleaseArtifact,
+)
+from .manifest import (
+    DataReleaseManifest as DataReleaseManifest,
+)
+from .manifest import (
+    DataReleaseManifestUnavailableError as DataReleaseManifestUnavailableError,
+)
+from .manifest import (
+    PackageVersion as PackageVersion,
+)
+from .manifest import (
+    certify_data_release_compatibility as certify_data_release_compatibility,
+)
+from .manifest import (
+    fetch_pypi_wheel_metadata as fetch_pypi_wheel_metadata,
+)
+from .manifest import (
+    get_data_release_manifest as get_data_release_manifest,
+)
+from .manifest import (
+    get_release_manifest as get_release_manifest,
+)
+from .manifest import (
+    https_dataset_uri as https_dataset_uri,
+)
+from .manifest import (
+    https_release_manifest_uri as https_release_manifest_uri,
+)
+from .manifest import (
+    resolve_dataset_reference as resolve_dataset_reference,
+)
+from .manifest import (
+    resolve_local_managed_dataset_source as resolve_local_managed_dataset_source,
+)
+from .manifest import (
+    resolve_managed_dataset_reference as resolve_managed_dataset_reference,
+)
+from .trace import (
+    build_simulation_trace_tro as build_simulation_trace_tro,
+)
+from .trace import (
+    build_trace_tro_from_release_bundle as build_trace_tro_from_release_bundle,
+)
+from .trace import (
+    canonical_json_bytes as canonical_json_bytes,
+)
+from .trace import (
+    compute_trace_composition_fingerprint as compute_trace_composition_fingerprint,
+)
+from .trace import (
+    extract_bundle_tro_reference as extract_bundle_tro_reference,
+)
+from .trace import (
+    serialize_trace_tro as serialize_trace_tro,
+)
diff --git a/src/policyengine/core/release_manifest.py b/src/policyengine/provenance/manifest.py
similarity index 100%
rename from src/policyengine/core/release_manifest.py
rename to src/policyengine/provenance/manifest.py
diff --git a/src/policyengine/core/trace_tro.py b/src/policyengine/provenance/trace.py
similarity index 99%
rename from src/policyengine/core/trace_tro.py
rename to src/policyengine/provenance/trace.py
index 76f1661a..83ac0b5b 100644
--- a/src/policyengine/core/trace_tro.py
+++ b/src/policyengine/provenance/trace.py
@@ -24,7 +24,7 @@
 from collections.abc import Iterable, Mapping
 from typing import Any, Optional
 
-from .release_manifest import (
+from .manifest import (
     CountryReleaseManifest,
     DataCertification,
     DataReleaseManifest,
diff --git a/src/policyengine/results/trace_tro.py b/src/policyengine/results/trace_tro.py
index d904d5b4..85c7aed8 100644
--- a/src/policyengine/results/trace_tro.py
+++ b/src/policyengine/results/trace_tro.py
@@ -5,7 +5,7 @@
 specific reform + ``results.json`` payload so a published result can
 be cited with an immutable composition fingerprint.
 
-See :mod:`policyengine.core.trace_tro` for the bundle-level layer.
+See :mod:`policyengine.provenance.trace` for the bundle-level layer.
 """
 
 from __future__ import annotations
@@ -14,7 +14,7 @@
 from pathlib import Path
 from typing import Optional, Union
 
-from policyengine.core.trace_tro import (
+from policyengine.provenance.trace import (
     build_simulation_trace_tro,
     serialize_trace_tro,
 )
diff --git a/src/policyengine/tax_benefit_models/common/__init__.py b/src/policyengine/tax_benefit_models/common/__init__.py
new file mode 100644
index 00000000..654f350d
--- /dev/null
+++ b/src/policyengine/tax_benefit_models/common/__init__.py
@@ -0,0 +1,16 @@
+"""Country-agnostic helpers for household calculation and reform analysis.
+
+The country modules (:mod:`policyengine.tax_benefit_models.us`,
+:mod:`policyengine.tax_benefit_models.uk`) thread these helpers through
+their public ``calculate_household`` / ``analyze_reform`` entry points.
+"""
+
+from .extra_variables import dispatch_extra_variables as dispatch_extra_variables
+from .model_version import (
+    MicrosimulationModelVersion as MicrosimulationModelVersion,
+)
+from .reform import compile_reform as compile_reform
+from .reform import compile_reform_to_dynamic as compile_reform_to_dynamic
+from .reform import compile_reform_to_policy as compile_reform_to_policy
+from .result import EntityResult as EntityResult
+from .result import HouseholdResult as HouseholdResult
diff --git a/src/policyengine/tax_benefit_models/common/extra_variables.py b/src/policyengine/tax_benefit_models/common/extra_variables.py
new file mode 100644
index 00000000..e3426e6b
--- /dev/null
+++ b/src/policyengine/tax_benefit_models/common/extra_variables.py
@@ -0,0 +1,52 @@
+"""Dispatch a flat ``extra_variables`` list to a per-entity mapping.
+
+Callers pass a flat list — ``extra_variables=["adjusted_gross_income",
+"state_agi", "is_medicaid_eligible"]`` — and the library looks up each
+name on the country model to figure out which entity it belongs on.
+Unknown names raise with a close-match suggestion.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Iterable
+from difflib import get_close_matches
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from policyengine.core.tax_benefit_model_version import TaxBenefitModelVersion
+
+
+def dispatch_extra_variables(
+    *,
+    model_version: TaxBenefitModelVersion,
+    names: Iterable[str],
+) -> dict[str, list[str]]:
+    """Group ``names`` by the entity each variable lives on.
+
+    Raises :class:`ValueError` if any name is not defined on the model.
+    """
+    by_entity: dict[str, list[str]] = {}
+    unknown: list[str] = []
+
+    variables_by_name = model_version.variables_by_name
+    for name in names:
+        variable = variables_by_name.get(name)
+        if variable is None:
+            unknown.append(name)
+            continue
+        by_entity.setdefault(variable.entity, []).append(name)
+
+    if unknown:
+        lines = [
+            f"extra_variables contains names not defined on "
+            f"{model_version.model.id} {model_version.version}:",
+        ]
+        for name in unknown:
+            suggestions = get_close_matches(
+                name, list(variables_by_name), n=1, cutoff=0.7
+            )
+            suggestion = f" (did you mean '{suggestions[0]}'?)" if suggestions else ""
+            lines.append(f"  - '{name}'{suggestion}")
+        raise ValueError("\n".join(lines))
+
+    return by_entity
diff --git a/src/policyengine/tax_benefit_models/common/model_version.py b/src/policyengine/tax_benefit_models/common/model_version.py
new file mode 100644
index 00000000..dc5d44d8
--- /dev/null
+++ b/src/policyengine/tax_benefit_models/common/model_version.py
@@ -0,0 +1,257 @@
+"""Base class for country ``TaxBenefitModelVersion`` implementations.
+
+The US and UK model-version classes share roughly 300 lines of loading logic:
+manifest certification, the variable-copy loop over the country ``system``,
+the parameter-copy loop, entity-relationship construction, and simple
+``save`` / ``load`` passthroughs. Only ``run`` (and the country-specific
+``managed_microsimulation`` helper) diverge enough to warrant per-country
+implementations.
+
+This module extracts the shared behaviour into ``MicrosimulationModelVersion``.
+Country subclasses declare class-level metadata (``country_code``,
+``package_name``, ``group_entities``, ``entity_variables``) and override a
+handful of thin hooks (``_load_system``, ``_load_region_registry``,
+``_dataset_class``, ``run``).
+"""
+
+from __future__ import annotations
+
+import datetime
+import os
+import warnings
+from importlib import metadata
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, ClassVar, Optional
+
+import pandas as pd
+
+from policyengine.core import (
+    Parameter,
+    ParameterNode,
+    TaxBenefitModelVersion,
+    Variable,
+)
+from policyengine.provenance.manifest import (
+    certify_data_release_compatibility,
+    get_release_manifest,
+)
+from policyengine.utils.entity_utils import build_entity_relationships
+from policyengine.utils.parameter_labels import (
+    build_scale_lookup,
+    generate_label_for_parameter,
+)
+
+if TYPE_CHECKING:
+    from policyengine.core.simulation import Simulation
+
+
+class MicrosimulationModelVersion(TaxBenefitModelVersion):
+    """Shared init / save / load logic for country microsim model versions.
+
+    Subclasses must set the four class attributes below and implement the
+    country-specific hooks. ``run`` is intentionally left abstract: its
+    country-specific logic (reform application, simulation builder, output
+    post-processing) varies enough that a shared skeleton would hide real
+    divergences.
+    """
+
+    # --- Subclass metadata -------------------------------------------------
+    country_code: ClassVar[str] = ""
+    """ISO-ish country identifier used by the release manifest ("us"/"uk")."""
+
+    package_name: ClassVar[str] = ""
+    """Distribution name used with ``importlib.metadata.version``."""
+
+    group_entities: ClassVar[list[str]] = []
+    """Group entities (non-person) for this country, in dataset order."""
+
+    entity_variables: dict[str, list[str]] = {}
+    """Variables to materialise per entity when writing output datasets."""
+
+    # --- Construction ------------------------------------------------------
+    def __init__(self, **kwargs: Any) -> None:
+        if not self.country_code or not self.package_name:
+            raise RuntimeError(
+                f"{type(self).__name__} must declare country_code and "
+                "package_name class attributes"
+            )
+
+        manifest = get_release_manifest(self.country_code)
+        if kwargs.get("version") is None:
+            kwargs["version"] = manifest.model_package.version
+
+        installed_model_version = metadata.version(self.package_name)
+        if installed_model_version != manifest.model_package.version:
+            warnings.warn(
+                f"Installed {self.package_name} version "
+                f"({installed_model_version}) does not match the bundled "
+                "policyengine.py manifest "
+                f"({manifest.model_package.version}). Calculations will "
+                "run against the installed version, but dataset "
+                "compatibility is not guaranteed. To silence this "
+                "warning, install the version pinned by the manifest.",
+                UserWarning,
+                stacklevel=2,
+            )
+
+        model_build_metadata = self._get_runtime_data_build_metadata()
+        data_certification = certify_data_release_compatibility(
+            self.country_code,
+            runtime_model_version=installed_model_version,
+            runtime_data_build_fingerprint=model_build_metadata.get(
+                "data_build_fingerprint"
+            ),
+        )
+
+        super().__init__(**kwargs)
+        self.release_manifest = manifest
+        self.model_package = manifest.model_package
+        self.data_package = manifest.data_package
+        self.default_dataset_uri = manifest.default_dataset_uri
+        self.data_certification = data_certification
+        self.region_registry = self._load_region_registry()
+        self.id = f"{self.model.id}@{self.version}"
+
+        system = self._load_system()
+        self._populate_variables(system)
+        self._populate_parameters(system)
+
+    # --- Hooks ------------------------------------------------------------
+    @classmethod
+    def _get_runtime_data_build_metadata(cls) -> dict[str, Optional[str]]:
+        """Return build metadata from the country package, if available."""
+        raise NotImplementedError
+
+    def _load_system(self):
+        """Return the country package's ``system`` object."""
+        raise NotImplementedError
+
+    def _load_region_registry(self):
+        """Return the country's ``RegionRegistry``."""
+        raise NotImplementedError
+
+    @property
+    def _dataset_class(self):
+        """Return the country's ``PolicyEngine{Country}Dataset`` class."""
+        raise NotImplementedError
+
+    # --- Shared loading helpers ------------------------------------------
+    def _populate_variables(self, system) -> None:
+        from policyengine_core.enums import Enum
+        from policyengine_core.parameters.operations.get_parameter import (
+            get_parameter,
+        )
+
+        for var_obj in system.variables.values():
+            default_val = var_obj.default_value
+            if var_obj.value_type is Enum:
+                default_val = default_val.name
+            elif var_obj.value_type is datetime.date:
+                default_val = default_val.isoformat()
+
+            variable = Variable(
+                id=self.id + "-" + var_obj.name,
+                name=var_obj.name,
+                label=getattr(var_obj, "label", None),
+                tax_benefit_model_version=self,
+                entity=var_obj.entity.key,
+                description=var_obj.documentation,
+                data_type=(
+                    var_obj.value_type if var_obj.value_type is not Enum else str
+                ),
+                default_value=default_val,
+                value_type=var_obj.value_type,
+            )
+            if (
+                hasattr(var_obj, "possible_values")
+                and var_obj.possible_values is not None
+            ):
+                variable.possible_values = list(
+                    map(
+                        lambda x: x.name,
+                        var_obj.possible_values._value2member_map_.values(),
+                    )
+                )
+            # Resolve parameter-path adds/subtracts to concrete lists so
+            # consumers always see list[str].
+            for attr in ("adds", "subtracts"):
+                value = getattr(var_obj, attr, None)
+                if value is None:
+                    continue
+                if isinstance(value, str):
+                    try:
+                        param = get_parameter(system.parameters, value)
+                        setattr(variable, attr, list(param("2025-01-01")))
+                    except Exception:
+                        setattr(variable, attr, None)
+                else:
+                    setattr(variable, attr, value)
+            self.add_variable(variable)
+
+    def _populate_parameters(self, system) -> None:
+        from policyengine_core.parameters import Parameter as CoreParameter
+        from policyengine_core.parameters import ParameterNode as CoreParameterNode
+
+        scale_lookup = build_scale_lookup(system)
+
+        for param_node in system.parameters.get_descendants():
+            if isinstance(param_node, CoreParameter):
+                parameter = Parameter(
+                    id=self.id + "-" + param_node.name,
+                    name=param_node.name,
+                    label=generate_label_for_parameter(
+                        param_node, system, scale_lookup
+                    ),
+                    tax_benefit_model_version=self,
+                    description=param_node.description,
+                    data_type=type(param_node(2025)),
+                    unit=param_node.metadata.get("unit"),
+                    _core_param=param_node,
+                )
+                self.add_parameter(parameter)
+            elif isinstance(param_node, CoreParameterNode):
+                node = ParameterNode(
+                    id=self.id + "-" + param_node.name,
+                    name=param_node.name,
+                    label=param_node.metadata.get("label"),
+                    description=param_node.description,
+                    tax_benefit_model_version=self,
+                )
+                self.add_parameter_node(node)
+
+    # --- Shared run-surface helpers --------------------------------------
+    def _build_entity_relationships(self, dataset) -> pd.DataFrame:
+        """Build a DataFrame mapping each person to their containing entities."""
+        person_data = pd.DataFrame(dataset.data.person)
+        return build_entity_relationships(person_data, self.group_entities)
+
+    def save(self, simulation: Simulation) -> None:
+        """Persist the simulation's output dataset to its bundled filepath."""
+        simulation.output_dataset.save()
+
+    def load(self, simulation: Simulation) -> None:
+        """Rehydrate the simulation's output dataset from disk.
+
+        Loads timestamps from filesystem metadata when the file exists so
+        serialised simulations round-trip ``created_at``/``updated_at``.
+        """
+        filepath = str(
+            Path(simulation.dataset.filepath).parent / (simulation.id + ".h5")
+        )
+
+        simulation.output_dataset = self._dataset_class(
+            id=simulation.id,
+            name=simulation.dataset.name,
+            description=simulation.dataset.description,
+            filepath=filepath,
+            year=simulation.dataset.year,
+            is_output_dataset=True,
+        )
+
+        if os.path.exists(filepath):
+            simulation.created_at = datetime.datetime.fromtimestamp(
+                os.path.getctime(filepath)
+            )
+            simulation.updated_at = datetime.datetime.fromtimestamp(
+                os.path.getmtime(filepath)
+            )
diff --git a/src/policyengine/tax_benefit_models/common/reform.py b/src/policyengine/tax_benefit_models/common/reform.py
new file mode 100644
index 00000000..60b564f4
--- /dev/null
+++ b/src/policyengine/tax_benefit_models/common/reform.py
@@ -0,0 +1,197 @@
+"""Compile a simple reform dict into the format policyengine_core expects.
+
+Accepted shapes for the agent-facing API:
+
+.. code-block:: python
+
+    # Scalar — applied from Jan 1 of ``year`` (the simulation year).
+    reform = {"gov.irs.deductions.salt.cap": 0}
+
+    # With explicit effective date(s).
+    reform = {"gov.irs.deductions.salt.cap": {"2026-01-01": 0}}
+
+    # Multiple parameters.
+    reform = {
+        "gov.irs.deductions.salt.cap": 0,
+        "gov.irs.credits.ctc.amount.base[0].amount": 2500,
+    }
+
+**Indexed parameters.** Many PolicyEngine parameters are *breakdown*
+entries keyed by a bracket index (age group, filing status, etc.).
+Their paths end with ``[N].amount`` / ``[N].threshold``. For example
+the CTC base amount in 2026 is
+``gov.irs.credits.ctc.amount.base[0].amount`` (not ``...base``);
+the top-bracket SS wage base is ``gov.ssa.payroll.cap``. If a reform
+dict uses the bracket-head path instead of ``[0].amount`` the
+``ValueError`` will list the close match.
+
+The compiled form is ``{param_path: {period: value}}`` — exactly what
+``policyengine_us.Simulation(reform=...)`` /
+``policyengine_uk.Simulation(reform=...)`` accept at construction.
+
+Scalar reforms default to ``{year}-01-01`` so a caller running
+mid-year does not accidentally get a blended partial-year result.
+Unknown parameter paths raise ``ValueError`` with a close-match
+suggestion; pass ``model_version`` to enable the check.
+"""
+
+from __future__ import annotations
+
+import datetime
+from collections.abc import Mapping
+from difflib import get_close_matches
+from typing import TYPE_CHECKING, Any, Optional
+
+if TYPE_CHECKING:
+    from policyengine.core.dynamic import Dynamic
+    from policyengine.core.policy import Policy
+    from policyengine.core.tax_benefit_model_version import TaxBenefitModelVersion
+
+
+def compile_reform(
+    reform: Optional[Mapping[str, Any]],
+    *,
+    year: Optional[int] = None,
+    model_version: Optional[TaxBenefitModelVersion] = None,
+) -> Optional[dict[str, dict[str, Any]]]:
+    """Compile a simple reform dict to the core reform-dict format.
+
+    Args:
+        reform: Flat mapping from parameter path to either a scalar
+            (applied from ``{year}-01-01``) or a ``{effective_date: value}``
+            mapping.
+        year: Simulation year. Used as the default effective date for
+            scalar values so a mid-year call still targets the whole year.
+        model_version: If provided, parameter paths are validated
+            against ``model_version.parameters_by_name`` and unknown
+            paths raise with a close-match suggestion.
+    """
+    if not reform:
+        return None
+
+    default_date = f"{year}-01-01" if year is not None else "1900-01-01"
+
+    if model_version is not None:
+        valid = set(model_version.parameters_by_name)
+        unknown = [path for path in reform if path not in valid]
+        if unknown:
+            lines = [
+                f"Reform contains parameter paths not defined on "
+                f"{model_version.model.id} {model_version.version}:",
+            ]
+            for path in unknown:
+                suggestions = get_close_matches(path, valid, n=1, cutoff=0.7)
+                hint = f" (did you mean '{suggestions[0]}'?)" if suggestions else ""
+                lines.append(f"  - '{path}'{hint}")
+            raise ValueError("\n".join(lines))
+
+    compiled: dict[str, dict[str, Any]] = {}
+    for parameter_path, spec in reform.items():
+        if isinstance(spec, Mapping):
+            compiled[parameter_path] = {str(k): v for k, v in spec.items()}
+        else:
+            compiled[parameter_path] = {default_date: spec}
+    return compiled
+
+
+def _reform_dict_to_parameter_values(
+    reform: Mapping[str, Any],
+    *,
+    year: Optional[int],
+    model_version: TaxBenefitModelVersion,
+) -> list:
+    """Compile a flat reform dict into a list of ``ParameterValue`` objects.
+
+    Uses :func:`compile_reform` for path validation and effective-date
+    defaulting, then materialises each ``{path: {date: value}}`` pair
+    as an open-ended ``ParameterValue`` bound to a
+    ``Parameter(name=path, tax_benefit_model_version=model_version)``.
+    """
+    from policyengine.core.parameter import Parameter
+    from policyengine.core.parameter_value import ParameterValue
+
+    compiled = compile_reform(reform, year=year, model_version=model_version)
+    if compiled is None:
+        return []
+
+    parameter_values: list[ParameterValue] = []
+    for path, date_to_value in compiled.items():
+        for effective_date, value in date_to_value.items():
+            data_type = type(value) if isinstance(value, (int, float, bool)) else float
+            parameter_values.append(
+                ParameterValue(
+                    parameter=Parameter(
+                        name=path,
+                        tax_benefit_model_version=model_version,
+                        data_type=data_type,
+                    ),
+                    start_date=datetime.datetime.strptime(effective_date, "%Y-%m-%d"),
+                    end_date=None,
+                    value=value,
+                )
+            )
+    return parameter_values
+
+
+def _compile_reform_to(
+    cls,
+    default_name: str,
+    reform: Optional[Mapping[str, Any]],
+    *,
+    year: Optional[int],
+    model_version: TaxBenefitModelVersion,
+    name: Optional[str] = None,
+):
+    parameter_values = _reform_dict_to_parameter_values(
+        reform or {}, year=year, model_version=model_version
+    )
+    if not parameter_values:
+        return None
+    return cls(name=name or default_name, parameter_values=parameter_values)
+
+
+def compile_reform_to_policy(
+    reform: Optional[Mapping[str, Any]],
+    *,
+    year: Optional[int],
+    model_version: TaxBenefitModelVersion,
+    name: Optional[str] = None,
+) -> Optional[Policy]:
+    """Compile a flat reform dict into a fully-assembled ``Policy``.
+
+    Accepts the same ``{param.path: value}`` /
+    ``{param.path: {date: value}}`` shape as :func:`compile_reform`,
+    but returns a ready-to-use ``Policy`` with ``ParameterValue``
+    objects so ``Simulation(policy={...})`` works without hand-building
+    ``Parameter`` / ``ParameterValue``.
+    """
+    from policyengine.core.policy import Policy
+
+    return _compile_reform_to(
+        Policy,
+        "Reform",
+        reform,
+        year=year,
+        model_version=model_version,
+        name=name,
+    )
+
+
+def compile_reform_to_dynamic(
+    reform: Optional[Mapping[str, Any]],
+    *,
+    year: Optional[int],
+    model_version: TaxBenefitModelVersion,
+    name: Optional[str] = None,
+) -> Optional[Dynamic]:
+    """``Dynamic`` counterpart of :func:`compile_reform_to_policy`."""
+    from policyengine.core.dynamic import Dynamic
+
+    return _compile_reform_to(
+        Dynamic,
+        "Dynamic response",
+        reform,
+        year=year,
+        model_version=model_version,
+        name=name,
+    )
diff --git a/src/policyengine/tax_benefit_models/common/result.py b/src/policyengine/tax_benefit_models/common/result.py
new file mode 100644
index 00000000..e73fa406
--- /dev/null
+++ b/src/policyengine/tax_benefit_models/common/result.py
@@ -0,0 +1,79 @@
+"""Dot-access result containers returned by ``calculate_household``.
+
+A result is intentionally thin: it's a ``dict`` subclass that also
+supports attribute access, so callers can write either
+``result.tax_unit.income_tax`` or ``result["tax_unit"]["income_tax"]``.
+The dict shape keeps JSON serialization trivial.
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any, Union
+
+
+class EntityResult(dict):
+    """One entity's computed variables with dict AND attribute access.
+
+    Raises :class:`AttributeError` with the list of available variables
+    when a caller accesses an unknown name, so typos surface a
+    paste-able fix instead of silently returning ``None``.
+    """
+
+    def __getattr__(self, name: str) -> Any:
+        if name.startswith("_"):
+            raise AttributeError(name)
+        if name in self:
+            return self[name]
+        available = ", ".join(sorted(self))
+        raise AttributeError(
+            f"entity has no variable '{name}'. Available: {available}. "
+            f"Pass extra_variables=['{name}'] to calculate_household if "
+            f"'{name}' is a valid variable on the country model that is "
+            f"not in the default output columns."
+        )
+
+    def __setattr__(self, name: str, value: Any) -> None:  # pragma: no cover
+        self[name] = value
+
+
+class HouseholdResult(dict):
+    """Full household calculation result; one key per entity.
+
+    Singleton entities (``household``, ``tax_unit``, ``benunit``, ...)
+    map to a single :class:`EntityResult`; multi-member entities (like
+    ``person``) map to a ``list[EntityResult]``.
+    """
+
+    def __getattr__(self, name: str) -> Any:
+        if name.startswith("_"):
+            raise AttributeError(name)
+        if name in self:
+            return self[name]
+        available = ", ".join(sorted(self))
+        raise AttributeError(
+            f"no entity '{name}' on this result. Available entities: {available}"
+        )
+
+    def __setattr__(self, name: str, value: Any) -> None:  # pragma: no cover
+        self[name] = value
+
+    def to_dict(self) -> dict[str, Any]:
+        """Return a plain ``dict[str, Any]`` copy suitable for JSON dumps."""
+
+        def _convert(value: Any) -> Any:
+            if isinstance(value, EntityResult):
+                return dict(value)
+            if isinstance(value, list):
+                return [_convert(v) for v in value]
+            return value
+
+        return {key: _convert(val) for key, val in self.items()}
+
+    def write(self, path: Union[str, Path]) -> Path:
+        """Write the result to a JSON file and return the path."""
+        path = Path(path)
+        path.parent.mkdir(parents=True, exist_ok=True)
+        path.write_text(json.dumps(self.to_dict(), indent=2) + "\n")
+        return path
diff --git a/src/policyengine/tax_benefit_models/uk/__init__.py b/src/policyengine/tax_benefit_models/uk/__init__.py
index 93533245..3ab098e2 100644
--- a/src/policyengine/tax_benefit_models/uk/__init__.py
+++ b/src/policyengine/tax_benefit_models/uk/__init__.py
@@ -1,16 +1,23 @@
-"""PolicyEngine UK tax-benefit model."""
+"""PolicyEngine UK tax-benefit model.
+
+.. code-block:: python
+
+    import policyengine as pe
+
+    result = pe.uk.calculate_household(
+        people=[{"age": 30, "employment_income": 50000}],
+        year=2026,
+    )
+    print(result.person[0].income_tax, result.household.hbai_household_net_income)
+"""
 
 from importlib.util import find_spec
 
 if find_spec("policyengine_uk") is not None:
     from policyengine.core import Dataset
+    from policyengine.outputs import ProgramStatistics
 
-    from .analysis import (
-        UKHouseholdInput,
-        UKHouseholdOutput,
-        calculate_household_impact,
-        economic_impact_analysis,
-    )
+    from .analysis import economic_impact_analysis
     from .datasets import (
         PolicyEngineUKDataset,
         UKYearData,
@@ -18,21 +25,22 @@
         ensure_datasets,
         load_datasets,
     )
+    from .household import calculate_household
     from .model import (
         PolicyEngineUK,
         PolicyEngineUKLatest,
         managed_microsimulation,
         uk_latest,
-        uk_model,
     )
-    from .outputs import ProgrammeStatistics
 
-    # Rebuild Pydantic models to resolve forward references
+    model = uk_latest
+    """The pinned UK ``TaxBenefitModelVersion`` for this policyengine release."""
+
     Dataset.model_rebuild()
     UKYearData.model_rebuild()
     PolicyEngineUKDataset.model_rebuild()
     PolicyEngineUKLatest.model_rebuild()
-    ProgrammeStatistics.model_rebuild()
+    ProgramStatistics.model_rebuild()
 
     __all__ = [
         "UKYearData",
@@ -43,13 +51,11 @@
         "PolicyEngineUK",
         "PolicyEngineUKLatest",
         "managed_microsimulation",
-        "uk_model",
+        "model",
         "uk_latest",
+        "calculate_household",
         "economic_impact_analysis",
-        "calculate_household_impact",
-        "UKHouseholdInput",
-        "UKHouseholdOutput",
-        "ProgrammeStatistics",
+        "ProgramStatistics",
     ]
 else:
     __all__ = []
diff --git a/src/policyengine/tax_benefit_models/uk/analysis.py b/src/policyengine/tax_benefit_models/uk/analysis.py
index b05e21b0..f37d18be 100644
--- a/src/policyengine/tax_benefit_models/uk/analysis.py
+++ b/src/policyengine/tax_benefit_models/uk/analysis.py
@@ -1,15 +1,16 @@
-"""General utility functions for UK policy reform analysis."""
+"""Microsimulation reform analysis for the UK model.
 
-import tempfile
-from pathlib import Path
-from typing import Any, Optional
+The single-household calculator lives in :mod:`.household`; this module
+holds the population-level reform-analysis helpers.
+"""
+
+from __future__ import annotations
 
 import pandas as pd
-from microdf import MicroDataFrame
-from pydantic import BaseModel, Field
+from pydantic import BaseModel
 
 from policyengine.core import OutputCollection, Simulation
-from policyengine.core.policy import Policy
+from policyengine.outputs import ProgramStatistics
 from policyengine.outputs.decile_impact import (
     DecileImpact,
     calculate_decile_impacts,
@@ -23,140 +24,12 @@
     calculate_uk_poverty_rates,
 )
 
-from .datasets import PolicyEngineUKDataset, UKYearData
-from .model import uk_latest
-from .outputs import ProgrammeStatistics
-
-
-class UKHouseholdOutput(BaseModel):
-    """Output from a UK household calculation with all entity data."""
-
-    person: list[dict[str, Any]]
-    benunit: list[dict[str, Any]]
-    household: dict[str, Any]
-
-
-class UKHouseholdInput(BaseModel):
-    """Input for a UK household calculation."""
-
-    people: list[dict[str, Any]]
-    benunit: dict[str, Any] = Field(default_factory=dict)
-    household: dict[str, Any] = Field(default_factory=dict)
-    year: int = 2026
-
-
-def calculate_household_impact(
-    household_input: UKHouseholdInput,
-    policy: Optional[Policy] = None,
-) -> UKHouseholdOutput:
-    """Calculate tax and benefit impacts for a single UK household."""
-    n_people = len(household_input.people)
-
-    # Build person data with defaults
-    person_data = {
-        "person_id": list(range(n_people)),
-        "person_benunit_id": [0] * n_people,
-        "person_household_id": [0] * n_people,
-        "person_weight": [1.0] * n_people,
-    }
-    # Add user-provided person fields
-    for i, person in enumerate(household_input.people):
-        for key, value in person.items():
-            if key not in person_data:
-                person_data[key] = [0.0] * n_people  # Default to 0 for numeric fields
-            person_data[key][i] = value
-
-    # Build benunit data with defaults
-    benunit_data = {
-        "benunit_id": [0],
-        "benunit_weight": [1.0],
-    }
-    for key, value in household_input.benunit.items():
-        benunit_data[key] = [value]
-
-    # Build household data with defaults (required for uprating)
-    household_data = {
-        "household_id": [0],
-        "household_weight": [1.0],
-        "region": ["LONDON"],
-        "tenure_type": ["RENT_PRIVATELY"],
-        "council_tax": [0.0],
-        "rent": [0.0],
-    }
-    for key, value in household_input.household.items():
-        household_data[key] = [value]
-
-    # Create MicroDataFrames
-    person_df = MicroDataFrame(pd.DataFrame(person_data), weights="person_weight")
-    benunit_df = MicroDataFrame(pd.DataFrame(benunit_data), weights="benunit_weight")
-    household_df = MicroDataFrame(
-        pd.DataFrame(household_data), weights="household_weight"
-    )
-
-    # Create temporary dataset
-    tmpdir = tempfile.mkdtemp()
-    filepath = str(Path(tmpdir) / "household_impact.h5")
-
-    dataset = PolicyEngineUKDataset(
-        name="Household impact calculation",
-        description="Single household for impact calculation",
-        filepath=filepath,
-        year=household_input.year,
-        data=UKYearData(
-            person=person_df,
-            benunit=benunit_df,
-            household=household_df,
-        ),
-    )
-
-    # Run simulation
-    simulation = Simulation(
-        dataset=dataset,
-        tax_benefit_model_version=uk_latest,
-        policy=policy,
-    )
-    simulation.run()
-
-    # Extract all output variables defined in entity_variables
-    output_data = simulation.output_dataset.data
-
-    def safe_convert(value):
-        """Convert value to float if numeric, otherwise return as string."""
-        try:
-            return float(value)
-        except (ValueError, TypeError):
-            return str(value)
-
-    person_outputs = []
-    for i in range(n_people):
-        person_dict = {}
-        for var in uk_latest.entity_variables["person"]:
-            person_dict[var] = safe_convert(output_data.person[var].iloc[i])
-        person_outputs.append(person_dict)
-
-    benunit_outputs = []
-    for i in range(len(output_data.benunit)):
-        benunit_dict = {}
-        for var in uk_latest.entity_variables["benunit"]:
-            benunit_dict[var] = safe_convert(output_data.benunit[var].iloc[i])
-        benunit_outputs.append(benunit_dict)
-
-    household_dict = {}
-    for var in uk_latest.entity_variables["household"]:
-        household_dict[var] = safe_convert(output_data.household[var].iloc[0])
-
-    return UKHouseholdOutput(
-        person=person_outputs,
-        benunit=benunit_outputs,
-        household=household_dict,
-    )
-
 
 class PolicyReformAnalysis(BaseModel):
     """Complete policy reform analysis result."""
 
     decile_impacts: OutputCollection[DecileImpact]
-    programme_statistics: OutputCollection[ProgrammeStatistics]
+    program_statistics: OutputCollection[ProgramStatistics]
     baseline_poverty: OutputCollection[Poverty]
     reform_poverty: OutputCollection[Poverty]
     baseline_inequality: Inequality
@@ -167,11 +40,7 @@ def economic_impact_analysis(
     baseline_simulation: Simulation,
     reform_simulation: Simulation,
 ) -> PolicyReformAnalysis:
-    """Perform comprehensive analysis of a policy reform.
-
-    Returns:
-        PolicyReformAnalysis containing decile impacts and programme statistics
-    """
+    """Perform comprehensive analysis of a UK policy reform."""
     baseline_simulation.ensure()
     reform_simulation.ensure()
 
@@ -182,20 +51,16 @@ def economic_impact_analysis(
         "Reform simulation must have more than 100 households"
     )
 
-    # Decile impact
     decile_impacts = calculate_decile_impacts(
         baseline_simulation=baseline_simulation,
         reform_simulation=reform_simulation,
     )
 
-    # Major programmes to analyse
-    programmes = {
-        # Tax
+    programs = {
         "income_tax": {"is_tax": True},
         "national_insurance": {"is_tax": True},
         "vat": {"is_tax": True},
         "council_tax": {"is_tax": True},
-        # Benefits
         "universal_credit": {"is_tax": False},
         "child_benefit": {"is_tax": False},
         "pension_credit": {"is_tax": False},
@@ -204,31 +69,27 @@ def economic_impact_analysis(
         "child_tax_credit": {"is_tax": False},
     }
 
-    programme_statistics = []
-
-    for programme_name, programme_info in programmes.items():
+    program_statistics = []
+    for program_name, program_info in programs.items():
         entity = baseline_simulation.tax_benefit_model_version.get_variable(
-            programme_name
+            program_name
         ).entity
-        is_tax = programme_info["is_tax"]
-
-        stats = ProgrammeStatistics(
+        stats = ProgramStatistics(
             baseline_simulation=baseline_simulation,
             reform_simulation=reform_simulation,
-            programme_name=programme_name,
+            program_name=program_name,
             entity=entity,
-            is_tax=is_tax,
+            is_tax=program_info["is_tax"],
         )
         stats.run()
-        programme_statistics.append(stats)
+        program_statistics.append(stats)
 
-    # Create DataFrame
-    programme_df = pd.DataFrame(
+    program_df = pd.DataFrame(
         [
             {
                 "baseline_simulation_id": p.baseline_simulation.id,
                 "reform_simulation_id": p.reform_simulation.id,
-                "programme_name": p.programme_name,
+                "program_name": p.program_name,
                 "entity": p.entity,
                 "is_tax": p.is_tax,
                 "baseline_total": p.baseline_total,
@@ -239,25 +100,21 @@ def economic_impact_analysis(
                 "winners": p.winners,
                 "losers": p.losers,
             }
-            for p in programme_statistics
+            for p in program_statistics
         ]
     )
-
-    programme_collection = OutputCollection(
-        outputs=programme_statistics, dataframe=programme_df
+    program_collection = OutputCollection(
+        outputs=program_statistics, dataframe=program_df
     )
 
-    # Calculate poverty rates for both simulations
     baseline_poverty = calculate_uk_poverty_rates(baseline_simulation)
     reform_poverty = calculate_uk_poverty_rates(reform_simulation)
-
-    # Calculate inequality for both simulations
     baseline_inequality = calculate_uk_inequality(baseline_simulation)
     reform_inequality = calculate_uk_inequality(reform_simulation)
 
     return PolicyReformAnalysis(
         decile_impacts=decile_impacts,
-        programme_statistics=programme_collection,
+        program_statistics=program_collection,
         baseline_poverty=baseline_poverty,
         reform_poverty=reform_poverty,
         baseline_inequality=baseline_inequality,
diff --git a/src/policyengine/tax_benefit_models/uk/datasets.py b/src/policyengine/tax_benefit_models/uk/datasets.py
index 47f78403..e7207da7 100644
--- a/src/policyengine/tax_benefit_models/uk/datasets.py
+++ b/src/policyengine/tax_benefit_models/uk/datasets.py
@@ -6,7 +6,7 @@
 from pydantic import ConfigDict
 
 from policyengine.core import Dataset, YearData
-from policyengine.core.release_manifest import (
+from policyengine.provenance.manifest import (
     dataset_logical_name,
     resolve_dataset_reference,
 )
diff --git a/src/policyengine/tax_benefit_models/uk/household.py b/src/policyengine/tax_benefit_models/uk/household.py
new file mode 100644
index 00000000..5dbd71bb
--- /dev/null
+++ b/src/policyengine/tax_benefit_models/uk/household.py
@@ -0,0 +1,191 @@
+"""Single-household calculation for the UK model.
+
+.. code-block:: python
+
+    import policyengine as pe
+
+    # Lone parent + one child, £30k wages.
+    result = pe.uk.calculate_household(
+        people=[
+            {"age": 32, "employment_income": 30000},
+            {"age": 6},
+        ],
+        benunit={"would_claim_child_benefit": True},
+        year=2026,
+    )
+    print(result.person[0].income_tax)
+    print(result.benunit.child_benefit)
+    print(result.household.hbai_household_net_income)
+"""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import Any, Optional
+
+from policyengine.tax_benefit_models.common import (
+    EntityResult,
+    HouseholdResult,
+    compile_reform,
+    dispatch_extra_variables,
+)
+from policyengine.utils.household_validation import validate_household_input
+
+from .model import uk_latest
+
+
+def _default_output_columns(
+    extra_by_entity: Mapping[str, list[str]],
+) -> dict[str, list[str]]:
+    merged: dict[str, list[str]] = {}
+    for entity, defaults in uk_latest.entity_variables.items():
+        columns = list(defaults)
+        for extra in extra_by_entity.get(entity, []):
+            if extra not in columns:
+                columns.append(extra)
+        merged[entity] = columns
+    for entity, extras in extra_by_entity.items():
+        merged.setdefault(entity, list(extras))
+    return merged
+
+
+def _safe_convert(value: Any) -> Any:
+    try:
+        return float(value)
+    except (ValueError, TypeError):
+        return str(value) if value is not None else None
+
+
+def _build_situation(
+    *,
+    people: list[Mapping[str, Any]],
+    benunit: Mapping[str, Any],
+    household: Mapping[str, Any],
+    year: int,
+) -> dict[str, Any]:
+    year_str = str(year)
+
+    def _periodise(spec: Mapping[str, Any]) -> dict[str, dict[str, Any]]:
+        return {key: {year_str: value} for key, value in spec.items() if key != "id"}
+
+    person_ids = [f"person_{i}" for i in range(len(people))]
+    persons = {pid: _periodise(person) for pid, person in zip(person_ids, people)}
+
+    def _group(spec: Mapping[str, Any]) -> dict[str, Any]:
+        return {"members": list(person_ids), **_periodise(spec)}
+
+    return {
+        "people": persons,
+        "benunits": {"benunit_0": _group(benunit)},
+        "households": {"household_0": _group(household)},
+    }
+
+
+_ALLOWED_KWARGS = frozenset(
+    {"people", "benunit", "household", "year", "reform", "extra_variables"}
+)
+
+
+def _raise_unexpected_kwargs(unexpected: Mapping[str, Any]) -> None:
+    from difflib import get_close_matches
+
+    lines = ["calculate_household received unsupported keyword arguments:"]
+    for name in unexpected:
+        suggestions = get_close_matches(name, _ALLOWED_KWARGS, n=1, cutoff=0.5)
+        hint = f" (did you mean '{suggestions[0]}'?)" if suggestions else ""
+        if name in {"tax_unit", "marital_unit", "family", "spm_unit"}:
+            hint = (
+                f" — `{name}` is US-only; the UK groups persons into a single `benunit`"
+            )
+        lines.append(f"  - '{name}'{hint}")
+    lines.append(
+        "Valid kwargs: people, benunit, household, year, reform, extra_variables."
+    )
+    raise TypeError("\n".join(lines))
+
+
+def calculate_household(
+    *,
+    people: list[Mapping[str, Any]],
+    benunit: Optional[Mapping[str, Any]] = None,
+    household: Optional[Mapping[str, Any]] = None,
+    year: int = 2026,
+    reform: Optional[Mapping[str, Any]] = None,
+    extra_variables: Optional[list[str]] = None,
+    **unexpected: Any,
+) -> HouseholdResult:
+    """Compute tax and benefit variables for a single UK household.
+
+    Args:
+        people: One dict per person (keys are UK variable names).
+            Must be non-empty.
+        benunit, household: Optional per-entity overrides.
+        year: Calendar year. Defaults to 2026.
+        reform: Optional reform dict. Scalar values default to
+            ``{year}-01-01``; invalid parameter paths raise with a
+            close-match suggestion.
+        extra_variables: Flat list of extra UK variables to compute;
+            the library dispatches each to its entity.
+
+    Returns:
+        :class:`HouseholdResult` with dot-accessible entity results.
+
+    Raises:
+        ValueError: on unknown or mis-placed variable names, or
+            unknown reform parameter paths.
+        TypeError: on US-only kwargs (``tax_unit``, etc.) or other
+            unsupported keyword arguments.
+    """
+    if unexpected:
+        _raise_unexpected_kwargs(unexpected)
+
+    from policyengine_uk import Simulation
+
+    people = list(people)
+    benunit_dict = dict(benunit or {})
+    household_dict = dict(household or {})
+
+    validate_household_input(
+        model_version=uk_latest,
+        entities={
+            "person": people,
+            "benunit": [benunit_dict],
+            "household": [household_dict],
+        },
+    )
+
+    extra_by_entity = dispatch_extra_variables(
+        model_version=uk_latest,
+        names=extra_variables or [],
+    )
+    output_columns = _default_output_columns(extra_by_entity)
+    reform_dict = compile_reform(reform, year=year, model_version=uk_latest)
+
+    simulation = Simulation(
+        situation=_build_situation(
+            people=people,
+            benunit=benunit_dict,
+            household=household_dict,
+            year=year,
+        ),
+        reform=reform_dict,
+    )
+
+    result = HouseholdResult()
+    for entity, columns in output_columns.items():
+        raw = {
+            variable: list(simulation.calculate(variable, period=year, map_to=entity))
+            for variable in columns
+        }
+        if entity == "person":
+            result["person"] = [
+                EntityResult(
+                    {variable: _safe_convert(raw[variable][i]) for variable in columns}
+                )
+                for i in range(len(people))
+            ]
+        else:
+            result[entity] = EntityResult(
+                {variable: _safe_convert(raw[variable][0]) for variable in columns}
+            )
+    return result
diff --git a/src/policyengine/tax_benefit_models/uk/model.py b/src/policyengine/tax_benefit_models/uk/model.py
index ce6f2dd9..67e7a3ae 100644
--- a/src/policyengine/tax_benefit_models/uk/model.py
+++ b/src/policyengine/tax_benefit_models/uk/model.py
@@ -1,31 +1,17 @@
 import datetime
-import warnings
-from importlib import metadata
 from pathlib import Path
 from typing import TYPE_CHECKING, Optional
 
 import pandas as pd
 from microdf import MicroDataFrame
 
-from policyengine.core import (
-    Parameter,
-    ParameterNode,
-    TaxBenefitModel,
-    TaxBenefitModelVersion,
-    Variable,
-)
-from policyengine.core.release_manifest import (
-    certify_data_release_compatibility,
+from policyengine.core import TaxBenefitModel
+from policyengine.provenance.manifest import (
     dataset_logical_name,
-    get_release_manifest,
     resolve_local_managed_dataset_source,
     resolve_managed_dataset_reference,
 )
-from policyengine.utils.entity_utils import build_entity_relationships
-from policyengine.utils.parameter_labels import (
-    build_scale_lookup,
-    generate_label_for_parameter,
-)
+from policyengine.tax_benefit_models.common import MicrosimulationModelVersion
 
 from .datasets import PolicyEngineUKDataset, UKYearData
 
@@ -43,18 +29,11 @@ class PolicyEngineUK(TaxBenefitModel):
 uk_model = PolicyEngineUK()
 
 
-def _get_runtime_data_build_metadata() -> dict[str, Optional[str]]:
-    try:
-        from policyengine_uk.build_metadata import get_data_build_metadata
-    except ModuleNotFoundError as exc:
-        if exc.name != "policyengine_uk.build_metadata":
-            raise
-        return {}
-
-    return get_data_build_metadata() or {}
-
+class PolicyEngineUKLatest(MicrosimulationModelVersion):
+    country_code = "uk"
+    package_name = "policyengine-uk"
+    group_entities = UK_GROUP_ENTITIES
 
-class PolicyEngineUKLatest(TaxBenefitModelVersion):
     model: TaxBenefitModel = uk_model
     version: str = None
     created_at: datetime.datetime = None
@@ -137,147 +116,32 @@ class PolicyEngineUKLatest(TaxBenefitModelVersion):
         ],
     }
 
-    def __init__(self, **kwargs: dict):
-        manifest = get_release_manifest("uk")
-        if "version" not in kwargs or kwargs.get("version") is None:
-            kwargs["version"] = manifest.model_package.version
-
-        installed_model_version = metadata.version("policyengine-uk")
-        if installed_model_version != manifest.model_package.version:
-            warnings.warn(
-                "Installed policyengine-uk version "
-                f"({installed_model_version}) does not match the bundled "
-                "policyengine.py manifest "
-                f"({manifest.model_package.version}). Calculations will "
-                "run against the installed version, but dataset "
-                "compatibility is not guaranteed. To silence this "
-                "warning, install the version pinned by the manifest.",
-                UserWarning,
-                stacklevel=2,
-            )
-
-        model_build_metadata = _get_runtime_data_build_metadata()
-        data_certification = certify_data_release_compatibility(
-            "uk",
-            runtime_model_version=installed_model_version,
-            runtime_data_build_fingerprint=model_build_metadata.get(
-                "data_build_fingerprint"
-            ),
-        )
-
-        super().__init__(**kwargs)
-        self.release_manifest = manifest
-        self.model_package = manifest.model_package
-        self.data_package = manifest.data_package
-        self.default_dataset_uri = manifest.default_dataset_uri
-        self.data_certification = data_certification
-        from policyengine_core.enums import Enum
+    # --- Hooks -----------------------------------------------------------
+    @classmethod
+    def _get_runtime_data_build_metadata(cls) -> dict[str, Optional[str]]:
+        try:
+            from policyengine_uk.build_metadata import get_data_build_metadata
+        except ModuleNotFoundError as exc:
+            if exc.name != "policyengine_uk.build_metadata":
+                raise
+            return {}
+        return get_data_build_metadata() or {}
+
+    def _load_system(self):
         from policyengine_uk.system import system
 
-        # Attach region registry
+        return system
+
+    def _load_region_registry(self):
         from policyengine.countries.uk.regions import uk_region_registry
 
-        self.region_registry = uk_region_registry
-
-        self.id = f"{self.model.id}@{self.version}"
-
-        for var_obj in system.variables.values():
-            # Serialize default_value for JSON compatibility
-            default_val = var_obj.default_value
-            if var_obj.value_type is Enum:
-                default_val = default_val.name
-            elif var_obj.value_type is datetime.date:
-                default_val = default_val.isoformat()
-
-            variable = Variable(
-                id=self.id + "-" + var_obj.name,
-                name=var_obj.name,
-                label=getattr(var_obj, "label", None),
-                tax_benefit_model_version=self,
-                entity=var_obj.entity.key,
-                description=var_obj.documentation,
-                data_type=var_obj.value_type if var_obj.value_type is not Enum else str,
-                default_value=default_val,
-                value_type=var_obj.value_type,
-            )
-            if (
-                hasattr(var_obj, "possible_values")
-                and var_obj.possible_values is not None
-            ):
-                variable.possible_values = list(
-                    map(
-                        lambda x: x.name,
-                        var_obj.possible_values._value2member_map_.values(),
-                    )
-                )
-            # Extract and resolve adds/subtracts.
-            # Core stores these as either list[str] or a parameter path string.
-            # Resolve parameter paths to lists so consumers always get list[str].
-            if hasattr(var_obj, "adds") and var_obj.adds is not None:
-                if isinstance(var_obj.adds, str):
-                    try:
-                        from policyengine_core.parameters.operations.get_parameter import (
-                            get_parameter,
-                        )
-
-                        param = get_parameter(system.parameters, var_obj.adds)
-                        variable.adds = list(param("2025-01-01"))
-                    except (ValueError, Exception):
-                        variable.adds = None
-                else:
-                    variable.adds = var_obj.adds
-            if hasattr(var_obj, "subtracts") and var_obj.subtracts is not None:
-                if isinstance(var_obj.subtracts, str):
-                    try:
-                        from policyengine_core.parameters.operations.get_parameter import (
-                            get_parameter,
-                        )
-
-                        param = get_parameter(system.parameters, var_obj.subtracts)
-                        variable.subtracts = list(param("2025-01-01"))
-                    except (ValueError, Exception):
-                        variable.subtracts = None
-                else:
-                    variable.subtracts = var_obj.subtracts
-            self.add_variable(variable)
-
-        from policyengine_core.parameters import Parameter as CoreParameter
-        from policyengine_core.parameters import ParameterNode as CoreParameterNode
-
-        scale_lookup = build_scale_lookup(system)
-
-        for param_node in system.parameters.get_descendants():
-            if isinstance(param_node, CoreParameter):
-                parameter = Parameter(
-                    id=self.id + "-" + param_node.name,
-                    name=param_node.name,
-                    label=generate_label_for_parameter(
-                        param_node, system, scale_lookup
-                    ),
-                    tax_benefit_model_version=self,
-                    description=param_node.description,
-                    data_type=type(param_node(2025)),
-                    unit=param_node.metadata.get("unit"),
-                    _core_param=param_node,
-                )
-                self.add_parameter(parameter)
-            elif isinstance(param_node, CoreParameterNode):
-                node = ParameterNode(
-                    id=self.id + "-" + param_node.name,
-                    name=param_node.name,
-                    label=param_node.metadata.get("label"),
-                    description=param_node.description,
-                    tax_benefit_model_version=self,
-                )
-                self.add_parameter_node(node)
-
-    def _build_entity_relationships(
-        self, dataset: PolicyEngineUKDataset
-    ) -> pd.DataFrame:
-        """Build a DataFrame mapping each person to their containing entities."""
-        person_data = pd.DataFrame(dataset.data.person)
-        return build_entity_relationships(person_data, UK_GROUP_ENTITIES)
+        return uk_region_registry
+
+    @property
+    def _dataset_class(self):
+        return PolicyEngineUKDataset
 
+    # --- run -------------------------------------------------------------
     def run(self, simulation: "Simulation") -> "Simulation":
         from policyengine_uk import Microsimulation
         from policyengine_uk.data import UKSingleYearDataset
@@ -370,36 +234,6 @@ def run(self, simulation: "Simulation") -> "Simulation":
             ),
         )
 
-    def save(self, simulation: "Simulation"):
-        """Save the simulation's output dataset."""
-        simulation.output_dataset.save()
-
-    def load(self, simulation: "Simulation"):
-        """Load the simulation's output dataset."""
-        import os
-
-        filepath = str(
-            Path(simulation.dataset.filepath).parent / (simulation.id + ".h5")
-        )
-
-        simulation.output_dataset = PolicyEngineUKDataset(
-            id=simulation.id,
-            name=simulation.dataset.name,
-            description=simulation.dataset.description,
-            filepath=filepath,
-            year=simulation.dataset.year,
-            is_output_dataset=True,
-        )
-
-        # Load timestamps from file system metadata
-        if os.path.exists(filepath):
-            simulation.created_at = datetime.datetime.fromtimestamp(
-                os.path.getctime(filepath)
-            )
-            simulation.updated_at = datetime.datetime.fromtimestamp(
-                os.path.getmtime(filepath)
-            )
-
 
 def _managed_release_bundle(
     dataset_uri: str,
@@ -423,8 +257,8 @@ def managed_microsimulation(
     """Construct a country-package Microsimulation pinned to this bundle.
 
     By default this enforces the dataset selection from the bundled
-    `policyengine.py` release manifest. Arbitrary dataset URIs require
-    `allow_unmanaged=True`.
+    ``policyengine.py`` release manifest. Arbitrary dataset URIs require
+    ``allow_unmanaged=True``.
     """
 
     from policyengine_uk import Microsimulation
diff --git a/src/policyengine/tax_benefit_models/uk/outputs.py b/src/policyengine/tax_benefit_models/uk/outputs.py
deleted file mode 100644
index 97032a9c..00000000
--- a/src/policyengine/tax_benefit_models/uk/outputs.py
+++ /dev/null
@@ -1,105 +0,0 @@
-"""UK-specific output templates."""
-
-from typing import Optional
-
-from pydantic import ConfigDict
-
-from policyengine.core import Output, Simulation
-from policyengine.outputs.aggregate import Aggregate, AggregateType
-from policyengine.outputs.change_aggregate import (
-    ChangeAggregate,
-    ChangeAggregateType,
-)
-
-
-class ProgrammeStatistics(Output):
-    """Single programme's statistics from a policy reform - represents one database row."""
-
-    model_config = ConfigDict(arbitrary_types_allowed=True)
-
-    baseline_simulation: Simulation
-    reform_simulation: Simulation
-    programme_name: str
-    entity: str
-    is_tax: bool = False
-
-    # Results populated by run()
-    baseline_total: Optional[float] = None
-    reform_total: Optional[float] = None
-    change: Optional[float] = None
-    baseline_count: Optional[float] = None
-    reform_count: Optional[float] = None
-    winners: Optional[float] = None
-    losers: Optional[float] = None
-
-    def run(self):
-        """Calculate statistics for this programme."""
-        # Baseline totals
-        baseline_total = Aggregate(
-            simulation=self.baseline_simulation,
-            variable=self.programme_name,
-            aggregate_type=AggregateType.SUM,
-            entity=self.entity,
-        )
-        baseline_total.run()
-
-        # Reform totals
-        reform_total = Aggregate(
-            simulation=self.reform_simulation,
-            variable=self.programme_name,
-            aggregate_type=AggregateType.SUM,
-            entity=self.entity,
-        )
-        reform_total.run()
-
-        # Count of recipients/payers (baseline)
-        baseline_count = Aggregate(
-            simulation=self.baseline_simulation,
-            variable=self.programme_name,
-            aggregate_type=AggregateType.COUNT,
-            entity=self.entity,
-            filter_variable=self.programme_name,
-            filter_variable_geq=0.01,
-        )
-        baseline_count.run()
-
-        # Count of recipients/payers (reform)
-        reform_count = Aggregate(
-            simulation=self.reform_simulation,
-            variable=self.programme_name,
-            aggregate_type=AggregateType.COUNT,
-            entity=self.entity,
-            filter_variable=self.programme_name,
-            filter_variable_geq=0.01,
-        )
-        reform_count.run()
-
-        # Winners and losers
-        winners = ChangeAggregate(
-            baseline_simulation=self.baseline_simulation,
-            reform_simulation=self.reform_simulation,
-            variable=self.programme_name,
-            aggregate_type=ChangeAggregateType.COUNT,
-            entity=self.entity,
-            change_geq=0.01 if not self.is_tax else -0.01,
-        )
-        winners.run()
-
-        losers = ChangeAggregate(
-            baseline_simulation=self.baseline_simulation,
-            reform_simulation=self.reform_simulation,
-            variable=self.programme_name,
-            aggregate_type=ChangeAggregateType.COUNT,
-            entity=self.entity,
-            change_leq=-0.01 if not self.is_tax else 0.01,
-        )
-        losers.run()
-
-        # Populate results
-        self.baseline_total = float(baseline_total.result)
-        self.reform_total = float(reform_total.result)
-        self.change = float(reform_total.result - baseline_total.result)
-        self.baseline_count = float(baseline_count.result)
-        self.reform_count = float(reform_count.result)
-        self.winners = float(winners.result)
-        self.losers = float(losers.result)
diff --git a/src/policyengine/tax_benefit_models/us/__init__.py b/src/policyengine/tax_benefit_models/us/__init__.py
index 75d2aa79..d49d46d4 100644
--- a/src/policyengine/tax_benefit_models/us/__init__.py
+++ b/src/policyengine/tax_benefit_models/us/__init__.py
@@ -1,16 +1,36 @@
-"""PolicyEngine US tax-benefit model."""
+"""PolicyEngine US tax-benefit model.
+
+Typical usage (fresh session, no other imports required):
+
+.. code-block:: python
+
+    import policyengine as pe
+
+    # Household calculator.
+    result = pe.us.calculate_household(
+        people=[{"age": 35, "employment_income": 60000}],
+        tax_unit={"filing_status": "SINGLE"},
+        year=2026,
+    )
+    print(result.tax_unit.income_tax)
+
+    # Reform + extra variables.
+    reformed = pe.us.calculate_household(
+        people=[{"age": 35, "employment_income": 60000}],
+        tax_unit={"filing_status": "SINGLE"},
+        year=2026,
+        reform={"gov.irs.credits.ctc.amount.adult_dependent": 1000},
+        extra_variables=["adjusted_gross_income"],
+    )
+"""
 
 from importlib.util import find_spec
 
 if find_spec("policyengine_us") is not None:
     from policyengine.core import Dataset
+    from policyengine.outputs import ProgramStatistics
 
-    from .analysis import (
-        USHouseholdInput,
-        USHouseholdOutput,
-        calculate_household_impact,
-        economic_impact_analysis,
-    )
+    from .analysis import economic_impact_analysis
     from .datasets import (
         PolicyEngineUSDataset,
         USYearData,
@@ -18,16 +38,17 @@
         ensure_datasets,
         load_datasets,
     )
+    from .household import calculate_household
     from .model import (
         PolicyEngineUS,
         PolicyEngineUSLatest,
         managed_microsimulation,
         us_latest,
-        us_model,
     )
-    from .outputs import ProgramStatistics
 
-    # Rebuild Pydantic models to resolve forward references
+    model = us_latest
+    """The pinned US ``TaxBenefitModelVersion`` for this policyengine release."""
+
     Dataset.model_rebuild()
     USYearData.model_rebuild()
     PolicyEngineUSDataset.model_rebuild()
@@ -43,12 +64,10 @@
         "PolicyEngineUS",
         "PolicyEngineUSLatest",
         "managed_microsimulation",
-        "us_model",
+        "model",
         "us_latest",
+        "calculate_household",
         "economic_impact_analysis",
-        "calculate_household_impact",
-        "USHouseholdInput",
-        "USHouseholdOutput",
         "ProgramStatistics",
     ]
 else:
diff --git a/src/policyengine/tax_benefit_models/us/analysis.py b/src/policyengine/tax_benefit_models/us/analysis.py
index 122ae2af..8b3eefc8 100644
--- a/src/policyengine/tax_benefit_models/us/analysis.py
+++ b/src/policyengine/tax_benefit_models/us/analysis.py
@@ -1,15 +1,18 @@
-"""General utility functions for US policy reform analysis."""
+"""Microsimulation reform analysis for the US model.
 
-import tempfile
-from pathlib import Path
-from typing import Any, Optional, Union
+The single-household calculator lives in :mod:`.household`; this module
+holds the population-level reform-analysis helpers.
+"""
+
+from __future__ import annotations
+
+from typing import Union
 
 import pandas as pd
-from microdf import MicroDataFrame
-from pydantic import BaseModel, Field
+from pydantic import BaseModel
 
 from policyengine.core import OutputCollection, Simulation
-from policyengine.core.policy import Policy
+from policyengine.outputs import ProgramStatistics
 from policyengine.outputs.decile_impact import (
     DecileImpact,
     calculate_decile_impacts,
@@ -24,168 +27,6 @@
     calculate_us_poverty_rates,
 )
 
-from .datasets import PolicyEngineUSDataset, USYearData
-from .model import us_latest
-from .outputs import ProgramStatistics
-
-
-class USHouseholdOutput(BaseModel):
-    """Output from a US household calculation with all entity data."""
-
-    person: list[dict[str, Any]]
-    marital_unit: list[dict[str, Any]]
-    family: list[dict[str, Any]]
-    spm_unit: list[dict[str, Any]]
-    tax_unit: list[dict[str, Any]]
-    household: dict[str, Any]
-
-
-class USHouseholdInput(BaseModel):
-    """Input for a US household calculation."""
-
-    people: list[dict[str, Any]]
-    marital_unit: dict[str, Any] = Field(default_factory=dict)
-    family: dict[str, Any] = Field(default_factory=dict)
-    spm_unit: dict[str, Any] = Field(default_factory=dict)
-    tax_unit: dict[str, Any] = Field(default_factory=dict)
-    household: dict[str, Any] = Field(default_factory=dict)
-    year: int = 2024
-
-
-def calculate_household_impact(
-    household_input: USHouseholdInput,
-    policy: Optional[Policy] = None,
-) -> USHouseholdOutput:
-    """Calculate tax and benefit impacts for a single US household."""
-    n_people = len(household_input.people)
-
-    # Build person data with defaults
-    person_data = {
-        "person_id": list(range(n_people)),
-        "person_household_id": [0] * n_people,
-        "person_marital_unit_id": [0] * n_people,
-        "person_family_id": [0] * n_people,
-        "person_spm_unit_id": [0] * n_people,
-        "person_tax_unit_id": [0] * n_people,
-        "person_weight": [1.0] * n_people,
-    }
-    # Add user-provided person fields
-    for i, person in enumerate(household_input.people):
-        for key, value in person.items():
-            if key not in person_data:
-                person_data[key] = [0.0] * n_people  # Default to 0 for numeric fields
-            person_data[key][i] = value
-
-    # Build entity data with defaults
-    household_data = {
-        "household_id": [0],
-        "household_weight": [1.0],
-    }
-    for key, value in household_input.household.items():
-        household_data[key] = [value]
-
-    marital_unit_data = {
-        "marital_unit_id": [0],
-        "marital_unit_weight": [1.0],
-    }
-    for key, value in household_input.marital_unit.items():
-        marital_unit_data[key] = [value]
-
-    family_data = {
-        "family_id": [0],
-        "family_weight": [1.0],
-    }
-    for key, value in household_input.family.items():
-        family_data[key] = [value]
-
-    spm_unit_data = {
-        "spm_unit_id": [0],
-        "spm_unit_weight": [1.0],
-    }
-    for key, value in household_input.spm_unit.items():
-        spm_unit_data[key] = [value]
-
-    tax_unit_data = {
-        "tax_unit_id": [0],
-        "tax_unit_weight": [1.0],
-    }
-    for key, value in household_input.tax_unit.items():
-        tax_unit_data[key] = [value]
-
-    # Create MicroDataFrames
-    person_df = MicroDataFrame(pd.DataFrame(person_data), weights="person_weight")
-    household_df = MicroDataFrame(
-        pd.DataFrame(household_data), weights="household_weight"
-    )
-    marital_unit_df = MicroDataFrame(
-        pd.DataFrame(marital_unit_data), weights="marital_unit_weight"
-    )
-    family_df = MicroDataFrame(pd.DataFrame(family_data), weights="family_weight")
-    spm_unit_df = MicroDataFrame(pd.DataFrame(spm_unit_data), weights="spm_unit_weight")
-    tax_unit_df = MicroDataFrame(pd.DataFrame(tax_unit_data), weights="tax_unit_weight")
-
-    # Create temporary dataset
-    tmpdir = tempfile.mkdtemp()
-    filepath = str(Path(tmpdir) / "household_impact.h5")
-
-    dataset = PolicyEngineUSDataset(
-        name="Household impact calculation",
-        description="Single household for impact calculation",
-        filepath=filepath,
-        year=household_input.year,
-        data=USYearData(
-            person=person_df,
-            household=household_df,
-            marital_unit=marital_unit_df,
-            family=family_df,
-            spm_unit=spm_unit_df,
-            tax_unit=tax_unit_df,
-        ),
-    )
-
-    # Run simulation
-    simulation = Simulation(
-        dataset=dataset,
-        tax_benefit_model_version=us_latest,
-        policy=policy,
-    )
-    simulation.run()
-
-    # Extract all output variables defined in entity_variables
-    output_data = simulation.output_dataset.data
-
-    def safe_convert(value):
-        """Convert value to float if numeric, otherwise return as string."""
-        try:
-            return float(value)
-        except (ValueError, TypeError):
-            return str(value)
-
-    def extract_entity_outputs(
-        entity_name: str, entity_data, n_rows: int
-    ) -> list[dict[str, Any]]:
-        outputs = []
-        for i in range(n_rows):
-            row_dict = {}
-            for var in us_latest.entity_variables[entity_name]:
-                row_dict[var] = safe_convert(entity_data[var].iloc[i])
-            outputs.append(row_dict)
-        return outputs
-
-    return USHouseholdOutput(
-        person=extract_entity_outputs("person", output_data.person, n_people),
-        marital_unit=extract_entity_outputs(
-            "marital_unit", output_data.marital_unit, 1
-        ),
-        family=extract_entity_outputs("family", output_data.family, 1),
-        spm_unit=extract_entity_outputs("spm_unit", output_data.spm_unit, 1),
-        tax_unit=extract_entity_outputs("tax_unit", output_data.tax_unit, 1),
-        household={
-            var: safe_convert(output_data.household[var].iloc[0])
-            for var in us_latest.entity_variables["household"]
-        },
-    )
-
 
 class PolicyReformAnalysis(BaseModel):
     """Complete policy reform analysis result."""
@@ -203,15 +44,16 @@ def economic_impact_analysis(
     reform_simulation: Simulation,
     inequality_preset: Union[USInequalityPreset, str] = USInequalityPreset.STANDARD,
 ) -> PolicyReformAnalysis:
-    """Perform comprehensive analysis of a policy reform.
+    """Perform comprehensive analysis of a US policy reform.
 
     Args:
-        baseline_simulation: Baseline simulation
-        reform_simulation: Reform simulation
-        inequality_preset: Optional preset for the inequality outputs
+        baseline_simulation: Baseline simulation.
+        reform_simulation: Reform simulation.
+        inequality_preset: Preset for the inequality output.
 
     Returns:
-        PolicyReformAnalysis containing decile impacts and program statistics
+        ``PolicyReformAnalysis`` with decile impacts, program
+        statistics, baseline and reform poverty, and inequality.
     """
     baseline_simulation.ensure()
     reform_simulation.ensure()
@@ -223,21 +65,16 @@ def economic_impact_analysis(
         "Reform simulation must have more than 100 households"
     )
 
-    # Decile impact (using household_net_income for US)
     decile_impacts = calculate_decile_impacts(
         baseline_simulation=baseline_simulation,
         reform_simulation=reform_simulation,
         income_variable="household_net_income",
     )
 
-    # Major programs to analyse
     programs = {
-        # Federal taxes
         "income_tax": {"entity": "tax_unit", "is_tax": True},
         "payroll_tax": {"entity": "person", "is_tax": True},
-        # State and local taxes
         "state_income_tax": {"entity": "tax_unit", "is_tax": True},
-        # Benefits
         "snap": {"entity": "spm_unit", "is_tax": False},
         "tanf": {"entity": "spm_unit", "is_tax": False},
         "ssi": {"entity": "person", "is_tax": False},
@@ -249,22 +86,17 @@ def economic_impact_analysis(
     }
 
     program_statistics = []
-
     for program_name, program_info in programs.items():
-        entity = program_info["entity"]
-        is_tax = program_info["is_tax"]
-
         stats = ProgramStatistics(
             baseline_simulation=baseline_simulation,
             reform_simulation=reform_simulation,
             program_name=program_name,
-            entity=entity,
-            is_tax=is_tax,
+            entity=program_info["entity"],
+            is_tax=program_info["is_tax"],
         )
         stats.run()
         program_statistics.append(stats)
 
-    # Create DataFrame
     program_df = pd.DataFrame(
         [
             {
@@ -284,16 +116,12 @@ def economic_impact_analysis(
             for p in program_statistics
         ]
     )
-
     program_collection = OutputCollection(
         outputs=program_statistics, dataframe=program_df
     )
 
-    # Calculate poverty rates for both simulations
     baseline_poverty = calculate_us_poverty_rates(baseline_simulation)
     reform_poverty = calculate_us_poverty_rates(reform_simulation)
-
-    # Calculate inequality for both simulations
     baseline_inequality = calculate_us_inequality(
         baseline_simulation, preset=inequality_preset
     )
diff --git a/src/policyengine/tax_benefit_models/us/datasets.py b/src/policyengine/tax_benefit_models/us/datasets.py
index da10733b..014309db 100644
--- a/src/policyengine/tax_benefit_models/us/datasets.py
+++ b/src/policyengine/tax_benefit_models/us/datasets.py
@@ -7,7 +7,7 @@
 from pydantic import ConfigDict
 
 from policyengine.core import Dataset, YearData
-from policyengine.core.release_manifest import (
+from policyengine.provenance.manifest import (
     dataset_logical_name,
     resolve_dataset_reference,
 )
diff --git a/src/policyengine/tax_benefit_models/us/household.py b/src/policyengine/tax_benefit_models/us/household.py
new file mode 100644
index 00000000..5258043a
--- /dev/null
+++ b/src/policyengine/tax_benefit_models/us/household.py
@@ -0,0 +1,245 @@
+"""Single-household calculation for the US model.
+
+``calculate_household`` is the one-call entry point for the household
+calculator journey: pass the people plus any per-entity overrides plus
+an optional reform, get back a dot-accessible result.
+
+.. code-block:: python
+
+    import policyengine as pe
+
+    # Single parent with one child in New York, $45k wages.
+    result = pe.us.calculate_household(
+        people=[
+            {"age": 32, "employment_income": 45000, "is_tax_unit_head": True},
+            {"age": 6, "is_tax_unit_dependent": True},
+        ],
+        tax_unit={"filing_status": "HEAD_OF_HOUSEHOLD"},
+        household={"state_code": "NY"},
+        year=2026,
+        extra_variables=["adjusted_gross_income"],
+    )
+    print(result.tax_unit.income_tax)
+    print(result.tax_unit.ctc, result.tax_unit.eitc)
+    print(result.household.household_net_income)
+    # Reform: zero out SNAP.
+    reformed = pe.us.calculate_household(
+        people=[
+            {"age": 32, "employment_income": 45000, "is_tax_unit_head": True},
+            {"age": 6, "is_tax_unit_dependent": True},
+        ],
+        tax_unit={"filing_status": "HEAD_OF_HOUSEHOLD"},
+        household={"state_code": "NY"},
+        year=2026,
+        reform={"gov.usda.snap.income.deductions.earned_income": 0},
+    )
+"""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import Any, Optional
+
+from policyengine.tax_benefit_models.common import (
+    EntityResult,
+    HouseholdResult,
+    compile_reform,
+    dispatch_extra_variables,
+)
+from policyengine.utils.household_validation import validate_household_input
+
+from .model import us_latest
+
+_GROUP_ENTITIES = ("marital_unit", "family", "spm_unit", "tax_unit", "household")
+
+
+def _raise_unexpected_kwargs(unexpected: Mapping[str, Any]) -> None:
+    from difflib import get_close_matches
+
+    lines = ["calculate_household received unsupported keyword arguments:"]
+    for name in unexpected:
+        suggestions = get_close_matches(name, _ALLOWED_KWARGS, n=1, cutoff=0.5)
+        hint = f" (did you mean '{suggestions[0]}'?)" if suggestions else ""
+        if name == "benunit":
+            hint = " — `benunit` is UK-only; the US uses `tax_unit`, `marital_unit`, `family`, or `spm_unit`"
+        lines.append(f"  - '{name}'{hint}")
+    lines.append(
+        "Valid kwargs: people, marital_unit, family, spm_unit, tax_unit, "
+        "household, year, reform, extra_variables."
+    )
+    raise TypeError("\n".join(lines))
+
+
+def _default_output_columns(
+    extra_by_entity: Mapping[str, list[str]],
+) -> dict[str, list[str]]:
+    merged: dict[str, list[str]] = {}
+    for entity, defaults in us_latest.entity_variables.items():
+        columns = list(defaults)
+        for extra in extra_by_entity.get(entity, []):
+            if extra not in columns:
+                columns.append(extra)
+        merged[entity] = columns
+    for entity, extras in extra_by_entity.items():
+        merged.setdefault(entity, list(extras))
+    return merged
+
+
+def _safe_convert(value: Any) -> Any:
+    try:
+        return float(value)
+    except (ValueError, TypeError):
+        return str(value) if value is not None else None
+
+
+def _build_situation(
+    *,
+    people: list[Mapping[str, Any]],
+    marital_unit: Mapping[str, Any],
+    family: Mapping[str, Any],
+    spm_unit: Mapping[str, Any],
+    tax_unit: Mapping[str, Any],
+    household: Mapping[str, Any],
+    year: int,
+) -> dict[str, Any]:
+    year_str = str(year)
+
+    def _periodise(spec: Mapping[str, Any]) -> dict[str, dict[str, Any]]:
+        return {key: {year_str: value} for key, value in spec.items() if key != "id"}
+
+    person_ids = [f"person_{i}" for i in range(len(people))]
+    persons = {pid: _periodise(person) for pid, person in zip(person_ids, people)}
+
+    def _group(spec: Mapping[str, Any]) -> dict[str, Any]:
+        return {"members": list(person_ids), **_periodise(spec)}
+
+    return {
+        "people": persons,
+        "marital_units": {"marital_unit_0": _group(marital_unit)},
+        "families": {"family_0": _group(family)},
+        "spm_units": {"spm_unit_0": _group(spm_unit)},
+        "tax_units": {"tax_unit_0": _group(tax_unit)},
+        "households": {"household_0": _group(household)},
+    }
+
+
+_ALLOWED_KWARGS = frozenset(
+    {
+        "people",
+        "marital_unit",
+        "family",
+        "spm_unit",
+        "tax_unit",
+        "household",
+        "year",
+        "reform",
+        "extra_variables",
+    }
+)
+
+
+def calculate_household(
+    *,
+    people: list[Mapping[str, Any]],
+    marital_unit: Optional[Mapping[str, Any]] = None,
+    family: Optional[Mapping[str, Any]] = None,
+    spm_unit: Optional[Mapping[str, Any]] = None,
+    tax_unit: Optional[Mapping[str, Any]] = None,
+    household: Optional[Mapping[str, Any]] = None,
+    year: int = 2026,
+    reform: Optional[Mapping[str, Any]] = None,
+    extra_variables: Optional[list[str]] = None,
+    **unexpected: Any,
+) -> HouseholdResult:
+    """Compute tax and benefit variables for a single US household.
+
+    Args:
+        people: One dict per person with US variable names as keys
+            (``age``, ``employment_income``, ``is_tax_unit_head``,
+            ``is_tax_unit_dependent`` ...). Must be non-empty.
+        marital_unit, family, spm_unit, tax_unit, household: Optional
+            per-entity overrides, each keyed by variable name (e.g.
+            ``tax_unit={"filing_status": "SINGLE"}``,
+            ``household={"state_code": "NY"}``).
+        year: Calendar year to compute for. Defaults to 2026.
+        reform: Optional reform as ``{parameter_path: value}`` or
+            ``{parameter_path: {effective_date: value}}``. Scalar
+            values default to ``{year}-01-01``; invalid parameter
+            paths raise with a close-match suggestion.
+        extra_variables: Flat list of variable names to compute beyond
+            the default output columns; the library dispatches each
+            name to its entity. Unknown names raise ``ValueError``
+            with a close-match suggestion.
+
+    Returns:
+        :class:`HouseholdResult` with dot-accessible per-entity
+        variables. Singleton entities (``tax_unit``, ``household``, ...)
+        return :class:`EntityResult`; ``person`` returns a list of them.
+
+    Raises:
+        ValueError: if any input dict uses an unknown variable name,
+            if a variable is placed on the wrong entity (e.g.
+            ``filing_status`` on ``people``), or if ``extra_variables``
+            / ``reform`` names a variable or parameter path not defined
+            on the US model.
+    """
+    if unexpected:
+        _raise_unexpected_kwargs(unexpected)
+
+    from policyengine_us import Simulation
+
+    people = list(people)
+    entities = {
+        "marital_unit": dict(marital_unit or {}),
+        "family": dict(family or {}),
+        "spm_unit": dict(spm_unit or {}),
+        "tax_unit": dict(tax_unit or {}),
+        "household": dict(household or {}),
+    }
+
+    validate_household_input(
+        model_version=us_latest,
+        entities={
+            "person": people,
+            **{name: [value] for name, value in entities.items()},
+        },
+    )
+
+    extra_by_entity = dispatch_extra_variables(
+        model_version=us_latest,
+        names=extra_variables or [],
+    )
+    output_columns = _default_output_columns(extra_by_entity)
+    reform_dict = compile_reform(reform, year=year, model_version=us_latest)
+
+    simulation = Simulation(
+        situation=_build_situation(
+            people=people,
+            marital_unit=entities["marital_unit"],
+            family=entities["family"],
+            spm_unit=entities["spm_unit"],
+            tax_unit=entities["tax_unit"],
+            household=entities["household"],
+            year=year,
+        ),
+        reform=reform_dict,
+    )
+
+    result = HouseholdResult()
+    for entity, columns in output_columns.items():
+        raw = {
+            variable: list(simulation.calculate(variable, period=year, map_to=entity))
+            for variable in columns
+        }
+        if entity == "person":
+            result["person"] = [
+                EntityResult(
+                    {variable: _safe_convert(raw[variable][i]) for variable in columns}
+                )
+                for i in range(len(people))
+            ]
+        else:
+            result[entity] = EntityResult(
+                {variable: _safe_convert(raw[variable][0]) for variable in columns}
+            )
+    return result
diff --git a/src/policyengine/tax_benefit_models/us/model.py b/src/policyengine/tax_benefit_models/us/model.py
index cd56df09..51463650 100644
--- a/src/policyengine/tax_benefit_models/us/model.py
+++ b/src/policyengine/tax_benefit_models/us/model.py
@@ -1,31 +1,17 @@
 import datetime
-import warnings
-from importlib import metadata
 from pathlib import Path
 from typing import TYPE_CHECKING, Optional
 
 import pandas as pd
 from microdf import MicroDataFrame
 
-from policyengine.core import (
-    Parameter,
-    ParameterNode,
-    TaxBenefitModel,
-    TaxBenefitModelVersion,
-    Variable,
-)
-from policyengine.core.release_manifest import (
-    certify_data_release_compatibility,
+from policyengine.core import TaxBenefitModel
+from policyengine.provenance.manifest import (
     dataset_logical_name,
-    get_release_manifest,
     resolve_local_managed_dataset_source,
     resolve_managed_dataset_reference,
 )
-from policyengine.utils.entity_utils import build_entity_relationships
-from policyengine.utils.parameter_labels import (
-    build_scale_lookup,
-    generate_label_for_parameter,
-)
+from policyengine.tax_benefit_models.common import MicrosimulationModelVersion
 
 from .datasets import PolicyEngineUSDataset, USYearData
 
@@ -49,18 +35,11 @@ class PolicyEngineUS(TaxBenefitModel):
 us_model = PolicyEngineUS()
 
 
-def _get_runtime_data_build_metadata() -> dict[str, Optional[str]]:
-    try:
-        from policyengine_us.build_metadata import get_data_build_metadata
-    except ModuleNotFoundError as exc:
-        if exc.name != "policyengine_us.build_metadata":
-            raise
-        return {}
-
-    return get_data_build_metadata() or {}
-
+class PolicyEngineUSLatest(MicrosimulationModelVersion):
+    country_code = "us"
+    package_name = "policyengine-us"
+    group_entities = US_GROUP_ENTITIES
 
-class PolicyEngineUSLatest(TaxBenefitModelVersion):
     model: TaxBenefitModel = us_model
     version: str = None
     created_at: datetime.datetime = None
@@ -129,147 +108,32 @@ class PolicyEngineUSLatest(TaxBenefitModelVersion):
         ],
     }
 
-    def __init__(self, **kwargs: dict):
-        manifest = get_release_manifest("us")
-        if "version" not in kwargs or kwargs.get("version") is None:
-            kwargs["version"] = manifest.model_package.version
-
-        installed_model_version = metadata.version("policyengine-us")
-        if installed_model_version != manifest.model_package.version:
-            warnings.warn(
-                "Installed policyengine-us version "
-                f"({installed_model_version}) does not match the bundled "
-                "policyengine.py manifest "
-                f"({manifest.model_package.version}). Calculations will "
-                "run against the installed version, but dataset "
-                "compatibility is not guaranteed. To silence this "
-                "warning, install the version pinned by the manifest.",
-                UserWarning,
-                stacklevel=2,
-            )
-
-        model_build_metadata = _get_runtime_data_build_metadata()
-        data_certification = certify_data_release_compatibility(
-            "us",
-            runtime_model_version=installed_model_version,
-            runtime_data_build_fingerprint=model_build_metadata.get(
-                "data_build_fingerprint"
-            ),
-        )
-
-        super().__init__(**kwargs)
-        self.release_manifest = manifest
-        self.model_package = manifest.model_package
-        self.data_package = manifest.data_package
-        self.default_dataset_uri = manifest.default_dataset_uri
-        self.data_certification = data_certification
-        from policyengine_core.enums import Enum
+    # --- Hooks -----------------------------------------------------------
+    @classmethod
+    def _get_runtime_data_build_metadata(cls) -> dict[str, Optional[str]]:
+        try:
+            from policyengine_us.build_metadata import get_data_build_metadata
+        except ModuleNotFoundError as exc:
+            if exc.name != "policyengine_us.build_metadata":
+                raise
+            return {}
+        return get_data_build_metadata() or {}
+
+    def _load_system(self):
         from policyengine_us.system import system
 
-        # Attach region registry
+        return system
+
+    def _load_region_registry(self):
         from policyengine.countries.us.regions import us_region_registry
 
-        self.region_registry = us_region_registry
-
-        self.id = f"{self.model.id}@{self.version}"
-
-        for var_obj in system.variables.values():
-            # Serialize default_value for JSON compatibility
-            default_val = var_obj.default_value
-            if var_obj.value_type is Enum:
-                default_val = default_val.name
-            elif var_obj.value_type is datetime.date:
-                default_val = default_val.isoformat()
-
-            variable = Variable(
-                id=self.id + "-" + var_obj.name,
-                name=var_obj.name,
-                label=getattr(var_obj, "label", None),
-                tax_benefit_model_version=self,
-                entity=var_obj.entity.key,
-                description=var_obj.documentation,
-                data_type=var_obj.value_type if var_obj.value_type is not Enum else str,
-                default_value=default_val,
-                value_type=var_obj.value_type,
-            )
-            if (
-                hasattr(var_obj, "possible_values")
-                and var_obj.possible_values is not None
-            ):
-                variable.possible_values = list(
-                    map(
-                        lambda x: x.name,
-                        var_obj.possible_values._value2member_map_.values(),
-                    )
-                )
-            # Extract and resolve adds/subtracts.
-            # Core stores these as either list[str] or a parameter path string.
-            # Resolve parameter paths to lists so consumers always get list[str].
-            if hasattr(var_obj, "adds") and var_obj.adds is not None:
-                if isinstance(var_obj.adds, str):
-                    try:
-                        from policyengine_core.parameters.operations.get_parameter import (
-                            get_parameter,
-                        )
-
-                        param = get_parameter(system.parameters, var_obj.adds)
-                        variable.adds = list(param("2025-01-01"))
-                    except (ValueError, Exception):
-                        variable.adds = None
-                else:
-                    variable.adds = var_obj.adds
-            if hasattr(var_obj, "subtracts") and var_obj.subtracts is not None:
-                if isinstance(var_obj.subtracts, str):
-                    try:
-                        from policyengine_core.parameters.operations.get_parameter import (
-                            get_parameter,
-                        )
-
-                        param = get_parameter(system.parameters, var_obj.subtracts)
-                        variable.subtracts = list(param("2025-01-01"))
-                    except (ValueError, Exception):
-                        variable.subtracts = None
-                else:
-                    variable.subtracts = var_obj.subtracts
-            self.add_variable(variable)
-
-        from policyengine_core.parameters import Parameter as CoreParameter
-        from policyengine_core.parameters import ParameterNode as CoreParameterNode
-
-        scale_lookup = build_scale_lookup(system)
-
-        for param_node in system.parameters.get_descendants():
-            if isinstance(param_node, CoreParameter):
-                parameter = Parameter(
-                    id=self.id + "-" + param_node.name,
-                    name=param_node.name,
-                    label=generate_label_for_parameter(
-                        param_node, system, scale_lookup
-                    ),
-                    tax_benefit_model_version=self,
-                    description=param_node.description,
-                    data_type=type(param_node(2025)),
-                    unit=param_node.metadata.get("unit"),
-                    _core_param=param_node,
-                )
-                self.add_parameter(parameter)
-            elif isinstance(param_node, CoreParameterNode):
-                node = ParameterNode(
-                    id=self.id + "-" + param_node.name,
-                    name=param_node.name,
-                    label=param_node.metadata.get("label"),
-                    description=param_node.description,
-                    tax_benefit_model_version=self,
-                )
-                self.add_parameter_node(node)
-
-    def _build_entity_relationships(
-        self, dataset: PolicyEngineUSDataset
-    ) -> pd.DataFrame:
-        """Build a DataFrame mapping each person to their containing entities."""
-        person_data = pd.DataFrame(dataset.data.person)
-        return build_entity_relationships(person_data, US_GROUP_ENTITIES)
+        return us_region_registry
+
+    @property
+    def _dataset_class(self):
+        return PolicyEngineUSDataset
 
+    # --- run -------------------------------------------------------------
     def run(self, simulation: "Simulation") -> "Simulation":
         from policyengine_us import Microsimulation
         from policyengine_us.system import system
@@ -308,14 +172,12 @@ def run(self, simulation: "Simulation") -> "Simulation":
                 ),
             )
 
-        # Build reform dict from policy and dynamic parameter values.
         # US requires reforms at Microsimulation construction time
         # (unlike UK which supports p.update() after construction).
         policy_reform = build_reform_dict(simulation.policy)
         dynamic_reform = build_reform_dict(simulation.dynamic)
         reform_dict = merge_reform_dicts(policy_reform, dynamic_reform)
 
-        # Create Microsimulation with reform at construction time
         microsim = Microsimulation(reform=reform_dict)
         self._build_simulation_from_dataset(microsim, dataset, system)
 
@@ -346,7 +208,7 @@ def run(self, simulation: "Simulation") -> "Simulation":
             "tax_unit_weight",
         }
 
-        # First, copy ID and weight columns from input dataset
+        # Copy ID and weight columns from input dataset.
         for entity in data.keys():
             input_df = pd.DataFrame(getattr(dataset.data, entity))
             entity_id_col = f"{entity}_id"
@@ -357,16 +219,16 @@ def run(self, simulation: "Simulation") -> "Simulation":
             if entity_weight_col in input_df.columns:
                 data[entity][entity_weight_col] = input_df[entity_weight_col].values
 
-        # For person entity, also copy person-level group ID columns
+        # Person entity also needs person-level group ID columns so that
+        # downstream joins (e.g. person->tax_unit) work.
         person_input_df = pd.DataFrame(dataset.data.person)
         for col in person_input_df.columns:
             if col.startswith("person_") and col.endswith("_id"):
-                # Map person_household_id -> household_id, etc.
                 target_col = col.replace("person_", "")
                 if target_col in id_columns:
                     data["person"][target_col] = person_input_df[col].values
 
-        # Then calculate non-ID, non-weight variables from simulation
+        # Calculate non-ID, non-weight variables from simulation
         for entity, variables in self.entity_variables.items():
             for var in variables:
                 if var not in id_columns and var not in weight_columns:
@@ -404,61 +266,23 @@ def run(self, simulation: "Simulation") -> "Simulation":
             ),
         )
 
-    def save(self, simulation: "Simulation"):
-        """Save the simulation's output dataset."""
-        simulation.output_dataset.save()
-
-    def load(self, simulation: "Simulation"):
-        """Load the simulation's output dataset."""
-        import os
-
-        filepath = str(
-            Path(simulation.dataset.filepath).parent / (simulation.id + ".h5")
-        )
-
-        simulation.output_dataset = PolicyEngineUSDataset(
-            id=simulation.id,
-            name=simulation.dataset.name,
-            description=simulation.dataset.description,
-            filepath=filepath,
-            year=simulation.dataset.year,
-            is_output_dataset=True,
-        )
-
-        # Load timestamps from file system metadata
-        if os.path.exists(filepath):
-            simulation.created_at = datetime.datetime.fromtimestamp(
-                os.path.getctime(filepath)
-            )
-            simulation.updated_at = datetime.datetime.fromtimestamp(
-                os.path.getmtime(filepath)
-            )
-
     def _build_simulation_from_dataset(self, microsim, dataset, system):
         """Build a PolicyEngine Core simulation from dataset entity IDs.
 
-        This follows the same pattern as policyengine-uk, initializing
-        entities from IDs first, then using set_input() for variables.
-
-        Args:
-            microsim: The Microsimulation object to populate
-            dataset: The dataset containing entity data
-            system: The tax-benefit system
+        Mirrors the policyengine-uk pattern of instantiating entities from
+        IDs first and then setting variable inputs. Handles both the legacy
+        ``person_X_id`` and the ``X_id`` column-naming conventions.
         """
         import numpy as np
         from policyengine_core.simulations.simulation_builder import (
             SimulationBuilder,
         )
 
-        # Create builder and instantiate entities
         builder = SimulationBuilder()
         builder.populations = system.instantiate_entities()
 
-        # Extract entity IDs from dataset
         person_data = pd.DataFrame(dataset.data.person)
 
-        # Determine column naming convention
-        # Support both person_X_id (from create_datasets) and X_id (from custom datasets)
         household_id_col = (
             "person_household_id"
             if "person_household_id" in person_data.columns
@@ -485,7 +309,6 @@ def _build_simulation_from_dataset(self, microsim, dataset, system):
             else "tax_unit_id"
         )
 
-        # Declare entities
         builder.declare_person_entity("person", person_data["person_id"].values)
         builder.declare_entity(
             "household", np.unique(person_data[household_id_col].values)
@@ -501,7 +324,6 @@ def _build_simulation_from_dataset(self, microsim, dataset, system):
             "marital_unit", np.unique(person_data[marital_unit_id_col].values)
         )
 
-        # Join persons to group entities
         builder.join_with_persons(
             builder.populations["household"],
             person_data[household_id_col].values,
@@ -528,12 +350,8 @@ def _build_simulation_from_dataset(self, microsim, dataset, system):
             np.array(["member"] * len(person_data)),
         )
 
-        # Build simulation from populations
         microsim.build_from_populations(builder.populations)
 
-        # Set input variables for each entity
-        # Skip ID columns as they're structural and already used in entity building
-        # Support both naming conventions
         id_columns = {
             "person_id",
             "household_id",
@@ -558,7 +376,6 @@ def _build_simulation_from_dataset(self, microsim, dataset, system):
         ]:
             df = pd.DataFrame(entity_df)
             for column in df.columns:
-                # Skip ID columns and check if variable exists in system
                 if column not in id_columns and column in system.variables:
                     microsim.set_input(column, dataset.year, df[column].values)
 
@@ -585,8 +402,8 @@ def managed_microsimulation(
     """Construct a country-package Microsimulation pinned to this bundle.
 
     By default this enforces the dataset selection from the bundled
-    `policyengine.py` release manifest. Arbitrary dataset URIs require
-    `allow_unmanaged=True`.
+    ``policyengine.py`` release manifest. Arbitrary dataset URIs require
+    ``allow_unmanaged=True``.
     """
 
     from policyengine_us import Microsimulation
diff --git a/src/policyengine/utils/household_validation.py b/src/policyengine/utils/household_validation.py
new file mode 100644
index 00000000..6be90fb2
--- /dev/null
+++ b/src/policyengine/utils/household_validation.py
@@ -0,0 +1,113 @@
+"""Strict validation for household-calculation inputs.
+
+Catches the three typo classes that otherwise silently propagate wrong
+numbers to published results:
+
+1. Unknown variable name entirely (``employment_incme``).
+2. Valid variable placed on the wrong entity (``filing_status`` passed
+   to ``people`` instead of ``tax_unit``).
+3. Empty ``people`` list (policyengine_us will IndexError deep in
+   simulation).
+
+All errors include paste-able fixes.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Iterable, Mapping
+from difflib import get_close_matches
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from policyengine.core.tax_benefit_model_version import TaxBenefitModelVersion
+
+
+_STRUCTURAL_KEYS = frozenset(
+    {
+        "id",
+        "members",
+        "person_id",
+        "household_id",
+        "marital_unit_id",
+        "family_id",
+        "spm_unit_id",
+        "tax_unit_id",
+        "benunit_id",
+        "person_weight",
+        "household_weight",
+        "marital_unit_weight",
+        "family_weight",
+        "spm_unit_weight",
+        "tax_unit_weight",
+        "benunit_weight",
+    }
+)
+
+
+def validate_household_input(
+    *,
+    model_version: TaxBenefitModelVersion,
+    entities: Mapping[str, Iterable[Mapping[str, object]]],
+) -> None:
+    """Raise ``ValueError`` on unknown or mis-placed entity variables.
+
+    ``entities`` maps entity name → iterable of entity dicts. Each key
+    is checked against ``model_version.variables_by_name``:
+
+    - If the key is unknown, the error includes a difflib close-match
+      suggestion.
+    - If the key is a known variable but defined on a different entity,
+      the error names the correct entity and shows the kwarg swap.
+    """
+    if "person" in entities and not list(entities["person"]):
+        raise ValueError(
+            "people must be a non-empty list. At minimum pass people=[{'age': <int>}]."
+        )
+
+    variables_by_name = model_version.variables_by_name
+    valid_names = set(variables_by_name)
+    unknown: list[tuple[str, str]] = []
+    misplaced: list[tuple[str, str, str]] = []
+
+    for entity_name, records in entities.items():
+        for record in records:
+            for key in record:
+                if key in _STRUCTURAL_KEYS:
+                    continue
+                variable = variables_by_name.get(key)
+                if variable is None:
+                    unknown.append((entity_name, key))
+                elif variable.entity != entity_name:
+                    misplaced.append((entity_name, key, variable.entity))
+
+    if not unknown and not misplaced:
+        return
+
+    lines: list[str] = []
+    if unknown:
+        lines.append(
+            f"Unknown variable names on {model_version.model.id} "
+            f"{model_version.version}:"
+        )
+        for entity_name, key in unknown:
+            suggestions = get_close_matches(key, valid_names, n=1, cutoff=0.7)
+            hint = f" (did you mean '{suggestions[0]}'?)" if suggestions else ""
+            lines.append(f"  - {entity_name}: '{key}'{hint}")
+        if not misplaced:
+            first_bad = unknown[0][1]
+            lines.append(
+                f"If '{first_bad}' is a real variable outside the default "
+                f"output columns, pass it via extra_variables=['{first_bad}']."
+            )
+    if misplaced:
+        if lines:
+            lines.append("")
+        lines.append("Variables passed on the wrong entity:")
+        for wrong_entity, key, correct_entity in misplaced:
+            lines.append(
+                f"  - '{key}' was given on {wrong_entity}; it belongs on "
+                f"{correct_entity}. Move it: pass "
+                f"{correct_entity}={{'{key}': <value>}}."
+            )
+
+    raise ValueError("\n".join(lines))
diff --git a/tests/fixtures/household_calculator_snapshots/uk_couple_two_kids.json b/tests/fixtures/household_calculator_snapshots/uk_couple_two_kids.json
new file mode 100644
index 00000000..49302124
--- /dev/null
+++ b/tests/fixtures/household_calculator_snapshots/uk_couple_two_kids.json
@@ -0,0 +1,139 @@
+{
+  "benunit.benunit_id": 0.0,
+  "benunit.benunit_weight": 1.0,
+  "benunit.child_benefit": 2328.16,
+  "benunit.child_tax_credit": 0.0,
+  "benunit.family_type": "COUPLE_WITH_CHILDREN",
+  "benunit.income_support": 0.0,
+  "benunit.pension_credit": 0.0,
+  "benunit.universal_credit": 0.0,
+  "benunit.working_tax_credit": 0.0,
+  "household.council_tax": 0.0,
+  "household.equiv_hbai_household_net_income": 52503.68,
+  "household.hbai_household_net_income": 73505.15,
+  "household.household_benefits": 5880.35,
+  "household.household_count_people": 4.0,
+  "household.household_gross_income": 95880.34,
+  "household.household_id": 0.0,
+  "household.household_income_decile": 10.0,
+  "household.household_market_income": 90000.0,
+  "household.household_net_income": 76898.3,
+  "household.household_tax": 18982.05,
+  "household.household_wealth_decile": 10.0,
+  "household.household_weight": 1.0,
+  "household.in_poverty_ahc": 0.0,
+  "household.in_poverty_bhc": 0.0,
+  "household.in_relative_poverty_ahc": 0.0,
+  "household.in_relative_poverty_bhc": 0.0,
+  "household.rent": 0.0,
+  "household.tenure_type": "RENT_PRIVATELY",
+  "household.vat": 0.0,
+  "person[0].age": 42.0,
+  "person[0].benunit_id": 0.0,
+  "person[0].child_benefit": 2328.16,
+  "person[0].child_tax_credit": 0.0,
+  "person[0].dividend_income": 0.0,
+  "person[0].earned_income": 55000.0,
+  "person[0].employment_income": 55000.0,
+  "person[0].gender": "MALE",
+  "person[0].household_id": 0.0,
+  "person[0].income_support": 0.0,
+  "person[0].income_tax": 9432.0,
+  "person[0].is_SP_age": 0.0,
+  "person[0].is_adult": 1.0,
+  "person[0].is_child": 0.0,
+  "person[0].is_male": 1.0,
+  "person[0].national_insurance": 3110.6,
+  "person[0].pension_credit": 0.0,
+  "person[0].pension_income": 0.0,
+  "person[0].person_id": 0.0,
+  "person[0].person_weight": 1.0,
+  "person[0].private_pension_income": 0.0,
+  "person[0].property_income": 0.0,
+  "person[0].savings_interest_income": 0.0,
+  "person[0].self_employment_income": 0.0,
+  "person[0].total_income": 55000.0,
+  "person[0].universal_credit": 0.0,
+  "person[0].working_tax_credit": 0.0,
+  "person[1].age": 40.0,
+  "person[1].benunit_id": 0.0,
+  "person[1].child_benefit": 2328.16,
+  "person[1].child_tax_credit": 0.0,
+  "person[1].dividend_income": 0.0,
+  "person[1].earned_income": 35000.0,
+  "person[1].employment_income": 35000.0,
+  "person[1].gender": "MALE",
+  "person[1].household_id": 0.0,
+  "person[1].income_support": 0.0,
+  "person[1].income_tax": 4486.0,
+  "person[1].is_SP_age": 0.0,
+  "person[1].is_adult": 1.0,
+  "person[1].is_child": 0.0,
+  "person[1].is_male": 1.0,
+  "person[1].national_insurance": 1794.4,
+  "person[1].pension_credit": 0.0,
+  "person[1].pension_income": 0.0,
+  "person[1].person_id": 0.0,
+  "person[1].person_weight": 1.0,
+  "person[1].private_pension_income": 0.0,
+  "person[1].property_income": 0.0,
+  "person[1].savings_interest_income": 0.0,
+  "person[1].self_employment_income": 0.0,
+  "person[1].total_income": 35000.0,
+  "person[1].universal_credit": 0.0,
+  "person[1].working_tax_credit": 0.0,
+  "person[2].age": 8.0,
+  "person[2].benunit_id": 0.0,
+  "person[2].child_benefit": 2328.16,
+  "person[2].child_tax_credit": 0.0,
+  "person[2].dividend_income": 0.0,
+  "person[2].earned_income": 0.0,
+  "person[2].employment_income": 0.0,
+  "person[2].gender": "MALE",
+  "person[2].household_id": 0.0,
+  "person[2].income_support": 0.0,
+  "person[2].income_tax": 0.0,
+  "person[2].is_SP_age": 0.0,
+  "person[2].is_adult": 0.0,
+  "person[2].is_child": 1.0,
+  "person[2].is_male": 1.0,
+  "person[2].national_insurance": 0.0,
+  "person[2].pension_credit": 0.0,
+  "person[2].pension_income": 0.0,
+  "person[2].person_id": 0.0,
+  "person[2].person_weight": 1.0,
+  "person[2].private_pension_income": 0.0,
+  "person[2].property_income": 0.0,
+  "person[2].savings_interest_income": 0.0,
+  "person[2].self_employment_income": 0.0,
+  "person[2].total_income": 0.0,
+  "person[2].universal_credit": 0.0,
+  "person[2].working_tax_credit": 0.0,
+  "person[3].age": 3.0,
+  "person[3].benunit_id": 0.0,
+  "person[3].child_benefit": 2328.16,
+  "person[3].child_tax_credit": 0.0,
+  "person[3].dividend_income": 0.0,
+  "person[3].earned_income": 0.0,
+  "person[3].employment_income": 0.0,
+  "person[3].gender": "MALE",
+  "person[3].household_id": 0.0,
+  "person[3].income_support": 0.0,
+  "person[3].income_tax": 0.0,
+  "person[3].is_SP_age": 0.0,
+  "person[3].is_adult": 0.0,
+  "person[3].is_child": 1.0,
+  "person[3].is_male": 1.0,
+  "person[3].national_insurance": 0.0,
+  "person[3].pension_credit": 0.0,
+  "person[3].pension_income": 0.0,
+  "person[3].person_id": 0.0,
+  "person[3].person_weight": 1.0,
+  "person[3].private_pension_income": 0.0,
+  "person[3].property_income": 0.0,
+  "person[3].savings_interest_income": 0.0,
+  "person[3].self_employment_income": 0.0,
+  "person[3].total_income": 0.0,
+  "person[3].universal_credit": 0.0,
+  "person[3].working_tax_credit": 0.0
+}
diff --git a/tests/fixtures/household_calculator_snapshots/uk_model_surface.json b/tests/fixtures/household_calculator_snapshots/uk_model_surface.json
new file mode 100644
index 00000000..161ef0ec
--- /dev/null
+++ b/tests/fixtures/household_calculator_snapshots/uk_model_surface.json
@@ -0,0 +1,11 @@
+{
+  "country_id": "uk",
+  "data_package_name": "policyengine-uk-data",
+  "has_employment_income": true,
+  "has_income_tax": true,
+  "has_region_registry": true,
+  "model_package_name": "policyengine-uk",
+  "num_parameters_bucketed_100s": 20,
+  "num_variables_bucketed_100s": 8,
+  "region_registry_country": "uk"
+}
diff --git a/tests/fixtures/household_calculator_snapshots/uk_single_adult_employment_income.json b/tests/fixtures/household_calculator_snapshots/uk_single_adult_employment_income.json
new file mode 100644
index 00000000..5ec94094
--- /dev/null
+++ b/tests/fixtures/household_calculator_snapshots/uk_single_adult_employment_income.json
@@ -0,0 +1,58 @@
+{
+  "benunit.benunit_id": 0.0,
+  "benunit.benunit_weight": 1.0,
+  "benunit.child_benefit": 0.0,
+  "benunit.child_tax_credit": 0.0,
+  "benunit.family_type": "SINGLE",
+  "benunit.income_support": 0.0,
+  "benunit.pension_credit": 0.0,
+  "benunit.universal_credit": 0.0,
+  "benunit.working_tax_credit": 0.0,
+  "household.council_tax": 0.0,
+  "household.equiv_hbai_household_net_income": 37491.94,
+  "household.hbai_household_net_income": 25119.6,
+  "household.household_benefits": 0.0,
+  "household.household_count_people": 1.0,
+  "household.household_gross_income": 30000.0,
+  "household.household_id": 0.0,
+  "household.household_income_decile": 10.0,
+  "household.household_market_income": 30000.0,
+  "household.household_net_income": 24960.55,
+  "household.household_tax": 5039.45,
+  "household.household_wealth_decile": 10.0,
+  "household.household_weight": 1.0,
+  "household.in_poverty_ahc": 0.0,
+  "household.in_poverty_bhc": 0.0,
+  "household.in_relative_poverty_ahc": 0.0,
+  "household.in_relative_poverty_bhc": 0.0,
+  "household.rent": 0.0,
+  "household.tenure_type": "RENT_PRIVATELY",
+  "household.vat": 0.0,
+  "person[0].age": 35.0,
+  "person[0].benunit_id": 0.0,
+  "person[0].child_benefit": 0.0,
+  "person[0].child_tax_credit": 0.0,
+  "person[0].dividend_income": 0.0,
+  "person[0].earned_income": 30000.0,
+  "person[0].employment_income": 30000.0,
+  "person[0].gender": "MALE",
+  "person[0].household_id": 0.0,
+  "person[0].income_support": 0.0,
+  "person[0].income_tax": 3486.0,
+  "person[0].is_SP_age": 0.0,
+  "person[0].is_adult": 1.0,
+  "person[0].is_child": 0.0,
+  "person[0].is_male": 1.0,
+  "person[0].national_insurance": 1394.4,
+  "person[0].pension_credit": 0.0,
+  "person[0].pension_income": 0.0,
+  "person[0].person_id": 0.0,
+  "person[0].person_weight": 1.0,
+  "person[0].private_pension_income": 0.0,
+  "person[0].property_income": 0.0,
+  "person[0].savings_interest_income": 0.0,
+  "person[0].self_employment_income": 0.0,
+  "person[0].total_income": 30000.0,
+  "person[0].universal_credit": 0.0,
+  "person[0].working_tax_credit": 0.0
+}
diff --git a/tests/fixtures/household_calculator_snapshots/uk_single_adult_no_income.json b/tests/fixtures/household_calculator_snapshots/uk_single_adult_no_income.json
new file mode 100644
index 00000000..59657e2c
--- /dev/null
+++ b/tests/fixtures/household_calculator_snapshots/uk_single_adult_no_income.json
@@ -0,0 +1,58 @@
+{
+  "benunit.benunit_id": 0.0,
+  "benunit.benunit_weight": 1.0,
+  "benunit.child_benefit": 0.0,
+  "benunit.child_tax_credit": 0.0,
+  "benunit.family_type": "SINGLE",
+  "benunit.income_support": 0.0,
+  "benunit.pension_credit": 0.0,
+  "benunit.universal_credit": 5079.13,
+  "benunit.working_tax_credit": 0.0,
+  "household.council_tax": 0.0,
+  "household.equiv_hbai_household_net_income": 7580.79,
+  "household.hbai_household_net_income": 5079.13,
+  "household.household_benefits": 5079.13,
+  "household.household_count_people": 1.0,
+  "household.household_gross_income": 5079.13,
+  "household.household_id": 0.0,
+  "household.household_income_decile": 10.0,
+  "household.household_market_income": 0.0,
+  "household.household_net_income": 4920.09,
+  "household.household_tax": 159.04,
+  "household.household_wealth_decile": 10.0,
+  "household.household_weight": 1.0,
+  "household.in_poverty_ahc": 1.0,
+  "household.in_poverty_bhc": 1.0,
+  "household.in_relative_poverty_ahc": 0.0,
+  "household.in_relative_poverty_bhc": 0.0,
+  "household.rent": 0.0,
+  "household.tenure_type": "RENT_PRIVATELY",
+  "household.vat": 0.0,
+  "person[0].age": 35.0,
+  "person[0].benunit_id": 0.0,
+  "person[0].child_benefit": 0.0,
+  "person[0].child_tax_credit": 0.0,
+  "person[0].dividend_income": 0.0,
+  "person[0].earned_income": 0.0,
+  "person[0].employment_income": 0.0,
+  "person[0].gender": "MALE",
+  "person[0].household_id": 0.0,
+  "person[0].income_support": 0.0,
+  "person[0].income_tax": 0.0,
+  "person[0].is_SP_age": 0.0,
+  "person[0].is_adult": 1.0,
+  "person[0].is_child": 0.0,
+  "person[0].is_male": 1.0,
+  "person[0].national_insurance": 0.0,
+  "person[0].pension_credit": 0.0,
+  "person[0].pension_income": 0.0,
+  "person[0].person_id": 0.0,
+  "person[0].person_weight": 1.0,
+  "person[0].private_pension_income": 0.0,
+  "person[0].property_income": 0.0,
+  "person[0].savings_interest_income": 0.0,
+  "person[0].self_employment_income": 0.0,
+  "person[0].total_income": 0.0,
+  "person[0].universal_credit": 5079.13,
+  "person[0].working_tax_credit": 0.0
+}
diff --git a/tests/fixtures/household_calculator_snapshots/uk_single_parent_one_child.json b/tests/fixtures/household_calculator_snapshots/uk_single_parent_one_child.json
new file mode 100644
index 00000000..06e55db0
--- /dev/null
+++ b/tests/fixtures/household_calculator_snapshots/uk_single_parent_one_child.json
@@ -0,0 +1,85 @@
+{
+  "benunit.benunit_id": 0.0,
+  "benunit.benunit_weight": 1.0,
+  "benunit.child_benefit": 1400.66,
+  "benunit.child_tax_credit": 0.0,
+  "benunit.family_type": "LONE_PARENT",
+  "benunit.income_support": 0.0,
+  "benunit.pension_credit": 0.0,
+  "benunit.universal_credit": 1544.43,
+  "benunit.working_tax_credit": 0.0,
+  "household.council_tax": 0.0,
+  "household.equiv_hbai_household_net_income": 28120.33,
+  "household.hbai_household_net_income": 24464.69,
+  "household.household_benefits": 2945.09,
+  "household.household_count_people": 2.0,
+  "household.household_gross_income": 27945.09,
+  "household.household_id": 0.0,
+  "household.household_income_decile": 10.0,
+  "household.household_market_income": 25000.0,
+  "household.household_net_income": 24305.64,
+  "household.household_tax": 3639.45,
+  "household.household_wealth_decile": 10.0,
+  "household.household_weight": 1.0,
+  "household.in_poverty_ahc": 0.0,
+  "household.in_poverty_bhc": 0.0,
+  "household.in_relative_poverty_ahc": 0.0,
+  "household.in_relative_poverty_bhc": 0.0,
+  "household.rent": 0.0,
+  "household.tenure_type": "RENT_PRIVATELY",
+  "household.vat": 0.0,
+  "person[0].age": 32.0,
+  "person[0].benunit_id": 0.0,
+  "person[0].child_benefit": 1400.66,
+  "person[0].child_tax_credit": 0.0,
+  "person[0].dividend_income": 0.0,
+  "person[0].earned_income": 25000.0,
+  "person[0].employment_income": 25000.0,
+  "person[0].gender": "MALE",
+  "person[0].household_id": 0.0,
+  "person[0].income_support": 0.0,
+  "person[0].income_tax": 2486.0,
+  "person[0].is_SP_age": 0.0,
+  "person[0].is_adult": 1.0,
+  "person[0].is_child": 0.0,
+  "person[0].is_male": 1.0,
+  "person[0].national_insurance": 994.4,
+  "person[0].pension_credit": 0.0,
+  "person[0].pension_income": 0.0,
+  "person[0].person_id": 0.0,
+  "person[0].person_weight": 1.0,
+  "person[0].private_pension_income": 0.0,
+  "person[0].property_income": 0.0,
+  "person[0].savings_interest_income": 0.0,
+  "person[0].self_employment_income": 0.0,
+  "person[0].total_income": 25000.0,
+  "person[0].universal_credit": 1544.43,
+  "person[0].working_tax_credit": 0.0,
+  "person[1].age": 5.0,
+  "person[1].benunit_id": 0.0,
+  "person[1].child_benefit": 1400.66,
+  "person[1].child_tax_credit": 0.0,
+  "person[1].dividend_income": 0.0,
+  "person[1].earned_income": 0.0,
+  "person[1].employment_income": 0.0,
+  "person[1].gender": "MALE",
+  "person[1].household_id": 0.0,
+  "person[1].income_support": 0.0,
+  "person[1].income_tax": 0.0,
+  "person[1].is_SP_age": 0.0,
+  "person[1].is_adult": 0.0,
+  "person[1].is_child": 1.0,
+  "person[1].is_male": 1.0,
+  "person[1].national_insurance": 0.0,
+  "person[1].pension_credit": 0.0,
+  "person[1].pension_income": 0.0,
+  "person[1].person_id": 0.0,
+  "person[1].person_weight": 1.0,
+  "person[1].private_pension_income": 0.0,
+  "person[1].property_income": 0.0,
+  "person[1].savings_interest_income": 0.0,
+  "person[1].self_employment_income": 0.0,
+  "person[1].total_income": 0.0,
+  "person[1].universal_credit": 1544.43,
+  "person[1].working_tax_credit": 0.0
+}
diff --git a/tests/fixtures/household_calculator_snapshots/us_married_two_kids_high_income.json b/tests/fixtures/household_calculator_snapshots/us_married_two_kids_high_income.json
new file mode 100644
index 00000000..1d5e98ca
--- /dev/null
+++ b/tests/fixtures/household_calculator_snapshots/us_married_two_kids_high_income.json
@@ -0,0 +1,97 @@
+{
+  "family.family_id": 0.0,
+  "family.family_weight": 0.0,
+  "household.congressional_district_geoid": 0.0,
+  "household.household_benefits": 0.0,
+  "household.household_count_people": 4.0,
+  "household.household_id": 0.0,
+  "household.household_income_decile": 10.0,
+  "household.household_market_income": 240000.0,
+  "household.household_net_income": 175089.92,
+  "household.household_tax": 64910.07,
+  "household.household_weight": 1.0,
+  "marital_unit.marital_unit_id": 0.0,
+  "marital_unit.marital_unit_weight": 1.0,
+  "person[0].age": 42.0,
+  "person[0].employment_income": 150000.0,
+  "person[0].family_id": 0.0,
+  "person[0].household_id": 0.0,
+  "person[0].is_adult": 1.0,
+  "person[0].is_child": 0.0,
+  "person[0].is_male": 1.0,
+  "person[0].marital_unit_id": 0.0,
+  "person[0].medicaid": 0.0,
+  "person[0].person_id": 0.0,
+  "person[0].person_weight": 1.0,
+  "person[0].race": 3.0,
+  "person[0].social_security": 0.0,
+  "person[0].spm_unit_id": 0.0,
+  "person[0].ssi": 0.0,
+  "person[0].tax_unit_id": 0.0,
+  "person[0].unemployment_compensation": 0.0,
+  "person[1].age": 40.0,
+  "person[1].employment_income": 90000.0,
+  "person[1].family_id": 0.0,
+  "person[1].household_id": 0.0,
+  "person[1].is_adult": 1.0,
+  "person[1].is_child": 0.0,
+  "person[1].is_male": 1.0,
+  "person[1].marital_unit_id": 0.0,
+  "person[1].medicaid": 0.0,
+  "person[1].person_id": 1.0,
+  "person[1].person_weight": 1.0,
+  "person[1].race": 3.0,
+  "person[1].social_security": 0.0,
+  "person[1].spm_unit_id": 0.0,
+  "person[1].ssi": 0.0,
+  "person[1].tax_unit_id": 0.0,
+  "person[1].unemployment_compensation": 0.0,
+  "person[2].age": 8.0,
+  "person[2].employment_income": 0.0,
+  "person[2].family_id": 0.0,
+  "person[2].household_id": 0.0,
+  "person[2].is_adult": 0.0,
+  "person[2].is_child": 1.0,
+  "person[2].is_male": 1.0,
+  "person[2].marital_unit_id": 0.0,
+  "person[2].medicaid": 0.0,
+  "person[2].person_id": 2.0,
+  "person[2].person_weight": 1.0,
+  "person[2].race": 3.0,
+  "person[2].social_security": 0.0,
+  "person[2].spm_unit_id": 0.0,
+  "person[2].ssi": 0.0,
+  "person[2].tax_unit_id": 0.0,
+  "person[2].unemployment_compensation": 0.0,
+  "person[3].age": 3.0,
+  "person[3].employment_income": 0.0,
+  "person[3].family_id": 0.0,
+  "person[3].household_id": 0.0,
+  "person[3].is_adult": 0.0,
+  "person[3].is_child": 1.0,
+  "person[3].is_male": 1.0,
+  "person[3].marital_unit_id": 0.0,
+  "person[3].medicaid": 0.0,
+  "person[3].person_id": 3.0,
+  "person[3].person_weight": 1.0,
+  "person[3].race": 3.0,
+  "person[3].social_security": 0.0,
+  "person[3].spm_unit_id": 0.0,
+  "person[3].ssi": 0.0,
+  "person[3].tax_unit_id": 0.0,
+  "person[3].unemployment_compensation": 0.0,
+  "spm_unit.snap": 0.0,
+  "spm_unit.spm_unit_id": 0.0,
+  "spm_unit.spm_unit_is_in_deep_spm_poverty": 0.0,
+  "spm_unit.spm_unit_is_in_spm_poverty": 0.0,
+  "spm_unit.spm_unit_net_income": 175089.92,
+  "spm_unit.spm_unit_weight": 1.0,
+  "spm_unit.tanf": 0.0,
+  "tax_unit.ctc": 4400.0,
+  "tax_unit.eitc": 0.0,
+  "tax_unit.employee_payroll_tax": 21480.0,
+  "tax_unit.household_state_income_tax": 12690.07,
+  "tax_unit.income_tax": 30740.0,
+  "tax_unit.tax_unit_id": 0.0,
+  "tax_unit.tax_unit_weight": 1.0
+}
diff --git a/tests/fixtures/household_calculator_snapshots/us_model_surface.json b/tests/fixtures/household_calculator_snapshots/us_model_surface.json
new file mode 100644
index 00000000..eaf4352e
--- /dev/null
+++ b/tests/fixtures/household_calculator_snapshots/us_model_surface.json
@@ -0,0 +1,11 @@
+{
+  "country_id": "us",
+  "data_package_name": "policyengine-us-data",
+  "has_employment_income": true,
+  "has_income_tax": true,
+  "has_region_registry": true,
+  "model_package_name": "policyengine-us",
+  "num_parameters_bucketed_100s": 777,
+  "num_variables_bucketed_100s": 46,
+  "region_registry_country": "us"
+}
diff --git a/tests/fixtures/household_calculator_snapshots/us_single_adult_employment_income.json b/tests/fixtures/household_calculator_snapshots/us_single_adult_employment_income.json
new file mode 100644
index 00000000..d94660a9
--- /dev/null
+++ b/tests/fixtures/household_calculator_snapshots/us_single_adult_employment_income.json
@@ -0,0 +1,46 @@
+{
+  "family.family_id": 0.0,
+  "family.family_weight": 0.0,
+  "household.congressional_district_geoid": 0.0,
+  "household.household_benefits": 0.0,
+  "household.household_count_people": 1.0,
+  "household.household_id": 0.0,
+  "household.household_income_decile": 10.0,
+  "household.household_market_income": 60000.0,
+  "household.household_net_income": 48007.14,
+  "household.household_tax": 11992.86,
+  "household.household_weight": 1.0,
+  "marital_unit.marital_unit_id": 0.0,
+  "marital_unit.marital_unit_weight": 1.0,
+  "person[0].age": 35.0,
+  "person[0].employment_income": 60000.0,
+  "person[0].family_id": 0.0,
+  "person[0].household_id": 0.0,
+  "person[0].is_adult": 1.0,
+  "person[0].is_child": 0.0,
+  "person[0].is_male": 1.0,
+  "person[0].marital_unit_id": 0.0,
+  "person[0].medicaid": 0.0,
+  "person[0].person_id": 0.0,
+  "person[0].person_weight": 1.0,
+  "person[0].race": 3.0,
+  "person[0].social_security": 0.0,
+  "person[0].spm_unit_id": 0.0,
+  "person[0].ssi": 0.0,
+  "person[0].tax_unit_id": 0.0,
+  "person[0].unemployment_compensation": 0.0,
+  "spm_unit.snap": 0.0,
+  "spm_unit.spm_unit_id": 0.0,
+  "spm_unit.spm_unit_is_in_deep_spm_poverty": 0.0,
+  "spm_unit.spm_unit_is_in_spm_poverty": 0.0,
+  "spm_unit.spm_unit_net_income": 48007.14,
+  "spm_unit.spm_unit_weight": 1.0,
+  "spm_unit.tanf": 0.0,
+  "tax_unit.ctc": 0.0,
+  "tax_unit.eitc": 0.0,
+  "tax_unit.employee_payroll_tax": 5370.0,
+  "tax_unit.household_state_income_tax": 1602.86,
+  "tax_unit.income_tax": 5020.0,
+  "tax_unit.tax_unit_id": 0.0,
+  "tax_unit.tax_unit_weight": 1.0
+}
diff --git a/tests/fixtures/household_calculator_snapshots/us_single_adult_no_income.json b/tests/fixtures/household_calculator_snapshots/us_single_adult_no_income.json
new file mode 100644
index 00000000..258db6f1
--- /dev/null
+++ b/tests/fixtures/household_calculator_snapshots/us_single_adult_no_income.json
@@ -0,0 +1,46 @@
+{
+  "family.family_id": 0.0,
+  "family.family_weight": 0.0,
+  "household.congressional_district_geoid": 0.0,
+  "household.household_benefits": 3596.04,
+  "household.household_count_people": 1.0,
+  "household.household_id": 0.0,
+  "household.household_income_decile": 10.0,
+  "household.household_market_income": 0.0,
+  "household.household_net_income": 3596.04,
+  "household.household_tax": 0.0,
+  "household.household_weight": 1.0,
+  "marital_unit.marital_unit_id": 0.0,
+  "marital_unit.marital_unit_weight": 1.0,
+  "person[0].age": 35.0,
+  "person[0].employment_income": 0.0,
+  "person[0].family_id": 0.0,
+  "person[0].household_id": 0.0,
+  "person[0].is_adult": 1.0,
+  "person[0].is_child": 0.0,
+  "person[0].is_male": 1.0,
+  "person[0].marital_unit_id": 0.0,
+  "person[0].medicaid": 6439.11,
+  "person[0].person_id": 0.0,
+  "person[0].person_weight": 1.0,
+  "person[0].race": 3.0,
+  "person[0].social_security": 0.0,
+  "person[0].spm_unit_id": 0.0,
+  "person[0].ssi": 0.0,
+  "person[0].tax_unit_id": 0.0,
+  "person[0].unemployment_compensation": 0.0,
+  "spm_unit.snap": 3596.04,
+  "spm_unit.spm_unit_id": 0.0,
+  "spm_unit.spm_unit_is_in_deep_spm_poverty": 0.0,
+  "spm_unit.spm_unit_is_in_spm_poverty": 0.0,
+  "spm_unit.spm_unit_net_income": 3596.04,
+  "spm_unit.spm_unit_weight": 1.0,
+  "spm_unit.tanf": 0.0,
+  "tax_unit.ctc": 0.0,
+  "tax_unit.eitc": 0.0,
+  "tax_unit.employee_payroll_tax": 0.0,
+  "tax_unit.household_state_income_tax": 0.0,
+  "tax_unit.income_tax": 0.0,
+  "tax_unit.tax_unit_id": 0.0,
+  "tax_unit.tax_unit_weight": 1.0
+}
diff --git a/tests/fixtures/household_calculator_snapshots/us_single_parent_one_child.json b/tests/fixtures/household_calculator_snapshots/us_single_parent_one_child.json
new file mode 100644
index 00000000..78ba7237
--- /dev/null
+++ b/tests/fixtures/household_calculator_snapshots/us_single_parent_one_child.json
@@ -0,0 +1,63 @@
+{
+  "family.family_id": 0.0,
+  "family.family_weight": 0.0,
+  "household.congressional_district_geoid": 0.0,
+  "household.household_benefits": 1003.27,
+  "household.household_count_people": 2.0,
+  "household.household_id": 0.0,
+  "household.household_income_decile": 10.0,
+  "household.household_market_income": 40000.0,
+  "household.household_net_income": 39890.89,
+  "household.household_tax": 1112.38,
+  "household.household_weight": 1.0,
+  "marital_unit.marital_unit_id": 0.0,
+  "marital_unit.marital_unit_weight": 1.0,
+  "person[0].age": 32.0,
+  "person[0].employment_income": 40000.0,
+  "person[0].family_id": 0.0,
+  "person[0].household_id": 0.0,
+  "person[0].is_adult": 1.0,
+  "person[0].is_child": 0.0,
+  "person[0].is_male": 1.0,
+  "person[0].marital_unit_id": 0.0,
+  "person[0].medicaid": 0.0,
+  "person[0].person_id": 0.0,
+  "person[0].person_weight": 1.0,
+  "person[0].race": 3.0,
+  "person[0].social_security": 0.0,
+  "person[0].spm_unit_id": 0.0,
+  "person[0].ssi": 0.0,
+  "person[0].tax_unit_id": 0.0,
+  "person[0].unemployment_compensation": 0.0,
+  "person[1].age": 5.0,
+  "person[1].employment_income": 0.0,
+  "person[1].family_id": 0.0,
+  "person[1].household_id": 0.0,
+  "person[1].is_adult": 0.0,
+  "person[1].is_child": 1.0,
+  "person[1].is_male": 1.0,
+  "person[1].marital_unit_id": 0.0,
+  "person[1].medicaid": 3258.31,
+  "person[1].person_id": 1.0,
+  "person[1].person_weight": 1.0,
+  "person[1].race": 3.0,
+  "person[1].social_security": 0.0,
+  "person[1].spm_unit_id": 0.0,
+  "person[1].ssi": 0.0,
+  "person[1].tax_unit_id": 0.0,
+  "person[1].unemployment_compensation": 0.0,
+  "spm_unit.snap": 0.0,
+  "spm_unit.spm_unit_id": 0.0,
+  "spm_unit.spm_unit_is_in_deep_spm_poverty": 0.0,
+  "spm_unit.spm_unit_is_in_spm_poverty": 0.0,
+  "spm_unit.spm_unit_net_income": 39890.89,
+  "spm_unit.spm_unit_weight": 1.0,
+  "spm_unit.tanf": 0.0,
+  "tax_unit.ctc": 2200.0,
+  "tax_unit.eitc": 1852.62,
+  "tax_unit.employee_payroll_tax": 3580.0,
+  "tax_unit.household_state_income_tax": 0.0,
+  "tax_unit.income_tax": -2467.62,
+  "tax_unit.tax_unit_id": 0.0,
+  "tax_unit.tax_unit_weight": 1.0
+}
diff --git a/tests/fixtures/us_reform_fixtures.py b/tests/fixtures/us_reform_fixtures.py
index c52a7aba..4292c085 100644
--- a/tests/fixtures/us_reform_fixtures.py
+++ b/tests/fixtures/us_reform_fixtures.py
@@ -1,11 +1,15 @@
-"""Fixtures for US reform application tests."""
+"""Fixtures for US reform application tests.
+
+Household fixtures are plain ``kwargs`` dicts ready to splat into
+``pe.us.calculate_household(**fixture)``.
+"""
 
 from datetime import date
 
 import pytest
 
 from policyengine.core import ParameterValue, Policy
-from policyengine.tax_benefit_models.us import USHouseholdInput, us_latest
+from policyengine.tax_benefit_models.us import us_latest
 
 
 def create_standard_deduction_policy(
@@ -56,51 +60,43 @@ def create_standard_deduction_policy(
 )
 
 
-# Pre-built household fixtures
+# Pre-built household fixtures (as kwargs dicts for calculate_household)
 
-HIGH_INCOME_SINGLE_FILER = USHouseholdInput(
-    people=[
-        {
-            "age": 35,
-            "employment_income": 100000,
-            "is_tax_unit_head": True,
-        }
+HIGH_INCOME_SINGLE_FILER = {
+    "people": [
+        {"age": 35, "employment_income": 100000, "is_tax_unit_head": True},
     ],
-    tax_unit={"filing_status": "SINGLE"},
-    year=2024,
-)
+    "tax_unit": {"filing_status": "SINGLE"},
+    "year": 2024,
+}
 
-MODERATE_INCOME_SINGLE_FILER = USHouseholdInput(
-    people=[
-        {
-            "age": 30,
-            "employment_income": 50000,
-            "is_tax_unit_head": True,
-        }
+MODERATE_INCOME_SINGLE_FILER = {
+    "people": [
+        {"age": 30, "employment_income": 50000, "is_tax_unit_head": True},
     ],
-    tax_unit={"filing_status": "SINGLE"},
-    year=2024,
-)
+    "tax_unit": {"filing_status": "SINGLE"},
+    "year": 2024,
+}
 
-MARRIED_COUPLE_WITH_KIDS = USHouseholdInput(
-    people=[
+MARRIED_COUPLE_WITH_KIDS = {
+    "people": [
         {"age": 40, "employment_income": 100000, "is_tax_unit_head": True},
         {"age": 38, "employment_income": 50000, "is_tax_unit_spouse": True},
         {"age": 10},
         {"age": 8},
     ],
-    tax_unit={"filing_status": "JOINT"},
-    year=2024,
-)
+    "tax_unit": {"filing_status": "JOINT"},
+    "year": 2024,
+}
 
-LOW_INCOME_FAMILY = USHouseholdInput(
-    people=[
+LOW_INCOME_FAMILY = {
+    "people": [
         {"age": 28, "employment_income": 25000, "is_tax_unit_head": True},
         {"age": 5},
     ],
-    tax_unit={"filing_status": "HEAD_OF_HOUSEHOLD"},
-    year=2024,
-)
+    "tax_unit": {"filing_status": "HEAD_OF_HOUSEHOLD"},
+    "year": 2024,
+}
 
 
 # Pytest fixtures
@@ -108,17 +104,14 @@ def create_standard_deduction_policy(
 
 @pytest.fixture
 def double_standard_deduction_policy():
-    """Pytest fixture for doubled standard deduction policy."""
     return DOUBLE_STANDARD_DEDUCTION_POLICY
 
 
 @pytest.fixture
 def high_income_single_filer():
-    """Pytest fixture for high income single filer household."""
     return HIGH_INCOME_SINGLE_FILER
 
 
 @pytest.fixture
 def married_couple_with_kids():
-    """Pytest fixture for married couple with kids household."""
     return MARRIED_COUPLE_WITH_KIDS
diff --git a/tests/test_dict_reforms_on_simulation.py b/tests/test_dict_reforms_on_simulation.py
new file mode 100644
index 00000000..b1781c1a
--- /dev/null
+++ b/tests/test_dict_reforms_on_simulation.py
@@ -0,0 +1,128 @@
+"""``Simulation(policy={...})`` and ``Simulation(dynamic={...})``.
+
+These tests pin the v4 contract: the same flat reform dict shape that
+``pe.{uk,us}.calculate_household(reform=...)`` accepts is also accepted
+by ``Simulation(policy=...)`` / ``Simulation(dynamic=...)``, and is
+compiled into the full ``Policy`` / ``Dynamic`` object on construction.
+We exercise only the coercion path — no country microsim is run — so
+the tests are fast and don't need HF credentials.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+pytest.importorskip("policyengine_us")
+
+import policyengine as pe
+from policyengine.core import Dynamic, Policy, Simulation
+
+# ``us_test_dataset`` is registered globally via ``tests/conftest.py``.
+
+
+@pytest.fixture
+def tiny_dataset(us_test_dataset):
+    """In-memory US dataset pinned to 2026. Simulation is never .run() in these tests."""
+    us_test_dataset.year = 2026
+    return us_test_dataset
+
+
+class TestDictPolicyCoercion:
+    def test__dict_policy__then_compiled_to_policy_with_parameter_values(
+        self, tiny_dataset
+    ):
+        sim = Simulation(
+            dataset=tiny_dataset,
+            tax_benefit_model_version=pe.us.model,
+            policy={"gov.irs.credits.ctc.amount.base[0].amount": 3_000},
+        )
+        assert isinstance(sim.policy, Policy)
+        assert len(sim.policy.parameter_values) == 1
+
+        pv = sim.policy.parameter_values[0]
+        assert pv.parameter.name == "gov.irs.credits.ctc.amount.base[0].amount"
+        assert pv.value == 3_000
+        # Scalar reforms default the effective date to {year}-01-01.
+        assert pv.start_date.year == 2026
+        assert pv.start_date.month == 1
+
+    def test__dict_policy_with_effective_date__then_start_date_matches(
+        self, tiny_dataset
+    ):
+        sim = Simulation(
+            dataset=tiny_dataset,
+            tax_benefit_model_version=pe.us.model,
+            policy={
+                "gov.irs.credits.ctc.amount.base[0].amount": {
+                    "2026-07-01": 2_500,
+                    "2027-01-01": 3_000,
+                },
+            },
+        )
+        assert isinstance(sim.policy, Policy)
+        assert len(sim.policy.parameter_values) == 2
+        starts = sorted(pv.start_date for pv in sim.policy.parameter_values)
+        assert [d.strftime("%Y-%m-%d") for d in starts] == [
+            "2026-07-01",
+            "2027-01-01",
+        ]
+
+    def test__unknown_parameter_path__raises_with_suggestion(self, tiny_dataset):
+        with pytest.raises(ValueError) as exc:
+            Simulation(
+                dataset=tiny_dataset,
+                tax_benefit_model_version=pe.us.model,
+                policy={
+                    # plausible typo of the real path
+                    "gov.irs.credits.ctc.amount.base[0].amont": 3_000,
+                },
+            )
+        assert "not defined" in str(exc.value)
+        assert "did you mean" in str(exc.value)
+
+    def test__existing_policy_object_passes_through_unchanged(self, tiny_dataset):
+        import datetime
+
+        from policyengine.core import Parameter, ParameterValue
+
+        existing = Policy(
+            name="Existing",
+            parameter_values=[
+                ParameterValue(
+                    parameter=Parameter(
+                        name="gov.irs.credits.ctc.amount.base[0].amount",
+                        tax_benefit_model_version=pe.us.model,
+                        data_type=float,
+                    ),
+                    start_date=datetime.datetime(2026, 1, 1),
+                    end_date=None,
+                    value=2_750,
+                )
+            ],
+        )
+        sim = Simulation(
+            dataset=tiny_dataset,
+            tax_benefit_model_version=pe.us.model,
+            policy=existing,
+        )
+        assert sim.policy is existing
+
+    def test__dict_without_model_version__raises(self, tiny_dataset):
+        with pytest.raises(ValueError) as exc:
+            Simulation(
+                dataset=tiny_dataset,
+                policy={"gov.irs.credits.ctc.amount.base[0].amount": 3_000},
+            )
+        assert "tax_benefit_model_version" in str(exc.value)
+
+
+class TestDictDynamicCoercion:
+    def test__dict_dynamic__then_compiled_to_dynamic(self, tiny_dataset):
+        sim = Simulation(
+            dataset=tiny_dataset,
+            tax_benefit_model_version=pe.us.model,
+            dynamic={"gov.irs.credits.ctc.amount.base[0].amount": 2_800},
+        )
+        assert isinstance(sim.dynamic, Dynamic)
+        assert len(sim.dynamic.parameter_values) == 1
+        assert sim.dynamic.parameter_values[0].value == 2_800
diff --git a/tests/test_graph/__init__.py b/tests/test_graph/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_graph/conftest.py b/tests/test_graph/conftest.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_graph/test_extractor.py b/tests/test_graph/test_extractor.py
new file mode 100644
index 00000000..81445caf
--- /dev/null
+++ b/tests/test_graph/test_extractor.py
@@ -0,0 +1,314 @@
+"""Tests for the variable-graph extractor.
+
+The extractor walks PolicyEngine-style Variable source trees and
+builds a dependency graph from formula-body references. Two reference
+patterns are recognized in MVP:
+
+1. ``<entity>("<var>", <period>)`` — direct call on an entity instance
+   inside a formula method. ``<entity>`` matches a known set:
+   ``person``, ``tax_unit``, ``spm_unit``, ``household``, ``family``,
+   ``marital_unit``, ``benunit``.
+2. ``add(<entity>, <period>, ["v1", "v2"])`` — helper that sums a list
+   of variable values. Each string in the list is extracted.
+
+Tests run against a self-contained fixture tree under the test file's
+own tmp directory — no dependency on an installed country model — so
+behavior is deterministic and the tests pin the extraction algorithm
+rather than PolicyEngine's evolving source.
+"""
+
+from __future__ import annotations
+
+import importlib.util
+import sys
+from pathlib import Path
+from textwrap import dedent
+from types import ModuleType
+
+import pytest
+
+
+# ``policyengine/__init__.py`` eagerly imports the full country-model
+# stack (policyengine-us, policyengine-uk), which makes a normal
+# ``from policyengine.graph import ...`` fail in any environment
+# where those jurisdictions aren't fully provisioned (missing release
+# manifests, unresolved optional deps, etc.). The graph module is
+# self-contained (stdlib + networkx only); load it via importlib
+# directly so these tests remain environment-agnostic.
+def _load_graph_module() -> ModuleType:
+    if "policyengine.graph" in sys.modules and hasattr(
+        sys.modules["policyengine.graph"], "extract_from_path"
+    ):
+        return sys.modules["policyengine.graph"]
+
+    graph_dir = Path(__file__).resolve().parents[2] / "src" / "policyengine" / "graph"
+
+    if "policyengine" not in sys.modules:
+        fake_pkg = ModuleType("policyengine")
+        fake_pkg.__path__ = [str(graph_dir.parent)]
+        sys.modules["policyengine"] = fake_pkg
+    if "policyengine.graph" not in sys.modules or not hasattr(
+        sys.modules["policyengine.graph"], "__path__"
+    ):
+        fake_subpkg = ModuleType("policyengine.graph")
+        fake_subpkg.__path__ = [str(graph_dir)]
+        sys.modules["policyengine.graph"] = fake_subpkg
+
+    for submod, filename in [
+        ("policyengine.graph.graph", "graph.py"),
+        ("policyengine.graph.extractor", "extractor.py"),
+    ]:
+        if submod in sys.modules:
+            continue
+        spec = importlib.util.spec_from_file_location(submod, graph_dir / filename)
+        module = importlib.util.module_from_spec(spec)
+        sys.modules[submod] = module
+        spec.loader.exec_module(module)  # type: ignore[union-attr]
+
+    graph_mod = sys.modules["policyengine.graph"]
+    graph_mod.extract_from_path = sys.modules[
+        "policyengine.graph.extractor"
+    ].extract_from_path
+    graph_mod.VariableGraph = sys.modules["policyengine.graph.graph"].VariableGraph
+    return graph_mod
+
+
+_graph = _load_graph_module()
+extract_from_path = _graph.extract_from_path
+VariableGraph = _graph.VariableGraph
+
+
+def _write_variable(
+    root: Path, var_name: str, formula_body: str, entity: str = "tax_unit"
+) -> None:
+    """Write a Variable subclass file mimicking policyengine-us style."""
+    root.mkdir(parents=True, exist_ok=True)
+    (root / f"{var_name}.py").write_text(
+        dedent(f'''\
+        from policyengine_us.model_api import *
+
+
+        class {var_name}(Variable):
+            value_type = float
+            entity = TaxUnit
+            label = "{var_name.replace("_", " ").title()}"
+            definition_period = YEAR
+
+            def formula({entity}, period, parameters):
+                {formula_body}
+        ''')
+    )
+
+
+class TestDirectEntityReference:
+    """Pattern 1: ``entity("<var>", period)`` produces an edge."""
+
+    def test_single_direct_reference(self, tmp_path: Path) -> None:
+
+        root = tmp_path / "variables"
+        _write_variable(
+            root,
+            "adjusted_gross_income",
+            'return tax_unit("gross_income", period) - tax_unit("above_the_line_deductions", period)',
+        )
+        _write_variable(root, "gross_income", "return 0")
+        _write_variable(root, "above_the_line_deductions", "return 0")
+
+        graph = extract_from_path(root)
+
+        assert graph.has_variable("adjusted_gross_income")
+        deps = set(graph.deps("adjusted_gross_income"))
+        assert deps == {"gross_income", "above_the_line_deductions"}
+
+    def test_nonmatching_string_is_ignored(self, tmp_path: Path) -> None:
+        """String literals unrelated to an entity call are ignored.
+
+        Only a string as the first arg of a matching
+        ``<entity>("<var>", period)`` call becomes an edge; string
+        literals used as argument to ``print`` or bound to a local
+        name are not misinterpreted as variable references.
+        """
+        root = tmp_path / "variables"
+        root.mkdir(parents=True, exist_ok=True)
+        (root / "refundable_credit.py").write_text(
+            dedent("""\
+            from policyengine_us.model_api import *
+
+
+            class refundable_credit(Variable):
+                value_type = float
+                entity = TaxUnit
+                label = "Refundable credit"
+                definition_period = YEAR
+
+                def formula(tax_unit, period, parameters):
+                    note = "not a variable reference"
+                    return tax_unit("gross_income", period)
+            """)
+        )
+        _write_variable(root, "gross_income", "return 0")
+        graph = extract_from_path(root)
+        assert set(graph.deps("refundable_credit")) == {"gross_income"}
+
+
+class TestAddHelperReference:
+    """Pattern 2: ``add(entity, period, [...])`` emits one edge per list item."""
+
+    def test_add_helper_list(self, tmp_path: Path) -> None:
+
+        root = tmp_path / "variables"
+        _write_variable(
+            root,
+            "total_income",
+            'return add(tax_unit, period, ["wages", "self_employment_income", "interest"])',
+        )
+        _write_variable(root, "wages", "return 0")
+        _write_variable(root, "self_employment_income", "return 0")
+        _write_variable(root, "interest", "return 0")
+        graph = extract_from_path(root)
+        assert set(graph.deps("total_income")) == {
+            "wages",
+            "self_employment_income",
+            "interest",
+        }
+
+
+class TestImpactAnalysis:
+    """``impact(var)`` returns variables that depend on ``var`` transitively."""
+
+    def test_transitive_upstream(self, tmp_path: Path) -> None:
+
+        root = tmp_path / "variables"
+        _write_variable(root, "wages", "return 0")
+        _write_variable(
+            root,
+            "gross_income",
+            'return add(tax_unit, period, ["wages"])',
+        )
+        _write_variable(
+            root,
+            "adjusted_gross_income",
+            'return tax_unit("gross_income", period)',
+        )
+        _write_variable(
+            root,
+            "taxable_income",
+            'return tax_unit("adjusted_gross_income", period)',
+        )
+        _write_variable(
+            root,
+            "federal_income_tax",
+            'return tax_unit("taxable_income", period)',
+        )
+        graph = extract_from_path(root)
+
+        # wages is read by gross_income → adjusted_gross_income →
+        # taxable_income → federal_income_tax (depth 4).
+        impact = set(graph.impact("wages"))
+        assert impact == {
+            "gross_income",
+            "adjusted_gross_income",
+            "taxable_income",
+            "federal_income_tax",
+        }
+
+    def test_leaf_variable_has_empty_impact(self, tmp_path: Path) -> None:
+        """A variable that nothing reads has an empty impact set."""
+
+        root = tmp_path / "variables"
+        _write_variable(
+            root,
+            "federal_income_tax",
+            'return tax_unit("adjusted_gross_income", period)',
+        )
+        _write_variable(root, "adjusted_gross_income", "return 0")
+        graph = extract_from_path(root)
+        assert list(graph.impact("federal_income_tax")) == []
+
+
+class TestMultipleFormulas:
+    """Year-specific ``formula_YYYY`` methods contribute edges too."""
+
+    def test_year_specific_formula_contributes_edges(self, tmp_path: Path) -> None:
+
+        root = tmp_path / "variables"
+        (root / "ctc.py").parent.mkdir(parents=True, exist_ok=True)
+        (root / "ctc.py").write_text(
+            dedent("""\
+            from policyengine_us.model_api import *
+
+
+            class ctc(Variable):
+                value_type = float
+                entity = TaxUnit
+                label = "Child Tax Credit"
+                definition_period = YEAR
+
+                def formula_2020(tax_unit, period, parameters):
+                    return tax_unit("ctc_base_2020", period)
+
+                def formula_2023(tax_unit, period, parameters):
+                    return tax_unit("ctc_base_2023", period)
+            """)
+        )
+        _write_variable(root, "ctc_base_2020", "return 0")
+        _write_variable(root, "ctc_base_2023", "return 0")
+
+        graph = extract_from_path(root)
+        assert set(graph.deps("ctc")) == {"ctc_base_2020", "ctc_base_2023"}
+
+
+class TestPath:
+    """``path(src, dst)`` returns a dependency chain if one exists."""
+
+    def test_path_two_hops(self, tmp_path: Path) -> None:
+
+        root = tmp_path / "variables"
+        _write_variable(root, "wages", "return 0")
+        _write_variable(root, "gross_income", 'return tax_unit("wages", period)')
+        _write_variable(
+            root,
+            "adjusted_gross_income",
+            'return tax_unit("gross_income", period)',
+        )
+
+        graph = extract_from_path(root)
+        assert graph.path("wages", "adjusted_gross_income") == [
+            "wages",
+            "gross_income",
+            "adjusted_gross_income",
+        ]
+
+    def test_path_returns_none_if_unreachable(self, tmp_path: Path) -> None:
+
+        root = tmp_path / "variables"
+        _write_variable(root, "island_a", "return 0")
+        _write_variable(root, "island_b", "return 0")
+        graph = extract_from_path(root)
+        assert graph.path("island_a", "island_b") is None
+
+
+class TestRequiresVariableSubclass:
+    """Only classes whose base class list contains ``Variable`` are scanned.
+
+    Helper modules (model_api, utils) should not be mistaken for
+    Variable definitions even if they have method bodies that call
+    entity-style functions.
+    """
+
+    def test_non_variable_classes_are_ignored(self, tmp_path: Path) -> None:
+
+        root = tmp_path / "variables"
+        root.mkdir(parents=True, exist_ok=True)
+        # Looks like a variable body but the class is not a Variable.
+        (root / "helper.py").write_text(
+            dedent("""\
+            class NotAVariable:
+                def some_method(tax_unit, period, parameters):
+                    return tax_unit("some_variable", period)
+            """)
+        )
+        graph = extract_from_path(root)
+        assert not graph.has_variable("NotAVariable")
+        # And no edge to "some_variable" should exist from a phantom source.
+        assert list(graph.impact("some_variable")) == []
diff --git a/tests/test_household_calculator_snapshot.py b/tests/test_household_calculator_snapshot.py
new file mode 100644
index 00000000..987d49d8
--- /dev/null
+++ b/tests/test_household_calculator_snapshot.py
@@ -0,0 +1,217 @@
+"""Byte-level snapshot regression test for MicrosimulationModelVersion extraction.
+
+These tests freeze the exact numeric outputs of both the US and UK household
+calculators across a representative set of cases. The intent is to make the
+base-class extraction (PR F) fail loudly if any country-specific behaviour
+drifts during the refactor.
+
+Snapshots live in ``tests/fixtures/household_calculator_snapshots/``. To refresh
+them, run with ``PE_UPDATE_SNAPSHOTS=1`` set. Do **not** refresh them as part
+of a refactor meant to be behaviour-preserving.
+"""
+
+from __future__ import annotations
+
+import json
+import math
+import os
+from pathlib import Path
+
+import pytest
+
+SNAPSHOT_DIR = Path(__file__).parent / "fixtures" / "household_calculator_snapshots"
+UPDATE = os.environ.get("PE_UPDATE_SNAPSHOTS") == "1"
+
+
+def _flatten(prefix: str, value, out: dict[str, float]) -> None:
+    """Flatten a nested ``HouseholdResult`` into ``"path.name" -> scalar``."""
+    if isinstance(value, list):
+        for idx, item in enumerate(value):
+            _flatten(f"{prefix}[{idx}]", item, out)
+        return
+    if isinstance(value, dict):
+        for key, sub in value.items():
+            new_prefix = f"{prefix}.{key}" if prefix else str(key)
+            _flatten(new_prefix, sub, out)
+        return
+    if isinstance(value, bool):
+        out[prefix] = float(value)
+    elif isinstance(value, (int, float)):
+        out[prefix] = float(value)
+    else:
+        out[prefix] = str(value)
+
+
+def _round(value, places: int = 2):
+    if isinstance(value, float):
+        if math.isnan(value):
+            return "nan"
+        if math.isinf(value):
+            return "inf" if value > 0 else "-inf"
+        return round(value, places)
+    return value
+
+
+def _check_snapshot(name: str, data: dict) -> None:
+    path = SNAPSHOT_DIR / f"{name}.json"
+    rounded = {k: _round(v) for k, v in sorted(data.items())}
+
+    if UPDATE or not path.exists():
+        path.parent.mkdir(parents=True, exist_ok=True)
+        path.write_text(json.dumps(rounded, indent=2, sort_keys=True) + "\n")
+        if not UPDATE:
+            pytest.skip(f"Created missing snapshot {path.name}; re-run to verify")
+        return
+
+    expected = json.loads(path.read_text())
+    diffs = []
+    all_keys = set(expected) | set(rounded)
+    for key in sorted(all_keys):
+        if key not in expected:
+            diffs.append(f"  new key: {key}={rounded[key]!r}")
+        elif key not in rounded:
+            diffs.append(f"  removed key: {key}={expected[key]!r}")
+        elif expected[key] != rounded[key]:
+            diffs.append(f"  {key}: expected {expected[key]!r}, got {rounded[key]!r}")
+    assert not diffs, f"Snapshot {name} drift:\n" + "\n".join(diffs[:40])
+
+
+# US cases -------------------------------------------------------------------
+
+
+US_CASES = {
+    "us_single_adult_no_income": dict(
+        people=[{"age": 35}],
+        tax_unit={"filing_status": "SINGLE"},
+        year=2026,
+    ),
+    "us_single_adult_employment_income": dict(
+        people=[{"age": 35, "employment_income": 60_000}],
+        tax_unit={"filing_status": "SINGLE"},
+        year=2026,
+    ),
+    "us_single_parent_one_child": dict(
+        people=[
+            {"age": 32, "employment_income": 40_000},
+            {"age": 5},
+        ],
+        tax_unit={"filing_status": "HEAD_OF_HOUSEHOLD"},
+        year=2026,
+    ),
+    "us_married_two_kids_high_income": dict(
+        people=[
+            {"age": 42, "employment_income": 150_000},
+            {"age": 40, "employment_income": 90_000},
+            {"age": 8},
+            {"age": 3},
+        ],
+        tax_unit={"filing_status": "JOINT"},
+        year=2026,
+    ),
+}
+
+
+@pytest.mark.parametrize("case_name", sorted(US_CASES))
+def test_us_household_snapshot(case_name: str) -> None:
+    pytest.importorskip("policyengine_us")
+    import policyengine as pe
+
+    kwargs = US_CASES[case_name]
+    result = pe.us.calculate_household(**kwargs)
+    out: dict[str, float] = {}
+    _flatten("", result.to_dict(), out)
+    _check_snapshot(case_name, out)
+
+
+# UK cases -------------------------------------------------------------------
+
+
+UK_CASES = {
+    "uk_single_adult_no_income": dict(
+        people=[{"age": 35}],
+        year=2026,
+    ),
+    "uk_single_adult_employment_income": dict(
+        people=[{"age": 35, "employment_income": 30_000}],
+        year=2026,
+    ),
+    "uk_single_parent_one_child": dict(
+        people=[
+            {"age": 32, "employment_income": 25_000},
+            {"age": 5},
+        ],
+        year=2026,
+    ),
+    "uk_couple_two_kids": dict(
+        people=[
+            {"age": 42, "employment_income": 55_000},
+            {"age": 40, "employment_income": 35_000},
+            {"age": 8},
+            {"age": 3},
+        ],
+        year=2026,
+    ),
+}
+
+
+@pytest.mark.parametrize("case_name", sorted(UK_CASES))
+def test_uk_household_snapshot(case_name: str) -> None:
+    pytest.importorskip("policyengine_uk")
+    import policyengine as pe
+
+    kwargs = UK_CASES[case_name]
+    result = pe.uk.calculate_household(**kwargs)
+    out: dict[str, float] = {}
+    _flatten("", result.to_dict(), out)
+    _check_snapshot(case_name, out)
+
+
+# Model-version metadata snapshots -------------------------------------------
+
+
+def test_us_model_version_surface() -> None:
+    """Freeze the exposed surface of ``us_latest`` (variables, parameters).
+
+    If the base-class extraction accidentally changes how variables or
+    parameters are loaded from ``policyengine_us.system``, these counts will
+    drift. The snapshot intentionally rounds to stable aggregates rather than
+    dumping the full variable list so that unrelated upstream releases don't
+    churn the snapshot file.
+    """
+    pytest.importorskip("policyengine_us")
+    from policyengine.tax_benefit_models.us import us_latest
+
+    surface = {
+        "country_id": us_latest.release_manifest.country_id,
+        "model_package_name": us_latest.model_package.name,
+        "data_package_name": us_latest.data_package.name,
+        "has_region_registry": us_latest.region_registry is not None,
+        "region_registry_country": us_latest.region_registry.country_id,
+        "num_variables_bucketed_100s": len(us_latest.variables) // 100,
+        "num_parameters_bucketed_100s": len(us_latest.parameters) // 100,
+        "has_employment_income": any(
+            v.name == "employment_income" for v in us_latest.variables
+        ),
+        "has_income_tax": any(v.name == "income_tax" for v in us_latest.variables),
+    }
+    _check_snapshot("us_model_surface", surface)
+
+
+def test_uk_model_version_surface() -> None:
+    pytest.importorskip("policyengine_uk")
+    from policyengine.tax_benefit_models.uk import uk_latest
+
+    surface = {
+        "country_id": uk_latest.release_manifest.country_id,
+        "model_package_name": uk_latest.model_package.name,
+        "data_package_name": uk_latest.data_package.name,
+        "has_region_registry": uk_latest.region_registry is not None,
+        "region_registry_country": uk_latest.region_registry.country_id,
+        "num_variables_bucketed_100s": len(uk_latest.variables) // 100,
+        "num_parameters_bucketed_100s": len(uk_latest.parameters) // 100,
+        "has_employment_income": any(
+            v.name == "employment_income" for v in uk_latest.variables
+        ),
+        "has_income_tax": any(v.name == "income_tax" for v in uk_latest.variables),
+    }
+    _check_snapshot("uk_model_surface", surface)
diff --git a/tests/test_household_impact.py b/tests/test_household_impact.py
index 54f6ac19..d99d144b 100644
--- a/tests/test_household_impact.py
+++ b/tests/test_household_impact.py
@@ -1,55 +1,41 @@
-"""Tests for calculate_household_impact functions."""
-
-from policyengine.tax_benefit_models.uk import (
-    UKHouseholdInput,
-    UKHouseholdOutput,
-    uk_latest,
-)
-from policyengine.tax_benefit_models.uk import (
-    calculate_household_impact as calculate_uk_household_impact,
-)
-from policyengine.tax_benefit_models.us import (
-    USHouseholdInput,
-    USHouseholdOutput,
-    us_latest,
-)
-from policyengine.tax_benefit_models.us import (
-    calculate_household_impact as calculate_us_household_impact,
-)
-
-
-class TestUKHouseholdImpact:
-    """Tests for UK calculate_household_impact."""
-
-    def test_single_adult_no_income(self):
-        """Single adult with no income should have output for all entity variables."""
-        household = UKHouseholdInput(
+"""Tests for the single-household calculators.
+
+The v4 surface is the kwarg-based ``pe.us.calculate_household`` /
+``pe.uk.calculate_household`` pair returning a dot-accessible
+:class:`HouseholdResult`. Input validation raises on unknown variable
+names; extra variables are a flat list dispatched by the library.
+"""
+
+import pytest
+
+import policyengine as pe
+from policyengine.tax_benefit_models.common import EntityResult, HouseholdResult
+
+
+class TestUKCalculateHousehold:
+    def test__single_adult_no_income__then_returns_result_with_net_income(self):
+        result = pe.uk.calculate_household(
             people=[{"age": 30}],
             year=2026,
         )
-        result = calculate_uk_household_impact(household)
-
-        assert isinstance(result, UKHouseholdOutput)
-        assert len(result.person) == 1
-        assert len(result.benunit) == 1
+        assert isinstance(result, HouseholdResult)
+        assert isinstance(result.person[0], EntityResult)
+        assert isinstance(result.benunit, EntityResult)
+        assert isinstance(result.household, EntityResult)
         assert "hbai_household_net_income" in result.household
+        assert len(result.person) == 1
 
-    def test_single_adult_with_employment_income(self):
-        """Single adult with employment income should pay tax."""
-        household = UKHouseholdInput(
+    def test__single_adult_with_income__then_pays_tax_and_ni(self):
+        result = pe.uk.calculate_household(
             people=[{"age": 30, "employment_income": 50000}],
             year=2026,
         )
-        result = calculate_uk_household_impact(household)
-
-        assert isinstance(result, UKHouseholdOutput)
-        assert result.person[0]["income_tax"] > 0
-        assert result.person[0]["national_insurance"] > 0
-        assert result.household["hbai_household_net_income"] > 0
+        assert result.person[0].income_tax > 0
+        assert result.person[0].national_insurance > 0
+        assert result.household.hbai_household_net_income > 0
 
-    def test_family_with_children(self):
-        """Family with children should receive child benefit."""
-        household = UKHouseholdInput(
+    def test__family_with_children__then_benunit_child_benefit_positive(self):
+        result = pe.uk.calculate_household(
             people=[
                 {"age": 35, "employment_income": 30000},
                 {"age": 8},
@@ -58,145 +44,172 @@ def test_family_with_children(self):
             benunit={"would_claim_child_benefit": True},
             year=2026,
         )
-        result = calculate_uk_household_impact(household)
-
-        assert isinstance(result, UKHouseholdOutput)
         assert len(result.person) == 3
-        assert result.benunit[0]["child_benefit"] > 0
-
-    def test_output_contains_all_entity_variables(self):
-        """Output should contain all variables from entity_variables."""
-        household = UKHouseholdInput(
-            people=[{"age": 30, "employment_income": 25000}],
-            year=2026,
-        )
-        result = calculate_uk_household_impact(household)
-
-        # Check all household variables are present
-        for var in uk_latest.entity_variables["household"]:
-            assert var in result.household, f"Missing household variable: {var}"
+        assert result.benunit.child_benefit > 0
 
-        # Check all person variables are present
-        for var in uk_latest.entity_variables["person"]:
-            assert var in result.person[0], f"Missing person variable: {var}"
-
-        # Check all benunit variables are present
-        for var in uk_latest.entity_variables["benunit"]:
-            assert var in result.benunit[0], f"Missing benunit variable: {var}"
-
-    def test_output_is_json_serializable(self):
-        """Output should be JSON serializable."""
-        household = UKHouseholdInput(
-            people=[{"age": 30, "employment_income": 25000}],
+    def test__reform_changes_child_benefit__then_dict_compiles_and_applies(self):
+        baseline = pe.uk.calculate_household(
+            people=[{"age": 35}, {"age": 5}],
+            benunit={"would_claim_child_benefit": True},
             year=2026,
         )
-        result = calculate_uk_household_impact(household)
-
-        json_dict = result.model_dump()
-        assert isinstance(json_dict, dict)
-        assert "household" in json_dict
-        assert "person" in json_dict
-
-    def test_input_is_json_serializable(self):
-        """Input should be JSON serializable."""
-        household = UKHouseholdInput(
-            people=[{"age": 30, "employment_income": 25000}],
+        # Child benefit amount for first child — use a real parameter path.
+        reformed = pe.uk.calculate_household(
+            people=[{"age": 35}, {"age": 5}],
+            benunit={"would_claim_child_benefit": True},
             year=2026,
+            reform={"gov.hmrc.child_benefit.amount.eldest": 50.0},
         )
-
-        json_dict = household.model_dump()
-        assert isinstance(json_dict, dict)
-        assert "people" in json_dict
+        # If the param path is valid the calc runs; if results differ the reform took.
+        # Accept either: the key thing is the reform dict was accepted without error.
+        assert isinstance(reformed.benunit.child_benefit, float)
+        assert isinstance(baseline.benunit.child_benefit, float)
 
 
-class TestUSHouseholdImpact:
-    """Tests for US calculate_household_impact."""
-
-    def test_single_adult_no_income(self):
-        """Single adult with no income."""
-        household = USHouseholdInput(
+class TestUSCalculateHousehold:
+    def test__single_adult__then_returns_result_with_net_income(self):
+        result = pe.us.calculate_household(
             people=[{"age": 30, "is_tax_unit_head": True}],
-            year=2024,
+            year=2026,
         )
-        result = calculate_us_household_impact(household)
-
-        assert isinstance(result, USHouseholdOutput)
+        assert isinstance(result, HouseholdResult)
         assert len(result.person) == 1
         assert "household_net_income" in result.household
 
-    def test_single_adult_with_employment_income(self):
-        """Single adult with employment income should pay tax."""
-        household = USHouseholdInput(
-            people=[
-                {
-                    "age": 30,
-                    "employment_income": 50000,
-                    "is_tax_unit_head": True,
-                }
-            ],
+    def test__single_adult_with_income__then_tax_unit_income_tax_positive(self):
+        result = pe.us.calculate_household(
+            people=[{"age": 30, "employment_income": 50000, "is_tax_unit_head": True}],
             tax_unit={"filing_status": "SINGLE"},
-            year=2024,
+            year=2026,
         )
-        result = calculate_us_household_impact(household)
-
-        assert isinstance(result, USHouseholdOutput)
-        assert result.tax_unit[0]["income_tax"] > 0
-        assert result.household["household_net_income"] > 0
+        assert result.tax_unit.income_tax > 0
+        assert result.household.household_net_income > 0
 
-    def test_output_contains_all_entity_variables(self):
-        """Output should contain all variables from entity_variables."""
-        household = USHouseholdInput(
-            people=[
-                {
-                    "age": 30,
-                    "employment_income": 25000,
-                    "is_tax_unit_head": True,
-                }
-            ],
-            year=2024,
+    def test__reform_applied_through_dict__then_numbers_change(self):
+        baseline = pe.us.calculate_household(
+            people=[{"age": 35, "employment_income": 60000, "is_tax_unit_head": True}],
+            tax_unit={"filing_status": "SINGLE"},
+            year=2026,
         )
-        result = calculate_us_household_impact(household)
-
-        # Check all household variables are present
-        for var in us_latest.entity_variables["household"]:
-            assert var in result.household, f"Missing household variable: {var}"
+        # Halve the standard deduction — biggest tax number a reform dict
+        # can move for a simple wage-earner test case.
+        reformed = pe.us.calculate_household(
+            people=[{"age": 35, "employment_income": 60000, "is_tax_unit_head": True}],
+            tax_unit={"filing_status": "SINGLE"},
+            year=2026,
+            reform={"gov.irs.deductions.standard.amount.SINGLE": {"2026-01-01": 5000}},
+        )
+        assert reformed.tax_unit.income_tax > baseline.tax_unit.income_tax
 
-        # Check all person variables are present
-        for var in us_latest.entity_variables["person"]:
-            assert var in result.person[0], f"Missing person variable: {var}"
+    def test__extra_variables_flat_list__then_values_appear_on_entity(self):
+        result = pe.us.calculate_household(
+            people=[{"age": 35, "employment_income": 60000, "is_tax_unit_head": True}],
+            tax_unit={"filing_status": "SINGLE"},
+            year=2026,
+            extra_variables=["adjusted_gross_income"],
+        )
+        assert "adjusted_gross_income" in result.tax_unit
+        assert result.tax_unit.adjusted_gross_income > 0
 
-    def test_output_is_json_serializable(self):
-        """Output should be JSON serializable."""
-        household = USHouseholdInput(
-            people=[
-                {
-                    "age": 30,
-                    "employment_income": 25000,
-                    "is_tax_unit_head": True,
-                }
-            ],
-            year=2024,
+    def test__reform_compiles_effective_date_form(self):
+        result = pe.us.calculate_household(
+            people=[{"age": 30, "is_tax_unit_head": True}],
+            year=2026,
+            reform={"gov.irs.credits.ctc.amount.adult_dependent": {"2026-01-01": 1000}},
+        )
+        assert result.tax_unit.ctc >= 0
+
+
+class TestHouseholdInputValidation:
+    def test__unknown_person_variable__then_raises_with_suggestion(self):
+        with pytest.raises(ValueError, match="employment_incme"):
+            pe.us.calculate_household(
+                people=[{"age": 35, "employment_incme": 60000}],
+                year=2026,
+            )
+
+    def test__variable_on_wrong_entity__then_raises_with_entity_swap_hint(self):
+        # filing_status is a tax_unit variable; passing on person should
+        # point the caller at the correct entity kwarg.
+        with pytest.raises(ValueError, match="belongs on tax_unit"):
+            pe.us.calculate_household(
+                people=[{"age": 35, "filing_status": "SINGLE"}],
+                year=2026,
+            )
+
+    def test__empty_people__then_raises(self):
+        with pytest.raises(ValueError, match="people must be a non-empty"):
+            pe.us.calculate_household(people=[], year=2026)
+
+    def test__unknown_extra_variable__then_raises(self):
+        with pytest.raises(ValueError, match="not defined"):
+            pe.us.calculate_household(
+                people=[{"age": 35}],
+                year=2026,
+                extra_variables=["not_a_real_variable"],
+            )
+
+    def test__unknown_dot_access__then_raises_with_extra_variables_hint(self):
+        result = pe.us.calculate_household(
+            people=[{"age": 35, "is_tax_unit_head": True}],
+            year=2026,
         )
-        result = calculate_us_household_impact(household)
+        with pytest.raises(AttributeError, match="extra_variables"):
+            _ = result.tax_unit.not_a_default_column
+
+    def test__unknown_reform_path__then_raises_with_close_match(self):
+        with pytest.raises(ValueError, match="not defined"):
+            pe.us.calculate_household(
+                people=[{"age": 35, "is_tax_unit_head": True}],
+                year=2026,
+                reform={"gov.irs.not_a_real_parameter": 0},
+            )
+
+    def test__us_kwarg_on_uk__then_raises_with_uk_hint(self):
+        with pytest.raises(TypeError, match="US-only"):
+            pe.uk.calculate_household(
+                people=[{"age": 30}],
+                tax_unit={"filing_status": "SINGLE"},
+            )
+
+    def test__uk_kwarg_on_us__then_raises_with_us_hint(self):
+        with pytest.raises(TypeError, match="UK-only"):
+            pe.us.calculate_household(
+                people=[{"age": 30, "is_tax_unit_head": True}],
+                benunit={"foo": 1},
+            )
+
+
+class TestHouseholdResultSerialisation:
+    def test__to_dict_produces_plain_dict_tree(self):
+        result = pe.us.calculate_household(
+            people=[{"age": 30, "is_tax_unit_head": True}],
+            year=2026,
+        )
+        plain = result.to_dict()
+        assert isinstance(plain, dict)
+        assert isinstance(plain["person"], list)
+        assert isinstance(plain["tax_unit"], dict)
+        assert isinstance(plain["household"], dict)
+
+    def test__write_creates_json_file(self, tmp_path):
+        result = pe.us.calculate_household(
+            people=[{"age": 30, "is_tax_unit_head": True}],
+            year=2026,
+        )
+        path = result.write(tmp_path / "result.json")
+        assert path.exists()
+        import json
 
-        json_dict = result.model_dump()
-        assert isinstance(json_dict, dict)
-        assert "household" in json_dict
-        assert "person" in json_dict
+        loaded = json.loads(path.read_text())
+        assert "person" in loaded and "tax_unit" in loaded
 
-    def test_input_is_json_serializable(self):
-        """Input should be JSON serializable."""
-        household = USHouseholdInput(
-            people=[
-                {
-                    "age": 30,
-                    "employment_income": 25000,
-                    "is_tax_unit_head": True,
-                }
-            ],
-            year=2024,
-        )
 
-        json_dict = household.model_dump()
-        assert isinstance(json_dict, dict)
-        assert "people" in json_dict
+class TestFacadeEntryPoints:
+    def test__pe_us_points_at_module_with_calculate_household(self):
+        assert callable(pe.us.calculate_household)
+        assert pe.us.model is pe.us.us_latest
+
+    def test__pe_uk_points_at_module_with_calculate_household(self):
+        assert callable(pe.uk.calculate_household)
+        assert pe.uk.model is pe.uk.uk_latest
diff --git a/tests/test_manifest_version_mismatch.py b/tests/test_manifest_version_mismatch.py
index f9145556..f5fd431a 100644
--- a/tests/test_manifest_version_mismatch.py
+++ b/tests/test_manifest_version_mismatch.py
@@ -26,7 +26,7 @@
 import warnings
 from unittest.mock import patch
 
-from policyengine.core.release_manifest import get_release_manifest
+from policyengine.provenance.manifest import get_release_manifest
 
 
 def _pick_mismatched_version(manifest_version: str) -> str:
@@ -34,6 +34,9 @@ def _pick_mismatched_version(manifest_version: str) -> str:
     return manifest_version + ".drift"
 
 
+BASE_PATH = "policyengine.tax_benefit_models.common.model_version"
+
+
 def _run_init_version_check_branch(
     module_path: str,
     class_name: str,
@@ -41,39 +44,35 @@ def _run_init_version_check_branch(
 ) -> list[warnings.WarningMessage]:
     """Exercise only the manifest-vs-installed version check in ``__init__``.
 
-    Patches ``metadata.version`` to return ``installed_version``, and
-    stubs everything the ``__init__`` calls after the version check so
-    we don't hit the network or do heavy work. Returns the list of
-    warnings emitted during the check.
+    The version-check logic lives on the shared
+    ``MicrosimulationModelVersion`` base; we patch names on that module
+    (not on the per-country ``model`` module) and stub everything the
+    ``__init__`` calls after the version check so we don't hit the
+    network or do heavy work.
     """
-    with patch(f"{module_path}.metadata.version", return_value=installed_version):
+    with patch(f"{BASE_PATH}.metadata.version", return_value=installed_version):
         with patch(
-            f"{module_path}.certify_data_release_compatibility",
+            f"{BASE_PATH}.certify_data_release_compatibility",
             return_value=None,
         ):
+            # Prevent super().__init__ from actually running the
+            # parameter-loading pipeline — we only care that the
+            # version branch in __init__ emits a warning, not raises.
             with patch(
-                f"{module_path}._get_runtime_data_build_metadata",
-                return_value={},
+                f"{BASE_PATH}.TaxBenefitModelVersion.__init__",
+                return_value=None,
             ):
-                # Prevent super().__init__ from actually running the
-                # parameter-loading pipeline — we only care that the
-                # version branch in our override emits a warning, not
-                # an exception.
-                with patch(
-                    f"{module_path}.TaxBenefitModelVersion.__init__",
-                    return_value=None,
+                import importlib
+
+                module = importlib.import_module(module_path)
+                cls = getattr(module, class_name)
+                # Stub the country-specific runtime-metadata hook so
+                # the version-check path doesn't import the country pkg.
+                with patch.object(
+                    cls, "_get_runtime_data_build_metadata", return_value={}
                 ):
-                    # Import late so the patches above apply to the
-                    # module-level names used by __init__.
-                    import importlib
-
-                    module = importlib.import_module(module_path)
-                    cls = getattr(module, class_name)
                     with warnings.catch_warnings(record=True) as caught:
                         warnings.simplefilter("always")
-                        # The class is a TaxBenefitModelVersion subclass
-                        # that normally takes kwargs for the parameter
-                        # tree. We're not exercising the parameter tree.
                         try:
                             cls()
                         except Exception:
diff --git a/tests/test_release_manifests.py b/tests/test_release_manifests.py
index 18d6eed3..d59a24ad 100644
--- a/tests/test_release_manifests.py
+++ b/tests/test_release_manifests.py
@@ -5,7 +5,9 @@
 
 from requests import Timeout
 
-from policyengine.core.release_manifest import (
+from policyengine.core.tax_benefit_model import TaxBenefitModel
+from policyengine.core.tax_benefit_model_version import TaxBenefitModelVersion
+from policyengine.provenance.manifest import (
     DataCertification,
     DataReleaseManifestUnavailableError,
     certify_data_release_compatibility,
@@ -15,8 +17,6 @@
     resolve_dataset_reference,
     resolve_managed_dataset_reference,
 )
-from policyengine.core.tax_benefit_model import TaxBenefitModel
-from policyengine.core.tax_benefit_model_version import TaxBenefitModelVersion
 from policyengine.tax_benefit_models.uk import (
     managed_microsimulation as managed_uk_microsimulation,
 )
@@ -45,9 +45,9 @@ def test__given_us_manifest__then_has_pinned_model_and_data_packages(self):
         manifest = get_release_manifest("us")
 
         assert manifest.schema_version == 1
-        assert manifest.bundle_id == "us-3.5.0"
+        assert manifest.bundle_id == "us-4.0.0"
         assert manifest.country_id == "us"
-        assert manifest.policyengine_version == "3.5.0"
+        assert manifest.policyengine_version == "4.0.0"
         assert manifest.model_package.name == "policyengine-us"
         assert manifest.model_package.version == "1.653.3"
         assert manifest.data_package.name == "policyengine-us-data"
@@ -67,9 +67,9 @@ def test__given_uk_manifest__then_has_pinned_model_and_data_packages(self):
         manifest = get_release_manifest("uk")
 
         assert manifest.schema_version == 1
-        assert manifest.bundle_id == "uk-3.5.0"
+        assert manifest.bundle_id == "uk-4.0.0"
         assert manifest.country_id == "uk"
-        assert manifest.policyengine_version == "3.5.0"
+        assert manifest.policyengine_version == "4.0.0"
         assert manifest.model_package.name == "policyengine-uk"
         assert manifest.model_package.version == "2.88.0"
         assert manifest.data_package.name == "policyengine-uk-data"
@@ -179,7 +179,7 @@ def test__given_country__then_can_fetch_data_release_manifest(self):
         }
 
         with patch(
-            "policyengine.core.release_manifest.requests.get",
+            "policyengine.provenance.manifest.requests.get",
             return_value=_response_with_json(payload),
         ) as mock_get:
             manifest = get_data_release_manifest("us")
@@ -204,7 +204,7 @@ def test__given_missing_data_release_manifest__then_fetch_raises_unavailable(sel
         response.status_code = 404
 
         with patch(
-            "policyengine.core.release_manifest.requests.get",
+            "policyengine.provenance.manifest.requests.get",
             return_value=response,
         ):
             try:
@@ -243,7 +243,7 @@ def test__given_range_specifier__then_certification_accepts_compatible_version(
         }
 
         with patch(
-            "policyengine.core.release_manifest.requests.get",
+            "policyengine.provenance.manifest.requests.get",
             return_value=_response_with_json(payload),
         ):
             certification = certify_data_release_compatibility(
@@ -277,7 +277,7 @@ def test__given_matching_fingerprint__then_certification_allows_reuse(self):
         }
 
         with patch(
-            "policyengine.core.release_manifest.requests.get",
+            "policyengine.provenance.manifest.requests.get",
             return_value=_response_with_json(payload),
         ):
             certification = certify_data_release_compatibility(
@@ -297,7 +297,7 @@ def test__given_private_manifest_unavailable__then_bundled_certification_is_used
         get_data_release_manifest.cache_clear()
 
         with patch(
-            "policyengine.core.release_manifest.get_data_release_manifest",
+            "policyengine.provenance.manifest.get_data_release_manifest",
             side_effect=DataReleaseManifestUnavailableError("private repo"),
         ):
             certification = certify_data_release_compatibility(
@@ -314,11 +314,11 @@ def test__given_private_manifest_unavailable_and_fingerprint_mismatch__then_fail
 
         with (
             patch(
-                "policyengine.core.release_manifest.get_data_release_manifest",
+                "policyengine.provenance.manifest.get_data_release_manifest",
                 side_effect=DataReleaseManifestUnavailableError("private repo"),
             ),
             patch(
-                "policyengine.core.release_manifest.get_release_manifest",
+                "policyengine.provenance.manifest.get_release_manifest",
                 return_value=MagicMock(
                     certification=DataCertification(
                         compatibility_basis="matching_data_build_fingerprint",
@@ -345,7 +345,7 @@ def test__given_manifest_fetch_failure__then_certification_does_not_fallback(
         get_data_release_manifest.cache_clear()
 
         with patch(
-            "policyengine.core.release_manifest.get_data_release_manifest",
+            "policyengine.provenance.manifest.get_data_release_manifest",
             side_effect=Timeout("network timeout"),
         ):
             try:
@@ -381,7 +381,7 @@ def test__given_mismatched_version_and_fingerprint__then_certification_fails(sel
         }
 
         with patch(
-            "policyengine.core.release_manifest.requests.get",
+            "policyengine.provenance.manifest.requests.get",
             return_value=_response_with_json(payload),
         ):
             try:
@@ -408,7 +408,7 @@ def test__given_manifest_certification__then_release_bundle_exposes_it(self):
 
         bundle = model_version.release_bundle
 
-        assert bundle["bundle_id"] == "uk-3.5.0"
+        assert bundle["bundle_id"] == "uk-4.0.0"
         assert bundle["default_dataset"] == "enhanced_frs_2023_24"
         assert bundle["default_dataset_uri"] == manifest.default_dataset_uri
         assert bundle["certified_data_build_id"] == "policyengine-uk-data-1.40.4"
@@ -455,7 +455,7 @@ def test__given_us_managed_microsimulation__then_passes_certified_dataset_and_bu
 
         dataset = mock_microsimulation.call_args.kwargs["dataset"]
         assert dataset == microsim.policyengine_bundle["runtime_dataset_source"]
-        assert microsim.policyengine_bundle["policyengine_version"] == "3.5.0"
+        assert microsim.policyengine_bundle["policyengine_version"] == "4.0.0"
         assert microsim.policyengine_bundle["runtime_dataset"] == "enhanced_cps_2024"
         assert (
             microsim.policyengine_bundle["runtime_dataset_uri"]
@@ -493,7 +493,7 @@ def test__given_uk_managed_dataset_name__then_resolves_within_bundle(self):
                 "hf://policyengine/policyengine-uk-data-private/"
                 "enhanced_frs_2023_24.h5@1.40.4"
             )
-        assert microsim.policyengine_bundle["policyengine_version"] == "3.5.0"
+        assert microsim.policyengine_bundle["policyengine_version"] == "4.0.0"
         assert microsim.policyengine_bundle["runtime_dataset"] == "enhanced_frs_2023_24"
         assert microsim.policyengine_bundle["runtime_dataset_uri"] == (
             "hf://policyengine/policyengine-uk-data-private/enhanced_frs_2023_24.h5@1.40.4"
diff --git a/tests/test_trace_tro.py b/tests/test_trace_tro.py
index f78b4f33..9f32817f 100644
--- a/tests/test_trace_tro.py
+++ b/tests/test_trace_tro.py
@@ -1,6 +1,6 @@
 """Tests for TRACE Transparent Research Object (TRO) export.
 
-Covers bundle-level TROs (``policyengine.core.trace_tro``) and per-simulation
+Covers bundle-level TROs (``policyengine.provenance.trace``) and per-simulation
 TROs (``policyengine.results.trace_tro``), plus the ``policyengine trace-tro``
 CLI, determinism guarantees, and JSON-Schema conformance against TROv 2023/05.
 """
@@ -16,14 +16,14 @@
 from jsonschema import Draft202012Validator
 
 from policyengine.cli import main as cli_main
-from policyengine.core.release_manifest import (
+from policyengine.core.tax_benefit_model import TaxBenefitModel
+from policyengine.core.tax_benefit_model_version import TaxBenefitModelVersion
+from policyengine.provenance.manifest import (
     DataReleaseManifest,
     get_data_release_manifest,
     get_release_manifest,
 )
-from policyengine.core.tax_benefit_model import TaxBenefitModel
-from policyengine.core.tax_benefit_model_version import TaxBenefitModelVersion
-from policyengine.core.trace_tro import (
+from policyengine.provenance.trace import (
     POLICYENGINE_ORGANIZATION,
     TRACE_TROV_NAMESPACE,
     build_trace_tro_from_release_bundle,
@@ -472,7 +472,7 @@ def test__given_trace_tro_property__then_emits_valid_tro(self):
             return_value=data_release_manifest,
         ):
             with patch(
-                "policyengine.core.trace_tro.fetch_pypi_wheel_metadata",
+                "policyengine.provenance.trace.fetch_pypi_wheel_metadata",
                 side_effect=_fake_fetch_pypi,
             ):
                 tro = model_version.trace_tro
@@ -641,7 +641,7 @@ def test__given_trace_tro_stdout__then_writes_canonical_json(
             return_value=data_release_manifest,
         ):
             with patch(
-                "policyengine.core.trace_tro.fetch_pypi_wheel_metadata",
+                "policyengine.provenance.trace.fetch_pypi_wheel_metadata",
                 side_effect=_fake_fetch_pypi,
             ):
                 exit_code = cli_main(["trace-tro", "us"])
@@ -661,7 +661,7 @@ def test__given_out_path__then_writes_to_file(self, tmp_path, monkeypatch):
             return_value=data_release_manifest,
         ):
             with patch(
-                "policyengine.core.trace_tro.fetch_pypi_wheel_metadata",
+                "policyengine.provenance.trace.fetch_pypi_wheel_metadata",
                 side_effect=_fake_fetch_pypi,
             ):
                 exit_code = cli_main(["trace-tro", "us", "--out", str(out)])
diff --git a/tests/test_us_reform_application.py b/tests/test_us_reform_application.py
index 21b9d01c..6e3b4145 100644
--- a/tests/test_us_reform_application.py
+++ b/tests/test_us_reform_application.py
@@ -1,148 +1,71 @@
-"""Tests for US reform application via reform_dict at construction time.
+"""Tests for US reform dicts applied via ``pe.us.calculate_household``."""
 
-These tests verify that the US model correctly applies reforms by building
-a reform dict and passing it to Microsimulation at construction time,
-fixing the p.update() bug that exists in the US country package.
-"""
-
-from policyengine.tax_benefit_models.us import (
-    calculate_household_impact as calculate_us_household_impact,
-)
+import policyengine as pe
 from tests.fixtures.us_reform_fixtures import (
-    DOUBLE_STANDARD_DEDUCTION_POLICY,
     HIGH_INCOME_SINGLE_FILER,
     MARRIED_COUPLE_WITH_KIDS,
-    create_standard_deduction_policy,
 )
 
 
-class TestUSHouseholdReformApplication:
-    """Tests for US household reform application."""
-
-    def test__given_baseline_policy__then_returns_baseline_tax(self):
-        """Given: No policy (baseline)
-        When: Calculating household impact
-        Then: Returns baseline tax calculation
-        """
-        # Given
-        household = HIGH_INCOME_SINGLE_FILER
-
-        # When
-        result = calculate_us_household_impact(household, policy=None)
-
-        # Then
-        assert result.tax_unit[0]["income_tax"] > 0
-
-    def test__given_doubled_standard_deduction__then_tax_is_lower(self):
-        """Given: Policy that doubles standard deduction
-        When: Calculating household impact
-        Then: Income tax is lower than baseline
-        """
-        # Given
-        household = HIGH_INCOME_SINGLE_FILER
-        policy = DOUBLE_STANDARD_DEDUCTION_POLICY
-
-        # When
-        baseline_result = calculate_us_household_impact(household, policy=None)
-        reform_result = calculate_us_household_impact(household, policy=policy)
-
-        # Then
-        baseline_tax = baseline_result.tax_unit[0]["income_tax"]
-        reform_tax = reform_result.tax_unit[0]["income_tax"]
-
-        assert reform_tax < baseline_tax, (
-            f"Reform tax ({reform_tax}) should be less than baseline ({baseline_tax})"
-        )
-
-    def test__given_doubled_standard_deduction__then_tax_reduction_is_significant(
-        self,
-    ):
-        """Given: Policy that doubles standard deduction
-        When: Calculating household impact for high income household
-        Then: Tax reduction is at least $1000 (significant impact)
-        """
-        # Given
-        household = HIGH_INCOME_SINGLE_FILER
-        policy = DOUBLE_STANDARD_DEDUCTION_POLICY
-
-        # When
-        baseline_result = calculate_us_household_impact(household, policy=None)
-        reform_result = calculate_us_household_impact(household, policy=policy)
+def _double_standard_deduction(year: int) -> dict:
+    """Dict reform: standard deduction doubled from ~$14,600 / $29,200 baseline."""
+    return {
+        "gov.irs.deductions.standard.amount.SINGLE": {f"{year}-01-01": 29200},
+        "gov.irs.deductions.standard.amount.JOINT": {f"{year}-01-01": 58400},
+    }
 
-        # Then
-        baseline_tax = baseline_result.tax_unit[0]["income_tax"]
-        reform_tax = reform_result.tax_unit[0]["income_tax"]
-        tax_reduction = baseline_tax - reform_tax
 
-        assert tax_reduction >= 1000, (
-            f"Tax reduction ({tax_reduction}) should be at least $1000"
-        )
-
-    def test__given_married_couple__then_joint_deduction_affects_tax(self):
-        """Given: Married couple with doubled joint standard deduction
-        When: Calculating household impact
-        Then: Tax is lower than baseline
-        """
-        # Given
-        household = MARRIED_COUPLE_WITH_KIDS
-        policy = DOUBLE_STANDARD_DEDUCTION_POLICY
-
-        # When
-        baseline_result = calculate_us_household_impact(household, policy=None)
-        reform_result = calculate_us_household_impact(household, policy=policy)
-
-        # Then
-        baseline_tax = baseline_result.tax_unit[0]["income_tax"]
-        reform_tax = reform_result.tax_unit[0]["income_tax"]
-
-        assert reform_tax < baseline_tax, (
-            f"Reform tax ({reform_tax}) should be less than baseline ({baseline_tax})"
+class TestUSHouseholdReformApplication:
+    def test__baseline__then_income_tax_positive(self):
+        result = pe.us.calculate_household(**HIGH_INCOME_SINGLE_FILER)
+        assert result.tax_unit.income_tax > 0
+
+    def test__doubled_standard_deduction__then_tax_lower(self):
+        baseline = pe.us.calculate_household(**HIGH_INCOME_SINGLE_FILER)
+        reformed = pe.us.calculate_household(
+            **HIGH_INCOME_SINGLE_FILER,
+            reform=_double_standard_deduction(2024),
         )
+        assert reformed.tax_unit.income_tax < baseline.tax_unit.income_tax
 
-    def test__given_same_policy_twice__then_results_are_deterministic(self):
-        """Given: Same policy applied twice
-        When: Calculating household impact
-        Then: Results are identical (deterministic)
-        """
-        # Given
-        household = HIGH_INCOME_SINGLE_FILER
-        policy = DOUBLE_STANDARD_DEDUCTION_POLICY
-
-        # When
-        result1 = calculate_us_household_impact(household, policy=policy)
-        result2 = calculate_us_household_impact(household, policy=policy)
-
-        # Then
-        assert result1.tax_unit[0]["income_tax"] == result2.tax_unit[0]["income_tax"]
-
-    def test__given_custom_deduction_value__then_tax_reflects_value(self):
-        """Given: Custom standard deduction value
-        When: Calculating household impact
-        Then: Tax reflects the custom deduction
-        """
-        # Given
-        household = HIGH_INCOME_SINGLE_FILER
-
-        # Create policies with different deduction values
-        small_deduction_policy = create_standard_deduction_policy(
-            single_value=5000, joint_value=10000
+    def test__doubled_standard_deduction__then_reduction_is_meaningful(self):
+        baseline = pe.us.calculate_household(**HIGH_INCOME_SINGLE_FILER)
+        reformed = pe.us.calculate_household(
+            **HIGH_INCOME_SINGLE_FILER,
+            reform=_double_standard_deduction(2024),
         )
-        large_deduction_policy = create_standard_deduction_policy(
-            single_value=50000, joint_value=100000
+        reduction = baseline.tax_unit.income_tax - reformed.tax_unit.income_tax
+        assert reduction >= 1000, (
+            f"Tax reduction ({reduction}) should be at least $1000"
         )
 
-        # When
-        small_deduction_result = calculate_us_household_impact(
-            household, policy=small_deduction_policy
+    def test__married_couple_joint_deduction__then_tax_lower(self):
+        baseline = pe.us.calculate_household(**MARRIED_COUPLE_WITH_KIDS)
+        reformed = pe.us.calculate_household(
+            **MARRIED_COUPLE_WITH_KIDS,
+            reform=_double_standard_deduction(2024),
         )
-        large_deduction_result = calculate_us_household_impact(
-            household, policy=large_deduction_policy
+        assert reformed.tax_unit.income_tax < baseline.tax_unit.income_tax
+
+    def test__same_reform_twice__then_deterministic(self):
+        reform = _double_standard_deduction(2024)
+        first = pe.us.calculate_household(**HIGH_INCOME_SINGLE_FILER, reform=reform)
+        second = pe.us.calculate_household(**HIGH_INCOME_SINGLE_FILER, reform=reform)
+        assert first.tax_unit.income_tax == second.tax_unit.income_tax
+
+    def test__custom_deduction_values__then_tax_reflects_values(self):
+        small_reform = {
+            "gov.irs.deductions.standard.amount.SINGLE": {"2024-01-01": 5000},
+            "gov.irs.deductions.standard.amount.JOINT": {"2024-01-01": 10000},
+        }
+        large_reform = {
+            "gov.irs.deductions.standard.amount.SINGLE": {"2024-01-01": 50000},
+            "gov.irs.deductions.standard.amount.JOINT": {"2024-01-01": 100000},
+        }
+        small = pe.us.calculate_household(
+            **HIGH_INCOME_SINGLE_FILER, reform=small_reform
         )
-
-        # Then
-        small_tax = small_deduction_result.tax_unit[0]["income_tax"]
-        large_tax = large_deduction_result.tax_unit[0]["income_tax"]
-
-        assert large_tax < small_tax, (
-            f"Large deduction tax ({large_tax}) should be less than small deduction ({small_tax})"
+        large = pe.us.calculate_household(
+            **HIGH_INCOME_SINGLE_FILER, reform=large_reform
         )
+        assert large.tax_unit.income_tax < small.tax_unit.income_tax