From a0c55133c41fbc74e5716ccc22d2ce41ec6dce16 Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Sun, 28 Jun 2026 08:14:24 -0400
Subject: [PATCH] Use Populace geoslices in policyengine.py

---
 changelog.d/populace-geoslices.changed.md     |   1 +
 docs/bundles.md                               |  17 +-
 docs/countries.md                             |   5 +-
 docs/data-publishing-design.md                |   6 +-
 docs/engineering/skills/data-certification.md |  25 +-
 docs/getting-started.md                       |   7 +-
 docs/impact-analysis.md                       |   7 +-
 docs/microsim.md                              |  19 +-
 docs/regions.md                               |  27 +-
 docs/release-bundles.md                       |  16 +-
 scripts/generate_trace_tros.py                |   1 +
 src/policyengine/core/region.py               |   6 +-
 src/policyengine/core/scoping_strategy.py     |  16 +-
 src/policyengine/core/simulation.py           |   7 +-
 .../countries/us/data/__init__.py             |   3 +-
 src/policyengine/countries/us/data/places.py  |   4 +-
 src/policyengine/countries/us/data/states.py  |  55 ++++
 src/policyengine/countries/us/regions.py      |  31 +-
 src/policyengine/data/bundle/manifest.json    | 311 ------------------
 .../data/bundle/uk.trace.tro.jsonld           |   9 +-
 .../data/bundle/us.trace.tro.jsonld           |   9 +-
 src/policyengine/provenance/certification.py  |  16 +
 src/policyengine/provenance/manifest.py       |  18 +
 src/policyengine/provenance/trace.py          |   5 +-
 .../tax_benefit_models/us/datasets.py         |  20 +-
 src/policyengine/utils/entity_utils.py        |  29 +-
 tests/fixtures/filtering_fixtures.py          |   7 +-
 tests/test_certify_data_release.py            |  56 ++--
 tests/test_entity_utils.py                    |  63 ++++
 tests/test_release_manifests.py               |  90 ++++-
 tests/test_us_regions.py                      |  78 +++--
 31 files changed, 452 insertions(+), 512 deletions(-)
 create mode 100644 changelog.d/populace-geoslices.changed.md
diff --git a/changelog.d/populace-geoslices.changed.md b/changelog.d/populace-geoslices.changed.md
new file mode 100644
index 00000000..421937e3
--- /dev/null
+++ b/changelog.d/populace-geoslices.changed.md
@@ -0,0 +1 @@
+Use the certified national Populace US dataset for state and congressional-district regions via row filters, and stop vendoring derived Populace area H5 slices into the PolicyEngine bundle manifest.
diff --git a/docs/bundles.md b/docs/bundles.md
index df28a2a0..e3099709 100644
--- a/docs/bundles.md
+++ b/docs/bundles.md
@@ -88,26 +88,21 @@ python scripts/bundle.py certify-data \
   --manifest-uri hf://dataset/policyengine/populace-uk-private@<release>/releases/<release>/release_manifest.json
 ```
 
-For US Populace releases, include the inherited state datasets from
-`policyengine-us-data`:
+For US Populace releases, certify the Populace release manifest directly:
 
 ```bash
 python scripts/bundle.py certify-data \
   --country us \
   --data-producer populace \
   --manifest-uri hf://dataset/policyengine/populace-us@<release>/releases/<release>/release_manifest.json \
-  --regional-manifest-uri hf://model/policyengine/policyengine-us-data@<version>/releases/<version>/release_manifest.json \
   --model-version <policyengine-us-version>
 ```
 
-The regional manifest must include all 51 `states/{STATE}.h5` artifacts with
-their original repo, revision, and sha256 pins. The resulting bundle manifest
-certifies Populace as the US national default dataset and
-`policyengine-us-data` as the state dataset source.
-The regional manifest URI is recorded for traceability; the bundle does not
-currently record the regional manifest's own sha256. The citable pins are the
-artifact-level repo, revision, and sha256 values copied into
-`data_releases.us.datasets`.
+US state and congressional-district regions scope the certified national
+Populace dataset with row filters. If a Populace release also publishes derived
+`states/*.h5` or `districts/*.h5` area slices, the bundle certification omits
+those slices from `data_releases.us.datasets`; they are not runtime dataset
+dependencies.
 
 Use `python scripts/bundle.py generate` to regenerate derived bundle metadata,
 and `python scripts/bundle.py generate --include-tros` when TRACE TRO sidecars
diff --git a/docs/countries.md b/docs/countries.md
index 29864bea..b8d63bc2 100644
--- a/docs/countries.md
+++ b/docs/countries.md
@@ -32,12 +32,13 @@ Override in any output with `income_variable=`.
 
 | | Dataset |
 |---|---|
-| US | Enhanced CPS 2024 (`enhanced_cps_2024.h5`) |
+| US | Populace US 2024 (`populace_us_2024.h5`) |
 | UK | Populace UK 2023 (`populace_uk_2023.h5`) |
 
 ## State / regional breakdown
 
-US: `state_code` and `congressional_district` on every household.
+US: Populace row scoping uses `state_fips` and `congressional_district_geoid`.
+`state_code` remains the human-readable input for custom households.
 
 UK: constituency code and local authority code on every household where available.
 
diff --git a/docs/data-publishing-design.md b/docs/data-publishing-design.md
index 26ad03a5..9bdb33fb 100644
--- a/docs/data-publishing-design.md
+++ b/docs/data-publishing-design.md
@@ -168,7 +168,7 @@ Minimal. The existing `pe.us.ensure_datasets` takes a URI today:
 
 ```python
 pe.us.ensure_datasets(
-    datasets=["hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5"],
+    datasets=["hf://policyengine/populace-us/populace_us_2024.h5@<release>"],
     years=[2026],
 )
 ```
@@ -178,13 +178,13 @@ Under the substrate, the URI scheme gains a new prefix:
 ```python
 # The release manifest pins a specific artifact:
 pe.us.ensure_datasets(
-    datasets=["pe-data://us/enhanced_cps_2024@sha256:4e92b340…"],
+    datasets=["pe-data://us/populace_us_2024@sha256:4e92b340…"],
     years=[2026],
 )
 
 # A developer asking for operational newest:
 pe.us.ensure_datasets(
-    datasets=["pe-data://us/enhanced_cps_2024@latest"],  # resolves via channel
+    datasets=["pe-data://us/populace_us_2024@latest"],  # resolves via channel
     years=[2026],
 )
 ```
diff --git a/docs/engineering/skills/data-certification.md b/docs/engineering/skills/data-certification.md
index 890a0fd4..4e3f24b5 100644
--- a/docs/engineering/skills/data-certification.md
+++ b/docs/engineering/skills/data-certification.md
@@ -26,37 +26,26 @@ python scripts/bundle.py certify-data --country uk --data-producer populace \
   --manifest-uri "hf://dataset/policyengine/populace-uk-private@<tag>/releases/<tag>/release_manifest.json"
 ```
 
-For US Populace certification, include the inherited state datasets from the
-certified `policyengine-us-data` release manifest:
+For US Populace certification, certify the Populace release manifest directly:
 
 ```bash
 python scripts/bundle.py certify-data --country us --data-producer populace \
   --manifest-uri "hf://dataset/policyengine/populace-us@<tag>/releases/<tag>/release_manifest.json" \
-  --regional-manifest-uri "hf://model/policyengine/policyengine-us-data@<version>/releases/<version>/release_manifest.json" \
   --model-version "<policyengine-us-version>"
 ```
 
-The regional manifest is required for US while the stack still serves
-state-level datasets from `policyengine-us-data`. It must contain all 51
-`states/{STATE}.h5` artifacts, including DC, and each state artifact must carry
-its original `repo_id`, `revision`, and `sha256`. Certification preserves those
-per-artifact pins in `data_releases.us.datasets` and writes:
+US state and congressional-district regions are row filters over the certified
+national Populace dataset. Certification writes:
 
 ```json
 "region_datasets": {
-  "national": {"path_template": "populace_us_2024.h5"},
-  "state": {"path_template": "states/{state_code}.h5"}
+  "national": {"path_template": "populace_us_2024.h5"}
 }
 ```
 
-Do not move or rewrite state artifacts into the Populace repo. The certified
-bundle is intentionally hybrid: Populace owns the national default dataset, and
-`policyengine-us-data` owns the inherited state datasets until that path is
-migrated.
-The regional manifest URI is recorded for traceability, but the bundle does not
-currently record the regional manifest's own sha256. Treat the copied
-artifact-level repo, revision, and sha256 pins in `data_releases.us.datasets`
-as the citable state dataset certification.
+If the Populace release publishes derived `states/*.h5` or `districts/*.h5`
+files for compatibility checks, certification omits them from the runtime
+bundle. The national H5 is the canonical `policyengine.py` dataset.
 
 The script fetches and validates the manifest (every artifact must carry a
 revision pin; the certified dataset must be reachable), writes the canonical
diff --git a/docs/getting-started.md b/docs/getting-started.md
index bbaa3cee..6dd489b1 100644
--- a/docs/getting-started.md
+++ b/docs/getting-started.md
@@ -76,11 +76,8 @@ For population estimates — budget cost, distributional impact, poverty — mov
 ```python
 from policyengine.core import Simulation
 
-datasets = pe.us.ensure_datasets(
-    datasets=["hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5"],
-    years=[2026],
-)
-dataset = datasets["enhanced_cps_2024_2026"]
+datasets = pe.us.ensure_datasets(years=[2026])
+dataset = next(iter(datasets.values()))
 
 baseline = Simulation(
     dataset=dataset,
diff --git a/docs/impact-analysis.md b/docs/impact-analysis.md
index 25db85ae..275dc14c 100644
--- a/docs/impact-analysis.md
+++ b/docs/impact-analysis.md
@@ -10,11 +10,8 @@ title: "Impact analysis"
 import policyengine as pe
 from policyengine.core import Simulation
 
-datasets = pe.us.ensure_datasets(
-    datasets=["hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5"],
-    years=[2026],
-)
-dataset = datasets["enhanced_cps_2024_2026"]
+datasets = pe.us.ensure_datasets(years=[2026])
+dataset = next(iter(datasets.values()))
 
 baseline = Simulation(dataset=dataset, tax_benefit_model_version=pe.us.model)
 reformed = Simulation(
diff --git a/docs/microsim.md b/docs/microsim.md
index 5804431c..f6927d93 100644
--- a/docs/microsim.md
+++ b/docs/microsim.md
@@ -11,11 +11,8 @@ import policyengine as pe
 from policyengine.core import Simulation
 from policyengine.outputs import Aggregate, AggregateType
 
-datasets = pe.us.ensure_datasets(
-    datasets=["hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5"],
-    years=[2026],
-)
-dataset = datasets["enhanced_cps_2024_2026"]
+datasets = pe.us.ensure_datasets(years=[2026])
+dataset = next(iter(datasets.values()))
 
 baseline = Simulation(dataset=dataset, tax_benefit_model_version=pe.us.model)
 baseline.ensure()
@@ -37,15 +34,13 @@ Microdata is stored as HDF5 on Hugging Face. `ensure_datasets` downloads, caches
 
 ```python
 datasets = pe.us.ensure_datasets(
-    datasets=["hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5"],
     years=[2024, 2026],
     data_folder="./data",        # local cache directory
 )
-# Keys are "<dataset_stem>_<year>":
-dataset = datasets["enhanced_cps_2024_2026"]
+dataset = datasets["populace_us_2024_2026"]  # keys are "<dataset_stem>_<year>"
 ```
 
-The default US dataset is **Enhanced CPS 2024** — CPS ASEC fused with IRS SOI tax-return records and calibrated to IRS, CMS, SNAP, and other administrative totals. The UK default is **Populace UK 2023** — a Populace-built Family Resources Survey dataset calibrated to UK administrative targets.
+The default US dataset is **Populace US 2024** — a Populace-built dataset calibrated to IRS, CMS, SNAP, Census, and other administrative totals. The UK default is **Populace UK 2023** — a Populace-built Family Resources Survey dataset calibrated to UK administrative targets.
 
 List datasets already known to the country:
 
@@ -158,7 +153,7 @@ See [Outputs](outputs.md) for the full catalog.
 
 ## Memory and performance
 
-A full Enhanced CPS microsimulation uses roughly 4 GB of memory and takes 15–30 seconds on a laptop. For parameter sweeps, reuse the baseline:
+A full Populace US microsimulation uses roughly 4 GB of memory and takes 15–30 seconds on a laptop. For parameter sweeps, reuse the baseline:
 
 ```python
 baseline = Simulation(dataset=dataset, tax_benefit_model_version=pe.us.model)
@@ -171,11 +166,11 @@ for amount in [0, 1_000, 2_000, 3_000]:
     # each iteration runs only the reform
 ```
 
-Downsampled datasets are available for testing:
+Smaller custom H5 datasets can be passed explicitly for testing:
 
 ```python
 datasets = pe.us.ensure_datasets(
-    datasets=["hf://policyengine/policyengine-us-data/cps_small_2024.h5"],
+    datasets=["/path/to/smoke_test_populace_us_2024.h5"],
     years=[2026],
 )
 ```
diff --git a/docs/regions.md b/docs/regions.md
index f6fda56e..f493dcf9 100644
--- a/docs/regions.md
+++ b/docs/regions.md
@@ -6,7 +6,9 @@ Sub-national breakdowns: state / district filters on any output, plus dedicated
 
 ## US states
 
-`state_code` is an Enum variable on every household (values `"CA"`, `"TX"`, ...). Pass it as a filter on any `Aggregate` or `ChangeAggregate`:
+For custom households, `state_code` remains the public input (values `"CA"`,
+`"TX"`, ...). Pass it as a filter on any `Aggregate` or `ChangeAggregate` when
+working with simulated outputs that expose that variable:
 
 ```python
 from policyengine.outputs import Aggregate, AggregateType
@@ -21,15 +23,18 @@ ca_snap = Aggregate(
 ca_snap.run()
 ```
 
-Each state is a region in the US registry, with its own dataset:
+Each state is a region in the US registry. State regions scope the certified
+national Populace dataset by `state_fips`; they do not require separate state
+H5 files:
 
 ```python
 states = pe.us.model.region_registry.get_by_type("state")
 for region in states:
-    print(region.code, region.label, region.dataset_path)
+    print(region.code, region.label, region.scoping_strategy)
 ```
 
-For state-specific datasets (rather than filtering a national one), pass `scoping_strategy=region.scoping_strategy` or resolve the dataset path directly.
+For state-specific simulations, pass `scoping_strategy=region.scoping_strategy`
+with the certified national dataset.
 
 ## US congressional districts
 
@@ -44,7 +49,7 @@ for row in impacts.district_results:
     print(row["district_geoid"], row["avg_change"], row["winner_percentage"])
 ```
 
-`district_geoid` is the SSDD integer (state FIPS × 100 + district number). Requires a dataset with `congressional_district_geoid` populated — the default enhanced CPS does.
+`district_geoid` is the SSDD integer (state FIPS × 100 + district number; at-large districts use `00`). Congressional district regions scope the certified national Populace dataset by `congressional_district_geoid`.
 
 ## UK parliamentary constituencies
 
@@ -136,21 +141,19 @@ baseline = Simulation(
     dataset=dataset,
     tax_benefit_model_version=pe.us.model,
     scoping_strategy=RowFilterStrategy(
-        variable_name="state_code",
-        variable_value="CA",
+        variable_name="state_fips",
+        variable_value=6,
     ),
 )
 ```
 
-Regions that filter (US places, UK countries, and any region with `region.requires_filter == True`) carry their own `scoping_strategy`. Pull it off the region object rather than reconstructing it:
+Regions that filter (US states and congressional districts, UK countries, and any region with `region.requires_filter == True`) carry their own `scoping_strategy`. Pull it off the region object rather than reconstructing it. US place regions are present as hierarchy metadata, but current Populace datasets do not carry `place_fips`, so they do not expose runtime scoping yet:
 
 ```python
-nyc = pe.us.model.region_registry.get("place/NY-51000")
+ca = pe.us.model.region_registry.get("state/ca")
 baseline = Simulation(
     dataset=dataset,
     tax_benefit_model_version=pe.us.model,
-    scoping_strategy=nyc.scoping_strategy,
+    scoping_strategy=ca.scoping_strategy,
 )
 ```
-
-US states and congressional districts don't use a scoping strategy — they point to dedicated state- or district-specific datasets via `region.dataset_path`. Pass that dataset to `Simulation` instead.
diff --git a/docs/release-bundles.md b/docs/release-bundles.md
index c16a938f..328f8e2c 100644
--- a/docs/release-bundles.md
+++ b/docs/release-bundles.md
@@ -96,7 +96,7 @@ It does not own final runtime bundle certification.
 
 ### Country data package
 
-Examples: `policyengine-uk-data`, `policyengine-us-data`
+Examples: `populace-data`, `policyengine-uk-data`
 
 The country data package owns:
 
@@ -128,24 +128,18 @@ python scripts/bundle.py certify-data --country us \
   --manifest-uri "hf://dataset/policyengine/populace-us@<tag>/releases/<tag>/release_manifest.json"
 ```
 
-US Populace certification currently also needs the inherited state-level
-datasets from the certified `policyengine-us-data` release manifest:
+US Populace certification uses the Populace release manifest directly:
 
 ```bash
 python scripts/bundle.py certify-data --country us --data-producer populace \
   --manifest-uri "hf://dataset/policyengine/populace-us@<tag>/releases/<tag>/release_manifest.json" \
-  --regional-manifest-uri "hf://model/policyengine/policyengine-us-data@<version>/releases/<version>/release_manifest.json" \
   --model-version "<policyengine-us-version>"
 ```
 
 That produces one US bundle manifest entry containing the Populace national
-default dataset plus all 51 `states/{STATE}.h5` artifacts pinned to
-`policyengine-us-data`. The resulting `region_datasets.state` template lets
-runtime code resolve a state region to the exact certified state artifact.
-The regional manifest URI is retained for traceability, but the bundle does not
-currently store the regional manifest's own sha256. For inherited state data,
-the citable pins are the copied artifact-level repo, revision, and sha256
-values in `data_releases.us.datasets`.
+default dataset. State and congressional-district regions are runtime row
+filters over that national dataset, so derived `states/*.h5` or
+`districts/*.h5` files are not vendored into `data_releases.us.datasets`.
 
 Earlier releases (policyengine 4.15.x–4.16.x) were certified through the
 `PolicyEngine/policyengine-bundles` archive flow; those bundles remain the
diff --git a/scripts/generate_trace_tros.py b/scripts/generate_trace_tros.py
index 0b252be7..bc7d5d26 100644
--- a/scripts/generate_trace_tros.py
+++ b/scripts/generate_trace_tros.py
@@ -61,6 +61,7 @@ def generated_tros() -> list[tuple[Path, bytes]]:
             certification=country_manifest.certification,
             model_wheel_sha256=country_manifest.model_package.sha256,
             model_wheel_url=country_manifest.model_package.wheel_url,
+            emission_context={"pe:emittedIn": "repository-bundle"},
         )
         payloads.append((tro_path, serialize_trace_tro(tro)))
     return payloads
diff --git a/src/policyengine/core/region.py b/src/policyengine/core/region.py
index 6c5faf2a..d5d177a6 100644
--- a/src/policyengine/core/region.py
+++ b/src/policyengine/core/region.py
@@ -2,9 +2,9 @@
 
 This module provides the Region and RegionRegistry classes for defining
 geographic regions that a tax-benefit model supports. Regions can have:
-1. A dedicated dataset (e.g., US states, congressional districts)
+1. A dedicated dataset, usually for the national default.
 2. A scoping strategy that derives the region from a parent dataset
-   (row filter or weight replacement)
+   (row filter or weight replacement).
 """
 
 from typing import Literal, Optional, Union
@@ -56,7 +56,7 @@ class Region(BaseModel):
     # Dataset configuration
     dataset_path: Optional[str] = Field(
         default=None,
-        description="GCS path to dedicated dataset (e.g., 'gs://policyengine-us-data/states/CA.h5')",
+        description="URI to a dedicated dataset when the region has one.",
     )
 
     # Scoping strategy for regions that derive from a parent dataset
diff --git a/src/policyengine/core/scoping_strategy.py b/src/policyengine/core/scoping_strategy.py
index 2cbc8490..abd117f3 100644
--- a/src/policyengine/core/scoping_strategy.py
+++ b/src/policyengine/core/scoping_strategy.py
@@ -3,7 +3,8 @@
 Provides two concrete strategies for scoping datasets to sub-national regions:
 
 1. RowFilterStrategy: Filters dataset rows where a household variable matches
-   a specific value (e.g., UK countries by 'country' field, US places by 'place_fips').
+   a specific value (e.g., US states by 'state_fips', US congressional districts
+   by 'congressional_district_geoid').
 
 2. WeightReplacementStrategy: Legacy strategy that replaces household weights from
    a pre-computed weight matrix resolved locally or from GCS.
@@ -16,7 +17,7 @@
 import numpy as np
 import pandas as pd
 from microdf import MicroDataFrame
-from pydantic import BaseModel, Discriminator
+from pydantic import BaseModel, Discriminator, Field
 
 from policyengine.utils.entity_utils import (
     filter_dataset_by_household_variable,
@@ -62,12 +63,13 @@ class RowFilterStrategy(RegionScopingStrategy):
     """Scoping strategy that filters dataset rows by a household variable.
 
     Used for regions where we want to keep only households matching a
-    specific variable value (e.g., UK countries, US places/cities).
+    specific variable value (e.g., US states or congressional districts).
     """
 
     strategy_type: Literal["row_filter"] = "row_filter"
     variable_name: str
     variable_value: Union[str, int, float]
+    additional_filters: dict[str, Union[str, int, float]] = Field(default_factory=dict)
 
     def apply(
         self,
@@ -80,11 +82,17 @@ def apply(
             group_entities=group_entities,
             variable_name=self.variable_name,
             variable_value=self.variable_value,
+            additional_filters=self.additional_filters,
         )
 
     @property
     def cache_key(self) -> str:
-        return f"row_filter:{self.variable_name}={self.variable_value}"
+        filters = [
+            (self.variable_name, self.variable_value),
+            *self.additional_filters.items(),
+        ]
+        filter_key = ",".join(f"{name}={value}" for name, value in sorted(filters))
+        return f"row_filter:{filter_key}"
 
 
 class WeightReplacementStrategy(RegionScopingStrategy):
diff --git a/src/policyengine/core/simulation.py b/src/policyengine/core/simulation.py
index e9cabc91..d9afd5ba 100644
--- a/src/policyengine/core/simulation.py
+++ b/src/policyengine/core/simulation.py
@@ -27,11 +27,8 @@ class Simulation(BaseModel):
         import policyengine as pe
         from policyengine.core import Simulation
 
-        datasets = pe.us.ensure_datasets(
-            datasets=["hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5"],
-            years=[2026], data_folder="./data",
-        )
-        dataset = datasets["enhanced_cps_2024_2026"]
+        datasets = pe.us.ensure_datasets(years=[2026], data_folder="./data")
+        dataset = next(iter(datasets.values()))
 
         # Baseline
         baseline = Simulation(dataset=dataset, tax_benefit_model_version=pe.us.model)
diff --git a/src/policyengine/countries/us/data/__init__.py b/src/policyengine/countries/us/data/__init__.py
index fb833b64..5824f257 100644
--- a/src/policyengine/countries/us/data/__init__.py
+++ b/src/policyengine/countries/us/data/__init__.py
@@ -8,10 +8,11 @@
 
 from .districts import AT_LARGE_STATES, DISTRICT_COUNTS
 from .places import US_PLACES
-from .states import US_STATES
+from .states import US_STATE_FIPS, US_STATES
 
 __all__ = [
     "US_STATES",
+    "US_STATE_FIPS",
     "DISTRICT_COUNTS",
     "AT_LARGE_STATES",
     "US_PLACES",
diff --git a/src/policyengine/countries/us/data/places.py b/src/policyengine/countries/us/data/places.py
index a5fe632f..5669e26b 100644
--- a/src/policyengine/countries/us/data/places.py
+++ b/src/policyengine/countries/us/data/places.py
@@ -4,8 +4,8 @@
 Synced with policyengine-app-v2 main branch.
 """
 
-# US cities/places with population over 100K (from Census data)
-# These filter from their parent state's dataset using place_fips
+# US cities/places with population over 100K (from Census data).
+# These are registry metadata until Populace emits place-level row keys.
 # Total: 333 places
 US_PLACES: list[dict[str, str]] = [
     {
diff --git a/src/policyengine/countries/us/data/states.py b/src/policyengine/countries/us/data/states.py
index 1309201b..3a5c6259 100644
--- a/src/policyengine/countries/us/data/states.py
+++ b/src/policyengine/countries/us/data/states.py
@@ -57,3 +57,58 @@
     "WI": "Wisconsin",
     "WY": "Wyoming",
 }
+
+
+US_STATE_FIPS: dict[str, int] = {
+    "AL": 1,
+    "AK": 2,
+    "AZ": 4,
+    "AR": 5,
+    "CA": 6,
+    "CO": 8,
+    "CT": 9,
+    "DE": 10,
+    "DC": 11,
+    "FL": 12,
+    "GA": 13,
+    "HI": 15,
+    "ID": 16,
+    "IL": 17,
+    "IN": 18,
+    "IA": 19,
+    "KS": 20,
+    "KY": 21,
+    "LA": 22,
+    "ME": 23,
+    "MD": 24,
+    "MA": 25,
+    "MI": 26,
+    "MN": 27,
+    "MS": 28,
+    "MO": 29,
+    "MT": 30,
+    "NE": 31,
+    "NV": 32,
+    "NH": 33,
+    "NJ": 34,
+    "NM": 35,
+    "NY": 36,
+    "NC": 37,
+    "ND": 38,
+    "OH": 39,
+    "OK": 40,
+    "OR": 41,
+    "PA": 42,
+    "RI": 44,
+    "SC": 45,
+    "SD": 46,
+    "TN": 47,
+    "TX": 48,
+    "UT": 49,
+    "VT": 50,
+    "VA": 51,
+    "WA": 53,
+    "WV": 54,
+    "WI": 55,
+    "WY": 56,
+}
diff --git a/src/policyengine/countries/us/regions.py b/src/policyengine/countries/us/regions.py
index ca2f6b4f..266fb5c4 100644
--- a/src/policyengine/countries/us/regions.py
+++ b/src/policyengine/countries/us/regions.py
@@ -11,9 +11,7 @@
 from policyengine.core.scoping_strategy import RowFilterStrategy
 from policyengine.provenance.manifest import resolve_region_dataset_path
 
-from .data import AT_LARGE_STATES, DISTRICT_COUNTS, US_PLACES, US_STATES
-
-US_DATA_BUCKET = "gs://policyengine-us-data"
+from .data import AT_LARGE_STATES, DISTRICT_COUNTS, US_PLACES, US_STATE_FIPS, US_STATES
 
 
 def _ordinal(n: int) -> str:
@@ -45,7 +43,7 @@ def build_us_region_registry() -> RegionRegistry:
         )
     )
 
-    # 2. State regions (each has dedicated dataset)
+    # 2. State regions (filtered from the certified national dataset)
     for abbrev, name in US_STATES.items():
         regions.append(
             Region(
@@ -53,21 +51,23 @@ def build_us_region_registry() -> RegionRegistry:
                 label=name,
                 region_type="state",
                 parent_code="us",
-                dataset_path=resolve_region_dataset_path(
-                    "us",
-                    "state",
-                    state_code=abbrev,
+                scoping_strategy=RowFilterStrategy(
+                    variable_name="state_fips",
+                    variable_value=US_STATE_FIPS[abbrev],
                 ),
                 state_code=abbrev,
                 state_name=name,
             )
         )
 
-    # 3. Congressional district regions (each has dedicated dataset)
+    # 3. Congressional district regions (filtered from the national dataset)
     for state_abbrev, count in DISTRICT_COUNTS.items():
         state_name = US_STATES[state_abbrev]
+        state_fips = US_STATE_FIPS[state_abbrev]
         for i in range(1, count + 1):
             district_code = f"{state_abbrev}-{i:02d}"
+            district_number = 0 if state_abbrev in AT_LARGE_STATES else i
+            district_geoid = state_fips * 100 + district_number
 
             # Create appropriate label
             if state_abbrev in AT_LARGE_STATES:
@@ -81,17 +81,16 @@ def build_us_region_registry() -> RegionRegistry:
                     label=label,
                     region_type="congressional_district",
                     parent_code=f"state/{state_abbrev.lower()}",
-                    dataset_path=resolve_region_dataset_path(
-                        "us",
-                        "congressional_district",
-                        district_code=district_code,
+                    scoping_strategy=RowFilterStrategy(
+                        variable_name="congressional_district_geoid",
+                        variable_value=district_geoid,
                     ),
                     state_code=state_abbrev,
                     state_name=state_name,
                 )
             )
 
-    # 4. Place/city regions (filter from state datasets)
+    # 4. Place/city regions (hierarchy metadata only until Populace emits place_fips)
     for place in US_PLACES:
         state_abbrev = place["state"]
         fips = place["fips"]
@@ -103,10 +102,6 @@ def build_us_region_registry() -> RegionRegistry:
                 parent_code=f"state/{state_abbrev.lower()}",
                 state_code=state_abbrev,
                 state_name=place["state_name"],
-                scoping_strategy=RowFilterStrategy(
-                    variable_name="place_fips",
-                    variable_value=fips,
-                ),
             )
         )
 
diff --git a/src/policyengine/data/bundle/manifest.json b/src/policyengine/data/bundle/manifest.json
index 6fe3c9d2..25a188f8 100644
--- a/src/policyengine/data/bundle/manifest.json
+++ b/src/policyengine/data/bundle/manifest.json
@@ -157,312 +157,6 @@
           "revision": "populace-us-2024-cd-concept-budget-dbbdcec-512e-b2500-r2-20260627T022640Z",
           "sha256": "f6360c3668f38dd9c3bfe600170fdaf1a9a631a0c2accc5ecab03adb7ddfd8d6"
         },
-        "states/AK": {
-          "path": "states/AK.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "758b84f75d167ebf35b529c7344f6b6154a86252a68780624d4542c436bf3903"
-        },
-        "states/AL": {
-          "path": "states/AL.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "7d5555a154e0f4f4bd7c9677cb6473fec69f260c9d1ddae2e2cd2ee2febbcd8a"
-        },
-        "states/AR": {
-          "path": "states/AR.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "a18d787140d33df51f8f31fe6893892e7228ad2679b1b16011f06d46eb34aedd"
-        },
-        "states/AZ": {
-          "path": "states/AZ.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "dc047de094fda3f6c61b9ab0b0f73f1e4d481b2b6d5a773abd513653514e0dfd"
-        },
-        "states/CA": {
-          "path": "states/CA.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "9d7f271cc1a3c84222e284b14cdd749f5364d16a598e1e3693053ce6c580e954"
-        },
-        "states/CO": {
-          "path": "states/CO.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "7eae1034e34eacd91dcc439a951d8777606fb13b97093a5bf1553bf805445dc0"
-        },
-        "states/CT": {
-          "path": "states/CT.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "5e5ab0aa48b4dba8fe4879829717461ea6e668637faee9eaae5d3114f45ba2f3"
-        },
-        "states/DC": {
-          "path": "states/DC.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "51b8cdeecab13d45588206139ae4d106cc972b83d846a1a0f5becb6876707d93"
-        },
-        "states/DE": {
-          "path": "states/DE.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "186e9b90a11413ae0459de26a799029eb74d4d7ea2bd7625031fd3e7a1f0bf98"
-        },
-        "states/FL": {
-          "path": "states/FL.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "84bfcecffc59a7c892b4929bf5b4a150e122470a09ebc7643d374726d17057e9"
-        },
-        "states/GA": {
-          "path": "states/GA.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "f2a3edbb813a43ec17d189e86bb8b087b51f44be2de66cdcbbd933f91a21eed0"
-        },
-        "states/HI": {
-          "path": "states/HI.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "5ae626eac48714bbdf18ded2485b767e37a9d842bc631035400b455e015dd218"
-        },
-        "states/IA": {
-          "path": "states/IA.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "6eda342bff2a146af371d1e05f9b47bbc083100a2987a20ef5effcd282017cd2"
-        },
-        "states/ID": {
-          "path": "states/ID.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "bd43c3dce2c602abf71ab064dce76db0d99846ca4db20d78a78373377aab4201"
-        },
-        "states/IL": {
-          "path": "states/IL.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "f3840cc2833f3c8f7975631d6bda07a9c27a81dd21c986abf6c831066d6880eb"
-        },
-        "states/IN": {
-          "path": "states/IN.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "8319b0232e8883e3dc486e98888339a4eff84d22f7b12cae62e54079bc4857d3"
-        },
-        "states/KS": {
-          "path": "states/KS.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "e9256e79bc0daaa6cb2965ef65768ec336fd9d8c09b449242516832a5bf245df"
-        },
-        "states/KY": {
-          "path": "states/KY.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "ab6c15f006c0c2f9f66dd9925fc887bfe261a0cff690d40cb09665f4983e89ee"
-        },
-        "states/LA": {
-          "path": "states/LA.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "d159f6c358019a0ddeeec16072efb7720c2c5efd0aca381f8e0d48ac6aa8ecd9"
-        },
-        "states/MA": {
-          "path": "states/MA.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "e5b6c0fff3c638185d1c02adbca8aab74359bb92c9969a98af8046c926faf91d"
-        },
-        "states/MD": {
-          "path": "states/MD.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "80b3a88c36f441dd9d5af24ce1649448eb74f11fa9c68be14da2169742babd7f"
-        },
-        "states/ME": {
-          "path": "states/ME.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "683c320f238b7e99cecf660194f09c60486beb5b1bc919405b7fffd2a7d19314"
-        },
-        "states/MI": {
-          "path": "states/MI.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "f1e8220bf6420402b1ae0efe0f745c5b997a3c7a009f4a4f47cd49caa3fa1208"
-        },
-        "states/MN": {
-          "path": "states/MN.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "481db29ecc2128b59f4190b302dc284afdcbca95d221a559153d565cf2919a11"
-        },
-        "states/MO": {
-          "path": "states/MO.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "6b61f2dc508fbf9c4b22d0d3054a263c37f31b797b482fd25122ce46a97286d5"
-        },
-        "states/MS": {
-          "path": "states/MS.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "52c940f80de66ab143df2d9259c36140eb71ce93e4fede8139596be5ad6cf5ff"
-        },
-        "states/MT": {
-          "path": "states/MT.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "14a208de0e3d97ad95f0ff979ff7fd594b4495f12e6cbbe432a16bb3a1e0cede"
-        },
-        "states/NC": {
-          "path": "states/NC.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "c4418f5396fc2201f0100d1253f321ebad2d808c174d96b01c36230b43e31d54"
-        },
-        "states/ND": {
-          "path": "states/ND.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "c5761dc56e30460b50176f088364d95d221eb305d7e9128f028e8f17eb36ca83"
-        },
-        "states/NE": {
-          "path": "states/NE.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "e187ca6652931ba0efa402bd0570bd714bd789774b7d16e7215fe27c16246132"
-        },
-        "states/NH": {
-          "path": "states/NH.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "695323c9c07b4cff5f49c7a76e40cb05476e3f464bef0e700f79f46cb6334326"
-        },
-        "states/NJ": {
-          "path": "states/NJ.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "d1818cac60caed75ce7515715ec3b52cff886ecbd13123bfaeb909241c37ac16"
-        },
-        "states/NM": {
-          "path": "states/NM.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "c4f561151751f4bf11189c1cabe7988e22bc15e5eb1a0dea059de5685e817989"
-        },
-        "states/NV": {
-          "path": "states/NV.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "4db82bda4fb9c0a37304c9e0424e30341389f8addb7bbbdb01316066b8332cf9"
-        },
-        "states/NY": {
-          "path": "states/NY.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "4a1476e298c552a673b88a29c11f12210511ba188291dc15001c5a71d83f437c"
-        },
-        "states/OH": {
-          "path": "states/OH.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "f6984d9295d00e0e9a7b84c72a0ccb1a231598a5e9f5ff744e8326e4119cca77"
-        },
-        "states/OK": {
-          "path": "states/OK.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "a92e61e7d445e9d757a0c052afcb31868621882c6b60b3cbd4ef35354bdcf04c"
-        },
-        "states/OR": {
-          "path": "states/OR.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "5758866cdf930f8312f51c656b7e6ce88cd2877f81e73a27348828dc152948ce"
-        },
-        "states/PA": {
-          "path": "states/PA.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "c1a7056b6f424cc4c9e847c1cf20395a0ec202a2e0f6d17c46c3b42bb6b5a6d2"
-        },
-        "states/RI": {
-          "path": "states/RI.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "d1e0bde70b9a760e1963d3481397920a8b73114b22b6b1493afbfc04cb9a7c09"
-        },
-        "states/SC": {
-          "path": "states/SC.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "8a7bb8d513d73cbddbfab5325d02ca94e43157d0b9b14758c74e0efe58253a17"
-        },
-        "states/SD": {
-          "path": "states/SD.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "ccd5c65a96ed73e1ddd840556bf6fdf10713d796a4e99f09e228490917ffebad"
-        },
-        "states/TN": {
-          "path": "states/TN.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "84addf0af5364750b22734246e838fffecdf9a6ed08b1bdbd1d6ca4a76e3be3d"
-        },
-        "states/TX": {
-          "path": "states/TX.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "27578258f5998b9f3dceefea04d21c5fde9fd5b8e05d80b7b5198fb5b9db924e"
-        },
-        "states/UT": {
-          "path": "states/UT.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "e165266d283105549d5395409ad6bdf02c600821ce464f95bc4f0ef8d5365b38"
-        },
-        "states/VA": {
-          "path": "states/VA.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "fb1c3098639521ef744c926327c1ed2ed9f6dd17a3bee637667fb9fb6d5d9b53"
-        },
-        "states/VT": {
-          "path": "states/VT.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "16c80a27f8a3bf2e832219e9471731a6de1f4da7bb08e770dab07def180dd8bb"
-        },
-        "states/WA": {
-          "path": "states/WA.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "75723f11aebf83c9867312b422097a273e3f180e033ba2ec23cc4102bf3dc1a6"
-        },
-        "states/WI": {
-          "path": "states/WI.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "fd1171e43cfcd510b4dcd675849e6d4a6afeb0057956123087a083db83425e5d"
-        },
-        "states/WV": {
-          "path": "states/WV.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "8b0e718d4a91a3acceedc5a630ea6fd845dad1896b5dfea6baf26786f7b531fe"
-        },
-        "states/WY": {
-          "path": "states/WY.h5",
-          "repo_id": "policyengine/policyengine-us-data",
-          "revision": "1.115.5",
-          "sha256": "731d83ae37863ff994df2f953740ddb10b36910f43af01b38d36ffb55a88d4b5"
-        },
         "us_source_coverage": {
           "path": "releases/populace-us-2024-cd-concept-budget-dbbdcec-512e-b2500-r2-20260627T022640Z/us_source_coverage.json",
           "repo_id": "policyengine/populace-us",
@@ -482,13 +176,8 @@
       "region_datasets": {
         "national": {
           "path_template": "populace_us_2024.h5"
-        },
-        "state": {
-          "path_template": "states/{state_code}.h5"
         }
       },
-      "regional_release_manifest_uri": "https://huggingface.co/policyengine/policyengine-us-data/resolve/1.115.5/releases/1.115.5/release_manifest.json",
-      "regional_source_manifest_uri": "hf://model/policyengine/policyengine-us-data@1.115.5/releases/1.115.5/release_manifest.json",
       "release_manifest_uri": "https://huggingface.co/datasets/policyengine/populace-us/resolve/populace-us-2024-cd-concept-budget-dbbdcec-512e-b2500-r2-20260627T022640Z/releases/populace-us-2024-cd-concept-budget-dbbdcec-512e-b2500-r2-20260627T022640Z/release_manifest.json",
       "schema_version": 1,
       "source_manifest_uri": "hf://dataset/policyengine/populace-us@populace-us-2024-cd-concept-budget-dbbdcec-512e-b2500-r2-20260627T022640Z/releases/populace-us-2024-cd-concept-budget-dbbdcec-512e-b2500-r2-20260627T022640Z/release_manifest.json",
diff --git a/src/policyengine/data/bundle/uk.trace.tro.jsonld b/src/policyengine/data/bundle/uk.trace.tro.jsonld
index 69c85353..02ed041d 100644
--- a/src/policyengine/data/bundle/uk.trace.tro.jsonld
+++ b/src/policyengine/data/bundle/uk.trace.tro.jsonld
@@ -75,7 +75,7 @@
             "@type": "trov:ResearchArtifact",
             "schema:name": "policyengine.py bundle manifest for uk",
             "trov:mimeType": "application/json",
-            "trov:sha256": "48f8a3946d96a8da9493709b3c681196aada942e313ddd4cb44840416cd13978"
+            "trov:sha256": "86bfa279db3a6f416e85f5e093db41de2ef7e5aa30d1d1ef76c30c9361ef0c1e"
           },
           {
             "@id": "composition/1/artifact/data_release_manifest",
@@ -102,7 +102,7 @@
         "trov:hasFingerprint": {
           "@id": "composition/1/fingerprint",
           "@type": "trov:CompositionFingerprint",
-          "trov:sha256": "260f416ed5c24c1969eb9b532a9f9070afcbd2f69bf861cad7e9b642ca9eb2f9"
+          "trov:sha256": "dfaa03f0d36a88fa5c7b969c6eca30664e843391cec93a288fff1ca949241f95"
         }
       },
       "trov:hasPerformance": {
@@ -111,12 +111,9 @@
         "pe:builtWithModelVersion": "2.89.2",
         "pe:certifiedBy": "policyengine.py bundle certification",
         "pe:certifiedForModelVersion": "2.89.2",
-        "pe:ciGitRef": "refs/heads/main",
-        "pe:ciGitSha": "44a7cac06f506d34f7565b203ca9c948f4585a43",
-        "pe:ciRunUrl": "https://github.com/PolicyEngine/policyengine.py/actions/runs/28281042856",
         "pe:compatibilityBasis": "built_with_model_package",
         "pe:dataBuildId": "populace-uk-2023-dd68c73-4aa4b14-20260619T023711Z",
-        "pe:emittedIn": "github-actions",
+        "pe:emittedIn": "repository-bundle",
         "rdfs:comment": "Certification of build populace-uk-2023-dd68c73-4aa4b14-20260619T023711Z for policyengine-uk 2.89.2.",
         "trov:accessedArrangement": {
           "@id": "arrangement/1"
diff --git a/src/policyengine/data/bundle/us.trace.tro.jsonld b/src/policyengine/data/bundle/us.trace.tro.jsonld
index 422e82e5..1b2e6605 100644
--- a/src/policyengine/data/bundle/us.trace.tro.jsonld
+++ b/src/policyengine/data/bundle/us.trace.tro.jsonld
@@ -75,7 +75,7 @@
             "@type": "trov:ResearchArtifact",
             "schema:name": "policyengine.py bundle manifest for us",
             "trov:mimeType": "application/json",
-            "trov:sha256": "48f8a3946d96a8da9493709b3c681196aada942e313ddd4cb44840416cd13978"
+            "trov:sha256": "86bfa279db3a6f416e85f5e093db41de2ef7e5aa30d1d1ef76c30c9361ef0c1e"
           },
           {
             "@id": "composition/1/artifact/data_release_manifest",
@@ -102,7 +102,7 @@
         "trov:hasFingerprint": {
           "@id": "composition/1/fingerprint",
           "@type": "trov:CompositionFingerprint",
-          "trov:sha256": "a516bc0d555b6e648bb027a85d837def92c76b7e3c4dd2e5256bbc01ac900534"
+          "trov:sha256": "16263eeda537c28d24fde6f8e39b4ebe21ef72728e8021c14c728632c0e1cd3d"
         }
       },
       "trov:hasPerformance": {
@@ -111,12 +111,9 @@
         "pe:builtWithModelVersion": "1.745.0",
         "pe:certifiedBy": "policyengine.py bundle certification",
         "pe:certifiedForModelVersion": "1.745.0",
-        "pe:ciGitRef": "refs/heads/main",
-        "pe:ciGitSha": "44a7cac06f506d34f7565b203ca9c948f4585a43",
-        "pe:ciRunUrl": "https://github.com/PolicyEngine/policyengine.py/actions/runs/28281042856",
         "pe:compatibilityBasis": "built_with_model_package",
         "pe:dataBuildId": "populace-us-2024-cd-concept-budget-dbbdcec-512e-b2500-r2-20260627T022640Z",
-        "pe:emittedIn": "github-actions",
+        "pe:emittedIn": "repository-bundle",
         "rdfs:comment": "Certification of build populace-us-2024-cd-concept-budget-dbbdcec-512e-b2500-r2-20260627T022640Z for policyengine-us 1.745.0.",
         "trov:accessedArrangement": {
           "@id": "arrangement/1"
diff --git a/src/policyengine/provenance/certification.py b/src/policyengine/provenance/certification.py
index a0ade420..44bd9ef5 100644
--- a/src/policyengine/provenance/certification.py
+++ b/src/policyengine/provenance/certification.py
@@ -484,6 +484,14 @@ def build_country_manifest_payload(
 
     datasets: dict[str, dict] = {}
     for name, artifact in manifest.artifacts.items():
+        # Per-area Populace .h5 slices are not vendored into the runtime bundle.
+        if (
+            country == "us"
+            and manifest.data_package.name == "populace-data"
+            and artifact.path.endswith(".h5")
+            and artifact.path.startswith(("states/", "districts/"))
+        ):
+            continue
         payload: dict = {
             "path": artifact_path_for_country_manifest(artifact, uri_parts),
             "revision": artifact.revision,
@@ -498,6 +506,13 @@ def build_country_manifest_payload(
     raw_regions = manifest.metadata.get("region_datasets")
     if isinstance(raw_regions, dict):
         for region, template in sorted(raw_regions.items()):
+            # Matching path templates for those per-area slices are dropped too.
+            if (
+                country == "us"
+                and manifest.data_package.name == "populace-data"
+                and region in {"state", "congressional_district"}
+            ):
+                continue
             if isinstance(template, dict) and "path_template" in template:
                 region_datasets[region] = {"path_template": template["path_template"]}
 
diff --git a/src/policyengine/provenance/manifest.py b/src/policyengine/provenance/manifest.py
index cd3dd7bd..b9a7d616 100644
--- a/src/policyengine/provenance/manifest.py
+++ b/src/policyengine/provenance/manifest.py
@@ -473,6 +473,12 @@ def resolve_dataset_reference(country_id: str, dataset: str) -> str:
             or _artifact_revision(manifest.data_package),
         )
 
+    # Only an existing regular file is treated as a local dataset path;
+    # directories fall through to the manifest lookup below.
+    local_path = Path(dataset).expanduser()
+    if local_path.is_file():
+        return str(local_path)
+
     data_release_manifest = get_data_release_manifest(country_id)
     artifact = data_release_manifest.artifacts.get(dataset)
     if artifact is None:
@@ -506,6 +512,21 @@ def resolve_managed_dataset_reference(
     if dataset is None:
         return manifest.default_dataset_uri
 
+    if dataset in manifest.datasets:
+        return resolve_dataset_reference(country_id, dataset)
+
+    # Directories are not datasets, so only regular files trigger this guard.
+    local_path = Path(dataset).expanduser()
+    if local_path.is_file():
+        if allow_unmanaged:
+            return str(local_path)
+        raise ValueError(
+            "Explicit local dataset paths bypass the policyengine.py release "
+            "bundle. Pass a manifest dataset name or omit `dataset` to use the "
+            "certified default dataset. Set `allow_unmanaged=True` only if you "
+            "intend to bypass bundle enforcement."
+        )
+
     if "://" in dataset:
         if dataset == manifest.default_dataset_uri:
             return dataset
diff --git a/src/policyengine/provenance/trace.py b/src/policyengine/provenance/trace.py
index 777b233d..be314265 100644
--- a/src/policyengine/provenance/trace.py
+++ b/src/policyengine/provenance/trace.py
@@ -297,6 +297,7 @@ def build_trace_tro_from_release_bundle(
     model_wheel_url: Optional[str] = None,
     fetch_pypi: Any = fetch_pypi_wheel_metadata,
     self_url: Optional[str] = None,
+    emission_context: Optional[Mapping[str, str]] = None,
 ) -> dict:
     """Build a TRACE TRO for a certified runtime bundle.
 
@@ -449,6 +450,7 @@ def build_trace_tro_from_release_bundle(
             f"{country_manifest.data_package.version}"
         ),
         certification=effective_certification,
+        emission_context=emission_context,
         started_at=(
             data_release_manifest.build.built_at
             if (
@@ -500,6 +502,7 @@ def _build_bundle_performance(
     *,
     certified_data_build_id: str,
     certification: Optional[DataCertification],
+    emission_context: Optional[Mapping[str, str]],
     started_at: Optional[str],
     ended_at: Optional[str],
 ) -> dict[str, Any]:
@@ -539,7 +542,11 @@ def _build_bundle_performance(
             performance["pe:dataBuildId"] = certification.data_build_id
         if certification.certified_by is not None:
             performance["pe:certifiedBy"] = certification.certified_by
-    performance.update(_emission_context())
+    # An explicit mapping (even an empty one) replaces the ambient context;
+    # only None falls back to _emission_context().
+    performance.update(
+        _emission_context() if emission_context is None else emission_context
+    )
     return performance
 
 
diff --git a/src/policyengine/tax_benefit_models/us/datasets.py b/src/policyengine/tax_benefit_models/us/datasets.py
index 71bbb395..00250fa2 100644
--- a/src/policyengine/tax_benefit_models/us/datasets.py
+++ b/src/policyengine/tax_benefit_models/us/datasets.py
@@ -264,9 +264,7 @@ def _load_policyengine_core_h5(path: Path, year: int) -> USYearData:
 
 
 def create_datasets(
-    datasets: list[str] = [
-        "enhanced_cps_2024",
-    ],
+    datasets: Optional[list[str]] = None,
     years: list[int] = [2024, 2025, 2026, 2027, 2028],
     data_folder: str = "./data",
 ) -> dict[str, PolicyEngineUSDataset]:
@@ -278,10 +276,11 @@ def create_datasets(
         data_folder: Directory to save the dataset files
 
     Returns:
-        Dictionary mapping dataset keys (e.g., "enhanced_cps_2024") to PolicyEngineUSDataset objects
+        Dictionary mapping dataset keys (e.g., "populace_us_2024") to PolicyEngineUSDataset objects
     """
     from policyengine_us import Microsimulation
 
+    datasets = datasets or [get_release_manifest("us").default_dataset]
     result = {}
     for dataset in datasets:
         resolved_dataset = resolve_dataset_reference("us", dataset)
@@ -451,9 +450,7 @@ def create_datasets(
 
 
 def load_datasets(
-    datasets: list[str] = [
-        "enhanced_cps_2024",
-    ],
+    datasets: Optional[list[str]] = None,
     years: list[int] = [2024, 2025, 2026, 2027, 2028],
     data_folder: str = "./data",
 ) -> dict[str, PolicyEngineUSDataset]:
@@ -465,8 +462,9 @@ def load_datasets(
         data_folder: Directory containing the dataset files
 
     Returns:
-        Dictionary mapping dataset keys (e.g., "enhanced_cps_2024") to PolicyEngineUSDataset objects
+        Dictionary mapping dataset keys (e.g., "populace_us_2024") to PolicyEngineUSDataset objects
     """
+    datasets = datasets or [get_release_manifest("us").default_dataset]
     result = {}
     for dataset in datasets:
         resolved_dataset = resolve_dataset_reference("us", dataset)
@@ -1140,9 +1138,7 @@ def load_managed_long_term_datasets(
 
 
 def ensure_datasets(
-    datasets: list[str] = [
-        "enhanced_cps_2024",
-    ],
+    datasets: Optional[list[str]] = None,
     years: list[int] = [2024, 2025, 2026, 2027, 2028],
     data_folder: str = "./data",
 ) -> dict[str, PolicyEngineUSDataset]:
@@ -1156,6 +1152,8 @@ def ensure_datasets(
     Returns:
         Dictionary mapping dataset keys to PolicyEngineUSDataset objects
     """
+    datasets = datasets or [get_release_manifest("us").default_dataset]
+
     # Check if all dataset files exist
     all_exist = True
     for dataset in datasets:
diff --git a/src/policyengine/utils/entity_utils.py b/src/policyengine/utils/entity_utils.py
index f06b5d59..481ff62a 100644
--- a/src/policyengine/utils/entity_utils.py
+++ b/src/policyengine/utils/entity_utils.py
@@ -1,6 +1,7 @@
 """Shared utilities for entity relationship building and dataset filtering."""
 
 import logging
+from typing import Optional, Union
 
 import pandas as pd
 from microdf import MicroDataFrame
@@ -55,9 +56,10 @@ def filter_dataset_by_household_variable(
     entity_data: dict[str, MicroDataFrame],
     group_entities: list[str],
     variable_name: str,
-    variable_value: str,
+    variable_value: Union[str, int, float],
+    additional_filters: Optional[dict[str, Union[str, int, float]]] = None,
 ) -> dict[str, MicroDataFrame]:
-    """Filter dataset entities to only include households where a variable matches.
+    """Filter dataset entities to only include households matching variables.
 
     Uses an entity relationship approach: builds an explicit map of all
     entity relationships, filters at the household level, and keeps all
@@ -69,6 +71,8 @@ def filter_dataset_by_household_variable(
         group_entities: List of group entity names for this country.
         variable_name: The household-level variable to filter on.
         variable_value: The value to match. Handles both str and bytes encoding.
+        additional_filters: Optional household-level filters that must also
+                            match, keyed by variable name.
 
     Returns:
         A dict mapping entity names to filtered MicroDataFrames.
@@ -84,18 +88,23 @@ def filter_dataset_by_household_variable(
             f"Variable '{variable_name}' not found in household data. "
             f"Available columns: {list(household_data.columns)}"
         )
+    additional_filters = additional_filters or {}
+    for extra_variable in additional_filters:
+        if extra_variable not in household_data.columns:
+            raise ValueError(
+                f"Variable '{extra_variable}' not found in household data. "
+                f"Available columns: {list(household_data.columns)}"
+            )
 
     # Build entity relationships
     entity_rel = build_entity_relationships(person_data, group_entities)
 
     # Find matching household IDs
-    hh_values = household_data[variable_name].values
     hh_ids = household_data["household_id"].values
 
-    if isinstance(variable_value, str):
-        hh_mask = (hh_values == variable_value) | (hh_values == variable_value.encode())
-    else:
-        hh_mask = hh_values == variable_value
+    hh_mask = _values_match(household_data[variable_name].values, variable_value)
+    for extra_variable, extra_value in additional_filters.items():
+        hh_mask &= _values_match(household_data[extra_variable].values, extra_value)
 
     matching_hh_ids = set(hh_ids[hh_mask])
 
@@ -138,3 +147,16 @@ def filter_dataset_by_household_variable(
         )
 
     return result
+
+
+def _values_match(values, expected: Union[str, int, float]):
+    """Compare household column values against an expected filter value.
+
+    String targets match either the str itself or its encoded bytes form;
+    other targets use plain ``==``. Returns whatever ``==`` yields for
+    ``values`` (callers pass a column's ``.values`` and use it as a mask).
+    """
+    if isinstance(expected, str):
+        # The column may hold str or bytes, so accept both encodings.
+        return (values == expected) | (values == expected.encode())
+    return values == expected
diff --git a/tests/fixtures/filtering_fixtures.py b/tests/fixtures/filtering_fixtures.py
index 4534ad97..7776b798 100644
--- a/tests/fixtures/filtering_fixtures.py
+++ b/tests/fixtures/filtering_fixtures.py
@@ -20,7 +20,7 @@ def create_us_test_dataset() -> PolicyEngineUSDataset:
     Creates a dataset with 6 persons across 3 households:
     - Household 1 (place_fips="44000"): 2 persons
     - Household 2 (place_fips="44000"): 2 persons
-    - Household 3 (place_fips="57000"): 2 persons
+    - Household 3 (place_fips="44000", state_fips=34): 2 persons
     """
     # Person data - 6 persons across 3 households
     person_data = pd.DataFrame(
@@ -36,13 +36,14 @@ def create_us_test_dataset() -> PolicyEngineUSDataset:
         }
     )
 
-    # Household data - 3 households, 2 in place 44000, 1 in place 57000
+    # Household data - place_fips is only unique within state.
     household_data = pd.DataFrame(
         {
             "household_id": [1, 2, 3],
             "household_weight": [1000.0, 1000.0, 1000.0],
-            "place_fips": ["44000", "44000", "57000"],
+            "place_fips": ["44000", "44000", "44000"],
             "state_fips": [6, 6, 34],  # CA, CA, NJ
+            "congressional_district_geoid": [601, 602, 3401],
         }
     )
 
diff --git a/tests/test_certify_data_release.py b/tests/test_certify_data_release.py
index c8d9119f..62a1467f 100644
--- a/tests/test_certify_data_release.py
+++ b/tests/test_certify_data_release.py
@@ -103,6 +103,14 @@ def _release_manifest_payload() -> dict:
                 "sha256": "b" * 64,
                 "size_bytes": 1,
             },
+            "districts/CA-01": {
+                "kind": "microdata",
+                "path": "districts/CA-01.h5",
+                "repo_id": "policyengine/populace-us",
+                "revision": TAG,
+                "sha256": "1" * 64,
+                "size_bytes": 1,
+            },
         },
     }
 
@@ -111,6 +119,7 @@ def _populace_manifest_payload_without_regions() -> dict:
     payload = _release_manifest_payload()
     payload["metadata"] = {}
     payload["artifacts"].pop("states/AK")
+    payload["artifacts"].pop("districts/CA-01")
     return payload
 
 
@@ -179,6 +188,8 @@ def _uk_release_manifest_payload() -> dict:
             artifact["repo_id"] = "policyengine/populace-uk-private"
             artifact["revision"] = UK_TAG
     payload["artifacts"].pop("us_source_coverage")
+    payload["artifacts"].pop("states/AK")
+    payload["artifacts"].pop("districts/CA-01")
     return payload
 
 
@@ -290,15 +301,11 @@ def test__given_manifest__then_pins_data_package_and_default(self):
         assert payload["model_package"]["sha256"] == "d" * 64
         assert payload["model_package"]["wheel_url"] == "https://example/wheel"
 
-    def test__given_inherited_artifact__then_keeps_its_repo_pin(self):
+    def test__given_populace_area_h5_artifact__then_omits_it_from_runtime_bundle(self):
         payload = self._payload()
 
-        assert payload["datasets"]["states/AK"] == {
-            "path": "states/AK.h5",
-            "revision": "1.115.5",
-            "sha256": "b" * 64,
-            "repo_id": "policyengine/policyengine-us-data",
-        }
+        assert "states/AK" not in payload["datasets"]
+        assert "districts/CA-01" not in payload["datasets"]
 
     def test__given_release_scoped_diagnostics__then_rewrites_paths(self):
         payload = self._payload()
@@ -315,11 +322,13 @@ def test__given_release_scoped_diagnostics__then_rewrites_paths(self):
             f"releases/{TAG}/us_source_coverage.json"
         )
 
-    def test__given_region_templates__then_carried_through(self):
+    def test__given_populace_region_templates__then_only_national_is_carried_through(
+        self,
+    ):
         payload = self._payload()
 
-        assert payload["region_datasets"]["state"] == {
-            "path_template": "states/{state_code}.h5"
+        assert payload["region_datasets"] == {
+            "national": {"path_template": "populace_us_2024.h5"}
         }
 
     def test__given_build_provenance__then_certification_carries_it(self):
@@ -338,7 +347,7 @@ def test__given_build_provenance__then_certification_carries_it(self):
 
 
 class TestMergeUSStateReleaseManifest:
-    def test__given_state_manifest__then_adds_state_region_artifacts(self):
+    def test__given_state_manifest__then_does_not_vendor_state_region_artifacts(self):
         primary = DataReleaseManifest.model_validate(
             _populace_manifest_payload_without_regions()
         )
@@ -355,15 +364,9 @@ def test__given_state_manifest__then_adds_state_region_artifacts(self):
             model_wheel={},
         )
 
-        assert payload["datasets"]["states/CA"] == {
-            "path": "states/CA.h5",
-            "revision": US_DATA_VERSION,
-            "sha256": f"{US_STATE_CODES.index('CA') + 1:064x}",
-            "repo_id": "policyengine/policyengine-us-data",
-        }
+        assert "states/CA" not in payload["datasets"]
         assert payload["region_datasets"] == {
             "national": {"path_template": "populace_us_2024.h5"},
-            "state": {"path_template": "states/{state_code}.h5"},
         }
 
     def test__given_missing_state_artifact__then_raises(self):
@@ -473,11 +476,11 @@ def test__given_fetched_populace_manifest__then_updates_bundle_manifest(
         assert release["source_manifest_uri"] == UK_MANIFEST_URI
         assert written["packages"]["policyengine-uk"]["version"] == "2.89.2"
         assert result.data_producer == "populace"
-        assert result.dataset_count == 4
+        assert result.dataset_count == 3
         assert result.build_id == UK_TAG
         assert result.bundle_path == bundle_path
 
-    def test__given_us_regional_manifest__then_certifies_state_artifacts(
+    def test__given_us_regional_manifest__then_validates_but_does_not_vendor_state_artifacts(
         self, tmp_path
     ):
         bundle_path = tmp_path / "manifest.json"
@@ -532,14 +535,11 @@ def test__given_us_regional_manifest__then_certifies_state_artifacts(
         release = written["data_releases"]["us"]
         assert release["source_manifest_uri"] == MANIFEST_URI
         assert release["regional_source_manifest_uri"] == US_DATA_MANIFEST_URI
-        assert release["region_datasets"]["state"] == {
-            "path_template": "states/{state_code}.h5"
+        assert release["region_datasets"] == {
+            "national": {"path_template": "populace_us_2024.h5"}
         }
-        assert release["datasets"]["states/CA"]["repo_id"] == (
-            "policyengine/policyengine-us-data"
-        )
-        assert release["datasets"]["states/CA"]["revision"] == US_DATA_VERSION
-        assert result.dataset_count == 4 + len(US_STATE_CODES)
+        assert "states/CA" not in release["datasets"]
+        assert result.dataset_count == 4
 
     def test__given_us_without_data_producer__then_legacy_update_is_explicitly_unsupported(
         self, tmp_path
@@ -720,3 +720,5 @@ def test__given_vendored_bundle_manifest__then_tro_sidecar_binds_it(self):
         )
 
         assert bundle_manifest["trov:sha256"] == expected
+        performance = tro["@graph"][0]["trov:hasPerformance"]
+        assert performance["pe:emittedIn"] == "repository-bundle"
diff --git a/tests/test_entity_utils.py b/tests/test_entity_utils.py
index f8846457..cc51c6b1 100644
--- a/tests/test_entity_utils.py
+++ b/tests/test_entity_utils.py
@@ -158,6 +158,69 @@ def test__given_matching_value__then_returns_filtered_entities(self):
         assert len(pd.DataFrame(result["person"])) == 2
         assert len(pd.DataFrame(result["household"])) == 1
 
+    def test__given_us_numeric_geography__then_filters_state_and_district(
+        self, us_test_dataset
+    ):
+        """Given: US data with Populace geography columns
+        When: Filtering by state FIPS and congressional district GEOID
+        Then: Related entities are preserved for matching households only
+        """
+        state_result = filter_dataset_by_household_variable(
+            entity_data=us_test_dataset.data.entity_data,
+            group_entities=[
+                "household",
+                "tax_unit",
+                "spm_unit",
+                "family",
+                "marital_unit",
+            ],
+            variable_name="state_fips",
+            variable_value=6,
+        )
+        district_result = filter_dataset_by_household_variable(
+            entity_data=us_test_dataset.data.entity_data,
+            group_entities=[
+                "household",
+                "tax_unit",
+                "spm_unit",
+                "family",
+                "marital_unit",
+            ],
+            variable_name="congressional_district_geoid",
+            variable_value=601,
+        )
+
+        assert len(pd.DataFrame(state_result["household"])) == 2
+        assert len(pd.DataFrame(state_result["person"])) == 4
+        assert len(pd.DataFrame(district_result["household"])) == 1
+        assert len(pd.DataFrame(district_result["person"])) == 2
+
+    def test__given_place_fips_collision__then_additional_state_filter_disambiguates(
+        self, us_test_dataset
+    ):
+        """Given: Two states with the same place FIPS code
+        When: Filtering by place FIPS plus state FIPS
+        Then: Only households from the requested state are included
+        """
+        result = filter_dataset_by_household_variable(
+            entity_data=us_test_dataset.data.entity_data,
+            group_entities=[
+                "household",
+                "tax_unit",
+                "spm_unit",
+                "family",
+                "marital_unit",
+            ],
+            variable_name="place_fips",
+            variable_value="44000",
+            additional_filters={"state_fips": 6},
+        )
+
+        households = pd.DataFrame(result["household"])
+        assert len(households) == 2
+        assert set(households["state_fips"]) == {6}
+        assert len(pd.DataFrame(result["person"])) == 4
+
     def test__given_no_match__then_raises_value_error(self):
         """Given: Dataset with no matching households
         When: Filtering
diff --git a/tests/test_release_manifests.py b/tests/test_release_manifests.py
index df8c9fa5..627e8478 100644
--- a/tests/test_release_manifests.py
+++ b/tests/test_release_manifests.py
@@ -26,8 +26,10 @@
     get_release_manifest,
     https_release_manifest_uri,
     resolve_dataset_reference,
+    resolve_default_datasets,
     resolve_local_managed_dataset_source,
     resolve_managed_dataset_reference,
+    resolve_region_dataset_path,
 )
 
 PYPROJECT = Path(__file__).resolve().parents[1] / "pyproject.toml"
@@ -236,6 +238,12 @@ def test__given_explicit_url__then_resolution_is_noop(self):
 
         assert resolve_dataset_reference("us", url) == url
 
+    def test__given_existing_local_path__then_resolution_is_noop(self, tmp_path):
+        dataset = tmp_path / "smoke_test_populace_us_2024.h5"
+        dataset.write_bytes(b"")
+
+        assert resolve_dataset_reference("us", str(dataset)) == str(dataset)
+
     def test__given_default_dataset__then_prefers_certified_data_artifact_uri(self):
         manifest = get_release_manifest("us")
 
@@ -248,6 +256,49 @@ def test__given_no_dataset__then_managed_resolution_uses_certified_default(self)
             == get_release_manifest("us").default_dataset_uri
         )
 
+    def test__given_us_manifest__then_has_no_inherited_area_artifacts(self):
+        manifest = get_release_manifest("us")
+
+        assert "state" not in manifest.region_datasets
+        assert "congressional_district" not in manifest.region_datasets
+        assert resolve_region_dataset_path("us", "state", state_code="CA") is None
+        assert (
+            resolve_region_dataset_path(
+                "us",
+                "congressional_district",
+                district_code="CA-01",
+            )
+            is None
+        )
+        assert not any(
+            key.startswith(("states/", "districts/"))
+            for key in resolve_default_datasets("us")
+        )
+
+    def test__given_us_ensure_datasets_without_dataset__then_uses_certified_default(
+        self,
+    ):
+        us_datasets = importlib.import_module(
+            "policyengine.tax_benefit_models.us.datasets"
+        )
+
+        with (
+            patch.object(us_datasets.Path, "exists", return_value=False),
+            patch.object(
+                us_datasets,
+                "create_datasets",
+                return_value={"populace_us_2024_2026": object()},
+            ) as create_datasets,
+        ):
+            result = us_datasets.ensure_datasets(years=[2026])
+
+        assert list(result) == ["populace_us_2024_2026"]
+        create_datasets.assert_called_once_with(
+            datasets=["populace_us_2024"],
+            years=[2026],
+            data_folder="./data",
+        )
+
     def test__given_explicit_uri__then_managed_resolution_requires_opt_in(self):
         dataset = "hf://policyengine/policyengine-us-data/cps_2023.h5@1.73.0"
 
@@ -267,6 +318,41 @@ def test__given_explicit_uri__then_managed_resolution_requires_opt_in(self):
             == dataset
         )
 
+    def test__given_local_path__then_managed_resolution_requires_opt_in(self, tmp_path):
+        dataset = tmp_path / "smoke_test_populace_us_2024.h5"
+        dataset.write_bytes(b"")
+
+        try:
+            resolve_managed_dataset_reference("us", str(dataset))
+        except ValueError as error:
+            assert "bypass the policyengine.py release bundle" in str(error)
+        else:
+            raise AssertionError("Expected explicit local path to be rejected")
+
+        assert resolve_managed_dataset_reference(
+            "us",
+            str(dataset),
+            allow_unmanaged=True,
+        ) == str(dataset)
+
+    def test__given_local_file_named_like_logical_dataset__then_manifest_wins(
+        self,
+        tmp_path,
+        monkeypatch,
+    ):
+        dataset = "populace_us_2024"
+        (tmp_path / dataset).write_bytes(b"")
+        monkeypatch.chdir(tmp_path)
+
+        assert (
+            resolve_dataset_reference("us", dataset)
+            == get_release_manifest("us").default_dataset_uri
+        )
+        assert (
+            resolve_managed_dataset_reference("us", dataset)
+            == get_release_manifest("us").default_dataset_uri
+        )
+
     def test__given_versioned_dataset_url__then_logical_name_drops_version(self):
         dataset = "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.73.0"
 
@@ -773,7 +859,7 @@ def test__given_us_managed_microsimulation__then_passes_certified_dataset_and_bu
             with patch.object(
                 us_model,
                 "materialize_dataset_source",
-                return_value="/tmp/enhanced_cps_2024.h5",
+                return_value="/tmp/populace_us_2024.h5",
             ):
                 microsim = us_model.managed_microsimulation()
 
@@ -788,7 +874,7 @@ def test__given_us_managed_microsimulation__then_passes_certified_dataset_and_bu
             == us_model.us_latest.default_dataset_uri
         )
         dataset_source = microsim.policyengine_bundle["runtime_dataset_source"]
-        assert dataset_source == "/tmp/enhanced_cps_2024.h5"
+        assert dataset_source == "/tmp/populace_us_2024.h5"
 
     def test__given_us_unmanaged_dataset_uri__then_source_is_not_rewritten(self):
         dataset = "hf://policyengine/policyengine-us-data/cps_2023.h5@1.73.0"
diff --git a/tests/test_us_regions.py b/tests/test_us_regions.py
index 37cd9a0a..95be4cf4 100644
--- a/tests/test_us_regions.py
+++ b/tests/test_us_regions.py
@@ -1,6 +1,6 @@
 """Tests for US region definitions."""
 
-from policyengine.countries.us.data import DISTRICT_COUNTS, US_STATES
+from policyengine.countries.us.data import DISTRICT_COUNTS, US_STATE_FIPS, US_STATES
 from policyengine.countries.us.regions import (
     build_us_region_registry,
     us_region_registry,
@@ -135,12 +135,13 @@ def test__given_california_region__then_has_correct_format(self):
         assert ca.label == "California"
         assert ca.region_type == "state"
         assert ca.parent_code == "us"
-        assert ca.dataset_path == (
-            "hf://policyengine/policyengine-us-data/states/CA.h5@1.115.5"
-        )
+        assert ca.dataset_path is None
+        assert ca.requires_filter
+        assert ca.scoping_strategy is not None
+        assert ca.scoping_strategy.variable_name == "state_fips"
+        assert ca.scoping_strategy.variable_value == US_STATE_FIPS["CA"]
         assert ca.state_code == "CA"
         assert ca.state_name == "California"
-        assert not ca.requires_filter
 
     def test__given_us_registry__then_has_436_congressional_districts(self):
         """Given: US region registry
@@ -168,8 +169,25 @@ def test__given_ca_first_district__then_has_correct_format(self):
         assert ca01.region_type == "congressional_district"
         assert ca01.parent_code == "state/ca"
         assert ca01.dataset_path is None
+        assert ca01.requires_filter
+        assert ca01.scoping_strategy is not None
+        assert ca01.scoping_strategy.variable_name == "congressional_district_geoid"
+        assert ca01.scoping_strategy.variable_value == US_STATE_FIPS["CA"] * 100 + 1
         assert ca01.state_code == "CA"
-        assert not ca01.requires_filter
+
+    def test__given_at_large_district__then_filter_uses_zero_district_geoid(self):
+        """Given: An at-large congressional district
+        When: Checking its row filter
+        Then: It uses the Populace/Census SS00 district GEOID (state FIPS, then 00)
+        """
+        # When
+        ak_al = us_region_registry.get("congressional_district/AK-01")
+
+        # Then
+        assert ak_al is not None
+        assert ak_al.scoping_strategy is not None
+        assert ak_al.scoping_strategy.variable_name == "congressional_district_geoid"
+        assert ak_al.scoping_strategy.variable_value == US_STATE_FIPS["AK"] * 100
 
     def test__given_dc_district__then_is_at_large(self):
         """Given: DC's congressional district
@@ -183,6 +201,8 @@ def test__given_dc_district__then_is_at_large(self):
         assert dc_al is not None
         assert dc_al.label == "District of Columbia's at-large congressional district"
         assert dc_al.parent_code == "state/dc"
+        assert dc_al.scoping_strategy is not None
+        assert dc_al.scoping_strategy.variable_value == US_STATE_FIPS["DC"] * 100
 
     def test__given_us_registry__then_has_places(self):
         """Given: US region registry
@@ -198,7 +218,7 @@ def test__given_us_registry__then_has_places(self):
     def test__given_los_angeles_region__then_has_correct_format(self):
         """Given: Los Angeles place region
         When: Checking its properties
-        Then: Requires filter with place_fips field
+        Then: Exists as hierarchy metadata but does not claim runtime scoping
         """
         # When
         la = us_region_registry.get("place/CA-44000")
@@ -208,10 +228,8 @@ def test__given_los_angeles_region__then_has_correct_format(self):
         assert "Los Angeles" in la.label
         assert la.region_type == "place"
         assert la.parent_code == "state/ca"
-        assert la.requires_filter
-        assert la.scoping_strategy is not None
-        assert la.scoping_strategy.variable_name == "place_fips"
-        assert la.scoping_strategy.variable_value == "44000"
+        assert not la.requires_filter
+        assert la.scoping_strategy is None
         assert la.state_code == "CA"
         assert la.dataset_path is None  # No dedicated dataset
 
@@ -233,27 +251,24 @@ def test__given_california__then_children_include_districts_and_places(
         assert len(district_children) == DISTRICT_COUNTS["CA"]
         assert len(place_children) >= 10  # CA has many large cities
 
-    def test__given_us_registry__then_dataset_regions_are_national_and_states(self):
+    def test__given_us_registry__then_dataset_regions_are_national_only(self):
         """Given: US region registry
         When: Getting regions with datasets
-        Then: Current certified bundle has national and state datasets
+        Then: Only the national region has a dedicated (canonical Populace) dataset
         """
         # When
         dataset_regions = us_region_registry.get_dataset_regions()
 
         # Then
-        assert len(dataset_regions) == 52
-        assert {region.region_type for region in dataset_regions} == {
-            "national",
-            "state",
-        }
+        assert len(dataset_regions) == 1
+        assert dataset_regions[0].region_type == "national"
 
-    def test__given_certified_state_template__then_states_have_dataset_paths(
+    def test__given_certified_state_template__then_state_filters_national_dataset(
         self, monkeypatch
     ):
         """Given: US bundle manifest with a certified state template
         When: Building the region registry
-        Then: State regions resolve to pinned state dataset artifacts
+        Then: State regions still filter the national certified dataset
         """
         manifest = CountryReleaseManifest.model_validate(
             {
@@ -298,20 +313,31 @@ def test__given_certified_state_template__then_states_have_dataset_paths(
         ca = registry.get("state/ca")
 
         assert ca is not None
-        assert ca.dataset_path == (
-            "hf://policyengine/policyengine-us-data/states/CA.h5@1.115.5"
-        )
+        assert ca.dataset_path is None
+        assert ca.requires_filter
+        assert ca.scoping_strategy is not None
+        assert ca.scoping_strategy.variable_name == "state_fips"
+        assert ca.scoping_strategy.variable_value == US_STATE_FIPS["CA"]
 
-    def test__given_us_registry__then_filter_regions_are_all_places(self):
+    def test__given_us_registry__then_filter_regions_include_states_and_districts(self):
         """Given: US region registry
         When: Getting regions requiring filter
-        Then: All are place regions
+        Then: State and congressional district regions filter the national data
         """
         # When
         filter_regions = us_region_registry.get_filter_regions()
 
         # Then
-        assert all(r.region_type == "place" for r in filter_regions)
+        region_types = {r.region_type for r in filter_regions}
+        assert {"state", "congressional_district"} <= region_types
+        assert "place" not in region_types
+        assert len([r for r in filter_regions if r.region_type == "state"]) == 51
+        assert (
+            len(
+                [r for r in filter_regions if r.region_type == "congressional_district"]
+            )
+            == 436
+        )
 
     def test__given_us_registry__then_total_exceeds_588(self):
         """Given: US region registry