Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions changelog.d/v4-provenance-package.changed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
**BREAKING (v4):** Separate the provenance layer from the core
value-object layer.

- ``policyengine/core/release_manifest.py`` → ``policyengine/provenance/manifest.py``
- ``policyengine/core/trace_tro.py`` → ``policyengine/provenance/trace.py``
- New ``policyengine.provenance`` package re-exports the public
surface (``get_release_manifest``, ``get_data_release_manifest``,
``build_trace_tro_from_release_bundle``, ``build_simulation_trace_tro``,
``serialize_trace_tro``, ``canonical_json_bytes``,
``compute_trace_composition_fingerprint``, etc.).
- ``policyengine.core`` no longer re-exports provenance types.
``policyengine.core`` shrinks to value objects only (Dataset,
Variable, Parameter, Policy, Dynamic, Simulation, Region,
TaxBenefitModel, TaxBenefitModelVersion, scoping strategies).
- ``import policyengine.core.scoping_strategy`` no longer imports
``h5py`` at module load; the weight-replacement code path
lazy-imports it. ``policyengine.outputs.constituency_impact`` and
``policyengine.outputs.local_authority_impact`` likewise no longer
import ``h5py`` at module load; each imports it inside ``run()``.
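- Migration for downstream: replace
``from policyengine.core import DataReleaseManifest`` (et al.)
with ``from policyengine.provenance import DataReleaseManifest``.
Imports that name the old submodules directly must change too:
``policyengine.core.release_manifest`` is now
``policyengine.provenance.manifest`` and
``policyengine.core.trace_tro`` is now ``policyengine.provenance.trace``.
The country-module imports in internal code (``tax_benefit_models/{us,uk}/model.py``
and ``datasets.py``) are already updated.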
6 changes: 3 additions & 3 deletions docs/release-bundles.md
Original file line number Diff line number Diff line change
Expand Up @@ -224,8 +224,8 @@ That is a country-data concern and lives in those repos.
From Python:

```python
from policyengine.core.release_manifest import get_data_release_manifest, get_release_manifest
from policyengine.core.trace_tro import build_trace_tro_from_release_bundle, serialize_trace_tro
from policyengine.provenance.manifest import get_data_release_manifest, get_release_manifest
from policyengine.provenance.trace import build_trace_tro_from_release_bundle, serialize_trace_tro

country = get_release_manifest("us")
tro = build_trace_tro_from_release_bundle(country, get_data_release_manifest("us"))
Expand Down Expand Up @@ -286,7 +286,7 @@ should run):

```python
import hashlib, json, requests
from policyengine.core.trace_tro import canonical_json_bytes
from policyengine.provenance.trace import canonical_json_bytes

sim_tro = json.load(open("results.trace.tro.jsonld"))
perf = sim_tro["@graph"][0]["trov:hasPerformance"]
Expand Down
4 changes: 2 additions & 2 deletions scripts/generate_trace_tros.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@
import sys
from pathlib import Path

from policyengine.core.release_manifest import (
from policyengine.provenance.manifest import (
DataReleaseManifestUnavailableError,
get_data_release_manifest,
get_release_manifest,
)
from policyengine.core.trace_tro import (
from policyengine.provenance.trace import (
build_trace_tro_from_release_bundle,
serialize_trace_tro,
)
Expand Down
6 changes: 3 additions & 3 deletions src/policyengine/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
- ``trace-tro-validate <path>`` validate a TRO against the shipped schema
- ``release-manifest <country>`` print the bundled country manifest

See :mod:`policyengine.core.trace_tro` and ``docs/release-bundles.md``.
See :mod:`policyengine.provenance.trace` and ``docs/release-bundles.md``.
"""

from __future__ import annotations
Expand All @@ -18,11 +18,11 @@
from pathlib import Path
from typing import Optional, Sequence

from policyengine.core.release_manifest import (
from policyengine.provenance.manifest import (
get_data_release_manifest,
get_release_manifest,
)
from policyengine.core.trace_tro import (
from policyengine.provenance.trace import (
build_trace_tro_from_release_bundle,
serialize_trace_tro,
)
Expand Down
37 changes: 8 additions & 29 deletions src/policyengine/core/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
"""Core value objects: Dataset, Variable, Parameter, Policy, Simulation, Region.

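Provenance (release manifests, TRACE TROs) lives in
:mod:`policyengine.provenance` and is intentionally not re-exported
here; import provenance names from that package directly. Note that
``tax_benefit_model_version`` still imports from
:mod:`policyengine.provenance` at module load, so importing this
package currently loads the provenance layer as well.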
"""

from .dataset import Dataset
from .dataset import YearData as YearData
from .dataset import map_to_entity as map_to_entity
Expand All @@ -11,22 +19,6 @@
from .region import Region as Region
from .region import RegionRegistry as RegionRegistry
from .region import RegionType as RegionType
from .release_manifest import CertifiedDataArtifact as CertifiedDataArtifact
from .release_manifest import CountryReleaseManifest as CountryReleaseManifest
from .release_manifest import DataBuildInfo as DataBuildInfo
from .release_manifest import DataCertification as DataCertification
from .release_manifest import DataPackageVersion as DataPackageVersion
from .release_manifest import DataReleaseArtifact as DataReleaseArtifact
from .release_manifest import DataReleaseManifest as DataReleaseManifest
from .release_manifest import PackageVersion as PackageVersion
from .release_manifest import (
certify_data_release_compatibility as certify_data_release_compatibility,
)
from .release_manifest import get_data_release_manifest as get_data_release_manifest
from .release_manifest import get_release_manifest as get_release_manifest
from .release_manifest import (
resolve_managed_dataset_reference as resolve_managed_dataset_reference,
)
from .scoping_strategy import RegionScopingStrategy as RegionScopingStrategy
from .scoping_strategy import RowFilterStrategy as RowFilterStrategy
from .scoping_strategy import ScopingStrategy as ScopingStrategy
Expand All @@ -38,19 +30,6 @@
from .tax_benefit_model_version import (
TaxBenefitModelVersion as TaxBenefitModelVersion,
)
from .trace_tro import (
build_simulation_trace_tro as build_simulation_trace_tro,
)
from .trace_tro import (
build_trace_tro_from_release_bundle as build_trace_tro_from_release_bundle,
)
from .trace_tro import (
compute_trace_composition_fingerprint as compute_trace_composition_fingerprint,
)
from .trace_tro import (
extract_bundle_tro_reference as extract_bundle_tro_reference,
)
from .trace_tro import serialize_trace_tro as serialize_trace_tro
from .variable import Variable as Variable

# Rebuild models to resolve forward references
Expand Down
7 changes: 5 additions & 2 deletions src/policyengine/core/scoping_strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from pathlib import Path
from typing import Annotated, Literal, Optional, Union

import h5py
import numpy as np
import pandas as pd
from microdf import MicroDataFrame
Expand Down Expand Up @@ -127,7 +126,11 @@ def apply(

region_id = self._find_region_index(lookup_df, self.region_code)

# Download weight matrix and extract weights for this region
# Download weight matrix and extract weights for this region.
# h5py is only needed here, so import lazily to keep
# `from policyengine.core import ...` light.
import h5py

weights_path = download_gcs_file(
bucket=self.weight_matrix_bucket,
file_path=self.weight_matrix_key,
Expand Down
5 changes: 3 additions & 2 deletions src/policyengine/core/tax_benefit_model_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@

from pydantic import BaseModel, Field

from .release_manifest import (
from policyengine.provenance.manifest import (
CountryReleaseManifest,
DataCertification,
PackageVersion,
get_data_release_manifest,
)
from policyengine.provenance.trace import build_trace_tro_from_release_bundle

from .tax_benefit_model import TaxBenefitModel
from .trace_tro import build_trace_tro_from_release_bundle

if TYPE_CHECKING:
from .parameter import Parameter
Expand Down
2 changes: 1 addition & 1 deletion src/policyengine/countries/uk/regions.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@
from typing import TYPE_CHECKING

from policyengine.core.region import Region, RegionRegistry
from policyengine.core.release_manifest import resolve_region_dataset_path
from policyengine.core.scoping_strategy import (
RowFilterStrategy,
WeightReplacementStrategy,
)
from policyengine.provenance.manifest import resolve_region_dataset_path

if TYPE_CHECKING:
pass
Expand Down
2 changes: 1 addition & 1 deletion src/policyengine/countries/us/regions.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
"""

from policyengine.core.region import Region, RegionRegistry
from policyengine.core.release_manifest import resolve_region_dataset_path
from policyengine.core.scoping_strategy import RowFilterStrategy
from policyengine.provenance.manifest import resolve_region_dataset_path

from .data import AT_LARGE_STATES, DISTRICT_COUNTS, US_PLACES, US_STATES

Expand Down
3 changes: 2 additions & 1 deletion src/policyengine/outputs/constituency_impact.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

from typing import TYPE_CHECKING, Optional

import h5py
import numpy as np
import pandas as pd
from pydantic import ConfigDict
Expand Down Expand Up @@ -43,6 +42,8 @@ def run(self) -> None:
constituency_df = pd.read_csv(self.constituency_csv_path)

# Load weight matrix: shape (N_constituencies, N_households)
import h5py

with h5py.File(self.weight_matrix_path, "r") as f:
weight_matrix = f[self.year][...]

Expand Down
3 changes: 2 additions & 1 deletion src/policyengine/outputs/local_authority_impact.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

from typing import TYPE_CHECKING, Optional

import h5py
import numpy as np
import pandas as pd
from pydantic import ConfigDict
Expand Down Expand Up @@ -43,6 +42,8 @@ def run(self) -> None:
la_df = pd.read_csv(self.local_authority_csv_path)

# Load weight matrix: shape (N_local_authorities, N_households)
import h5py

with h5py.File(self.weight_matrix_path, "r") as f:
weight_matrix = f[self.year][...]

Expand Down
89 changes: 89 additions & 0 deletions src/policyengine/provenance/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
"""Release-bundle provenance + TRACE TRO emission.

Separated from :mod:`policyengine.core` so the value-object layer
(Dataset, Variable, Parameter, Policy, Simulation, Region) doesn't
force provenance imports on every consumer.

.. code-block:: python

from policyengine.provenance import (
get_release_manifest,
get_data_release_manifest,
build_trace_tro_from_release_bundle,
build_simulation_trace_tro,
serialize_trace_tro,
)
"""

from .manifest import (
CertifiedDataArtifact as CertifiedDataArtifact,
)
from .manifest import (
CountryReleaseManifest as CountryReleaseManifest,
)
from .manifest import (
DataBuildInfo as DataBuildInfo,
)
from .manifest import (
DataCertification as DataCertification,
)
from .manifest import (
DataPackageVersion as DataPackageVersion,
)
from .manifest import (
DataReleaseArtifact as DataReleaseArtifact,
)
from .manifest import (
DataReleaseManifest as DataReleaseManifest,
)
from .manifest import (
DataReleaseManifestUnavailableError as DataReleaseManifestUnavailableError,
)
from .manifest import (
PackageVersion as PackageVersion,
)
from .manifest import (
certify_data_release_compatibility as certify_data_release_compatibility,
)
from .manifest import (
fetch_pypi_wheel_metadata as fetch_pypi_wheel_metadata,
)
from .manifest import (
get_data_release_manifest as get_data_release_manifest,
)
from .manifest import (
get_release_manifest as get_release_manifest,
)
from .manifest import (
https_dataset_uri as https_dataset_uri,
)
from .manifest import (
https_release_manifest_uri as https_release_manifest_uri,
)
from .manifest import (
resolve_dataset_reference as resolve_dataset_reference,
)
from .manifest import (
resolve_local_managed_dataset_source as resolve_local_managed_dataset_source,
)
from .manifest import (
resolve_managed_dataset_reference as resolve_managed_dataset_reference,
)
from .trace import (
build_simulation_trace_tro as build_simulation_trace_tro,
)
from .trace import (
build_trace_tro_from_release_bundle as build_trace_tro_from_release_bundle,
)
from .trace import (
canonical_json_bytes as canonical_json_bytes,
)
from .trace import (
compute_trace_composition_fingerprint as compute_trace_composition_fingerprint,
)
from .trace import (
extract_bundle_tro_reference as extract_bundle_tro_reference,
)
from .trace import (
serialize_trace_tro as serialize_trace_tro,
)
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from collections.abc import Iterable, Mapping
from typing import Any, Optional

from .release_manifest import (
from .manifest import (
CountryReleaseManifest,
DataCertification,
DataReleaseManifest,
Expand Down
4 changes: 2 additions & 2 deletions src/policyengine/results/trace_tro.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
specific reform + ``results.json`` payload so a published result can
be cited with an immutable composition fingerprint.

See :mod:`policyengine.core.trace_tro` for the bundle-level layer.
See :mod:`policyengine.provenance.trace` for the bundle-level layer.
"""

from __future__ import annotations
Expand All @@ -14,7 +14,7 @@
from pathlib import Path
from typing import Optional, Union

from policyengine.core.trace_tro import (
from policyengine.provenance.trace import (
build_simulation_trace_tro,
serialize_trace_tro,
)
Expand Down
2 changes: 1 addition & 1 deletion src/policyengine/tax_benefit_models/uk/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pydantic import ConfigDict

from policyengine.core import Dataset, YearData
from policyengine.core.release_manifest import (
from policyengine.provenance.manifest import (
dataset_logical_name,
resolve_dataset_reference,
)
Expand Down
2 changes: 1 addition & 1 deletion src/policyengine/tax_benefit_models/uk/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
TaxBenefitModelVersion,
Variable,
)
from policyengine.core.release_manifest import (
from policyengine.provenance.manifest import (
certify_data_release_compatibility,
dataset_logical_name,
get_release_manifest,
Expand Down
2 changes: 1 addition & 1 deletion src/policyengine/tax_benefit_models/us/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from pydantic import ConfigDict

from policyengine.core import Dataset, YearData
from policyengine.core.release_manifest import (
from policyengine.provenance.manifest import (
dataset_logical_name,
resolve_dataset_reference,
)
Expand Down
2 changes: 1 addition & 1 deletion src/policyengine/tax_benefit_models/us/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
TaxBenefitModelVersion,
Variable,
)
from policyengine.core.release_manifest import (
from policyengine.provenance.manifest import (
certify_data_release_compatibility,
dataset_logical_name,
get_release_manifest,
Expand Down
2 changes: 1 addition & 1 deletion tests/test_manifest_version_mismatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import warnings
from unittest.mock import patch

from policyengine.core.release_manifest import get_release_manifest
from policyengine.provenance.manifest import get_release_manifest


def _pick_mismatched_version(manifest_version: str) -> str:
Expand Down
Loading
Loading