diff --git a/projects/policyengine-api-simulation/fixtures/gateway/test_endpoints.py b/projects/policyengine-api-simulation/fixtures/gateway/test_endpoints.py index 127cb0580..78ea13f22 100644 --- a/projects/policyengine-api-simulation/fixtures/gateway/test_endpoints.py +++ b/projects/policyengine-api-simulation/fixtures/gateway/test_endpoints.py @@ -9,40 +9,22 @@ "policyengine_version": "4.10.0", "us": { "model_version": "1.500.0", - "data_version": "1.110.12", - "data_artifact_revision": "1.110.12", - "default_dataset": "enhanced_cps_2024", - "default_dataset_uri": "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.110.12", + "data_version": "populace-us-2024-test", + "data_artifact_revision": "us-artifact-revision", + "default_dataset": "populace_us_2024", + "default_dataset_uri": "hf://policyengine/populace-us/populace_us_2024.h5@us-artifact-revision", "dataset_uris": { - "enhanced_cps_2024": "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.110.12", - "cps_2023": "hf://policyengine/policyengine-us-data/cps_2023.h5@1.110.12", - "pooled_3_year_cps_2023": "hf://policyengine/policyengine-us-data/pooled_3_year_cps_2023.h5@1.110.12", - "states/UT": "hf://policyengine/policyengine-us-data/states/UT.h5@1.115.5", - }, - "dataset_aliases": { - "enhanced_cps": "enhanced_cps_2024", - "enhanced_cps_2024": "enhanced_cps_2024", - "cps": "cps_2023", - "cps_2023": "cps_2023", - "pooled_cps": "pooled_3_year_cps_2023", - "pooled_3_year_cps_2023": "pooled_3_year_cps_2023", + "populace_us_2024": "hf://policyengine/populace-us/populace_us_2024.h5@us-artifact-revision", }, }, "uk": { "model_version": "2.66.0", - "data_version": "1.40.3", - "data_artifact_revision": "1.40.3", - "default_dataset": "enhanced_frs_2023_24", - "default_dataset_uri": "hf://policyengine/policyengine-uk-data-private/enhanced_frs_2023_24.h5@1.40.3", + "data_version": "populace-uk-2023-test", + "data_artifact_revision": "uk-artifact-revision", + "default_dataset": "populace_uk_2023", + "default_dataset_uri": "hf://policyengine/populace-uk-private/populace_uk_2023.h5@uk-artifact-revision", "dataset_uris": { - "enhanced_frs_2023_24": "hf://policyengine/policyengine-uk-data-private/enhanced_frs_2023_24.h5@1.40.3", - "frs_2023_24": "hf://policyengine/policyengine-uk-data-private/frs_2023_24.h5@1.40.3", - }, - "dataset_aliases": { - "enhanced_frs": "enhanced_frs_2023_24", - "enhanced_frs_2023_24": "enhanced_frs_2023_24", - "frs": "frs_2023_24", - "frs_2023_24": "frs_2023_24", + "populace_uk_2023": "hf://policyengine/populace-uk-private/populace_uk_2023.h5@uk-artifact-revision", }, }, } @@ -102,14 +84,15 @@ def _runtime_dataset_uri( selected_revision = revision or existing_revision if dataset_without_revision.startswith("hf://policyengine/"): - if ( - selected_revision == country_bundle.get("data_artifact_revision") - and revision is None - ): - selected_revision = country_bundle["data_version"] remainder = dataset_without_revision.removeprefix("hf://policyengine/") bucket, _, path = remainder.partition("/") - dataset_without_revision = f"gs://{bucket}/{path}" + if bucket.startswith("policyengine-") and "-data" in bucket: + if ( + selected_revision == country_bundle.get("data_artifact_revision") + and revision is None + ): + selected_revision = country_bundle["data_version"] + dataset_without_revision = f"gs://{bucket}/{path}" if selected_revision is None and use_bundle_default: selected_revision = country_bundle["data_version"] @@ -140,7 +123,9 @@ def resolve_test_dataset_uri( ) dataset_name, revision = _split_revision(dataset) - dataset_name = country_bundle["dataset_aliases"].get(dataset_name, dataset_name) + aliases = country_bundle.get("dataset_aliases") + if isinstance(aliases, dict): + dataset_name = aliases.get(dataset_name, dataset_name) dataset_uri = country_bundle["dataset_uris"].get(dataset_name, dataset_name) if revision is not None and dataset_uri == dataset_name: return dataset diff --git a/projects/policyengine-api-simulation/pyproject.toml b/projects/policyengine-api-simulation/pyproject.toml index 9b190548a..d88dc1f36 100644 --- a/projects/policyengine-api-simulation/pyproject.toml +++ b/projects/policyengine-api-simulation/pyproject.toml @@ -16,10 +16,10 @@ dependencies = [ "pydantic-settings (>=2.7.1,<3.0.0)", "opentelemetry-instrumentation-fastapi (>=0.51b0,<0.52)", "policyengine-fastapi", - "policyengine==4.18.6", + "policyengine==4.18.7", "policyengine-core==3.28.0", "policyengine-uk==2.89.2", - "policyengine-us==1.745.0", + "policyengine-us==1.729.0", "tables>=3.10.2", "modal>=0.73.0", "logfire>=3.0.0", diff --git a/projects/policyengine-api-simulation/src/modal/gateway/endpoints.py b/projects/policyengine-api-simulation/src/modal/gateway/endpoints.py index ece5ca7bd..c9fc357a1 100644 --- a/projects/policyengine-api-simulation/src/modal/gateway/endpoints.py +++ b/projects/policyengine-api-simulation/src/modal/gateway/endpoints.py @@ -95,6 +95,23 @@ def _revision_from_dataset_uri(dataset_uri: str | None) -> str | None: return revision +def _bundle_response_data_version( + *, + country_bundle: dict, + requested_dataset: str | None, + requested_data_version: str | None, + resolved_dataset: str | None, +) -> str | None: + if requested_data_version is not None: + return requested_data_version + if _revision_from_dataset_uri(requested_dataset) is not None: + return _revision_from_dataset_uri(resolved_dataset) + data_version = country_bundle.get("data_version") + if isinstance(data_version, str): + return data_version + return _revision_from_dataset_uri(resolved_dataset) + + def _bundle_certified_hf_uri_roots(country_bundle: dict) -> set[str]: roots: set[str] = set() default_uri = country_bundle.get("default_dataset_uri") @@ -192,6 +209,8 @@ def _resolve_dataset_uri_from_app_bundle( if not isinstance(country_bundle, dict): return requested_data + # Older Modal snapshots may contain aliases. Newly published bundle snapshots + # resolve direct .py dataset names through dataset_uris instead. aliases = country_bundle.get("dataset_aliases") if not isinstance(aliases, dict): aliases = {} @@ -506,11 +525,11 @@ def _build_policyengine_bundle( requested_data=requested_dataset, requested_data_version=requested_data_version, ) - data_version = ( - requested_data_version - if requested_data_version is not None - else _revision_from_dataset_uri(resolved_dataset) - or country_bundle.get("data_version") + data_version = _bundle_response_data_version( + country_bundle=country_bundle, + requested_dataset=requested_dataset, + requested_data_version=requested_data_version, + resolved_dataset=resolved_dataset, ) model_version = country_bundle.get("model_version") or resolution.response_version policyengine_version = app_bundle.get( diff --git a/projects/policyengine-api-simulation/src/modal/utils/update_version_registry.py b/projects/policyengine-api-simulation/src/modal/utils/update_version_registry.py index 3d4cc7377..b3d03371b 100644 --- a/projects/policyengine-api-simulation/src/modal/utils/update_version_registry.py +++ b/projects/policyengine-api-simulation/src/modal/utils/update_version_registry.py @@ -30,7 +30,6 @@ class CountryBundleMetadata(TypedDict): default_dataset_uri: str dataset_uris: dict[str, str] dataset_repo_types: dict[str, str] - dataset_aliases: dict[str, str] class BundleManifestMetadata(TypedDict): @@ -79,10 +78,7 @@ def _is_newer_version(candidate: str, current: str | None) -> bool: def _country_bundle_metadata(country: str) -> CountryBundleMetadata: - from policyengine_api_simulation.release_bundle import ( - DATASET_ALIASES, - get_country_release_bundle, - ) + from policyengine_api_simulation.release_bundle import get_country_release_bundle bundle = get_country_release_bundle(country) return { @@ -96,7 +92,6 @@ def _country_bundle_metadata(country: str) -> CountryBundleMetadata: "default_dataset_uri": bundle.default_dataset_uri, "dataset_uris": dict(bundle.dataset_uris), "dataset_repo_types": dict(bundle.dataset_repo_types), - "dataset_aliases": dict(DATASET_ALIASES.get(bundle.country, {})), } diff --git a/projects/policyengine-api-simulation/src/policyengine_api_simulation/release_bundle.py b/projects/policyengine-api-simulation/src/policyengine_api_simulation/release_bundle.py index 057c0e006..05aab01d5 100644 --- a/projects/policyengine-api-simulation/src/policyengine_api_simulation/release_bundle.py +++ b/projects/policyengine-api-simulation/src/policyengine_api_simulation/release_bundle.py @@ -24,42 +24,6 @@ SUPPORTED_COUNTRIES = frozenset({"us", "uk"}) BUNDLE_RECEIPT_FILENAME = ".policyengine-bundle-receipt.json" -LEGACY_US_DATA_REVISION = "1.110.12" -LEGACY_ENHANCED_CPS_URI = ( - "hf://policyengine/policyengine-us-data/" - f"enhanced_cps_2024.h5@{LEGACY_US_DATA_REVISION}" -) - -DATASET_ALIASES: dict[str, dict[str, str]] = { - "us": { - "enhanced_cps": LEGACY_ENHANCED_CPS_URI, - "enhanced_cps_2024": LEGACY_ENHANCED_CPS_URI, - "cps_small": "cps_small_2024", - "cps_small_2024": "cps_small_2024", - "cps": ( - "hf://policyengine/policyengine-us-data/" - f"cps_2023.h5@{LEGACY_US_DATA_REVISION}" - ), - "cps_2023": ( - "hf://policyengine/policyengine-us-data/" - f"cps_2023.h5@{LEGACY_US_DATA_REVISION}" - ), - "pooled_cps": ( - "hf://policyengine/policyengine-us-data/" - f"pooled_3_year_cps_2023.h5@{LEGACY_US_DATA_REVISION}" - ), - "pooled_3_year_cps_2023": ( - "hf://policyengine/policyengine-us-data/" - f"pooled_3_year_cps_2023.h5@{LEGACY_US_DATA_REVISION}" - ), - }, - "uk": { - "enhanced_frs": "enhanced_frs_2023_24", - "enhanced_frs_2023_24": "enhanced_frs_2023_24", - "frs": "frs_2023_24", - "frs_2023_24": "frs_2023_24", - }, -} @dataclass(frozen=True) @@ -332,20 +296,15 @@ def resolve_bundle_dataset_name(country: str, requested_data: str | None) -> str return requested_data requested_without_revision, revision = _split_requested_revision(requested_data) - aliased = DATASET_ALIASES.get(bundle.country, {}).get( - requested_without_revision, requested_data - ) if revision is not None: - if "://" in aliased: - return _with_hf_revision_unvalidated(aliased, revision) - uri = bundle.dataset_uris.get(aliased) + uri = bundle.dataset_uris.get(requested_without_revision) if uri is None: raise ValueError( "Unknown dataset revision reference " f"{requested_data!r} for country {bundle.country!r}" ) return _with_hf_revision_unvalidated(uri, revision) - return aliased + return requested_without_revision def resolve_bundle_dataset_uri(country: str, requested_data: str | None) -> str: @@ -402,11 +361,10 @@ def _is_default_bundle_dataset( requested_revision=requested_revision, requested_data_version=requested_data_version, ) - aliased = DATASET_ALIASES.get(bundle.country, {}).get( - requested_without_revision, - requested_without_revision, - ) - return aliased == bundle.default_dataset and revision in {None, bundle.data_version} + return requested_without_revision == bundle.default_dataset and revision in { + None, + bundle.data_version, + } def resolve_local_bundle_dataset_path( diff --git a/projects/policyengine-api-simulation/tests/gateway/test_budget_window_state.py b/projects/policyengine-api-simulation/tests/gateway/test_budget_window_state.py index 1058146c2..9e62b2a65 100644 --- a/projects/policyengine-api-simulation/tests/gateway/test_budget_window_state.py +++ b/projects/policyengine-api-simulation/tests/gateway/test_budget_window_state.py @@ -28,7 +28,7 @@ def test_create_initial_batch_state_builds_queued_years_and_run_id(): start_year="2026", window_size=3, max_parallel=2, - data="enhanced_cps_2024", + data="custom_dataset_label", scope="macro", reform={}, _telemetry={ @@ -52,7 +52,7 @@ def test_create_initial_batch_state_builds_queued_years_and_run_id(): assert state.target == "general" assert state.years == ["2026", "2027", "2028"] assert state.queued_years == ["2026", "2027", "2028"] - assert state.request_payload["data"] == "enhanced_cps_2024" + assert state.request_payload["data"] == "custom_dataset_label" assert state.request_payload["scope"] == "macro" assert state.request_payload["reform"] == {} assert state.run_id == "batch-run-123" diff --git a/projects/policyengine-api-simulation/tests/gateway/test_endpoints.py b/projects/policyengine-api-simulation/tests/gateway/test_endpoints.py index 7df3832ea..82b45b8c8 100644 --- a/projects/policyengine-api-simulation/tests/gateway/test_endpoints.py +++ b/projects/policyengine-api-simulation/tests/gateway/test_endpoints.py @@ -212,7 +212,7 @@ def test__given_regular_data_value__then_routes_to_run_simulation( "country": "us", "scope": "macro", "reform": {}, - "data": "gs://policyengine-us-data/enhanced_cps_2024.h5", + "data": "gs://external-bucket/custom/file.h5", } # When @@ -358,7 +358,7 @@ def test__given_submission_with_data__then_returns_resolved_bundle_metadata( "country": "us", "scope": "macro", "reform": {}, - "data": "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.115.5", + "data": "gs://external-bucket/custom/file.h5@custom-v1", } # When @@ -371,11 +371,11 @@ def test__given_submission_with_data__then_returns_resolved_bundle_metadata( assert data["policyengine_bundle"] == expected_bundle( "us", "1.500.0", - dataset="hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.115.5", - data_version="1.115.5", + dataset="gs://external-bucket/custom/file.h5@custom-v1", + data_version="custom-v1", ) - def test__given_submission_with_alias_data__then_bundle_dataset_uses_manifest_uri( + def test__given_submission_with_dataset_name__then_bundle_dataset_uses_manifest_uri( self, mock_modal, client: TestClient ): mock_modal["dicts"]["simulation-api-us-versions"] = { @@ -387,7 +387,7 @@ def test__given_submission_with_alias_data__then_bundle_dataset_uses_manifest_ur "country": "us", "scope": "macro", "reform": {}, - "data": "enhanced_cps_2024", + "data": "populace_us_2024", } response = client.post("/simulate/economy/comparison", json=request_body) @@ -395,7 +395,35 @@ def test__given_submission_with_alias_data__then_bundle_dataset_uses_manifest_ur assert response.status_code == 200 data = response.json() assert data["policyengine_bundle"]["dataset"] == resolve_test_dataset_uri( - "us", "enhanced_cps_2024" + "us", "populace_us_2024" + ) + + def test__given_legacy_alias_in_bundle_snapshot__then_gateway_still_resolves_it( + self, mock_modal, client: TestClient + ): + mock_modal["dicts"]["simulation-api-us-versions"] = { + "latest": "1.500.0", + "1.500.0": "policyengine-simulation-py4-10-0", + } + state = deepcopy(TEST_ROUTING_STATE) + state["bundles"]["4.10.0"]["us"]["dataset_aliases"] = { + "legacy_populace_us": "populace_us_2024", + } + mock_modal["dicts"]["simulation-api-routing-state"] = {"active": state} + + response = client.post( + "/simulate/economy/comparison", + json={ + "country": "us", + "scope": "macro", + "reform": {}, + "data": "legacy_populace_us", + }, + ) + + assert response.status_code == 200 + assert response.json()["policyengine_bundle"]["dataset"] == ( + resolve_test_dataset_uri("us", "populace_us_2024") ) def test__given_us_state_region_without_data__then_keeps_contract_and_uses_default_dataset( @@ -436,16 +464,16 @@ def test__given_submission_with_logical_revision__then_bundle_dataset_uses_revis "country": "us", "scope": "macro", "reform": {}, - "data": "enhanced_cps_2024@1.77.0", + "data": "populace_us_2024@custom-v1", }, ) assert response.status_code == 200 bundle = response.json()["policyengine_bundle"] assert bundle["dataset"] == ( - "gs://policyengine-us-data/enhanced_cps_2024.h5@1.77.0" + "hf://policyengine/populace-us/populace_us_2024.h5@custom-v1" ) - assert bundle["data_version"] == "1.77.0" + assert bundle["data_version"] == "custom-v1" def test__given_submission_with_explicit_uri_revision__then_bundle_data_version_uses_revision( self, mock_modal, client: TestClient @@ -461,16 +489,14 @@ def test__given_submission_with_explicit_uri_revision__then_bundle_data_version_ "country": "us", "scope": "macro", "reform": {}, - "data": "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.77.0", + "data": "gs://external-bucket/custom/file.h5@custom-v1", }, ) assert response.status_code == 200 bundle = response.json()["policyengine_bundle"] - assert bundle["dataset"] == ( - "gs://policyengine-us-data/enhanced_cps_2024.h5@1.77.0" - ) - assert bundle["data_version"] == "1.77.0" + assert bundle["dataset"] == "gs://external-bucket/custom/file.h5@custom-v1" + assert bundle["data_version"] == "custom-v1" def test__given_submission_with_conflicting_data_versions__then_returns_400( self, mock_modal, client: TestClient @@ -486,8 +512,8 @@ def test__given_submission_with_conflicting_data_versions__then_returns_400( "country": "us", "scope": "macro", "reform": {}, - "data": "enhanced_cps_2024@1.77.0", - "data_version": "1.78.2", + "data": "populace_us_2024@custom-v1", + "data_version": "custom-v2", }, ) @@ -524,7 +550,7 @@ def reject_revision(dataset_uri): assert response.json()["detail"] == "revision missing" assert mock_modal["func"].last_payload is None - def test__given_submission_with_uk_alias_data__then_bundle_dataset_is_versioned_uri( + def test__given_submission_with_uk_dataset_name__then_bundle_dataset_is_versioned_uri( self, mock_modal, client: TestClient ): mock_modal["dicts"]["simulation-api-uk-versions"] = { @@ -536,7 +562,7 @@ def test__given_submission_with_uk_alias_data__then_bundle_dataset_is_versioned_ "country": "uk", "scope": "macro", "reform": {}, - "data": "enhanced_frs", + "data": "populace_uk_2023", } response = client.post("/simulate/economy/comparison", json=request_body) @@ -544,10 +570,10 @@ def test__given_submission_with_uk_alias_data__then_bundle_dataset_is_versioned_ assert response.status_code == 200 data = response.json() assert data["policyengine_bundle"]["dataset"] == resolve_test_dataset_uri( - "uk", "enhanced_frs" + "uk", "populace_uk_2023" ) - def test__given_uk_submission_without_data_and_manifest_commit__then_resolves_gcs_default( + def test__given_uk_submission_without_data_and_manifest_commit__then_uses_populace_default( self, mock_modal, client: TestClient, @@ -555,19 +581,17 @@ def test__given_uk_submission_without_data_and_manifest_commit__then_resolves_gc ): app_name = "policyengine-simulation-py4-13-1" manifest_uri = ( - "hf://policyengine/policyengine-uk-data-private/" - "enhanced_frs_2023_24.h5@655dd07e4bb9c777b00dac044949611f1feb824f" + "hf://policyengine/populace-uk-private/" + "populace_uk_2023.h5@uk-manifest-commit" ) bundle = deepcopy(TEST_APP_RELEASE_BUNDLE) bundle["app_name"] = app_name bundle["policyengine_version"] = "4.13.1" bundle["uk"]["model_version"] = "2.88.20" - bundle["uk"]["data_version"] = "1.55.10" - bundle["uk"]["data_artifact_revision"] = ( - "655dd07e4bb9c777b00dac044949611f1feb824f" - ) + bundle["uk"]["data_version"] = "populace-uk-2023-release" + bundle["uk"]["data_artifact_revision"] = "uk-manifest-commit" bundle["uk"]["default_dataset_uri"] = manifest_uri - bundle["uk"]["dataset_uris"]["enhanced_frs_2023_24"] = manifest_uri + bundle["uk"]["dataset_uris"]["populace_uk_2023"] = manifest_uri state = deepcopy(TEST_ROUTING_STATE) state["routes"]["policyengine"]["4.13.1"] = app_name state["routes"]["uk"]["2.88.20"] = app_name @@ -595,9 +619,7 @@ def reject_revision(dataset_uri, revision): ) assert response.status_code == 200 - assert response.json()["policyengine_bundle"]["dataset"] == ( - "gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5@1.55.10" - ) + assert response.json()["policyengine_bundle"]["dataset"] == manifest_uri def test__given_submission_with_runtime_bundle__then_accepts_internal_provenance( self, mock_modal, client: TestClient @@ -611,11 +633,11 @@ def test__given_submission_with_runtime_bundle__then_accepts_internal_provenance "country": "us", "scope": "macro", "reform": {}, - "data": "enhanced_cps_2024", - "data_version": "1.78.2", + "data": "populace_us_2024", + "data_version": "custom-v2", "_runtime_bundle": { "model_version": "1.500.0", - "data_version": "1.78.2", + "data_version": "custom-v2", }, "_metadata": {"process_id": "process-123"}, } @@ -627,14 +649,14 @@ def test__given_submission_with_runtime_bundle__then_accepts_internal_provenance assert data["policyengine_bundle"] == expected_bundle( "us", "1.500.0", - dataset="enhanced_cps_2024", - data_version="1.78.2", + dataset="populace_us_2024", + data_version="custom-v2", ) - assert mock_modal["func"].last_payload["data_version"] == "1.78.2" + assert mock_modal["func"].last_payload["data_version"] == "custom-v2" assert "_runtime_bundle" not in mock_modal["func"].last_payload assert "_metadata" not in mock_modal["func"].last_payload - def test__given_submission_with_unknown_alias_data__then_bundle_dataset_is_preserved( + def test__given_submission_with_unknown_dataset_name__then_bundle_dataset_is_preserved( self, mock_modal, client: TestClient ): mock_modal["dicts"]["simulation-api-us-versions"] = { @@ -675,7 +697,7 @@ def test__given_submitted_job__then_job_status_includes_bundle_metadata( "country": "us", "scope": "macro", "reform": {}, - "data": "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.115.5", + "data": "gs://external-bucket/custom/file.h5@custom-v1", }, ) @@ -691,8 +713,8 @@ def test__given_submitted_job__then_job_status_includes_bundle_metadata( assert data["policyengine_bundle"] == expected_bundle( "us", "1.500.0", - dataset="hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.115.5", - data_version="1.115.5", + dataset="gs://external-bucket/custom/file.h5@custom-v1", + data_version="custom-v1", ) def test__given_submitted_job_with_telemetry__then_polling_echoes_run_id( diff --git a/projects/policyengine-api-simulation/tests/gateway/test_models.py b/projects/policyengine-api-simulation/tests/gateway/test_models.py index c9bde67d2..46a10f4f8 100644 --- a/projects/policyengine-api-simulation/tests/gateway/test_models.py +++ b/projects/policyengine-api-simulation/tests/gateway/test_models.py @@ -187,7 +187,7 @@ def test_simulation_request_accepts_documented_simulation_fields(self): "country": "us", "region": "enhanced_us", "reform": {"some.parameter": {"2024-01-01": True}}, - "data": "enhanced_cps_2024", + "data": "custom_dataset_label", "scope": "macro", } @@ -199,17 +199,15 @@ def test_simulation_request_accepts_documented_simulation_fields(self): dumped = request.model_dump(exclude_none=True) assert dumped["region"] == "enhanced_us" assert dumped["reform"] == {"some.parameter": {"2024-01-01": True}} - assert dumped["data"] == "enhanced_cps_2024" + assert dumped["data"] == "custom_dataset_label" assert dumped["scope"] == "macro" def test_simulation_request_rejects_unknown_fields(self): """Unknown fields should fail fast with ``extra="forbid"``.""" with pytest.raises(ValidationError): - SimulationRequest(country="us", dataset="enhanced_cps_2024") + SimulationRequest(country="us", dataset="custom_dataset_label") with pytest.raises(ValidationError): SimulationRequest(country="us", mystery_flag=True) - with pytest.raises(ValidationError): - SimulationRequest(country="us", subsample=100) def test_simulation_request_rejects_oversized_payload(self): """Payloads that exceed the gateway max should 422 before Pydantic @@ -279,7 +277,7 @@ def test_job_submit_response_creates_with_all_fields(self): "model_version": "1.459.0", "policyengine_version": None, "data_version": None, - "dataset": "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.115.5", + "dataset": "gs://external-bucket/custom/file.h5@custom-v1", }, } @@ -296,7 +294,7 @@ def test_job_submit_response_creates_with_all_fields(self): assert response.policyengine_bundle.model_version == "1.459.0" assert response.policyengine_bundle.policyengine_version is None assert response.policyengine_bundle.dataset == ( - "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.115.5" + "gs://external-bucket/custom/file.h5@custom-v1" ) @@ -360,14 +358,14 @@ def test_job_status_response_accepts_bundle_metadata(self): "model_version": "1.459.0", "policyengine_version": None, "data_version": None, - "dataset": "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.115.5", + "dataset": "gs://external-bucket/custom/file.h5@custom-v1", }, ) assert response.resolved_app_name == "policyengine-simulation-py3-9-0" assert response.policyengine_bundle is not None assert response.policyengine_bundle.dataset == ( - "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.115.5" + "gs://external-bucket/custom/file.h5@custom-v1" ) diff --git a/projects/policyengine-api-simulation/tests/test_dataset_uri.py b/projects/policyengine-api-simulation/tests/test_dataset_uri.py index 6886aacb7..847d249ee 100644 --- a/projects/policyengine-api-simulation/tests/test_dataset_uri.py +++ b/projects/policyengine-api-simulation/tests/test_dataset_uri.py @@ -5,12 +5,12 @@ from policyengine_api_simulation.dataset_uri import runtime_dataset_uri -def test_runtime_dataset_uri_converts_policyengine_hf_to_gcs_without_hf_validation( +def test_runtime_dataset_uri_preserves_populace_hf_artifact_without_hf_validation( monkeypatch, ): def reject_hf_validation(dataset_uri: str, revision: str) -> str: raise AssertionError( - f"HF validation should not run for PolicyEngine GCS data: {dataset_uri}@{revision}" + f"HF validation should not run for trusted bundle data: {dataset_uri}@{revision}" ) monkeypatch.setattr( @@ -20,35 +20,41 @@ def reject_hf_validation(dataset_uri: str, revision: str) -> str: assert ( runtime_dataset_uri( - "hf://policyengine/policyengine-uk-data-private/" - "enhanced_frs_2023_24.h5@655dd07e4bb9c777b00dac044949611f1feb824f", - default_revision="1.55.10", - artifact_revision="655dd07e4bb9c777b00dac044949611f1feb824f", + "hf://policyengine/populace-uk-private/" + "populace_uk_2023.h5@uk-artifact-revision", + default_revision="populace-uk-2023-release", + artifact_revision="uk-artifact-revision", + validate_hf=False, + ) + == ( + "hf://policyengine/populace-uk-private/" + "populace_uk_2023.h5@uk-artifact-revision" ) - == "gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5@1.55.10" ) -def test_runtime_dataset_uri_preserves_explicit_policyengine_hf_data_version(): +def test_runtime_dataset_uri_preserves_explicit_hf_data_version(): assert ( runtime_dataset_uri( - "hf://policyengine/policyengine-us-data/states/CA.h5@1.110.12", - default_revision="1.115.5", - artifact_revision="d47fb5475144260a75467d2f2e22b2d5d53d4d57", + "hf://external/example-data/file.h5@custom-v1", + default_revision="bundle-default", + artifact_revision="artifact-revision", + validate_hf=False, ) - == "gs://policyengine-us-data/states/CA.h5@1.110.12" + == "hf://external/example-data/file.h5@custom-v1" ) -def test_runtime_dataset_uri_override_revision_wins_for_policyengine_hf_uri(): +def test_runtime_dataset_uri_override_revision_wins_for_hf_uri(): assert ( runtime_dataset_uri( - "hf://policyengine/policyengine-us-data/states/CA.h5@1.110.12", - default_revision="1.115.5", - override_revision="1.77.0", - artifact_revision="d47fb5475144260a75467d2f2e22b2d5d53d4d57", + "hf://external/example-data/file.h5@custom-v1", + default_revision="bundle-default", + override_revision="custom-v2", + artifact_revision="artifact-revision", + validate_hf=False, ) - == "gs://policyengine-us-data/states/CA.h5@1.77.0" + == "hf://external/example-data/file.h5@custom-v2" ) @@ -73,6 +79,6 @@ def pin_hf_revision(dataset_uri: str, revision: str) -> str: def test_runtime_dataset_uri_rejects_conflicting_gcs_revisions(): with pytest.raises(ValueError, match="Conflicting dataset revisions"): runtime_dataset_uri( - "gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5@commit", - default_revision="1.55.10", + "gs://external-bucket/custom/file.h5@custom-v1", + default_revision="custom-v2", ) diff --git a/projects/policyengine-api-simulation/tests/test_hf_dataset.py b/projects/policyengine-api-simulation/tests/test_hf_dataset.py index a343654e7..54b1fb77d 100644 --- a/projects/policyengine-api-simulation/tests/test_hf_dataset.py +++ b/projects/policyengine-api-simulation/tests/test_hf_dataset.py @@ -31,13 +31,13 @@ def read(self): def test_parse_hf_dataset_uri_extracts_repo_path_and_revision(): parsed = parse_hf_dataset_uri( - "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.77.0" + "hf://policyengine/populace-us/populace_us_2024.h5@custom-v1" ) assert parsed is not None - assert parsed.repo_id == "policyengine/policyengine-us-data" - assert parsed.path == "enhanced_cps_2024.h5" - assert parsed.revision == "1.77.0" + assert parsed.repo_id == "policyengine/populace-us" + assert parsed.path == "populace_us_2024.h5" + assert parsed.revision == "custom-v1" def test_fetch_hf_dataset_revision_uses_dataset_revision_api(monkeypatch): @@ -53,15 +53,15 @@ def fake_urlopen(request, timeout): monkeypatch.setattr(hf_dataset, "urlopen", fake_urlopen) payload = hf_dataset._fetch_hf_dataset_revision( - "policyengine/policyengine-us-data", - "1.77.0", + "policyengine/populace-us", + "custom-v1", "hf-token", ) assert payload == {"sha": "abc123", "siblings": []} assert seen["url"] == ( "https://huggingface.co/api/datasets/" - "policyengine/policyengine-us-data/revision/1.77.0" + "policyengine/populace-us/revision/custom-v1" ) assert seen["headers"]["Authorization"] == "Bearer hf-token" assert seen["timeout"] == hf_dataset.HF_REQUEST_TIMEOUT_SECONDS @@ -79,7 +79,7 @@ def test_validate_hf_dataset_uri_rejects_revision_missing_artifact(monkeypatch): match="does not contain artifact", ): validate_hf_dataset_uri( - "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.77.0" + "hf://policyengine/populace-us/populace_us_2024.h5@custom-v1" ) @@ -94,11 +94,11 @@ def fake_validate(dataset_uri): assert ( with_hf_revision( - "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.110.12", - "1.77.0", + "hf://policyengine/populace-us/populace_us_2024.h5@old", + "custom-v1", ) - == "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.77.0" + == "hf://policyengine/populace-us/populace_us_2024.h5@custom-v1" ) assert calls == [ - "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.77.0" + "hf://policyengine/populace-us/populace_us_2024.h5@custom-v1" ] diff --git a/projects/policyengine-api-simulation/tests/test_policyengine_package_update_scripts.py b/projects/policyengine-api-simulation/tests/test_policyengine_package_update_scripts.py index 0280d2512..ba88d2241 100644 --- a/projects/policyengine-api-simulation/tests/test_policyengine_package_update_scripts.py +++ b/projects/policyengine-api-simulation/tests/test_policyengine_package_update_scripts.py @@ -152,8 +152,6 @@ def test_update_policyengine_package_updates_py_and_bundled_runtime_pins( assert "policyengine-core==999.999.999" in pyproject_text assert "policyengine-us==1.1.0" in pyproject_text assert "policyengine-uk==2.1.0" in pyproject_text - assert "policyengine-us-data" not in pyproject_text - assert "policyengine-uk-data" not in pyproject_text uv_calls = uv_log.read_text(encoding="utf-8") assert "lock --upgrade-package policyengine" in uv_calls assert "run python -m src.modal.utils.extract_bundle_versions --shell" in uv_calls diff --git a/projects/policyengine-api-simulation/tests/test_release_bundle.py b/projects/policyengine-api-simulation/tests/test_release_bundle.py index 243bed382..3eedb89e4 100644 --- a/projects/policyengine-api-simulation/tests/test_release_bundle.py +++ b/projects/policyengine-api-simulation/tests/test_release_bundle.py @@ -74,57 +74,52 @@ def test_resolve_bundle_dataset_uri_maps_certified_defaults_to_manifest_uris(): ) -def test_resolve_bundle_dataset_uri_does_not_certify_us_state_sidecars(): +def test_resolve_bundle_dataset_uri_does_not_certify_unknown_dataset_labels(): bundle = get_country_release_bundle("us") - assert "states/UT" not in bundle.dataset_uris - assert resolve_bundle_dataset_uri("us", "states/UT") == "states/UT" + assert "custom_dataset_label" not in bundle.dataset_uris + assert ( + resolve_bundle_dataset_uri("us", "custom_dataset_label") + == "custom_dataset_label" + ) -def test_resolve_bundle_dataset_uri_keeps_legacy_aliases_as_explicit_overrides(): - assert resolve_bundle_dataset_uri("us", "enhanced_cps") == ( - "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.110.12" +def test_resolve_bundle_dataset_uri_maps_populace_dataset_names_to_manifest_uri(): + assert ( + resolve_bundle_dataset_uri("us", "populace_us_2024") + == get_country_release_bundle("us").default_dataset_uri ) assert ( - resolve_bundle_dataset_uri("uk", "enhanced_frs") - == (get_country_release_bundle("uk").dataset_uris["enhanced_frs_2023_24"]) + resolve_bundle_dataset_uri("uk", "populace_uk_2023") + == get_country_release_bundle("uk").default_dataset_uri ) def test_resolve_bundle_dataset_uri_preserves_explicit_dataset_uri_and_revision(): - uri = "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.110.12" + uri = "hf://external/example-data/file.h5@custom-v1" assert resolve_bundle_dataset_name("us", uri) == uri assert resolve_bundle_dataset_uri("us", uri) == uri def test_resolve_bundle_dataset_uri_maps_explicit_logical_revision_to_hf_uri(): - dataset = "enhanced_cps_2024@1.110.12" + dataset = "populace_us_2024@custom-v1" assert resolve_bundle_dataset_name("us", dataset).startswith( - "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.110.12" + "hf://policyengine/populace-us/populace_us_2024.h5@custom-v1" ) assert resolve_bundle_dataset_uri("us", dataset).startswith( - "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.110.12" + "hf://policyengine/populace-us/populace_us_2024.h5@custom-v1" ) def test_resolve_bundle_dataset_uri_preserves_explicit_gcs_uri(): - uri = "gs://policyengine-us-data/enhanced_cps_2024.h5" + uri = "gs://external-bucket/custom/file.h5" assert resolve_bundle_dataset_name("us", uri) == uri assert resolve_bundle_dataset_uri("us", uri) == uri -def test_resolve_bundle_dataset_uri_supports_legacy_us_aliases(): - assert resolve_bundle_dataset_uri("us", "cps") == ( - "hf://policyengine/policyengine-us-data/cps_2023.h5@1.110.12" - ) - assert resolve_bundle_dataset_uri("us", "pooled_cps") == ( - "hf://policyengine/policyengine-us-data/pooled_3_year_cps_2023.h5@1.110.12" - ) - - def test_resolve_bundle_dataset_uri_preserves_unmanaged_unknown_values(): assert resolve_bundle_dataset_uri("us", "custom_dataset_label") == ( "custom_dataset_label" @@ -142,23 +137,27 @@ def test_resolve_runtime_bundle_dataset_uri_maps_default_to_gcs_version(): assert resolve_runtime_bundle_dataset_uri("us", None) == bundle.default_dataset_uri -def test_resolve_runtime_bundle_dataset_uri_maps_alias_to_gcs_version(): +def test_resolve_runtime_bundle_dataset_uri_maps_dataset_name_to_populace_uri(): bundle = get_country_release_bundle("uk") - assert resolve_runtime_bundle_dataset_uri("uk", "enhanced_frs").startswith( - "gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5@" + assert ( + resolve_runtime_bundle_dataset_uri("uk", "populace_uk_2023") + == bundle.default_dataset_uri ) assert bundle.default_dataset == "populace_uk_2023" def test_resolve_runtime_bundle_dataset_uri_applies_requested_version(): + bundle_uri = get_country_release_bundle("us").default_dataset_uri + bundle_uri_without_revision = bundle_uri.rsplit("@", maxsplit=1)[0] + assert ( resolve_runtime_bundle_dataset_uri( "us", - "enhanced_cps_2024", - "1.77.0", + "populace_us_2024", + "custom-v1", ) - == "gs://policyengine-us-data/enhanced_cps_2024.h5@1.77.0" + == f"{bundle_uri_without_revision}@custom-v1" ) @@ -166,9 +165,9 @@ def test_resolve_runtime_bundle_dataset_uri_preserves_explicit_hf_data_version() assert ( resolve_runtime_bundle_dataset_uri( "us", - "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.77.0", + "hf://external/example-data/file.h5@custom-v1", ) - == "gs://policyengine-us-data/enhanced_cps_2024.h5@1.77.0" + == "hf://external/example-data/file.h5@custom-v1" ) @@ -176,9 +175,9 @@ def test_resolve_runtime_bundle_dataset_uri_preserves_explicit_gcs_data_version( assert ( resolve_runtime_bundle_dataset_uri( "us", - "gs://policyengine-us-data/enhanced_cps_2024.h5@1.77.0", + "gs://external-bucket/custom/file.h5@custom-v1", ) - == "gs://policyengine-us-data/enhanced_cps_2024.h5@1.77.0" + == "gs://external-bucket/custom/file.h5@custom-v1" ) @@ -190,7 +189,7 @@ def test_resolve_runtime_bundle_dataset_uri_preserves_unmanaged_unknown_values() def test_resolve_runtime_bundle_dataset_uri_preserves_explicit_gcs_uri(): - uri = "gs://policyengine-us-data/enhanced_cps_2024.h5" + uri = "gs://external-bucket/custom/file.h5" assert resolve_runtime_bundle_dataset_uri("us", uri) == uri @@ -255,6 +254,7 @@ def test_resolve_runtime_bundle_dataset_uri_preserves_nondefault_override_with_r monkeypatch.setenv("POLICYENGINE_BUNDLE_RECEIPT", str(receipt_path)) get_country_release_bundle.cache_clear() - assert resolve_runtime_bundle_dataset_uri("us", "cps") == ( - "gs://policyengine-us-data/cps_2023.h5@1.110.12" + assert ( + resolve_runtime_bundle_dataset_uri("us", "custom_dataset_label") + == "custom_dataset_label" ) diff --git a/projects/policyengine-api-simulation/tests/test_simulation_output_builder.py b/projects/policyengine-api-simulation/tests/test_simulation_output_builder.py index 384140e85..bc453e192 100644 --- a/projects/policyengine-api-simulation/tests/test_simulation_output_builder.py +++ b/projects/policyengine-api-simulation/tests/test_simulation_output_builder.py @@ -463,92 +463,18 @@ def serialize(self): assert build_calls[1][3] is scoping_strategy -def test_resolve_region_uses_dedicated_region_dataset_with_requested_version( - monkeypatch, -): - monkeypatch.setattr( - "policyengine_api_simulation.dataset_uri.with_hf_revision", - lambda dataset_uri, revision: ( - f"{dataset_uri.rsplit('@', maxsplit=1)[0]}@{revision}" - ), - ) - state = SimpleNamespace( - dataset_path="hf://policyengine/policyengine-us-data/states/CA.h5@1.110.12", - scoping_strategy=None, - parent_code="us", - ) - country_module = SimpleNamespace( - model=SimpleNamespace( - get_region=lambda code: state if code == "state/ca" else None - ) - ) - - resolution = _resolve_region( - country_module=country_module, - country="us", - params={"region": "state/ca", "data_version": "1.77.0"}, - ) - - assert resolution.code == "state/ca" - assert resolution.dataset_reference == ( - "gs://policyengine-us-data/states/CA.h5@1.77.0" - ) - assert resolution.scoping_strategy is None - - -def test_resolve_region_maps_bundle_manifest_revision_to_data_version(monkeypatch): - manifest_revision = "d47fb5475144260a75467d2f2e22b2d5d53d4d57" - monkeypatch.setattr( - "policyengine_api_simulation.simulation_runtime.get_country_release_bundle", - lambda country: SimpleNamespace( - data_version="1.115.5", - data_artifact_revision=manifest_revision, - ), - ) - state = SimpleNamespace( - dataset_path=( - f"hf://policyengine/policyengine-us-data/states/UT.h5@{manifest_revision}" - ), - scoping_strategy=None, - parent_code="us", - ) - country_module = SimpleNamespace( - model=SimpleNamespace( - get_region=lambda code: state if code == "state/ut" else None - ) - ) - - resolution = _resolve_region( - country_module=country_module, - country="us", - params={ - "region": "state/ut", - "data": "gs://policyengine-us-data/states/UT.h5", - }, - ) - - assert resolution.code == "state/ut" - assert resolution.dataset_reference == ( - "gs://policyengine-us-data/states/UT.h5@1.115.5" - ) - - def test_resolve_dataset_reference_applies_data_version_to_logical_dataset( monkeypatch, ): - monkeypatch.setattr( - "policyengine_api_simulation.dataset_uri.with_hf_revision", - lambda dataset_uri, revision: ( - f"{dataset_uri.rsplit('@', maxsplit=1)[0]}@{revision}" - ), - ) + bundle_uri = get_country_release_bundle("us").default_dataset_uri + bundle_uri_without_revision = bundle_uri.rsplit("@", maxsplit=1)[0] assert ( _resolve_dataset_reference( "us", - {"data": "enhanced_cps_2024", "data_version": "1.77.0"}, + {"data": "populace_us_2024", "data_version": "custom-v1"}, ) - == "gs://policyengine-us-data/enhanced_cps_2024.h5@1.77.0" + == f"{bundle_uri_without_revision}@custom-v1" ) @@ -698,89 +624,8 @@ def test_resolve_region_rejects_unscoped_us_place_region(): ) -def test_resolve_region_scopes_us_place_from_parent_state_dataset(monkeypatch): - monkeypatch.setattr( - "policyengine_api_simulation.dataset_uri.with_hf_revision", - lambda dataset_uri, revision: ( - f"{dataset_uri.rsplit('@', maxsplit=1)[0]}@{revision}" - ), - ) - scoping_strategy = object() - place = SimpleNamespace( - dataset_path=None, - scoping_strategy=scoping_strategy, - parent_code="state/ca", - ) - state = SimpleNamespace( - dataset_path="hf://policyengine/policyengine-us-data/states/CA.h5@1.110.12", - scoping_strategy=None, - parent_code="us", - ) - regions = {"place/CA-57000": place, "state/ca": state} - country_module = SimpleNamespace( - model=SimpleNamespace(get_region=lambda code: regions.get(code)) - ) - - resolution = _resolve_region( - country_module=country_module, - country="us", - params={"region": "place/ca-57000"}, - ) - - assert resolution.code == "place/CA-57000" - assert resolution.dataset_reference == ( - "gs://policyengine-us-data/states/CA.h5@1.110.12" - ) - assert resolution.scoping_strategy is scoping_strategy - - -def test_resolve_region_maps_parent_manifest_revision_to_data_version(monkeypatch): - manifest_revision = "d47fb5475144260a75467d2f2e22b2d5d53d4d57" - monkeypatch.setattr( - "policyengine_api_simulation.simulation_runtime.get_country_release_bundle", - lambda country: SimpleNamespace( - data_version="1.115.5", - data_artifact_revision=manifest_revision, - ), - ) - scoping_strategy = object() - place = SimpleNamespace( - dataset_path=None, - scoping_strategy=scoping_strategy, - parent_code="state/ca", - ) - state = SimpleNamespace( - dataset_path=( - f"hf://policyengine/policyengine-us-data/states/CA.h5@{manifest_revision}" - ), - scoping_strategy=None, - parent_code="us", - ) - regions = {"place/CA-57000": place, "state/ca": state} - country_module = SimpleNamespace( - model=SimpleNamespace(get_region=lambda code: regions.get(code)) - ) - - resolution = _resolve_region( - country_module=country_module, - country="us", - params={"region": "place/ca-57000"}, - ) - - assert resolution.code == "place/CA-57000" - assert resolution.dataset_reference == ( - "gs://policyengine-us-data/states/CA.h5@1.115.5" - ) - assert resolution.scoping_strategy is scoping_strategy - - -def test_resolve_region_scopes_uk_country_from_national_dataset(monkeypatch): - monkeypatch.setattr( - "policyengine_api_simulation.dataset_uri.with_hf_revision", - lambda dataset_uri, revision: ( - f"{dataset_uri.rsplit('@', maxsplit=1)[0]}@{revision}" - ), - ) +def test_resolve_region_scopes_uk_country_from_national_populace_dataset(): + bundle = get_country_release_bundle("uk") scoping_strategy = object() england = SimpleNamespace( dataset_path=None, @@ -788,7 +633,7 @@ def test_resolve_region_scopes_uk_country_from_national_dataset(monkeypatch): parent_code="uk", ) uk = SimpleNamespace( - dataset_path="hf://policyengine/policyengine-uk-data-private/enhanced_frs_2023_24.h5@1.40.3", + dataset_path=bundle.default_dataset_uri, scoping_strategy=None, parent_code=None, ) @@ -804,9 +649,7 @@ def test_resolve_region_scopes_uk_country_from_national_dataset(monkeypatch): ) assert resolution.code == "country/england" - assert resolution.dataset_reference == ( - "gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5@1.40.3" - ) + assert resolution.dataset_reference == bundle.default_dataset_uri assert resolution.scoping_strategy is scoping_strategy diff --git a/projects/policyengine-api-simulation/tests/test_standalone_simulation_contract.py b/projects/policyengine-api-simulation/tests/test_standalone_simulation_contract.py index 4008c92ab..16be63e4b 100644 --- a/projects/policyengine-api-simulation/tests/test_standalone_simulation_contract.py +++ b/projects/policyengine-api-simulation/tests/test_standalone_simulation_contract.py @@ -51,11 +51,6 @@ def test_standalone_simulation_openapi_keeps_legacy_schema_names(): "telemetry" not in spec["components"]["schemas"]["SimulationOptions"]["properties"] ) - assert ( - "subsample" - not in spec["components"]["schemas"]["SimulationOptions"]["properties"] - ) - def test_standalone_simulation_route_returns_legacy_macro_contract(monkeypatch): def fake_run_simulation_impl(params): diff --git a/projects/policyengine-api-simulation/tests/test_update_version_registry.py b/projects/policyengine-api-simulation/tests/test_update_version_registry.py index 02513ed84..d462fed5b 100644 --- a/projects/policyengine-api-simulation/tests/test_update_version_registry.py +++ b/projects/policyengine-api-simulation/tests/test_update_version_registry.py @@ -78,7 +78,6 @@ def fake_country_bundle_metadata( "default_dataset_uri": f"hf://datasets/policyengine/{country}/default", "dataset_uris": {"default": f"hf://datasets/policyengine/{country}"}, "dataset_repo_types": {"default": "dataset"}, - "dataset_aliases": {"alias": "default"}, } monkeypatch.setattr( @@ -272,7 +271,7 @@ def test_publish_routing_state_writes_only_active_snapshot( active["routes"]["policyengine"]["4.19.1"] == "policyengine-simulation-py4-19-1" ) assert active["routes"]["us"]["1.687.0"] == "policyengine-simulation-py4-19-1" - assert active["bundles"]["4.19.1"]["us"]["dataset_aliases"] == {"alias": "default"} + assert "dataset_aliases" not in active["bundles"]["4.19.1"]["us"] def test_build_legacy_seed_routing_state_copies_legacy_routes_and_manifests(): diff --git a/projects/policyengine-api-simulation/uv.lock b/projects/policyengine-api-simulation/uv.lock index 073143a0c..600707163 100644 --- a/projects/policyengine-api-simulation/uv.lock +++ b/projects/policyengine-api-simulation/uv.lock @@ -1675,7 +1675,7 @@ wheels = [ [[package]] name = "policyengine" -version = "4.18.6" +version = "4.18.7" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "diskcache" }, @@ -1689,9 +1689,9 @@ dependencies = [ { name = "pydantic" }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ec/18/9d1351b5212887a5ea440538d31e4916824a6f3ecdd3426da722e4d7c1bf/policyengine-4.18.6.tar.gz", hash = "sha256:e20a5ce0a190a07ff22aa4a676aa4099295a8939acbe9aa79de6f865c9d353b4", size = 696623, upload-time = "2026-06-28T13:05:13.858Z" } +sdist = { url = "https://files.pythonhosted.org/packages/3f/ba/b2b0e7bf953c2ff512526df58069244c14a6435f42c450dd20883d2f5b77/policyengine-4.18.7.tar.gz", hash = "sha256:a5c43e1b4018e130713c5826adff9c74dbff6ca792ac866878906ce116c07dec", size = 696648, upload-time = "2026-06-30T22:57:16.241Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b9/76/489694ea25fa4644e36d2e94ab55f7e4d24b7a1655be39cd599d6dba16bd/policyengine-4.18.6-py3-none-any.whl", hash = "sha256:09c7556ca26d35ddd438381d342ea53595b6440c2f05b0101b7bf527f816a8a2", size = 208900, upload-time = "2026-06-28T13:05:12.44Z" }, + { url = "https://files.pythonhosted.org/packages/f6/0f/70d4c959690e19b4fc591ef86472684144300c36c00dcd2b76b7b89ac07d/policyengine-4.18.7-py3-none-any.whl", hash = "sha256:a710f09c8ceec4e50a6322d27bb1888a9d89efd2f006d80a9d17dbbf5f35e3c7", size = 208885, upload-time = "2026-06-30T22:57:14.677Z" }, ] [[package]] @@ -1798,11 +1798,11 @@ requires-dist = [ { name = "openapi-python-client", marker = "extra == 'build'", specifier = ">=0.21.6" }, { name = "opentelemetry-instrumentation-fastapi", specifier = ">=0.51b0,<0.52" }, { name = "opentelemetry-instrumentation-sqlalchemy", specifier = ">=0.51b0,<0.52" }, - { name = "policyengine", specifier = "==4.18.6" }, + { name = "policyengine", specifier = "==4.18.7" }, { name = "policyengine-core", specifier = "==3.28.0" }, { name = "policyengine-fastapi", editable = "../../libs/policyengine-fastapi" }, { name = "policyengine-uk", specifier = "==2.89.2" }, - { name = "policyengine-us", specifier = "==1.745.0" }, + { name = "policyengine-us", specifier = "==1.729.0" }, { name = "pydantic-settings", specifier = ">=2.7.1,<3.0.0" }, { name = "pyright", marker = "extra == 'build'", specifier = ">=1.1.401" }, { name = "pytest", marker = "extra == 'test'", specifier = ">=8.3.4" }, @@ -1829,7 +1829,7 @@ wheels = [ [[package]] name = "policyengine-us" -version = "1.745.0" +version = "1.729.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "microdf-python" }, @@ -1839,9 +1839,9 @@ dependencies = [ { name = "tables" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/3f/87/bb5e15d5e63208d6451393818f35a1824f8808e44d9a491ade601e59e20c/policyengine_us-1.745.0.tar.gz", hash = "sha256:7c93f8e7c46ef85fb50871f44bceed7da697400484fe79318db7b2d981c855c0", size = 10561674, upload-time = "2026-06-25T04:18:52.45Z" } +sdist = { url = "https://files.pythonhosted.org/packages/69/cb/b2efba2094a708cd71890d98d72b99394fabc5894a4cceec14381e03fa35/policyengine_us-1.729.0.tar.gz", hash = "sha256:ac05c4d621c7f848b0806effc14e913160d5d47d777eadced6bc18edf392d75c", size = 10373862, upload-time = "2026-06-14T18:05:25.747Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/49/36/ef50de1108cd0e0b89a7820ac3262bda7701d5ac6d998c27e570e63b7b82/policyengine_us-1.745.0-py3-none-any.whl", hash = "sha256:2af30b694b681adca7c2e2bd695bbeb41c03769d9ae908f82f671f8502ce32d9", size = 12300884, upload-time = "2026-06-25T04:18:49.056Z" }, + { url = "https://files.pythonhosted.org/packages/b9/7d/778f92ae94997b00c3c9ac34b345f6c9333435f905670ee4eeb2f5e19809/policyengine_us-1.729.0-py3-none-any.whl", hash = "sha256:8d21d3f7c0e82a9415edffe8ea53939330a63d9c8f6bd334299bddb697cf2c00", size = 11905076, upload-time = "2026-06-14T18:05:21.806Z" }, ] [[package]] diff --git a/projects/policyengine-apis-integ/tests/simulation/conftest.py b/projects/policyengine-apis-integ/tests/simulation/conftest.py index f842b755b..1e4578a82 100644 --- a/projects/policyengine-apis-integ/tests/simulation/conftest.py +++ b/projects/policyengine-apis-integ/tests/simulation/conftest.py @@ -21,9 +21,7 @@ BUDGET_WINDOW_REFORM = { "gov.irs.credits.ctc.refundable.fully_refundable": {"2023-01-01.2100-12-31": True} } -BUDGET_WINDOW_DATASET = "gs://policyengine-us-data/enhanced_cps_2024.h5" -BUDGET_WINDOW_REGION = "us" -BUDGET_WINDOW_SUBSAMPLE = 200 +BUDGET_WINDOW_REGION = "state/ut" BUDGET_WINDOW_MAX_PARALLEL = 2 @@ -152,8 +150,6 @@ def budget_window_request(us_model_version: str) -> BudgetWindowBatchRequest: "region": BUDGET_WINDOW_REGION, "scope": "macro", "reform": BUDGET_WINDOW_REFORM, - "subsample": BUDGET_WINDOW_SUBSAMPLE, - "data": BUDGET_WINDOW_DATASET, "start_year": BUDGET_WINDOW_YEARS[0], "window_size": len(BUDGET_WINDOW_YEARS), "max_parallel": BUDGET_WINDOW_MAX_PARALLEL, diff --git a/projects/policyengine-apis-integ/tests/simulation/test_calculate.py b/projects/policyengine-apis-integ/tests/simulation/test_calculate.py index 03400b014..a621e9b4a 100644 --- a/projects/policyengine-apis-integ/tests/simulation/test_calculate.py +++ b/projects/policyengine-apis-integ/tests/simulation/test_calculate.py @@ -111,8 +111,6 @@ def test_calculate_default_model( "2023-01-01.2100-12-31": True } }, - "subsample": 200, # Reduce households to speed up test - "data": "gs://policyengine-us-data/enhanced_cps_2024.h5", } ) @@ -164,7 +162,6 @@ def test_calculate_us_state_region_model( "2023-01-01.2100-12-31": True } }, - "subsample": 200, "time_period": "2026", } ) @@ -211,14 +208,14 @@ def test_calculate_specific_model( { "country": "us", "version": us_model_version, + "region": "state/ut", "scope": "macro", "reform": { "gov.irs.credits.ctc.refundable.fully_refundable": { "2023-01-01.2100-12-31": True } }, - "subsample": 200, - "data": "gs://policyengine-us-data/enhanced_cps_2024.h5", + "time_period": "2026", } ) @@ -263,7 +260,6 @@ def test_calculate_uk_model( "reform": { "gov.hmrc.income_tax.rates.uk[0].rate": {"2023-01-01.2100-12-31": 0.21} }, - # No subsample - UKMultiYearDataset lacks .name attribute required by subsample method } )