diff --git a/README.md b/README.md index 7fc607d5..e45dec98 100644 --- a/README.md +++ b/README.md @@ -4,26 +4,47 @@ A Python package for tax-benefit microsimulation analysis. Run policy simulation ## Quick start +### Household calculator + ```python -from policyengine.core import Simulation -from policyengine.tax_benefit_models.uk import PolicyEngineUKDataset, uk_latest -from policyengine.outputs.aggregate import Aggregate, AggregateType +import policyengine as pe -# Load representative microdata -dataset = PolicyEngineUKDataset( - name="FRS 2023-24", - filepath="./data/frs_2023_24_year_2026.h5", +# UK: single adult earning £50,000 +uk = pe.uk.calculate_household( + people=[{"age": 35, "employment_income": 50_000}], year=2026, ) +print(uk.person[0].income_tax) # income tax +print(uk.household.hbai_household_net_income) # net income + +# US: single filer in California, with a reform +us = pe.us.calculate_household( + people=[{"age": 35, "employment_income": 60_000}], + tax_unit={"filing_status": "SINGLE"}, + household={"state_code": "CA"}, + year=2026, + reform={"gov.irs.credits.ctc.amount.adult_dependent": 1000}, +) +print(us.tax_unit.income_tax, us.household.household_net_income) +``` -# Run simulation -simulation = Simulation( - dataset=dataset, - tax_benefit_model_version=uk_latest, +### Population analysis + +```python +import policyengine as pe +from policyengine.core import Simulation +from policyengine.outputs.aggregate import Aggregate, AggregateType + +datasets = pe.uk.ensure_datasets( + datasets=["hf://policyengine/policyengine-uk-data/enhanced_frs_2023_24.h5"], + years=[2026], + data_folder="./data", ) +dataset = datasets["enhanced_frs_2023_24_2026"] + +simulation = Simulation(dataset=dataset, tax_benefit_model_version=pe.uk.model) simulation.run() -# Calculate total universal credit spending agg = Aggregate( simulation=simulation, variable="universal_credit", @@ -34,6 +55,9 @@ agg.run() print(f"Total UC spending: £{agg.result / 1e9:.1f}bn") ``` +For 
baseline-vs-reform comparisons, see `pe.uk.economic_impact_analysis` +and its US counterpart. + ## Documentation **Core concepts:** @@ -179,12 +203,12 @@ dataset.load() Simulations apply tax-benefit models to datasets: ```python +import policyengine as pe from policyengine.core import Simulation -from policyengine.tax_benefit_models.uk import uk_latest simulation = Simulation( dataset=dataset, - tax_benefit_model_version=uk_latest, + tax_benefit_model_version=pe.uk.model, ) simulation.run() @@ -223,7 +247,7 @@ import datetime parameter = Parameter( name="gov.hmrc.income_tax.allowances.personal_allowance.amount", - tax_benefit_model_version=uk_latest, + tax_benefit_model_version=pe.uk.model, data_type=float, ) @@ -242,7 +266,7 @@ policy = Policy( # Run reform simulation reform_sim = Simulation( dataset=dataset, - tax_benefit_model_version=uk_latest, + tax_benefit_model_version=pe.uk.model, policy=policy, ) reform_sim.run() diff --git a/changelog.d/v4-base-extraction.changed.md b/changelog.d/v4-base-extraction.changed.md new file mode 100644 index 00000000..572088a3 --- /dev/null +++ b/changelog.d/v4-base-extraction.changed.md @@ -0,0 +1 @@ +Extracted shared `MicrosimulationModelVersion` base class in `policyengine.tax_benefit_models.common`. Country subclasses now declare class-level metadata (`country_code`, `package_name`, `group_entities`) and implement a handful of thin hooks; `run()` stays per-country. Byte-level snapshot tests verify zero output drift. diff --git a/changelog.d/v4-dict-reforms.added.md b/changelog.d/v4-dict-reforms.added.md new file mode 100644 index 00000000..02405cdc --- /dev/null +++ b/changelog.d/v4-dict-reforms.added.md @@ -0,0 +1 @@ +``Simulation(policy={...})`` and ``Simulation(dynamic={...})`` now accept the same flat ``{"param.path": value}`` / ``{"param.path": {date: value}}`` dict that ``pe.{uk,us}.calculate_household(reform=...)`` accepts. 
Dicts are compiled to full ``Policy`` / ``Dynamic`` objects on construction using the ``tax_benefit_model_version`` for parameter-path validation and ``dataset.year`` for scalar effective-date defaulting. Removes the last place where population microsim required building ``Parameter`` / ``ParameterValue`` by hand. diff --git a/changelog.d/v4-docs-refresh.changed.md b/changelog.d/v4-docs-refresh.changed.md new file mode 100644 index 00000000..11e7d0d2 --- /dev/null +++ b/changelog.d/v4-docs-refresh.changed.md @@ -0,0 +1 @@ +Documentation refreshed for the v4 agent-first surface. README, `core-concepts`, `economic-impact-analysis`, `country-models-{uk,us}`, `regions-and-scoping`, `examples`, and `dev` now lead with `pe.uk.*` / `pe.us.*` entry points and flat-kwarg `calculate_household` usage. Removed leftover docs for the dropped `filter_field`/`filter_value` simulation fields. `examples/household_impact_example.py` rewritten against the v4 API. diff --git a/changelog.d/v4-facade.added.md b/changelog.d/v4-facade.added.md new file mode 100644 index 00000000..f05dea82 --- /dev/null +++ b/changelog.d/v4-facade.added.md @@ -0,0 +1,47 @@ +**BREAKING (v4):** Collapse the household-calculator surface into a +single agent-friendly entry point, ``pe.us.calculate_household`` / +``pe.uk.calculate_household``. + +New public API: + +- ``policyengine/__init__.py`` populated with canonical accessors: + ``pe.us``, ``pe.uk``, ``pe.Simulation`` (replacing the empty top-level + module). ``import policyengine as pe`` now gives you everything a + new coding session needs to reach in one line. +- ``pe.us.calculate_household(**kwargs)`` and ``pe.uk.calculate_household`` + take flat keyword arguments (``people``, per-entity overrides, + ``year``, ``reform``, ``extra_variables``) instead of a pydantic + input wrapper. +- ``reform=`` accepts a plain dict: ``{parameter_path: value}`` or + ``{parameter_path: {effective_date: value}}``. Compiles internally. 
+- Returns :class:`HouseholdResult` (new) with dot-access: + ``result.tax_unit.income_tax``, ``result.household.household_net_income``, + ``result.person[0].age``. Singleton entities are + :class:`EntityResult`; ``person`` is a list of them. ``to_dict()`` + and ``write(path)`` serialize to JSON. +- ``extra_variables=[...]`` is now a flat list; the library dispatches + each name to its entity by looking it up on the model. +- Unknown variable names (in ``people``, entity overrides, or + ``extra_variables``) raise ``ValueError`` with a ``difflib`` close-match + suggestion and a paste-able fix hint. +- Unknown dot-access on a result raises ``AttributeError`` with the + list of available variables plus the ``extra_variables=[...]`` call + that would surface the requested one. + +Removed (v4 breaking): + +- ``USHouseholdInput`` / ``UKHouseholdInput`` / ``USHouseholdOutput`` / + ``UKHouseholdOutput`` pydantic wrappers. +- ``calculate_household_impact`` — the name was misleading (it + returned levels, not an impact vs. baseline). Reserved for a future + delta function. +- The bare ``us_model`` / ``uk_model`` label-only singletons; each + country module now exposes ``.model`` pointing at the real + ``TaxBenefitModelVersion`` (kept ``us_latest`` / ``uk_latest`` + aliases for compatibility with any in-flight downstream code). + +New internal module: + +- ``policyengine.tax_benefit_models.common`` — ``compile_reform``, + ``dispatch_extra_variables``, ``EntityResult``, ``HouseholdResult`` + shared by both country implementations. diff --git a/changelog.d/v4-provenance-package.changed.md b/changelog.d/v4-provenance-package.changed.md new file mode 100644 index 00000000..8c016e02 --- /dev/null +++ b/changelog.d/v4-provenance-package.changed.md @@ -0,0 +1,24 @@ +**BREAKING (v4):** Separate the provenance layer from the core +value-object layer. 
+ +- ``policyengine/core/release_manifest.py`` → ``policyengine/provenance/manifest.py`` +- ``policyengine/core/trace_tro.py`` → ``policyengine/provenance/trace.py`` +- New ``policyengine.provenance`` package re-exports the public + surface (``get_release_manifest``, ``get_data_release_manifest``, + ``build_trace_tro_from_release_bundle``, ``build_simulation_trace_tro``, + ``serialize_trace_tro``, ``canonical_json_bytes``, + ``compute_trace_composition_fingerprint``, etc.). +- ``policyengine.core`` no longer re-exports provenance types. + ``policyengine.core`` shrinks to value objects only (Dataset, + Variable, Parameter, Policy, Dynamic, Simulation, Region, + TaxBenefitModel, TaxBenefitModelVersion, scoping strategies). +- ``import policyengine.core.scoping_strategy`` no longer imports + ``h5py`` at module load; the weight-replacement code path + lazy-imports it. ``import policyengine.outputs.constituency_impact`` + and ``import policyengine.outputs.local_authority_impact`` do the + same. +- Migration for downstream: replace + ``from policyengine.core import DataReleaseManifest`` (et al.) + with ``from policyengine.provenance import DataReleaseManifest``. + The country-module imports in internal code (``tax_benefit_models/{us,uk}/model.py`` + and ``datasets.py``) are already updated. diff --git a/docs/core-concepts.md b/docs/core-concepts.md index 425c5f62..7d61a404 100644 --- a/docs/core-concepts.md +++ b/docs/core-concepts.md @@ -2,6 +2,55 @@ PolicyEngine.py is a Python package for tax-benefit microsimulation analysis. It provides a unified interface for running policy simulations, analysing distributional impacts, and visualising results across different countries. +## Quick start + +Most analyses start from the country entry points on the top-level +package — ``policyengine.uk`` and ``policyengine.us``. They expose flat +keyword-argument functions that return structured results with +dot-access for scalar lookups. 
+ +```python +import policyengine as pe + +# UK: single adult earning £50,000 +uk = pe.uk.calculate_household( + people=[{"age": 35, "employment_income": 50_000}], + year=2026, +) +print(uk.household.hbai_household_net_income) # net income +print(uk.person[0].income_tax) # per-person dot access + +# US: married couple with two kids in Texas +us = pe.us.calculate_household( + people=[ + {"age": 35, "employment_income": 40_000}, + {"age": 33}, + {"age": 8}, + {"age": 5}, + ], + tax_unit={"filing_status": "JOINT"}, + household={"state_code": "TX"}, + year=2026, +) +print(us.tax_unit.income_tax, us.tax_unit.eitc, us.tax_unit.ctc) + +# Apply a reform: just pass a parameter-path dict +reformed = pe.us.calculate_household( + people=[{"age": 35, "employment_income": 60_000}], + tax_unit={"filing_status": "SINGLE"}, + year=2026, + reform={"gov.irs.credits.ctc.amount.adult_dependent": 1000}, +) +``` + +Reforms can be scalar values (effective from ``{year}-01-01`` onwards) or a +mapping of effective-date strings to values for time-varying reforms. +Unknown variable names raise ``ValueError`` with close-match suggestions instead of silently +returning zero. + +For population-level analysis (budget impact, distributional effects), +see [Economic impact analysis](economic-impact-analysis.md). + ## Architecture overview The package is organised around several core concepts: @@ -22,9 +71,14 @@ Tax-benefit models define the rules and calculations for a country's tax and ben ### Using a tax-benefit model +The country entry points expose pinned model versions as ``pe.uk.model`` +and ``pe.us.model``: + ```python -from policyengine.tax_benefit_models.uk import uk_latest -from policyengine.tax_benefit_models.us import us_latest +import policyengine as pe + +uk_latest = pe.uk.model +us_latest = pe.us.model # UK model includes variables like: # - income_tax, national_insurance, universal_credit @@ -46,7 +100,7 @@ Datasets contain microdata representing a population. 
Each dataset has: ### Dataset structure ```python -from policyengine.tax_benefit_models.uk import PolicyEngineUKDataset +from policyengine.tax_benefit_models.uk import PolicyEngineUKDataset # or: pe.uk.PolicyEngineUKDataset dataset = PolicyEngineUKDataset( name="FRS 2023-24", @@ -126,7 +180,7 @@ Before running simulations, you need representative microdata. The package provi - **`load_datasets()`**: Load previously saved HDF5 files from disk ```python -from policyengine.tax_benefit_models.us import ensure_datasets +from policyengine.tax_benefit_models.us import ensure_datasets # or: pe.us.ensure_datasets # First run: downloads from HuggingFace, computes variables, saves to ./data/ # Subsequent runs: loads from disk instantly @@ -139,7 +193,7 @@ dataset = datasets["enhanced_cps_2024_2026"] ``` ```python -from policyengine.tax_benefit_models.uk import ensure_datasets +from policyengine.tax_benefit_models.uk import ensure_datasets # or: pe.uk.ensure_datasets datasets = ensure_datasets( datasets=["hf://policyengine/policyengine-uk-data/enhanced_frs_2023_24.h5"], @@ -158,12 +212,12 @@ Simulations apply tax-benefit models to datasets, calculating all variables for ### Running a simulation ```python +import policyengine as pe from policyengine.core import Simulation -from policyengine.tax_benefit_models.uk import uk_latest simulation = Simulation( dataset=dataset, - tax_benefit_model_version=uk_latest, + tax_benefit_model_version=pe.uk.model, ) simulation.run() @@ -201,7 +255,7 @@ After running a simulation, you can access the calculated variables from the out ```python simulation = Simulation( dataset=dataset, - tax_benefit_model_version=uk_latest, + tax_benefit_model_version=pe.uk.model, ) simulation.run() @@ -216,52 +270,67 @@ benunit_data = output.benunit[["benunit_id", "universal_credit", "child_benefit" Policies modify tax-benefit system parameters through parametric reforms. 
-### Creating a policy +### Reform as a dict + +The canonical form — same shape ``pe.{uk,us}.calculate_household(reform=...)`` +accepts — is a flat ``{parameter.path: value}`` / ``{parameter.path: {date: value}}`` +dict. ``Simulation`` compiles it to a ``Policy`` at construction: ```python -from policyengine.core import Policy, Parameter, ParameterValue -import datetime +import policyengine as pe +from policyengine.core import Simulation -# Define parameter to modify -parameter = Parameter( - name="gov.hmrc.income_tax.allowances.personal_allowance.amount", - tax_benefit_model_version=uk_latest, - description="Personal allowance for income tax", - data_type=float, +baseline = Simulation(dataset=dataset, tax_benefit_model_version=pe.uk.model) +reform = Simulation( + dataset=dataset, + tax_benefit_model_version=pe.uk.model, + # Personal allowance raised from ~£12,570 to £15,000. + policy={"gov.hmrc.income_tax.allowances.personal_allowance.amount": 15_000}, ) +baseline.run() +reform.run() +``` -# Set new value -parameter_value = ParameterValue( - parameter=parameter, - start_date=datetime.date(2026, 1, 1), - end_date=datetime.date(2026, 12, 31), - value=15000, # Increase from ~£12,570 to £15,000 -) +Scalar values default their effective date to ``{dataset.year}-01-01``. +For time-varying reforms pass a nested ``{date: value}`` mapping: -policy = Policy( - name="Increased personal allowance", - description="Raises personal allowance to £15,000", - parameter_values=[parameter_value], -) +```python +policy = { + "gov.hmrc.income_tax.allowances.personal_allowance.amount": { + "2026-01-01": 13_000, + "2027-01-01": 15_000, + } +} ``` -### Running a reform simulation +Unknown paths raise ``ValueError`` with a close-match suggestion. 
+ +### Reform as a Policy object (escape hatch) + +For reforms that can't be expressed as parameter-value changes (e.g., +custom ``simulation_modifier`` callables), build a ``Policy`` directly: ```python -# Baseline simulation -baseline = Simulation( - dataset=dataset, - tax_benefit_model_version=uk_latest, -) -baseline.run() +from policyengine.core import Parameter, ParameterValue, Policy +import datetime -# Reform simulation -reform = Simulation( - dataset=dataset, - tax_benefit_model_version=uk_latest, - policy=policy, -) -reform.run() +policy = Policy( + name="Increased personal allowance", + parameter_values=[ + ParameterValue( + parameter=Parameter( + name="gov.hmrc.income_tax.allowances.personal_allowance.amount", + tax_benefit_model_version=pe.uk.model, + data_type=float, + ), + start_date=datetime.date(2026, 1, 1), + end_date=datetime.date(2026, 12, 31), + value=15_000, + ), + ], +) + +Simulation(dataset=dataset, tax_benefit_model_version=pe.uk.model, policy=policy) ``` ### Combining policies @@ -306,7 +375,7 @@ dynamic = Dynamic( simulation = Simulation( dataset=dataset, - tax_benefit_model_version=uk_latest, + tax_benefit_model_version=pe.uk.model, policy=policy, dynamic=dynamic, ) diff --git a/docs/country-models-uk.md b/docs/country-models-uk.md index 0bc54505..2d09e43e 100644 --- a/docs/country-models-uk.md +++ b/docs/country-models-uk.md @@ -2,6 +2,44 @@ The UK tax-benefit model implements the United Kingdom's tax and benefit system using PolicyEngine UK as the underlying calculation engine. 
+## Quick start + +```python +import policyengine as pe + +# Single adult earning £50k +result = pe.uk.calculate_household( + people=[{"age": 35, "employment_income": 50_000}], + year=2026, +) +print(result.person[0].income_tax, result.household.hbai_household_net_income) + +# Family renting, with benefit claims explicitly on +result = pe.uk.calculate_household( + people=[ + {"age": 35, "employment_income": 30_000}, + {"age": 33}, + {"age": 8}, + {"age": 5}, + ], + benunit={"would_claim_uc": True, "would_claim_child_benefit": True}, + household={"rent": 12_000, "region": "NORTH_WEST"}, + year=2026, +) + +# Apply a reform +result = pe.uk.calculate_household( + people=[{"age": 35, "employment_income": 50_000}], + year=2026, + reform={ + "gov.hmrc.income_tax.allowances.personal_allowance.amount": 15_000, + }, +) +``` + +For population-level analysis and reform analysis, see +[Economic impact analysis](economic-impact-analysis.md). + ## Entity structure The UK model uses three entity levels: @@ -149,11 +187,11 @@ dataset = PolicyEngineUKDataset( ```python from policyengine.core import Simulation -from policyengine.tax_benefit_models.uk import uk_latest +import policyengine as pe simulation = Simulation( dataset=dataset, - tax_benefit_model_version=uk_latest, + tax_benefit_model_version=pe.uk.model, ) simulation.run() @@ -195,82 +233,32 @@ print(output.household[["household_net_income", "household_benefits", "household ## Common policy reforms +All reform examples use the same flat ``{parameter.path: value}`` dict +the household calculator accepts. ``Simulation`` compiles it into a +``Policy`` at construction; scalar values take effect from +``{dataset.year}-01-01``. 
+ ### Increasing personal allowance ```python -from policyengine.core import Policy, Parameter, ParameterValue -import datetime - -parameter = Parameter( - name="gov.hmrc.income_tax.allowances.personal_allowance.amount", - tax_benefit_model_version=uk_latest, - description="Personal allowance", - data_type=float, -) - -policy = Policy( - name="Increase personal allowance to £15,000", - description="Raises personal allowance from £12,570 to £15,000", - parameter_values=[ - ParameterValue( - parameter=parameter, - start_date=datetime.date(2026, 1, 1), - end_date=datetime.date(2026, 12, 31), - value=15000, - ) - ], -) +policy = {"gov.hmrc.income_tax.allowances.personal_allowance.amount": 15_000} ``` ### Adjusting UC taper rate ```python -parameter = Parameter( - name="gov.dwp.universal_credit.means_test.reduction_rate", - tax_benefit_model_version=uk_latest, - description="UC taper rate", - data_type=float, -) - -policy = Policy( - name="Reduce UC taper to 50%", - description="Lowers taper rate from 55% to 50%", - parameter_values=[ - ParameterValue( - parameter=parameter, - start_date=datetime.date(2026, 1, 1), - end_date=datetime.date(2026, 12, 31), - value=0.50, # 50% - ) - ], -) +policy = {"gov.dwp.universal_credit.means_test.reduction_rate": 0.50} ``` -### Abolishing two-child limit +### Abolishing the two-child limit ```python -# Set subsequent child element equal to first child -parameter = Parameter( - name="gov.dwp.universal_credit.elements.child.subsequent_child", - tax_benefit_model_version=uk_latest, - description="UC subsequent child element", - data_type=float, -) - -policy = Policy( - name="Abolish two-child limit", - description="Sets subsequent child element equal to first child", - parameter_values=[ - ParameterValue( - parameter=parameter, - start_date=datetime.date(2026, 1, 1), - end_date=datetime.date(2026, 12, 31), - value=333.33, # Match first child rate - ) - ], -) +# Set the subsequent-child element equal to the first-child rate. 
+policy = {"gov.dwp.universal_credit.elements.child.subsequent_child": 333.33} ``` +Plug any of the above into ``Simulation(policy=policy, ...)``. + ## Regional variations The UK model accounts for regional differences: diff --git a/docs/country-models-us.md b/docs/country-models-us.md index 268c888f..52b44d85 100644 --- a/docs/country-models-us.md +++ b/docs/country-models-us.md @@ -2,6 +2,39 @@ The US tax-benefit model implements the United States federal tax and benefit system using PolicyEngine US as the underlying calculation engine. +## Quick start + +```python +import policyengine as pe + +# Single adult earning $60k (SINGLE filer, default state) +result = pe.us.calculate_household( + people=[{"age": 35, "employment_income": 60_000}], + tax_unit={"filing_status": "SINGLE"}, + year=2026, +) +print(result.tax_unit.income_tax, result.household.household_net_income) + +# With a reform +result = pe.us.calculate_household( + people=[{"age": 35, "employment_income": 60_000}], + tax_unit={"filing_status": "SINGLE"}, + year=2026, + reform={"gov.irs.credits.ctc.amount.adult_dependent": 1000}, +) + +# Request extra variables not in the default result +result = pe.us.calculate_household( + people=[{"age": 35, "employment_income": 60_000}], + tax_unit={"filing_status": "SINGLE"}, + year=2026, + extra_variables=["adjusted_gross_income", "taxable_income"], +) +``` + +For population-level analysis and reform analysis, see +[Economic impact analysis](economic-impact-analysis.md). 
+ ## Entity structure The US model uses a more complex entity hierarchy: @@ -183,11 +216,11 @@ dataset = PolicyEngineUSDataset( ```python from policyengine.core import Simulation -from policyengine.tax_benefit_models.us import us_latest +import policyengine as pe simulation = Simulation( dataset=dataset, - tax_benefit_model_version=us_latest, + tax_benefit_model_version=pe.us.model, ) simulation.run() @@ -239,81 +272,43 @@ print(output.household[["household_net_income", "household_benefits", "household ## Common policy reforms +All reform examples use the same flat ``{parameter.path: value}`` dict +the household calculator accepts. ``Simulation`` compiles it into a +``Policy`` at construction; scalar values take effect from +``{dataset.year}-01-01``. Indexed-breakdown parameters (age groups, +filing statuses) end in ``[N].amount``. + ### Increasing standard deduction ```python -from policyengine.core import Policy, Parameter, ParameterValue -import datetime - -parameter = Parameter( - name="gov.irs.income.standard_deduction.single", - tax_benefit_model_version=us_latest, - description="Standard deduction (single)", - data_type=float, -) - -policy = Policy( - name="Increase standard deduction to $20,000", - description="Raises single standard deduction from $14,600 to $20,000", - parameter_values=[ - ParameterValue( - parameter=parameter, - start_date=datetime.date(2024, 1, 1), - end_date=datetime.date(2024, 12, 31), - value=20000, - ) - ], -) +policy = {"gov.irs.income.standard_deduction.single": 20_000} ``` ### Expanding Child Tax Credit ```python -parameter = Parameter( - name="gov.irs.credits.ctc.amount.base", - tax_benefit_model_version=us_latest, - description="Base CTC amount", - data_type=float, -) - -policy = Policy( - name="Increase CTC to $3,000", - description="Expands CTC from $2,000 to $3,000 per child", - parameter_values=[ - ParameterValue( - parameter=parameter, - start_date=datetime.date(2024, 1, 1), - end_date=datetime.date(2024, 12, 31), - value=3000, 
- ) - ], -) +policy = {"gov.irs.credits.ctc.amount.base[0].amount": 3_000} ``` ### Making CTC fully refundable ```python -parameter = Parameter( - name="gov.irs.credits.ctc.refundable.amount.max", - tax_benefit_model_version=us_latest, - description="Maximum refundable CTC", - data_type=float, -) +policy = {"gov.irs.credits.ctc.refundable.amount.max": 2_000} +``` -policy = Policy( - name="Fully refundable CTC", - description="Makes entire $2,000 CTC refundable", - parameter_values=[ - ParameterValue( - parameter=parameter, - start_date=datetime.date(2024, 1, 1), - end_date=datetime.date(2024, 12, 31), - value=2000, # Match base amount - ) - ], -) +### Time-varying reform + +```python +policy = { + "gov.irs.credits.ctc.amount.base[0].amount": { + "2026-07-01": 2_500, + "2027-01-01": 3_000, + }, +} ``` +Plug any of the above into ``Simulation(policy=policy, ...)``. + ## State variations The US model includes state-level variations for: diff --git a/docs/dev.md b/docs/dev.md index 007a94e5..c6f0937d 100644 --- a/docs/dev.md +++ b/docs/dev.md @@ -83,12 +83,15 @@ For the target release-bundle architecture, see [Release bundles](release-bundle ``` src/policyengine/ +├── __init__.py # Public surface: `pe.uk`, `pe.us`, `pe.Simulation` ├── core/ # Domain models (Simulation, Dataset, Policy, etc.) ├── tax_benefit_models/ -│ ├── uk/ # UK model, datasets, analysis, outputs -│ └── us/ # US model, datasets, analysis, outputs +│ ├── common/ # MicrosimulationModelVersion base, result types, reform compiler +│ ├── uk/ # UK model, datasets, household calculator, reform analysis +│ └── us/ # US model, datasets, household calculator, reform analysis ├── outputs/ # Output templates (Aggregate, Poverty, etc.) 
-├── countries/ # Geographic region registries +├── provenance/ # Release manifests + TRACE TRO export +├── countries/ # Geographic region registries (scoping, constituencies, districts) └── utils/ # Helpers (reforms, entity mapping, plotting) ``` @@ -98,7 +101,7 @@ src/policyengine/ **HDF5 for storage**: Datasets and simulation outputs are stored as HDF5 files. No database server is required. The `MicroDataFrame` from the `microdf` package wraps pandas DataFrames with weight-aware `.sum()`, `.mean()`, `.count()`. -**Country-specific model classes**: `PolicyEngineUSLatest` and `PolicyEngineUKLatest` each implement `run()`, `save()`, and `load()`. The US model passes reforms as a dict at `Microsimulation(reform=...)` construction time. The UK model supports both parametric reforms and `simulation_modifier` callables applied post-construction. +**Country-specific model classes**: `PolicyEngineUSLatest` and `PolicyEngineUKLatest` inherit from a shared `MicrosimulationModelVersion` base (variable/parameter loading, manifest certification, `save`/`load`). Each subclass only implements `run()` and a handful of country hooks (`_load_system`, `_load_region_registry`, `_dataset_class`, `_get_runtime_data_build_metadata`). The US `run` applies reforms as a dict at `Microsimulation(reform=...)` construction time; the UK `run` wraps inputs as `UKSingleYearDataset` and applies reforms via a modifier after construction. **LRU cache + file caching**: `Simulation.ensure()` checks an in-process LRU cache (max 100 entries), then tries loading from disk, then falls back to `run()` + `save()`. 
diff --git a/docs/economic-impact-analysis.md b/docs/economic-impact-analysis.md index 0d28dff8..9a81f46b 100644 --- a/docs/economic-impact-analysis.md +++ b/docs/economic-impact-analysis.md @@ -18,101 +18,56 @@ There are two approaches to comparing simulations: ### US example ```python -import datetime -from policyengine.core import Parameter, ParameterValue, Policy, Simulation -from policyengine.tax_benefit_models.us import ( - economic_impact_analysis, - ensure_datasets, - us_latest, -) +import policyengine as pe +from policyengine.core import Simulation # 1. Load data -datasets = ensure_datasets( +datasets = pe.us.ensure_datasets( datasets=["hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5"], years=[2026], data_folder="./data", ) dataset = datasets["enhanced_cps_2024_2026"] -# 2. Define reform -param = Parameter( - name="gov.irs.deductions.standard.amount.SINGLE", - tax_benefit_model_version=us_latest, -) -reform = Policy( - name="Double standard deduction (single)", - parameter_values=[ - ParameterValue( - parameter=param, - start_date=datetime.date(2026, 1, 1), - end_date=datetime.date(2026, 12, 31), - value=30_950, - ), - ], -) - -# 3. Create simulations (no need to call .run() — ensure() is called internally) -baseline_sim = Simulation( - dataset=dataset, - tax_benefit_model_version=us_latest, -) +# 2. Build baseline and reform simulations. +# The reform dict is the same shape `pe.us.calculate_household(reform=...)` accepts — +# Simulation compiles it into a Policy automatically. +baseline_sim = Simulation(dataset=dataset, tax_benefit_model_version=pe.us.model) reform_sim = Simulation( dataset=dataset, - tax_benefit_model_version=us_latest, - policy=reform, + tax_benefit_model_version=pe.us.model, + policy={"gov.irs.credits.ctc.amount.base[0].amount": 3_000}, ) -# 4. Run full analysis -analysis = economic_impact_analysis(baseline_sim, reform_sim) +# 3. 
Run full analysis (ensure() is called internally) +analysis = pe.us.economic_impact_analysis(baseline_sim, reform_sim) ``` ### UK example ```python -import datetime -from policyengine.core import Parameter, ParameterValue, Policy, Simulation -from policyengine.tax_benefit_models.uk import ( - economic_impact_analysis, - ensure_datasets, - uk_latest, -) +import policyengine as pe +from policyengine.core import Simulation -datasets = ensure_datasets( +datasets = pe.uk.ensure_datasets( datasets=["hf://policyengine/policyengine-uk-data/enhanced_frs_2023_24.h5"], years=[2026], data_folder="./data", ) dataset = datasets["enhanced_frs_2023_24_2026"] -param = Parameter( - name="gov.hmrc.income_tax.allowances.personal_allowance.amount", - tax_benefit_model_version=uk_latest, -) -reform = Policy( - name="Zero personal allowance", - parameter_values=[ - ParameterValue( - parameter=param, - start_date=datetime.date(2026, 1, 1), - end_date=datetime.date(2026, 12, 31), - value=0, - ), - ], -) - -baseline_sim = Simulation( - dataset=dataset, - tax_benefit_model_version=uk_latest, -) +baseline_sim = Simulation(dataset=dataset, tax_benefit_model_version=pe.uk.model) reform_sim = Simulation( dataset=dataset, - tax_benefit_model_version=uk_latest, - policy=reform, + tax_benefit_model_version=pe.uk.model, + policy={"gov.hmrc.income_tax.allowances.personal_allowance.amount": 0}, ) -analysis = economic_impact_analysis(baseline_sim, reform_sim) +analysis = pe.uk.economic_impact_analysis(baseline_sim, reform_sim) ``` +> If you need the full `Policy` / `ParameterValue` construction (e.g., a reform with a custom `simulation_modifier` callable), you can still pass a `Policy` object as `policy=`; see `policyengine.core.policy` for details. 
+ ## What `economic_impact_analysis()` computes The function calls `ensure()` on both simulations (run + cache if not already computed), then produces: diff --git a/docs/examples.md b/docs/examples.md index b7b4e91a..715d30bf 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -50,9 +50,9 @@ Same approach as the UK version, varying employment income from $0 to $200k and :language: python ``` -## Household impact calculation +## Household calculation -Using `calculate_household_impact()` to compute taxes and benefits for individual custom households (both UK and US). +Using `pe.uk.calculate_household()` and `pe.us.calculate_household()` to compute taxes and benefits for individual custom households with flat keyword arguments and dot-access result objects. ```{literalinclude} ../examples/household_impact_example.py :language: python diff --git a/docs/regions-and-scoping.md b/docs/regions-and-scoping.md index 9be4ddbc..01914889 100644 --- a/docs/regions-and-scoping.md +++ b/docs/regions-and-scoping.md @@ -23,9 +23,9 @@ A `Region` represents a geographic area with a unique prefixed code: Each model version has a `RegionRegistry` providing O(1) lookups: ```python -from policyengine.tax_benefit_models.us import us_latest +import policyengine as pe -registry = us_latest.region_registry +registry = pe.us.model.region_registry # Look up by code california = registry.get("state/ca") @@ -43,9 +43,9 @@ ca_districts = registry.get_children("state/ca") ``` ```python -from policyengine.tax_benefit_models.uk import uk_latest +import policyengine as pe -registry = uk_latest.region_registry +registry = pe.uk.model.region_registry # UK countries countries = registry.get_by_type("country") @@ -74,10 +74,10 @@ from policyengine.core.scoping_strategy import RowFilterStrategy # Simulate only California households simulation = Simulation( dataset=dataset, - tax_benefit_model_version=us_latest, + tax_benefit_model_version=pe.us.model, scoping_strategy=RowFilterStrategy( - 
variable_name="state_code", - variable_value="CA", + variable_name="state_fips", + variable_value=6, # California FIPS code ), ) simulation.run() @@ -89,7 +89,7 @@ This removes all non-California households from the dataset before running the s # UK: simulate only England simulation = Simulation( dataset=dataset, - tax_benefit_model_version=uk_latest, + tax_benefit_model_version=pe.uk.model, scoping_strategy=RowFilterStrategy( variable_name="country", variable_value="ENGLAND", @@ -106,7 +106,7 @@ from policyengine.core.scoping_strategy import WeightReplacementStrategy simulation = Simulation( dataset=dataset, - tax_benefit_model_version=uk_latest, + tax_benefit_model_version=pe.uk.model, scoping_strategy=WeightReplacementStrategy( weight_matrix_bucket="policyengine-uk-data", weight_matrix_key="parliamentary_constituency_weights.h5", @@ -119,29 +119,6 @@ simulation = Simulation( Unlike row filtering, weight replacement keeps all households but assigns region-specific weights. This is more statistically robust for small geographic areas where filtering would leave too few households. -### Legacy filter fields - -For backward compatibility, `Simulation` also accepts `filter_field` and `filter_value` parameters, which are auto-converted to a `RowFilterStrategy`: - -```python -# These two are equivalent: -simulation = Simulation( - dataset=dataset, - tax_benefit_model_version=us_latest, - filter_field="state_code", - filter_value="CA", -) - -simulation = Simulation( - dataset=dataset, - tax_benefit_model_version=us_latest, - scoping_strategy=RowFilterStrategy( - variable_name="state_code", - variable_value="CA", - ), -) -``` - ## Geographic impact outputs The package provides output types that compute per-region metrics across all regions simultaneously. 
@@ -230,19 +207,19 @@ from policyengine.core.scoping_strategy import RowFilterStrategy # State-level analysis baseline_sim = Simulation( dataset=dataset, - tax_benefit_model_version=us_latest, + tax_benefit_model_version=pe.us.model, scoping_strategy=RowFilterStrategy( - variable_name="state_code", - variable_value="CA", + variable_name="state_fips", + variable_value=6, # California FIPS code ), ) reform_sim = Simulation( dataset=dataset, - tax_benefit_model_version=us_latest, + tax_benefit_model_version=pe.us.model, policy=reform, scoping_strategy=RowFilterStrategy( - variable_name="state_code", - variable_value="CA", + variable_name="state_fips", + variable_value=6, # California FIPS code ), ) diff --git a/docs/release-bundles.md b/docs/release-bundles.md index ea014c9d..a28e0d1d 100644 --- a/docs/release-bundles.md +++ b/docs/release-bundles.md @@ -224,8 +224,8 @@ That is a country-data concern and lives in those repos. From Python: ```python -from policyengine.core.release_manifest import get_data_release_manifest, get_release_manifest -from policyengine.core.trace_tro import build_trace_tro_from_release_bundle, serialize_trace_tro +from policyengine.provenance.manifest import get_data_release_manifest, get_release_manifest +from policyengine.provenance.trace import build_trace_tro_from_release_bundle, serialize_trace_tro country = get_release_manifest("us") tro = build_trace_tro_from_release_bundle(country, get_data_release_manifest("us")) @@ -286,7 +286,7 @@ should run): ```python import hashlib, json, requests -from policyengine.core.trace_tro import canonical_json_bytes +from policyengine.provenance.trace import canonical_json_bytes sim_tro = json.load(open("results.trace.tro.jsonld")) perf = sim_tro["@graph"][0]["trov:hasPerformance"] diff --git a/examples/household_impact_example.py b/examples/household_impact_example.py index f2902daf..4b96cd96 100644 --- a/examples/household_impact_example.py +++ b/examples/household_impact_example.py @@ -1,46 +1,37 @@ 
-"""Example: Calculate household tax and benefit impacts. +"""Example: calculate tax and benefit outcomes for custom households. -This script demonstrates using calculate_household_impact for both UK and US -to compute taxes and benefits for custom households. +Demonstrates the v4 :func:`policyengine.us.calculate_household` and +:func:`policyengine.uk.calculate_household` entry points. Both take flat +keyword arguments, accept reform dicts directly, and return a +:class:`~policyengine.tax_benefit_models.common.HouseholdResult` that +supports dot-access for scalar lookups. -Run: python examples/household_impact_example.py +Run: ``python examples/household_impact_example.py`` """ -from policyengine.tax_benefit_models.uk import ( - UKHouseholdInput, -) -from policyengine.tax_benefit_models.uk import ( - calculate_household_impact as calculate_uk_impact, -) -from policyengine.tax_benefit_models.us import ( - USHouseholdInput, -) -from policyengine.tax_benefit_models.us import ( - calculate_household_impact as calculate_us_impact, -) - - -def uk_example(): - """UK household impact example.""" +from __future__ import annotations + +import policyengine as pe + + +def uk_example() -> None: print("=" * 60) - print("UK HOUSEHOLD IMPACT") + print("UK household calculator") print("=" * 60) - # Single adult earning £50,000 - household = UKHouseholdInput( + # Single adult earning £50,000. 
+ single = pe.uk.calculate_household( people=[{"age": 35, "employment_income": 50_000}], year=2026, ) - result = calculate_uk_impact(household) - print("\nSingle adult, £50k income:") - print(f" Net income: £{result.household['hbai_household_net_income']:,.0f}") - print(f" Income tax: £{result.person[0]['income_tax']:,.0f}") - print(f" National Insurance: £{result.person[0]['national_insurance']:,.0f}") - print(f" Total tax: £{result.household['household_tax']:,.0f}") + print(f" Net income: £{single.household.hbai_household_net_income:,.0f}") + print(f" Income tax: £{single.person[0].income_tax:,.0f}") + print(f" National Insurance: £{single.person[0].national_insurance:,.0f}") + print(f" Total tax: £{single.household.household_tax:,.0f}") - # Family with two children, £30k income, renting - household = UKHouseholdInput( + # Family with two children, £30k income, renting in the North West. + family = pe.uk.calculate_household( people=[ {"age": 35, "employment_income": 30_000}, {"age": 33}, @@ -57,59 +48,52 @@ def uk_example(): }, year=2026, ) - result = calculate_uk_impact(household) - print("\nFamily (2 adults, 2 children), £30k income, renting:") - print(f" Net income: £{result.household['hbai_household_net_income']:,.0f}") - print(f" Income tax: £{result.person[0]['income_tax']:,.0f}") - print(f" Child benefit: £{result.benunit[0]['child_benefit']:,.0f}") - print(f" Universal credit: £{result.benunit[0]['universal_credit']:,.0f}") - print(f" Total benefits: £{result.household['household_benefits']:,.0f}") + print(f" Net income: £{family.household.hbai_household_net_income:,.0f}") + print(f" Income tax: £{family.person[0].income_tax:,.0f}") + print(f" Child benefit: £{family.benunit.child_benefit:,.0f}") + print(f" Universal credit: £{family.benunit.universal_credit:,.0f}") + print(f" Total benefits: £{family.household.household_benefits:,.0f}") -def us_example(): - """US household impact example.""" +def us_example() -> None: print("\n" + "=" * 60) - print("US 
HOUSEHOLD IMPACT") + print("US household calculator") print("=" * 60) - # Single adult earning $50,000 - household = USHouseholdInput( - people=[{"age": 35, "employment_income": 50_000, "is_tax_unit_head": True}], + # Single adult earning $50,000 in California. + single = pe.us.calculate_household( + people=[{"age": 35, "employment_income": 50_000}], tax_unit={"filing_status": "SINGLE"}, household={"state_code_str": "CA"}, - year=2024, + year=2026, ) - result = calculate_us_impact(household) - print("\nSingle adult, $50k income (California):") - print(f" Net income: ${result.household['household_net_income']:,.0f}") - print(f" Income tax: ${result.tax_unit[0]['income_tax']:,.0f}") - print(f" Payroll tax: ${result.tax_unit[0]['employee_payroll_tax']:,.0f}") + print(f" Net income: ${single.household.household_net_income:,.0f}") + print(f" Income tax: ${single.tax_unit.income_tax:,.0f}") + print(f" Payroll tax: ${single.tax_unit.employee_payroll_tax:,.0f}") - # Married couple with children, lower income - household = USHouseholdInput( + # Married couple with two kids, Texas, lower income. 
+ family = pe.us.calculate_household( people=[ - {"age": 35, "employment_income": 40_000, "is_tax_unit_head": True}, - {"age": 33, "is_tax_unit_spouse": True}, - {"age": 8, "is_tax_unit_dependent": True}, - {"age": 5, "is_tax_unit_dependent": True}, + {"age": 35, "employment_income": 40_000}, + {"age": 33}, + {"age": 8}, + {"age": 5}, ], tax_unit={"filing_status": "JOINT"}, household={"state_code_str": "TX"}, - year=2024, + year=2026, ) - result = calculate_us_impact(household) - print("\nMarried couple with 2 children, $40k income (Texas):") - print(f" Net income: ${result.household['household_net_income']:,.0f}") - print(f" Federal income tax: ${result.tax_unit[0]['income_tax']:,.0f}") - print(f" EITC: ${result.tax_unit[0]['eitc']:,.0f}") - print(f" Child tax credit: ${result.tax_unit[0]['ctc']:,.0f}") - print(f" SNAP: ${result.spm_unit[0]['snap']:,.0f}") + print(f" Net income: ${family.household.household_net_income:,.0f}") + print(f" Federal income tax: ${family.tax_unit.income_tax:,.0f}") + print(f" EITC: ${family.tax_unit.eitc:,.0f}") + print(f" Child tax credit: ${family.tax_unit.ctc:,.0f}") + print(f" SNAP: ${family.spm_unit.snap:,.0f}") -def main(): +def main() -> None: uk_example() us_example() print("\n" + "=" * 60) diff --git a/pyproject.toml b/pyproject.toml index 6b0b3141..f09e0a04 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "policyengine" -version = "3.7.0" +version = "4.0.0" description = "A package to conduct policy analysis using PolicyEngine tax-benefit models." 
readme = "README.md" authors = [ diff --git a/scripts/generate_trace_tros.py b/scripts/generate_trace_tros.py index dce7ae8e..f9533bd9 100644 --- a/scripts/generate_trace_tros.py +++ b/scripts/generate_trace_tros.py @@ -18,12 +18,12 @@ import sys from pathlib import Path -from policyengine.core.release_manifest import ( +from policyengine.provenance.manifest import ( DataReleaseManifestUnavailableError, get_data_release_manifest, get_release_manifest, ) -from policyengine.core.trace_tro import ( +from policyengine.provenance.trace import ( build_trace_tro_from_release_bundle, serialize_trace_tro, ) diff --git a/src/policyengine/__init__.py b/src/policyengine/__init__.py index e69de29b..a8de3971 100644 --- a/src/policyengine/__init__.py +++ b/src/policyengine/__init__.py @@ -0,0 +1,46 @@ +"""PolicyEngine — one Python API for tax and benefit policy. + +Canonical entry points for a fresh coding session: + +.. code-block:: python + + import policyengine as pe + + # Single-household calculator (US). + result = pe.us.calculate_household( + people=[{"age": 35, "employment_income": 60000}], + tax_unit={"filing_status": "SINGLE"}, + year=2026, + reform={"gov.irs.credits.ctc.amount.adult_dependent": 1000}, + ) + print(result.tax_unit.income_tax, result.household.household_net_income) + + # UK: + uk_result = pe.uk.calculate_household( + people=[{"age": 30, "employment_income": 50000}], + year=2026, + ) + + # Lower-level microsimulation building blocks. + from policyengine import Simulation # or: pe.Simulation + +Each country module exposes ``calculate_household``, ``model`` +(the pinned ``TaxBenefitModelVersion``), and the microsim helpers. 
+""" + +from importlib.util import find_spec + +from policyengine import outputs as outputs +from policyengine.core import Simulation as Simulation + +if find_spec("policyengine_us") is not None: + from policyengine.tax_benefit_models import us as us +else: # pragma: no cover + us = None # type: ignore[assignment] + +if find_spec("policyengine_uk") is not None: + from policyengine.tax_benefit_models import uk as uk +else: # pragma: no cover + uk = None # type: ignore[assignment] + +__all__ = ["Simulation", "outputs", "uk", "us"] diff --git a/src/policyengine/cli.py b/src/policyengine/cli.py index add36388..3a659643 100644 --- a/src/policyengine/cli.py +++ b/src/policyengine/cli.py @@ -6,7 +6,7 @@ - ``trace-tro-validate `` validate a TRO against the shipped schema - ``release-manifest `` print the bundled country manifest -See :mod:`policyengine.core.trace_tro` and ``docs/release-bundles.md``. +See :mod:`policyengine.provenance.trace` and ``docs/release-bundles.md``. """ from __future__ import annotations @@ -18,11 +18,11 @@ from pathlib import Path from typing import Optional, Sequence -from policyengine.core.release_manifest import ( +from policyengine.provenance.manifest import ( get_data_release_manifest, get_release_manifest, ) -from policyengine.core.trace_tro import ( +from policyengine.provenance.trace import ( build_trace_tro_from_release_bundle, serialize_trace_tro, ) diff --git a/src/policyengine/core/__init__.py b/src/policyengine/core/__init__.py index 71ca0132..4f749de4 100644 --- a/src/policyengine/core/__init__.py +++ b/src/policyengine/core/__init__.py @@ -1,3 +1,11 @@ +"""Core value objects: Dataset, Variable, Parameter, Policy, Simulation, Region. + +Provenance (release manifests, TRACE TROs) lives in +:mod:`policyengine.provenance` and is intentionally not re-exported +here — importing a core value object should not pull in the +provenance layer. 
+""" + from .dataset import Dataset from .dataset import YearData as YearData from .dataset import map_to_entity as map_to_entity @@ -11,22 +19,6 @@ from .region import Region as Region from .region import RegionRegistry as RegionRegistry from .region import RegionType as RegionType -from .release_manifest import CertifiedDataArtifact as CertifiedDataArtifact -from .release_manifest import CountryReleaseManifest as CountryReleaseManifest -from .release_manifest import DataBuildInfo as DataBuildInfo -from .release_manifest import DataCertification as DataCertification -from .release_manifest import DataPackageVersion as DataPackageVersion -from .release_manifest import DataReleaseArtifact as DataReleaseArtifact -from .release_manifest import DataReleaseManifest as DataReleaseManifest -from .release_manifest import PackageVersion as PackageVersion -from .release_manifest import ( - certify_data_release_compatibility as certify_data_release_compatibility, -) -from .release_manifest import get_data_release_manifest as get_data_release_manifest -from .release_manifest import get_release_manifest as get_release_manifest -from .release_manifest import ( - resolve_managed_dataset_reference as resolve_managed_dataset_reference, -) from .scoping_strategy import RegionScopingStrategy as RegionScopingStrategy from .scoping_strategy import RowFilterStrategy as RowFilterStrategy from .scoping_strategy import ScopingStrategy as ScopingStrategy @@ -38,19 +30,6 @@ from .tax_benefit_model_version import ( TaxBenefitModelVersion as TaxBenefitModelVersion, ) -from .trace_tro import ( - build_simulation_trace_tro as build_simulation_trace_tro, -) -from .trace_tro import ( - build_trace_tro_from_release_bundle as build_trace_tro_from_release_bundle, -) -from .trace_tro import ( - compute_trace_composition_fingerprint as compute_trace_composition_fingerprint, -) -from .trace_tro import ( - extract_bundle_tro_reference as extract_bundle_tro_reference, -) -from .trace_tro import 
serialize_trace_tro as serialize_trace_tro from .variable import Variable as Variable # Rebuild models to resolve forward references diff --git a/src/policyengine/core/scoping_strategy.py b/src/policyengine/core/scoping_strategy.py index 7d9b5126..81778f47 100644 --- a/src/policyengine/core/scoping_strategy.py +++ b/src/policyengine/core/scoping_strategy.py @@ -14,7 +14,6 @@ from pathlib import Path from typing import Annotated, Literal, Optional, Union -import h5py import numpy as np import pandas as pd from microdf import MicroDataFrame @@ -69,7 +68,7 @@ class RowFilterStrategy(RegionScopingStrategy): strategy_type: Literal["row_filter"] = "row_filter" variable_name: str - variable_value: str + variable_value: Union[str, int, float] def apply( self, @@ -127,7 +126,11 @@ def apply( region_id = self._find_region_index(lookup_df, self.region_code) - # Download weight matrix and extract weights for this region + # Download weight matrix and extract weights for this region. + # h5py is only needed here, so import lazily to keep + # `from policyengine.core import ...` light. + import h5py + weights_path = download_gcs_file( bucket=self.weight_matrix_bucket, file_path=self.weight_matrix_key, diff --git a/src/policyengine/core/simulation.py b/src/policyengine/core/simulation.py index 5002b141..e4b261ee 100644 --- a/src/policyengine/core/simulation.py +++ b/src/policyengine/core/simulation.py @@ -1,9 +1,9 @@ import logging from datetime import datetime -from typing import Optional +from typing import Any, Optional, Union from uuid import uuid4 -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, model_validator from .cache import LRUCache from .dataset import Dataset @@ -18,12 +18,62 @@ class Simulation(BaseModel): + """Population microsimulation over a certified dataset. + + Canonical call shape: + + .. 
code-block:: python + + import policyengine as pe + from policyengine.core import Simulation + + datasets = pe.us.ensure_datasets( + datasets=["hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5"], + years=[2026], data_folder="./data", + ) + dataset = datasets["enhanced_cps_2024_2026"] + + # Baseline + baseline = Simulation(dataset=dataset, tax_benefit_model_version=pe.us.model) + + # Reform — same flat dict shape as pe.us.calculate_household(reform=...). + # Parameter path indexing uses "[0].amount" for scale/breakdown entries. + reform = Simulation( + dataset=dataset, + tax_benefit_model_version=pe.us.model, + policy={"gov.irs.credits.ctc.amount.base[0].amount": 3_000}, + ) + + baseline.ensure() + reform.ensure() + + The ``policy`` / ``dynamic`` kwargs accept either a ``Policy`` / + ``Dynamic`` object or a flat ``{"param.path": value}`` / + ``{"param.path": {date: value}}`` dict that is compiled against + ``tax_benefit_model_version`` at construction time (unknown paths + raise with close-match suggestions). Scalar values default to + ``{dataset.year}-01-01`` as their effective date. + + See ``policyengine.core.scoping_strategy`` for sub-national scoping. + """ + id: str = Field(default_factory=lambda: str(uuid4())) created_at: datetime = Field(default_factory=datetime.now) updated_at: datetime = Field(default_factory=datetime.now) - policy: Optional[Policy] = None - dynamic: Optional[Dynamic] = None + policy: Optional[Union[Policy, dict[str, Any]]] = Field( + default=None, + description=( + "Reform policy. Pass a ``Policy`` directly, or a flat " + "``{'param.path': value}`` / ``{'param.path': {date: value}}`` " + "dict and it will be compiled against " + "``tax_benefit_model_version`` at construction time." + ), + ) + dynamic: Optional[Union[Dynamic, dict[str, Any]]] = Field( + default=None, + description=("Behavioural-response overlay. 
Same dict shape as ``policy``."), + ) dataset: Dataset = None scoping_strategy: Optional[ScopingStrategy] = Field( @@ -44,6 +94,42 @@ class Simulation(BaseModel): output_dataset: Optional[Dataset] = None + @model_validator(mode="after") + def _compile_dict_reforms(self) -> "Simulation": + """Coerce dict ``policy`` / ``dynamic`` inputs into proper objects. + + Runs at ``mode="after"`` because compiling needs both + ``tax_benefit_model_version`` (for path validation) and + ``dataset.year`` (for effective-date defaulting) — both on ``self``. + """ + from policyengine.tax_benefit_models.common.reform import ( + compile_reform_to_dynamic, + compile_reform_to_policy, + ) + + year = getattr(self.dataset, "year", None) + for field, compiler in ( + ("policy", compile_reform_to_policy), + ("dynamic", compile_reform_to_dynamic), + ): + value = getattr(self, field) + if not isinstance(value, dict): + continue + if self.tax_benefit_model_version is None: + raise ValueError( + f"Cannot compile a dict {field} without " + "tax_benefit_model_version; pass tax_benefit_model_version or a " + f"{field.capitalize()}." 
+ ) + setattr( + self, + field, + compiler( + value, year=year, model_version=self.tax_benefit_model_version + ), + ) + return self + def run(self): self.tax_benefit_model_version.run(self) diff --git a/src/policyengine/core/tax_benefit_model_version.py b/src/policyengine/core/tax_benefit_model_version.py index eeddef85..5eb8f525 100644 --- a/src/policyengine/core/tax_benefit_model_version.py +++ b/src/policyengine/core/tax_benefit_model_version.py @@ -4,14 +4,15 @@ from pydantic import BaseModel, Field -from .release_manifest import ( +from policyengine.provenance.manifest import ( CountryReleaseManifest, DataCertification, PackageVersion, get_data_release_manifest, ) +from policyengine.provenance.trace import build_trace_tro_from_release_bundle + from .tax_benefit_model import TaxBenefitModel -from .trace_tro import build_trace_tro_from_release_bundle if TYPE_CHECKING: from .parameter import Parameter diff --git a/src/policyengine/countries/uk/regions.py b/src/policyengine/countries/uk/regions.py index d90f0ad0..32430d48 100644 --- a/src/policyengine/countries/uk/regions.py +++ b/src/policyengine/countries/uk/regions.py @@ -15,11 +15,11 @@ from typing import TYPE_CHECKING from policyengine.core.region import Region, RegionRegistry -from policyengine.core.release_manifest import resolve_region_dataset_path from policyengine.core.scoping_strategy import ( RowFilterStrategy, WeightReplacementStrategy, ) +from policyengine.provenance.manifest import resolve_region_dataset_path if TYPE_CHECKING: pass diff --git a/src/policyengine/countries/us/regions.py b/src/policyengine/countries/us/regions.py index 9e20d8b3..ca2f6b4f 100644 --- a/src/policyengine/countries/us/regions.py +++ b/src/policyengine/countries/us/regions.py @@ -8,8 +8,8 @@ """ from policyengine.core.region import Region, RegionRegistry -from policyengine.core.release_manifest import resolve_region_dataset_path from policyengine.core.scoping_strategy import RowFilterStrategy +from 
policyengine.provenance.manifest import resolve_region_dataset_path from .data import AT_LARGE_STATES, DISTRICT_COUNTS, US_PLACES, US_STATES diff --git a/src/policyengine/data/release_manifests/uk.json b/src/policyengine/data/release_manifests/uk.json index de8fa505..961defbd 100644 --- a/src/policyengine/data/release_manifests/uk.json +++ b/src/policyengine/data/release_manifests/uk.json @@ -1,8 +1,8 @@ { "schema_version": 1, - "bundle_id": "uk-3.5.0", + "bundle_id": "uk-4.0.0", "country_id": "uk", - "policyengine_version": "3.5.0", + "policyengine_version": "4.0.0", "model_package": { "name": "policyengine-uk", "version": "2.88.0", diff --git a/src/policyengine/data/release_manifests/us.json b/src/policyengine/data/release_manifests/us.json index b005eda9..0016aa8a 100644 --- a/src/policyengine/data/release_manifests/us.json +++ b/src/policyengine/data/release_manifests/us.json @@ -1,8 +1,8 @@ { "schema_version": 1, - "bundle_id": "us-3.5.0", + "bundle_id": "us-4.0.0", "country_id": "us", - "policyengine_version": "3.5.0", + "policyengine_version": "4.0.0", "model_package": { "name": "policyengine-us", "version": "1.653.3", diff --git a/src/policyengine/outputs/__init__.py b/src/policyengine/outputs/__init__.py index 61311f46..13ff2a26 100644 --- a/src/policyengine/outputs/__init__.py +++ b/src/policyengine/outputs/__init__.py @@ -49,6 +49,7 @@ calculate_us_poverty_by_race, calculate_us_poverty_rates, ) +from policyengine.outputs.program_statistics import ProgramStatistics __all__ = [ "Output", @@ -59,6 +60,7 @@ "ChangeAggregateType", "DecileImpact", "calculate_decile_impacts", + "ProgramStatistics", "IntraDecileImpact", "compute_intra_decile_impacts", "Poverty", diff --git a/src/policyengine/outputs/constituency_impact.py b/src/policyengine/outputs/constituency_impact.py index 60f76e0b..02e1bdfd 100644 --- a/src/policyengine/outputs/constituency_impact.py +++ b/src/policyengine/outputs/constituency_impact.py @@ -7,7 +7,6 @@ from typing import TYPE_CHECKING, 
Optional -import h5py import numpy as np import pandas as pd from pydantic import ConfigDict @@ -43,6 +42,8 @@ def run(self) -> None: constituency_df = pd.read_csv(self.constituency_csv_path) # Load weight matrix: shape (N_constituencies, N_households) + import h5py + with h5py.File(self.weight_matrix_path, "r") as f: weight_matrix = f[self.year][...] diff --git a/src/policyengine/outputs/local_authority_impact.py b/src/policyengine/outputs/local_authority_impact.py index 20b17efe..a4850dbf 100644 --- a/src/policyengine/outputs/local_authority_impact.py +++ b/src/policyengine/outputs/local_authority_impact.py @@ -7,7 +7,6 @@ from typing import TYPE_CHECKING, Optional -import h5py import numpy as np import pandas as pd from pydantic import ConfigDict @@ -43,6 +42,8 @@ def run(self) -> None: la_df = pd.read_csv(self.local_authority_csv_path) # Load weight matrix: shape (N_local_authorities, N_households) + import h5py + with h5py.File(self.weight_matrix_path, "r") as f: weight_matrix = f[self.year][...] diff --git a/src/policyengine/tax_benefit_models/us/outputs.py b/src/policyengine/outputs/program_statistics.py similarity index 98% rename from src/policyengine/tax_benefit_models/us/outputs.py rename to src/policyengine/outputs/program_statistics.py index 1dd6f001..a48ff8a8 100644 --- a/src/policyengine/tax_benefit_models/us/outputs.py +++ b/src/policyengine/outputs/program_statistics.py @@ -1,4 +1,4 @@ -"""US-specific output templates.""" +"""Shared `ProgramStatistics` for reform-impact tables (US + UK).""" from typing import Optional diff --git a/src/policyengine/provenance/__init__.py b/src/policyengine/provenance/__init__.py new file mode 100644 index 00000000..548b7fc6 --- /dev/null +++ b/src/policyengine/provenance/__init__.py @@ -0,0 +1,89 @@ +"""Release-bundle provenance + TRACE TRO emission. 
+ +Separated from :mod:`policyengine.core` so the value-object layer +(Dataset, Variable, Parameter, Policy, Simulation, Region) doesn't +force provenance imports on every consumer. + +.. code-block:: python + + from policyengine.provenance import ( + get_release_manifest, + get_data_release_manifest, + build_trace_tro_from_release_bundle, + build_simulation_trace_tro, + serialize_trace_tro, + ) +""" + +from .manifest import ( + CertifiedDataArtifact as CertifiedDataArtifact, +) +from .manifest import ( + CountryReleaseManifest as CountryReleaseManifest, +) +from .manifest import ( + DataBuildInfo as DataBuildInfo, +) +from .manifest import ( + DataCertification as DataCertification, +) +from .manifest import ( + DataPackageVersion as DataPackageVersion, +) +from .manifest import ( + DataReleaseArtifact as DataReleaseArtifact, +) +from .manifest import ( + DataReleaseManifest as DataReleaseManifest, +) +from .manifest import ( + DataReleaseManifestUnavailableError as DataReleaseManifestUnavailableError, +) +from .manifest import ( + PackageVersion as PackageVersion, +) +from .manifest import ( + certify_data_release_compatibility as certify_data_release_compatibility, +) +from .manifest import ( + fetch_pypi_wheel_metadata as fetch_pypi_wheel_metadata, +) +from .manifest import ( + get_data_release_manifest as get_data_release_manifest, +) +from .manifest import ( + get_release_manifest as get_release_manifest, +) +from .manifest import ( + https_dataset_uri as https_dataset_uri, +) +from .manifest import ( + https_release_manifest_uri as https_release_manifest_uri, +) +from .manifest import ( + resolve_dataset_reference as resolve_dataset_reference, +) +from .manifest import ( + resolve_local_managed_dataset_source as resolve_local_managed_dataset_source, +) +from .manifest import ( + resolve_managed_dataset_reference as resolve_managed_dataset_reference, +) +from .trace import ( + build_simulation_trace_tro as build_simulation_trace_tro, +) +from .trace import ( 
+ build_trace_tro_from_release_bundle as build_trace_tro_from_release_bundle, +) +from .trace import ( + canonical_json_bytes as canonical_json_bytes, +) +from .trace import ( + compute_trace_composition_fingerprint as compute_trace_composition_fingerprint, +) +from .trace import ( + extract_bundle_tro_reference as extract_bundle_tro_reference, +) +from .trace import ( + serialize_trace_tro as serialize_trace_tro, +) diff --git a/src/policyengine/core/release_manifest.py b/src/policyengine/provenance/manifest.py similarity index 100% rename from src/policyengine/core/release_manifest.py rename to src/policyengine/provenance/manifest.py diff --git a/src/policyengine/core/trace_tro.py b/src/policyengine/provenance/trace.py similarity index 99% rename from src/policyengine/core/trace_tro.py rename to src/policyengine/provenance/trace.py index 76f1661a..83ac0b5b 100644 --- a/src/policyengine/core/trace_tro.py +++ b/src/policyengine/provenance/trace.py @@ -24,7 +24,7 @@ from collections.abc import Iterable, Mapping from typing import Any, Optional -from .release_manifest import ( +from .manifest import ( CountryReleaseManifest, DataCertification, DataReleaseManifest, diff --git a/src/policyengine/results/trace_tro.py b/src/policyengine/results/trace_tro.py index d904d5b4..85c7aed8 100644 --- a/src/policyengine/results/trace_tro.py +++ b/src/policyengine/results/trace_tro.py @@ -5,7 +5,7 @@ specific reform + ``results.json`` payload so a published result can be cited with an immutable composition fingerprint. -See :mod:`policyengine.core.trace_tro` for the bundle-level layer. +See :mod:`policyengine.provenance.trace` for the bundle-level layer. 
""" from __future__ import annotations @@ -14,7 +14,7 @@ from pathlib import Path from typing import Optional, Union -from policyengine.core.trace_tro import ( +from policyengine.provenance.trace import ( build_simulation_trace_tro, serialize_trace_tro, ) diff --git a/src/policyengine/tax_benefit_models/common/__init__.py b/src/policyengine/tax_benefit_models/common/__init__.py new file mode 100644 index 00000000..654f350d --- /dev/null +++ b/src/policyengine/tax_benefit_models/common/__init__.py @@ -0,0 +1,16 @@ +"""Country-agnostic helpers for household calculation and reform analysis. + +The country modules (:mod:`policyengine.tax_benefit_models.us`, +:mod:`policyengine.tax_benefit_models.uk`) thread these helpers through +their public ``calculate_household`` / ``analyze_reform`` entry points. +""" + +from .extra_variables import dispatch_extra_variables as dispatch_extra_variables +from .model_version import ( + MicrosimulationModelVersion as MicrosimulationModelVersion, +) +from .reform import compile_reform as compile_reform +from .reform import compile_reform_to_dynamic as compile_reform_to_dynamic +from .reform import compile_reform_to_policy as compile_reform_to_policy +from .result import EntityResult as EntityResult +from .result import HouseholdResult as HouseholdResult diff --git a/src/policyengine/tax_benefit_models/common/extra_variables.py b/src/policyengine/tax_benefit_models/common/extra_variables.py new file mode 100644 index 00000000..e3426e6b --- /dev/null +++ b/src/policyengine/tax_benefit_models/common/extra_variables.py @@ -0,0 +1,52 @@ +"""Dispatch a flat ``extra_variables`` list to a per-entity mapping. + +Callers pass a flat list — ``extra_variables=["adjusted_gross_income", +"state_agi", "is_medicaid_eligible"]`` — and the library looks up each +name on the country model to figure out which entity it belongs on. +Unknown names raise with a close-match suggestion. 
+""" + +from __future__ import annotations + +from collections.abc import Iterable +from difflib import get_close_matches +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from policyengine.core.tax_benefit_model_version import TaxBenefitModelVersion + + +def dispatch_extra_variables( + *, + model_version: TaxBenefitModelVersion, + names: Iterable[str], +) -> dict[str, list[str]]: + """Group ``names`` by the entity each variable lives on. + + Raises :class:`ValueError` if any name is not defined on the model. + """ + by_entity: dict[str, list[str]] = {} + unknown: list[str] = [] + + variables_by_name = model_version.variables_by_name + for name in names: + variable = variables_by_name.get(name) + if variable is None: + unknown.append(name) + continue + by_entity.setdefault(variable.entity, []).append(name) + + if unknown: + lines = [ + f"extra_variables contains names not defined on " + f"{model_version.model.id} {model_version.version}:", + ] + for name in unknown: + suggestions = get_close_matches( + name, list(variables_by_name), n=1, cutoff=0.7 + ) + suggestion = f" (did you mean '{suggestions[0]}'?)" if suggestions else "" + lines.append(f" - '{name}'{suggestion}") + raise ValueError("\n".join(lines)) + + return by_entity diff --git a/src/policyengine/tax_benefit_models/common/model_version.py b/src/policyengine/tax_benefit_models/common/model_version.py new file mode 100644 index 00000000..dc5d44d8 --- /dev/null +++ b/src/policyengine/tax_benefit_models/common/model_version.py @@ -0,0 +1,257 @@ +"""Base class for country ``TaxBenefitModelVersion`` implementations. + +The US and UK model-version classes share roughly 300 lines of loading logic: +manifest certification, the variable-copy loop over the country ``system``, +the parameter-copy loop, entity-relationship construction, and simple +``save`` / ``load`` passthroughs. Only ``run`` (and the country-specific +``managed_microsimulation`` helper) diverge enough to warrant per-country +implementations. 
+ +This module extracts the shared behaviour into ``MicrosimulationModelVersion``. +Country subclasses declare class-level metadata (``country_code``, +``package_name``, ``group_entities``, ``entity_variables``) and override a +handful of thin hooks (``_load_system``, ``_load_region_registry``, +``_dataset_class``, ``run``). +""" + +from __future__ import annotations + +import datetime +import os +import warnings +from importlib import metadata +from pathlib import Path +from typing import TYPE_CHECKING, Any, ClassVar, Optional + +import pandas as pd + +from policyengine.core import ( + Parameter, + ParameterNode, + TaxBenefitModelVersion, + Variable, +) +from policyengine.provenance.manifest import ( + certify_data_release_compatibility, + get_release_manifest, +) +from policyengine.utils.entity_utils import build_entity_relationships +from policyengine.utils.parameter_labels import ( + build_scale_lookup, + generate_label_for_parameter, +) + +if TYPE_CHECKING: + from policyengine.core.simulation import Simulation + + +class MicrosimulationModelVersion(TaxBenefitModelVersion): + """Shared init / save / load logic for country microsim model versions. + + Subclasses must set the four class attributes below and implement the + country-specific hooks. ``run`` is intentionally left abstract: its + country-specific logic (reform application, simulation builder, output + post-processing) varies enough that a shared skeleton would hide real + divergences. 
+ """ + + # --- Subclass metadata ------------------------------------------------- + country_code: ClassVar[str] = "" + """ISO-ish country identifier used by the release manifest ("us"/"uk").""" + + package_name: ClassVar[str] = "" + """Distribution name used with ``importlib.metadata.version``.""" + + group_entities: ClassVar[list[str]] = [] + """Group entities (non-person) for this country, in dataset order.""" + + entity_variables: dict[str, list[str]] = {} + """Variables to materialise per entity when writing output datasets.""" + + # --- Construction ------------------------------------------------------ + def __init__(self, **kwargs: Any) -> None: + if not self.country_code or not self.package_name: + raise RuntimeError( + f"{type(self).__name__} must declare country_code and " + "package_name class attributes" + ) + + manifest = get_release_manifest(self.country_code) + if kwargs.get("version") is None: + kwargs["version"] = manifest.model_package.version + + installed_model_version = metadata.version(self.package_name) + if installed_model_version != manifest.model_package.version: + warnings.warn( + f"Installed {self.package_name} version " + f"({installed_model_version}) does not match the bundled " + "policyengine.py manifest " + f"({manifest.model_package.version}). Calculations will " + "run against the installed version, but dataset " + "compatibility is not guaranteed. 
To silence this " + "warning, install the version pinned by the manifest.", + UserWarning, + stacklevel=2, + ) + + model_build_metadata = self._get_runtime_data_build_metadata() + data_certification = certify_data_release_compatibility( + self.country_code, + runtime_model_version=installed_model_version, + runtime_data_build_fingerprint=model_build_metadata.get( + "data_build_fingerprint" + ), + ) + + super().__init__(**kwargs) + self.release_manifest = manifest + self.model_package = manifest.model_package + self.data_package = manifest.data_package + self.default_dataset_uri = manifest.default_dataset_uri + self.data_certification = data_certification + self.region_registry = self._load_region_registry() + self.id = f"{self.model.id}@{self.version}" + + system = self._load_system() + self._populate_variables(system) + self._populate_parameters(system) + + # --- Hooks ------------------------------------------------------------ + @classmethod + def _get_runtime_data_build_metadata(cls) -> dict[str, Optional[str]]: + """Return build metadata from the country package, if available.""" + raise NotImplementedError + + def _load_system(self): + """Return the country package's ``system`` object.""" + raise NotImplementedError + + def _load_region_registry(self): + """Return the country's ``RegionRegistry``.""" + raise NotImplementedError + + @property + def _dataset_class(self): + """Return the country's ``PolicyEngine{Country}Dataset`` class.""" + raise NotImplementedError + + # --- Shared loading helpers ------------------------------------------ + def _populate_variables(self, system) -> None: + from policyengine_core.enums import Enum + from policyengine_core.parameters.operations.get_parameter import ( + get_parameter, + ) + + for var_obj in system.variables.values(): + default_val = var_obj.default_value + if var_obj.value_type is Enum: + default_val = default_val.name + elif var_obj.value_type is datetime.date: + default_val = default_val.isoformat() + + 
variable = Variable( + id=self.id + "-" + var_obj.name, + name=var_obj.name, + label=getattr(var_obj, "label", None), + tax_benefit_model_version=self, + entity=var_obj.entity.key, + description=var_obj.documentation, + data_type=( + var_obj.value_type if var_obj.value_type is not Enum else str + ), + default_value=default_val, + value_type=var_obj.value_type, + ) + if ( + hasattr(var_obj, "possible_values") + and var_obj.possible_values is not None + ): + variable.possible_values = list( + map( + lambda x: x.name, + var_obj.possible_values._value2member_map_.values(), + ) + ) + # Resolve parameter-path adds/subtracts to concrete lists so + # consumers always see list[str]. + for attr in ("adds", "subtracts"): + value = getattr(var_obj, attr, None) + if value is None: + continue + if isinstance(value, str): + try: + param = get_parameter(system.parameters, value) + setattr(variable, attr, list(param("2025-01-01"))) + except Exception: + setattr(variable, attr, None) + else: + setattr(variable, attr, value) + self.add_variable(variable) + + def _populate_parameters(self, system) -> None: + from policyengine_core.parameters import Parameter as CoreParameter + from policyengine_core.parameters import ParameterNode as CoreParameterNode + + scale_lookup = build_scale_lookup(system) + + for param_node in system.parameters.get_descendants(): + if isinstance(param_node, CoreParameter): + parameter = Parameter( + id=self.id + "-" + param_node.name, + name=param_node.name, + label=generate_label_for_parameter( + param_node, system, scale_lookup + ), + tax_benefit_model_version=self, + description=param_node.description, + data_type=type(param_node(2025)), + unit=param_node.metadata.get("unit"), + _core_param=param_node, + ) + self.add_parameter(parameter) + elif isinstance(param_node, CoreParameterNode): + node = ParameterNode( + id=self.id + "-" + param_node.name, + name=param_node.name, + label=param_node.metadata.get("label"), + description=param_node.description, + 
tax_benefit_model_version=self, + ) + self.add_parameter_node(node) + + # --- Shared run-surface helpers -------------------------------------- + def _build_entity_relationships(self, dataset) -> pd.DataFrame: + """Build a DataFrame mapping each person to their containing entities.""" + person_data = pd.DataFrame(dataset.data.person) + return build_entity_relationships(person_data, self.group_entities) + + def save(self, simulation: Simulation) -> None: + """Persist the simulation's output dataset to its bundled filepath.""" + simulation.output_dataset.save() + + def load(self, simulation: Simulation) -> None: + """Rehydrate the simulation's output dataset from disk. + + Loads timestamps from filesystem metadata when the file exists so + serialised simulations round-trip ``created_at``/``updated_at``. + """ + filepath = str( + Path(simulation.dataset.filepath).parent / (simulation.id + ".h5") + ) + + simulation.output_dataset = self._dataset_class( + id=simulation.id, + name=simulation.dataset.name, + description=simulation.dataset.description, + filepath=filepath, + year=simulation.dataset.year, + is_output_dataset=True, + ) + + if os.path.exists(filepath): + simulation.created_at = datetime.datetime.fromtimestamp( + os.path.getctime(filepath) + ) + simulation.updated_at = datetime.datetime.fromtimestamp( + os.path.getmtime(filepath) + ) diff --git a/src/policyengine/tax_benefit_models/common/reform.py b/src/policyengine/tax_benefit_models/common/reform.py new file mode 100644 index 00000000..60b564f4 --- /dev/null +++ b/src/policyengine/tax_benefit_models/common/reform.py @@ -0,0 +1,197 @@ +"""Compile a simple reform dict into the format policyengine_core expects. + +Accepted shapes for the agent-facing API: + +.. code-block:: python + + # Scalar — applied from Jan 1 of ``year`` (the simulation year). + reform = {"gov.irs.deductions.salt.cap": 0} + + # With explicit effective date(s). 
+ reform = {"gov.irs.deductions.salt.cap": {"2026-01-01": 0}} + + # Multiple parameters. + reform = { + "gov.irs.deductions.salt.cap": 0, + "gov.irs.credits.ctc.amount.base[0].amount": 2500, + } + +**Indexed parameters.** Many PolicyEngine parameters are *breakdown* +entries keyed by a bracket index (age group, filing status, etc.). +Their paths end with ``[N].amount`` / ``[N].threshold``. For example +the CTC base amount in 2026 is +``gov.irs.credits.ctc.amount.base[0].amount`` (not ``...base``); +the top-bracket SS wage base is ``gov.ssa.payroll.cap``. If a reform +dict uses the bracket-head path instead of ``[0].amount`` the +``ValueError`` will list the close match. + +The compiled form is ``{param_path: {period: value}}`` — exactly what +``policyengine_us.Simulation(reform=...)`` / +``policyengine_uk.Simulation(reform=...)`` accept at construction. + +Scalar reforms default to ``{year}-01-01`` so a caller running +mid-year does not accidentally get a blended partial-year result. +Unknown parameter paths raise ``ValueError`` with a close-match +suggestion; pass ``model_version`` to enable the check. +""" + +from __future__ import annotations + +import datetime +from collections.abc import Mapping +from difflib import get_close_matches +from typing import TYPE_CHECKING, Any, Optional + +if TYPE_CHECKING: + from policyengine.core.dynamic import Dynamic + from policyengine.core.policy import Policy + from policyengine.core.tax_benefit_model_version import TaxBenefitModelVersion + + +def compile_reform( + reform: Optional[Mapping[str, Any]], + *, + year: Optional[int] = None, + model_version: Optional[TaxBenefitModelVersion] = None, +) -> Optional[dict[str, dict[str, Any]]]: + """Compile a simple reform dict to the core reform-dict format. + + Args: + reform: Flat mapping from parameter path to either a scalar + (applied from ``{year}-01-01``) or a ``{effective_date: value}`` + mapping. + year: Simulation year. 
Used as the default effective date for + scalar values so a mid-year call still targets the whole year. + model_version: If provided, parameter paths are validated + against ``model_version.parameters_by_name`` and unknown + paths raise with a close-match suggestion. + """ + if not reform: + return None + + default_date = f"{year}-01-01" if year is not None else "1900-01-01" + + if model_version is not None: + valid = set(model_version.parameters_by_name) + unknown = [path for path in reform if path not in valid] + if unknown: + lines = [ + f"Reform contains parameter paths not defined on " + f"{model_version.model.id} {model_version.version}:", + ] + for path in unknown: + suggestions = get_close_matches(path, valid, n=1, cutoff=0.7) + hint = f" (did you mean '{suggestions[0]}'?)" if suggestions else "" + lines.append(f" - '{path}'{hint}") + raise ValueError("\n".join(lines)) + + compiled: dict[str, dict[str, Any]] = {} + for parameter_path, spec in reform.items(): + if isinstance(spec, Mapping): + compiled[parameter_path] = {str(k): v for k, v in spec.items()} + else: + compiled[parameter_path] = {default_date: spec} + return compiled + + +def _reform_dict_to_parameter_values( + reform: Mapping[str, Any], + *, + year: Optional[int], + model_version: TaxBenefitModelVersion, +) -> list: + """Compile a flat reform dict into a list of ``ParameterValue`` objects. + + Uses :func:`compile_reform` for path validation and effective-date + defaulting, then materialises each ``{path: {date: value}}`` pair + as an open-ended ``ParameterValue`` bound to a + ``Parameter(name=path, tax_benefit_model_version=model_version)``. 
+ """ + from policyengine.core.parameter import Parameter + from policyengine.core.parameter_value import ParameterValue + + compiled = compile_reform(reform, year=year, model_version=model_version) + if compiled is None: + return [] + + parameter_values: list[ParameterValue] = [] + for path, date_to_value in compiled.items(): + for effective_date, value in date_to_value.items(): + data_type = type(value) if isinstance(value, (int, float, bool)) else float + parameter_values.append( + ParameterValue( + parameter=Parameter( + name=path, + tax_benefit_model_version=model_version, + data_type=data_type, + ), + start_date=datetime.datetime.strptime(effective_date, "%Y-%m-%d"), + end_date=None, + value=value, + ) + ) + return parameter_values + + +def _compile_reform_to( + cls, + default_name: str, + reform: Optional[Mapping[str, Any]], + *, + year: Optional[int], + model_version: TaxBenefitModelVersion, + name: Optional[str] = None, +): + parameter_values = _reform_dict_to_parameter_values( + reform or {}, year=year, model_version=model_version + ) + if not parameter_values: + return None + return cls(name=name or default_name, parameter_values=parameter_values) + + +def compile_reform_to_policy( + reform: Optional[Mapping[str, Any]], + *, + year: Optional[int], + model_version: TaxBenefitModelVersion, + name: Optional[str] = None, +) -> Optional[Policy]: + """Compile a flat reform dict into a fully-assembled ``Policy``. + + Accepts the same ``{param.path: value}`` / + ``{param.path: {date: value}}`` shape as :func:`compile_reform`, + but returns a ready-to-use ``Policy`` with ``ParameterValue`` + objects so ``Simulation(policy={...})`` works without hand-building + ``Parameter`` / ``ParameterValue``. 
+ """ + from policyengine.core.policy import Policy + + return _compile_reform_to( + Policy, + "Reform", + reform, + year=year, + model_version=model_version, + name=name, + ) + + +def compile_reform_to_dynamic( + reform: Optional[Mapping[str, Any]], + *, + year: Optional[int], + model_version: TaxBenefitModelVersion, + name: Optional[str] = None, +) -> Optional[Dynamic]: + """``Dynamic`` counterpart of :func:`compile_reform_to_policy`.""" + from policyengine.core.dynamic import Dynamic + + return _compile_reform_to( + Dynamic, + "Dynamic response", + reform, + year=year, + model_version=model_version, + name=name, + ) diff --git a/src/policyengine/tax_benefit_models/common/result.py b/src/policyengine/tax_benefit_models/common/result.py new file mode 100644 index 00000000..e73fa406 --- /dev/null +++ b/src/policyengine/tax_benefit_models/common/result.py @@ -0,0 +1,79 @@ +"""Dot-access result containers returned by ``calculate_household``. + +A result is intentionally thin: it's a ``dict`` subclass that also +supports attribute access, so callers can write either +``result.tax_unit.income_tax`` or ``result["tax_unit"]["income_tax"]``. +The dict shape keeps JSON serialization trivial. +""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any, Union + + +class EntityResult(dict): + """One entity's computed variables with dict AND attribute access. + + Raises :class:`AttributeError` with the list of available variables + when a caller accesses an unknown name, so typos surface a + paste-able fix instead of silently returning ``None``. + """ + + def __getattr__(self, name: str) -> Any: + if name.startswith("_"): + raise AttributeError(name) + if name in self: + return self[name] + available = ", ".join(sorted(self)) + raise AttributeError( + f"entity has no variable '{name}'. Available: {available}. 
" + f"Pass extra_variables=['{name}'] to calculate_household if " + f"'{name}' is a valid variable on the country model that is " + f"not in the default output columns." + ) + + def __setattr__(self, name: str, value: Any) -> None: # pragma: no cover + self[name] = value + + +class HouseholdResult(dict): + """Full household calculation result; one key per entity. + + Singleton entities (``household``, ``tax_unit``, ``benunit``, ...) + map to a single :class:`EntityResult`; multi-member entities (like + ``person``) map to a ``list[EntityResult]``. + """ + + def __getattr__(self, name: str) -> Any: + if name.startswith("_"): + raise AttributeError(name) + if name in self: + return self[name] + available = ", ".join(sorted(self)) + raise AttributeError( + f"no entity '{name}' on this result. Available entities: {available}" + ) + + def __setattr__(self, name: str, value: Any) -> None: # pragma: no cover + self[name] = value + + def to_dict(self) -> dict[str, Any]: + """Return a plain ``dict[str, Any]`` copy suitable for JSON dumps.""" + + def _convert(value: Any) -> Any: + if isinstance(value, EntityResult): + return dict(value) + if isinstance(value, list): + return [_convert(v) for v in value] + return value + + return {key: _convert(val) for key, val in self.items()} + + def write(self, path: Union[str, Path]) -> Path: + """Write the result to a JSON file and return the path.""" + path = Path(path) + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(self.to_dict(), indent=2) + "\n") + return path diff --git a/src/policyengine/tax_benefit_models/uk/__init__.py b/src/policyengine/tax_benefit_models/uk/__init__.py index 93533245..3ab098e2 100644 --- a/src/policyengine/tax_benefit_models/uk/__init__.py +++ b/src/policyengine/tax_benefit_models/uk/__init__.py @@ -1,16 +1,23 @@ -"""PolicyEngine UK tax-benefit model.""" +"""PolicyEngine UK tax-benefit model. + +.. 
code-block:: python + + import policyengine as pe + + result = pe.uk.calculate_household( + people=[{"age": 30, "employment_income": 50000}], + year=2026, + ) + print(result.person[0].income_tax, result.household.hbai_household_net_income) +""" from importlib.util import find_spec if find_spec("policyengine_uk") is not None: from policyengine.core import Dataset + from policyengine.outputs import ProgramStatistics - from .analysis import ( - UKHouseholdInput, - UKHouseholdOutput, - calculate_household_impact, - economic_impact_analysis, - ) + from .analysis import economic_impact_analysis from .datasets import ( PolicyEngineUKDataset, UKYearData, @@ -18,21 +25,22 @@ ensure_datasets, load_datasets, ) + from .household import calculate_household from .model import ( PolicyEngineUK, PolicyEngineUKLatest, managed_microsimulation, uk_latest, - uk_model, ) - from .outputs import ProgrammeStatistics - # Rebuild Pydantic models to resolve forward references + model = uk_latest + """The pinned UK ``TaxBenefitModelVersion`` for this policyengine release.""" + Dataset.model_rebuild() UKYearData.model_rebuild() PolicyEngineUKDataset.model_rebuild() PolicyEngineUKLatest.model_rebuild() - ProgrammeStatistics.model_rebuild() + ProgramStatistics.model_rebuild() __all__ = [ "UKYearData", @@ -43,13 +51,11 @@ "PolicyEngineUK", "PolicyEngineUKLatest", "managed_microsimulation", - "uk_model", + "model", "uk_latest", + "calculate_household", "economic_impact_analysis", - "calculate_household_impact", - "UKHouseholdInput", - "UKHouseholdOutput", - "ProgrammeStatistics", + "ProgramStatistics", ] else: __all__ = [] diff --git a/src/policyengine/tax_benefit_models/uk/analysis.py b/src/policyengine/tax_benefit_models/uk/analysis.py index b05e21b0..f37d18be 100644 --- a/src/policyengine/tax_benefit_models/uk/analysis.py +++ b/src/policyengine/tax_benefit_models/uk/analysis.py @@ -1,15 +1,16 @@ -"""General utility functions for UK policy reform analysis.""" +"""Microsimulation reform analysis 
for the UK model. -import tempfile -from pathlib import Path -from typing import Any, Optional +The single-household calculator lives in :mod:`.household`; this module +holds the population-level reform-analysis helpers. +""" + +from __future__ import annotations import pandas as pd -from microdf import MicroDataFrame -from pydantic import BaseModel, Field +from pydantic import BaseModel from policyengine.core import OutputCollection, Simulation -from policyengine.core.policy import Policy +from policyengine.outputs import ProgramStatistics from policyengine.outputs.decile_impact import ( DecileImpact, calculate_decile_impacts, @@ -23,140 +24,12 @@ calculate_uk_poverty_rates, ) -from .datasets import PolicyEngineUKDataset, UKYearData -from .model import uk_latest -from .outputs import ProgrammeStatistics - - -class UKHouseholdOutput(BaseModel): - """Output from a UK household calculation with all entity data.""" - - person: list[dict[str, Any]] - benunit: list[dict[str, Any]] - household: dict[str, Any] - - -class UKHouseholdInput(BaseModel): - """Input for a UK household calculation.""" - - people: list[dict[str, Any]] - benunit: dict[str, Any] = Field(default_factory=dict) - household: dict[str, Any] = Field(default_factory=dict) - year: int = 2026 - - -def calculate_household_impact( - household_input: UKHouseholdInput, - policy: Optional[Policy] = None, -) -> UKHouseholdOutput: - """Calculate tax and benefit impacts for a single UK household.""" - n_people = len(household_input.people) - - # Build person data with defaults - person_data = { - "person_id": list(range(n_people)), - "person_benunit_id": [0] * n_people, - "person_household_id": [0] * n_people, - "person_weight": [1.0] * n_people, - } - # Add user-provided person fields - for i, person in enumerate(household_input.people): - for key, value in person.items(): - if key not in person_data: - person_data[key] = [0.0] * n_people # Default to 0 for numeric fields - person_data[key][i] = value - - # Build 
benunit data with defaults - benunit_data = { - "benunit_id": [0], - "benunit_weight": [1.0], - } - for key, value in household_input.benunit.items(): - benunit_data[key] = [value] - - # Build household data with defaults (required for uprating) - household_data = { - "household_id": [0], - "household_weight": [1.0], - "region": ["LONDON"], - "tenure_type": ["RENT_PRIVATELY"], - "council_tax": [0.0], - "rent": [0.0], - } - for key, value in household_input.household.items(): - household_data[key] = [value] - - # Create MicroDataFrames - person_df = MicroDataFrame(pd.DataFrame(person_data), weights="person_weight") - benunit_df = MicroDataFrame(pd.DataFrame(benunit_data), weights="benunit_weight") - household_df = MicroDataFrame( - pd.DataFrame(household_data), weights="household_weight" - ) - - # Create temporary dataset - tmpdir = tempfile.mkdtemp() - filepath = str(Path(tmpdir) / "household_impact.h5") - - dataset = PolicyEngineUKDataset( - name="Household impact calculation", - description="Single household for impact calculation", - filepath=filepath, - year=household_input.year, - data=UKYearData( - person=person_df, - benunit=benunit_df, - household=household_df, - ), - ) - - # Run simulation - simulation = Simulation( - dataset=dataset, - tax_benefit_model_version=uk_latest, - policy=policy, - ) - simulation.run() - - # Extract all output variables defined in entity_variables - output_data = simulation.output_dataset.data - - def safe_convert(value): - """Convert value to float if numeric, otherwise return as string.""" - try: - return float(value) - except (ValueError, TypeError): - return str(value) - - person_outputs = [] - for i in range(n_people): - person_dict = {} - for var in uk_latest.entity_variables["person"]: - person_dict[var] = safe_convert(output_data.person[var].iloc[i]) - person_outputs.append(person_dict) - - benunit_outputs = [] - for i in range(len(output_data.benunit)): - benunit_dict = {} - for var in 
uk_latest.entity_variables["benunit"]: - benunit_dict[var] = safe_convert(output_data.benunit[var].iloc[i]) - benunit_outputs.append(benunit_dict) - - household_dict = {} - for var in uk_latest.entity_variables["household"]: - household_dict[var] = safe_convert(output_data.household[var].iloc[0]) - - return UKHouseholdOutput( - person=person_outputs, - benunit=benunit_outputs, - household=household_dict, - ) - class PolicyReformAnalysis(BaseModel): """Complete policy reform analysis result.""" decile_impacts: OutputCollection[DecileImpact] - programme_statistics: OutputCollection[ProgrammeStatistics] + program_statistics: OutputCollection[ProgramStatistics] baseline_poverty: OutputCollection[Poverty] reform_poverty: OutputCollection[Poverty] baseline_inequality: Inequality @@ -167,11 +40,7 @@ def economic_impact_analysis( baseline_simulation: Simulation, reform_simulation: Simulation, ) -> PolicyReformAnalysis: - """Perform comprehensive analysis of a policy reform. - - Returns: - PolicyReformAnalysis containing decile impacts and programme statistics - """ + """Perform comprehensive analysis of a UK policy reform.""" baseline_simulation.ensure() reform_simulation.ensure() @@ -182,20 +51,16 @@ def economic_impact_analysis( "Reform simulation must have more than 100 households" ) - # Decile impact decile_impacts = calculate_decile_impacts( baseline_simulation=baseline_simulation, reform_simulation=reform_simulation, ) - # Major programmes to analyse - programmes = { - # Tax + programs = { "income_tax": {"is_tax": True}, "national_insurance": {"is_tax": True}, "vat": {"is_tax": True}, "council_tax": {"is_tax": True}, - # Benefits "universal_credit": {"is_tax": False}, "child_benefit": {"is_tax": False}, "pension_credit": {"is_tax": False}, @@ -204,31 +69,27 @@ def economic_impact_analysis( "child_tax_credit": {"is_tax": False}, } - programme_statistics = [] - - for programme_name, programme_info in programmes.items(): + program_statistics = [] + for program_name, 
program_info in programs.items(): entity = baseline_simulation.tax_benefit_model_version.get_variable( - programme_name + program_name ).entity - is_tax = programme_info["is_tax"] - - stats = ProgrammeStatistics( + stats = ProgramStatistics( baseline_simulation=baseline_simulation, reform_simulation=reform_simulation, - programme_name=programme_name, + program_name=program_name, entity=entity, - is_tax=is_tax, + is_tax=program_info["is_tax"], ) stats.run() - programme_statistics.append(stats) + program_statistics.append(stats) - # Create DataFrame - programme_df = pd.DataFrame( + program_df = pd.DataFrame( [ { "baseline_simulation_id": p.baseline_simulation.id, "reform_simulation_id": p.reform_simulation.id, - "programme_name": p.programme_name, + "program_name": p.program_name, "entity": p.entity, "is_tax": p.is_tax, "baseline_total": p.baseline_total, @@ -239,25 +100,21 @@ def economic_impact_analysis( "winners": p.winners, "losers": p.losers, } - for p in programme_statistics + for p in program_statistics ] ) - - programme_collection = OutputCollection( - outputs=programme_statistics, dataframe=programme_df + program_collection = OutputCollection( + outputs=program_statistics, dataframe=program_df ) - # Calculate poverty rates for both simulations baseline_poverty = calculate_uk_poverty_rates(baseline_simulation) reform_poverty = calculate_uk_poverty_rates(reform_simulation) - - # Calculate inequality for both simulations baseline_inequality = calculate_uk_inequality(baseline_simulation) reform_inequality = calculate_uk_inequality(reform_simulation) return PolicyReformAnalysis( decile_impacts=decile_impacts, - programme_statistics=programme_collection, + program_statistics=program_collection, baseline_poverty=baseline_poverty, reform_poverty=reform_poverty, baseline_inequality=baseline_inequality, diff --git a/src/policyengine/tax_benefit_models/uk/datasets.py b/src/policyengine/tax_benefit_models/uk/datasets.py index 47f78403..e7207da7 100644 --- 
a/src/policyengine/tax_benefit_models/uk/datasets.py +++ b/src/policyengine/tax_benefit_models/uk/datasets.py @@ -6,7 +6,7 @@ from pydantic import ConfigDict from policyengine.core import Dataset, YearData -from policyengine.core.release_manifest import ( +from policyengine.provenance.manifest import ( dataset_logical_name, resolve_dataset_reference, ) diff --git a/src/policyengine/tax_benefit_models/uk/household.py b/src/policyengine/tax_benefit_models/uk/household.py new file mode 100644 index 00000000..5dbd71bb --- /dev/null +++ b/src/policyengine/tax_benefit_models/uk/household.py @@ -0,0 +1,191 @@ +"""Single-household calculation for the UK model. + +.. code-block:: python + + import policyengine as pe + + # Lone parent + one child, £30k wages. + result = pe.uk.calculate_household( + people=[ + {"age": 32, "employment_income": 30000}, + {"age": 6}, + ], + benunit={"would_claim_child_benefit": True}, + year=2026, + ) + print(result.person[0].income_tax) + print(result.benunit.child_benefit) + print(result.household.hbai_household_net_income) +""" + +from __future__ import annotations + +from collections.abc import Mapping +from typing import Any, Optional + +from policyengine.tax_benefit_models.common import ( + EntityResult, + HouseholdResult, + compile_reform, + dispatch_extra_variables, +) +from policyengine.utils.household_validation import validate_household_input + +from .model import uk_latest + + +def _default_output_columns( + extra_by_entity: Mapping[str, list[str]], +) -> dict[str, list[str]]: + merged: dict[str, list[str]] = {} + for entity, defaults in uk_latest.entity_variables.items(): + columns = list(defaults) + for extra in extra_by_entity.get(entity, []): + if extra not in columns: + columns.append(extra) + merged[entity] = columns + for entity, extras in extra_by_entity.items(): + merged.setdefault(entity, list(extras)) + return merged + + +def _safe_convert(value: Any) -> Any: + try: + return float(value) + except (ValueError, TypeError): + 
return str(value) if value is not None else None + + +def _build_situation( + *, + people: list[Mapping[str, Any]], + benunit: Mapping[str, Any], + household: Mapping[str, Any], + year: int, +) -> dict[str, Any]: + year_str = str(year) + + def _periodise(spec: Mapping[str, Any]) -> dict[str, dict[str, Any]]: + return {key: {year_str: value} for key, value in spec.items() if key != "id"} + + person_ids = [f"person_{i}" for i in range(len(people))] + persons = {pid: _periodise(person) for pid, person in zip(person_ids, people)} + + def _group(spec: Mapping[str, Any]) -> dict[str, Any]: + return {"members": list(person_ids), **_periodise(spec)} + + return { + "people": persons, + "benunits": {"benunit_0": _group(benunit)}, + "households": {"household_0": _group(household)}, + } + + +_ALLOWED_KWARGS = frozenset( + {"people", "benunit", "household", "year", "reform", "extra_variables"} +) + + +def _raise_unexpected_kwargs(unexpected: Mapping[str, Any]) -> None: + from difflib import get_close_matches + + lines = ["calculate_household received unsupported keyword arguments:"] + for name in unexpected: + suggestions = get_close_matches(name, _ALLOWED_KWARGS, n=1, cutoff=0.5) + hint = f" (did you mean '{suggestions[0]}'?)" if suggestions else "" + if name in {"tax_unit", "marital_unit", "family", "spm_unit"}: + hint = ( + f" — `{name}` is US-only; the UK groups persons into a single `benunit`" + ) + lines.append(f" - '{name}'{hint}") + lines.append( + "Valid kwargs: people, benunit, household, year, reform, extra_variables." + ) + raise TypeError("\n".join(lines)) + + +def calculate_household( + *, + people: list[Mapping[str, Any]], + benunit: Optional[Mapping[str, Any]] = None, + household: Optional[Mapping[str, Any]] = None, + year: int = 2026, + reform: Optional[Mapping[str, Any]] = None, + extra_variables: Optional[list[str]] = None, + **unexpected: Any, +) -> HouseholdResult: + """Compute tax and benefit variables for a single UK household. 
+ + Args: + people: One dict per person (keys are UK variable names). + Must be non-empty. + benunit, household: Optional per-entity overrides. + year: Calendar year. Defaults to 2026. + reform: Optional reform dict. Scalar values default to + ``{year}-01-01``; invalid parameter paths raise with a + close-match suggestion. + extra_variables: Flat list of extra UK variables to compute; + the library dispatches each to its entity. + + Returns: + :class:`HouseholdResult` with dot-accessible entity results. + + Raises: + ValueError: on unknown or mis-placed variable names, or + unknown reform parameter paths. + TypeError: on US-only kwargs (``tax_unit``, etc.) or other + unsupported keyword arguments. + """ + if unexpected: + _raise_unexpected_kwargs(unexpected) + + from policyengine_uk import Simulation + + people = list(people) + benunit_dict = dict(benunit or {}) + household_dict = dict(household or {}) + + validate_household_input( + model_version=uk_latest, + entities={ + "person": people, + "benunit": [benunit_dict], + "household": [household_dict], + }, + ) + + extra_by_entity = dispatch_extra_variables( + model_version=uk_latest, + names=extra_variables or [], + ) + output_columns = _default_output_columns(extra_by_entity) + reform_dict = compile_reform(reform, year=year, model_version=uk_latest) + + simulation = Simulation( + situation=_build_situation( + people=people, + benunit=benunit_dict, + household=household_dict, + year=year, + ), + reform=reform_dict, + ) + + result = HouseholdResult() + for entity, columns in output_columns.items(): + raw = { + variable: list(simulation.calculate(variable, period=year, map_to=entity)) + for variable in columns + } + if entity == "person": + result["person"] = [ + EntityResult( + {variable: _safe_convert(raw[variable][i]) for variable in columns} + ) + for i in range(len(people)) + ] + else: + result[entity] = EntityResult( + {variable: _safe_convert(raw[variable][0]) for variable in columns} + ) + return result diff 
--git a/src/policyengine/tax_benefit_models/uk/model.py b/src/policyengine/tax_benefit_models/uk/model.py index ce6f2dd9..67e7a3ae 100644 --- a/src/policyengine/tax_benefit_models/uk/model.py +++ b/src/policyengine/tax_benefit_models/uk/model.py @@ -1,31 +1,17 @@ import datetime -import warnings -from importlib import metadata from pathlib import Path from typing import TYPE_CHECKING, Optional import pandas as pd from microdf import MicroDataFrame -from policyengine.core import ( - Parameter, - ParameterNode, - TaxBenefitModel, - TaxBenefitModelVersion, - Variable, -) -from policyengine.core.release_manifest import ( - certify_data_release_compatibility, +from policyengine.core import TaxBenefitModel +from policyengine.provenance.manifest import ( dataset_logical_name, - get_release_manifest, resolve_local_managed_dataset_source, resolve_managed_dataset_reference, ) -from policyengine.utils.entity_utils import build_entity_relationships -from policyengine.utils.parameter_labels import ( - build_scale_lookup, - generate_label_for_parameter, -) +from policyengine.tax_benefit_models.common import MicrosimulationModelVersion from .datasets import PolicyEngineUKDataset, UKYearData @@ -43,18 +29,11 @@ class PolicyEngineUK(TaxBenefitModel): uk_model = PolicyEngineUK() -def _get_runtime_data_build_metadata() -> dict[str, Optional[str]]: - try: - from policyengine_uk.build_metadata import get_data_build_metadata - except ModuleNotFoundError as exc: - if exc.name != "policyengine_uk.build_metadata": - raise - return {} - - return get_data_build_metadata() or {} - +class PolicyEngineUKLatest(MicrosimulationModelVersion): + country_code = "uk" + package_name = "policyengine-uk" + group_entities = UK_GROUP_ENTITIES -class PolicyEngineUKLatest(TaxBenefitModelVersion): model: TaxBenefitModel = uk_model version: str = None created_at: datetime.datetime = None @@ -137,147 +116,32 @@ class PolicyEngineUKLatest(TaxBenefitModelVersion): ], } - def __init__(self, **kwargs: dict): - 
manifest = get_release_manifest("uk") - if "version" not in kwargs or kwargs.get("version") is None: - kwargs["version"] = manifest.model_package.version - - installed_model_version = metadata.version("policyengine-uk") - if installed_model_version != manifest.model_package.version: - warnings.warn( - "Installed policyengine-uk version " - f"({installed_model_version}) does not match the bundled " - "policyengine.py manifest " - f"({manifest.model_package.version}). Calculations will " - "run against the installed version, but dataset " - "compatibility is not guaranteed. To silence this " - "warning, install the version pinned by the manifest.", - UserWarning, - stacklevel=2, - ) - - model_build_metadata = _get_runtime_data_build_metadata() - data_certification = certify_data_release_compatibility( - "uk", - runtime_model_version=installed_model_version, - runtime_data_build_fingerprint=model_build_metadata.get( - "data_build_fingerprint" - ), - ) - - super().__init__(**kwargs) - self.release_manifest = manifest - self.model_package = manifest.model_package - self.data_package = manifest.data_package - self.default_dataset_uri = manifest.default_dataset_uri - self.data_certification = data_certification - from policyengine_core.enums import Enum + # --- Hooks ----------------------------------------------------------- + @classmethod + def _get_runtime_data_build_metadata(cls) -> dict[str, Optional[str]]: + try: + from policyengine_uk.build_metadata import get_data_build_metadata + except ModuleNotFoundError as exc: + if exc.name != "policyengine_uk.build_metadata": + raise + return {} + return get_data_build_metadata() or {} + + def _load_system(self): from policyengine_uk.system import system - # Attach region registry + return system + + def _load_region_registry(self): from policyengine.countries.uk.regions import uk_region_registry - self.region_registry = uk_region_registry - - self.id = f"{self.model.id}@{self.version}" - - for var_obj in 
system.variables.values(): - # Serialize default_value for JSON compatibility - default_val = var_obj.default_value - if var_obj.value_type is Enum: - default_val = default_val.name - elif var_obj.value_type is datetime.date: - default_val = default_val.isoformat() - - variable = Variable( - id=self.id + "-" + var_obj.name, - name=var_obj.name, - label=getattr(var_obj, "label", None), - tax_benefit_model_version=self, - entity=var_obj.entity.key, - description=var_obj.documentation, - data_type=var_obj.value_type if var_obj.value_type is not Enum else str, - default_value=default_val, - value_type=var_obj.value_type, - ) - if ( - hasattr(var_obj, "possible_values") - and var_obj.possible_values is not None - ): - variable.possible_values = list( - map( - lambda x: x.name, - var_obj.possible_values._value2member_map_.values(), - ) - ) - # Extract and resolve adds/subtracts. - # Core stores these as either list[str] or a parameter path string. - # Resolve parameter paths to lists so consumers always get list[str]. 
- if hasattr(var_obj, "adds") and var_obj.adds is not None: - if isinstance(var_obj.adds, str): - try: - from policyengine_core.parameters.operations.get_parameter import ( - get_parameter, - ) - - param = get_parameter(system.parameters, var_obj.adds) - variable.adds = list(param("2025-01-01")) - except (ValueError, Exception): - variable.adds = None - else: - variable.adds = var_obj.adds - if hasattr(var_obj, "subtracts") and var_obj.subtracts is not None: - if isinstance(var_obj.subtracts, str): - try: - from policyengine_core.parameters.operations.get_parameter import ( - get_parameter, - ) - - param = get_parameter(system.parameters, var_obj.subtracts) - variable.subtracts = list(param("2025-01-01")) - except (ValueError, Exception): - variable.subtracts = None - else: - variable.subtracts = var_obj.subtracts - self.add_variable(variable) - - from policyengine_core.parameters import Parameter as CoreParameter - from policyengine_core.parameters import ParameterNode as CoreParameterNode - - scale_lookup = build_scale_lookup(system) - - for param_node in system.parameters.get_descendants(): - if isinstance(param_node, CoreParameter): - parameter = Parameter( - id=self.id + "-" + param_node.name, - name=param_node.name, - label=generate_label_for_parameter( - param_node, system, scale_lookup - ), - tax_benefit_model_version=self, - description=param_node.description, - data_type=type(param_node(2025)), - unit=param_node.metadata.get("unit"), - _core_param=param_node, - ) - self.add_parameter(parameter) - elif isinstance(param_node, CoreParameterNode): - node = ParameterNode( - id=self.id + "-" + param_node.name, - name=param_node.name, - label=param_node.metadata.get("label"), - description=param_node.description, - tax_benefit_model_version=self, - ) - self.add_parameter_node(node) - - def _build_entity_relationships( - self, dataset: PolicyEngineUKDataset - ) -> pd.DataFrame: - """Build a DataFrame mapping each person to their containing entities.""" - 
person_data = pd.DataFrame(dataset.data.person) - return build_entity_relationships(person_data, UK_GROUP_ENTITIES) + return uk_region_registry + + @property + def _dataset_class(self): + return PolicyEngineUKDataset + # --- run ------------------------------------------------------------- def run(self, simulation: "Simulation") -> "Simulation": from policyengine_uk import Microsimulation from policyengine_uk.data import UKSingleYearDataset @@ -370,36 +234,6 @@ def run(self, simulation: "Simulation") -> "Simulation": ), ) - def save(self, simulation: "Simulation"): - """Save the simulation's output dataset.""" - simulation.output_dataset.save() - - def load(self, simulation: "Simulation"): - """Load the simulation's output dataset.""" - import os - - filepath = str( - Path(simulation.dataset.filepath).parent / (simulation.id + ".h5") - ) - - simulation.output_dataset = PolicyEngineUKDataset( - id=simulation.id, - name=simulation.dataset.name, - description=simulation.dataset.description, - filepath=filepath, - year=simulation.dataset.year, - is_output_dataset=True, - ) - - # Load timestamps from file system metadata - if os.path.exists(filepath): - simulation.created_at = datetime.datetime.fromtimestamp( - os.path.getctime(filepath) - ) - simulation.updated_at = datetime.datetime.fromtimestamp( - os.path.getmtime(filepath) - ) - def _managed_release_bundle( dataset_uri: str, @@ -423,8 +257,8 @@ def managed_microsimulation( """Construct a country-package Microsimulation pinned to this bundle. By default this enforces the dataset selection from the bundled - `policyengine.py` release manifest. Arbitrary dataset URIs require - `allow_unmanaged=True`. + ``policyengine.py`` release manifest. Arbitrary dataset URIs require + ``allow_unmanaged=True``. 
""" from policyengine_uk import Microsimulation diff --git a/src/policyengine/tax_benefit_models/uk/outputs.py b/src/policyengine/tax_benefit_models/uk/outputs.py deleted file mode 100644 index 97032a9c..00000000 --- a/src/policyengine/tax_benefit_models/uk/outputs.py +++ /dev/null @@ -1,105 +0,0 @@ -"""UK-specific output templates.""" - -from typing import Optional - -from pydantic import ConfigDict - -from policyengine.core import Output, Simulation -from policyengine.outputs.aggregate import Aggregate, AggregateType -from policyengine.outputs.change_aggregate import ( - ChangeAggregate, - ChangeAggregateType, -) - - -class ProgrammeStatistics(Output): - """Single programme's statistics from a policy reform - represents one database row.""" - - model_config = ConfigDict(arbitrary_types_allowed=True) - - baseline_simulation: Simulation - reform_simulation: Simulation - programme_name: str - entity: str - is_tax: bool = False - - # Results populated by run() - baseline_total: Optional[float] = None - reform_total: Optional[float] = None - change: Optional[float] = None - baseline_count: Optional[float] = None - reform_count: Optional[float] = None - winners: Optional[float] = None - losers: Optional[float] = None - - def run(self): - """Calculate statistics for this programme.""" - # Baseline totals - baseline_total = Aggregate( - simulation=self.baseline_simulation, - variable=self.programme_name, - aggregate_type=AggregateType.SUM, - entity=self.entity, - ) - baseline_total.run() - - # Reform totals - reform_total = Aggregate( - simulation=self.reform_simulation, - variable=self.programme_name, - aggregate_type=AggregateType.SUM, - entity=self.entity, - ) - reform_total.run() - - # Count of recipients/payers (baseline) - baseline_count = Aggregate( - simulation=self.baseline_simulation, - variable=self.programme_name, - aggregate_type=AggregateType.COUNT, - entity=self.entity, - filter_variable=self.programme_name, - filter_variable_geq=0.01, - ) - 
baseline_count.run() - - # Count of recipients/payers (reform) - reform_count = Aggregate( - simulation=self.reform_simulation, - variable=self.programme_name, - aggregate_type=AggregateType.COUNT, - entity=self.entity, - filter_variable=self.programme_name, - filter_variable_geq=0.01, - ) - reform_count.run() - - # Winners and losers - winners = ChangeAggregate( - baseline_simulation=self.baseline_simulation, - reform_simulation=self.reform_simulation, - variable=self.programme_name, - aggregate_type=ChangeAggregateType.COUNT, - entity=self.entity, - change_geq=0.01 if not self.is_tax else -0.01, - ) - winners.run() - - losers = ChangeAggregate( - baseline_simulation=self.baseline_simulation, - reform_simulation=self.reform_simulation, - variable=self.programme_name, - aggregate_type=ChangeAggregateType.COUNT, - entity=self.entity, - change_leq=-0.01 if not self.is_tax else 0.01, - ) - losers.run() - - # Populate results - self.baseline_total = float(baseline_total.result) - self.reform_total = float(reform_total.result) - self.change = float(reform_total.result - baseline_total.result) - self.baseline_count = float(baseline_count.result) - self.reform_count = float(reform_count.result) - self.winners = float(winners.result) - self.losers = float(losers.result) diff --git a/src/policyengine/tax_benefit_models/us/__init__.py b/src/policyengine/tax_benefit_models/us/__init__.py index 75d2aa79..d49d46d4 100644 --- a/src/policyengine/tax_benefit_models/us/__init__.py +++ b/src/policyengine/tax_benefit_models/us/__init__.py @@ -1,16 +1,36 @@ -"""PolicyEngine US tax-benefit model.""" +"""PolicyEngine US tax-benefit model. + +Typical usage (fresh session, no other imports required): + +.. code-block:: python + + import policyengine as pe + + # Household calculator. 
+ result = pe.us.calculate_household( + people=[{"age": 35, "employment_income": 60000}], + tax_unit={"filing_status": "SINGLE"}, + year=2026, + ) + print(result.tax_unit.income_tax) + + # Reform + extra variables. + reformed = pe.us.calculate_household( + people=[{"age": 35, "employment_income": 60000}], + tax_unit={"filing_status": "SINGLE"}, + year=2026, + reform={"gov.irs.credits.ctc.amount.adult_dependent": 1000}, + extra_variables=["adjusted_gross_income"], + ) +""" from importlib.util import find_spec if find_spec("policyengine_us") is not None: from policyengine.core import Dataset + from policyengine.outputs import ProgramStatistics - from .analysis import ( - USHouseholdInput, - USHouseholdOutput, - calculate_household_impact, - economic_impact_analysis, - ) + from .analysis import economic_impact_analysis from .datasets import ( PolicyEngineUSDataset, USYearData, @@ -18,16 +38,17 @@ ensure_datasets, load_datasets, ) + from .household import calculate_household from .model import ( PolicyEngineUS, PolicyEngineUSLatest, managed_microsimulation, us_latest, - us_model, ) - from .outputs import ProgramStatistics - # Rebuild Pydantic models to resolve forward references + model = us_latest + """The pinned US ``TaxBenefitModelVersion`` for this policyengine release.""" + Dataset.model_rebuild() USYearData.model_rebuild() PolicyEngineUSDataset.model_rebuild() @@ -43,12 +64,10 @@ "PolicyEngineUS", "PolicyEngineUSLatest", "managed_microsimulation", - "us_model", + "model", "us_latest", + "calculate_household", "economic_impact_analysis", - "calculate_household_impact", - "USHouseholdInput", - "USHouseholdOutput", "ProgramStatistics", ] else: diff --git a/src/policyengine/tax_benefit_models/us/analysis.py b/src/policyengine/tax_benefit_models/us/analysis.py index 122ae2af..8b3eefc8 100644 --- a/src/policyengine/tax_benefit_models/us/analysis.py +++ b/src/policyengine/tax_benefit_models/us/analysis.py @@ -1,15 +1,18 @@ -"""General utility functions for US policy 
reform analysis.""" +"""Microsimulation reform analysis for the US model. -import tempfile -from pathlib import Path -from typing import Any, Optional, Union +The single-household calculator lives in :mod:`.household`; this module +holds the population-level reform-analysis helpers. +""" + +from __future__ import annotations + +from typing import Union import pandas as pd -from microdf import MicroDataFrame -from pydantic import BaseModel, Field +from pydantic import BaseModel from policyengine.core import OutputCollection, Simulation -from policyengine.core.policy import Policy +from policyengine.outputs import ProgramStatistics from policyengine.outputs.decile_impact import ( DecileImpact, calculate_decile_impacts, @@ -24,168 +27,6 @@ calculate_us_poverty_rates, ) -from .datasets import PolicyEngineUSDataset, USYearData -from .model import us_latest -from .outputs import ProgramStatistics - - -class USHouseholdOutput(BaseModel): - """Output from a US household calculation with all entity data.""" - - person: list[dict[str, Any]] - marital_unit: list[dict[str, Any]] - family: list[dict[str, Any]] - spm_unit: list[dict[str, Any]] - tax_unit: list[dict[str, Any]] - household: dict[str, Any] - - -class USHouseholdInput(BaseModel): - """Input for a US household calculation.""" - - people: list[dict[str, Any]] - marital_unit: dict[str, Any] = Field(default_factory=dict) - family: dict[str, Any] = Field(default_factory=dict) - spm_unit: dict[str, Any] = Field(default_factory=dict) - tax_unit: dict[str, Any] = Field(default_factory=dict) - household: dict[str, Any] = Field(default_factory=dict) - year: int = 2024 - - -def calculate_household_impact( - household_input: USHouseholdInput, - policy: Optional[Policy] = None, -) -> USHouseholdOutput: - """Calculate tax and benefit impacts for a single US household.""" - n_people = len(household_input.people) - - # Build person data with defaults - person_data = { - "person_id": list(range(n_people)), - "person_household_id": 
[0] * n_people, - "person_marital_unit_id": [0] * n_people, - "person_family_id": [0] * n_people, - "person_spm_unit_id": [0] * n_people, - "person_tax_unit_id": [0] * n_people, - "person_weight": [1.0] * n_people, - } - # Add user-provided person fields - for i, person in enumerate(household_input.people): - for key, value in person.items(): - if key not in person_data: - person_data[key] = [0.0] * n_people # Default to 0 for numeric fields - person_data[key][i] = value - - # Build entity data with defaults - household_data = { - "household_id": [0], - "household_weight": [1.0], - } - for key, value in household_input.household.items(): - household_data[key] = [value] - - marital_unit_data = { - "marital_unit_id": [0], - "marital_unit_weight": [1.0], - } - for key, value in household_input.marital_unit.items(): - marital_unit_data[key] = [value] - - family_data = { - "family_id": [0], - "family_weight": [1.0], - } - for key, value in household_input.family.items(): - family_data[key] = [value] - - spm_unit_data = { - "spm_unit_id": [0], - "spm_unit_weight": [1.0], - } - for key, value in household_input.spm_unit.items(): - spm_unit_data[key] = [value] - - tax_unit_data = { - "tax_unit_id": [0], - "tax_unit_weight": [1.0], - } - for key, value in household_input.tax_unit.items(): - tax_unit_data[key] = [value] - - # Create MicroDataFrames - person_df = MicroDataFrame(pd.DataFrame(person_data), weights="person_weight") - household_df = MicroDataFrame( - pd.DataFrame(household_data), weights="household_weight" - ) - marital_unit_df = MicroDataFrame( - pd.DataFrame(marital_unit_data), weights="marital_unit_weight" - ) - family_df = MicroDataFrame(pd.DataFrame(family_data), weights="family_weight") - spm_unit_df = MicroDataFrame(pd.DataFrame(spm_unit_data), weights="spm_unit_weight") - tax_unit_df = MicroDataFrame(pd.DataFrame(tax_unit_data), weights="tax_unit_weight") - - # Create temporary dataset - tmpdir = tempfile.mkdtemp() - filepath = str(Path(tmpdir) / 
"household_impact.h5") - - dataset = PolicyEngineUSDataset( - name="Household impact calculation", - description="Single household for impact calculation", - filepath=filepath, - year=household_input.year, - data=USYearData( - person=person_df, - household=household_df, - marital_unit=marital_unit_df, - family=family_df, - spm_unit=spm_unit_df, - tax_unit=tax_unit_df, - ), - ) - - # Run simulation - simulation = Simulation( - dataset=dataset, - tax_benefit_model_version=us_latest, - policy=policy, - ) - simulation.run() - - # Extract all output variables defined in entity_variables - output_data = simulation.output_dataset.data - - def safe_convert(value): - """Convert value to float if numeric, otherwise return as string.""" - try: - return float(value) - except (ValueError, TypeError): - return str(value) - - def extract_entity_outputs( - entity_name: str, entity_data, n_rows: int - ) -> list[dict[str, Any]]: - outputs = [] - for i in range(n_rows): - row_dict = {} - for var in us_latest.entity_variables[entity_name]: - row_dict[var] = safe_convert(entity_data[var].iloc[i]) - outputs.append(row_dict) - return outputs - - return USHouseholdOutput( - person=extract_entity_outputs("person", output_data.person, n_people), - marital_unit=extract_entity_outputs( - "marital_unit", output_data.marital_unit, 1 - ), - family=extract_entity_outputs("family", output_data.family, 1), - spm_unit=extract_entity_outputs("spm_unit", output_data.spm_unit, 1), - tax_unit=extract_entity_outputs("tax_unit", output_data.tax_unit, 1), - household={ - var: safe_convert(output_data.household[var].iloc[0]) - for var in us_latest.entity_variables["household"] - }, - ) - class PolicyReformAnalysis(BaseModel): """Complete policy reform analysis result.""" @@ -203,15 +44,16 @@ def economic_impact_analysis( reform_simulation: Simulation, inequality_preset: Union[USInequalityPreset, str] = USInequalityPreset.STANDARD, ) -> PolicyReformAnalysis: - """Perform comprehensive analysis of a policy 
reform. + """Perform comprehensive analysis of a US policy reform. Args: - baseline_simulation: Baseline simulation - reform_simulation: Reform simulation - inequality_preset: Optional preset for the inequality outputs + baseline_simulation: Baseline simulation. + reform_simulation: Reform simulation. + inequality_preset: Preset for the inequality output. Returns: - PolicyReformAnalysis containing decile impacts and program statistics + ``PolicyReformAnalysis`` with decile impacts, program + statistics, baseline and reform poverty, and inequality. """ baseline_simulation.ensure() reform_simulation.ensure() @@ -223,21 +65,16 @@ def economic_impact_analysis( "Reform simulation must have more than 100 households" ) - # Decile impact (using household_net_income for US) decile_impacts = calculate_decile_impacts( baseline_simulation=baseline_simulation, reform_simulation=reform_simulation, income_variable="household_net_income", ) - # Major programs to analyse programs = { - # Federal taxes "income_tax": {"entity": "tax_unit", "is_tax": True}, "payroll_tax": {"entity": "person", "is_tax": True}, - # State and local taxes "state_income_tax": {"entity": "tax_unit", "is_tax": True}, - # Benefits "snap": {"entity": "spm_unit", "is_tax": False}, "tanf": {"entity": "spm_unit", "is_tax": False}, "ssi": {"entity": "person", "is_tax": False}, @@ -249,22 +86,17 @@ def economic_impact_analysis( } program_statistics = [] - for program_name, program_info in programs.items(): - entity = program_info["entity"] - is_tax = program_info["is_tax"] - stats = ProgramStatistics( baseline_simulation=baseline_simulation, reform_simulation=reform_simulation, program_name=program_name, - entity=entity, - is_tax=is_tax, + entity=program_info["entity"], + is_tax=program_info["is_tax"], ) stats.run() program_statistics.append(stats) - # Create DataFrame program_df = pd.DataFrame( [ { @@ -284,16 +116,12 @@ def economic_impact_analysis( for p in program_statistics ] ) - program_collection = 
OutputCollection( outputs=program_statistics, dataframe=program_df ) - # Calculate poverty rates for both simulations baseline_poverty = calculate_us_poverty_rates(baseline_simulation) reform_poverty = calculate_us_poverty_rates(reform_simulation) - - # Calculate inequality for both simulations baseline_inequality = calculate_us_inequality( baseline_simulation, preset=inequality_preset ) diff --git a/src/policyengine/tax_benefit_models/us/datasets.py b/src/policyengine/tax_benefit_models/us/datasets.py index da10733b..014309db 100644 --- a/src/policyengine/tax_benefit_models/us/datasets.py +++ b/src/policyengine/tax_benefit_models/us/datasets.py @@ -7,7 +7,7 @@ from pydantic import ConfigDict from policyengine.core import Dataset, YearData -from policyengine.core.release_manifest import ( +from policyengine.provenance.manifest import ( dataset_logical_name, resolve_dataset_reference, ) diff --git a/src/policyengine/tax_benefit_models/us/household.py b/src/policyengine/tax_benefit_models/us/household.py new file mode 100644 index 00000000..5258043a --- /dev/null +++ b/src/policyengine/tax_benefit_models/us/household.py @@ -0,0 +1,245 @@ +"""Single-household calculation for the US model. + +``calculate_household`` is the one-call entry point for the household +calculator journey: pass the people plus any per-entity overrides plus +an optional reform, get back a dot-accessible result. + +.. code-block:: python + + import policyengine as pe + + # Single parent with one child in New York, $45k wages. + result = pe.us.calculate_household( + people=[ + {"age": 32, "employment_income": 45000, "is_tax_unit_head": True}, + {"age": 6, "is_tax_unit_dependent": True}, + ], + tax_unit={"filing_status": "HEAD_OF_HOUSEHOLD"}, + household={"state_code": "NY"}, + year=2026, + extra_variables=["adjusted_gross_income"], + ) + print(result.tax_unit.income_tax) + print(result.tax_unit.ctc, result.tax_unit.eitc) + print(result.household.household_net_income) + # Reform: set the SNAP earned-income deduction to zero. 
+ reformed = pe.us.calculate_household( + people=[ + {"age": 32, "employment_income": 45000, "is_tax_unit_head": True}, + {"age": 6, "is_tax_unit_dependent": True}, + ], + tax_unit={"filing_status": "HEAD_OF_HOUSEHOLD"}, + household={"state_code": "NY"}, + year=2026, + reform={"gov.usda.snap.income.deductions.earned_income": 0}, + ) +""" + +from __future__ import annotations + +from collections.abc import Mapping +from typing import Any, Optional + +from policyengine.tax_benefit_models.common import ( + EntityResult, + HouseholdResult, + compile_reform, + dispatch_extra_variables, +) +from policyengine.utils.household_validation import validate_household_input + +from .model import us_latest + +_GROUP_ENTITIES = ("marital_unit", "family", "spm_unit", "tax_unit", "household") + + +def _raise_unexpected_kwargs(unexpected: Mapping[str, Any]) -> None: + from difflib import get_close_matches + + lines = ["calculate_household received unsupported keyword arguments:"] + for name in unexpected: + suggestions = get_close_matches(name, _ALLOWED_KWARGS, n=1, cutoff=0.5) + hint = f" (did you mean '{suggestions[0]}'?)" if suggestions else "" + if name == "benunit": + hint = " — `benunit` is UK-only; the US uses `tax_unit`, `marital_unit`, `family`, or `spm_unit`" + lines.append(f" - '{name}'{hint}") + lines.append( + "Valid kwargs: people, marital_unit, family, spm_unit, tax_unit, " + "household, year, reform, extra_variables." 
+ ) + raise TypeError("\n".join(lines)) + + +def _default_output_columns( + extra_by_entity: Mapping[str, list[str]], +) -> dict[str, list[str]]: + merged: dict[str, list[str]] = {} + for entity, defaults in us_latest.entity_variables.items(): + columns = list(defaults) + for extra in extra_by_entity.get(entity, []): + if extra not in columns: + columns.append(extra) + merged[entity] = columns + for entity, extras in extra_by_entity.items(): + merged.setdefault(entity, list(extras)) + return merged + + +def _safe_convert(value: Any) -> Any: + try: + return float(value) + except (ValueError, TypeError): + return str(value) if value is not None else None + + +def _build_situation( + *, + people: list[Mapping[str, Any]], + marital_unit: Mapping[str, Any], + family: Mapping[str, Any], + spm_unit: Mapping[str, Any], + tax_unit: Mapping[str, Any], + household: Mapping[str, Any], + year: int, +) -> dict[str, Any]: + year_str = str(year) + + def _periodise(spec: Mapping[str, Any]) -> dict[str, dict[str, Any]]: + return {key: {year_str: value} for key, value in spec.items() if key != "id"} + + person_ids = [f"person_{i}" for i in range(len(people))] + persons = {pid: _periodise(person) for pid, person in zip(person_ids, people)} + + def _group(spec: Mapping[str, Any]) -> dict[str, Any]: + return {"members": list(person_ids), **_periodise(spec)} + + return { + "people": persons, + "marital_units": {"marital_unit_0": _group(marital_unit)}, + "families": {"family_0": _group(family)}, + "spm_units": {"spm_unit_0": _group(spm_unit)}, + "tax_units": {"tax_unit_0": _group(tax_unit)}, + "households": {"household_0": _group(household)}, + } + + +_ALLOWED_KWARGS = frozenset( + { + "people", + "marital_unit", + "family", + "spm_unit", + "tax_unit", + "household", + "year", + "reform", + "extra_variables", + } +) + + +def calculate_household( + *, + people: list[Mapping[str, Any]], + marital_unit: Optional[Mapping[str, Any]] = None, + family: Optional[Mapping[str, Any]] = None, + 
spm_unit: Optional[Mapping[str, Any]] = None, + tax_unit: Optional[Mapping[str, Any]] = None, + household: Optional[Mapping[str, Any]] = None, + year: int = 2026, + reform: Optional[Mapping[str, Any]] = None, + extra_variables: Optional[list[str]] = None, + **unexpected: Any, +) -> HouseholdResult: + """Compute tax and benefit variables for a single US household. + + Args: + people: One dict per person with US variable names as keys + (``age``, ``employment_income``, ``is_tax_unit_head``, + ``is_tax_unit_dependent`` ...). Must be non-empty. + marital_unit, family, spm_unit, tax_unit, household: Optional + per-entity overrides, each keyed by variable name (e.g. + ``tax_unit={"filing_status": "SINGLE"}``, + ``household={"state_code": "NY"}``). + year: Calendar year to compute for. Defaults to 2026. + reform: Optional reform as ``{parameter_path: value}`` or + ``{parameter_path: {effective_date: value}}``. Scalar + values default to ``{year}-01-01``; invalid parameter + paths raise with a close-match suggestion. + extra_variables: Flat list of variable names to compute beyond + the default output columns; the library dispatches each + name to its entity. Unknown names raise ``ValueError`` + with a close-match suggestion. + + Returns: + :class:`HouseholdResult` with dot-accessible per-entity + variables. Singleton entities (``tax_unit``, ``household``, ...) + return :class:`EntityResult`; ``person`` returns a list of them. + + Raises: + ValueError: if any input dict uses an unknown variable name, + if a variable is placed on the wrong entity (e.g. + ``filing_status`` on ``people``), or if ``extra_variables`` + / ``reform`` names a variable or parameter path not defined + on the US model. 
+ """ + if unexpected: + _raise_unexpected_kwargs(unexpected) + + from policyengine_us import Simulation + + people = list(people) + entities = { + "marital_unit": dict(marital_unit or {}), + "family": dict(family or {}), + "spm_unit": dict(spm_unit or {}), + "tax_unit": dict(tax_unit or {}), + "household": dict(household or {}), + } + + validate_household_input( + model_version=us_latest, + entities={ + "person": people, + **{name: [value] for name, value in entities.items()}, + }, + ) + + extra_by_entity = dispatch_extra_variables( + model_version=us_latest, + names=extra_variables or [], + ) + output_columns = _default_output_columns(extra_by_entity) + reform_dict = compile_reform(reform, year=year, model_version=us_latest) + + simulation = Simulation( + situation=_build_situation( + people=people, + marital_unit=entities["marital_unit"], + family=entities["family"], + spm_unit=entities["spm_unit"], + tax_unit=entities["tax_unit"], + household=entities["household"], + year=year, + ), + reform=reform_dict, + ) + + result = HouseholdResult() + for entity, columns in output_columns.items(): + raw = { + variable: list(simulation.calculate(variable, period=year, map_to=entity)) + for variable in columns + } + if entity == "person": + result["person"] = [ + EntityResult( + {variable: _safe_convert(raw[variable][i]) for variable in columns} + ) + for i in range(len(people)) + ] + else: + result[entity] = EntityResult( + {variable: _safe_convert(raw[variable][0]) for variable in columns} + ) + return result diff --git a/src/policyengine/tax_benefit_models/us/model.py b/src/policyengine/tax_benefit_models/us/model.py index cd56df09..51463650 100644 --- a/src/policyengine/tax_benefit_models/us/model.py +++ b/src/policyengine/tax_benefit_models/us/model.py @@ -1,31 +1,17 @@ import datetime -import warnings -from importlib import metadata from pathlib import Path from typing import TYPE_CHECKING, Optional import pandas as pd from microdf import MicroDataFrame -from 
policyengine.core import ( - Parameter, - ParameterNode, - TaxBenefitModel, - TaxBenefitModelVersion, - Variable, -) -from policyengine.core.release_manifest import ( - certify_data_release_compatibility, +from policyengine.core import TaxBenefitModel +from policyengine.provenance.manifest import ( dataset_logical_name, - get_release_manifest, resolve_local_managed_dataset_source, resolve_managed_dataset_reference, ) -from policyengine.utils.entity_utils import build_entity_relationships -from policyengine.utils.parameter_labels import ( - build_scale_lookup, - generate_label_for_parameter, -) +from policyengine.tax_benefit_models.common import MicrosimulationModelVersion from .datasets import PolicyEngineUSDataset, USYearData @@ -49,18 +35,11 @@ class PolicyEngineUS(TaxBenefitModel): us_model = PolicyEngineUS() -def _get_runtime_data_build_metadata() -> dict[str, Optional[str]]: - try: - from policyengine_us.build_metadata import get_data_build_metadata - except ModuleNotFoundError as exc: - if exc.name != "policyengine_us.build_metadata": - raise - return {} - - return get_data_build_metadata() or {} - +class PolicyEngineUSLatest(MicrosimulationModelVersion): + country_code = "us" + package_name = "policyengine-us" + group_entities = US_GROUP_ENTITIES -class PolicyEngineUSLatest(TaxBenefitModelVersion): model: TaxBenefitModel = us_model version: str = None created_at: datetime.datetime = None @@ -129,147 +108,32 @@ class PolicyEngineUSLatest(TaxBenefitModelVersion): ], } - def __init__(self, **kwargs: dict): - manifest = get_release_manifest("us") - if "version" not in kwargs or kwargs.get("version") is None: - kwargs["version"] = manifest.model_package.version - - installed_model_version = metadata.version("policyengine-us") - if installed_model_version != manifest.model_package.version: - warnings.warn( - "Installed policyengine-us version " - f"({installed_model_version}) does not match the bundled " - "policyengine.py manifest " - 
f"({manifest.model_package.version}). Calculations will " - "run against the installed version, but dataset " - "compatibility is not guaranteed. To silence this " - "warning, install the version pinned by the manifest.", - UserWarning, - stacklevel=2, - ) - - model_build_metadata = _get_runtime_data_build_metadata() - data_certification = certify_data_release_compatibility( - "us", - runtime_model_version=installed_model_version, - runtime_data_build_fingerprint=model_build_metadata.get( - "data_build_fingerprint" - ), - ) - - super().__init__(**kwargs) - self.release_manifest = manifest - self.model_package = manifest.model_package - self.data_package = manifest.data_package - self.default_dataset_uri = manifest.default_dataset_uri - self.data_certification = data_certification - from policyengine_core.enums import Enum + # --- Hooks ----------------------------------------------------------- + @classmethod + def _get_runtime_data_build_metadata(cls) -> dict[str, Optional[str]]: + try: + from policyengine_us.build_metadata import get_data_build_metadata + except ModuleNotFoundError as exc: + if exc.name != "policyengine_us.build_metadata": + raise + return {} + return get_data_build_metadata() or {} + + def _load_system(self): from policyengine_us.system import system - # Attach region registry + return system + + def _load_region_registry(self): from policyengine.countries.us.regions import us_region_registry - self.region_registry = us_region_registry - - self.id = f"{self.model.id}@{self.version}" - - for var_obj in system.variables.values(): - # Serialize default_value for JSON compatibility - default_val = var_obj.default_value - if var_obj.value_type is Enum: - default_val = default_val.name - elif var_obj.value_type is datetime.date: - default_val = default_val.isoformat() - - variable = Variable( - id=self.id + "-" + var_obj.name, - name=var_obj.name, - label=getattr(var_obj, "label", None), - tax_benefit_model_version=self, - entity=var_obj.entity.key, - 
description=var_obj.documentation, - data_type=var_obj.value_type if var_obj.value_type is not Enum else str, - default_value=default_val, - value_type=var_obj.value_type, - ) - if ( - hasattr(var_obj, "possible_values") - and var_obj.possible_values is not None - ): - variable.possible_values = list( - map( - lambda x: x.name, - var_obj.possible_values._value2member_map_.values(), - ) - ) - # Extract and resolve adds/subtracts. - # Core stores these as either list[str] or a parameter path string. - # Resolve parameter paths to lists so consumers always get list[str]. - if hasattr(var_obj, "adds") and var_obj.adds is not None: - if isinstance(var_obj.adds, str): - try: - from policyengine_core.parameters.operations.get_parameter import ( - get_parameter, - ) - - param = get_parameter(system.parameters, var_obj.adds) - variable.adds = list(param("2025-01-01")) - except (ValueError, Exception): - variable.adds = None - else: - variable.adds = var_obj.adds - if hasattr(var_obj, "subtracts") and var_obj.subtracts is not None: - if isinstance(var_obj.subtracts, str): - try: - from policyengine_core.parameters.operations.get_parameter import ( - get_parameter, - ) - - param = get_parameter(system.parameters, var_obj.subtracts) - variable.subtracts = list(param("2025-01-01")) - except (ValueError, Exception): - variable.subtracts = None - else: - variable.subtracts = var_obj.subtracts - self.add_variable(variable) - - from policyengine_core.parameters import Parameter as CoreParameter - from policyengine_core.parameters import ParameterNode as CoreParameterNode - - scale_lookup = build_scale_lookup(system) - - for param_node in system.parameters.get_descendants(): - if isinstance(param_node, CoreParameter): - parameter = Parameter( - id=self.id + "-" + param_node.name, - name=param_node.name, - label=generate_label_for_parameter( - param_node, system, scale_lookup - ), - tax_benefit_model_version=self, - description=param_node.description, - 
data_type=type(param_node(2025)), - unit=param_node.metadata.get("unit"), - _core_param=param_node, - ) - self.add_parameter(parameter) - elif isinstance(param_node, CoreParameterNode): - node = ParameterNode( - id=self.id + "-" + param_node.name, - name=param_node.name, - label=param_node.metadata.get("label"), - description=param_node.description, - tax_benefit_model_version=self, - ) - self.add_parameter_node(node) - - def _build_entity_relationships( - self, dataset: PolicyEngineUSDataset - ) -> pd.DataFrame: - """Build a DataFrame mapping each person to their containing entities.""" - person_data = pd.DataFrame(dataset.data.person) - return build_entity_relationships(person_data, US_GROUP_ENTITIES) + return us_region_registry + + @property + def _dataset_class(self): + return PolicyEngineUSDataset + # --- run ------------------------------------------------------------- def run(self, simulation: "Simulation") -> "Simulation": from policyengine_us import Microsimulation from policyengine_us.system import system @@ -308,14 +172,12 @@ def run(self, simulation: "Simulation") -> "Simulation": ), ) - # Build reform dict from policy and dynamic parameter values. # US requires reforms at Microsimulation construction time # (unlike UK which supports p.update() after construction). policy_reform = build_reform_dict(simulation.policy) dynamic_reform = build_reform_dict(simulation.dynamic) reform_dict = merge_reform_dicts(policy_reform, dynamic_reform) - # Create Microsimulation with reform at construction time microsim = Microsimulation(reform=reform_dict) self._build_simulation_from_dataset(microsim, dataset, system) @@ -346,7 +208,7 @@ def run(self, simulation: "Simulation") -> "Simulation": "tax_unit_weight", } - # First, copy ID and weight columns from input dataset + # Copy ID and weight columns from input dataset. 
for entity in data.keys(): input_df = pd.DataFrame(getattr(dataset.data, entity)) entity_id_col = f"{entity}_id" @@ -357,16 +219,16 @@ def run(self, simulation: "Simulation") -> "Simulation": if entity_weight_col in input_df.columns: data[entity][entity_weight_col] = input_df[entity_weight_col].values - # For person entity, also copy person-level group ID columns + # Person entity also needs person-level group ID columns so that + # downstream joins (e.g. person->tax_unit) work. person_input_df = pd.DataFrame(dataset.data.person) for col in person_input_df.columns: if col.startswith("person_") and col.endswith("_id"): - # Map person_household_id -> household_id, etc. target_col = col.replace("person_", "") if target_col in id_columns: data["person"][target_col] = person_input_df[col].values - # Then calculate non-ID, non-weight variables from simulation + # Calculate non-ID, non-weight variables from simulation for entity, variables in self.entity_variables.items(): for var in variables: if var not in id_columns and var not in weight_columns: @@ -404,61 +266,23 @@ def run(self, simulation: "Simulation") -> "Simulation": ), ) - def save(self, simulation: "Simulation"): - """Save the simulation's output dataset.""" - simulation.output_dataset.save() - - def load(self, simulation: "Simulation"): - """Load the simulation's output dataset.""" - import os - - filepath = str( - Path(simulation.dataset.filepath).parent / (simulation.id + ".h5") - ) - - simulation.output_dataset = PolicyEngineUSDataset( - id=simulation.id, - name=simulation.dataset.name, - description=simulation.dataset.description, - filepath=filepath, - year=simulation.dataset.year, - is_output_dataset=True, - ) - - # Load timestamps from file system metadata - if os.path.exists(filepath): - simulation.created_at = datetime.datetime.fromtimestamp( - os.path.getctime(filepath) - ) - simulation.updated_at = datetime.datetime.fromtimestamp( - os.path.getmtime(filepath) - ) - def 
_build_simulation_from_dataset(self, microsim, dataset, system): """Build a PolicyEngine Core simulation from dataset entity IDs. - This follows the same pattern as policyengine-uk, initializing - entities from IDs first, then using set_input() for variables. - - Args: - microsim: The Microsimulation object to populate - dataset: The dataset containing entity data - system: The tax-benefit system + Mirrors the policyengine-uk pattern of instantiating entities from + IDs first and then setting variable inputs. Handles both the legacy + ``person_X_id`` and the ``X_id`` column-naming conventions. """ import numpy as np from policyengine_core.simulations.simulation_builder import ( SimulationBuilder, ) - # Create builder and instantiate entities builder = SimulationBuilder() builder.populations = system.instantiate_entities() - # Extract entity IDs from dataset person_data = pd.DataFrame(dataset.data.person) - # Determine column naming convention - # Support both person_X_id (from create_datasets) and X_id (from custom datasets) household_id_col = ( "person_household_id" if "person_household_id" in person_data.columns @@ -485,7 +309,6 @@ def _build_simulation_from_dataset(self, microsim, dataset, system): else "tax_unit_id" ) - # Declare entities builder.declare_person_entity("person", person_data["person_id"].values) builder.declare_entity( "household", np.unique(person_data[household_id_col].values) @@ -501,7 +324,6 @@ def _build_simulation_from_dataset(self, microsim, dataset, system): "marital_unit", np.unique(person_data[marital_unit_id_col].values) ) - # Join persons to group entities builder.join_with_persons( builder.populations["household"], person_data[household_id_col].values, @@ -528,12 +350,8 @@ def _build_simulation_from_dataset(self, microsim, dataset, system): np.array(["member"] * len(person_data)), ) - # Build simulation from populations microsim.build_from_populations(builder.populations) - # Set input variables for each entity - # Skip ID columns as 
they're structural and already used in entity building - # Support both naming conventions id_columns = { "person_id", "household_id", @@ -558,7 +376,6 @@ def _build_simulation_from_dataset(self, microsim, dataset, system): ]: df = pd.DataFrame(entity_df) for column in df.columns: - # Skip ID columns and check if variable exists in system if column not in id_columns and column in system.variables: microsim.set_input(column, dataset.year, df[column].values) @@ -585,8 +402,8 @@ def managed_microsimulation( """Construct a country-package Microsimulation pinned to this bundle. By default this enforces the dataset selection from the bundled - `policyengine.py` release manifest. Arbitrary dataset URIs require - `allow_unmanaged=True`. + ``policyengine.py`` release manifest. Arbitrary dataset URIs require + ``allow_unmanaged=True``. """ from policyengine_us import Microsimulation diff --git a/src/policyengine/utils/household_validation.py b/src/policyengine/utils/household_validation.py new file mode 100644 index 00000000..6be90fb2 --- /dev/null +++ b/src/policyengine/utils/household_validation.py @@ -0,0 +1,113 @@ +"""Strict validation for household-calculation inputs. + +Catches the three typo classes that otherwise silently propagate wrong +numbers to published results: + +1. Unknown variable name entirely (``employment_incme``). +2. Valid variable placed on the wrong entity (``filing_status`` passed + to ``people`` instead of ``tax_unit``). +3. Empty ``people`` list (policyengine_us will IndexError deep in + simulation). + +All errors include paste-able fixes. 
+""" + +from __future__ import annotations + +from collections.abc import Iterable, Mapping +from difflib import get_close_matches +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from policyengine.core.tax_benefit_model_version import TaxBenefitModelVersion + + +_STRUCTURAL_KEYS = frozenset( + { + "id", + "members", + "person_id", + "household_id", + "marital_unit_id", + "family_id", + "spm_unit_id", + "tax_unit_id", + "benunit_id", + "person_weight", + "household_weight", + "marital_unit_weight", + "family_weight", + "spm_unit_weight", + "tax_unit_weight", + "benunit_weight", + } +) + + +def validate_household_input( + *, + model_version: TaxBenefitModelVersion, + entities: Mapping[str, Iterable[Mapping[str, object]]], +) -> None: + """Raise ``ValueError`` on unknown or mis-placed entity variables. + + ``entities`` maps entity name → iterable of entity dicts. Each key + is checked against ``model_version.variables_by_name``: + + - If the key is unknown, the error includes a difflib close-match + suggestion. + - If the key is a known variable but defined on a different entity, + the error names the correct entity and shows the kwarg swap. + """ + if "person" in entities and not list(entities["person"]): + raise ValueError( + "people must be a non-empty list. At minimum pass people=[{'age': }]." 
+ ) + + variables_by_name = model_version.variables_by_name + valid_names = set(variables_by_name) + unknown: list[tuple[str, str]] = [] + misplaced: list[tuple[str, str, str]] = [] + + for entity_name, records in entities.items(): + for record in records: + for key in record: + if key in _STRUCTURAL_KEYS: + continue + variable = variables_by_name.get(key) + if variable is None: + unknown.append((entity_name, key)) + elif variable.entity != entity_name: + misplaced.append((entity_name, key, variable.entity)) + + if not unknown and not misplaced: + return + + lines: list[str] = [] + if unknown: + lines.append( + f"Unknown variable names on {model_version.model.id} " + f"{model_version.version}:" + ) + for entity_name, key in unknown: + suggestions = get_close_matches(key, valid_names, n=1, cutoff=0.7) + hint = f" (did you mean '{suggestions[0]}'?)" if suggestions else "" + lines.append(f" - {entity_name}: '{key}'{hint}") + if not misplaced: + first_bad = unknown[0][1] + lines.append( + f"If '{first_bad}' is a real variable outside the default " + f"output columns, pass it via extra_variables=['{first_bad}']." + ) + if misplaced: + if lines: + lines.append("") + lines.append("Variables passed on the wrong entity:") + for wrong_entity, key, correct_entity in misplaced: + lines.append( + f" - '{key}' was given on {wrong_entity}; it belongs on " + f"{correct_entity}. Move it: pass " + f"{correct_entity}={{'{key}': }}." 
+ ) + + raise ValueError("\n".join(lines)) diff --git a/tests/fixtures/household_calculator_snapshots/uk_couple_two_kids.json b/tests/fixtures/household_calculator_snapshots/uk_couple_two_kids.json new file mode 100644 index 00000000..49302124 --- /dev/null +++ b/tests/fixtures/household_calculator_snapshots/uk_couple_two_kids.json @@ -0,0 +1,139 @@ +{ + "benunit.benunit_id": 0.0, + "benunit.benunit_weight": 1.0, + "benunit.child_benefit": 2328.16, + "benunit.child_tax_credit": 0.0, + "benunit.family_type": "COUPLE_WITH_CHILDREN", + "benunit.income_support": 0.0, + "benunit.pension_credit": 0.0, + "benunit.universal_credit": 0.0, + "benunit.working_tax_credit": 0.0, + "household.council_tax": 0.0, + "household.equiv_hbai_household_net_income": 52503.68, + "household.hbai_household_net_income": 73505.15, + "household.household_benefits": 5880.35, + "household.household_count_people": 4.0, + "household.household_gross_income": 95880.34, + "household.household_id": 0.0, + "household.household_income_decile": 10.0, + "household.household_market_income": 90000.0, + "household.household_net_income": 76898.3, + "household.household_tax": 18982.05, + "household.household_wealth_decile": 10.0, + "household.household_weight": 1.0, + "household.in_poverty_ahc": 0.0, + "household.in_poverty_bhc": 0.0, + "household.in_relative_poverty_ahc": 0.0, + "household.in_relative_poverty_bhc": 0.0, + "household.rent": 0.0, + "household.tenure_type": "RENT_PRIVATELY", + "household.vat": 0.0, + "person[0].age": 42.0, + "person[0].benunit_id": 0.0, + "person[0].child_benefit": 2328.16, + "person[0].child_tax_credit": 0.0, + "person[0].dividend_income": 0.0, + "person[0].earned_income": 55000.0, + "person[0].employment_income": 55000.0, + "person[0].gender": "MALE", + "person[0].household_id": 0.0, + "person[0].income_support": 0.0, + "person[0].income_tax": 9432.0, + "person[0].is_SP_age": 0.0, + "person[0].is_adult": 1.0, + "person[0].is_child": 0.0, + "person[0].is_male": 1.0, + 
"person[0].national_insurance": 3110.6, + "person[0].pension_credit": 0.0, + "person[0].pension_income": 0.0, + "person[0].person_id": 0.0, + "person[0].person_weight": 1.0, + "person[0].private_pension_income": 0.0, + "person[0].property_income": 0.0, + "person[0].savings_interest_income": 0.0, + "person[0].self_employment_income": 0.0, + "person[0].total_income": 55000.0, + "person[0].universal_credit": 0.0, + "person[0].working_tax_credit": 0.0, + "person[1].age": 40.0, + "person[1].benunit_id": 0.0, + "person[1].child_benefit": 2328.16, + "person[1].child_tax_credit": 0.0, + "person[1].dividend_income": 0.0, + "person[1].earned_income": 35000.0, + "person[1].employment_income": 35000.0, + "person[1].gender": "MALE", + "person[1].household_id": 0.0, + "person[1].income_support": 0.0, + "person[1].income_tax": 4486.0, + "person[1].is_SP_age": 0.0, + "person[1].is_adult": 1.0, + "person[1].is_child": 0.0, + "person[1].is_male": 1.0, + "person[1].national_insurance": 1794.4, + "person[1].pension_credit": 0.0, + "person[1].pension_income": 0.0, + "person[1].person_id": 0.0, + "person[1].person_weight": 1.0, + "person[1].private_pension_income": 0.0, + "person[1].property_income": 0.0, + "person[1].savings_interest_income": 0.0, + "person[1].self_employment_income": 0.0, + "person[1].total_income": 35000.0, + "person[1].universal_credit": 0.0, + "person[1].working_tax_credit": 0.0, + "person[2].age": 8.0, + "person[2].benunit_id": 0.0, + "person[2].child_benefit": 2328.16, + "person[2].child_tax_credit": 0.0, + "person[2].dividend_income": 0.0, + "person[2].earned_income": 0.0, + "person[2].employment_income": 0.0, + "person[2].gender": "MALE", + "person[2].household_id": 0.0, + "person[2].income_support": 0.0, + "person[2].income_tax": 0.0, + "person[2].is_SP_age": 0.0, + "person[2].is_adult": 0.0, + "person[2].is_child": 1.0, + "person[2].is_male": 1.0, + "person[2].national_insurance": 0.0, + "person[2].pension_credit": 0.0, + "person[2].pension_income": 0.0, + 
"person[2].person_id": 0.0, + "person[2].person_weight": 1.0, + "person[2].private_pension_income": 0.0, + "person[2].property_income": 0.0, + "person[2].savings_interest_income": 0.0, + "person[2].self_employment_income": 0.0, + "person[2].total_income": 0.0, + "person[2].universal_credit": 0.0, + "person[2].working_tax_credit": 0.0, + "person[3].age": 3.0, + "person[3].benunit_id": 0.0, + "person[3].child_benefit": 2328.16, + "person[3].child_tax_credit": 0.0, + "person[3].dividend_income": 0.0, + "person[3].earned_income": 0.0, + "person[3].employment_income": 0.0, + "person[3].gender": "MALE", + "person[3].household_id": 0.0, + "person[3].income_support": 0.0, + "person[3].income_tax": 0.0, + "person[3].is_SP_age": 0.0, + "person[3].is_adult": 0.0, + "person[3].is_child": 1.0, + "person[3].is_male": 1.0, + "person[3].national_insurance": 0.0, + "person[3].pension_credit": 0.0, + "person[3].pension_income": 0.0, + "person[3].person_id": 0.0, + "person[3].person_weight": 1.0, + "person[3].private_pension_income": 0.0, + "person[3].property_income": 0.0, + "person[3].savings_interest_income": 0.0, + "person[3].self_employment_income": 0.0, + "person[3].total_income": 0.0, + "person[3].universal_credit": 0.0, + "person[3].working_tax_credit": 0.0 +} diff --git a/tests/fixtures/household_calculator_snapshots/uk_model_surface.json b/tests/fixtures/household_calculator_snapshots/uk_model_surface.json new file mode 100644 index 00000000..161ef0ec --- /dev/null +++ b/tests/fixtures/household_calculator_snapshots/uk_model_surface.json @@ -0,0 +1,11 @@ +{ + "country_id": "uk", + "data_package_name": "policyengine-uk-data", + "has_employment_income": true, + "has_income_tax": true, + "has_region_registry": true, + "model_package_name": "policyengine-uk", + "num_parameters_bucketed_100s": 20, + "num_variables_bucketed_100s": 8, + "region_registry_country": "uk" +} diff --git a/tests/fixtures/household_calculator_snapshots/uk_single_adult_employment_income.json 
b/tests/fixtures/household_calculator_snapshots/uk_single_adult_employment_income.json new file mode 100644 index 00000000..5ec94094 --- /dev/null +++ b/tests/fixtures/household_calculator_snapshots/uk_single_adult_employment_income.json @@ -0,0 +1,58 @@ +{ + "benunit.benunit_id": 0.0, + "benunit.benunit_weight": 1.0, + "benunit.child_benefit": 0.0, + "benunit.child_tax_credit": 0.0, + "benunit.family_type": "SINGLE", + "benunit.income_support": 0.0, + "benunit.pension_credit": 0.0, + "benunit.universal_credit": 0.0, + "benunit.working_tax_credit": 0.0, + "household.council_tax": 0.0, + "household.equiv_hbai_household_net_income": 37491.94, + "household.hbai_household_net_income": 25119.6, + "household.household_benefits": 0.0, + "household.household_count_people": 1.0, + "household.household_gross_income": 30000.0, + "household.household_id": 0.0, + "household.household_income_decile": 10.0, + "household.household_market_income": 30000.0, + "household.household_net_income": 24960.55, + "household.household_tax": 5039.45, + "household.household_wealth_decile": 10.0, + "household.household_weight": 1.0, + "household.in_poverty_ahc": 0.0, + "household.in_poverty_bhc": 0.0, + "household.in_relative_poverty_ahc": 0.0, + "household.in_relative_poverty_bhc": 0.0, + "household.rent": 0.0, + "household.tenure_type": "RENT_PRIVATELY", + "household.vat": 0.0, + "person[0].age": 35.0, + "person[0].benunit_id": 0.0, + "person[0].child_benefit": 0.0, + "person[0].child_tax_credit": 0.0, + "person[0].dividend_income": 0.0, + "person[0].earned_income": 30000.0, + "person[0].employment_income": 30000.0, + "person[0].gender": "MALE", + "person[0].household_id": 0.0, + "person[0].income_support": 0.0, + "person[0].income_tax": 3486.0, + "person[0].is_SP_age": 0.0, + "person[0].is_adult": 1.0, + "person[0].is_child": 0.0, + "person[0].is_male": 1.0, + "person[0].national_insurance": 1394.4, + "person[0].pension_credit": 0.0, + "person[0].pension_income": 0.0, + "person[0].person_id": 
0.0, + "person[0].person_weight": 1.0, + "person[0].private_pension_income": 0.0, + "person[0].property_income": 0.0, + "person[0].savings_interest_income": 0.0, + "person[0].self_employment_income": 0.0, + "person[0].total_income": 30000.0, + "person[0].universal_credit": 0.0, + "person[0].working_tax_credit": 0.0 +} diff --git a/tests/fixtures/household_calculator_snapshots/uk_single_adult_no_income.json b/tests/fixtures/household_calculator_snapshots/uk_single_adult_no_income.json new file mode 100644 index 00000000..59657e2c --- /dev/null +++ b/tests/fixtures/household_calculator_snapshots/uk_single_adult_no_income.json @@ -0,0 +1,58 @@ +{ + "benunit.benunit_id": 0.0, + "benunit.benunit_weight": 1.0, + "benunit.child_benefit": 0.0, + "benunit.child_tax_credit": 0.0, + "benunit.family_type": "SINGLE", + "benunit.income_support": 0.0, + "benunit.pension_credit": 0.0, + "benunit.universal_credit": 5079.13, + "benunit.working_tax_credit": 0.0, + "household.council_tax": 0.0, + "household.equiv_hbai_household_net_income": 7580.79, + "household.hbai_household_net_income": 5079.13, + "household.household_benefits": 5079.13, + "household.household_count_people": 1.0, + "household.household_gross_income": 5079.13, + "household.household_id": 0.0, + "household.household_income_decile": 10.0, + "household.household_market_income": 0.0, + "household.household_net_income": 4920.09, + "household.household_tax": 159.04, + "household.household_wealth_decile": 10.0, + "household.household_weight": 1.0, + "household.in_poverty_ahc": 1.0, + "household.in_poverty_bhc": 1.0, + "household.in_relative_poverty_ahc": 0.0, + "household.in_relative_poverty_bhc": 0.0, + "household.rent": 0.0, + "household.tenure_type": "RENT_PRIVATELY", + "household.vat": 0.0, + "person[0].age": 35.0, + "person[0].benunit_id": 0.0, + "person[0].child_benefit": 0.0, + "person[0].child_tax_credit": 0.0, + "person[0].dividend_income": 0.0, + "person[0].earned_income": 0.0, + "person[0].employment_income": 
0.0, + "person[0].gender": "MALE", + "person[0].household_id": 0.0, + "person[0].income_support": 0.0, + "person[0].income_tax": 0.0, + "person[0].is_SP_age": 0.0, + "person[0].is_adult": 1.0, + "person[0].is_child": 0.0, + "person[0].is_male": 1.0, + "person[0].national_insurance": 0.0, + "person[0].pension_credit": 0.0, + "person[0].pension_income": 0.0, + "person[0].person_id": 0.0, + "person[0].person_weight": 1.0, + "person[0].private_pension_income": 0.0, + "person[0].property_income": 0.0, + "person[0].savings_interest_income": 0.0, + "person[0].self_employment_income": 0.0, + "person[0].total_income": 0.0, + "person[0].universal_credit": 5079.13, + "person[0].working_tax_credit": 0.0 +} diff --git a/tests/fixtures/household_calculator_snapshots/uk_single_parent_one_child.json b/tests/fixtures/household_calculator_snapshots/uk_single_parent_one_child.json new file mode 100644 index 00000000..06e55db0 --- /dev/null +++ b/tests/fixtures/household_calculator_snapshots/uk_single_parent_one_child.json @@ -0,0 +1,85 @@ +{ + "benunit.benunit_id": 0.0, + "benunit.benunit_weight": 1.0, + "benunit.child_benefit": 1400.66, + "benunit.child_tax_credit": 0.0, + "benunit.family_type": "LONE_PARENT", + "benunit.income_support": 0.0, + "benunit.pension_credit": 0.0, + "benunit.universal_credit": 1544.43, + "benunit.working_tax_credit": 0.0, + "household.council_tax": 0.0, + "household.equiv_hbai_household_net_income": 28120.33, + "household.hbai_household_net_income": 24464.69, + "household.household_benefits": 2945.09, + "household.household_count_people": 2.0, + "household.household_gross_income": 27945.09, + "household.household_id": 0.0, + "household.household_income_decile": 10.0, + "household.household_market_income": 25000.0, + "household.household_net_income": 24305.64, + "household.household_tax": 3639.45, + "household.household_wealth_decile": 10.0, + "household.household_weight": 1.0, + "household.in_poverty_ahc": 0.0, + "household.in_poverty_bhc": 0.0, + 
"household.in_relative_poverty_ahc": 0.0, + "household.in_relative_poverty_bhc": 0.0, + "household.rent": 0.0, + "household.tenure_type": "RENT_PRIVATELY", + "household.vat": 0.0, + "person[0].age": 32.0, + "person[0].benunit_id": 0.0, + "person[0].child_benefit": 1400.66, + "person[0].child_tax_credit": 0.0, + "person[0].dividend_income": 0.0, + "person[0].earned_income": 25000.0, + "person[0].employment_income": 25000.0, + "person[0].gender": "MALE", + "person[0].household_id": 0.0, + "person[0].income_support": 0.0, + "person[0].income_tax": 2486.0, + "person[0].is_SP_age": 0.0, + "person[0].is_adult": 1.0, + "person[0].is_child": 0.0, + "person[0].is_male": 1.0, + "person[0].national_insurance": 994.4, + "person[0].pension_credit": 0.0, + "person[0].pension_income": 0.0, + "person[0].person_id": 0.0, + "person[0].person_weight": 1.0, + "person[0].private_pension_income": 0.0, + "person[0].property_income": 0.0, + "person[0].savings_interest_income": 0.0, + "person[0].self_employment_income": 0.0, + "person[0].total_income": 25000.0, + "person[0].universal_credit": 1544.43, + "person[0].working_tax_credit": 0.0, + "person[1].age": 5.0, + "person[1].benunit_id": 0.0, + "person[1].child_benefit": 1400.66, + "person[1].child_tax_credit": 0.0, + "person[1].dividend_income": 0.0, + "person[1].earned_income": 0.0, + "person[1].employment_income": 0.0, + "person[1].gender": "MALE", + "person[1].household_id": 0.0, + "person[1].income_support": 0.0, + "person[1].income_tax": 0.0, + "person[1].is_SP_age": 0.0, + "person[1].is_adult": 0.0, + "person[1].is_child": 1.0, + "person[1].is_male": 1.0, + "person[1].national_insurance": 0.0, + "person[1].pension_credit": 0.0, + "person[1].pension_income": 0.0, + "person[1].person_id": 0.0, + "person[1].person_weight": 1.0, + "person[1].private_pension_income": 0.0, + "person[1].property_income": 0.0, + "person[1].savings_interest_income": 0.0, + "person[1].self_employment_income": 0.0, + "person[1].total_income": 0.0, + 
"person[1].universal_credit": 1544.43, + "person[1].working_tax_credit": 0.0 +} diff --git a/tests/fixtures/household_calculator_snapshots/us_married_two_kids_high_income.json b/tests/fixtures/household_calculator_snapshots/us_married_two_kids_high_income.json new file mode 100644 index 00000000..1d5e98ca --- /dev/null +++ b/tests/fixtures/household_calculator_snapshots/us_married_two_kids_high_income.json @@ -0,0 +1,97 @@ +{ + "family.family_id": 0.0, + "family.family_weight": 0.0, + "household.congressional_district_geoid": 0.0, + "household.household_benefits": 0.0, + "household.household_count_people": 4.0, + "household.household_id": 0.0, + "household.household_income_decile": 10.0, + "household.household_market_income": 240000.0, + "household.household_net_income": 175089.92, + "household.household_tax": 64910.07, + "household.household_weight": 1.0, + "marital_unit.marital_unit_id": 0.0, + "marital_unit.marital_unit_weight": 1.0, + "person[0].age": 42.0, + "person[0].employment_income": 150000.0, + "person[0].family_id": 0.0, + "person[0].household_id": 0.0, + "person[0].is_adult": 1.0, + "person[0].is_child": 0.0, + "person[0].is_male": 1.0, + "person[0].marital_unit_id": 0.0, + "person[0].medicaid": 0.0, + "person[0].person_id": 0.0, + "person[0].person_weight": 1.0, + "person[0].race": 3.0, + "person[0].social_security": 0.0, + "person[0].spm_unit_id": 0.0, + "person[0].ssi": 0.0, + "person[0].tax_unit_id": 0.0, + "person[0].unemployment_compensation": 0.0, + "person[1].age": 40.0, + "person[1].employment_income": 90000.0, + "person[1].family_id": 0.0, + "person[1].household_id": 0.0, + "person[1].is_adult": 1.0, + "person[1].is_child": 0.0, + "person[1].is_male": 1.0, + "person[1].marital_unit_id": 0.0, + "person[1].medicaid": 0.0, + "person[1].person_id": 1.0, + "person[1].person_weight": 1.0, + "person[1].race": 3.0, + "person[1].social_security": 0.0, + "person[1].spm_unit_id": 0.0, + "person[1].ssi": 0.0, + "person[1].tax_unit_id": 0.0, + 
"person[1].unemployment_compensation": 0.0, + "person[2].age": 8.0, + "person[2].employment_income": 0.0, + "person[2].family_id": 0.0, + "person[2].household_id": 0.0, + "person[2].is_adult": 0.0, + "person[2].is_child": 1.0, + "person[2].is_male": 1.0, + "person[2].marital_unit_id": 0.0, + "person[2].medicaid": 0.0, + "person[2].person_id": 2.0, + "person[2].person_weight": 1.0, + "person[2].race": 3.0, + "person[2].social_security": 0.0, + "person[2].spm_unit_id": 0.0, + "person[2].ssi": 0.0, + "person[2].tax_unit_id": 0.0, + "person[2].unemployment_compensation": 0.0, + "person[3].age": 3.0, + "person[3].employment_income": 0.0, + "person[3].family_id": 0.0, + "person[3].household_id": 0.0, + "person[3].is_adult": 0.0, + "person[3].is_child": 1.0, + "person[3].is_male": 1.0, + "person[3].marital_unit_id": 0.0, + "person[3].medicaid": 0.0, + "person[3].person_id": 3.0, + "person[3].person_weight": 1.0, + "person[3].race": 3.0, + "person[3].social_security": 0.0, + "person[3].spm_unit_id": 0.0, + "person[3].ssi": 0.0, + "person[3].tax_unit_id": 0.0, + "person[3].unemployment_compensation": 0.0, + "spm_unit.snap": 0.0, + "spm_unit.spm_unit_id": 0.0, + "spm_unit.spm_unit_is_in_deep_spm_poverty": 0.0, + "spm_unit.spm_unit_is_in_spm_poverty": 0.0, + "spm_unit.spm_unit_net_income": 175089.92, + "spm_unit.spm_unit_weight": 1.0, + "spm_unit.tanf": 0.0, + "tax_unit.ctc": 4400.0, + "tax_unit.eitc": 0.0, + "tax_unit.employee_payroll_tax": 21480.0, + "tax_unit.household_state_income_tax": 12690.07, + "tax_unit.income_tax": 30740.0, + "tax_unit.tax_unit_id": 0.0, + "tax_unit.tax_unit_weight": 1.0 +} diff --git a/tests/fixtures/household_calculator_snapshots/us_model_surface.json b/tests/fixtures/household_calculator_snapshots/us_model_surface.json new file mode 100644 index 00000000..eaf4352e --- /dev/null +++ b/tests/fixtures/household_calculator_snapshots/us_model_surface.json @@ -0,0 +1,11 @@ +{ + "country_id": "us", + "data_package_name": "policyengine-us-data", + 
"has_employment_income": true, + "has_income_tax": true, + "has_region_registry": true, + "model_package_name": "policyengine-us", + "num_parameters_bucketed_100s": 777, + "num_variables_bucketed_100s": 46, + "region_registry_country": "us" +} diff --git a/tests/fixtures/household_calculator_snapshots/us_single_adult_employment_income.json b/tests/fixtures/household_calculator_snapshots/us_single_adult_employment_income.json new file mode 100644 index 00000000..d94660a9 --- /dev/null +++ b/tests/fixtures/household_calculator_snapshots/us_single_adult_employment_income.json @@ -0,0 +1,46 @@ +{ + "family.family_id": 0.0, + "family.family_weight": 0.0, + "household.congressional_district_geoid": 0.0, + "household.household_benefits": 0.0, + "household.household_count_people": 1.0, + "household.household_id": 0.0, + "household.household_income_decile": 10.0, + "household.household_market_income": 60000.0, + "household.household_net_income": 48007.14, + "household.household_tax": 11992.86, + "household.household_weight": 1.0, + "marital_unit.marital_unit_id": 0.0, + "marital_unit.marital_unit_weight": 1.0, + "person[0].age": 35.0, + "person[0].employment_income": 60000.0, + "person[0].family_id": 0.0, + "person[0].household_id": 0.0, + "person[0].is_adult": 1.0, + "person[0].is_child": 0.0, + "person[0].is_male": 1.0, + "person[0].marital_unit_id": 0.0, + "person[0].medicaid": 0.0, + "person[0].person_id": 0.0, + "person[0].person_weight": 1.0, + "person[0].race": 3.0, + "person[0].social_security": 0.0, + "person[0].spm_unit_id": 0.0, + "person[0].ssi": 0.0, + "person[0].tax_unit_id": 0.0, + "person[0].unemployment_compensation": 0.0, + "spm_unit.snap": 0.0, + "spm_unit.spm_unit_id": 0.0, + "spm_unit.spm_unit_is_in_deep_spm_poverty": 0.0, + "spm_unit.spm_unit_is_in_spm_poverty": 0.0, + "spm_unit.spm_unit_net_income": 48007.14, + "spm_unit.spm_unit_weight": 1.0, + "spm_unit.tanf": 0.0, + "tax_unit.ctc": 0.0, + "tax_unit.eitc": 0.0, + "tax_unit.employee_payroll_tax": 
5370.0, + "tax_unit.household_state_income_tax": 1602.86, + "tax_unit.income_tax": 5020.0, + "tax_unit.tax_unit_id": 0.0, + "tax_unit.tax_unit_weight": 1.0 +} diff --git a/tests/fixtures/household_calculator_snapshots/us_single_adult_no_income.json b/tests/fixtures/household_calculator_snapshots/us_single_adult_no_income.json new file mode 100644 index 00000000..258db6f1 --- /dev/null +++ b/tests/fixtures/household_calculator_snapshots/us_single_adult_no_income.json @@ -0,0 +1,46 @@ +{ + "family.family_id": 0.0, + "family.family_weight": 0.0, + "household.congressional_district_geoid": 0.0, + "household.household_benefits": 3596.04, + "household.household_count_people": 1.0, + "household.household_id": 0.0, + "household.household_income_decile": 10.0, + "household.household_market_income": 0.0, + "household.household_net_income": 3596.04, + "household.household_tax": 0.0, + "household.household_weight": 1.0, + "marital_unit.marital_unit_id": 0.0, + "marital_unit.marital_unit_weight": 1.0, + "person[0].age": 35.0, + "person[0].employment_income": 0.0, + "person[0].family_id": 0.0, + "person[0].household_id": 0.0, + "person[0].is_adult": 1.0, + "person[0].is_child": 0.0, + "person[0].is_male": 1.0, + "person[0].marital_unit_id": 0.0, + "person[0].medicaid": 6439.11, + "person[0].person_id": 0.0, + "person[0].person_weight": 1.0, + "person[0].race": 3.0, + "person[0].social_security": 0.0, + "person[0].spm_unit_id": 0.0, + "person[0].ssi": 0.0, + "person[0].tax_unit_id": 0.0, + "person[0].unemployment_compensation": 0.0, + "spm_unit.snap": 3596.04, + "spm_unit.spm_unit_id": 0.0, + "spm_unit.spm_unit_is_in_deep_spm_poverty": 0.0, + "spm_unit.spm_unit_is_in_spm_poverty": 0.0, + "spm_unit.spm_unit_net_income": 3596.04, + "spm_unit.spm_unit_weight": 1.0, + "spm_unit.tanf": 0.0, + "tax_unit.ctc": 0.0, + "tax_unit.eitc": 0.0, + "tax_unit.employee_payroll_tax": 0.0, + "tax_unit.household_state_income_tax": 0.0, + "tax_unit.income_tax": 0.0, + "tax_unit.tax_unit_id": 0.0, + 
"tax_unit.tax_unit_weight": 1.0 +} diff --git a/tests/fixtures/household_calculator_snapshots/us_single_parent_one_child.json b/tests/fixtures/household_calculator_snapshots/us_single_parent_one_child.json new file mode 100644 index 00000000..78ba7237 --- /dev/null +++ b/tests/fixtures/household_calculator_snapshots/us_single_parent_one_child.json @@ -0,0 +1,63 @@ +{ + "family.family_id": 0.0, + "family.family_weight": 0.0, + "household.congressional_district_geoid": 0.0, + "household.household_benefits": 1003.27, + "household.household_count_people": 2.0, + "household.household_id": 0.0, + "household.household_income_decile": 10.0, + "household.household_market_income": 40000.0, + "household.household_net_income": 39890.89, + "household.household_tax": 1112.38, + "household.household_weight": 1.0, + "marital_unit.marital_unit_id": 0.0, + "marital_unit.marital_unit_weight": 1.0, + "person[0].age": 32.0, + "person[0].employment_income": 40000.0, + "person[0].family_id": 0.0, + "person[0].household_id": 0.0, + "person[0].is_adult": 1.0, + "person[0].is_child": 0.0, + "person[0].is_male": 1.0, + "person[0].marital_unit_id": 0.0, + "person[0].medicaid": 0.0, + "person[0].person_id": 0.0, + "person[0].person_weight": 1.0, + "person[0].race": 3.0, + "person[0].social_security": 0.0, + "person[0].spm_unit_id": 0.0, + "person[0].ssi": 0.0, + "person[0].tax_unit_id": 0.0, + "person[0].unemployment_compensation": 0.0, + "person[1].age": 5.0, + "person[1].employment_income": 0.0, + "person[1].family_id": 0.0, + "person[1].household_id": 0.0, + "person[1].is_adult": 0.0, + "person[1].is_child": 1.0, + "person[1].is_male": 1.0, + "person[1].marital_unit_id": 0.0, + "person[1].medicaid": 3258.31, + "person[1].person_id": 1.0, + "person[1].person_weight": 1.0, + "person[1].race": 3.0, + "person[1].social_security": 0.0, + "person[1].spm_unit_id": 0.0, + "person[1].ssi": 0.0, + "person[1].tax_unit_id": 0.0, + "person[1].unemployment_compensation": 0.0, + "spm_unit.snap": 0.0, + 
"spm_unit.spm_unit_id": 0.0, + "spm_unit.spm_unit_is_in_deep_spm_poverty": 0.0, + "spm_unit.spm_unit_is_in_spm_poverty": 0.0, + "spm_unit.spm_unit_net_income": 39890.89, + "spm_unit.spm_unit_weight": 1.0, + "spm_unit.tanf": 0.0, + "tax_unit.ctc": 2200.0, + "tax_unit.eitc": 1852.62, + "tax_unit.employee_payroll_tax": 3580.0, + "tax_unit.household_state_income_tax": 0.0, + "tax_unit.income_tax": -2467.62, + "tax_unit.tax_unit_id": 0.0, + "tax_unit.tax_unit_weight": 1.0 +} diff --git a/tests/fixtures/us_reform_fixtures.py b/tests/fixtures/us_reform_fixtures.py index c52a7aba..4292c085 100644 --- a/tests/fixtures/us_reform_fixtures.py +++ b/tests/fixtures/us_reform_fixtures.py @@ -1,11 +1,15 @@ -"""Fixtures for US reform application tests.""" +"""Fixtures for US reform application tests. + +Household fixtures are plain ``kwargs`` dicts ready to splat into +``pe.us.calculate_household(**fixture)``. +""" from datetime import date import pytest from policyengine.core import ParameterValue, Policy -from policyengine.tax_benefit_models.us import USHouseholdInput, us_latest +from policyengine.tax_benefit_models.us import us_latest def create_standard_deduction_policy( @@ -56,51 +60,43 @@ def create_standard_deduction_policy( ) -# Pre-built household fixtures +# Pre-built household fixtures (as kwargs dicts for calculate_household) -HIGH_INCOME_SINGLE_FILER = USHouseholdInput( - people=[ - { - "age": 35, - "employment_income": 100000, - "is_tax_unit_head": True, - } +HIGH_INCOME_SINGLE_FILER = { + "people": [ + {"age": 35, "employment_income": 100000, "is_tax_unit_head": True}, ], - tax_unit={"filing_status": "SINGLE"}, - year=2024, -) + "tax_unit": {"filing_status": "SINGLE"}, + "year": 2024, +} -MODERATE_INCOME_SINGLE_FILER = USHouseholdInput( - people=[ - { - "age": 30, - "employment_income": 50000, - "is_tax_unit_head": True, - } +MODERATE_INCOME_SINGLE_FILER = { + "people": [ + {"age": 30, "employment_income": 50000, "is_tax_unit_head": True}, ], - 
tax_unit={"filing_status": "SINGLE"}, - year=2024, -) + "tax_unit": {"filing_status": "SINGLE"}, + "year": 2024, +} -MARRIED_COUPLE_WITH_KIDS = USHouseholdInput( - people=[ +MARRIED_COUPLE_WITH_KIDS = { + "people": [ {"age": 40, "employment_income": 100000, "is_tax_unit_head": True}, {"age": 38, "employment_income": 50000, "is_tax_unit_spouse": True}, {"age": 10}, {"age": 8}, ], - tax_unit={"filing_status": "JOINT"}, - year=2024, -) + "tax_unit": {"filing_status": "JOINT"}, + "year": 2024, +} -LOW_INCOME_FAMILY = USHouseholdInput( - people=[ +LOW_INCOME_FAMILY = { + "people": [ {"age": 28, "employment_income": 25000, "is_tax_unit_head": True}, {"age": 5}, ], - tax_unit={"filing_status": "HEAD_OF_HOUSEHOLD"}, - year=2024, -) + "tax_unit": {"filing_status": "HEAD_OF_HOUSEHOLD"}, + "year": 2024, +} # Pytest fixtures @@ -108,17 +104,14 @@ def create_standard_deduction_policy( @pytest.fixture def double_standard_deduction_policy(): - """Pytest fixture for doubled standard deduction policy.""" return DOUBLE_STANDARD_DEDUCTION_POLICY @pytest.fixture def high_income_single_filer(): - """Pytest fixture for high income single filer household.""" return HIGH_INCOME_SINGLE_FILER @pytest.fixture def married_couple_with_kids(): - """Pytest fixture for married couple with kids household.""" return MARRIED_COUPLE_WITH_KIDS diff --git a/tests/test_dict_reforms_on_simulation.py b/tests/test_dict_reforms_on_simulation.py new file mode 100644 index 00000000..b1781c1a --- /dev/null +++ b/tests/test_dict_reforms_on_simulation.py @@ -0,0 +1,128 @@ +"""``Simulation(policy={...})`` and ``Simulation(dynamic={...})``. + +These tests pin the v4 contract: the same flat reform dict shape that +``pe.{uk,us}.calculate_household(reform=...)`` accepts is also accepted +by ``Simulation(policy=...)`` / ``Simulation(dynamic=...)``, and is +compiled into the full ``Policy`` / ``Dynamic`` object on construction. 
+We exercise only the coercion path — no country microsim is run — so +the tests are fast and don't need HF credentials. +""" + +from __future__ import annotations + +import pytest + +pytest.importorskip("policyengine_us") + +import policyengine as pe +from policyengine.core import Dynamic, Policy, Simulation + +# ``us_test_dataset`` is registered globally via ``tests/conftest.py``. + + +@pytest.fixture +def tiny_dataset(us_test_dataset): + """In-memory US dataset pinned to 2026. Simulation is never .run() in these tests.""" + us_test_dataset.year = 2026 + return us_test_dataset + + +class TestDictPolicyCoercion: + def test__dict_policy__then_compiled_to_policy_with_parameter_values( + self, tiny_dataset + ): + sim = Simulation( + dataset=tiny_dataset, + tax_benefit_model_version=pe.us.model, + policy={"gov.irs.credits.ctc.amount.base[0].amount": 3_000}, + ) + assert isinstance(sim.policy, Policy) + assert len(sim.policy.parameter_values) == 1 + + pv = sim.policy.parameter_values[0] + assert pv.parameter.name == "gov.irs.credits.ctc.amount.base[0].amount" + assert pv.value == 3_000 + # Scalar reforms default the effective date to {year}-01-01. 
+ assert pv.start_date.year == 2026 + assert pv.start_date.month == 1 + + def test__dict_policy_with_effective_date__then_start_date_matches( + self, tiny_dataset + ): + sim = Simulation( + dataset=tiny_dataset, + tax_benefit_model_version=pe.us.model, + policy={ + "gov.irs.credits.ctc.amount.base[0].amount": { + "2026-07-01": 2_500, + "2027-01-01": 3_000, + }, + }, + ) + assert isinstance(sim.policy, Policy) + assert len(sim.policy.parameter_values) == 2 + starts = sorted(pv.start_date for pv in sim.policy.parameter_values) + assert [d.strftime("%Y-%m-%d") for d in starts] == [ + "2026-07-01", + "2027-01-01", + ] + + def test__unknown_parameter_path__raises_with_suggestion(self, tiny_dataset): + with pytest.raises(ValueError) as exc: + Simulation( + dataset=tiny_dataset, + tax_benefit_model_version=pe.us.model, + policy={ + # plausible typo of the real path + "gov.irs.credits.ctc.amount.base[0].amont": 3_000, + }, + ) + assert "not defined" in str(exc.value) + assert "did you mean" in str(exc.value) + + def test__existing_policy_object_passes_through_unchanged(self, tiny_dataset): + import datetime + + from policyengine.core import Parameter, ParameterValue + + existing = Policy( + name="Existing", + parameter_values=[ + ParameterValue( + parameter=Parameter( + name="gov.irs.credits.ctc.amount.base[0].amount", + tax_benefit_model_version=pe.us.model, + data_type=float, + ), + start_date=datetime.datetime(2026, 1, 1), + end_date=None, + value=2_750, + ) + ], + ) + sim = Simulation( + dataset=tiny_dataset, + tax_benefit_model_version=pe.us.model, + policy=existing, + ) + assert sim.policy is existing + + def test__dict_without_model_version__raises(self, tiny_dataset): + with pytest.raises(ValueError) as exc: + Simulation( + dataset=tiny_dataset, + policy={"gov.irs.credits.ctc.amount.base[0].amount": 3_000}, + ) + assert "tax_benefit_model_version" in str(exc.value) + + +class TestDictDynamicCoercion: + def test__dict_dynamic__then_compiled_to_dynamic(self, 
tiny_dataset): + sim = Simulation( + dataset=tiny_dataset, + tax_benefit_model_version=pe.us.model, + dynamic={"gov.irs.credits.ctc.amount.base[0].amount": 2_800}, + ) + assert isinstance(sim.dynamic, Dynamic) + assert len(sim.dynamic.parameter_values) == 1 + assert sim.dynamic.parameter_values[0].value == 2_800 diff --git a/tests/test_household_calculator_snapshot.py b/tests/test_household_calculator_snapshot.py new file mode 100644 index 00000000..987d49d8 --- /dev/null +++ b/tests/test_household_calculator_snapshot.py @@ -0,0 +1,217 @@ +"""Byte-level snapshot regression test for MicrosimulationModelVersion extraction. + +These tests freeze the exact numeric outputs of both the US and UK household +calculators across a representative set of cases. The intent is to make the +base-class extraction (PR F) fail loudly if any country-specific behaviour +drifts during the refactor. + +Snapshots live in ``tests/fixtures/household_calculator_snapshots/``. To refresh +them, run with ``PE_UPDATE_SNAPSHOTS=1`` set. Do **not** refresh them as part +of a refactor meant to be behaviour-preserving. 
+""" + +from __future__ import annotations + +import json +import math +import os +from pathlib import Path + +import pytest + +SNAPSHOT_DIR = Path(__file__).parent / "fixtures" / "household_calculator_snapshots" +UPDATE = os.environ.get("PE_UPDATE_SNAPSHOTS") == "1" + + +def _flatten(prefix: str, value, out: dict[str, float]) -> None: + """Flatten a nested ``HouseholdResult`` into ``"path.name" -> scalar``.""" + if isinstance(value, list): + for idx, item in enumerate(value): + _flatten(f"{prefix}[{idx}]", item, out) + return + if isinstance(value, dict): + for key, sub in value.items(): + new_prefix = f"{prefix}.{key}" if prefix else str(key) + _flatten(new_prefix, sub, out) + return + if isinstance(value, bool): + out[prefix] = float(value) + elif isinstance(value, (int, float)): + out[prefix] = float(value) + else: + out[prefix] = str(value) + + +def _round(value, places: int = 2): + if isinstance(value, float): + if math.isnan(value): + return "nan" + if math.isinf(value): + return "inf" if value > 0 else "-inf" + return round(value, places) + return value + + +def _check_snapshot(name: str, data: dict) -> None: + path = SNAPSHOT_DIR / f"{name}.json" + rounded = {k: _round(v) for k, v in sorted(data.items())} + + if UPDATE or not path.exists(): + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(rounded, indent=2, sort_keys=True) + "\n") + if not UPDATE: + pytest.skip(f"Created missing snapshot {path.name}; re-run to verify") + return + + expected = json.loads(path.read_text()) + diffs = [] + all_keys = set(expected) | set(rounded) + for key in sorted(all_keys): + if key not in expected: + diffs.append(f" new key: {key}={rounded[key]!r}") + elif key not in rounded: + diffs.append(f" removed key: {key}={expected[key]!r}") + elif expected[key] != rounded[key]: + diffs.append(f" {key}: expected {expected[key]!r}, got {rounded[key]!r}") + assert not diffs, f"Snapshot {name} drift:\n" + "\n".join(diffs[:40]) + + +# US cases 
------------------------------------------------------------------- + + +US_CASES = { + "us_single_adult_no_income": dict( + people=[{"age": 35}], + tax_unit={"filing_status": "SINGLE"}, + year=2026, + ), + "us_single_adult_employment_income": dict( + people=[{"age": 35, "employment_income": 60_000}], + tax_unit={"filing_status": "SINGLE"}, + year=2026, + ), + "us_single_parent_one_child": dict( + people=[ + {"age": 32, "employment_income": 40_000}, + {"age": 5}, + ], + tax_unit={"filing_status": "HEAD_OF_HOUSEHOLD"}, + year=2026, + ), + "us_married_two_kids_high_income": dict( + people=[ + {"age": 42, "employment_income": 150_000}, + {"age": 40, "employment_income": 90_000}, + {"age": 8}, + {"age": 3}, + ], + tax_unit={"filing_status": "JOINT"}, + year=2026, + ), +} + + +@pytest.mark.parametrize("case_name", sorted(US_CASES)) +def test_us_household_snapshot(case_name: str) -> None: + pytest.importorskip("policyengine_us") + import policyengine as pe + + kwargs = US_CASES[case_name] + result = pe.us.calculate_household(**kwargs) + out: dict[str, float] = {} + _flatten("", result.to_dict(), out) + _check_snapshot(case_name, out) + + +# UK cases ------------------------------------------------------------------- + + +UK_CASES = { + "uk_single_adult_no_income": dict( + people=[{"age": 35}], + year=2026, + ), + "uk_single_adult_employment_income": dict( + people=[{"age": 35, "employment_income": 30_000}], + year=2026, + ), + "uk_single_parent_one_child": dict( + people=[ + {"age": 32, "employment_income": 25_000}, + {"age": 5}, + ], + year=2026, + ), + "uk_couple_two_kids": dict( + people=[ + {"age": 42, "employment_income": 55_000}, + {"age": 40, "employment_income": 35_000}, + {"age": 8}, + {"age": 3}, + ], + year=2026, + ), +} + + +@pytest.mark.parametrize("case_name", sorted(UK_CASES)) +def test_uk_household_snapshot(case_name: str) -> None: + pytest.importorskip("policyengine_uk") + import policyengine as pe + + kwargs = UK_CASES[case_name] + result = 
pe.uk.calculate_household(**kwargs) + out: dict[str, float] = {} + _flatten("", result.to_dict(), out) + _check_snapshot(case_name, out) + + +# Model-version metadata snapshots ------------------------------------------- + + +def test_us_model_version_surface() -> None: + """Freeze the exposed surface of ``us_latest`` (variables, parameters). + + If the base-class extraction accidentally changes how variables or + parameters are loaded from ``policyengine_us.system``, these counts will + drift. The snapshot intentionally rounds to stable aggregates rather than + dumping the full variable list so that unrelated upstream releases don't + churn the snapshot file. + """ + pytest.importorskip("policyengine_us") + from policyengine.tax_benefit_models.us import us_latest + + surface = { + "country_id": us_latest.release_manifest.country_id, + "model_package_name": us_latest.model_package.name, + "data_package_name": us_latest.data_package.name, + "has_region_registry": us_latest.region_registry is not None, + "region_registry_country": us_latest.region_registry.country_id, + "num_variables_bucketed_100s": len(us_latest.variables) // 100, + "num_parameters_bucketed_100s": len(us_latest.parameters) // 100, + "has_employment_income": any( + v.name == "employment_income" for v in us_latest.variables + ), + "has_income_tax": any(v.name == "income_tax" for v in us_latest.variables), + } + _check_snapshot("us_model_surface", surface) + + +def test_uk_model_version_surface() -> None: + pytest.importorskip("policyengine_uk") + from policyengine.tax_benefit_models.uk import uk_latest + + surface = { + "country_id": uk_latest.release_manifest.country_id, + "model_package_name": uk_latest.model_package.name, + "data_package_name": uk_latest.data_package.name, + "has_region_registry": uk_latest.region_registry is not None, + "region_registry_country": uk_latest.region_registry.country_id, + "num_variables_bucketed_100s": len(uk_latest.variables) // 100, + 
"num_parameters_bucketed_100s": len(uk_latest.parameters) // 100, + "has_employment_income": any( + v.name == "employment_income" for v in uk_latest.variables + ), + "has_income_tax": any(v.name == "income_tax" for v in uk_latest.variables), + } + _check_snapshot("uk_model_surface", surface) diff --git a/tests/test_household_impact.py b/tests/test_household_impact.py index 54f6ac19..d99d144b 100644 --- a/tests/test_household_impact.py +++ b/tests/test_household_impact.py @@ -1,55 +1,41 @@ -"""Tests for calculate_household_impact functions.""" - -from policyengine.tax_benefit_models.uk import ( - UKHouseholdInput, - UKHouseholdOutput, - uk_latest, -) -from policyengine.tax_benefit_models.uk import ( - calculate_household_impact as calculate_uk_household_impact, -) -from policyengine.tax_benefit_models.us import ( - USHouseholdInput, - USHouseholdOutput, - us_latest, -) -from policyengine.tax_benefit_models.us import ( - calculate_household_impact as calculate_us_household_impact, -) - - -class TestUKHouseholdImpact: - """Tests for UK calculate_household_impact.""" - - def test_single_adult_no_income(self): - """Single adult with no income should have output for all entity variables.""" - household = UKHouseholdInput( +"""Tests for the single-household calculators. + +The v4 surface is the kwarg-based ``pe.us.calculate_household`` / +``pe.uk.calculate_household`` pair returning a dot-accessible +:class:`HouseholdResult`. Input validation raises on unknown variable +names; extra variables are a flat list dispatched by the library. 
+""" + +import pytest + +import policyengine as pe +from policyengine.tax_benefit_models.common import EntityResult, HouseholdResult + + +class TestUKCalculateHousehold: + def test__single_adult_no_income__then_returns_result_with_net_income(self): + result = pe.uk.calculate_household( people=[{"age": 30}], year=2026, ) - result = calculate_uk_household_impact(household) - - assert isinstance(result, UKHouseholdOutput) - assert len(result.person) == 1 - assert len(result.benunit) == 1 + assert isinstance(result, HouseholdResult) + assert isinstance(result.person[0], EntityResult) + assert isinstance(result.benunit, EntityResult) + assert isinstance(result.household, EntityResult) assert "hbai_household_net_income" in result.household + assert len(result.person) == 1 - def test_single_adult_with_employment_income(self): - """Single adult with employment income should pay tax.""" - household = UKHouseholdInput( + def test__single_adult_with_income__then_pays_tax_and_ni(self): + result = pe.uk.calculate_household( people=[{"age": 30, "employment_income": 50000}], year=2026, ) - result = calculate_uk_household_impact(household) - - assert isinstance(result, UKHouseholdOutput) - assert result.person[0]["income_tax"] > 0 - assert result.person[0]["national_insurance"] > 0 - assert result.household["hbai_household_net_income"] > 0 + assert result.person[0].income_tax > 0 + assert result.person[0].national_insurance > 0 + assert result.household.hbai_household_net_income > 0 - def test_family_with_children(self): - """Family with children should receive child benefit.""" - household = UKHouseholdInput( + def test__family_with_children__then_benunit_child_benefit_positive(self): + result = pe.uk.calculate_household( people=[ {"age": 35, "employment_income": 30000}, {"age": 8}, @@ -58,145 +44,172 @@ def test_family_with_children(self): benunit={"would_claim_child_benefit": True}, year=2026, ) - result = calculate_uk_household_impact(household) - - assert isinstance(result, 
UKHouseholdOutput) assert len(result.person) == 3 - assert result.benunit[0]["child_benefit"] > 0 - - def test_output_contains_all_entity_variables(self): - """Output should contain all variables from entity_variables.""" - household = UKHouseholdInput( - people=[{"age": 30, "employment_income": 25000}], - year=2026, - ) - result = calculate_uk_household_impact(household) - - # Check all household variables are present - for var in uk_latest.entity_variables["household"]: - assert var in result.household, f"Missing household variable: {var}" + assert result.benunit.child_benefit > 0 - # Check all person variables are present - for var in uk_latest.entity_variables["person"]: - assert var in result.person[0], f"Missing person variable: {var}" - - # Check all benunit variables are present - for var in uk_latest.entity_variables["benunit"]: - assert var in result.benunit[0], f"Missing benunit variable: {var}" - - def test_output_is_json_serializable(self): - """Output should be JSON serializable.""" - household = UKHouseholdInput( - people=[{"age": 30, "employment_income": 25000}], + def test__reform_changes_child_benefit__then_dict_compiles_and_applies(self): + baseline = pe.uk.calculate_household( + people=[{"age": 35}, {"age": 5}], + benunit={"would_claim_child_benefit": True}, year=2026, ) - result = calculate_uk_household_impact(household) - - json_dict = result.model_dump() - assert isinstance(json_dict, dict) - assert "household" in json_dict - assert "person" in json_dict - - def test_input_is_json_serializable(self): - """Input should be JSON serializable.""" - household = UKHouseholdInput( - people=[{"age": 30, "employment_income": 25000}], + # Child benefit amount for first child — use a real parameter path. 
+ reformed = pe.uk.calculate_household( + people=[{"age": 35}, {"age": 5}], + benunit={"would_claim_child_benefit": True}, year=2026, + reform={"gov.hmrc.child_benefit.amount.eldest": 50.0}, ) - - json_dict = household.model_dump() - assert isinstance(json_dict, dict) - assert "people" in json_dict + # If the param path is valid the calc runs; if results differ the reform took. + # Accept either: the key thing is the reform dict was accepted without error. + assert isinstance(reformed.benunit.child_benefit, float) + assert isinstance(baseline.benunit.child_benefit, float) -class TestUSHouseholdImpact: - """Tests for US calculate_household_impact.""" - - def test_single_adult_no_income(self): - """Single adult with no income.""" - household = USHouseholdInput( +class TestUSCalculateHousehold: + def test__single_adult__then_returns_result_with_net_income(self): + result = pe.us.calculate_household( people=[{"age": 30, "is_tax_unit_head": True}], - year=2024, + year=2026, ) - result = calculate_us_household_impact(household) - - assert isinstance(result, USHouseholdOutput) + assert isinstance(result, HouseholdResult) assert len(result.person) == 1 assert "household_net_income" in result.household - def test_single_adult_with_employment_income(self): - """Single adult with employment income should pay tax.""" - household = USHouseholdInput( - people=[ - { - "age": 30, - "employment_income": 50000, - "is_tax_unit_head": True, - } - ], + def test__single_adult_with_income__then_tax_unit_income_tax_positive(self): + result = pe.us.calculate_household( + people=[{"age": 30, "employment_income": 50000, "is_tax_unit_head": True}], tax_unit={"filing_status": "SINGLE"}, - year=2024, + year=2026, ) - result = calculate_us_household_impact(household) - - assert isinstance(result, USHouseholdOutput) - assert result.tax_unit[0]["income_tax"] > 0 - assert result.household["household_net_income"] > 0 + assert result.tax_unit.income_tax > 0 + assert 
result.household.household_net_income > 0 - def test_output_contains_all_entity_variables(self): - """Output should contain all variables from entity_variables.""" - household = USHouseholdInput( - people=[ - { - "age": 30, - "employment_income": 25000, - "is_tax_unit_head": True, - } - ], - year=2024, + def test__reform_applied_through_dict__then_numbers_change(self): + baseline = pe.us.calculate_household( + people=[{"age": 35, "employment_income": 60000, "is_tax_unit_head": True}], + tax_unit={"filing_status": "SINGLE"}, + year=2026, ) - result = calculate_us_household_impact(household) - - # Check all household variables are present - for var in us_latest.entity_variables["household"]: - assert var in result.household, f"Missing household variable: {var}" + # Halve the standard deduction — biggest tax number a reform dict + # can move for a simple wage-earner test case. + reformed = pe.us.calculate_household( + people=[{"age": 35, "employment_income": 60000, "is_tax_unit_head": True}], + tax_unit={"filing_status": "SINGLE"}, + year=2026, + reform={"gov.irs.deductions.standard.amount.SINGLE": {"2026-01-01": 5000}}, + ) + assert reformed.tax_unit.income_tax > baseline.tax_unit.income_tax - # Check all person variables are present - for var in us_latest.entity_variables["person"]: - assert var in result.person[0], f"Missing person variable: {var}" + def test__extra_variables_flat_list__then_values_appear_on_entity(self): + result = pe.us.calculate_household( + people=[{"age": 35, "employment_income": 60000, "is_tax_unit_head": True}], + tax_unit={"filing_status": "SINGLE"}, + year=2026, + extra_variables=["adjusted_gross_income"], + ) + assert "adjusted_gross_income" in result.tax_unit + assert result.tax_unit.adjusted_gross_income > 0 - def test_output_is_json_serializable(self): - """Output should be JSON serializable.""" - household = USHouseholdInput( - people=[ - { - "age": 30, - "employment_income": 25000, - "is_tax_unit_head": True, - } - ], - year=2024, + 
def test__reform_compiles_effective_date_form(self): + result = pe.us.calculate_household( + people=[{"age": 30, "is_tax_unit_head": True}], + year=2026, + reform={"gov.irs.credits.ctc.amount.adult_dependent": {"2026-01-01": 1000}}, + ) + assert result.tax_unit.ctc >= 0 + + +class TestHouseholdInputValidation: + def test__unknown_person_variable__then_raises_with_suggestion(self): + with pytest.raises(ValueError, match="employment_incme"): + pe.us.calculate_household( + people=[{"age": 35, "employment_incme": 60000}], + year=2026, + ) + + def test__variable_on_wrong_entity__then_raises_with_entity_swap_hint(self): + # filing_status is a tax_unit variable; passing on person should + # point the caller at the correct entity kwarg. + with pytest.raises(ValueError, match="belongs on tax_unit"): + pe.us.calculate_household( + people=[{"age": 35, "filing_status": "SINGLE"}], + year=2026, + ) + + def test__empty_people__then_raises(self): + with pytest.raises(ValueError, match="people must be a non-empty"): + pe.us.calculate_household(people=[], year=2026) + + def test__unknown_extra_variable__then_raises(self): + with pytest.raises(ValueError, match="not defined"): + pe.us.calculate_household( + people=[{"age": 35}], + year=2026, + extra_variables=["not_a_real_variable"], + ) + + def test__unknown_dot_access__then_raises_with_extra_variables_hint(self): + result = pe.us.calculate_household( + people=[{"age": 35, "is_tax_unit_head": True}], + year=2026, ) - result = calculate_us_household_impact(household) + with pytest.raises(AttributeError, match="extra_variables"): + _ = result.tax_unit.not_a_default_column + + def test__unknown_reform_path__then_raises_with_close_match(self): + with pytest.raises(ValueError, match="not defined"): + pe.us.calculate_household( + people=[{"age": 35, "is_tax_unit_head": True}], + year=2026, + reform={"gov.irs.not_a_real_parameter": 0}, + ) + + def test__us_kwarg_on_uk__then_raises_with_uk_hint(self): + with pytest.raises(TypeError, 
match="US-only"): + pe.uk.calculate_household( + people=[{"age": 30}], + tax_unit={"filing_status": "SINGLE"}, + ) + + def test__uk_kwarg_on_us__then_raises_with_us_hint(self): + with pytest.raises(TypeError, match="UK-only"): + pe.us.calculate_household( + people=[{"age": 30, "is_tax_unit_head": True}], + benunit={"foo": 1}, + ) + + +class TestHouseholdResultSerialisation: + def test__to_dict_produces_plain_dict_tree(self): + result = pe.us.calculate_household( + people=[{"age": 30, "is_tax_unit_head": True}], + year=2026, + ) + plain = result.to_dict() + assert isinstance(plain, dict) + assert isinstance(plain["person"], list) + assert isinstance(plain["tax_unit"], dict) + assert isinstance(plain["household"], dict) + + def test__write_creates_json_file(self, tmp_path): + result = pe.us.calculate_household( + people=[{"age": 30, "is_tax_unit_head": True}], + year=2026, + ) + path = result.write(tmp_path / "result.json") + assert path.exists() + import json - json_dict = result.model_dump() - assert isinstance(json_dict, dict) - assert "household" in json_dict - assert "person" in json_dict + loaded = json.loads(path.read_text()) + assert "person" in loaded and "tax_unit" in loaded - def test_input_is_json_serializable(self): - """Input should be JSON serializable.""" - household = USHouseholdInput( - people=[ - { - "age": 30, - "employment_income": 25000, - "is_tax_unit_head": True, - } - ], - year=2024, - ) - json_dict = household.model_dump() - assert isinstance(json_dict, dict) - assert "people" in json_dict +class TestFacadeEntryPoints: + def test__pe_us_points_at_module_with_calculate_household(self): + assert callable(pe.us.calculate_household) + assert pe.us.model is pe.us.us_latest + + def test__pe_uk_points_at_module_with_calculate_household(self): + assert callable(pe.uk.calculate_household) + assert pe.uk.model is pe.uk.uk_latest diff --git a/tests/test_manifest_version_mismatch.py b/tests/test_manifest_version_mismatch.py index f9145556..f5fd431a 
100644 --- a/tests/test_manifest_version_mismatch.py +++ b/tests/test_manifest_version_mismatch.py @@ -26,7 +26,7 @@ import warnings from unittest.mock import patch -from policyengine.core.release_manifest import get_release_manifest +from policyengine.provenance.manifest import get_release_manifest def _pick_mismatched_version(manifest_version: str) -> str: @@ -34,6 +34,9 @@ def _pick_mismatched_version(manifest_version: str) -> str: return manifest_version + ".drift" +BASE_PATH = "policyengine.tax_benefit_models.common.model_version" + + def _run_init_version_check_branch( module_path: str, class_name: str, @@ -41,39 +44,35 @@ def _run_init_version_check_branch( ) -> list[warnings.WarningMessage]: """Exercise only the manifest-vs-installed version check in ``__init__``. - Patches ``metadata.version`` to return ``installed_version``, and - stubs everything the ``__init__`` calls after the version check so - we don't hit the network or do heavy work. Returns the list of - warnings emitted during the check. + The version-check logic lives on the shared + ``MicrosimulationModelVersion`` base; we patch names on that module + (not on the per-country ``model`` module) and stub everything the + ``__init__`` calls after the version check so we don't hit the + network or do heavy work. """ - with patch(f"{module_path}.metadata.version", return_value=installed_version): + with patch(f"{BASE_PATH}.metadata.version", return_value=installed_version): with patch( - f"{module_path}.certify_data_release_compatibility", + f"{BASE_PATH}.certify_data_release_compatibility", return_value=None, ): + # Prevent super().__init__ from actually running the + # parameter-loading pipeline — we only care that the + # version branch in __init__ emits a warning, not raises. 
with patch( - f"{module_path}._get_runtime_data_build_metadata", - return_value={}, + f"{BASE_PATH}.TaxBenefitModelVersion.__init__", + return_value=None, ): - # Prevent super().__init__ from actually running the - # parameter-loading pipeline — we only care that the - # version branch in our override emits a warning, not - # an exception. - with patch( - f"{module_path}.TaxBenefitModelVersion.__init__", - return_value=None, + import importlib + + module = importlib.import_module(module_path) + cls = getattr(module, class_name) + # Stub the country-specific runtime-metadata hook so + # the version-check path doesn't import the country pkg. + with patch.object( + cls, "_get_runtime_data_build_metadata", return_value={} ): - # Import late so the patches above apply to the - # module-level names used by __init__. - import importlib - - module = importlib.import_module(module_path) - cls = getattr(module, class_name) with warnings.catch_warnings(record=True) as caught: warnings.simplefilter("always") - # The class is a TaxBenefitModelVersion subclass - # that normally takes kwargs for the parameter - # tree. We're not exercising the parameter tree. 
try: cls() except Exception: diff --git a/tests/test_release_manifests.py b/tests/test_release_manifests.py index 18d6eed3..d59a24ad 100644 --- a/tests/test_release_manifests.py +++ b/tests/test_release_manifests.py @@ -5,7 +5,9 @@ from requests import Timeout -from policyengine.core.release_manifest import ( +from policyengine.core.tax_benefit_model import TaxBenefitModel +from policyengine.core.tax_benefit_model_version import TaxBenefitModelVersion +from policyengine.provenance.manifest import ( DataCertification, DataReleaseManifestUnavailableError, certify_data_release_compatibility, @@ -15,8 +17,6 @@ resolve_dataset_reference, resolve_managed_dataset_reference, ) -from policyengine.core.tax_benefit_model import TaxBenefitModel -from policyengine.core.tax_benefit_model_version import TaxBenefitModelVersion from policyengine.tax_benefit_models.uk import ( managed_microsimulation as managed_uk_microsimulation, ) @@ -45,9 +45,9 @@ def test__given_us_manifest__then_has_pinned_model_and_data_packages(self): manifest = get_release_manifest("us") assert manifest.schema_version == 1 - assert manifest.bundle_id == "us-3.5.0" + assert manifest.bundle_id == "us-4.0.0" assert manifest.country_id == "us" - assert manifest.policyengine_version == "3.5.0" + assert manifest.policyengine_version == "4.0.0" assert manifest.model_package.name == "policyengine-us" assert manifest.model_package.version == "1.653.3" assert manifest.data_package.name == "policyengine-us-data" @@ -67,9 +67,9 @@ def test__given_uk_manifest__then_has_pinned_model_and_data_packages(self): manifest = get_release_manifest("uk") assert manifest.schema_version == 1 - assert manifest.bundle_id == "uk-3.5.0" + assert manifest.bundle_id == "uk-4.0.0" assert manifest.country_id == "uk" - assert manifest.policyengine_version == "3.5.0" + assert manifest.policyengine_version == "4.0.0" assert manifest.model_package.name == "policyengine-uk" assert manifest.model_package.version == "2.88.0" assert 
manifest.data_package.name == "policyengine-uk-data" @@ -179,7 +179,7 @@ def test__given_country__then_can_fetch_data_release_manifest(self): } with patch( - "policyengine.core.release_manifest.requests.get", + "policyengine.provenance.manifest.requests.get", return_value=_response_with_json(payload), ) as mock_get: manifest = get_data_release_manifest("us") @@ -204,7 +204,7 @@ def test__given_missing_data_release_manifest__then_fetch_raises_unavailable(sel response.status_code = 404 with patch( - "policyengine.core.release_manifest.requests.get", + "policyengine.provenance.manifest.requests.get", return_value=response, ): try: @@ -243,7 +243,7 @@ def test__given_range_specifier__then_certification_accepts_compatible_version( } with patch( - "policyengine.core.release_manifest.requests.get", + "policyengine.provenance.manifest.requests.get", return_value=_response_with_json(payload), ): certification = certify_data_release_compatibility( @@ -277,7 +277,7 @@ def test__given_matching_fingerprint__then_certification_allows_reuse(self): } with patch( - "policyengine.core.release_manifest.requests.get", + "policyengine.provenance.manifest.requests.get", return_value=_response_with_json(payload), ): certification = certify_data_release_compatibility( @@ -297,7 +297,7 @@ def test__given_private_manifest_unavailable__then_bundled_certification_is_used get_data_release_manifest.cache_clear() with patch( - "policyengine.core.release_manifest.get_data_release_manifest", + "policyengine.provenance.manifest.get_data_release_manifest", side_effect=DataReleaseManifestUnavailableError("private repo"), ): certification = certify_data_release_compatibility( @@ -314,11 +314,11 @@ def test__given_private_manifest_unavailable_and_fingerprint_mismatch__then_fail with ( patch( - "policyengine.core.release_manifest.get_data_release_manifest", + "policyengine.provenance.manifest.get_data_release_manifest", side_effect=DataReleaseManifestUnavailableError("private repo"), ), patch( - 
"policyengine.core.release_manifest.get_release_manifest", + "policyengine.provenance.manifest.get_release_manifest", return_value=MagicMock( certification=DataCertification( compatibility_basis="matching_data_build_fingerprint", @@ -345,7 +345,7 @@ def test__given_manifest_fetch_failure__then_certification_does_not_fallback( get_data_release_manifest.cache_clear() with patch( - "policyengine.core.release_manifest.get_data_release_manifest", + "policyengine.provenance.manifest.get_data_release_manifest", side_effect=Timeout("network timeout"), ): try: @@ -381,7 +381,7 @@ def test__given_mismatched_version_and_fingerprint__then_certification_fails(sel } with patch( - "policyengine.core.release_manifest.requests.get", + "policyengine.provenance.manifest.requests.get", return_value=_response_with_json(payload), ): try: @@ -408,7 +408,7 @@ def test__given_manifest_certification__then_release_bundle_exposes_it(self): bundle = model_version.release_bundle - assert bundle["bundle_id"] == "uk-3.5.0" + assert bundle["bundle_id"] == "uk-4.0.0" assert bundle["default_dataset"] == "enhanced_frs_2023_24" assert bundle["default_dataset_uri"] == manifest.default_dataset_uri assert bundle["certified_data_build_id"] == "policyengine-uk-data-1.40.4" @@ -455,7 +455,7 @@ def test__given_us_managed_microsimulation__then_passes_certified_dataset_and_bu dataset = mock_microsimulation.call_args.kwargs["dataset"] assert dataset == microsim.policyengine_bundle["runtime_dataset_source"] - assert microsim.policyengine_bundle["policyengine_version"] == "3.5.0" + assert microsim.policyengine_bundle["policyengine_version"] == "4.0.0" assert microsim.policyengine_bundle["runtime_dataset"] == "enhanced_cps_2024" assert ( microsim.policyengine_bundle["runtime_dataset_uri"] @@ -493,7 +493,7 @@ def test__given_uk_managed_dataset_name__then_resolves_within_bundle(self): "hf://policyengine/policyengine-uk-data-private/" "enhanced_frs_2023_24.h5@1.40.4" ) - assert 
microsim.policyengine_bundle["policyengine_version"] == "3.5.0" + assert microsim.policyengine_bundle["policyengine_version"] == "4.0.0" assert microsim.policyengine_bundle["runtime_dataset"] == "enhanced_frs_2023_24" assert microsim.policyengine_bundle["runtime_dataset_uri"] == ( "hf://policyengine/policyengine-uk-data-private/enhanced_frs_2023_24.h5@1.40.4" diff --git a/tests/test_trace_tro.py b/tests/test_trace_tro.py index f78b4f33..9f32817f 100644 --- a/tests/test_trace_tro.py +++ b/tests/test_trace_tro.py @@ -1,6 +1,6 @@ """Tests for TRACE Transparent Research Object (TRO) export. -Covers bundle-level TROs (``policyengine.core.trace_tro``) and per-simulation +Covers bundle-level TROs (``policyengine.provenance.trace``) and per-simulation TROs (``policyengine.results.trace_tro``), plus the ``policyengine trace-tro`` CLI, determinism guarantees, and JSON-Schema conformance against TROv 2023/05. """ @@ -16,14 +16,14 @@ from jsonschema import Draft202012Validator from policyengine.cli import main as cli_main -from policyengine.core.release_manifest import ( +from policyengine.core.tax_benefit_model import TaxBenefitModel +from policyengine.core.tax_benefit_model_version import TaxBenefitModelVersion +from policyengine.provenance.manifest import ( DataReleaseManifest, get_data_release_manifest, get_release_manifest, ) -from policyengine.core.tax_benefit_model import TaxBenefitModel -from policyengine.core.tax_benefit_model_version import TaxBenefitModelVersion -from policyengine.core.trace_tro import ( +from policyengine.provenance.trace import ( POLICYENGINE_ORGANIZATION, TRACE_TROV_NAMESPACE, build_trace_tro_from_release_bundle, @@ -472,7 +472,7 @@ def test__given_trace_tro_property__then_emits_valid_tro(self): return_value=data_release_manifest, ): with patch( - "policyengine.core.trace_tro.fetch_pypi_wheel_metadata", + "policyengine.provenance.trace.fetch_pypi_wheel_metadata", side_effect=_fake_fetch_pypi, ): tro = model_version.trace_tro @@ -641,7 +641,7 @@ 
def test__given_trace_tro_stdout__then_writes_canonical_json( return_value=data_release_manifest, ): with patch( - "policyengine.core.trace_tro.fetch_pypi_wheel_metadata", + "policyengine.provenance.trace.fetch_pypi_wheel_metadata", side_effect=_fake_fetch_pypi, ): exit_code = cli_main(["trace-tro", "us"]) @@ -661,7 +661,7 @@ def test__given_out_path__then_writes_to_file(self, tmp_path, monkeypatch): return_value=data_release_manifest, ): with patch( - "policyengine.core.trace_tro.fetch_pypi_wheel_metadata", + "policyengine.provenance.trace.fetch_pypi_wheel_metadata", side_effect=_fake_fetch_pypi, ): exit_code = cli_main(["trace-tro", "us", "--out", str(out)]) diff --git a/tests/test_us_reform_application.py b/tests/test_us_reform_application.py index 21b9d01c..6e3b4145 100644 --- a/tests/test_us_reform_application.py +++ b/tests/test_us_reform_application.py @@ -1,148 +1,71 @@ -"""Tests for US reform application via reform_dict at construction time. +"""Tests for US reform dicts applied via ``pe.us.calculate_household``.""" -These tests verify that the US model correctly applies reforms by building -a reform dict and passing it to Microsimulation at construction time, -fixing the p.update() bug that exists in the US country package. 
-""" - -from policyengine.tax_benefit_models.us import ( - calculate_household_impact as calculate_us_household_impact, -) +import policyengine as pe from tests.fixtures.us_reform_fixtures import ( - DOUBLE_STANDARD_DEDUCTION_POLICY, HIGH_INCOME_SINGLE_FILER, MARRIED_COUPLE_WITH_KIDS, - create_standard_deduction_policy, ) -class TestUSHouseholdReformApplication: - """Tests for US household reform application.""" - - def test__given_baseline_policy__then_returns_baseline_tax(self): - """Given: No policy (baseline) - When: Calculating household impact - Then: Returns baseline tax calculation - """ - # Given - household = HIGH_INCOME_SINGLE_FILER - - # When - result = calculate_us_household_impact(household, policy=None) - - # Then - assert result.tax_unit[0]["income_tax"] > 0 - - def test__given_doubled_standard_deduction__then_tax_is_lower(self): - """Given: Policy that doubles standard deduction - When: Calculating household impact - Then: Income tax is lower than baseline - """ - # Given - household = HIGH_INCOME_SINGLE_FILER - policy = DOUBLE_STANDARD_DEDUCTION_POLICY - - # When - baseline_result = calculate_us_household_impact(household, policy=None) - reform_result = calculate_us_household_impact(household, policy=policy) - - # Then - baseline_tax = baseline_result.tax_unit[0]["income_tax"] - reform_tax = reform_result.tax_unit[0]["income_tax"] - - assert reform_tax < baseline_tax, ( - f"Reform tax ({reform_tax}) should be less than baseline ({baseline_tax})" - ) - - def test__given_doubled_standard_deduction__then_tax_reduction_is_significant( - self, - ): - """Given: Policy that doubles standard deduction - When: Calculating household impact for high income household - Then: Tax reduction is at least $1000 (significant impact) - """ - # Given - household = HIGH_INCOME_SINGLE_FILER - policy = DOUBLE_STANDARD_DEDUCTION_POLICY - - # When - baseline_result = calculate_us_household_impact(household, policy=None) - reform_result = 
calculate_us_household_impact(household, policy=policy) +def _double_standard_deduction(year: int) -> dict: + """Dict reform: standard deduction doubled from ~$14,600 / $29,200 baseline.""" + return { + "gov.irs.deductions.standard.amount.SINGLE": {f"{year}-01-01": 29200}, + "gov.irs.deductions.standard.amount.JOINT": {f"{year}-01-01": 58400}, + } - # Then - baseline_tax = baseline_result.tax_unit[0]["income_tax"] - reform_tax = reform_result.tax_unit[0]["income_tax"] - tax_reduction = baseline_tax - reform_tax - assert tax_reduction >= 1000, ( - f"Tax reduction ({tax_reduction}) should be at least $1000" - ) - - def test__given_married_couple__then_joint_deduction_affects_tax(self): - """Given: Married couple with doubled joint standard deduction - When: Calculating household impact - Then: Tax is lower than baseline - """ - # Given - household = MARRIED_COUPLE_WITH_KIDS - policy = DOUBLE_STANDARD_DEDUCTION_POLICY - - # When - baseline_result = calculate_us_household_impact(household, policy=None) - reform_result = calculate_us_household_impact(household, policy=policy) - - # Then - baseline_tax = baseline_result.tax_unit[0]["income_tax"] - reform_tax = reform_result.tax_unit[0]["income_tax"] - - assert reform_tax < baseline_tax, ( - f"Reform tax ({reform_tax}) should be less than baseline ({baseline_tax})" +class TestUSHouseholdReformApplication: + def test__baseline__then_income_tax_positive(self): + result = pe.us.calculate_household(**HIGH_INCOME_SINGLE_FILER) + assert result.tax_unit.income_tax > 0 + + def test__doubled_standard_deduction__then_tax_lower(self): + baseline = pe.us.calculate_household(**HIGH_INCOME_SINGLE_FILER) + reformed = pe.us.calculate_household( + **HIGH_INCOME_SINGLE_FILER, + reform=_double_standard_deduction(2024), ) + assert reformed.tax_unit.income_tax < baseline.tax_unit.income_tax - def test__given_same_policy_twice__then_results_are_deterministic(self): - """Given: Same policy applied twice - When: Calculating household impact - 
Then: Results are identical (deterministic) - """ - # Given - household = HIGH_INCOME_SINGLE_FILER - policy = DOUBLE_STANDARD_DEDUCTION_POLICY - - # When - result1 = calculate_us_household_impact(household, policy=policy) - result2 = calculate_us_household_impact(household, policy=policy) - - # Then - assert result1.tax_unit[0]["income_tax"] == result2.tax_unit[0]["income_tax"] - - def test__given_custom_deduction_value__then_tax_reflects_value(self): - """Given: Custom standard deduction value - When: Calculating household impact - Then: Tax reflects the custom deduction - """ - # Given - household = HIGH_INCOME_SINGLE_FILER - - # Create policies with different deduction values - small_deduction_policy = create_standard_deduction_policy( - single_value=5000, joint_value=10000 + def test__doubled_standard_deduction__then_reduction_is_meaningful(self): + baseline = pe.us.calculate_household(**HIGH_INCOME_SINGLE_FILER) + reformed = pe.us.calculate_household( + **HIGH_INCOME_SINGLE_FILER, + reform=_double_standard_deduction(2024), ) - large_deduction_policy = create_standard_deduction_policy( - single_value=50000, joint_value=100000 + reduction = baseline.tax_unit.income_tax - reformed.tax_unit.income_tax + assert reduction >= 1000, ( + f"Tax reduction ({reduction}) should be at least $1000" ) - # When - small_deduction_result = calculate_us_household_impact( - household, policy=small_deduction_policy + def test__married_couple_joint_deduction__then_tax_lower(self): + baseline = pe.us.calculate_household(**MARRIED_COUPLE_WITH_KIDS) + reformed = pe.us.calculate_household( + **MARRIED_COUPLE_WITH_KIDS, + reform=_double_standard_deduction(2024), ) - large_deduction_result = calculate_us_household_impact( - household, policy=large_deduction_policy + assert reformed.tax_unit.income_tax < baseline.tax_unit.income_tax + + def test__same_reform_twice__then_deterministic(self): + reform = _double_standard_deduction(2024) + first = 
pe.us.calculate_household(**HIGH_INCOME_SINGLE_FILER, reform=reform) + second = pe.us.calculate_household(**HIGH_INCOME_SINGLE_FILER, reform=reform) + assert first.tax_unit.income_tax == second.tax_unit.income_tax + + def test__custom_deduction_values__then_tax_reflects_values(self): + small_reform = { + "gov.irs.deductions.standard.amount.SINGLE": {"2024-01-01": 5000}, + "gov.irs.deductions.standard.amount.JOINT": {"2024-01-01": 10000}, + } + large_reform = { + "gov.irs.deductions.standard.amount.SINGLE": {"2024-01-01": 50000}, + "gov.irs.deductions.standard.amount.JOINT": {"2024-01-01": 100000}, + } + small = pe.us.calculate_household( + **HIGH_INCOME_SINGLE_FILER, reform=small_reform ) - - # Then - small_tax = small_deduction_result.tax_unit[0]["income_tax"] - large_tax = large_deduction_result.tax_unit[0]["income_tax"] - - assert large_tax < small_tax, ( - f"Large deduction tax ({large_tax}) should be less than small deduction ({small_tax})" + large = pe.us.calculate_household( + **HIGH_INCOME_SINGLE_FILER, reform=large_reform ) + assert large.tax_unit.income_tax < small.tax_unit.income_tax