Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/fix-extra-variables-silently-ignored.fixed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixed `Simulation.extra_variables` being silently ignored on the US and UK microsim paths. The field is declared on the base `Simulation` and honored by the household calculator, but the country `run()` methods previously only iterated `self.entity_variables` — extras passed via `Simulation(extra_variables={...})` never reached the output dataset. Both country paths now route through a shared `MicrosimulationModelVersion.resolve_entity_variables` helper that merges defaults with extras, dedupes overlapping entries, and validates entity keys + variable names with close-match suggestions. Closes #303.
63 changes: 63 additions & 0 deletions src/policyengine/tax_benefit_models/common/model_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import datetime
import os
import warnings
from difflib import get_close_matches
from importlib import metadata
from pathlib import Path
from typing import TYPE_CHECKING, Any, ClassVar, Optional
Expand Down Expand Up @@ -225,6 +226,68 @@ def _build_entity_relationships(self, dataset) -> pd.DataFrame:
person_data = pd.DataFrame(dataset.data.person)
return build_entity_relationships(person_data, self.group_entities)

def resolve_entity_variables(
self,
simulation: Simulation,
) -> dict[str, list[str]]:
"""Merge :attr:`entity_variables` with ``simulation.extra_variables``.

Returned dict has one key per known entity, value being the
union of the bundled defaults and the caller's extras, with
duplicates removed and original order preserved (defaults
first, extras appended).

Raises ``ValueError`` with close-match suggestions if the
caller passes an unknown entity key or a variable name that
does not resolve on the tax-benefit system's variable
registry.
"""
extras = dict(simulation.extra_variables or {})
known_entities = set(self.entity_variables)
unknown_entities = [e for e in extras if e not in known_entities]
if unknown_entities:
lines = [
f"Simulation.extra_variables contains entity keys not "
f"defined on {self.model.id} {self.version}:"
]
for entity in unknown_entities:
suggestions = get_close_matches(
entity, sorted(known_entities), n=1, cutoff=0.7
)
hint = f" (did you mean '{suggestions[0]}'?)" if suggestions else ""
lines.append(f" - '{entity}'{hint}")
raise ValueError("\n".join(lines))

known_variables = set(self.variables_by_name)
unknown_variables: list[tuple[str, str]] = []
for entity, names in extras.items():
for name in names:
if name not in known_variables:
unknown_variables.append((entity, name))
if unknown_variables:
lines = [
f"Simulation.extra_variables contains variable names not "
f"defined on {self.model.id} {self.version}:"
]
for entity, name in unknown_variables:
suggestions = get_close_matches(
name, sorted(known_variables), n=1, cutoff=0.7
)
hint = f" (did you mean '{suggestions[0]}'?)" if suggestions else ""
lines.append(f" - '{name}' (on '{entity}'){hint}")
raise ValueError("\n".join(lines))

resolved: dict[str, list[str]] = {}
for entity, defaults in self.entity_variables.items():
seen: set[str] = set()
merged: list[str] = []
for var in list(defaults) + list(extras.get(entity, [])):
if var not in seen:
seen.add(var)
merged.append(var)
resolved[entity] = merged
return resolved

def save(self, simulation: Simulation) -> None:
"""Persist the simulation's output dataset to its bundled filepath."""
simulation.output_dataset.save()
Expand Down
5 changes: 4 additions & 1 deletion src/policyengine/tax_benefit_models/uk/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,10 @@ def run(self, simulation: "Simulation") -> "Simulation":
"household": pd.DataFrame(),
}

for entity, variables in self.entity_variables.items():
# ``resolve_entity_variables`` merges the bundled defaults
# with caller-supplied ``simulation.extra_variables``; unknown
# entity keys or variable names raise with close-match hints.
for entity, variables in self.resolve_entity_variables(simulation).items():
for var in variables:
data[entity][var] = microsim.calculate(
var, period=simulation.dataset.year, map_to=entity
Expand Down
7 changes: 5 additions & 2 deletions src/policyengine/tax_benefit_models/us/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,8 +228,11 @@ def run(self, simulation: "Simulation") -> "Simulation":
if target_col in id_columns:
data["person"][target_col] = person_input_df[col].values

# Calculate non-ID, non-weight variables from simulation
for entity, variables in self.entity_variables.items():
# Calculate non-ID, non-weight variables from simulation.
# ``resolve_entity_variables`` merges bundled defaults with
# caller-supplied ``simulation.extra_variables``; unknown
# entity keys or variable names raise with close-match hints.
for entity, variables in self.resolve_entity_variables(simulation).items():
for var in variables:
if var not in id_columns and var not in weight_columns:
data[entity][var] = microsim.calculate(
Expand Down
Loading
Loading