From 02a9161be8d876800ee0fc6ba128c6da2d105745 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Fri, 17 Apr 2026 18:13:37 -0400 Subject: [PATCH 1/2] Preserve set_input values across apply_reform MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The H3 fix (PR #463) added `_invalidate_all_caches` to wipe `holder._memory_storage._arrays` for every variable after `apply_reform`, so formula output caches couldn't survive a reform that invalidated them. But user-provided `set_input` values share the same storage, so they got wiped too. Country packages (e.g. `policyengine_uk.Simulation.__init__`) follow this pattern: 1. build populations, call `self.set_input(...)` from the dataset 2. apply a structural reform derived from parameters 3. calculate downstream variables Step 2's `apply_reform` triggered `_invalidate_all_caches`, silently discarding the dataset loaded in step 1. Surfaced in PolicyEngine/policyengine.py#1628 (UK household-impact tests returning 0 because age, employment_income, would_claim_* — every single input — were wiped before any calculation ran). Fix: track every `(variable, branch, period)` populated via `Simulation.set_input` in a new `_user_input_keys` set. On `_invalidate_all_caches`, snapshot those entries from storage, perform the wipe, then replay them back. Formula-output caches are still invalidated; user inputs survive. The attribute is lazy-initialised inside `set_input` so country- package subclasses that bypass `super().__init__` (the same pattern `_fast_cache` was guarded for in PR #474) automatically pick up the preservation without a downstream code change. Three new tests under `tests/core/test_apply_reform_preserves_user_inputs.py`: - a set_input value survives a no-op reform - multiple set_input values survive - the H3 fix (formula cache invalidation) still holds — a neutralize_variable reform still drops the cached formula output End-to-end: installing this branch into a policyengine.py checkout with `policyengine-uk==2.88.0` turns the failing `TestUKHouseholdImpact.test_single_adult_with_employment_income` (0.0 > 0) into a passing £7,486 income tax on a £50k salary, and `test_family_with_children` into a passing £2,328 child benefit. Co-Authored-By: Claude Opus 4.7 (1M context) --- ...-invalidate-preserves-user-inputs.fixed.md | 1 + policyengine_core/simulations/simulation.py | 45 ++++++- ...test_apply_reform_preserves_user_inputs.py | 113 ++++++++++++++++++ uv.lock | 2 +- 4 files changed, 154 insertions(+), 7 deletions(-) create mode 100644 changelog.d/fix-invalidate-preserves-user-inputs.fixed.md create mode 100644 tests/core/test_apply_reform_preserves_user_inputs.py diff --git a/changelog.d/fix-invalidate-preserves-user-inputs.fixed.md b/changelog.d/fix-invalidate-preserves-user-inputs.fixed.md new file mode 100644 index 00000000..1e2e8582 --- /dev/null +++ b/changelog.d/fix-invalidate-preserves-user-inputs.fixed.md @@ -0,0 +1 @@ +Preserve `set_input` values across `apply_reform`. The H3 cache invalidation wiped every variable's `_memory_storage._arrays`, which also wiped user-provided dataset inputs loaded via `set_input`. Country-package subclasses calling `set_input` during construction and then applying a structural reform (the `policyengine-uk` pattern) silently lost their datasets. Now tracks `set_input` provenance and replays those values after the invalidation wipe; formula-output caches are still invalidated as before. diff --git a/policyengine_core/simulations/simulation.py b/policyengine_core/simulations/simulation.py index 17f60273..3944c3d2 100644 --- a/policyengine_core/simulations/simulation.py +++ b/policyengine_core/simulations/simulation.py @@ -132,6 +132,12 @@ def __init__( self.invalidated_caches = set() self._fast_cache: dict = {} + # ``set_input`` records each (variable_name, branch_name, period) it + # populates so ``_invalidate_all_caches`` can tell user-provided + # source data apart from formula-computed caches. Without this the + # post-``apply_reform`` cache wipe would also wipe the dataset the + # simulation was loaded from. + self._user_input_keys: set[tuple[str, str, Period]] = set() self.debug: bool = False self.trace: bool = trace self.tracer: SimpleTracer = SimpleTracer() if not trace else FullTracer() @@ -251,23 +257,44 @@ def apply_reform(self, reform: Union[tuple, Reform]): self._invalidate_all_caches() def _invalidate_all_caches(self) -> None: - """Purge every cached calculation on this simulation. + """Purge cached formula output, preserving user-provided inputs. Called after ``apply_reform`` and any other operation that changes the tax-benefit system underneath an already-calculated simulation. - Also cascades into any branches created via ``get_branch`` so those - don't keep returning stale pre-reform values either. + + Every (variable, branch, period) that was populated via + ``set_input`` is preserved — those are source data, not stale + formula output — so a structural reform applied after dataset + load doesn't silently discard the dataset. Everything else + (formula outputs, cached short-path results, on-disk caches) is + wiped so the next ``calculate`` recomputes under the new + tax-benefit system. """ self._fast_cache = {} self.invalidated_caches = set() + # Snapshot user-provided inputs before wiping so they can be + # replayed into the fresh storage. Storage keys each entry as + # f"{branch_name}:{period}"; preserve exactly those keys. + preserved: dict[str, dict[str, object]] = {} + user_input_keys = getattr(self, "_user_input_keys", None) or set() + for variable_name, branch_name, period in user_input_keys: + holder = self.get_holder(variable_name) + storage_key = f"{branch_name}:{period}" + stored_value = holder._memory_storage._arrays.get(storage_key) + if stored_value is not None: + preserved.setdefault(variable_name, {})[storage_key] = stored_value for variable in list(self.tax_benefit_system.variables): holder = self.get_holder(variable) - # ``Holder.delete_arrays`` with ``period=None`` wipes every - # period on both memory and disk storage. After the storage-delete - # bug fix (C2) that now respects branch_name, so wipe both. + # Wipe formula outputs and on-disk caches on both memory and + # disk storage. After the storage-delete bug fix (C2) that + # respects branch_name, so wipe both. holder._memory_storage._arrays = {} if holder._disk_storage is not None: holder._disk_storage._files = {} + # Replay preserved user inputs so ``calculate`` still sees them. + for variable_name, key_to_array in preserved.items(): + holder = self.get_holder(variable_name) + holder._memory_storage._arrays.update(key_to_array) for branch in self.branches.values(): branch._invalidate_all_caches() @@ -1246,6 +1273,12 @@ def set_input(self, variable_name: str, period: Period, value: ArrayLike) -> Non if (variable.end is not None) and (period.start.date > variable.end): return self.get_holder(variable_name).set_input(period, value, self.branch_name) + # Lazy-init ``_user_input_keys`` so country-package subclasses that + # override ``__init__`` without calling ``super().__init__`` still + # benefit from the set-input preservation across ``apply_reform``. + if not hasattr(self, "_user_input_keys"): + self._user_input_keys = set() + self._user_input_keys.add((variable_name, self.branch_name, period)) _fast_cache = getattr(self, "_fast_cache", None) if _fast_cache is not None: _fast_cache.pop((variable_name, period), None) diff --git a/tests/core/test_apply_reform_preserves_user_inputs.py b/tests/core/test_apply_reform_preserves_user_inputs.py new file mode 100644 index 00000000..b9d27961 --- /dev/null +++ b/tests/core/test_apply_reform_preserves_user_inputs.py @@ -0,0 +1,113 @@ +"""Regression test: ``apply_reform`` must not wipe user-provided inputs. + +The cache-invalidation added to fix bug H3 cleared +``holder._memory_storage._arrays`` for every variable, which also wiped +values populated via ``set_input`` that pre-dated the reform. Those values +are source data, not stale formula output, and must survive +``apply_reform`` so that downstream country packages (e.g. policyengine-uk, +policyengine-us) can load a dataset, apply a structural reform, and then +calculate against the loaded data. + +See: +- https://github.com/PolicyEngine/policyengine.py/issues/1628 (symptom + surfaced here — UK household-impact tests returning 0 after reform apply) +- bug H3 in the existing ``test_apply_reform_invalidates_cache.py`` + (the cache invalidation that over-reached) +""" + +from __future__ import annotations + +import numpy as np + +from policyengine_core.model_api import Reform +from policyengine_core.country_template import situation_examples +from policyengine_core.simulations import SimulationBuilder + + +def test_apply_reform_preserves_set_input_values(tax_benefit_system): + """Values set via ``set_input`` before ``apply_reform`` must survive it. + + ``set_input`` is the data-load path: it populates the holder with source + values, not cached formula output. Wiping it across ``apply_reform`` + would mean every country-package dataset is silently discarded whenever + a structural reform is applied during initialisation. + """ + sim = SimulationBuilder().build_from_entities( + tax_benefit_system, situation_examples.single + ) + period = "2017-01" + expected_salary = np.array([5_000.0]) + + sim.set_input("salary", period, expected_salary) + + assert sim.get_holder("salary").get_known_periods(), ( + "precondition failure: set_input did not register the period" + ) + + class NoOpReform(Reform): + """Reform that touches nothing; should not invalidate inputs.""" + + def apply(self): + pass + + sim.apply_reform(NoOpReform) + + assert sim.get_holder("salary").get_known_periods(), ( + "apply_reform wiped salary holder — set_input values must be preserved" + ) + + result = sim.calculate("salary", period=period) + assert np.allclose(result, expected_salary), ( + f"apply_reform lost the user-provided salary input; got {result} " + f"instead of {expected_salary}." + ) + + +def test_apply_reform_preserves_inputs_across_multiple_variables(tax_benefit_system): + """Every variable set via ``set_input`` must survive, not just one.""" + sim = SimulationBuilder().build_from_entities( + tax_benefit_system, situation_examples.single + ) + period = "2017-01" + + sim.set_input("salary", period, np.array([1_234.0])) + sim.set_input("age", period, np.array([27])) + + class NoOpReform(Reform): + def apply(self): + pass + + sim.apply_reform(NoOpReform) + + assert np.allclose(sim.calculate("salary", period=period), [1_234.0]) + assert sim.calculate("age", period=period)[0] == 27 + + +def test_apply_reform_still_invalidates_formula_caches(tax_benefit_system): + """The H3 fix must still hold — formula output caches must be cleared. + + This is a belt-and-braces test: preserving set_input values is orthogonal + to invalidating formula outputs. A reform that neutralizes a variable + must still cause subsequent ``calculate`` calls to return the new value, + not the cached pre-reform output. + """ + sim = SimulationBuilder().build_from_entities( + tax_benefit_system, situation_examples.single + ) + period = "2017-01" + + # Compute once to populate the formula-output cache. + before_reform = sim.calculate("basic_income", period=period) + assert before_reform[0] > 0 + + class NeutraliseBasicIncome(Reform): + def apply(self): + self.neutralize_variable("basic_income") + + sim.apply_reform(NeutraliseBasicIncome) + + after_reform = sim.calculate("basic_income", period=period) + assert after_reform[0] == 0, ( + f"apply_reform did not invalidate formula cache for basic_income; " + f"got {after_reform[0]} instead of 0." + ) diff --git a/uv.lock b/uv.lock index 92032733..e27f414e 100644 --- a/uv.lock +++ b/uv.lock @@ -2384,7 +2384,7 @@ wheels = [ [[package]] name = "policyengine-core" -version = "3.24.1" +version = "3.24.2" source = { editable = "." } dependencies = [ { name = "dpath" }, From 745ffdf3a3e598d737814e3e6a40997ebeb7a753 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Fri, 17 Apr 2026 20:05:57 -0400 Subject: [PATCH 2/2] Restore nested-branch input inheritance and cover situation-dict set_input MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two follow-up fixes on top of the ``Simulation.set_input`` preservation in this PR, plus a latent bug that surfaced while wiring them up. Together they unblock ``policyengine-us`` bumping its core floor past 3.24.0 (``PolicyEngine/policyengine-us#8066``). ## 1. ``Holder.set_input`` now also records ``_user_input_keys`` The previous patch only recorded keys inside ``Simulation.set_input``. But ``SimulationBuilder.finalize_variables_init`` (the path taken when ``Simulation(situation=...)`` is called) routes inputs through ``holder.set_input`` directly, bypassing the simulation-level hook. So every situation-dict input got wiped by the post-``apply_reform`` cache invalidation in country-package subclasses that apply a structural reform during construction (the ``policyengine-us`` pattern). Recording the key inside ``Holder.set_input`` covers both paths: ``Simulation.set_input`` still adds its own entry (harmless duplicate in a set), and ``holder.set_input`` picks up the situation- dict and dataset loader paths. ## 2. ``Holder.get_array`` walks ``simulation.parent_branch`` The C1 fix (``fix-holder-get-array-branch-leak``) correctly stopped ``get_array`` from returning values stored under an arbitrary sibling branch — that had caused silent reform↔baseline cross-contamination. But it only fell back to the ``default`` branch, so a nested branch (``no_salt`` cloned from ``itemizing``) could no longer read inputs set on its parent. ``policyengine-us`` uses that two-level pattern: ``tax_liability_if_itemizing`` sets ``tax_unit_itemizes=True`` on an ``itemizing`` branch, and ``ctc_limiting_tax_liability`` forks a ``no_salt`` sub-branch from it. Without parent-branch fallback, ``tax_unit_itemizes`` re-runs its formula on ``no_salt``, which calls ``tax_liability_if_itemizing`` again, producing a ``CycleError`` → eventually surfaced as a recursion exception. The fix walks ``simulation.parent_branch`` up to the root and returns the first ancestor that has a value. Sibling branches (no parent relationship) still don't leak into each other — the C1 guarantee holds. ## 3. ``GroupPopulation.clone`` passes the cloned population to holders Latent bug that surfaced while fixing #2: ``GroupPopulation.clone`` was calling ``holder.clone(self)`` — passing the *source* population to each holder. ``Holder.clone`` then set ``new_dict["simulation"] = population.simulation``, pointing the cloned holder's ``.simulation`` reference back at the original sim rather than the clone. That meant a holder on the ``no_salt`` clone thought it belonged to the ``itemizing`` simulation, so the ``parent_branch`` walk started from the wrong simulation and missed the ancestor's inputs. Pass ``result`` (the cloned population) so the holder's ``.simulation`` points at the clone. ## Tests - ``test_apply_reform_preserves_situation_dict_inputs`` — covers the ``Simulation(situation=...)`` path that bypasses ``Simulation.set_input`` (fails without #1). - ``test_get_array_falls_back_through_parent_branch_chain`` — covers nested-branch parent inheritance (fails without #2). - ``test_group_population_clone_sets_holder_simulation_to_clone`` — pins the cloned holder's ``.simulation`` to the clone (fails without #3). All existing core tests still pass (514 pass, 1 pre-existing parameter security failure unrelated to these changes). The ``tax_unit_itemizes.yaml`` integration test (7/7) and the full ``gov/irs/income/taxable_income`` suite (253/253) in ``policyengine-us`` 1.647.0 pass under this branch. Co-Authored-By: Claude Opus 4.7 (1M context) --- ...x-nested-branch-input-inheritance.fixed.md | 1 + policyengine_core/holders/holder.py | 38 +++++++++-- .../populations/group_population.py | 10 ++- ...test_apply_reform_preserves_user_inputs.py | 59 +++++++++++++++- tests/core/test_holder_branch_fallback.py | 67 +++++++++++++++++++ 5 files changed, 168 insertions(+), 7 deletions(-) create mode 100644 changelog.d/fix-nested-branch-input-inheritance.fixed.md diff --git a/changelog.d/fix-nested-branch-input-inheritance.fixed.md b/changelog.d/fix-nested-branch-input-inheritance.fixed.md new file mode 100644 index 00000000..e598eb7f --- /dev/null +++ b/changelog.d/fix-nested-branch-input-inheritance.fixed.md @@ -0,0 +1 @@ +Restore nested-branch input inheritance and cover situation-dict `set_input`. Three follow-ups on top of the `Simulation.set_input` preservation: (1) `Holder.set_input` also records `_user_input_keys` so situation-dict inputs routed through `SimulationBuilder.finalize_variables_init` survive `apply_reform`, not only inputs set via `Simulation.set_input`; (2) `Holder.get_array` walks up `simulation.parent_branch` before falling back to `default`, so a sub-branch (e.g. `no_salt` cloned from `itemizing`) still sees inputs set on its ancestor — the C1 fallback-to-`default`-only broke the country-package nested-branch pattern; (3) `GroupPopulation.clone` now passes the cloned population (not the source) to `holder.clone`, so group-entity holders on a `get_branch` clone point at the cloned simulation and branch-aware lookups resolve correctly. Unblocks `PolicyEngine/policyengine-us#8066` (the `tax_unit_itemizes` integration test crashing with `TypeError: int() argument ... not 'NoneType'` under core 3.24.x because `state_fips` got wiped, plus a follow-up infinite recursion in `tax_liability_if_itemizing` once the state_fips wipe was resolved). diff --git a/policyengine_core/holders/holder.py b/policyengine_core/holders/holder.py index 669406b5..6ee42d1b 100644 --- a/policyengine_core/holders/holder.py +++ b/policyengine_core/holders/holder.py @@ -104,11 +104,27 @@ def get_array(self, period: Period, branch_name: str = "default") -> ArrayLike: return self.default_array() value = self._memory_storage.get(period, branch_name) if value is None and branch_name != "default": - # Fall back to the ``default`` branch only. Previously the fallback - # returned *any* branch that happened to have this period (the - # first one in dict-insertion order), which silently swapped - # values between unrelated branches (reform vs baseline) and - # produced wrong reform deltas. See holder.get_array bug C1. + # Walk up ``simulation.parent_branch`` so nested branches inherit + # values from their parent (e.g. a ``no_salt`` branch cloned + # from an ``itemizing`` branch still sees ``tax_unit_itemizes`` + # set on the ``itemizing`` branch). Fall back to ``default`` + # only if no ancestor branch has a value. Previously the + # fallback returned the first branch in dict-insertion order + # (bug C1) — silently swapping values between unrelated + # sibling branches (reform vs baseline) and producing wrong + # reform deltas. The post-C1 behavior only fell back to + # ``default``, which broke country-package nested-branch + # patterns that relied on the ancestor's input being visible. + parent = ( + getattr(self.simulation, "parent_branch", None) + if self.simulation + else None + ) + while parent is not None: + ancestor_value = self._memory_storage.get(period, parent.branch_name) + if ancestor_value is not None: + return ancestor_value + parent = getattr(parent, "parent_branch", None) default_value = self._memory_storage.get(period, "default") if default_value is not None: return default_value @@ -225,6 +241,18 @@ def set_input( return warnings.warn(warning_message, Warning) if self.variable.value_type in (float, int) and isinstance(array, str): array = tools.eval_expression(array) + # Track user-provided inputs on the simulation so + # ``Simulation._invalidate_all_caches`` can preserve them across + # ``apply_reform``. ``Simulation.set_input`` also records this, but + # ``SimulationBuilder.finalize_variables_init`` (the situation-dict + # path) and country-package dataset loaders call + # ``holder.set_input`` directly, bypassing the simulation-level hook. + # Recording here covers both paths. + simulation = getattr(self, "simulation", None) + if simulation is not None: + if not hasattr(simulation, "_user_input_keys"): + simulation._user_input_keys = set() + simulation._user_input_keys.add((self.variable.name, branch_name, period)) if self.variable.set_input and period.unit != self.variable.definition_period: return self.variable.set_input(self, period, array) return self._set(period, array, branch_name) diff --git a/policyengine_core/populations/group_population.py b/policyengine_core/populations/group_population.py index 12e10631..6cb40a6a 100644 --- a/policyengine_core/populations/group_population.py +++ b/policyengine_core/populations/group_population.py @@ -38,8 +38,16 @@ def __call__( def clone(self, simulation: "Simulation", members: Population) -> "GroupPopulation": result = GroupPopulation(self.entity, members) result.simulation = simulation + # Pass ``result`` (the cloned population) to ``holder.clone`` so the + # holder's ``.simulation`` reference points at the clone — not at + # the source. Previously this was ``holder.clone(self)``, which + # left every group-entity holder on a ``get_branch`` clone + # pointing back at its parent simulation; that broke ``branch_name`` + # and ``parent_branch`` lookups for group-entity variables + # (e.g. ``tax_unit_itemizes``) on nested branches. result._holders = { - variable: holder.clone(self) for (variable, holder) in self._holders.items() + variable: holder.clone(result) + for (variable, holder) in self._holders.items() } result.count = self.count result.ids = self.ids diff --git a/tests/core/test_apply_reform_preserves_user_inputs.py b/tests/core/test_apply_reform_preserves_user_inputs.py index b9d27961..6cea6016 100644 --- a/tests/core/test_apply_reform_preserves_user_inputs.py +++ b/tests/core/test_apply_reform_preserves_user_inputs.py @@ -11,6 +11,12 @@ See: - https://github.com/PolicyEngine/policyengine.py/issues/1628 (symptom surfaced here — UK household-impact tests returning 0 after reform apply) +- https://github.com/PolicyEngine/policyengine-us/issues/8058 (symptom + surfaced here — US ``tax_unit_itemizes`` integration test and many + others crashing with ``TypeError: int() argument must be a string, + a bytes-like object or a real number, not 'NoneType'`` because + ``state_fips`` got wiped and the downstream + ``state_name``/``state_code`` chain returned ``None``) - bug H3 in the existing ``test_apply_reform_invalidates_cache.py`` (the cache invalidation that over-reached) """ @@ -21,7 +27,7 @@ from policyengine_core.model_api import Reform from policyengine_core.country_template import situation_examples -from policyengine_core.simulations import SimulationBuilder +from policyengine_core.simulations import Simulation, SimulationBuilder def test_apply_reform_preserves_set_input_values(tax_benefit_system): @@ -83,6 +89,57 @@ def apply(self): assert sim.calculate("age", period=period)[0] == 27 +def test_apply_reform_preserves_situation_dict_inputs(tax_benefit_system): + """Situation-dict inputs must survive ``apply_reform`` too. + + ``Simulation(situation=...)`` routes inputs through + ``SimulationBuilder.finalize_variables_init``, which calls + ``holder.set_input`` directly — bypassing ``Simulation.set_input``. + The preservation tracking must cover that path too, otherwise + country-package subclasses that build from a situation dict and then + apply a structural reform during construction (the + ``policyengine-us`` pattern) silently lose every household input. + Surfaced in ``PolicyEngine/policyengine-us#8058``. + """ + situation = { + "persons": { + "Alicia": { + "salary": {"2017-01": 3_000.0}, + "age": {"2017-01": 42}, + } + }, + "households": { + "_": {"parents": ["Alicia"]}, + }, + } + sim = Simulation( + tax_benefit_system=tax_benefit_system, + situation=situation, + ) + + assert sim.get_holder("salary").get_known_periods(), ( + "precondition failure: situation dict did not register salary" + ) + assert sim.get_holder("age").get_known_periods(), ( + "precondition failure: situation dict did not register age" + ) + + class NoOpReform(Reform): + def apply(self): + pass + + sim.apply_reform(NoOpReform) + + # Both inputs were set through ``holder.set_input`` via the builder, + # not through ``Simulation.set_input``. They must still survive. + assert np.allclose(sim.calculate("salary", period="2017-01"), [3_000.0]), ( + "apply_reform wiped the situation-dict salary input" + ) + assert sim.calculate("age", period="2017-01")[0] == 42, ( + "apply_reform wiped the situation-dict age input" + ) + + def test_apply_reform_still_invalidates_formula_caches(tax_benefit_system): """The H3 fix must still hold — formula output caches must be cleared. diff --git a/tests/core/test_holder_branch_fallback.py b/tests/core/test_holder_branch_fallback.py index c9b06def..df674bbe 100644 --- a/tests/core/test_holder_branch_fallback.py +++ b/tests/core/test_holder_branch_fallback.py @@ -60,3 +60,70 @@ def test_get_array_falls_back_to_default_branch(tax_benefit_system): result = holder.get_array(period, "baseline") assert result is not None assert result[0] == 42.0 + + +def test_get_array_falls_back_through_parent_branch_chain(tax_benefit_system): + """Nested branches must inherit values from their parent branch. + + ``policyengine-us`` uses a two-level branch pattern: + + 1. ``tax_liability_if_itemizing`` creates an ``itemizing`` branch from + the default simulation and calls ``branch.set_input("tax_unit_itemizes", True)``. + 2. Calculating ``income_tax`` on that branch reaches + ``ctc_limiting_tax_liability``, which creates a ``no_salt`` sub-branch + from the ``itemizing`` branch and calls + ``no_salt.calculate("income_tax_before_credits")``. + + The ``no_salt`` branch must see ``tax_unit_itemizes=True`` inherited + from its parent ``itemizing`` branch — otherwise ``tax_unit_itemizes`` + re-runs its formula on ``no_salt``, which calls + ``tax_liability_if_itemizing`` again, creating a circular definition + / infinite recursion. Surfaced in ``PolicyEngine/policyengine-us#8058``. + """ + sim = _build_single(tax_benefit_system) + itemizing_branch = sim.get_branch("itemizing") + no_salt_branch = itemizing_branch.get_branch("no_salt") + + holder = no_salt_branch.person.get_holder("salary") + period = periods.period("2017-01") + + # Simulate ``itemizing_branch.set_input("salary", ...)``: the storage + # key lives under the ``itemizing`` branch name. The cloned ``no_salt`` + # holder starts with the same storage dict because ``Population.clone`` + # deep-copies ``_arrays`` from the source. + holder._memory_storage.put(np.asarray([7_777.0]), period, "itemizing") + + # Asking the ``no_salt`` branch for this value must walk up the + # ``parent_branch`` chain and return the itemizing branch's value. + result = holder.get_array(period, "no_salt") + assert result is not None, ( + "get_array on a nested branch must fall back through parent_branch " + "to the ancestor that actually has the value" + ) + assert result[0] == 7_777.0 + + +def test_group_population_clone_sets_holder_simulation_to_clone(tax_benefit_system): + """``GroupPopulation.clone`` must point holders at the cloned simulation. + + Previously ``GroupPopulation.clone`` called ``holder.clone(self)`` + (the *source* population), so every cloned holder's + ``.simulation`` reference pointed back at the source simulation. That + broke branch-aware lookups: the holder thought it belonged to the + parent branch even when the clone was a nested branch, so + ``parent_branch`` walks started from the wrong simulation and missed + the ancestor's inputs. + """ + sim = _build_single(tax_benefit_system) + branch = sim.get_branch("nested") + + # Find a group-entity variable (household-level). + household = branch.household + holder = household.get_holder("housing_tax") + + assert holder.simulation is branch, ( + "GroupPopulation.clone must pass the CLONED population to " + "holder.clone so holder.simulation points at the new branch, " + "not the source simulation" + ) + assert holder.simulation.branch_name == "nested"