diff --git a/changelog.d/fix-invalidate-caches-perf.fixed.md b/changelog.d/fix-invalidate-caches-perf.fixed.md new file mode 100644 index 00000000..d1828b78 --- /dev/null +++ b/changelog.d/fix-invalidate-caches-perf.fixed.md @@ -0,0 +1 @@ +Iterate only existing holders in `_invalidate_all_caches`. The 3.24.4-era implementation walked every variable in the tax-benefit system and lazy-created a `Holder` for each one, inflating `apply_reform` from milliseconds to seconds in downstream packages with thousands of variables (policyengine-us YAML full-suite went from ~17 min to ~51 min per job and started timing out at the 1-hour GitHub Actions limit). Untouched variables have no holder and therefore nothing to wipe, so iterating `population._holders.values()` on each population recovers the original performance while keeping the set_input preservation behaviour introduced in #475. diff --git a/policyengine_core/simulations/simulation.py b/policyengine_core/simulations/simulation.py index a7916c29..6b5f1118 100644 --- a/policyengine_core/simulations/simulation.py +++ b/policyengine_core/simulations/simulation.py @@ -283,14 +283,17 @@ def _invalidate_all_caches(self) -> None: stored_value = holder._memory_storage._arrays.get(storage_key) if stored_value is not None: preserved.setdefault(variable_name, {})[storage_key] = stored_value - for variable in list(self.tax_benefit_system.variables): - holder = self.get_holder(variable) - # Wipe formula outputs and on-disk caches on both memory and - # disk storage. After the storage-delete bug fix (C2) that - # respects branch_name, so wipe both. - holder._memory_storage._arrays = {} - if holder._disk_storage is not None: - holder._disk_storage._files = {} + # Iterate only over holders that already exist on each population — + # lazy-creating a holder for every variable in the tax-benefit + # system (thousands in policyengine-us) inflated the cost of + # ``apply_reform`` from milliseconds to seconds and broke the + # YAML full-suite on downstream repos. Untouched variables have + # no holder and therefore nothing to wipe. + for population in self.populations.values(): + for holder in population._holders.values(): + holder._memory_storage._arrays = {} + if holder._disk_storage is not None: + holder._disk_storage._files = {} # Replay preserved user inputs so ``calculate`` still sees them. for variable_name, key_to_array in preserved.items(): holder = self.get_holder(variable_name) diff --git a/uv.lock b/uv.lock index 16ec94e9..b2d11c3e 100644 --- a/uv.lock +++ b/uv.lock @@ -2384,7 +2384,7 @@ wheels = [ [[package]] name = "policyengine-core" -version = "3.24.4" +version = "3.25.0" source = { editable = "." } dependencies = [ { name = "dpath" },