Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
- bump: patch
changes:
fixed:
- US model always downloads from HuggingFace.
- Subsampling improvements.
58 changes: 49 additions & 9 deletions policyengine_api/jobs/calculate_economy_simulation_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,11 @@
from policyengine_api.endpoints.economy.compare import compare_economic_outputs
from policyengine_api.endpoints.economy.reform_impact import set_comment_on_job
from policyengine_api.constants import COUNTRY_PACKAGE_VERSIONS
from policyengine_api.country import COUNTRIES, create_policy_reform
from policyengine_api.country import (
COUNTRIES,
create_policy_reform,
PolicyEngineCountry,
)
from policyengine_api.utils.v2_v1_comparison import (
V2V1Comparison,
compute_difference,
Expand Down Expand Up @@ -371,15 +375,13 @@ def _compute_economy(
options.get("max_households", os.environ.get("MAX_HOUSEHOLDS"))
is not None
):
simulation.subsample(
int(
options.get(
"max_households",
os.environ.get("MAX_HOUSEHOLDS", 1_000_000),
)
),
seed=(region, time_period),
simulation = subsample(
options=options,
simulation=simulation,
region=region,
time_period=time_period,
reform=reform,
country=country,
)
simulation.default_calculation_period = time_period

Expand Down Expand Up @@ -575,6 +577,44 @@ def _compute_cliff_impacts(self, simulation: Microsimulation) -> Dict:
}


def subsample(
    options: dict,
    simulation: Microsimulation,
    region: str,
    time_period: str,
    reform: dict,
    country: PolicyEngineCountry,
) -> Microsimulation:
    """Subsample a microsimulation and rebuild it from the reduced data.

    The household cap is read from ``options["max_households"]``; when that
    key is absent, the ``MAX_HOUSEHOLDS`` environment variable is used, and
    when that is unset too the cap falls back to 1,000,000. The chosen value
    is converted with ``int`` before use.

    Args:
        options (dict): Job options; may carry "max_households".
        simulation (Microsimulation): The simulation to subsample. Its
            ``subsample`` method is called and the return value is ignored,
            so the reduction is presumably applied in place -- confirm
            against the country package before reusing the object.
        region (str): Region of the run; forms the seed with ``time_period``.
        time_period (str): Period of the run; part of the seed and also
            forwarded to ``subsample`` as ``time_period``.
        reform (dict): Reform handed to the rebuilt simulation.
        country (PolicyEngineCountry): Supplies
            ``country_package.Microsimulation``, the class used to rebuild.

    Returns:
        Microsimulation: A new simulation constructed from the subsampled
        input dataframe with ``reform`` applied.
    """
    # Resolve the cap once: explicit option first, then environment, then
    # the hard-coded default.
    env_or_default_cap = os.environ.get("MAX_HOUSEHOLDS", 1_000_000)
    household_cap = int(options.get("max_households", env_or_default_cap))

    # Seeding with (region, time_period) ties the draw to the run's inputs.
    simulation.subsample(
        household_cap,
        seed=(region, time_period),
        time_period=time_period,
    )

    # Export the reduced data and start a fresh simulation from it, so the
    # reform is applied on top of the subsampled dataset.
    subsampled_frame = simulation.to_input_dataframe()
    microsimulation_cls = country.country_package.Microsimulation
    return microsimulation_cls(
        dataset=subsampled_frame,
        reform=reform,
    )

class SimulationAPIv2:
project: str
location: str
Expand Down