diff --git a/docs/api/changelog.rst b/docs/api/changelog.rst index a1bfc7b5e..3778a8ff7 100644 --- a/docs/api/changelog.rst +++ b/docs/api/changelog.rst @@ -102,6 +102,9 @@ Added - Functionality to dump and load MODFLOW 6 simulations to/from zarr and zipstore formats. See :meth:`imod.mf6.Modflow6Simulation.dump` and :meth:`imod.mf6.Modflow6Simulation.from_file` for more information. +- Functionality to dump and load MetaSwap models to/from netcdf + format. See :meth:`imod.msw.MetaSwapModel.dump` and + :meth:`imod.msw.MetaSwapModel.from_file` for more information. Changed ~~~~~~~ diff --git a/docs/api/msw.rst b/docs/api/msw.rst index c42fe5ab9..ca1a857a2 100644 --- a/docs/api/msw.rst +++ b/docs/api/msw.rst @@ -11,6 +11,7 @@ Model objects & methods MetaSwapModel MetaSwapModel.write + MetaSwapModel.dump MetaSwapModel.from_imod5_data MetaSwapModel.regrid_like MetaSwapModel.clip_box @@ -172,4 +173,4 @@ Mappings CouplerMapping.clip_box CouplerMapping.from_imod5_data CouplerMapping.get_regrid_methods - CouplerMapping.write \ No newline at end of file + CouplerMapping.write diff --git a/imod/common/utilities/dump_model.py b/imod/common/utilities/dump_model.py new file mode 100644 index 000000000..2c4481b08 --- /dev/null +++ b/imod/common/utilities/dump_model.py @@ -0,0 +1,86 @@ +import collections +from pathlib import Path +from typing import Any, Optional + +import tomli_w + +from imod.common.interfaces.imodel import IModel +from imod.common.serializer import EngineType +from imod.logging.logging_decorators import standard_log_decorator +from imod.mf6.validation_settings import ValidationSettings +from imod.schemata import ValidationError + + +@standard_log_decorator() +def dump_model( + model: IModel, + directory, + modelname, + validate: bool = True, + mdal_compliant: bool = False, + crs: Optional[Any] = None, + engine: EngineType = "netcdf4", +) -> Path: + """ + Dump model to files. Writes a model definition as .TOML file, which + points to data for each package. Each package is stored as a separate + NetCDF. Structured grids are saved as regular NetCDFs, unstructured + grids are saved as UGRID NetCDF. Structured grids are always made GDAL + compliant, unstructured grids can be made MDAL compliant optionally. + + Parameters + ---------- + directory: str or Path + directory to dump simulation into. + modelname: str + modelname, will be used to create a subdirectory. + validate: bool, optional + Whether to validate simulation data. Defaults to True. + mdal_compliant: bool, optional + Convert data with + :func:`imod.prepare.spatial.mdal_compliant_ugrid2d` to MDAL + compliant unstructured grids. Defaults to False. + crs: Any, optional + Anything accepted by rasterio.crs.CRS.from_user_input + Requires ``rioxarray`` installed. + engine : str, optional + File engine used to write packages. Options are ``'netcdf4'``, + ``'zarr'``, and ``'zarr.zip'``. NetCDF4 is readable by many other + softwares, for example QGIS. Zarr is optimized for big data, cloud + storage and parallel access. The ``'zarr.zip'`` option is an + experimental option which creates a zipped zarr store in a single + file, which is easier to copy and automatically compresses data as + well. Default is ``'netcdf4'``. + + """ + modeldirectory = Path(directory) / modelname + modeldirectory.mkdir(exist_ok=True, parents=True) + + # validation currently only supports MF6, but we want to keep the option to turn it on for other + validation_context = ValidationSettings(validate=validate) + if validation_context.validate: + statusinfo = model.validate(modelname, validation_context) + if statusinfo.has_errors(): + raise ValidationError(statusinfo.to_string()) + + toml_content: dict = collections.defaultdict(dict) + + for pkgname, pkg in model.items(): + pkg_path = pkg.to_file( + modeldirectory, + pkgname, + mdal_compliant=mdal_compliant, + crs=crs, + engine=engine, + ) + toml_content[type(pkg).__name__][pkgname] = pkg_path.name + + # simulation settings are only relevant/present for MetaSwap models (msw) + if hasattr(model, "simulation_settings"): + toml_content["simulation_settings"] = model.simulation_settings + + toml_path = modeldirectory / f"{modelname}.toml" + with open(toml_path, "wb") as f: + tomli_w.dump(toml_content, f) + + return toml_path diff --git a/imod/common/utilities/value_filters.py b/imod/common/utilities/value_filters.py index 6f94cacfb..2c8285920 100644 --- a/imod/common/utilities/value_filters.py +++ b/imod/common/utilities/value_filters.py @@ -1,10 +1,22 @@ +import numbers from typing import Any import numpy as np import xarray as xr from xarray.core.utils import is_scalar -from imod.typing import GridDataset +from imod.typing import GridDataArray, GridDataset + + +def is_scalar_nan(da: GridDataArray): + """ + Test if is_scalar_nan, carefully avoid loading grids in memory + """ + scalar_data: bool = is_scalar(da) + if scalar_data: + stripped_value = da.to_numpy()[()] + return isinstance(stripped_value, numbers.Real) and np.isnan(stripped_value) # type: ignore[call-overload] + return False def is_valid(value: Any) -> bool: diff --git a/imod/mf6/model.py b/imod/mf6/model.py index e895a29f1..ee6a5ad9b 100644 --- a/imod/mf6/model.py +++ b/imod/mf6/model.py @@ -12,7 +12,6 @@ import jinja2 import numpy as np import tomli -import tomli_w import xarray as xr import xugrid as xu from jinja2 import Template @@ -22,6 +21,7 @@ from imod.common.serializer import EngineType from imod.common.statusinfo import NestedStatusInfo, StatusInfo, StatusInfoBase from imod.common.utilities.clip import clip_box_dataset +from imod.common.utilities.dump_model import dump_model from imod.common.utilities.mask import mask_all_packages from imod.common.utilities.regrid import _regrid_like from imod.common.utilities.schemata import ( @@ -617,60 +617,57 @@ def dump( engine: EngineType = "netcdf4", ) -> Path: """ - Dump simulation to files. Writes a model definition as .TOML file, which - points to data for each package. Each package is stored as a separate - NetCDF. Structured grids are saved as regular NetCDFs, unstructured - grids are saved as UGRID NetCDF. Structured grids are always made GDAL - compliant, unstructured grids can be made MDAL compliant optionally. - - Parameters - ---------- - directory: str or Path - directory to dump simulation into. - modelname: str - modelname, will be used to create a subdirectory. - validate: bool, optional - Whether to validate simulation data. Defaults to True. - mdal_compliant: bool, optional - Convert data with - :func:`imod.prepare.spatial.mdal_compliant_ugrid2d` to MDAL - compliant unstructured grids. Defaults to False. - crs: Any, optional - Anything accepted by rasterio.crs.CRS.from_user_input - Requires ``rioxarray`` installed. - engine : str, optional - File engine used to write packages. Options are ``'netcdf4'``, - ``'zarr'``, and ``'zarr.zip'``. NetCDF4 is readable by many other - softwares, for example QGIS. Zarr is optimized for big data, cloud - storage and parallel access. The ``'zarr.zip'`` option is an - experimental option which creates a zipped zarr store in a single - file, which is easier to copy and automatically compresses data as - well. Default is ``'netcdf4'``. + Dump simulation to files. Writes a model definition as .TOML file, which + points to data for each package. Each package is stored as a separate + NetCDF. Structured grids are saved as regular NetCDFs, unstructured + grids are saved as UGRID NetCDF. Structured grids are always made GDAL + compliant, unstructured grids can be made MDAL compliant optionally. + + Parameters + ---------- + directory: str or Path + directory to dump simulation into. + modelname: str + modelname, will be used to create a subdirectory. + validate: bool, optional + Whether to validate simulation data. Defaults to True. + mdal_compliant: bool, optional + Convert data with + :func:`imod.prepare.spatial.mdal_compliant_ugrid2d` to MDAL + compliant unstructured grids. Defaults to False. + crs: Any, optional + Anything accepted by rasterio.crs.CRS.from_user_input + Requires ``rioxarray`` installed. + engine : str, optional + File engine used to write packages. Options are ``'netcdf4'``, + ``'zarr'``, and ``'zarr.zip'``. NetCDF4 is readable by many other + softwares, for example QGIS. Zarr is optimized for big data, cloud + storage and parallel access. The ``'zarr.zip'`` option is an + experimental option which creates a zipped zarr store in a single + file, which is easier to copy and automatically compresses data as + well. Default is ``'netcdf4'``. + Returns + ------- + Path + Path to the created toml file which contains the paths to the dumped package files. The package files are dumped in the same directory as the toml file. + + Example + ------- + >>> tmp_path = tmpdir_factory.mktemp(name) + >>> toml_path = mf6_model.dump(tmp_path, name, engine=engine, validate=False) + >>> back = ModflowModel.from_file(tmp_path, name) """ - modeldirectory = pathlib.Path(directory) / modelname - modeldirectory.mkdir(exist_ok=True, parents=True) - validation_context = ValidationSettings(validate=validate) - if validation_context.validate: - statusinfo = self.validate(modelname, validation_context) - if statusinfo.has_errors(): - raise ValidationError(statusinfo.to_string()) - toml_content: dict[str, Any] = collections.defaultdict(dict) - - for pkgname, pkg in self.items(): - pkg_path = pkg.to_file( - modeldirectory, - pkgname, - mdal_compliant=mdal_compliant, - crs=crs, - engine=engine, - ) - toml_content[type(pkg).__name__][pkgname] = pkg_path.name - - toml_path = modeldirectory / f"{modelname}.toml" - with open(toml_path, "wb") as f: - tomli_w.dump(toml_content, f) + toml_path = dump_model( + self, + directory, + modelname, + validate=validate, + mdal_compliant=mdal_compliant, + crs=crs, + engine=engine, + ) return toml_path diff --git a/imod/mf6/pkgbase.py b/imod/mf6/pkgbase.py index 6a5f212ca..cef68dc6e 100644 --- a/imod/mf6/pkgbase.py +++ b/imod/mf6/pkgbase.py @@ -1,16 +1,14 @@ import abc -import numbers from pathlib import Path from typing import TYPE_CHECKING, Any, Mapping, Optional, Self, final -import numpy as np import xarray as xr import xugrid as xu -from xarray.core.utils import is_scalar import imod from imod.common.interfaces.ipackagebase import IPackageBase from imod.common.serializer import EngineType, create_package_serializer +from imod.common.utilities.value_filters import is_scalar_nan from imod.typing.grid import ( GridDataArray, GridDataset, @@ -32,17 +30,6 @@ UTIL_PACKAGES = ("ats", "hpc") -def _is_scalar_nan(da: GridDataArray): - """ - Test if is_scalar_nan, carefully avoid loading grids in memory - """ - scalar_data: bool = is_scalar(da) - if scalar_data: - stripped_value = da.to_numpy()[()] - return isinstance(stripped_value, numbers.Real) and np.isnan(stripped_value) # type: ignore[call-overload] - return False - - class PackageBase(IPackageBase, abc.ABC): """ This class is used for storing a collection of Xarray DataArrays or UgridDataArrays @@ -148,7 +135,7 @@ def from_file(cls, path: str | Path, **kwargs) -> Self: # Replace NaNs by None for key, value in dataset.items(): - if _is_scalar_nan(value): + if is_scalar_nan(value): dataset[key] = None # to_netcdf converts strings into NetCDF "variable‑length UTF‑8 strings" diff --git a/imod/msw/model.py b/imod/msw/model.py index 1c604c352..bba41533c 100644 --- a/imod/msw/model.py +++ b/imod/msw/model.py @@ -1,17 +1,23 @@ import collections +import inspect import warnings from copy import copy, deepcopy from datetime import datetime from pathlib import Path -from typing import Any, Optional, Union, cast +from typing import Any, Optional, Tuple, Union, cast import cftime import jinja2 import numpy as np +import tomli import xarray as xr +import imod.msw from imod.common.constants import MaskValues +from imod.common.interfaces.imodel import IModel +from imod.common.serializer import EngineType from imod.common.utilities.clip import clip_by_grid +from imod.common.utilities.dump_model import dump_model from imod.common.utilities.partitioninfo import create_partition_info from imod.common.utilities.regrid import regrid_imod5_cap_data from imod.common.utilities.value_filters import enforce_scalar @@ -109,7 +115,7 @@ def update(self, *args, **kwargs): self[k] = v -class MetaSwapModel(Model): +class MetaSwapModel(Model, IModel): """ Contains data and writes consistent model input files @@ -370,6 +376,90 @@ def write( for pkgname in self: self[pkgname].write(directory, index, svat, mf6_dis, mf6_wel) + @classmethod + def from_file(cls, directory, modelname): + pkg_classes = { + name: pkg_cls + for name, pkg_cls in inspect.getmembers(imod.msw, inspect.isclass) + if issubclass(pkg_cls, MetaSwapPackage) + } + modeldirectory = Path(directory) / modelname + toml_path = modeldirectory / f"{modelname}.toml" + with open(toml_path, "rb") as f: + toml_content = tomli.load(f) + + parentdir = toml_path.parent + simulation_settings = toml_content.get("simulation_settings", {}) + unsa_svat_path = simulation_settings.get("unsa_svat_path", "") + instance = cls(unsa_svat_path, simulation_settings) + + for key, entry in toml_content.items(): + if key != "simulation_settings": + for pkgname, path in entry.items(): + pkg_cls = pkg_classes[key] + instance[pkgname] = pkg_cls.from_file(parentdir / path) + + return instance + + def dump( + self, + directory: Union[str, Path], + modelname: Optional[str] = None, + validate: Optional[bool] = True, + mdal_compliant: bool = False, + crs: Optional[str] = None, + engine: EngineType = "netcdf4", + ): + """ + Dump model packages to netCDF files and create a toml file with paths to these files. + The MetaSWAP model can be reloaded from the dumped files using the :func:`from_file` method. + + Parameters + ---------- + directory: Path or str + Directory to dump model in. A subdirectory with the name of the model will be created in this directory, and the files will be dumped there. + modelname: str, optional + Name of the model. This will be used as the name of the subdirectory where the files are dumped, and in the name of the toml file. If not provided, it defaults to the value of ``self._model_name``. + validate: bool, optional + Whether to perform validation before dumping the model. If True, the model will be validated using the validation functionality in iMOD. If validation errors are found, a ValidationError is raised and the model is not dumped. Default is True. + mdal_compliant: bool, optional + Whether to write the files in a format compliant with the MDAL specification. This can be used to make the files compatible with software that supports MDAL, such as QGIS. Default is False. + crs: str, optional + Coordinate reference system to use in the dumped files. This should be a string in a format recognized by the pyproj library, for example "EPSG:28992". If not provided, no CRS information is included in the files. + engine: EngineType, optional + File engine used to write packages. + engine : str, optional + File engine used to write packages. Options are ``'netcdf4'``, + ``'zarr'``, and ``'zarr.zip'``. NetCDF4 is readable by many other + softwares, for example QGIS. Zarr is optimized for big data, cloud + storage and parallel access. The ``'zarr.zip'`` option is an + experimental option which creates a zipped zarr store in a single + file, which is easier to copy and automatically compresses data as + well. Default is ``'netcdf4'``. + + Returns + ------- + Path + Path to the created toml file which contains the paths to the dumped package files. The package files are dumped in the same directory as the toml file. + + Example + ------- + >>> tmp_path = tmpdir_factory.mktemp(name) + >>> toml_path = msw_model.dump(tmp_path, name, engine=engine, validate=False) + >>> back = MetaSwapModel.from_file(tmp_path, name) + """ + + toml_path = dump_model( + self, + directory=directory, + modelname=modelname, + validate=validate, + mdal_compliant=mdal_compliant, + crs=crs, + engine=engine, + ) + return toml_path + def regrid_like( self, mf6_regridded_dis: StructuredDiscretization, @@ -680,3 +770,47 @@ def from_imod5_data( model["time_oc"] = TimeOutputControl(times_da) return model + + def _is_splitting_supported(self) -> Tuple[bool, str]: + raise NotImplementedError + + def _is_regridding_supported(self) -> Tuple[bool, str]: + raise NotImplementedError + + def _is_clipping_supported(self) -> Tuple[bool, str]: + raise NotImplementedError + + def purge_empty_packages( + self, model_name: Optional[str] = "", ignore_time: bool = False + ) -> None: + raise NotImplementedError + + def mask_all_packages( + self, + mask: GridDataArray, + ignore_time_purge_empty: bool = False, + ): + raise NotImplementedError + + @property + def model_id(self) -> str: + raise NotImplementedError + + @property + def options(self) -> dict: + raise NotImplementedError + + @property + def domain(self) -> GridDataArray: + raise NotImplementedError + + def validate( + self, + model_name: str = "", + validation_context: Optional[Any] = None, + **kwargs, + ) -> None: + return + + +# make a read function for packages from netcdf diff --git a/imod/msw/pkgbase.py b/imod/msw/pkgbase.py index a379910af..73686b7b1 100644 --- a/imod/msw/pkgbase.py +++ b/imod/msw/pkgbase.py @@ -1,18 +1,21 @@ import abc from copy import deepcopy from pathlib import Path -from typing import Any, Optional, TextIO, TypeAlias, Union +from typing import Any, Optional, Self, TextIO, TypeAlias, Union import cftime import numpy as np import pandas as pd import xarray as xr +import zarr # nb: in modflow6, this is a try...except +from imod.common.serializer import EngineType, create_package_serializer from imod.common.utilities.clip import clip_spatial_box, clip_time_slice from imod.common.utilities.dataclass_type import DataclassType, EmptyRegridMethod from imod.common.utilities.regrid import ( _regrid_like, ) +from imod.common.utilities.value_filters import is_scalar_nan from imod.mf6.dis import StructuredDiscretization from imod.mf6.mf6_wel_adapter import Mf6Wel from imod.msw.fixed_format import format_fixed_width @@ -385,3 +388,105 @@ def from_imod5_data(self, *args, **kwargs): def _is_clipping_supported(self) -> bool: return True + + @classmethod + def from_file(cls, path: str | Path, **kwargs) -> Self: + """ + Loads an imod msw package from a file (netcdf or zarr). + Note that the checks upon package initialization are not done again! + + Parameters + ---------- + path : str, pathlib.Path + Path to the file. + **kwargs : keyword arguments + Arbitrary keyword arguments forwarded to ``xarray.open_dataset()``, or + ``xarray.open_zarr()``. + Refer to the examples. + + Returns + ------- + package : imod.msw.MetaSwapPackage + Returns a package with data loaded from file. + + Examples + -------- + + To load a package from a file, e.g. a River package: + + >>> river = imod.mf6.River.from_file("river.nc") + + For large datasets, you likely want to process it in chunks. You can + forward keyword arguments to ``xarray.open_dataset()`` or + ``xarray.open_zarr()``: + + >>> river = imod.mf6.River.from_file("river.nc", chunks={"time": 1}) + + Refer to the xarray documentation for the possible keyword arguments. + """ + path = Path(path) + if path.suffix in (".zip", ".zarr"): + if path.suffix == ".zip": + with zarr.storage.ZipStore(path, mode="r") as store: + dataset = xr.open_zarr(store, **kwargs) + else: + dataset = xr.open_zarr(str(path), **kwargs) + else: + dataset = xr.open_dataset(path, chunks="auto", **kwargs) + + # Replace NaNs by None + for key, value in dataset.items(): + if is_scalar_nan(value): + dataset[key] = None + + return cls._from_dataset(dataset) + + def to_file( + self, + modeldirectory: Path, + pkgname: str, + mdal_compliant: bool = False, + crs: Optional[Any] = None, + engine: EngineType = "netcdf4", + **kwargs, + ) -> Path: + """ + Write package to file. + + Parameters + ---------- + modeldirectory : pathlib.Path + Directory where to write the package file. + pkgname : str + Name of the package, used to create the file name. + mdal_compliant : bool, optional + Whether to write the package in MDAL-compliant format. Only used if + ``engine="netcdf4"``. Default is False. + crs : optional + Coordinate reference system to use when writing the package. Only + used if ``engine="netcdf4"`` Default is None. + engine : str, optional + File engine used to write packages. Options are ``'netcdf4'``, + ``'zarr'``, and ``'zarr.zip'``. NetCDF4 is readable by many other + softwares, for example QGIS. Zarr is optimized for big data, cloud + storage and parallel access. The ``'zarr.zip'`` option is an + experimental option which creates a zipped zarr store in a single + file, which is easier to copy and automatically compresses data as + well. Default is ``'netcdf4'``. + **kwargs : keyword arguments + Additional keyword arguments forwarded to the package serializer's + `to_file` method. + + Returns + ------- + pathlib.Path + Path to the written package file. + """ + + # All serialization logic is in the package serializers do not override + # this method (hence final decorator). + filewriter = create_package_serializer( + engine, mdal_compliant=mdal_compliant, crs=crs + ) + path = filewriter.to_file(self, modeldirectory, pkgname, **kwargs) + return path diff --git a/imod/tests/test_msw/test_model.py b/imod/tests/test_msw/test_model.py index aeaaf887c..771fed5ba 100644 --- a/imod/tests/test_msw/test_model.py +++ b/imod/tests/test_msw/test_model.py @@ -11,7 +11,7 @@ from imod.msw.copy_files import FileCopier from imod.msw.meteo_grid import MeteoGridCopy from imod.msw.meteo_mapping import MeteoMapping -from imod.msw.model import DEFAULT_SETTINGS +from imod.msw.model import DEFAULT_SETTINGS, MetaSwapModel from imod.msw.utilities.parse import read_para_sim from imod.typing import GridDataArray, Imod5DataDict from imod.typing.grid import zeros_like @@ -19,6 +19,34 @@ from imod.util.spatial import empty_2d +def roundtrip(msw_model, tmpdir_factory, name, engine): + # TODO: look at the values? + tmp_path = tmpdir_factory.mktemp(name) + msw_model.dump(tmp_path, name, engine=engine, validate=False) + back = MetaSwapModel.from_file(tmp_path, name) + assert isinstance(back, MetaSwapModel) + pkgkeycheck = set(msw_model.keys()) == set(back.keys()) + assert pkgkeycheck + pkgcheck = all( + msw_model[pkgname].dataset.equals(back[pkgname].dataset) + for pkgname in msw_model.keys() + ) + assert pkgcheck + assert msw_model.simulation_settings == back.simulation_settings + + +def test_msw_pkgdump_nc(msw_model, tmpdir_factory): + roundtrip(msw_model, tmpdir_factory, name="testmodel", engine="netcdf4") + + +def test_msw_pkgdump_zarr(msw_model, tmpdir_factory): + roundtrip(msw_model, tmpdir_factory, name="testmodel", engine="zarr") + + +def test_msw_pkgdump_zarrzip(msw_model, tmpdir_factory): + roundtrip(msw_model, tmpdir_factory, name="testmodel", engine="zarr.zip") + + def test_msw_model_write(msw_model, coupled_mf6_model, coupled_mf6wel, tmp_path): mf6_dis = coupled_mf6_model["GWF_1"]["dis"]