From 8ff6ffabff5165308aa04ea113eb113eb377f27e Mon Sep 17 00:00:00 2001 From: Nathan Zimmerman Date: Thu, 1 May 2025 11:23:56 -0500 Subject: [PATCH 1/2] Add flexible deserialization for datetime fill values --- src/zarr/core/metadata/v2.py | 8 +++++ tests/test_metadata/test_v2.py | 63 ++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py index d19193963f..94f9e337d7 100644 --- a/src/zarr/core/metadata/v2.py +++ b/src/zarr/core/metadata/v2.py @@ -327,7 +327,15 @@ def parse_fill_value(fill_value: Any, dtype: np.dtype[Any]) -> Any: """ if fill_value is None or dtype.hasobject: + # Leave fill_value unchanged when it is None or the dtype contains objects pass + elif dtype.kind in "M": + # NaT may arrive as the string "NaT" or as its int64 representation (the int64 minimum) + if fill_value == "NaT" or fill_value == np.iinfo(np.int64).min: + fill_value = dtype.type("NaT") + else: + fill_value = np.array(fill_value, dtype=dtype)[()] + # NOTE(review): kind "M" matches datetime64 only; timedelta64 ("m") skips this branch elif dtype.fields is not None: # the dtype is structured (has multiple fields), so the fill_value might be a # compound value (e.g., a tuple or dict) that needs field-wise processing. 
diff --git a/tests/test_metadata/test_v2.py b/tests/test_metadata/test_v2.py index 08b9cb2507..da11204728 100644 --- a/tests/test_metadata/test_v2.py +++ b/tests/test_metadata/test_v2.py @@ -277,6 +277,69 @@ async def test_getitem_consolidated(self, v2_consolidated_metadata): assert air.metadata.shape == (730,) +@pytest.mark.parametrize("dtype_str", ["datetime64[s]", "timedelta64[ms]"]) +def test_parse_v2_fill_value_nat_integer(dtype_str: str) -> None: + """An int64-minimum fill value in V2 metadata is parsed as NaT of the dtype's kind.""" + nat_int_repr = np.iinfo(np.int64).min # -9223372036854775808 + dtype = np.dtype(dtype_str) + metadata_dict = { + "zarr_format": 2, + "shape": (10,), + "chunks": (5,), + "dtype": dtype.str, + "compressor": None, + "filters": None, + "fill_value": nat_int_repr, + "order": "C", + } + meta = ArrayV2Metadata.from_dict(metadata_dict) + assert np.isnat(meta.fill_value) + assert meta.fill_value.dtype.kind == dtype.kind + + +@pytest.mark.parametrize("dtype_str", ["datetime64[s]", "timedelta64[ms]"]) +def test_parse_v2_fill_value_nat_string(dtype_str: str) -> None: + """A 'NaT' string fill value in V2 metadata is parsed as NaT of the dtype's kind.""" + dtype = np.dtype(dtype_str) + metadata_dict = { + "zarr_format": 2, + "shape": (10,), + "chunks": (5,), + "dtype": dtype.str, + "compressor": None, + "filters": None, + "fill_value": "NaT", + "order": "C", + } + meta = ArrayV2Metadata.from_dict(metadata_dict) + assert np.isnat(meta.fill_value) + assert meta.fill_value.dtype.kind == dtype.kind + + +@pytest.mark.parametrize("dtype_str", ["datetime64[s]", "timedelta64[ms]"]) +def test_parse_v2_fill_value_non_nat(dtype_str: str) -> None: + """An integer fill value in V2 metadata is parsed as a scalar of the declared dtype.""" + dtype = np.dtype(dtype_str) + # Arbitrary integer counts of the dtype's unit (seconds or milliseconds here) + # Note: zarr v2 serializes non-NaT datetimes/timedeltas as integers + fill_value_int = 1234567890 if dtype.kind == "M" else 12345 + expected_value = 
np.array(fill_value_int, dtype=dtype)[()] + + metadata_dict = { + "zarr_format": 2, + "shape": (10,), + "chunks": (5,), + "dtype": dtype.str, + "compressor": None, + "filters": None, + "fill_value": fill_value_int, + "order": "C", + } + meta = ArrayV2Metadata.from_dict(metadata_dict) + assert meta.fill_value == expected_value + assert meta.fill_value.dtype == dtype + + def test_from_dict_extra_fields() -> None: data = { "_nczarr_array": {"dimrefs": ["/dim1", "/dim2"], "storage": "chunked"}, From a2edd4a2f44adb6b13748f74abfa461e331cd403 Mon Sep 17 00:00:00 2001 From: Nathan Zimmerman Date: Thu, 1 May 2025 11:32:51 -0500 Subject: [PATCH 2/2] Add changelog entry --- changes/3031.bugfix.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/3031.bugfix.rst diff --git a/changes/3031.bugfix.rst b/changes/3031.bugfix.rst new file mode 100644 index 0000000000..46c3d1e917 --- /dev/null +++ b/changes/3031.bugfix.rst @@ -0,0 +1 @@ +Add flexible deserialization for datetime fill values in v2 metadata \ No newline at end of file