Skip to content

Add flexible deserialization for datetime fill values #3031

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/3031.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add flexible deserialization for datetime fill values in v2 metadata
8 changes: 8 additions & 0 deletions src/zarr/core/metadata/v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,15 @@
"""

if fill_value is None or dtype.hasobject:
# Pass through None or if dtype is object
pass
elif dtype.kind in "M":

Check warning on line 336 in src/zarr/core/metadata/v2.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/core/metadata/v2.py#L336

Added line #L336 was not covered by tests
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
elif dtype.kind in "M":
elif dtype.kind in ("M", "m"):

Handle timedelta64 (dtype kind "m") as well as datetime64 (dtype kind "M").

# Check for both string "NaT" and the int64 representation of NaT
if fill_value == "NaT" or fill_value == np.iinfo(np.int64).min:
fill_value = dtype.type("NaT")

Check warning on line 339 in src/zarr/core/metadata/v2.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/core/metadata/v2.py#L338-L339

Added lines #L338 - L339 were not covered by tests
else:
fill_value = np.array(fill_value, dtype=dtype)[()]

Check warning on line 341 in src/zarr/core/metadata/v2.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/core/metadata/v2.py#L341

Added line #L341 was not covered by tests
# Non-NaT datetime/timedelta values are converted by the else branch above; no later branch of this elif chain runs for them
elif dtype.fields is not None:
# the dtype is structured (has multiple fields), so the fill_value might be a
# compound value (e.g., a tuple or dict) that needs field-wise processing.
Expand Down
63 changes: 63 additions & 0 deletions tests/test_metadata/test_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,69 @@ async def test_getitem_consolidated(self, v2_consolidated_metadata):
assert air.metadata.shape == (730,)


@pytest.mark.parametrize("dtype_str", ["datetime64[s]", "timedelta64[ms]"])
def test_parse_v2_fill_value_nat_integer(dtype_str: str) -> None:
    """Check that an integer-encoded NaT fill value is read back as NaT.

    The V2 metadata document carries ``np.iinfo(np.int64).min`` as its
    ``fill_value``, i.e. the int64 representation of NaT. After
    ``ArrayV2Metadata.from_dict`` the fill value must be NaT and must have
    the same dtype kind as the dtype declared in the document.
    """
    target_dtype = np.dtype(dtype_str)
    # int64 representation of NaT: -9223372036854775808
    encoded_nat = np.iinfo(np.int64).min

    document = {
        "zarr_format": 2,
        "shape": (10,),
        "chunks": (5,),
        "dtype": target_dtype.str,
        "compressor": None,
        "filters": None,
        "fill_value": encoded_nat,
        "order": "C",
    }

    parsed_fill = ArrayV2Metadata.from_dict(document).fill_value

    assert np.isnat(parsed_fill)
    assert parsed_fill.dtype.kind == target_dtype.kind


@pytest.mark.parametrize("dtype_str", ["datetime64[s]", "timedelta64[ms]"])
def test_parse_v2_fill_value_nat_string(dtype_str: str) -> None:
    """Check that the string ``"NaT"`` fill value is read back as NaT.

    The V2 metadata document carries the literal string ``"NaT"`` as its
    ``fill_value``. After ``ArrayV2Metadata.from_dict`` the fill value must
    be NaT and must have the same dtype kind as the dtype declared in the
    document.
    """
    target_dtype = np.dtype(dtype_str)

    document = {
        "zarr_format": 2,
        "shape": (10,),
        "chunks": (5,),
        "dtype": target_dtype.str,
        "compressor": None,
        "filters": None,
        "fill_value": "NaT",
        "order": "C",
    }

    parsed_fill = ArrayV2Metadata.from_dict(document).fill_value

    assert np.isnat(parsed_fill)
    assert parsed_fill.dtype.kind == target_dtype.kind


@pytest.mark.parametrize("dtype_str", ["datetime64[s]", "timedelta64[ms]"])
def test_parse_v2_fill_value_non_nat(dtype_str: str) -> None:
    """Check that an ordinary (non-NaT) integer fill value is parsed correctly.

    The document stores the fill value as a plain integer; per the original
    test's note, zarr v2 serializes non-NaT datetimes/timedeltas as integers.
    The parsed fill value must equal the same integer converted through
    NumPy with the declared dtype, and must carry exactly that dtype.
    """
    target_dtype = np.dtype(dtype_str)

    # Pick an integer that is a valid value for the dtype under test.
    if target_dtype.kind == "M":
        raw_fill = 1234567890
    else:
        raw_fill = 12345
    reference = np.array(raw_fill, dtype=target_dtype)[()]

    document = {
        "zarr_format": 2,
        "shape": (10,),
        "chunks": (5,),
        "dtype": target_dtype.str,
        "compressor": None,
        "filters": None,
        "fill_value": raw_fill,
        "order": "C",
    }

    parsed_fill = ArrayV2Metadata.from_dict(document).fill_value

    assert parsed_fill == reference
    assert parsed_fill.dtype == target_dtype


def test_from_dict_extra_fields() -> None:
data = {
"_nczarr_array": {"dimrefs": ["/dim1", "/dim2"], "storage": "chunked"},
Expand Down