From 4b3dee58905ea572ced171c0d7dc5fba10b26105 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Tue, 6 May 2025 12:14:40 +0200 Subject: [PATCH 1/4] cast numcodecs to v3 --- src/zarr/registry.py | 31 ++++++++++++-- tests/test_codecs/test_codecs.py | 69 ++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 3 deletions(-) diff --git a/src/zarr/registry.py b/src/zarr/registry.py index 704db3f704..71dfa4fb1b 100644 --- a/src/zarr/registry.py +++ b/src/zarr/registry.py @@ -5,6 +5,8 @@ from importlib.metadata import entry_points as get_entry_points from typing import TYPE_CHECKING, Any, Generic, TypeVar +import numcodecs + from zarr.core.config import BadConfigError, config if TYPE_CHECKING: @@ -166,6 +168,23 @@ def _resolve_codec(data: dict[str, JSON]) -> Codec: return get_codec_class(data["name"]).from_dict(data) # type: ignore[arg-type] +def numcodec_to_zarr3_codec(codec: numcodecs.abc.Codec) -> Codec: + import numcodecs.zarr3 + + codec_name = codec.__class__.__name__ + zarr3_module = numcodecs.zarr3 + + if not hasattr(zarr3_module, codec_name): + raise ValueError(f"No Zarr3 wrapper found for numcodec: {codec_name}") + + zarr3_codec_class = getattr(zarr3_module, codec_name) + + config = codec.get_config() + config.pop("id", None) + + return zarr3_codec_class(**config) + + def _parse_bytes_bytes_codec(data: dict[str, JSON] | Codec) -> BytesBytesCodec: """ Normalize the input to a ``BytesBytesCodec`` instance. @@ -174,7 +193,9 @@ def _parse_bytes_bytes_codec(data: dict[str, JSON] | Codec) -> BytesBytesCodec: """ from zarr.abc.codec import BytesBytesCodec - if isinstance(data, dict): + if isinstance(data, numcodecs.abc.Codec): + result = numcodec_to_zarr3_codec(data) + elif isinstance(data, dict): result = _resolve_codec(data) if not isinstance(result, BytesBytesCodec): msg = f"Expected a dict representation of a BytesBytesCodec; got a dict representation of a {type(result)} instead." 
@@ -194,7 +215,9 @@ def _parse_array_bytes_codec(data: dict[str, JSON] | Codec) -> ArrayBytesCodec: """ from zarr.abc.codec import ArrayBytesCodec - if isinstance(data, dict): + if isinstance(data, numcodecs.abc.Codec): + result = numcodec_to_zarr3_codec(data) + elif isinstance(data, dict): result = _resolve_codec(data) if not isinstance(result, ArrayBytesCodec): msg = f"Expected a dict representation of a ArrayBytesCodec; got a dict representation of a {type(result)} instead." @@ -214,7 +237,9 @@ def _parse_array_array_codec(data: dict[str, JSON] | Codec) -> ArrayArrayCodec: """ from zarr.abc.codec import ArrayArrayCodec - if isinstance(data, dict): + if isinstance(data, numcodecs.abc.Codec): + result = numcodec_to_zarr3_codec(data) + elif isinstance(data, dict): result = _resolve_codec(data) if not isinstance(result, ArrayArrayCodec): msg = f"Expected a dict representation of a ArrayArrayCodec; got a dict representation of a {type(result)} instead." diff --git a/tests/test_codecs/test_codecs.py b/tests/test_codecs/test_codecs.py index b8122b4ac2..8a14d56167 100644 --- a/tests/test_codecs/test_codecs.py +++ b/tests/test_codecs/test_codecs.py @@ -4,6 +4,8 @@ from dataclasses import dataclass from typing import TYPE_CHECKING +import numcodecs +import numcodecs.zarr3 import numpy as np import pytest @@ -11,6 +13,7 @@ import zarr.api import zarr.api.asynchronous from zarr import Array, AsyncArray, config +from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec from zarr.codecs import ( BytesCodec, GzipCodec, @@ -23,6 +26,7 @@ if TYPE_CHECKING: from zarr.abc.store import Store + from zarr.core.array import CompressorsLike, FiltersLike, SerializerLike from zarr.core.buffer import NDArrayLike from zarr.core.common import MemoryOrder @@ -406,3 +410,68 @@ async def test_resize(store: Store) -> None: assert await store.get(f"{path}/0.1", prototype=default_buffer_prototype()) is not None assert await store.get(f"{path}/1.0", 
prototype=default_buffer_prototype()) is None assert await store.get(f"{path}/1.1", prototype=default_buffer_prototype()) is None + + +@pytest.mark.parametrize("store", ["memory"], indirect=["store"]) +@pytest.mark.parametrize( + ("codec_v2", "expected_v3_cls"), + [ + (numcodecs.BZ2(), numcodecs.zarr3.BZ2), + (numcodecs.CRC32(), numcodecs.zarr3.CRC32), + (numcodecs.CRC32C(), numcodecs.zarr3.CRC32C), + (numcodecs.LZ4(), numcodecs.zarr3.LZ4), + (numcodecs.LZMA(), numcodecs.zarr3.LZMA), + # (numcodecs.ZFPY(), numcodecs.zarr3.ZFPY), + (numcodecs.Adler32(), numcodecs.zarr3.Adler32), + ( + numcodecs.AsType(encode_dtype=np.float64, decode_dtype=np.float32), + numcodecs.zarr3.AsType, + ), + (numcodecs.BitRound(keepbits=10), numcodecs.zarr3.BitRound), + (numcodecs.Blosc(), numcodecs.zarr3.Blosc), + (numcodecs.Delta(dtype=np.float64), numcodecs.zarr3.Delta), + ( + numcodecs.FixedScaleOffset(offset=1000, scale=10, dtype="f8", astype="u1"), + numcodecs.zarr3.FixedScaleOffset, + ), + (numcodecs.Fletcher32(), numcodecs.zarr3.Fletcher32), + (numcodecs.GZip(), numcodecs.zarr3.GZip), + (numcodecs.JenkinsLookup3(), numcodecs.zarr3.JenkinsLookup3), + # (numcodecs.PCodec(), numcodecs.zarr3.PCodec), + (numcodecs.PackBits(), numcodecs.zarr3.PackBits), + (numcodecs.Quantize(digits=1, dtype="f8"), numcodecs.zarr3.Quantize), + (numcodecs.Shuffle(), numcodecs.zarr3.Shuffle), + (numcodecs.Zlib(), numcodecs.zarr3.Zlib), + (numcodecs.Zstd(), numcodecs.zarr3.Zstd), + ], +) +def test_numcodecs_in_v3(store: Store, codec_v2, expected_v3_cls) -> None: + result_v3 = zarr.registry.numcodec_to_zarr3_codec(codec_v2) + + assert result_v3.__class__ == expected_v3_cls + assert result_v3.codec_config == codec_v2.get_config() + + filters: FiltersLike = "auto" + serializer: SerializerLike = "auto" + compressors: CompressorsLike = "auto" + if isinstance(result_v3, ArrayArrayCodec): + filters = [codec_v2] + elif isinstance(result_v3, ArrayBytesCodec): + serializer = codec_v2 + elif isinstance(result_v3, 
BytesBytesCodec): + compressors = [codec_v2] + else: + raise TypeError(f"unsupported type: {result_v3.__class__}") + + z = zarr.create_array( + store, + shape=(64,), + chunks=(64,), + dtype=np.bool, + fill_value=False, + filters=filters, + compressors=compressors, + serializer=serializer, + ) + z[...] = True + assert np.all(z[:]) From 27921d03512f556c39078210ae918edd23778c5a Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Tue, 6 May 2025 12:25:57 +0200 Subject: [PATCH 2/4] rename vars in numcodec_to_zarr3_codec --- src/zarr/registry.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/zarr/registry.py b/src/zarr/registry.py index 71dfa4fb1b..d488e05f03 100644 --- a/src/zarr/registry.py +++ b/src/zarr/registry.py @@ -172,17 +172,17 @@ def numcodec_to_zarr3_codec(codec: numcodecs.abc.Codec) -> Codec: import numcodecs.zarr3 codec_name = codec.__class__.__name__ - zarr3_module = numcodecs.zarr3 + numcodecs_zarr3_module = numcodecs.zarr3 - if not hasattr(zarr3_module, codec_name): + if not hasattr(numcodecs_zarr3_module, codec_name): raise ValueError(f"No Zarr3 wrapper found for numcodec: {codec_name}") - zarr3_codec_class = getattr(zarr3_module, codec_name) + numcodecs_zarr3_codec_class = getattr(numcodecs_zarr3_module, codec_name) config = codec.get_config() config.pop("id", None) - return zarr3_codec_class(**config) + return numcodecs_zarr3_codec_class(**config) def _parse_bytes_bytes_codec(data: dict[str, JSON] | Codec) -> BytesBytesCodec: From 3811026bd7bcd5dc036a4497833c0396114e57b2 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Tue, 6 May 2025 12:35:34 +0200 Subject: [PATCH 3/4] typing --- src/zarr/registry.py | 22 ++++++++++++---------- tests/test_codecs/test_codecs.py | 4 ++-- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/zarr/registry.py b/src/zarr/registry.py index d488e05f03..2bf4f1e737 100644 --- a/src/zarr/registry.py +++ b/src/zarr/registry.py @@ -3,7 +3,7 @@ import warnings from collections import 
defaultdict from importlib.metadata import entry_points as get_entry_points -from typing import TYPE_CHECKING, Any, Generic, TypeVar +from typing import TYPE_CHECKING, Any, Generic, TypeVar, cast import numcodecs @@ -179,10 +179,12 @@ def numcodec_to_zarr3_codec(codec: numcodecs.abc.Codec) -> Codec: numcodecs_zarr3_codec_class = getattr(numcodecs_zarr3_module, codec_name) - config = codec.get_config() - config.pop("id", None) + codec_config = codec.get_config() + codec_config.pop("id", None) - return numcodecs_zarr3_codec_class(**config) + codec = numcodecs_zarr3_codec_class(**codec_config) + codec = cast(Codec, codec) + return codec def _parse_bytes_bytes_codec(data: dict[str, JSON] | Codec) -> BytesBytesCodec: @@ -201,9 +203,9 @@ def _parse_bytes_bytes_codec(data: dict[str, JSON] | Codec) -> BytesBytesCodec: msg = f"Expected a dict representation of a BytesBytesCodec; got a dict representation of a {type(result)} instead." raise TypeError(msg) else: - if not isinstance(data, BytesBytesCodec): - raise TypeError(f"Expected a BytesBytesCodec. Got {type(data)} instead.") result = data + if not isinstance(result, BytesBytesCodec): + raise TypeError(f"Expected a BytesBytesCodec. Got {type(result)} instead.") return result @@ -223,9 +225,9 @@ def _parse_array_bytes_codec(data: dict[str, JSON] | Codec) -> ArrayBytesCodec: msg = f"Expected a dict representation of a ArrayBytesCodec; got a dict representation of a {type(result)} instead." raise TypeError(msg) else: - if not isinstance(data, ArrayBytesCodec): - raise TypeError(f"Expected a ArrayBytesCodec. Got {type(data)} instead.") result = data + if not isinstance(result, ArrayBytesCodec): + raise TypeError(f"Expected a ArrayBytesCodec. Got {type(result)} instead.") return result @@ -245,9 +247,9 @@ def _parse_array_array_codec(data: dict[str, JSON] | Codec) -> ArrayArrayCodec: msg = f"Expected a dict representation of a ArrayArrayCodec; got a dict representation of a {type(result)} instead." 
raise TypeError(msg) else: - if not isinstance(data, ArrayArrayCodec): - raise TypeError(f"Expected a ArrayArrayCodec. Got {type(data)} instead.") result = data + if not isinstance(result, ArrayArrayCodec): + raise TypeError(f"Expected a ArrayArrayCodec. Got {type(result)} instead.") return result diff --git a/tests/test_codecs/test_codecs.py b/tests/test_codecs/test_codecs.py index 8a14d56167..178a604473 100644 --- a/tests/test_codecs/test_codecs.py +++ b/tests/test_codecs/test_codecs.py @@ -421,7 +421,7 @@ async def test_resize(store: Store) -> None: (numcodecs.CRC32C(), numcodecs.zarr3.CRC32C), (numcodecs.LZ4(), numcodecs.zarr3.LZ4), (numcodecs.LZMA(), numcodecs.zarr3.LZMA), - # (numcodecs.ZFPY(), numcodecs.zarr3.ZFPY), + (numcodecs.ZFPY(), numcodecs.zarr3.ZFPY), (numcodecs.Adler32(), numcodecs.zarr3.Adler32), ( numcodecs.AsType(encode_dtype=np.float64, decode_dtype=np.float32), @@ -437,7 +437,7 @@ async def test_resize(store: Store) -> None: (numcodecs.Fletcher32(), numcodecs.zarr3.Fletcher32), (numcodecs.GZip(), numcodecs.zarr3.GZip), (numcodecs.JenkinsLookup3(), numcodecs.zarr3.JenkinsLookup3), - # (numcodecs.PCodec(), numcodecs.zarr3.PCodec), + (numcodecs.PCodec(), numcodecs.zarr3.PCodec), (numcodecs.PackBits(), numcodecs.zarr3.PackBits), (numcodecs.Quantize(digits=1, dtype="f8"), numcodecs.zarr3.Quantize), (numcodecs.Shuffle(), numcodecs.zarr3.Shuffle), From dd24c5b6aa32c49ac172afdc31830ab40335395a Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Tue, 6 May 2025 12:49:54 +0200 Subject: [PATCH 4/4] exclude ZFPY and PCodec from tests (numcodecs raises AttributeError for both) 
--- src/zarr/registry.py | 6 +++--- tests/test_codecs/test_codecs.py | 8 +++----- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/zarr/registry.py b/src/zarr/registry.py index 2bf4f1e737..ca9829027d 100644 --- a/src/zarr/registry.py +++ b/src/zarr/registry.py @@ -3,10 +3,11 @@ import warnings from collections import defaultdict from importlib.metadata import entry_points as get_entry_points -from typing import TYPE_CHECKING, Any, Generic, TypeVar, cast +from typing import TYPE_CHECKING, Any, Generic, TypeVar import numcodecs +from zarr.abc.codec import Codec from zarr.core.config import BadConfigError, config if TYPE_CHECKING: @@ -16,7 +17,6 @@ ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, - Codec, CodecPipeline, ) from zarr.core.buffer import Buffer, NDBuffer @@ -183,7 +183,7 @@ def numcodec_to_zarr3_codec(codec: numcodecs.abc.Codec) -> Codec: codec_config.pop("id", None) codec = numcodecs_zarr3_codec_class(**codec_config) - codec = cast(Codec, codec) + assert isinstance(codec, Codec) return codec diff --git a/tests/test_codecs/test_codecs.py b/tests/test_codecs/test_codecs.py index 178a604473..3d5b1ada1a 100644 --- a/tests/test_codecs/test_codecs.py +++ b/tests/test_codecs/test_codecs.py @@ -421,7 +421,7 @@ async def test_resize(store: Store) -> None: (numcodecs.CRC32C(), numcodecs.zarr3.CRC32C), (numcodecs.LZ4(), numcodecs.zarr3.LZ4), (numcodecs.LZMA(), numcodecs.zarr3.LZMA), - (numcodecs.ZFPY(), numcodecs.zarr3.ZFPY), + # (numcodecs.ZFPY(), numcodecs.zarr3.ZFPY), AttributeError: module 'numcodecs' has no attribute 'ZFPY' (numcodecs.Adler32(), numcodecs.zarr3.Adler32), ( numcodecs.AsType(encode_dtype=np.float64, decode_dtype=np.float32), @@ -437,7 +437,7 @@ async def test_resize(store: Store) -> None: (numcodecs.Fletcher32(), numcodecs.zarr3.Fletcher32), (numcodecs.GZip(), numcodecs.zarr3.GZip), (numcodecs.JenkinsLookup3(), numcodecs.zarr3.JenkinsLookup3), - (numcodecs.PCodec(), numcodecs.zarr3.PCodec), + # (numcodecs.PCodec(), 
numcodecs.zarr3.PCodec), AttributeError: module 'numcodecs' has no attribute 'PCodec' (numcodecs.PackBits(), numcodecs.zarr3.PackBits), (numcodecs.Quantize(digits=1, dtype="f8"), numcodecs.zarr3.Quantize), (numcodecs.Shuffle(), numcodecs.zarr3.Shuffle), @@ -463,7 +463,7 @@ def test_numcodecs_in_v3(store: Store, codec_v2, expected_v3_cls) -> None: else: raise TypeError(f"unsupported type: {result_v3.__class__}") - z = zarr.create_array( + zarr.create_array( store, shape=(64,), chunks=(64,), @@ -473,5 +473,3 @@ def test_numcodecs_in_v3(store: Store, codec_v2, expected_v3_cls) -> None: compressors=compressors, serializer=serializer, ) - z[...] = True - assert np.all(z[:])