diff --git a/src/zarr/registry.py b/src/zarr/registry.py
index 704db3f704..ca9829027d 100644
--- a/src/zarr/registry.py
+++ b/src/zarr/registry.py
@@ -5,6 +5,9 @@
 from importlib.metadata import entry_points as get_entry_points
 from typing import TYPE_CHECKING, Any, Generic, TypeVar
 
+import numcodecs
+
+from zarr.abc.codec import Codec
 from zarr.core.config import BadConfigError, config
 
 if TYPE_CHECKING:
@@ -14,7 +17,6 @@
         ArrayArrayCodec,
         ArrayBytesCodec,
         BytesBytesCodec,
-        Codec,
         CodecPipeline,
     )
     from zarr.core.buffer import Buffer, NDBuffer
@@ -166,6 +168,49 @@ def _resolve_codec(data: dict[str, JSON]) -> Codec:
     return get_codec_class(data["name"]).from_dict(data)  # type: ignore[arg-type]
 
 
+def numcodec_to_zarr3_codec(codec: numcodecs.abc.Codec) -> Codec:
+    """
+    Convert a numcodecs codec instance to its ``numcodecs.zarr3`` counterpart.
+
+    The counterpart is looked up in ``numcodecs.zarr3`` by the class name of
+    ``codec`` and instantiated with the configuration of ``codec``, minus its
+    ``id`` entry.
+
+    Parameters
+    ----------
+    codec : numcodecs.abc.Codec
+        The numcodecs codec to convert.
+
+    Returns
+    -------
+    Codec
+        The object built from the matching ``numcodecs.zarr3`` class.
+
+    Raises
+    ------
+    ValueError
+        If ``numcodecs.zarr3`` has no attribute named after the class of ``codec``.
+    TypeError
+        If the object built from that attribute is not a ``Codec``.
+    """
+    import numcodecs.zarr3
+
+    codec_name = type(codec).__name__
+    zarr3_codec_class = getattr(numcodecs.zarr3, codec_name, None)
+    if zarr3_codec_class is None:
+        raise ValueError(f"No Zarr3 wrapper found for numcodec: {codec_name}")
+
+    # Only the keys other than ``id`` are passed to the wrapper's constructor.
+    codec_config = codec.get_config()
+    codec_config.pop("id", None)
+
+    zarr3_codec = zarr3_codec_class(**codec_config)
+    # Raise explicitly rather than ``assert``: assertions are stripped under ``-O``.
+    if not isinstance(zarr3_codec, Codec):
+        raise TypeError(f"Expected a Codec. Got {type(zarr3_codec)} instead.")
+    return zarr3_codec
+
+
 def _parse_bytes_bytes_codec(data: dict[str, JSON] | Codec) -> BytesBytesCodec:
     """
     Normalize the input to a ``BytesBytesCodec`` instance.
@@ -174,15 +219,17 @@ def _parse_bytes_bytes_codec(data: dict[str, JSON] | Codec) -> BytesBytesCodec:
     """
     from zarr.abc.codec import BytesBytesCodec
 
-    if isinstance(data, dict):
+    if isinstance(data, numcodecs.abc.Codec):
+        result = numcodec_to_zarr3_codec(data)
+    elif isinstance(data, dict):
         result = _resolve_codec(data)
         if not isinstance(result, BytesBytesCodec):
             msg = f"Expected a dict representation of a BytesBytesCodec; got a dict representation of a {type(result)} instead."
             raise TypeError(msg)
     else:
-        if not isinstance(data, BytesBytesCodec):
-            raise TypeError(f"Expected a BytesBytesCodec. Got {type(data)} instead.")
         result = data
+    if not isinstance(result, BytesBytesCodec):
+        raise TypeError(f"Expected a BytesBytesCodec. Got {type(result)} instead.")
     return result
 
 
@@ -194,15 +241,17 @@ def _parse_array_bytes_codec(data: dict[str, JSON] | Codec) -> ArrayBytesCodec:
     """
     from zarr.abc.codec import ArrayBytesCodec
 
-    if isinstance(data, dict):
+    if isinstance(data, numcodecs.abc.Codec):
+        result = numcodec_to_zarr3_codec(data)
+    elif isinstance(data, dict):
         result = _resolve_codec(data)
         if not isinstance(result, ArrayBytesCodec):
             msg = f"Expected a dict representation of a ArrayBytesCodec; got a dict representation of a {type(result)} instead."
             raise TypeError(msg)
     else:
-        if not isinstance(data, ArrayBytesCodec):
-            raise TypeError(f"Expected a ArrayBytesCodec. Got {type(data)} instead.")
         result = data
+    if not isinstance(result, ArrayBytesCodec):
+        raise TypeError(f"Expected a ArrayBytesCodec. Got {type(result)} instead.")
     return result
 
 
@@ -214,15 +263,17 @@ def _parse_array_array_codec(data: dict[str, JSON] | Codec) -> ArrayArrayCodec:
     """
     from zarr.abc.codec import ArrayArrayCodec
 
-    if isinstance(data, dict):
+    if isinstance(data, numcodecs.abc.Codec):
+        result = numcodec_to_zarr3_codec(data)
+    elif isinstance(data, dict):
         result = _resolve_codec(data)
         if not isinstance(result, ArrayArrayCodec):
             msg = f"Expected a dict representation of a ArrayArrayCodec; got a dict representation of a {type(result)} instead."
             raise TypeError(msg)
     else:
-        if not isinstance(data, ArrayArrayCodec):
-            raise TypeError(f"Expected a ArrayArrayCodec. Got {type(data)} instead.")
         result = data
+    if not isinstance(result, ArrayArrayCodec):
+        raise TypeError(f"Expected a ArrayArrayCodec. Got {type(result)} instead.")
     return result
 
 
diff --git a/tests/test_codecs/test_codecs.py b/tests/test_codecs/test_codecs.py
index b8122b4ac2..3d5b1ada1a 100644
--- a/tests/test_codecs/test_codecs.py
+++ b/tests/test_codecs/test_codecs.py
@@ -4,6 +4,8 @@
 from dataclasses import dataclass
 from typing import TYPE_CHECKING
 
+import numcodecs
+import numcodecs.zarr3
 import numpy as np
 import pytest
 
@@ -11,6 +13,7 @@
 import zarr.api
 import zarr.api.asynchronous
 from zarr import Array, AsyncArray, config
+from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec
 from zarr.codecs import (
     BytesCodec,
     GzipCodec,
@@ -23,6 +26,7 @@
 
 if TYPE_CHECKING:
     from zarr.abc.store import Store
+    from zarr.core.array import CompressorsLike, FiltersLike, SerializerLike
     from zarr.core.buffer import NDArrayLike
     from zarr.core.common import MemoryOrder
 
@@ -406,3 +410,66 @@ async def test_resize(store: Store) -> None:
     assert await store.get(f"{path}/0.1", prototype=default_buffer_prototype()) is not None
     assert await store.get(f"{path}/1.0", prototype=default_buffer_prototype()) is None
     assert await store.get(f"{path}/1.1", prototype=default_buffer_prototype()) is None
+
+
+@pytest.mark.parametrize("store", ["memory"], indirect=["store"])
+@pytest.mark.parametrize(
+    ("codec_v2", "expected_v3_cls"),
+    [
+        (numcodecs.BZ2(), numcodecs.zarr3.BZ2),
+        (numcodecs.CRC32(), numcodecs.zarr3.CRC32),
+        (numcodecs.CRC32C(), numcodecs.zarr3.CRC32C),
+        (numcodecs.LZ4(), numcodecs.zarr3.LZ4),
+        (numcodecs.LZMA(), numcodecs.zarr3.LZMA),
+        # (numcodecs.ZFPY(), numcodecs.zarr3.ZFPY), AttributeError: module 'numcodecs' has no attribute 'ZFPY'
+        (numcodecs.Adler32(), numcodecs.zarr3.Adler32),
+        (
+            numcodecs.AsType(encode_dtype=np.float64, decode_dtype=np.float32),
+            numcodecs.zarr3.AsType,
+        ),
+        (numcodecs.BitRound(keepbits=10), numcodecs.zarr3.BitRound),
+        (numcodecs.Blosc(), numcodecs.zarr3.Blosc),
+        (numcodecs.Delta(dtype=np.float64), numcodecs.zarr3.Delta),
+        (
+            numcodecs.FixedScaleOffset(offset=1000, scale=10, dtype="f8", astype="u1"),
+            numcodecs.zarr3.FixedScaleOffset,
+        ),
+        (numcodecs.Fletcher32(), numcodecs.zarr3.Fletcher32),
+        (numcodecs.GZip(), numcodecs.zarr3.GZip),
+        (numcodecs.JenkinsLookup3(), numcodecs.zarr3.JenkinsLookup3),
+        # (numcodecs.PCodec(), numcodecs.zarr3.PCodec), AttributeError: module 'numcodecs' has no attribute 'PCodec'
+        (numcodecs.PackBits(), numcodecs.zarr3.PackBits),
+        (numcodecs.Quantize(digits=1, dtype="f8"), numcodecs.zarr3.Quantize),
+        (numcodecs.Shuffle(), numcodecs.zarr3.Shuffle),
+        (numcodecs.Zlib(), numcodecs.zarr3.Zlib),
+        (numcodecs.Zstd(), numcodecs.zarr3.Zstd),
+    ],
+)
+def test_numcodecs_in_v3(store: Store, codec_v2, expected_v3_cls) -> None:
+    result_v3 = zarr.registry.numcodec_to_zarr3_codec(codec_v2)
+
+    assert result_v3.__class__ == expected_v3_cls
+    assert result_v3.codec_config == codec_v2.get_config()
+
+    filters: FiltersLike = "auto"
+    serializer: SerializerLike = "auto"
+    compressors: CompressorsLike = "auto"
+    if isinstance(result_v3, ArrayArrayCodec):
+        filters = [codec_v2]
+    elif isinstance(result_v3, ArrayBytesCodec):
+        serializer = codec_v2
+    elif isinstance(result_v3, BytesBytesCodec):
+        compressors = [codec_v2]
+    else:
+        raise TypeError(f"unsupported type: {result_v3.__class__}")
+
+    zarr.create_array(
+        store,
+        shape=(64,),
+        chunks=(64,),
+        dtype=np.bool_,
+        fill_value=False,
+        filters=filters,
+        compressors=compressors,
+        serializer=serializer,
+    )