From 71ca2f9fd7255412994cd59a8dd729c50fa480c2 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 3 Jun 2026 21:00:55 +0200 Subject: [PATCH 01/24] docs: design spec for unifying Array and AsyncArray via runner Co-Authored-By: Claude Opus 4.8 (1M context) --- ...026-06-03-unify-array-asyncarray-design.md | 218 ++++++++++++++++++ 1 file changed, 218 insertions(+) create mode 100644 docs/superpowers/specs/2026-06-03-unify-array-asyncarray-design.md diff --git a/docs/superpowers/specs/2026-06-03-unify-array-asyncarray-design.md b/docs/superpowers/specs/2026-06-03-unify-array-asyncarray-design.md new file mode 100644 index 0000000000..3a505671bd --- /dev/null +++ b/docs/superpowers/specs/2026-06-03-unify-array-asyncarray-design.md @@ -0,0 +1,218 @@ +# Unifying `Array` and `AsyncArray` via a pluggable `runner` + +Date: 2026-06-03 +Branch: `one-array-class` + +## Goal + +Unify the `Array` and `AsyncArray` classes so that a single `Array` class owns +all array logic. Today `Array` is a thin synchronous wrapper that holds an +`AsyncArray` and delegates every operation through `sync(self.async_array.foo(...))`. +After this work, `Array` owns its own state and exposes every async operation as +a `*_async` coroutine method, with the synchronous methods implemented by running +that coroutine through a pluggable `_runner`. + +The `runner` lets a user wire in their own event loop for the async side of array +operations, instead of being locked into Zarr's background-thread event loop. + +## Scope and non-goals + +- **In scope:** add a keyword-only `runner` argument to `Array.__init__`; give + `Array` its own state and `*_async` methods; rewrite the synchronous methods to + go through `_runner`; extract remaining inline async logic into shared + module-level free functions; deprecate `Array.async_array` / `Array._async_array`. +- **Out of scope / non-goals:** + - We do **not** modify `AsyncArray`. 
It stays exactly as it is and remains the + public async class (compatibility shim). We simply stop wiring `Array` to it. + - We do not remove `AsyncArray` or its public API in this PR. Future deprecation + of `AsyncArray` itself is a separate effort. + - No behavior change to the default synchronous path — the default runner + preserves today's `sync()` semantics exactly. + +## Design + +### 1. The `Runner` protocol + +Add to `zarr/core/sync.py`: + +```python +@runtime_checkable +class Runner(Protocol): + def run(self, coro: Coroutine[Any, Any, T]) -> T: ... +``` + +A `Runner` takes a coroutine and returns the value obtained by awaiting it. + +Concrete default implementation, also in `zarr/core/sync.py`: + +```python +class SyncRunner: + """Run coroutines on Zarr's shared background event loop via sync().""" + + def run(self, coro: Coroutine[Any, Any, T]) -> T: + return sync(coro) +``` + +There is **no** module-level mutable `DEFAULT_RUNNER` singleton baked into the +signature. `Array.__init__` accepts `runner: Runner | None = None`, and `None` +means "use the default", resolved to a `SyncRunner()` inside `__init__`. This +keeps the default-resolution logic in one place and avoids a shared mutable +default argument. + +### 2. `Array.__init__` and state ownership + +`Array` stops being a wrapper around `AsyncArray`. 
It owns the same state +`AsyncArray` holds, plus a `_runner`: + +- `metadata: T_ArrayMetadata` +- `store_path: StorePath` +- `config: ArrayConfig` +- `codec_pipeline: CodecPipeline` +- `_chunk_grid: ChunkGrid` +- `_runner: Runner` + +New signature (keyword-only `runner`): + +```python +def __init__( + self, + metadata: ArrayMetadata | ArrayMetadataDict, + store_path: StorePath, + config: ArrayConfigLike | None = None, + *, + runner: Runner | None = None, +) -> None: + metadata_parsed = parse_array_metadata(metadata) + config_parsed = parse_array_config(config) + # store metadata_parsed, store_path, config_parsed, + # codec_pipeline, _chunk_grid, and (runner or SyncRunner()) +``` + +`Array`'s field set no longer maps cleanly to a single `_async_array` field, so +the `@dataclass(frozen=False)` decorator is dropped in favor of this explicit +`__init__` (mirroring `AsyncArray`'s construction style). The `with_config` +overloads and other internals are updated to construct via the new signature. + +#### Construction helper + +Many internal call sites currently do `Array(async_array)` (≈10 sites across +`zarr/api/synchronous.py`, `zarr/core/group.py`, and `Array._create` / +`from_dict` / `open`). To keep these ergonomic and to handle the common +"I already have an `AsyncArray`" case, add: + +```python +@classmethod +def _from_async_array( + cls, async_array: AsyncArray[T_ArrayMetadata], *, runner: Runner | None = None +) -> Self: + return cls( + metadata=async_array.metadata, + store_path=async_array.store_path, + config=async_array.config, + runner=runner, + ) +``` + +All existing `Array(async_array)` call sites are updated to +`Array._from_async_array(async_array)`. This is a mechanical change. + +#### Deprecating `async_array` / `_async_array` + +The `async_array` property is deprecated. 
On access it emits a +`DeprecationWarning` and constructs a fresh `AsyncArray` on demand from `Array`'s +own state: + +```python +@property +def async_array(self) -> AsyncArray[T_ArrayMetadata]: + warnings.warn( + "Array.async_array is deprecated; ...", + DeprecationWarning, + stacklevel=2, + ) + return AsyncArray(self.metadata, self.store_path, self.config) +``` + +The `_async_array` field is removed; any remaining internal uses are migrated to +`Array`'s own state. + +### 3. Shared free functions (single source of truth) + +The async selection methods on `AsyncArray` already delegate to module-level free +functions that take explicit state: `_getitem`, `_get_selection`, +`_get_orthogonal_selection`, `_get_mask_selection`, `_get_coordinate_selection`, +`_set_selection`, `_setitem`. These functions take `(store_path, metadata, +codec_pipeline, config, chunk_grid, ...)`. + +For the `AsyncArray` methods whose logic is currently **inline** (no free +function yet), extract the body into a new module-level async free function taking +explicit state, then have both classes call it. Methods to extract: + +- `resize`, `append`, `update_attributes` +- `nchunks_initialized`, `_nshards_initialized`, `nbytes_stored` +- `info_complete`, `_save_metadata` +- the classmethods/loaders as appropriate (`open`, `_create*`, + `get_array_metadata` already exists as a free function) + +Resulting call structure for each operation `foo`: + +- `AsyncArray.foo(...)` → `await _foo(self.metadata, self.store_path, ...)` +- `Array.foo_async(...)` → `await _foo(self.metadata, self.store_path, ...)` +- `Array.foo(...)` → `self._runner.run(self.foo_async(...))` + +For operations that return a *new* array (`resize` on v2, `update_attributes`, +`append`), the free function returns the new metadata/state; each class wraps the +result in its own type (`AsyncArray` vs `Array`, the latter preserving its +`_runner`). 
+ +This guarantees `AsyncArray` and `Array` cannot drift, because they share one +implementation per operation. + +### 4. The `*_async` surface on `Array` + +Every current public async method on `AsyncArray` gets a `*_async` twin on +`Array`: + +- `getitem_async`, `setitem_async` +- async twins for each selection getter/setter: + `get_orthogonal_selection_async` / `set_orthogonal_selection_async`, + `get_mask_selection_async` / `set_mask_selection_async`, + `get_coordinate_selection_async` / `set_coordinate_selection_async`, + `get_block_selection_async` / `set_block_selection_async`, + `get_basic_selection_async` / `set_basic_selection_async` +- `resize_async`, `append_async`, `update_attributes_async` +- `nchunks_initialized_async`, `nbytes_stored_async`, `info_complete_async` + +The existing synchronous methods (`__getitem__`, `__setitem__`, +`get_basic_selection`, `set_basic_selection`, the orthogonal/mask/coordinate/block +selection getters and setters, `resize`, `append`, `update_attributes`, +`nchunks_initialized`, `nbytes_stored`, `info_complete`, …) are rewritten from +`sync(self.async_array.foo(...))` to `self._runner.run(self.foo_async(...))`. + +### 5. Testing and verification + +Verification bar: **full existing suite passes unchanged + new runner tests.** + +- **Regression:** the entire existing array test suite passes without + modification, proving the default synchronous behavior is preserved. Run with + `uv run pytest`. +- **New tests:** + - `Runner` protocol conformance / `SyncRunner` behaves as a `Runner`. + - Injecting a custom runner: a recording runner that captures the coroutine it + receives, asserts it is the expected coroutine, runs it, and returns the + awaited value; assert `Array(..., runner=recording)` uses it. + - Equivalence: `arr[sel]` equals `arr._runner.run(arr.getitem_async(sel))` + and equals the value via a directly-awaited `getitem_async`. 
+ - Deprecation: accessing `Array.async_array` (and `_async_array` if still + reachable) emits a `DeprecationWarning`. + +## Risks + +- Large mechanical diff in a 6000-line file; risk of missing a `sync(...)` call + site. Mitigated by grepping all `sync(self.async_array` occurrences and by the + unchanged regression suite. +- Free-function extraction for new-array-returning methods must correctly + reconstruct per-class types. Covered by existing `resize`/`append`/ + `update_attributes` tests. +- `with_config` and other `Self`-returning methods must thread `_runner` through + so a derived `Array` keeps the user's runner. From 5445f92d0378dcc8117ea9f207a11af8b65b858c Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 3 Jun 2026 21:16:03 +0200 Subject: [PATCH 02/24] docs: implementation plan for unifying Array and AsyncArray Co-Authored-By: Claude Opus 4.8 (1M context) --- .../2026-06-03-unify-array-asyncarray.md | 877 ++++++++++++++++++ 1 file changed, 877 insertions(+) create mode 100644 docs/superpowers/plans/2026-06-03-unify-array-asyncarray.md diff --git a/docs/superpowers/plans/2026-06-03-unify-array-asyncarray.md b/docs/superpowers/plans/2026-06-03-unify-array-asyncarray.md new file mode 100644 index 0000000000..100677da1c --- /dev/null +++ b/docs/superpowers/plans/2026-06-03-unify-array-asyncarray.md @@ -0,0 +1,877 @@ +# Unify `Array` and `AsyncArray` via a pluggable `runner` — Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Make `Array` a self-contained class that owns its own state plus a pluggable `_runner`, exposes every async operation as a `*_async` coroutine method, and implements its synchronous methods as `self._runner.run(self.foo_async(...))` — while leaving `AsyncArray` untouched as a compatibility shim. 
+ +**Architecture:** A `Runner` protocol (with default `SyncRunner` wrapping the existing `sync()`) is added to `zarr/core/sync.py`. `Array` stops wrapping an `AsyncArray`; it stores `metadata`, `store_path`, `config`, `codec_pipeline`, `_chunk_grid`, and `_runner` directly, and reuses the already-existing module-level async free functions (`_getitem`, `_setitem`, `_resize`, `_append`, `_update_attributes`, `_info_complete`, `_nchunks_initialized`, `_nshards_initialized`, `_nbytes_stored`, etc.). Several of those functions accept an array object and use its property surface; their parameter type is widened to a structural `SupportsArrayState` Protocol that both `Array` and `AsyncArray` satisfy, so no function bodies change. `Array.async_array` / `_async_array` are deprecated and reconstructed on demand. + +**Tech Stack:** Python 3.11+, `asyncio`, `typing.Protocol`, pytest. Run everything with `uv run`. + +--- + +## File Structure + +- `src/zarr/core/sync.py` — add `Runner` Protocol + `SyncRunner` class. +- `src/zarr/core/array.py` — add `SupportsArrayState` Protocol; widen free-function annotations; rewrite `Array` (`__init__`, state, `*_async` methods, sync methods, `_from_async_array`, deprecated `async_array`). +- `tests/test_runner.py` — new tests for the runner protocol, custom-runner injection, sync/async equivalence, and `async_array` deprecation. +- `tests/test_array.py` — update the few spots that rely on the old construction / `async_array` access (only where they now warn). + +--- + +## Conventions for this plan + +- Always run tooling via `uv run` (e.g. `uv run pytest`, `uv run mypy`). +- Docstrings use single-backtick markdown (mkdocs), not RST double-backticks. +- Commit after each task once its tests pass. +- The type parameter on both classes is `T_ArrayMetadata: (ArrayV2Metadata, ArrayV3Metadata)`. 
+ +--- + +### Task 1: Add the `Runner` protocol and `SyncRunner` to `sync.py` + +**Files:** +- Modify: `src/zarr/core/sync.py` +- Test: `tests/test_runner.py` (create) + +- [ ] **Step 1: Write the failing test** + +Create `tests/test_runner.py`: + +```python +from __future__ import annotations + +import asyncio + +from zarr.core.sync import Runner, SyncRunner + + +async def _coro() -> int: + await asyncio.sleep(0) + return 42 + + +def test_sync_runner_runs_coroutine() -> None: + runner = SyncRunner() + assert runner.run(_coro()) == 42 + + +def test_sync_runner_is_runner() -> None: + assert isinstance(SyncRunner(), Runner) +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `uv run pytest tests/test_runner.py -v` +Expected: FAIL with `ImportError: cannot import name 'Runner'` (or `SyncRunner`). + +- [ ] **Step 3: Implement `Runner` and `SyncRunner`** + +In `src/zarr/core/sync.py`, add `Protocol`, `runtime_checkable`, and `TypeVar`/`T` to the imports as needed, then add near the bottom of the module (after `sync`): + +```python +@runtime_checkable +class Runner(Protocol): + """A `Runner` executes a coroutine and returns the awaited result. + + Implement this protocol to plug a custom event loop into `Array`. + """ + + def run(self, coro: Coroutine[Any, Any, T]) -> T: ... + + +class SyncRunner: + """The default `Runner`. Runs coroutines on Zarr's shared background event + loop via `sync`. + """ + + def run(self, coro: Coroutine[Any, Any, T]) -> T: + return sync(coro) +``` + +Add the supporting imports at the top of `sync.py`: + +```python +from typing import TYPE_CHECKING, Protocol, TypeVar, runtime_checkable +``` + +Note on placement: `Runner.run` and `SyncRunner.run` reference `T`, `Coroutine`, and `Any` only in annotations, and annotations are lazy under `from __future__ import annotations`, so `Coroutine` and `Any` can stay in the existing `TYPE_CHECKING` block. `T` is different: it is a `TypeVar`, which has to be created by an ordinary module-level assignment outside the `TYPE_CHECKING` block (skip this if `sync.py` already defines it). 
Define `T` next to `P`: + +```python +T = TypeVar("T") +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `uv run pytest tests/test_runner.py -v` +Expected: PASS (both tests). + +- [ ] **Step 5: Type-check** + +Run: `uv run mypy src/zarr/core/sync.py` +Expected: no new errors. + +- [ ] **Step 6: Commit** + +```bash +git add src/zarr/core/sync.py tests/test_runner.py +git commit -m "feat(sync): add Runner protocol and SyncRunner" +``` + +--- + +### Task 2: Add the `SupportsArrayState` protocol and widen free-function annotations + +The free functions `_resize`, `_append`, `_update_attributes`, `_info_complete`, `_nchunks_initialized`, `_nshards_initialized`, and `_shards_initialized` currently take `AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]`. They only use the array's property surface (`metadata`, `store_path`, `codec_pipeline`, `config`, `_chunk_grid`, `shape`, `shards`, `chunks`, `_info`) and mutate via `object.__setattr__`. Widen the annotation to a structural protocol so `Array` can pass `self`. + +**Files:** +- Modify: `src/zarr/core/array.py` + +- [ ] **Step 1: Add the protocol** + +Near the top-level definitions in `src/zarr/core/array.py` (after imports, before `AsyncArray`), add: + +```python +@runtime_checkable +class SupportsArrayState(Protocol): + """The structural surface the module-level array helpers rely on. + + Both `AsyncArray` and `Array` satisfy this protocol, which lets the + helper functions operate on either class. + """ + + metadata: ArrayMetadata + store_path: StorePath + codec_pipeline: CodecPipeline + config: ArrayConfig + _chunk_grid: ChunkGrid + + @property + def shape(self) -> tuple[int, ...]: ... + @property + def chunks(self) -> tuple[int, ...]: ... + @property + def shards(self) -> tuple[int, ...] | None: ... + + def _info( + self, + count_chunks_initialized: int | None = None, + count_bytes_stored: int | None = None, + ) -> Any: ... 
+``` + +Ensure `Protocol` and `runtime_checkable` are imported from `typing` at the top of `array.py`. + +- [ ] **Step 2: Widen the free-function signatures** + +In `src/zarr/core/array.py`, change the first parameter annotation of each of these functions from +`array: AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]` +to +`array: SupportsArrayState`: + +- `_nchunks_initialized` (~line 5267) +- `_nshards_initialized` (~line 5297) +- `_shards_initialized` (the function `_nshards_initialized` calls — find via `grep -n "async def _shards_initialized" src/zarr/core/array.py`) +- `_resize` (~line 5873) +- `_append` (~line 5925) +- `_update_attributes` (~line 5996) — also change its return annotation from the `AsyncArray[...]` union to `SupportsArrayState` +- `_info_complete` (~line 6023) + +Do NOT change `_nbytes_stored` (it already takes `store_path: StorePath`). + +- [ ] **Step 3: Type-check** + +Run: `uv run mypy src/zarr/core/array.py` +Expected: no new errors. (AsyncArray still satisfies the protocol structurally.) + +- [ ] **Step 4: Run the existing async-array tests to confirm no behavior change** + +Run: `uv run pytest tests/test_array.py -k "async" -q` +Expected: PASS (AsyncArray behavior unchanged). + +- [ ] **Step 5: Commit** + +```bash +git add src/zarr/core/array.py +git commit -m "refactor(array): widen array-helper params to SupportsArrayState protocol" +``` + +--- + +### Task 3: Give `Array` its own state and `__init__` (with `runner`) + +Replace the `@dataclass(frozen=False)` `Array` that wraps `_async_array` with an explicit class that owns its state. This task ONLY changes construction + state + the `async_array`/`_async_array` deprecation + a `_from_async_array` helper. Property/method rewiring happens in later tasks; to keep this task self-contained, the existing property bodies that read `self.async_array.X` will keep working because the deprecated `async_array` property reconstructs an `AsyncArray` on demand. 
+ +**Files:** +- Modify: `src/zarr/core/array.py` (the `Array` class, starting ~line 1800) +- Test: `tests/test_runner.py` + +- [ ] **Step 1: Write the failing tests** + +Append to `tests/test_runner.py`: + +```python +import warnings + +import numpy as np +import pytest + +import zarr +from zarr.core.array import Array, AsyncArray +from zarr.core.sync import SyncRunner +from zarr.storage import MemoryStore + + +def _make_array() -> Array: + return zarr.create_array( + store=MemoryStore(), shape=(8,), chunks=(4,), dtype="i4", fill_value=0 + ) + + +def test_array_has_default_sync_runner() -> None: + arr = _make_array() + assert isinstance(arr._runner, SyncRunner) + + +def test_array_owns_state() -> None: + arr = _make_array() + # state lives on Array directly, not via a wrapped AsyncArray + assert arr.metadata is not None + assert arr.store_path is not None + assert arr.codec_pipeline is not None + + +def test_array_accepts_custom_runner() -> None: + class RecordingRunner: + def __init__(self) -> None: + self.calls = 0 + + def run(self, coro): # type: ignore[no-untyped-def] + self.calls += 1 + return SyncRunner().run(coro) + + runner = RecordingRunner() + aa = _make_array()._as_async() # helper defined below; or build AsyncArray directly + arr = Array( + metadata=aa.metadata, + store_path=aa.store_path, + config=aa.config, + runner=runner, + ) + _ = arr[:] + assert runner.calls > 0 + + +def test_async_array_property_deprecated() -> None: + arr = _make_array() + with pytest.warns(DeprecationWarning): + aa = arr.async_array + assert isinstance(aa, AsyncArray) + + +def test_from_async_array_roundtrip() -> None: + arr = _make_array() + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + aa = arr.async_array + arr2 = Array._from_async_array(aa) + assert arr2.metadata == arr.metadata + assert isinstance(arr2._runner, SyncRunner) +``` + +Note: remove the `._as_async()` reference — construct the `AsyncArray` for 
`test_array_accepts_custom_runner` directly instead: + +```python + base = _make_array() + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + aa = base.async_array + arr = Array(metadata=aa.metadata, store_path=aa.store_path, config=aa.config, runner=runner) +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `uv run pytest tests/test_runner.py -v` +Expected: FAIL (e.g. `Array` has no `_runner`, `__init__` signature mismatch, no `_from_async_array`). + +- [ ] **Step 3: Rewrite the `Array` class header, state, and `__init__`** + +Replace the dataclass decorator + `_async_array` field + `async_array` property (`src/zarr/core/array.py`, ~lines 1800–1817) with: + +```python +class Array[T_ArrayMetadata: (ArrayV2Metadata, ArrayV3Metadata)]: + """ + A Zarr array. + """ + + metadata: T_ArrayMetadata + store_path: StorePath + config: ArrayConfig + codec_pipeline: CodecPipeline + _chunk_grid: ChunkGrid + _runner: Runner + + def __init__( + self, + metadata: ArrayMetadata | ArrayMetadataDict, + store_path: StorePath, + config: ArrayConfigLike | None = None, + *, + runner: Runner | None = None, + ) -> None: + metadata_parsed = parse_array_metadata(metadata) + config_parsed = parse_array_config(config) + object.__setattr__(self, "metadata", metadata_parsed) + object.__setattr__(self, "store_path", store_path) + object.__setattr__(self, "config", config_parsed) + object.__setattr__(self, "_chunk_grid", ChunkGrid.from_metadata(metadata_parsed)) + object.__setattr__( + self, + "codec_pipeline", + create_codec_pipeline(metadata=metadata_parsed, store=store_path.store), + ) + object.__setattr__(self, "_runner", runner if runner is not None else SyncRunner()) + + @classmethod + def _from_async_array( + cls, + async_array: AsyncArray[T_ArrayMetadata], + *, + runner: Runner | None = None, + ) -> Self: + return cls( + metadata=async_array.metadata, + store_path=async_array.store_path, + config=async_array.config, + runner=runner, + ) + + 
@property + def async_array(self) -> AsyncArray[T_ArrayMetadata]: + """An asynchronous version of this array. + + .. deprecated:: + Use the `*_async` methods on `Array` instead. This property will be + removed in a future release. + """ + warnings.warn( + "Array.async_array is deprecated; use the *_async methods on Array instead.", + DeprecationWarning, + stacklevel=2, + ) + return AsyncArray(self.metadata, self.store_path, self.config) +``` + +Notes: +- Keep using `object.__setattr__` so the class can later become frozen again without churn (matches `AsyncArray`). +- Remove the `_async_array` class attribute entirely. +- Ensure `Runner`, `SyncRunner` are imported from `zarr.core.sync`, and `warnings` is imported at the top of `array.py`. +- `create_codec_pipeline` is already defined in this module (line ~224). + +- [ ] **Step 4: Fix the `_chunk_grid` and `config` properties** + +The existing `config` property (~line 1819) returns `self.async_array.config` and `_chunk_grid` property (~line 1832) returns `self.async_array._chunk_grid`. These now collide with the real attributes. DELETE both property definitions — `config` and `_chunk_grid` are now plain attributes set in `__init__`. + +- [ ] **Step 5: Add an `_info` method to `Array`** + +`Array` needs `_info` for the `SupportsArrayState` protocol (used in Task 6). 
Add it to `Array` (mirroring `AsyncArray._info`, ~line 1777): + +```python + def _info( + self, count_chunks_initialized: int | None = None, count_bytes_stored: int | None = None + ) -> Any: + chunk_shape = self.chunks if self._chunk_grid.is_regular else None + return ArrayInfo( + _zarr_format=self.metadata.zarr_format, + _data_type=self._zdtype, + _fill_value=self.metadata.fill_value, + _shape=self.shape, + _order=self.order, + _shard_shape=self.shards, + _chunk_shape=chunk_shape, + _read_only=self.read_only, + _compressors=self.compressors, + _filters=self.filters, + _serializer=self.serializer, + _store_type=type(self.store_path.store).__name__, + _count_bytes=self.nbytes, + _count_bytes_stored=count_bytes_stored, + _count_chunks_initialized=count_chunks_initialized, + ) +``` + +This requires `Array` to expose `_zdtype`, which currently lives only on `AsyncArray` (~line 972). Add an `Array._zdtype` property copied verbatim from `AsyncArray._zdtype`: + +```python + @property + def _zdtype(self) -> ZDType[TBaseDType, TBaseScalar]: + """ + The zarr-specific representation of the array data type + """ + if self.metadata.zarr_format == 2: + return self.metadata.dtype + else: + return self.metadata.data_type +``` + +- [ ] **Step 6: Fix `with_config` to thread the runner** + +Replace `with_config` body (~line 2233) `return type(self)(self._async_array.with_config(config))` with construction from new state. IMPORTANT: `AsyncArray.with_config` (~line 1156) does NOT use `parse_array_config`; it merges the new config over the existing one. 
Replicate that merge exactly and thread the runner: + +```python + if isinstance(config, ArrayConfig): + new_config = config + else: + # Merge new config with existing config, so missing keys are inherited + # from the current array rather than from global defaults + new_config = ArrayConfig(**{**self.config.to_dict(), **config}) # type: ignore[arg-type] + return type(self)( + metadata=self.metadata, + store_path=self.store_path, + config=new_config, + runner=self._runner, + ) +``` + +- [ ] **Step 7: Update the three `Array` classmethods that build via `cls(async_array)`** + +`Array._create` (~line 1892), `Array.from_dict` (~line 1923), `Array.open` (~line 1945) end with `return cls(async_array)`. Change each to `return cls._from_async_array(async_array)`. + +- [ ] **Step 8: Run the new runner tests** + +Run: `uv run pytest tests/test_runner.py -v` +Expected: PASS. + +- [ ] **Step 9: Commit** + +```bash +git add src/zarr/core/array.py tests/test_runner.py +git commit -m "feat(array): Array owns its own state + runner; deprecate async_array" +``` + +--- + +### Task 4: Rewire `Array`'s property delegations to its own state + +There are ~48 `self.async_array.X` reads in property bodies (verify count: `grep -c "self\.async_array\." src/zarr/core/array.py`). With `async_array` now warning, these MUST be repointed to `Array`'s own state or the deprecation warning fires on every property access. + +**Files:** +- Modify: `src/zarr/core/array.py` (the `Array` property bodies, ~lines 1947–2248) + +- [ ] **Step 1: Run the property tests first (capture current passing state)** + +Run: `uv run pytest tests/test_array.py -q -k "property or shape or dtype or attrs or nbytes or chunks or shards" 2>&1 | tail -20` +Expected: PASS now (baseline). + +- [ ] **Step 2: Repoint each delegating property** + +For every `Array` property/method whose body reads `self.async_array.X`, replace with the direct equivalent. The mapping is mechanical because `Array` now holds the same state. 
Examples (apply the same pattern to all): + +- `store` → `return self.store_path.store` +- `ndim` → `return self.metadata.ndim` +- `shape` getter → `return self.metadata.shape` +- `chunks` → copy the `AsyncArray.chunks` body (reads `self.metadata`) +- `shards` → copy `AsyncArray.shards` body +- `size` → copy `AsyncArray.size` body +- `dtype` → copy `AsyncArray.dtype` body +- `attrs` → `return Attributes(self)` (match current `Array.attrs` semantics; check ~line 2080) +- `path`, `name`, `basename`, `order`, `read_only`, `fill_value`, `filters`, `serializer`, `compressor`, `compressors`, `cdata_shape`, `_chunk_grid_shape`, `_shard_grid_shape`, `nchunks`, `_nshards`, `nbytes` → copy each corresponding `AsyncArray` property body (all read from `self.metadata` / `self.config` / `self._chunk_grid` / `self.store_path`). +- `metadata` property (~line 2111) — `Array` had a `metadata` property returning `self.async_array.metadata`; now `metadata` is a plain attribute. DELETE the property. +- `store_path` property (~line 2115) — same: DELETE the property; it's a plain attribute now. + +Strategy: for each `AsyncArray` property, the body already uses exactly `self.metadata`/`self.config`/`self._chunk_grid`/`self.store_path`. Copy the body verbatim into the `Array` property of the same name. Use `grep -n "self\.async_array\." src/zarr/core/array.py` to enumerate remaining sites and confirm ZERO remain in property bodies when done (the only acceptable remaining `async_array` reference is inside the deprecated `async_array` property itself, which doesn't reference `self.async_array`). + +- [ ] **Step 3: Verify no stray `self.async_array.` reads remain** + +Run: `grep -n "self\.async_array\." src/zarr/core/array.py` +Expected: NO output (zero matches). 
+ +- [ ] **Step 4: Run the array property tests** + +Run: `uv run pytest tests/test_array.py -q -k "property or shape or dtype or attrs or nbytes or chunks or shards" 2>&1 | tail -20` +Expected: PASS, and no `DeprecationWarning` emitted (run with `-W error::DeprecationWarning` to be strict): +Run: `uv run pytest tests/test_array.py -q -W error::DeprecationWarning -k "property or shape or dtype" 2>&1 | tail -20` +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +git add src/zarr/core/array.py +git commit -m "refactor(array): repoint Array properties to own state" +``` + +--- + +### Task 5: Add the read/write `*_async` methods and rewire sync selection methods + +Add `getitem_async`, `setitem_async`, and the basic/orthogonal/mask/coordinate/block selection `*_async` twins to `Array`, calling the existing free functions. Rewrite the synchronous selection methods to use `self._runner.run(self.foo_async(...))` instead of `sync(self.async_array.foo(...))`, where `foo` stands for the name of the operation. + +**Files:** +- Modify: `src/zarr/core/array.py` (`Array` selection methods, ~lines 2426–3767) +- Test: `tests/test_runner.py` + +- [ ] **Step 1: Write the failing equivalence test** + +Append to `tests/test_runner.py`: + +```python +def test_getitem_sync_async_equivalence() -> None: + arr = _make_array() + arr[:] = np.arange(8, dtype="i4") + sync_result = arr[2:6] + async_via_runner = arr._runner.run(arr.getitem_async(slice(2, 6))) + np.testing.assert_array_equal(sync_result, async_via_runner) + + +def test_setitem_async_roundtrip() -> None: + arr = _make_array() + arr._runner.run(arr.setitem_async(slice(0, 4), np.arange(4, dtype="i4"))) + np.testing.assert_array_equal(arr[0:4], np.arange(4, dtype="i4")) +``` + +- [ ] **Step 2: Run to verify failure** + +Run: `uv run pytest tests/test_runner.py -k "equivalence or setitem_async" -v` +Expected: FAIL with `AttributeError: 'Array' object has no attribute 'getitem_async'`. 
+ +- [ ] **Step 3: Add the core `*_async` methods to `Array`** + +Add to `Array` (place near the selection methods). Each delegates to the existing module-level free functions exactly as `AsyncArray` does: + +```python + async def _get_selection_async( + self, + indexer: Indexer, + *, + prototype: BufferPrototype, + out: NDBuffer | None = None, + fields: Fields | None = None, + ) -> NDArrayLikeOrScalar: + return await _get_selection( + self.store_path, + self.metadata, + self.codec_pipeline, + self.config, + self._chunk_grid, + indexer, + prototype=prototype, + out=out, + fields=fields, + ) + + async def _set_selection_async( + self, + indexer: Indexer, + value: npt.ArrayLike, + *, + prototype: BufferPrototype, + fields: Fields | None = None, + ) -> None: + return await _set_selection( + self.store_path, + self.metadata, + self.codec_pipeline, + self.config, + self._chunk_grid, + indexer, + value, + prototype=prototype, + fields=fields, + ) + + async def getitem_async( + self, + selection: BasicSelection, + *, + prototype: BufferPrototype | None = None, + ) -> NDArrayLikeOrScalar: + return await _getitem( + self.store_path, + self.metadata, + self.codec_pipeline, + self.config, + self._chunk_grid, + selection, + prototype=prototype, + ) + + async def setitem_async( + self, + selection: BasicSelection, + value: npt.ArrayLike, + *, + prototype: BufferPrototype | None = None, + ) -> None: + return await _setitem( + self.store_path, + self.metadata, + self.codec_pipeline, + self.config, + self._chunk_grid, + selection, + value, + prototype=prototype, + ) +``` + +Confirm the exact arg order of `_get_selection`, `_set_selection`, `_getitem`, `_setitem` by reading their definitions (`grep -n "^async def _getitem\|^async def _setitem\|^async def _get_selection\|^async def _set_selection" src/zarr/core/array.py`) and match them. The `AsyncArray._get_selection`/`getitem`/`setitem` bodies (lines ~1416, ~1436, ~1574) are the reference — copy their call shape. 
+ +- [ ] **Step 4: Rewrite the synchronous selection methods** + +Replace every `sync(self.async_array.X(...))` and `sync(self.async_array._set_selection(...))` / `sync(self.async_array._get_selection(...))` call inside `Array`'s sync selection methods with the runner + `*_async` equivalent. Concretely, for the selection getters/setters (`__getitem__`, `__setitem__`, `get_basic_selection`, `set_basic_selection`, `get_orthogonal_selection`, `set_orthogonal_selection`, `get_mask_selection`, `set_mask_selection`, `get_coordinate_selection`, `set_coordinate_selection`, `get_block_selection`, `set_block_selection`): + +- Where the body did `sync(self.async_array._get_selection(indexer, ...))`, change to `self._runner.run(self._get_selection_async(indexer, ...))`. +- Where the body did `sync(self.async_array._set_selection(indexer, value, ...))`, change to `self._runner.run(self._set_selection_async(indexer, value, ...))`. + +The indexer-construction logic in these sync methods stays exactly as-is; only the terminal `sync(self.async_array._..._selection(...))` call changes. Enumerate them with `grep -n "sync(self.async_array._set_selection\|sync(self.async_array._get_selection\|sync($" src/zarr/core/array.py` and the broader `grep -n "self.async_array._get_selection\|self.async_array._set_selection" src/zarr/core/array.py`. + +- [ ] **Step 5: Run the equivalence tests** + +Run: `uv run pytest tests/test_runner.py -k "equivalence or setitem_async" -v` +Expected: PASS. + +- [ ] **Step 6: Run the full selection test suite** + +Run: `uv run pytest tests/test_array.py -q -k "selection or getitem or setitem or basic or orthogonal or mask or coordinate or block" 2>&1 | tail -25` +Expected: PASS. 
+ +- [ ] **Step 7: Commit** + +```bash +git add src/zarr/core/array.py tests/test_runner.py +git commit -m "feat(array): add selection *_async methods; route sync selection via runner" +``` + +--- + +### Task 6: Add remaining `*_async` methods (`resize`, `append`, `update_attributes`, `info_complete`, `nchunks_initialized`, `nbytes_stored`) and rewire their sync wrappers + +These reuse the free functions widened in Task 2 (`_resize`, `_append`, `_update_attributes`, `_info_complete`, `_nchunks_initialized`, `_nshards_initialized`, `_nbytes_stored`). + +**Files:** +- Modify: `src/zarr/core/array.py` +- Test: `tests/test_runner.py` + +- [ ] **Step 1: Write the failing tests** + +Append to `tests/test_runner.py`: + +```python +def test_resize_async() -> None: + arr = _make_array() + arr._runner.run(arr.resize_async((16,))) + assert arr.shape == (16,) + + +def test_update_attributes_async() -> None: + arr = _make_array() + arr._runner.run(arr.update_attributes_async({"foo": "bar"})) + assert arr.metadata.attributes["foo"] == "bar" + + +def test_nchunks_initialized_async() -> None: + arr = _make_array() + arr[:] = np.arange(8, dtype="i4") + n = arr._runner.run(arr.nchunks_initialized_async()) + assert n == arr.nchunks_initialized # sync property matches async result +``` + +- [ ] **Step 2: Run to verify failure** + +Run: `uv run pytest tests/test_runner.py -k "resize_async or update_attributes_async or nchunks_initialized_async" -v` +Expected: FAIL (`AttributeError`). 
+ +- [ ] **Step 3: Add the `*_async` methods to `Array`** + +```python + async def resize_async( + self, new_shape: ShapeLike, delete_outside_chunks: bool = True + ) -> None: + return await _resize(self, new_shape, delete_outside_chunks) + + async def append_async(self, data: npt.ArrayLike, axis: int = 0) -> tuple[int, ...]: + return await _append(self, data, axis) + + async def update_attributes_async(self, new_attributes: dict[str, JSON]) -> Self: + await _update_attributes(self, new_attributes) + return self + + async def nchunks_initialized_async(self) -> int: + return await _nchunks_initialized(self) + + async def _nshards_initialized_async(self) -> int: + return await _nshards_initialized(self) + + async def nbytes_stored_async(self) -> int: + return await _nbytes_stored(self.store_path) + + async def info_complete_async(self) -> Any: + return await _info_complete(self) +``` + +Note `_resize` mutates `self` in place via `object.__setattr__` — this works on `Array` because it is not frozen. 
+ +- [ ] **Step 4: Rewrite the corresponding sync methods** + +Replace the sync bodies (currently `sync(self.async_array.X(...))`, ~lines 2274, 2296, 2306, 3831, 3867, 3894, 3952): + +- `nchunks_initialized` property (~2274): `return self._runner.run(self.nchunks_initialized_async())` +- `_nshards_initialized` property (~2296): `return self._runner.run(self._nshards_initialized_async())` +- `nbytes_stored` (~2306): `return self._runner.run(self.nbytes_stored_async())` +- `resize` (~3831): `self._runner.run(self.resize_async(new_shape))` (keep the existing wrapper's return/type; check whether `Array.resize` returns a new array or `None` — read ~line 3800–3835 and preserve its current return contract) +- `append` (~3867): `return self._runner.run(self.append_async(data, axis=axis))` +- `update_attributes` (~3894): the current body does `new_array = sync(self.async_array.update_attributes(new_attributes))` then `return type(self)(new_array)` (wrapping the returned `AsyncArray`). Under the new design `update_attributes_async` mutates `self` in place and returns `self` (an `Array`), so replace the whole body with: + ```python + self._runner.run(self.update_attributes_async(new_attributes)) + return self + ``` + This matches the prior observable behavior: attributes are persisted and an `Array` with the updated metadata is returned. (Previously a fresh wrapper was returned; now `self` is mutated and returned. If a test asserts the returned object is a *distinct* instance, adjust to `return type(self)(metadata=self.metadata, store_path=self.store_path, config=self.config, runner=self._runner)` — check `tests/test_array.py` for such an assertion in Task 8.) +- `info_complete` (~3952): `return self._runner.run(self.info_complete_async())` + +For each, read the surrounding 10 lines first to preserve the exact return type and any post-processing. + +- [ ] **Step 5: Confirm zero `self.async_array.` references remain anywhere** + +Run: `grep -n "self\.async_array\." 
src/zarr/core/array.py` +Expected: NO output. + +- [ ] **Step 6: Run the new tests + relevant suite** + +Run: `uv run pytest tests/test_runner.py -v` +Run: `uv run pytest tests/test_array.py -q -k "resize or append or update_attributes or info or nchunks or nbytes" 2>&1 | tail -25` +Expected: PASS. + +- [ ] **Step 7: Commit** + +```bash +git add src/zarr/core/array.py tests/test_runner.py +git commit -m "feat(array): add remaining *_async methods; route sync wrappers via runner" +``` + +--- + +### Task 7: Update external construction sites to `_from_async_array` + +The `Array(async_array)` call sites outside `array.py` must use the new construction path. + +**Files:** +- Modify: `src/zarr/api/synchronous.py` (lines ~763, ~947, ~1168, ~1359) +- Modify: `src/zarr/core/group.py` (lines ~2272, ~2656, ~2779) + +- [ ] **Step 1: Find all external `Array(` construction-from-async sites** + +Run: `grep -rn "Array(async_array\|Array($" src/zarr/api/synchronous.py src/zarr/core/group.py` +Also check each `Array(` call's argument: only those passing an `AsyncArray` positionally need changing. + +- [ ] **Step 2: Replace each with `_from_async_array`** + +For each site that does `Array(X)`, where `X` is an expression producing an `AsyncArray`, change to `Array._from_async_array(X)`. For example in `group.py`: +`yield name, Array(async_array)` → `yield name, Array._from_async_array(async_array)`. + +Read each call's surrounding lines to confirm the single positional arg is an `AsyncArray` (the variable is usually named `async_array` or is an `await AsyncArray.open(...)` result). + +- [ ] **Step 3: Type-check the modified modules** + +Run: `uv run mypy src/zarr/api/synchronous.py src/zarr/core/group.py` +Expected: no new errors. + +- [ ] **Step 4: Run the api + group test suites** + +Run: `uv run pytest tests/test_api.py tests/test_group.py -q 2>&1 | tail -25` +Expected: PASS.
+ +- [ ] **Step 5: Commit** + +```bash +git add src/zarr/api/synchronous.py src/zarr/core/group.py +git commit -m "refactor: construct Array via _from_async_array at external call sites" +``` + +--- + +### Task 8: Update tests that touch `async_array` directly + add changelog + +Existing tests read `arr.async_array.X` (e.g. `tests/test_array.py:384`, `:418`, `:419`). These now emit `DeprecationWarning`. Update them to use the new sync/async surface, or wrap in a warning filter where the intent is specifically to test the deprecated property. + +**Files:** +- Modify: `tests/test_array.py` +- Create: `changes/PR_NUMBER.feature.md` (the repo uses towncrier with `.md` fragments, e.g. `changes/3826.feature.md`) + +- [ ] **Step 1: Find tests referencing `async_array`** + +Run: `grep -rn "\.async_array" tests/` + +- [ ] **Step 2: Update each reference** + +- `arr.async_array.nchunks` → `arr.nchunks` (the sync property). +- `await arr.async_array._nshards_initialized()` → `arr._runner.run(arr._nshards_initialized_async())` or, if the test is async, `await arr._nshards_initialized_async()`. +- `await arr.async_array.nchunks_initialized()` → `await arr.nchunks_initialized_async()` (async test) or `arr.nchunks_initialized` (sync property). + +For any test whose explicit purpose is to verify `async_array` still works as a deprecated shim, wrap access in `pytest.warns(DeprecationWarning)` instead of removing it. + +- [ ] **Step 3: Add the changelog fragment** + +The repo uses towncrier with markdown fragments named `changes/NUMBER.TYPE.md` (e.g. `changes/3826.feature.md`). Create `changes/PR_NUMBER.feature.md` (substitute the actual PR number for `PR_NUMBER`) containing: + +```markdown +`Array` now owns its own state and accepts a keyword-only `runner` argument for plugging in a custom event loop. Every async operation is available as a `*_async` method on `Array`. `Array.async_array` is deprecated; use the `*_async` methods instead.
+``` + +- [ ] **Step 4: Run the updated tests with deprecation-as-error** + +Run: `uv run pytest tests/test_array.py -q -W error::DeprecationWarning 2>&1 | tail -30` +Expected: PASS (no un-suppressed deprecation warnings escape). + +- [ ] **Step 5: Commit** + +```bash +git add tests/test_array.py changes/ +git commit -m "test: migrate async_array usages; add changelog for Array unification" +``` + +--- + +### Task 9: Full verification sweep + +**Files:** none (verification only) + +- [ ] **Step 1: Run the full array + sync + runner + api + group suites** + +Run: `uv run pytest tests/test_array.py tests/test_runner.py tests/test_sync.py tests/test_api.py tests/test_group.py -q 2>&1 | tail -30` +Expected: all PASS. + +- [ ] **Step 2: Run the complete test suite** + +Run: `uv run pytest -q 2>&1 | tail -40` +Expected: PASS (or only pre-existing unrelated failures — compare against a clean `main` run if anything fails). + +- [ ] **Step 3: Type-check the whole package** + +Run: `uv run mypy src/zarr 2>&1 | tail -30` +Expected: no new errors. + +- [ ] **Step 4: Run the linters / pre-commit** + +Run: `uv run pre-commit run --all-files 2>&1 | tail -40` +Expected: PASS. + +- [ ] **Step 5: Confirm the invariant holds** + +Run: `grep -rn "self\.async_array\." src/zarr/core/array.py` +Expected: NO output. The only `async_array` reference in `array.py` is the deprecated property definition itself. 
+ +- [ ] **Step 6: Final commit (if any lint fixes were applied)** + +```bash +git add -A +git commit -m "chore: lint/type fixes for Array unification" +``` + +--- + +## Self-Review notes + +- **Spec coverage:** Runner protocol (Task 1) ✓; SupportsArrayState + widened helpers (Task 2) ✓; Array owns state + `runner` + deprecated `async_array` + `_from_async_array` (Task 3) ✓; property repoint (Task 4) ✓; selection `*_async` + runner routing (Task 5) ✓; remaining `*_async` + sync routing (Task 6) ✓; external construction sites (Task 7) ✓; test migration + changelog (Task 8) ✓; full regression + new runner tests (Task 9 + Tasks 1/3/5/6) ✓. +- **Discovery vs. spec:** The spec assumed several methods had inline bodies needing extraction; in fact the free functions already exist but take an array object. Section 3's "extract free functions" is therefore replaced by "widen existing free-function signatures to a Protocol" (Task 2) — a smaller, safer change that still achieves the single-source-of-truth goal. +- **Type consistency:** `getitem_async`/`setitem_async`/`resize_async`/`append_async`/`update_attributes_async`/`nchunks_initialized_async`/`_nshards_initialized_async`/`nbytes_stored_async`/`info_complete_async` and `_from_async_array`, `SupportsArrayState`, `Runner`, `SyncRunner` are used consistently across tasks. +- **Verification points where exact line numbers are approximate:** each such step instructs reading the surrounding lines first and preserving the current return contract, since line numbers will drift as edits land. 
From 5f6d4983059a0d8a7e7481c14d37addea53f4a89 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 3 Jun 2026 21:22:30 +0200 Subject: [PATCH 03/24] feat(sync): add Runner protocol and SyncRunner Co-Authored-By: Claude Opus 4.8 (1M context) --- src/zarr/core/sync.py | 22 +++++++++++++++++++++- tests/test_runner.py | 19 +++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 tests/test_runner.py diff --git a/src/zarr/core/sync.py b/src/zarr/core/sync.py index 260d4ad841..30f1026533 100644 --- a/src/zarr/core/sync.py +++ b/src/zarr/core/sync.py @@ -6,7 +6,7 @@ import os import threading from concurrent.futures import ThreadPoolExecutor, wait -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Protocol, TypeVar, runtime_checkable from typing_extensions import ParamSpec @@ -20,6 +20,7 @@ P = ParamSpec("P") +T = TypeVar("T") # From https://github.com/fsspec/filesystem_spec/blob/master/fsspec/asyn.py @@ -160,6 +161,25 @@ def sync[T]( return return_result +@runtime_checkable +class Runner(Protocol): + """A `Runner` executes a coroutine and returns the awaited result. + + Implement this protocol to plug a custom event loop into `Array`. + """ + + def run(self, coro: Coroutine[Any, Any, T]) -> T: ... + + +class SyncRunner: + """The default `Runner`. Runs coroutines on Zarr's shared background event + loop via `sync`. 
+ """ + + def run(self, coro: Coroutine[Any, Any, T]) -> T: + return sync(coro) + + def _get_loop() -> asyncio.AbstractEventLoop: """Create or return the default fsspec IO loop diff --git a/tests/test_runner.py b/tests/test_runner.py new file mode 100644 index 0000000000..3a7777ea7f --- /dev/null +++ b/tests/test_runner.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +import asyncio + +from zarr.core.sync import Runner, SyncRunner + + +async def _coro() -> int: + await asyncio.sleep(0) + return 42 + + +def test_sync_runner_runs_coroutine() -> None: + runner = SyncRunner() + assert runner.run(_coro()) == 42 + + +def test_sync_runner_is_runner() -> None: + assert isinstance(SyncRunner(), Runner) From 2edd5a72a6bf07d9b797b8a3289e2e1110650e65 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 3 Jun 2026 21:33:57 +0200 Subject: [PATCH 04/24] refactor(array): widen array-helper params to SupportsArrayState protocol Co-Authored-By: Claude Opus 4.8 (1M context) --- src/zarr/core/array.py | 58 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 8 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 366c19bb0c..373422a0dc 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -11,9 +11,11 @@ TYPE_CHECKING, Any, Literal, + Protocol, TypedDict, cast, overload, + runtime_checkable, ) from warnings import warn @@ -302,6 +304,46 @@ async def get_array_metadata( return metadata_dict +@runtime_checkable +class SupportsArrayState(Protocol): + """The structural surface the module-level array helpers rely on. + + Both `AsyncArray` and `Array` satisfy this protocol, which lets the + helper functions operate on either class. + """ + + @property + def metadata(self) -> ArrayMetadata: ... + @property + def store_path(self) -> StorePath: ... + @property + def codec_pipeline(self) -> CodecPipeline: ... + @property + def config(self) -> ArrayConfig: ... + @property + def _chunk_grid(self) -> ChunkGrid: ... 
+ + @property + def shape(self) -> tuple[int, ...]: ... + @property + def chunks(self) -> tuple[int, ...]: ... + @property + def shards(self) -> tuple[int, ...] | None: ... + + def _iter_shard_keys( + self, + *, + origin: Sequence[int] | None = None, + selection_shape: Sequence[int] | None = None, + ) -> Iterator[str]: ... + + def _info( + self, + count_chunks_initialized: int | None = None, + count_bytes_stored: int | None = None, + ) -> Any: ... + + @dataclass(frozen=True) class AsyncArray[T_ArrayMetadata: (ArrayV2Metadata, ArrayV3Metadata)]: """ @@ -3953,7 +3995,7 @@ def info_complete(self) -> Any: async def _shards_initialized( - array: AnyAsyncArray, + array: SupportsArrayState, ) -> tuple[str, ...]: """ Return the keys of the shards that have been persisted to the storage backend. @@ -5265,7 +5307,7 @@ def _iter_chunk_regions( async def _nchunks_initialized( - array: AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata], + array: SupportsArrayState, ) -> int: """ Calculate the number of chunks that have been initialized in storage. @@ -5295,7 +5337,7 @@ async def _nchunks_initialized( async def _nshards_initialized( - array: AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata], + array: SupportsArrayState, ) -> int: """ Calculate the number of shards that have been initialized in storage. 
@@ -5871,7 +5913,7 @@ async def _setitem( async def _resize( - array: AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata], + array: SupportsArrayState, new_shape: ShapeLike, delete_outside_chunks: bool = True, ) -> None: @@ -5923,7 +5965,7 @@ async def _delete_key(key: str) -> None: async def _append( - array: AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata], + array: SupportsArrayState, data: npt.ArrayLike, axis: int = 0, ) -> tuple[int, ...]: @@ -5994,9 +6036,9 @@ async def _append( async def _update_attributes( - array: AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata], + array: SupportsArrayState, new_attributes: dict[str, JSON], -) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: +) -> SupportsArrayState: """ Update the array's attributes. @@ -6021,7 +6063,7 @@ async def _update_attributes( async def _info_complete( - array: AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata], + array: SupportsArrayState, ) -> Any: """ Return all the information for an array, including dynamic information like storage size. From 7702eaf1d3d771cfab8bb865fd59a70ec54be0a0 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 3 Jun 2026 21:39:56 +0200 Subject: [PATCH 05/24] docs(array): clarify SupportsArrayState docstring Co-Authored-By: Claude Opus 4.8 (1M context) --- src/zarr/core/array.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 373422a0dc..ed253524b4 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -308,8 +308,9 @@ async def get_array_metadata( class SupportsArrayState(Protocol): """The structural surface the module-level array helpers rely on. - Both `AsyncArray` and `Array` satisfy this protocol, which lets the - helper functions operate on either class. + `AsyncArray` satisfies this protocol. The unified `Array` class is being + migrated to satisfy it as well so the same helpers can operate on either + class. 
""" @property From ca9d19466482648682779ae4dd7c762966e4c909 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 3 Jun 2026 22:31:45 +0200 Subject: [PATCH 06/24] feat(array): Array owns its own state + runner; deprecate async_array Array no longer wraps an AsyncArray. It owns metadata, store_path, config, codec_pipeline, _chunk_grid, and a pluggable _runner (defaulting to SyncRunner). Adds Array._from_async_array and a deprecated async_array property. External Array(async_array) construction sites are converted to Array._from_async_array. Fixes downstream typing fallout from removing the _async_array attribute. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/zarr/api/synchronous.py | 32 +++---- src/zarr/core/array.py | 127 ++++++++++++++++++++-------- src/zarr/core/attributes.py | 4 +- src/zarr/core/group.py | 42 ++++++--- src/zarr/metadata/migrate_v3.py | 2 +- src/zarr/testing/stateful.py | 2 +- tests/test_api/test_asynchronous.py | 2 +- tests/test_array.py | 4 +- tests/test_codec_pipeline.py | 3 +- tests/test_codecs/test_vlen.py | 6 +- tests/test_runner.py | 61 +++++++++++++ 11 files changed, 213 insertions(+), 72 deletions(-) diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index 8386427b3f..4441b857e2 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -221,7 +221,7 @@ def open( ) ) if isinstance(obj, AsyncArray): - return Array(obj) + return Array._from_async_array(obj) else: return Group(obj) @@ -391,7 +391,7 @@ def array(data: npt.ArrayLike | AnyArray, **kwargs: Any) -> AnyArray: The new array. """ - return Array(sync(async_api.array(data=data, **kwargs))) + return Array._from_async_array(sync(async_api.array(data=data, **kwargs))) def group( @@ -760,7 +760,7 @@ def create( z : Array The array. 
""" - return Array( + return Array._from_async_array( sync( async_api.create( shape=shape, @@ -944,7 +944,7 @@ def create_array( # ``` """ - return Array( + return Array._from_async_array( sync( zarr.core.array.create_array( store, @@ -1165,7 +1165,7 @@ def from_array( array([[0, 0], [0, 0]]) """ - return Array( + return Array._from_async_array( sync( zarr.core.array.from_array( store, @@ -1214,7 +1214,7 @@ def empty(shape: tuple[int, ...], **kwargs: Any) -> AnyArray: retrieve data from an empty Zarr array, any values may be returned, and these are not guaranteed to be stable from one access to the next. """ - return Array(sync(async_api.empty(shape, **kwargs))) + return Array._from_async_array(sync(async_api.empty(shape, **kwargs))) # TODO: move ArrayLike to common module @@ -1241,7 +1241,7 @@ def empty_like(a: ArrayLike, **kwargs: Any) -> AnyArray: retrieve data from an empty Zarr array, any values may be returned, and these are not guaranteed to be stable from one access to the next. """ - return Array(sync(async_api.empty_like(a, **kwargs))) + return Array._from_async_array(sync(async_api.empty_like(a, **kwargs))) # TODO: add type annotations for kwargs and fill_value @@ -1262,7 +1262,9 @@ def full(shape: tuple[int, ...], fill_value: Any, **kwargs: Any) -> AnyArray: Array The new array. """ - return Array(sync(async_api.full(shape=shape, fill_value=fill_value, **kwargs))) + return Array._from_async_array( + sync(async_api.full(shape=shape, fill_value=fill_value, **kwargs)) + ) # TODO: move ArrayLike to common module @@ -1282,7 +1284,7 @@ def full_like(a: ArrayLike, **kwargs: Any) -> AnyArray: Array The new array. """ - return Array(sync(async_api.full_like(a, **kwargs))) + return Array._from_async_array(sync(async_api.full_like(a, **kwargs))) # TODO: add type annotations for kwargs @@ -1301,7 +1303,7 @@ def ones(shape: tuple[int, ...], **kwargs: Any) -> AnyArray: Array The new array. 
""" - return Array(sync(async_api.ones(shape, **kwargs))) + return Array._from_async_array(sync(async_api.ones(shape, **kwargs))) # TODO: add type annotations for kwargs @@ -1320,7 +1322,7 @@ def ones_like(a: ArrayLike, **kwargs: Any) -> AnyArray: Array The new array. """ - return Array(sync(async_api.ones_like(a, **kwargs))) + return Array._from_async_array(sync(async_api.ones_like(a, **kwargs))) # TODO: update this once async_api.open_array is fully implemented @@ -1356,7 +1358,7 @@ def open_array( AsyncArray The opened array. """ - return Array( + return Array._from_async_array( sync( async_api.open_array( store=store, @@ -1387,7 +1389,7 @@ def open_like(a: ArrayLike, path: str, **kwargs: Any) -> AnyArray: AsyncArray The opened array. """ - return Array(sync(async_api.open_like(a, path=path, **kwargs))) + return Array._from_async_array(sync(async_api.open_like(a, path=path, **kwargs))) # TODO: add type annotations for kwargs @@ -1406,7 +1408,7 @@ def zeros(shape: tuple[int, ...], **kwargs: Any) -> AnyArray: Array The new array. """ - return Array(sync(async_api.zeros(shape=shape, **kwargs))) + return Array._from_async_array(sync(async_api.zeros(shape=shape, **kwargs))) # TODO: add type annotations for kwargs @@ -1425,4 +1427,4 @@ def zeros_like(a: ArrayLike, **kwargs: Any) -> AnyArray: Array The new array. """ - return Array(sync(async_api.zeros_like(a, **kwargs))) + return Array._from_async_array(sync(async_api.zeros_like(a, **kwargs))) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index ed253524b4..b7cf9de2a8 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -130,7 +130,7 @@ create_chunk_grid_metadata, parse_node_type_array, ) -from zarr.core.sync import sync +from zarr.core.sync import Runner, SyncRunner, sync from zarr.errors import ( ArrayNotFoundError, ChunkNotFoundError, @@ -1840,42 +1840,98 @@ def _info( ) -# TODO: Array can be a frozen data class again once property setters (e.g. 
shape) are removed -@dataclass(frozen=False) class Array[T_ArrayMetadata: (ArrayV2Metadata, ArrayV3Metadata)]: """ A Zarr array. """ - _async_array: AsyncArray[T_ArrayMetadata] + metadata: T_ArrayMetadata + store_path: StorePath + config: ArrayConfig + codec_pipeline: CodecPipeline + _chunk_grid: ChunkGrid + _runner: Runner + + def __init__( + self, + metadata: ArrayMetadata | ArrayMetadataDict, + store_path: StorePath, + config: ArrayConfigLike | None = None, + *, + runner: Runner | None = None, + ) -> None: + metadata_parsed = parse_array_metadata(metadata) + config_parsed = parse_array_config(config) + object.__setattr__(self, "metadata", metadata_parsed) + object.__setattr__(self, "store_path", store_path) + object.__setattr__(self, "config", config_parsed) + object.__setattr__(self, "_chunk_grid", ChunkGrid.from_metadata(metadata_parsed)) + object.__setattr__( + self, + "codec_pipeline", + create_codec_pipeline(metadata=metadata_parsed, store=store_path.store), + ) + object.__setattr__(self, "_runner", runner if runner is not None else SyncRunner()) + + @classmethod + def _from_async_array( + cls, + async_array: AsyncArray[T_ArrayMetadata], + *, + runner: Runner | None = None, + ) -> Self: + return cls( + metadata=async_array.metadata, + store_path=async_array.store_path, + config=async_array.config, + runner=runner, + ) @property def async_array(self) -> AsyncArray[T_ArrayMetadata]: - """An asynchronous version of the current array. Useful for batching requests. + """An asynchronous version of this array. - Returns - ------- - An asynchronous array whose metadata + store matches that of this synchronous array. + Deprecated: use the `*_async` methods on `Array` instead. This property + will be removed in a future release. 
""" - return self._async_array + warnings.warn( + "Array.async_array is deprecated; use the *_async methods on Array instead.", + DeprecationWarning, + stacklevel=2, + ) + return AsyncArray(self.metadata, self.store_path, self.config) @property - def config(self) -> ArrayConfig: + def _zdtype(self) -> ZDType[TBaseDType, TBaseScalar]: """ - The runtime configuration for this array. This is a read-only property. To modify the - runtime configuration, use `Array.with_config` to create a new `Array` with the modified - configuration. - - Returns - ------- - An `ArrayConfig` object that defines the runtime configuration for the array. + The zarr-specific representation of the array data type """ - return self.async_array.config + if self.metadata.zarr_format == 2: + return self.metadata.dtype + else: + return self.metadata.data_type - @property - def _chunk_grid(self) -> ChunkGrid: - """The chunk grid for this array, bound to the array's shape.""" - return self.async_array._chunk_grid + def _info( + self, count_chunks_initialized: int | None = None, count_bytes_stored: int | None = None + ) -> Any: + chunk_shape = self.chunks if self._chunk_grid.is_regular else None + return ArrayInfo( + _zarr_format=self.metadata.zarr_format, + _data_type=self._zdtype, + _fill_value=self.metadata.fill_value, + _shape=self.shape, + _order=self.order, + _shard_shape=self.shards, + _chunk_shape=chunk_shape, + _read_only=self.read_only, + _compressors=self.compressors, + _filters=self.filters, + _serializer=self.serializer, + _store_type=type(self.store_path.store).__name__, + _count_bytes=self.nbytes, + _count_bytes_stored=count_bytes_stored, + _count_chunks_initialized=count_chunks_initialized, + ) @classmethod def _create( @@ -1932,7 +1988,7 @@ def _create( config=config, ), ) - return cls(async_array) + return cls._from_async_array(async_array) @classmethod def from_dict( @@ -1963,7 +2019,7 @@ def from_dict( If the dictionary data is invalid or missing required fields for array 
creation. """ async_array = AsyncArray.from_dict(store_path=store_path, data=data) - return cls(async_array) + return cls._from_async_array(async_array) @classmethod def open( @@ -1985,7 +2041,7 @@ def open( Array opened from the store. """ async_array = sync(AsyncArray.open(store)) - return cls(async_array) + return cls._from_async_array(async_array) @property def store(self) -> Store: @@ -2150,14 +2206,6 @@ def basename(self) -> str: """Final component of name.""" return self.async_array.basename - @property - def metadata(self) -> ArrayMetadata: - return self.async_array.metadata - - @property - def store_path(self) -> StorePath: - return self.async_array.store_path - @property def order(self) -> MemoryOrder: return self.async_array.order @@ -2273,7 +2321,16 @@ def with_config(self, config: ArrayConfigLike) -> Self: ------- A new Array """ - return type(self)(self._async_array.with_config(config)) + if isinstance(config, ArrayConfig): + new_config = config + else: + new_config = ArrayConfig(**{**self.config.to_dict(), **config}) # type: ignore[arg-type] + return type(self)( + metadata=self.metadata, + store_path=self.store_path, + config=new_config, + runner=self._runner, + ) @property def nbytes(self) -> int: @@ -3935,7 +3992,7 @@ def update_attributes(self, new_attributes: dict[str, JSON]) -> Self: overwritten by the new values. 
""" new_array = sync(self.async_array.update_attributes(new_attributes)) - return type(self)(new_array) + return type(self)._from_async_array(new_array) def __repr__(self) -> str: return f"" diff --git a/src/zarr/core/attributes.py b/src/zarr/core/attributes.py index 7f29e44365..e139bc0d76 100644 --- a/src/zarr/core/attributes.py +++ b/src/zarr/core/attributes.py @@ -1,7 +1,7 @@ from __future__ import annotations from collections.abc import MutableMapping -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, cast from zarr.core.common import JSON @@ -18,7 +18,7 @@ def __init__(self, obj: AnyArray | Group) -> None: self._obj = obj def __getitem__(self, key: str) -> JSON: - return self._obj.metadata.attributes[key] + return cast("JSON", self._obj.metadata.attributes[key]) def __setitem__(self, key: str, value: JSON) -> None: new_attrs = dict(self._obj.metadata.attributes) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index de8c8e9a68..c2a0d08e5b 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -127,7 +127,7 @@ def _parse_async_node( ) -> AnyArray | Group: """Wrap an AsyncArray in an Array, or an AsyncGroup in a Group.""" if isinstance(node, AsyncArray): - return Array(node) + return Array._from_async_array(node) elif isinstance(node, AsyncGroup): return Group(node) else: @@ -1844,7 +1844,7 @@ def __getitem__(self, path: str) -> AnyArray | Group: """ obj = self._sync(self._async_group.getitem(path)) if isinstance(obj, AsyncArray): - return Array(obj) + return Array._from_async_array(obj) else: return Group(obj) @@ -2269,7 +2269,7 @@ def arrays(self) -> Generator[tuple[str, AnyArray], None]: [('subarray', )] """ for name, async_array in self._sync_iter(self._async_group.arrays()): - yield name, Array(async_array) + yield name, Array._from_async_array(async_array) def array_keys(self) -> Generator[str, None]: """Return an iterator over group member names. 
@@ -2653,7 +2653,7 @@ def create_array( compressors = _parse_deprecated_compressor( compressor, compressors, zarr_format=self.metadata.zarr_format ) - return Array( + return Array._from_async_array( self._sync( self._async_group.create_array( name=name, @@ -2694,7 +2694,9 @@ def require_array(self, name: str, *, shape: ShapeLike, **kwargs: Any) -> AnyArr ------- a : Array """ - return Array(self._sync(self._async_group.require_array(name, shape=shape, **kwargs))) + return Array._from_async_array( + self._sync(self._async_group.require_array(name, shape=shape, **kwargs)) + ) def empty(self, *, name: str, shape: tuple[int, ...], **kwargs: Any) -> AnyArray: """Create an empty array with the specified shape in this Group. The contents will be filled with @@ -2715,7 +2717,9 @@ def empty(self, *, name: str, shape: tuple[int, ...], **kwargs: Any) -> AnyArray retrieve data from an empty Zarr array, any values may be returned, and these are not guaranteed to be stable from one access to the next. """ - return Array(self._sync(self._async_group.empty(name=name, shape=shape, **kwargs))) + return Array._from_async_array( + self._sync(self._async_group.empty(name=name, shape=shape, **kwargs)) + ) def zeros(self, *, name: str, shape: tuple[int, ...], **kwargs: Any) -> AnyArray: """Create an array, with zero being used as the default value for uninitialized portions of the array. @@ -2734,7 +2738,9 @@ def zeros(self, *, name: str, shape: tuple[int, ...], **kwargs: Any) -> AnyArray Array The new array. """ - return Array(self._sync(self._async_group.zeros(name=name, shape=shape, **kwargs))) + return Array._from_async_array( + self._sync(self._async_group.zeros(name=name, shape=shape, **kwargs)) + ) def ones(self, *, name: str, shape: tuple[int, ...], **kwargs: Any) -> AnyArray: """Create an array, with one being used as the default value for uninitialized portions of the array. 
@@ -2753,7 +2759,9 @@ def ones(self, *, name: str, shape: tuple[int, ...], **kwargs: Any) -> AnyArray: Array The new array. """ - return Array(self._sync(self._async_group.ones(name=name, shape=shape, **kwargs))) + return Array._from_async_array( + self._sync(self._async_group.ones(name=name, shape=shape, **kwargs)) + ) def full( self, *, name: str, shape: tuple[int, ...], fill_value: Any | None, **kwargs: Any @@ -2776,7 +2784,7 @@ def full( Array The new array. """ - return Array( + return Array._from_async_array( self._sync( self._async_group.full(name=name, shape=shape, fill_value=fill_value, **kwargs) ) @@ -2806,7 +2814,9 @@ def empty_like(self, *, name: str, data: async_api.ArrayLike, **kwargs: Any) -> retrieve data from an empty Zarr array, any values may be returned, and these are not guaranteed to be stable from one access to the next. """ - return Array(self._sync(self._async_group.empty_like(name=name, data=data, **kwargs))) + return Array._from_async_array( + self._sync(self._async_group.empty_like(name=name, data=data, **kwargs)) + ) def zeros_like(self, *, name: str, data: async_api.ArrayLike, **kwargs: Any) -> AnyArray: """Create a sub-array of zeros like `data`. @@ -2826,7 +2836,9 @@ def zeros_like(self, *, name: str, data: async_api.ArrayLike, **kwargs: Any) -> The new array. """ - return Array(self._sync(self._async_group.zeros_like(name=name, data=data, **kwargs))) + return Array._from_async_array( + self._sync(self._async_group.zeros_like(name=name, data=data, **kwargs)) + ) def ones_like(self, *, name: str, data: async_api.ArrayLike, **kwargs: Any) -> AnyArray: """Create a sub-array of ones like `data`. @@ -2845,7 +2857,9 @@ def ones_like(self, *, name: str, data: async_api.ArrayLike, **kwargs: Any) -> A Array The new array. 
""" - return Array(self._sync(self._async_group.ones_like(name=name, data=data, **kwargs))) + return Array._from_async_array( + self._sync(self._async_group.ones_like(name=name, data=data, **kwargs)) + ) def full_like(self, *, name: str, data: async_api.ArrayLike, **kwargs: Any) -> AnyArray: """Create a sub-array like `data` filled with the `fill_value` of `data` . @@ -2864,7 +2878,9 @@ def full_like(self, *, name: str, data: async_api.ArrayLike, **kwargs: Any) -> A Array The new array. """ - return Array(self._sync(self._async_group.full_like(name=name, data=data, **kwargs))) + return Array._from_async_array( + self._sync(self._async_group.full_like(name=name, data=data, **kwargs)) + ) def move(self, source: str, dest: str) -> None: """Move a sub-group or sub-array from one path to another. diff --git a/src/zarr/metadata/migrate_v3.py b/src/zarr/metadata/migrate_v3.py index 370af75a6d..eb4c7fd825 100644 --- a/src/zarr/metadata/migrate_v3.py +++ b/src/zarr/metadata/migrate_v3.py @@ -88,7 +88,7 @@ def migrate_to_v3(zarr_v2: AnyArray | Group, output_path: StorePath, dry_run: bo if not zarr_v2.metadata.zarr_format == 2: raise TypeError("Only arrays / groups with zarr v2 metadata can be converted") - if isinstance(zarr_v2.metadata, GroupMetadata): + if isinstance(zarr_v2, Group): _convert_group(zarr_v2, output_path, dry_run) else: _convert_array(zarr_v2, output_path, dry_run) diff --git a/src/zarr/testing/stateful.py b/src/zarr/testing/stateful.py index d6c43f4ecc..2cdf41c961 100644 --- a/src/zarr/testing/stateful.py +++ b/src/zarr/testing/stateful.py @@ -168,7 +168,7 @@ def add_array(self, data: DataObject, name: str) -> None: chunks=chunks_param, dtype=a.dtype, fill_value=a.fill_value, - dimension_names=a.metadata.dimension_names, # type: ignore[union-attr] + dimension_names=a.metadata.dimension_names, compressors=None, ) arr[:] = a[:] diff --git a/tests/test_api/test_asynchronous.py b/tests/test_api/test_asynchronous.py index 362195e858..c630b8e015 100644 --- 
a/tests/test_api/test_asynchronous.py +++ b/tests/test_api/test_asynchronous.py @@ -70,7 +70,7 @@ def test_get_shape_chunks( compressors=None, filters=None, zarr_format=2, - )._async_array, + ), { "chunks": (10,), "shape": (100,), diff --git a/tests/test_array.py b/tests/test_array.py index 0d6d2d5906..677b149682 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -2308,7 +2308,9 @@ def test_with_config(config: ArrayConfigParams) -> None: source_config: ArrayConfigParams = {"write_empty_chunks": False, "order": "F"} source_array = zarr.create_array({}, shape=(1,), dtype="uint8", config=source_config) - new_async_array_config_dict = source_array._async_array.with_config(config).config.to_dict() + with pytest.warns(DeprecationWarning, match="async_array is deprecated"): + async_array = source_array.async_array + new_async_array_config_dict = async_array.with_config(config).config.to_dict() new_array_config_dict = source_array.with_config(config).config.to_dict() for key in source_config: diff --git a/tests/test_codec_pipeline.py b/tests/test_codec_pipeline.py index fa41c2867b..907ad9746b 100644 --- a/tests/test_codec_pipeline.py +++ b/tests/test_codec_pipeline.py @@ -36,7 +36,8 @@ async def test_read_returns_get_results( if write_slice is not None: arr[write_slice] = 0 - async_arr = arr._async_array + with pytest.warns(DeprecationWarning, match="async_array is deprecated"): + async_arr = arr.async_array pipeline = async_arr.codec_pipeline metadata = async_arr.metadata diff --git a/tests/test_codecs/test_vlen.py b/tests/test_codecs/test_vlen.py index 3422090a28..9a14180de3 100644 --- a/tests/test_codecs/test_vlen.py +++ b/tests/test_codecs/test_vlen.py @@ -61,8 +61,10 @@ def test_vlen_string( # test round trip b = Array.open(sp) - assert isinstance(b.metadata, ArrayV3Metadata) # needed for mypy - assert np.array_equal(data, b[:, :]) + # mypy resolves `Array.open`'s `Self` return to a single constrained metadata + # type, so it wrongly thinks these statements 
are unreachable; they run fine. + assert isinstance(b.metadata, ArrayV3Metadata) # type: ignore[unreachable] # needed for mypy + assert np.array_equal(data, b[:, :]) # type: ignore[unreachable] assert b.metadata.data_type == get_data_type_from_native_dtype(data.dtype) assert a.dtype == data.dtype diff --git a/tests/test_runner.py b/tests/test_runner.py index 3a7777ea7f..c6cb9a2005 100644 --- a/tests/test_runner.py +++ b/tests/test_runner.py @@ -1,8 +1,18 @@ from __future__ import annotations import asyncio +import warnings +from typing import TYPE_CHECKING, Any +import pytest + +import zarr +from zarr.core.array import Array, AsyncArray from zarr.core.sync import Runner, SyncRunner +from zarr.storage import MemoryStore + +if TYPE_CHECKING: + from collections.abc import Coroutine async def _coro() -> int: @@ -17,3 +27,54 @@ def test_sync_runner_runs_coroutine() -> None: def test_sync_runner_is_runner() -> None: assert isinstance(SyncRunner(), Runner) + + +def _make_array() -> Array[Any]: + return zarr.create_array(store=MemoryStore(), shape=(8,), chunks=(4,), dtype="i4", fill_value=0) + + +def test_array_has_default_sync_runner() -> None: + arr = _make_array() + assert isinstance(arr._runner, SyncRunner) + + +def test_array_owns_state() -> None: + arr = _make_array() + assert arr.metadata is not None + assert arr.store_path is not None + assert arr.codec_pipeline is not None + + +def test_array_accepts_custom_runner() -> None: + class RecordingRunner: + def __init__(self) -> None: + self.calls = 0 + + def run(self, coro: Coroutine[Any, Any, Any]) -> Any: + self.calls += 1 + return SyncRunner().run(coro) + + runner = RecordingRunner() + base = _make_array() + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + aa = base.async_array + arr = Array(metadata=aa.metadata, store_path=aa.store_path, config=aa.config, runner=runner) + assert arr._runner is runner + + +def test_async_array_property_deprecated() -> None: + arr = 
_make_array() + with pytest.warns(DeprecationWarning, match="async_array is deprecated"): + aa = arr.async_array + assert isinstance(aa, AsyncArray) + + +def test_from_async_array_roundtrip() -> None: + arr = _make_array() + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + aa = arr.async_array + arr2 = Array._from_async_array(aa) + assert arr2.metadata == arr.metadata + assert isinstance(arr2._runner, SyncRunner) From ca5fc34583ee06c709177e254f424836934be1e2 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 3 Jun 2026 22:35:48 +0200 Subject: [PATCH 07/24] fix(array): restore Array equality after state refactor Co-Authored-By: Claude Opus 4.8 (1M context) --- src/zarr/core/array.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index b7cf9de2a8..7300e9c1a3 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -1901,6 +1901,17 @@ def async_array(self) -> AsyncArray[T_ArrayMetadata]: ) return AsyncArray(self.metadata, self.store_path, self.config) + def __eq__(self, other: object) -> bool: + if not isinstance(other, Array): + return NotImplemented + return ( + self.metadata == other.metadata + and self.store_path == other.store_path + and self.config == other.config + ) + + __hash__ = None # type: ignore[assignment] + @property def _zdtype(self) -> ZDType[TBaseDType, TBaseScalar]: """ From cca5fbb815af9ca52d6b0f187e9c23f9ab74015d Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 3 Jun 2026 22:36:25 +0200 Subject: [PATCH 08/24] docs: log Task 3 deviations and interim-red state Co-Authored-By: Claude Opus 4.8 (1M context) --- .../plans/2026-06-03-unify-array-asyncarray.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/superpowers/plans/2026-06-03-unify-array-asyncarray.md b/docs/superpowers/plans/2026-06-03-unify-array-asyncarray.md index 100677da1c..a135cfebf3 100644 --- 
a/docs/superpowers/plans/2026-06-03-unify-array-asyncarray.md +++ b/docs/superpowers/plans/2026-06-03-unify-array-asyncarray.md @@ -10,6 +10,18 @@ --- +## Execution log / deviations (kept current during implementation) + +- **Task 3 absorbed extra work** that the original decomposition under-anticipated, because the new `Array.__init__` signature and the removal of the `_async_array` attribute have repo-wide blast radius (the pre-commit hook type-checks the *whole* repo): + - Converted ALL external `Array(async_array)` construction sites to `Array._from_async_array(...)` in `src/zarr/api/synchronous.py` and `src/zarr/core/group.py` — **this is the entirety of Task 7**, so Task 7 is effectively complete after Task 3. + - Migrated the `_async_array`-referencing tests (`tests/test_array.py`, `tests/test_api/test_asynchronous.py`, `tests/test_codec_pipeline.py`) and removed a now-unused `# type: ignore` in `src/zarr/testing/stateful.py` — **this is part of Task 8**. + - Fixed downstream typing fallout: `src/zarr/core/attributes.py` (`cast` to `JSON`), `src/zarr/metadata/migrate_v3.py` (narrow on `isinstance(zarr_v2, Group)` instead of on `.metadata`), `tests/test_codecs/test_vlen.py` (`type: ignore[unreachable]`). + - **Added `Array.__eq__`** (compares `metadata`, `store_path`, `config`; `__hash__ = None`) — the old `@dataclass(frozen=False)` auto-generated equality that the plain-class rewrite dropped, breaking `test_serializable_sync_array`. +- **KNOWN INTERIM-RED STATE after Task 3:** 12 tests fail — the `test_resize_*` and `test_append_*` families. Cause: the *sync* `Array.resize`/`append` still call `sync(self.async_array.<op>(...))`, but `async_array` is now a deprecated property that reconstructs a **throwaway** `AsyncArray` each access, so the mutation never lands on `self`. **Task 6 fixes this** by routing sync `resize`/`append` through `self._runner.run(self.<op>_async(...))`, which mutates `self` in place. 
The `shape` setter (which calls `self.resize`) is fixed transitively. Until Task 6, these 12 failures are expected; Task 9 must confirm they are GONE. +- Commits so far: Task 1 `5f6d4983`; Task 2 `2edd5a72` + docstring `7702eaf1`; Task 3 `ca9d1946` + eq fix `ca5fc345`. + +--- + ## File Structure - `src/zarr/core/sync.py` — add `Runner` Protocol + `SyncRunner` class. From c6227a140cb00e1c22ef0b7d0630eda70cb1960a Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 3 Jun 2026 22:52:25 +0200 Subject: [PATCH 09/24] test: migrate remaining _async_array refs to deprecated async_array property Co-Authored-By: Claude Opus 4.8 (1M context) --- tests/test_indexing.py | 21 ++++++++++++++++----- tests/test_properties.py | 13 ++++++++++--- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/tests/test_indexing.py b/tests/test_indexing.py index c45942eee7..ca878e2f2f 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -1,6 +1,7 @@ from __future__ import annotations import itertools +import warnings from collections import Counter from typing import TYPE_CHECKING, Any from uuid import uuid4 @@ -2110,7 +2111,9 @@ class TestAsync: async def test_async_oindex(self, store, indexer, expected): z = zarr.create_array(store=store, shape=(2, 2), chunks=(1, 1), zarr_format=3, dtype="i8") z[...] = np.array([[1, 2], [3, 4]]) - async_zarr = z._async_array + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + async_zarr = z.async_array result = await async_zarr.oindex.getitem(indexer) assert_array_equal(result, expected) @@ -2121,7 +2124,9 @@ async def test_async_oindex_with_zarr_array(self, store): z1 = group.create_array(name="z1", shape=(2, 2), chunks=(1, 1), dtype="i8") z1[...] 
= np.array([[1, 2], [3, 4]]) - async_zarr = z1._async_array + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + async_zarr = z1.async_array # create boolean zarr array to index with z2 = group.create_array(name="z2", shape=(2,), chunks=(1,), dtype="?") @@ -2143,7 +2148,9 @@ async def test_async_oindex_with_zarr_array(self, store): async def test_async_vindex(self, store, indexer, expected): z = zarr.create_array(store=store, shape=(2, 2), chunks=(1, 1), zarr_format=3, dtype="i8") z[...] = np.array([[1, 2], [3, 4]]) - async_zarr = z._async_array + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + async_zarr = z.async_array result = await async_zarr.vindex.getitem(indexer) assert_array_equal(result, expected) @@ -2154,7 +2161,9 @@ async def test_async_vindex_with_zarr_array(self, store): z1 = group.create_array(name="z1", shape=(2, 2), chunks=(1, 1), dtype="i8") z1[...] = np.array([[1, 2], [3, 4]]) - async_zarr = z1._async_array + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + async_zarr = z1.async_array # create boolean zarr array to index with z2 = group.create_array(name="z2", shape=(2, 2), chunks=(1, 1), dtype="?") @@ -2168,7 +2177,9 @@ async def test_async_vindex_with_zarr_array(self, store): async def test_async_invalid_indexer(self, store): z = zarr.create_array(store=store, shape=(2, 2), chunks=(1, 1), zarr_format=3, dtype="i8") z[...] 
= np.array([[1, 2], [3, 4]]) - async_zarr = z._async_array + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + async_zarr = z.async_array with pytest.raises(IndexError): await async_zarr.vindex.getitem("invalid_indexer") diff --git a/tests/test_properties.py b/tests/test_properties.py index 0e5dcf77b0..60cd68c3ad 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -1,6 +1,7 @@ import itertools import json import numbers +import warnings from collections.abc import Generator from typing import Any @@ -131,7 +132,9 @@ async def test_basic_indexing(data: st.DataObject) -> None: assert_array_equal(nparray[indexer], actual) # async get - async_zarray = zarray._async_array + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + async_zarray = zarray.async_array actual = await async_zarray.getitem(indexer) assert_array_equal(nparray[indexer], actual) @@ -173,7 +176,9 @@ async def test_oindex(data: st.DataObject) -> None: assert_array_equal(nparray[npindexer], actual) # async get - async_zarray = zarray._async_array + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + async_zarray = zarray.async_array actual = await async_zarray.oindex.getitem(zindexer) assert_array_equal(nparray[npindexer], actual) @@ -214,7 +219,9 @@ async def test_vindex(data: st.DataObject) -> None: assert_array_equal(nparray[indexer], actual) # async get - async_zarray = zarray._async_array + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + async_zarray = zarray.async_array actual = await async_zarray.vindex.getitem(indexer) assert_array_equal(nparray[indexer], actual) From d644fabac7e1f866ab39d2f2989cea709d0e6dac Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 4 Jun 2026 08:18:04 +0200 Subject: [PATCH 10/24] refactor(array): repoint Array read-only properties to own state Co-Authored-By: Claude Opus 4.8 (1M context) --- 
src/zarr/core/array.py | 127 +++++++++++++++++++++++++++++++---------- 1 file changed, 98 insertions(+), 29 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 7300e9c1a3..51b8cb3a9e 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -2056,7 +2056,7 @@ def open( @property def store(self) -> Store: - return self.async_array.store + return self.store_path.store @property def ndim(self) -> int: @@ -2067,7 +2067,7 @@ def ndim(self) -> int: int The number of dimensions in the array. """ - return self.async_array.ndim + return len(self.metadata.shape) @property def shape(self) -> tuple[int, ...]: @@ -2078,7 +2078,7 @@ def shape(self) -> tuple[int, ...]: tuple[int, ...] The shape of the array. """ - return self.async_array.shape + return self.metadata.shape @shape.setter def shape(self, value: tuple[int, ...]) -> None: @@ -2098,7 +2098,8 @@ def chunks(self) -> tuple[int, ...]: tuple A tuple of integers representing the length of each dimension of a chunk. """ - return self.async_array.chunks + # TODO: move sharding awareness out of metadata + return self.metadata.chunks @property def read_chunk_sizes(self) -> tuple[tuple[int, ...], ...]: @@ -2124,7 +2125,14 @@ def read_chunk_sizes(self) -> tuple[tuple[int, ...], ...]: >>> arr.read_chunk_sizes ((30, 30, 30, 10), (40, 40)) """ - return self.async_array.read_chunk_sizes + + from zarr.codecs.sharding import ShardingCodec + + codecs: tuple[Codec, ...] 
= getattr(self.metadata, "codecs", ()) + if len(codecs) == 1 and isinstance(codecs[0], ShardingCodec): + inner_chunk_shape = codecs[0].chunk_shape + return _chunk_sizes_from_shape(self.shape, inner_chunk_shape) + return self._chunk_grid.chunk_sizes @property def write_chunk_sizes(self) -> tuple[tuple[int, ...], ...]: @@ -2148,7 +2156,8 @@ def write_chunk_sizes(self) -> tuple[tuple[int, ...], ...]: >>> arr.write_chunk_sizes ((30, 30, 30, 10), (40, 40)) """ - return self.async_array.write_chunk_sizes + + return self._chunk_grid.chunk_sizes @property def shards(self) -> tuple[int, ...] | None: @@ -2163,7 +2172,7 @@ def shards(self) -> tuple[int, ...] | None: tuple | None A tuple of integers representing the length of each dimension of a shard or None if sharding is not used. """ - return self.async_array.shards + return self.metadata.shards @property def size(self) -> int: @@ -2174,7 +2183,7 @@ def size(self) -> int: int Total number of elements in the array. """ - return self.async_array.size + return np.prod(self.metadata.shape).item() @property def dtype(self) -> np.dtype[Any]: @@ -2185,7 +2194,7 @@ def dtype(self) -> np.dtype[Any]: np.dtype The NumPy data type. 
""" - return self.async_array.dtype + return self._zdtype.to_native_dtype() @property def attrs(self) -> Attributes: @@ -2205,25 +2214,33 @@ def attrs(self) -> Attributes: @property def path(self) -> str: """Storage path.""" - return self.async_array.path + return self.store_path.path @property def name(self) -> str: """Array name following h5py convention.""" - return self.async_array.name + # follow h5py convention: add leading slash + name = self.path + if not name.startswith("/"): + name = "/" + name + return name @property def basename(self) -> str: """Final component of name.""" - return self.async_array.basename + return self.name.split("/")[-1] @property def order(self) -> MemoryOrder: - return self.async_array.order + if self.metadata.zarr_format == 2: + return self.metadata.order + else: + return self.config.order @property def read_only(self) -> bool: - return self.async_array.read_only + # Backwards compatibility for 2.x + return self.store_path.read_only @property def fill_value(self) -> Any: @@ -2235,14 +2252,27 @@ def filters(self) -> tuple[Numcodec, ...] | tuple[ArrayArrayCodec, ...]: Filters that are applied to each chunk of the array, in order, before serializing that chunk to bytes. """ - return self.async_array.filters + if self.metadata.zarr_format == 2: + filters = self.metadata.filters + if filters is None: + return () + return filters + + return tuple( + codec for codec in self.metadata.inner_codecs if isinstance(codec, ArrayArrayCodec) + ) @property def serializer(self) -> None | ArrayBytesCodec: """ Array-to-bytes codec to use for serializing the chunks into bytes. 
""" - return self.async_array.serializer + if self.metadata.zarr_format == 2: + return None + + return next( + codec for codec in self.metadata.inner_codecs if isinstance(codec, ArrayBytesCodec) + ) @property @deprecated("Use Array.compressors instead.", category=ZarrDeprecationWarning) @@ -2254,7 +2284,9 @@ def compressor(self) -> Numcodec | None: `array.compressor` is deprecated since v3.0.0 and will be removed in a future release. Use [`array.compressors`][zarr.Array.compressors] instead. """ - return self.async_array.compressor + if self.metadata.zarr_format == 2: + return self.metadata.compressor + raise TypeError("`compressor` is not available for Zarr format 3 arrays.") @property def compressors(self) -> tuple[Numcodec, ...] | tuple[BytesBytesCodec, ...]: @@ -2262,7 +2294,14 @@ def compressors(self) -> tuple[Numcodec, ...] | tuple[BytesBytesCodec, ...]: Compressors that are applied to each chunk of the array. Compressors are applied in order, and after any filters are applied (if any are specified) and the data is serialized into bytes. """ - return self.async_array.compressors + if self.metadata.zarr_format == 2: + if self.metadata.compressor is not None: + return (self.metadata.compressor,) + return () + + return tuple( + codec for codec in self.metadata.inner_codecs if isinstance(codec, BytesBytesCodec) + ) @property def cdata_shape(self) -> tuple[int, ...]: @@ -2271,7 +2310,7 @@ def cdata_shape(self) -> tuple[int, ...]: When sharding is used, this counts inner chunks (not shards) per dimension. """ - return self.async_array._chunk_grid_shape + return self._chunk_grid_shape @property def _chunk_grid_shape(self) -> tuple[int, ...]: @@ -2285,14 +2324,27 @@ def _chunk_grid_shape(self) -> tuple[int, ...]: tuple[int, ...] The number of chunks along each dimension. """ - return self.async_array._chunk_grid_shape + # TODO: refactor — extract a sharding_codec property on ArrayV3Metadata + # to replace the repeated `len == 1 and isinstance` pattern. 
+ from zarr.codecs.sharding import ShardingCodec + + codecs: tuple[Codec, ...] = getattr(self.metadata, "codecs", ()) + if len(codecs) == 1 and isinstance(codecs[0], ShardingCodec): + # When sharding, count inner chunks across the whole array + chunk_shape = codecs[0].chunk_shape + return tuple(starmap(ceildiv, zip(self.shape, chunk_shape, strict=True))) + return self._chunk_grid.grid_shape @property def _shard_grid_shape(self) -> tuple[int, ...]: """ The shape of the shard grid for this array. """ - return self.async_array._shard_grid_shape + if self.shards is None: + shard_shape = self.chunks + else: + shard_shape = self.shards + return tuple(starmap(ceildiv, zip(self.shape, shard_shape, strict=True))) @property def nchunks(self) -> int: @@ -2302,14 +2354,14 @@ def nchunks(self) -> int: Note that if a sharding codec is used, then the number of chunks may exceed the number of stored objects supporting this array. """ - return self.async_array.nchunks + return product(self._chunk_grid_shape) @property def _nshards(self) -> int: """ The number of shards in the stored representation of this array. """ - return self.async_array._nshards + return product(self._shard_grid_shape) @overload def with_config(self: ArrayV2, config: ArrayConfigLike) -> ArrayV2: ... @@ -2356,7 +2408,7 @@ def nbytes(self) -> int: dtypes. It is not possible to determine the size of an array with variable-length elements from the shape and dtype alone. """ - return self.async_array.nbytes + return self.size * self.dtype.itemsize @property def nchunks_initialized(self) -> int: @@ -2439,7 +2491,12 @@ def _iter_shard_keys( The storage key of each shard in the selection or chunk though chunks technically do not have storage keys. """ - return self.async_array._iter_shard_keys(origin=origin, selection_shape=selection_shape) + # Iterate over the coordinates of chunks in chunk grid space. 
+ return _iter_shard_keys( + array=self, + origin=origin, + selection_shape=selection_shape, + ) def _iter_chunk_coords( self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None @@ -2465,7 +2522,11 @@ def _iter_chunk_coords( tuple[int, ...] The coordinates of each chunk in the selection. """ - return self.async_array._iter_chunk_coords(origin=origin, selection_shape=selection_shape) + return _iter_chunk_coords( + array=self, + origin=origin, + selection_shape=selection_shape, + ) def _iter_shard_coords( self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None @@ -2491,7 +2552,11 @@ def _iter_shard_coords( tuple[int, ...] The coordinates of each shard in the selection. """ - return self.async_array._iter_shard_coords(origin=origin, selection_shape=selection_shape) + return _iter_shard_coords( + array=self, + origin=origin, + selection_shape=selection_shape, + ) def _iter_chunk_regions( self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None @@ -2511,7 +2576,11 @@ def _iter_chunk_regions( tuple[slice, ...] A tuple of slice objects representing the region spanned by each chunk in the selection. """ - return self.async_array._iter_chunk_regions(origin=origin, selection_shape=selection_shape) + return _iter_chunk_regions( + array=self, + origin=origin, + selection_shape=selection_shape, + ) def _iter_shard_regions( self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None @@ -2532,7 +2601,7 @@ def _iter_shard_regions( A tuple of slice objects representing the region spanned by each shard or if no shard is present, chunk in the selection. 
""" - return self.async_array._iter_shard_regions(origin=origin, selection_shape=selection_shape) + return _iter_shard_regions(array=self, origin=origin, selection_shape=selection_shape) def __array__( self, dtype: npt.DTypeLike | None = None, copy: bool | None = None From ed7a3fe34aa94be4729b9696b73ae09b8ab06933 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 4 Jun 2026 08:30:30 +0200 Subject: [PATCH 11/24] feat(array): add selection *_async methods; route sync selection via runner Co-Authored-By: Claude Opus 4.8 (1M context) --- src/zarr/core/array.py | 114 ++++++++++++++++++++++++++++++++--------- tests/test_runner.py | 35 +++++++++++++ 2 files changed, 125 insertions(+), 24 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 51b8cb3a9e..09f2e716cc 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -2622,6 +2622,80 @@ def __array__( return arr_np + async def _get_selection( + self, + indexer: Indexer, + *, + prototype: BufferPrototype, + out: NDBuffer | None = None, + fields: Fields | None = None, + ) -> NDArrayLikeOrScalar: + return await _get_selection( + self.store_path, + self.metadata, + self.codec_pipeline, + self.config, + self._chunk_grid, + indexer, + prototype=prototype, + out=out, + fields=fields, + ) + + async def _set_selection( + self, + indexer: Indexer, + value: npt.ArrayLike, + *, + prototype: BufferPrototype, + fields: Fields | None = None, + ) -> None: + return await _set_selection( + self.store_path, + self.metadata, + self.codec_pipeline, + self.config, + self._chunk_grid, + indexer, + value, + prototype=prototype, + fields=fields, + ) + + async def getitem_async( + self, + selection: BasicSelection, + *, + prototype: BufferPrototype | None = None, + ) -> NDArrayLikeOrScalar: + return await _getitem( + self.store_path, + self.metadata, + self.codec_pipeline, + self.config, + self._chunk_grid, + selection, + prototype=prototype, + ) + + async def setitem_async( + self, + selection: 
BasicSelection, + value: npt.ArrayLike, + *, + prototype: BufferPrototype | None = None, + ) -> None: + return await _setitem( + self.store_path, + self.metadata, + self.codec_pipeline, + self.config, + self._chunk_grid, + selection, + value, + prototype=prototype, + ) + def __getitem__(self, selection: Selection) -> NDArrayLikeOrScalar: """Retrieve data for an item or region of the array. @@ -3010,8 +3084,8 @@ def get_basic_selection( if prototype is None: prototype = default_buffer_prototype() - return sync( - self.async_array._get_selection( + return self._runner.run( + self._get_selection( BasicIndexer(selection, self.shape, self._chunk_grid), out=out, fields=fields, @@ -3120,7 +3194,7 @@ def set_basic_selection( if prototype is None: prototype = default_buffer_prototype() indexer = BasicIndexer(selection, self.shape, self._chunk_grid) - sync(self.async_array._set_selection(indexer, value, fields=fields, prototype=prototype)) + self._runner.run(self._set_selection(indexer, value, fields=fields, prototype=prototype)) def get_orthogonal_selection( self, @@ -3248,10 +3322,8 @@ def get_orthogonal_selection( if prototype is None: prototype = default_buffer_prototype() indexer = OrthogonalIndexer(selection, self.shape, self._chunk_grid) - return sync( - self.async_array._get_selection( - indexer=indexer, out=out, fields=fields, prototype=prototype - ) + return self._runner.run( + self._get_selection(indexer=indexer, out=out, fields=fields, prototype=prototype) ) def set_orthogonal_selection( @@ -3366,8 +3438,8 @@ def set_orthogonal_selection( if prototype is None: prototype = default_buffer_prototype() indexer = OrthogonalIndexer(selection, self.shape, self._chunk_grid) - return sync( - self.async_array._set_selection(indexer, value, fields=fields, prototype=prototype) + return self._runner.run( + self._set_selection(indexer, value, fields=fields, prototype=prototype) ) def get_mask_selection( @@ -3454,10 +3526,8 @@ def get_mask_selection( if prototype is None: 
prototype = default_buffer_prototype() indexer = MaskIndexer(mask, self.shape, self._chunk_grid) - return sync( - self.async_array._get_selection( - indexer=indexer, out=out, fields=fields, prototype=prototype - ) + return self._runner.run( + self._get_selection(indexer=indexer, out=out, fields=fields, prototype=prototype) ) def set_mask_selection( @@ -3543,7 +3613,7 @@ def set_mask_selection( if prototype is None: prototype = default_buffer_prototype() indexer = MaskIndexer(mask, self.shape, self._chunk_grid) - sync(self.async_array._set_selection(indexer, value, fields=fields, prototype=prototype)) + self._runner.run(self._set_selection(indexer, value, fields=fields, prototype=prototype)) def get_coordinate_selection( self, @@ -3631,10 +3701,8 @@ def get_coordinate_selection( if prototype is None: prototype = default_buffer_prototype() indexer = CoordinateIndexer(selection, self.shape, self._chunk_grid) - out_array = sync( - self.async_array._get_selection( - indexer=indexer, out=out, fields=fields, prototype=prototype - ) + out_array = self._runner.run( + self._get_selection(indexer=indexer, out=out, fields=fields, prototype=prototype) ) if hasattr(out_array, "shape"): @@ -3744,7 +3812,7 @@ def set_coordinate_selection( f"elements with an array of {value.shape[0]} elements." 
) - sync(self.async_array._set_selection(indexer, value, fields=fields, prototype=prototype)) + self._runner.run(self._set_selection(indexer, value, fields=fields, prototype=prototype)) def get_block_selection( self, @@ -3844,10 +3912,8 @@ def get_block_selection( if prototype is None: prototype = default_buffer_prototype() indexer = BlockIndexer(selection, self.shape, self._chunk_grid) - return sync( - self.async_array._get_selection( - indexer=indexer, out=out, fields=fields, prototype=prototype - ) + return self._runner.run( + self._get_selection(indexer=indexer, out=out, fields=fields, prototype=prototype) ) def set_block_selection( @@ -3944,7 +4010,7 @@ def set_block_selection( if prototype is None: prototype = default_buffer_prototype() indexer = BlockIndexer(selection, self.shape, self._chunk_grid) - sync(self.async_array._set_selection(indexer, value, fields=fields, prototype=prototype)) + self._runner.run(self._set_selection(indexer, value, fields=fields, prototype=prototype)) @property def vindex(self) -> VIndex: diff --git a/tests/test_runner.py b/tests/test_runner.py index c6cb9a2005..a24aad006e 100644 --- a/tests/test_runner.py +++ b/tests/test_runner.py @@ -4,6 +4,7 @@ import warnings from typing import TYPE_CHECKING, Any +import numpy as np import pytest import zarr @@ -78,3 +79,37 @@ def test_from_async_array_roundtrip() -> None: arr2 = Array._from_async_array(aa) assert arr2.metadata == arr.metadata assert isinstance(arr2._runner, SyncRunner) + + +def test_getitem_sync_async_equivalence() -> None: + arr = _make_array() + arr[:] = np.arange(8, dtype="i4") + sync_result = arr[2:6] + async_via_runner = arr._runner.run(arr.getitem_async(slice(2, 6))) + np.testing.assert_array_equal(sync_result, async_via_runner) + + +def test_setitem_async_roundtrip() -> None: + arr = _make_array() + arr._runner.run(arr.setitem_async(slice(0, 4), np.arange(4, dtype="i4"))) + np.testing.assert_array_equal(arr[0:4], np.arange(4, dtype="i4")) + + +def 
test_custom_runner_invoked_on_read() -> None: + # The runner injected into Array is actually used by sync reads. + class RecordingRunner: + def __init__(self) -> None: + self.calls = 0 + + def run(self, coro: Coroutine[Any, Any, Any]) -> Any: + self.calls += 1 + return SyncRunner().run(coro) + + runner = RecordingRunner() + base = _make_array() + base[:] = np.arange(8, dtype="i4") + arr = Array( + metadata=base.metadata, store_path=base.store_path, config=base.config, runner=runner + ) + _ = arr[2:6] + assert runner.calls > 0 From 1d104f3d6404f3f0c7d6d839b6e3a80e493d8fdc Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 4 Jun 2026 09:35:45 +0200 Subject: [PATCH 12/24] feat(array): add remaining *_async methods; route sync wrappers via runner Routes resize/append/update_attributes/nchunks_initialized/nbytes_stored/info_complete through self._runner.run(self.*_async(...)). The *_async methods operate on the live Array itself, so the mutating ones (resize, append, update_attributes) now update its state in place. Fixes resize/append not updating array state. Array no longer delegates to the deprecated async_array property.
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/zarr/core/array.py | 40 +++++++++++++++++++++++++++++++--------- tests/test_runner.py | 19 +++++++++++++++++++ 2 files changed, 50 insertions(+), 9 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 09f2e716cc..fd243190a4 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -2434,7 +2434,7 @@ def nchunks_initialized(self) -> int: >>> arr.nchunks_initialized 6 """ - return sync(self.async_array.nchunks_initialized()) + return self._runner.run(self.nchunks_initialized_async()) @property def _nshards_initialized(self) -> int: @@ -2456,7 +2456,7 @@ def _nshards_initialized(self) -> int: >>> arr._nshards_initialized 3 """ - return sync(self.async_array._nshards_initialized()) + return self._runner.run(self._nshards_initialized_async()) def nbytes_stored(self) -> int: """ @@ -2466,7 +2466,7 @@ def nbytes_stored(self) -> int: ------- size : int """ - return sync(self.async_array.nbytes_stored()) + return self._runner.run(self.nbytes_stored_async()) def _iter_shard_keys( self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None @@ -4074,7 +4074,7 @@ def resize(self, new_shape: ShapeLike) -> None: #>(50, 50) ``` """ - sync(self.async_array.resize(new_shape)) + self._runner.run(self.resize_async(new_shape)) def append(self, data: npt.ArrayLike, axis: int = 0) -> tuple[int, ...]: """Append `data` to `axis`. @@ -4110,7 +4110,7 @@ def append(self, data: npt.ArrayLike, axis: int = 0) -> tuple[int, ...]: >>> z.shape (20000, 2000) """ - return sync(self.async_array.append(data, axis=axis)) + return self._runner.run(self.append_async(data, axis=axis)) def update_attributes(self, new_attributes: dict[str, JSON]) -> Self: """ @@ -4137,8 +4137,8 @@ def update_attributes(self, new_attributes: dict[str, JSON]) -> Self: - The updated attributes will be merged with existing attributes, and any conflicts will be overwritten by the new values. 
""" - new_array = sync(self.async_array.update_attributes(new_attributes)) - return type(self)._from_async_array(new_array) + self._runner.run(self.update_attributes_async(new_attributes)) + return self def __repr__(self) -> str: return f"" @@ -4175,7 +4175,7 @@ def info(self) -> Any: Compressors : (ZstdCodec(level=0, checksum=False),) No. bytes : 40 """ - return self.async_array.info + return self._info() def info_complete(self) -> Any: """ @@ -4195,7 +4195,29 @@ def info_complete(self) -> Any: ------- [zarr.Array.info][] - The statically known subset of metadata about an array. """ - return sync(self.async_array.info_complete()) + return self._runner.run(self.info_complete_async()) + + async def resize_async(self, new_shape: ShapeLike, delete_outside_chunks: bool = True) -> None: + return await _resize(self, new_shape, delete_outside_chunks) + + async def append_async(self, data: npt.ArrayLike, axis: int = 0) -> tuple[int, ...]: + return await _append(self, data, axis) + + async def update_attributes_async(self, new_attributes: dict[str, JSON]) -> Self: + await _update_attributes(self, new_attributes) + return self + + async def nchunks_initialized_async(self) -> int: + return await _nchunks_initialized(self) + + async def _nshards_initialized_async(self) -> int: + return await _nshards_initialized(self) + + async def nbytes_stored_async(self) -> int: + return await _nbytes_stored(self.store_path) + + async def info_complete_async(self) -> Any: + return await _info_complete(self) async def _shards_initialized( diff --git a/tests/test_runner.py b/tests/test_runner.py index a24aad006e..adc3df58e1 100644 --- a/tests/test_runner.py +++ b/tests/test_runner.py @@ -113,3 +113,22 @@ def run(self, coro: Coroutine[Any, Any, Any]) -> Any: ) _ = arr[2:6] assert runner.calls > 0 + + +def test_resize_async() -> None: + arr = _make_array() + arr._runner.run(arr.resize_async((16,))) + assert arr.shape == (16,) + + +def test_update_attributes_async() -> None: + arr = 
_make_array() + arr._runner.run(arr.update_attributes_async({"foo": "bar"})) + assert arr.metadata.attributes["foo"] == "bar" + + +def test_nchunks_initialized_async() -> None: + arr = _make_array() + arr[:] = np.arange(8, dtype="i4") + n = arr._runner.run(arr.nchunks_initialized_async()) + assert n == arr.nchunks_initialized From a7969fbd2a3324a151459a6492b5111ec33e339a Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 4 Jun 2026 10:06:03 +0200 Subject: [PATCH 13/24] test: migrate Array.async_array usages to new API; add changelog Co-Authored-By: Claude Opus 4.8 (1M context) --- changes/4011.feature.md | 1 + src/zarr/core/array.py | 6 +++--- tests/test_api.py | 4 ++-- tests/test_array.py | 34 +++++++++++++++++----------------- tests/test_v2.py | 6 +++--- 5 files changed, 26 insertions(+), 25 deletions(-) create mode 100644 changes/4011.feature.md diff --git a/changes/4011.feature.md b/changes/4011.feature.md new file mode 100644 index 0000000000..bb93b5b31e --- /dev/null +++ b/changes/4011.feature.md @@ -0,0 +1 @@ +`Array` now owns its own state and accepts a keyword-only `runner` argument for plugging in a custom event loop. Every asynchronous array operation is available as a `*_async` method on `Array` (for example `Array.getitem_async`). `Array.async_array` is deprecated; use the `*_async` methods instead. diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index fd243190a4..3a16d00fb1 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -4473,7 +4473,7 @@ async def from_array( Create an array from an existing Array without copying the data: - >>> arr5 = asyncio.run(from_array({}, data=Array(arr4), write_data=False)) + >>> arr5 = asyncio.run(from_array({}, data=Array._from_async_array(arr4), write_data=False)) >>> arr5 >>> asyncio.run(arr5.getitem(...)) @@ -4534,9 +4534,9 @@ async def from_array( if isinstance(data, Array): async def _copy_array_region( - chunk_coords: tuple[int, ...] 
| slice, _data: AnyArray + chunk_coords: tuple[int, ...] | slice, _data: Array[Any] ) -> None: - arr = await _data.async_array.getitem(chunk_coords) + arr = await _data.getitem_async(chunk_coords) await result.setitem(chunk_coords, arr) # Stream data from the source array to the new array diff --git a/tests/test_api.py b/tests/test_api.py index 788519969d..e30904e99a 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -165,7 +165,7 @@ async def test_array_like_creation( assert new_arr.shape == expect_shape assert new_arr.chunks == expect_chunks assert new_arr.dtype == expect_dtype - assert np.all(Array(new_arr)[:] == expect_fill) + assert np.all(Array._from_async_array(new_arr)[:] == expect_fill) # TODO: parametrize over everything this function takes @@ -232,7 +232,7 @@ def test_open_array_respects_write_empty_chunks_config(zarr_format: ZarrFormat) arr2 = zarr.open(store=store, path="test_array", config={"write_empty_chunks": True}) assert isinstance(arr2, zarr.Array) - assert arr2.async_array.config.write_empty_chunks is True + assert arr2.config.write_empty_chunks is True arr2[0:5] = np.zeros(5) assert arr2.nchunks_initialized == 1 diff --git a/tests/test_array.py b/tests/test_array.py index 677b149682..bc7fba02f1 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -381,7 +381,7 @@ def test_nchunks(test_cls: type[AnyArray] | type[AnyAsyncArray], nchunks: int) - if test_cls == Array: observed = arr.nchunks else: - observed = arr.async_array.nchunks + observed = arr.nchunks assert observed == expected @@ -415,8 +415,8 @@ async def test_nchunks_initialized( observed = arr._nshards_initialized assert observed == arr.nchunks_initialized // chunks_per_shard else: - observed = await arr.async_array._nshards_initialized() - assert observed == await arr.async_array.nchunks_initialized() // chunks_per_shard + observed = await arr._nshards_initialized_async() + assert observed == await arr.nchunks_initialized_async() // chunks_per_shard assert observed == 
expected # delete chunks @@ -426,8 +426,8 @@ async def test_nchunks_initialized( observed = arr._nshards_initialized assert observed == arr.nchunks_initialized // chunks_per_shard else: - observed = await arr.async_array._nshards_initialized() - assert observed == await arr.async_array.nchunks_initialized() // chunks_per_shard + observed = await arr._nshards_initialized_async() + assert observed == await arr.nchunks_initialized_async() // chunks_per_shard expected = arr._nshards - idx - 1 assert observed == expected @@ -453,7 +453,7 @@ async def test_chunks_initialized( ) for keys, region in zip(chunks_accumulated, arr._iter_shard_regions(), strict=False): arr[region] = 1 - observed = sorted(await _shards_initialized(arr.async_array)) + observed = sorted(await _shards_initialized(arr)) expected = sorted(keys) assert observed == expected @@ -505,7 +505,7 @@ def test_info_v2(self, chunks: tuple[int, int], shards: tuple[int, int] | None) result = arr.info expected = ArrayInfo( _zarr_format=2, - _data_type=arr.async_array._zdtype, + _data_type=arr._zdtype, _fill_value=arr.fill_value, _shape=(8, 8), _chunk_shape=chunks, @@ -523,7 +523,7 @@ def test_info_v3(self, chunks: tuple[int, int], shards: tuple[int, int] | None) result = arr.info expected = ArrayInfo( _zarr_format=3, - _data_type=arr.async_array._zdtype, + _data_type=arr._zdtype, _fill_value=arr.fill_value, _shape=(8, 8), _chunk_shape=chunks, @@ -549,7 +549,7 @@ def test_info_complete(self, chunks: tuple[int, int], shards: tuple[int, int] | result = arr.info_complete() expected = ArrayInfo( _zarr_format=3, - _data_type=arr.async_array._zdtype, + _data_type=arr._zdtype, _fill_value=arr.fill_value, _shape=(8, 8), _chunk_shape=chunks, @@ -961,7 +961,7 @@ def test_write_empty_chunks_behavior( config={"write_empty_chunks": write_empty_chunks}, ) - assert arr.async_array.config.write_empty_chunks == write_empty_chunks + assert arr.config.write_empty_chunks == write_empty_chunks # initialize the store with some non-fill 
value chunks arr[:] = fill_value + 1 @@ -1032,7 +1032,7 @@ async def test_nbytes( store = MemoryStore() arr = zarr.create_array(store=store, shape=shape, dtype=dtype, fill_value=0) if array_type == "async": - assert arr.async_array.nbytes == np.prod(arr.shape) * arr.dtype.itemsize + assert arr.nbytes == np.prod(arr.shape) * arr.dtype.itemsize else: assert arr.nbytes == np.prod(arr.shape) * arr.dtype.itemsize @@ -2081,7 +2081,7 @@ def test_chunk_grid_shape( shard_grid_shape = tuple(starmap(ceildiv, zip(array_shape, _shard_shape, strict=True))) assert arr._chunk_grid_shape == chunk_grid_shape assert arr.cdata_shape == chunk_grid_shape - assert arr.async_array.cdata_shape == chunk_grid_shape + assert arr.cdata_shape == chunk_grid_shape assert arr._shard_grid_shape == shard_grid_shape assert arr._nshards == np.prod(shard_grid_shape) @@ -2112,7 +2112,7 @@ def test_iter_chunk_coords( observed = tuple(_iter_chunk_coords(arr)) assert observed == expected assert observed == tuple(arr._iter_chunk_coords()) - assert observed == tuple(arr.async_array._iter_chunk_coords()) + assert observed == tuple(arr._iter_chunk_coords()) @pytest.mark.parametrize( @@ -2145,7 +2145,7 @@ def test_iter_shard_coords( observed = tuple(_iter_shard_coords(arr)) assert observed == expected assert observed == tuple(arr._iter_shard_coords()) - assert observed == tuple(arr.async_array._iter_shard_coords()) + assert observed == tuple(arr._iter_shard_coords()) @pytest.mark.parametrize( @@ -2180,7 +2180,7 @@ def test_iter_shard_keys( observed = tuple(_iter_shard_keys(arr)) assert observed == expected assert observed == tuple(arr._iter_shard_keys()) - assert observed == tuple(arr.async_array._iter_shard_keys()) + assert observed == tuple(arr._iter_shard_keys()) @pytest.mark.parametrize( @@ -2216,7 +2216,7 @@ def test_iter_shard_regions( observed = tuple(_iter_shard_regions(arr)) assert observed == expected assert observed == tuple(arr._iter_shard_regions()) - assert observed == 
tuple(arr.async_array._iter_shard_regions()) + assert observed == tuple(arr._iter_shard_regions()) @pytest.mark.parametrize( @@ -2245,7 +2245,7 @@ def test_iter_chunk_regions( observed = tuple(_iter_chunk_regions(arr)) assert observed == expected assert observed == tuple(arr._iter_chunk_regions()) - assert observed == tuple(arr.async_array._iter_chunk_regions()) + assert observed == tuple(arr._iter_chunk_regions()) @pytest.mark.parametrize("num_shards", [1, 3]) diff --git a/tests/test_v2.py b/tests/test_v2.py index 3a063ac509..bbf6c8ad8d 100644 --- a/tests/test_v2.py +++ b/tests/test_v2.py @@ -144,13 +144,13 @@ def test_create_array_defaults(store: Store) -> None: g = zarr.open(store, mode="w", zarr_format=2) assert isinstance(g, Group) arr = g.create_array("one", dtype="i8", shape=(1,), chunks=(1,), compressor=None) - assert arr.async_array.compressor is None + assert arr.compressor is None assert not (arr.filters) arr = g.create_array("two", dtype="i8", shape=(1,), chunks=(1,)) - assert arr.async_array.compressor is not None + assert arr.compressor is not None assert not (arr.filters) arr = g.create_array("three", dtype="i8", shape=(1,), chunks=(1,), compressor=Zstd()) - assert arr.async_array.compressor is not None + assert arr.compressor is not None assert not (arr.filters) with pytest.raises(ValueError): g.create_array( From 41e0468320e4369c383f36653eff6cae2d6e18c5 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 4 Jun 2026 10:48:06 +0200 Subject: [PATCH 14/24] feat(array): add async orthogonal/mask/coordinate/block selection methods Adds get/set_{orthogonal,mask,coordinate,block}_selection_async to Array and migrates tests off the deprecated async_array property where an Array async equivalent now exists. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/zarr/core/array.py | 139 +++++++++++++++++++++++++++++++++++++++ tests/test_indexing.py | 6 +- tests/test_properties.py | 16 +---- tests/test_runner.py | 39 +++++++++++ 4 files changed, 182 insertions(+), 18 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 3a16d00fb1..6b8c9e5b22 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -2696,6 +2696,145 @@ async def setitem_async( prototype=prototype, ) + async def get_orthogonal_selection_async( + self, + selection: OrthogonalSelection, + *, + out: NDBuffer | None = None, + fields: Fields | None = None, + prototype: BufferPrototype | None = None, + ) -> NDArrayLikeOrScalar: + if prototype is None: + prototype = default_buffer_prototype() + indexer = OrthogonalIndexer(selection, self.shape, self._chunk_grid) + return await self._get_selection( + indexer=indexer, out=out, fields=fields, prototype=prototype + ) + + async def set_orthogonal_selection_async( + self, + selection: OrthogonalSelection, + value: npt.ArrayLike, + *, + fields: Fields | None = None, + prototype: BufferPrototype | None = None, + ) -> None: + if prototype is None: + prototype = default_buffer_prototype() + indexer = OrthogonalIndexer(selection, self.shape, self._chunk_grid) + await self._set_selection(indexer, value, fields=fields, prototype=prototype) + + async def get_mask_selection_async( + self, + mask: MaskSelection, + *, + out: NDBuffer | None = None, + fields: Fields | None = None, + prototype: BufferPrototype | None = None, + ) -> NDArrayLikeOrScalar: + if prototype is None: + prototype = default_buffer_prototype() + indexer = MaskIndexer(mask, self.shape, self._chunk_grid) + return await self._get_selection( + indexer=indexer, out=out, fields=fields, prototype=prototype + ) + + async def set_mask_selection_async( + self, + mask: MaskSelection, + value: npt.ArrayLike, + *, + fields: Fields | None = None, + prototype: BufferPrototype | None = 
None, + ) -> None: + if prototype is None: + prototype = default_buffer_prototype() + indexer = MaskIndexer(mask, self.shape, self._chunk_grid) + await self._set_selection(indexer, value, fields=fields, prototype=prototype) + + async def get_coordinate_selection_async( + self, + selection: CoordinateSelection, + *, + out: NDBuffer | None = None, + fields: Fields | None = None, + prototype: BufferPrototype | None = None, + ) -> NDArrayLikeOrScalar: + if prototype is None: + prototype = default_buffer_prototype() + indexer = CoordinateIndexer(selection, self.shape, self._chunk_grid) + out_array = await self._get_selection( + indexer=indexer, out=out, fields=fields, prototype=prototype + ) + + if hasattr(out_array, "shape"): + # restore shape + out_array = np.array(out_array).reshape(indexer.sel_shape) + return out_array + + async def set_coordinate_selection_async( + self, + selection: CoordinateSelection, + value: npt.ArrayLike, + *, + fields: Fields | None = None, + prototype: BufferPrototype | None = None, + ) -> None: + if prototype is None: + prototype = default_buffer_prototype() + # setup indexer + indexer = CoordinateIndexer(selection, self.shape, self._chunk_grid) + + # handle value - need ndarray-like flatten value + if not is_scalar(value, self.dtype): + try: + from numcodecs.compat import ensure_ndarray_like + + value = ensure_ndarray_like(value) # TODO replace with agnostic + except TypeError: + # Handle types like `list` or `tuple` + value = np.array(value) # TODO replace with agnostic + if hasattr(value, "shape") and len(value.shape) > 1: + value = np.array(value).reshape(-1) + + if not is_scalar(value, self.dtype) and ( + isinstance(value, NDArrayLike) and indexer.shape != value.shape + ): + raise ValueError( + f"Attempting to set a selection of {indexer.sel_shape[0]} " + f"elements with an array of {value.shape[0]} elements." 
+ ) + + await self._set_selection(indexer, value, fields=fields, prototype=prototype) + + async def get_block_selection_async( + self, + selection: BasicSelection, + *, + out: NDBuffer | None = None, + fields: Fields | None = None, + prototype: BufferPrototype | None = None, + ) -> NDArrayLikeOrScalar: + if prototype is None: + prototype = default_buffer_prototype() + indexer = BlockIndexer(selection, self.shape, self._chunk_grid) + return await self._get_selection( + indexer=indexer, out=out, fields=fields, prototype=prototype + ) + + async def set_block_selection_async( + self, + selection: BasicSelection, + value: npt.ArrayLike, + *, + fields: Fields | None = None, + prototype: BufferPrototype | None = None, + ) -> None: + if prototype is None: + prototype = default_buffer_prototype() + indexer = BlockIndexer(selection, self.shape, self._chunk_grid) + await self._set_selection(indexer, value, fields=fields, prototype=prototype) + def __getitem__(self, selection: Selection) -> NDArrayLikeOrScalar: """Retrieve data for an item or region of the array. diff --git a/tests/test_indexing.py b/tests/test_indexing.py index ca878e2f2f..2eb515314b 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -2111,11 +2111,7 @@ class TestAsync: async def test_async_oindex(self, store, indexer, expected): z = zarr.create_array(store=store, shape=(2, 2), chunks=(1, 1), zarr_format=3, dtype="i8") z[...] 
= np.array([[1, 2], [3, 4]]) - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - async_zarr = z.async_array - - result = await async_zarr.oindex.getitem(indexer) + result = await z.get_orthogonal_selection_async(indexer) assert_array_equal(result, expected) @pytest.mark.asyncio diff --git a/tests/test_properties.py b/tests/test_properties.py index 60cd68c3ad..2694dff5be 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -1,7 +1,6 @@ import itertools import json import numbers -import warnings from collections.abc import Generator from typing import Any @@ -132,10 +131,7 @@ async def test_basic_indexing(data: st.DataObject) -> None: assert_array_equal(nparray[indexer], actual) # async get - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - async_zarray = zarray.async_array - actual = await async_zarray.getitem(indexer) + actual = await zarray.getitem_async(indexer) assert_array_equal(nparray[indexer], actual) # sync set @@ -176,10 +172,7 @@ async def test_oindex(data: st.DataObject) -> None: assert_array_equal(nparray[npindexer], actual) # async get - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - async_zarray = zarray.async_array - actual = await async_zarray.oindex.getitem(zindexer) + actual = await zarray.get_orthogonal_selection_async(zindexer) assert_array_equal(nparray[npindexer], actual) # sync get @@ -219,10 +212,7 @@ async def test_vindex(data: st.DataObject) -> None: assert_array_equal(nparray[indexer], actual) # async get - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - async_zarray = zarray.async_array - actual = await async_zarray.vindex.getitem(indexer) + actual = await zarray.get_coordinate_selection_async(indexer) assert_array_equal(nparray[indexer], actual) # sync set diff --git a/tests/test_runner.py b/tests/test_runner.py index adc3df58e1..448a649d2a 100644 --- 
a/tests/test_runner.py +++ b/tests/test_runner.py @@ -132,3 +132,42 @@ def test_nchunks_initialized_async() -> None: arr[:] = np.arange(8, dtype="i4") n = arr._runner.run(arr.nchunks_initialized_async()) assert n == arr.nchunks_initialized + + +def test_orthogonal_selection_async_roundtrip() -> None: + arr = zarr.create_array( + store=MemoryStore(), shape=(4, 4), chunks=(2, 2), dtype="i4", fill_value=0 + ) + arr[:] = np.arange(16, dtype="i4").reshape(4, 4) + expected = arr.get_orthogonal_selection(([0, 2], slice(None))) # type: ignore[arg-type] + actual = arr._runner.run(arr.get_orthogonal_selection_async(([0, 2], slice(None)))) # type: ignore[arg-type] + np.testing.assert_array_equal(actual, expected) + + +def test_coordinate_selection_async_roundtrip() -> None: + arr = zarr.create_array( + store=MemoryStore(), shape=(4, 4), chunks=(2, 2), dtype="i4", fill_value=0 + ) + arr[:] = np.arange(16, dtype="i4").reshape(4, 4) + expected = arr.get_coordinate_selection(([0, 1], [0, 1])) + actual = arr._runner.run(arr.get_coordinate_selection_async(([0, 1], [0, 1]))) + np.testing.assert_array_equal(actual, expected) + + +def test_block_selection_async_roundtrip() -> None: + arr = zarr.create_array( + store=MemoryStore(), shape=(4, 4), chunks=(2, 2), dtype="i4", fill_value=0 + ) + arr[:] = np.arange(16, dtype="i4").reshape(4, 4) + expected = arr.get_block_selection((0, 0)) + actual = arr._runner.run(arr.get_block_selection_async((0, 0))) + np.testing.assert_array_equal(actual, expected) + + +def test_set_orthogonal_selection_async() -> None: + arr = zarr.create_array( + store=MemoryStore(), shape=(4, 4), chunks=(2, 2), dtype="i4", fill_value=0 + ) + arr._runner.run(arr.set_orthogonal_selection_async(([0, 2], slice(None)), 7)) # type: ignore[arg-type] + expected = arr.get_orthogonal_selection(([0, 2], slice(None))) # type: ignore[arg-type] + np.testing.assert_array_equal(expected, np.full((2, 4), 7, dtype="i4")) From bf8e2e1b9fe7668c12fb415ab56b2316b7572193 Mon Sep 17 00:00:00 
2001 From: Davis Vann Bennett Date: Thu, 4 Jun 2026 11:15:38 +0200 Subject: [PATCH 15/24] refactor(array): sync selection methods delegate to async twins Eliminates duplicated indexer construction and coordinate value-validation by routing each sync selection method through self._runner.run of its *_async twin. Adds get/set_basic_selection_async for a complete surface. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/zarr/core/array.py | 125 ++++++++++++++++++----------------------- 1 file changed, 54 insertions(+), 71 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 6b8c9e5b22..8a04234396 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -2662,6 +2662,34 @@ async def _set_selection( fields=fields, ) + async def get_basic_selection_async( + self, + selection: BasicSelection = Ellipsis, + *, + out: NDBuffer | None = None, + prototype: BufferPrototype | None = None, + fields: Fields | None = None, + ) -> NDArrayLikeOrScalar: + if prototype is None: + prototype = default_buffer_prototype() + indexer = BasicIndexer(selection, self.shape, self._chunk_grid) + return await self._get_selection( + indexer=indexer, out=out, fields=fields, prototype=prototype + ) + + async def set_basic_selection_async( + self, + selection: BasicSelection, + value: npt.ArrayLike, + *, + fields: Fields | None = None, + prototype: BufferPrototype | None = None, + ) -> None: + if prototype is None: + prototype = default_buffer_prototype() + indexer = BasicIndexer(selection, self.shape, self._chunk_grid) + return await self._set_selection(indexer, value, fields=fields, prototype=prototype) + async def getitem_async( self, selection: BasicSelection, @@ -3221,15 +3249,8 @@ def get_basic_selection( """ - if prototype is None: - prototype = default_buffer_prototype() return self._runner.run( - self._get_selection( - BasicIndexer(selection, self.shape, self._chunk_grid), - out=out, - fields=fields, - prototype=prototype, - ) + 
self.get_basic_selection_async(selection, out=out, prototype=prototype, fields=fields) ) def set_basic_selection( @@ -3330,10 +3351,9 @@ def set_basic_selection( [__setitem__][zarr.Array.__setitem__] """ - if prototype is None: - prototype = default_buffer_prototype() - indexer = BasicIndexer(selection, self.shape, self._chunk_grid) - self._runner.run(self._set_selection(indexer, value, fields=fields, prototype=prototype)) + return self._runner.run( + self.set_basic_selection_async(selection, value, fields=fields, prototype=prototype) + ) def get_orthogonal_selection( self, @@ -3458,11 +3478,10 @@ def get_orthogonal_selection( [__setitem__][zarr.Array.__setitem__] """ - if prototype is None: - prototype = default_buffer_prototype() - indexer = OrthogonalIndexer(selection, self.shape, self._chunk_grid) return self._runner.run( - self._get_selection(indexer=indexer, out=out, fields=fields, prototype=prototype) + self.get_orthogonal_selection_async( + selection, out=out, fields=fields, prototype=prototype + ) ) def set_orthogonal_selection( @@ -3574,11 +3593,10 @@ def set_orthogonal_selection( [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], [__setitem__][zarr.Array.__setitem__] """ - if prototype is None: - prototype = default_buffer_prototype() - indexer = OrthogonalIndexer(selection, self.shape, self._chunk_grid) return self._runner.run( - self._set_selection(indexer, value, fields=fields, prototype=prototype) + self.set_orthogonal_selection_async( + selection, value, fields=fields, prototype=prototype + ) ) def get_mask_selection( @@ -3662,11 +3680,8 @@ def get_mask_selection( [__setitem__][zarr.Array.__setitem__] """ - if prototype is None: - prototype = default_buffer_prototype() - indexer = MaskIndexer(mask, self.shape, self._chunk_grid) return self._runner.run( - self._get_selection(indexer=indexer, out=out, fields=fields, prototype=prototype) + self.get_mask_selection_async(mask, out=out, fields=fields, prototype=prototype) ) def 
set_mask_selection( @@ -3749,10 +3764,9 @@ def set_mask_selection( [__setitem__][zarr.Array.__setitem__] """ - if prototype is None: - prototype = default_buffer_prototype() - indexer = MaskIndexer(mask, self.shape, self._chunk_grid) - self._runner.run(self._set_selection(indexer, value, fields=fields, prototype=prototype)) + return self._runner.run( + self.set_mask_selection_async(mask, value, fields=fields, prototype=prototype) + ) def get_coordinate_selection( self, @@ -3837,18 +3851,12 @@ def get_coordinate_selection( [__setitem__][zarr.Array.__setitem__] """ - if prototype is None: - prototype = default_buffer_prototype() - indexer = CoordinateIndexer(selection, self.shape, self._chunk_grid) - out_array = self._runner.run( - self._get_selection(indexer=indexer, out=out, fields=fields, prototype=prototype) + return self._runner.run( + self.get_coordinate_selection_async( + selection, out=out, fields=fields, prototype=prototype + ) ) - if hasattr(out_array, "shape"): - # restore shape - out_array = np.array(out_array).reshape(indexer.sel_shape) - return out_array - def set_coordinate_selection( self, selection: CoordinateSelection, @@ -3926,32 +3934,11 @@ def set_coordinate_selection( [__setitem__][zarr.Array.__setitem__] """ - if prototype is None: - prototype = default_buffer_prototype() - # setup indexer - indexer = CoordinateIndexer(selection, self.shape, self._chunk_grid) - - # handle value - need ndarray-like flatten value - if not is_scalar(value, self.dtype): - try: - from numcodecs.compat import ensure_ndarray_like - - value = ensure_ndarray_like(value) # TODO replace with agnostic - except TypeError: - # Handle types like `list` or `tuple` - value = np.array(value) # TODO replace with agnostic - if hasattr(value, "shape") and len(value.shape) > 1: - value = np.array(value).reshape(-1) - - if not is_scalar(value, self.dtype) and ( - isinstance(value, NDArrayLike) and indexer.shape != value.shape - ): - raise ValueError( - f"Attempting to set a selection 
of {indexer.sel_shape[0]} " - f"elements with an array of {value.shape[0]} elements." + return self._runner.run( + self.set_coordinate_selection_async( + selection, value, fields=fields, prototype=prototype ) - - self._runner.run(self._set_selection(indexer, value, fields=fields, prototype=prototype)) + ) def get_block_selection( self, @@ -4048,11 +4035,8 @@ def get_block_selection( [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], [__setitem__][zarr.Array.__setitem__] """ - if prototype is None: - prototype = default_buffer_prototype() - indexer = BlockIndexer(selection, self.shape, self._chunk_grid) return self._runner.run( - self._get_selection(indexer=indexer, out=out, fields=fields, prototype=prototype) + self.get_block_selection_async(selection, out=out, fields=fields, prototype=prototype) ) def set_block_selection( @@ -4146,10 +4130,9 @@ def set_block_selection( [__setitem__][zarr.Array.__setitem__] """ - if prototype is None: - prototype = default_buffer_prototype() - indexer = BlockIndexer(selection, self.shape, self._chunk_grid) - self._runner.run(self._set_selection(indexer, value, fields=fields, prototype=prototype)) + return self._runner.run( + self.set_block_selection_async(selection, value, fields=fields, prototype=prototype) + ) @property def vindex(self) -> VIndex: From 4d199762b6d4babc87154ddf3c744025e8012da4 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 4 Jun 2026 11:48:59 +0200 Subject: [PATCH 16/24] docs(array): document the *_async methods on Array Co-Authored-By: Claude Opus 4.8 (1M context) --- src/zarr/core/array.py | 344 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 344 insertions(+) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 8a04234396..427d9fdec2 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -2670,6 +2670,28 @@ async def get_basic_selection_async( prototype: BufferPrototype | None = None, fields: Fields | None = None, ) -> NDArrayLikeOrScalar: + 
"""Asynchronously retrieve data for an item or region of the array. + + This is the asynchronous variant of [`get_basic_selection`][zarr.Array.get_basic_selection]. + + Parameters + ---------- + selection : BasicSelection + A selection specifying the requested item or region for each dimension of the + array. May be any combination of int and/or slice or ellipsis for multidimensional arrays. + out : NDBuffer, optional + If given, load the selected data directly into this buffer. + prototype : BufferPrototype, optional + The prototype of the buffer to use for the output data. If not provided, the default buffer prototype is used. + fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to + extract data for. + + Returns + ------- + NDArrayLikeOrScalar + An array-like or scalar containing the data for the requested region. + """ if prototype is None: prototype = default_buffer_prototype() indexer = BasicIndexer(selection, self.shape, self._chunk_grid) @@ -2685,6 +2707,28 @@ async def set_basic_selection_async( fields: Fields | None = None, prototype: BufferPrototype | None = None, ) -> None: + """Asynchronously modify data for an item or region of the array. + + This is the asynchronous variant of [`set_basic_selection`][zarr.Array.set_basic_selection]. + + Parameters + ---------- + selection : tuple + A tuple specifying the requested item or region for each dimension of the + array. May be any combination of int and/or slice or ellipsis for multidimensional arrays. + value : npt.ArrayLike + An array-like containing values to be stored into the array. + fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to set + data for. + prototype : BufferPrototype, optional + The prototype of the buffer used for setting the data. If not provided, the + default buffer prototype is used. 
+ + Returns + ------- + None + """ if prototype is None: prototype = default_buffer_prototype() indexer = BasicIndexer(selection, self.shape, self._chunk_grid) @@ -2696,6 +2740,26 @@ async def getitem_async( *, prototype: BufferPrototype | None = None, ) -> NDArrayLikeOrScalar: + """Asynchronously retrieve data for an item or region of the array. + + This is the asynchronous variant of basic indexing via square bracket notation + (see [`__getitem__`][zarr.Array.__getitem__] and + [`get_basic_selection`][zarr.Array.get_basic_selection]). + + Parameters + ---------- + selection : BasicSelection + A selection specifying the requested item or region for each dimension of the + array. May be any combination of int and/or slice or ellipsis for multidimensional arrays. + prototype : BufferPrototype, optional + The prototype of the buffer to use for the output data. If not provided, the + default buffer prototype is used. + + Returns + ------- + NDArrayLikeOrScalar + An array-like or scalar containing the data for the requested region. + """ return await _getitem( self.store_path, self.metadata, @@ -2713,6 +2777,27 @@ async def setitem_async( *, prototype: BufferPrototype | None = None, ) -> None: + """Asynchronously modify data for an item or region of the array. + + This is the asynchronous variant of basic indexing via square bracket notation + (see [`__setitem__`][zarr.Array.__setitem__] and + [`set_basic_selection`][zarr.Array.set_basic_selection]). + + Parameters + ---------- + selection : BasicSelection + A selection specifying the requested item or region for each dimension of the + array. May be any combination of int and/or slice or ellipsis for multidimensional arrays. + value : npt.ArrayLike + An array-like containing values to be stored into the array. + prototype : BufferPrototype, optional + The prototype of the buffer used for setting the data. If not provided, the + default buffer prototype is used. 
+ + Returns + ------- + None + """ return await _setitem( self.store_path, self.metadata, @@ -2732,6 +2817,30 @@ async def get_orthogonal_selection_async( fields: Fields | None = None, prototype: BufferPrototype | None = None, ) -> NDArrayLikeOrScalar: + """Asynchronously retrieve data by making a selection for each dimension of the array. + + This is the asynchronous variant of + [`get_orthogonal_selection`][zarr.Array.get_orthogonal_selection]. + + Parameters + ---------- + selection : tuple + A selection for each dimension of the array. May be any combination of int, + slice, integer array or Boolean array. + out : NDBuffer, optional + If given, load the selected data directly into this buffer. + fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to + extract data for. + prototype : BufferPrototype, optional + The prototype of the buffer to use for the output data. If not provided, the + default buffer prototype is used. + + Returns + ------- + NDArrayLikeOrScalar + An array-like or scalar containing the data for the requested selection. + """ if prototype is None: prototype = default_buffer_prototype() indexer = OrthogonalIndexer(selection, self.shape, self._chunk_grid) @@ -2747,6 +2856,29 @@ async def set_orthogonal_selection_async( fields: Fields | None = None, prototype: BufferPrototype | None = None, ) -> None: + """Asynchronously modify data via a selection for each dimension of the array. + + This is the asynchronous variant of + [`set_orthogonal_selection`][zarr.Array.set_orthogonal_selection]. + + Parameters + ---------- + selection : tuple + A selection for each dimension of the array. May be any combination of int, + slice, integer array or Boolean array. + value : npt.ArrayLike + An array-like containing the data to be stored in the array. + fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to set + data for. 
+ prototype : BufferPrototype, optional + The prototype of the buffer used for setting the data. If not provided, the + default buffer prototype is used. + + Returns + ------- + None + """ if prototype is None: prototype = default_buffer_prototype() indexer = OrthogonalIndexer(selection, self.shape, self._chunk_grid) @@ -2760,6 +2892,30 @@ async def get_mask_selection_async( fields: Fields | None = None, prototype: BufferPrototype | None = None, ) -> NDArrayLikeOrScalar: + """Asynchronously retrieve a selection of individual items via a Boolean mask array. + + This is the asynchronous variant of + [`get_mask_selection`][zarr.Array.get_mask_selection]. + + Parameters + ---------- + mask : ndarray, bool + A Boolean array of the same shape as the array against which the selection is + being made. + out : NDBuffer, optional + If given, load the selected data directly into this buffer. + fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to + extract data for. + prototype : BufferPrototype, optional + The prototype of the buffer to use for the output data. If not provided, the + default buffer prototype is used. + + Returns + ------- + NDArrayLikeOrScalar + An array-like or scalar containing the data for the requested selection. + """ if prototype is None: prototype = default_buffer_prototype() indexer = MaskIndexer(mask, self.shape, self._chunk_grid) @@ -2775,6 +2931,29 @@ async def set_mask_selection_async( fields: Fields | None = None, prototype: BufferPrototype | None = None, ) -> None: + """Asynchronously modify a selection of individual items via a Boolean mask array. + + This is the asynchronous variant of + [`set_mask_selection`][zarr.Array.set_mask_selection]. + + Parameters + ---------- + mask : ndarray, bool + A Boolean array of the same shape as the array against which the selection is + being made. + value : npt.ArrayLike + An array-like containing values to be stored into the array. 
+ fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to set + data for. + prototype : BufferPrototype, optional + The prototype of the buffer used for setting the data. If not provided, the + default buffer prototype is used. + + Returns + ------- + None + """ if prototype is None: prototype = default_buffer_prototype() indexer = MaskIndexer(mask, self.shape, self._chunk_grid) @@ -2788,6 +2967,29 @@ async def get_coordinate_selection_async( fields: Fields | None = None, prototype: BufferPrototype | None = None, ) -> NDArrayLikeOrScalar: + """Asynchronously retrieve a selection of individual items by their coordinates. + + This is the asynchronous variant of + [`get_coordinate_selection`][zarr.Array.get_coordinate_selection]. + + Parameters + ---------- + selection : tuple + An integer (coordinate) array for each dimension of the array. + out : NDBuffer, optional + If given, load the selected data directly into this buffer. + fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to + extract data for. + prototype : BufferPrototype, optional + The prototype of the buffer to use for the output data. If not provided, the + default buffer prototype is used. + + Returns + ------- + NDArrayLikeOrScalar + An array-like or scalar containing the data for the requested coordinate selection. + """ if prototype is None: prototype = default_buffer_prototype() indexer = CoordinateIndexer(selection, self.shape, self._chunk_grid) @@ -2808,6 +3010,28 @@ async def set_coordinate_selection_async( fields: Fields | None = None, prototype: BufferPrototype | None = None, ) -> None: + """Asynchronously modify a selection of individual items by their coordinates. + + This is the asynchronous variant of + [`set_coordinate_selection`][zarr.Array.set_coordinate_selection]. 
+ + Parameters + ---------- + selection : tuple + An integer (coordinate) array for each dimension of the array. + value : npt.ArrayLike + An array-like containing values to be stored into the array. + fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to set + data for. + prototype : BufferPrototype, optional + The prototype of the buffer used for setting the data. If not provided, the + default buffer prototype is used. + + Returns + ------- + None + """ if prototype is None: prototype = default_buffer_prototype() # setup indexer @@ -2843,6 +3067,29 @@ async def get_block_selection_async( fields: Fields | None = None, prototype: BufferPrototype | None = None, ) -> NDArrayLikeOrScalar: + """Asynchronously retrieve a selection of individual blocks by their chunk indices. + + This is the asynchronous variant of + [`get_block_selection`][zarr.Array.get_block_selection]. + + Parameters + ---------- + selection : int or slice or tuple of int or slice + An integer (coordinate) or slice for each dimension of the array. + out : NDBuffer, optional + If given, load the selected data directly into this buffer. + fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to + extract data for. + prototype : BufferPrototype, optional + The prototype of the buffer to use for the output data. If not provided, the + default buffer prototype is used. + + Returns + ------- + NDArrayLikeOrScalar + An array-like or scalar containing the data for the requested block selection. + """ if prototype is None: prototype = default_buffer_prototype() indexer = BlockIndexer(selection, self.shape, self._chunk_grid) @@ -2858,6 +3105,28 @@ async def set_block_selection_async( fields: Fields | None = None, prototype: BufferPrototype | None = None, ) -> None: + """Asynchronously modify a selection of individual blocks by their chunk indices. 
+ + This is the asynchronous variant of + [`set_block_selection`][zarr.Array.set_block_selection]. + + Parameters + ---------- + selection : tuple + An integer (coordinate) or slice for each dimension of the array. + value : npt.ArrayLike + An array-like containing the data to be stored in the block selection. + fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to set + data for. + prototype : BufferPrototype, optional + The prototype of the buffer used for setting the data. If not provided, the + default buffer prototype is used. + + Returns + ------- + None + """ if prototype is None: prototype = default_buffer_prototype() indexer = BlockIndexer(selection, self.shape, self._chunk_grid) @@ -4320,25 +4589,100 @@ def info_complete(self) -> Any: return self._runner.run(self.info_complete_async()) async def resize_async(self, new_shape: ShapeLike, delete_outside_chunks: bool = True) -> None: + """Asynchronously change the shape of the array by growing or shrinking one or more dimensions. + + This is the asynchronous variant of [`resize`][zarr.Array.resize]. + + Parameters + ---------- + new_shape : tuple + New shape of the array. + delete_outside_chunks : bool, default True + If True, chunks that fall entirely outside the new array shape are deleted from + the underlying store. + + Returns + ------- + None + """ return await _resize(self, new_shape, delete_outside_chunks) async def append_async(self, data: npt.ArrayLike, axis: int = 0) -> tuple[int, ...]: + """Asynchronously append `data` along `axis`. + + This is the asynchronous variant of [`append`][zarr.Array.append]. + + Parameters + ---------- + data : array-like + Data to be appended. + axis : int + Axis along which to append. + + Returns + ------- + new_shape : tuple + The new shape of the array after appending the data. 
+ """ return await _append(self, data, axis) async def update_attributes_async(self, new_attributes: dict[str, JSON]) -> Self: + """Asynchronously update the array's attributes. + + This is the asynchronous variant of [`update_attributes`][zarr.Array.update_attributes]. + + Parameters + ---------- + new_attributes : dict + A dictionary of new attributes to update or add to the array. The keys represent attribute + names, and the values must be JSON-compatible. + + Returns + ------- + Array + The array with the updated attributes. + """ await _update_attributes(self, new_attributes) return self async def nchunks_initialized_async(self) -> int: + """Asynchronously calculate the number of chunks that have been initialized in storage. + + This is the asynchronous variant of the + [`nchunks_initialized`][zarr.Array.nchunks_initialized] property. + + Returns + ------- + nchunks_initialized : int + The number of chunks that have been initialized. + """ return await _nchunks_initialized(self) async def _nshards_initialized_async(self) -> int: return await _nshards_initialized(self) async def nbytes_stored_async(self) -> int: + """Asynchronously determine the size, in bytes, of the array actually written to the store. + + This is the asynchronous variant of [`nbytes_stored`][zarr.Array.nbytes_stored]. + + Returns + ------- + size : int + The size, in bytes, of the array actually written to the store. + """ return await _nbytes_stored(self.store_path) async def info_complete_async(self) -> Any: + """Asynchronously return all the information about an array, including information from the Store. + + This is the asynchronous variant of [`info_complete`][zarr.Array.info_complete]. + + Returns + ------- + ArrayInfo + All information about the array, including dynamic information read from the store. 
+ """ return await _info_complete(self) From 0bd104bbe899664b782e655e93b82bf58740d681 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 4 Jun 2026 11:49:43 +0200 Subject: [PATCH 17/24] docs: note Array constructor change in changelog Co-Authored-By: Claude Opus 4.8 (1M context) --- changes/4011.feature.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/changes/4011.feature.md b/changes/4011.feature.md index bb93b5b31e..f0641636a7 100644 --- a/changes/4011.feature.md +++ b/changes/4011.feature.md @@ -1 +1,3 @@ `Array` now owns its own state and accepts a keyword-only `runner` argument for plugging in a custom event loop. Every asynchronous array operation is available as a `*_async` method on `Array` (for example `Array.getitem_async`). `Array.async_array` is deprecated; use the `*_async` methods instead. + +The `Array` constructor signature changed from `Array(async_array)` to `Array(metadata, store_path, config=None, *, runner=None)`, mirroring `AsyncArray`. Code that previously constructed an `Array` by wrapping an `AsyncArray` should use the new `Array._from_async_array(async_array)` classmethod instead. 
From b7119c53339b5152dcbf8e6af514425391142df3 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 4 Jun 2026 12:10:54 +0200 Subject: [PATCH 18/24] fix: address roborev branch-review findings - update_attributes (sync) returns a fresh Array, preserving the prior contract - from_array docstring example uses a public construction path - align SupportsArrayState._iter_shard_keys signature with the real methods - restore AsyncArray coverage in test_get_shape_chunks - extract shared sharding-codec helper to dedup Array/AsyncArray properties Co-Authored-By: Claude Opus 4.8 (1M context) --- src/zarr/core/array.py | 68 +++++++++++++++-------------- tests/test_api/test_asynchronous.py | 28 +++++++++++- 2 files changed, 62 insertions(+), 34 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 427d9fdec2..da15048c3c 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -155,7 +155,7 @@ from zarr.abc.codec import CodecPipeline from zarr.abc.store import Store - from zarr.codecs.sharding import ShardingCodecIndexLocation + from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar from zarr.storage import StoreLike from zarr.types import AnyArray, AnyAsyncArray, ArrayV2, ArrayV3, AsyncArrayV2, AsyncArrayV3 @@ -202,6 +202,20 @@ def _chunk_sizes_from_shape( return tuple(result) +def _sharding_codec(metadata: ArrayMetadata) -> ShardingCodec | None: + """Return the array's sharding codec, or `None` if the array is not sharded. + + An array is considered sharded when its metadata declares exactly one codec + and that codec is a `ShardingCodec`. + """ + from zarr.codecs.sharding import ShardingCodec + + codecs: tuple[Codec, ...] 
= getattr(metadata, "codecs", ()) + if len(codecs) == 1 and isinstance(codecs[0], ShardingCodec): + return codecs[0] + return None + + def parse_array_metadata(data: Any) -> ArrayMetadata: if isinstance(data, ArrayMetadata): return data @@ -333,7 +347,6 @@ def shards(self) -> tuple[int, ...] | None: ... def _iter_shard_keys( self, - *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None, ) -> Iterator[str]: ... @@ -895,12 +908,9 @@ def read_chunk_sizes(self) -> tuple[tuple[int, ...], ...]: ((30, 30, 30, 10), (40, 40)) """ - from zarr.codecs.sharding import ShardingCodec - - codecs: tuple[Codec, ...] = getattr(self.metadata, "codecs", ()) - if len(codecs) == 1 and isinstance(codecs[0], ShardingCodec): - inner_chunk_shape = codecs[0].chunk_shape - return _chunk_sizes_from_shape(self.shape, inner_chunk_shape) + codec = _sharding_codec(self.metadata) + if codec is not None: + return _chunk_sizes_from_shape(self.shape, codec.chunk_shape) return self._chunk_grid.chunk_sizes @property @@ -1132,15 +1142,10 @@ def _chunk_grid_shape(self) -> tuple[int, ...]: tuple[int, ...] The number of chunks along each dimension. """ - # TODO: refactor — extract a sharding_codec property on ArrayV3Metadata - # to replace the repeated `len == 1 and isinstance` pattern. - from zarr.codecs.sharding import ShardingCodec - - codecs: tuple[Codec, ...] 
= getattr(self.metadata, "codecs", ()) - if len(codecs) == 1 and isinstance(codecs[0], ShardingCodec): + codec = _sharding_codec(self.metadata) + if codec is not None: # When sharding, count inner chunks across the whole array - chunk_shape = codecs[0].chunk_shape - return tuple(starmap(ceildiv, zip(self.shape, chunk_shape, strict=True))) + return tuple(starmap(ceildiv, zip(self.shape, codec.chunk_shape, strict=True))) return self._chunk_grid.grid_shape @property @@ -1366,7 +1371,7 @@ def _iter_shard_coords( ) def _iter_shard_keys( - self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None + self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None ) -> Iterator[str]: """ Iterate over the keys of the stored objects supporting this array. @@ -2126,12 +2131,9 @@ def read_chunk_sizes(self) -> tuple[tuple[int, ...], ...]: ((30, 30, 30, 10), (40, 40)) """ - from zarr.codecs.sharding import ShardingCodec - - codecs: tuple[Codec, ...] = getattr(self.metadata, "codecs", ()) - if len(codecs) == 1 and isinstance(codecs[0], ShardingCodec): - inner_chunk_shape = codecs[0].chunk_shape - return _chunk_sizes_from_shape(self.shape, inner_chunk_shape) + codec = _sharding_codec(self.metadata) + if codec is not None: + return _chunk_sizes_from_shape(self.shape, codec.chunk_shape) return self._chunk_grid.chunk_sizes @property @@ -2324,15 +2326,10 @@ def _chunk_grid_shape(self) -> tuple[int, ...]: tuple[int, ...] The number of chunks along each dimension. """ - # TODO: refactor — extract a sharding_codec property on ArrayV3Metadata - # to replace the repeated `len == 1 and isinstance` pattern. - from zarr.codecs.sharding import ShardingCodec - - codecs: tuple[Codec, ...] 
= getattr(self.metadata, "codecs", ()) - if len(codecs) == 1 and isinstance(codecs[0], ShardingCodec): + codec = _sharding_codec(self.metadata) + if codec is not None: # When sharding, count inner chunks across the whole array - chunk_shape = codecs[0].chunk_shape - return tuple(starmap(ceildiv, zip(self.shape, chunk_shape, strict=True))) + return tuple(starmap(ceildiv, zip(self.shape, codec.chunk_shape, strict=True))) return self._chunk_grid.grid_shape @property @@ -4529,7 +4526,12 @@ def update_attributes(self, new_attributes: dict[str, JSON]) -> Self: overwritten by the new values. """ self._runner.run(self.update_attributes_async(new_attributes)) - return self + return type(self)( + metadata=self.metadata, + store_path=self.store_path, + config=self.config, + runner=self._runner, + ) def __repr__(self) -> str: return f"" @@ -4939,7 +4941,7 @@ async def from_array( Create an array from an existing Array without copying the data: - >>> arr5 = asyncio.run(from_array({}, data=Array._from_async_array(arr4), write_data=False)) + >>> arr5 = asyncio.run(from_array({}, data=Array(metadata=arr4.metadata, store_path=arr4.store_path, config=arr4.config), write_data=False)) >>> arr5 >>> asyncio.run(arr5.getitem(...)) diff --git a/tests/test_api/test_asynchronous.py b/tests/test_api/test_asynchronous.py index c630b8e015..b9eff02757 100644 --- a/tests/test_api/test_asynchronous.py +++ b/tests/test_api/test_asynchronous.py @@ -9,6 +9,7 @@ from zarr import create_array from zarr.api.asynchronous import _get_shape_chunks, _like_args, group, open +from zarr.core.array import AsyncArray from zarr.core.buffer.core import default_buffer_prototype from zarr.core.group import AsyncGroup @@ -18,7 +19,6 @@ import numpy.typing as npt - from zarr.core.array import AsyncArray from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata from zarr.types import AnyArray @@ -38,6 +38,11 @@ class WithChunkLen(WithShape): chunklen: int +def _as_async_array(arr: Any) -> AsyncArray[Any]: + 
"""Build an `AsyncArray` mirroring the state of a sync `Array`.""" + return AsyncArray(arr.metadata, arr.store_path, arr.config) + + @pytest.mark.parametrize( ("observed", "expected"), [ @@ -80,6 +85,27 @@ def test_get_shape_chunks( "order": "C", }, ), + ( + _as_async_array( + create_array( + {}, + chunks=(10,), + shape=(100,), + dtype="f8", + compressors=None, + filters=None, + zarr_format=2, + ) + ), + { + "chunks": (10,), + "shape": (100,), + "dtype": np.dtype("f8"), + "compressor": None, + "filters": None, + "order": "C", + }, + ), ], ) def test_like_args( From 1403b0b64c3d305f5e55bf693387512f35334e84 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 4 Jun 2026 13:17:56 +0200 Subject: [PATCH 19/24] chore: remove planning docs --- .../2026-06-03-unify-array-asyncarray.md | 889 ------------------ ...026-06-03-unify-array-asyncarray-design.md | 218 ----- 2 files changed, 1107 deletions(-) delete mode 100644 docs/superpowers/plans/2026-06-03-unify-array-asyncarray.md delete mode 100644 docs/superpowers/specs/2026-06-03-unify-array-asyncarray-design.md diff --git a/docs/superpowers/plans/2026-06-03-unify-array-asyncarray.md b/docs/superpowers/plans/2026-06-03-unify-array-asyncarray.md deleted file mode 100644 index a135cfebf3..0000000000 --- a/docs/superpowers/plans/2026-06-03-unify-array-asyncarray.md +++ /dev/null @@ -1,889 +0,0 @@ -# Unify `Array` and `AsyncArray` via a pluggable `runner` — Implementation Plan - -> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. - -**Goal:** Make `Array` a self-contained class that owns its own state plus a pluggable `_runner`, exposes every async operation as a `*_async` coroutine method, and implements its synchronous methods as `self._runner.run(self.foo_async(...))` — while leaving `AsyncArray` untouched as a compatibility shim. 
- -**Architecture:** A `Runner` protocol (with default `SyncRunner` wrapping the existing `sync()`) is added to `zarr/core/sync.py`. `Array` stops wrapping an `AsyncArray`; it stores `metadata`, `store_path`, `config`, `codec_pipeline`, `_chunk_grid`, and `_runner` directly, and reuses the already-existing module-level async free functions (`_getitem`, `_setitem`, `_resize`, `_append`, `_update_attributes`, `_info_complete`, `_nchunks_initialized`, `_nshards_initialized`, `_nbytes_stored`, etc.). Several of those functions accept an array object and use its property surface; their parameter type is widened to a structural `SupportsArrayState` Protocol that both `Array` and `AsyncArray` satisfy, so no function bodies change. `Array.async_array` / `_async_array` are deprecated and reconstructed on demand. - -**Tech Stack:** Python 3.11+, `asyncio`, `typing.Protocol`, pytest. Run everything with `uv run`. - ---- - -## Execution log / deviations (kept current during implementation) - -- **Task 3 absorbed extra work** that the original decomposition under-anticipated, because the new `Array.__init__` signature and the removal of the `_async_array` attribute have repo-wide blast radius (the pre-commit hook type-checks the *whole* repo): - - Converted ALL external `Array(async_array)` construction sites to `Array._from_async_array(...)` in `src/zarr/api/synchronous.py` and `src/zarr/core/group.py` — **this is the entirety of Task 7**, so Task 7 is effectively complete after Task 3. - - Migrated the `_async_array`-referencing tests (`tests/test_array.py`, `tests/test_api/test_asynchronous.py`, `tests/test_codec_pipeline.py`) and removed a now-unused `# type: ignore` in `src/zarr/testing/stateful.py` — **this is part of Task 8**. 
- - Fixed downstream typing fallout: `src/zarr/core/attributes.py` (`cast` to `JSON`), `src/zarr/metadata/migrate_v3.py` (narrow on `isinstance(zarr_v2, Group)` instead of on `.metadata`), `tests/test_codecs/test_vlen.py` (`type: ignore[unreachable]`). - - **Added `Array.__eq__`** (compares `metadata`, `store_path`, `config`; `__hash__ = None`) — the old `@dataclass(frozen=False)` auto-generated equality that the plain-class rewrite dropped, breaking `test_serializable_sync_array`. -- **KNOWN INTERIM-RED STATE after Task 3:** 12 tests fail — the `test_resize_*` and `test_append_*` families. Cause: the *sync* `Array.resize`/`append` still call `sync(self.async_array.(...))`, but `async_array` is now a deprecated property that reconstructs a **throwaway** `AsyncArray` each access, so the mutation never lands on `self`. **Task 6 fixes this** by routing sync `resize`/`append` through `self._runner.run(self._async(...))`, which mutates `self` in place. The `shape` setter (which calls `self.resize`) is fixed transitively. Until Task 6, these 12 failures are expected; Task 9 must confirm they are GONE. -- Commits so far: Task 1 `5f6d4983`; Task 2 `2edd5a72` + docstring `7702eaf1`; Task 3 `ca9d1946` + eq fix `ca5fc345`. - ---- - -## File Structure - -- `src/zarr/core/sync.py` — add `Runner` Protocol + `SyncRunner` class. -- `src/zarr/core/array.py` — add `SupportsArrayState` Protocol; widen free-function annotations; rewrite `Array` (`__init__`, state, `*_async` methods, sync methods, `_from_async_array`, deprecated `async_array`). -- `tests/test_runner.py` — new tests for the runner protocol, custom-runner injection, sync/async equivalence, and `async_array` deprecation. -- `tests/test_array.py` — update the few spots that rely on the old construction / `async_array` access (only where they now warn). - ---- - -## Conventions for this plan - -- Always run tooling via `uv run` (e.g. `uv run pytest`, `uv run mypy`). 
-- Docstrings use single-backtick markdown (mkdocs), not RST double-backticks. -- Commit after each task once its tests pass. -- The type parameter on both classes is `T_ArrayMetadata: (ArrayV2Metadata, ArrayV3Metadata)`. - ---- - -### Task 1: Add the `Runner` protocol and `SyncRunner` to `sync.py` - -**Files:** -- Modify: `src/zarr/core/sync.py` -- Test: `tests/test_runner.py` (create) - -- [ ] **Step 1: Write the failing test** - -Create `tests/test_runner.py`: - -```python -from __future__ import annotations - -import asyncio - -from zarr.core.sync import Runner, SyncRunner - - -async def _coro() -> int: - await asyncio.sleep(0) - return 42 - - -def test_sync_runner_runs_coroutine() -> None: - runner = SyncRunner() - assert runner.run(_coro()) == 42 - - -def test_sync_runner_is_runner() -> None: - assert isinstance(SyncRunner(), Runner) -``` - -- [ ] **Step 2: Run test to verify it fails** - -Run: `uv run pytest tests/test_runner.py -v` -Expected: FAIL with `ImportError: cannot import name 'Runner'` (or `SyncRunner`). - -- [ ] **Step 3: Implement `Runner` and `SyncRunner`** - -In `src/zarr/core/sync.py`, add `Protocol`, `runtime_checkable`, and `TypeVar`/`T` to the imports as needed, then add near the bottom of the module (after `sync`): - -```python -@runtime_checkable -class Runner(Protocol): - """A `Runner` executes a coroutine and returns the awaited result. - - Implement this protocol to plug a custom event loop into `Array`. - """ - - def run(self, coro: Coroutine[Any, Any, T]) -> T: ... - - -class SyncRunner: - """The default `Runner`. Runs coroutines on Zarr's shared background event - loop via `sync`. 
- """ - - def run(self, coro: Coroutine[Any, Any, T]) -> T: - return sync(coro) -``` - -Add the supporting imports at the top of `sync.py`: - -```python -from typing import TYPE_CHECKING, Protocol, TypeVar, runtime_checkable -``` - -`Protocol`, `TypeVar`, and `runtime_checkable` are used at runtime (as a base class, in the `T = TypeVar("T")` assignment, and as a decorator), so import them outside `TYPE_CHECKING`. `T` itself, like `Coroutine` and `Any`, is referenced only in annotations, which are lazy under `from __future__ import annotations`. Keep `Coroutine` and `Any` in the existing `TYPE_CHECKING` block. Define `T` at module level, next to `P`: - -```python -T = TypeVar("T") -``` - -- [ ] **Step 4: Run test to verify it passes** - -Run: `uv run pytest tests/test_runner.py -v` -Expected: PASS (both tests). - -- [ ] **Step 5: Type-check** - -Run: `uv run mypy src/zarr/core/sync.py` -Expected: no new errors. - -- [ ] **Step 6: Commit** - -```bash -git add src/zarr/core/sync.py tests/test_runner.py -git commit -m "feat(sync): add Runner protocol and SyncRunner" -``` - ---- - -### Task 2: Add the `SupportsArrayState` protocol and widen free-function annotations - -The free functions `_resize`, `_append`, `_update_attributes`, `_info_complete`, `_nchunks_initialized`, `_nshards_initialized`, and `_shards_initialized` currently take `AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]`. They only use the array's property surface (`metadata`, `store_path`, `codec_pipeline`, `config`, `_chunk_grid`, `shape`, `shards`, `chunks`, `_info`) and mutate via `object.__setattr__`. Widen the annotation to a structural protocol so `Array` can pass `self`. - -**Files:** -- Modify: `src/zarr/core/array.py` - -- [ ] **Step 1: Add the protocol** - -Near the top-level definitions in `src/zarr/core/array.py` (after imports, before `AsyncArray`), add: - -```python -@runtime_checkable -class SupportsArrayState(Protocol): - """The structural surface the module-level array helpers rely on. 
- - Both `AsyncArray` and `Array` satisfy this protocol, which lets the - helper functions operate on either class. - """ - - metadata: ArrayMetadata - store_path: StorePath - codec_pipeline: CodecPipeline - config: ArrayConfig - _chunk_grid: ChunkGrid - - @property - def shape(self) -> tuple[int, ...]: ... - @property - def chunks(self) -> tuple[int, ...]: ... - @property - def shards(self) -> tuple[int, ...] | None: ... - - def _info( - self, - count_chunks_initialized: int | None = None, - count_bytes_stored: int | None = None, - ) -> Any: ... -``` - -Ensure `Protocol` and `runtime_checkable` are imported from `typing` at the top of `array.py`. - -- [ ] **Step 2: Widen the free-function signatures** - -In `src/zarr/core/array.py`, change the first parameter annotation of each of these functions from -`array: AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]` -to -`array: SupportsArrayState`: - -- `_nchunks_initialized` (~line 5267) -- `_nshards_initialized` (~line 5297) -- `_shards_initialized` (the function `_nshards_initialized` calls — find via `grep -n "async def _shards_initialized" src/zarr/core/array.py`) -- `_resize` (~line 5873) -- `_append` (~line 5925) -- `_update_attributes` (~line 5996) — also change its return annotation from the `AsyncArray[...]` union to `SupportsArrayState` -- `_info_complete` (~line 6023) - -Do NOT change `_nbytes_stored` (it already takes `store_path: StorePath`). - -- [ ] **Step 3: Type-check** - -Run: `uv run mypy src/zarr/core/array.py` -Expected: no new errors. (AsyncArray still satisfies the protocol structurally.) - -- [ ] **Step 4: Run the existing async-array tests to confirm no behavior change** - -Run: `uv run pytest tests/test_array.py -k "async" -q` -Expected: PASS (AsyncArray behavior unchanged). 
- -- [ ] **Step 5: Commit** - -```bash -git add src/zarr/core/array.py -git commit -m "refactor(array): widen array-helper params to SupportsArrayState protocol" -``` - ---- - -### Task 3: Give `Array` its own state and `__init__` (with `runner`) - -Replace the `@dataclass(frozen=False)` `Array` that wraps `_async_array` with an explicit class that owns its state. This task ONLY changes construction + state + the `async_array`/`_async_array` deprecation + a `_from_async_array` helper. Property/method rewiring happens in later tasks; to keep this task self-contained, the existing property bodies that read `self.async_array.X` will keep working because the deprecated `async_array` property reconstructs an `AsyncArray` on demand. - -**Files:** -- Modify: `src/zarr/core/array.py` (the `Array` class, starting ~line 1800) -- Test: `tests/test_runner.py` - -- [ ] **Step 1: Write the failing tests** - -Append to `tests/test_runner.py`: - -```python -import warnings - -import numpy as np -import pytest - -import zarr -from zarr.core.array import Array, AsyncArray -from zarr.core.sync import SyncRunner -from zarr.storage import MemoryStore - - -def _make_array() -> Array: - return zarr.create_array( - store=MemoryStore(), shape=(8,), chunks=(4,), dtype="i4", fill_value=0 - ) - - -def test_array_has_default_sync_runner() -> None: - arr = _make_array() - assert isinstance(arr._runner, SyncRunner) - - -def test_array_owns_state() -> None: - arr = _make_array() - # state lives on Array directly, not via a wrapped AsyncArray - assert arr.metadata is not None - assert arr.store_path is not None - assert arr.codec_pipeline is not None - - -def test_array_accepts_custom_runner() -> None: - class RecordingRunner: - def __init__(self) -> None: - self.calls = 0 - - def run(self, coro): # type: ignore[no-untyped-def] - self.calls += 1 - return SyncRunner().run(coro) - - runner = RecordingRunner() - aa = _make_array()._as_async() # helper defined below; or build AsyncArray directly - arr 
= Array( - metadata=aa.metadata, - store_path=aa.store_path, - config=aa.config, - runner=runner, - ) - _ = arr[:] - assert runner.calls > 0 - - -def test_async_array_property_deprecated() -> None: - arr = _make_array() - with pytest.warns(DeprecationWarning): - aa = arr.async_array - assert isinstance(aa, AsyncArray) - - -def test_from_async_array_roundtrip() -> None: - arr = _make_array() - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - aa = arr.async_array - arr2 = Array._from_async_array(aa) - assert arr2.metadata == arr.metadata - assert isinstance(arr2._runner, SyncRunner) -``` - -Note: remove the `._as_async()` reference — construct the `AsyncArray` for `test_array_accepts_custom_runner` directly instead: - -```python - base = _make_array() - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - aa = base.async_array - arr = Array(metadata=aa.metadata, store_path=aa.store_path, config=aa.config, runner=runner) -``` - -- [ ] **Step 2: Run tests to verify they fail** - -Run: `uv run pytest tests/test_runner.py -v` -Expected: FAIL (e.g. `Array` has no `_runner`, `__init__` signature mismatch, no `_from_async_array`). - -- [ ] **Step 3: Rewrite the `Array` class header, state, and `__init__`** - -Replace the dataclass decorator + `_async_array` field + `async_array` property (`src/zarr/core/array.py`, ~lines 1800–1817) with: - -```python -class Array[T_ArrayMetadata: (ArrayV2Metadata, ArrayV3Metadata)]: - """ - A Zarr array. 
- """ - - metadata: T_ArrayMetadata - store_path: StorePath - config: ArrayConfig - codec_pipeline: CodecPipeline - _chunk_grid: ChunkGrid - _runner: Runner - - def __init__( - self, - metadata: ArrayMetadata | ArrayMetadataDict, - store_path: StorePath, - config: ArrayConfigLike | None = None, - *, - runner: Runner | None = None, - ) -> None: - metadata_parsed = parse_array_metadata(metadata) - config_parsed = parse_array_config(config) - object.__setattr__(self, "metadata", metadata_parsed) - object.__setattr__(self, "store_path", store_path) - object.__setattr__(self, "config", config_parsed) - object.__setattr__(self, "_chunk_grid", ChunkGrid.from_metadata(metadata_parsed)) - object.__setattr__( - self, - "codec_pipeline", - create_codec_pipeline(metadata=metadata_parsed, store=store_path.store), - ) - object.__setattr__(self, "_runner", runner if runner is not None else SyncRunner()) - - @classmethod - def _from_async_array( - cls, - async_array: AsyncArray[T_ArrayMetadata], - *, - runner: Runner | None = None, - ) -> Self: - return cls( - metadata=async_array.metadata, - store_path=async_array.store_path, - config=async_array.config, - runner=runner, - ) - - @property - def async_array(self) -> AsyncArray[T_ArrayMetadata]: - """An asynchronous version of this array. - - .. deprecated:: - Use the `*_async` methods on `Array` instead. This property will be - removed in a future release. - """ - warnings.warn( - "Array.async_array is deprecated; use the *_async methods on Array instead.", - DeprecationWarning, - stacklevel=2, - ) - return AsyncArray(self.metadata, self.store_path, self.config) -``` - -Notes: -- Keep using `object.__setattr__` so the class can later become frozen again without churn (matches `AsyncArray`). -- Remove the `_async_array` class attribute entirely. -- Ensure `Runner`, `SyncRunner` are imported from `zarr.core.sync`, and `warnings` is imported at the top of `array.py`. 
-- `create_codec_pipeline` is already defined in this module (line ~224). - -- [ ] **Step 4: Fix the `_chunk_grid` and `config` properties** - -The existing `config` property (~line 1819) returns `self.async_array.config` and `_chunk_grid` property (~line 1832) returns `self.async_array._chunk_grid`. These now collide with the real attributes. DELETE both property definitions — `config` and `_chunk_grid` are now plain attributes set in `__init__`. - -- [ ] **Step 5: Add an `_info` method to `Array`** - -`Array` needs `_info` for the `SupportsArrayState` protocol (used in Task 6). Add it to `Array` (mirroring `AsyncArray._info`, ~line 1777): - -```python - def _info( - self, count_chunks_initialized: int | None = None, count_bytes_stored: int | None = None - ) -> Any: - chunk_shape = self.chunks if self._chunk_grid.is_regular else None - return ArrayInfo( - _zarr_format=self.metadata.zarr_format, - _data_type=self._zdtype, - _fill_value=self.metadata.fill_value, - _shape=self.shape, - _order=self.order, - _shard_shape=self.shards, - _chunk_shape=chunk_shape, - _read_only=self.read_only, - _compressors=self.compressors, - _filters=self.filters, - _serializer=self.serializer, - _store_type=type(self.store_path.store).__name__, - _count_bytes=self.nbytes, - _count_bytes_stored=count_bytes_stored, - _count_chunks_initialized=count_chunks_initialized, - ) -``` - -This requires `Array` to expose `_zdtype`, which currently lives only on `AsyncArray` (~line 972). 
Add an `Array._zdtype` property copied verbatim from `AsyncArray._zdtype`: - -```python - @property - def _zdtype(self) -> ZDType[TBaseDType, TBaseScalar]: - """ - The zarr-specific representation of the array data type - """ - if self.metadata.zarr_format == 2: - return self.metadata.dtype - else: - return self.metadata.data_type -``` - -- [ ] **Step 6: Fix `with_config` to thread the runner** - -Replace `with_config` body (~line 2233) `return type(self)(self._async_array.with_config(config))` with construction from new state. IMPORTANT: `AsyncArray.with_config` (~line 1156) does NOT use `parse_array_config`; it merges the new config over the existing one. Replicate that merge exactly and thread the runner: - -```python - if isinstance(config, ArrayConfig): - new_config = config - else: - # Merge new config with existing config, so missing keys are inherited - # from the current array rather than from global defaults - new_config = ArrayConfig(**{**self.config.to_dict(), **config}) # type: ignore[arg-type] - return type(self)( - metadata=self.metadata, - store_path=self.store_path, - config=new_config, - runner=self._runner, - ) -``` - -- [ ] **Step 7: Update the three `Array` classmethods that build via `cls(async_array)`** - -`Array._create` (~line 1892), `Array.from_dict` (~line 1923), `Array.open` (~line 1945) end with `return cls(async_array)`. Change each to `return cls._from_async_array(async_array)`. - -- [ ] **Step 8: Run the new runner tests** - -Run: `uv run pytest tests/test_runner.py -v` -Expected: PASS. - -- [ ] **Step 9: Commit** - -```bash -git add src/zarr/core/array.py tests/test_runner.py -git commit -m "feat(array): Array owns its own state + runner; deprecate async_array" -``` - ---- - -### Task 4: Rewire `Array`'s property delegations to its own state - -There are ~48 `self.async_array.X` reads in property bodies (verify count: `grep -c "self\.async_array\." src/zarr/core/array.py`). 
With `async_array` now warning, these MUST be repointed to `Array`'s own state or the deprecation warning fires on every property access. - -**Files:** -- Modify: `src/zarr/core/array.py` (the `Array` property bodies, ~lines 1947–2248) - -- [ ] **Step 1: Run the property tests first (capture current passing state)** - -Run: `uv run pytest tests/test_array.py -q -k "property or shape or dtype or attrs or nbytes or chunks or shards" 2>&1 | tail -20` -Expected: PASS now (baseline). - -- [ ] **Step 2: Repoint each delegating property** - -For every `Array` property/method whose body reads `self.async_array.X`, replace with the direct equivalent. The mapping is mechanical because `Array` now holds the same state. Examples (apply the same pattern to all): - -- `store` → `return self.store_path.store` -- `ndim` → `return self.metadata.ndim` -- `shape` getter → `return self.metadata.shape` -- `chunks` → copy the `AsyncArray.chunks` body (reads `self.metadata`) -- `shards` → copy `AsyncArray.shards` body -- `size` → copy `AsyncArray.size` body -- `dtype` → copy `AsyncArray.dtype` body -- `attrs` → `return Attributes(self)` (match current `Array.attrs` semantics; check ~line 2080) -- `path`, `name`, `basename`, `order`, `read_only`, `fill_value`, `filters`, `serializer`, `compressor`, `compressors`, `cdata_shape`, `_chunk_grid_shape`, `_shard_grid_shape`, `nchunks`, `_nshards`, `nbytes` → copy each corresponding `AsyncArray` property body (all read from `self.metadata` / `self.config` / `self._chunk_grid` / `self.store_path`). -- `metadata` property (~line 2111) — `Array` had a `metadata` property returning `self.async_array.metadata`; now `metadata` is a plain attribute. DELETE the property. -- `store_path` property (~line 2115) — same: DELETE the property; it's a plain attribute now. - -Strategy: for each `AsyncArray` property, the body already uses exactly `self.metadata`/`self.config`/`self._chunk_grid`/`self.store_path`. 
Copy the body verbatim into the `Array` property of the same name. Use `grep -n "self\.async_array\." src/zarr/core/array.py` to enumerate remaining sites and confirm ZERO remain in property bodies when done (the only acceptable remaining `async_array` reference is inside the deprecated `async_array` property itself, which doesn't reference `self.async_array`). - -- [ ] **Step 3: Verify no stray `self.async_array.` reads remain** - -Run: `grep -n "self\.async_array\." src/zarr/core/array.py` -Expected: NO output (zero matches). - -- [ ] **Step 4: Run the array property tests** - -Run: `uv run pytest tests/test_array.py -q -k "property or shape or dtype or attrs or nbytes or chunks or shards" 2>&1 | tail -20` -Expected: PASS, and no `DeprecationWarning` emitted (run with `-W error::DeprecationWarning` to be strict): -Run: `uv run pytest tests/test_array.py -q -W error::DeprecationWarning -k "property or shape or dtype" 2>&1 | tail -20` -Expected: PASS. - -- [ ] **Step 5: Commit** - -```bash -git add src/zarr/core/array.py -git commit -m "refactor(array): repoint Array properties to own state" -``` - ---- - -### Task 5: Add the read/write `*_async` methods and rewire sync selection methods - -Add `getitem_async`, `setitem_async`, and the basic/orthogonal/mask/coordinate/block selection `*_async` twins to `Array`, calling the existing free functions. Rewrite the synchronous selection methods to use `self._runner.run(self.X_async(...))` instead of `sync(self.async_array.X(...))`. 
- -**Files:** -- Modify: `src/zarr/core/array.py` (`Array` selection methods, ~lines 2426–3767) -- Test: `tests/test_runner.py` - -- [ ] **Step 1: Write the failing equivalence test** - -Append to `tests/test_runner.py`: - -```python -def test_getitem_sync_async_equivalence() -> None: - arr = _make_array() - arr[:] = np.arange(8, dtype="i4") - sync_result = arr[2:6] - async_via_runner = arr._runner.run(arr.getitem_async(slice(2, 6))) - np.testing.assert_array_equal(sync_result, async_via_runner) - - -def test_setitem_async_roundtrip() -> None: - arr = _make_array() - arr._runner.run(arr.setitem_async(slice(0, 4), np.arange(4, dtype="i4"))) - np.testing.assert_array_equal(arr[0:4], np.arange(4, dtype="i4")) -``` - -- [ ] **Step 2: Run to verify failure** - -Run: `uv run pytest tests/test_runner.py -k "equivalence or setitem_async" -v` -Expected: FAIL with `AttributeError: 'Array' object has no attribute 'getitem_async'`. - -- [ ] **Step 3: Add the core `*_async` methods to `Array`** - -Add to `Array` (place near the selection methods). 
Each delegates to the existing module-level free functions exactly as `AsyncArray` does: - -```python - async def _get_selection_async( - self, - indexer: Indexer, - *, - prototype: BufferPrototype, - out: NDBuffer | None = None, - fields: Fields | None = None, - ) -> NDArrayLikeOrScalar: - return await _get_selection( - self.store_path, - self.metadata, - self.codec_pipeline, - self.config, - self._chunk_grid, - indexer, - prototype=prototype, - out=out, - fields=fields, - ) - - async def _set_selection_async( - self, - indexer: Indexer, - value: npt.ArrayLike, - *, - prototype: BufferPrototype, - fields: Fields | None = None, - ) -> None: - return await _set_selection( - self.store_path, - self.metadata, - self.codec_pipeline, - self.config, - self._chunk_grid, - indexer, - value, - prototype=prototype, - fields=fields, - ) - - async def getitem_async( - self, - selection: BasicSelection, - *, - prototype: BufferPrototype | None = None, - ) -> NDArrayLikeOrScalar: - return await _getitem( - self.store_path, - self.metadata, - self.codec_pipeline, - self.config, - self._chunk_grid, - selection, - prototype=prototype, - ) - - async def setitem_async( - self, - selection: BasicSelection, - value: npt.ArrayLike, - *, - prototype: BufferPrototype | None = None, - ) -> None: - return await _setitem( - self.store_path, - self.metadata, - self.codec_pipeline, - self.config, - self._chunk_grid, - selection, - value, - prototype=prototype, - ) -``` - -Confirm the exact arg order of `_get_selection`, `_set_selection`, `_getitem`, `_setitem` by reading their definitions (`grep -n "^async def _getitem\|^async def _setitem\|^async def _get_selection\|^async def _set_selection" src/zarr/core/array.py`) and match them. The `AsyncArray._get_selection`/`getitem`/`setitem` bodies (lines ~1416, ~1436, ~1574) are the reference — copy their call shape. 
- -- [ ] **Step 4: Rewrite the synchronous selection methods** - -Replace every `sync(self.async_array.X(...))` and `sync(self.async_array._set_selection(...))` / `sync(self.async_array._get_selection(...))` call inside `Array`'s sync selection methods with the runner + `*_async` equivalent. Concretely, for the selection getters/setters (`__getitem__`, `__setitem__`, `get_basic_selection`, `set_basic_selection`, `get_orthogonal_selection`, `set_orthogonal_selection`, `get_mask_selection`, `set_mask_selection`, `get_coordinate_selection`, `set_coordinate_selection`, `get_block_selection`, `set_block_selection`): - -- Where the body did `sync(self.async_array._get_selection(indexer, ...))`, change to `self._runner.run(self._get_selection_async(indexer, ...))`. -- Where the body did `sync(self.async_array._set_selection(indexer, value, ...))`, change to `self._runner.run(self._set_selection_async(indexer, value, ...))`. - -The indexer-construction logic in these sync methods stays exactly as-is; only the terminal `sync(self.async_array._..._selection(...))` call changes. Enumerate them with `grep -n "sync(self.async_array._set_selection\|sync(self.async_array._get_selection\|sync($" src/zarr/core/array.py` and the broader `grep -n "self.async_array._get_selection\|self.async_array._set_selection" src/zarr/core/array.py`. - -- [ ] **Step 5: Run the equivalence tests** - -Run: `uv run pytest tests/test_runner.py -k "equivalence or setitem_async" -v` -Expected: PASS. - -- [ ] **Step 6: Run the full selection test suite** - -Run: `uv run pytest tests/test_array.py -q -k "selection or getitem or setitem or basic or orthogonal or mask or coordinate or block" 2>&1 | tail -25` -Expected: PASS. 
- -- [ ] **Step 7: Commit** - -```bash -git add src/zarr/core/array.py tests/test_runner.py -git commit -m "feat(array): add selection *_async methods; route sync selection via runner" -``` - ---- - -### Task 6: Add remaining `*_async` methods (`resize`, `append`, `update_attributes`, `info_complete`, `nchunks_initialized`, `nbytes_stored`) and rewire their sync wrappers - -These reuse the free functions widened in Task 2 (`_resize`, `_append`, `_update_attributes`, `_info_complete`, `_nchunks_initialized`, `_nshards_initialized`, `_nbytes_stored`). - -**Files:** -- Modify: `src/zarr/core/array.py` -- Test: `tests/test_runner.py` - -- [ ] **Step 1: Write the failing tests** - -Append to `tests/test_runner.py`: - -```python -def test_resize_async() -> None: - arr = _make_array() - arr._runner.run(arr.resize_async((16,))) - assert arr.shape == (16,) - - -def test_update_attributes_async() -> None: - arr = _make_array() - arr._runner.run(arr.update_attributes_async({"foo": "bar"})) - assert arr.metadata.attributes["foo"] == "bar" - - -def test_nchunks_initialized_async() -> None: - arr = _make_array() - arr[:] = np.arange(8, dtype="i4") - n = arr._runner.run(arr.nchunks_initialized_async()) - assert n == arr.nchunks_initialized # sync property matches async result -``` - -- [ ] **Step 2: Run to verify failure** - -Run: `uv run pytest tests/test_runner.py -k "resize_async or update_attributes_async or nchunks_initialized_async" -v` -Expected: FAIL (`AttributeError`). 
- -- [ ] **Step 3: Add the `*_async` methods to `Array`** - -```python - async def resize_async( - self, new_shape: ShapeLike, delete_outside_chunks: bool = True - ) -> None: - return await _resize(self, new_shape, delete_outside_chunks) - - async def append_async(self, data: npt.ArrayLike, axis: int = 0) -> tuple[int, ...]: - return await _append(self, data, axis) - - async def update_attributes_async(self, new_attributes: dict[str, JSON]) -> Self: - await _update_attributes(self, new_attributes) - return self - - async def nchunks_initialized_async(self) -> int: - return await _nchunks_initialized(self) - - async def _nshards_initialized_async(self) -> int: - return await _nshards_initialized(self) - - async def nbytes_stored_async(self) -> int: - return await _nbytes_stored(self.store_path) - - async def info_complete_async(self) -> Any: - return await _info_complete(self) -``` - -Note `_resize` mutates `self` in place via `object.__setattr__` — this works on `Array` because it is not frozen. 
- -- [ ] **Step 4: Rewrite the corresponding sync methods** - -Replace the sync bodies (currently `sync(self.async_array.X(...))`, ~lines 2274, 2296, 2306, 3831, 3867, 3894, 3952): - -- `nchunks_initialized` property (~2274): `return self._runner.run(self.nchunks_initialized_async())` -- `_nshards_initialized` property (~2296): `return self._runner.run(self._nshards_initialized_async())` -- `nbytes_stored` (~2306): `return self._runner.run(self.nbytes_stored_async())` -- `resize` (~3831): `self._runner.run(self.resize_async(new_shape))` (keep the existing wrapper's return/type; check whether `Array.resize` returns a new array or `None` — read ~line 3800–3835 and preserve its current return contract) -- `append` (~3867): `return self._runner.run(self.append_async(data, axis=axis))` -- `update_attributes` (~3894): the current body does `new_array = sync(self.async_array.update_attributes(new_attributes))` then `return type(self)(new_array)` (wrapping the returned `AsyncArray`). Under the new design `update_attributes_async` mutates `self` in place and returns `self` (an `Array`), so replace the whole body with: - ```python - self._runner.run(self.update_attributes_async(new_attributes)) - return self - ``` - This matches the prior observable behavior: attributes are persisted and an `Array` with the updated metadata is returned. (Previously a fresh wrapper was returned; now `self` is mutated and returned. If a test asserts the returned object is a *distinct* instance, adjust to `return type(self)(metadata=self.metadata, store_path=self.store_path, config=self.config, runner=self._runner)` — check `tests/test_array.py` for such an assertion in Task 8.) -- `info_complete` (~3952): `return self._runner.run(self.info_complete_async())` - -For each, read the surrounding 10 lines first to preserve the exact return type and any post-processing. - -- [ ] **Step 5: Confirm zero `self.async_array.` references remain anywhere** - -Run: `grep -n "self\.async_array\." 
src/zarr/core/array.py` -Expected: NO output. - -- [ ] **Step 6: Run the new tests + relevant suite** - -Run: `uv run pytest tests/test_runner.py -v` -Run: `uv run pytest tests/test_array.py -q -k "resize or append or update_attributes or info or nchunks or nbytes" 2>&1 | tail -25` -Expected: PASS. - -- [ ] **Step 7: Commit** - -```bash -git add src/zarr/core/array.py tests/test_runner.py -git commit -m "feat(array): add remaining *_async methods; route sync wrappers via runner" -``` - ---- - -### Task 7: Update external construction sites to `_from_async_array` - -The `Array(async_array)` call sites outside `array.py` must use the new construction path. - -**Files:** -- Modify: `src/zarr/api/synchronous.py` (lines ~763, ~947, ~1168, ~1359) -- Modify: `src/zarr/core/group.py` (lines ~2272, ~2656, ~2779) - -- [ ] **Step 1: Find all external `Array(` construction-from-async sites** - -Run: `grep -rn "Array(async_array\|Array($" src/zarr/api/synchronous.py src/zarr/core/group.py` -Also check each `Array(` call's argument: only those passing an `AsyncArray` positionally need changing. - -- [ ] **Step 2: Replace each with `_from_async_array`** - -For each site that does `Array(X)`, where `X` is an `AsyncArray`, change to `Array._from_async_array(X)`. For example in `group.py`: -`yield name, Array(async_array)` → `yield name, Array._from_async_array(async_array)`. - -Read each call's surrounding lines to confirm the single positional arg is an `AsyncArray` (the variable is usually named `async_array` or is an `await AsyncArray.open(...)` result). - -- [ ] **Step 3: Type-check the modified modules** - -Run: `uv run mypy src/zarr/api/synchronous.py src/zarr/core/group.py` -Expected: no new errors. - -- [ ] **Step 4: Run the api + group test suites** - -Run: `uv run pytest tests/test_api.py tests/test_group.py -q 2>&1 | tail -25` -Expected: PASS. 
- - [ ] **Step 5: Commit** - -```bash -git add src/zarr/api/synchronous.py src/zarr/core/group.py -git commit -m "refactor: construct Array via _from_async_array at external call sites" -``` - ---- - -### Task 8: Update tests that touch `async_array` directly + add changelog - -Existing tests read `arr.async_array.X` (e.g. `tests/test_array.py:384`, `:418`, `:419`). These now emit `DeprecationWarning`. Update them to use the new sync/async surface, or wrap in a warning filter where the intent is specifically to test the deprecated property. - -**Files:** -- Modify: `tests/test_array.py` -- Create: `changes/NNNN.feature.md` (the repo uses towncrier with `.md` fragments, e.g. `changes/3826.feature.md`) - -- [ ] **Step 1: Find tests referencing `async_array`** - -Run: `grep -rn "\.async_array" tests/` - -- [ ] **Step 2: Update each reference** - -- `arr.async_array.nchunks` → `arr.nchunks` (the sync property). -- `await arr.async_array._nshards_initialized()` → `arr._runner.run(arr._nshards_initialized_async())` or, if the test is async, `await arr._nshards_initialized_async()`. -- `await arr.async_array.nchunks_initialized()` → `await arr.nchunks_initialized_async()` (async test) or `arr.nchunks_initialized` (sync property). - -For any test whose explicit purpose is to verify `async_array` still works as a deprecated shim, wrap access in `pytest.warns(DeprecationWarning)` instead of removing it. - -- [ ] **Step 3: Add the changelog fragment** - -The repo uses towncrier with markdown fragments named `changes/NNNN.TYPE.md` (e.g. `changes/3826.feature.md`). Create `changes/NNNN.feature.md` (substitute the actual PR number for `NNNN`) containing: - -```markdown -`Array` now owns its own state and accepts a keyword-only `runner` argument for plugging in a custom event loop. Every async operation is available as a `*_async` method on `Array`. `Array.async_array` is deprecated; use the `*_async` methods instead. 
-``` - -- [ ] **Step 4: Run the updated tests with deprecation-as-error** - -Run: `uv run pytest tests/test_array.py -q -W error::DeprecationWarning 2>&1 | tail -30` -Expected: PASS (no un-suppressed deprecation warnings escape). - -- [ ] **Step 5: Commit** - -```bash -git add tests/test_array.py changes/ -git commit -m "test: migrate async_array usages; add changelog for Array unification" -``` - ---- - -### Task 9: Full verification sweep - -**Files:** none (verification only) - -- [ ] **Step 1: Run the full array + sync + runner + api + group suites** - -Run: `uv run pytest tests/test_array.py tests/test_runner.py tests/test_sync.py tests/test_api.py tests/test_group.py -q 2>&1 | tail -30` -Expected: all PASS. - -- [ ] **Step 2: Run the complete test suite** - -Run: `uv run pytest -q 2>&1 | tail -40` -Expected: PASS (or only pre-existing unrelated failures — compare against a clean `main` run if anything fails). - -- [ ] **Step 3: Type-check the whole package** - -Run: `uv run mypy src/zarr 2>&1 | tail -30` -Expected: no new errors. - -- [ ] **Step 4: Run the linters / pre-commit** - -Run: `uv run pre-commit run --all-files 2>&1 | tail -40` -Expected: PASS. - -- [ ] **Step 5: Confirm the invariant holds** - -Run: `grep -rn "self\.async_array\." src/zarr/core/array.py` -Expected: NO output. The only `async_array` reference in `array.py` is the deprecated property definition itself. 
- -- [ ] **Step 6: Final commit (if any lint fixes were applied)** - -```bash -git add -A -git commit -m "chore: lint/type fixes for Array unification" -``` - ---- - -## Self-Review notes - -- **Spec coverage:** Runner protocol (Task 1) ✓; SupportsArrayState + widened helpers (Task 2) ✓; Array owns state + `runner` + deprecated `async_array` + `_from_async_array` (Task 3) ✓; property repoint (Task 4) ✓; selection `*_async` + runner routing (Task 5) ✓; remaining `*_async` + sync routing (Task 6) ✓; external construction sites (Task 7) ✓; test migration + changelog (Task 8) ✓; full regression + new runner tests (Task 9 + Tasks 1/3/5/6) ✓. -- **Discovery vs. spec:** The spec assumed several methods had inline bodies needing extraction; in fact the free functions already exist but take an array object. Section 3's "extract free functions" is therefore replaced by "widen existing free-function signatures to a Protocol" (Task 2) — a smaller, safer change that still achieves the single-source-of-truth goal. -- **Type consistency:** `getitem_async`/`setitem_async`/`resize_async`/`append_async`/`update_attributes_async`/`nchunks_initialized_async`/`_nshards_initialized_async`/`nbytes_stored_async`/`info_complete_async` and `_from_async_array`, `SupportsArrayState`, `Runner`, `SyncRunner` are used consistently across tasks. -- **Verification points where exact line numbers are approximate:** each such step instructs reading the surrounding lines first and preserving the current return contract, since line numbers will drift as edits land. 
diff --git a/docs/superpowers/specs/2026-06-03-unify-array-asyncarray-design.md b/docs/superpowers/specs/2026-06-03-unify-array-asyncarray-design.md deleted file mode 100644 index 3a505671bd..0000000000 --- a/docs/superpowers/specs/2026-06-03-unify-array-asyncarray-design.md +++ /dev/null @@ -1,218 +0,0 @@ -# Unifying `Array` and `AsyncArray` via a pluggable `runner` - -Date: 2026-06-03 -Branch: `one-array-class` - -## Goal - -Unify the `Array` and `AsyncArray` classes so that a single `Array` class owns -all array logic. Today `Array` is a thin synchronous wrapper that holds an -`AsyncArray` and delegates every operation through `sync(self.async_array.foo(...))`. -After this work, `Array` owns its own state and exposes every async operation as -a `*_async` coroutine method, with the synchronous methods implemented by running -that coroutine through a pluggable `_runner`. - -The `runner` lets a user wire in their own event loop for the async side of array -operations, instead of being locked into Zarr's background-thread event loop. - -## Scope and non-goals - -- **In scope:** add a keyword-only `runner` argument to `Array.__init__`; give - `Array` its own state and `*_async` methods; rewrite the synchronous methods to - go through `_runner`; extract remaining inline async logic into shared - module-level free functions; deprecate `Array.async_array` / `Array._async_array`. -- **Out of scope / non-goals:** - - We do **not** modify `AsyncArray`. It stays exactly as it is and remains the - public async class (compatibility shim). We simply stop wiring `Array` to it. - - We do not remove `AsyncArray` or its public API in this PR. Future deprecation - of `AsyncArray` itself is a separate effort. - - No behavior change to the default synchronous path — the default runner - preserves today's `sync()` semantics exactly. - -## Design - -### 1. 
The `Runner` protocol - -Add to `zarr/core/sync.py`: - -```python -@runtime_checkable -class Runner(Protocol): - def run(self, coro: Coroutine[Any, Any, T]) -> T: ... -``` - -A `Runner` takes a coroutine and returns the value obtained by awaiting it. - -Concrete default implementation, also in `zarr/core/sync.py`: - -```python -class SyncRunner: - """Run coroutines on Zarr's shared background event loop via sync().""" - - def run(self, coro: Coroutine[Any, Any, T]) -> T: - return sync(coro) -``` - -There is **no** module-level mutable `DEFAULT_RUNNER` singleton baked into the -signature. `Array.__init__` accepts `runner: Runner | None = None`, and `None` -means "use the default", resolved to a `SyncRunner()` inside `__init__`. This -keeps the default-resolution logic in one place and avoids a shared mutable -default argument. - -### 2. `Array.__init__` and state ownership - -`Array` stops being a wrapper around `AsyncArray`. It owns the same state -`AsyncArray` holds, plus a `_runner`: - -- `metadata: T_ArrayMetadata` -- `store_path: StorePath` -- `config: ArrayConfig` -- `codec_pipeline: CodecPipeline` -- `_chunk_grid: ChunkGrid` -- `_runner: Runner` - -New signature (keyword-only `runner`): - -```python -def __init__( - self, - metadata: ArrayMetadata | ArrayMetadataDict, - store_path: StorePath, - config: ArrayConfigLike | None = None, - *, - runner: Runner | None = None, -) -> None: - metadata_parsed = parse_array_metadata(metadata) - config_parsed = parse_array_config(config) - # store metadata_parsed, store_path, config_parsed, - # codec_pipeline, _chunk_grid, and (runner or SyncRunner()) -``` - -`Array`'s field set no longer maps cleanly to a single `_async_array` field, so -the `@dataclass(frozen=False)` decorator is dropped in favor of this explicit -`__init__` (mirroring `AsyncArray`'s construction style). The `with_config` -overloads and other internals are updated to construct via the new signature. 
- -#### Construction helper - -Many internal call sites currently do `Array(async_array)` (≈10 sites across -`zarr/api/synchronous.py`, `zarr/core/group.py`, and `Array._create` / -`from_dict` / `open`). To keep these ergonomic and to handle the common -"I already have an `AsyncArray`" case, add: - -```python -@classmethod -def _from_async_array( - cls, async_array: AsyncArray[T_ArrayMetadata], *, runner: Runner | None = None -) -> Self: - return cls( - metadata=async_array.metadata, - store_path=async_array.store_path, - config=async_array.config, - runner=runner, - ) -``` - -All existing `Array(async_array)` call sites are updated to -`Array._from_async_array(async_array)`. This is a mechanical change. - -#### Deprecating `async_array` / `_async_array` - -The `async_array` property is deprecated. On access it emits a -`DeprecationWarning` and constructs a fresh `AsyncArray` on demand from `Array`'s -own state: - -```python -@property -def async_array(self) -> AsyncArray[T_ArrayMetadata]: - warnings.warn( - "Array.async_array is deprecated; ...", - DeprecationWarning, - stacklevel=2, - ) - return AsyncArray(self.metadata, self.store_path, self.config) -``` - -The `_async_array` field is removed; any remaining internal uses are migrated to -`Array`'s own state. - -### 3. Shared free functions (single source of truth) - -The async selection methods on `AsyncArray` already delegate to module-level free -functions that take explicit state: `_getitem`, `_get_selection`, -`_get_orthogonal_selection`, `_get_mask_selection`, `_get_coordinate_selection`, -`_set_selection`, `_setitem`. These functions take `(store_path, metadata, -codec_pipeline, config, chunk_grid, ...)`. - -For the `AsyncArray` methods whose logic is currently **inline** (no free -function yet), extract the body into a new module-level async free function taking -explicit state, then have both classes call it. 
Methods to extract: - -- `resize`, `append`, `update_attributes` -- `nchunks_initialized`, `_nshards_initialized`, `nbytes_stored` -- `info_complete`, `_save_metadata` -- the classmethods/loaders as appropriate (`open`, `_create*`, - `get_array_metadata` already exists as a free function) - -Resulting call structure for each operation `foo`: - -- `AsyncArray.foo(...)` → `await _foo(self.metadata, self.store_path, ...)` -- `Array.foo_async(...)` → `await _foo(self.metadata, self.store_path, ...)` -- `Array.foo(...)` → `self._runner.run(self.foo_async(...))` - -For operations that return a *new* array (`resize` on v2, `update_attributes`, -`append`), the free function returns the new metadata/state; each class wraps the -result in its own type (`AsyncArray` vs `Array`, the latter preserving its -`_runner`). - -This guarantees `AsyncArray` and `Array` cannot drift, because they share one -implementation per operation. - -### 4. The `*_async` surface on `Array` - -Every current public async method on `AsyncArray` gets a `*_async` twin on -`Array`: - -- `getitem_async`, `setitem_async` -- async twins for each selection getter/setter: - `get_orthogonal_selection_async` / `set_orthogonal_selection_async`, - `get_mask_selection_async` / `set_mask_selection_async`, - `get_coordinate_selection_async` / `set_coordinate_selection_async`, - `get_block_selection_async` / `set_block_selection_async`, - `get_basic_selection_async` / `set_basic_selection_async` -- `resize_async`, `append_async`, `update_attributes_async` -- `nchunks_initialized_async`, `nbytes_stored_async`, `info_complete_async` - -The existing synchronous methods (`__getitem__`, `__setitem__`, -`get_basic_selection`, `set_basic_selection`, the orthogonal/mask/coordinate/block -selection getters and setters, `resize`, `append`, `update_attributes`, -`nchunks_initialized`, `nbytes_stored`, `info_complete`, …) are rewritten from -`sync(self.async_array.foo(...))` to `self._runner.run(self.foo_async(...))`. - -### 5. 
Testing and verification - -Verification bar: **full existing suite passes unchanged + new runner tests.** - -- **Regression:** the entire existing array test suite passes without - modification, proving the default synchronous behavior is preserved. Run with - `uv run pytest`. -- **New tests:** - - `Runner` protocol conformance / `SyncRunner` behaves as a `Runner`. - - Injecting a custom runner: a recording runner that captures the coroutine it - receives, asserts it is the expected coroutine, runs it, and returns the - awaited value; assert `Array(..., runner=recording)` uses it. - - Equivalence: `arr.getitem(sel)` equals `arr._runner.run(arr.getitem_async(sel))` - and equals the value via a directly-awaited `getitem_async`. - - Deprecation: accessing `Array.async_array` (and `_async_array` if still - reachable) emits a `DeprecationWarning`. - -## Risks - -- Large mechanical diff in a 6000-line file; risk of missing a `sync(...)` call - site. Mitigated by grepping all `sync(self.async_array` occurrences and by the - unchanged regression suite. -- Free-function extraction for new-array-returning methods must correctly - reconstruct per-class types. Covered by existing `resize`/`append`/ - `update_attributes` tests. -- `with_config` and other `Self`-returning methods must thread `_runner` through - so a derived `Array` keeps the user's runner. 
From 5c5582a6975f2c9c145d12e6fa682b08f9f9ed4d Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 4 Jun 2026 13:26:13 +0200 Subject: [PATCH 20/24] docs: document Array runner and *_async methods in user guide Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/user-guide/arrays.md | 34 +++++++++++++++++++++++++++++++ docs/user-guide/performance.md | 37 ++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/docs/user-guide/arrays.md b/docs/user-guide/arrays.md index 14122003c0..bec6479476 100644 --- a/docs/user-guide/arrays.md +++ b/docs/user-guide/arrays.md @@ -74,6 +74,40 @@ print(z[:]) More information about NumPy-style indexing can be found in the [NumPy documentation](https://numpy.org/doc/stable/user/basics.indexing.html). +### Asynchronous access + +The indexing and I/O operations shown above are synchronous: they block until the +data has been read or written. Every such operation also has an asynchronous +counterpart on the `Array`, named with an `_async` suffix, which returns a +coroutine you can `await`. These are useful for issuing many requests +concurrently from `async` code without going through Zarr's synchronous wrapper. + +```python exec="true" session="arrays" source="above" result="ansi" +import asyncio + + +async def read_corner(arr): + return await arr.getitem_async((0, 0)) + + +print(asyncio.run(read_corner(z))) +``` + +Counterparts exist for the full read/write surface, including the advanced +indexers described below — for example `getitem_async`, `setitem_async`, +`get_orthogonal_selection_async`, `get_coordinate_selection_async`, +`get_block_selection_async`, and the matching setters, as well as `resize_async`, +`append_async`, and `update_attributes_async`. 
Each synchronous method is +implemented by running its `_async` counterpart through the array's runner; see +[Custom event loops with runner](performance.md#custom-event-loops-with-runner) +in the performance guide for how to control which event loop executes them. + +!!! note + + Earlier versions of Zarr exposed the asynchronous API through a separate + `AsyncArray` object reachable via the `Array.async_array` property. That + property is now deprecated in favor of the `_async` methods on `Array`. + ## Persistent arrays In the examples above, compressed data for each chunk of the array was stored in diff --git a/docs/user-guide/performance.md b/docs/user-guide/performance.md index fa98e9466e..a318c999b9 100644 --- a/docs/user-guide/performance.md +++ b/docs/user-guide/performance.md @@ -246,6 +246,43 @@ thread pool (see the Dask section below). Increasing it may improve throughput in CPU-bound workloads where many synchronous-to-async dispatches happen concurrently. +### Custom event loops with `runner` + +Every `Array` method that touches storage is implemented as an asynchronous +coroutine. A synchronous call like `z[...]` runs that coroutine to completion +through the array's *runner*. By default the runner is a `SyncRunner`, which +submits the coroutine to Zarr's shared background event loop (the same mechanism +described above, governed by `threading.max_workers`). + +You can supply your own runner to control which event loop executes the +coroutines — for example to reuse an event loop you already manage, or to +integrate with another async framework. A runner is any object with a +`run(coro)` method that awaits the coroutine and returns its result: + +```python +import zarr +from zarr.core.sync import Runner, SyncRunner + + +class MyRunner: + def run(self, coro): + # Execute `coro` on the event loop of your choice and return its result. 
+ return SyncRunner().run(coro) + + +z = zarr.create_array(store={}, shape=(100,), chunks=(10,), dtype="i4") +z_custom = zarr.Array( + metadata=z.metadata, + store_path=z.store_path, + config=z.config, + runner=MyRunner(), +) +``` + +`Runner` is a [`typing.Protocol`][], so a custom runner does not need to subclass +anything — it only needs a compatible `run` method. When `runner` is omitted (or +`None`), the array uses the default `SyncRunner`. + ### Using Zarr with Dask [Dask](https://www.dask.org/) is a popular parallel computing library that works well with Zarr for processing large arrays. When using Zarr with Dask, it's important to consider the interaction between Dask's thread pool and Zarr's concurrency settings. From cbec4730f7dc67cf699109a23fb81f00c143aff6 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 4 Jun 2026 13:40:19 +0200 Subject: [PATCH 21/24] feat(array): accept legacy Array(async_array) form with deprecation Softens the constructor break: Array(async_array) still works but emits a DeprecationWarning, constructing from the async array's metadata/store_path/ config. The new Array(metadata, store_path, ...) form is preferred. Co-Authored-By: Claude Opus 4.8 (1M context) --- changes/4011.feature.md | 2 +- src/zarr/core/array.py | 24 +++++++++++++++++++++--- tests/test_runner.py | 26 ++++++++++++++++++++++++++ 3 files changed, 48 insertions(+), 4 deletions(-) diff --git a/changes/4011.feature.md b/changes/4011.feature.md index f0641636a7..3f65ba49c4 100644 --- a/changes/4011.feature.md +++ b/changes/4011.feature.md @@ -1,3 +1,3 @@ `Array` now owns its own state and accepts a keyword-only `runner` argument for plugging in a custom event loop. Every asynchronous array operation is available as a `*_async` method on `Array` (for example `Array.getitem_async`). `Array.async_array` is deprecated; use the `*_async` methods instead. 
-The `Array` constructor signature changed from `Array(async_array)` to `Array(metadata, store_path, config=None, *, runner=None)`, mirroring `AsyncArray`. Code that previously constructed an `Array` by wrapping an `AsyncArray` should use the new `Array._from_async_array(async_array)` classmethod instead. +The `Array` constructor signature changed from `Array(async_array)` to `Array(metadata, store_path, config=None, *, runner=None)`, mirroring `AsyncArray`. The legacy `Array(async_array)` form still works for now but is deprecated and will be removed in a future release; construct an `Array` directly with the new signature instead. diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index da15048c3c..7c9b00813a 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -1859,13 +1859,31 @@ class Array[T_ArrayMetadata: (ArrayV2Metadata, ArrayV3Metadata)]: def __init__( self, - metadata: ArrayMetadata | ArrayMetadataDict, - store_path: StorePath, + metadata: ArrayMetadata | ArrayMetadataDict | AsyncArray[T_ArrayMetadata], + store_path: StorePath | None = None, config: ArrayConfigLike | None = None, *, runner: Runner | None = None, ) -> None: - metadata_parsed = parse_array_metadata(metadata) + metadata_in: ArrayMetadata | ArrayMetadataDict + if isinstance(metadata, AsyncArray): + # Legacy construction form: Array(async_array). Deprecated. 
+ warnings.warn( + "Array(async_array) is deprecated; construct an Array directly " + "with Array(metadata, store_path, config=...), or use " + "Array._from_async_array(async_array).", + DeprecationWarning, + stacklevel=2, + ) + async_array = metadata + metadata_in = async_array.metadata + store_path = async_array.store_path + config = async_array.config + else: + metadata_in = metadata + if store_path is None: + raise TypeError("store_path is required when constructing an Array from metadata") + metadata_parsed = parse_array_metadata(metadata_in) config_parsed = parse_array_config(config) object.__setattr__(self, "metadata", metadata_parsed) object.__setattr__(self, "store_path", store_path) diff --git a/tests/test_runner.py b/tests/test_runner.py index 448a649d2a..2333a7dcc6 100644 --- a/tests/test_runner.py +++ b/tests/test_runner.py @@ -171,3 +171,29 @@ def test_set_orthogonal_selection_async() -> None: arr._runner.run(arr.set_orthogonal_selection_async(([0, 2], slice(None)), 7)) # type: ignore[arg-type] expected = arr.get_orthogonal_selection(([0, 2], slice(None))) # type: ignore[arg-type] np.testing.assert_array_equal(expected, np.full((2, 4), 7, dtype="i4")) + + +def test_legacy_array_from_async_array_constructor() -> None: + # Array(async_array) is the deprecated legacy construction form. It should + # still work but emit a DeprecationWarning. 
+ base = _make_array() + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + aa = base.async_array # a real AsyncArray + with pytest.warns(DeprecationWarning, match="Array\\(async_array\\)"): + arr = Array(aa) + assert isinstance(arr, Array) + assert arr.metadata == aa.metadata + assert arr.store_path == aa.store_path + assert isinstance(arr._runner, SyncRunner) + + +def test_legacy_array_constructor_passes_runner() -> None: + base = _make_array() + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + aa = base.async_array + runner = SyncRunner() + with pytest.warns(DeprecationWarning, match="Array\\(async_array\\)"): + arr = Array(aa, runner=runner) + assert arr._runner is runner From 8ac1f3fd0ad77075d508c8e7e5e27fb97c893e2c Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 4 Jun 2026 13:47:58 +0200 Subject: [PATCH 22/24] docs: make the runner example block exec="true" so it is validated The docs build guards that every python block declares exec/test; the new custom-runner example was a bare fence. Mark it exec="true" (it constructs an Array with a custom runner, which runs cleanly) and drop the unused Runner import. Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/user-guide/performance.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/user-guide/performance.md b/docs/user-guide/performance.md index a318c999b9..4a91ec5994 100644 --- a/docs/user-guide/performance.md +++ b/docs/user-guide/performance.md @@ -259,9 +259,9 @@ coroutines — for example to reuse an event loop you already manage, or to integrate with another async framework. 
A runner is any object with a `run(coro)` method that awaits the coroutine and returns its result: -```python +```python exec="true" session="performance" source="above" import zarr -from zarr.core.sync import Runner, SyncRunner +from zarr.core.sync import SyncRunner class MyRunner: From 733fdd7d4f4359fa3a5b2b52c3b00b5ed3a83ec6 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 4 Jun 2026 14:07:22 +0200 Subject: [PATCH 23/24] test: cover new Array branches (constructor error, __eq__, sharding, compressor/filters) Closes coverage gaps introduced by the Array unification: the store_path-required TypeError, __eq__ NotImplemented path, the sharded read_chunk_sizes/_chunk_grid_shape branch, and the v2/v3 compressor and v2 filters property branches. Co-Authored-By: Claude Opus 4.8 (1M context) --- tests/test_array.py | 44 +++++++++++++++++++++++++ tests/test_runner.py | 76 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 120 insertions(+) diff --git a/tests/test_array.py b/tests/test_array.py index bc7fba02f1..2c74f9cb09 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -2376,3 +2376,47 @@ async def test_create_array_chunks_3d( shape = (10, 12, 15) arr = await create_array(store={}, shape=shape, chunks=chunk_input, dtype="float64") assert arr.write_chunk_sizes == expected + + +def test_read_chunk_sizes_sharded() -> None: + """For a sharded array, read_chunk_sizes reports the inner-chunk sizes and + cdata_shape / _chunk_grid_shape count inner chunks across the whole array. + + This exercises the sharding branch of read_chunk_sizes and _chunk_grid_shape. + """ + shape = (30, 20) + shard_shape = (10, 20) + chunk_shape = (5, 4) + arr = zarr.create_array( + store=MemoryStore(), + shape=shape, + chunks=chunk_shape, + shards=shard_shape, + dtype="i1", + ) + + # Inner-chunk sizes, clipped to the array extent (no boundary remainder here). 
+ expected_read = ( + (5, 5, 5, 5, 5, 5), + (4, 4, 4, 4, 4), + ) + assert arr.read_chunk_sizes == expected_read + + # write_chunk_sizes reports the shard (outer) chunk sizes. + assert arr.write_chunk_sizes == ((10, 10, 10), (20,)) + + # cdata_shape / _chunk_grid_shape count inner chunks across the whole array. + expected_grid = tuple(starmap(ceildiv, zip(shape, chunk_shape, strict=True))) + assert arr._chunk_grid_shape == expected_grid + assert arr.cdata_shape == expected_grid + + # The AsyncArray shares the same helpers, so the sharded paths agree. + aa = AsyncArray(arr.metadata, arr.store_path, arr.config) + assert aa.read_chunk_sizes == expected_read + assert aa.cdata_shape == expected_grid + + # Unsharded AsyncArray exercises the non-sharding fallback of the same helpers. + unsharded = zarr.create_array(store=MemoryStore(), shape=(30, 20), chunks=(5, 4), dtype="i1") + aa_unsharded = AsyncArray(unsharded.metadata, unsharded.store_path, unsharded.config) + assert aa_unsharded.read_chunk_sizes == unsharded.read_chunk_sizes + assert aa_unsharded.cdata_shape == unsharded.cdata_shape diff --git a/tests/test_runner.py b/tests/test_runner.py index 2333a7dcc6..6ea94ee291 100644 --- a/tests/test_runner.py +++ b/tests/test_runner.py @@ -10,6 +10,7 @@ import zarr from zarr.core.array import Array, AsyncArray from zarr.core.sync import Runner, SyncRunner +from zarr.errors import ZarrDeprecationWarning from zarr.storage import MemoryStore if TYPE_CHECKING: @@ -197,3 +198,78 @@ def test_legacy_array_constructor_passes_runner() -> None: with pytest.warns(DeprecationWarning, match="Array\\(async_array\\)"): arr = Array(aa, runner=runner) assert arr._runner is runner + + +def test_array_constructor_requires_store_path() -> None: + # Constructing an Array from metadata without a store_path must error. 
+ arr = _make_array() + md = arr.metadata + with pytest.raises(TypeError, match="store_path is required"): + Array(md) + + +def test_array_eq_non_array_is_false() -> None: + # Array.__eq__ returns NotImplemented for non-Array operands; Python then + # falls back to identity comparison, yielding False. + arr = _make_array() + assert (arr == 42) is False + assert (arr == object()) is False + assert (arr != object()) is True + + +def test_array_eq_other_array_true() -> None: + # Two Arrays viewing the same state compare equal (exercises the True branch). + arr = _make_array() + other = Array(metadata=arr.metadata, store_path=arr.store_path, config=arr.config) + assert arr == other + + +def test_compressor_v2_returns_without_error() -> None: + arr2 = zarr.create_array(store={}, shape=(8,), chunks=(4,), dtype="i4", zarr_format=2) + with pytest.warns(ZarrDeprecationWarning): + # The value may be a codec or None depending on defaults; just confirm + # the v2 branch returns rather than raising. + _ = arr2.compressor + + +def test_compressor_v3_raises_typeerror() -> None: + arr3 = zarr.create_array(store={}, shape=(8,), chunks=(4,), dtype="i4", zarr_format=3) + with ( + pytest.warns(ZarrDeprecationWarning), + pytest.raises(TypeError, match="not available for Zarr format 3"), + ): + _ = arr3.compressor + + +def test_filters_v2_non_none() -> None: + # A v2 array created with explicit filters should report them via .filters. 
+ from numcodecs import Delta + + arr = zarr.create_array( + store={}, + shape=(8,), + chunks=(4,), + dtype="i4", + zarr_format=2, + filters=[Delta(dtype="i4")], + ) + filters = arr.filters + assert filters == (Delta(dtype="i4"),) + + +def test_array_open_roundtrip() -> None: + store = MemoryStore() + created = zarr.create_array(store=store, shape=(8,), chunks=(4,), dtype="i4", fill_value=0) + opened = Array.open(store) + assert isinstance(opened, Array) + assert opened.metadata == created.metadata + assert isinstance(opened._runner, SyncRunner) + + +def test_array_create_roundtrip() -> None: + # The Array._create classmethod returns a sync Array via _from_async_array. + store = MemoryStore() + arr = Array._create(store=store, shape=(8,), dtype="i4", chunk_shape=(4,), zarr_format=3) + assert isinstance(arr, Array) + assert arr.shape == (8,) + assert isinstance(arr._runner, SyncRunner) From 9605495b670681453d22352ca22c10f356de0adf Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 4 Jun 2026 14:25:47 +0200 Subject: [PATCH 24/24] fix(array): address roborev findings on constructor, update_attributes, iterators, async_array - legacy Array(async_array) raises if store_path/config also supplied - update_attributes_async returns a fresh Array, consistent with the sync form - align Array._iter_shard_coords signature with sibling iterators - async_array property left uncached: resize/append replace metadata so caching would be stale Co-Authored-By: Claude Opus 4.8 (1M context) --- src/zarr/core/array.py | 15 +++++++++++++-- tests/test_runner.py | 11 +++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 7c9b00813a..37c1f30a51 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -1868,6 +1868,12 @@ def __init__( metadata_in: ArrayMetadata | ArrayMetadataDict if isinstance(metadata, AsyncArray): # Legacy construction form: Array(async_array). Deprecated. 
+ if store_path is not None or config is not None: + raise TypeError( + "When constructing an Array from an AsyncArray (deprecated), " + "store_path and config must not also be provided; they are taken " + "from the AsyncArray." + ) warnings.warn( "Array(async_array) is deprecated; construct an Array directly " "with Array(metadata, store_path, config=...), or use " @@ -2544,7 +2550,7 @@ def _iter_chunk_coords( ) def _iter_shard_coords( - self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None + self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None ) -> Iterator[tuple[int, ...]]: """ Create an iterator over the coordinates of shards in shard grid space. @@ -4663,7 +4669,12 @@ async def update_attributes_async(self, new_attributes: dict[str, JSON]) -> Self The array with the updated attributes. """ await _update_attributes(self, new_attributes) - return self + return type(self)( + metadata=self.metadata, + store_path=self.store_path, + config=self.config, + runner=self._runner, + ) async def nchunks_initialized_async(self) -> int: """Asynchronously calculate the number of chunks that have been initialized in storage. diff --git a/tests/test_runner.py b/tests/test_runner.py index 6ea94ee291..a84a921199 100644 --- a/tests/test_runner.py +++ b/tests/test_runner.py @@ -128,6 +128,17 @@ def test_update_attributes_async() -> None: assert arr.metadata.attributes["foo"] == "bar" +def test_legacy_constructor_rejects_extra_store_path() -> None: + base = _make_array() + import warnings as _w + + with _w.catch_warnings(): + _w.simplefilter("ignore", DeprecationWarning) + aa = base.async_array + with pytest.raises(TypeError, match="must not also be provided"): + Array(aa, store_path=base.store_path) + + def test_nchunks_initialized_async() -> None: arr = _make_array() arr[:] = np.arange(8, dtype="i4")