Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
7039c00
build: change supported dask versions
melonora Mar 30, 2026
63fbe9a
feat[config]: allow for persisting config
melonora Mar 30, 2026
b7e98b9
test: add config fixture
melonora Apr 7, 2026
558fe97
test: add tests for config
melonora Apr 7, 2026
8eebdb0
add raster write kwargs to api
melonora Apr 14, 2026
790be0c
add tests for raster API
melonora Apr 14, 2026
1a1c673
build: add zarrs-python for improved shard io
melonora Apr 14, 2026
471f72c
CI: change lowerbound dask version
melonora Apr 14, 2026
cd48574
build: correct zarrs
melonora Apr 14, 2026
0187fe9
build: change distributed version constraint
melonora Apr 14, 2026
b629de0
build: support dask and distributed >=2026.3.0
melonora Apr 14, 2026
c2b1375
CI: change lowerbound test version of dask
melonora Apr 14, 2026
bf5b910
fix: pre-commit error due to incorrect typehint
melonora Apr 14, 2026
49441fe
build: include zarrs as dependency
melonora Apr 14, 2026
6334fa8
make zarrs codec default
melonora Apr 14, 2026
73ca72a
config: change chunks and shards to accomodate for raster and table
melonora Apr 16, 2026
c6041bb
chore: adjust raster_write to new config fields
melonora Apr 16, 2026
278606a
docs: add docstring
melonora Apr 16, 2026
f6c0a37
change: add support for providing storage options as list
melonora Apr 16, 2026
36e2271
docs: make docstring for raster write kwargs more clear
melonora Apr 16, 2026
50cb6bb
tests: complete and refactor sharding tests
melonora Apr 16, 2026
6257096
docs: adjust docstring config
melonora Apr 16, 2026
5525fbf
fix: correct parsing chunks, shards argument
melonora Apr 16, 2026
44414c1
test: add testing for adjusting chunks with env variable
melonora Apr 16, 2026
e974647
test: write using settings raster_chunks
melonora Apr 16, 2026
bd6249e
feat: add raster_write_kwargs to write_element
melonora Apr 16, 2026
2600237
test: add test writing multiple elements with raster_kwargs
melonora Apr 16, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
fail-fast: false
matrix:
include:
- {os: windows-latest, python: "3.11", dask-version: "2025.12.0", name: "Dask 2025.12.0"}
- {os: windows-latest, python: "3.11", dask-version: "2026.3.0", name: "Dask 2026.3.0"}
- {os: windows-latest, python: "3.13", dask-version: "latest", name: "Dask latest"}
- {os: ubuntu-latest, python: "3.11", dask-version: "latest", name: "Dask latest"}
- {os: ubuntu-latest, python: "3.13", dask-version: "latest", name: "Dask latest"}
Expand Down
8 changes: 6 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ dependencies = [
"annsel>=0.1.2",
"click",
"dask-image",
"dask>=2025.12.0,<2026.1.2",
"distributed<2026.1.2",
"dask>=2026.3.0",
"distributed>=2026.3.0",
"datashader",
"fsspec[s3,http]",
"geopandas>=0.14",
Expand All @@ -50,6 +50,7 @@ dependencies = [
"xarray>=2024.10.0",
"xarray-spatial>=0.3.5",
"zarr>=3.0.0",
"zarrs",
]
[project.optional-dependencies]
torch = [
Expand All @@ -62,6 +63,9 @@ extra = [
]

[dependency-groups]
sharding = [
"zarrs",
]
dev = [
"bump2version",
]
Expand Down
4 changes: 4 additions & 0 deletions src/spatialdata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,10 @@
"settings",
]

import zarr

zarr.config.set({"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"})


def __getattr__(name: str) -> Any:
if name in _submodules:
Expand Down
24 changes: 24 additions & 0 deletions src/spatialdata/_core/_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

from collections.abc import Iterable
from typing import Any

from anndata import AnnData

Expand Down Expand Up @@ -164,3 +165,26 @@ def get_unique_name(name: str, attr: str, is_dataframe_column: bool = False) ->
setattr(sanitized, attr, new_dict)

return None if inplace else sanitized


def create_raster_element_kwargs(
raster_write_kwargs: dict[str, dict[str, Any] | list[dict[str, Any]]] | list[dict[str, Any]],
element_name: str,
) -> dict[str, Any] | list[dict[str, Any]]:

if isinstance(raster_write_kwargs, dict) and (kwargs := raster_write_kwargs.get(element_name)):
element_raster_write_kwargs = kwargs
elif isinstance(raster_write_kwargs, dict) and not all(
isinstance(x, (dict, list)) for x in raster_write_kwargs.values()
):
element_raster_write_kwargs = raster_write_kwargs
elif isinstance(raster_write_kwargs, list):
if not all(isinstance(x, dict) for x in raster_write_kwargs):
raise ValueError(
"If passing raster_write_kwargs as list, it is assumed to be the storage "
"options for each scale of a multiscale raster as a dictionary."
)
element_raster_write_kwargs = raster_write_kwargs
else:
raise ValueError(f"Type of raster_write_kwargs should be either dict or list, got {type(raster_write_kwargs)}.")
return element_raster_write_kwargs
59 changes: 58 additions & 1 deletion src/spatialdata/_core/spatialdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -1108,6 +1108,7 @@ def write(
update_sdata_path: bool = True,
sdata_formats: SpatialDataFormatType | list[SpatialDataFormatType] | None = None,
shapes_geometry_encoding: Literal["WKB", "geoarrow"] | None = None,
raster_write_kwargs: dict[str, dict[str, Any] | list[dict[str, Any]]] | list[dict[str, Any]] | None = None,
) -> None:
"""
Write the `SpatialData` object to a Zarr store.
Expand Down Expand Up @@ -1155,7 +1156,27 @@ def write(
shapes_geometry_encoding
Whether to use the WKB or geoarrow encoding for GeoParquet. See :meth:`geopandas.GeoDataFrame.to_parquet`
for details. If None, uses the value from :attr:`spatialdata.settings.shapes_geometry_encoding`.
"""
raster_write_kwargs
Storage options for raster elements. These options are passed to the zarr storage backend for writing and
can be provided in several formats:

1. Single dictionary
A dictionary containing all storage options applied globally.
2. Dictionary per raster element
A dictionary where:
- Keys = names of raster elements
- Values = storage options for each element
- For single-scale data: a dictionary
- For multiscale data: a list of dictionaries (one per scale)
3. List of dictionaries (multiscale only)
A list where each dictionary defines the storage options for one scale of a multiscale raster element.

Important Notes
- The available key–value pairs in these dictionaries depend on the Zarr format used for writing.
- For a full list of supported storage options, refer to:
https://zarr.readthedocs.io/en/stable/api/zarr/create/#zarr.create_array
"""
from spatialdata._core._utils import create_raster_element_kwargs
from spatialdata._io._utils import _resolve_zarr_store
from spatialdata._io.format import _parse_formats

Expand All @@ -1173,6 +1194,10 @@ def write(
store.close()

for element_type, element_name, element in self.gen_elements():
element_raster_write_kwargs = None
if element_type in ("images", "labels") and raster_write_kwargs:
element_raster_write_kwargs = create_raster_element_kwargs(raster_write_kwargs, element_name)

self._write_element(
element=element,
zarr_container_path=file_path,
Expand All @@ -1181,6 +1206,7 @@ def write(
overwrite=False,
parsed_formats=parsed,
shapes_geometry_encoding=shapes_geometry_encoding,
element_raster_write_kwargs=element_raster_write_kwargs,
)

if self.path != file_path and update_sdata_path:
Expand All @@ -1198,6 +1224,7 @@ def _write_element(
overwrite: bool,
parsed_formats: dict[str, SpatialDataFormatType] | None = None,
shapes_geometry_encoding: Literal["WKB", "geoarrow"] | None = None,
element_raster_write_kwargs: dict[str, Any] | list[dict[str, Any]] | None = None,
) -> None:
from spatialdata._io.io_zarr import _get_groups_for_element

Expand Down Expand Up @@ -1231,13 +1258,15 @@ def _write_element(
group=element_group,
name=element_name,
element_format=parsed_formats["raster"],
storage_options=element_raster_write_kwargs,
)
elif element_type == "labels":
write_labels(
labels=element,
group=root_group,
name=element_name,
element_format=parsed_formats["raster"],
storage_options=element_raster_write_kwargs,
)
elif element_type == "points":
write_points(
Expand Down Expand Up @@ -1268,6 +1297,9 @@ def write_element(
overwrite: bool = False,
sdata_formats: SpatialDataFormatType | list[SpatialDataFormatType] | None = None,
shapes_geometry_encoding: Literal["WKB", "geoarrow"] | None = None,
raster_write_kwargs: dict[str, dict[str, Any] | list[dict[str, Any]] | Any]
| list[dict[str, Any]]
| None = None,
) -> None:
"""
Write a single element, or a list of elements, to the Zarr store used for backing.
Expand All @@ -1286,12 +1318,32 @@ def write_element(
shapes_geometry_encoding
Whether to use the WKB or geoarrow encoding for GeoParquet. See :meth:`geopandas.GeoDataFrame.to_parquet`
for details. If None, uses the value from :attr:`spatialdata.settings.shapes_geometry_encoding`.
raster_write_kwargs
Storage options for raster elements. These options are passed to the zarr storage backend for writing and
can be provided in several formats:

1. Single dictionary
A dictionary containing all storage options applied globally.
2. Dictionary per raster element
A dictionary where:
- Keys = names of raster elements
- Values = storage options for each element
- For single-scale data: a dictionary
- For multiscale data: a list of dictionaries (one per scale)
3. List of dictionaries (multiscale only)
A list where each dictionary defines the storage options for one scale of a multiscale raster element.

Important Notes
- The available key–value pairs in these dictionaries depend on the Zarr format used for writing.
- For a full list of supported storage options, refer to:
https://zarr.readthedocs.io/en/stable/api/zarr/create/#zarr.create_array

Notes
-----
If you pass a list of names, the elements will be written one by one. If an error occurs during the writing of
an element, the writing of the remaining elements will not be attempted.
"""
from spatialdata._core._utils import create_raster_element_kwargs
from spatialdata._io.format import _parse_formats

parsed_formats = _parse_formats(formats=sdata_formats)
Expand Down Expand Up @@ -1331,6 +1383,10 @@ def write_element(

self._check_element_not_on_disk_with_different_type(element_type=element_type, element_name=element_name)

element_raster_write_kwargs = None
if element_type in ("images", "labels") and raster_write_kwargs:
element_raster_write_kwargs = create_raster_element_kwargs(raster_write_kwargs, element_name)

self._write_element(
element=element,
zarr_container_path=self.path,
Expand All @@ -1339,6 +1395,7 @@ def write_element(
overwrite=overwrite,
parsed_formats=parsed_formats,
shapes_geometry_encoding=shapes_geometry_encoding,
element_raster_write_kwargs=element_raster_write_kwargs,
)
# After every write, metadata should be consolidated, otherwise this can lead to IO problems like when deleting.
if self.has_consolidated_metadata():
Expand Down
39 changes: 36 additions & 3 deletions src/spatialdata/_io/io_raster.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,13 +148,13 @@ def _prepare_storage_options(
return None
if isinstance(storage_options, dict):
prepared = dict(storage_options)
if "chunks" in prepared:
if "chunks" in prepared and prepared["chunks"] is not None:
prepared["chunks"] = _normalize_explicit_chunks(prepared["chunks"])
return prepared

prepared_options = [dict(options) for options in storage_options]
for options in prepared_options:
if "chunks" in options:
if "chunks" in options and options["chunks"] is not None:
options["chunks"] = _normalize_explicit_chunks(options["chunks"])
return prepared_options

Expand Down Expand Up @@ -283,12 +283,27 @@ def _write_raster(
raster_format
The format used to write the raster data.
storage_options
Additional options for writing the raster data, like chunks and compression.
Storage options for raster elements. These options are passed to the zarr storage backend for writing and
can be provided in several formats:

1. Single dictionary
A dictionary containing all storage options applied to the raster, either single or multiscale.
2. List of dictionaries (multiscale only)
A list where each dictionary defines the storage options for one scale of the multiscale raster element.

Important Notes
- The available key–value pairs in these dictionaries depend on the Zarr format used for writing.
- For a full list of supported storage options, refer to:
https://zarr.readthedocs.io/en/stable/api/zarr/create/#zarr.create_array
label_metadata
Label metadata which can only be defined when writing 'labels'.
metadata
Additional metadata for the raster element
"""
from dataclasses import asdict

from spatialdata import settings

if raster_type not in ["image", "labels"]:
raise ValueError(f"{raster_type} is not a valid raster type. Must be 'image' or 'labels'.")
# "name" and "label_metadata" are only used for labels. "name" is written in write_multiscale_ngff() but ignored in
Expand All @@ -305,6 +320,24 @@ def _write_raster(
for c in channels:
metadata["metadata"]["omero"]["channels"].append({"label": c}) # type: ignore[union-attr, index, call-overload]

if isinstance(storage_options, dict):
storage_options = {
**{k.split("_")[1]: v for k, v in asdict(settings).items() if k in ("raster_chunks", "raster_shards")},
**storage_options,
}
elif isinstance(storage_options, list):
storage_options = [
{
**{k.split("_")[1]: v for k, v in asdict(settings).items() if k in ("raster_chunks", "raster_shards")},
**x,
}
for x in storage_options
]
elif not storage_options:
storage_options = {
k.split("_")[1]: v for k, v in asdict(settings).items() if k in ("raster_chunks", "raster_shards")
}

if isinstance(raster_data, DataArray):
_write_raster_dataarray(
raster_type,
Expand Down
Loading
Loading