Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
5987137
Add initial tests for remote storage workflows with UPath
SamirMoustafa Feb 28, 2026
865eb76
io: add dask.array.to_zarr compat for ome_zarr kwargs
SamirMoustafa Mar 2, 2026
2134386
io: add remote storage helpers in _utils
SamirMoustafa Mar 2, 2026
eee34d8
core: support UPath for SpatialData.path and write()
SamirMoustafa Mar 2, 2026
40af327
io: use resolved store and remote parquet in points, raster, shapes, …
SamirMoustafa Mar 2, 2026
540631c
ci: add test deps and Dockerfile for storage emulators (S3, Azure, GCS)
SamirMoustafa Mar 2, 2026
532af5a
test: move remote storage tests under tests/io/remote_storage and add…
SamirMoustafa Mar 2, 2026
c22b8bf
fix: update Dask internal keys for zarr compatibility
SamirMoustafa Mar 2, 2026
0c07169
test: refine subset and table validation in spatial data tests
SamirMoustafa Mar 2, 2026
f21bb52
feat: move Dockerfile for storage emulators to facilitate testing
SamirMoustafa Mar 2, 2026
072566a
ci: enhance GitHub Actions workflow to support storage emulators on L…
SamirMoustafa Mar 2, 2026
ee6e4dc
fix: handle RuntimeError in fsspec async session closure
SamirMoustafa Mar 2, 2026
9019e6a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 2, 2026
42c3133
refactor: add type hints to functions in _dask_zarr_compat, _utils, a…
SamirMoustafa Mar 2, 2026
70ababe
chore: remove pytest-timeout from test dependencies in pyproject.toml
SamirMoustafa Mar 4, 2026
cae2319
test: add unit tests for remote storage store resolution and credenti…
SamirMoustafa Mar 4, 2026
857327b
Merge main into cloud-storage-support
SamirMoustafa Apr 14, 2026
fe6bf24
chore(ci): fix GCS emulator tests (gcsfs, sync upload, multi-arch)
SamirMoustafa Apr 15, 2026
3cb2c93
refactor: remove deprecated dask array compatibility layer
SamirMoustafa Apr 15, 2026
6cf359a
Improve path handling in FsspecStore and update read_parquet options
SamirMoustafa Apr 15, 2026
df7be9a
Add fsspec integration by adding support for cloud object store proto…
SamirMoustafa Apr 15, 2026
a0bcc65
Enhance path handling for hierarchical URIs in SpatialData and relate…
SamirMoustafa Apr 15, 2026
f1cc651
Ensure existing Zarr stores are returned unchanged in _resolve_zarr_s…
SamirMoustafa Apr 15, 2026
55ba3d0
remove unused fsspec async handling code and update related test docu…
SamirMoustafa Apr 15, 2026
0e2e424
Updating the path setter to accept strings and normalize them to Path…
SamirMoustafa Apr 15, 2026
ce20830
write method safeguards for local and remote paths in SpatialData.
SamirMoustafa Apr 15, 2026
fbc3040
Support for UPath in data reading functions and improve error handlin…
SamirMoustafa Apr 15, 2026
175fbea
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 15, 2026
3beed0e
Refactor full_sdata fixture for consistency in remote I/O tests.
SamirMoustafa Apr 15, 2026
a7c51c2
rollback the unneeded changes for test cases within the core
SamirMoustafa Apr 15, 2026
6443422
rollback the unneeded changes for test cases within the query
SamirMoustafa Apr 15, 2026
be23021
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 15, 2026
53c45ee
Adding a dedicated job for remote storage tests, updating coverage up…
SamirMoustafa Apr 16, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 58 additions & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,68 @@ jobs:
PLATFORM: ${{ matrix.os }}
DISPLAY: :42
run: |
uv run pytest --cov --color=yes --cov-report=xml
uv run pytest --cov --color=yes --cov-report=xml --ignore=tests/io/remote_storage/
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v5
with:
name: coverage
verbose: true
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

# Runs only the remote-storage test suite (tests/io/remote_storage/) against
# storage emulators that are built and started from a Docker image in this job.
test-remote-storage:
  runs-on: ubuntu-latest
  defaults:
    run:
      shell: bash
  strategy:
    # Let the other Python version finish even if one matrix entry fails.
    fail-fast: false
    matrix:
      python: ["3.11", "3.13"]
  env:
    MPLBACKEND: agg
    PLATFORM: ubuntu-latest
    DISPLAY: :42
    GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT: "false"
  steps:
    - uses: actions/checkout@v6
    - uses: astral-sh/setup-uv@v7
      with:
        version: "latest"
        python-version: ${{ matrix.python }}
    - name: Install dependencies
      run: |
        uv add dask
        uv sync --group=test
    - name: Build and start storage emulators
      # Published ports: 5000 (S3), 10000 (Azure), 4443 (GCS).
      run: |
        docker build -f tests/io/remote_storage/Dockerfile.emulators -t spatialdata-emulators .
        docker run --rm -d --name spatialdata-emulators \
          -p 5000:5000 -p 10000:10000 -p 4443:4443 \
          spatialdata-emulators
    - name: Wait for emulator ports
      # Polls all three ports, up to 45 attempts with a 2 s pause between them,
      # and fails the job if they never all accept a TCP connection.
      run: |
        echo "Waiting for S3 (5000), Azure (10000), GCS (4443)..."
        python3 -c "
        import socket, time
        for _ in range(45):
            try:
                for p in (5000, 10000, 4443):
                    # Close each probe connection immediately so retries do not leak sockets.
                    socket.create_connection(('127.0.0.1', p), timeout=2).close()
                print('Emulators ready.')
                break
            except OSError:
                time.sleep(2)
        else:
            raise SystemExit('Emulators did not become ready.')
        "
    - name: Test remote storage
      run: |
        uv run pytest tests/io/remote_storage/ --cov --color=yes --cov-report=xml
    - name: Upload coverage to Codecov
      uses: codecov/codecov-action@v5
      with:
        # One upload name per matrix entry.
        name: coverage-remote-storage-${{ matrix.python }}
        verbose: true
      env:
        CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ dev = [
"bump2version",
]
test = [
"adlfs",
"gcsfs",
"moto[server]",
"pytest",
"pytest-cov",
"pytest-mock",
Expand Down
103 changes: 76 additions & 27 deletions src/spatialdata/_core/spatialdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def __init__(
tables: dict[str, AnnData] | Tables | None = None,
attrs: Mapping[Any, Any] | None = None,
) -> None:
self._path: Path | None = None
self._path: Path | UPath | None = None

self._shared_keys: set[str | None] = set()
self._images: Images = Images(shared_keys=self._shared_keys)
Expand Down Expand Up @@ -548,16 +548,24 @@ def is_backed(self) -> bool:
return self.path is not None

@property
def path(self) -> Path | None:
"""Path to the Zarr storage."""
def path(self) -> Path | UPath | None:
"""Path to the Zarr storage (always :class:`pathlib.Path` or :class:`upath.UPath` when set)."""
return self._path

@path.setter
def path(self, value: Path | None) -> None:
if value is None or isinstance(value, str | Path):
def path(self, value: str | Path | UPath | None) -> None:
if value is None:
self._path = None
elif isinstance(value, (Path, UPath)):
self._path = value
elif isinstance(value, str):
# Match ``write()`` / ``_validate_can_safely_write_to_path``: keep ``self._path`` as Path | UPath only.
if "://" in value:
self._path = UPath(value)
else:
self._path = Path(value)
else:
raise TypeError("Path must be `None`, a `str` or a `Path` object.")
raise TypeError("Path must be `None`, a `str`, a `Path` or a `UPath` object.")

def locate_element(self, element: SpatialElement) -> list[str]:
"""
Expand Down Expand Up @@ -1032,18 +1040,45 @@ def _symmetric_difference_with_zarr_store(self) -> tuple[list[str], list[str]]:

def _validate_can_safely_write_to_path(
self,
file_path: str | Path,
file_path: str | Path | UPath,
overwrite: bool = False,
saving_an_element: bool = False,
) -> None:
from spatialdata._io._utils import _backed_elements_contained_in_path, _is_subfolder, _resolve_zarr_store
"""
Guard against unsafe writes for **local** paths (zarr check, Dask backing, subfolders).

For :class:`upath.UPath`, only "store exists vs ``overwrite``" is checked. Local Dask-backing
and subfolder checks are omitted because backing paths are filesystem-local and are not
compared to object-store keys; ``overwrite=True`` on remote URLs must be chosen carefully.
"""
from spatialdata._io._utils import (
_backed_elements_contained_in_path,
_is_subfolder,
_remote_zarr_store_exists,
_resolve_zarr_store,
)

if isinstance(file_path, str):
# Hierarchical URIs (``scheme://…``) must become UPath: plain ``Path(str)`` breaks cloud URLs
# (S3-compatible stores, Azure ``abfs://`` / ``az://``, GCS ``gs://``, ``https://``, fsspec chains, etc.).
if isinstance(file_path, str) and "://" in file_path:
file_path = UPath(file_path)
elif isinstance(file_path, str):
file_path = Path(file_path)

if not isinstance(file_path, Path):
raise ValueError(f"file_path must be a string or a Path object, type(file_path) = {type(file_path)}.")
if not isinstance(file_path, (Path, UPath)):
raise ValueError(f"file_path must be a string, Path or UPath object, type(file_path) = {type(file_path)}.")

if isinstance(file_path, UPath):
store = _resolve_zarr_store(file_path)
if _remote_zarr_store_exists(store) and not overwrite:
raise ValueError(
"The Zarr store already exists. Use `overwrite=True` to try overwriting the store. "
"Please note that only Zarr stores not currently in use by the current SpatialData object can be "
"overwritten."
)
return

# Local Path: existing logic
# TODO: add test for this
if os.path.exists(file_path):
store = _resolve_zarr_store(file_path)
Expand Down Expand Up @@ -1072,8 +1107,13 @@ def _validate_can_safely_write_to_path(
ERROR_MSG + "\nDetails: the target path contains one or more files that Dask use for "
"backing elements in the SpatialData object." + WORKAROUND
)
if self.path is not None and (
_is_subfolder(parent=self.path, child=file_path) or _is_subfolder(parent=file_path, child=self.path)
# Subfolder checks only for local paths (Path); skip when self.path is UPath
if (
self.path is not None
and isinstance(self.path, Path)
and (
_is_subfolder(parent=self.path, child=file_path) or _is_subfolder(parent=file_path, child=self.path)
)
):
if saving_an_element and _is_subfolder(parent=self.path, child=file_path):
raise ValueError(
Expand Down Expand Up @@ -1102,7 +1142,7 @@ def _validate_all_elements(self) -> None:
@_deprecation_alias(format="sdata_formats", version="0.7.0")
def write(
self,
file_path: str | Path,
file_path: str | Path | UPath | None = None,
overwrite: bool = False,
consolidate_metadata: bool = True,
update_sdata_path: bool = True,
Expand All @@ -1115,10 +1155,12 @@ def write(
Parameters
----------
file_path
The path to the Zarr store to write to.
The path to the Zarr store to write to. If ``None``, uses :attr:`path` (must be set).
overwrite
If `True`, overwrite the Zarr store if it already exists. If `False`, `write()` will fail if the Zarr store
already exists.
already exists. For remote paths (:class:`upath.UPath`), the extra safeguards used for local paths (that
Dask-backed files are not inside the write target) are not applied; use ``overwrite=True`` only when you
are sure the destination store may be replaced.
consolidate_metadata
If `True`, triggers :func:`zarr.convenience.consolidate_metadata`, which writes all the metadata in a single
file at the root directory of the store. This makes the data cloud accessible, which is required for certain
Expand Down Expand Up @@ -1161,7 +1203,15 @@ def write(

parsed = _parse_formats(sdata_formats)

if isinstance(file_path, str):
if file_path is None:
if self.path is None:
raise ValueError("file_path must be provided when SpatialData.path is not set.")
file_path = self.path
# Hierarchical URIs (``scheme://…``) must become UPath: plain ``Path(str)`` breaks cloud URLs
# (S3-compatible stores, Azure ``abfs://`` / ``az://``, GCS ``gs://``, ``https://``, fsspec chains, etc.).
if isinstance(file_path, str) and "://" in file_path:
file_path = UPath(file_path)
elif isinstance(file_path, str):
file_path = Path(file_path)
self._validate_can_safely_write_to_path(file_path, overwrite=overwrite)
self._validate_all_elements()
Expand Down Expand Up @@ -1192,7 +1242,7 @@ def write(
def _write_element(
self,
element: SpatialElement | AnnData,
zarr_container_path: Path,
zarr_container_path: Path | UPath,
element_type: str,
element_name: str,
overwrite: bool,
Expand All @@ -1201,10 +1251,8 @@ def _write_element(
) -> None:
from spatialdata._io.io_zarr import _get_groups_for_element

if not isinstance(zarr_container_path, Path):
raise ValueError(
f"zarr_container_path must be a Path object, type(zarr_container_path) = {type(zarr_container_path)}."
)
if not isinstance(zarr_container_path, (Path, UPath)):
raise ValueError(f"zarr_container_path must be a Path or UPath, got {type(zarr_container_path).__name__}.")
file_path_of_element = zarr_container_path / element_type / element_name
self._validate_can_safely_write_to_path(
file_path=file_path_of_element, overwrite=overwrite, saving_an_element=True
Expand Down Expand Up @@ -1489,7 +1537,7 @@ def _validate_can_write_metadata_on_element(self, element_name: str) -> tuple[st

# check if the element exists in the Zarr storage
if not _group_for_element_exists(
zarr_path=Path(self.path),
zarr_path=self.path,
element_type=element_type,
element_name=element_name,
):
Expand All @@ -1503,7 +1551,7 @@ def _validate_can_write_metadata_on_element(self, element_name: str) -> tuple[st

# warn the users if the element is not self-contained, that is, it is Dask-backed by files outside the Zarr
# group for the element
element_zarr_path = Path(self.path) / element_type / element_name
element_zarr_path = self.path / element_type / element_name
if not _is_element_self_contained(element=element, element_path=element_zarr_path):
logger.info(
f"Element {element_type}/{element_name} is not self-contained. The metadata will be"
Expand Down Expand Up @@ -1544,7 +1592,7 @@ def write_channel_names(self, element_name: str | None = None) -> None:
# Mypy does not understand that path is not None so we have the check in the conditional
if element_type == "images" and self.path is not None:
_, _, element_group = _get_groups_for_element(
zarr_path=Path(self.path), element_type=element_type, element_name=element_name, use_consolidated=False
zarr_path=self.path, element_type=element_type, element_name=element_name, use_consolidated=False
)

from spatialdata._io._utils import overwrite_channel_names
Expand Down Expand Up @@ -1588,7 +1636,7 @@ def write_transformations(self, element_name: str | None = None) -> None:
# Mypy does not understand that path is not None so we have a conditional
assert self.path is not None
_, _, element_group = _get_groups_for_element(
zarr_path=Path(self.path),
zarr_path=self.path,
element_type=element_type,
element_name=element_name,
use_consolidated=False,
Expand Down Expand Up @@ -1956,7 +2004,8 @@ def h(s: str) -> str:

descr = "SpatialData object"
if self.path is not None:
descr += f", with associated Zarr store: {self.path.resolve()}"
path_descr = str(self.path) if isinstance(self.path, UPath) else self.path.resolve()
descr += f", with associated Zarr store: {path_descr}"

non_empty_elements = self._non_empty_elements()
last_element_index = len(non_empty_elements) - 1
Expand Down
Loading
Loading