Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 16 additions & 6 deletions src/data_processors/process_dataset/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def get_crop_coords(sdata, max_n_pixels=20000*20000): #50000*50000):
The crop coordinates
"""

_, h, w = sdata['morphology_mip']["scale0"].image.shape
_, h, w = sdata['image']["scale0"].image.shape
#h, w = sdata

# Check if the image is already below the maximum number of pixels
Expand Down Expand Up @@ -195,18 +195,23 @@ def subsample_adata_group_balanced(adata, group_key, n_samples, seed=0):
adata.var.reset_index(inplace=True, drop=True)
adata.var_names = adata.var["feature_name"].values.astype(str).tolist()

# Ensure the metadata table exists in sdata (rename "table" -> "metadata" if needed)
if "metadata" not in sdata.tables:
if "table" in sdata.tables:
sdata["metadata"] = sdata["table"]
else:
sdata["metadata"] = ad.AnnData(uns={})

# store metadata to adata and sdata uns
metadata_uns_cols = ["dataset_id", "dataset_name", "dataset_url", "dataset_reference", "dataset_summary", "dataset_description", "dataset_organism"]
for col in metadata_uns_cols:
orig_col = f"orig_{col}"
if orig_col in adata.uns:
adata.uns[orig_col] = adata.uns[col]
adata.uns[col] = par[col]
if not ("table" in sdata.tables):
sdata["table"] = ad.AnnData(uns={})
if orig_col in sdata["table"].uns:
sdata["table"].uns[orig_col] = sdata["table"].uns[col]
sdata["table"].uns[col] = par[col]
if orig_col in sdata["metadata"].uns:
sdata["metadata"].uns[orig_col] = sdata["metadata"].uns[col]
sdata["metadata"].uns[col] = par[col]

# Correct the feature_key attribute in sdata if needed
# NOTE: it would have been better to do this in the loader scripts, but this way the datasets don't need to be re-downloaded
Expand All @@ -215,6 +220,11 @@ def subsample_adata_group_balanced(adata, group_key, n_samples, seed=0):
if feature_key != "feature_name":
sdata['transcripts'].attrs["spatialdata_attrs"]["feature_key"] = "feature_name"

# Rename image key to match API spec (file_common_ist.yaml expects "image")
if "morphology_mip" in sdata.images:
sdata["image"] = sdata["morphology_mip"]
del sdata.images["morphology_mip"]

# Crop datasets that are too large
crop_coords = get_crop_coords(sdata)
if crop_coords is not None:
Expand Down
11 changes: 5 additions & 6 deletions src/methods_cell_type_annotation/rctd/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,13 @@ engines:
# run: |
# apt-get update && apt-get install -y wget
- type: r
bioc: [anndataR, rhdf5, devtools]
#- type: r
# bioc: [SummarizedExperiment,SingleCellExperiment,SpatialExperiment]
# bioc_force_install: true
bioc: [SingleCellExperiment, anndataR, rhdf5, devtools]
# bioc_force_install: true
- type: docker
run: |
Rscript -e "BiocManager::install('SingleCellExperiment', type = 'source', force = TRUE, ask = FALSE); devtools::install_github('dmcable/spacexr', build_vignettes = FALSE)"

Rscript -e "options(timeout = 600000000); devtools::install_github('dmcable/spacexr', build_vignettes = FALSE)"
# Rscript -e "BiocManager::install('SingleCellExperiment', type = 'source', force = TRUE, ask = FALSE); devtools::install_github('dmcable/spacexr', build_vignettes = FALSE)"

# This can probably be left out again in the future. It currently fixes a bug described in these issues:
# https://github.com/drighelli/SpatialExperiment/issues/171
# https://github.com/satijalab/seurat/issues/9889
Expand Down
1 change: 1 addition & 0 deletions src/methods_cell_type_annotation/rctd/script.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
library(spacexr)
library(Matrix)
library(SingleCellExperiment)
# library(SpatialExperiment)
library(anndataR)

## VIASH START
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,11 @@ resources:
engines:
- type: docker
image: openproblems/base_python:1
__merge__:
__merge__:
- /src/base/setup_spatialdata_partial.yaml
setup:
- type: python
pypi: [sopa]
- type: native

runners:
Expand Down
26 changes: 25 additions & 1 deletion src/methods_data_aggregation/aggregate_spatial_data/script.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import anndata as ad
import geopandas as gpd
import sopa
import spatialdata as sd
from shapely.geometry import MultiPoint
from spatialdata.models import ShapesModel
from sopa.utils import copy_transformations

## VIASH START
par = {
Expand Down Expand Up @@ -36,9 +41,14 @@
del sdata.points[key]

for key in list(sdata.tables.keys()):
if key != 'metadata':
if key not in ['metadata', 'table']:
del sdata.tables[key]

# raw_ist.zarr stores the metadata table as 'table'; rename to match the output spec
if 'table' in sdata.tables and 'metadata' not in sdata.tables:
sdata['metadata'] = sdata.tables['table']
del sdata.tables['table']

# sdata_transcripts
for col in list(sdata_transcripts["transcripts"].columns):
if col not in ['x', 'y', 'z', 'feature_name', 'cell_id', 'transcript_id']:
Expand Down Expand Up @@ -69,6 +79,20 @@
adata.obs['passed_QC'] = adata_qc_col.obs['passed_QC']
sdata['counts'] = adata

#######################
# Compute cell shapes #
#######################
print('Computing cell boundaries from transcripts using convex hulls', flush=True)
transcripts_df = sdata_transcripts["transcripts"].compute()
transcripts_assigned = transcripts_df[transcripts_df["cell_id"] != 0]
cell_shapes = transcripts_assigned.groupby("cell_id")[["x", "y"]].apply(
lambda g: MultiPoint(list(zip(g["x"], g["y"]))).convex_hull
)
geo_df = gpd.GeoDataFrame(geometry=cell_shapes)
geo_df = sopa.shapes.to_valid_polygons(geo_df)
transformations = copy_transformations(sdata_transcripts["transcripts"])
sdata["cell_boundaries"] = ShapesModel.parse(geo_df, transformations=transformations)

#################
# Write output #
#################
Expand Down
7 changes: 5 additions & 2 deletions src/methods_expression_correction/split/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,14 @@ engines:
- type: docker
run: |
apt-get update
# - type: r
# packages: [fs, rlang, lifecycle]
- type: r
bioc: [anndataR, rhdf5, devtools, scater]
bioc: [SingleCellExperiment, anndataR, rhdf5, devtools, scater]
# bioc: [SpatialExperiment, anndataR, rhdf5, devtools, scater]
- type: docker
run: |
Rscript -e "BiocManager::install('SingleCellExperiment', type = 'source', force = TRUE, ask = FALSE); options(timeout = 600000000); devtools::install_github('dmcable/spacexr', build_vignettes = FALSE); devtools::install_github('bdsc-tds/SPLIT')"
Rscript -e "options(timeout = 600000000); devtools::install_github('dmcable/spacexr', build_vignettes = FALSE); devtools::install_github('bdsc-tds/SPLIT')"

# SingleCellExperiment part can probably be left out again in the future. It currently fixes a bug described in these issues:
# https://github.com/drighelli/SpatialExperiment/issues/171
Expand Down
4 changes: 4 additions & 0 deletions src/methods_segmentation/binning/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ def convert_to_lower_dtype(arr):
data_array = xr.DataArray(image, name=f'segmentation', dims=('y', 'x'))
parsed_data = Labels2DModel.parse(data_array, transformations=transformation)
sd_output.labels['segmentation'] = parsed_data
sd_output.tables['table'] = ad.AnnData(
obs=sdata.tables["table"].obs[["cell_id", "region"]],
var=sdata.tables["table"].var[[]]
)

print("Writing output", flush=True)
if os.path.exists(par["output"]):
Expand Down
5 changes: 5 additions & 0 deletions src/methods_segmentation/cellpose/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ def convert_to_lower_dtype(arr):
parsed_data = Labels2DModel.parse(data_array, transformations=transformation)
sd_output.labels['segmentation'] = parsed_data

sd_output.tables['table'] = ad.AnnData(
obs=sdata.tables["table"].obs[["cell_id", "region"]],
var=sdata.tables["table"].var[[]]
)

print("Writing output", flush=True)
if os.path.exists(par["output"]):
shutil.rmtree(par["output"])
Expand Down
14 changes: 7 additions & 7 deletions src/methods_segmentation/custom_segmentation/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@
sdata_segmentation_only = sd.SpatialData(
labels={
"segmentation": sdata[par["labels_key"]]
}#,
#tables={
# "table": ad.AnnData(
# obs=sdata.tables["table"].obs[["cell_id", "region"]],
# var=sdata.tables["table"].var[[]]
# )
#}
},
tables={
"table": ad.AnnData(
obs=sdata.tables["table"].obs[["cell_id", "region"]],
var=sdata.tables["table"].var[[]]
)
}
)

print("Writing output", flush=True)
Expand Down
10 changes: 8 additions & 2 deletions src/methods_segmentation/stardist/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import numpy as np
import xarray as xr
import spatialdata as sd
import anndata as ad
#from csbdeep.utils import normalize
from csbdeep.data import Normalizer, normalize_mi_ma
from stardist.models import StarDist2D
Expand Down Expand Up @@ -35,8 +36,8 @@ def convert_to_lower_dtype(arr):

# Read image and its transformation
sdata = sd.read_zarr(par["input"])
image = sdata['morphology_mip']['scale0'].image.compute().to_numpy()
transformation = sdata['morphology_mip']['scale0'].image.transform.copy()
image = sdata['image']['scale0'].image.compute().to_numpy()
transformation = sdata['image']['scale0'].image.transform.copy()

# Segment image

Expand Down Expand Up @@ -76,6 +77,11 @@ def do_after(self):
parsed_labels = sd.models.Labels2DModel.parse(labels_array, transformations=transformation)
sd_output.labels['segmentation'] = parsed_labels

sd_output.tables['table'] = ad.AnnData(
obs=sdata.tables["table"].obs[["cell_id", "region"]],
var=sdata.tables["table"].var[[]]
)

print("Writing output", flush=True)
Path(par["output"]).parent.mkdir(parents=True, exist_ok=True)
if os.path.exists(par["output"]):
Expand Down
5 changes: 5 additions & 0 deletions src/methods_segmentation/watershed/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ def convert_to_lower_dtype(arr):
parsed_data = Labels2DModel.parse(data_array, transformations=transformation)
sd_output.labels['segmentation'] = parsed_data

sd_output.tables['table'] = ad.AnnData(
obs=sdata.tables["table"].obs[["cell_id", "region"]],
var=sdata.tables["table"].var[[]]
)

print("Writing output", flush=True)
if os.path.exists(par["output"]):
shutil.rmtree(par["output"])
Expand Down
Loading