diff --git a/common b/common index c4ab960e2..65e05af68 160000 --- a/common +++ b/common @@ -1 +1 @@ -Subproject commit c4ab960e25910f0fe9ddabd2422ba65057f0b90b +Subproject commit 65e05af68a11ee87853fcf7a3c6b579001f21abe diff --git a/src/data_processors/process_dataset/script.py b/src/data_processors/process_dataset/script.py index dde37f4a8..b1e2312cb 100644 --- a/src/data_processors/process_dataset/script.py +++ b/src/data_processors/process_dataset/script.py @@ -38,7 +38,7 @@ def get_crop_coords(sdata, max_n_pixels=20000*20000): #50000*50000): The crop coordinates """ - _, h, w = sdata['morphology_mip']["scale0"].image.shape + _, h, w = sdata['image']["scale0"].image.shape #h, w = sdata # Check if the image is already below the maximum number of pixels @@ -195,6 +195,13 @@ def subsample_adata_group_balanced(adata, group_key, n_samples, seed=0): adata.var.reset_index(inplace=True, drop=True) adata.var_names = adata.var["feature_name"].values.astype(str).tolist() +# Ensure the metadata table exists in sdata (reuse the existing "table" as "metadata" if needed; "table" itself is kept) +if "metadata" not in sdata.tables: + if "table" in sdata.tables: + sdata["metadata"] = sdata["table"] + else: + sdata["metadata"] = ad.AnnData(uns={}) + # store metadata to adata and sdata uns metadata_uns_cols = ["dataset_id", "dataset_name", "dataset_url", "dataset_reference", "dataset_summary", "dataset_description", "dataset_organism"] for col in metadata_uns_cols: @@ -202,11 +209,9 @@ def subsample_adata_group_balanced(adata, group_key, n_samples, seed=0): if orig_col in adata.uns: adata.uns[orig_col] = adata.uns[col] adata.uns[col] = par[col] - if not ("table" in sdata.tables): - sdata["table"] = ad.AnnData(uns={}) - if orig_col in sdata["table"].uns: - sdata["table"].uns[orig_col] = sdata["table"].uns[col] - sdata["table"].uns[col] = par[col] + if orig_col in sdata["metadata"].uns: + sdata["metadata"].uns[orig_col] = sdata["metadata"].uns[col] + sdata["metadata"].uns[col] = par[col] # Correct the feature_key 
attribute in sdata if needed # NOTE: it would have been better to do this in the loader scripts, but this way the datasets don't need to be re-downloaded @@ -215,6 +220,11 @@ def subsample_adata_group_balanced(adata, group_key, n_samples, seed=0): if feature_key != "feature_name": sdata['transcripts'].attrs["spatialdata_attrs"]["feature_key"] = "feature_name" +# Rename image key to match API spec (file_common_ist.yaml expects "image") +if "morphology_mip" in sdata.images: + sdata["image"] = sdata["morphology_mip"] + del sdata.images["morphology_mip"] + # Crop datasets that are too large crop_coords = get_crop_coords(sdata) if crop_coords is not None: diff --git a/src/methods_cell_type_annotation/rctd/config.vsh.yaml b/src/methods_cell_type_annotation/rctd/config.vsh.yaml index b3b8d2d74..5c0b94691 100644 --- a/src/methods_cell_type_annotation/rctd/config.vsh.yaml +++ b/src/methods_cell_type_annotation/rctd/config.vsh.yaml @@ -22,14 +22,13 @@ engines: # run: | # apt-get update && apt-get install -y wget - type: r - bioc: [anndataR, rhdf5, devtools] - #- type: r - # bioc: [SummarizedExperiment,SingleCellExperiment,SpatialExperiment] - # bioc_force_install: true + bioc: [SingleCellExperiment, anndataR, rhdf5, devtools] + # bioc_force_install: true - type: docker run: | - Rscript -e "BiocManager::install('SingleCellExperiment', type = 'source', force = TRUE, ask = FALSE); devtools::install_github('dmcable/spacexr', build_vignettes = FALSE)" - + Rscript -e "options(timeout = 600000000); devtools::install_github('dmcable/spacexr', build_vignettes = FALSE)" +# Rscript -e "BiocManager::install('SingleCellExperiment', type = 'source', force = TRUE, ask = FALSE); devtools::install_github('dmcable/spacexr', build_vignettes = FALSE)" + # This can probably be left out again in the future. 
It currently fixes a bug described in these issues: # https://github.com/drighelli/SpatialExperiment/issues/171 # https://github.com/satijalab/seurat/issues/9889 diff --git a/src/methods_cell_type_annotation/rctd/script.R b/src/methods_cell_type_annotation/rctd/script.R index d07bb334a..ce4fc051f 100644 --- a/src/methods_cell_type_annotation/rctd/script.R +++ b/src/methods_cell_type_annotation/rctd/script.R @@ -1,6 +1,7 @@ library(spacexr) library(Matrix) library(SingleCellExperiment) +# library(SpatialExperiment) library(anndataR) ## VIASH START diff --git a/src/methods_data_aggregation/aggregate_spatial_data/config.vsh.yaml b/src/methods_data_aggregation/aggregate_spatial_data/config.vsh.yaml index eb9613f97..6d5a3b04a 100644 --- a/src/methods_data_aggregation/aggregate_spatial_data/config.vsh.yaml +++ b/src/methods_data_aggregation/aggregate_spatial_data/config.vsh.yaml @@ -20,8 +20,11 @@ resources: engines: - type: docker image: openproblems/base_python:1 - __merge__: + __merge__: - /src/base/setup_spatialdata_partial.yaml + setup: + - type: python + pypi: [sopa] - type: native runners: diff --git a/src/methods_data_aggregation/aggregate_spatial_data/script.py b/src/methods_data_aggregation/aggregate_spatial_data/script.py index 745fbac2c..99a10b09c 100644 --- a/src/methods_data_aggregation/aggregate_spatial_data/script.py +++ b/src/methods_data_aggregation/aggregate_spatial_data/script.py @@ -1,5 +1,10 @@ import anndata as ad +import geopandas as gpd +import sopa import spatialdata as sd +from shapely.geometry import MultiPoint +from spatialdata.models import ShapesModel +from sopa.utils import copy_transformations ## VIASH START par = { @@ -36,9 +41,14 @@ del sdata.points[key] for key in list(sdata.tables.keys()): - if key != 'metadata': + if key not in ['metadata', 'table']: del sdata.tables[key] +# raw_ist.zarr stores the metadata table as 'table'; rename to match the output spec +if 'table' in sdata.tables and 'metadata' not in sdata.tables: + 
sdata['metadata'] = sdata.tables['table'] + del sdata.tables['table'] + # sdata_transcripts for col in list(sdata_transcripts["transcripts"].columns): if col not in ['x', 'y', 'z', 'feature_name', 'cell_id', 'transcript_id']: @@ -69,6 +79,20 @@ adata.obs['passed_QC'] = adata_qc_col.obs['passed_QC'] sdata['counts'] = adata +####################### +# Compute cell shapes # +####################### +print('Computing cell boundaries from transcripts using convex hulls', flush=True) +transcripts_df = sdata_transcripts["transcripts"].compute() +transcripts_assigned = transcripts_df[transcripts_df["cell_id"] != 0] +cell_shapes = transcripts_assigned.groupby("cell_id")[["x", "y"]].apply( + lambda g: MultiPoint(list(zip(g["x"], g["y"]))).convex_hull +) +geo_df = gpd.GeoDataFrame(geometry=cell_shapes) +geo_df = sopa.shapes.to_valid_polygons(geo_df) +transformations = copy_transformations(sdata_transcripts["transcripts"]) +sdata["cell_boundaries"] = ShapesModel.parse(geo_df, transformations=transformations) + ################# # Write output # ################# diff --git a/src/methods_expression_correction/split/config.vsh.yaml b/src/methods_expression_correction/split/config.vsh.yaml index 18a3fabbe..a5ca9dbd0 100644 --- a/src/methods_expression_correction/split/config.vsh.yaml +++ b/src/methods_expression_correction/split/config.vsh.yaml @@ -29,11 +29,14 @@ engines: - type: docker run: | apt-get update + # - type: r + # packages: [fs, rlang, lifecycle] - type: r - bioc: [anndataR, rhdf5, devtools, scater] + bioc: [SingleCellExperiment, anndataR, rhdf5, devtools, scater] + # bioc: [SpatialExperiment, anndataR, rhdf5, devtools, scater] - type: docker run: | - Rscript -e "BiocManager::install('SingleCellExperiment', type = 'source', force = TRUE, ask = FALSE); options(timeout = 600000000); devtools::install_github('dmcable/spacexr', build_vignettes = FALSE); devtools::install_github('bdsc-tds/SPLIT')" + Rscript -e "options(timeout = 600000000); 
devtools::install_github('dmcable/spacexr', build_vignettes = FALSE); devtools::install_github('bdsc-tds/SPLIT')" # SingleCellExperiment part can probably be left out again in the future. It currently fixes a bug described in these issues: # https://github.com/drighelli/SpatialExperiment/issues/171 diff --git a/src/methods_segmentation/binning/script.py b/src/methods_segmentation/binning/script.py index 2e837d671..c564ac483 100644 --- a/src/methods_segmentation/binning/script.py +++ b/src/methods_segmentation/binning/script.py @@ -50,6 +50,10 @@ def convert_to_lower_dtype(arr): data_array = xr.DataArray(image, name=f'segmentation', dims=('y', 'x')) parsed_data = Labels2DModel.parse(data_array, transformations=transformation) sd_output.labels['segmentation'] = parsed_data +sd_output.tables['table'] = ad.AnnData( + obs=sdata.tables["table"].obs[["cell_id", "region"]], + var=sdata.tables["table"].var[[]] + ) print("Writing output", flush=True) if os.path.exists(par["output"]): diff --git a/src/methods_segmentation/cellpose/script.py b/src/methods_segmentation/cellpose/script.py index 15f85901b..f0c375651 100644 --- a/src/methods_segmentation/cellpose/script.py +++ b/src/methods_segmentation/cellpose/script.py @@ -50,6 +50,11 @@ def convert_to_lower_dtype(arr): parsed_data = Labels2DModel.parse(data_array, transformations=transformation) sd_output.labels['segmentation'] = parsed_data +sd_output.tables['table'] = ad.AnnData( + obs=sdata.tables["table"].obs[["cell_id", "region"]], + var=sdata.tables["table"].var[[]] + ) + print("Writing output", flush=True) if os.path.exists(par["output"]): shutil.rmtree(par["output"]) diff --git a/src/methods_segmentation/custom_segmentation/script.py b/src/methods_segmentation/custom_segmentation/script.py index a63e18a01..8f6cb9724 100644 --- a/src/methods_segmentation/custom_segmentation/script.py +++ b/src/methods_segmentation/custom_segmentation/script.py @@ -23,13 +23,13 @@ sdata_segmentation_only = sd.SpatialData( labels={ 
"segmentation": sdata[par["labels_key"]] - }#, - #tables={ - # "table": ad.AnnData( - # obs=sdata.tables["table"].obs[["cell_id", "region"]], - # var=sdata.tables["table"].var[[]] - # ) - #} + }, + tables={ + "table": ad.AnnData( + obs=sdata.tables["table"].obs[["cell_id", "region"]], + var=sdata.tables["table"].var[[]] + ) + } ) print("Writing output", flush=True) diff --git a/src/methods_segmentation/stardist/script.py b/src/methods_segmentation/stardist/script.py index 7fea783af..968e5bbf8 100644 --- a/src/methods_segmentation/stardist/script.py +++ b/src/methods_segmentation/stardist/script.py @@ -4,6 +4,7 @@ import numpy as np import xarray as xr import spatialdata as sd +import anndata as ad #from csbdeep.utils import normalize from csbdeep.data import Normalizer, normalize_mi_ma from stardist.models import StarDist2D @@ -35,8 +36,8 @@ def convert_to_lower_dtype(arr): # Read image and its transformation sdata = sd.read_zarr(par["input"]) -image = sdata['morphology_mip']['scale0'].image.compute().to_numpy() -transformation = sdata['morphology_mip']['scale0'].image.transform.copy() +image = sdata['image']['scale0'].image.compute().to_numpy() +transformation = sdata['image']['scale0'].image.transform.copy() # Segment image @@ -76,6 +77,11 @@ def do_after(self): parsed_labels = sd.models.Labels2DModel.parse(labels_array, transformations=transformation) sd_output.labels['segmentation'] = parsed_labels +sd_output.tables['table'] = ad.AnnData( + obs=sdata.tables["table"].obs[["cell_id", "region"]], + var=sdata.tables["table"].var[[]] + ) + print("Writing output", flush=True) Path(par["output"]).parent.mkdir(parents=True, exist_ok=True) if os.path.exists(par["output"]): diff --git a/src/methods_segmentation/watershed/script.py b/src/methods_segmentation/watershed/script.py index f498757d9..3651cc2d3 100644 --- a/src/methods_segmentation/watershed/script.py +++ b/src/methods_segmentation/watershed/script.py @@ -50,6 +50,11 @@ def convert_to_lower_dtype(arr): 
parsed_data = Labels2DModel.parse(data_array, transformations=transformation) sd_output.labels['segmentation'] = parsed_data +sd_output.tables['table'] = ad.AnnData( + obs=sdata.tables["table"].obs[["cell_id", "region"]], + var=sdata.tables["table"].var[[]] + ) + print("Writing output", flush=True) if os.path.exists(par["output"]): shutil.rmtree(par["output"])