openproblems-bio · dariarom94 · Apr 8, 2026 · Apr 13, 2026 · Apr 13, 2026 · Apr 20, 2026
diff --git a/common b/common
diff --git a/src/data_processors/process_dataset/script.py b/src/data_processors/process_dataset/script.py
@@ -38,7 +38,7 @@ def get_crop_coords(sdata, max_n_pixels=20000*20000): #50000*50000):
         The crop coordinates
     """
 
-    _, h, w = sdata['morphology_mip']["scale0"].image.shape
+    _, h, w = sdata['image']["scale0"].image.shape
     #h, w = sdata
 
     # Check if the image is already below the maximum number of pixels
@@ -195,18 +195,23 @@ def subsample_adata_group_balanced(adata, group_key, n_samples, seed=0):
 adata.var.reset_index(inplace=True, drop=True)
 adata.var_names = adata.var["feature_name"].values.astype(str).tolist()
 
+# Ensure the metadata table exists in sdata (reuse "table" as "metadata" if present; the "table" entry itself is kept, not removed)
+if "metadata" not in sdata.tables:
+    if "table" in sdata.tables:
+        sdata["metadata"] = sdata["table"]
+    else:
+        sdata["metadata"] = ad.AnnData(uns={})
+
 # store metadata to adata and sdata uns
 metadata_uns_cols = ["dataset_id", "dataset_name", "dataset_url", "dataset_reference", "dataset_summary", "dataset_description", "dataset_organism"]
 for col in metadata_uns_cols:
     orig_col = f"orig_{col}"
     if orig_col in adata.uns:
         adata.uns[orig_col] = adata.uns[col]
     adata.uns[col] = par[col]
-    if not ("table" in sdata.tables):
-        sdata["table"] = ad.AnnData(uns={})
-    if orig_col in sdata["table"].uns:
-        sdata["table"].uns[orig_col] = sdata["table"].uns[col]
-    sdata["table"].uns[col] = par[col]
+    if orig_col in sdata["metadata"].uns:
+        sdata["metadata"].uns[orig_col] = sdata["metadata"].uns[col]
+    sdata["metadata"].uns[col] = par[col]
 
 # Correct the feature_key attribute in sdata if needed
 # NOTE: it would have been better to do this in the loader scripts, but this way the datasets don't need to be re-downloaded
@@ -215,6 +220,11 @@ def subsample_adata_group_balanced(adata, group_key, n_samples, seed=0):
     if feature_key != "feature_name":
         sdata['transcripts'].attrs["spatialdata_attrs"]["feature_key"] = "feature_name"
 
+# Rename image key to match API spec (file_common_ist.yaml expects "image")
+if "morphology_mip" in sdata.images:
+    sdata["image"] = sdata["morphology_mip"]
+    del sdata.images["morphology_mip"]
+
 # Crop datasets that are too large
 crop_coords = get_crop_coords(sdata)
 if crop_coords is not None:

diff --git a/src/methods_cell_type_annotation/rctd/config.vsh.yaml b/src/methods_cell_type_annotation/rctd/config.vsh.yaml
@@ -22,14 +22,13 @@ engines:
       #  run: |
       #    apt-get update && apt-get install -y wget
       - type: r
-        bioc: [anndataR, rhdf5, devtools]
-      #- type: r
-      #  bioc: [SummarizedExperiment,SingleCellExperiment,SpatialExperiment]
-      #  bioc_force_install: true
+        bioc: [SingleCellExperiment, anndataR, rhdf5, devtools]
+        # bioc_force_install: true
       - type: docker
         run: |
-          Rscript -e "BiocManager::install('SingleCellExperiment', type = 'source', force = TRUE, ask = FALSE); devtools::install_github('dmcable/spacexr', build_vignettes = FALSE)"
-
+          Rscript -e "options(timeout = 600000000); devtools::install_github('dmcable/spacexr', build_vignettes = FALSE)"
+# Rscript -e "BiocManager::install('SingleCellExperiment', type = 'source', force = TRUE, ask = FALSE); devtools::install_github('dmcable/spacexr', build_vignettes = FALSE)"
+
       # This can probably be left out again in the future. It currently fixes a bug described in these issues:
       # https://github.com/drighelli/SpatialExperiment/issues/171
       # https://github.com/satijalab/seurat/issues/9889

diff --git a/src/methods_cell_type_annotation/rctd/script.R b/src/methods_cell_type_annotation/rctd/script.R
@@ -1,6 +1,7 @@
 library(spacexr)
 library(Matrix)
 library(SingleCellExperiment)
+# library(SpatialExperiment)
 library(anndataR)
 
 ## VIASH START

diff --git a/src/methods_data_aggregation/aggregate_spatial_data/config.vsh.yaml b/src/methods_data_aggregation/aggregate_spatial_data/config.vsh.yaml
@@ -20,8 +20,11 @@ resources:
 engines:
   - type: docker
     image: openproblems/base_python:1
-    __merge__: 
+    __merge__:
       - /src/base/setup_spatialdata_partial.yaml
+    setup:
+      - type: python
+        pypi: [sopa]
   - type: native
 
 runners:

diff --git a/src/methods_data_aggregation/aggregate_spatial_data/script.py b/src/methods_data_aggregation/aggregate_spatial_data/script.py
@@ -1,5 +1,10 @@
 import anndata as ad
+import geopandas as gpd
+import sopa
 import spatialdata as sd
+from shapely.geometry import MultiPoint
+from spatialdata.models import ShapesModel
+from sopa.utils import copy_transformations
 
 ## VIASH START
 par = {
@@ -36,9 +41,14 @@
   del sdata.points[key]
 
 for key in list(sdata.tables.keys()):
-  if key != 'metadata':
+  if key not in ['metadata', 'table']:
     del sdata.tables[key]
 
+# raw_ist.zarr stores the metadata table as 'table'; rename to match the output spec
+if 'table' in sdata.tables and 'metadata' not in sdata.tables:
+  sdata['metadata'] = sdata.tables['table']
+  del sdata.tables['table']
+
 # sdata_transcripts
 for col in list(sdata_transcripts["transcripts"].columns):
   if col not in ['x', 'y', 'z', 'feature_name', 'cell_id', 'transcript_id']:
@@ -69,6 +79,20 @@
 adata.obs['passed_QC'] = adata_qc_col.obs['passed_QC']
 sdata['counts'] = adata
 
+#######################
+# Compute cell shapes #
+#######################
+print('Computing cell boundaries from transcripts using convex hulls', flush=True)
+transcripts_df = sdata_transcripts["transcripts"].compute()
+transcripts_assigned = transcripts_df[transcripts_df["cell_id"] != 0]
+cell_shapes = transcripts_assigned.groupby("cell_id")[["x", "y"]].apply(
+  lambda g: MultiPoint(list(zip(g["x"], g["y"]))).convex_hull
+)
+geo_df = gpd.GeoDataFrame(geometry=cell_shapes)
+geo_df = sopa.shapes.to_valid_polygons(geo_df)
+transformations = copy_transformations(sdata_transcripts["transcripts"])
+sdata["cell_boundaries"] = ShapesModel.parse(geo_df, transformations=transformations)
+
 #################
 # Write output #
 #################

diff --git a/src/methods_expression_correction/split/config.vsh.yaml b/src/methods_expression_correction/split/config.vsh.yaml
@@ -29,11 +29,14 @@ engines:
       - type: docker
         run: |
           apt-get update
+      # - type: r
+      #   packages: [fs, rlang, lifecycle]
       - type: r
-        bioc: [anndataR, rhdf5, devtools, scater]
+        bioc: [SingleCellExperiment, anndataR, rhdf5, devtools, scater]
+        # bioc: [SpatialExperiment, anndataR, rhdf5, devtools, scater]
       - type: docker
         run: |
-          Rscript -e "BiocManager::install('SingleCellExperiment', type = 'source', force = TRUE, ask = FALSE); options(timeout = 600000000); devtools::install_github('dmcable/spacexr', build_vignettes = FALSE); devtools::install_github('bdsc-tds/SPLIT')"
+          Rscript -e "options(timeout = 600000000); devtools::install_github('dmcable/spacexr', build_vignettes = FALSE); devtools::install_github('bdsc-tds/SPLIT')"
 
       # SingleCellExperiment part can probably be left out again in the future. It currently fixes a bug described in these issues:
       # https://github.com/drighelli/SpatialExperiment/issues/171

diff --git a/src/methods_segmentation/binning/script.py b/src/methods_segmentation/binning/script.py
@@ -50,6 +50,10 @@ def convert_to_lower_dtype(arr):
 data_array = xr.DataArray(image, name=f'segmentation', dims=('y', 'x'))
 parsed_data = Labels2DModel.parse(data_array, transformations=transformation)
 sd_output.labels['segmentation'] = parsed_data
+sd_output.tables['table'] = ad.AnnData(
+      obs=sdata.tables["table"].obs[["cell_id", "region"]],
+      var=sdata.tables["table"].var[[]]
+    )
 
 print("Writing output", flush=True)
 if os.path.exists(par["output"]):

diff --git a/src/methods_segmentation/cellpose/script.py b/src/methods_segmentation/cellpose/script.py
@@ -50,6 +50,11 @@ def convert_to_lower_dtype(arr):
 parsed_data = Labels2DModel.parse(data_array, transformations=transformation)
 sd_output.labels['segmentation'] = parsed_data
 
+sd_output.tables['table'] = ad.AnnData(
+      obs=sdata.tables["table"].obs[["cell_id", "region"]],
+      var=sdata.tables["table"].var[[]]
+    )
+
 print("Writing output", flush=True)
 if os.path.exists(par["output"]):
   shutil.rmtree(par["output"])

diff --git a/src/methods_segmentation/custom_segmentation/script.py b/src/methods_segmentation/custom_segmentation/script.py
@@ -23,13 +23,13 @@
 sdata_segmentation_only = sd.SpatialData(
   labels={
     "segmentation": sdata[par["labels_key"]]
-  }#,
-  #tables={
-  #  "table": ad.AnnData(
-  #    obs=sdata.tables["table"].obs[["cell_id", "region"]],
-  #    var=sdata.tables["table"].var[[]]
-  #  )
-  #}
+  },
+  tables={
+    "table": ad.AnnData(
+      obs=sdata.tables["table"].obs[["cell_id", "region"]],
+      var=sdata.tables["table"].var[[]]
+    )
+  }
 )
 
 print("Writing output", flush=True)

diff --git a/src/methods_segmentation/stardist/script.py b/src/methods_segmentation/stardist/script.py
@@ -4,6 +4,7 @@
 import numpy as np
 import xarray as xr
 import spatialdata as sd
+import anndata as ad
 #from csbdeep.utils import normalize
 from csbdeep.data import Normalizer, normalize_mi_ma
 from stardist.models import StarDist2D
@@ -35,8 +36,8 @@ def convert_to_lower_dtype(arr):
 
 # Read image and its transformation
 sdata = sd.read_zarr(par["input"])
-image = sdata['morphology_mip']['scale0'].image.compute().to_numpy()
-transformation = sdata['morphology_mip']['scale0'].image.transform.copy()
+image = sdata['image']['scale0'].image.compute().to_numpy()
+transformation = sdata['image']['scale0'].image.transform.copy()
 
 # Segment image
 
@@ -76,6 +77,11 @@ def do_after(self):
 parsed_labels = sd.models.Labels2DModel.parse(labels_array, transformations=transformation)
 sd_output.labels['segmentation'] = parsed_labels
 
+sd_output.tables['table'] = ad.AnnData(
+      obs=sdata.tables["table"].obs[["cell_id", "region"]],
+      var=sdata.tables["table"].var[[]]
+    )
+
 print("Writing output", flush=True)
 Path(par["output"]).parent.mkdir(parents=True, exist_ok=True)
 if os.path.exists(par["output"]):

diff --git a/src/methods_segmentation/watershed/script.py b/src/methods_segmentation/watershed/script.py
@@ -50,6 +50,11 @@ def convert_to_lower_dtype(arr):
 parsed_data = Labels2DModel.parse(data_array, transformations=transformation)
 sd_output.labels['segmentation'] = parsed_data
 
+sd_output.tables['table'] = ad.AnnData(
+      obs=sdata.tables["table"].obs[["cell_id", "region"]],
+      var=sdata.tables["table"].var[[]]
+    )
+
 print("Writing output", flush=True)
 if os.path.exists(par["output"]):
   shutil.rmtree(par["output"])
+58 −137		component_tests/run_and_check_output.py
+0 −21		nextflow_helpers/README.md
+0 −232		nextflow_helpers/benchmarkHelper.nf
+9 −58		nextflow_helpers/labels_tw.config
+0 −2,786		nextflow_helpers/workflowHelper.nf
+0 −35		schemas/results_v4/combined_output.json
+0 −63		schemas/results_v4/core.json
+0 −90		schemas/results_v4/dataset_info.json
+0 −84		schemas/results_v4/method_info.json
+0 −77		schemas/results_v4/metric_info.json
+0 −50		schemas/results_v4/quality_control.json
+0 −183		schemas/results_v4/results.json
+0 −64		schemas/results_v4/task_info.json
+3 −3		scripts/create_component
+4 −4		scripts/create_task_readme
+0 −418		scripts/render_results_report
+3 −3		scripts/sync_resources