From 45b9303d32432ec6954cd480d2f3c4e8d30ebf4a Mon Sep 17 00:00:00 2001
From: dariarom94
Date: Mon, 20 Apr 2026 09:36:29 +0200
Subject: [PATCH 1/5] yaml files

---
 src/base/setup_spatialdata_partial.yaml |  3 +++
 src/base/setup_txsim_partial.yaml       | 13 +++++++++++++
 2 files changed, 16 insertions(+)
 create mode 100644 src/base/setup_spatialdata_partial.yaml
 create mode 100644 src/base/setup_txsim_partial.yaml

diff --git a/src/base/setup_spatialdata_partial.yaml b/src/base/setup_spatialdata_partial.yaml
new file mode 100644
index 0000000..d2b72a2
--- /dev/null
+++ b/src/base/setup_spatialdata_partial.yaml
@@ -0,0 +1,3 @@
+setup:
+  - type: python
+    pypi: ["spatialdata", "anndata>=0.12.0", "zarr>=3.0.0"]
diff --git a/src/base/setup_txsim_partial.yaml b/src/base/setup_txsim_partial.yaml
new file mode 100644
index 0000000..5646a46
--- /dev/null
+++ b/src/base/setup_txsim_partial.yaml
@@ -0,0 +1,13 @@
+setup:
+  - type: python
+    pypi: ["spatialdata==0.5.0", "anndata>=0.12.0", "pyarrow<22.0.0", "zarr<3.0.0"]
+    # 1. remove pyarrow when https://github.com/scverse/spatialdata/issues/1007 is fixed.
+    #    This is actually fixed now with the spatialdata release 0.6.0. However, the new
+    #    release now comes with zarr 3.0.0. When reading a zarr file that was saved with
+    #    zarr 3.0.0 we can not load it with zarr<3.0.0. (PathNotFoundError: nothing found at path '')
+    # 2. Currently sopa enforces zarr<3.0.0. Therefore we need to save all our data with zarr<3.0.0.
+    #    As soon as this is fixed (https://github.com/gustaveroussy/sopa/issues/347):
+    #    - remove restriction on spatialdata
+    #    - remove zarr<3.0.0
+    #    - remove pyarrow<22.0.0
+    #    - Recreate all the datasets (scripts/create_resources/combine/process_datasets.sh)

From b64a4446e05b163e4a47ce770f254cb75b287873 Mon Sep 17 00:00:00 2001
From: dariarom94
Date: Mon, 20 Apr 2026 09:36:50 +0200
Subject: [PATCH 2/5] sync to add txsim resources

---
 _viash.yaml | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/_viash.yaml b/_viash.yaml
index 51acb10..ab15894 100644
--- a/_viash.yaml
+++ b/_viash.yaml
@@ -47,14 +47,16 @@ references:
 
 info:
   image: The name of the image file to use for the component on the website.
-  # Step 5: Replace the task_template to the name of the task.
   test_resources:
     - type: s3
-      path: s3://openproblems-data/resources_test/common/
-      dest: resources_test/common
+      path: s3://openproblems-data/resources_test/common/2023_10x_mouse_brain_xenium_rep1/
+      dest: resources_test/common/2023_10x_mouse_brain_xenium_rep1/
     - type: s3
-      path: s3://openproblems-data/resources_test/task_template/
-      dest: resources_test/task_template
+      path: s3://openproblems-data/resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2/
+      dest: resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2/
+    - type: s3
+      path: s3://openproblems-data/resources_test/task_ist_preprocessing/
+      dest: resources_test/task_ist_preprocessing
 
 # Step 6: Update the authors of the task.
 authors:

From 73f9c03a080e1840f5b36a77d49c54f012ba7f43 Mon Sep 17 00:00:00 2001
From: dariarom94
Date: Mon, 20 Apr 2026 09:51:37 +0200
Subject: [PATCH 3/5] test resources scripts

---
 .../2023_10x_mouse_brain_xenium_rep1.sh       | 52 +++++++++++++++++++
 .../2023_yao_mouse_brain_scrnaseq_10xv2.sh    | 43 +++++++++++++++
 scripts/create_test_resources/README.md       | 11 ++++
 3 files changed, 106 insertions(+)
 create mode 100755 scripts/create_test_resources/2023_10x_mouse_brain_xenium_rep1.sh
 create mode 100755 scripts/create_test_resources/2023_yao_mouse_brain_scrnaseq_10xv2.sh
 create mode 100644 scripts/create_test_resources/README.md

diff --git a/scripts/create_test_resources/2023_10x_mouse_brain_xenium_rep1.sh b/scripts/create_test_resources/2023_10x_mouse_brain_xenium_rep1.sh
new file mode 100755
index 0000000..519e19b
--- /dev/null
+++ b/scripts/create_test_resources/2023_10x_mouse_brain_xenium_rep1.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+# get the root of the directory
+REPO_ROOT=$(git rev-parse --show-toplevel)
+
+# ensure that the command below is run from the root of the repository
+cd "$REPO_ROOT"
+
+set -e
+
+if [ ! -d temp/datasets/10x_xenium/2023_10x_mouse_brain_xenium_rep1 ]; then
+  mkdir -p temp/datasets/10x_xenium/2023_10x_mouse_brain_xenium_rep1
+fi
+if [ ! -f temp/datasets/10x_xenium/2023_10x_mouse_brain_xenium_rep1/Xenium_V1_FF_Mouse_Brain_MultiSection_1_outs.zip ]; then
+  wget -O temp/datasets/10x_xenium/2023_10x_mouse_brain_xenium_rep1/Xenium_V1_FF_Mouse_Brain_MultiSection_1_outs.zip \
+    https://cf.10xgenomics.com/samples/xenium/1.0.2/Xenium_V1_FF_Mouse_Brain_MultiSection_1/Xenium_V1_FF_Mouse_Brain_MultiSection_1_outs.zip
+fi
+
+cat > /tmp/params.yaml << HERE
+param_list:
+  - id: 2023_10x_mouse_brain_xenium_rep1
+    input: temp/datasets/10x_xenium/2023_10x_mouse_brain_xenium_rep1/Xenium_V1_FF_Mouse_Brain_MultiSection_1_outs.zip
+    segmentation_id:
+      - cell
+      - nucleus
+    dataset_name: Xenium V1 Fresh Frozen Mouse Brain rep1
+    dataset_url: https://www.10xgenomics.com/datasets/fresh-frozen-mouse-brain-replicates-1-standard
+    dataset_summary: Demonstration of gene expression profiling for fresh frozen mouse brain on the Xenium platform.
+    dataset_description: Demonstration of gene expression profiling for fresh frozen mouse brain on the Xenium platform using the pre-designed Mouse Brain Gene Expression Panel (v1).
+    dataset_organism: mus_musculus
+    crop_region_min_x: 10000
+    crop_region_max_x: 11000
+    crop_region_min_y: 10000
+    crop_region_max_y: 11000
+
+publish_dir: resources_test/common
+output_dataset: '\$id/dataset.zarr'
+output_state: '\$id/state.yaml'
+HERE
+
+# convert to zarr
+nextflow run . \
+  -main-script target/nextflow/datasets/workflows/process_tenx_xenium/main.nf \
+  -profile docker \
+  -resume \
+  -params-file /tmp/params.yaml
+
+# sync to s3
+aws s3 sync --profile op \
+  "resources_test/common/2023_10x_mouse_brain_xenium_rep1" \
+  "s3://openproblems-data/resources_test/common/2023_10x_mouse_brain_xenium_rep1" \
+  --delete --dryrun
diff --git a/scripts/create_test_resources/2023_yao_mouse_brain_scrnaseq_10xv2.sh b/scripts/create_test_resources/2023_yao_mouse_brain_scrnaseq_10xv2.sh
new file mode 100755
index 0000000..4c94302
--- /dev/null
+++ b/scripts/create_test_resources/2023_yao_mouse_brain_scrnaseq_10xv2.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# get the root of the directory
+REPO_ROOT=$(git rev-parse --show-toplevel)
+
+# ensure that the command below is run from the root of the repository
+cd "$REPO_ROOT"
+
+set -e
+
+cat > /tmp/params.yaml << HERE
+param_list:
+  - id: 2023_yao_mouse_brain_scrnaseq_10xv2
+    regions:
+      - OLF
+      - TH
+    dataset_name: ABCA Mouse Brain scRNAseq
+    dataset_url: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE246717
+    dataset_reference: 10.1038/s41586-023-06812-z
+    dataset_summary: A high-resolution scRNAseq atlas of cell types in the whole mouse brain
+    dataset_description: See dataset_reference for more information. Note that we only took the 10xv2 data from the dataset.
+    dataset_organism: mus_musculus
+
+do_subsample: true
+n_obs: 400
+n_vars: 10000
+
+output_dataset: "\$id/dataset.h5ad"
+output_meta: "\$id/dataset_meta.yaml"
+output_state: "\$id/state.yaml"
+publish_dir: resources_test/common
+HERE
+
+nextflow run . \
+  -main-script target/nextflow/datasets/workflows/process_allen_brain_cell_atlas/main.nf \
+  -profile docker \
+  -resume \
+  -params-file /tmp/params.yaml
+
+aws s3 sync --profile op \
+  "resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2" \
+  "s3://openproblems-data/resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2" \
+  --delete --dryrun
diff --git a/scripts/create_test_resources/README.md b/scripts/create_test_resources/README.md
new file mode 100644
index 0000000..434251c
--- /dev/null
+++ b/scripts/create_test_resources/README.md
@@ -0,0 +1,11 @@
+
+
+Here we generate a small test dataset, used for `viash test`. Note that the file structure here is a bit simplified compared to `scripts/create_resources` as we only have one dataset.
+
+Download and process the single cell data:
+`bash 2023_yao_mouse_brain_scrnaseq_10xv2.sh`
+
+Download and process the spatial data:
+`bash 2023_10x_mouse_brain_xenium_rep1.sh`
+
+Combine the two datasets and run the ist preprocessing pipeline once with generic methods to create example outputs after each step: `test_pipeline.sh`

From e2fd77f9e37a5f16336367ce23a8ca1ea676732c Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt
Date: Mon, 20 Apr 2026 10:05:32 +0200
Subject: [PATCH 4/5] create spatial segmentation test resources from
 ist_preprocessing task

---
 _viash.yaml                                   |  4 +-
 .../2023_10x_mouse_brain_xenium_rep1.sh       | 52 -------------------
 .../2023_yao_mouse_brain_scrnaseq_10xv2.sh    | 43 ---------------
 scripts/create_test_resources/README.md       | 10 +---
 .../mouse_brain_combined.sh                   | 32 ++++++++++++
 5 files changed, 35 insertions(+), 106 deletions(-)
 delete mode 100755 scripts/create_test_resources/2023_10x_mouse_brain_xenium_rep1.sh
 delete mode 100755 scripts/create_test_resources/2023_yao_mouse_brain_scrnaseq_10xv2.sh
 create mode 100755 scripts/create_test_resources/mouse_brain_combined.sh

diff --git a/_viash.yaml b/_viash.yaml
index ab15894..99202fd 100644
--- a/_viash.yaml
+++ b/_viash.yaml
@@ -55,8 +55,8 @@ info:
       path: s3://openproblems-data/resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2/
       dest: resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2/
     - type: s3
-      path: s3://openproblems-data/resources_test/task_ist_preprocessing/
-      dest: resources_test/task_ist_preprocessing
+      path: s3://openproblems-data/resources_test/task_spatial_segmentation/
+      dest: resources_test/task_spatial_segmentation
 
 # Step 6: Update the authors of the task.
 authors:
diff --git a/scripts/create_test_resources/2023_10x_mouse_brain_xenium_rep1.sh b/scripts/create_test_resources/2023_10x_mouse_brain_xenium_rep1.sh
deleted file mode 100755
index 519e19b..0000000
--- a/scripts/create_test_resources/2023_10x_mouse_brain_xenium_rep1.sh
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/bin/bash
-
-# get the root of the directory
-REPO_ROOT=$(git rev-parse --show-toplevel)
-
-# ensure that the command below is run from the root of the repository
-cd "$REPO_ROOT"
-
-set -e
-
-if [ ! -d temp/datasets/10x_xenium/2023_10x_mouse_brain_xenium_rep1 ]; then
-  mkdir -p temp/datasets/10x_xenium/2023_10x_mouse_brain_xenium_rep1
-fi
-if [ ! -f temp/datasets/10x_xenium/2023_10x_mouse_brain_xenium_rep1/Xenium_V1_FF_Mouse_Brain_MultiSection_1_outs.zip ]; then
-  wget -O temp/datasets/10x_xenium/2023_10x_mouse_brain_xenium_rep1/Xenium_V1_FF_Mouse_Brain_MultiSection_1_outs.zip \
-    https://cf.10xgenomics.com/samples/xenium/1.0.2/Xenium_V1_FF_Mouse_Brain_MultiSection_1/Xenium_V1_FF_Mouse_Brain_MultiSection_1_outs.zip
-fi
-
-cat > /tmp/params.yaml << HERE
-param_list:
-  - id: 2023_10x_mouse_brain_xenium_rep1
-    input: temp/datasets/10x_xenium/2023_10x_mouse_brain_xenium_rep1/Xenium_V1_FF_Mouse_Brain_MultiSection_1_outs.zip
-    segmentation_id:
-      - cell
-      - nucleus
-    dataset_name: Xenium V1 Fresh Frozen Mouse Brain rep1
-    dataset_url: https://www.10xgenomics.com/datasets/fresh-frozen-mouse-brain-replicates-1-standard
-    dataset_summary: Demonstration of gene expression profiling for fresh frozen mouse brain on the Xenium platform.
-    dataset_description: Demonstration of gene expression profiling for fresh frozen mouse brain on the Xenium platform using the pre-designed Mouse Brain Gene Expression Panel (v1).
-    dataset_organism: mus_musculus
-    crop_region_min_x: 10000
-    crop_region_max_x: 11000
-    crop_region_min_y: 10000
-    crop_region_max_y: 11000
-
-publish_dir: resources_test/common
-output_dataset: '\$id/dataset.zarr'
-output_state: '\$id/state.yaml'
-HERE
-
-# convert to zarr
-nextflow run . \
-  -main-script target/nextflow/datasets/workflows/process_tenx_xenium/main.nf \
-  -profile docker \
-  -resume \
-  -params-file /tmp/params.yaml
-
-# sync to s3
-aws s3 sync --profile op \
-  "resources_test/common/2023_10x_mouse_brain_xenium_rep1" \
-  "s3://openproblems-data/resources_test/common/2023_10x_mouse_brain_xenium_rep1" \
-  --delete --dryrun
diff --git a/scripts/create_test_resources/2023_yao_mouse_brain_scrnaseq_10xv2.sh b/scripts/create_test_resources/2023_yao_mouse_brain_scrnaseq_10xv2.sh
deleted file mode 100755
index 4c94302..0000000
--- a/scripts/create_test_resources/2023_yao_mouse_brain_scrnaseq_10xv2.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/bin/bash
-
-# get the root of the directory
-REPO_ROOT=$(git rev-parse --show-toplevel)
-
-# ensure that the command below is run from the root of the repository
-cd "$REPO_ROOT"
-
-set -e
-
-cat > /tmp/params.yaml << HERE
-param_list:
-  - id: 2023_yao_mouse_brain_scrnaseq_10xv2
-    regions:
-      - OLF
-      - TH
-    dataset_name: ABCA Mouse Brain scRNAseq
-    dataset_url: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE246717
-    dataset_reference: 10.1038/s41586-023-06812-z
-    dataset_summary: A high-resolution scRNAseq atlas of cell types in the whole mouse brain
-    dataset_description: See dataset_reference for more information. Note that we only took the 10xv2 data from the dataset.
-    dataset_organism: mus_musculus
-
-do_subsample: true
-n_obs: 400
-n_vars: 10000
-
-output_dataset: "\$id/dataset.h5ad"
-output_meta: "\$id/dataset_meta.yaml"
-output_state: "\$id/state.yaml"
-publish_dir: resources_test/common
-HERE
-
-nextflow run . \
-  -main-script target/nextflow/datasets/workflows/process_allen_brain_cell_atlas/main.nf \
-  -profile docker \
-  -resume \
-  -params-file /tmp/params.yaml
-
-aws s3 sync --profile op \
-  "resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2" \
-  "s3://openproblems-data/resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2" \
-  --delete --dryrun
diff --git a/scripts/create_test_resources/README.md b/scripts/create_test_resources/README.md
index 434251c..46bb116 100644
--- a/scripts/create_test_resources/README.md
+++ b/scripts/create_test_resources/README.md
@@ -1,11 +1,3 @@
-
-
 Here we generate a small test dataset, used for `viash test`. Note that the file structure here is a bit simplified compared to `scripts/create_resources` as we only have one dataset.
 
-Download and process the single cell data:
-`bash 2023_yao_mouse_brain_scrnaseq_10xv2.sh`
-
-Download and process the spatial data:
-`bash 2023_10x_mouse_brain_xenium_rep1.sh`
-
-Combine the two datasets and run the ist preprocessing pipeline once with generic methods to create example outputs after each step: `test_pipeline.sh`
+Copy the data from the `task_ist_preprocessing` test resources: `mouse_brain_combined.sh`
diff --git a/scripts/create_test_resources/mouse_brain_combined.sh b/scripts/create_test_resources/mouse_brain_combined.sh
new file mode 100755
index 0000000..fdc4e37
--- /dev/null
+++ b/scripts/create_test_resources/mouse_brain_combined.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+# abort on the first error, including a failed repository-root lookup below
+set -e
+
+# get the root of the directory
+REPO_ROOT=$(git rev-parse --show-toplevel)
+
+# ensure that the command below is run from the root of the repository
+cd "$REPO_ROOT"
+
+# mkdir -p is a no-op when the directory already exists, so no guard is needed
+mkdir -p resources_test/task_spatial_segmentation/mouse_brain_combined
+
+# these files were generated by https://github.com/openproblems-bio/task_ist_preprocessing/tree/main/scripts/create_test_resources
+# we can just copy them for now
+
+aws s3 sync --profile op \
+  s3://openproblems-data/resources_test/task_ist_preprocessing/mouse_brain_combined/raw_ist.zarr \
+  resources_test/task_spatial_segmentation/mouse_brain_combined/raw_ist.zarr
+
+aws s3 cp --profile op \
+  s3://openproblems-data/resources_test/task_ist_preprocessing/mouse_brain_combined/scrnaseq_reference.h5ad \
+  resources_test/task_spatial_segmentation/mouse_brain_combined/scrnaseq_reference.h5ad
+
+# ...additional preprocessing if needed ...
+
+# sync to s3 (--dryrun only prints the planned operations; remove it to actually upload)
+aws s3 sync --profile op \
+  "resources_test/task_spatial_segmentation/mouse_brain_combined/" \
+  "s3://openproblems-data/resources_test/task_spatial_segmentation/mouse_brain_combined/" \
+  --delete --dryrun

From 59a068a15abf03b76b1edff8fa13eaa409d5f4ea Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt
Date: Mon, 20 Apr 2026 10:06:17 +0200
Subject: [PATCH 5/5] remove unused test resources

---
 _viash.yaml | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/_viash.yaml b/_viash.yaml
index 99202fd..17b6093 100644
--- a/_viash.yaml
+++ b/_viash.yaml
@@ -48,12 +48,6 @@ references:
 info:
   image: The name of the image file to use for the component on the website.
   test_resources:
-    - type: s3
-      path: s3://openproblems-data/resources_test/common/2023_10x_mouse_brain_xenium_rep1/
-      dest: resources_test/common/2023_10x_mouse_brain_xenium_rep1/
-    - type: s3
-      path: s3://openproblems-data/resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2/
-      dest: resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2/
     - type: s3
       path: s3://openproblems-data/resources_test/task_spatial_segmentation/
       dest: resources_test/task_spatial_segmentation