diff --git a/assets/config/xenium_bidcell.txt b/assets/config/xenium_bidcell.txt new file mode 100644 index 00000000..81cb39f1 --- /dev/null +++ b/assets/config/xenium_bidcell.txt @@ -0,0 +1,50 @@ +nuclei_fovs: + stitch_nuclei_fovs: False # set True to stitch separate FOVs of DAPI together in 1 image + +nuclei: + diameter: # estimated diameter of nuclei for Cellpose - or None to automatically compute, default: None + +transcripts: + shift_to_origin: True # shift to origin, making min(x) and min(y) (0,0) + x_col: x_location # name of x location column in transcripts file + y_col: y_location # name of y location column in transcripts file + gene_col: feature_name # name of genes column in transcripts file + transcripts_to_filter: # genes starting with these strings will be filtered out + - NegControlProbe_ + - antisense_ + - NegControlCodeword_ + - BLANK_ + - Blank- + - NegPrb + +affine: + target_pix_um: 1.0 # microns per pixel to perform segmentation; default: 1.0 + base_pix_x: 0.2125 # convert to microns along width by multiplying the original pixels by base_pix_x microns per pixel + base_pix_y: 0.2125 # convert to microns along height by multiplying the original pixels by base_pix_y microns per pixel + base_ts_x: 1.0 # convert between transcript locations and target pixels along width + base_ts_y: 1.0 # convert between transcript locations and target pixels along height + global_shift_x: 0 # additional adjustment to align transcripts to DAPI in target pixels along image width; default: 0 + global_shift_y: 0 # additional adjustment to align transcripts to DAPI in target pixels along image height; default: 0 + +model_params: + name: custom # segmentation model to use: custom for model in model.py or set to an encoder name from segmentation_models_pytorch; default: custom + patch_size: 48 # size of transcriptomic image patches for input to DL model + elongated: # list of elongated cell types that are in the single-cell reference + - placeholder + +training_params: + 
total_epochs: 1 # number of training epochs; default: 1 + total_steps: 60 # number of training steps; default: 4000 + ne_weight: 1.0 # weight for nuclei encapsulation loss; default: 1.0 + os_weight: 1.0 # weight for oversegmentation loss; default: 1.0 + cc_weight: 1.0 # weight for cell-calling loss; default: 1.0 + ov_weight: 1.0 # weight for overlap loss; default: 1.0 + pos_weight: 1.0 # weight for positive marker loss; default: 1.0 + neg_weight: 1.0 # weight for negative marker loss; default: 1.0 + +testing_params: + test_epoch: 1 # epoch to test; default: 1 + test_step: 60 # step number to test; default: 4000 + +experiment_dirs: + dir_id: last # specify timestamp of output dir or leave blank to use latest dir, default: last \ No newline at end of file diff --git a/modules/local/proseg/preset/main.nf b/modules/local/proseg/preset/main.nf index 8cedfe82..51bf4f10 100644 --- a/modules/local/proseg/preset/main.nf +++ b/modules/local/proseg/preset/main.nf @@ -8,27 +8,21 @@ process PROSEG { tuple val(meta), path(transcripts) output: - tuple val(meta), path("cell-polygons.geojson.gz"), emit: cell_polygons_2d - path("transcript-metadata.csv.gz") , emit: transcript_metadata - path("expected-counts.csv.gz") , emit: expected_counts - path("cell-metadata.csv.gz") , emit: cell_metadata - path("gene-metadata.csv.gz") , emit: gene_metadata - path("rates.csv.gz") , emit: rates - path("cell-polygons-layers.geojson.gz") , emit: cell_polygons_layers - path("cell-hulls.geojson.gz") , emit: cell_hulls - path("union-cell-polygons.geojson.gz") , emit: union_cell_polygons - path("versions.yml") , emit: versions + tuple val(meta), path("${prefix}") , emit: outdir + tuple val(meta), path("${prefix}/cell-polygons.geojson.gz") , emit: cell_polygons_2d + tuple val(meta), path("${prefix}/transcript-metadata.csv.gz") , emit: transcript_metadata + path("versions.yml") , emit: versions when: task.ext.when == null || task.ext.when script: + prefix = task.ext.prefix ?: "${meta.id}" // 
Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { error "PROSEG module does not support Conda. Please use Docker / Singularity / Podman instead." } def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" // check for platform values if ( !(params.format in ['xenium', 'cosmx', 'merscope']) ) { @@ -36,18 +30,20 @@ process PROSEG { } """ + mkdir -p ${prefix} + proseg \\ --${params.format} \\ ${transcripts} \\ --nthreads ${task.cpus} \\ - --output-expected-counts expected-counts.csv.gz \\ - --output-cell-metadata cell-metadata.csv.gz \\ - --output-transcript-metadata transcript-metadata.csv.gz \\ - --output-gene-metadata gene-metadata.csv.gz \\ - --output-rates rates.csv.gz \\ - --output-cell-polygons cell-polygons.geojson.gz \\ - --output-cell-polygon-layers cell-polygons-layers.geojson.gz \\ - --output-cell-hulls cell-hulls.geojson.gz \\ + --output-expected-counts ${prefix}/expected-counts.csv.gz \\ + --output-cell-metadata ${prefix}/cell-metadata.csv.gz \\ + --output-transcript-metadata ${prefix}/transcript-metadata.csv.gz \\ + --output-gene-metadata ${prefix}/gene-metadata.csv.gz \\ + --output-rates ${prefix}/rates.csv.gz \\ + --output-cell-polygons ${prefix}/cell-polygons.geojson.gz \\ + --output-cell-polygon-layers ${prefix}/cell-polygons-layers.geojson.gz \\ + --output-cell-hulls ${prefix}/cell-hulls.geojson.gz \\ ${args} cat <<-END_VERSIONS > versions.yml @@ -62,18 +58,20 @@ process PROSEG { error "PROSEG module does not support Conda. Please use Docker / Singularity / Podman instead." 
} def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" """ - touch expected-counts.csv.gz - touch cell-metadata.csv.gz - touch transcript-metadata.csv.gz - touch gene-metadata.csv.gz - touch rates.csv.gz - touch cell-polygons.geojson.gz - touch cell-polygons-layers.geojson.gz - touch cell-hulls.geojson.gz - touch union-cell-polygons.geojson.gz + mkdir -p ${prefix} + + touch ${prefix}/expected-counts.csv.gz + touch ${prefix}/cell-metadata.csv.gz + touch ${prefix}/transcript-metadata.csv.gz + touch ${prefix}/gene-metadata.csv.gz + touch ${prefix}/rates.csv.gz + touch ${prefix}/cell-polygons.geojson.gz + touch ${prefix}/cell-polygons-layers.geojson.gz + touch ${prefix}/cell-hulls.geojson.gz + touch ${prefix}/union-cell-polygons.geojson.gz cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/proseg/proseg2baysor/main.nf b/modules/local/proseg/proseg2baysor/main.nf index e841f966..5fda561c 100644 --- a/modules/local/proseg/proseg2baysor/main.nf +++ b/modules/local/proseg/proseg2baysor/main.nf @@ -5,26 +5,31 @@ process PROSEG2BAYSOR { container "khersameesh24/proseg:2.0.0" input: - tuple val(meta), path(cell_polygons) - path(transcript_metadata) + tuple val(meta), path(cell_polygons), path(transcript_metadata) output: - tuple val(meta), path("xr-cell-polygons.geojson"), emit: xr_polygons - path("xr-transcript-metadata.csv") , emit: xr_metadata - path("versions.yml") , emit: versions + tuple val(meta), path("${prefix}/xr-cell-polygons.geojson") , emit: xr_polygons + tuple val(meta), path("${prefix}/xr-transcript-metadata.csv") , emit: xr_metadata + tuple val(meta), path("${prefix}") , emit: outdir + path("versions.yml") , emit: versions script: + prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { error 
"PROSEG2BAYSOR (preprocess) module does not support Conda. Please use Docker / Singularity / Podman instead." } """ - proseg-to-baysor \ - ${transcript_metadata} \ - ${cell_polygons} \ - --output-transcript-metadata xr-transcript-metadata.csv \ - --output-cell-polygons xr-cell-polygons.geojson + mkdir -p ${prefix} + + proseg-to-baysor \\ + ${transcript_metadata} \\ + ${cell_polygons} \\ + --output-transcript-metadata ${prefix}/xr-transcript-metadata.csv \\ + --output-cell-polygons ${prefix}/xr-cell-polygons.geojson \\ + ${args} cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -39,11 +44,13 @@ process PROSEG2BAYSOR { error "PROSEG module does not support Conda. Please use Docker / Singularity / Podman instead." } def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" """ - touch xr-transcript-metadata.csv - touch xr-cell-polygons.geojson + mkdir -p ${prefix} + + touch ${prefix}/xr-transcript-metadata.csv + touch ${prefix}/xr-cell-polygons.geojson cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/xeniumranger/import-segmentation/main.nf b/modules/local/xeniumranger/import-segmentation/main.nf new file mode 100644 index 00000000..99398eb4 --- /dev/null +++ b/modules/local/xeniumranger/import-segmentation/main.nf @@ -0,0 +1,86 @@ +process XENIUMRANGER_IMPORT_SEGMENTATION { + tag "$meta.id" + label 'process_high' + + container "nf-core/xeniumranger:3.1.1" + + input: + tuple val(meta), + path(xenium_bundle), + path(coordinate_transform), + path(nuclei), + path(cells), + path(transcript_assignment), + path(viz_polygons), + val(units) + + output: + tuple val(meta), path("${prefix}") , emit: bundle + path("versions.yml") , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + prefix = task.ext.prefix ?: "${meta.id}" + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 
'mamba']).size() >= 1) { + error "XENIUMRANGER_IMPORT-SEGMENTATION module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + + // image based segmentation options + def coord_transform = coordinate_transform ? "--coordinate-transform=\"${coordinate_transform}\"": "" + def nuclei_detection = nuclei ? "--nuclei=\"${nuclei}\"": "" + def cell_detection = cells ? "--cells=\"${cells}\"": "" + + // transcript based segmentation + def transcript_assign = transcript_assignment ? "--transcript-assignment=\"${transcript_assignment}\"": "" + def polygons = viz_polygons ? "--viz-polygons=\"${viz_polygons}\"":"" + + // shared argument + def space = units ? "--units=${units}" : "" + + // conditional args + def exp_dist = nuclei ? "--expansion-distance=${params.expansion_distance}": "" + + """ + xeniumranger import-segmentation \\ + --id="import_segmentation_out" \\ + --xenium-bundle="${xenium_bundle}" \\ + ${exp_dist} \\ + ${coord_transform} \\ + ${nuclei_detection} \\ + ${cell_detection} \\ + ${transcript_assign} \\ + ${polygons} \\ + ${space} \\ + --localcores=${task.cpus} \\ + --localmem=${task.memory.toGiga()} \\ + ${args} + + # assign the new xenium bundle as the desired output folder + mv import_segmentation_out/outs ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + xeniumranger: \$(xeniumranger -V | sed -e "s/xeniumranger-/- /g") + END_VERSIONS + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "XENIUMRANGER_IMPORT-SEGMENTATION module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir -p "${prefix}/outs/" + touch "${prefix}/outs/fake_file.txt" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + xeniumranger: \$(xeniumranger -V | sed -e "s/xeniumranger-/- /g") + END_VERSIONS + """ +} diff --git a/modules/local/xeniumranger/import-segmentation/meta.yml b/modules/local/xeniumranger/import-segmentation/meta.yml new file mode 100644 index 00000000..fc9fa5c9 --- /dev/null +++ b/modules/local/xeniumranger/import-segmentation/meta.yml @@ -0,0 +1,84 @@ +name: xeniumranger_import_segmentation +description: The xeniumranger import-segmentation module allows you to specify 2D + nuclei and/or cell segmentation results for assigning transcripts to cells and recalculate + all Xenium Onboard Analysis (XOA) outputs that depend on segmentation. Segmentation + results can be generated by community-developed tools or prior Xenium segmentation + result. +keywords: + - spatial + - segmentation + - import segmentation + - nuclear segmentation + - cell segmentation + - xeniumranger + - imaging +tools: + - xeniumranger: + description: | + Xenium Ranger is a set of analysis pipelines that process Xenium In Situ Gene Expression data to relabel, resegment, or import new segmentation results from community-developed tools. Xenium Ranger provides flexible off-instrument reanalysis of Xenium In Situ data. Relabel transcripts, resegment cells with the latest 10x segmentation algorithms, or import your own segmentation data to assign transcripts to cells. + homepage: "https://www.10xgenomics.com/support/software/xenium-ranger/latest" + documentation: "https://www.10xgenomics.com/support/software/xenium-ranger/latest/getting-started" + tool_dev_url: "https://www.10xgenomics.com/support/software/xenium-ranger/latest/analysis" + licence: + - "10x Genomics EULA" + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing run information + e.g. 
[id:'xenium_bundle_path'] + - xenium_bundle: + type: directory + description: Path to the xenium output bundle generated by the Xenium Onboard + Analysis pipeline + - - expansion_distance: + type: integer + description: Nuclei boundary expansion distance in µm. Only for use when nucleus + segmentation provided as input. Default-5 (accepted range 0 - 100) + - - coordinate_transform: + type: file + description: Image alignment file containing similarity transform matrix e.g., + the _imagealignment.csv file exported from Xenium Explorer + - - nuclei: + type: file + description: | + Label mask (TIFF or NPY), polygons of nucleus segmentations (GeoJSON FeatureCollection), or Xenium Onboard Analysis cells.zarr.zip (the nucleus masks as input). + --nuclei will use nucleusGeometry polygon if it exists in the GeoJSON (i.e., for QuPath-like GeoJSON files), + or geometry if it does not. Error if --transcript-assignment argument is used. + - - cells: + type: file + description: | + Label mask (TIFF or NPY), polygons of cell segmentations (GeoJSON FeatureCollection), or Xenium Onboard Analysis cells.zarr.zip (the cell masks as input). + Features with a non-cell objectType will be ignored. Error if --transcript-assignment argument is used. + In Xenium Ranger v2.0, --nuclei no longer needs to be used with --cells. + - - transcript_assignment: + type: file + description: | + Transcript CSV with cell assignment from Baysor v0.6. Error if --cells or --nuclei arguments are used. + - - viz_polygons: + type: file + description: | + Cell boundary polygons (GeoJSON) for visualization from Baysor v0.6. Required if --transcript-assignment argument used. Error if --cells or --nuclei arguments used. +output: + - bundle: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'sample' ] + - "${meta.id}/outs": + type: file + description: Files containing the outputs of xenium ranger, see official 10X + Genomics documentation for a complete list of outputs + pattern: "${meta.id}/outs" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@khersameesh24" +maintainers: + - "@khersameesh24" diff --git a/modules/local/xeniumranger/import-segmentation/tests/main.nf.test b/modules/local/xeniumranger/import-segmentation/tests/main.nf.test new file mode 100644 index 00000000..54d3ba00 --- /dev/null +++ b/modules/local/xeniumranger/import-segmentation/tests/main.nf.test @@ -0,0 +1,314 @@ +nextflow_process { + + name "Test Process XENIUMRANGER_IMPORT_SEGMENTATION" + script "../main.nf" + process "XENIUMRANGER_IMPORT_SEGMENTATION" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "xeniumranger" + tag "xeniumranger/import-segmentation" + tag "unzip" + + setup { + run("UNZIP") { + script "modules/nf-core/unzip/main.nf" + process { + """ + input[0] = [[], file('https://raw.githubusercontent.com/nf-core/test-datasets/spatialxe/Xenium_Prime_Mouse_Ileum_tiny_outs.zip', checkIfExists: true)] + """ + } + } + } + + test("xeniumranger import-segmentation nuclei npy") { + when { + process { + """ + input[0] = Channel.of([ + [id: "test_xeniumranger_import-segmentation"], + ]).combine(UNZIP.out.unzipped_archive.map { it[1] }) + input[1] = 0 + input[2] = [] + input[3] = UNZIP.out.unzipped_archive.map { it[1] } + "/segmentations/nuclei.npy" + input[4] = [] + input[5] = [] + input[6] = [] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + process.out.outs.get(0).get(1).findAll { file(it).name !in [ + 'analysis_summary.html', + 'metrics_summary.csv', + 'cell_boundaries.csv.gz', + 'cell_boundaries.parquet', + 'nucleus_boundaries.csv.gz', + 'nucleus_boundaries.parquet', + 'cells.csv.gz', + 
'cells.parquet', + 'cells.zarr.zip', + 'transcripts.parquet', + 'transcripts.zarr.zip', + 'clusters.csv', + 'differential_expression.csv', + 'components.csv', + 'projection.csv', + 'variance.csv', + 'analysis.zarr.zip', + 'experiment.xenium', + 'cell_feature_matrix.zarr.zip' + ]} + ).match() + }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'analysis_summary.html' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'metrics_summary.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cell_boundaries.csv.gz' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cell_boundaries.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'nucleus_boundaries.csv.gz' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'nucleus_boundaries.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cells.csv.gz' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cells.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cells.zarr.zip' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'transcripts.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'clusters.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'differential_expression.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'components.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'projection.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'variance.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'analysis.zarr.zip' 
}).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'experiment.xenium' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cell_feature_matrix.zarr.zip' }).exists() }, + ) + } + } + + + test("xeniumranger import-segmentation nuclei tif") { + when { + process { + """ + input[0] = Channel.of([ + [id: "test_xeniumranger_import-segmentation"], + ]).combine(UNZIP.out.unzipped_archive.map { it[1] }) + input[1] = 0 + input[2] = [] + input[3] = UNZIP.out.unzipped_archive.map { it[1] } + "/segmentations/nuclei.npy" + input[4] = [] + input[5] = [] + input[6] = [] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + process.out.outs.get(0).get(1).findAll { file(it).name !in [ + 'analysis_summary.html', + 'metrics_summary.csv', + 'cell_boundaries.csv.gz', + 'cell_boundaries.parquet', + 'nucleus_boundaries.csv.gz', + 'nucleus_boundaries.parquet', + 'cells.csv.gz', + 'cells.parquet', + 'cells.zarr.zip', + 'transcripts.parquet', + 'transcripts.zarr.zip', + 'clusters.csv', + 'differential_expression.csv', + 'components.csv', + 'projection.csv', + 'variance.csv', + 'analysis.zarr.zip', + 'experiment.xenium', + 'cell_feature_matrix.zarr.zip' + ]} + ).match() + }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'analysis_summary.html' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'metrics_summary.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cell_boundaries.csv.gz' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cell_boundaries.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'nucleus_boundaries.csv.gz' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'nucleus_boundaries.parquet' }).exists() }, + { assert 
file(process.out.outs.get(0).get(1).find { file(it).name == 'cells.csv.gz' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cells.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cells.zarr.zip' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'transcripts.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'clusters.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'differential_expression.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'components.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'projection.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'variance.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'analysis.zarr.zip' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'experiment.xenium' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cell_feature_matrix.zarr.zip' }).exists() }, + ) + } + } + + test("xeniumranger import-segmentation segmentation csv") { + when { + process { + """ + input[0] = Channel.of([ + [id: "test_xeniumranger_import-segmentation"], + ]).combine(UNZIP.out.unzipped_archive.map { it[1] }) + input[1] = 0 + input[2] = UNZIP.out.unzipped_archive.map { it[1] } + "/segmentations/imagealignment.csv" + input[3] = [] + input[4] = [] + input[5] = UNZIP.out.unzipped_archive.map { it[1] } + "/segmentations/segmentation.csv" + input[6] = UNZIP.out.unzipped_archive.map { it[1] } + "/segmentations/segmentation_polygons.json" + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + process.out.outs.get(0).get(1).findAll { file(it).name !in [ + 
'analysis_summary.html', + 'metrics_summary.csv', + 'cell_boundaries.csv.gz', + 'cell_boundaries.parquet', + 'nucleus_boundaries.csv.gz', + 'nucleus_boundaries.parquet', + 'cells.csv.gz', + 'cells.parquet', + 'cells.zarr.zip', + 'transcripts.parquet', + 'transcripts.zarr.zip', + 'clusters.csv', + 'differential_expression.csv', + 'components.csv', + 'projection.csv', + 'variance.csv', + 'analysis.zarr.zip', + 'experiment.xenium', + 'cell_feature_matrix.zarr.zip' + ]} + ).match() + }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'analysis_summary.html' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'metrics_summary.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cell_boundaries.csv.gz' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cell_boundaries.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'nucleus_boundaries.csv.gz' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'nucleus_boundaries.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cells.csv.gz' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cells.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cells.zarr.zip' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'transcripts.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'clusters.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'differential_expression.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'components.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'projection.csv' }).exists() }, + { 
assert file(process.out.outs.get(0).get(1).find { file(it).name == 'variance.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'analysis.zarr.zip' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'experiment.xenium' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cell_feature_matrix.zarr.zip' }).exists() }, + ) + } + } + + test("xeniumranger import-segmentation") { + when { + process { + """ + input[0] = Channel.of([ + [id: "test_xeniumranger_import-segmentation"], + ]).combine(UNZIP.out.unzipped_archive.map { it[1] }) + input[1] = 0 + input[2] = [] + input[3] = UNZIP.out.unzipped_archive.map { it[1] } + "/cells.zarr.zip" + input[4] = [] + input[5] = [] + input[6] = [] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + process.out.outs.get(0).get(1).findAll { file(it).name !in [ + 'analysis_summary.html', + 'metrics_summary.csv', + 'cell_boundaries.csv.gz', + 'cell_boundaries.parquet', + 'nucleus_boundaries.csv.gz', + 'nucleus_boundaries.parquet', + 'cells.csv.gz', + 'cells.parquet', + 'cells.zarr.zip', + 'transcripts.parquet', + 'transcripts.zarr.zip', + 'clusters.csv', + 'differential_expression.csv', + 'components.csv', + 'projection.csv', + 'variance.csv', + 'analysis.zarr.zip', + 'experiment.xenium', + 'cell_feature_matrix.zarr.zip' + ]} + ).match() + }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'analysis_summary.html' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'metrics_summary.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cell_boundaries.csv.gz' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cell_boundaries.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'nucleus_boundaries.csv.gz' 
}).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'nucleus_boundaries.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cells.csv.gz' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cells.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cells.zarr.zip' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'transcripts.parquet' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'clusters.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'differential_expression.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'components.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'projection.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'variance.csv' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'analysis.zarr.zip' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'experiment.xenium' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'cell_feature_matrix.zarr.zip' }).exists() }, + ) + } + } + + test("xeniumranger import-segmentation stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: "test_xeniumranger_import-segmentation"], + ]).combine(UNZIP.out.unzipped_archive.map { it[1] }) + input[1] = 0 + input[2] = [] + input[3] = UNZIP.out.unzipped_archive.map { it[1] } + "/cells.zarr.zip" + input[4] = [] + input[5] = [] + input[6] = [] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} \ No newline at end of file diff --git 
a/modules/local/xeniumranger/import-segmentation/tests/main.nf.test.snap b/modules/local/xeniumranger/import-segmentation/tests/main.nf.test.snap new file mode 100644 index 00000000..1c312ae0 --- /dev/null +++ b/modules/local/xeniumranger/import-segmentation/tests/main.nf.test.snap @@ -0,0 +1,127 @@ +{ + "xeniumranger import-segmentation": { + "content": [ + [ + "versions.yml:md5,d76e870d71abf94ed9ae972a08b83f63" + ], + [ + "dispersion.csv:md5,e8b1abb880ece8fb730ce34a15f958b4", + "features_selected.csv:md5,c5e32d69f001f938ed316d2108a21e00", + "cell_feature_matrix.h5:md5,96cb400f1b1dd6f8796daea0ad5c74e6", + "barcodes.tsv.gz:md5,04ea06796d6b28517c288904ca043582", + "features.tsv.gz:md5,7862242129681900a9cc4086dc83b62e", + "matrix.mtx.gz:md5,489f86fbd8d65d6b973bb9cc7c5a76f1", + "gene_panel.json:md5,8890dd5fd90706e751554ac3fdfdedde", + "morphology.ome.tif:md5,6b65fff28a38a001b8f25061737fbf9b", + "morphology_focus_0000.ome.tif:md5,90e796ad634d14e62cf2ebcadf2eaf98" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T00:13:13.575888" + }, + "xeniumranger import-segmentation nuclei npy": { + "content": [ + [ + "versions.yml:md5,d76e870d71abf94ed9ae972a08b83f63" + ], + [ + "dispersion.csv:md5,e8b1abb880ece8fb730ce34a15f958b4", + "features_selected.csv:md5,c5e32d69f001f938ed316d2108a21e00", + "cell_feature_matrix.h5:md5,96cb400f1b1dd6f8796daea0ad5c74e6", + "barcodes.tsv.gz:md5,04ea06796d6b28517c288904ca043582", + "features.tsv.gz:md5,7862242129681900a9cc4086dc83b62e", + "matrix.mtx.gz:md5,489f86fbd8d65d6b973bb9cc7c5a76f1", + "gene_panel.json:md5,8890dd5fd90706e751554ac3fdfdedde", + "morphology.ome.tif:md5,6b65fff28a38a001b8f25061737fbf9b", + "morphology_focus_0000.ome.tif:md5,90e796ad634d14e62cf2ebcadf2eaf98" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-29T23:03:26.726334" + }, + "xeniumranger import-segmentation segmentation csv": { + "content": [ + [ + 
"versions.yml:md5,d76e870d71abf94ed9ae972a08b83f63" + ], + [ + "dispersion.csv:md5,e8b1abb880ece8fb730ce34a15f958b4", + "features_selected.csv:md5,c5e32d69f001f938ed316d2108a21e00", + "cell_feature_matrix.h5:md5,5d74ea595561e0300b6c3e5ec8d06fff", + "barcodes.tsv.gz:md5,97496a9b448d9380cff0575b8e7a6f57", + "features.tsv.gz:md5,7862242129681900a9cc4086dc83b62e", + "matrix.mtx.gz:md5,f93ed82a2a74c154392fc6237642f1d2", + "gene_panel.json:md5,8890dd5fd90706e751554ac3fdfdedde", + "morphology.ome.tif:md5,6b65fff28a38a001b8f25061737fbf9b", + "morphology_focus_0000.ome.tif:md5,90e796ad634d14e62cf2ebcadf2eaf98" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-29T23:22:58.158857" + }, + "xeniumranger import-segmentation stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_xeniumranger_import-segmentation" + }, + "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,d76e870d71abf94ed9ae972a08b83f63" + ], + "outs": [ + [ + { + "id": "test_xeniumranger_import-segmentation" + }, + "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,d76e870d71abf94ed9ae972a08b83f63" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T22:49:39.204133" + }, + "xeniumranger import-segmentation nuclei tif": { + "content": [ + [ + "versions.yml:md5,d76e870d71abf94ed9ae972a08b83f63" + ], + [ + "dispersion.csv:md5,e8b1abb880ece8fb730ce34a15f958b4", + "features_selected.csv:md5,c5e32d69f001f938ed316d2108a21e00", + "cell_feature_matrix.h5:md5,96cb400f1b1dd6f8796daea0ad5c74e6", + "barcodes.tsv.gz:md5,04ea06796d6b28517c288904ca043582", + "features.tsv.gz:md5,7862242129681900a9cc4086dc83b62e", + "matrix.mtx.gz:md5,489f86fbd8d65d6b973bb9cc7c5a76f1", + "gene_panel.json:md5,8890dd5fd90706e751554ac3fdfdedde", + "morphology.ome.tif:md5,6b65fff28a38a001b8f25061737fbf9b", + 
"morphology_focus_0000.ome.tif:md5,90e796ad634d14e62cf2ebcadf2eaf98" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-29T23:11:37.18721" + } +} \ No newline at end of file diff --git a/modules/local/xeniumranger/import-segmentation/tests/nextflow.config b/modules/local/xeniumranger/import-segmentation/tests/nextflow.config new file mode 100644 index 00000000..e69de29b diff --git a/modules/local/xeniumranger/import-segmentation/tests/tags.yml b/modules/local/xeniumranger/import-segmentation/tests/tags.yml new file mode 100644 index 00000000..90c2b805 --- /dev/null +++ b/modules/local/xeniumranger/import-segmentation/tests/tags.yml @@ -0,0 +1,2 @@ +xeniumranger/import-segmentation: + - "modules/nf-core/xeniumranger/import-segmentation/**" diff --git a/modules/local/xeniumranger/import-segmentation/xeniumranger-import-segmentation.diff b/modules/local/xeniumranger/import-segmentation/xeniumranger-import-segmentation.diff new file mode 100644 index 00000000..18f220fe --- /dev/null +++ b/modules/local/xeniumranger/import-segmentation/xeniumranger-import-segmentation.diff @@ -0,0 +1,123 @@ +Changes in component 'nf-core/xeniumranger/import-segmentation' +Changes in 'xeniumranger/import-segmentation/main.nf': +--- modules/nf-core/xeniumranger/import-segmentation/main.nf ++++ modules/nf-core/xeniumranger/import-segmentation/main.nf +@@ -2,20 +2,20 @@ + tag "$meta.id" + label 'process_high' + +- container "nf-core/xeniumranger:3.0.1" ++ container "nf-core/xeniumranger:3.1.1" + + input: + tuple val(meta), path(xenium_bundle) +- val(expansion_distance) + path(coordinate_transform) + path(nuclei) + path(cells) + path(transcript_assignment) + path(viz_polygons) ++ val(units) + + output: +- tuple val(meta), path("**/outs/**"), emit: outs +- path "versions.yml", emit: versions ++ tuple val(meta), path("${meta.id}/outs"), emit: bundle ++ path("versions.yml") , emit: versions + + when: + task.ext.when == null || task.ext.when +@@ 
-29,32 +29,33 @@ + def prefix = task.ext.prefix ?: "${meta.id}" + + // image based segmentation options +- def expansion_distance = expansion_distance ? "--expansion-distance=\"${expansion_distance}\"": "" // expansion distance (default - 5, range - 0 - 100) +- def coordinate_transform = coordinate_transform ? "--coordinate-transform=\"${coordinate_transform}\"": "" +- ++ def coord_transform = coordinate_transform ? "--coordinate-transform=\"${coordinate_transform}\"": "" + def nuclei_detection = nuclei ? "--nuclei=\"${nuclei}\"": "" +- def cells = cells ? "--cells=\"${cells}\"": "" ++ def cell_detection = cells ? "--cells=\"${cells}\"": "" + + // transcript based segmentation +- def transcript_assignment = transcript_assignment ? "--transcript-assignment=\"${transcript_assignment}\"": "" +- def viz_polygons = viz_polygons ? "--viz-polygons=\"${viz_polygons}\"":"" ++ def transcript_assign = transcript_assignment ? "--transcript-assignment=\"${transcript_assignment}\"": "" ++ def polygons = viz_polygons ? "--viz-polygons=\"${viz_polygons}\"":"" + + // shared argument +- def units = coordinate_transform ? "--units=microns": "--units=pixels" ++ def space = units ? "--units=${units}" : "" ++ ++ // conditional args ++ def exp_dist = nuclei ? 
"--expansion-distance=${params.expansion_distance}": "" + + """ + xeniumranger import-segmentation \\ + --id="${prefix}" \\ + --xenium-bundle="${xenium_bundle}" \\ ++ ${exp_dist} \\ ++ ${coord_transform} \\ ++ ${nuclei_detection} \\ ++ ${cell_detection} \\ ++ ${transcript_assign} \\ ++ ${polygons} \\ ++ ${space} \\ + --localcores=${task.cpus} \\ + --localmem=${task.memory.toGiga()} \\ +- ${coordinate_transform} \\ +- ${nuclei_detection} \\ +- ${cells} \\ +- ${expansion_distance} \\ +- ${transcript_assignment} \\ +- ${viz_polygons} \\ +- ${units} \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + +Changes in 'xeniumranger/import-segmentation/meta.yml': +--- modules/nf-core/xeniumranger/import-segmentation/meta.yml ++++ modules/nf-core/xeniumranger/import-segmentation/meta.yml +@@ -62,22 +62,22 @@ + description: | + Cell boundary polygons (GeoJSON) for visualization from Baysor v0.6. Required if --transcript-assignment argument used. Error if --cells or --nuclei arguments used. + output: +- - outs: ++ - bundle: + - meta: +- type: file +- description: Files containing the outputs of Cell Ranger, see official 10X Genomics +- documentation for a complete list +- pattern: "${meta.id}/outs/*" +- - "**/outs/**": +- type: file +- description: Files containing the outputs of xenium ranger, see official 10X +- Genomics documentation for a complete list of outputs +- pattern: "${meta.id}/outs/*" ++ type: map ++ description: | ++ Groovy Map containing sample information ++ e.g. 
[ id:'sample' ] ++ - "${meta.id}/outs": ++ type: file ++ description: Files containing the outputs of xenium ranger, see official 10X ++ Genomics documentation for a complete list of outputs ++ pattern: "${meta.id}/outs" + - versions: + - versions.yml: +- type: file +- description: File containing software versions +- pattern: "versions.yml" ++ type: file ++ description: File containing software versions ++ pattern: "versions.yml" + authors: + - "@khersameesh24" + maintainers: + +'modules/nf-core/xeniumranger/import-segmentation/tests/main.nf.test' is unchanged +'modules/nf-core/xeniumranger/import-segmentation/tests/main.nf.test.snap' is unchanged +'modules/nf-core/xeniumranger/import-segmentation/tests/nextflow.config' is unchanged +'modules/nf-core/xeniumranger/import-segmentation/tests/tags.yml' is unchanged +************************************************************ diff --git a/modules/nf-core/cellpose/main.nf b/modules/nf-core/cellpose/main.nf index e19bd00f..11ca353a 100644 --- a/modules/nf-core/cellpose/main.nf +++ b/modules/nf-core/cellpose/main.nf @@ -29,6 +29,7 @@ process CELLPOSE { """ export OMP_NUM_THREADS=${task.cpus} export MKL_NUM_THREADS=${task.cpus} + export NPY_PROMOTION_STATE=legacy cellpose \\ --image_path $image \\ --save_tif \\ diff --git a/nextflow.config b/nextflow.config index 31579809..096dff58 100644 --- a/nextflow.config +++ b/nextflow.config @@ -224,6 +224,12 @@ profiles { } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } + + gpu { + docker.runOptions = (docker.runOptions ?: '') + ' --gpus all' + apptainer.runOptions = (apptainer.runOptions ?: '') + ' --nv' + singularity.runOptions = (singularity.runOptions ?: '') + ' --nv' + } } @@ -302,6 +308,14 @@ manifest { contribution: ['author', 'maintainer'], // List of contribution types ('author', 'maintainer' or 'contributor') orcid: '' ], + [ + name: 'Dongze He', + affiliation: 'Altos Labs, San Diego, USA', + email: 'dongzehe.zaza@gmail.com', + 
github: '@dongzehe', + contribution: ['contributor'], // List of contribution types ('author', 'maintainer' or 'contributor') + orcid: '0000-0001-8259-7434' + ], ] homePage = 'https://github.com/nf-core/spatialxe' description = """A pipeline for spatialomics 10x Xenium In Situ data.""" diff --git a/subworkflows/local/proseg_preset_proseg2baysor/main.nf b/subworkflows/local/proseg_preset_proseg2baysor/main.nf index 90448094..ac8b71d9 100644 --- a/subworkflows/local/proseg_preset_proseg2baysor/main.nf +++ b/subworkflows/local/proseg_preset_proseg2baysor/main.nf @@ -4,7 +4,8 @@ include { PROSEG } from '../../../modules/local/proseg/preset/main' include { PROSEG2BAYSOR } from '../../../modules/local/proseg/proseg2baysor/main' -include { XENIUMRANGER_IMPORT_SEGMENTATION } from '../../../modules/nf-core/xeniumranger/import-segmentation/main' +include { XENIUMRANGER_IMPORT_SEGMENTATION } from '../../../modules/local/xeniumranger/import-segmentation/main' + workflow PROSEG_PRESET_PROSEG2BAYSOR { @@ -16,30 +17,35 @@ workflow PROSEG_PRESET_PROSEG2BAYSOR { main: ch_versions = Channel.empty() - ch_coordinate_space = Channel.value("microns") // run proseg with the xenium format PROSEG ( ch_transcripts_parquet ) ch_versions = ch_versions.mix( PROSEG.out.versions ) // run proseg-to-baysor on the data generated with the proseg run - PROSEG2BAYSOR ( PROSEG.out.cell_polygons_2d, PROSEG.out.transcript_metadata ) + PROSEG2BAYSOR ( + PROSEG.out.cell_polygons_2d.combine(PROSEG.out.transcript_metadata, by: 0) + ) ch_versions = ch_versions.mix( PROSEG2BAYSOR.out.versions ) - ch_metadata = PROSEG2BAYSOR.out.xr_metadata - ch_polygons = PROSEG2BAYSOR.out.xr_polygons.map { - _meta, polygons -> return [ polygons ] - } - // run xeniumranger import-segmentation XENIUMRANGER_IMPORT_SEGMENTATION ( - ch_bundle_path, - [], - [], - [], - ch_metadata, - ch_polygons, - ch_coordinate_space + ch_bundle_path + .combine(PROSEG2BAYSOR.out.xr_polygons, by: 0) + .combine(PROSEG2BAYSOR.out.xr_metadata, by: 0) 
+ .map { + meta, bundle, xr_cell_polygons, xr_transcript_metadata -> tuple( + meta, // meta + bundle, // bundle + [], // coordinate_transform + [], // nuclei + [], // cells + xr_transcript_metadata, // transcript_assignment + xr_cell_polygons, // viz_polygons + "microns" // units + ) + } + ) ch_versions = ch_versions.mix( XENIUMRANGER_IMPORT_SEGMENTATION.out.versions )