nf-core · an-altosian · Apr 27, 2025 · Apr 29, 2025 · Apr 29, 2025 · Apr 29, 2025
diff --git a/assets/config/xenium_bidcell.txt b/assets/config/xenium_bidcell.txt
@@ -0,0 +1,50 @@
+nuclei_fovs:
+    stitch_nuclei_fovs: False # set True to stitch separate FOVs of DAPI together in 1 image
+
+nuclei:
+    diameter: # estimated diameter of nuclei for Cellpose - or None to automatically compute, default: None
+
+transcripts:
+    shift_to_origin: True # shift to origin, making min(x) and min(y) (0,0)
+    x_col: x_location # name of x location column in transcripts file
+    y_col: y_location # name of y location column in transcripts file
+    gene_col: feature_name # name of genes column in transcripts file
+    transcripts_to_filter: # genes starting with these strings will be filtered out
+    - NegControlProbe_
+    - antisense_
+    - NegControlCodeword_
+    - BLANK_
+    - Blank-
+    - NegPrb
+
+affine:
+    target_pix_um: 1.0 # microns per pixel to perform segmentation; default: 1.0
+    base_pix_x: 0.2125 # convert to microns along width by multiplying the original pixels by base_pix_x microns per pixel
+    base_pix_y: 0.2125 # convert to microns along height by multiplying the original pixels by base_pix_y microns per pixel
+    base_ts_x: 1.0 # convert between transcript locations and target pixels along width
+    base_ts_y: 1.0 # convert between transcript locations and target pixels along height
+    global_shift_x: 0 # additional adjustment to align transcripts to DAPI in target pixels along image width; default: 0
+    global_shift_y: 0 # additional adjustment to align transcripts to DAPI in target pixels along image height; default: 0
+
+model_params:
+    name: custom # segmentation model to use: custom for model in model.py or set to a encoder name from segmentation_models_pytorch; default: custom
+    patch_size: 48 # size of transcriptomic image patches for input to DL model
+    elongated: # list of elongated cell types that are in the single-cell reference
+    - placeholder
+
+training_params:
+    total_epochs: 1 # number of training epochs; default: 1
+    total_steps: 60 # number of training steps; default: 4000
+    ne_weight: 1.0 # weight for nuclei encapsulation loss; default: 1.0
+    os_weight: 1.0 # weight for oversegmentation loss; default: 1.0
+    cc_weight: 1.0 # weight for cell-calling loss; default: 1.0
+    ov_weight: 1.0 # weight for overlap loss; default: 1.0
+    pos_weight: 1.0 # weight for positive marker loss; default: 1.0
+    neg_weight: 1.0 # weight for negative marker loss; default: 1.0
+
+testing_params:
+    test_epoch: 1 # epoch to test; default: 1
+    test_step: 60 # step number to test; default: 4000
+
+experiment_dirs:
+    dir_id: last # specify timestamp of output dir or leave blank to use latest dir, default: last
diff --git a/modules/local/proseg/preset/main.nf b/modules/local/proseg/preset/main.nf
@@ -8,46 +8,42 @@ process PROSEG {
     tuple val(meta), path(transcripts)
 
     output:
-    tuple val(meta), path("cell-polygons.geojson.gz"), emit: cell_polygons_2d
-    path("transcript-metadata.csv.gz")               , emit: transcript_metadata
-    path("expected-counts.csv.gz")                   , emit: expected_counts
-    path("cell-metadata.csv.gz")                     , emit: cell_metadata
-    path("gene-metadata.csv.gz")                     , emit: gene_metadata
-    path("rates.csv.gz")                             , emit: rates
-    path("cell-polygons-layers.geojson.gz")          , emit: cell_polygons_layers
-    path("cell-hulls.geojson.gz")                    , emit: cell_hulls
-    path("union-cell-polygons.geojson.gz")           , emit: union_cell_polygons
-    path("versions.yml")                             , emit: versions
+    tuple val(meta), path("${prefix}")                              , emit: outdir
+    tuple val(meta), path("${prefix}/cell-polygons.geojson.gz")     , emit: cell_polygons_2d
+    tuple val(meta), path("${prefix}/transcript-metadata.csv.gz")   , emit: transcript_metadata
+    path("versions.yml")                                            , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
+    prefix = task.ext.prefix ?: "${meta.id}"
     // Exit if running this module with -profile conda / -profile mamba
     if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
         error "PROSEG module does not support Conda. Please use Docker / Singularity / Podman instead."
     }
     def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
 
     // check for platform values
     if ( !(params.format in ['xenium', 'cosmx', 'merscope']) ) {
         error "${params.format} is an invalid platform type. Please specify xenium, cosmx, or merscope"
     }
 
     """
+    mkdir -p ${prefix}
+
     proseg \\
         --${params.format} \\
         ${transcripts} \\
         --nthreads ${task.cpus} \\
-        --output-expected-counts expected-counts.csv.gz \\
-        --output-cell-metadata cell-metadata.csv.gz \\
-        --output-transcript-metadata transcript-metadata.csv.gz \\
-        --output-gene-metadata gene-metadata.csv.gz \\
-        --output-rates rates.csv.gz \\
-        --output-cell-polygons cell-polygons.geojson.gz \\
-        --output-cell-polygon-layers cell-polygons-layers.geojson.gz \\
-        --output-cell-hulls cell-hulls.geojson.gz \\
+        --output-expected-counts ${prefix}/expected-counts.csv.gz \\
+        --output-cell-metadata ${prefix}/cell-metadata.csv.gz \\
+        --output-transcript-metadata ${prefix}/transcript-metadata.csv.gz \\
+        --output-gene-metadata ${prefix}/gene-metadata.csv.gz \\
+        --output-rates ${prefix}/rates.csv.gz \\
+        --output-cell-polygons ${prefix}/cell-polygons.geojson.gz \\
+        --output-cell-polygon-layers ${prefix}/cell-polygons-layers.geojson.gz \\
+        --output-cell-hulls ${prefix}/cell-hulls.geojson.gz \\
         ${args}
 
     cat <<-END_VERSIONS > versions.yml
@@ -62,18 +58,20 @@ process PROSEG {
         error "PROSEG module does not support Conda. Please use Docker / Singularity / Podman instead."
     }
     def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
+    prefix = task.ext.prefix ?: "${meta.id}"
 
     """
-    touch expected-counts.csv.gz
-    touch cell-metadata.csv.gz
-    touch transcript-metadata.csv.gz
-    touch gene-metadata.csv.gz
-    touch rates.csv.gz
-    touch cell-polygons.geojson.gz
-    touch cell-polygons-layers.geojson.gz
-    touch cell-hulls.geojson.gz
-    touch union-cell-polygons.geojson.gz
+    mkdir -p ${prefix}
+
+    touch ${prefix}/expected-counts.csv.gz
+    touch ${prefix}/cell-metadata.csv.gz
+    touch ${prefix}/transcript-metadata.csv.gz
+    touch ${prefix}/gene-metadata.csv.gz
+    touch ${prefix}/rates.csv.gz
+    touch ${prefix}/cell-polygons.geojson.gz
+    touch ${prefix}/cell-polygons-layers.geojson.gz
+    touch ${prefix}/cell-hulls.geojson.gz
+    touch ${prefix}/union-cell-polygons.geojson.gz
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

diff --git a/modules/local/proseg/proseg2baysor/main.nf b/modules/local/proseg/proseg2baysor/main.nf
@@ -5,26 +5,31 @@ process PROSEG2BAYSOR {
     container "khersameesh24/proseg:2.0.0"
 
     input:
-    tuple val(meta), path(cell_polygons)
-    path(transcript_metadata)
+    tuple val(meta), path(cell_polygons), path(transcript_metadata)
 
     output:
-    tuple val(meta), path("xr-cell-polygons.geojson"), emit: xr_polygons
-    path("xr-transcript-metadata.csv")               , emit: xr_metadata
-    path("versions.yml")                             , emit: versions
+    tuple val(meta), path("${prefix}/xr-cell-polygons.geojson")     , emit: xr_polygons
+    tuple val(meta), path("${prefix}/xr-transcript-metadata.csv")   , emit: xr_metadata
+    tuple val(meta), path("${prefix}")                              , emit: outdir
+    path("versions.yml")                                            , emit: versions
 
     script:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    def args = task.ext.args ?: ''
     // Exit if running this module with -profile conda / -profile mamba
     if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
         error "PROSEG2BAYSOR (preprocess) module does not support Conda. Please use Docker / Singularity / Podman instead."
     }
 
     """
-    proseg-to-baysor  \
-        ${transcript_metadata} \
-        ${cell_polygons} \
-        --output-transcript-metadata xr-transcript-metadata.csv \
-        --output-cell-polygons xr-cell-polygons.geojson
+    mkdir -p ${prefix}
+
+    proseg-to-baysor  \\
+        ${transcript_metadata} \\
+        ${cell_polygons} \\
+        --output-transcript-metadata ${prefix}/xr-transcript-metadata.csv \\
+        --output-cell-polygons ${prefix}/xr-cell-polygons.geojson \\
+        ${args}
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
@@ -39,11 +44,13 @@ process PROSEG2BAYSOR {
         error "PROSEG module does not support Conda. Please use Docker / Singularity / Podman instead."
     }
     def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
+    prefix = task.ext.prefix ?: "${meta.id}"
 
     """
-    touch xr-transcript-metadata.csv
-    touch xr-cell-polygons.geojson
+    mkdir -p ${prefix}
+
+    touch ${prefix}/xr-transcript-metadata.csv
+    touch ${prefix}/xr-cell-polygons.geojson
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

diff --git a/modules/local/xeniumranger/import-segmentation/main.nf b/modules/local/xeniumranger/import-segmentation/main.nf
@@ -0,0 +1,92 @@
+// Imports externally generated segmentation results into an existing Xenium bundle
+// with `xeniumranger import-segmentation` and emits the recomputed bundle directory.
+process XENIUMRANGER_IMPORT_SEGMENTATION {
+    tag "$meta.id"
+    label 'process_high'
+
+    container "nf-core/xeniumranger:3.1.1"
+
+    input:
+    tuple val(meta),
+        path(xenium_bundle),
+        path(coordinate_transform),
+        path(nuclei),
+        path(cells),
+        path(transcript_assignment),
+        path(viz_polygons),
+        val(units)
+
+    output:
+    tuple val(meta), path("${prefix}")  , emit: bundle
+    path("versions.yml")                , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // No `def`: prefix must stay visible to the output block above
+    prefix = task.ext.prefix ?: "${meta.id}"
+    // Exit if running this module with -profile conda / -profile mamba
+    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+        error "XENIUMRANGER_IMPORT_SEGMENTATION module does not support Conda. Please use Docker / Singularity / Podman instead."
+    }
+    def args = task.ext.args ?: ''
+
+    // Each optional input only contributes its flag when a value was supplied
+    // image based segmentation options
+    def coord_transform = coordinate_transform ? "--coordinate-transform=\"${coordinate_transform}\"" : ""
+    def nuclei_detection = nuclei ? "--nuclei=\"${nuclei}\"" : ""
+    def cell_detection = cells ? "--cells=\"${cells}\"" : ""
+
+    // transcript based segmentation
+    def transcript_assign = transcript_assignment ? "--transcript-assignment=\"${transcript_assignment}\"" : ""
+    def polygons = viz_polygons ? "--viz-polygons=\"${viz_polygons}\"" : ""
+
+    // shared argument
+    def space = units ? "--units=${units}" : ""
+
+    // conditional args: the expansion distance is only passed together with --nuclei, and only
+    // when params.expansion_distance is set, so an unset parameter never renders as "null"
+    def exp_dist = (nuclei && params.expansion_distance != null) ? "--expansion-distance=${params.expansion_distance}" : ""
+
+    """
+    xeniumranger import-segmentation \\
+        --id="import_segmentation_out" \\
+        --xenium-bundle="${xenium_bundle}" \\
+        ${exp_dist} \\
+        ${coord_transform} \\
+        ${nuclei_detection} \\
+        ${cell_detection} \\
+        ${transcript_assign} \\
+        ${polygons} \\
+        ${space} \\
+        --localcores=${task.cpus} \\
+        --localmem=${task.memory.toGiga()} \\
+        ${args}
+
+    # assign the new xenium bundle as the desired output folder
+    mv import_segmentation_out/outs "${prefix}"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        xeniumranger: \$(xeniumranger -V | sed -e "s/xeniumranger-/- /g")
+    END_VERSIONS
+    """
+
+    stub:
+    // Exit if running this module with -profile conda / -profile mamba
+    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+        error "XENIUMRANGER_IMPORT_SEGMENTATION module does not support Conda. Please use Docker / Singularity / Podman instead."
+    }
+    prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    # mirror the real run, where the bundle files sit directly inside the prefix directory
+    mkdir -p "${prefix}"
+    touch "${prefix}/fake_file.txt"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        xeniumranger: \$(xeniumranger -V | sed -e "s/xeniumranger-/- /g")
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/xeniumranger/import-segmentation/meta.yml b/modules/local/xeniumranger/import-segmentation/meta.yml
@@ -0,0 +1,89 @@
+# Metadata for the local XENIUMRANGER_IMPORT_SEGMENTATION module; the input and output
+# entries below follow the channel layout declared in the module's main.nf.
+name: xeniumranger_import_segmentation
+description: The xeniumranger import-segmentation module allows you to specify 2D
+  nuclei and/or cell segmentation results for assigning transcripts to cells and recalculate
+  all Xenium Onboard Analysis (XOA) outputs that depend on segmentation. Segmentation
+  results can be generated by community-developed tools or taken from a prior Xenium
+  segmentation result.
+keywords:
+  - spatial
+  - segmentation
+  - import segmentation
+  - nuclear segmentation
+  - cell segmentation
+  - xeniumranger
+  - imaging
+tools:
+  - xeniumranger:
+      description: |
+        Xenium Ranger is a set of analysis pipelines that process Xenium In Situ Gene Expression data to relabel, resegment, or import new segmentation results from community-developed tools. Xenium Ranger provides flexible off-instrument reanalysis of Xenium In Situ data. Relabel transcripts, resegment cells with the latest 10x segmentation algorithms, or import your own segmentation data to assign transcripts to cells.
+      homepage: "https://www.10xgenomics.com/support/software/xenium-ranger/latest"
+      documentation: "https://www.10xgenomics.com/support/software/xenium-ranger/latest/getting-started"
+      tool_dev_url: "https://www.10xgenomics.com/support/software/xenium-ranger/latest/analysis"
+      licence:
+        - "10x Genomics EULA"
+      identifier: ""
+
+input:
+  # All inputs arrive as one tuple; optional entries that evaluate to false add no flag.
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing run information
+          e.g. [id:'xenium_bundle_path']
+    - xenium_bundle:
+        type: directory
+        description: Path to the xenium output bundle generated by the Xenium Onboard
+          Analysis pipeline
+    - coordinate_transform:
+        type: file
+        description: Image alignment file containing similarity transform matrix e.g.,
+          the _imagealignment.csv file exported from Xenium Explorer
+    - nuclei:
+        type: file
+        description: |
+          Label mask (TIFF or NPY), polygons of nucleus segmentations (GeoJSON FeatureCollection), or Xenium Onboard Analysis cells.zarr.zip (the nucleus masks as input).
+          --nuclei will use nucleusGeometry polygon if it exists in the GeoJSON (i.e., for QuPath-like GeoJSON files),
+          or geometry if it does not. Error if --transcript-assignment argument is used.
+          When nuclei is provided, --expansion-distance is taken from params.expansion_distance
+          (nuclei boundary expansion distance in µm, default 5, accepted range 0 - 100).
+    - cells:
+        type: file
+        description: |
+          Label mask (TIFF or NPY), polygons of cell segmentations (GeoJSON FeatureCollection), or Xenium Onboard Analysis cells.zarr.zip (the cell masks as input).
+          Features with a non-cell objectType will be ignored. Error if --transcript-assignment argument is used.
+          In Xenium Ranger v2.0, --nuclei no longer needs to be used with --cells.
+    - transcript_assignment:
+        type: file
+        description: |
+          Transcript CSV with cell assignment from Baysor v0.6. Error if --cells or --nuclei arguments are used.
+    - viz_polygons:
+        type: file
+        description: |
+          Cell boundary polygons (GeoJSON) for visualization from Baysor v0.6. Required if --transcript-assignment argument used. Error if --cells or --nuclei arguments used.
+    - units:
+        type: string
+        description: Value passed to --units to declare the coordinate units of the
+          supplied segmentation (microns or pixels); omitted from the command when empty
+output:
+  - bundle:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'sample' ]
+      - "${prefix}":
+          type: directory
+          description: Directory holding the contents of the xenium ranger outs folder,
+            see official 10X Genomics documentation for a complete list of outputs
+          pattern: "${prefix}"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@khersameesh24"
+maintainers:
+  - "@khersameesh24"