diff --git a/modules/nf-core/trtools/mergestr/environment.yml b/modules/nf-core/trtools/mergestr/environment.yml
new file mode 100644
index 000000000000..ad355e21184c
--- /dev/null
+++ b/modules/nf-core/trtools/mergestr/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - "bioconda::trtools=6.1.0"
diff --git a/modules/nf-core/trtools/mergestr/main.nf b/modules/nf-core/trtools/mergestr/main.nf
new file mode 100644
index 000000000000..9841d9727f99
--- /dev/null
+++ b/modules/nf-core/trtools/mergestr/main.nf
@@ -0,0 +1,50 @@
+// Merge several tandem-repeat VCFs with TRTools mergeSTR, then bgzip and tabix-index the result.
+process TRTOOLS_MERGESTR {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/trtools:6.1.0--pyhdfd78af_0':
+        'quay.io/biocontainers/trtools:6.1.0--pyhdfd78af_0' }"
+
+    input:
+    tuple val(meta), path(vcfs), path(tbis)
+
+    output:
+    tuple val(meta), path("*.vcf.gz"), emit: vcf
+    tuple val(meta), path("*.vcf.gz.tbi"), emit: tbi
+    tuple val("${task.process}"), val('trtools'), eval("mergeSTR --version | sed 's/mergeSTR //'"), topic: versions, emit: versions_trtools
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}_mergestr"
+    // The script writes ${prefix}.vcf and bgzips it to ${prefix}.vcf.gz; refuse inputs that would be clobbered.
+    if ( vcfs.any{ "${it}" == "${prefix}.vcf" || "${it}" == "${prefix}.vcf.gz" } ) {
+        error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+    }
+    // Sort by file name so the command line is stable, then build the comma-separated --vcfs value.
+    def input = vcfs.sort { vcf -> vcf.toString() }.join(",")
+
+    """
+    mergeSTR \\
+        --vcfs ${input} \\
+        --out ${prefix} \\
+        ${args}
+
+    bgzip -f ${prefix}.vcf
+    tabix -f -p vcf ${prefix}.vcf.gz
+    """
+
+    stub:
+    // Same default prefix as the script block, so stub outputs are named like real outputs.
+    def prefix = task.ext.prefix ?: "${meta.id}_mergestr"
+
+    """
+    echo "" | gzip > ${prefix}.vcf.gz
+    touch ${prefix}.vcf.gz.tbi
+    """
+}
diff --git a/modules/nf-core/trtools/mergestr/meta.yml b/modules/nf-core/trtools/mergestr/meta.yml
new file mode 100644
index 000000000000..bc1168a7d88b
--- /dev/null
+++ b/modules/nf-core/trtools/mergestr/meta.yml
@@ -0,0 +1,90 @@
+name: "trtools_mergestr"
+description: MergeSTR merges multiple VCF files produced by the same TR
+  genotyper into a single VCF file.
+keywords:
+  - tandem repeats
+  - str
+  - vcf
+  - merge
+  - trtools
+tools:
+  - "trtools":
+      description: "Toolkit for genome-wide analysis of tandem repeats"
+      homepage: "https://trtools.readthedocs.io/"
+      documentation: "https://trtools.readthedocs.io/"
+      tool_dev_url: "https://github.com/gymrek-lab/TRTools"
+      doi: "10.1093/bioinformatics/btaa736"
+      licence:
+        - "MIT"
+      identifier: biotools:trtools
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1' ]`
+    - vcfs:
+        type: file
+        description: List containing 2 or more bgzipped tandem repeat VCF files
+          e.g. [ 'file1.vcf.gz', 'file2.vcf.gz' ]
+        pattern: "*.{vcf.gz}"
+        ontologies:
+          - edam: "http://edamontology.org/format_3016"
+    - tbis:
+        type: file
+        description: List containing the tbi index files corresponding to the vcfs
+          input files e.g. [ 'file1.vcf.gz.tbi', 'file2.vcf.gz.tbi' ]
+        pattern: "*.{vcf.gz.tbi}"
+        ontologies:
+          - edam: "http://edamontology.org/format_3700"
+output:
+  vcf:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "*.vcf.gz":
+          type: file
+          description: Merged VCF file with the merged genotypes
+          pattern: "*.vcf.gz"
+          ontologies:
+            - edam: "http://edamontology.org/format_3016"
+            - edam: "http://edamontology.org/format_3989"
+  tbi:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "*.vcf.gz.tbi":
+          type: file
+          description: Tabix index for the merged VCF file
+          pattern: "*.vcf.gz.tbi"
+          ontologies:
+            - edam: "http://edamontology.org/format_3700"
+  versions_trtools:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - trtools:
+          type: string
+          description: The name of the tool
+      - mergeSTR --version | sed 's/mergeSTR //':
+          type: eval
+          description: The expression to obtain the version of the tool
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - trtools:
+          type: string
+          description: The name of the tool
+      - mergeSTR --version | sed 's/mergeSTR //':
+          type: eval
+          description: The expression to obtain the version of the tool
+authors:
+  - "@KondratievaOlesya"
+maintainers:
+  - "@KondratievaOlesya"
diff --git a/modules/nf-core/trtools/mergestr/tests/main.nf.test b/modules/nf-core/trtools/mergestr/tests/main.nf.test
new file mode 100644
index 000000000000..083bba1221c6
--- /dev/null
+++ b/modules/nf-core/trtools/mergestr/tests/main.nf.test
@@ -0,0 +1,118 @@
+nextflow_process {
+
+    name "Test Process TRTOOLS_MERGESTR"
+    script "../main.nf"
+    config "./nextflow.config"
+    process "TRTOOLS_MERGESTR"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "gangstr"
+    tag "trtools"
+    tag "trtools/mergestr"
+
+    // Run GangSTR twice on the same reads and locus to obtain two VCFs (plus indexes) for merging.
+    setup {
+        run("GANGSTR", alias: "GANGSTR1") {
+            script "modules/nf-core/gangstr/main.nf"
+
+            process {
+                """
+                bed1 = Channel.of('chr22\t3000\t3020\t5\tCGCGC')
+                    .collectFile(name: 'genome1.bed', newLine: true)
+
+                input[0] = Channel.of([
+                    [id:'test1'],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists:true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists:true)
+                ]).combine(bed1)
+
+                input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists:true)
+                input[2] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists:true)
+                """
+            }
+        }
+
+        run("GANGSTR", alias: "GANGSTR2") {
+            script "modules/nf-core/gangstr/main.nf"
+
+            process {
+                """
+                bed2 = Channel.of('chr22\t3000\t3020\t5\tCGCGC')
+                    .collectFile(name: 'genome2.bed', newLine: true)
+
+                input[0] = Channel.of([
+                    [id:'test2'],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists:true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists:true)
+                ]).combine(bed2)
+
+                input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists:true)
+                input[2] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists:true)
+                """
+            }
+        }
+    }
+
+    test("homo_sapiens - gangstr vcfs") {
+
+        when {
+            process {
+                """
+                input[0] = GANGSTR1.out.vcf
+                    .combine(GANGSTR2.out.vcf)
+                    .combine(GANGSTR1.out.index)
+                    .combine(GANGSTR2.out.index)
+                    .map { meta1, vcf1, meta2, vcf2, meta3, tbi1, meta4, tbi2 ->
+                        [
+                            [ id: 'test' ],
+                            [ vcf1, vcf2 ],
+                            [ tbi1, tbi2 ]
+                        ]
+                    }
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    path(process.out.vcf.get(0).get(1)).vcf.summary,
+                    file(process.out.tbi[0][1]).name,
+                    process.out.findAll { key, val -> key.startsWith('versions') }
+                    ).match() }
+            )
+        }
+    }
+
+    test("homo_sapiens - gangstr vcfs - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = GANGSTR1.out.vcf
+                    .combine(GANGSTR2.out.vcf)
+                    .combine(GANGSTR1.out.index)
+                    .combine(GANGSTR2.out.index)
+                    .map { meta1, vcf1, meta2, vcf2, meta3, tbi1, meta4, tbi2 ->
+                        [
+                            [ id: 'test' ],
+                            [ vcf1, vcf2 ],
+                            [ tbi1, tbi2 ]
+                        ]
+                    }
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(sanitizeOutput(process.out)).match() }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/trtools/mergestr/tests/main.nf.test.snap b/modules/nf-core/trtools/mergestr/tests/main.nf.test.snap
new file mode 100644
index 000000000000..d05e15560014
--- /dev/null
+++ b/modules/nf-core/trtools/mergestr/tests/main.nf.test.snap
@@ -0,0 +1,56 @@
+{
+    "homo_sapiens - gangstr vcfs - stub": {
+        "content": [
+            {
+                "tbi": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_mergestr.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "vcf": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_mergestr.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ],
+                "versions_trtools": [
+                    [
+                        "TRTOOLS_MERGESTR",
+                        "trtools",
+                        "6.1.0"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-06-18T15:15:21.845815738",
+        "meta": {
+            "nf-test": "0.9.5",
+            "nextflow": "26.04.3"
+        }
+    },
+    "homo_sapiens - gangstr vcfs": {
+        "content": [
+            "VcfFile [chromosomes=[chr22], sampleCount=2, variantCount=1, phased=false, phasedAutodetect=false]",
+            "test_mergestr.vcf.gz.tbi",
+            {
+                "versions_trtools": [
+                    [
+                        "TRTOOLS_MERGESTR",
+                        "trtools",
+                        "6.1.0"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-06-16T14:11:27.91327297",
+        "meta": {
+            "nf-test": "0.9.5",
+            "nextflow": "26.04.3"
+        }
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/trtools/mergestr/tests/nextflow.config b/modules/nf-core/trtools/mergestr/tests/nextflow.config
new file mode 100644
index 000000000000..b69ee4ba1c73
--- /dev/null
+++ b/modules/nf-core/trtools/mergestr/tests/nextflow.config
@@ -0,0 +1,12 @@
+process {
+    withName: TRTOOLS_MERGESTR {
+        ext.args = '--vcftype gangstr --update-sample-from-file'
+    }
+    withName: GANGSTR1 {
+        ext.args = '--insertmean 300 --insertsdev 50 --min-sample-reads 1 --max-proc-read 100000'
+    }
+
+    withName: GANGSTR2 {
+        ext.args = '--insertmean 300 --insertsdev 50 --min-sample-reads 1 --max-proc-read 100000'
+    }
+}