From c568ef79a39f1e79f0dd92c571c41f5100457e37 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 18 Jun 2024 10:27:12 +0200 Subject: [PATCH 01/16] change type and update container --- modules/local/get_chrom_sizes.nf | 4 ++-- modules/local/rename_align_files.nf | 4 ++-- nextflow_schema.json | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/local/get_chrom_sizes.nf b/modules/local/get_chrom_sizes.nf index 4ab80ed1..e84dbe20 100644 --- a/modules/local/get_chrom_sizes.nf +++ b/modules/local/get_chrom_sizes.nf @@ -4,8 +4,8 @@ process GET_CHROM_SIZES { conda "conda-forge::coreutils=8.31" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--0' : - 'biocontainers/gnu-wget:1.18--0' }" + 'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--h36e9172_9' : + 'biocontainers/gnu-wget:1.18--h36e9172_9' }" input: tuple val(meta), path(fai) diff --git a/modules/local/rename_align_files.nf b/modules/local/rename_align_files.nf index da9f890b..40278ca5 100644 --- a/modules/local/rename_align_files.nf +++ b/modules/local/rename_align_files.nf @@ -4,8 +4,8 @@ process RENAME_ALIGN_FILES { conda "conda-forge::coreutils=8.31" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--0' : - 'biocontainers/gnu-wget:1.18--0' }" + 'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--h36e9172_9' : + 'biocontainers/gnu-wget:1.18--h36e9172_9' }" input: tuple val(meta), path(input) diff --git a/nextflow_schema.json b/nextflow_schema.json index 6e2fbc81..10e7f354 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -106,8 +106,8 @@ "type": "string", "exists": true, "fa_icon": "fas fa-file", - "description": "A file containing the path to models produced by GATK4 GermlineCNVCaller cohort.", - "format": "file-path", + "description": "A directory containing the models produced by GATK4 GermlineCNVCaller cohort.", + "format": "directory-path", "help_text": "This model is required for generating a cnv calls when using GermlineCNVCaller." }, "genome": { From bf98157390d9307111242ad9d294f00591c347eb Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 18 Jun 2024 12:45:40 +0200 Subject: [PATCH 02/16] revert a change --- nextflow_schema.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 974b16fd..f9db3a8f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -113,8 +113,8 @@ "type": "string", "exists": true, "fa_icon": "fas fa-file", - "description": "A directory containing the models produced by GATK4 GermlineCNVCaller cohort.", - "format": "directory-path", + "description": "A file containing the path to the models produced by GATK4 GermlineCNVCaller cohort.", + "format": "file-path", "help_text": "This model is required for generating a cnv calls when using GermlineCNVCaller." 
}, "genome": { From 16c2be05363955b49a81f7c08eee048eb6a509f0 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 18 Jun 2024 12:46:32 +0200 Subject: [PATCH 03/16] fix typo --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index f9db3a8f..80cdfd11 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -113,7 +113,7 @@ "type": "string", "exists": true, "fa_icon": "fas fa-file", - "description": "A file containing the path to the models produced by GATK4 GermlineCNVCaller cohort.", + "description": "A file containing the path to models produced by GATK4 GermlineCNVCaller cohort.", "format": "file-path", "help_text": "This model is required for generating a cnv calls when using GermlineCNVCaller." }, From 71c81b3e8622918b63fd4ed9c12699b2d65f3d3b Mon Sep 17 00:00:00 2001 From: ramprasadn <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 18 Jun 2024 13:47:01 +0200 Subject: [PATCH 04/16] update bwameme --- modules.json | 2 +- modules/nf-core/bwameme/mem/main.nf | 13 ++-- modules/nf-core/bwameme/mem/meta.yml | 3 + .../nf-core/bwameme/mem/tests/main.nf.test | 15 +++-- .../bwameme/mem/tests/main.nf.test.snap | 65 +++++++++++++++++++ 5 files changed, 86 insertions(+), 12 deletions(-) diff --git a/modules.json b/modules.json index 1b731501..d6310623 100644 --- a/modules.json +++ b/modules.json @@ -82,7 +82,7 @@ }, "bwameme/mem": { "branch": "master", - "git_sha": "79480293280ff4f10f30bdea1ddd903f223f8489", + "git_sha": "c3793385cf559bb60d33e6c3b0cb379a40b26602", "installed_by": ["modules"] }, "cadd": { diff --git a/modules/nf-core/bwameme/mem/main.nf b/modules/nf-core/bwameme/mem/main.nf index db41316e..db91482f 100644 --- a/modules/nf-core/bwameme/mem/main.nf +++ b/modules/nf-core/bwameme/mem/main.nf @@ -12,6 +12,7 @@ process BWAMEME_MEM { tuple val(meta2), path(index) tuple val(meta3), path(fasta) val sort_bam + val 
mbuffer output: tuple val(meta), path("*.sam") , emit: sam , optional:true @@ -29,14 +30,14 @@ process BWAMEME_MEM { def args2 = task.ext.args2 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def samtools_command = sort_bam ? 'sort' : 'view' - def mbuffer_mem = 3072 - if (!task.memory) { - log.info '[bwameme-mbuffer] Available memory not known - defaulting to 3GB for mbuffer. Specify process memory requirements to change this.' + if (!mbuffer) { + log.info '[bwameme-mbuffer] Memory for mbuffer is not set - defaulting to 3072MB for mbuffer.' + mbuffer_mem = 3072 } else { - mbuffer_mem = (task.memory.mega*0.5).intValue() + mbuffer_mem = mbuffer } - def mbuffer_command = sort_bam ? "| mbuffer -m ${mbuffer_mem}M" : "" - def mem_per_thread = sort_bam ? "-m "+ (mbuffer_mem/task.cpus).intValue()+"M" : "" + mbuffer_command = sort_bam ? "| mbuffer -m ${mbuffer_mem}M" : "" + mem_per_thread = sort_bam ? "-m "+ (mbuffer_mem/task.cpus).intValue()+"M" : "" def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ def extension_matcher = (args2 =~ extension_pattern) def extension = extension_matcher.getCount() > 0 ? 
extension_matcher[0][2].toLowerCase() : "bam" diff --git a/modules/nf-core/bwameme/mem/meta.yml b/modules/nf-core/bwameme/mem/meta.yml index c7eb7b28..85a8b5b3 100644 --- a/modules/nf-core/bwameme/mem/meta.yml +++ b/modules/nf-core/bwameme/mem/meta.yml @@ -52,6 +52,9 @@ input: type: boolean description: use samtools sort (true) or samtools view (false) pattern: "true or false" + - mbuffer: + type: integer + description: memory for mbuffer in megabytes output: - meta: diff --git a/modules/nf-core/bwameme/mem/tests/main.nf.test b/modules/nf-core/bwameme/mem/tests/main.nf.test index 3b67b39e..961d6379 100644 --- a/modules/nf-core/bwameme/mem/tests/main.nf.test +++ b/modules/nf-core/bwameme/mem/tests/main.nf.test @@ -11,7 +11,7 @@ nextflow_process { tag "bwameme/index" config "./nextflow.config" - test("sarscov2 - fastq, index, fasta, false") { + test("sarscov2 - fastq, index, fasta, false, 0") { setup { run("BWAMEME_INDEX") { @@ -38,6 +38,7 @@ nextflow_process { input[1] = BWAMEME_INDEX.out.index input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) input[3] = false + input[4] = 0 """ } } @@ -54,7 +55,7 @@ nextflow_process { } - test("sarscov2 - fastq, index, fasta, true") { + test("sarscov2 - fastq, index, fasta, true, 2048") { setup { run("BWAMEME_INDEX") { @@ -81,6 +82,7 @@ nextflow_process { input[1] = BWAMEME_INDEX.out.index input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) input[3] = true + input[4] = 2048 """ } } @@ -97,7 +99,7 @@ nextflow_process { } - test("sarscov2 - [fastq1, fastq2], index, fasta, false") { + test("sarscov2 - [fastq1, fastq2], index, fasta, false, 0") { setup { run("BWAMEME_INDEX") { @@ -127,6 +129,7 @@ nextflow_process { input[1] = BWAMEME_INDEX.out.index input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', 
checkIfExists: true)]]) input[3] = false + input[4] = 0 """ } } @@ -143,7 +146,7 @@ nextflow_process { } - test("sarscov2 - [fastq1, fastq2], index, fasta, true") { + test("sarscov2 - [fastq1, fastq2], index, fasta, true, 2048") { setup { run("BWAMEME_INDEX") { @@ -173,6 +176,7 @@ nextflow_process { input[1] = BWAMEME_INDEX.out.index input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) input[3] = true + input[4] = 2048 """ } } @@ -189,7 +193,7 @@ nextflow_process { } - test("sarscov2 - [fastq1, fastq2], index, fasta, true - stub") { + test("sarscov2 - [fastq1, fastq2], index, fasta, true, 2048 - stub") { options "-stub" @@ -221,6 +225,7 @@ nextflow_process { input[1] = BWAMEME_INDEX.out.index input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) input[3] = true + input[4] = 2048 """ } } diff --git a/modules/nf-core/bwameme/mem/tests/main.nf.test.snap b/modules/nf-core/bwameme/mem/tests/main.nf.test.snap index 281011ae..a8ff281b 100644 --- a/modules/nf-core/bwameme/mem/tests/main.nf.test.snap +++ b/modules/nf-core/bwameme/mem/tests/main.nf.test.snap @@ -1,4 +1,17 @@ { + "sarscov2 - [fastq1, fastq2], index, fasta, true, 2048": { + "content": [ + "test.bam", + [ + "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-18T10:17:31.035368735" + }, "sarscov2 - [fastq1, fastq2], index, fasta, false": { "content": [ "test.bam", @@ -25,6 +38,19 @@ }, "timestamp": "2024-05-15T19:28:46.895668666" }, + "sarscov2 - [fastq1, fastq2], index, fasta, false, 0": { + "content": [ + "test.bam", + [ + "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-18T10:17:08.497131484" + }, "sarscov2 - [fastq1, fastq2], index, fasta, true": { 
"content": [ "test.bam", @@ -38,6 +64,32 @@ }, "timestamp": "2024-05-15T20:44:56.510177191" }, + "sarscov2 - fastq, index, fasta, false, 0": { + "content": [ + "test.bam", + [ + "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-18T10:16:23.45126091" + }, + "sarscov2 - fastq, index, fasta, true, 2048": { + "content": [ + "test.bam", + [ + "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-18T10:16:46.541148031" + }, "sarscov2 - fastq, index, fasta, false": { "content": [ "test.bam", @@ -63,5 +115,18 @@ "nextflow": "23.10.1" }, "timestamp": "2024-05-15T20:44:05.2657749" + }, + "sarscov2 - [fastq1, fastq2], index, fasta, true, 2048 - stub": { + "content": [ + "test.bam", + [ + "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-18T10:17:40.514767321" } } \ No newline at end of file From 03a71f6fc409996f5f4ead0f3f828e84b0db6708 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 18 Jun 2024 14:03:25 +0200 Subject: [PATCH 05/16] update bwameme --- nextflow.config | 1 + nextflow_schema.json | 7 +++++++ subworkflows/local/align.nf | 2 ++ subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf | 3 ++- workflows/raredisease.nf | 1 + 5 files changed, 13 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 928d3a8c..b6defbc2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -53,6 +53,7 @@ params { // Alignment aligner = 'bwamem2' + mbuffer_mem = 3072 min_trimmed_length = 40 mt_subsample_rd = 150 mt_subsample_seed = 30 diff --git a/nextflow_schema.json b/nextflow_schema.json index 80cdfd11..47f36d06 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -605,6 +605,13 @@ "fa_icon": "fas 
fa-align-center", "enum": ["bwa", "bwamem2", "bwameme", "sentieon"] }, + "mbuffer_mem": { + "type": "integer", + "default": 3072, + "description": "Memory allocated for mbuffer in megabytes (used only by bwameme)", + "help_text": "To know more about this parameter check [bwameme](https://github.com/kaist-ina/BWA-MEME?tab=readme-ov-file#building-pipeline-with-samtools) documentation.", + "fa_icon": "fas fa-less-than" + }, "min_trimmed_length": { "type": "integer", "default": 40, diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index 8822d1cb..db7b0bfc 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -25,6 +25,7 @@ workflow ALIGN { ch_mtshift_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_mtshift_dictionary // channel: [mandatory] [ val(meta), path(dict) ] ch_mtshift_fai // channel: [mandatory] [ val(meta), path(fai) ] + val_mbuffer_mem // integer: [mandatory] memory in megabytes val_platform // string: [mandatory] illumina or a different technology main: @@ -56,6 +57,7 @@ workflow ALIGN { ch_genome_bwamemeindex, ch_genome_fasta, ch_genome_fai, + val_mbuffer_mem, val_platform ) ch_bwamem2_bam = ALIGN_BWA_BWAMEM2_BWAMEME.out.marked_bam diff --git a/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf b/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf index 7d635d51..b11f589f 100644 --- a/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf +++ b/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf @@ -21,6 +21,7 @@ workflow ALIGN_BWA_BWAMEM2_BWAMEME { ch_bwameme_index // channel: [mandatory] [ val(meta), path(bwamem2_index) ] ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + val_mbuffer_mem // integer: [mandatory] default: 3072 val_platform // string: [mandatory] default: illumina main: @@ -32,7 +33,7 @@ workflow ALIGN_BWA_BWAMEM2_BWAMEME { ch_align = BWA.out.bam ch_versions = 
ch_versions.mix(BWA.out.versions.first()) } else if (params.aligner.equals("bwameme")) { - BWAMEME_MEM ( ch_reads_input, ch_bwameme_index, ch_genome_fasta, true ) + BWAMEME_MEM ( ch_reads_input, ch_bwameme_index, ch_genome_fasta, true, val_mbuffer_mem ) ch_align = BWAMEME_MEM.out.bam ch_versions = ch_versions.mix(BWAMEME_MEM.out.versions.first()) } else { diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index ab09b6f2..ee00ee66 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -370,6 +370,7 @@ workflow RAREDISEASE { ch_mtshift_fasta, ch_mtshift_dictionary, ch_mtshift_fai, + params.mbuffer_mem, params.platform ) .set { ch_mapped } From 98ed2cdc3ea0ac0d4582ce330c307417cad4cd40 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 20 Jun 2024 15:53:36 +0200 Subject: [PATCH 06/16] remove readcount interval generation --- nextflow_schema.json | 2 +- subworkflows/local/prepare_references.nf | 30 +++++++++------------ workflows/raredisease.nf | 34 +++++++++++++----------- 3 files changed, 31 insertions(+), 35 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 47f36d06..032569bf 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -281,7 +281,7 @@ "fa_icon": "fas fa-file", "description": "Interval list file containing the intervals over which read counts are tabulated for CNV calling", "format": "file-path", - "help_text": "Generated by GATK4 preprocessintervals. If absent, pipeline can generate this file." + "help_text": "Generated by GATK4 preprocessintervals." 
}, "reduced_penetrance": { "type": "string", diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index 2bd4b6dd..36445800 100644 --- a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -31,13 +31,14 @@ include { UNTAR as UNTAR_VEP_CACHE } from '../../modul workflow PREPARE_REFERENCES { take: - ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_mt_fasta // channel: [mandatory for dedicated mt analysis] [ val(meta), path(fasta) ] - ch_gnomad_af_tab // channel: [optional; used in for snv annotation] [ val(meta), path(tab) ] - ch_known_dbsnp // channel: [optional; used only by sentieon] [ val(meta), path(vcf) ] - ch_target_bed // channel: [mandatory for WES] [ path(bed) ] - ch_vep_cache // channel: [mandatory for annotation] [ path(cache) ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ] + ch_mt_fasta // channel: [mandatory for dedicated mt analysis] [ val(meta), path(fasta) ] + ch_gnomad_af_tab // channel: [optional; used in for snv annotation] [ val(meta), path(tab) ] + ch_known_dbsnp // channel: [optional; used only by sentieon] [ val(meta), path(vcf) ] + ch_target_bed // channel: [mandatory for WES] [ path(bed) ] + ch_vep_cache // channel: [mandatory for annotation] [ path(cache) ] main: ch_versions = Channel.empty() @@ -49,7 +50,8 @@ workflow PREPARE_REFERENCES { // Genome indices SAMTOOLS_FAIDX_GENOME(ch_genome_fasta, [[],[]]) GATK_SD(ch_genome_fasta) - ch_fai = Channel.empty().mix(ch_genome_fai, SAMTOOLS_FAIDX_GENOME.out.fai).collect() + ch_fai = Channel.empty().mix(ch_genome_fai, SAMTOOLS_FAIDX_GENOME.out.fai).collect() + ch_dict = Channel.empty().mix(ch_genome_dictionary, GATK_SD.out.dict).collect() GET_CHROM_SIZES( ch_fai ) // Genome
alignment indices @@ -89,7 +91,7 @@ workflow PREPARE_REFERENCES { TABIX_PBT(ch_target_bed).gz_tbi.set { ch_bgzip_tbi } // Generate bait and target intervals - GATK_BILT(ch_target_bed, GATK_SD.out.dict).interval_list + GATK_BILT(ch_target_bed, ch_dict).interval_list GATK_ILT(GATK_BILT.out.interval_list) GATK_ILT.out.interval_list .collect{ it[1] } @@ -101,10 +103,6 @@ workflow PREPARE_REFERENCES { CAT_CAT_BAIT ( ch_bait_intervals_cat_in ) UNTAR_VEP_CACHE (ch_vep_cache) - //cnvcalling intervals - GATK_PREPROCESS_WGS (ch_genome_fasta, ch_fai, GATK_SD.out.dict, [[],[]], [[],[]]).set {ch_preprocwgs} - GATK_PREPROCESS_WES (ch_genome_fasta, ch_fai, GATK_SD.out.dict, GATK_BILT.out.interval_list, [[],[]]).set {ch_preprocwes} - // RTG tools ch_genome_fasta.map { meta, fasta -> return [meta, fasta, [], [] ] } .set {ch_rtgformat_in} @@ -134,8 +132,6 @@ workflow PREPARE_REFERENCES { ch_versions = ch_versions.mix(GATK_ILT.out.versions) ch_versions = ch_versions.mix(CAT_CAT_BAIT.out.versions) ch_versions = ch_versions.mix(UNTAR_VEP_CACHE.out.versions) - ch_versions = ch_versions.mix(GATK_PREPROCESS_WGS.out.versions) - ch_versions = ch_versions.mix(GATK_PREPROCESS_WES.out.versions) ch_versions = ch_versions.mix(RTGTOOLS_FORMAT.out.versions) emit: @@ -144,9 +140,7 @@ workflow PREPARE_REFERENCES { genome_bwameme_index = BWAMEME_INDEX_GENOME.out.index.collect() // channel: [ val(meta), path(index) ] genome_chrom_sizes = GET_CHROM_SIZES.out.sizes.collect() // channel: [ path(sizes) ] genome_fai = ch_fai // channel: [ val(meta), path(fai) ] - genome_dict = GATK_SD.out.dict.collect() // channel: [ path(dict) ] - readcount_intervals = Channel.empty() - .mix(ch_preprocwgs.interval_list,ch_preprocwes.interval_list)// channel: [ path(intervals) ] + genome_dict = ch_dict // channel: [ val(meta), path(dict) ] sdf = RTGTOOLS_FORMAT.out.sdf // channel: [ val (meta), path(intervals) ] mt_intervals = ch_shiftfasta_mtintervals.intervals.collect() // channel: [ path(intervals) ] mtshift_intervals = 
ch_shiftfasta_mtintervals.shift_intervals.collect() // channel: [ path(intervals) ] diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index ee00ee66..bc5fef99 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -58,7 +58,7 @@ if (params.variant_caller.equals("sentieon")) { } if (!params.skip_germlinecnvcaller) { - mandatoryParams += ["ploidy_model", "gcnvcaller_model"] + mandatoryParams += ["ploidy_model", "gcnvcaller_model", "readcount_intervals"] } if (!params.skip_vep_filter) { @@ -171,24 +171,27 @@ workflow RAREDISEASE { ch_case_info = ch_samples.toList().map { CustomFunctions.createCaseChannel(it) } // Initialize file channels for PREPARE_REFERENCES subworkflow - ch_genome_fasta = Channel.fromPath(params.fasta).map { it -> [[id:it[0].simpleName], it] }.collect() - ch_genome_fai = params.fai ? Channel.fromPath(params.fai).map {it -> [[id:it[0].simpleName], it]}.collect() - : Channel.empty() - ch_gnomad_af_tab = params.gnomad_af ? Channel.fromPath(params.gnomad_af).map{ it -> [[id:it[0].simpleName], it] }.collect() - : Channel.value([[],[]]) - ch_dbsnp = params.known_dbsnp ? Channel.fromPath(params.known_dbsnp).map{ it -> [[id:it[0].simpleName], it] }.collect() - : Channel.value([[],[]]) - ch_mt_fasta = params.mt_fasta ? Channel.fromPath(params.mt_fasta).map { it -> [[id:it[0].simpleName], it] }.collect() - : Channel.empty() - ch_target_bed_unprocessed = params.target_bed ? Channel.fromPath(params.target_bed).map{ it -> [[id:it[0].simpleName], it] }.collect() - : Channel.value([[],[]]) - ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache).map { it -> [[id:'vep_cache'], it] }.collect() - : Channel.value([[],[]]) + ch_genome_fasta = Channel.fromPath(params.fasta).map { it -> [[id:it[0].simpleName], it] }.collect() + ch_genome_fai = params.fai ? Channel.fromPath(params.fai).map {it -> [[id:it[0].simpleName], it]}.collect() + : Channel.empty() + ch_genome_dictionary = params.sequence_dictionary ? 
Channel.fromPath(params.sequence_dictionary).map {it -> [[id:it[0].simpleName], it]}.collect() + : Channel.empty() + ch_gnomad_af_tab = params.gnomad_af ? Channel.fromPath(params.gnomad_af).map{ it -> [[id:it[0].simpleName], it] }.collect() + : Channel.value([[],[]]) + ch_dbsnp = params.known_dbsnp ? Channel.fromPath(params.known_dbsnp).map{ it -> [[id:it[0].simpleName], it] }.collect() + : Channel.value([[],[]]) + ch_mt_fasta = params.mt_fasta ? Channel.fromPath(params.mt_fasta).map { it -> [[id:it[0].simpleName], it] }.collect() + : Channel.empty() + ch_target_bed_unprocessed = params.target_bed ? Channel.fromPath(params.target_bed).map{ it -> [[id:it[0].simpleName], it] }.collect() + : Channel.value([[],[]]) + ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache).map { it -> [[id:'vep_cache'], it] }.collect() + : Channel.value([[],[]]) // Prepare references and indices. PREPARE_REFERENCES ( ch_genome_fasta, ch_genome_fai, + ch_genome_dictionary, ch_mt_fasta, ch_gnomad_af_tab, ch_dbsnp, @@ -220,8 +223,7 @@ workflow RAREDISEASE { : ch_references.genome_bwameme_index ch_genome_chrsizes = ch_references.genome_chrom_sizes ch_genome_fai = ch_references.genome_fai - ch_genome_dictionary = params.sequence_dictionary ? Channel.fromPath(params.sequence_dictionary).map {it -> [[id:it[0].simpleName], it]}.collect() - : ch_references.genome_dict + ch_genome_dictionary = ch_references.genome_dict ch_gens_gnomad_pos = params.gens_gnomad_pos ? Channel.fromPath(params.gens_gnomad_pos).collect() : Channel.empty() ch_gens_interval_list = params.gens_interval_list ? 
Channel.fromPath(params.gens_interval_list).collect() From 27cf8fb946790b34186581b4f1a1b099119110f6 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 20 Jun 2024 17:48:21 +0200 Subject: [PATCH 07/16] update module --- modules.json | 2 +- modules/nf-core/bwameme/mem/main.nf | 13 ++- modules/nf-core/bwameme/mem/meta.yml | 5 +- .../nf-core/bwameme/mem/tests/main.nf.test | 15 ++- .../bwameme/mem/tests/main.nf.test.snap | 108 +++++++++++++++--- 5 files changed, 118 insertions(+), 25 deletions(-) diff --git a/modules.json b/modules.json index d6310623..4322c732 100644 --- a/modules.json +++ b/modules.json @@ -82,7 +82,7 @@ }, "bwameme/mem": { "branch": "master", - "git_sha": "c3793385cf559bb60d33e6c3b0cb379a40b26602", + "git_sha": "0aa157a00b54bcbe2c50be375cafd68d928e7f4d", "installed_by": ["modules"] }, "cadd": { diff --git a/modules/nf-core/bwameme/mem/main.nf b/modules/nf-core/bwameme/mem/main.nf index db91482f..2efc8c0f 100644 --- a/modules/nf-core/bwameme/mem/main.nf +++ b/modules/nf-core/bwameme/mem/main.nf @@ -13,6 +13,7 @@ process BWAMEME_MEM { tuple val(meta3), path(fasta) val sort_bam val mbuffer + val samtools_threads output: tuple val(meta), path("*.sam") , emit: sam , optional:true @@ -31,13 +32,19 @@ process BWAMEME_MEM { def prefix = task.ext.prefix ?: "${meta.id}" def samtools_command = sort_bam ? 'sort' : 'view' if (!mbuffer) { - log.info '[bwameme-mbuffer] Memory for mbuffer is not set - defaulting to 3072MB for mbuffer.' + log.info '[bwameme-mbuffer] Memory for mbuffer is not set - defaulting to 3GB for mbuffer.' mbuffer_mem = 3072 } else { mbuffer_mem = mbuffer } + if (!samtools_threads) { + log.info 'Number of threads for samtools is not set - defaulting to 2 threads.' + threads = 2 + } else { + threads = samtools_threads + } mbuffer_command = sort_bam ? "| mbuffer -m ${mbuffer_mem}M" : "" - mem_per_thread = sort_bam ? 
"-m "+ (mbuffer_mem/task.cpus).intValue()+"M" : "" + mem_per_thread = sort_bam ? "-m "+ (mbuffer_mem/threads).intValue()+"M" : "" def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ def extension_matcher = (args2 =~ extension_pattern) def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam" @@ -54,7 +61,7 @@ process BWAMEME_MEM { \$INDEX \\ $reads \\ $mbuffer_command \\ - | samtools $samtools_command $args2 $mem_per_thread -@ $task.cpus ${reference} -o ${prefix}.${extension} - + | samtools $samtools_command $args2 $mem_per_thread -@ $threads ${reference} -o ${prefix}.${extension} - cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/bwameme/mem/meta.yml b/modules/nf-core/bwameme/mem/meta.yml index 85a8b5b3..e5d28db2 100644 --- a/modules/nf-core/bwameme/mem/meta.yml +++ b/modules/nf-core/bwameme/mem/meta.yml @@ -54,7 +54,10 @@ input: pattern: "true or false" - mbuffer: type: integer - description: memory for mbuffer in megabytes + description: memory for mbuffer in megabytes (default 3072) + - sort_threads: + type: integer + description: number of threads to used during samtools sort (default 2). 
output: - meta: diff --git a/modules/nf-core/bwameme/mem/tests/main.nf.test b/modules/nf-core/bwameme/mem/tests/main.nf.test index 961d6379..8175f58a 100644 --- a/modules/nf-core/bwameme/mem/tests/main.nf.test +++ b/modules/nf-core/bwameme/mem/tests/main.nf.test @@ -11,7 +11,7 @@ nextflow_process { tag "bwameme/index" config "./nextflow.config" - test("sarscov2 - fastq, index, fasta, false, 0") { + test("sarscov2 - fastq, index, fasta, false, 0, 4") { setup { run("BWAMEME_INDEX") { @@ -39,6 +39,7 @@ nextflow_process { input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) input[3] = false input[4] = 0 + input[5] = 4 """ } } @@ -55,7 +56,7 @@ nextflow_process { } - test("sarscov2 - fastq, index, fasta, true, 2048") { + test("sarscov2 - fastq, index, fasta, true, 2048, 4") { setup { run("BWAMEME_INDEX") { @@ -83,6 +84,7 @@ nextflow_process { input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) input[3] = true input[4] = 2048 + input[5] = 4 """ } } @@ -99,7 +101,7 @@ nextflow_process { } - test("sarscov2 - [fastq1, fastq2], index, fasta, false, 0") { + test("sarscov2 - [fastq1, fastq2], index, fasta, false, 0, 4") { setup { run("BWAMEME_INDEX") { @@ -130,6 +132,7 @@ nextflow_process { input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) input[3] = false input[4] = 0 + input[5] = 4 """ } } @@ -146,7 +149,7 @@ nextflow_process { } - test("sarscov2 - [fastq1, fastq2], index, fasta, true, 2048") { + test("sarscov2 - [fastq1, fastq2], index, fasta, true, 2048, ''") { setup { run("BWAMEME_INDEX") { @@ -177,6 +180,7 @@ nextflow_process { input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) input[3] = true input[4] = 2048 + input[5] = "" """ } } @@ -193,7 
+197,7 @@ nextflow_process { } - test("sarscov2 - [fastq1, fastq2], index, fasta, true, 2048 - stub") { + test("sarscov2 - [fastq1, fastq2], index, fasta, true, 2048, 4 - stub") { options "-stub" @@ -226,6 +230,7 @@ nextflow_process { input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) input[3] = true input[4] = 2048 + input[5] = 4 """ } } diff --git a/modules/nf-core/bwameme/mem/tests/main.nf.test.snap b/modules/nf-core/bwameme/mem/tests/main.nf.test.snap index a8ff281b..55235959 100644 --- a/modules/nf-core/bwameme/mem/tests/main.nf.test.snap +++ b/modules/nf-core/bwameme/mem/tests/main.nf.test.snap @@ -1,5 +1,5 @@ { - "sarscov2 - [fastq1, fastq2], index, fasta, true, 2048": { + "sarscov2 - [fastq1, fastq2], index, fasta, false, 0, 4": { "content": [ "test.bam", [ @@ -10,7 +10,7 @@ "nf-test": "0.8.4", "nextflow": "24.04.2" }, - "timestamp": "2024-06-18T10:17:31.035368735" + "timestamp": "2024-06-20T16:07:51.065498711" }, "sarscov2 - [fastq1, fastq2], index, fasta, false": { "content": [ @@ -25,7 +25,33 @@ }, "timestamp": "2024-05-15T20:04:31.962017214" }, - "sarscov2 - [fastq1, fastq2], index, fasta, true - stub": { + "sarscov2 - [fastq1, fastq2], index, fasta, true, 2048, 4": { + "content": [ + "test.bam", + [ + "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-20T16:08:18.378362535" + }, + "sarscov2 - [fastq1, fastq2], index, fasta, true, 2048, ''": { + "content": [ + "test.bam", + [ + "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-20T16:25:43.613918051" + }, + "sarscov2 - fastq, index, fasta, false": { "content": [ "test.bam", [ @@ -36,9 +62,9 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-05-15T19:28:46.895668666" + "timestamp": "2024-05-15T20:00:05.782384898" 
}, - "sarscov2 - [fastq1, fastq2], index, fasta, false, 0": { + "sarscov2 - [fastq1, fastq2], index, fasta, true, 2048 - stub": { "content": [ "test.bam", [ @@ -49,9 +75,35 @@ "nf-test": "0.8.4", "nextflow": "24.04.2" }, - "timestamp": "2024-06-18T10:17:08.497131484" + "timestamp": "2024-06-18T10:17:40.514767321" }, - "sarscov2 - [fastq1, fastq2], index, fasta, true": { + "sarscov2 - [fastq1, fastq2], index, fasta, true, 2048": { + "content": [ + "test.bam", + [ + "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-18T10:17:31.035368735" + }, + "sarscov2 - fastq, index, fasta, true, 2048, 4": { + "content": [ + "test.bam", + [ + "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-20T16:07:24.071789902" + }, + "sarscov2 - [fastq1, fastq2], index, fasta, true - stub": { "content": [ "test.bam", [ @@ -62,7 +114,20 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-05-15T20:44:56.510177191" + "timestamp": "2024-05-15T19:28:46.895668666" + }, + "sarscov2 - [fastq1, fastq2], index, fasta, false, 0": { + "content": [ + "test.bam", + [ + "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-18T10:17:08.497131484" }, "sarscov2 - fastq, index, fasta, false, 0": { "content": [ @@ -77,6 +142,19 @@ }, "timestamp": "2024-06-18T10:16:23.45126091" }, + "sarscov2 - [fastq1, fastq2], index, fasta, true": { + "content": [ + "test.bam", + [ + "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-15T20:44:56.510177191" + }, "sarscov2 - fastq, index, fasta, true, 2048": { "content": [ "test.bam", @@ -90,7 +168,7 @@ }, "timestamp": "2024-06-18T10:16:46.541148031" }, - "sarscov2 - fastq, index, 
fasta, false": { + "sarscov2 - fastq, index, fasta, true": { "content": [ "test.bam", [ @@ -101,9 +179,9 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-05-15T20:00:05.782384898" + "timestamp": "2024-05-15T20:44:05.2657749" }, - "sarscov2 - fastq, index, fasta, true": { + "sarscov2 - fastq, index, fasta, false, 0, 4": { "content": [ "test.bam", [ @@ -112,11 +190,11 @@ ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-05-15T20:44:05.2657749" + "timestamp": "2024-06-20T16:06:58.802149967" }, - "sarscov2 - [fastq1, fastq2], index, fasta, true, 2048 - stub": { + "sarscov2 - [fastq1, fastq2], index, fasta, true, 2048, 4 - stub": { "content": [ "test.bam", [ @@ -127,6 +205,6 @@ "nf-test": "0.8.4", "nextflow": "24.04.2" }, - "timestamp": "2024-06-18T10:17:40.514767321" + "timestamp": "2024-06-20T16:08:28.453969552" } } \ No newline at end of file From 481f9f53191d2546609975a3d57326713043754a Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 20 Jun 2024 18:14:49 +0200 Subject: [PATCH 08/16] add sort threads option --- nextflow.config | 1 + nextflow_schema.json | 7 +++++++ subworkflows/local/align.nf | 4 +++- subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf | 4 ++-- workflows/raredisease.nf | 3 ++- 5 files changed, 15 insertions(+), 4 deletions(-) diff --git a/nextflow.config b/nextflow.config index b6defbc2..f109b654 100644 --- a/nextflow.config +++ b/nextflow.config @@ -54,6 +54,7 @@ params { // Alignment aligner = 'bwamem2' mbuffer_mem = 3072 + samtools_sort_threads = 4 min_trimmed_length = 40 mt_subsample_rd = 150 mt_subsample_seed = 30 diff --git a/nextflow_schema.json b/nextflow_schema.json index 032569bf..7004ba9e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -605,6 +605,13 @@ "fa_icon": "fas fa-align-center", "enum": ["bwa", "bwamem2", "bwameme", "sentieon"] }, + "samtools_sort_threads": { + "type": 
"integer", + "default": 4, + "description": "Number of threads allocated for sorting alignment files (used only by bwameme)", + "help_text": "To know more about this parameter check [bwameme](https://github.com/kaist-ina/BWA-MEME?tab=readme-ov-file#building-pipeline-with-samtools) documentation.", + "fa_icon": "fas fa-less-than" + }, "mbuffer_mem": { "type": "integer", "default": 3072, diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index db7b0bfc..2163767c 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -27,6 +27,7 @@ workflow ALIGN { ch_mtshift_fai // channel: [mandatory] [ val(meta), path(fai) ] val_mbuffer_mem // integer: [mandatory] memory in megabytes val_platform // string: [mandatory] illumina or a different technology + val_sort_threads // integer: [mandatory] number of threads used by samtools sort main: ch_bwamem2_bam = Channel.empty() @@ -58,7 +59,8 @@ workflow ALIGN { ch_genome_fasta, ch_genome_fai, val_mbuffer_mem, - val_platform + val_platform, + val_sort_threads ) ch_bwamem2_bam = ALIGN_BWA_BWAMEM2_BWAMEME.out.marked_bam ch_bwamem2_bai = ALIGN_BWA_BWAMEM2_BWAMEME.out.marked_bai diff --git a/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf b/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf index b11f589f..15d3db9a 100644 --- a/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf +++ b/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf @@ -23,7 +23,7 @@ workflow ALIGN_BWA_BWAMEM2_BWAMEME { ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] val_mbuffer_mem // integer: [mandatory] default: 3072 val_platform // string: [mandatory] default: illumina - + val_sort_threads // integer: [mandatory] default: 4 main: ch_versions = Channel.empty() @@ -33,7 +33,7 @@ workflow ALIGN_BWA_BWAMEM2_BWAMEME { ch_align = BWA.out.bam ch_versions = ch_versions.mix(BWA.out.versions.first()) } else if (params.aligner.equals("bwameme")) { - BWAMEME_MEM ( ch_reads_input, ch_bwameme_index,
ch_genome_fasta, true, val_mbuffer_mem ) + BWAMEME_MEM ( ch_reads_input, ch_bwameme_index, ch_genome_fasta, true, val_mbuffer_mem, val_sort_threads ) ch_align = BWAMEME_MEM.out.bam ch_versions = ch_versions.mix(BWAMEME_MEM.out.versions.first()) } else { diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index bc5fef99..361bbee2 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -373,7 +373,8 @@ workflow RAREDISEASE { ch_mtshift_dictionary, ch_mtshift_fai, params.mbuffer_mem, - params.platform + params.platform, + params.samtools_sort_threads ) .set { ch_mapped } ch_versions = ch_versions.mix(ALIGN.out.versions) From 5296b877a37249ed2de4ce3d0667a164731aa12a Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 20 Jun 2024 18:44:49 +0200 Subject: [PATCH 09/16] update changelog --- CHANGELOG.md | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 67fdb977..e29858c7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,16 +3,28 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## 2.2.0 - Dogmatix [XXXX-XX-XX] +## 2.2.0dev - Dogmatix [XXXX-XX-XX] ### `Added` +- Two new parameters `mbuffer_mem` and `samtools_sort_threads` to control resources given to mbuffer and samtools sort in the bwameme module [#564](https://github.com/nf-core/raredisease/pull/564) + ### `Changed` +- `readcount_intervals` parameter is now manadatory for running germlinecnvcaller. 
[#564](https://github.com/nf-core/raredisease/pull/564) + ### `Fixed` +- Docker manifest error from gnu-wget container [#564](https://github.com/nf-core/raredisease/pull/564) - Citations for bwameme [#563](https://github.com/nf-core/raredisease/pull/563) +### Parameters + +| Old parameter | New parameter | +| ------------- | --------------------- | +| | mbuffer_mem | +| | samtools_sort_threads | + ## 2.1.0 - Obelix [2024-05-29] ### `Added` From 9c74555c1d21d7e0290ad0962f0b9f5a68d21801 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 20 Jun 2024 18:47:31 +0200 Subject: [PATCH 10/16] update changelog --- CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e29858c7..8e998348 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,15 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` -- Two new parameters `mbuffer_mem` and `samtools_sort_threads` to control resources given to mbuffer and samtools sort in the bwameme module [#564](https://github.com/nf-core/raredisease/pull/564) +- Two new parameters `mbuffer_mem` and `samtools_sort_threads` to control resources given to mbuffer and samtools sort in the bwameme module [#570](https://github.com/nf-core/raredisease/pull/570) ### `Changed` -- `readcount_intervals` parameter is now manadatory for running germlinecnvcaller. [#564](https://github.com/nf-core/raredisease/pull/564) +- `readcount_intervals` parameter is now manadatory for running germlinecnvcaller. 
[#570](https://github.com/nf-core/raredisease/pull/570) ### `Fixed` -- Docker manifest error from gnu-wget container [#564](https://github.com/nf-core/raredisease/pull/564) +- Docker manifest error from gnu-wget container [#570](https://github.com/nf-core/raredisease/pull/570) - Citations for bwameme [#563](https://github.com/nf-core/raredisease/pull/563) ### Parameters From 798a150a6ed8ce06f95c14d9b3c9b903bae1106d Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 20 Jun 2024 21:31:34 +0200 Subject: [PATCH 11/16] fix error [skip ci] --- conf/modules/prepare_references.config | 15 --------------- subworkflows/local/prepare_references.nf | 2 -- 2 files changed, 17 deletions(-) diff --git a/conf/modules/prepare_references.config b/conf/modules/prepare_references.config index 9ec4d47b..10fda4a3 100644 --- a/conf/modules/prepare_references.config +++ b/conf/modules/prepare_references.config @@ -124,19 +124,4 @@ process { ext.when = { (params.vep_cache && params.vep_cache.endsWith("tar.gz")) } } - withName: '.*PREPARE_REFERENCES:GATK_PREPROCESS_WGS' { - ext.args = { [ - '--padding 0', - '--interval-merging-rule OVERLAPPING_ONLY', - "--exclude-intervals ${params.mito_name}", - "--tmp-dir ./" - ].join(' ') } - ext.when = { params.analysis_type.equals("wgs") && !params.readcount_intervals } - } - - withName: '.*PREPARE_REFERENCES:GATK_PREPROCESS_WES' { - ext.args = { "--bin-length 0 --interval-merging-rule OVERLAPPING_ONLY --exclude-intervals ${params.mito_name}" } - ext.when = { params.analysis_type.equals("wes") && !params.readcount_intervals } - } - } diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index 36445800..7c7726b4 100644 --- a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -13,8 +13,6 @@ include { GATK4_BEDTOINTERVALLIST as GATK_BILT } from '../../modul include { GATK4_CREATESEQUENCEDICTIONARY as GATK_SD } from 
'../../modules/nf-core/gatk4/createsequencedictionary/main' include { GATK4_CREATESEQUENCEDICTIONARY as GATK_SD_MT_SHIFT } from '../../modules/nf-core/gatk4/createsequencedictionary/main' include { GATK4_INTERVALLISTTOOLS as GATK_ILT } from '../../modules/nf-core/gatk4/intervallisttools/main' -include { GATK4_PREPROCESSINTERVALS as GATK_PREPROCESS_WGS } from '../../modules/nf-core/gatk4/preprocessintervals/main.nf' -include { GATK4_PREPROCESSINTERVALS as GATK_PREPROCESS_WES } from '../../modules/nf-core/gatk4/preprocessintervals/main.nf' include { GATK4_SHIFTFASTA as GATK_SHIFTFASTA } from '../../modules/nf-core/gatk4/shiftfasta/main' include { GET_CHROM_SIZES } from '../../modules/local/get_chrom_sizes' include { RTGTOOLS_FORMAT } from '../../modules/nf-core/rtgtools/format/main' From 3cbe7a77afe26bd0ee04ecd10439cb1ceaf7002a Mon Sep 17 00:00:00 2001 From: ramprasadn <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 20 Jun 2024 21:33:44 +0200 Subject: [PATCH 12/16] remove module --- modules.json | 5 -- .../gatk4/preprocessintervals/environment.yml | 7 -- .../nf-core/gatk4/preprocessintervals/main.nf | 62 -------------- .../gatk4/preprocessintervals/meta.yml | 82 ------------------- 4 files changed, 156 deletions(-) delete mode 100644 modules/nf-core/gatk4/preprocessintervals/environment.yml delete mode 100644 modules/nf-core/gatk4/preprocessintervals/main.nf delete mode 100644 modules/nf-core/gatk4/preprocessintervals/meta.yml diff --git a/modules.json b/modules.json index 4322c732..e400f22d 100644 --- a/modules.json +++ b/modules.json @@ -206,11 +206,6 @@ "git_sha": "cf607b7749da0a8f5ca2a1e31233e13e3159e2fe", "installed_by": ["modules"] }, - "gatk4/preprocessintervals": { - "branch": "master", - "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", - "installed_by": ["modules"] - }, "gatk4/printreads": { "branch": "master", "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", diff --git a/modules/nf-core/gatk4/preprocessintervals/environment.yml 
b/modules/nf-core/gatk4/preprocessintervals/environment.yml deleted file mode 100644 index ec0b09e9..00000000 --- a/modules/nf-core/gatk4/preprocessintervals/environment.yml +++ /dev/null @@ -1,7 +0,0 @@ -name: gatk4_preprocessintervals -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/preprocessintervals/main.nf b/modules/nf-core/gatk4/preprocessintervals/main.nf deleted file mode 100644 index dffc4bb1..00000000 --- a/modules/nf-core/gatk4/preprocessintervals/main.nf +++ /dev/null @@ -1,62 +0,0 @@ -process GATK4_PREPROCESSINTERVALS { - tag "$fasta" - label 'process_medium' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" - - input: - tuple val(meta), path(fasta) - tuple val(meta2), path(fai) - tuple val(meta3), path(dict) - tuple val(meta4), path(intervals) - tuple val(meta5), path(exclude_intervals) - - output: - tuple val(meta), path("*.interval_list"), emit: interval_list - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def include_command = intervals ? "--intervals $intervals" : "" - def exclude_command = exclude_intervals ? "--exclude-intervals $exclude_intervals" : "" - - def avail_mem = 3072 - if (!task.memory) { - log.info '[GATK PreprocessIntervals] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
- } else { - avail_mem = (task.memory.mega*0.8).intValue() - } - - """ - gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ - PreprocessIntervals \\ - $include_command \\ - $exclude_command \\ - --reference $fasta \\ - --output ${prefix}.interval_list \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.interval_list - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/gatk4/preprocessintervals/meta.yml b/modules/nf-core/gatk4/preprocessintervals/meta.yml deleted file mode 100644 index cf3f6ac4..00000000 --- a/modules/nf-core/gatk4/preprocessintervals/meta.yml +++ /dev/null @@ -1,82 +0,0 @@ -name: "gatk4_preprocessintervals" -description: Prepares bins for coverage collection. -keywords: - - bed - - gatk4 - - interval - - preprocessintervals -tools: - - "gatk4": - description: Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools with a primary focus on variant discovery and genotyping. Its powerful processing engine and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: "10.1158/1538-7445.AM2017-3590" - licence: ["Apache-2.0"] -input: - - meta: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - fasta: - type: file - description: The reference fasta file - pattern: "*.fasta" - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. 
[ id:'test' ] - - fai: - type: file - description: Index of reference fasta file - pattern: "*.fasta.fai" - - meta3: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - dict: - type: file - description: GATK sequence dictionary - pattern: "*.dict" - - meta4: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - intervals: - type: file - description: Interval file (bed or interval_list) with the genomic regions to be included from the analysis (optional) - pattern: "*.{bed,interval_list}" - - meta5: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - exclude_intervals: - type: file - description: Interval file (bed or interval_list) with the genomic regions to be excluded from the analysis (optional) - pattern: "*.{bed,interval_list}" -output: - - meta: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - interval_list: - type: file - description: Processed interval list file - pattern: "*.{bed,interval_list}" -authors: - - "@ryanjameskennedy" - - "@ViktorHy" - - "@ramprasadn" -maintainers: - - "@ryanjameskennedy" - - "@ViktorHy" - - "@ramprasadn" From aa4c0e651a658541b56a3a1614a81ccaa9eda88d Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 20 Jun 2024 22:09:52 +0200 Subject: [PATCH 13/16] fix error --- workflows/raredisease.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 361bbee2..2b6e3483 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -258,7 +258,7 @@ workflow RAREDISEASE { ch_ploidy_model = params.ploidy_model ? 
Channel.fromPath(params.ploidy_model).map{ it -> [[id:it[0].simpleName], it] }.collect() : Channel.empty() ch_readcount_intervals = params.readcount_intervals ? Channel.fromPath(params.readcount_intervals).collect() - : ( ch_references.readcount_intervals ?: Channel.empty() ) + : Channel.empty() ch_reduced_penetrance = params.reduced_penetrance ? Channel.fromPath(params.reduced_penetrance).collect() : Channel.value([]) ch_rtg_truthvcfs = params.rtg_truthvcfs ? Channel.fromPath(params.rtg_truthvcfs).collect() From 62206aed4105e7c839e793dc05bf4f7178699295 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 21 Jun 2024 10:09:34 +0200 Subject: [PATCH 14/16] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e998348..b30c3e33 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` -- `readcount_intervals` parameter is now manadatory for running germlinecnvcaller. [#570](https://github.com/nf-core/raredisease/pull/570) +- `readcount_intervals` parameter is now mandatory for running germlinecnvcaller. 
[#570](https://github.com/nf-core/raredisease/pull/570) ### `Fixed` From 1ffcaa12e7a4b0330866460f17256936a0972727 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 24 Jun 2024 10:27:51 +0200 Subject: [PATCH 15/16] review suggestions --- docs/usage.md | 46 +++++++++++++++++++++++--------------------- nextflow_schema.json | 2 +- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 996d0e6a..a742d128 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -10,24 +10,24 @@ Table of contents: - [Run nf-core/raredisease with test data](#run-nf-coreraredisease-with-test-data) - [Updating the pipeline](#updating-the-pipeline) - [Run nf-core/raredisease with your data](#run-nf-coreraredisease-with-your-data) - - [Samplesheet](#samplesheet) - - [Reference files and parameters](#reference-files-and-parameters) - - [1. Alignment](#1-alignment) - - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files) - - [3. Repeat expansions](#3-repeat-expansions) - - [4. Variant calling - SNV](#4-variant-calling---snv) - - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants) - - [6. Copy number variant calling](#6-copy-number-variant-calling) - - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking) - - [8. SV annotation \& Ranking](#8-sv-annotation--ranking) - - [9. Mitochondrial annotation](#9-mitochondrial-annotation) - - [10. Mobile element calling](#10-mobile-element-calling) - - [11. Mobile element annotation](#11-mobile-element-annotation) - - [12. Variant evaluation](#12-variant-evaluation) - - [13. 
Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens) - - [Run the pipeline](#run-the-pipeline) - - [Direct input in CLI](#direct-input-in-cli) - - [Import from a config file (recommended)](#import-from-a-config-file-recommended) + - [Samplesheet](#samplesheet) + - [Reference files and parameters](#reference-files-and-parameters) + - [1. Alignment](#1-alignment) + - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files) + - [3. Repeat expansions](#3-repeat-expansions) + - [4. Variant calling - SNV](#4-variant-calling---snv) + - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants) + - [6. Copy number variant calling](#6-copy-number-variant-calling) + - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking) + - [8. SV annotation \& Ranking](#8-sv-annotation--ranking) + - [9. Mitochondrial annotation](#9-mitochondrial-annotation) + - [10. Mobile element calling](#10-mobile-element-calling) + - [11. Mobile element annotation](#11-mobile-element-annotation) + - [12. Variant evaluation](#12-variant-evaluation) + - [13. Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens) + - [Run the pipeline](#run-the-pipeline) + - [Direct input in CLI](#direct-input-in-cli) + - [Import from a config file (recommended)](#import-from-a-config-file-recommended) - [Best practices](#best-practices) - [Core Nextflow arguments](#core-nextflow-arguments) - [`-profile`](#-profile) @@ -215,14 +215,16 @@ The mandatory and optional parameters for each category are tabulated below. ##### 6. 
Copy number variant calling -| Mandatory | Optional | -| ------------------------------ | ------------------------------- | -| ploidy_model1 | readcount_intervals3 | -| gcnvcaller_model1,2 | | +| Mandatory | Optional | +| --------------------------------- | -------- | +| ploidy_model1,4 | | +| gcnvcaller_model1,2,4 | | +| readcount_intervals3,4 | | 1 Output from steps 3 & 4 of GATK's CNV calling pipeline run in cohort mode as described [here](https://gatk.broadinstitute.org/hc/en-us/articles/360035531152--How-to-Call-common-and-rare-germline-copy-number-variants).
2 Sample file can be found [here](https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/gcnvmodels.tsv) (Note the header 'models' in the sample file).
3 Output from step 1 of GATK's CNV calling pipeline as described [here](https://gatk.broadinstitute.org/hc/en-us/articles/360035531152--How-to-Call-common-and-rare-germline-copy-number-variants).
+4 All these files can be generated using the germlinecnvcaller tool option in nf-core/createpanelrefs.
##### 7. SNV annotation & Ranking diff --git a/nextflow_schema.json b/nextflow_schema.json index 7004ba9e..41cea99c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -281,7 +281,7 @@ "fa_icon": "fas fa-file", "description": "Interval list file containing the intervals over which read counts are tabulated for CNV calling", "format": "file-path", - "help_text": "Generated by GATK4 preprocessintervals." + "help_text": "Generated by GATK4 preprocessintervals. It needs to be the same as the intervals used to generate the ploidy and cnv models." }, "reduced_penetrance": { "type": "string", From ca4cdad1422ec206802b43d0c0a5ed331df4c44c Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 24 Jun 2024 10:37:25 +0200 Subject: [PATCH 16/16] prettier --- docs/usage.md | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index a742d128..66d0b357 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -10,24 +10,24 @@ Table of contents: - [Run nf-core/raredisease with test data](#run-nf-coreraredisease-with-test-data) - [Updating the pipeline](#updating-the-pipeline) - [Run nf-core/raredisease with your data](#run-nf-coreraredisease-with-your-data) - - [Samplesheet](#samplesheet) - - [Reference files and parameters](#reference-files-and-parameters) - - [1. Alignment](#1-alignment) - - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files) - - [3. Repeat expansions](#3-repeat-expansions) - - [4. Variant calling - SNV](#4-variant-calling---snv) - - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants) - - [6. Copy number variant calling](#6-copy-number-variant-calling) - - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking) - - [8. SV annotation \& Ranking](#8-sv-annotation--ranking) - - [9. Mitochondrial annotation](#9-mitochondrial-annotation) - - [10. 
Mobile element calling](#10-mobile-element-calling) - - [11. Mobile element annotation](#11-mobile-element-annotation) - - [12. Variant evaluation](#12-variant-evaluation) - - [13. Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens) - - [Run the pipeline](#run-the-pipeline) - - [Direct input in CLI](#direct-input-in-cli) - - [Import from a config file (recommended)](#import-from-a-config-file-recommended) + - [Samplesheet](#samplesheet) + - [Reference files and parameters](#reference-files-and-parameters) + - [1. Alignment](#1-alignment) + - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files) + - [3. Repeat expansions](#3-repeat-expansions) + - [4. Variant calling - SNV](#4-variant-calling---snv) + - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants) + - [6. Copy number variant calling](#6-copy-number-variant-calling) + - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking) + - [8. SV annotation \& Ranking](#8-sv-annotation--ranking) + - [9. Mitochondrial annotation](#9-mitochondrial-annotation) + - [10. Mobile element calling](#10-mobile-element-calling) + - [11. Mobile element annotation](#11-mobile-element-annotation) + - [12. Variant evaluation](#12-variant-evaluation) + - [13. Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens) + - [Run the pipeline](#run-the-pipeline) + - [Direct input in CLI](#direct-input-in-cli) + - [Import from a config file (recommended)](#import-from-a-config-file-recommended) - [Best practices](#best-practices) - [Core Nextflow arguments](#core-nextflow-arguments) - [`-profile`](#-profile)