From 7cc4bda2072f1ea4441adc520ade74e98a860872 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 21 Jun 2024 10:01:43 +0200 Subject: [PATCH 1/4] reorder sv bits --- nextflow.config | 1 + nextflow_schema.json | 5 + workflows/raredisease.nf | 257 +++++++++++++++++++++++---------------- 3 files changed, 160 insertions(+), 103 deletions(-) diff --git a/nextflow.config b/nextflow.config index 03c81529..9ab21daa 100644 --- a/nextflow.config +++ b/nextflow.config @@ -42,6 +42,7 @@ params { skip_snv_annotation = false skip_snv_calling = false skip_sv_annotation = false + skip_sv_calling = false skip_mt_subsample = false skip_vcf2cytosure = true skip_vep_filter = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 557b1ec2..8f1b9df8 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -588,6 +588,11 @@ "description": "Specifies whether or not to skip annotate structural variant subworkflow.", "fa_icon": "fas fa-toggle-on" }, + "skip_sv_calling": { + "type": "boolean", + "description": "Specifies whether or not to skip nuclear and mitochondrial SV calling and annotation.", + "fa_icon": "fas fa-toggle-on" + }, "skip_vcf2cytosure": { "type": "boolean", "default": true, diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 26c56682..06dd9a23 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -170,7 +170,9 @@ workflow RAREDISEASE { ch_samples = ch_samplesheet.map { meta, fastqs -> meta} ch_case_info = ch_samples.toList().map { CustomFunctions.createCaseChannel(it) } + // // Initialize file channels for PREPARE_REFERENCES subworkflow + // ch_genome_fasta = Channel.fromPath(params.fasta).map { it -> [[id:it[0].simpleName], it] }.collect() ch_genome_fai = params.fai ? Channel.fromPath(params.fai).map {it -> [[id:it[0].simpleName], it]}.collect() : Channel.empty() @@ -187,7 +189,9 @@ workflow RAREDISEASE { ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache).map { it -> [[id:'vep_cache'], it] }.collect() : Channel.value([[],[]]) + // // Prepare references and indices. + // PREPARE_REFERENCES ( ch_genome_fasta, ch_genome_fai, @@ -200,7 +204,9 @@ workflow RAREDISEASE { ) .set { ch_references } + // // Gather built indices or get them from the params + // ch_bait_intervals = ch_references.bait_intervals ch_cadd_header = Channel.fromPath("$projectDir/assets/cadd_to_vcf_header_-1.0-.txt", checkIfExists: true).collect() ch_cadd_resources = params.cadd_resources ? Channel.fromPath(params.cadd_resources).collect() @@ -303,19 +309,31 @@ workflow RAREDISEASE { : Channel.empty() ch_versions = ch_versions.mix(ch_references.versions) + // // SV caller priority + // if (params.skip_germlinecnvcaller) { - ch_svcaller_priority = Channel.value(["tiddit", "manta", "cnvnator"]) + if (params.analysis_type.equals("wgs")) { + ch_svcaller_priority = Channel.value(["tiddit", "manta", "cnvnator"]) + } else { + ch_svcaller_priority = Channel.value(["manta"]) } else { - ch_svcaller_priority = Channel.value(["tiddit", "manta", "gcnvcaller", "cnvnator"]) + if (params.analysis_type.equals("wgs")) { + ch_svcaller_priority = Channel.value(["tiddit", "manta", "gcnvcaller", "cnvnator"]) + } else { + ch_svcaller_priority = Channel.value(["manta", "gcnvcaller"]) + } } - + // // Generate pedigree file + // ch_pedfile = CREATE_PEDIGREE_FILE(ch_samples.toList()).ped ch_versions = ch_versions.mix(CREATE_PEDIGREE_FILE.out.versions) + // // Read and store paths in the vep_plugin_files file + // if (params.vep_plugin_files) { ch_vep_extra_files_unsplit.splitCsv ( header:true ) .map { row -> @@ -330,7 +348,9 @@ workflow RAREDISEASE { .set {ch_vep_extra_files} } - // Read and store hgnc ids in a channel + // + // Dump all HGNC ids in a file + // ch_vep_filters_scout_fmt .mix (ch_vep_filters_std_fmt) .set {ch_vep_filters} @@ -339,13 +359,17 @@ workflow RAREDISEASE { .txt .set {ch_hgnc_ids} + // // Input QC + // if (!params.skip_fastqc) { FASTQC (ch_samplesheet) ch_versions = ch_versions.mix(FASTQC.out.versions.first()) } - // CREATE CHROMOSOME BED AND INTERVALS + // + // Create chromosome bed and intervals for splitting and gathering operations + // SCATTER_GENOME ( ch_genome_dictionary, ch_genome_fai, @@ -551,73 +575,121 @@ workflow RAREDISEASE { /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CALL AND ANNOTATE NUCLEAR AND MITOCHONDRIAL SVs + CALL AND ANNOTATE NUCLEAR SVs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - CALL_STRUCTURAL_VARIANTS ( - ch_mapped.genome_marked_bam, - ch_mapped.genome_marked_bai, - ch_mapped.genome_bam_bai, - ch_mapped.mt_bam_bai, - ch_mapped.mtshift_bam_bai, - ch_genome_bwaindex, - ch_genome_fasta, - ch_genome_fai, - ch_mtshift_fasta, - ch_case_info, - ch_target_bed, - ch_genome_dictionary, - ch_svcaller_priority, - ch_readcount_intervals, - ch_ploidy_model, - ch_gcnvcaller_model - ) - ch_versions = ch_versions.mix(CALL_STRUCTURAL_VARIANTS.out.versions) + if (!params.skip_sv_calling) { + CALL_STRUCTURAL_VARIANTS ( + ch_mapped.genome_marked_bam, + ch_mapped.genome_marked_bai, + ch_mapped.genome_bam_bai, + ch_mapped.mt_bam_bai, + ch_mapped.mtshift_bam_bai, + ch_genome_bwaindex, + ch_genome_fasta, + ch_genome_fai, + ch_mtshift_fasta, + ch_case_info, + ch_target_bed, + ch_genome_dictionary, + ch_svcaller_priority, + ch_readcount_intervals, + ch_ploidy_model, + ch_gcnvcaller_model + ) + ch_versions = ch_versions.mix(CALL_STRUCTURAL_VARIANTS.out.versions) // // ANNOTATE STRUCTURAL VARIANTS // - if (!params.skip_sv_annotation) { - ANNOTATE_STRUCTURAL_VARIANTS ( - CALL_STRUCTURAL_VARIANTS.out.vcf, - ch_sv_dbs, - ch_sv_bedpedbs, - params.genome, - params.vep_cache_version, - ch_vep_cache, - ch_genome_fasta, - ch_genome_dictionary, - ch_vep_extra_files - ).set { ch_sv_annotate } - ch_versions = ch_versions.mix(ch_sv_annotate.versions) + if (!params.skip_sv_annotation) { + ANNOTATE_STRUCTURAL_VARIANTS ( + CALL_STRUCTURAL_VARIANTS.out.vcf, + ch_sv_dbs, + ch_sv_bedpedbs, + params.genome, + params.vep_cache_version, + ch_vep_cache, + ch_genome_fasta, + ch_genome_dictionary, + ch_vep_extra_files + ).set { ch_sv_annotate } + ch_versions = ch_versions.mix(ch_sv_annotate.versions) - GENERATE_CLINICAL_SET_SV( - ch_sv_annotate.vcf_ann, - ch_hgnc_ids - ) - ch_versions = ch_versions.mix(GENERATE_CLINICAL_SET_SV.out.versions) + GENERATE_CLINICAL_SET_SV( + ch_sv_annotate.vcf_ann, + ch_hgnc_ids + ) + ch_versions = ch_versions.mix(GENERATE_CLINICAL_SET_SV.out.versions) - ANN_CSQ_PLI_SV ( - GENERATE_CLINICAL_SET_SV.out.vcf, - ch_variant_consequences_sv - ) - ch_versions = ch_versions.mix(ANN_CSQ_PLI_SV.out.versions) + ANN_CSQ_PLI_SV ( + GENERATE_CLINICAL_SET_SV.out.vcf, + ch_variant_consequences_sv + ) + ch_versions = ch_versions.mix(ANN_CSQ_PLI_SV.out.versions) + + RANK_VARIANTS_SV ( + ANN_CSQ_PLI_SV.out.vcf_ann, + ch_pedfile, + ch_reduced_penetrance, + ch_score_config_sv + ) + ch_versions = ch_versions.mix(RANK_VARIANTS_SV.out.versions) + } + } - RANK_VARIANTS_SV ( - ANN_CSQ_PLI_SV.out.vcf_ann, - ch_pedfile, - ch_reduced_penetrance, - ch_score_config_sv +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + CALL AND ANNOTATE MOBILE ELEMENTS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + + if (!params.skip_me_calling || params.analysis_type.equals("wes")) { + CALL_MOBILE_ELEMENTS( + ch_mapped.genome_bam_bai, + ch_genome_fasta, + ch_genome_fai, + ch_me_references, + ch_case_info, + params.genome ) - ch_versions = ch_versions.mix(RANK_VARIANTS_SV.out.versions) + ch_versions = ch_versions.mix(CALL_MOBILE_ELEMENTS.out.versions) - } + if (!params.skip_me_annotation) { + ANNOTATE_MOBILE_ELEMENTS( + CALL_MOBILE_ELEMENTS.out.vcf, + ch_me_svdb_resources, + ch_genome_fasta, + ch_genome_dictionary, + ch_vep_cache, + params.genome, + params.vep_cache_version, + ch_vep_extra_files + ) + ch_versions = ch_versions.mix(ANNOTATE_MOBILE_ELEMENTS.out.versions) + GENERATE_CLINICAL_SET_ME( + ANNOTATE_MOBILE_ELEMENTS.out.vcf, + ch_hgnc_ids + ) + ch_versions = ch_versions.mix( GENERATE_CLINICAL_SET_ME.out.versions ) + ANN_CSQ_PLI_ME( + GENERATE_CLINICAL_SET_ME.out.vcf, + ch_variant_consequences_sv + ) + ch_versions = ch_versions.mix( ANN_CSQ_PLI_ME.out.versions ) + + } + } +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SMNCOPYNUMBERCALLER +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ - // STEP 1.7: SMNCOPYNUMBERCALLER RENAME_BAM_FOR_SMNCALLER(ch_mapped.genome_marked_bam, "bam").output .collect{it} .toList() @@ -640,7 +712,11 @@ workflow RAREDISEASE { ch_versions = ch_versions.mix(RENAME_BAI_FOR_SMNCALLER.out.versions) ch_versions = ch_versions.mix(SMNCOPYNUMBERCALLER.out.versions) - // ped correspondence, sex check, ancestry check +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + PEDDY +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ if (!params.skip_peddy) { PEDDY ( CALL_SNV.out.genome_vcf.join(CALL_SNV.out.genome_tabix, failOnMismatch:true, failOnDuplicate:true), @@ -649,7 +725,11 @@ workflow RAREDISEASE { ch_versions = ch_versions.mix(PEDDY.out.versions.first()) } - // Generate CGH files from sequencing data, turned off by default +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Generate CGH files from sequencing data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ if ( !params.skip_vcf2cytosure && params.analysis_type != "wes" ) { GENERATE_CYTOSURE_FILES ( ch_sv_annotate.vcf_ann, @@ -661,7 +741,11 @@ workflow RAREDISEASE { ch_versions = ch_versions.mix(GENERATE_CYTOSURE_FILES.out.versions) } - // GENS +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + GENS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ if ( !params.skip_gens && params.analysis_type != "wes" ) { GENS ( ch_mapped.genome_bam_bai, @@ -678,48 +762,12 @@ workflow RAREDISEASE { ch_versions = ch_versions.mix(GENS.out.versions) } - if (!params.skip_me_calling) { - CALL_MOBILE_ELEMENTS( - ch_mapped.genome_bam_bai, - ch_genome_fasta, - ch_genome_fai, - ch_me_references, - ch_case_info, - params.genome - ) - ch_versions = ch_versions.mix(CALL_MOBILE_ELEMENTS.out.versions) - - if (!params.skip_me_annotation) { - ANNOTATE_MOBILE_ELEMENTS( - CALL_MOBILE_ELEMENTS.out.vcf, - ch_me_svdb_resources, - ch_genome_fasta, - ch_genome_dictionary, - ch_vep_cache, - params.genome, - params.vep_cache_version, - ch_vep_extra_files - ) - ch_versions = ch_versions.mix(ANNOTATE_MOBILE_ELEMENTS.out.versions) - - GENERATE_CLINICAL_SET_ME( - ANNOTATE_MOBILE_ELEMENTS.out.vcf, - ch_hgnc_ids - ) - ch_versions = ch_versions.mix( GENERATE_CLINICAL_SET_ME.out.versions ) - - ANN_CSQ_PLI_ME( - GENERATE_CLINICAL_SET_ME.out.vcf, - ch_variant_consequences_sv - ) - ch_versions = ch_versions.mix( ANN_CSQ_PLI_ME.out.versions ) - - } - } +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VARIANT EVALUATION WITH RTGTOOLS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ - // - // VARIANT EVALUATION - // if (params.run_rtgvcfeval) { VARIANT_EVALUATION ( CALL_SNV.out.genome_vcf_tabix, @@ -730,9 +778,12 @@ workflow RAREDISEASE { ch_versions = ch_versions.mix(VARIANT_EVALUATION.out.versions) } - // - // Collate and save software versions - // +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + COLLECT SOFTWARE VERSIONS & MultiQC +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + softwareVersionsToYAML(ch_versions) .collectFile( storeDir: "${params.outdir}/pipeline_info", From 90fad322ea3eaacfe179d04962c2bc6e0d9df172 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 24 Jun 2024 14:29:53 +0200 Subject: [PATCH 2/4] fix logic --- workflows/raredisease.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 9d37808b..ab2883d5 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -649,7 +649,7 @@ workflow RAREDISEASE { ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - if (!params.skip_me_calling || params.analysis_type.equals("wes")) { + if (!params.skip_me_calling && params.analysis_type.equals("wgs")) { CALL_MOBILE_ELEMENTS( ch_mapped.genome_bam_bai, ch_genome_fasta, From f1ab55d2d33ffc4e1bce24a4c74a83d2b5fcc28d Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 24 Jun 2024 14:31:32 +0200 Subject: [PATCH 3/4] changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 755501b6..e0742170 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- A new parameter `skip_sv_calling` to skip sv calling workflow [#572](https://github.com/nf-core/raredisease/pull/572) - Two new parameters `skip_snv_calling` and `skip_repeat_analysis` to skip snv calling and repeat analysis respectively [#571](https://github.com/nf-core/raredisease/pull/571) - Two new parameters `mbuffer_mem` and `samtools_sort_threads` to control resources given to mbuffer and samtools sort in the bwameme module [#570](https://github.com/nf-core/raredisease/pull/570) @@ -27,6 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | | samtools_sort_threads | | | skip_repeat_analysis | | | skip_snv_calling | +| | skip_sv_calling | ## 2.1.0 - Obelix [2024-05-29] From 31e8f2edf47f7f087acb53ce45da6fe14438d4d9 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 24 Jun 2024 14:54:14 +0200 Subject: [PATCH 4/4] fix error --- workflows/raredisease.nf | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index ab2883d5..691d65b7 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -324,6 +324,7 @@ workflow RAREDISEASE { ch_svcaller_priority = Channel.value(["tiddit", "manta", "cnvnator"]) } else { ch_svcaller_priority = Channel.value(["manta"]) + } } else { if (params.analysis_type.equals("wgs")) { ch_svcaller_priority = Channel.value(["tiddit", "manta", "gcnvcaller", "cnvnator"]) @@ -604,9 +605,9 @@ workflow RAREDISEASE { ) ch_versions = ch_versions.mix(CALL_STRUCTURAL_VARIANTS.out.versions) - // - // ANNOTATE STRUCTURAL VARIANTS - // + // + // ANNOTATE STRUCTURAL VARIANTS + // if (!params.skip_sv_annotation) { ANNOTATE_STRUCTURAL_VARIANTS ( CALL_STRUCTURAL_VARIANTS.out.vcf,