From ec5bddc363ce4da24148a26b930447c54f149933 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 10 Jan 2024 05:49:22 +0100 Subject: [PATCH 1/5] found in deepvariant --- assets/foundin.hdr | 1 + conf/modules/call_snv_deepvariant.config | 8 ++++ conf/test_one_sample.config | 1 + lib/WorkflowRaredisease.groovy | 14 ++++++ subworkflows/local/call_snv.nf | 4 +- .../variant_calling/call_snv_deepvariant.nf | 45 ++++++++++++++----- workflows/raredisease.nf | 2 + 7 files changed, 63 insertions(+), 12 deletions(-) create mode 100644 assets/foundin.hdr diff --git a/assets/foundin.hdr b/assets/foundin.hdr new file mode 100644 index 00000000..9b38b87f --- /dev/null +++ b/assets/foundin.hdr @@ -0,0 +1 @@ +##INFO=<ID=FOUND_IN,Number=1,Type=String,Description="Program that detected the variant"> diff --git a/conf/modules/call_snv_deepvariant.config b/conf/modules/call_snv_deepvariant.config index 329bc15f..fdd7ece6 100644 --- a/conf/modules/call_snv_deepvariant.config +++ b/conf/modules/call_snv_deepvariant.config @@ -39,4 +39,12 @@ process { ext.prefix = { "${meta.id}_split_rmdup" } } + withName: '.*CALL_SNV_DEEPVARIANT:ZIP_TABIX_VARCALLERBED' { + ext.args2 = '-s 1 -b 2 -e 3' + } + + withName: '.*CALL_SNV_DEEPVARIANT:BCFTOOLS_ANNOTATE' { + ext.args = "-c CHROM,FROM,TO,FOUND_IN --output-type z" + ext.prefix = { "${meta.id}_split_rmdup_info" } + } } diff --git a/conf/test_one_sample.config b/conf/test_one_sample.config index f39cb40c..cb40d008 100644 --- a/conf/test_one_sample.config +++ b/conf/test_one_sample.config @@ -43,6 +43,7 @@ params { known_dbsnp = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/dbsnp_-138-.vcf.gz" ml_model = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model" reduced_penetrance = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reduced_penetrance.tsv" + score_config_mt = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini" score_config_snv = 
"https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini" score_config_sv = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_sv.ini" svdb_query_dbs = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/svdb_querydb_files.csv" diff --git a/lib/WorkflowRaredisease.groovy b/lib/WorkflowRaredisease.groovy index f45fcc7c..b6b58af6 100755 --- a/lib/WorkflowRaredisease.groovy +++ b/lib/WorkflowRaredisease.groovy @@ -35,6 +35,20 @@ class WorkflowRaredisease { return outfile } + // + // Create a bed file which includes the name of variant caller from fai + // + public static String makeBedWithVariantCallerInfo(fai, parent_dir, varcaller) { + def outfile = new File(parent_dir + '/' + varcaller +'.bed') + def writer = outfile.newWriter() + fai.eachLine { line -> + def split_str = line.tokenize("\t") + writer << [split_str[0],"1",split_str[1],varcaller].join("\t") + "\n" + } + writer.close() + return outfile + } + // // Get workflow summary for MultiQC // diff --git a/subworkflows/local/call_snv.nf b/subworkflows/local/call_snv.nf index 6413f896..fdcc4629 100644 --- a/subworkflows/local/call_snv.nf +++ b/subworkflows/local/call_snv.nf @@ -28,6 +28,7 @@ workflow CALL_SNV { ch_call_interval // channel: [mandatory] [ path(intervals) ] ch_ml_model // channel: [mandatory] [ path(model) ] ch_case_info // channel: [mandatory] [ val(case_info) ] + ch_foundin_header // channel: [mandatory] [ path(header) ] ch_pcr_indel_model // channel: [optional] [ val(sentieon_dnascope_pcr_indel_model) ] main: @@ -42,7 +43,8 @@ workflow CALL_SNV { ch_genome_bam_bai, ch_genome_fasta, ch_genome_fai, - ch_case_info + ch_case_info, + ch_foundin_header ) ch_deepvar_vcf = CALL_SNV_DEEPVARIANT.out.vcf ch_deepvar_tbi = CALL_SNV_DEEPVARIANT.out.tabix diff --git a/subworkflows/local/variant_calling/call_snv_deepvariant.nf b/subworkflows/local/variant_calling/call_snv_deepvariant.nf index 
8324aa35..197869b8 100644 --- a/subworkflows/local/variant_calling/call_snv_deepvariant.nf +++ b/subworkflows/local/variant_calling/call_snv_deepvariant.nf @@ -2,18 +2,22 @@ // A variant caller workflow for deepvariant // -include { BCFTOOLS_NORM as SPLIT_MULTIALLELICS_GL } from '../../../modules/nf-core/bcftools/norm/main' -include { BCFTOOLS_NORM as REMOVE_DUPLICATES_GL } from '../../../modules/nf-core/bcftools/norm/main' -include { DEEPVARIANT } from '../../../modules/nf-core/deepvariant/main' -include { GLNEXUS } from '../../../modules/nf-core/glnexus/main' -include { TABIX_TABIX as TABIX_GL } from '../../../modules/nf-core/tabix/tabix/main' +include { BCFTOOLS_ANNOTATE } from '../../../modules/nf-core/bcftools/annotate/main' +include { BCFTOOLS_NORM as SPLIT_MULTIALLELICS_GL } from '../../../modules/nf-core/bcftools/norm/main' +include { BCFTOOLS_NORM as REMOVE_DUPLICATES_GL } from '../../../modules/nf-core/bcftools/norm/main' +include { DEEPVARIANT } from '../../../modules/nf-core/deepvariant/main' +include { GLNEXUS } from '../../../modules/nf-core/glnexus/main' +include { TABIX_BGZIPTABIX as ZIP_TABIX_VARCALLERBED } from '../../../modules/nf-core/tabix/bgziptabix/main' +include { TABIX_TABIX as TABIX_GL } from '../../../modules/nf-core/tabix/tabix/main' +include { TABIX_TABIX as TABIX_ANNOTATE } from '../../../modules/nf-core/tabix/tabix/main' workflow CALL_SNV_DEEPVARIANT { take: - ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] - ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_case_info // channel: [mandatory] [ val(case_info) ] + ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_case_info // channel: [mandatory] [ val(case_info) ] + ch_foundin_header // channel: [mandatory] [ 
path(header) ] main: ch_versions = Channel.empty() @@ -48,6 +52,25 @@ workflow CALL_SNV_DEEPVARIANT { TABIX_GL (REMOVE_DUPLICATES_GL.out.vcf) + ch_genome_fai.map{meta, fai -> + return [meta, WorkflowRaredisease.makeBedWithVariantCallerInfo(fai, fai.parent.toString(), "deepvariant")] + } + .set { ch_varcallerinfo } + + ZIP_TABIX_VARCALLERBED (ch_varcallerinfo).gz_tbi + .map{meta,bed,tbi -> return [bed, tbi]} + .set{ch_varcallerbed} + + REMOVE_DUPLICATES_GL.out.vcf + .join(TABIX_GL.out.tbi) + .combine(ch_varcallerbed) + .combine(ch_foundin_header) + .set { ch_annotate_in } + + BCFTOOLS_ANNOTATE(ch_annotate_in) + + TABIX_ANNOTATE(BCFTOOLS_ANNOTATE.out.vcf) + ch_versions = ch_versions.mix(DEEPVARIANT.out.versions.first()) ch_versions = ch_versions.mix(GLNEXUS.out.versions) ch_versions = ch_versions.mix(SPLIT_MULTIALLELICS_GL.out.versions) @@ -55,7 +78,7 @@ workflow CALL_SNV_DEEPVARIANT { ch_versions = ch_versions.mix(TABIX_GL.out.versions) emit: - vcf = REMOVE_DUPLICATES_GL.out.vcf // channel: [ val(meta), path(vcf) ] - tabix = TABIX_GL.out.tbi // channel: [ val(meta), path(tbi) ] + vcf = BCFTOOLS_ANNOTATE.out.vcf // channel: [ val(meta), path(vcf) ] + tabix = TABIX_ANNOTATE.out.tbi // channel: [ val(meta), path(tbi) ] versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index c4183c93..203b3b5b 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -220,6 +220,7 @@ workflow RAREDISEASE { : Channel.value([[:],[]]) ch_dbsnp_tbi = params.known_dbsnp_tbi ? Channel.fromPath(params.known_dbsnp_tbi).map {it -> [[id:it[0].simpleName], it]}.collect() : ch_references.known_dbsnp_tbi.ifEmpty([[],[]]) + ch_foundin_header = Channel.fromPath("$projectDir/assets/foundin.hdr", checkIfExists: true).collect() ch_gcnvcaller_model = params.gcnvcaller_model ? 
Channel.fromPath(params.gcnvcaller_model).splitCsv ( header:true ) .map { row -> return [[id:file(row.models).simpleName], row.models] @@ -380,6 +381,7 @@ workflow RAREDISEASE { ch_call_interval, ch_ml_model, ch_case_info, + ch_foundin_header, Channel.value(params.sentieon_dnascope_pcr_indel_model) ) ch_versions = ch_versions.mix(CALL_SNV.out.versions) From 7157ec5f3289ecc727a52bd1b5b4f24ccbdcf7df Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 10 Jan 2024 07:49:00 +0100 Subject: [PATCH 2/5] found in sentieon and mutect2 --- conf/modules/call_snv_sentieon.config | 9 ++++ conf/modules/postprocess_MT_calls.config | 11 +++- subworkflows/local/call_snv.nf | 6 ++- .../variant_calling/call_snv_deepvariant.nf | 3 ++ .../variant_calling/call_snv_sentieon.nf | 52 ++++++++++++++----- .../variant_calling/postprocess_MT_calls.nf | 36 +++++++++++-- 6 files changed, 96 insertions(+), 21 deletions(-) diff --git a/conf/modules/call_snv_sentieon.config b/conf/modules/call_snv_sentieon.config index 3abcd31c..2337ef4f 100644 --- a/conf/modules/call_snv_sentieon.config +++ b/conf/modules/call_snv_sentieon.config @@ -48,5 +48,14 @@ process { ext.args = '--output-type z --rm-dup none' ext.prefix = { "${meta.id}_split_rmdup" } } + + withName: '.*CALL_SNV:CALL_SNV_SENTIEON:ZIP_TABIX_VARCALLERBED' { + ext.args2 = '-s 1 -b 2 -e 3' + } + + withName: '.*CALL_SNV:CALL_SNV_SENTIEON:BCFTOOLS_ANNOTATE' { + ext.args = "-c CHROM,FROM,TO,FOUND_IN --output-type z" + ext.prefix = { "${meta.id}_split_rmdup_info" } + } } } diff --git a/conf/modules/postprocess_MT_calls.config b/conf/modules/postprocess_MT_calls.config index c97ea43d..07e266de 100644 --- a/conf/modules/postprocess_MT_calls.config +++ b/conf/modules/postprocess_MT_calls.config @@ -37,6 +37,15 @@ process { withName: '.*POSTPROCESS_MT_CALLS:BCFTOOLS_MERGE_MT' { ext.args = '--output-type z' + ext.prefix = { "${meta.id}_split_rmdup_merged" } + } + + withName: 
'.*POSTPROCESS_MT_CALLS:ZIP_TABIX_VARCALLERBED' { + ext.args2 = '-s 1 -b 2 -e 3' + } + + withName: '.*POSTPROCESS_MT_CALLS:BCFTOOLS_ANNOTATE' { + ext.args = "-c CHROM,FROM,TO,FOUND_IN --output-type z" ext.prefix = { "${meta.id}_mitochondria" } publishDir = [ path: { "${params.outdir}/call_snv/mitochondria" }, @@ -45,7 +54,7 @@ process { ] } - withName: '.*POSTPROCESS_MT_CALLS:TABIX_TABIX_MERGE' { + withName: '.*POSTPROCESS_MT_CALLS:TABIX_ANNOTATE' { publishDir = [ path: { "${params.outdir}/call_snv/mitochondria" }, mode: params.publish_dir_mode, diff --git a/subworkflows/local/call_snv.nf b/subworkflows/local/call_snv.nf index fdcc4629..0f646461 100644 --- a/subworkflows/local/call_snv.nf +++ b/subworkflows/local/call_snv.nf @@ -59,7 +59,8 @@ workflow CALL_SNV { ch_call_interval, ch_ml_model, ch_case_info, - ch_pcr_indel_model + ch_pcr_indel_model, + ch_foundin_header ) ch_sentieon_vcf = CALL_SNV_SENTIEON.out.vcf ch_sentieon_tbi = CALL_SNV_SENTIEON.out.tabix @@ -102,7 +103,8 @@ workflow CALL_SNV { ch_genome_dictionary, ch_genome_fai, ch_mtshift_backchain, - ch_case_info + ch_case_info, + ch_foundin_header ) ch_versions = ch_versions.mix(CALL_SNV_MT.out.versions) diff --git a/subworkflows/local/variant_calling/call_snv_deepvariant.nf b/subworkflows/local/variant_calling/call_snv_deepvariant.nf index 197869b8..6ffea594 100644 --- a/subworkflows/local/variant_calling/call_snv_deepvariant.nf +++ b/subworkflows/local/variant_calling/call_snv_deepvariant.nf @@ -76,6 +76,9 @@ workflow CALL_SNV_DEEPVARIANT { ch_versions = ch_versions.mix(SPLIT_MULTIALLELICS_GL.out.versions) ch_versions = ch_versions.mix(REMOVE_DUPLICATES_GL.out.versions) ch_versions = ch_versions.mix(TABIX_GL.out.versions) + ch_versions = ch_versions.mix(ZIP_TABIX_VARCALLERBED.out.versions) + ch_versions = ch_versions.mix(BCFTOOLS_ANNOTATE.out.versions) + ch_versions = ch_versions.mix(TABIX_ANNOTATE.out.versions) emit: vcf = BCFTOOLS_ANNOTATE.out.vcf // channel: [ val(meta), path(vcf) ] diff --git 
a/subworkflows/local/variant_calling/call_snv_sentieon.nf b/subworkflows/local/variant_calling/call_snv_sentieon.nf index a9f5d4f8..bccc830a 100644 --- a/subworkflows/local/variant_calling/call_snv_sentieon.nf +++ b/subworkflows/local/variant_calling/call_snv_sentieon.nf @@ -2,15 +2,18 @@ // A subworkflow to call SNVs by sentieon dnascope with a machine learning model. // -include { SENTIEON_DNASCOPE } from '../../../modules/nf-core/sentieon/dnascope/main' -include { SENTIEON_DNAMODELAPPLY } from '../../../modules/nf-core/sentieon/dnamodelapply/main' -include { BCFTOOLS_MERGE } from '../../../modules/nf-core/bcftools/merge/main' -include { BCFTOOLS_NORM as SPLIT_MULTIALLELICS_SEN } from '../../../modules/nf-core/bcftools/norm/main' -include { BCFTOOLS_NORM as REMOVE_DUPLICATES_SEN } from '../../../modules/nf-core/bcftools/norm/main' -include { TABIX_TABIX as TABIX_SEN } from '../../../modules/nf-core/tabix/tabix/main' -include { TABIX_TABIX as TABIX_BCFTOOLS } from '../../../modules/nf-core/tabix/tabix/main' -include { BCFTOOLS_FILTER as BCF_FILTER_ONE } from '../../../modules/nf-core/bcftools/filter/main' -include { BCFTOOLS_FILTER as BCF_FILTER_TWO } from '../../../modules/nf-core/bcftools/filter/main' +include { SENTIEON_DNASCOPE } from '../../../modules/nf-core/sentieon/dnascope/main' +include { SENTIEON_DNAMODELAPPLY } from '../../../modules/nf-core/sentieon/dnamodelapply/main' +include { BCFTOOLS_MERGE } from '../../../modules/nf-core/bcftools/merge/main' +include { BCFTOOLS_NORM as SPLIT_MULTIALLELICS_SEN } from '../../../modules/nf-core/bcftools/norm/main' +include { BCFTOOLS_NORM as REMOVE_DUPLICATES_SEN } from '../../../modules/nf-core/bcftools/norm/main' +include { TABIX_TABIX as TABIX_SEN } from '../../../modules/nf-core/tabix/tabix/main' +include { TABIX_TABIX as TABIX_BCFTOOLS } from '../../../modules/nf-core/tabix/tabix/main' +include { BCFTOOLS_FILTER as BCF_FILTER_ONE } from '../../../modules/nf-core/bcftools/filter/main' +include { BCFTOOLS_FILTER 
as BCF_FILTER_TWO } from '../../../modules/nf-core/bcftools/filter/main' +include { BCFTOOLS_ANNOTATE } from '../../../modules/nf-core/bcftools/annotate/main' +include { TABIX_TABIX as TABIX_ANNOTATE } from '../../../modules/nf-core/tabix/tabix/main' +include { TABIX_BGZIPTABIX as ZIP_TABIX_VARCALLERBED } from '../../../modules/nf-core/tabix/bgziptabix/main' workflow CALL_SNV_SENTIEON { take: @@ -23,7 +26,8 @@ workflow CALL_SNV_SENTIEON { ch_ml_model // channel: [mandatory] [ val(meta), path(model) ] ch_case_info // channel: [mandatory] [ val(case_info) ] ch_pcr_indel_model // channel: [optional] [ val(sentieon_dnascope_pcr_indel_model) ] - + ch_foundin_header // channel: [mandatory] [ path(header) ] + main: ch_versions = Channel.empty() @@ -85,6 +89,25 @@ workflow CALL_SNV_SENTIEON { TABIX_SEN(REMOVE_DUPLICATES_SEN.out.vcf) + ch_genome_fai.map{meta, fai -> + return [meta, WorkflowRaredisease.makeBedWithVariantCallerInfo(fai, fai.parent.toString(), "sentieon")] + } + .set { ch_varcallerinfo } + + ZIP_TABIX_VARCALLERBED (ch_varcallerinfo).gz_tbi + .map{meta,bed,tbi -> return [bed, tbi]} + .set{ch_varcallerbed} + + REMOVE_DUPLICATES_SEN.out.vcf + .join(TABIX_SEN.out.tbi) + .combine(ch_varcallerbed) + .combine(ch_foundin_header) + .set { ch_annotate_in } + + BCFTOOLS_ANNOTATE(ch_annotate_in) + + TABIX_ANNOTATE(BCFTOOLS_ANNOTATE.out.vcf) + ch_versions = ch_versions.mix(SENTIEON_DNASCOPE.out.versions.first()) ch_versions = ch_versions.mix(SENTIEON_DNAMODELAPPLY.out.versions.first()) ch_versions = ch_versions.mix(BCFTOOLS_MERGE.out.versions.first()) @@ -92,9 +115,12 @@ workflow CALL_SNV_SENTIEON { ch_versions = ch_versions.mix(REMOVE_DUPLICATES_SEN.out.versions.first()) ch_versions = ch_versions.mix(TABIX_SEN.out.versions.first()) ch_versions = ch_versions.mix(BCF_FILTER_ONE.out.versions.first()) + ch_versions = ch_versions.mix(ZIP_TABIX_VARCALLERBED.out.versions) + ch_versions = ch_versions.mix(BCFTOOLS_ANNOTATE.out.versions) + ch_versions = 
ch_versions.mix(TABIX_ANNOTATE.out.versions) emit: - vcf = REMOVE_DUPLICATES_SEN.out.vcf // channel: [ val(meta), path(vcf) ] - tabix = TABIX_SEN.out.tbi // channel: [ val(meta), path(tbi) ] - versions = ch_versions // channel: [ path(versions.yml) ] + vcf = BCFTOOLS_ANNOTATE.out.vcf // channel: [ val(meta), path(vcf) ] + tabix = TABIX_ANNOTATE.out.tbi // channel: [ val(meta), path(tbi) ] + versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/subworkflows/local/variant_calling/postprocess_MT_calls.nf b/subworkflows/local/variant_calling/postprocess_MT_calls.nf index cb14c9b7..304d8bd4 100644 --- a/subworkflows/local/variant_calling/postprocess_MT_calls.nf +++ b/subworkflows/local/variant_calling/postprocess_MT_calls.nf @@ -11,6 +11,9 @@ include { TABIX_TABIX as TABIX_TABIX_MT2 } from '../../.. include { BCFTOOLS_MERGE as BCFTOOLS_MERGE_MT } from '../../../modules/nf-core/bcftools/merge/main' include { TABIX_TABIX as TABIX_TABIX_MERGE } from '../../../modules/nf-core/tabix/tabix/main' include { PICARD_LIFTOVERVCF } from '../../../modules/nf-core/picard/liftovervcf/main' +include { BCFTOOLS_ANNOTATE } from '../../../modules/nf-core/bcftools/annotate/main' +include { TABIX_BGZIPTABIX as ZIP_TABIX_VARCALLERBED } from '../../../modules/nf-core/tabix/bgziptabix/main' +include { TABIX_TABIX as TABIX_ANNOTATE } from '../../../modules/nf-core/tabix/tabix/main' workflow POSTPROCESS_MT_CALLS { take: @@ -21,6 +24,7 @@ workflow POSTPROCESS_MT_CALLS { ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] ch_mtshift_backchain // channel: [mandatory] [ val(meta), path(backchain) ] ch_case_info // channel: [mandatory] [ val(case_info) ] + ch_foundin_header // channel: [mandatory] [ path(header) ] main: ch_versions = Channel.empty() @@ -93,9 +97,28 @@ workflow POSTPROCESS_MT_CALLS { BCFTOOLS_MERGE_MT.out.merged_variants .mix(ch_case_vcf.single) - .set { ch_annotation_in } + .set { ch_addfoundintag_in } - TABIX_TABIX_MERGE(ch_annotation_in) + 
TABIX_TABIX_MERGE(ch_addfoundintag_in) + + ch_genome_fai.map{meta, fai -> + return [meta, WorkflowRaredisease.makeBedWithVariantCallerInfo(fai, fai.parent.toString(), "mutect2")] + } + .set { ch_varcallerinfo } + + ZIP_TABIX_VARCALLERBED (ch_varcallerinfo).gz_tbi + .map{meta,bed,tbi -> return [bed, tbi]} + .set{ch_varcallerbed} + + ch_addfoundintag_in + .join(TABIX_TABIX_MERGE.out.tbi) + .combine(ch_varcallerbed) + .combine(ch_foundin_header) + .set { ch_annotate_in } + + BCFTOOLS_ANNOTATE(ch_annotate_in) + + TABIX_ANNOTATE(BCFTOOLS_ANNOTATE.out.vcf) ch_versions = ch_versions.mix(PICARD_LIFTOVERVCF.out.versions.first()) ch_versions = ch_versions.mix(GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.versions.first()) @@ -103,9 +126,12 @@ workflow POSTPROCESS_MT_CALLS { ch_versions = ch_versions.mix(SPLIT_MULTIALLELICS_MT.out.versions.first()) ch_versions = ch_versions.mix(REMOVE_DUPLICATES_MT.out.versions.first()) ch_versions = ch_versions.mix(BCFTOOLS_MERGE_MT.out.versions) + ch_versions = ch_versions.mix(ZIP_TABIX_VARCALLERBED.out.versions) + ch_versions = ch_versions.mix(BCFTOOLS_ANNOTATE.out.versions) + ch_versions = ch_versions.mix(TABIX_ANNOTATE.out.versions) emit: - vcf = ch_annotation_in // channel: [ val(meta), path(vcf) ] - tbi = TABIX_TABIX_MERGE.out.tbi // channel: [ val(meta), path(tbi) ] - versions = ch_versions // channel: [ path(versions.yml) ] + vcf = BCFTOOLS_ANNOTATE.out.vcf // channel: [ val(meta), path(vcf) ] + tbi = TABIX_ANNOTATE.out.tbi // channel: [ val(meta), path(tbi) ] + versions = ch_versions // channel: [ path(versions.yml) ] } From 9f36e558939608b971b0eca1d42ae86a8d246ce3 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 10 Jan 2024 08:23:36 +0100 Subject: [PATCH 3/5] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ce1a867b..303d8c07 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ and this project adheres to 
[Semantic Versioning](https://semver.org/spec/v2.0.0 - ngsbits samplegender to check sex [#453](https://github.com/nf-core/raredisease/pull/453) - New workflow for generating cgh files from SV vcfs for interpretation in the CytosSure interpretation software. Turned off by default [#456](https://github.com/nf-core/raredisease/pull/456/) - Fastp to do adapter trimming. It can be skipped using `--skip_fastp` [#457](https://github.com/nf-core/raredisease/pull/457) +- Add FOUND_IN tag, which mentions the variant caller that found the mutation, in the INFO column of the vcf files [#471](https://github.com/nf-core/raredisease/pull/471) ### `Changed` From 829282e3057ff711521b19dd8d30de319921bd51 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 11 Jan 2024 12:45:16 +0100 Subject: [PATCH 4/5] review suggestions and switch fai to chromsizes --- lib/WorkflowRaredisease.groovy | 7 ++++--- subworkflows/local/call_snv.nf | 12 ++++++++---- .../local/variant_calling/call_snv_deepvariant.nf | 15 ++++++++------- .../local/variant_calling/call_snv_sentieon.nf | 5 +++-- .../local/variant_calling/postprocess_MT_calls.nf | 5 +++-- workflows/raredisease.nf | 1 + 6 files changed, 27 insertions(+), 18 deletions(-) diff --git a/lib/WorkflowRaredisease.groovy b/lib/WorkflowRaredisease.groovy index b6b58af6..02e91acd 100755 --- a/lib/WorkflowRaredisease.groovy +++ b/lib/WorkflowRaredisease.groovy @@ -36,12 +36,13 @@ class WorkflowRaredisease { } // - // Create a bed file which includes the name of variant caller from fai + // Create a bed file which includes the name of variant caller from chromsizes file // - public static String makeBedWithVariantCallerInfo(fai, parent_dir, varcaller) { + public static String makeBedWithVariantCallerInfo(chromsizes, varcaller) { + def parent_dir = chromsizes.parent.toString() def outfile = new File(parent_dir + '/' + varcaller +'.bed') def writer = outfile.newWriter() - fai.eachLine { line -> + 
chromsizes.eachLine { line -> def split_str = line.tokenize("\t") writer << [split_str[0],"1",split_str[1],varcaller].join("\t") + "\n" } diff --git a/subworkflows/local/call_snv.nf b/subworkflows/local/call_snv.nf index 0f646461..8d926f0e 100644 --- a/subworkflows/local/call_snv.nf +++ b/subworkflows/local/call_snv.nf @@ -13,7 +13,8 @@ workflow CALL_SNV { take: ch_genome_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] ch_mt_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] - ch_mtshift_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] + ch_mtshift_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] + ch_genome_chrsizes // channel: [mandatory] [ path(sizes) ] ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ] @@ -44,7 +45,8 @@ workflow CALL_SNV { ch_genome_fasta, ch_genome_fai, ch_case_info, - ch_foundin_header + ch_foundin_header, + ch_genome_chrsizes ) ch_deepvar_vcf = CALL_SNV_DEEPVARIANT.out.vcf ch_deepvar_tbi = CALL_SNV_DEEPVARIANT.out.tabix @@ -60,7 +62,8 @@ workflow CALL_SNV { ch_ml_model, ch_case_info, ch_pcr_indel_model, - ch_foundin_header + ch_foundin_header, + ch_genome_chrsizes ) ch_sentieon_vcf = CALL_SNV_SENTIEON.out.vcf ch_sentieon_tbi = CALL_SNV_SENTIEON.out.tabix @@ -104,7 +107,8 @@ workflow CALL_SNV { ch_genome_fai, ch_mtshift_backchain, ch_case_info, - ch_foundin_header + ch_foundin_header, + ch_genome_chrsizes ) ch_versions = ch_versions.mix(CALL_SNV_MT.out.versions) diff --git a/subworkflows/local/variant_calling/call_snv_deepvariant.nf b/subworkflows/local/variant_calling/call_snv_deepvariant.nf index 6ffea594..b20e19ae 100644 --- a/subworkflows/local/variant_calling/call_snv_deepvariant.nf +++ b/subworkflows/local/variant_calling/call_snv_deepvariant.nf @@ -13,11 +13,12 @@ include { TABIX_TABIX as TABIX_ANNOTATE } from 
'../../../modules/nf workflow CALL_SNV_DEEPVARIANT { take: - ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] - ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_case_info // channel: [mandatory] [ val(case_info) ] - ch_foundin_header // channel: [mandatory] [ path(header) ] + ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_case_info // channel: [mandatory] [ val(case_info) ] + ch_foundin_header // channel: [mandatory] [ path(header) ] + ch_genome_chrsizes // channel: [mandatory] [ path(chrsizes) ] main: ch_versions = Channel.empty() @@ -52,8 +53,8 @@ workflow CALL_SNV_DEEPVARIANT { TABIX_GL (REMOVE_DUPLICATES_GL.out.vcf) - ch_genome_fai.map{meta, fai -> - return [meta, WorkflowRaredisease.makeBedWithVariantCallerInfo(fai, fai.parent.toString(), "deepvariant")] + ch_genome_chrsizes.flatten().map{chromsizes -> + return [[id:'deepvariant'], WorkflowRaredisease.makeBedWithVariantCallerInfo(chromsizes, "deepvariant")] } .set { ch_varcallerinfo } diff --git a/subworkflows/local/variant_calling/call_snv_sentieon.nf b/subworkflows/local/variant_calling/call_snv_sentieon.nf index bccc830a..53db6095 100644 --- a/subworkflows/local/variant_calling/call_snv_sentieon.nf +++ b/subworkflows/local/variant_calling/call_snv_sentieon.nf @@ -27,6 +27,7 @@ workflow CALL_SNV_SENTIEON { ch_case_info // channel: [mandatory] [ val(case_info) ] ch_pcr_indel_model // channel: [optional] [ val(sentieon_dnascope_pcr_indel_model) ] ch_foundin_header // channel: [mandatory] [ path(header) ] + ch_genome_chrsizes // channel: [mandatory] [ path(chrsizes) ] main: ch_versions = Channel.empty() @@ -89,8 +90,8 @@ workflow CALL_SNV_SENTIEON { TABIX_SEN(REMOVE_DUPLICATES_SEN.out.vcf) - ch_genome_fai.map{meta, fai -> - return [meta, 
WorkflowRaredisease.makeBedWithVariantCallerInfo(fai, fai.parent.toString(), "sentieon")] + ch_genome_chrsizes.flatten().map{chromsizes -> + return [[id:'sentieon_dnascope'], WorkflowRaredisease.makeBedWithVariantCallerInfo(chromsizes, "sentieon_dnascope")] } .set { ch_varcallerinfo } diff --git a/subworkflows/local/variant_calling/postprocess_MT_calls.nf b/subworkflows/local/variant_calling/postprocess_MT_calls.nf index 304d8bd4..81279a9c 100644 --- a/subworkflows/local/variant_calling/postprocess_MT_calls.nf +++ b/subworkflows/local/variant_calling/postprocess_MT_calls.nf @@ -25,6 +25,7 @@ workflow POSTPROCESS_MT_CALLS { ch_mtshift_backchain // channel: [mandatory] [ val(meta), path(backchain) ] ch_case_info // channel: [mandatory] [ val(case_info) ] ch_foundin_header // channel: [mandatory] [ path(header) ] + ch_genome_chrsizes // channel: [mandatory] [ path(chrsizes) ] main: ch_versions = Channel.empty() @@ -101,8 +102,8 @@ workflow POSTPROCESS_MT_CALLS { TABIX_TABIX_MERGE(ch_addfoundintag_in) - ch_genome_fai.map{meta, fai -> - return [meta, WorkflowRaredisease.makeBedWithVariantCallerInfo(fai, fai.parent.toString(), "mutect2")] + ch_genome_chrsizes.flatten().map{chromsizes -> + return [[id:'mutect2'], WorkflowRaredisease.makeBedWithVariantCallerInfo(chromsizes, "mutect2")] } .set { ch_varcallerinfo } diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 203b3b5b..ec1a2609 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -367,6 +367,7 @@ workflow RAREDISEASE { ch_mapped.genome_bam_bai, ch_mapped.mt_bam_bai, ch_mapped.mtshift_bam_bai, + ch_genome_chrsizes, ch_genome_fasta, ch_genome_fai, ch_genome_dictionary, From a64bd131bfc042a33d337ede93a968147357e520 Mon Sep 17 00:00:00 2001 From: Anders Jemt Date: Thu, 11 Jan 2024 13:37:27 +0100 Subject: [PATCH 5/5] Update lib/WorkflowRaredisease.groovy --- lib/WorkflowRaredisease.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/WorkflowRaredisease.groovy 
b/lib/WorkflowRaredisease.groovy index 02e91acd..578dc6d6 100755 --- a/lib/WorkflowRaredisease.groovy +++ b/lib/WorkflowRaredisease.groovy @@ -44,7 +44,7 @@ class WorkflowRaredisease { def writer = outfile.newWriter() chromsizes.eachLine { line -> def split_str = line.tokenize("\t") - writer << [split_str[0],"1",split_str[1],varcaller].join("\t") + "\n" + writer << [split_str[0],"0",split_str[1],varcaller].join("\t") + "\n" } writer.close() return outfile