From bbc94c0bdb39b4982abd2e9eca2746f107d09831 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 30 Jan 2024 18:10:29 +0100 Subject: [PATCH 1/2] update expansionhunter --- conf/modules/call_repeat_expansions.config | 17 +++++++++ modules.json | 2 +- modules/nf-core/expansionhunter/main.nf | 8 ++-- modules/nf-core/expansionhunter/meta.yml | 10 ++++- .../expansionhunter/tests/main.nf.test | 37 +++++++++++++++++++ .../expansionhunter/tests/main.nf.test.snap | 15 ++++++++ .../nf-core/expansionhunter/tests/tags.yml | 2 + subworkflows/local/call_repeat_expansions.nf | 8 ++++ 8 files changed, 94 insertions(+), 5 deletions(-) create mode 100644 modules/nf-core/expansionhunter/tests/main.nf.test create mode 100644 modules/nf-core/expansionhunter/tests/main.nf.test.snap create mode 100644 modules/nf-core/expansionhunter/tests/tags.yml diff --git a/conf/modules/call_repeat_expansions.config b/conf/modules/call_repeat_expansions.config index 13a7e519..e31911a7 100644 --- a/conf/modules/call_repeat_expansions.config +++ b/conf/modules/call_repeat_expansions.config @@ -28,6 +28,23 @@ process { ext.prefix = { "${meta.id}_exphunter" } } + withName: '.*CALL_REPEAT_EXPANSIONS:SAMTOOLS_SORT' { + ext.prefix = { "${meta.id}_exphunter_sorted" } + publishDir = [ + path: { "${params.outdir}/repeat_expansions" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: '.*CALL_REPEAT_EXPANSIONS:SAMTOOLS_INDEX' { + publishDir = [ + path: { "${params.outdir}/repeat_expansions" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + ] + } + withName: '.*CALL_REPEAT_EXPANSIONS:BCFTOOLS_REHEADER_EXP' { ext.prefix = { "${meta.id}_reheader" } } diff --git a/modules.json b/modules.json index e33ea340..3ea648f7 100644 --- a/modules.json +++ b/modules.json @@ -122,7 +122,7 @@ }, "expansionhunter": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "3c4b0007393248aa7419a9ec3d14f391cd702f48", "installed_by": ["modules"] }, "fastp": { diff --git a/modules/nf-core/expansionhunter/main.nf b/modules/nf-core/expansionhunter/main.nf index 008b13ad..bea5916a 100644 --- a/modules/nf-core/expansionhunter/main.nf +++ b/modules/nf-core/expansionhunter/main.nf @@ -14,9 +14,10 @@ process EXPANSIONHUNTER { tuple val(meta4), path(variant_catalog) output: - tuple val(meta), path("*.vcf.gz") , emit: vcf - tuple val(meta), path("*.json.gz") , emit: json - path "versions.yml" , emit: versions + tuple val(meta), path("*.vcf.gz") , emit: vcf + tuple val(meta), path("*.json.gz") , emit: json + tuple val(meta), path("*_realigned.bam") , emit: bam + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -49,6 +50,7 @@ process EXPANSIONHUNTER { """ touch ${prefix}.vcf.gz touch ${prefix}.json.gz + touch ${prefix}_realigned.bam cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/expansionhunter/meta.yml b/modules/nf-core/expansionhunter/meta.yml index b970ced4..698529dc 100644 --- a/modules/nf-core/expansionhunter/meta.yml +++ b/modules/nf-core/expansionhunter/meta.yml @@ -24,6 +24,10 @@ input: type: file description: BAM/CRAM file pattern: "*.{bam,cram}" + - bai: + type: file + description: Index of BAM/CRAM file + pattern: "*.{bai,crai}" - meta2: type: map description: | @@ -38,7 +42,7 @@ input: description: | Groovy Map containing reference information e.g. 
[ id:'test' ] - - fasta: + - fasta_fai: type: file description: Reference genome index pattern: "*.fai" @@ -61,6 +65,10 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - bam: + type: file + description: BAM/CRAM file + pattern: "*.{bam,cram}" - vcf: type: file description: VCF with repeat expansions diff --git a/modules/nf-core/expansionhunter/tests/main.nf.test b/modules/nf-core/expansionhunter/tests/main.nf.test new file mode 100644 index 00000000..6204460a --- /dev/null +++ b/modules/nf-core/expansionhunter/tests/main.nf.test @@ -0,0 +1,37 @@ +nextflow_process { + + name "Test Process EXPANSIONHUNTER" + script "modules/nf-core/expansionhunter/main.nf" + process "EXPANSIONHUNTER" + tag "modules" + tag "modules_nfcore" + tag "expansionhunter" + + test("expansionhunter") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + ] + input[1] = [[id:'fasta'],file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + input[2] = [[id:'fasta_fai'],file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)] + input[3] = [[id:'catalogue'],file(params.test_data['homo_sapiens']['genome']['repeat_expansions'], checkIfExists: true)] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert path(process.out.vcf.get(0).get(1)).linesGzip.size() == 8}, + {assert path(process.out.json.get(0).get(1)).linesGzip.size() == 27}, + {assert snapshot(process.out.bam).match()} + ) + } + + } + +} diff --git a/modules/nf-core/expansionhunter/tests/main.nf.test.snap b/modules/nf-core/expansionhunter/tests/main.nf.test.snap new file mode 100644 index 00000000..a98fb599 --- /dev/null +++ b/modules/nf-core/expansionhunter/tests/main.nf.test.snap 
@@ -0,0 +1,15 @@ +{ + "expansionhunter": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_realigned.bam:md5,b37a72c0b97b45e63636a9758f3144d7" + ] + ] + ], + "timestamp": "2024-01-26T17:35:10.45442497" + } +} \ No newline at end of file diff --git a/modules/nf-core/expansionhunter/tests/tags.yml b/modules/nf-core/expansionhunter/tests/tags.yml new file mode 100644 index 00000000..03266351 --- /dev/null +++ b/modules/nf-core/expansionhunter/tests/tags.yml @@ -0,0 +1,2 @@ +expansionhunter: + - "modules/nf-core/expansionhunter/**" diff --git a/subworkflows/local/call_repeat_expansions.nf b/subworkflows/local/call_repeat_expansions.nf index 1fd76cb9..fda99625 100644 --- a/subworkflows/local/call_repeat_expansions.nf +++ b/subworkflows/local/call_repeat_expansions.nf @@ -8,6 +8,8 @@ include { BCFTOOLS_VIEW as COMPRESS_STRANGER } from '../../modules/nf- include { EXPANSIONHUNTER } from '../../modules/nf-core/expansionhunter/main' include { PICARD_RENAMESAMPLEINVCF as RENAMESAMPLE_EXP } from '../../modules/nf-core/picard/renamesampleinvcf/main' include { STRANGER } from '../../modules/nf-core/stranger/main' +include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' include { SVDB_MERGE as SVDB_MERGE_REPEATS } from '../../modules/nf-core/svdb/merge/main' include { TABIX_BGZIPTABIX as BGZIPTABIX_EXP } from '../../modules/nf-core/tabix/bgziptabix/main' include { TABIX_TABIX as INDEX_STRANGER } from '../../modules/nf-core/tabix/tabix/main' @@ -31,6 +33,10 @@ workflow CALL_REPEAT_EXPANSIONS { ch_variant_catalog ) + // Sort and index realigned bam + SAMTOOLS_SORT(EXPANSIONHUNTER.out.bam) + SAMTOOLS_INDEX(SAMTOOLS_SORT.out.bam) + // Fix header and rename sample BCFTOOLS_REHEADER_EXP ( EXPANSIONHUNTER.out.vcf.map{ meta, vcf -> [ meta, vcf, [], [] ]}, @@ -77,6 +83,8 @@ workflow CALL_REPEAT_EXPANSIONS { ch_versions = ch_versions.mix(STRANGER.out.versions.first()) ch_versions = 
ch_versions.mix(COMPRESS_STRANGER.out.versions.first()) ch_versions = ch_versions.mix(INDEX_STRANGER.out.versions.first()) + ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first()) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) emit: vcf = ch_vcf_idx // channel: [ val(meta), path(vcf), path(tbi) ] From 8f8f41ddfc106a3aa226584993ef1dd87bb2381c Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 31 Jan 2024 16:30:15 +0100 Subject: [PATCH 2/2] review suggestion --- conf/modules/call_repeat_expansions.config | 2 +- docs/output.md | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/conf/modules/call_repeat_expansions.config b/conf/modules/call_repeat_expansions.config index 23d3e2f8..39934a4b 100644 --- a/conf/modules/call_repeat_expansions.config +++ b/conf/modules/call_repeat_expansions.config @@ -83,7 +83,7 @@ process { } withName: '.*CALL_REPEAT_EXPANSIONS:COMPRESS_STRANGER' { - ext.prefix = { "${meta.id}_repeat_expansion" } + ext.prefix = { "${meta.id}_repeat_expansion_stranger" } ext.args = '--output-type z' publishDir = [ path: { "${params.outdir}/repeat_expansions" }, diff --git a/docs/output.md b/docs/output.md index 080c73ac..43ca906d 100644 --- a/docs/output.md +++ b/docs/output.md @@ -301,6 +301,10 @@ The pipeline performs variant calling using [Sentieon DNAscope](https://support. - `repeat_expansions/` - `_repeat_expansion.vcf.gz`: file containing variant calls. - `_repeat_expansion.vcf.gz.tbi`: index of the file containing variant calls. + - `_exphunter_sorted.bam`: A BAMlet containing alignments of reads that overlap or are located in close proximity to each variant identified by ExpansionHunter. + - `_exphunter_sorted.bam.bai`: Index of the BAMlet file. + - `_repeat_expansion_stranger.vcf.gz`: Output from ExpansionHunter annotated with the pathologic implications of the repeat sizes by Stranger. 
+ - `_repeat_expansion_stranger.vcf.gz.tbi`: Index of the Stranger VCF file.