From da6bea8cbbc90b48e1ad353d95a451a3942dac93 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 28 May 2021 09:28:42 +0200 Subject: [PATCH 01/20] Fix malformed software version regex --- bin/scrape_software_versions.py | 4 ++-- main.nf | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py index 5c9c0da9c..74d4ab0be 100755 --- a/bin/scrape_software_versions.py +++ b/bin/scrape_software_versions.py @@ -16,7 +16,7 @@ 'Bowtie2': ['v_bowtie2.txt', r"bowtie2-([0-9]+\.[0-9]+\.[0-9]+) -fdebug"], 'Qualimap': ['v_qualimap.txt', r"QualiMap v.(\S+)"], 'GATK HaplotypeCaller': ['v_gatk.txt', r" v(\S+)"], - #'GATK UnifiedGenotyper': ['v_gatk3_5.txt', r"version (\S+)"], + 'GATK UnifiedGenotyper': ['v_gatk3.txt', r"(\S+)"], 'bamUtil' : ['v_bamutil.txt', r"Version: (\S+);"], 'fastP': ['v_fastp.txt', r"([\d\.]+)"], 'DamageProfiler' : ['v_damageprofiler.txt', r"DamageProfiler v(\S+)"], @@ -37,7 +37,7 @@ 'kraken':['v_kraken.txt', r"Kraken version (\S+)"], 'eigenstrat_snp_coverage':['v_eigenstrat_snp_coverage.txt',r"(\S+)"], 'mapDamage2':['v_mapdamage.txt',r"(\S+)"], - 'bbduk':['v_bbduk.txt',r"(\S+)"] + 'bbduk':['v_bbduk.txt',r"(.*)"] } results = OrderedDict() diff --git a/main.nf b/main.nf index 5abe42fef..933bc1dc7 100644 --- a/main.nf +++ b/main.nf @@ -2940,6 +2940,7 @@ process get_software_versions { qualimap --version &> v_qualimap.txt 2>&1 || true preseq &> v_preseq.txt 2>&1 || true gatk --version 2>&1 | head -n 1 > v_gatk.txt 2>&1 || true + gatk3 --version 2>&1 > v_gatk3.txt 2>&1 || true freebayes --version &> v_freebayes.txt 2>&1 || true bedtools --version &> v_bedtools.txt 2>&1 || true damageprofiler --version &> v_damageprofiler.txt 2>&1 || true @@ -2958,7 +2959,7 @@ process get_software_versions { pileupCaller --version &> v_sequencetools.txt 2>&1 || true bowtie2 --version | grep -a 'bowtie2-.* -fdebug' > v_bowtie2.txt || true eigenstrat_snp_coverage --version | cut -d ' ' -f2 
>v_eigenstrat_snp_coverage.txt || true - mapDamage2 --version > v_mapdamage.txt || true + mapDamage --version > v_mapdamage.txt || true bbduk.sh | grep 'Last modified' | cut -d' ' -f 3-99 > v_bbduk.txt || true scrape_software_versions.py &> software_versions_mqc.yaml From 6dc290f6a7b7b893b8fbb5d2b89346c04ac65ca1 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 28 May 2021 09:30:11 +0200 Subject: [PATCH 02/20] Update Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 461812f99..6d6a912f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - [#751](https://github.com/nf-core/eager/pull/751) - Added missing label to mtnucratio - General code cleanup and standardisation of parameters with no default setting - [#750](https://github.com/nf-core/eager/issues/750) - Fixed piped commands requesting the same number of CPUs at each command step +- [#759](https://github.com/nf-core/eager/pull/759) - Fixed malformed software scraping regex that resulted in N/A in MultiQC report ### `Dependencies` From b775bd0d24454863400c78ef0fe3a58cb14a86b5 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 31 May 2021 10:02:49 +0200 Subject: [PATCH 03/20] Adding debugging for GHA --- main.nf | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/main.nf b/main.nf index b0c3e0d3b..fb81182c0 100644 --- a/main.nf +++ b/main.nf @@ -1563,10 +1563,14 @@ process samtools_filter { ''' } else if ( "${params.bam_unmapped_type}" == "fastq" && params.bam_filter_minreadlength == 0 ){ ''' + echo "Samtools Filter Mapped" samtools view -h !{bam} -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam + echo "Samtools Filter Unmapped" samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam + echo "Samtools Indexing" samtools index 
!{libraryid}.filtered.bam !{size} + echo "Samtools BAM2FASTQ" ## FASTQ samtools fastq -tn !{libraryid}.unmapped.bam | pigz -p !{task.cpus - 1} > !{libraryid}.unmapped.fastq.gz rm !{libraryid}.unmapped.bam @@ -1602,22 +1606,34 @@ process samtools_filter { ''' } else if ( "${params.bam_unmapped_type}" == "fastq" && params.bam_filter_minreadlength != 0 ){ ''' + echo "Samtools Filter Mapped" samtools view -h !{bam} -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam + echo "Samtools Filter Unmapped" + samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam + + echo "Samtools Fragment Length Filtering" filter_bam_fragment_length.py -a -l !{params.bam_filter_minreadlength} -o !{libraryid} tmp_mapped.bam + + echo "Samtools Indexing" samtools index !{libraryid}.filtered.bam !{size} + echo "Samtools BAM2FASTQ" ## FASTQ samtools fastq -tn !{libraryid}.unmapped.bam | pigz -p !{task.cpus - 1} > !{libraryid}.unmapped.fastq.gz rm !{libraryid}.unmapped.bam ''' } else if ( "${params.bam_unmapped_type}" == "both" && params.bam_filter_minreadlength != 0 ){ ''' + echo "Samtools Filter Mapped" samtools view -h !{bam} -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam + echo "Samtools Filter Unmapped" samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam + echo "Samtools Fragment Length Filtering" filter_bam_fragment_length.py -a -l !{params.bam_filter_minreadlength} -o !{libraryid} tmp_mapped.bam + echo "Samtools Indexing" samtools index !{libraryid}.filtered.bam !{size} - + echo "Samtools BAM2FASTQ" ## FASTQ samtools fastq -tn !{libraryid}.unmapped.bam | pigz -p !{task.cpus} > !{libraryid}.unmapped.fastq.gz ''' From 8f7f26b5a44d37fe1a22bf94fe321e308d2ad9e7 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 31 May 2021 11:27:03 +0200 Subject: [PATCH 04/20] More samtools debugging --- main.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/main.nf 
b/main.nf index fb81182c0..ed445d2be 100644 --- a/main.nf +++ b/main.nf @@ -1564,9 +1564,9 @@ process samtools_filter { } else if ( "${params.bam_unmapped_type}" == "fastq" && params.bam_filter_minreadlength == 0 ){ ''' echo "Samtools Filter Mapped" - samtools view -h !{bam} -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam + samtools view -h !{bam} -@ !{task.cpus} -f4 -b -o !{libraryid}.unmapped.bam --verbosity 9 echo "Samtools Filter Unmapped" - samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam + samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -b -o !{libraryid}.filtered.bam --verbosity 9 echo "Samtools Indexing" samtools index !{libraryid}.filtered.bam !{size} @@ -1578,7 +1578,7 @@ process samtools_filter { } else if ( "${params.bam_unmapped_type}" == "both" && params.bam_filter_minreadlength == 0 ){ ''' samtools view -h !{bam} -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam - samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam + samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -b -o !{libraryid}.filtered.bam samtools index !{libraryid}.filtered.bam !{size} ## FASTQ @@ -1588,7 +1588,7 @@ process samtools_filter { } else if ( "${params.bam_unmapped_type}" == "keep" && params.bam_filter_minreadlength != 0 ) { ''' samtools view -h -b !{bam} -@ !{task.cpus} -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam - filter_bam_fragment_length.py -a -l !{params.bam_filter_minreadlength} -o !{libraryid} tmp_mapped.bam + filter_bam_fragment_length.py -a -l !{params.bam_filter_minreadlength} -b -o !{libraryid} tmp_mapped.bam samtools index !{libraryid}.filtered.bam !{size} ''' } else if ( "${params.bam_unmapped_type}" == "discard" && params.bam_filter_minreadlength != 0 ){ From 71b7dfda8a80c7a98f0d56fb5de99d06371384a4 Mon Sep 17 00:00:00 2001 From: James Fellows 
Yates Date: Mon, 31 May 2021 12:14:10 +0200 Subject: [PATCH 05/20] Verbosity is a newer thing... --- main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index ed445d2be..2c51e52e2 100644 --- a/main.nf +++ b/main.nf @@ -1564,9 +1564,9 @@ process samtools_filter { } else if ( "${params.bam_unmapped_type}" == "fastq" && params.bam_filter_minreadlength == 0 ){ ''' echo "Samtools Filter Mapped" - samtools view -h !{bam} -@ !{task.cpus} -f4 -b -o !{libraryid}.unmapped.bam --verbosity 9 + samtools view -h !{bam} -@ !{task.cpus} -f4 -b -o !{libraryid}.unmapped.bam echo "Samtools Filter Unmapped" - samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -b -o !{libraryid}.filtered.bam --verbosity 9 + samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -b -o !{libraryid}.filtered.bam echo "Samtools Indexing" samtools index !{libraryid}.filtered.bam !{size} From c48e51ef2fa50a8c9bc3ca2aabfc228147515df1 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 31 May 2021 13:34:07 +0200 Subject: [PATCH 06/20] Replace shell with script block --- main.nf | 135 ++++++++++++++++++++++++++------------------------------ 1 file changed, 63 insertions(+), 72 deletions(-) diff --git a/main.nf b/main.nf index 2c51e52e2..cd400e6cf 100644 --- a/main.nf +++ b/main.nf @@ -1540,103 +1540,94 @@ process samtools_filter { tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.unmapped.bam") optional true // Using shell block rather than script because we are playing with awk - shell: + script: - size = !{params.large_ref} ? '-c' : '' + size = ${params.large_ref} ? 
'-c' : '' // Unmapped/MAPQ Filtering WITHOUT min-length filtering if ( "${params.bam_unmapped_type}" == "keep" && params.bam_filter_minreadlength == 0 ) { - ''' - samtools view -h -b !{bam} -@ !{task.cpus} -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam - samtools index !{libraryid}.filtered.bam !{size} - ''' + """ + samtools view -h -b ${bam} -@ ${task.cpus} -q ${params.bam_mapping_quality_threshold} -o ${libraryid}.filtered.bam + samtools index ${libraryid}.filtered.bam ${size} + """ } else if ( "${params.bam_unmapped_type}" == "discard" && params.bam_filter_minreadlength == 0 ){ - ''' - samtools view -h -b !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam - samtools index !{libraryid}.filtered.bam !{size} - ''' + """ + samtools view -h -b ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o ${libraryid}.filtered.bam + samtools index ${libraryid}.filtered.bam ${size} + """ } else if ( "${params.bam_unmapped_type}" == "bam" && params.bam_filter_minreadlength == 0 ){ - ''' - samtools view -h !{bam} -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam - samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam - samtools index !{libraryid}.filtered.bam !{size} - ''' + """ + samtools view -h ${bam} -@ ${task.cpus} -f4 -o ${libraryid}.unmapped.bam + samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o ${libraryid}.filtered.bam + samtools index ${libraryid}.filtered.bam ${size} + """ } else if ( "${params.bam_unmapped_type}" == "fastq" && params.bam_filter_minreadlength == 0 ){ - ''' - echo "Samtools Filter Mapped" - samtools view -h !{bam} -@ !{task.cpus} -f4 -b -o !{libraryid}.unmapped.bam + """ echo "Samtools Filter Unmapped" - samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -b -o !{libraryid}.filtered.bam + samtools view -h ${bam} -@ ${task.cpus} 
-f4 -b -o ${libraryid}.unmapped.bam + + echo "Samtools Filter Mapped" + samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -b -o ${libraryid}.filtered.bam + echo "Samtools Indexing" - samtools index !{libraryid}.filtered.bam !{size} + samtools index ${libraryid}.filtered.bam ${size} echo "Samtools BAM2FASTQ" ## FASTQ - samtools fastq -tn !{libraryid}.unmapped.bam | pigz -p !{task.cpus - 1} > !{libraryid}.unmapped.fastq.gz - rm !{libraryid}.unmapped.bam - ''' + samtools fastq -tn ${libraryid}.unmapped.bam | pigz -p ${task.cpus - 1} > ${libraryid}.unmapped.fastq.gz + rm ${libraryid}.unmapped.bam + """ } else if ( "${params.bam_unmapped_type}" == "both" && params.bam_filter_minreadlength == 0 ){ - ''' - samtools view -h !{bam} -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam - samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -b -o !{libraryid}.filtered.bam - samtools index !{libraryid}.filtered.bam !{size} + """ + samtools view -h ${bam} -@ ${task.cpus} -f4 -o ${libraryid}.unmapped.bam + samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -b -o ${libraryid}.filtered.bam + samtools index ${libraryid}.filtered.bam ${size} ## FASTQ - samtools fastq -tn !{libraryid}.unmapped.bam | pigz -p !{task.cpus -1} > !{libraryid}.unmapped.fastq.gz - ''' + samtools fastq -tn ${libraryid}.unmapped.bam | pigz -p ${task.cpus -1} > ${libraryid}.unmapped.fastq.gz + """ // Unmapped/MAPQ Filtering WITH min-length filtering } else if ( "${params.bam_unmapped_type}" == "keep" && params.bam_filter_minreadlength != 0 ) { - ''' - samtools view -h -b !{bam} -@ !{task.cpus} -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam - filter_bam_fragment_length.py -a -l !{params.bam_filter_minreadlength} -b -o !{libraryid} tmp_mapped.bam - samtools index !{libraryid}.filtered.bam !{size} - ''' + """ + samtools view -h -b ${bam} -@ ${task.cpus} -q ${params.bam_mapping_quality_threshold} -o 
tmp_mapped.bam + filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -b -o ${libraryid} tmp_mapped.bam + samtools index ${libraryid}.filtered.bam ${size} + """ } else if ( "${params.bam_unmapped_type}" == "discard" && params.bam_filter_minreadlength != 0 ){ - ''' - samtools view -h -b !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam - filter_bam_fragment_length.py -a -l !{params.bam_filter_minreadlength} -o !{libraryid} tmp_mapped.bam - samtools index !{libraryid}.filtered.bam !{size} - ''' + """ + samtools view -h -b ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o tmp_mapped.bam + filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -o ${libraryid} tmp_mapped.bam + samtools index ${libraryid}.filtered.bam ${size} + """ } else if ( "${params.bam_unmapped_type}" == "bam" && params.bam_filter_minreadlength != 0 ){ - ''' - samtools view -h !{bam} -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam - samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam - filter_bam_fragment_length.py -a -l !{params.bam_filter_minreadlength} -o !{libraryid} tmp_mapped.bam - samtools index !{libraryid}.filtered.bam !{size} - ''' + """ + samtools view -h ${bam} -@ ${task.cpus} -f4 -o ${libraryid}.unmapped.bam + samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o tmp_mapped.bam + filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -o ${libraryid} tmp_mapped.bam + samtools index ${libraryid}.filtered.bam ${size} + """ } else if ( "${params.bam_unmapped_type}" == "fastq" && params.bam_filter_minreadlength != 0 ){ - ''' - echo "Samtools Filter Mapped" - samtools view -h !{bam} -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam - echo "Samtools Filter Unmapped" - - samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam - + """ + samtools view 
-h ${bam} -@ ${task.cpus} -f4 -o ${libraryid}.unmapped.bam + samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o tmp_mapped.bam echo "Samtools Fragment Length Filtering" - filter_bam_fragment_length.py -a -l !{params.bam_filter_minreadlength} -o !{libraryid} tmp_mapped.bam + filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -o ${libraryid} tmp_mapped.bam + samtools index ${libraryid}.filtered.bam ${size} - echo "Samtools Indexing" - samtools index !{libraryid}.filtered.bam !{size} - - echo "Samtools BAM2FASTQ" ## FASTQ - samtools fastq -tn !{libraryid}.unmapped.bam | pigz -p !{task.cpus - 1} > !{libraryid}.unmapped.fastq.gz - rm !{libraryid}.unmapped.bam - ''' + samtools fastq -tn ${libraryid}.unmapped.bam | pigz -p ${task.cpus - 1} > ${libraryid}.unmapped.fastq.gz + rm ${libraryid}.unmapped.bam + """ } else if ( "${params.bam_unmapped_type}" == "both" && params.bam_filter_minreadlength != 0 ){ - ''' - echo "Samtools Filter Mapped" - samtools view -h !{bam} -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam - echo "Samtools Filter Unmapped" - samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam - echo "Samtools Fragment Length Filtering" - filter_bam_fragment_length.py -a -l !{params.bam_filter_minreadlength} -o !{libraryid} tmp_mapped.bam - echo "Samtools Indexing" - samtools index !{libraryid}.filtered.bam !{size} - echo "Samtools BAM2FASTQ" + """ + + samtools view -h ${bam} -@ ${task.cpus} -f4 -o ${libraryid}.unmapped.bam + samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o tmp_mapped.bam + filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -o ${libraryid} tmp_mapped.bam + samtools index ${libraryid}.filtered.bam ${size} ## FASTQ - samtools fastq -tn !{libraryid}.unmapped.bam | pigz -p !{task.cpus} > !{libraryid}.unmapped.fastq.gz - ''' + samtools fastq -tn ${libraryid}.unmapped.bam | pigz -p 
${task.cpus} > ${libraryid}.unmapped.fastq.gz + """ } } From c7daf3627792729946e6e2144bc98ffafee354ac Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 31 May 2021 14:27:06 +0200 Subject: [PATCH 07/20] Hehehehe forgot to change script variable assignment --- main.nf | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/main.nf b/main.nf index cd400e6cf..253033097 100644 --- a/main.nf +++ b/main.nf @@ -1542,7 +1542,7 @@ process samtools_filter { // Using shell block rather than script because we are playing with awk script: - size = ${params.large_ref} ? '-c' : '' + size = params.large_ref ? '-c' : '' // Unmapped/MAPQ Filtering WITHOUT min-length filtering if ( "${params.bam_unmapped_type}" == "keep" && params.bam_filter_minreadlength == 0 ) { @@ -1563,12 +1563,10 @@ process samtools_filter { """ } else if ( "${params.bam_unmapped_type}" == "fastq" && params.bam_filter_minreadlength == 0 ){ """ - echo "Samtools Filter Unmapped" - samtools view -h ${bam} -@ ${task.cpus} -f4 -b -o ${libraryid}.unmapped.bam - echo "Samtools Filter Mapped" + samtools view -h ${bam} -@ ${task.cpus} -f4 -b -o ${libraryid}.unmapped.bam + echo "Samtools Filter Unmapped" samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -b -o ${libraryid}.filtered.bam - echo "Samtools Indexing" samtools index ${libraryid}.filtered.bam ${size} @@ -1608,23 +1606,34 @@ process samtools_filter { """ } else if ( "${params.bam_unmapped_type}" == "fastq" && params.bam_filter_minreadlength != 0 ){ """ + echo "Samtools Filter Mapped" samtools view -h ${bam} -@ ${task.cpus} -f4 -o ${libraryid}.unmapped.bam + echo "Samtools Filter Unmapped" + samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o tmp_mapped.bam + echo "Samtools Fragment Length Filtering" filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -o ${libraryid} tmp_mapped.bam + + echo "Samtools Indexing" samtools index 
${libraryid}.filtered.bam ${size} + echo "Samtools BAM2FASTQ" ## FASTQ samtools fastq -tn ${libraryid}.unmapped.bam | pigz -p ${task.cpus - 1} > ${libraryid}.unmapped.fastq.gz rm ${libraryid}.unmapped.bam """ } else if ( "${params.bam_unmapped_type}" == "both" && params.bam_filter_minreadlength != 0 ){ """ - + echo "Samtools Filter Mapped" samtools view -h ${bam} -@ ${task.cpus} -f4 -o ${libraryid}.unmapped.bam + echo "Samtools Filter Unmapped" samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o tmp_mapped.bam + echo "Samtools Fragment Length Filtering" filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -o ${libraryid} tmp_mapped.bam + echo "Samtools Indexing" samtools index ${libraryid}.filtered.bam ${size} + echo "Samtools BAM2FASTQ" ## FASTQ samtools fastq -tn ${libraryid}.unmapped.bam | pigz -p ${task.cpus} > ${libraryid}.unmapped.fastq.gz """ From 56c1373c9319177cb0154559e20829f391a26573 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 31 May 2021 16:22:09 +0200 Subject: [PATCH 08/20] Try changing samtools version --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 0b147d356..136665810 100644 --- a/environment.yml +++ b/environment.yml @@ -17,7 +17,7 @@ dependencies: - bioconda::adapterremovalfixprefix=0.0.5 - bioconda::bwa=0.7.17 - bioconda::picard=2.22.9 - - bioconda::samtools=1.9 + - bioconda::samtools=1.10.3 - bioconda::dedup=0.12.8 - bioconda::angsd=0.933 - bioconda::circularmapper=1.93.5 From 24640807f3b7427ea7ec0d6717bea180a66212ca Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 31 May 2021 16:24:09 +0200 Subject: [PATCH 09/20] Oops wrong --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 136665810..7683579f0 100644 --- a/environment.yml +++ b/environment.yml @@ -17,7 +17,7 @@ dependencies: - 
bioconda::adapterremovalfixprefix=0.0.5 - bioconda::bwa=0.7.17 - bioconda::picard=2.22.9 - - bioconda::samtools=1.10.3 + - bioconda::samtools=1.10 - bioconda::dedup=0.12.8 - bioconda::angsd=0.933 - bioconda::circularmapper=1.93.5 From 551220af9dfebb368f6904cc6de1c8b4e43fd24c Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 31 May 2021 16:35:08 +0200 Subject: [PATCH 10/20] Try moving BAM filtering to separate CI test --- .github/workflows/ci.yml | 62 +++++++++++++++++++++++++++++++++------- environment.yml | 2 +- 2 files changed, 53 insertions(+), 11 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 213c2ac69..00426ed33 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -111,15 +111,6 @@ jobs: - name: MAPPER_BT2 Test running with BowTie2 run: | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --mapper 'bowtie2' --bt2_alignmode 'local' --bt2_sensitivity 'sensitive' --bt2n 1 --bt2l 16 --bt2_trim5 1 --bt2_trim3 1 - - name: HOST_REMOVAL_FASTQ Run the basic pipeline with output unmapped reads as fastq - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_complex,docker --hostremoval_input_fastq - - name: BAM_FILTERING Run basic mapping pipeline with mapping quality filtering, and unmapped export - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_bam_filtering --bam_mapping_quality_threshold 37 --bam_unmapped_type 'fastq' - - name: BAM_FILTERING Run basic mapping pipeline with post-mapping length filtering - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --clip_readlength 0 --run_bam_filtering --bam_filter_minreadlength 50 - name: DEDUPLICATION Test with dedup run: | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --dedupper 'dedup' --dedup_all_merged @@ -194,4 +185,55 @@ jobs: nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_humanbam,docker --skip_fastqc --skip_adapterremoval --skip_deduplication --skip_qualimap --skip_preseq 
--skip_damage_calculation --run_mtnucratio - name: RESCALING Run basic pipeline with basic pipeline but with mapDamage rescaling of BAM files. Note this will be slow run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_mapdamage_rescaling --run_genotyping --genotyping_tool hc --genotyping_source 'rescaled' \ No newline at end of file + nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_mapdamage_rescaling --run_genotyping --genotyping_tool hc --genotyping_source 'rescaled' +bamfilter: + name: Run workflow tests - BAM filtering only + # Only run on push if this is the nf-core dev branch (merged PRs) + if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/eager') }} + runs-on: ubuntu-latest + env: + NXF_VER: ${{ matrix.nxf_ver }} + NXF_ANSI_LOG: false + strategy: + matrix: + # Nextflow versions: check pipeline minimum and current latest + nxf_ver: ['20.07.1', ''] + steps: + - name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Check if Dockerfile or Conda environment changed + uses: technote-space/get-diff-action@v4 + with: + FILES: | + Dockerfile + environment.yml + + - name: Build new docker image + if: env.MATCHED_FILES + run: docker build --no-cache . 
-t nfcore/eager:dev + + - name: Pull docker image + if: ${{ !env.MATCHED_FILES }} + run: | + docker pull nfcore/eager:dev + docker tag nfcore/eager:dev nfcore/eager:dev + + - name: Install Nextflow + env: + CAPSULE_LOG: none + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + - name: HELPTEXT Run with the help flag + run: | + nextflow run ${GITHUB_WORKSPACE} --help + - name: HOST_REMOVAL_FASTQ Run the basic pipeline with output unmapped reads as fastq + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_complex,docker --hostremoval_input_fastq + - name: BAM_FILTERING Run basic mapping pipeline with mapping quality filtering, and unmapped export + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_bam_filtering --bam_mapping_quality_threshold 37 --bam_unmapped_type 'fastq' + - name: BAM_FILTERING Run basic mapping pipeline with post-mapping length filtering + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --clip_readlength 0 --run_bam_filtering --bam_filter_minreadlength 50 \ No newline at end of file diff --git a/environment.yml b/environment.yml index 7683579f0..0b147d356 100644 --- a/environment.yml +++ b/environment.yml @@ -17,7 +17,7 @@ dependencies: - bioconda::adapterremovalfixprefix=0.0.5 - bioconda::bwa=0.7.17 - bioconda::picard=2.22.9 - - bioconda::samtools=1.10 + - bioconda::samtools=1.9 - bioconda::dedup=0.12.8 - bioconda::angsd=0.933 - bioconda::circularmapper=1.93.5 From b0bf1ba56c276d71e48ad2ff01a17473454b89df Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 31 May 2021 16:36:10 +0200 Subject: [PATCH 11/20] Fix indent --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 00426ed33..05fb12bc4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -186,7 +186,7 @@ jobs: - name: RESCALING Run basic pipeline with basic pipeline but with mapDamage 
rescaling of BAM files. Note this will be slow run: | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_mapdamage_rescaling --run_genotyping --genotyping_tool hc --genotyping_source 'rescaled' -bamfilter: + bamfilter: name: Run workflow tests - BAM filtering only # Only run on push if this is the nf-core dev branch (merged PRs) if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/eager') }} From 03e0e1a29c2ab2187cc9f00f2d6a7dba07a7a87d Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 31 May 2021 19:29:35 +0200 Subject: [PATCH 12/20] More debugging --- main.nf | 3 +++ 1 file changed, 3 insertions(+) diff --git a/main.nf b/main.nf index 253033097..eee9ada05 100644 --- a/main.nf +++ b/main.nf @@ -1587,8 +1587,11 @@ process samtools_filter { // Unmapped/MAPQ Filtering WITH min-length filtering } else if ( "${params.bam_unmapped_type}" == "keep" && params.bam_filter_minreadlength != 0 ) { """ + echo "Samtools quality filtering" samtools view -h -b ${bam} -@ ${task.cpus} -q ${params.bam_mapping_quality_threshold} -o tmp_mapped.bam + echo "Length filtering" filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -b -o ${libraryid} tmp_mapped.bam + echo "Indexing" samtools index ${libraryid}.filtered.bam ${size} """ } else if ( "${params.bam_unmapped_type}" == "discard" && params.bam_filter_minreadlength != 0 ){ From 234386cbb2bbc8f80ccfc8d7f0ca5448d0ce6b39 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 31 May 2021 19:35:49 +0200 Subject: [PATCH 13/20] Try replacing -o with redirect for samtools filter --- main.nf | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/main.nf b/main.nf index eee9ada05..92d3f031e 100644 --- a/main.nf +++ b/main.nf @@ -1547,18 +1547,18 @@ process samtools_filter { // Unmapped/MAPQ Filtering WITHOUT min-length filtering if ( "${params.bam_unmapped_type}" == "keep" && 
params.bam_filter_minreadlength == 0 ) { """ - samtools view -h -b ${bam} -@ ${task.cpus} -q ${params.bam_mapping_quality_threshold} -o ${libraryid}.filtered.bam + samtools view -h -b ${bam} -@ ${task.cpus} -q ${params.bam_mapping_quality_threshold} > ${libraryid}.filtered.bam samtools index ${libraryid}.filtered.bam ${size} """ } else if ( "${params.bam_unmapped_type}" == "discard" && params.bam_filter_minreadlength == 0 ){ """ - samtools view -h -b ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o ${libraryid}.filtered.bam + samtools view -h -b ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} > ${libraryid}.filtered.bam samtools index ${libraryid}.filtered.bam ${size} """ } else if ( "${params.bam_unmapped_type}" == "bam" && params.bam_filter_minreadlength == 0 ){ """ - samtools view -h ${bam} -@ ${task.cpus} -f4 -o ${libraryid}.unmapped.bam - samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o ${libraryid}.filtered.bam + samtools view -h ${bam} -@ ${task.cpus} -f4 -b > ${libraryid}.unmapped.bam + samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -b > ${libraryid}.filtered.bam samtools index ${libraryid}.filtered.bam ${size} """ } else if ( "${params.bam_unmapped_type}" == "fastq" && params.bam_filter_minreadlength == 0 ){ @@ -1566,7 +1566,7 @@ process samtools_filter { echo "Samtools Filter Mapped" samtools view -h ${bam} -@ ${task.cpus} -f4 -b -o ${libraryid}.unmapped.bam echo "Samtools Filter Unmapped" - samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -b -o ${libraryid}.filtered.bam + samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -b > ${libraryid}.filtered.bam echo "Samtools Indexing" samtools index ${libraryid}.filtered.bam ${size} @@ -1577,8 +1577,8 @@ process samtools_filter { """ } else if ( "${params.bam_unmapped_type}" == "both" && 
params.bam_filter_minreadlength == 0 ){ """ - samtools view -h ${bam} -@ ${task.cpus} -f4 -o ${libraryid}.unmapped.bam - samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -b -o ${libraryid}.filtered.bam + samtools view -h ${bam} -@ ${task.cpus} -f4 -b > ${libraryid}.unmapped.bam + samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -b > ${libraryid}.filtered.bam samtools index ${libraryid}.filtered.bam ${size} ## FASTQ @@ -1588,35 +1588,35 @@ process samtools_filter { } else if ( "${params.bam_unmapped_type}" == "keep" && params.bam_filter_minreadlength != 0 ) { """ echo "Samtools quality filtering" - samtools view -h -b ${bam} -@ ${task.cpus} -q ${params.bam_mapping_quality_threshold} -o tmp_mapped.bam + samtools view -h -b ${bam} -@ ${task.cpus} -q ${params.bam_mapping_quality_threshold} > tmp_mapped.bam echo "Length filtering" - filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -b -o ${libraryid} tmp_mapped.bam + filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} > ${libraryid} tmp_mapped.bam echo "Indexing" samtools index ${libraryid}.filtered.bam ${size} """ } else if ( "${params.bam_unmapped_type}" == "discard" && params.bam_filter_minreadlength != 0 ){ """ - samtools view -h -b ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o tmp_mapped.bam - filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -o ${libraryid} tmp_mapped.bam + samtools view -h -b ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} > tmp_mapped.bam + filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} > ${libraryid} tmp_mapped.bam samtools index ${libraryid}.filtered.bam ${size} """ } else if ( "${params.bam_unmapped_type}" == "bam" && params.bam_filter_minreadlength != 0 ){ """ - samtools view -h ${bam} -@ ${task.cpus} -f4 -o ${libraryid}.unmapped.bam - samtools view -h ${bam} -@ ${task.cpus} -F4 -q 
${params.bam_mapping_quality_threshold} -o tmp_mapped.bam - filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -o ${libraryid} tmp_mapped.bam + samtools view -h ${bam} -@ ${task.cpus} -f4 -b > ${libraryid}.unmapped.bam + samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -b > tmp_mapped.bam + filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} > ${libraryid} tmp_mapped.bam samtools index ${libraryid}.filtered.bam ${size} """ } else if ( "${params.bam_unmapped_type}" == "fastq" && params.bam_filter_minreadlength != 0 ){ """ echo "Samtools Filter Mapped" - samtools view -h ${bam} -@ ${task.cpus} -f4 -o ${libraryid}.unmapped.bam + samtools view -h ${bam} -@ ${task.cpus} -f4 -b > ${libraryid}.unmapped.bam echo "Samtools Filter Unmapped" - samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o tmp_mapped.bam + samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -b > tmp_mapped.bam echo "Samtools Fragment Length Filtering" - filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -o ${libraryid} tmp_mapped.bam + filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} > ${libraryid} tmp_mapped.bam echo "Samtools Indexing" samtools index ${libraryid}.filtered.bam ${size} @@ -1629,11 +1629,11 @@ process samtools_filter { } else if ( "${params.bam_unmapped_type}" == "both" && params.bam_filter_minreadlength != 0 ){ """ echo "Samtools Filter Mapped" - samtools view -h ${bam} -@ ${task.cpus} -f4 -o ${libraryid}.unmapped.bam + samtools view -h ${bam} -@ ${task.cpus} -f4 -b > ${libraryid}.unmapped.bam echo "Samtools Filter Unmapped" - samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o tmp_mapped.bam + samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -b > tmp_mapped.bam echo "Samtools Fragment Length Filtering" - 
filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -o ${libraryid} tmp_mapped.bam + filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} > ${libraryid} tmp_mapped.bam echo "Samtools Indexing" samtools index ${libraryid}.filtered.bam ${size} echo "Samtools BAM2FASTQ" From 52c6f1863fa861da3a28fe114536c61f58dfd247 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 31 May 2021 19:48:20 +0200 Subject: [PATCH 14/20] Fix bam filter command --- main.nf | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/main.nf b/main.nf index 92d3f031e..ecd83f19d 100644 --- a/main.nf +++ b/main.nf @@ -1590,21 +1590,21 @@ process samtools_filter { echo "Samtools quality filtering" samtools view -h -b ${bam} -@ ${task.cpus} -q ${params.bam_mapping_quality_threshold} > tmp_mapped.bam echo "Length filtering" - filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} > ${libraryid} tmp_mapped.bam + filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -o ${libraryid}.filtered.bam tmp_mapped.bam echo "Indexing" samtools index ${libraryid}.filtered.bam ${size} """ } else if ( "${params.bam_unmapped_type}" == "discard" && params.bam_filter_minreadlength != 0 ){ """ samtools view -h -b ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} > tmp_mapped.bam - filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} > ${libraryid} tmp_mapped.bam + filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -o ${libraryid}.filtered.bam tmp_mapped.bam samtools index ${libraryid}.filtered.bam ${size} """ } else if ( "${params.bam_unmapped_type}" == "bam" && params.bam_filter_minreadlength != 0 ){ """ samtools view -h ${bam} -@ ${task.cpus} -f4 -b > ${libraryid}.unmapped.bam samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -b > tmp_mapped.bam - filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} > 
${libraryid} tmp_mapped.bam + filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -o ${libraryid}.filtered.bam tmp_mapped.bam samtools index ${libraryid}.filtered.bam ${size} """ } else if ( "${params.bam_unmapped_type}" == "fastq" && params.bam_filter_minreadlength != 0 ){ @@ -1616,7 +1616,7 @@ process samtools_filter { samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -b > tmp_mapped.bam echo "Samtools Fragment Length Filtering" - filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} > ${libraryid} tmp_mapped.bam + filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -o ${libraryid}.filtered.bam tmp_mapped.bam echo "Samtools Indexing" samtools index ${libraryid}.filtered.bam ${size} @@ -1633,7 +1633,7 @@ process samtools_filter { echo "Samtools Filter Unmapped" samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -b > tmp_mapped.bam echo "Samtools Fragment Length Filtering" - filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} > ${libraryid} tmp_mapped.bam + filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -o ${libraryid}.filtered.bam tmp_mapped.bam echo "Samtools Indexing" samtools index ${libraryid}.filtered.bam ${size} echo "Samtools BAM2FASTQ" From 87a523c123bc90fb9e34da3eacede0f34275a9e4 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 31 May 2021 21:27:25 +0200 Subject: [PATCH 15/20] filter bam auto-produces file names... 
--- main.nf | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/main.nf b/main.nf index ecd83f19d..f9931400c 100644 --- a/main.nf +++ b/main.nf @@ -1590,21 +1590,21 @@ process samtools_filter { echo "Samtools quality filtering" samtools view -h -b ${bam} -@ ${task.cpus} -q ${params.bam_mapping_quality_threshold} > tmp_mapped.bam echo "Length filtering" - filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -o ${libraryid}.filtered.bam tmp_mapped.bam + filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -o ${libraryid} tmp_mapped.bam echo "Indexing" samtools index ${libraryid}.filtered.bam ${size} """ } else if ( "${params.bam_unmapped_type}" == "discard" && params.bam_filter_minreadlength != 0 ){ """ samtools view -h -b ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} > tmp_mapped.bam - filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -o ${libraryid}.filtered.bam tmp_mapped.bam + filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -o ${libraryid} tmp_mapped.bam samtools index ${libraryid}.filtered.bam ${size} """ } else if ( "${params.bam_unmapped_type}" == "bam" && params.bam_filter_minreadlength != 0 ){ """ samtools view -h ${bam} -@ ${task.cpus} -f4 -b > ${libraryid}.unmapped.bam samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -b > tmp_mapped.bam - filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -o ${libraryid}.filtered.bam tmp_mapped.bam + filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -o ${libraryid} tmp_mapped.bam samtools index ${libraryid}.filtered.bam ${size} """ } else if ( "${params.bam_unmapped_type}" == "fastq" && params.bam_filter_minreadlength != 0 ){ @@ -1616,7 +1616,7 @@ process samtools_filter { samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -b > tmp_mapped.bam echo "Samtools Fragment Length Filtering" 
- filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -o ${libraryid}.filtered.bam tmp_mapped.bam + filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -o ${libraryid} tmp_mapped.bam echo "Samtools Indexing" samtools index ${libraryid}.filtered.bam ${size} @@ -1633,7 +1633,7 @@ process samtools_filter { echo "Samtools Filter Unmapped" samtools view -h ${bam} -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -b > tmp_mapped.bam echo "Samtools Fragment Length Filtering" - filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -o ${libraryid}.filtered.bam tmp_mapped.bam + filter_bam_fragment_length.py -a -l ${params.bam_filter_minreadlength} -o ${libraryid} tmp_mapped.bam echo "Samtools Indexing" samtools index ${libraryid}.filtered.bam ${size} echo "Samtools BAM2FASTQ" From 24934b32e099eb5f17b4c662e6bd6a17081d865c Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 1 Jun 2021 10:57:41 +0200 Subject: [PATCH 16/20] Get artefact to find reason for failure --- .github/workflows/ci.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 05fb12bc4..1cd6f1522 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -236,4 +236,8 @@ jobs: nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_bam_filtering --bam_mapping_quality_threshold 37 --bam_unmapped_type 'fastq' - name: BAM_FILTERING Run basic mapping pipeline with post-mapping length filtering run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --clip_readlength 0 --run_bam_filtering --bam_filter_minreadlength 50 \ No newline at end of file + nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --clip_readlength 0 --run_bam_filtering --bam_filter_minreadlength 50 + - uses: actions/upload-artifact@v2 + with: + name: failed-run-dir-upload + path: ./work/ \ No newline at end of file From 
63f5fed466008667a8508f65e9f869a81dbaa48a Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 1 Jun 2021 11:09:16 +0200 Subject: [PATCH 17/20] Fix issue with dodgy character --- .github/workflows/ci.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1cd6f1522..d181c0570 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -237,7 +237,10 @@ jobs: - name: BAM_FILTERING Run basic mapping pipeline with post-mapping length filtering run: | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --clip_readlength 0 --run_bam_filtering --bam_filter_minreadlength 50 + - name: prep upload + run: | + zip -r work work/ - uses: actions/upload-artifact@v2 with: name: failed-run-dir-upload - path: ./work/ \ No newline at end of file + path: work.zip \ No newline at end of file From e1841a89cd51b53dd1528ed9cdf8134979f9447b Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 1 Jun 2021 13:37:33 +0200 Subject: [PATCH 18/20] Set up to always upload artefact and sep.
MALT --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d181c0570..6d2a7b95f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -241,6 +241,7 @@ jobs: run: | zip -r work work/ - uses: actions/upload-artifact@v2 + if: ${{ always() }} with: name: failed-run-dir-upload path: work.zip \ No newline at end of file From 462319850ed4925e9d806a235943fd6e8a98ddbd Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 1 Jun 2021 13:39:24 +0200 Subject: [PATCH 19/20] Add file --- .github/workflows/ci.yml | 92 ++++++++++++++++++++++++++++++---------- 1 file changed, 70 insertions(+), 22 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6d2a7b95f..9b7f9b63f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -153,27 +153,6 @@ jobs: - name: BAM_INPUT Run the basic pipeline with the bam input profile, convert to FASTQ for adapterremoval test and downstream run: | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_bam,docker --run_convertinputbam - - name: METAGENOMIC Download MALT database - run: | - mkdir -p databases/malt - readlink -f databases/malt/ - for i in index0.idx ref.db ref.idx ref.inf table0.db table0.idx taxonomy.idx taxonomy.map taxonomy.tre; do wget https://github.com/nf-core/test-datasets/raw/eager/databases/malt/"$i" -P databases/malt/; done - - name: METAGENOMIC Run the basic pipeline but with unmapped reads going into MALT - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_bam_filtering --bam_unmapped_type 'fastq' --run_metagenomic_screening --metagenomic_tool 'malt' --database "/home/runner/work/eager/eager/databases/malt/" --malt_sam_output - - name: METAGENOMIC Run the basic pipeline but low-complexity filtered reads going into MALT - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_bam_filtering --bam_unmapped_type 'fastq' 
--run_metagenomic_screening --metagenomic_tool 'malt' --database "/home/runner/work/eager/eager/databases/malt/" --metagenomic_complexity_filter - - name: MALTEXTRACT Download resource files - run: | - mkdir -p databases/maltextract - for i in ncbi.tre ncbi.map; do wget https://github.com/rhuebler/HOPS/raw/0.33/Resources/"$i" -P databases/maltextract/; done - - name: MALTEXTRACT Basic with MALT plus MaltExtract - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_bam_filtering --bam_unmapped_type 'fastq' --run_metagenomic_screening --metagenomic_tool 'malt' --database "/home/runner/work/eager/eager/databases/malt" --run_maltextract --maltextract_ncbifiles "/home/runner/work/eager/eager/databases/maltextract/" --maltextract_taxon_list 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Mammoth/maltextract/MaltExtract_list.txt' - - name: METAGENOMIC Run the basic pipeline but with unmapped reads going into Kraken - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_kraken,docker --run_bam_filtering --bam_unmapped_type 'fastq' - name: SEXDETERMINATION Run the basic pipeline with the bam input profile, but don't convert BAM, skip everything but sex determination run: | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_humanbam,docker --skip_fastqc --skip_adapterremoval --skip_deduplication --skip_qualimap --run_sexdeterrmine @@ -244,4 +223,73 @@ jobs: if: ${{ always() }} with: name: failed-run-dir-upload - path: work.zip \ No newline at end of file + path: work.zip + maltrun: + name: Run workflow tests - BAM filtering only + # Only run on push if this is the nf-core dev branch (merged PRs) + if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/eager') }} + runs-on: ubuntu-latest + env: + NXF_VER: ${{ matrix.nxf_ver }} + NXF_ANSI_LOG: false + strategy: + matrix: + # Nextflow versions: check pipeline minimum and current latest + nxf_ver: ['20.07.1', ''] + steps: + - name: 
Check out pipeline code + uses: actions/checkout@v2 + + - name: Check if Dockerfile or Conda environment changed + uses: technote-space/get-diff-action@v4 + with: + FILES: | + Dockerfile + environment.yml + + - name: Build new docker image + if: env.MATCHED_FILES + run: docker build --no-cache . -t nfcore/eager:dev + + - name: Pull docker image + if: ${{ !env.MATCHED_FILES }} + run: | + docker pull nfcore/eager:dev + docker tag nfcore/eager:dev nfcore/eager:dev + + - name: Install Nextflow + env: + CAPSULE_LOG: none + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + - name: METAGENOMIC Download MALT database + run: | + mkdir -p databases/malt + readlink -f databases/malt/ + for i in index0.idx ref.db ref.idx ref.inf table0.db table0.idx taxonomy.idx taxonomy.map taxonomy.tre; do wget https://github.com/nf-core/test-datasets/raw/eager/databases/malt/"$i" -P databases/malt/; done + - name: METAGENOMIC Run the basic pipeline but with unmapped reads going into MALT + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_bam_filtering --bam_unmapped_type 'fastq' --run_metagenomic_screening --metagenomic_tool 'malt' --database "/home/runner/work/eager/eager/databases/malt/" --malt_sam_output + - name: METAGENOMIC Run the basic pipeline but low-complexity filtered reads going into MALT + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_bam_filtering --bam_unmapped_type 'fastq' --run_metagenomic_screening --metagenomic_tool 'malt' --database "/home/runner/work/eager/eager/databases/malt/" --metagenomic_complexity_filter + - name: MALTEXTRACT Download resource files + run: | + mkdir -p databases/maltextract + for i in ncbi.tre ncbi.map; do wget https://github.com/rhuebler/HOPS/raw/0.33/Resources/"$i" -P databases/maltextract/; done + - name: MALTEXTRACT Basic with MALT plus MaltExtract + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_bam_filtering --bam_unmapped_type 'fastq' 
--run_metagenomic_screening --metagenomic_tool 'malt' --database "/home/runner/work/eager/eager/databases/malt" --run_maltextract --maltextract_ncbifiles "/home/runner/work/eager/eager/databases/maltextract/" --maltextract_taxon_list 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Mammoth/maltextract/MaltExtract_list.txt' + - name: METAGENOMIC Run the basic pipeline but with unmapped reads going into Kraken + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_kraken,docker --run_bam_filtering --bam_unmapped_type 'fastq' + - name: prep upload + run: | + zip -r work work/ + - uses: actions/upload-artifact@v2 + if: ${{ always() }} + with: + name: failed-run-dir-upload-malt + path: | + work.zip \ No newline at end of file From 953367f424ee7d47765af37f77260f2de7450284 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 1 Jun 2021 14:59:58 +0200 Subject: [PATCH 20/20] Make sure ZIP should also be always run --- .github/workflows/ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9b7f9b63f..ecb840d09 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -217,6 +217,7 @@ jobs: run: | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --clip_readlength 0 --run_bam_filtering --bam_filter_minreadlength 50 - name: prep upload + if: ${{ always() }} run: | zip -r work work/ - uses: actions/upload-artifact@v2 @@ -285,6 +286,7 @@ jobs: run: | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_kraken,docker --run_bam_filtering --bam_unmapped_type 'fastq' - name: prep upload + if: ${{ always() }} run: | zip -r work work/ - uses: actions/upload-artifact@v2