From e15e3fb85345259499f7c23628907bef1b1552af Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 21 May 2021 13:03:43 +0200 Subject: [PATCH 1/7] Fix piped command CPUs --- CHANGELOG.md | 5 +++-- main.nf | 62 +++++++++++++++++++++++++++------------------------- 2 files changed, 35 insertions(+), 32 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3907b4cde..bccf07fe5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,8 +16,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Updated template to nf-core/tools 1.14 - [#688](https://github.com/nf-core/eager/issues/688) - Clarified the pipeline is not just for humans and microbes, but also plants and animals, and also for modern DNA - [#751](https://github.com/nf-core/eager/pull/751) - Added missing label to mtnucratio -- General code cleanup and standarisation of parameters with no default setting - +- General code cleanup and standardisation of parameters with no default setting +- [#750](https://github.com/nf-core/eager/issues/750) - Fixed piped commands requesting the same number of CPUs at each command step + ### `Dependencies` ### `Deprecated` diff --git a/main.nf b/main.nf index a45bc8943..6f66fc57a 100644 --- a/main.nf +++ b/main.nf @@ -791,7 +791,7 @@ process adapter_removal { mv *.settings output/ ## Add R_ and L_ for unmerged reads for DeDup compatibility - AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz | pigz -p ${task.cpus} > output/${base}.pe.combined.fq.gz + AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz | pigz -p ${task.cpus - 1} > output/${base}.pe.combined.fq.gz """ //PE mode, collapse and trim, outputting all reads, preserving 5p } else if (seqtype == 'PE' && !params.skip_collapse && !params.skip_trim && !params.mergedonly && params.preserve5p) { @@ -805,7 +805,7 @@ process adapter_removal { mv *.settings output/ ## Add R_ and L_ for unmerged reads for DeDup 
compatibility - AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz | pigz -p ${task.cpus} > output/${base}.pe.combined.fq.gz + AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz | pigz -p ${task.cpus - 1} > output/${base}.pe.combined.fq.gz """ // PE mode, collapse and trim but only output collapsed reads } else if ( seqtype == 'PE' && !params.skip_collapse && !params.skip_trim && params.mergedonly && !params.preserve5p ) { @@ -816,7 +816,7 @@ process adapter_removal { cat *.collapsed.gz *.collapsed.truncated.gz > output/${base}.pe.combined.tmp.fq.gz ## Add R_ and L_ for unmerged reads for DeDup compatibility - AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz | pigz -p ${task.cpus} > output/${base}.pe.combined.fq.gz + AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz | pigz -p ${task.cpus - 1} > output/${base}.pe.combined.fq.gz mv *.settings output/ """ @@ -829,7 +829,7 @@ process adapter_removal { cat *.collapsed.gz > output/${base}.pe.combined.tmp.fq.gz ## Add R_ and L_ for unmerged reads for DeDup compatibility - AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz | pigz -p ${task.cpus} > output/${base}.pe.combined.fq.gz + AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz | pigz -p ${task.cpus - 1} > output/${base}.pe.combined.fq.gz mv *.settings output/ """ @@ -843,7 +843,7 @@ process adapter_removal { cat *.collapsed.gz *.pair1.truncated.gz *.pair2.truncated.gz > output/${base}.pe.combined.tmp.fq.gz ## Add R_ and L_ for unmerged reads for DeDup compatibility - AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz | pigz -p ${task.cpus} > output/${base}.pe.combined.fq.gz + AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz | pigz -p ${task.cpus - 1} > 
output/${base}.pe.combined.fq.gz mv *.settings output/ """ @@ -857,7 +857,7 @@ process adapter_removal { cat *.collapsed.gz > output/${base}.pe.combined.tmp.fq.gz ## Add R_ and L_ for unmerged reads for DeDup compatibility - AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz | pigz -p ${task.cpus} > output/${base}.pe.combined.fq.gz + AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz | pigz -p ${task.cpus - 1} > output/${base}.pe.combined.fq.gz mv *.settings output/ """ @@ -1162,14 +1162,14 @@ process bwa { """ bwa aln -t ${task.cpus} $fasta ${r1} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -o ${params.bwaalno} -f ${libraryid}.r1.sai bwa aln -t ${task.cpus} $fasta ${r2} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -o ${params.bwaalno} -f ${libraryid}.r2.sai - bwa sampe -r "@RG\\tID:ILLUMINA-${libraryid}\\tSM:${libraryid}\\tPL:illumina\\tPU:ILLUMINA-${libraryid}-${seqtype}" $fasta ${libraryid}.r1.sai ${libraryid}.r2.sai ${r1} ${r2} | samtools sort -@ ${task.cpus} -O bam - > ${libraryid}_"${seqtype}".mapped.bam + bwa sampe -r "@RG\\tID:ILLUMINA-${libraryid}\\tSM:${libraryid}\\tPL:illumina\\tPU:ILLUMINA-${libraryid}-${seqtype}" $fasta ${libraryid}.r1.sai ${libraryid}.r2.sai ${r1} ${r2} | samtools sort -@ ${task.cpus -1} -O bam - > ${libraryid}_"${seqtype}".mapped.bam samtools index "${libraryid}"_"${seqtype}".mapped.bam ${size} """ } else { //PE collapsed, or SE data """ bwa aln -t ${task.cpus} ${fasta} ${r1} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -o ${params.bwaalno} -f ${libraryid}.sai - bwa samse -r "@RG\\tID:ILLUMINA-${libraryid}\\tSM:${libraryid}\\tPL:illumina\\tPU:ILLUMINA-${libraryid}-${seqtype}" $fasta ${libraryid}.sai $r1 | samtools sort -@ ${task.cpus} -O bam - > "${libraryid}"_"${seqtype}".mapped.bam + bwa samse -r "@RG\\tID:ILLUMINA-${libraryid}\\tSM:${libraryid}\\tPL:illumina\\tPU:ILLUMINA-${libraryid}-${seqtype}" 
$fasta ${libraryid}.sai $r1 | samtools sort -@ ${task.cpus - 1} -O bam - > "${libraryid}"_"${seqtype}".mapped.bam samtools index "${libraryid}"_"${seqtype}".mapped.bam ${size} """ } @@ -1194,18 +1194,19 @@ process bwamem { params.mapper == 'bwamem' script: + def split_cpus = Math.floor(task.cpus/2) def fasta = "${index}/${fasta_base}" def size = params.large_ref ? '-c' : '' if (!params.single_end && params.skip_collapse){ """ - bwa mem -t ${task.cpus} $fasta $r1 $r2 -R "@RG\\tID:ILLUMINA-${libraryid}\\tSM:${libraryid}\\tPL:illumina\\tPU:ILLUMINA-${libraryid}-${seqtype}" | samtools sort -@ ${task.cpus} -O bam - > "${libraryid}"_"${seqtype}".mapped.bam + bwa mem -t ${split_cpus} $fasta $r1 $r2 -R "@RG\\tID:ILLUMINA-${libraryid}\\tSM:${libraryid}\\tPL:illumina\\tPU:ILLUMINA-${libraryid}-${seqtype}" | samtools sort -@ ${split_cpus} -O bam - > "${libraryid}"_"${seqtype}".mapped.bam samtools index ${size} -@ ${task.cpus} "${libraryid}".mapped.bam """ } else { """ - bwa mem -t ${task.cpus} $fasta $r1 -R "@RG\\tID:ILLUMINA-${libraryid}\\tSM:${libraryid}\\tPL:illumina\\tPU:ILLUMINA-${libraryid}-${seqtype}" | samtools sort -@ ${task.cpus} -O bam - > "${libraryid}"_"${seqtype}".mapped.bam - samtools index -@ ${task.cpus} "${libraryid}"_"${seqtype}".mapped.bam ${size} + bwa mem -t ${task.cpus} $fasta $r1 -R "@RG\\tID:ILLUMINA-${libraryid}\\tSM:${libraryid}\\tPL:illumina\\tPU:ILLUMINA-${libraryid}-${seqtype}" | samtools sort -@ ${split_cpus} -O bam - > "${libraryid}"_"${seqtype}".mapped.bam + samtools index -@ ${split_cpus} "${libraryid}"_"${seqtype}".mapped.bam ${size} """ } @@ -1540,6 +1541,7 @@ process samtools_filter { // Using shell block rather than script because we are playing with awk shell: + size = !{params.large_ref} ? 
'-c' : '' // Unmapped/MAPQ Filtering WITHOUT min-length filtering @@ -1555,28 +1557,28 @@ process samtools_filter { ''' } else if ( "${params.bam_unmapped_type}" == "bam" && params.bam_filter_minreadlength == 0 ){ ''' - samtools view -h !{bam} | samtools view - -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam - samtools view -h !{bam} | samtools view - -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam + samtools view -h !{bam} | samtools view - -@ !{task.cpus - 1} -f4 -o !{libraryid}.unmapped.bam + samtools view -h !{bam} | samtools view - -@ !{task.cpus - 1} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam samtools index !{libraryid}.filtered.bam !{size} ''' } else if ( "${params.bam_unmapped_type}" == "fastq" && params.bam_filter_minreadlength == 0 ){ ''' - samtools view -h !{bam} | samtools view - -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam - samtools view -h !{bam} | samtools view - -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam + samtools view -h !{bam} | samtools view - -@ !{task.cpus - 1} -f4 -o !{libraryid}.unmapped.bam + samtools view -h !{bam} | samtools view - -@ !{task.cpus - 1} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam samtools index !{libraryid}.filtered.bam !{size} ## FASTQ - samtools fastq -tn !{libraryid}.unmapped.bam | pigz -p !{task.cpus} > !{libraryid}.unmapped.fastq.gz + samtools fastq -tn !{libraryid}.unmapped.bam | pigz -p !{task.cpus - 1} > !{libraryid}.unmapped.fastq.gz rm !{libraryid}.unmapped.bam ''' } else if ( "${params.bam_unmapped_type}" == "both" && params.bam_filter_minreadlength == 0 ){ ''' - samtools view -h !{bam} | samtools view - -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam - samtools view -h !{bam} | samtools view - -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam + samtools view -h !{bam} | samtools view - -@ !{task.cpus - 1} -f4 
-o !{libraryid}.unmapped.bam + samtools view -h !{bam} | samtools view - -@ !{task.cpus - 1} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam samtools index !{libraryid}.filtered.bam !{size} ## FASTQ - samtools fastq -tn !{libraryid}.unmapped.bam | pigz -p !{task.cpus} > !{libraryid}.unmapped.fastq.gz + samtools fastq -tn !{libraryid}.unmapped.bam | pigz -p !{task.cpus -1} > !{libraryid}.unmapped.fastq.gz ''' // Unmapped/MAPQ Filtering WITH min-length filtering } else if ( "${params.bam_unmapped_type}" == "keep" && params.bam_filter_minreadlength != 0 ) { @@ -1593,26 +1595,26 @@ process samtools_filter { ''' } else if ( "${params.bam_unmapped_type}" == "bam" && params.bam_filter_minreadlength != 0 ){ ''' - samtools view -h !{bam} | samtools view - -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam - samtools view -h !{bam} | samtools view - -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam + samtools view -h !{bam} | samtools view - -@ !{task.cpus - 1} -f4 -o !{libraryid}.unmapped.bam + samtools view -h !{bam} | samtools view - -@ !{task.cpus - 1} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam filter_bam_fragment_length.py -a -l !{params.bam_filter_minreadlength} -o !{libraryid} tmp_mapped.bam samtools index !{libraryid}.filtered.bam !{size} ''' } else if ( "${params.bam_unmapped_type}" == "fastq" && params.bam_filter_minreadlength != 0 ){ ''' - samtools view -h !{bam} | samtools view - -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam - samtools view -h !{bam} | samtools view - -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam + samtools view -h !{bam} | samtools view - -@ !{task.cpus - 1} -f4 -o !{libraryid}.unmapped.bam + samtools view -h !{bam} | samtools view - -@ !{task.cpus - 1} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam filter_bam_fragment_length.py -a -l !{params.bam_filter_minreadlength} -o !{libraryid} tmp_mapped.bam samtools 
index !{libraryid}.filtered.bam !{size} ## FASTQ - samtools fastq -tn !{libraryid}.unmapped.bam | pigz -p !{task.cpus} > !{libraryid}.unmapped.fastq.gz + samtools fastq -tn !{libraryid}.unmapped.bam | pigz -p !{task.cpus - 1} > !{libraryid}.unmapped.fastq.gz rm !{libraryid}.unmapped.bam ''' } else if ( "${params.bam_unmapped_type}" == "both" && params.bam_filter_minreadlength != 0 ){ ''' - samtools view -h !{bam} | samtools view - -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam - samtools view -h !{bam} | samtools view - -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam + samtools view -h !{bam} | samtools view - -@ !{task.cpus - 1} -f4 -o !{libraryid}.unmapped.bam + samtools view -h !{bam} | samtools view - -@ !{task.cpus - 1} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam filter_bam_fragment_length.py -a -l !{params.bam_filter_minreadlength} -o !{libraryid} tmp_mapped.bam samtools index !{libraryid}.filtered.bam !{size} @@ -1936,8 +1938,8 @@ process bedtools { script: """ - bedtools coverage -nonamecheck -a ${anno_file} -b $bam | pigz -p ${task.cpus} > "${bam.baseName}".breadth.gz - bedtools coverage -nonamecheck -a ${anno_file} -b $bam -mean | pigz -p ${task.cpus} > "${bam.baseName}".depth.gz + bedtools coverage -nonamecheck -a ${anno_file} -b $bam | pigz -p ${task.cpus - 1} > "${bam.baseName}".breadth.gz + bedtools coverage -nonamecheck -a ${anno_file} -b $bam -mean | pigz -p ${task.cpus - 1} > "${bam.baseName}".depth.gz """ } @@ -2033,7 +2035,7 @@ process pmdtools { def platypus = params.pmdtools_platypus ? 
'--platypus' : '' """ #Run Filtering step - samtools calmd -b ${bam} ${fasta} | samtools view -h - | pmdtools --threshold ${params.pmdtools_threshold} ${treatment} ${snpcap} --header | samtools view -@ ${task.cpus} -Sb - > "${libraryid}".pmd.bam + samtools calmd -b ${bam} ${fasta} | samtools view -h - | pmdtools --threshold ${params.pmdtools_threshold} ${treatment} ${snpcap} --header | samtools view -@ ${task.cpus - 1} -Sb - > "${libraryid}".pmd.bam #Run Calc Range step ## To allow early shut off of pipe: https://github.com/nextflow-io/nextflow/issues/1564 @@ -2339,7 +2341,7 @@ ch_input_for_genotyping_pileupcaller.doubleStranded .set {ch_prepped_for_pileupcaller_double} process genotyping_pileupcaller { - label 'mc_small' + label 'sc_small' tag "${strandedness}" publishDir "${params.outdir}/genotyping", mode: params.publish_dir_mode From da9788d706d713bd90348d286c6a4ec11ec0f731 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 21 May 2021 13:06:42 +0200 Subject: [PATCH 2/7] Obligatory linting fix --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bccf07fe5..461812f99 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,7 +18,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
- [#751](https://github.com/nf-core/eager/pull/751) - Added missing label to mtnucratio - General code cleanup and standardisation of parameters with no default setting - [#750](https://github.com/nf-core/eager/issues/750) - Fixed piped commands requesting the same number of CPUs at each command step - + ### `Dependencies` ### `Deprecated` From 68512b4f7558f03209ed4e17de5f7a3425c333e1 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 21 May 2021 13:35:50 +0200 Subject: [PATCH 3/7] All piped command processes to multi-threaded and updated bowtie to split_cpus --- main.nf | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/main.nf b/main.nf index 6f66fc57a..dd876556b 100644 --- a/main.nf +++ b/main.nf @@ -1303,6 +1303,7 @@ process bowtie2 { params.mapper == 'bowtie2' script: + def split_cpus = Math.floor(task.cpus/2) def size = params.large_ref ? '-c' : '' def fasta = "${index}/${fasta_base}" def trim5 = params.bt2_trim5 != 0 ? "--trim5 ${params.bt2_trim5}" : "" @@ -1346,13 +1347,13 @@ process bowtie2 { //PE data without merging, PE data without any AR applied if ( seqtype == 'PE' && ( params.skip_collapse || params.skip_adapterremoval ) ){ """ - bowtie2 -x ${fasta} -1 ${r1} -2 ${r2} -p ${task.cpus} ${sensitivity} ${bt2n} ${bt2l} ${trim5} ${trim3} --maxins ${params.bt2_maxins} --rg-id ILLUMINA-${libraryid} --rg SM:${libraryid} --rg PL:illumina --rg PU:ILLUMINA-${libraryid}-${seqtype} 2> "${libraryid}"_bt2.log | samtools sort -@ ${task.cpus} -O bam > "${libraryid}"_"${seqtype}".mapped.bam + bowtie2 -x ${fasta} -1 ${r1} -2 ${r2} -p ${split_cpus} ${sensitivity} ${bt2n} ${bt2l} ${trim5} ${trim3} --maxins ${params.bt2_maxins} --rg-id ILLUMINA-${libraryid} --rg SM:${libraryid} --rg PL:illumina --rg PU:ILLUMINA-${libraryid}-${seqtype} 2> "${libraryid}"_bt2.log | samtools sort -@ ${split_cpus} -O bam > "${libraryid}"_"${seqtype}".mapped.bam samtools index "${libraryid}"_"${seqtype}".mapped.bam ${size} """ } else { //PE collapsed, or SE 
data """ - bowtie2 -x ${fasta} -U ${r1} -p ${task.cpus} ${sensitivity} ${bt2n} ${bt2l} ${trim5} ${trim3} --rg-id ILLUMINA-${libraryid} --rg SM:${libraryid} --rg PL:illumina --rg PU:ILLUMINA-${libraryid}-${seqtype} 2> "${libraryid}"_bt2.log | samtools sort -@ ${task.cpus} -O bam > "${libraryid}"_"${seqtype}".mapped.bam + bowtie2 -x ${fasta} -U ${r1} -p ${split_cpus} ${sensitivity} ${bt2n} ${bt2l} ${trim5} ${trim3} --rg-id ILLUMINA-${libraryid} --rg SM:${libraryid} --rg PL:illumina --rg PU:ILLUMINA-${libraryid}-${seqtype} 2> "${libraryid}"_bt2.log | samtools sort -@ ${split_cpus} -O bam > "${libraryid}"_"${seqtype}".mapped.bam samtools index "${libraryid}"_"${seqtype}".mapped.bam ${size} """ } @@ -2341,7 +2342,7 @@ ch_input_for_genotyping_pileupcaller.doubleStranded .set {ch_prepped_for_pileupcaller_double} process genotyping_pileupcaller { - label 'sc_small' + label 'mc_small' tag "${strandedness}" publishDir "${params.outdir}/genotyping", mode: params.publish_dir_mode @@ -2575,7 +2576,7 @@ process sexdeterrmine_prep { // As we collect all files for a single sex_deterrmine run, we DO NOT use the normal input/output tuple process sexdeterrmine { - label 'sc_small' + label 'mc_small' publishDir "${params.outdir}/sex_determination", mode: params.publish_dir_mode input: @@ -2911,7 +2912,7 @@ process output_documentation { */ process get_software_versions { - label 'sc_tiny' + label 'mc_small' publishDir "${params.outdir}/pipeline_info", mode: params.publish_dir_mode, saveAs: { filename -> if (filename.indexOf(".csv") > 0) filename From cf76c0290716eacbebd86d597e86bf1e89d17f2a Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Wed, 26 May 2021 09:33:48 +0200 Subject: [PATCH 4/7] Remove duplicate samtools --- main.nf | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/main.nf b/main.nf index dd876556b..9777d4cef 100644 --- a/main.nf +++ b/main.nf @@ -1558,14 +1558,14 @@ process samtools_filter { ''' } else if ( 
"${params.bam_unmapped_type}" == "bam" && params.bam_filter_minreadlength == 0 ){ ''' - samtools view -h !{bam} | samtools view - -@ !{task.cpus - 1} -f4 -o !{libraryid}.unmapped.bam - samtools view -h !{bam} | samtools view - -@ !{task.cpus - 1} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam + samtools view -h !{bam} -@ !{task.cpus - 1} -f4 -o !{libraryid}.unmapped.bam + samtools view -h !{bam} -@ !{task.cpus - 1} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam samtools index !{libraryid}.filtered.bam !{size} ''' } else if ( "${params.bam_unmapped_type}" == "fastq" && params.bam_filter_minreadlength == 0 ){ ''' - samtools view -h !{bam} | samtools view - -@ !{task.cpus - 1} -f4 -o !{libraryid}.unmapped.bam - samtools view -h !{bam} | samtools view - -@ !{task.cpus - 1} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam + samtools view -h !{bam} -@ !{task.cpus - 1} -f4 -o !{libraryid}.unmapped.bam + samtools view -h !{bam} -@ !{task.cpus - 1} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam samtools index !{libraryid}.filtered.bam !{size} ## FASTQ @@ -1574,8 +1574,8 @@ process samtools_filter { ''' } else if ( "${params.bam_unmapped_type}" == "both" && params.bam_filter_minreadlength == 0 ){ ''' - samtools view -h !{bam} | samtools view - -@ !{task.cpus - 1} -f4 -o !{libraryid}.unmapped.bam - samtools view -h !{bam} | samtools view - -@ !{task.cpus - 1} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam + samtools view -h !{bam} -@ !{task.cpus - 1} -f4 -o !{libraryid}.unmapped.bam + samtools view -h !{bam} -@ !{task.cpus - 1} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam samtools index !{libraryid}.filtered.bam !{size} ## FASTQ @@ -1596,15 +1596,15 @@ process samtools_filter { ''' } else if ( "${params.bam_unmapped_type}" == "bam" && params.bam_filter_minreadlength != 0 ){ ''' - samtools view -h !{bam} | 
samtools view - -@ !{task.cpus - 1} -f4 -o !{libraryid}.unmapped.bam - samtools view -h !{bam} | samtools view - -@ !{task.cpus - 1} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam + samtools view -h !{bam} -@ !{task.cpus - 1} -f4 -o !{libraryid}.unmapped.bam + samtools view -h !{bam} -@ !{task.cpus - 1} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam filter_bam_fragment_length.py -a -l !{params.bam_filter_minreadlength} -o !{libraryid} tmp_mapped.bam samtools index !{libraryid}.filtered.bam !{size} ''' } else if ( "${params.bam_unmapped_type}" == "fastq" && params.bam_filter_minreadlength != 0 ){ ''' - samtools view -h !{bam} | samtools view - -@ !{task.cpus - 1} -f4 -o !{libraryid}.unmapped.bam - samtools view -h !{bam} | samtools view - -@ !{task.cpus - 1} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam + samtools view -h !{bam} -@ !{task.cpus - 1} -f4 -o !{libraryid}.unmapped.bam + samtools view -h !{bam} -@ !{task.cpus - 1} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam filter_bam_fragment_length.py -a -l !{params.bam_filter_minreadlength} -o !{libraryid} tmp_mapped.bam samtools index !{libraryid}.filtered.bam !{size} @@ -1614,8 +1614,8 @@ process samtools_filter { ''' } else if ( "${params.bam_unmapped_type}" == "both" && params.bam_filter_minreadlength != 0 ){ ''' - samtools view -h !{bam} | samtools view - -@ !{task.cpus - 1} -f4 -o !{libraryid}.unmapped.bam - samtools view -h !{bam} | samtools view - -@ !{task.cpus - 1} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam + samtools view -h !{bam} -@ !{task.cpus - 1} -f4 -o !{libraryid}.unmapped.bam + samtools view -h !{bam} -@ !{task.cpus - 1} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam filter_bam_fragment_length.py -a -l !{params.bam_filter_minreadlength} -o !{libraryid} tmp_mapped.bam samtools index !{libraryid}.filtered.bam !{size} From eeec5d40d2b6531c7d80525346c1dd4fb7fdce52 Mon Sep 17 00:00:00 2001 
From: James Fellows Yates Date: Wed, 26 May 2021 11:20:04 +0200 Subject: [PATCH 5/7] remove leftover samtools -h and add missing split_cpus --- main.nf | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/main.nf b/main.nf index 9777d4cef..04ff39600 100644 --- a/main.nf +++ b/main.nf @@ -1162,7 +1162,7 @@ process bwa { """ bwa aln -t ${task.cpus} $fasta ${r1} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -o ${params.bwaalno} -f ${libraryid}.r1.sai bwa aln -t ${task.cpus} $fasta ${r2} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -o ${params.bwaalno} -f ${libraryid}.r2.sai - bwa sampe -r "@RG\\tID:ILLUMINA-${libraryid}\\tSM:${libraryid}\\tPL:illumina\\tPU:ILLUMINA-${libraryid}-${seqtype}" $fasta ${libraryid}.r1.sai ${libraryid}.r2.sai ${r1} ${r2} | samtools sort -@ ${task.cpus -1} -O bam - > ${libraryid}_"${seqtype}".mapped.bam + bwa sampe -r "@RG\\tID:ILLUMINA-${libraryid}\\tSM:${libraryid}\\tPL:illumina\\tPU:ILLUMINA-${libraryid}-${seqtype}" $fasta ${libraryid}.r1.sai ${libraryid}.r2.sai ${r1} ${r2} | samtools sort -@ ${task.cpus - 1} -O bam - > ${libraryid}_"${seqtype}".mapped.bam samtools index "${libraryid}"_"${seqtype}".mapped.bam ${size} """ } else { @@ -1205,8 +1205,8 @@ process bwamem { """ } else { """ - bwa mem -t ${task.cpus} $fasta $r1 -R "@RG\\tID:ILLUMINA-${libraryid}\\tSM:${libraryid}\\tPL:illumina\\tPU:ILLUMINA-${libraryid}-${seqtype}" | samtools sort -@ ${split_cpus} -O bam - > "${libraryid}"_"${seqtype}".mapped.bam - samtools index -@ ${split_cpus} "${libraryid}"_"${seqtype}".mapped.bam ${size} + bwa mem -t ${split_cpus} $fasta $r1 -R "@RG\\tID:ILLUMINA-${libraryid}\\tSM:${libraryid}\\tPL:illumina\\tPU:ILLUMINA-${libraryid}-${seqtype}" | samtools sort -@ ${split_cpus} -O bam - > "${libraryid}"_"${seqtype}".mapped.bam + samtools index -@ ${task.cpus} "${libraryid}"_"${seqtype}".mapped.bam ${size} """ } @@ -1558,14 +1558,14 @@ process samtools_filter { ''' } else 
if ( "${params.bam_unmapped_type}" == "bam" && params.bam_filter_minreadlength == 0 ){ ''' - samtools view -h !{bam} -@ !{task.cpus - 1} -f4 -o !{libraryid}.unmapped.bam - samtools view -h !{bam} -@ !{task.cpus - 1} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam + samtools view -h !{bam} -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam + samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam samtools index !{libraryid}.filtered.bam !{size} ''' } else if ( "${params.bam_unmapped_type}" == "fastq" && params.bam_filter_minreadlength == 0 ){ ''' - samtools view -h !{bam} -@ !{task.cpus - 1} -f4 -o !{libraryid}.unmapped.bam - samtools view -h !{bam} -@ !{task.cpus - 1} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam + samtools view -h !{bam} -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam + samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam samtools index !{libraryid}.filtered.bam !{size} ## FASTQ @@ -1574,8 +1574,8 @@ process samtools_filter { ''' } else if ( "${params.bam_unmapped_type}" == "both" && params.bam_filter_minreadlength == 0 ){ ''' - samtools view -h !{bam} -@ !{task.cpus - 1} -f4 -o !{libraryid}.unmapped.bam - samtools view -h !{bam} -@ !{task.cpus - 1} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam + samtools view -h !{bam} -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam + samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o !{libraryid}.filtered.bam samtools index !{libraryid}.filtered.bam !{size} ## FASTQ @@ -1596,15 +1596,15 @@ process samtools_filter { ''' } else if ( "${params.bam_unmapped_type}" == "bam" && params.bam_filter_minreadlength != 0 ){ ''' - samtools view -h !{bam} -@ !{task.cpus - 1} -f4 -o !{libraryid}.unmapped.bam - samtools view -h !{bam} -@ !{task.cpus - 1} -F4 -q 
!{params.bam_mapping_quality_threshold} -o tmp_mapped.bam + samtools view -h !{bam} -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam + samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam filter_bam_fragment_length.py -a -l !{params.bam_filter_minreadlength} -o !{libraryid} tmp_mapped.bam samtools index !{libraryid}.filtered.bam !{size} ''' } else if ( "${params.bam_unmapped_type}" == "fastq" && params.bam_filter_minreadlength != 0 ){ ''' - samtools view -h !{bam} -@ !{task.cpus - 1} -f4 -o !{libraryid}.unmapped.bam - samtools view -h !{bam} -@ !{task.cpus - 1} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam + samtools view -h !{bam} -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam + samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam filter_bam_fragment_length.py -a -l !{params.bam_filter_minreadlength} -o !{libraryid} tmp_mapped.bam samtools index !{libraryid}.filtered.bam !{size} @@ -1614,8 +1614,8 @@ process samtools_filter { ''' } else if ( "${params.bam_unmapped_type}" == "both" && params.bam_filter_minreadlength != 0 ){ ''' - samtools view -h !{bam} -@ !{task.cpus - 1} -f4 -o !{libraryid}.unmapped.bam - samtools view -h !{bam} -@ !{task.cpus - 1} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam + samtools view -h !{bam} -@ !{task.cpus} -f4 -o !{libraryid}.unmapped.bam + samtools view -h !{bam} -@ !{task.cpus} -F4 -q !{params.bam_mapping_quality_threshold} -o tmp_mapped.bam filter_bam_fragment_length.py -a -l !{params.bam_filter_minreadlength} -o !{libraryid} tmp_mapped.bam samtools index !{libraryid}.filtered.bam !{size} @@ -2009,7 +2009,7 @@ process mapdamage_rescaling { // Optionally perform further aDNA evaluation or filtering for just reads with damage etc. 
process pmdtools { - label 'mc_small' + label 'mc_medium' tag "${libraryid}" publishDir "${params.outdir}/pmdtools", mode: params.publish_dir_mode @@ -2036,12 +2036,12 @@ process pmdtools { def platypus = params.pmdtools_platypus ? '--platypus' : '' """ #Run Filtering step - samtools calmd -b ${bam} ${fasta} | samtools view -h - | pmdtools --threshold ${params.pmdtools_threshold} ${treatment} ${snpcap} --header | samtools view -@ ${task.cpus - 1} -Sb - > "${libraryid}".pmd.bam + samtools calmd -b ${bam} ${fasta} | pmdtools --threshold ${params.pmdtools_threshold} ${treatment} ${snpcap} --header | samtools view -@ ${task.cpus - 2} -Sb - > "${libraryid}".pmd.bam #Run Calc Range step ## To allow early shut off of pipe: https://github.com/nextflow-io/nextflow/issues/1564 trap 'if [[ \$? == 141 ]]; then echo "Shutting samtools early due to -n parameter" && samtools index ${libraryid}.pmd.bam ${size}; exit 0; fi' EXIT - samtools calmd -b ${bam} ${fasta} | samtools view -h - | pmdtools --deamination ${platypus} --range ${params.pmdtools_range} ${treatment} ${snpcap} -n ${params.pmdtools_max_reads} > "${libraryid}".cpg.range."${params.pmdtools_range}".txt + samtools calmd -b ${bam} ${fasta} | pmdtools --deamination ${platypus} --range ${params.pmdtools_range} ${treatment} ${snpcap} -n ${params.pmdtools_max_reads} > "${libraryid}".cpg.range."${params.pmdtools_range}".txt echo "Running indexing" samtools index ${libraryid}.pmd.bam ${size} From 8fafdbc72ca51e1cbe2f1bd52b1b38fa9d3efdad Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Wed, 26 May 2021 13:09:46 +0200 Subject: [PATCH 6/7] Re-add pmdtools view - required apparently --- main.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/main.nf b/main.nf index 04ff39600..a7ea49e57 100644 --- a/main.nf +++ b/main.nf @@ -2035,15 +2035,15 @@ process pmdtools { def size = params.large_ref ? '-c' : '' def platypus = params.pmdtools_platypus ? 
'--platypus' : '' """ + ## Note all `samtools view` steps ARE required for some reason #Run Filtering step - samtools calmd -b ${bam} ${fasta} | pmdtools --threshold ${params.pmdtools_threshold} ${treatment} ${snpcap} --header | samtools view -@ ${task.cpus - 2} -Sb - > "${libraryid}".pmd.bam + samtools calmd -b ${bam} ${fasta} | samtools view -h - | pmdtools --threshold ${params.pmdtools_threshold} ${treatment} ${snpcap} --header | samtools view -Sb - > "${libraryid}".pmd.bam #Run Calc Range step ## To allow early shut off of pipe: https://github.com/nextflow-io/nextflow/issues/1564 trap 'if [[ \$? == 141 ]]; then echo "Shutting samtools early due to -n parameter" && samtools index ${libraryid}.pmd.bam ${size}; exit 0; fi' EXIT - samtools calmd -b ${bam} ${fasta} | pmdtools --deamination ${platypus} --range ${params.pmdtools_range} ${treatment} ${snpcap} -n ${params.pmdtools_max_reads} > "${libraryid}".cpg.range."${params.pmdtools_range}".txt + samtools calmd -b ${bam} ${fasta} | samtools view -h - | pmdtools --deamination ${platypus} --range ${params.pmdtools_range} ${treatment} ${snpcap} -n ${params.pmdtools_max_reads} > "${libraryid}".cpg.range."${params.pmdtools_range}".txt - echo "Running indexing" samtools index ${libraryid}.pmd.bam ${size} """ } From 49956c1f962f328f980ee6c53f7dc661a7a8176a Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Wed, 26 May 2021 14:30:01 +0200 Subject: [PATCH 7/7] Remove -b flag that @mahesh-panchal correctly pointed out was the problem --- main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index a7ea49e57..5abe42fef 100644 --- a/main.nf +++ b/main.nf @@ -2037,12 +2037,12 @@ process pmdtools { """ ## Note all `samtools view` steps ARE required for some reason #Run Filtering step - samtools calmd -b ${bam} ${fasta} | samtools view -h - | pmdtools --threshold ${params.pmdtools_threshold} ${treatment} ${snpcap} --header | samtools view -Sb - > "${libraryid}".pmd.bam + samtools 
calmd ${bam} ${fasta} | pmdtools --threshold ${params.pmdtools_threshold} ${treatment} ${snpcap} --header | samtools view -Sb - > "${libraryid}".pmd.bam #Run Calc Range step ## To allow early shut off of pipe: https://github.com/nextflow-io/nextflow/issues/1564 trap 'if [[ \$? == 141 ]]; then echo "Shutting samtools early due to -n parameter" && samtools index ${libraryid}.pmd.bam ${size}; exit 0; fi' EXIT - samtools calmd -b ${bam} ${fasta} | samtools view -h - | pmdtools --deamination ${platypus} --range ${params.pmdtools_range} ${treatment} ${snpcap} -n ${params.pmdtools_max_reads} > "${libraryid}".cpg.range."${params.pmdtools_range}".txt + samtools calmd ${bam} ${fasta} | pmdtools --deamination ${platypus} --range ${params.pmdtools_range} ${treatment} ${snpcap} -n ${params.pmdtools_max_reads} > "${libraryid}".cpg.range."${params.pmdtools_range}".txt samtools index ${libraryid}.pmd.bam ${size} """