From 8ed89e4699cbd49c91015bd83821d85433dda9fb Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Sat, 16 Jul 2022 07:48:18 +0000 Subject: [PATCH 1/8] Remove old dumps --- main.nf | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/main.nf b/main.nf index b8f4f489e..9c938a375 100644 --- a/main.nf +++ b/main.nf @@ -800,7 +800,7 @@ process adapter_removal { input: tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(r1), file(r2) from ch_fastp_for_adapterremoval - path adapterlist from ch_adapterlist.collect().dump(tag: "Adapter list") + path adapterlist from ch_adapterlist.collect() output: tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("output/*{combined.fq,.se.truncated,pair1.truncated}.gz") into ch_output_from_adapterremoval_r1 @@ -968,13 +968,10 @@ if ( params.skip_collapse ){ // AdapterRemoval bypass when not running it if (!params.skip_adapterremoval) { ch_output_from_adapterremoval.mix(ch_fastp_for_skipadapterremoval) - .dump(tag: "post_ar_adapterremoval_decision_skipar") .filter { it =~/.*combined.fq.gz|.*truncated.gz/ } - .dump(tag: "ar_bypass") .into { ch_adapterremoval_for_post_ar_trimming; ch_adapterremoval_for_skip_post_ar_trimming; } } else { ch_fastp_for_skipadapterremoval - .dump(tag: "post_ar_adapterremoval_decision_withar") .into { ch_adapterremoval_for_post_ar_trimming; ch_adapterremoval_for_skip_post_ar_trimming; } } @@ -1076,7 +1073,6 @@ ch_branched_for_lanemerge = ch_inlinebarcoderemoval_for_lanemerge [ samplename, libraryid, lane, seqtype, organism, strandedness, udg, r1, r2 ] } - .dump(tag: "lanemerge_bypass_decision") .branch { skip_merge: it[7].size() == 1 // Can skip merging if only single lanes merge_me: it[7].size() > 1 @@ -1097,7 +1093,6 @@ ch_branched_for_lanemerge_skipme = ch_branched_for_lanemerge.skip_merge [ samplename, libraryid, lane, seqtype, organism, strandedness, udg, r1, r2 ] } - .dump(tag: "lanemerge_reconfigure") 
ch_branched_for_lanemerge_ready = ch_branched_for_lanemerge.merge_me @@ -1125,7 +1120,7 @@ process lanemerge { publishDir "${params.outdir}/lanemerging", mode: params.publish_dir_mode input: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(r1), path(r2) from ch_branched_for_lanemerge_ready.dump(tag: "lange_merge_input") + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(r1), path(r2) from ch_branched_for_lanemerge_ready output: tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*_R1_lanemerged.fq.gz") into ch_lanemerge_for_mapping_r1 @@ -1149,7 +1144,6 @@ process lanemerge { // Ensuring always valid R2 file even if doesn't exist for AWS if ( ( params.skip_collapse || params.skip_adapterremoval ) ) { ch_lanemerge_for_mapping_r1 - .dump(tag: "post_lanemerge_reconfigure") .mix(ch_lanemerge_for_mapping_r2) .groupTuple(by: [0,1,2,3,4,5,6]) .map{ @@ -1264,8 +1258,8 @@ process bwa { publishDir "${params.outdir}/mapping/bwa", mode: params.publish_dir_mode input: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(r1), path(r2) from ch_lanemerge_for_bwa.dump(tag: "bwa_input_reads") - path index from bwa_index.collect().dump(tag: "input_index") + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(r1), path(r2) from ch_lanemerge_for_bwa + path index from bwa_index.collect() output: tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*.mapped.bam"), path("*.{bai,csi}") into ch_output_from_bwa @@ -1582,7 +1576,6 @@ ch_branched_for_seqtypemerge = ch_mapping_for_seqtype_merging [ samplename, libraryid, lane, seqtype_new, organism, strandedness, udg, r1, r2 ] } - .dump(tag: "pre_seqtype_decision") .branch { skip_merge: it[7].size() == 1 // Can skip merging if only single lanes merge_me: it[7].size() > 1 @@ -1955,7 +1948,6 @@ ch_input_for_librarymerging.merge_me [it[0], libraryid, it[2], seqtype, it[4], it[5], 
it[6], bam, bai ] } - .dump(tag: "input_for_lib_merging") .set { ch_fixedinput_for_librarymerging } process library_merge { @@ -1964,7 +1956,7 @@ process library_merge { publishDir "${params.outdir}/merged_bams/initial", mode: params.publish_dir_mode input: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file(bai) from ch_fixedinput_for_librarymerging.dump(tag: "library_merge_input") + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file(bai) from ch_fixedinput_for_librarymerging output: tuple samplename, val("${samplename}_libmerged"), lane, seqtype, organism, strandedness, udg, path("*_libmerged_rmdup.bam"), path("*_libmerged_rmdup.bam.{bai,csi}") into ch_output_from_librarymerging @@ -2510,7 +2502,7 @@ process genotyping_pileupcaller { file fai from ch_fai_for_pileupcaller.collect() file dict from ch_dict_for_pileupcaller.collect() path(bed) from ch_bed_for_pileupcaller.collect() - path(snp) from ch_snp_for_pileupcaller.collect().dump(tag: "pileupcaller_snp_file") + path(snp) from ch_snp_for_pileupcaller.collect() output: tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("pileupcaller.${strandedness}.*") into ch_for_eigenstrat_snp_coverage @@ -2541,7 +2533,7 @@ process eigenstrat_snp_coverage { params.run_genotyping && params.genotyping_tool == 'pileupcaller' input: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*") from ch_for_eigenstrat_snp_coverage.dump(tag:'eigenstrat_input') + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*") from ch_for_eigenstrat_snp_coverage output: tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*.json") into ch_eigenstrat_snp_cov_for_multiqc @@ -2672,7 +2664,7 @@ process vcf2genome { if (!params.additional_vcf_files) { ch_vcfs_for_multivcfanalyzer = ch_ug_for_multivcfanalyzer.map{ it[-1] }.collect() } else { - ch_vcfs_for_multivcfanalyzer 
= ch_ug_for_multivcfanalyzer.map{ it[-1] }.mix(ch_extravcfs_for_multivcfanalyzer).collect().dump(tag: "postmix") + ch_vcfs_for_multivcfanalyzer = ch_ug_for_multivcfanalyzer.map{ it[-1] }.mix(ch_extravcfs_for_multivcfanalyzer).collect() } process multivcfanalyzer { @@ -3341,7 +3333,6 @@ workflow.onError { def extract_data(tsvFile) { Channel.fromPath(tsvFile) .splitCsv(header: true, sep: '\t') - .dump(tag:'tsv_extract') .map { row -> def expected_keys = ['Sample_Name', 'Library_ID', 'Lane', 'Colour_Chemistry', 'SeqType', 'Organism', 'Strandedness', 'UDG_Treatment', 'R1', 'R2', 'BAM'] From 5168c1253b7902a83e2b4490e37f923b26c2ffae Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Sat, 16 Jul 2022 09:08:46 +0000 Subject: [PATCH 2/8] Fix failed of endorspy to execute or cache on resume --- CHANGELOG.md | 1 + main.nf | 36 +++++++++++++++++++++++++++--------- 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e9836d64b..9557d3f37 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
- [#882](https://github.com/nf-core/eager/pull/882) Define DSL1 execution explicitly, as new versions Nextflow made DSL2 default (♥ to & fix from @Lehmann-Fabian) - [#879](https://github.com/nf-core/eager/issues/879) Add missing threads parameter for pre-clipping FastQC for single end data that caused insufficient memory in some cases (♥ to @marcel-keller for reporting) +- [#880](https://github.com/nf-core/eager/issues/880) Fix failure of endorSpy to be cached or reexecuted on resume (♥ to @KathrinNaegele, @TCLamnidis, @mahesh-panchal for reporting and debugging) - [#885](https://github.com/nf-core/eager/issues/885) Specify task memory for all tools in get_software_versions to account for incompatibilty of java with some SGE clusters causing hanging of the process (♥ to @maxibor for reporting) - [#887](https://github.com/nf-core/eager/issues/887) Clarify what is considered 'ultra-short' reads in the help text of clip_readlength, for when you may wish to turn of length filtering during AdapterRemoval (♥ to @TCLamnidis for reporting) - [#889](https://github.com/nf-core/eager/issues/889) Remove/updated parameters from benchmarking test profiles (♥ to @TCLamnidis for reporting) diff --git a/main.nf b/main.nf index 9c938a375..0479bacff 100644 --- a/main.nf +++ b/main.nf @@ -769,7 +769,7 @@ ch_input_for_fastp.fourcol [ samplename, libraryid, lane, seqtype, organism, strandedness, udg, r1, r2 ] } - .set { ch_skipfastp_for_merge } + .set { ch_skipfastp_for_merge } ch_output_from_fastp .map{ @@ -1062,7 +1062,7 @@ ch_branched_for_lanemerge = ch_inlinebarcoderemoval_for_lanemerge it -> def samplename = it[0] def libraryid = it[1] - def lane = it[2] + def lane = 0 def seqtype = it[3] def organism = it[4] def strandedness = it[5] @@ -1100,7 +1100,7 @@ ch_branched_for_lanemerge_ready = ch_branched_for_lanemerge.merge_me it -> def samplename = it[0] def libraryid = it[1] - def lane = it[2] + def lane = 0 def seqtype = it[3] def organism = it[4] def strandedness = it[5] @@ -1150,7 
+1150,7 @@ if ( ( params.skip_collapse || params.skip_adapterremoval ) ) { it -> def samplename = it[0] def libraryid = it[1] - def lane = it[2] + def lane = 0 def seqtype = it[3] def organism = it[4] def strandedness = it[5] @@ -1169,7 +1169,7 @@ if ( ( params.skip_collapse || params.skip_adapterremoval ) ) { it -> def samplename = it[0] def libraryid = it[1] - def lane = it[2] + def lane = 0 def seqtype = it[3] def organism = it[4] def strandedness = it[5] @@ -1557,7 +1557,7 @@ ch_branched_for_seqtypemerge = ch_mapping_for_seqtype_merging it -> def samplename = it[0] def libraryid = it[1] - def lane = it[2] + def lane = 0 def seqtype = it[3].unique() // How to deal with this? def organism = it[4] def strandedness = it[5] @@ -1565,9 +1565,13 @@ ch_branched_for_seqtypemerge = ch_mapping_for_seqtype_merging def r1 = it[7] def r2 = it[8] - // We will assume if mixing it is better to set as PE as this is informative + // 1. We will assume if mixing it is better to set as PE as this is informative // for DeDup (and markduplicates doesn't care), but will throw a warning! - def seqtype_new = seqtype.flatten().size() > 1 ? 'PE' : seqtype + // 2. We will also flatten to a single value to address problems with 'unstable' + // Nextflow ArrayBag object types not allowing the .join to work between resumes + // See: https://github.com/nf-core/eager/issues/880 + + def seqtype_new = seqtype.flatten().size() > 1 ? 'PE' : seqtype.flatten()[0] if ( seqtype.flatten().size() > 1 && params.dedupper == 'dedup' ) { log.warn "[nf-core/eager] Warning: you are running DeDup on BAMs with a mixture of PE/SE data for library: ${libraryid}. DeDup is designed for PE data only, deduplication maybe suboptimal!" 
@@ -1771,6 +1775,19 @@ process samtools_flagstat_after_filter { if (params.run_bam_filtering) { ch_flagstat_for_endorspy .join(ch_bam_filtered_flagstat_for_endorspy, by: [0,1,2,3,4,5,6]) + .map { it -> + def samplename = it[0] + def libraryid = it[1] + def lane = it[2] + def seqtype = it[3] + def organism = it[4] + def strandedness = it[5] + def udg = it[6] + def stats = file(it[7]) + def poststats = file(it[8]) + + [samplename, libraryid, lane, seqtype, organism, strandedness, udg, stats, poststats ] + } .set{ ch_allflagstats_for_endorspy } } else { @@ -1787,7 +1804,8 @@ if (params.run_bam_filtering) { def stats = file(it[7]) def poststats = file("$projectDir/assets/nf-core_eager_dummy.txt") - [samplename, libraryid, lane, seqtype, organism, strandedness, udg, stats, poststats ] } + [samplename, libraryid, lane, seqtype, organism, strandedness, udg, stats, poststats ] + } .set{ ch_allflagstats_for_endorspy } } From d36a0438d82f216d1f359f7a2001a4f4c33facd7 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Sat, 16 Jul 2022 09:09:46 +0000 Subject: [PATCH 3/8] Grammar --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9557d3f37..884bbefa1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
- [#882](https://github.com/nf-core/eager/pull/882) Define DSL1 execution explicitly, as new versions Nextflow made DSL2 default (♥ to & fix from @Lehmann-Fabian) - [#879](https://github.com/nf-core/eager/issues/879) Add missing threads parameter for pre-clipping FastQC for single end data that caused insufficient memory in some cases (♥ to @marcel-keller for reporting) -- [#880](https://github.com/nf-core/eager/issues/880) Fix failure of endorSpy to be cached or reexecuted on resume (♥ to @KathrinNaegele, @TCLamnidis, @mahesh-panchal for reporting and debugging) +- [#880](https://github.com/nf-core/eager/issues/880) Fix failure of endorSpy to be cached or reexecuted on resume (♥ to @KathrinNaegele, @TCLamnidis, & @mahesh-panchal for reporting and debugging) - [#885](https://github.com/nf-core/eager/issues/885) Specify task memory for all tools in get_software_versions to account for incompatibilty of java with some SGE clusters causing hanging of the process (♥ to @maxibor for reporting) - [#887](https://github.com/nf-core/eager/issues/887) Clarify what is considered 'ultra-short' reads in the help text of clip_readlength, for when you may wish to turn of length filtering during AdapterRemoval (♥ to @TCLamnidis for reporting) - [#889](https://github.com/nf-core/eager/issues/889) Remove/updated parameters from benchmarking test profiles (♥ to @TCLamnidis for reporting) From 10747130454aa08eab4c6e9dd200dc674aef1f68 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Sat, 16 Jul 2022 09:15:24 +0000 Subject: [PATCH 4/8] Remove unused code --- main.nf | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/main.nf b/main.nf index 0479bacff..79dd28174 100644 --- a/main.nf +++ b/main.nf @@ -1062,7 +1062,7 @@ ch_branched_for_lanemerge = ch_inlinebarcoderemoval_for_lanemerge it -> def samplename = it[0] def libraryid = it[1] - def lane = 0 + def lane = it[2] def seqtype = it[3] def organism = it[4] def strandedness = it[5] @@ -1100,7 +1100,7 @@ ch_branched_for_lanemerge_ready = ch_branched_for_lanemerge.merge_me it -> def samplename = it[0] def libraryid = it[1] - def lane = 0 + def lane = it[2] def seqtype = it[3] def organism = it[4] def strandedness = it[5] @@ -1128,7 +1128,7 @@ process lanemerge { script: if ( seqtype == 'PE' && ( params.skip_collapse || params.skip_adapterremoval ) ){ - lane = 0 + def lane = 0 """ cat ${r1} > "${libraryid}"_R1_lanemerged.fq.gz cat ${r2} > "${libraryid}"_R2_lanemerged.fq.gz @@ -1150,7 +1150,7 @@ if ( ( params.skip_collapse || params.skip_adapterremoval ) ) { it -> def samplename = it[0] def libraryid = it[1] - def lane = 0 + def lane = it[2] def seqtype = it[3] def organism = it[4] def strandedness = it[5] @@ -1775,19 +1775,6 @@ process samtools_flagstat_after_filter { if (params.run_bam_filtering) { ch_flagstat_for_endorspy .join(ch_bam_filtered_flagstat_for_endorspy, by: [0,1,2,3,4,5,6]) - .map { it -> - def samplename = it[0] - def libraryid = it[1] - def lane = it[2] - def seqtype = it[3] - def organism = it[4] - def strandedness = it[5] - def udg = it[6] - def stats = file(it[7]) - def poststats = file(it[8]) - - [samplename, libraryid, lane, seqtype, organism, strandedness, udg, stats, poststats ] - } .set{ ch_allflagstats_for_endorspy } } else { From d2418f64a8cb1d4b1cdb7f1b7a971eb58ccf78c9 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Sat, 16 Jul 2022 09:19:56 +0000 Subject: [PATCH 5/8] Undo more debugging --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 79dd28174..e1c7d7b1e 100644 --- a/main.nf +++ b/main.nf @@ -1169,7 +1169,7 @@ if ( ( params.skip_collapse || params.skip_adapterremoval ) ) { it -> def samplename = it[0] def libraryid = it[1] - def lane = 0 + def lane = it[2] def seqtype = it[3] def organism = it[4] def strandedness = it[5] From b4b58591427023961452664ec5bcfad9a28ace37 Mon Sep 17 00:00:00 2001 From: "Thiseas C. Lamnidis" Date: Wed, 27 Jul 2022 14:28:37 +0200 Subject: [PATCH 6/8] Add path call in pileupcaller bams --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index e1c7d7b1e..a289208c6 100644 --- a/main.nf +++ b/main.nf @@ -2502,7 +2502,7 @@ process genotyping_pileupcaller { params.run_genotyping && params.genotyping_tool == 'pileupcaller' input: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, bam, bai from ch_prepped_for_pileupcaller_double.mix(ch_prepped_for_pileupcaller_single) + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(bam), path(bai) from ch_prepped_for_pileupcaller_double.mix(ch_prepped_for_pileupcaller_single) file fasta from ch_fasta_for_genotyping_pileupcaller.collect() file fai from ch_fai_for_pileupcaller.collect() file dict from ch_dict_for_pileupcaller.collect() From 637cd73c3a695290379212cfde1cd10b286b9398 Mon Sep 17 00:00:00 2001 From: "Thiseas C. Lamnidis" Date: Thu, 28 Jul 2022 18:21:57 +0200 Subject: [PATCH 7/8] fix pileupcaller path input. paths in bamtrim. 
--- main.nf | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/main.nf b/main.nf index a289208c6..419cd7cc2 100644 --- a/main.nf +++ b/main.nf @@ -1787,7 +1787,7 @@ if (params.run_bam_filtering) { def seqtype = it[3] def organism = it[4] def strandedness = it[5] - def udg = it[6] + def udg = it[6] def stats = file(it[7]) def poststats = file("$projectDir/assets/nf-core_eager_dummy.txt") @@ -2229,7 +2229,7 @@ process bam_trim { tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(bam), path(bai) from ch_bamutils_decision.totrim output: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.trimmed.bam"), file("*.trimmed.bam.{bai,csi}") into ch_trimmed_from_bamutils + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*.trimmed.bam"), path("*.trimmed.bam.{bai,csi}") into ch_trimmed_from_bamutils script: def softclip = params.bamutils_softclip ? '-c' : '' @@ -2261,7 +2261,7 @@ ch_trimmed_formerge = ch_bamutils_decision.notrim def seqtype = it[3] def organism = it[4] def strandedness = it[5] - def udg = it[6] + def udg = it[6] def bam = it[7].flatten() def bai = it[8].flatten() @@ -2487,10 +2487,36 @@ ch_damagemanipulation_for_genotyping_pileupcaller // Create pileupcaller input tuples ch_input_for_genotyping_pileupcaller.singleStranded .groupTuple(by:[5]) + .map{ + def samplename = it[0] + def libraryid = it[1] + def lane = it[2] + def seqtype = it[3] + def organism = it[4] + def strandedness = it[5] + def udg = it[6] + def bam = it[7].flatten() + def bai = it[8].flatten() + + [samplename, libraryid, lane, seqtype, organism, strandedness, udg, bam, bai ] + } .set {ch_prepped_for_pileupcaller_single} ch_input_for_genotyping_pileupcaller.doubleStranded .groupTuple(by:[5]) + .map{ + def samplename = it[0] + def libraryid = it[1] + def lane = it[2] + def seqtype = it[3] + def organism = it[4] + def strandedness = it[5] + def udg = it[6] + def 
bam = it[7].flatten() + def bai = it[8].flatten() + + [samplename, libraryid, lane, seqtype, organism, strandedness, udg, bam, bai ] + } .set {ch_prepped_for_pileupcaller_double} process genotyping_pileupcaller { From 4f19b50770101dc3dbbc4f717fc2b3b89261e842 Mon Sep 17 00:00:00 2001 From: "Thiseas C. Lamnidis" Date: Thu, 28 Jul 2022 18:32:39 +0200 Subject: [PATCH 8/8] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 884bbefa1..e3d240390 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - [#895](https://github.com/nf-core/eager/issues/895) Output documentation typo fix and added location of output docs in pipeline summary (♥ to @RodrigoBarquera for reporting) - [#897](https://github.com/nf-core/eager/issues/897) Fix pipeline crash if no Kraken2 results generated (♥ to @alexandregilardet for reporting) - [#899](https://github.com/nf-core/eager/issues/897) Fix pipeline crash for circulargenerator if reference file does not end in .fasta (♥ to @scarlhoff for reporting) +- Fix staging of input BAMs for the genotyping_pileupcaller process, a downstream consequence of the changes introduced when fixing endorSpy caching. ### `Dependencies`