Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

- [#882](https://github.com/nf-core/eager/pull/882) Define DSL1 execution explicitly, as new versions Nextflow made DSL2 default (♥ to & fix from @Lehmann-Fabian)
- [#879](https://github.com/nf-core/eager/issues/879) Add missing threads parameter for pre-clipping FastQC for single end data that caused insufficient memory in some cases (♥ to @marcel-keller for reporting)
- [#880](https://github.com/nf-core/eager/issues/880) Fix failure of endorSpy to be cached or reexecuted on resume (♥ to @KathrinNaegele, @TCLamnidis, & @mahesh-panchal for reporting and debugging)
- [#885](https://github.com/nf-core/eager/issues/885) Specify task memory for all tools in get_software_versions to account for incompatibility of java with some SGE clusters causing hanging of the process (♥ to @maxibor for reporting)
- [#887](https://github.com/nf-core/eager/issues/887) Clarify what is considered 'ultra-short' reads in the help text of clip_readlength, for when you may wish to turn off length filtering during AdapterRemoval (♥ to @TCLamnidis for reporting)
- [#889](https://github.com/nf-core/eager/issues/889) Removed/updated parameters from benchmarking test profiles (♥ to @TCLamnidis for reporting)
Expand All @@ -19,6 +20,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- [#897](https://github.com/nf-core/eager/issues/897) Fix pipeline crash for circulargenerator if reference file does not end in .fasta (♥ to @scarlhoff for reporting)
- Fixed some missing default values in the nextflow parameter schema JSON
- [#789](https://github.com/nf-core/eager/issues/789) Substantial speed and memory optimisation of the `extract_map_reads.py` script (♥ to @ivelsko for reporting, @maxibor for optimisation)
- Fix staging of input bams for the genotyping_pileupcaller process; downstream fix for changes introduced when fixing endorSpy caching.

### `Dependencies`

Expand Down
76 changes: 49 additions & 27 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -769,7 +769,7 @@ ch_input_for_fastp.fourcol
[ samplename, libraryid, lane, seqtype, organism, strandedness, udg, r1, r2 ]

}
.set { ch_skipfastp_for_merge }
.set { ch_skipfastp_for_merge }

ch_output_from_fastp
.map{
Expand Down Expand Up @@ -800,7 +800,7 @@ process adapter_removal {

input:
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(r1), file(r2) from ch_fastp_for_adapterremoval
path adapterlist from ch_adapterlist.collect().dump(tag: "Adapter list")
path adapterlist from ch_adapterlist.collect()

output:
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("output/*{combined.fq,.se.truncated,pair1.truncated}.gz") into ch_output_from_adapterremoval_r1
Expand Down Expand Up @@ -968,13 +968,10 @@ if ( params.skip_collapse ){
// AdapterRemoval bypass when not running it
if (!params.skip_adapterremoval) {
ch_output_from_adapterremoval.mix(ch_fastp_for_skipadapterremoval)
.dump(tag: "post_ar_adapterremoval_decision_skipar")
.filter { it =~/.*combined.fq.gz|.*truncated.gz/ }
.dump(tag: "ar_bypass")
.into { ch_adapterremoval_for_post_ar_trimming; ch_adapterremoval_for_skip_post_ar_trimming; }
} else {
ch_fastp_for_skipadapterremoval
.dump(tag: "post_ar_adapterremoval_decision_withar")
.into { ch_adapterremoval_for_post_ar_trimming; ch_adapterremoval_for_skip_post_ar_trimming; }
}

Expand Down Expand Up @@ -1076,7 +1073,6 @@ ch_branched_for_lanemerge = ch_inlinebarcoderemoval_for_lanemerge
[ samplename, libraryid, lane, seqtype, organism, strandedness, udg, r1, r2 ]

}
.dump(tag: "lanemerge_bypass_decision")
.branch {
skip_merge: it[7].size() == 1 // Can skip merging if only single lanes
merge_me: it[7].size() > 1
Expand All @@ -1097,7 +1093,6 @@ ch_branched_for_lanemerge_skipme = ch_branched_for_lanemerge.skip_merge

[ samplename, libraryid, lane, seqtype, organism, strandedness, udg, r1, r2 ]
}
.dump(tag: "lanemerge_reconfigure")


ch_branched_for_lanemerge_ready = ch_branched_for_lanemerge.merge_me
Expand Down Expand Up @@ -1125,15 +1120,15 @@ process lanemerge {
publishDir "${params.outdir}/lanemerging", mode: params.publish_dir_mode

input:
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(r1), path(r2) from ch_branched_for_lanemerge_ready.dump(tag: "lange_merge_input")
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(r1), path(r2) from ch_branched_for_lanemerge_ready

output:
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*_R1_lanemerged.fq.gz") into ch_lanemerge_for_mapping_r1
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*_R2_lanemerged.fq.gz") optional true into ch_lanemerge_for_mapping_r2

script:
if ( seqtype == 'PE' && ( params.skip_collapse || params.skip_adapterremoval ) ){
lane = 0
def lane = 0
"""
cat ${r1} > "${libraryid}"_R1_lanemerged.fq.gz
cat ${r2} > "${libraryid}"_R2_lanemerged.fq.gz
Expand All @@ -1149,7 +1144,6 @@ process lanemerge {
// Ensuring always valid R2 file even if doesn't exist for AWS
if ( ( params.skip_collapse || params.skip_adapterremoval ) ) {
ch_lanemerge_for_mapping_r1
.dump(tag: "post_lanemerge_reconfigure")
.mix(ch_lanemerge_for_mapping_r2)
.groupTuple(by: [0,1,2,3,4,5,6])
.map{
Expand Down Expand Up @@ -1264,8 +1258,8 @@ process bwa {
publishDir "${params.outdir}/mapping/bwa", mode: params.publish_dir_mode

input:
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(r1), path(r2) from ch_lanemerge_for_bwa.dump(tag: "bwa_input_reads")
path index from bwa_index.collect().dump(tag: "input_index")
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(r1), path(r2) from ch_lanemerge_for_bwa
path index from bwa_index.collect()

output:
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*.mapped.bam"), path("*.{bai,csi}") into ch_output_from_bwa
Expand Down Expand Up @@ -1564,17 +1558,21 @@ ch_branched_for_seqtypemerge = ch_mapping_for_seqtype_merging
it ->
def samplename = it[0]
def libraryid = it[1]
def lane = it[2]
def lane = 0
def seqtype = it[3].unique() // How to deal with this?
def organism = it[4]
def strandedness = it[5]
def udg = it[6]
def r1 = it[7]
def r2 = it[8]

// We will assume if mixing it is better to set as PE as this is informative
// 1. We will assume if mixing it is better to set as PE as this is informative
// for DeDup (and markduplicates doesn't care), but will throw a warning!
def seqtype_new = seqtype.flatten().size() > 1 ? 'PE' : seqtype
// 2. We will also flatten to a single value to address problems with 'unstable'
// Nextflow ArrayBag object types not allowing the .join to work between resumes
// See: https://github.com/nf-core/eager/issues/880

def seqtype_new = seqtype.flatten().size() > 1 ? 'PE' : seqtype.flatten()[0]

if ( seqtype.flatten().size() > 1 && params.dedupper == 'dedup' ) {
log.warn "[nf-core/eager] Warning: you are running DeDup on BAMs with a mixture of PE/SE data for library: ${libraryid}. DeDup is designed for PE data only, deduplication maybe suboptimal!"
Expand All @@ -1583,7 +1581,6 @@ ch_branched_for_seqtypemerge = ch_mapping_for_seqtype_merging
[ samplename, libraryid, lane, seqtype_new, organism, strandedness, udg, r1, r2 ]

}
.dump(tag: "pre_seqtype_decision")
.branch {
skip_merge: it[7].size() == 1 // Can skip merging if only single lanes
merge_me: it[7].size() > 1
Expand Down Expand Up @@ -1791,11 +1788,12 @@ if (params.run_bam_filtering) {
def seqtype = it[3]
def organism = it[4]
def strandedness = it[5]
def udg = it[6]
def udg = it[6]
def stats = file(it[7])
def poststats = file("$projectDir/assets/nf-core_eager_dummy.txt")

[samplename, libraryid, lane, seqtype, organism, strandedness, udg, stats, poststats ] }
[samplename, libraryid, lane, seqtype, organism, strandedness, udg, stats, poststats ]
}
.set{ ch_allflagstats_for_endorspy }
}

Expand Down Expand Up @@ -1956,7 +1954,6 @@ ch_input_for_librarymerging.merge_me

[it[0], libraryid, it[2], seqtype, it[4], it[5], it[6], bam, bai ]
}
.dump(tag: "input_for_lib_merging")
.set { ch_fixedinput_for_librarymerging }

process library_merge {
Expand All @@ -1965,7 +1962,7 @@ process library_merge {
publishDir "${params.outdir}/merged_bams/initial", mode: params.publish_dir_mode

input:
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file(bai) from ch_fixedinput_for_librarymerging.dump(tag: "library_merge_input")
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file(bai) from ch_fixedinput_for_librarymerging

output:
tuple samplename, val("${samplename}_libmerged"), lane, seqtype, organism, strandedness, udg, path("*_libmerged_rmdup.bam"), path("*_libmerged_rmdup.bam.{bai,csi}") into ch_output_from_librarymerging
Expand Down Expand Up @@ -2233,7 +2230,7 @@ process bam_trim {
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(bam), path(bai) from ch_bamutils_decision.totrim

output:
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.trimmed.bam"), file("*.trimmed.bam.{bai,csi}") into ch_trimmed_from_bamutils
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*.trimmed.bam"), path("*.trimmed.bam.{bai,csi}") into ch_trimmed_from_bamutils

script:
def softclip = params.bamutils_softclip ? '-c' : ''
Expand Down Expand Up @@ -2265,7 +2262,7 @@ ch_trimmed_formerge = ch_bamutils_decision.notrim
def seqtype = it[3]
def organism = it[4]
def strandedness = it[5]
def udg = it[6]
def udg = it[6]
def bam = it[7].flatten()
def bai = it[8].flatten()

Expand Down Expand Up @@ -2491,10 +2488,36 @@ ch_damagemanipulation_for_genotyping_pileupcaller
// Create pileupcaller input tuples
ch_input_for_genotyping_pileupcaller.singleStranded
.groupTuple(by:[5])
.map{
def samplename = it[0]
def libraryid = it[1]
def lane = it[2]
def seqtype = it[3]
def organism = it[4]
def strandedness = it[5]
def udg = it[6]
def bam = it[7].flatten()
def bai = it[8].flatten()

[samplename, libraryid, lane, seqtype, organism, strandedness, udg, bam, bai ]
}
.set {ch_prepped_for_pileupcaller_single}

ch_input_for_genotyping_pileupcaller.doubleStranded
.groupTuple(by:[5])
.map{
def samplename = it[0]
def libraryid = it[1]
def lane = it[2]
def seqtype = it[3]
def organism = it[4]
def strandedness = it[5]
def udg = it[6]
def bam = it[7].flatten()
def bai = it[8].flatten()

[samplename, libraryid, lane, seqtype, organism, strandedness, udg, bam, bai ]
}
.set {ch_prepped_for_pileupcaller_double}

process genotyping_pileupcaller {
Expand All @@ -2506,12 +2529,12 @@ process genotyping_pileupcaller {
params.run_genotyping && params.genotyping_tool == 'pileupcaller'

input:
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, bam, bai from ch_prepped_for_pileupcaller_double.mix(ch_prepped_for_pileupcaller_single)
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(bam), path(bai) from ch_prepped_for_pileupcaller_double.mix(ch_prepped_for_pileupcaller_single)
file fasta from ch_fasta_for_genotyping_pileupcaller.collect()
file fai from ch_fai_for_pileupcaller.collect()
file dict from ch_dict_for_pileupcaller.collect()
path(bed) from ch_bed_for_pileupcaller.collect()
path(snp) from ch_snp_for_pileupcaller.collect().dump(tag: "pileupcaller_snp_file")
path(snp) from ch_snp_for_pileupcaller.collect()

output:
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("pileupcaller.${strandedness}.*") into ch_for_eigenstrat_snp_coverage
Expand Down Expand Up @@ -2542,7 +2565,7 @@ process eigenstrat_snp_coverage {
params.run_genotyping && params.genotyping_tool == 'pileupcaller'

input:
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*") from ch_for_eigenstrat_snp_coverage.dump(tag:'eigenstrat_input')
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*") from ch_for_eigenstrat_snp_coverage

output:
tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*.json") into ch_eigenstrat_snp_cov_for_multiqc
Expand Down Expand Up @@ -2673,7 +2696,7 @@ process vcf2genome {
if (!params.additional_vcf_files) {
ch_vcfs_for_multivcfanalyzer = ch_ug_for_multivcfanalyzer.map{ it[-1] }.collect()
} else {
ch_vcfs_for_multivcfanalyzer = ch_ug_for_multivcfanalyzer.map{ it[-1] }.mix(ch_extravcfs_for_multivcfanalyzer).collect().dump(tag: "postmix")
ch_vcfs_for_multivcfanalyzer = ch_ug_for_multivcfanalyzer.map{ it[-1] }.mix(ch_extravcfs_for_multivcfanalyzer).collect()
}

process multivcfanalyzer {
Expand Down Expand Up @@ -3342,7 +3365,6 @@ workflow.onError {
def extract_data(tsvFile) {
Channel.fromPath(tsvFile)
.splitCsv(header: true, sep: '\t')
.dump(tag:'tsv_extract')
.map { row ->

def expected_keys = ['Sample_Name', 'Library_ID', 'Lane', 'Colour_Chemistry', 'SeqType', 'Organism', 'Strandedness', 'UDG_Treatment', 'R1', 'R2', 'BAM']
Expand Down