From 66d4a68732413cd95f56ca16262b3c008d5a304a Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 30 Apr 2021 14:23:06 +0200 Subject: [PATCH 01/16] Add bcftools stats modules --- .github/workflows/ci.yml | 3 + CHANGELOG.md | 1 + assets/multiqc_config.yaml | 2 + docs/images/nf-core_eager_logo_small.svg | 503 +++++++++++++++++++++++ environment.yml | 2 +- main.nf | 50 ++- nextflow.config | 1 + nextflow_schema.json | 7 + 8 files changed, 556 insertions(+), 13 deletions(-) create mode 100644 docs/images/nf-core_eager_logo_small.svg diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 442e3c4c8..10abe9f80 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -135,6 +135,9 @@ jobs: - name: GENOTYPING_ANGSD Test running ANGSD genotype likelihood calculation run: | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_humanbam,docker --run_genotyping --genotyping_tool 'angsd' + - name: GENOTYPING_BCFTOOLS Test running FreeBayes with bcftools stats turned on + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_genotyping --genotyping_tool 'freebayes' --run_bcftools_stats - name: SKIPPING Test checking all skip steps work i.e. input bam, skipping straight to genotyping run: | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_bam,docker --skip_fastqc --skip_adapterremoval --skip_deduplication --skip_qualimap --skip_preseq --skip_damage_calculation --run_genotyping --genotyping_tool 'freebayes' diff --git a/CHANGELOG.md b/CHANGELOG.md index 3bc8ae070..2657eacf8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
### `Added` - [#729](https://github.com/nf-core/eager/issues/729) Added Bowtie2 flag `--maxins` for PE mapping modern DNA mapping contexts +- [#317](https://github.com/nf-core/eager/issues/317) Added bcftools stats for general genotyping statistics of VCF files ### `Fixed` diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml index 0d8c7c28a..d7e962a5d 100644 --- a/assets/multiqc_config.yaml +++ b/assets/multiqc_config.yaml @@ -25,6 +25,7 @@ run_modules: - samtools - sexdeterrmine - hops + - bcftools extra_fn_clean_exts: - '_fastp' @@ -91,6 +92,7 @@ top_modules: - 'mtnucratio' - 'qualimap' - 'sexdeterrmine' + - 'bcftools' - 'multivcfanalyzer': path_filters: - '*MultiVCFAnalyzer.json' diff --git a/docs/images/nf-core_eager_logo_small.svg b/docs/images/nf-core_eager_logo_small.svg new file mode 100644 index 000000000..9e756f4e7 --- /dev/null +++ b/docs/images/nf-core_eager_logo_small.svg @@ -0,0 +1,503 @@ + +image/svg+xml + + +nf- core/eager diff --git a/environment.yml b/environment.yml index 893175dce..d42c7295e 100644 --- a/environment.yml +++ b/environment.yml @@ -49,4 +49,4 @@ dependencies: - bioconda::eigenstratdatabasetools=1.0.2 - bioconda::mapdamage2=2.2.0 - bioconda::bbmap=38.87 - + - bioconda::bcftools=1.12.1 \ No newline at end of file diff --git a/main.nf b/main.nf index 274e96fad..c96d683ec 100644 --- a/main.nf +++ b/main.nf @@ -179,7 +179,7 @@ if("${params.fasta}".endsWith(".gz")){ path zipped_fasta from file(params.fasta) // path doesn't like it if a string of an object is not prefaced with a root dir (/), so use file() to resolve string before parsing to `path` output: - path "$unzip" into ch_fasta into 
ch_fasta_for_bwaindex,ch_fasta_for_bt2index,ch_fasta_for_faidx,ch_fasta_for_seqdict,ch_fasta_for_circulargenerator,ch_fasta_for_circularmapper,ch_fasta_for_damageprofiler,ch_fasta_for_qualimap,ch_fasta_for_pmdtools,ch_fasta_for_genotyping_ug,ch_fasta_for_genotyping_hc,ch_fasta_for_genotyping_freebayes,ch_fasta_for_genotyping_pileupcaller,ch_fasta_for_vcf2genome,ch_fasta_for_multivcfanalyzer,ch_fasta_for_genotyping_angsd,ch_fasta_for_damagerescaling + path "$unzip" into ch_fasta into ch_fasta_for_bwaindex,ch_fasta_for_bt2index,ch_fasta_for_faidx,ch_fasta_for_seqdict,ch_fasta_for_circulargenerator,ch_fasta_for_circularmapper,ch_fasta_for_damageprofiler,ch_fasta_for_qualimap,ch_fasta_for_pmdtools,ch_fasta_for_genotyping_ug,ch_fasta_for_genotyping_hc,ch_fasta_for_genotyping_freebayes,ch_fasta_for_genotyping_pileupcaller,ch_fasta_for_vcf2genome,ch_fasta_for_multivcfanalyzer,ch_fasta_for_genotyping_angsd,ch_fasta_for_damagerescaling,ch_fasta_for_bcftools_stats script: unzip = zipped_fasta.toString() - '.gz' @@ -190,7 +190,7 @@ if("${params.fasta}".endsWith(".gz")){ } else { fasta_for_indexing = Channel .fromPath("${params.fasta}", checkIfExists: true) - .into{ ch_fasta_for_bwaindex; ch_fasta_for_bt2index; ch_fasta_for_faidx; ch_fasta_for_seqdict; ch_fasta_for_circulargenerator; ch_fasta_for_circularmapper; ch_fasta_for_damageprofiler; ch_fasta_for_qualimap; ch_fasta_for_pmdtools; ch_fasta_for_genotyping_ug; ch_fasta__for_genotyping_hc; ch_fasta_for_genotyping_hc; ch_fasta_for_genotyping_freebayes; ch_fasta_for_genotyping_pileupcaller; ch_fasta_for_vcf2genome; ch_fasta_for_multivcfanalyzer;ch_fasta_for_genotyping_angsd;ch_fasta_for_damagerescaling } + .into{ ch_fasta_for_bwaindex; ch_fasta_for_bt2index; ch_fasta_for_faidx; ch_fasta_for_seqdict; ch_fasta_for_circulargenerator; ch_fasta_for_circularmapper; ch_fasta_for_damageprofiler; ch_fasta_for_qualimap; ch_fasta_for_pmdtools; ch_fasta_for_genotyping_ug; ch_fasta__for_genotyping_hc; ch_fasta_for_genotyping_hc; 
ch_fasta_for_genotyping_freebayes; ch_fasta_for_genotyping_pileupcaller; ch_fasta_for_vcf2genome; ch_fasta_for_multivcfanalyzer;ch_fasta_for_genotyping_angsd;ch_fasta_for_damagerescaling,ch_fasta_for_bcftools_stats } } // Check that fasta index file path ends in '.fai' @@ -2230,7 +2230,7 @@ process genotyping_ug { file dict from ch_dict_for_ug.collect() output: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*vcf.gz") into ch_ug_for_multivcfanalyzer,ch_ug_for_vcf2genome + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*vcf.gz") into ch_ug_for_multivcfanalyzer,ch_ug_for_vcf2genome,ch_ug_for_bcftools_stats tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.realign.{bam,bai}") optional true script: @@ -2245,7 +2245,7 @@ process genotyping_ug { $keep_realign - pigz -p ${task.cpus} ${samplename}.unifiedgenotyper.vcf + bgzip -@ ${task.cpus} ${samplename}.unifiedgenotyper.vcf """ else if (params.gatk_dbsnp != '') """ @@ -2256,7 +2256,7 @@ process genotyping_ug { $keep_realign - pigz -p ${task.cpus} ${samplename}.unifiedgenotyper.vcf + bgzip -@ ${task.cpus} ${samplename}.unifiedgenotyper.vcf """ } @@ -2271,7 +2271,7 @@ process genotyping_hc { params.run_genotyping && params.genotyping_tool == 'hc' input: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file(bai) from ch_damagemanipulation_for_genotyping_hc + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file(bai) from ch_damagemanipulation_for_genotyping_hc,ch_hc_for_bcftools_stats file fasta from ch_fasta_for_genotyping_hc.collect() file fai from ch_fai_for_hc.collect() file dict from ch_dict_for_hc.collect() @@ -2283,13 +2283,13 @@ process genotyping_hc { if (params.gatk_dbsnp == '') """ gatk HaplotypeCaller -R ${fasta} -I ${bam} -O ${samplename}.haplotypecaller.vcf -stand-call-conf ${params.gatk_call_conf} --sample-ploidy ${params.gatk_ploidy} 
--output-mode ${params.gatk_hc_out_mode} --emit-ref-confidence ${params.gatk_hc_emitrefconf} - pigz -p ${task.cpus} ${samplename}.haplotypecaller.vcf + bgzip -@ ${task.cpus} ${samplename}.haplotypecaller.vcf """ else if (params.gatk_dbsnp != '') """ gatk HaplotypeCaller -R ${fasta} -I ${bam} -O ${samplename}.haplotypecaller.vcf --dbsnp ${params.gatk_dbsnp} -stand-call-conf ${params.gatk_call_conf} --sample_ploidy ${params.gatk_ploidy} --output_mode ${params.gatk_hc_out_mode} --emit-ref-confidence ${params.gatk_hc_emitrefconf} - pigz -p ${task.cpus} ${samplename}.haplotypecaller.vcf + bgzip -@ ${task.cpus} ${samplename}.haplotypecaller.vcf """ } @@ -2304,7 +2304,7 @@ process genotyping_freebayes { params.run_genotyping && params.genotyping_tool == 'freebayes' input: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file(bai) from ch_damagemanipulation_for_genotyping_freebayes + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file(bai) from ch_damagemanipulation_for_genotyping_freebayes,ch_fb_for_bcftools_stats file fasta from ch_fasta_for_genotyping_freebayes.collect() file fai from ch_fai_for_freebayes.collect() file dict from ch_dict_for_freebayes.collect() @@ -2316,7 +2316,7 @@ process genotyping_freebayes { def skip_coverage = "${params.freebayes_g}" == 0 ? 
"" : "-g ${params.freebayes_g}" """ freebayes -f ${fasta} -p ${params.freebayes_p} -C ${params.freebayes_C} ${skip_coverage} ${bam} > ${samplename}.freebayes.vcf - pigz -p ${task.cpus} ${samplename}.freebayes.vcf + bgzip -@ ${task.cpus} ${samplename}.freebayes.vcf """ } @@ -2448,6 +2448,31 @@ process genotyping_angsd { """ } +//////////////////////////////////// +/* -- GENOTYPING STATS -- */ +//////////////////////////////////// + +process bcftools_stats { + label 'mc_small' + tag "${samplename}" + publishDir "${params.outdir}/bcftools/stats", mode: params.publish_dir_mode + + when: + params.run_bcftools_stats + + input: + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(vcf) from ch_ug_for_vcf2genome.mix(ch_hc_for_bcftools_stats,ch_fb_for_bcftools_stats) + file fasta from ch_fasta_for_bcftools_stats.collect() + + output: + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*.vcf.stats") into ch_bcftools_stats_for_multiqc + + script: + """ + bcftools stats *.vcf.gz -F ${fasta} > ${samplename}.vcf.stats + """ +} + //////////////////////////////////// /* -- CONSENSUS CALLING -- */ //////////////////////////////////// @@ -2473,7 +2498,7 @@ process vcf2genome { def out = "${params.vcf2genome_outfile}" == '' ? "${samplename}.fasta" : "${params.vcf2genome_outfile}" def fasta_head = "${params.vcf2genome_header}" == '' ? "${samplename}" : "${params.vcf2genome_header}" """ - pigz -f -d -p ${task.cpus} *.vcf.gz + bgzip -f -d -@ ${task.cpus} *.vcf.gz vcf2genome -Xmx${task.memory.toGiga()}g -draft ${out}.fasta -draftname "${fasta_head}" -in ${vcf.baseName} -minc ${params.vcf2genome_minc} -minfreq ${params.vcf2genome_minfreq} -minq ${params.vcf2genome_minq} -ref ${fasta} -refMod ${out}_refmod.fasta -uncertain ${out}_uncertainy.fasta pigz -p ${task.cpus} *.fasta pigz -p ${task.cpus} *.vcf @@ -2516,7 +2541,7 @@ process multivcfanalyzer { script: def write_freqs = params.write_allele_frequencies ? 
"T" : "F" """ - gunzip -f *.vcf.gz + bgzip -f -d -@ ${task.cpus} *.vcf.gz multivcfanalyzer -Xmx${task.memory.toGiga()}g ${params.snp_eff_results} ${fasta} ${params.reference_gff_annotations} . ${write_freqs} ${params.min_genotype_quality} ${params.min_base_coverage} ${params.min_allele_freq_hom} ${params.min_allele_freq_het} ${params.reference_gff_exclude} *.vcf pigz -p ${task.cpus} *.tsv *.txt snpAlignment.fasta snpAlignmentIncludingRefGenome.fasta fullAlignment.fasta """ @@ -2998,6 +3023,7 @@ process multiqc { file ('hops/*') from ch_hops_for_multiqc.collect().ifEmpty([]) file ('nuclear_contamination/*') from ch_nuclear_contamination_for_multiqc.collect().ifEmpty([]) file ('genotyping/*') from ch_eigenstrat_snp_cov_for_multiqc.collect().ifEmpty([]) + file ('bcftools_stats') from ch_bcftools_stats_for_multiqc.collect().ifEmpty([]) file workflow_summary from ch_workflow_summary.collectFile(name: "workflow_summary_mqc.yaml") output: diff --git a/nextflow.config b/nextflow.config index 72127c379..fecdbd504 100644 --- a/nextflow.config +++ b/nextflow.config @@ -167,6 +167,7 @@ params { angsd_glformat = 'binary' angsd_createfasta = false angsd_fastamethod = 'random' + run_bcftools_stats = test_stresstest_human //Consensus sequence generation run_vcf2genome = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 26a2fbf0f..14d6acbd1 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1147,6 +1147,13 @@ "random", "common" ] + }, + "run_bcftools_stats": { + "type": "boolean", + "default": true, + "description": "Turn on bcftools stats generation for VCF based variant calling statistics", + "help_text": "Runs `bcftools stats` against VCF files from GATK and FreeBayes genotypers.\n\nIt will automatically include the FASTA reference for INDEL-related statistics.", + "fa_icon": "far fa-chart-bar" } }, "fa_icon": "fas fa-sliders-h", From 51f5790c63a325c99a53334a0eb54a75760acd2c Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 3 May 2021 
10:08:53 +0200 Subject: [PATCH 02/16] Add bcftools - still missing output docs --- environment.yml | 2 +- main.nf | 28 ++++++++++++++++++++++------ nextflow.config | 2 +- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/environment.yml b/environment.yml index d42c7295e..739f18e6d 100644 --- a/environment.yml +++ b/environment.yml @@ -49,4 +49,4 @@ dependencies: - bioconda::eigenstratdatabasetools=1.0.2 - bioconda::mapdamage2=2.2.0 - bioconda::bbmap=38.87 - - bioconda::bcftools=1.12.1 \ No newline at end of file + - bioconda::bcftools=1.9 diff --git a/main.nf b/main.nf index c96d683ec..20d82dceb 100644 --- a/main.nf +++ b/main.nf @@ -190,7 +190,7 @@ if("${params.fasta}".endsWith(".gz")){ } else { fasta_for_indexing = Channel .fromPath("${params.fasta}", checkIfExists: true) - .into{ ch_fasta_for_bwaindex; ch_fasta_for_bt2index; ch_fasta_for_faidx; ch_fasta_for_seqdict; ch_fasta_for_circulargenerator; ch_fasta_for_circularmapper; ch_fasta_for_damageprofiler; ch_fasta_for_qualimap; ch_fasta_for_pmdtools; ch_fasta_for_genotyping_ug; ch_fasta__for_genotyping_hc; ch_fasta_for_genotyping_hc; ch_fasta_for_genotyping_freebayes; ch_fasta_for_genotyping_pileupcaller; ch_fasta_for_vcf2genome; ch_fasta_for_multivcfanalyzer;ch_fasta_for_genotyping_angsd;ch_fasta_for_damagerescaling,ch_fasta_for_bcftools_stats } + .into{ ch_fasta_for_bwaindex; ch_fasta_for_bt2index; ch_fasta_for_faidx; ch_fasta_for_seqdict; ch_fasta_for_circulargenerator; ch_fasta_for_circularmapper; ch_fasta_for_damageprofiler; ch_fasta_for_qualimap; ch_fasta_for_pmdtools; ch_fasta_for_genotyping_ug; ch_fasta__for_genotyping_hc; ch_fasta_for_genotyping_hc; ch_fasta_for_genotyping_freebayes; ch_fasta_for_genotyping_pileupcaller; ch_fasta_for_vcf2genome; ch_fasta_for_multivcfanalyzer;ch_fasta_for_genotyping_angsd;ch_fasta_for_damagerescaling;ch_fasta_for_bcftools_stats } } // Check that fasta index file path ends in '.fai' @@ -2213,8 +2213,24 @@ if ( params.run_genotyping && params.genotyping_source 
== 'raw' ) { } + + // Unified Genotyper - although not-supported, better for aDNA (because HC does de novo assembly which requires higher coverages), and needed for MultiVCFAnalyzer +// initialise empty bcftool related empty channels + +if ( params.genotyping_tool == 'ug' ) { + ch_hc_for_bcftools_stats = Channel.empty() + ch_fb_for_bcftools_stats = Channel.empty() +} else if ( params.genotyping_tool == 'hc' ) { + ch_ug_for_bcftools_stats = Channel.empty() + ch_fb_for_bcftools_stats = Channel.empty() +} else if ( params.genotyping_tool == 'fb ') { + ch_ug_for_bcftools_stats = Channel.empty() + ch_hc_for_bcftools_stats = Channel.empty() +} + + process genotyping_ug { label 'mc_small' tag "${samplename}" @@ -2271,13 +2287,13 @@ process genotyping_hc { params.run_genotyping && params.genotyping_tool == 'hc' input: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file(bai) from ch_damagemanipulation_for_genotyping_hc,ch_hc_for_bcftools_stats + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file(bai) from ch_damagemanipulation_for_genotyping_hc file fasta from ch_fasta_for_genotyping_hc.collect() file fai from ch_fai_for_hc.collect() file dict from ch_dict_for_hc.collect() output: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*vcf.gz") + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*vcf.gz") into ch_hc_for_bcftools_stats script: if (params.gatk_dbsnp == '') @@ -2304,13 +2320,13 @@ process genotyping_freebayes { params.run_genotyping && params.genotyping_tool == 'freebayes' input: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file(bai) from ch_damagemanipulation_for_genotyping_freebayes,ch_fb_for_bcftools_stats + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file(bai) from ch_damagemanipulation_for_genotyping_freebayes file fasta from 
ch_fasta_for_genotyping_freebayes.collect() file fai from ch_fai_for_freebayes.collect() file dict from ch_dict_for_freebayes.collect() output: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*vcf.gz") + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*vcf.gz") into ch_fb_for_bcftools_stats script: def skip_coverage = "${params.freebayes_g}" == 0 ? "" : "-g ${params.freebayes_g}" @@ -2461,7 +2477,7 @@ process bcftools_stats { params.run_bcftools_stats input: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(vcf) from ch_ug_for_vcf2genome.mix(ch_hc_for_bcftools_stats,ch_fb_for_bcftools_stats) + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(vcf) from ch_ug_for_bcftools_stats.mix(ch_hc_for_bcftools_stats,ch_fb_for_bcftools_stats) file fasta from ch_fasta_for_bcftools_stats.collect() output: diff --git a/nextflow.config b/nextflow.config index fecdbd504..f4fc14a0f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -167,7 +167,7 @@ params { angsd_glformat = 'binary' angsd_createfasta = false angsd_fastamethod = 'random' - run_bcftools_stats = test_stresstest_human + run_bcftools_stats = true //Consensus sequence generation run_vcf2genome = false From 56a50bc6daa3f1bf0bd70e90e77fe1d7a2677eeb Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 3 May 2021 11:19:54 +0200 Subject: [PATCH 03/16] Remove unnecessary branching --- main.nf | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/main.nf b/main.nf index 20d82dceb..b80230641 100644 --- a/main.nf +++ b/main.nf @@ -2219,16 +2219,16 @@ if ( params.run_genotyping && params.genotyping_source == 'raw' ) { // initialise empty bcftool related empty channels -if ( params.genotyping_tool == 'ug' ) { - ch_hc_for_bcftools_stats = Channel.empty() - ch_fb_for_bcftools_stats = Channel.empty() -} else if ( params.genotyping_tool == 'hc' ) { - 
ch_ug_for_bcftools_stats = Channel.empty() - ch_fb_for_bcftools_stats = Channel.empty() -} else if ( params.genotyping_tool == 'fb ') { - ch_ug_for_bcftools_stats = Channel.empty() - ch_hc_for_bcftools_stats = Channel.empty() -} +//if ( params.genotyping_tool == 'ug' ) { +// ch_hc_for_bcftools_stats = Channel.empty() +// ch_fb_for_bcftools_stats = Channel.empty() +//} else if ( params.genotyping_tool == 'hc' ) { +// ch_ug_for_bcftools_stats = Channel.empty() +// ch_fb_for_bcftools_stats = Channel.empty() +//} else if ( params.genotyping_tool == 'fb ') { +// ch_ug_for_bcftools_stats = Channel.empty() +// ch_hc_for_bcftools_stats = Channel.empty() +//} process genotyping_ug { From f9bc10a6e5bcc054a5d8791df4788e500ed6c0e6 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Wed, 5 May 2021 22:19:17 +0200 Subject: [PATCH 04/16] Add docs and fix compatibility with MVA and VCF2Genomes --- docs/output.md | 26 ++++++++++++++++++++++++++ main.nf | 8 ++++---- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/docs/output.md b/docs/output.md index cc07d9a69..7bd1084b3 100644 --- a/docs/output.md +++ b/docs/output.md @@ -618,6 +618,31 @@ If this correlation is not observed, your data is skewed towards higher coverage

+### Bcftools + +#### Background + +Bcftools is a toolkit for processing and summarising VCF files, i.e. variant call format files. nf-core/eager currently uses bcftools for the `stats` functionality. This summarises in a text file a range of statistics about VCF files, produced by GATK and FreeBayes variant callers. + +#### Variant Substitution Types + +This stacked bar plot shows you the distribution of all types of point-mutation variants away from the reference nucleotide at each position (e.g. A>C, A>G etc.). + +For low-coverage non-UDG treated, non-trimmed nor re-scaled aDNA data, you expect to see C>T substitutions as the largest category, due to the most common ancient DNA damage being C to T deamination. + +#### Variant Quality + +This gives you the distribution of variant-call _qualities_ in your VCF files. Each variant will get given a 'Phred-scale' like value that represents the confidence of the variant caller that it has made the right call. The scale is very similar to that of base-call values in FASTQ files (as assessed by FastQC). Distributions that have peaks at higher variant quality scores (>= 30) suggest more confident variant calls. However, in cases of low-coverage aDNA data, these distributions may not be so good. + +More detailed explanation of variant quality scores can be seen in the Broad Institute's [GATK documentation](https://gatk.broadinstitute.org/hc/en-us/articles/360035531872-Phred-scaled-quality-scores). + +#### Indel Distribution + +This plot shows you the distribution of the sizes of insertion- and deletions (InDels) in the variant calling (assuming you configured your variant caller parameters to do so). Low-coverage aDNA data often will not have high enough coverage to accurately assess InDels. In cases of high-coverage data of small-genomes such as microbes, large numbers of InDels, however, may indicate your reads are actually from a _relative_ of the reference mapped to - and should be verified downstream. 
+#### Variant depths + +This plot shows the distribution of depth coverages of each variant called. Typically higher coverage will result in higher quality variant calls (see Variant Quality, above), however in many cases in aDNA these may be low and unequally distributed (due to uneven mapping coverage from contamination). + ### MultiVCFAnalyzer #### Background @@ -674,3 +699,4 @@ Each module has it's own output directory which sit alongside the `MultiQC/` dir * `maltextract/`: this contains a `results` directory in which contains the output from MaltExtract - typically one folder for each filter type, an error and a log file. The characteristics of each node (e.g. damage, read lengths, edit distances - each in different txt formats) can be seen in each sub-folder of the filter folders. Output can be visualised either with the [HOPS postprocessing script](https://github.com/rhuebler/HOPS) or [MEx-IPA](https://github.com/jfy133/MEx-IPA) * `consensus_sequence/`: this contains three FASTA files from VCF2Genome of a consensus sequence based on the reference FASTA with each sample's unique modifications. The main FASTA is a standard file with bases not passing the specified thresholds as Ns. The two other FASTAS (`_refmod.fasta.gz`) and (`_uncertainity.fasta.gz`) are IUPAC uncertainty codes (rather than Ns) and a special number-based uncertainty system used for other downstream tools, respectively. * `librarymerged_bams/`: these contain the final BAM files that would go into genotyping (if genotyping is turned on). This means the files will contain all libraries of a given sample (including trimmed non-UDG or half-UDG treated libraries, if BAM trimming turned on) +* `bcftools`: this currently contains a single directory called `stats/` that includes general statistics on variant callers producing VCF files as output by `bcftools stats`. These includethings such as the number of positions, number of transititions/transversions and depth coverage of SNPs etc. 
These are only produced if `--run_bcftools_stats` is supplied. \ No newline at end of file diff --git a/main.nf b/main.nf index b80230641..e8d9982a0 100644 --- a/main.nf +++ b/main.nf @@ -2514,9 +2514,9 @@ process vcf2genome { def out = "${params.vcf2genome_outfile}" == '' ? "${samplename}.fasta" : "${params.vcf2genome_outfile}" def fasta_head = "${params.vcf2genome_header}" == '' ? "${samplename}" : "${params.vcf2genome_header}" """ - bgzip -f -d -@ ${task.cpus} *.vcf.gz - vcf2genome -Xmx${task.memory.toGiga()}g -draft ${out}.fasta -draftname "${fasta_head}" -in ${vcf.baseName} -minc ${params.vcf2genome_minc} -minfreq ${params.vcf2genome_minfreq} -minq ${params.vcf2genome_minq} -ref ${fasta} -refMod ${out}_refmod.fasta -uncertain ${out}_uncertainy.fasta - pigz -p ${task.cpus} *.fasta + pigz -d -f -p ${task.cpus} ${vcf} + vcf2genome -Xmx${task.memory.toGiga()}g -draft ${out} -draftname "${fasta_head}" -in ${vcf.baseName} -minc ${params.vcf2genome_minc} -minfreq ${params.vcf2genome_minfreq} -minq ${params.vcf2genome_minq} -ref ${fasta} -refMod ${out}_refmod.fasta -uncertain ${out}_uncertainty.fasta + pigz -f -p ${task.cpus} ${out}* pigz -p ${task.cpus} *.vcf """ } @@ -2557,7 +2557,7 @@ process multivcfanalyzer { script: def write_freqs = params.write_allele_frequencies ? "T" : "F" """ - bgzip -f -d -@ ${task.cpus} *.vcf.gz + pigz -d -f -p ${task.cpus} ${vcf} multivcfanalyzer -Xmx${task.memory.toGiga()}g ${params.snp_eff_results} ${fasta} ${params.reference_gff_annotations} . ${write_freqs} ${params.min_genotype_quality} ${params.min_base_coverage} ${params.min_allele_freq_hom} ${params.min_allele_freq_het} ${params.reference_gff_exclude} *.vcf pigz -p ${task.cpus} *.tsv *.txt snpAlignment.fasta snpAlignmentIncludingRefGenome.fasta fullAlignment.fasta """ From e0fae4efe6c7541866749ff0cf47ff1028b7c6db Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 6 May 2021 08:37:41 +0200 Subject: [PATCH 05/16] Update output.md --- docs/output.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index 7bd1084b3..b3030d6b1 100644 --- a/docs/output.md +++ b/docs/output.md @@ -639,6 +639,7 @@ More detailed explanation of variant quality scores can be seen in the Broad Ins #### Indel Distribution This plot shows you the distribution of the sizes of insertion- and deletions (InDels) in the variant calling (assuming you configured your variant caller parameters to do so). Low-coverage aDNA data often will not have high enough coverage to accurately assess InDels. In cases of high-coverage data of small-genomes such as microbes, large numbers of InDels, however, may indicate your reads are actually from a _relative_ of the reference mapped to - and should be verified downstream. + #### Variant depths This plot shows the distribution of depth coverages of each variant called. Typically higher coverage will result in higher quality variant calls (see Variant Quality, above), however in many cases in aDNA these may be low and unequally distributed (due to uneven mapping coverage from contamination). @@ -699,4 +700,4 @@ Each module has it's own output directory which sit alongside the `MultiQC/` dir * `maltextract/`: this contains a `results` directory in which contains the output from MaltExtract - typically one folder for each filter type, an error and a log file. The characteristics of each node (e.g. damage, read lengths, edit distances - each in different txt formats) can be seen in each sub-folder of the filter folders. Output can be visualised either with the [HOPS postprocessing script](https://github.com/rhuebler/HOPS) or [MEx-IPA](https://github.com/jfy133/MEx-IPA) * `consensus_sequence/`: this contains three FASTA files from VCF2Genome of a consensus sequence based on the reference FASTA with each sample's unique modifications. 
The main FASTA is a standard file with bases not passing the specified thresholds as Ns. The two other FASTAS (`_refmod.fasta.gz`) and (`_uncertainity.fasta.gz`) are IUPAC uncertainty codes (rather than Ns) and a special number-based uncertainty system used for other downstream tools, respectively. * `librarymerged_bams/`: these contain the final BAM files that would go into genotyping (if genotyping is turned on). This means the files will contain all libraries of a given sample (including trimmed non-UDG or half-UDG treated libraries, if BAM trimming turned on) -* `bcftools`: this currently contains a single directory called `stats/` that includes general statistics on variant callers producing VCF files as output by `bcftools stats`. These includethings such as the number of positions, number of transititions/transversions and depth coverage of SNPs etc. These are only produced if `--run_bcftools_stats` is supplied. \ No newline at end of file +* `bcftools`: this currently contains a single directory called `stats/` that includes general statistics on variant callers producing VCF files as output by `bcftools stats`. These include things such as the number of positions, number of transitions/transversions and depth coverage of SNPs etc. These are only produced if `--run_bcftools_stats` is supplied. From 9032bf6acc315629ac549ea815ea7c94f04130dc Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 6 May 2021 20:58:59 +0200 Subject: [PATCH 06/16] Remove now uncessary CONTRIBUTING step due to new magic groovy functions --- .github/CONTRIBUTING.md | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 3e4a4cfa2..75b61b9ff 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -70,14 +70,13 @@ If you wish to contribute a new step, please use the following coding standards: 3. Define the output channel if needed (see below). 4. 
Add any new flags/options to `nextflow.config` with a default (see below). 5. Add any new flags/options to `nextflow_schema.json` with help text (with `nf-core schema build .`). -6. Add any new flags/options to the help message (for integer/text parameters, print to help the corresponding `nextflow.config` parameter). -7. Add sanity checks for all relevant parameters. -8. Add any new software to the `scrape_software_versions.py` script in `bin/` and the version command to the `scrape_software_versions` process in `main.nf`. -9. Do local tests that the new code works properly and as expected. -10. Add a new test command in `.github/workflow/ci.yaml`. -11. If applicable add a [MultiQC](https://https://multiqc.info/) module. -12. Update MultiQC config `assets/multiqc_config.yaml` so relevant suffixes, name clean up, General Statistics Table column order, and module figures are in the right order. -13. Optional: Add any descriptions of MultiQC report sections and output files to `docs/output.md`. +6. Add sanity checks for all relevant parameters. +7. Add any new software to the `scrape_software_versions.py` script in `bin/` and the version command to the `get_software_versions` process in `main.nf`. +8. Do local tests that the new code works properly and as expected. +9. Add a new test command in `.github/workflows/ci.yml`. +10. If applicable add a [MultiQC](https://multiqc.info/) module. +11. Update MultiQC config `assets/multiqc_config.yaml` so relevant suffixes, name clean up, General Statistics Table column order, and module figures are in the right order. +12. Optional: Add any descriptions of MultiQC report sections and output files to `docs/output.md`. 
### Default values From 3702a07008b820077549b3f673a934e2a65e45bb Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 6 May 2021 22:14:45 +0200 Subject: [PATCH 07/16] Software version reporting fixes and additions --- bin/scrape_software_versions.py | 6 ++++-- main.nf | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py index 5c9c0da9c..2e28d168e 100755 --- a/bin/scrape_software_versions.py +++ b/bin/scrape_software_versions.py @@ -16,7 +16,7 @@ 'Bowtie2': ['v_bowtie2.txt', r"bowtie2-([0-9]+\.[0-9]+\.[0-9]+) -fdebug"], 'Qualimap': ['v_qualimap.txt', r"QualiMap v.(\S+)"], 'GATK HaplotypeCaller': ['v_gatk.txt', r" v(\S+)"], - #'GATK UnifiedGenotyper': ['v_gatk3_5.txt', r"version (\S+)"], + 'GATK UnifiedGenotyper': ['v_gatk3.txt', r"(\S+)"], 'bamUtil' : ['v_bamutil.txt', r"Version: (\S+);"], 'fastP': ['v_fastp.txt', r"([\d\.]+)"], 'DamageProfiler' : ['v_damageprofiler.txt', r"DamageProfiler v(\S+)"], @@ -37,7 +37,8 @@ 'kraken':['v_kraken.txt', r"Kraken version (\S+)"], 'eigenstrat_snp_coverage':['v_eigenstrat_snp_coverage.txt',r"(\S+)"], 'mapDamage2':['v_mapdamage.txt',r"(\S+)"], - 'bbduk':['v_bbduk.txt',r"(\S+)"] + 'bbduk':['v_bbduk.txt',r"(\S+\ .+)"], + 'bcftools':['v_bcftools.txt',r"(\S+)"] } results = OrderedDict() @@ -75,6 +76,7 @@ results['eigenstrat_snp_coverage'] = 'N/A' results['mapDamage2'] = 'N/A' results['bbduk'] = 'N/A' +results['bcftools'] = 'N/A' # Search each file using its regex for k, v in regexes.items(): diff --git a/main.nf b/main.nf index e8d9982a0..426e003b9 100644 --- a/main.nf +++ b/main.nf @@ -2977,6 +2977,7 @@ process get_software_versions { qualimap --version &> v_qualimap.txt 2>&1 || true preseq &> v_preseq.txt 2>&1 || true gatk --version 2>&1 | head -n 1 > v_gatk.txt 2>&1 || true + gatk3 --version 2>&1 | head -n 1 > v_gatk3.txt 2>&1 || true freebayes --version &> v_freebayes.txt 2>&1 || true bedtools --version &> v_bedtools.txt 2>&1 || true 
damageprofiler --version &> v_damageprofiler.txt 2>&1 || true @@ -2995,8 +2996,9 @@ process get_software_versions { pileupCaller --version &> v_sequencetools.txt 2>&1 || true bowtie2 --version | grep -a 'bowtie2-.* -fdebug' > v_bowtie2.txt || true eigenstrat_snp_coverage --version | cut -d ' ' -f2 >v_eigenstrat_snp_coverage.txt || true - mapDamage2 --version > v_mapdamage.txt || true - bbduk.sh | grep 'Last modified' | cut -d' ' -f 3-99 > v_bbduk.txt || true + mapDamage --version > v_mapdamage.txt || true + bbduk.sh | grep 'Last modified' | cut -d ' ' -f 3-99 > v_bbduk.txt || true + bcftools --version | grep 'bcftools' | cut -d ' ' -f 2 > v_bcftools.txt || true scrape_software_versions.py &> software_versions_mqc.yaml """ From a47f7442d21d9f8488fc21b8bac3709fe1419ab9 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 6 May 2021 22:16:16 +0200 Subject: [PATCH 08/16] Update changelog --- CHANGELOG.md | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2657eacf8..3b155f559 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,15 +3,27 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). 
-## v2.3.4dev - [unreleased] +## v2.4dev - [unreleased] ### `Added` -- [#729](https://github.com/nf-core/eager/issues/729) Added Bowtie2 flag `--maxins` for PE mapping modern DNA mapping contexts - [#317](https://github.com/nf-core/eager/issues/317) Added bcftools stats for general genotyping statistics of VCF files ### `Fixed` +- Fixed some missing or incorrectly reported software versions + +### `Dependencies` + +### `Deprecated` + +## v2.3.4dev - [unreleased] + +### `Added` + +- [#729](https://github.com/nf-core/eager/issues/729) Added Bowtie2 flag `--maxins` for PE mapping modern DNA mapping contexts +### `Fixed` + - Corrected explanation of the "--min_adap_overlap" parameter for AdapterRemoval in the docs - [#725](https://github.com/nf-core/eager/pull/725) `bwa_index` doc update - Re-adds gzip piping to AdapterRemovalFixPrefix to speed up process after reports of being very slow From d92bcf20640612f7b55da8c5808ac62735d7c191 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 7 May 2021 14:39:02 +0200 Subject: [PATCH 09/16] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b155f559..94743a677 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
### `Added` - [#729](https://github.com/nf-core/eager/issues/729) Added Bowtie2 flag `--maxins` for PE mapping modern DNA mapping contexts + ### `Fixed` - Corrected explanation of the "--min_adap_overlap" parameter for AdapterRemoval in the docs From a9f18d391ea6eba888776cff478e2a769b934d56 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 7 Jun 2021 19:39:40 +0200 Subject: [PATCH 10/16] Bump bcftools version to latest --- environment.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/environment.yml b/environment.yml index 0a3bbfca4..171d13d61 100644 --- a/environment.yml +++ b/environment.yml @@ -36,10 +36,10 @@ dependencies: - bioconda::fastp=0.20.1 - bioconda::bamutil=1.0.15 - bioconda::mtnucratio=0.7 - - bioconda::pysam=0.16.0 #Says python3.7 or less + - bioconda::pysam=0.16.0 - bioconda::kraken2=2.1.2 - - conda-forge::pandas=1.2.4 #.4 is python3.8+ compatible - - bioconda::freebayes=1.3.5 #should be fine with python 3.8, but says <3.7 on webpage + - conda-forge::pandas=1.2.4 + - bioconda::freebayes=1.3.5 - bioconda::sexdeterrmine=1.1.2 - bioconda::multivcfanalyzer=0.85.2 - bioconda::hops=0.35 @@ -49,4 +49,4 @@ dependencies: - bioconda::eigenstratdatabasetools=1.0.2 - bioconda::mapdamage2=2.2.1 - bioconda::bbmap=38.90 - - bioconda::bcftools=1.9 + - bioconda::bcftools=1.12 From ea7f9058c4ef9b09df19bcc0474aab7e4e1917c1 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 7 Jun 2021 20:32:35 +0200 Subject: [PATCH 11/16] Fix script --- bin/scrape_software_versions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py index 0b41a10ec..ed7008381 100755 --- a/bin/scrape_software_versions.py +++ b/bin/scrape_software_versions.py @@ -37,7 +37,7 @@ 'kraken':['v_kraken.txt', r"Kraken version (\S+)"], 'eigenstrat_snp_coverage':['v_eigenstrat_snp_coverage.txt',r"(\S+)"], 'mapDamage2':['v_mapdamage.txt',r"(\S+)"], - 'bbduk':['v_bbduk.txt',r"(.*)"]
+ 'bbduk':['v_bbduk.txt',r"(.*)"], 'bcftools':['v_bcftools.txt',r"(\S+)"] } From a68b9d45c61dca76d41194d0cf0de30fc4c6748a Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Mon, 23 Aug 2021 20:27:13 +0200 Subject: [PATCH 12/16] Update docs/output.md Co-authored-by: Alexander Peltzer --- docs/output.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index b3030d6b1..9fd7280d4 100644 --- a/docs/output.md +++ b/docs/output.md @@ -622,7 +622,7 @@ If this correlation is not observed, your data is skewed towards higher coverage ### Background -Bcftools is a toolkit for processing and summaries VCF files, i.e. variant call format files. nf-core/eager currently uses bcftools for the `stats` functionality. This summarises in a text file a range of statistics about VCF files, produced by GATK and FreeBayes variant callers. +Bcftools is a toolkit for processing and summarising VCF files, i.e. variant call format files. nf-core/eager currently uses bcftools for the `stats` functionality. This summarises, in a text file, a range of statistics about the VCF files produced by the GATK and FreeBayes variant callers.
#### Variant Substitution Types From d51783ead3056fc43ae5d7926ac4c4e3d6e56a7b Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 23 Aug 2021 21:25:03 +0200 Subject: [PATCH 13/16] Delete useless block --- main.nf | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/main.nf b/main.nf index d43fec147..98c88688d 100644 --- a/main.nf +++ b/main.nf @@ -2213,24 +2213,8 @@ if ( params.run_genotyping && params.genotyping_source == 'raw' ) { } - - // Unified Genotyper - although not-supported, better for aDNA (because HC does de novo assembly which requires higher coverages), and needed for MultiVCFAnalyzer -// initialise empty bcftool related empty channels - -//if ( params.genotyping_tool == 'ug' ) { -// ch_hc_for_bcftools_stats = Channel.empty() -// ch_fb_for_bcftools_stats = Channel.empty() -//} else if ( params.genotyping_tool == 'hc' ) { -// ch_ug_for_bcftools_stats = Channel.empty() -// ch_fb_for_bcftools_stats = Channel.empty() -//} else if ( params.genotyping_tool == 'fb ') { -// ch_ug_for_bcftools_stats = Channel.empty() -// ch_hc_for_bcftools_stats = Channel.empty() -//} - - process genotyping_ug { label 'mc_small' tag "${samplename}" @@ -2517,7 +2501,7 @@ process vcf2genome { pigz -d -f -p ${task.cpus} ${vcf} vcf2genome -Xmx${task.memory.toGiga()}g -draft ${out} -draftname "${fasta_head}" -in ${vcf.baseName} -minc ${params.vcf2genome_minc} -minfreq ${params.vcf2genome_minfreq} -minq ${params.vcf2genome_minq} -ref ${fasta} -refMod ${out}_refmod.fasta -uncertain ${out}_uncertainty.fasta pigz -f -p ${task.cpus} ${out}* - pigz -p ${task.cpus} *.vcf + bgzip -@ ${task.cpus} *.vcf """ } @@ -2560,6 +2544,7 @@ process multivcfanalyzer { pigz -d -f -p ${task.cpus} ${vcf} multivcfanalyzer -Xmx${task.memory.toGiga()}g ${params.snp_eff_results} ${fasta} ${params.reference_gff_annotations} . 
${write_freqs} ${params.min_genotype_quality} ${params.min_base_coverage} ${params.min_allele_freq_hom} ${params.min_allele_freq_het} ${params.reference_gff_exclude} *.vcf pigz -p ${task.cpus} *.tsv *.txt snpAlignment.fasta snpAlignmentIncludingRefGenome.fasta fullAlignment.fasta + bgzip -@ ${task.cpus} *.vcf """ } @@ -2616,7 +2601,7 @@ process sexdeterrmine_prep { process sexdeterrmine { label 'mc_small' publishDir "${params.outdir}/sex_determination", mode: params.publish_dir_mode - + input: path bam from ch_prepped_for_sexdeterrmine.collect() path(bed) from ch_bed_for_sexdeterrmine From 1d42a5caa393defcf4d87458cf5c769d011fb7c8 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 23 Aug 2021 22:03:12 +0200 Subject: [PATCH 14/16] Changelog linting --- CHANGELOG.md | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 178c6c981..7e8c71f2a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,16 +8,13 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
### `Added` - [#317](https://github.com/nf-core/eager/issues/317) Added bcftools stats for general genotyping statistics of VCF files - -### `Fixed` - -- Fixed some missing or incorrectly reported software versions - [#651](https://github.com/nf-core/eager/issues/651) - Adds removal of adapters specified in an AdapterRemoval adapter list file -- [#769](https://github.com/nf-core/eager/issues/769) - Adds lc_extrap mode to preseq (requested by @roberta-davidson) - [#642](https://github.com/nf-core/eager/issues/642) and [#431](https://github.com/nf-core/eager/issues/431) adds post-adapter removal barcode/fastq trimming +- [#769](https://github.com/nf-core/eager/issues/769) - Adds lc_extrap mode to preseq (requested by @roberta-davidson) ### `Fixed` +- Fixed some missing or incorrectly reported software versions - [#771](https://github.com/nf-core/eager/issues/771) Remove legacy code - Improved output documentation for MultiQC general stats table (thanks to @KathrinNaegele and @esalmela) - Improved output documentation for BowTie2 (thanks to @isinaltinkaya) From 5ce424a32bc9aa4a33aba8d3b039e9f3876d19b7 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 23 Aug 2021 22:08:22 +0200 Subject: [PATCH 15/16] A few version bumps --- environment.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/environment.yml b/environment.yml index 171d13d61..4a65d0a7e 100644 --- a/environment.yml +++ b/environment.yml @@ -16,7 +16,7 @@ dependencies: - bioconda::adapterremoval=2.3.2 - bioconda::adapterremovalfixprefix=0.0.5 - bioconda::bwa=0.7.17 - - bioconda::picard=2.25.5 + - bioconda::picard=2.26.0 - bioconda::samtools=1.12 - bioconda::dedup=0.12.8 - bioconda::angsd=0.935 @@ -26,7 +26,7 @@ dependencies: - bioconda::qualimap=2.2.2d - bioconda::vcf2genome=0.91 - bioconda::damageprofiler=0.4.9 # Don't upgrade - later versions don't allow java 8 - - bioconda::multiqc=1.10.1 + - bioconda::multiqc=1.11 - bioconda::pmdtools=0.60 - bioconda::bedtools=2.30.0 - 
conda-forge::libiconv=1.16 @@ -48,5 +48,5 @@ dependencies: - bioconda::bowtie2=2.4.4 - bioconda::eigenstratdatabasetools=1.0.2 - bioconda::mapdamage2=2.2.1 - - bioconda::bbmap=38.90 - - bioconda::bcftools=1.12 + - bioconda::bbmap=38.92 + - bioconda::bcftools=1.13 From 93da6eac3a8ccb58b4ad2b307e813d2dc2c0948a Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 24 Aug 2021 08:22:14 +0200 Subject: [PATCH 16/16] Sync bcftools with samtools version --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 4a65d0a7e..7765f5688 100644 --- a/environment.yml +++ b/environment.yml @@ -49,4 +49,4 @@ dependencies: - bioconda::eigenstratdatabasetools=1.0.2 - bioconda::mapdamage2=2.2.1 - bioconda::bbmap=38.92 - - bioconda::bcftools=1.13 + - bioconda::bcftools=1.12