From 9cd85025cbb76e5f412559f9a5533c397d264bc5 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Tue, 11 May 2021 11:09:56 +0000 Subject: [PATCH 01/11] Template update for nf-core/tools version 1.14 --- .github/PULL_REQUEST_TEMPLATE.md | 7 +++-- .github/workflows/ci.yml | 5 ++- CHANGELOG.md | 2 +- Dockerfile | 6 ++-- README.md | 2 +- environment.yml | 2 +- lib/NfcoreSchema.groovy | 54 +++++++++++++++++--------------- main.nf | 4 +-- nextflow.config | 18 ++++++----- nextflow_schema.json | 6 ++-- 10 files changed, 57 insertions(+), 49 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 4d46a3ac7..864af6938 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -10,14 +10,15 @@ Remember that PRs should be made against the dev branch, unless you're preparing Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/eager/tree/master/.github/CONTRIBUTING.md) --> + ## PR checklist - [ ] This comment contains a description of changes (with reason). - [ ] If you've fixed a bug or added code that should be tested, add tests! - - [ ] If you've added a new tool - add to the software_versions process and a regex to `scrape_software_versions.py` - - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/eager/tree/master/.github/CONTRIBUTING.md) - - [ ] If necessary, also make a PR on the nf-core/eager _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. 
+ - [ ] If you've added a new tool - add to the software_versions process and a regex to `scrape_software_versions.py` + - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/eager/tree/master/.github/CONTRIBUTING.md) + - [ ] If necessary, also make a PR on the nf-core/eager _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint .`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker`). - [ ] Usage Documentation in `docs/usage.md` is updated. diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8c7eed4fa..4eab342d2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,6 +8,9 @@ on: release: types: [published] +# Uncomment if we need an edge release of Nextflow again +# env: NXF_EDGE: 1 + jobs: test: name: Run workflow tests @@ -20,7 +23,7 @@ jobs: strategy: matrix: # Nextflow versions: check pipeline minimum and current latest - nxf_ver: ['20.04.0', '21.03.0-edge'] + nxf_ver: ['20.04.0', ''] steps: - name: Check out pipeline code uses: actions/checkout@v2 diff --git a/CHANGELOG.md b/CHANGELOG.md index 49475226f..4f270e57d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v2.3.3dev - [date] +## v2.3.5dev - [date] Initial release of nf-core/eager, created with the [nf-core](https://nf-co.re/) template. 
diff --git a/Dockerfile b/Dockerfile index b9f80769c..005098d7e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM nfcore/base:1.13.3 +FROM nfcore/base:1.14 LABEL authors="The nf-core/eager community" \ description="Docker image containing all software requirements for the nf-core/eager pipeline" @@ -7,7 +7,7 @@ COPY environment.yml / RUN conda env create --quiet -f /environment.yml && conda clean -a # Add conda installation dir to PATH (instead of doing 'conda activate') -ENV PATH /opt/conda/envs/nf-core-eager-2.3.3dev/bin:$PATH +ENV PATH /opt/conda/envs/nf-core-eager-2.3.5dev/bin:$PATH # Dump the details of the installed packages to a file for posterity -RUN conda env export --name nf-core-eager-2.3.3dev > nf-core-eager-2.3.3dev.yml +RUN conda env export --name nf-core-eager-2.3.5dev > nf-core-eager-2.3.5dev.yml diff --git a/README.md b/README.md index e064cbd7b..1ebb2a2d6 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool ## Quick Start -1. Install [`nextflow`](https://nf-co.re/usage/installation) +1. Install [`nextflow`](https://nf-co.re/usage/installation) (`>=20.04.0`) 2. 
Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(please only use [`Conda`](https://conda.io/miniconda.html) as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_ diff --git a/environment.yml b/environment.yml index f489b9dac..a55e66f78 100644 --- a/environment.yml +++ b/environment.yml @@ -1,6 +1,6 @@ # You can use this file to create a conda environment for this pipeline: # conda env create -f environment.yml -name: nf-core-eager-2.3.3dev +name: nf-core-eager-2.3.5dev channels: - conda-forge - bioconda diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy index 54935ec81..52ee73043 100644 --- a/lib/NfcoreSchema.groovy +++ b/lib/NfcoreSchema.groovy @@ -112,8 +112,14 @@ class NfcoreSchema { } // unexpected params def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' - if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam)) { - unexpectedParams.push(specifiedParam) + def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } + def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() + if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !expectedParamsLowerCase.contains(specifiedParamLowerCase)) { + // Temporarily remove camelCase/camel-case params #1035 + def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} + if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ + unexpectedParams.push(specifiedParam) + } } } @@ -191,11 +197,11 @@ class NfcoreSchema { // Remove an element from a JSONArray private static JSONArray removeElement(jsonArray, 
element){ - def list = [] + def list = [] int len = jsonArray.length() - for (int i=0;i hnames.each { hname -> if (hostname.contains(hname) && !workflow.profile.contains(prof)) { - log.error '====================================================\n' + + log.error "${c_red}====================================================${c_reset}\n" + " ${c_red}WARNING!${c_reset} You are running with `-profile $workflow.profile`\n" + " but your machine hostname is ${c_white}'$hostname'${c_reset}\n" + " ${c_yellow_bold}It's highly recommended that you use `-profile $prof${c_reset}`\n" + - '============================================================' + "${c_red}====================================================${c_reset}\n" } } } diff --git a/nextflow.config b/nextflow.config index 066e45fa0..12df12d9a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -25,12 +25,13 @@ params { plaintext_email = false monochrome_logs = false help = false - igenomes_base = 's3://ngi-igenomes/igenomes/' + igenomes_base = 's3://ngi-igenomes/igenomes' tracedir = "${params.outdir}/pipeline_info" igenomes_ignore = false custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" hostnames = false + config_profile_name = null config_profile_description = false config_profile_contact = false config_profile_url = false @@ -65,7 +66,7 @@ profiles { singularity.enabled = false podman.enabled = false shifter.enabled = false - charliecloud = false + charliecloud.enabled = false process.conda = "$projectDir/environment.yml" } debug { process.beforeScript = 'echo $HOSTNAME' } @@ -94,7 +95,7 @@ profiles { docker.enabled = false podman.enabled = true shifter.enabled = false - charliecloud = false + charliecloud.enabled = false } shifter { singularity.enabled = false @@ -129,21 +130,22 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] +def trace_timestamp = new 
java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/execution_timeline.html" + file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.tracedir}/execution_report.html" + file = "${params.tracedir}/execution_report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace.txt" + file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" } dag { enabled = true - file = "${params.tracedir}/pipeline_dag.svg" + file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.svg" } manifest { @@ -153,7 +155,7 @@ manifest { description = 'A fully reproducible and state-of-the-art ancient DNA analysis pipeline' mainScript = 'main.nf' nextflowVersion = '>=20.04.0' - version = '2.3.3dev' + version = '2.3.5dev' } // Function to ensure that resource requirements don't go beyond diff --git a/nextflow_schema.json b/nextflow_schema.json index 2aa9e9535..6f4f0e016 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -62,7 +62,7 @@ "igenomes_base": { "type": "string", "description": "Directory / URL base for iGenomes references.", - "default": "s3://ngi-igenomes/igenomes/", + "default": "s3://ngi-igenomes/igenomes", "fa_icon": "fas fa-cloud-download-alt", "hidden": true }, @@ -183,7 +183,7 @@ "description": "Maximum amount of memory that can be requested for any single job.", "default": "128.GB", "fa_icon": "fas fa-memory", - "pattern": "^[\\d\\.]+\\s*.(K|M|G|T)?B$", + "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", "hidden": true, "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. 
`--max_memory '8.GB'`" }, @@ -192,7 +192,7 @@ "description": "Maximum amount of time that can be requested for any single job.", "default": "240.h", "fa_icon": "far fa-clock", - "pattern": "^[\\d\\.]+\\.*(s|m|h|d)$", + "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", "hidden": true, "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" } From 6a8325bd71231208d0dd80d9991c0625b521a4d9 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Wed, 12 May 2021 09:33:30 +0200 Subject: [PATCH 02/11] Remove space --- lib/NfcoreSchema.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy index 52ee73043..c2390f966 100644 --- a/lib/NfcoreSchema.groovy +++ b/lib/NfcoreSchema.groovy @@ -259,7 +259,7 @@ class NfcoreSchema { return new_params } - /* + /* * This method tries to read a JSON params file */ private static LinkedHashMap params_load(String json_schema) { From af37f1c69bcd3647fbd69deeff8ba3aef7457833 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Wed, 12 May 2021 10:14:32 +0200 Subject: [PATCH 03/11] Add reporting of location of multiqc run report on completion --- CHANGELOG.md | 2 ++ main.nf | 1 + 2 files changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e68441280..9e073ac3f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
### `Added` +- [#736](https://github.com/nf-core/eager/issues/736) - Add printing of multiqc run report location on successful completion + ### `Fixed` ### `Dependencies` diff --git a/main.nf b/main.nf index 3473973ba..9745ed085 100644 --- a/main.nf +++ b/main.nf @@ -3128,6 +3128,7 @@ workflow.onComplete { if (workflow.success) { log.info "-${c_purple}[nf-core/eager]${c_green} Pipeline completed successfully${c_reset}-" + log.info "-${c_purple}[nf-core/eager]${c_green} MultiQC run report can be found in ${params.outdir}/multiqc ${c_reset}-" } else { checkHostname() log.info "-${c_purple}[nf-core/eager]${c_red} Pipeline completed with errors${c_reset}-" From 08dc6abd9db8d7edeade728f274804716eb7343d Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Wed, 12 May 2021 10:30:42 +0200 Subject: [PATCH 04/11] Add bwa -o parameter as modifying is recommended for aDNA data according to latest simulation studies --- CHANGELOG.md | 2 ++ main.nf | 8 ++++---- nextflow.config | 1 + nextflow_schema.json | 7 +++++++ 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e68441280..994c38740 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
## v2.3.5dev - [date] +- [#722](https://github.com/nf-core/eager/issues/722) - Adds bwa `-o` flag for more flexibility in bwa parameters + ### `Added` ### `Fixed` diff --git a/main.nf b/main.nf index 3473973ba..e6d0ba5e4 100644 --- a/main.nf +++ b/main.nf @@ -1160,15 +1160,15 @@ process bwa { //PE data without merging, PE data without any AR applied if ( seqtype == 'PE' && ( params.skip_collapse || params.skip_adapterremoval ) ){ """ - bwa aln -t ${task.cpus} $fasta ${r1} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${libraryid}.r1.sai - bwa aln -t ${task.cpus} $fasta ${r2} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${libraryid}.r2.sai + bwa aln -t ${task.cpus} $fasta ${r1} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -o ${params.bwaalno} -f ${libraryid}.r1.sai + bwa aln -t ${task.cpus} $fasta ${r2} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -o ${params.bwaalno} -f ${libraryid}.r2.sai bwa sampe -r "@RG\\tID:ILLUMINA-${libraryid}\\tSM:${libraryid}\\tPL:illumina\\tPU:ILLUMINA-${libraryid}-${seqtype}" $fasta ${libraryid}.r1.sai ${libraryid}.r2.sai ${r1} ${r2} | samtools sort -@ ${task.cpus} -O bam - > ${libraryid}_"${seqtype}".mapped.bam samtools index "${libraryid}"_"${seqtype}".mapped.bam ${size} """ } else { - //PE collapsed, or SE data + //PE collapsed, or SE data """ - bwa aln -t ${task.cpus} ${fasta} ${r1} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${libraryid}.sai + bwa aln -t ${task.cpus} ${fasta} ${r1} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -o ${params.bwaalno} -f ${libraryid}.sai bwa samse -r "@RG\\tID:ILLUMINA-${libraryid}\\tSM:${libraryid}\\tPL:illumina\\tPU:ILLUMINA-${libraryid}-${seqtype}" $fasta ${libraryid}.sai $r1 | samtools sort -@ ${task.cpus} -O bam - > "${libraryid}"_"${seqtype}".mapped.bam samtools index "${libraryid}"_"${seqtype}".mapped.bam ${size} """ diff --git a/nextflow.config b/nextflow.config index 
40d3bb6c3..44786a02f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -80,6 +80,7 @@ params { bwaalnn = 0.04 bwaalnk = 2 bwaalnl = 1024 // From Schubert et al. 2012 (10.1186/1471-2164-13-178) + bwaalno = 1 // leave at bwa default for now circularextension = 500 circulartarget = 'MT' circularfilter = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 3c05bdc51..a33f5455c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -562,6 +562,13 @@ "fa_icon": "fas fa-ruler-horizontal", "help_text": "Configures the length of the seed used in `bwa aln -l`. Default is set to be 'turned off' at the recommendation of Schubert et al. ([2012 _BMC Genomics_](https://doi.org/10.1186/1471-2164-13-178)) for ancient DNA with `1024`.\n\nNote: Despite being recommended, turning off seeding can result in long runtimes!\n\n> Modifies BWA aln parameter: `-l`\n" }, + "bwaalno": { + "type": "integer", + "default": 1, + "fa_icon": "fas fa-people-arrows", + "description": "Specify the -o parameter for BWA aln i.e. the maximum number of gap opens allowed.", + "help_text": "Configures the maximum number of gap opens allowed in `bwa aln`. Default is set to `bwa` default.\n\n> Modifies BWA aln parameter: `-o`\n" + }, "circularextension": { "type": "integer", "default": 500, From e8228a22d6bb72f7d0bcbcb8be470069fec876ca Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Wed, 12 May 2021 10:49:37 +0200 Subject: [PATCH 05/11] Improve TSV validation check --- CHANGELOG.md | 1 + main.nf | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e68441280..7be664c0e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
### `Fixed` +- [#723](https://github.com/nf-core/eager/issues/723) - Fixes empty fields in TSV resulting in uninformative error ### `Dependencies` ### `Deprecated` diff --git a/main.nf b/main.nf index 3473973ba..c2c927549 100644 --- a/main.nf +++ b/main.nf @@ -3157,17 +3157,17 @@ def extract_data(tsvFile) { checkNumberOfItem(row, 11) - if ( row.Sample_Name.isEmpty() ) exit 1, "[nf-core/eager] error: the Sample_Name column is empty. Ensure all cells are filled or contain 'NA' for optional fields. Check row:\n ${row}" - if ( row.Library_ID.isEmpty() ) exit 1, "[nf-core/eager] error: the Library_ID column is empty. Ensure all cells are filled or contain 'NA' for optional fields. Check row:\n ${row}" - if ( row.Lane.isEmpty() ) exit 1, "[nf-core/eager] error: the Lane column is empty. Ensure all cells are filled or contain 'NA' for optional fields. Check row:\n ${row}" - if ( row.Colour_Chemistry.isEmpty() ) exit 1, "[nf-core/eager] error: the Colour_Chemistry column is empty. Ensure all cells are filled or contain 'NA' for optional fields. Check row:\n ${row}" - if ( row.SeqType.isEmpty() ) exit 1, "[nf-core/eager] error: the SeqType column is empty. Ensure all cells are filled or contain 'NA' for optional fields. Check row:\n ${row}" - if ( row.Organism.isEmpty() ) exit 1, "[nf-core/eager] error: the Organism column is empty. Ensure all cells are filled or contain 'NA' for optional fields. Check row:\n ${row}" - if ( row.Strandedness.isEmpty() ) exit 1, "[nf-core/eager] error: the Strandedness column is empty. Ensure all cells are filled or contain 'NA' for optional fields. Check row:\n ${row}" - if ( row.UDG_Treatment.isEmpty() ) exit 1, "[nf-core/eager] error: the UDG_Treatment column is empty. Ensure all cells are filled or contain 'NA' for optional fields. Check row:\n ${row}" - if ( row.R1.isEmpty() ) exit 1, "[nf-core/eager] error: the R1 column is empty. Ensure all cells are filled or contain 'NA' for optional fields. 
Check row:\n ${row}" - if ( row.R2.isEmpty() ) exit 1, "[nf-core/eager] error: the R2 column is empty. Ensure all cells are filled or contain 'NA' for optional fields. Check row:\n ${row}" - if ( row.BAM.isEmpty() ) exit 1, "[nf-core/eager] error: the BAM column is empty. Ensure all cells are filled or contain 'NA' for optional fields. Check row:\n ${row}" + if ( row.Sample_Name == null || row.Sample_Name.isEmpty() ) exit 1, "[nf-core/eager] error: the Sample_Name column is empty. Ensure all cells are filled or contain 'NA' for optional fields. Check row:\n ${row}" + if ( row.Library_ID == null || row.Library_ID.isEmpty() ) exit 1, "[nf-core/eager] error: the Library_ID column is empty. Ensure all cells are filled or contain 'NA' for optional fields. Check row:\n ${row}" + if ( row.Lane == null || row.Lane.isEmpty() ) exit 1, "[nf-core/eager] error: the Lane column is empty. Ensure all cells are filled or contain 'NA' for optional fields. Check row:\n ${row}" + if ( row.Colour_Chemistry == null || row.Colour_Chemistry.isEmpty() ) exit 1, "[nf-core/eager] error: the Colour_Chemistry column is empty. Ensure all cells are filled or contain 'NA' for optional fields. Check row:\n ${row}" + if ( row.SeqType == null || row.SeqType.isEmpty() ) exit 1, "[nf-core/eager] error: the SeqType column is empty. Ensure all cells are filled or contain 'NA' for optional fields. Check row:\n ${row}" + if ( row.Organism == null || row.Organism.isEmpty() ) exit 1, "[nf-core/eager] error: the Organism column is empty. Ensure all cells are filled or contain 'NA' for optional fields. Check row:\n ${row}" + if ( row.Strandedness == null || row.Strandedness.isEmpty() ) exit 1, "[nf-core/eager] error: the Strandedness column is empty. Ensure all cells are filled or contain 'NA' for optional fields. Check row:\n ${row}" + if ( row.UDG_Treatment == null || row.UDG_Treatment.isEmpty() ) exit 1, "[nf-core/eager] error: the UDG_Treatment column is empty. 
Ensure all cells are filled or contain 'NA' for optional fields. Check row:\n ${row}" + if ( row.R1 == null || row.R1.isEmpty() ) exit 1, "[nf-core/eager] error: the R1 column is empty. Ensure all cells are filled or contain 'NA' for optional fields. Check row:\n ${row}" + if ( row.R2 == null || row.R2.isEmpty() ) exit 1, "[nf-core/eager] error: the R2 column is empty. Ensure all cells are filled or contain 'NA' for optional fields. Check row:\n ${row}" + if ( row.BAM == null || row.BAM.isEmpty() ) exit 1, "[nf-core/eager] error: the BAM column is empty. Ensure all cells are filled or contain 'NA' for optional fields. Check row:\n ${row}" def samplename = row.Sample_Name def libraryid = row.Library_ID From 4d05d7ec70ced51c3a6fb70f912bb073ef5ff1f6 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Wed, 12 May 2021 11:14:49 +0200 Subject: [PATCH 06/11] Replace empty strings with nulls following nf-core best practises --- main.nf | 64 ++++++++++++++++++++++++------------------------- nextflow.config | 44 +++++++++++++++++----------------- 2 files changed, 54 insertions(+), 54 deletions(-) diff --git a/main.nf b/main.nf index c2c927549..e0eb03432 100644 --- a/main.nf +++ b/main.nf @@ -46,7 +46,7 @@ if ( params.skip_collapse && params.skip_trim ) { } // Bedtools validation -if(params.run_bedtools_coverage && params.anno_file == ''){ +if(params.run_bedtools_coverage && !params.anno_file ){ exit 1, "[nf-core/eager] error: you have turned on bedtools coverage, but not specified a BED or GFF file with --anno_file. Please validate your parameters." 
} @@ -62,7 +62,7 @@ if (params.dedupper == 'dedup' && !params.mergedonly) { // Genotyping validation if (params.run_genotyping){ - if (params.genotyping_tool == 'pileupcaller' && ( params.pileupcaller_bedfile == '' || params.pileupcaller_snpfile == '' ) ) { + if (params.genotyping_tool == 'pileupcaller' && ( !params.pileupcaller_bedfile || !params.pileupcaller_snpfile ) ) { exit 1, "[nf-core/eager] error: please check your pileupCaller bed file and snp file parameters. You must supply a bed file and a snp file." } @@ -96,7 +96,7 @@ if (params.run_multivcfanalyzer) { exit 1, "[nf-core/eager] error: MultiVCFAnalyzer only accepts VCF files generated with a GATK ploidy set to 2. Found parameter: --gatk_ploidy ${params.gatk_ploidy}." } - if (params.additional_vcf_files != '') { + if (params.additional_vcf_files) { ch_extravcfs_for_multivcfanalyzer = Channel.fromPath(params.additional_vcf_files, checkIfExists: true) } } @@ -110,7 +110,7 @@ if (params.run_metagenomic_screening) { exit 1, "[nf-core/eager] error: metagenomic classification can only run on unmapped reads in FASTQ format. Please supply --bam_unmapped_type 'fastq'. Found parameter: --bam_unmapped_type '${params.bam_unmapped_type}'." } - if (params.database == '' ) { + if (!params.database) { exit 1, "[nf-core/eager] error: metagenomic classification requires a path to a database directory. Please specify one with --database '/path/to/database/'." } @@ -138,7 +138,7 @@ if (params.run_maltextract) { exit 1, "[nf-core/eager] error: MaltExtract can only accept MALT output. Please supply --metagenomic_tool 'malt'. Found parameter: --metagenomic_tool '${params.metagenomic_tool}'" } - if (params.maltextract_taxon_list == '') { + if (!params.maltextract_taxon_list) { exit 1, "[nf-core/eager] error: MaltExtract requires a taxon list specifying the target taxa of interest. Specify the file with --params.maltextract_taxon_list." 
} } @@ -204,7 +204,7 @@ if (params.genomes && params.genome && !params.genomes.containsKey(params.genome } // Index files provided? Then check whether they are correct and complete -if( params.bwa_index != '' && (params.mapper == 'bwaaln' | params.mapper == 'bwamem' | params.mapper == 'circularmapper')){ +if( params.bwa_index && (params.mapper == 'bwaaln' | params.mapper == 'bwamem' | params.mapper == 'circularmapper')){ Channel .fromPath(params.bwa_index, checkIfExists: true) .ifEmpty { exit 1, "[nf-core/eager] error: bwa indices not found in: ${index_base}." } @@ -213,7 +213,7 @@ if( params.bwa_index != '' && (params.mapper == 'bwaaln' | params.mapper == 'bwa bt2_index = Channel.empty() } -if( params.bt2_index != '' && params.mapper == 'bowtie2' ){ +if( params.bt2_index && params.mapper == 'bowtie2' ){ lastPath = params.bt2_index.lastIndexOf(File.separator) bt2_dir = params.bt2_index.substring(0,lastPath+1) bt2_base = params.bt2_index.substring(lastPath+1) @@ -228,41 +228,41 @@ if( params.bt2_index != '' && params.mapper == 'bowtie2' ){ } // SexDetermination channel set up and bedfile validation -if (params.sexdeterrmine_bedfile == '') { +if (!params.sexdeterrmine_bedfile) { ch_bed_for_sexdeterrmine = Channel.fromPath("$projectDir/assets/nf-core_eager_dummy.txt") } else { ch_bed_for_sexdeterrmine = Channel.fromPath(params.sexdeterrmine_bedfile, checkIfExists: true) } // pileupCaller channel generation and input checks for 'random sampling' genotyping -if (params.pileupcaller_bedfile.isEmpty()) { +if (!params.pileupcaller_bedfile) { ch_bed_for_pileupcaller = Channel.fromPath("$projectDir/assets/nf-core_eager_dummy.txt") } else { ch_bed_for_pileupcaller = Channel.fromPath(params.pileupcaller_bedfile, checkIfExists: true) } -if (params.pileupcaller_snpfile.isEmpty ()) { +if (!params.pileupcaller_snpfile) { ch_snp_for_pileupcaller = Channel.fromPath("$projectDir/assets/nf-core_eager_dummy2.txt") } else { ch_snp_for_pileupcaller = 
Channel.fromPath(params.pileupcaller_snpfile, checkIfExists: true) } // Create input channel for MALT database directory, checking directory exists -if ( params.database == '') { +if ( !params.database ) { ch_db_for_malt = Channel.empty() } else { ch_db_for_malt = Channel.fromPath(params.database, checkIfExists: true) } // Create input channel for MaltExtract taxon list, to allow downloading of taxon list, checking file exists. -if ( params.maltextract_taxon_list== '' ) { +if ( !params.maltextract_taxon_list ) { ch_taxonlist_for_maltextract = Channel.empty() } else { ch_taxonlist_for_maltextract = Channel.fromPath(params.maltextract_taxon_list, checkIfExists: true) } // Create input channel for MaltExtract NCBI files, checking files exists. -if ( params.maltextract_ncbifiles == '' ) { +if ( !params.maltextract_ncbifiles ) { ch_ncbifiles_for_maltextract = Channel.empty() } else { ch_ncbifiles_for_maltextract = Channel.fromPath(params.maltextract_ncbifiles, checkIfExists: true) @@ -300,7 +300,7 @@ where_are_my_files = file("$projectDir/assets/where_are_my_files.txt") /////////////////////////////////////////////////// // check if we have valid --reads or --input -if (params.input == null) { +if (!params.input) { exit 1, "[nf-core/eager] error: --input was not supplied! Please check '--help' or documentation under 'running the pipeline' for details" } @@ -448,7 +448,7 @@ log.info "Schaffa, Schaffa, Genome Baua!" 
/////////////////////////////////////////////////// // BWA Index -if( params.bwa_index == '' && !params.fasta.isEmpty() && (params.mapper == 'bwaaln' || params.mapper == 'bwamem' || params.mapper == 'circularmapper')){ +if( !params.bwa_index && params.fasta && (params.mapper == 'bwaaln' || params.mapper == 'bwamem' || params.mapper == 'circularmapper')){ process makeBWAIndex { label 'sc_medium' tag "${fasta}" @@ -477,7 +477,7 @@ if( params.bwa_index == '' && !params.fasta.isEmpty() && (params.mapper == 'bwaa } // bowtie2 Index -if(params.bt2_index == '' && !params.fasta.isEmpty() && params.mapper == "bowtie2"){ +if( !params.bt2_index && params.fasta && params.mapper == "bowtie2"){ process makeBT2Index { label 'sc_medium' tag "${fasta}" @@ -508,7 +508,7 @@ if(params.bt2_index == '' && !params.fasta.isEmpty() && params.mapper == "bowtie } // FASTA Index (FAI) -if (params.fasta_index != '') { +if (params.fasta_index) { Channel .fromPath( params.fasta_index ) .set { ch_fai_for_skipfastaindexing } @@ -527,7 +527,7 @@ process makeFastaIndex { else null } - when: params.fasta_index == '' && !params.fasta.isEmpty() && ( params.mapper == 'bwaaln' || params.mapper == 'bwamem' || params.mapper == 'circularmapper') + when: !params.fasta_index && params.fasta && ( params.mapper == 'bwaaln' || params.mapper == 'bwamem' || params.mapper == 'circularmapper') input: path fasta from ch_fasta_for_faidx @@ -548,7 +548,7 @@ ch_fai_for_skipfastaindexing.mix(ch_fasta_faidx_index) // Stage dict index file if supplied, else load it into the channel -if (params.seq_dict != '') { +if (params.seq_dict) { Channel .fromPath( params.seq_dict ) .set { ch_dict_for_skipdict } @@ -567,7 +567,7 @@ process makeSeqDict { else null } - when: params.seq_dict == '' && !params.fasta.isEmpty() + when: !params.seq_dict && params.fasta input: path fasta from ch_fasta_for_seqdict @@ -2023,8 +2023,8 @@ process pmdtools { script: //Check which treatment for the libraries was used def treatment = udg ? 
(udg == 'half' ? '--UDGhalf' : '--CpG') : '--UDGminus' - if(params.snpcapture_bed != ''){ - snpcap = (params.pmdtools_reference_mask != '') ? "--refseq ${params.pmdtools_reference_mask}" : '' + if(params.snpcapture_bed){ + snpcap = (params.pmdtools_reference_mask) ? "--refseq ${params.pmdtools_reference_mask}" : '' log.info"######No reference mask specified for PMDtools, therefore ignoring that for downstream analysis!" } else { snpcap = '' @@ -2163,7 +2163,7 @@ process qualimap { tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*") into ch_qualimap_results script: - def snpcap = params.snpcapture_bed != '' ? "-gff ${params.snpcapture_bed}" : '' + def snpcap = params.snpcapture_bed ? "-gff ${params.snpcapture_bed}" : '' """ qualimap bamqc -bam $bam -nt ${task.cpus} -outdir . -outformat "HTML" ${snpcap} --java-mem-size=${task.memory.toGiga()}G """ @@ -2234,9 +2234,9 @@ process genotyping_ug { tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.realign.{bam,bai}") optional true script: - def defaultbasequalities = params.gatk_ug_defaultbasequalities == '' ? '' : " --defaultBaseQualities ${params.gatk_ug_defaultbasequalities}" + def defaultbasequalities = !params.gatk_ug_defaultbasequalities ? '' : " --defaultBaseQualities ${params.gatk_ug_defaultbasequalities}" def keep_realign = params.gatk_ug_keep_realign_bam ? 
"samtools index ${samplename}.realign.bam" : "rm ${samplename}.realign.{bam,bai}" - if (params.gatk_dbsnp == '') + if (!params.gatk_dbsnp) """ samtools index -b ${bam} gatk3 -T RealignerTargetCreator -R ${fasta} -I ${bam} -nt ${task.cpus} -o ${samplename}.intervals ${defaultbasequalities} @@ -2247,7 +2247,7 @@ process genotyping_ug { pigz -p ${task.cpus} ${samplename}.unifiedgenotyper.vcf """ - else if (params.gatk_dbsnp != '') + else if (params.gatk_dbsnp) """ samtools index ${bam} gatk3 -T RealignerTargetCreator -R ${fasta} -I ${bam} -nt ${task.cpus} -o ${samplename}.intervals ${defaultbasequalities} @@ -2280,13 +2280,13 @@ process genotyping_hc { tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*vcf.gz") script: - if (params.gatk_dbsnp == '') + if (!params.gatk_dbsnp) """ gatk HaplotypeCaller -R ${fasta} -I ${bam} -O ${samplename}.haplotypecaller.vcf -stand-call-conf ${params.gatk_call_conf} --sample-ploidy ${params.gatk_ploidy} --output-mode ${params.gatk_hc_out_mode} --emit-ref-confidence ${params.gatk_hc_emitrefconf} pigz -p ${task.cpus} ${samplename}.haplotypecaller.vcf """ - else if (params.gatk_dbsnp != '') + else if (params.gatk_dbsnp) """ gatk HaplotypeCaller -R ${fasta} -I ${bam} -O ${samplename}.haplotypecaller.vcf --dbsnp ${params.gatk_dbsnp} -stand-call-conf ${params.gatk_call_conf} --sample_ploidy ${params.gatk_ploidy} --output_mode ${params.gatk_hc_out_mode} --emit-ref-confidence ${params.gatk_hc_emitrefconf} pigz -p ${task.cpus} ${samplename}.haplotypecaller.vcf @@ -2470,8 +2470,8 @@ process vcf2genome { tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*.fasta.gz") script: - def out = "${params.vcf2genome_outfile}" == '' ? "${samplename}.fasta" : "${params.vcf2genome_outfile}" - def fasta_head = "${params.vcf2genome_header}" == '' ? "${samplename}" : "${params.vcf2genome_header}" + def out = !params.vcf2genome_outfile ? 
"${samplename}.fasta" : "${params.vcf2genome_outfile}" + def fasta_head = !params.vcf2genome_header ? "${samplename}" : "${params.vcf2genome_header}" """ pigz -f -d -p ${task.cpus} *.vcf.gz vcf2genome -Xmx${task.memory.toGiga()}g -draft ${out}.fasta -draftname "${fasta_head}" -in ${vcf.baseName} -minc ${params.vcf2genome_minc} -minfreq ${params.vcf2genome_minfreq} -minq ${params.vcf2genome_minq} -ref ${fasta} -refMod ${out}_refmod.fasta -uncertain ${out}_uncertainy.fasta @@ -2691,7 +2691,7 @@ if (params.metagenomic_tool == 'malt') { .set {ch_input_for_metagenomic_kraken} ch_input_for_metagenomic_malt = Channel.empty() -} else if ( params.metagenomic_tool == '' ) { +} else if ( !params.metagenomic_tool ) { ch_input_for_metagenomic_malt = Channel.empty() ch_input_for_metagenomic_kraken = Channel.empty() @@ -2811,7 +2811,7 @@ if (params.run_metagenomic_screening && params.database.endsWith(".tar.gz") && p """ } -} else if (! params.database.endsWith(".tar.gz") && params.run_metagenomic_screening && params.metagenomic_tool == 'kraken') { +} else if (params.database && ! 
params.database.endsWith(".tar.gz") && params.run_metagenomic_screening && params.metagenomic_tool == 'kraken') { ch_krakendb = Channel.fromPath(params.database).first() } else { ch_krakendb = Channel.empty() diff --git a/nextflow.config b/nextflow.config index 40d3bb6c3..8a74f5b48 100644 --- a/nextflow.config +++ b/nextflow.config @@ -14,12 +14,12 @@ params { single_end = false outdir = './results' publish_dir_mode = 'copy' - config_profile_name = '' + config_profile_name = null // aws - awsqueue = '' + awsqueue = null awsregion = 'eu-west-1' - awscli = '' + awscli = null //Pipeline options enable_conda = false @@ -35,15 +35,15 @@ params { bam = false // Optional input information - snpcapture_bed = '' + snpcapture_bed = null run_convertinputbam = false //Input reference - fasta = '' - bwa_index = '' - bt2_index = '' - fasta_index = '' - seq_dict = '' + fasta = null + bwa_index = null + bt2_index = null + fasta_index = null + seq_dict = null large_ref = false save_reference = false @@ -117,7 +117,7 @@ params { run_pmdtools = false pmdtools_range = 10 pmdtools_threshold = 3 - pmdtools_reference_mask = '' + pmdtools_reference_mask = null pmdtools_max_reads = 10000 pmdtools_platypus = false @@ -128,7 +128,7 @@ params { //Bedtools settings run_bedtools_coverage = false - anno_file = '' + anno_file = null //bamUtils trimbam settings run_trim_bam = false @@ -140,26 +140,26 @@ params { //Genotyping options run_genotyping = false - genotyping_tool = '' + genotyping_tool = null genotyping_source = 'raw' // gatk options gatk_call_conf = 30 gatk_ploidy = 2 gatk_downsample = 250 - gatk_dbsnp = '' + gatk_dbsnp = null gatk_hc_out_mode = 'EMIT_VARIANTS_ONLY' gatk_hc_emitrefconf = 'GVCF' gatk_ug_genotype_model = 'SNP' gatk_ug_out_mode = 'EMIT_VARIANTS_ONLY' gatk_ug_keep_realign_bam = false - gatk_ug_defaultbasequalities = '' + gatk_ug_defaultbasequalities = null // freebayes options freebayes_C = 1 freebayes_g = 0 freebayes_p = 2 // Sequencetools pileupCaller - 
pileupcaller_snpfile = '' - pileupcaller_bedfile = '' + pileupcaller_snpfile = null + pileupcaller_bedfile = null pileupcaller_method = 'randomHaploid' pileupcaller_transitions_mode = 'AllSites' // ANGSD Genotype Likelihoods @@ -183,7 +183,7 @@ params { min_base_coverage = 5 min_allele_freq_hom = 0.9 min_allele_freq_het = 0.9 - additional_vcf_files = '' + additional_vcf_files = null reference_gff_annotations = 'NA' reference_gff_exclude = 'NA' snp_eff_results = 'NA' @@ -194,7 +194,7 @@ params { //Sex.DetERRmine settings run_sexdeterrmine = false - sexdeterrmine_bedfile = '' + sexdeterrmine_bedfile = null //Nuclear contamination based on chromosome X heterozygosity. run_nuclear_contamination = false @@ -206,8 +206,8 @@ params { metagenomic_complexity_filter = false metagenomic_complexity_entropy = 0.3 - metagenomic_tool = '' - database = '' + metagenomic_tool = null + database = null metagenomic_min_support_reads = 1 percent_identity = 85 malt_mode = 'BlastN' @@ -222,8 +222,8 @@ params { // maltextract - only including number // parameters if default documented or duplicate of MALT run_maltextract = false - maltextract_taxon_list = '' - maltextract_ncbifiles = '' + maltextract_taxon_list = null + maltextract_ncbifiles = null maltextract_filter = 'def_anc' maltextract_toppercent = 0.01 maltextract_destackingoff = false From ff618e0592ed9cd3df9eb3357064b633ae05426c Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 12 May 2021 12:06:53 +0200 Subject: [PATCH 07/11] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7be664c0e..5cd95ef8a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
### `Fixed` - [#723](https://github.com/nf-core/eager/issues/723) - Fixes empty fields in TSV resulting in uninformative error +- ### `Dependencies` ### `Deprecated` From 62a0c29ac4b569b110183cc5b969ee3bd8f6f3ac Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 12 May 2021 12:09:25 +0200 Subject: [PATCH 08/11] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5cd95ef8a..fbcff14fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### `Fixed` - [#723](https://github.com/nf-core/eager/issues/723) - Fixes empty fields in TSV resulting in uninformative error -- + ### `Dependencies` ### `Deprecated` From 0d87a0cb66b6c274314e998c60301c6144c88bb8 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 14 May 2021 13:46:10 +0200 Subject: [PATCH 09/11] Update CHANGELOG.md --- CHANGELOG.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 952d92350..78b9bbf03 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,15 +5,16 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
## v2.3.5dev - [date] -- [#722](https://github.com/nf-core/eager/issues/722) - Adds bwa `-o` flag for more flexibility in bwa parameters - ### `Added` -- [https://github.com/nf-core/eager/issues/736] - Add printing of multiqc run report location on successful completion +- [#722](https://github.com/nf-core/eager/issues/722) - Adds bwa `-o` flag for more flexibility in bwa parameters +- [#736](https://github.com/nf-core/eager/issues/736) - Add printing of multiqc run report location on successful completion ### `Fixed` - [#723](https://github.com/nf-core/eager/issues/723) - Fixes empty fields in TSV resulting in uninformative error +- Updated template to nf-core/tools 1.14 + ### `Dependencies` From 6ebf66083959cf1b312d604da0aa563c670f2f3c Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 14 May 2021 13:46:28 +0200 Subject: [PATCH 10/11] Update CHANGELOG.md --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 78b9bbf03..b92fbed60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,6 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - [#723](https://github.com/nf-core/eager/issues/723) - Fixes empty fields in TSV resulting in uninformative error - Updated template to nf-core/tools 1.14 - ### `Dependencies` ### `Deprecated` From 3509b30ec72fbc751e5c3d7eea9e2db9fdd54600 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 14 May 2021 14:08:50 +0200 Subject: [PATCH 11/11] Ninja push for linting reasons --- lib/NfcoreSchema.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy index c2390f966..52ee73043 100644 --- a/lib/NfcoreSchema.groovy +++ b/lib/NfcoreSchema.groovy @@ -259,7 +259,7 @@ class NfcoreSchema { return new_params } - /* + /* * This method tries to read a JSON params file */ private static LinkedHashMap params_load(String json_schema) {