Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### `Fixed`

- [#653](https://github.com/nf-core/taxprofiler/pull/653) Enable runs for `PACBIO_SMRT` data (fixed by @LilyAnderssonLee)

### `Changed`

### `Dependencies`
Expand Down
2 changes: 1 addition & 1 deletion docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ Please see the rest of this page for information about how to prepare input samp

## Samplesheet inputs

nf-core/taxprofiler can accept as input raw or preprocessed single- or paired-end short-read (e.g. Illumina) FASTQ files, long-read FASTQ files (e.g. Oxford Nanopore), or FASTA sequences (available for a subset of profilers).
nf-core/taxprofiler can accept as input raw or preprocessed single- or paired-end short-read (e.g. Illumina) FASTQ files, long-read FASTQ files (e.g. Oxford Nanopore), or FASTA sequences (available for a subset of profilers). For PACBIO_SMRT data, please convert BAM files to FASTQ format before input.

You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 6 columns, and a header row as shown in the examples below. Furthermore, nf-core/taxprofiler also requires a second comma-separated file of 3 columns with a header row as in the examples below.

Expand Down
26 changes: 13 additions & 13 deletions subworkflows/local/profiling.nf
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,10 @@ workflow PROFILING {
[meta, input_reads, db_meta_new, db]
}
.filter { meta, _input_reads, db_meta_new, _db ->
if (db_meta_new.tool == 'bracken' && meta.instrument_platform == 'OXFORD_NANOPORE') {
log.warn("[nf-core/taxprofiler] Bracken has not been evaluated for Nanopore data. Skipping Bracken for sample ${meta.id}.")
if (db_meta_new.tool == 'bracken' && (meta.instrument_platform == 'OXFORD_NANOPORE' || meta.instrument_platform == 'PACBIO_SMRT')) {
log.warn("[nf-core/taxprofiler] Bracken has not been evaluated for long-read datasets. Skipping Bracken for sample ${meta.id}.")
}
db_meta_new.tool == 'kraken2' || (db_meta_new.tool == 'bracken' && meta.instrument_platform != 'OXFORD_NANOPORE')
db_meta_new.tool == 'kraken2' || (db_meta_new.tool == 'bracken' && (meta.instrument_platform != 'OXFORD_NANOPORE' && meta.instrument_platform != 'PACBIO_SMRT'))
}

ch_input_for_kraken2 = ch_prepare_for_kraken2.multiMap { it ->
Expand Down Expand Up @@ -378,25 +378,25 @@ workflow PROFILING {

ch_input_for_kmcp = ch_input_for_profiling.kmcp
.filter { meta, _input_reads, meta_db, _db ->
if (meta['instrument_platform'] == 'OXFORD_NANOPORE') {
if (meta.instrument_platform == 'OXFORD_NANOPORE' || meta.instrument_platform == 'PACBIO_SMRT') {
log.warn("[nf-core/taxprofiler] KMCP is only suitable for short-read metagenomic profiling, with much lower sensitivity on long-read datasets. Skipping KMCP for sample ${meta.id}.")
}
meta_db['tool'] == 'kmcp' && meta['instrument_platform'] != 'OXFORD_NANOPORE'
meta_db.tool == 'kmcp' && (meta.instrument_platform != 'OXFORD_NANOPORE' && meta.instrument_platform != 'PACBIO_SMRT')
}
.map { meta, input_reads, db_meta, db ->
def db_meta_keys = db_meta.keySet()
def db_meta_new = db_meta.subMap(db_meta_keys)

// Split the string, the arguments before semicolon should be parsed into kmcp search
def parsed_params = db_meta_new['db_params'].split(";")
def parsed_params = db_meta_new.db_params.split(";")
if (parsed_params.size() == 2) {
db_meta_new['db_params'] = parsed_params[0]
db_meta_new.db_params = parsed_params[0]
}
else if (parsed_params.size() == 0) {
db_meta_new['db_params'] = ""
db_meta_new.db_params = ""
}
else {
db_meta_new['db_params'] = parsed_params[0]
db_meta_new.db_params = parsed_params[0]
}

[meta, input_reads, db_meta_new, db]
Expand Down Expand Up @@ -424,7 +424,7 @@ workflow PROFILING {
def db_meta_keys = db_meta.keySet()
def db_meta_new = db_meta.subMap(db_meta_keys)

def parsed_params = db_meta['db_params'].split(";")
def parsed_params = db_meta.db_params.split(";")

if (parsed_params.size() == 2) {
db_meta_new = db_meta + [db_params: parsed_params[1]]
Expand Down Expand Up @@ -452,10 +452,10 @@ workflow PROFILING {

ch_input_for_ganonclassify = ch_input_for_profiling.ganon
.filter { meta, _input_reads, meta_db, _db ->
if (meta.instrument_platform == 'OXFORD_NANOPORE') {
log.warn("[nf-core/taxprofiler] Ganon has not been evaluated for Nanopore data. Skipping Ganon for sample ${meta.id}.")
if (meta.instrument_platform == 'OXFORD_NANOPORE' || meta.instrument_platform == 'PACBIO_SMRT') {
log.warn("[nf-core/taxprofiler] Ganon has not been evaluated for long-read datasets. Skipping Ganon for sample ${meta.id}.")
}
meta_db.tool == 'ganon' && meta.instrument_platform != 'OXFORD_NANOPORE'
meta_db.tool == 'ganon' && (meta.instrument_platform != 'OXFORD_NANOPORE' && meta.instrument_platform != 'PACBIO_SMRT')
}
.multiMap { it ->
reads: [it[0] + it[2], it[1]]
Expand Down
18 changes: 12 additions & 6 deletions workflows/taxprofiler.nf
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ workflow TAXPROFILER {
meta.instrument_platform = instrument_platform

// Define single_end based on the conditions
meta.single_end = (fastq_1 && !fastq_2 && instrument_platform != 'OXFORD_NANOPORE')
meta.single_end = (fastq_1 && !fastq_2 && instrument_platform != 'OXFORD_NANOPORE' && instrument_platform != 'PACBIO_SMRT')

// Define is_fasta based on the presence of fasta
meta.is_fasta = fasta ? true : false
Expand All @@ -95,6 +95,9 @@ workflow TAXPROFILER {
if (meta.instrument_platform == 'OXFORD_NANOPORE' && fastq_2) {
error("Error: Please check input samplesheet: for Oxford Nanopore reads entry `fastq_2` should be empty!")
}
if (meta.instrument_platform == 'PACBIO_SMRT' && fastq_2) {
error("Error: Please check input samplesheet: for PacBio reads entry `fastq_2` should be empty!")
}
if (meta.single_end && fastq_2) {
error("Error: Please check input samplesheet: for single-end reads entry `fastq_2` should be empty")
}
Expand All @@ -106,16 +109,19 @@ workflow TAXPROFILER {
nanopore: instrument_platform == 'OXFORD_NANOPORE' && !meta.is_fasta
meta.single_end = true
return [meta + [type: "long"], [fastq_1]]
pacbio: instrument_platform == 'PACBIO_SMRT' && !meta.is_fasta
meta.single_end = true
return [meta + [type: "long"], [fastq_1]]
fasta_short: meta.is_fasta && instrument_platform == 'ILLUMINA'
meta.single_end = true
return [meta + [type: "short"], [fasta]]
fasta_long: meta.is_fasta && instrument_platform == 'OXFORD_NANOPORE'
fasta_long: meta.is_fasta && (instrument_platform == 'OXFORD_NANOPORE' || instrument_platform == 'PACBIO_SMRT')
meta.single_end = true
return [meta + [type: "long"], [fasta]]
}

// Merge ch_input.fastq, ch_input.nanopore and ch_input.pacbio into a single channel
ch_input_for_fastqc = ch_input.fastq.mix(ch_input.nanopore)
ch_input_for_fastqc = ch_input.fastq.mix(ch_input.nanopore, ch_input.pacbio )

// Validate and decompress databases
ch_dbs_for_untar = databases.branch { db_meta, db_path ->
Expand Down Expand Up @@ -184,11 +190,11 @@ workflow TAXPROFILER {
}

if (params.perform_longread_qc) {
ch_longreads_preprocessed = LONGREAD_PREPROCESSING(ch_input.nanopore).reads.map { it -> [it[0], [it[1]]] }
ch_longreads_preprocessed_nanopore = LONGREAD_PREPROCESSING(ch_input.nanopore).reads.map { it -> [it[0], [it[1]]] }
ch_versions = ch_versions.mix(LONGREAD_PREPROCESSING.out.versions)
}
else {
ch_longreads_preprocessed = ch_input.nanopore
ch_longreads_preprocessed_nanopore = ch_input.nanopore
}

/*
Expand Down Expand Up @@ -224,7 +230,7 @@ workflow TAXPROFILER {
else {
ch_shortreads_hostremoved = ch_shortreads_filtered
}

ch_longreads_preprocessed = ch_longreads_preprocessed_nanopore.mix(ch_input.pacbio)
if (params.perform_longread_hostremoval) {
ch_longreads_hostremoved = LONGREAD_HOSTREMOVAL(ch_longreads_preprocessed, ch_reference, ch_longread_reference_index).reads
ch_versions = ch_versions.mix(LONGREAD_HOSTREMOVAL.out.versions)
Expand Down