Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### `Fixed`

- [#653](https://github.com/nf-core/taxprofiler/pull/653) Enable runs for `PACBIO_SMRT` data (fixed by @LilyAnderssonLee)

### `Changed`

### `Dependencies`
Expand Down
2 changes: 1 addition & 1 deletion docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ Please see the rest of this page for information about how to prepare input samp

## Samplesheet inputs

nf-core/taxprofiler can accept as input raw or preprocessed single- or paired-end short-read (e.g. Illumina) FASTQ files, long-read FASTQ files (e.g. Oxford Nanopore), or FASTA sequences (available for a subset of profilers).
nf-core/taxprofiler can accept as input raw or preprocessed single- or paired-end short-read (e.g. Illumina) FASTQ files, long-read FASTQ files (e.g. Oxford Nanopore), or FASTA sequences (available for a subset of profilers). For PACBIO_SMRT data, please convert BAM files to FASTQ format before input.

You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 6 columns, and a header row as shown in the examples below. Furthermore, nf-core/taxprofiler also requires a second comma-separated file of 3 columns with a header row as in the examples below.

Expand Down
26 changes: 13 additions & 13 deletions subworkflows/local/profiling.nf
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,10 @@ workflow PROFILING {
[meta, input_reads, db_meta_new, db]
}
.filter { meta, _input_reads, db_meta_new, _db ->
if (db_meta_new.tool == 'bracken' && meta.instrument_platform == 'OXFORD_NANOPORE') {
log.warn("[nf-core/taxprofiler] Bracken has not been evaluated for Nanopore data. Skipping Bracken for sample ${meta.id}.")
if (db_meta_new.tool == 'bracken' && (meta.instrument_platform == 'OXFORD_NANOPORE' || meta.instrument_platform == 'PACBIO_SMRT')) {
log.warn("[nf-core/taxprofiler] Bracken has not been evaluated for long-read datasets. Skipping Bracken for sample ${meta.id}.")
}
db_meta_new.tool == 'kraken2' || (db_meta_new.tool == 'bracken' && meta.instrument_platform != 'OXFORD_NANOPORE')
db_meta_new.tool == 'kraken2' || (db_meta_new.tool == 'bracken' && (meta.instrument_platform != 'OXFORD_NANOPORE' && meta.instrument_platform != 'PACBIO_SMRT'))
}

ch_input_for_kraken2 = ch_prepare_for_kraken2.multiMap { it ->
Expand Down Expand Up @@ -378,25 +378,25 @@ workflow PROFILING {

ch_input_for_kmcp = ch_input_for_profiling.kmcp
.filter { meta, _input_reads, meta_db, _db ->
if (meta['instrument_platform'] == 'OXFORD_NANOPORE') {
if (meta.instrument_platform == 'OXFORD_NANOPORE' || meta.instrument_platform == 'PACBIO_SMRT') {
log.warn("[nf-core/taxprofiler] KMCP is only suitable for short-read metagenomic profiling, with much lower sensitivity on long-read datasets. Skipping KMCP for sample ${meta.id}.")
}
meta_db['tool'] == 'kmcp' && meta['instrument_platform'] != 'OXFORD_NANOPORE'
meta_db.tool == 'kmcp' && (meta.instrument_platform != 'OXFORD_NANOPORE' && meta.instrument_platform != 'PACBIO_SMRT')
}
.map { meta, input_reads, db_meta, db ->
def db_meta_keys = db_meta.keySet()
def db_meta_new = db_meta.subMap(db_meta_keys)

// Split the string, the arguments before semicolon should be parsed into kmcp search
def parsed_params = db_meta_new['db_params'].split(";")
def parsed_params = db_meta_new.db_params.split(";")
if (parsed_params.size() == 2) {
db_meta_new['db_params'] = parsed_params[0]
db_meta_new.db_params = parsed_params[0]
}
else if (parsed_params.size() == 0) {
db_meta_new['db_params'] = ""
db_meta_new.db_params = ""
}
else {
db_meta_new['db_params'] = parsed_params[0]
db_meta_new.db_params = parsed_params[0]
}

[meta, input_reads, db_meta_new, db]
Expand Down Expand Up @@ -424,7 +424,7 @@ workflow PROFILING {
def db_meta_keys = db_meta.keySet()
def db_meta_new = db_meta.subMap(db_meta_keys)

def parsed_params = db_meta['db_params'].split(";")
def parsed_params = db_meta.db_params.split(";")

if (parsed_params.size() == 2) {
db_meta_new = db_meta + [db_params: parsed_params[1]]
Expand Down Expand Up @@ -452,10 +452,10 @@ workflow PROFILING {

ch_input_for_ganonclassify = ch_input_for_profiling.ganon
.filter { meta, _input_reads, meta_db, _db ->
if (meta.instrument_platform == 'OXFORD_NANOPORE') {
log.warn("[nf-core/taxprofiler] Ganon has not been evaluated for Nanopore data. Skipping Ganon for sample ${meta.id}.")
if (meta.instrument_platform == 'OXFORD_NANOPORE' || meta.instrument_platform == 'PACBIO_SMRT') {
log.warn("[nf-core/taxprofiler] Ganon has not been evaluated for long-read datasets. Skipping Ganon for sample ${meta.id}.")
}
meta_db.tool == 'ganon' && meta.instrument_platform != 'OXFORD_NANOPORE'
meta_db.tool == 'ganon' && (meta.instrument_platform != 'OXFORD_NANOPORE' && meta.instrument_platform != 'PACBIO_SMRT')
}
.multiMap { it ->
reads: [it[0] + it[2], it[1]]
Expand Down
18 changes: 12 additions & 6 deletions workflows/taxprofiler.nf
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ workflow TAXPROFILER {
meta.instrument_platform = instrument_platform

// Define single_end based on the conditions
meta.single_end = (fastq_1 && !fastq_2 && instrument_platform != 'OXFORD_NANOPORE')
meta.single_end = (fastq_1 && !fastq_2 && instrument_platform != 'OXFORD_NANOPORE' && instrument_platform != 'PACBIO_SMRT')

// Define is_fasta based on the presence of fasta
meta.is_fasta = fasta ? true : false
Expand All @@ -95,6 +95,9 @@ workflow TAXPROFILER {
if (meta.instrument_platform == 'OXFORD_NANOPORE' && fastq_2) {
error("Error: Please check input samplesheet: for Oxford Nanopore reads entry `fastq_2` should be empty!")
}
if (meta.instrument_platform == 'PACBIO_SMRT' && fastq_2) {
error("Error: Please check input samplesheet: for PacBio reads entry `fastq_2` should be empty!")
}
if (meta.single_end && fastq_2) {
error("Error: Please check input samplesheet: for single-end reads entry `fastq_2` should be empty")
}
Expand All @@ -106,16 +109,19 @@ workflow TAXPROFILER {
nanopore: instrument_platform == 'OXFORD_NANOPORE' && !meta.is_fasta
meta.single_end = true
return [meta + [type: "long"], [fastq_1]]
pacbio: instrument_platform == 'PACBIO_SMRT' && !meta.is_fasta
meta.single_end = true
return [meta + [type: "long"], [fastq_1]]
fasta_short: meta.is_fasta && instrument_platform == 'ILLUMINA'
meta.single_end = true
return [meta + [type: "short"], [fasta]]
fasta_long: meta.is_fasta && instrument_platform == 'OXFORD_NANOPORE'
fasta_long: meta.is_fasta && (instrument_platform == 'OXFORD_NANOPORE' || instrument_platform == 'PACBIO_SMRT')
meta.single_end = true
return [meta + [type: "long"], [fasta]]
}

// Merge ch_input.fastq, ch_input.nanopore and ch_input.pacbio into a single channel
ch_input_for_fastqc = ch_input.fastq.mix(ch_input.nanopore)
ch_input_for_fastqc = ch_input.fastq.mix(ch_input.nanopore, ch_input.pacbio )

// Validate and decompress databases
ch_dbs_for_untar = databases.branch { db_meta, db_path ->
Expand Down Expand Up @@ -184,11 +190,11 @@ workflow TAXPROFILER {
}

if (params.perform_longread_qc) {
ch_longreads_preprocessed = LONGREAD_PREPROCESSING(ch_input.nanopore).reads.map { it -> [it[0], [it[1]]] }
ch_longreads_preprocessed_nanopore = LONGREAD_PREPROCESSING(ch_input.nanopore).reads.map { it -> [it[0], [it[1]]] }
ch_versions = ch_versions.mix(LONGREAD_PREPROCESSING.out.versions)
}
else {
ch_longreads_preprocessed = ch_input.nanopore
ch_longreads_preprocessed_nanopore = ch_input.nanopore
}

/*
Expand Down Expand Up @@ -224,7 +230,7 @@ workflow TAXPROFILER {
else {
ch_shortreads_hostremoved = ch_shortreads_filtered
}

ch_longreads_preprocessed = ch_longreads_preprocessed_nanopore.mix(ch_input.pacbio)
if (params.perform_longread_hostremoval) {
ch_longreads_hostremoved = LONGREAD_HOSTREMOVAL(ch_longreads_preprocessed, ch_reference, ch_longread_reference_index).reads
ch_versions = ch_versions.mix(LONGREAD_HOSTREMOVAL.out.versions)
Expand Down