From 22e9570e7fae77fc96bc75f5e6dc293e479e1028 Mon Sep 17 00:00:00 2001
From: itrujnara <itrujnara@wp.pl>
Date: Mon, 29 Apr 2024 12:08:55 +0200
Subject: [PATCH 01/29] Increased font size in plots

---
 bin/plot_orthologs.R | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/bin/plot_orthologs.R b/bin/plot_orthologs.R
index c533d75..891efd0 100755
--- a/bin/plot_orthologs.R
+++ b/bin/plot_orthologs.R
@@ -15,6 +15,7 @@ if (length(args) < 2) {
 # Styles
 text_color <- "#DDDDDD"
 bg_color <- "transparent"
+font_size <- 16
 
 # Load the data
 data <- read.csv(args[1], header = TRUE, stringsAsFactors = FALSE)
@@ -38,9 +39,9 @@ p <- ggplot(melted_crosstable, aes(x = method, y = count, fill = score)) +
     labs(title = "Support for predictions", x = "Database", y = "Number of orthologs", fill = "Support") +
     scale_fill_manual(values = c("#59B4C3", "#74E291", "#8F7AC2", "#EFF396", "#FF9A8D")) +
     theme(legend.position = "right",
-        text = element_text(size = 12, color = text_color),
-        axis.text.x = element_text(color = text_color),
-        axis.text.y = element_text(color = text_color),
+        text = element_text(size = font_size, color = text_color),
+        axis.text.x = element_text(size = font_size, color = text_color),
+        axis.text.y = element_text(size = font_size, color = text_color),
         plot.background = element_rect(color = bg_color, fill = bg_color),
         panel.background = element_rect(color = bg_color, fill = bg_color))
 
@@ -54,7 +55,7 @@ for (i in colnames(data)[4:ncol(data)-1]) {
 }
 venn.plot <- ggVennDiagram(venn.data, set_color = text_color) +
     theme(legend.position = "none",
-        text = element_text(size = 12, color = text_color),
+        text = element_text(size = font_size, color = text_color),
         plot.background = element_rect(color = bg_color, fill = bg_color),
         panel.background = element_rect(color = bg_color, fill = bg_color))
 ggsave(paste0(args[2], "_venn.png"), plot = venn.plot, width = 6, height = 6, dpi = 300)
@@ -81,9 +82,9 @@ p <- ggplot(jaccard, aes(x = method1, y = method2, fill = jaccard)) +
     theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
     labs(title = "Jaccard Index", x = "", y = "", fill = "Jaccard Index") +
     theme(legend.position = "right",
-        text = element_text(size = 12, color = text_color),
-        axis.text.x = element_text(color = text_color),
-        axis.text.y = element_text(color = text_color),
+        text = element_text(size = font_size, color = text_color),
+        axis.text.x = element_text(size = font_size, color = text_color),
+        axis.text.y = element_text(size = font_size, color = text_color),
         plot.background = element_rect(color = bg_color, fill = bg_color),
         panel.background = element_rect(color = bg_color, fill = bg_color))
 

From 3187ac41bcb5976e82985dc68ff462a9a2ebcfc2 Mon Sep 17 00:00:00 2001
From: itrujnara <itrujnara@wp.pl>
Date: Mon, 29 Apr 2024 12:09:33 +0200
Subject: [PATCH 02/29] Fixed a bug if report is generated without MSA

---
 subworkflows/local/report.nf | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/subworkflows/local/report.nf b/subworkflows/local/report.nf
index 9dadae5..dcb1a84 100644
--- a/subworkflows/local/report.nf
+++ b/subworkflows/local/report.nf
@@ -21,16 +21,21 @@ workflow REPORT {
 
     main:
     ch_versions = Channel.empty()
+    ch_fasta    = ch_seqinfo.map { [it[0], []] }
 
     DUMP_PARAMS(
         ch_seqinfo.map { [it[0], it[3]] }
     )
 
-    CONVERT_FASTA(ch_alignment)
+    if(!params.skip_downstream) {
+        CONVERT_FASTA(ch_alignment)
 
-    ch_versions
-        .mix(CONVERT_FASTA.out.versions)
-        .set { ch_versions }
+        ch_fasta = CONVERT_FASTA.out.fasta
+
+        ch_versions
+            .mix(CONVERT_FASTA.out.versions)
+            .set { ch_versions }
+    }
 
     ch_forreport = ch_seqinfo
         .join(ch_scoretable, by:0)
@@ -43,7 +48,7 @@ workflow REPORT {
         .join(ch_seqmisses, by:0)
         .join(ch_strhits, by:0)
         .join(ch_strmisses, by:0)
-        .join(CONVERT_FASTA.out.fasta, by:0)
+        .join(ch_fasta, by:0)
         .join(ch_iqtree, by:0)
         .join(ch_fastme, by:0)
         .join(DUMP_PARAMS.out.params, by:0)

From 59bccc461b4ad5c4e20ee3912d3e2cdd1f27309c Mon Sep 17 00:00:00 2001
From: itrujnara <itrujnara@wp.pl>
Date: Mon, 29 Apr 2024 12:12:37 +0200
Subject: [PATCH 03/29] Linting fix - single quotes in resource label

---
 modules/local/convert_fasta.nf             | 2 +-
 modules/local/convert_phylip.nf            | 2 +-
 modules/local/dump_params.nf               | 2 +-
 modules/local/fetch_afdb_structures.nf     | 2 +-
 modules/local/fetch_eggnog_group_local.nf  | 2 +-
 modules/local/fetch_oma_group_local.nf     | 2 +-
 modules/local/fetch_panther_group_local.nf | 2 +-
 modules/local/fetch_sequences_online.nf    | 2 +-
 modules/local/filter_fasta.nf              | 2 +-
 modules/local/make_stats.nf                | 2 +-
 modules/local/plot_tree.nf                 | 2 +-
 11 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/modules/local/convert_fasta.nf b/modules/local/convert_fasta.nf
index 79cfe51..7b32950 100644
--- a/modules/local/convert_fasta.nf
+++ b/modules/local/convert_fasta.nf
@@ -1,6 +1,6 @@
 process CONVERT_FASTA {
     tag "$input_file"
-    label "process_single"
+    label 'process_single'
 
     conda "conda-forge::python=3.11.0 conda-forge::biopython=1.83.0 conda-forge::requests=2.31.0"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
diff --git a/modules/local/convert_phylip.nf b/modules/local/convert_phylip.nf
index 1591ac6..11dab37 100644
--- a/modules/local/convert_phylip.nf
+++ b/modules/local/convert_phylip.nf
@@ -1,6 +1,6 @@
 process CONVERT_PHYLIP {
     tag "$input_file"
-    label "process_single"
+    label 'process_single'
 
     conda "conda-forge::python=3.11.0 conda-forge::biopython=1.83.0 conda-forge::requests=2.31.0"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
diff --git a/modules/local/dump_params.nf b/modules/local/dump_params.nf
index 0406a15..3b3f751 100644
--- a/modules/local/dump_params.nf
+++ b/modules/local/dump_params.nf
@@ -1,6 +1,6 @@
 process DUMP_PARAMS {
     tag "$meta.id"
-    label "process_single"
+    label 'process_single'
 
     input:
     tuple val(meta), path(exact)
diff --git a/modules/local/fetch_afdb_structures.nf b/modules/local/fetch_afdb_structures.nf
index 9f3d04b..d560887 100644
--- a/modules/local/fetch_afdb_structures.nf
+++ b/modules/local/fetch_afdb_structures.nf
@@ -1,6 +1,6 @@
 process FETCH_AFDB_STRUCTURES {
     tag "$meta.id"
-    label "process_single"
+    label 'process_single'
 
     conda "conda-forge::python=3.11.0 conda-forge::biopython=1.83.0 conda-forge::requests=2.31.0"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
diff --git a/modules/local/fetch_eggnog_group_local.nf b/modules/local/fetch_eggnog_group_local.nf
index a227132..d35e731 100644
--- a/modules/local/fetch_eggnog_group_local.nf
+++ b/modules/local/fetch_eggnog_group_local.nf
@@ -1,6 +1,6 @@
 process FETCH_EGGNOG_GROUP_LOCAL {
     tag "$meta.id"
-    label "process_short"
+    label 'process_short'
 
     input:
     tuple val(meta), path(uniprot_id), path(taxid), path(exact)
diff --git a/modules/local/fetch_oma_group_local.nf b/modules/local/fetch_oma_group_local.nf
index 4c3d231..1dc4fc7 100644
--- a/modules/local/fetch_oma_group_local.nf
+++ b/modules/local/fetch_oma_group_local.nf
@@ -1,6 +1,6 @@
 process FETCH_OMA_GROUP_LOCAL {
     tag "$meta.id"
-    label "process_short"
+    label 'process_single'
 
     input:
     tuple val(meta), path(uniprot_id), path(taxid), path(exact)
diff --git a/modules/local/fetch_panther_group_local.nf b/modules/local/fetch_panther_group_local.nf
index 42948e5..9e4ef1f 100644
--- a/modules/local/fetch_panther_group_local.nf
+++ b/modules/local/fetch_panther_group_local.nf
@@ -1,6 +1,6 @@
 process FETCH_PANTHER_GROUP_LOCAL {
     tag "$meta.id"
-    label "process_short"
+    label 'process_single'
 
     input:
     tuple val(meta), path(uniprot_id), path(taxid), path(exact)
diff --git a/modules/local/fetch_sequences_online.nf b/modules/local/fetch_sequences_online.nf
index 2026c66..5242abe 100644
--- a/modules/local/fetch_sequences_online.nf
+++ b/modules/local/fetch_sequences_online.nf
@@ -1,6 +1,6 @@
 process FETCH_SEQUENCES_ONLINE {
     tag "${meta.id}"
-    label "process_single"
+    label 'process_single'
 
     conda "conda-forge::python=3.11.0 conda-forge::biopython=1.83.0 conda-forge::requests=2.31.0"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
diff --git a/modules/local/filter_fasta.nf b/modules/local/filter_fasta.nf
index fa69e30..a39ca49 100644
--- a/modules/local/filter_fasta.nf
+++ b/modules/local/filter_fasta.nf
@@ -1,6 +1,6 @@
 process FILTER_FASTA {
     tag "$meta.id"
-    label "process_single"
+    label 'process_single'
 
     input:
     tuple val(meta), path(fasta), path(structures)
diff --git a/modules/local/make_stats.nf b/modules/local/make_stats.nf
index a62e9f1..f1e7b3d 100644
--- a/modules/local/make_stats.nf
+++ b/modules/local/make_stats.nf
@@ -1,6 +1,6 @@
 process MAKE_STATS {
     tag "$meta.id"
-    label "process_single"
+    label 'process_single'
 
     conda "conda-forge::python=3.11.0 conda-forge::biopython=1.83.0 conda-forge::requests=2.31.0"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
diff --git a/modules/local/plot_tree.nf b/modules/local/plot_tree.nf
index 509bd59..238df56 100644
--- a/modules/local/plot_tree.nf
+++ b/modules/local/plot_tree.nf
@@ -1,6 +1,6 @@
 process PLOT_TREE {
     tag "$meta.id"
-    label "process_single"
+    label 'process_single'
 
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'docker://itrujnara/plot-tree:1.0.0' :

From 0e6098f4fa2bbca1b20d0ce9d18b016ea9a0a845 Mon Sep 17 00:00:00 2001
From: itrujnara <itrujnara@wp.pl>
Date: Mon, 29 Apr 2024 12:13:27 +0200
Subject: [PATCH 04/29] Linting fix again - single quotes

---
 modules/local/make_report.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/make_report.nf b/modules/local/make_report.nf
index 67a8d57..970d318 100644
--- a/modules/local/make_report.nf
+++ b/modules/local/make_report.nf
@@ -1,6 +1,6 @@
 process MAKE_REPORT {
     tag "$meta.id"
-    label "process_single"
+    label 'process_single'
 
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'docker://itrujnara/orthologs-report:1.0.0' :

From 9e113c16c223ef17b60d0c7fcc041650e37ec91a Mon Sep 17 00:00:00 2001
From: itrujnara <itrujnara@wp.pl>
Date: Mon, 29 Apr 2024 12:14:19 +0200
Subject: [PATCH 05/29] Linting fix - wrong resource label

---
 modules/local/fetch_eggnog_group_local.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/fetch_eggnog_group_local.nf b/modules/local/fetch_eggnog_group_local.nf
index d35e731..96b187d 100644
--- a/modules/local/fetch_eggnog_group_local.nf
+++ b/modules/local/fetch_eggnog_group_local.nf
@@ -1,6 +1,6 @@
 process FETCH_EGGNOG_GROUP_LOCAL {
     tag "$meta.id"
-    label 'process_short'
+    label 'process_single'
 
     input:
     tuple val(meta), path(uniprot_id), path(taxid), path(exact)

From 40b51002c1e9c82a2e64adb781cd9019d0ca293e Mon Sep 17 00:00:00 2001
From: itrujnara <itrujnara@wp.pl>
Date: Mon, 29 Apr 2024 12:25:15 +0200
Subject: [PATCH 06/29] Linting fix - added missing containers

---
 modules/local/create_tcoffeetemplate.nf    | 4 ++++
 modules/local/dump_params.nf               | 4 ++++
 modules/local/fetch_eggnog_group_local.nf  | 5 +++++
 modules/local/fetch_oma_group_local.nf     | 5 +++++
 modules/local/fetch_panther_group_local.nf | 5 +++++
 modules/local/filter_fasta.nf              | 5 +++++
 6 files changed, 28 insertions(+)

diff --git a/modules/local/create_tcoffeetemplate.nf b/modules/local/create_tcoffeetemplate.nf
index 3d845fb..071c3bf 100644
--- a/modules/local/create_tcoffeetemplate.nf
+++ b/modules/local/create_tcoffeetemplate.nf
@@ -2,6 +2,10 @@ process CREATE_TCOFFEETEMPLATE {
     tag "$meta.id"
     label 'process_low'
 
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+    'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
+    'nf-core/ubuntu:20.04' }"
+
     input:
     tuple val(meta), path(accessory_informations)
 
diff --git a/modules/local/dump_params.nf b/modules/local/dump_params.nf
index 3b3f751..f354fe2 100644
--- a/modules/local/dump_params.nf
+++ b/modules/local/dump_params.nf
@@ -2,6 +2,10 @@ process DUMP_PARAMS {
     tag "$meta.id"
     label 'process_single'
 
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+    'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
+    'nf-core/ubuntu:20.04' }"
+
     input:
     tuple val(meta), path(exact)
 
diff --git a/modules/local/fetch_eggnog_group_local.nf b/modules/local/fetch_eggnog_group_local.nf
index 96b187d..32df3da 100644
--- a/modules/local/fetch_eggnog_group_local.nf
+++ b/modules/local/fetch_eggnog_group_local.nf
@@ -2,6 +2,11 @@ process FETCH_EGGNOG_GROUP_LOCAL {
     tag "$meta.id"
     label 'process_single'
 
+    conda "conda-forge::python=3.11.0 conda-forge::biopython=1.83.0 conda-forge::requests=2.31.0"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-bc54124b36864a4af42a9db48b90a404b5869e7e:5258b8e5ba20587b7cbf3e942e973af5045a1e59-0' :
+        'biocontainers/mulled-v2-bc54124b36864a4af42a9db48b90a404b5869e7e:5258b8e5ba20587b7cbf3e942e973af5045a1e59-0' }"
+
     input:
     tuple val(meta), path(uniprot_id), path(taxid), path(exact)
     path db
diff --git a/modules/local/fetch_oma_group_local.nf b/modules/local/fetch_oma_group_local.nf
index 1dc4fc7..db08c55 100644
--- a/modules/local/fetch_oma_group_local.nf
+++ b/modules/local/fetch_oma_group_local.nf
@@ -2,6 +2,11 @@ process FETCH_OMA_GROUP_LOCAL {
     tag "$meta.id"
     label 'process_single'
 
+    conda "conda-forge::python=3.11.0 conda-forge::biopython=1.83.0 conda-forge::requests=2.31.0"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-bc54124b36864a4af42a9db48b90a404b5869e7e:5258b8e5ba20587b7cbf3e942e973af5045a1e59-0' :
+        'biocontainers/mulled-v2-bc54124b36864a4af42a9db48b90a404b5869e7e:5258b8e5ba20587b7cbf3e942e973af5045a1e59-0' }"
+
     input:
     tuple val(meta), path(uniprot_id), path(taxid), path(exact)
     path db
diff --git a/modules/local/fetch_panther_group_local.nf b/modules/local/fetch_panther_group_local.nf
index 9e4ef1f..be4a9a1 100644
--- a/modules/local/fetch_panther_group_local.nf
+++ b/modules/local/fetch_panther_group_local.nf
@@ -2,6 +2,11 @@ process FETCH_PANTHER_GROUP_LOCAL {
     tag "$meta.id"
     label 'process_single'
 
+    conda "conda-forge::python=3.11.0"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/python:3.11.0' :
+        'biocontainers/python:3.11.0' }"
+
     input:
     tuple val(meta), path(uniprot_id), path(taxid), path(exact)
     path panther_db
diff --git a/modules/local/filter_fasta.nf b/modules/local/filter_fasta.nf
index a39ca49..d65a64b 100644
--- a/modules/local/filter_fasta.nf
+++ b/modules/local/filter_fasta.nf
@@ -2,6 +2,11 @@ process FILTER_FASTA {
     tag "$meta.id"
     label 'process_single'
 
+    conda "conda-forge::python=3.11.0"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/python:3.11.0' :
+        'biocontainers/python:3.11.0' }"
+
     input:
     tuple val(meta), path(fasta), path(structures)
 

From 8da78a25d048fbff495a5fb675093f029f26286e Mon Sep 17 00:00:00 2001
From: itrujnara <itrujnara@wp.pl>
Date: Mon, 29 Apr 2024 12:32:56 +0200
Subject: [PATCH 07/29] Linting fix - nonexistent container version

---
 modules/local/fetch_panther_group_local.nf | 6 +++---
 modules/local/filter_fasta.nf              | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/modules/local/fetch_panther_group_local.nf b/modules/local/fetch_panther_group_local.nf
index be4a9a1..aa178dd 100644
--- a/modules/local/fetch_panther_group_local.nf
+++ b/modules/local/fetch_panther_group_local.nf
@@ -2,10 +2,10 @@ process FETCH_PANTHER_GROUP_LOCAL {
     tag "$meta.id"
     label 'process_single'
 
-    conda "conda-forge::python=3.11.0"
+    conda "conda-forge::python=3.10.0"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/python:3.11.0' :
-        'biocontainers/python:3.11.0' }"
+        'https://depot.galaxyproject.org/singularity/python:3.10' :
+        'biocontainers/python:3.10' }"
 
     input:
     tuple val(meta), path(uniprot_id), path(taxid), path(exact)
diff --git a/modules/local/filter_fasta.nf b/modules/local/filter_fasta.nf
index d65a64b..23a8f89 100644
--- a/modules/local/filter_fasta.nf
+++ b/modules/local/filter_fasta.nf
@@ -2,10 +2,10 @@ process FILTER_FASTA {
     tag "$meta.id"
     label 'process_single'
 
-    conda "conda-forge::python=3.11.0"
+    conda "conda-forge::python=3.10.0"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/python:3.11.0' :
-        'biocontainers/python:3.11.0' }"
+        'https://depot.galaxyproject.org/singularity/python:3.10' :
+        'biocontainers/python:3.10' }"
 
     input:
     tuple val(meta), path(fasta), path(structures)

From 6338f0fcd93ddd58736dc0dc22d20a2d82fc7066 Mon Sep 17 00:00:00 2001
From: itrujnara <itrujnara@wp.pl>
Date: Mon, 29 Apr 2024 12:38:14 +0200
Subject: [PATCH 08/29] Removed fulfilled TODOs from readme

---
 README.md | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/README.md b/README.md
index e7607ea..6f512ba 100644
--- a/README.md
+++ b/README.md
@@ -27,8 +27,6 @@
 
 ![nf-core-reportho tube map](docs/images/reportho_tube_map.svg?raw=true "nf-core-reportho tube map")
 
-<!-- TODO nf-core: Fill in short bullet-pointed list of the default steps in the pipeline -->
-
 1. **Obtain Query Information**: (depends on provided input) identification of Uniprot ID and taxon ID for the query or its closest homolog.
 2. **Fetch Orthologs**: fetching of ortholog predictions from public databases, either through API or from local snapshot.
 3. **Compare and Assemble**: calculation of agreement statistics, creation of ortholog lists, selection of the consensus list.
@@ -66,8 +64,6 @@ If using the latter format, you must set `--uniprot_query` to true.
 
 Now, you can run the pipeline using:
 
-<!-- TODO nf-core: update the following command to include all required parameters for a minimal example -->
-
 ```bash
 nextflow run nf-core/reportho \
    -profile <docker/singularity/.../institute> \

From cdaf4f63935f2de889f68c94b8e21f7aca8c47b6 Mon Sep 17 00:00:00 2001
From: itrujnara <itrujnara@wp.pl>
Date: Mon, 29 Apr 2024 12:40:06 +0200
Subject: [PATCH 09/29] Tweak to contributor names in readme

---
 README.md | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 6f512ba..8a9d492 100644
--- a/README.md
+++ b/README.md
@@ -85,15 +85,13 @@ For more details about the output files and reports, please refer to the
 
 ## Credits
 
-nf-core/reportho was originally written by itrujnara.
+nf-core/reportho was originally written by Igor Trujnara (@itrujnara).
 
 We thank the following people for their extensive assistance in the development of this pipeline:
 
-@lsantus
-
-@avignoli
-
-@JoseEspinosa
+- Luisa Santus (@lsantus)
+- Alessio Vignoli (@avignoli)
+- Jose Espinosa-Carrasco (@JoseEspinosa)
 
 ## Contributions and Support
 

From ab103aa0e3ae9ec1e8aa66a2980ce16832e044af Mon Sep 17 00:00:00 2001
From: itrujnara <itrujnara@wp.pl>
Date: Mon, 29 Apr 2024 12:56:08 +0200
Subject: [PATCH 10/29] Changed modules run script name

---
 modules/local/make_report.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/make_report.nf b/modules/local/make_report.nf
index 970d318..4d5aacd 100644
--- a/modules/local/make_report.nf
+++ b/modules/local/make_report.nf
@@ -48,7 +48,7 @@ process MAKE_REPORT {
     $iqtree_cmd
     $fastme_cmd
     yarn run build
-    echo "python3 -m http.server 0" > dist/${prefix}_run.sh
+    echo "python3 -m http.server 0" > dist/run.sh
     mv dist ${prefix}_dist
 
     cat <<- END_VERSIONS > versions.yml

From b4923312a44de8d70fbdd0b270df0d01805f5289 Mon Sep 17 00:00:00 2001
From: itrujnara <itrujnara@wp.pl>
Date: Mon, 29 Apr 2024 13:18:01 +0200
Subject: [PATCH 11/29] Tweaked usage.md to match the pipeline

---
 docs/usage.md | 41 +++++++++++------------------------------
 1 file changed, 11 insertions(+), 30 deletions(-)

diff --git a/docs/usage.md b/docs/usage.md
index f673563..d2c5106 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -10,45 +10,27 @@
 
 ## Samplesheet input
 
-You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below.
+You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 2 columns, and a header row as shown in the examples below.
 
 ```bash
 --input '[path to samplesheet file]'
 ```
 
-### Multiple runs of the same sample
-
-The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes:
-
-```csv title="samplesheet.csv"
-sample,fastq_1,fastq_2
-CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz
-CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz
-CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz
-```
-
 ### Full samplesheet
 
-The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below.
+The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 2 columns to match those defined in the table below.
 
-A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice.
+A final samplesheet file may look something like the one below.
 
 ```csv title="samplesheet.csv"
-sample,fastq_1,fastq_2
-CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz
-CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz
-CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz
-TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz,
-TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz,
-TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz,
-TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz,
+id,query
+BicD2,Q8TD16
 ```
 
-| Column    | Description                                                                                                                                                                            |
-| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `sample`  | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). |
-| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz".                                                             |
-| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz".                                                             |
+| Column  | Description                                                                                                                                                         |
+| ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `id`    | User-defined identifier. It is used to identify output files for the protein. Can be anything descriptive, as long as it does not contain spaces.                   |
+| `query` | The query of the user-specified type. If `--uniprot_query` is `true`, it should be a valid Uniprot accession. Otherwise, it should be a valid path to a FASTA file. |
 
 An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline.
 
@@ -57,7 +39,7 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p
 The typical command for running the pipeline is as follows:
 
 ```bash
-nextflow run nf-core/reportho --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile docker
+nextflow run nf-core/reportho --input ./samplesheet.csv --outdir ./results -profile docker
 ```
 
 This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles.
@@ -90,7 +72,6 @@ with `params.yaml` containing:
 ```yaml
 input: './samplesheet.csv'
 outdir: './results/'
-genome: 'GRCh37'
 <...>
 ```
 
@@ -112,7 +93,7 @@ First, go to the [nf-core/reportho releases page](https://github.com/nf-core/rep
 
 This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports.
 
-To further assist in reproducbility, you can use share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter.
+To further assist in reproducibility, you can share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter.
 
 :::tip
 If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles.

From c8db09abf6c72f1b28f2dacf6ac7682fac3a656f Mon Sep 17 00:00:00 2001
From: itrujnara <itrujnara@wp.pl>
Date: Mon, 29 Apr 2024 15:35:16 +0200
Subject: [PATCH 12/29] Updated output.md to match pipeline.

---
 assets/samplesheet.csv |   5 +-
 docs/output.md         | 180 +++++++++++++++++++++++++++++++++++------
 2 files changed, 156 insertions(+), 29 deletions(-)

diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv
index 5f653ab..2b40ea6 100644
--- a/assets/samplesheet.csv
+++ b/assets/samplesheet.csv
@@ -1,3 +1,2 @@
-sample,fastq_1,fastq_2
-SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz
-SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,
+id,query
+BicD2,Q8TD16
diff --git a/docs/output.md b/docs/output.md
index ab52940..e9e6ece 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -2,58 +2,186 @@
 
 ## Introduction
 
-This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline.
+This document describes the output produced by the pipeline. Most of the plots are taken from the report, which summarizes results at the end of the pipeline.
 
 The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory.
 
-<!-- TODO nf-core: Write this documentation describing your workflow's output -->
-
 ## Pipeline overview
 
 The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps:
 
-- [FastQC](#fastqc) - Raw read QC
-- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline
-- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution
+- [Query identification](#query-identification) - obtaining basic information on the query
+- [Ortholog fetching](#ortholog-fetching) - obtaining ortholog predictions from public databases
+- [Ortholog scoring](#ortholog-scoring) - creation of a score table
+- [Ortholog filtering](#ortholog-filtering) - selection of final ortholog list
+- [Ortholog plotting](#ortholog-plotting) - creation of plots describing the predictions
+- [Ortholog statistics](#ortholog-statistics) - calculation of several statistics about the predictions
+- [Sequence fetching](#sequence-fetching) - obtaining ortholog sequences from public databases
+- [Structure fetching](#structure-fetching) - obtaining ortholog structures from AlphaFoldDB
+- [MSA](#msa) - alignment of ortholog sequences
+- [Tree reconstruction](#tree-reconstruction) - creation of phylogenies with ML or ME
+- [Report generation](#report-generation) - creation of a human-readable report
+- [Pipeline information](#pipeline-information) - basic information about the pipeline run
+
+### Query identification
+
+<details markdown="1">
+<summary>Output files</summary>
 
-### FastQC
+- `seqinfo/`
+  - `*_id.txt`: File containing Uniprot identifier of the query or the closest BLAST hit.
+  - `*_taxid.txt`: File containing NCBI taxon ID of the query/closest hit.
+  - `*_exact.txt`: File containing information on whether the query was found in the database (`true`), or the output is the top BLAST hit (`false`).
+  </details>
+
+Query information necessary for further steps is obtained here. If a sequence was passed, it is identified using [OMA](https://omabrowser.org). A Uniprot identifier is obtained, along with an indication of whether it was an exact or closest match. For either query type, an NCBI taxon ID is obtained using the OMA API.
+
+### Ortholog fetching
 
 <details markdown="1">
 <summary>Output files</summary>
 
-- `fastqc/`
-  - `*_fastqc.html`: FastQC report containing quality metrics.
-  - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images.
+- `orthologs/`
+  - `[dbname]/`
+    - `*_[dbname]_group.csv`: A CSV file with the hits from the database. It has an additional column necessary for later merging.
+    </details>
 
-</details>
+Ortholog predictions are fetched from the databases. Each database can be used locally or online, subject to the feasibility of these access modes. The databases currently supported are:
 
-[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/).
+- OMA (online and local)
+- PANTHER (online and local)
+- OrthoInspector (online)
+- EggNOG (local).
 
-![MultiQC - FastQC sequence counts plot](images/mqc_fastqc_counts.png)
+### Ortholog scoring
 
-![MultiQC - FastQC mean quality scores plot](images/mqc_fastqc_quality.png)
+<details markdown="1">
+<summary>Output files</summary>
 
-![MultiQC - FastQC adapter content plot](images/mqc_fastqc_adapter.png)
+- `orthologs/`
+  - `merge_csv/`
+    - `*.csv`: A merged CSV file with predictions from all the databases.
+  - `score_table/`
+    - `*_score_table.csv`: A merged CSV with a score column added. The score is the number of databases supporting the prediction.
+    </details>
 
-:::note
-The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality.
-:::
+At this step, the predictions are combined into a single table. They are also assigned a score which is used for later filtering. The score is the number of supporting sources.
 
-### MultiQC
+### Ortholog filtering
 
 <details markdown="1">
 <summary>Output files</summary>
 
-- `multiqc/`
-  - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser.
-  - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline.
-  - `multiqc_plots/`: directory containing static images from the report in various formats.
+- `orthologs/`
+  - `filter_hits/`
+    - `*_minscore_*.txt`: Lists of predictions passing different score thresholds, from 1 to the number of sources. For example, `BicD2_minscore_2.txt` would include orthologs of BicD2 supported by at least 2 sources.
+    - `*_centroid.txt`: A list of predictions from the source with the highest agreement with other sources.
+    - `*_filtered_hits.txt`: The final list of orthologs, chosen based on user-defined criteria.
+    </details>
 
-</details>
+In this step, the predictions are split into lists with different minimal scores, indicating each level of support. Additionally, the source with the highest total agreement is found.
+
+The final list of orthologs is determined in one of two ways. If `--use_centroid` is set, the highest-agreement source will be used. Otherwise, orthologs with a score higher than `--min_score` are used.
+
+### Ortholog plotting
+
+<details markdown="1">
+<summary>Output files</summary>
+
+- `orthologs/`
+  - `plots/`
+    - `*_supports.png`: A bar plot representing the number of predictions from each source and the support of the predictions.
+    - `*_venn.png`: A Venn diagram representing the intersections between databases.
+    - `*_jaccard.png`: A tile plot representing the Jaccard index (pairwise agreement) between databases.
+    </details>
+
+Plots representing certain aspects of the predictions are generated using `ggplot`.
+
+### Ortholog statistics
+
+<details markdown="1">
+<summary>Output files</summary>
+
+- `orthologs/`
+  - `stats/`
+    - `*_stats.yml`: A YAML file containing ortholog statistics.
+    </details>
+
+The following statistics of the predictions are calculated:
+
+- percentage of consensus - the fraction of predictions which are supported by all the sources
+- percentage of privates - the fraction of predictions which are supported by only 1 source
+- goodness - the ratio of the real sum of scores to the theoretical maximum (i.e. the number of databases times the number of predictions).
+
+### Sequence fetching
+
+<details markdown="1">
+<summary>Output files</summary>
+
+- `sequences/`
+  - `*_orthologs.fa`: A FASTA file containing all ortholog sequences that could be found.
+  - `*_seq_hits.txt`: The list of all orthologs whose sequence was found.
+  - `*_seq_misses.txt`: The list of all orthologs whose sequence was not found.
+  </details>
+
+If downstream analysis is performed, protein sequences of all orthologs in FASTA format are fetched. The primary source of sequences is [OMA](http://omabrowser.org) due to its fast API. IDs not found in OMA are sent to [Uniprot](http://uniprot.org). Anything not found in Uniprot is considered a miss.
+
+### Structure fetching
+
+<details markdown="1">
+<summary>Output files</summary>
+
+- `sequences/`
+  - `*.pdb`: PDB files with structures of the orthologs, obtained from AlphaFoldDB.
+  - `*_af_versions.txt`: Versions of the AlphaFold structures.
+  - `*_str_hits.txt`: The list of all orthologs whose structure was found.
+  - `*_str_misses.txt`: The list of all orthologs whose structure was not found.
+  </details>
+
+If `--use_structures` is set, structures for the alignment are obtained from AlphaFoldDB. For feasibility of AlphaFold structures for MSA, check [Baltzis et al. 2022](http://doi.org/10.1093/bioinformatics/btac625).
+
+### MSA
+
+<details markdown="1">
+<summary>Output files</summary>
+
+- `alignment/`
+  - `*.aln`: A multiple sequence alignment of the orthologs in Clustal format.
+  </details>
+
+Multiple sequence alignment is performed using [T-COFFEE](https://tcoffee.org). 3D-COFFEE mode is used if `--use_structures` is set. Otherwise, default mode is used.
+
+### Tree reconstruction
+
+<details markdown="1">
+<summary>Output files</summary>
+
+- `trees/`
+  - `iqtree/`
+    - `*.treefile`: The IQTREE phylogeny in Newick format.
+    - `*.ufboot`: Bootstrap trees, if generated.
+  - `fastme/`
+    - `*.nwk`: The FastME phylogeny in Newick format.
+    - `*.bootstrap`: The bootstrap trees, if generated.
+  - `plots/`
+    - `*_iqtree_tree.png`: The IQTREE phylogeny as an image.
+    - `*_fastme_tree.png`: The FastME phylogeny as an image.
+    </details>
+
+The phylogeny can be constructed using maximum likelihood ([IQTREE](http://www.iqtree.org/)) or minimum evolution ([FastME](http://www.atgc-montpellier.fr/fastme/)).
+
+### Report generation
+
+<details markdown="1">
+<summary>Output files</summary>
 
-[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory.
+- `*_dist/`
+  - `*.html`: The report in HTML format.
+  - `run.sh`: A script to correctly open the report.
+  - Other files necessary for the report.
+  </details>
 
-Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQC. The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see <http://multiqc.info>.
+The report is generated in the form of a React application. It must be hosted on localhost to work correctly. This can be done manually or with the run script provided.
 
 ### Pipeline information
 

From 9f1f1bfeee3ebb3d82953068e369f483a4803cc2 Mon Sep 17 00:00:00 2001
From: itrujnara <itrujnara@wp.pl>
Date: Mon, 29 Apr 2024 16:42:40 +0200
Subject: [PATCH 13/29] Set test.config to correct values

---
 conf/test.config | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/conf/test.config b/conf/test.config
index 2cf94b1..d1106d4 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -20,10 +20,15 @@ params {
     max_time   = '6.h'
 
     // Input data
-    // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
-    // TODO nf-core: Give any required params for the test so that command line flags are not needed
-    input  = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv'
+    input  = 'https://raw.githubusercontent.com/nf-core/test-datasets/reportho/testdata/samplesheet/samplesheet.csv'
 
-    // Genome references
-    genome = 'R64-1-1'
+    // Other parameters
+    uniprot_query    = true
+    use_all          = false
+    use_inspector    = true
+    use_eggnog       = false
+    min_score        = 3
+    use_iqtree       = false
+    use_fastme       = true
+    fastme_bootstrap = 0
 }

From 5798f5cf13da4a695d4eb1340178c675fb9198ff Mon Sep 17 00:00:00 2001
From: itrujnara <itrujnara@wp.pl>
Date: Mon, 29 Apr 2024 16:42:52 +0200
Subject: [PATCH 14/29] Tweaked report to work with skipped downstream

---
 subworkflows/local/report.nf | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/subworkflows/local/report.nf b/subworkflows/local/report.nf
index dcb1a84..381c377 100644
--- a/subworkflows/local/report.nf
+++ b/subworkflows/local/report.nf
@@ -20,8 +20,27 @@ workflow REPORT {
     ch_fastme
 
     main:
-    ch_versions = Channel.empty()
-    ch_fasta    = ch_seqinfo.map { [it[0], []] }
+    ch_versions  = Channel.empty()
+    ch_fasta     = ch_seqinfo.map { [it[0], []] }
+
+    if(params.skip_downstream) {
+        ch_seqhits   = ch_seqinfo.map { [it[0], []] }
+        ch_seqmisses = ch_seqinfo.map { [it[0], []] }
+        ch_strhits   = ch_seqinfo.map { [it[0], []] }
+        ch_strmisses = ch_seqinfo.map { [it[0], []] }
+        ch_alignment = ch_seqinfo.map { [it[0], []] }
+    }
+    else if(!params.use_structures) {
+        ch_strhits   = ch_seqinfo.map { [it[0], []] }
+        ch_strmisses = ch_seqinfo.map { [it[0], []] }
+    }
+
+    if (!params.use_iqtree) {
+        ch_iqtree = ch_seqinfo.map { [it[0], []] }
+    }
+    if (!params.use_fastme) {
+        ch_fastme = ch_seqinfo.map { [it[0], []] }
+    }
 
     DUMP_PARAMS(
         ch_seqinfo.map { [it[0], it[3]] }

From c30a037b7747c3cb9dd25ca8c189fc53d7ccfb1b Mon Sep 17 00:00:00 2001
From: itrujnara <itrujnara@wp.pl>
Date: Mon, 29 Apr 2024 16:52:57 +0200
Subject: [PATCH 15/29] Added options to skip plotting

---
 nextflow.config                     |  2 ++
 nextflow_schema.json                | 14 ++++++++++
 subworkflows/local/get_orthologs.nf | 28 +++++++++++++-------
 subworkflows/local/make_trees.nf    | 40 +++++++++++++++++------------
 4 files changed, 59 insertions(+), 25 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 11eef6e..8f2005d 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -30,6 +30,7 @@ params {
     eggnog_idmap_path          = null
     use_centroid               = false
     min_score                  = 2
+    skip_orthoplots            = false
 
     // Downstream analysis options
     skip_downstream            = false
@@ -39,6 +40,7 @@ params {
     use_fastme                 = false
     iqtree_bootstrap           = 100
     fastme_bootstrap           = 100
+    skip_treeplots             = false
 
     // Boilerplate options
     outdir                     = null
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 4f183c9..23adf3d 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -168,6 +168,13 @@
                     "description": "Minimum score for the ortholog search.",
                     "help_text": "The minimum score for the ortholog search. If `use_centroid` is set to `true`, this parameter will be ignored.",
                     "fa_icon": "fas fa-database"
+                },
+                "skip_orthoplots": {
+                    "type": "boolean",
+                    "default": "false",
+                    "description": "Skip the ortholog plots.",
+                    "help_text": "If set to `true`, the pipeline will skip the ortholog plots.",
+                    "fa_icon": "fas fa-database"
                 }
             }
         },
@@ -225,6 +232,13 @@
                     "description": "Number of bootstrap replicates for FastME.",
                     "help_text": "If set to `0`, bootstrap will not be performed.",
                     "fa_icon": "fas fa-rotate"
+                },
+                "skip_treeplots": {
+                    "type": "boolean",
+                    "default": "false",
+                    "description": "Skip the tree plots.",
+                    "help_text": "If set to `true`, the pipeline will skip the tree plots.",
+                    "fa_icon": "fas fa-tree"
                 }
             }
         },
diff --git a/subworkflows/local/get_orthologs.nf b/subworkflows/local/get_orthologs.nf
index 183f75a..b4cdbea 100644
--- a/subworkflows/local/get_orthologs.nf
+++ b/subworkflows/local/get_orthologs.nf
@@ -272,13 +272,23 @@ workflow GET_ORTHOLOGS {
         .mix(FILTER_HITS.out.versions)
         .set { ch_versions }
 
-    PLOT_ORTHOLOGS (
-        MAKE_SCORE_TABLE.out.score_table
-    )
+    ch_supportsplot = ch_seqinfo.map { [it[0], []]}
+    ch_vennplot = ch_seqinfo.map { [it[0], []]}
+    ch_jaccardplot = ch_seqinfo.map { [it[0], []]}
 
-    ch_versions
-        .mix(PLOT_ORTHOLOGS.out.versions)
-        .set { ch_versions }
+    if(!params.skip_orthoplots) {
+        PLOT_ORTHOLOGS (
+            MAKE_SCORE_TABLE.out.score_table
+        )
+
+        ch_supportsplot = PLOT_ORTHOLOGS.out.supports
+        ch_vennplot = PLOT_ORTHOLOGS.out.venn
+        ch_jaccardplot = PLOT_ORTHOLOGS.out.jaccard
+
+        ch_versions
+            .mix(PLOT_ORTHOLOGS.out.versions)
+            .set { ch_versions }
+    }
 
     MAKE_STATS(
         MAKE_SCORE_TABLE.out.score_table
@@ -300,9 +310,9 @@ workflow GET_ORTHOLOGS {
     orthogroups     = ch_orthogroups
     score_table     = MAKE_SCORE_TABLE.out.score_table
     orthologs       = FILTER_HITS.out.filtered_hits
-    supports_plot   = PLOT_ORTHOLOGS.out.supports
-    venn_plot       = PLOT_ORTHOLOGS.out.venn
-    jaccard_plot    = PLOT_ORTHOLOGS.out.jaccard
+    supports_plot   = ch_supportsplot
+    venn_plot       = ch_vennplot
+    jaccard_plot    = ch_jaccardplot
     stats           = MAKE_STATS.out.stats
     versions        = ch_merged_versions
 
diff --git a/subworkflows/local/make_trees.nf b/subworkflows/local/make_trees.nf
index 34b75c4..2fd0fb2 100644
--- a/subworkflows/local/make_trees.nf
+++ b/subworkflows/local/make_trees.nf
@@ -28,16 +28,20 @@ workflow MAKE_TREES {
             .mix(IQTREE.out.versions)
             .set { ch_versions }
 
-        PLOT_IQTREE (
-            IQTREE.out.phylogeny,
-            "iqtree"
-        )
+        ch_mlplot = ch_seqinfo.map { [it[0], []] }
 
-        ch_mlplot = PLOT_IQTREE.out.plot
+        if(!params.skip_treeplots) {
+            PLOT_IQTREE (
+                IQTREE.out.phylogeny,
+                "iqtree"
+            )
 
-        ch_versions
-            .mix(PLOT_IQTREE.out.versions)
-            .set { ch_versions }
+            ch_mlplot = PLOT_IQTREE.out.plot
+
+            ch_versions
+                .mix(PLOT_IQTREE.out.versions)
+                .set { ch_versions }
+        }
     }
 
     if (params.use_fastme) {
@@ -60,16 +64,20 @@ workflow MAKE_TREES {
             .mix(FASTME.out.versions)
             .set { ch_versions }
 
-        PLOT_FASTME (
-            FASTME.out.nwk,
-            "fastme"
-        )
+        ch_meplot = ch_seqinfo.map { [it[0], []] }
 
-        ch_meplot = PLOT_FASTME.out.plot
+        if(!params.skip_treeplots) {
+            PLOT_FASTME (
+                FASTME.out.nwk,
+                "fastme"
+            )
 
-        ch_versions
-            .mix(PLOT_FASTME.out.versions)
-            .set { ch_versions }
+            ch_meplot = PLOT_FASTME.out.plot
+
+            ch_versions
+                .mix(PLOT_FASTME.out.versions)
+                .set { ch_versions }
+        }
     }
 
     emit:

From 5fe68ed7e4ae9c5e464c577b49636e0133f4998d Mon Sep 17 00:00:00 2001
From: itrujnara <itrujnara@wp.pl>
Date: Mon, 29 Apr 2024 16:57:18 +0200
Subject: [PATCH 16/29] Minor fixes to plotting

---
 subworkflows/local/get_orthologs.nf | 10 +++++-----
 subworkflows/local/make_trees.nf    |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/subworkflows/local/get_orthologs.nf b/subworkflows/local/get_orthologs.nf
index b4cdbea..db67640 100644
--- a/subworkflows/local/get_orthologs.nf
+++ b/subworkflows/local/get_orthologs.nf
@@ -272,9 +272,9 @@ workflow GET_ORTHOLOGS {
         .mix(FILTER_HITS.out.versions)
         .set { ch_versions }
 
-    ch_supportsplot = ch_seqinfo.map { [it[0], []]}
-    ch_vennplot = ch_seqinfo.map { [it[0], []]}
-    ch_jaccardplot = ch_seqinfo.map { [it[0], []]}
+    ch_supportsplot = ch_query.map { [it[0], []]}
+    ch_vennplot     = ch_query.map { [it[0], []]}
+    ch_jaccardplot  = ch_query.map { [it[0], []]}
 
     if(!params.skip_orthoplots) {
         PLOT_ORTHOLOGS (
@@ -282,8 +282,8 @@ workflow GET_ORTHOLOGS {
         )
 
         ch_supportsplot = PLOT_ORTHOLOGS.out.supports
-        ch_vennplot = PLOT_ORTHOLOGS.out.venn
-        ch_jaccardplot = PLOT_ORTHOLOGS.out.jaccard
+        ch_vennplot     = PLOT_ORTHOLOGS.out.venn
+        ch_jaccardplot  = PLOT_ORTHOLOGS.out.jaccard
 
         ch_versions
             .mix(PLOT_ORTHOLOGS.out.versions)
diff --git a/subworkflows/local/make_trees.nf b/subworkflows/local/make_trees.nf
index 2fd0fb2..34cc4b5 100644
--- a/subworkflows/local/make_trees.nf
+++ b/subworkflows/local/make_trees.nf
@@ -28,7 +28,7 @@ workflow MAKE_TREES {
             .mix(IQTREE.out.versions)
             .set { ch_versions }
 
-        ch_mlplot = ch_seqinfo.map { [it[0], []] }
+        ch_mlplot = ch_alignment.map { [it[0], []] }
 
         if(!params.skip_treeplots) {
             PLOT_IQTREE (
@@ -64,7 +64,7 @@ workflow MAKE_TREES {
             .mix(FASTME.out.versions)
             .set { ch_versions }
 
-        ch_meplot = ch_seqinfo.map { [it[0], []] }
+        ch_meplot = ch_alignment.map { [it[0], []] }
 
         if(!params.skip_treeplots) {
             PLOT_FASTME (

From 2c27b50b8d10c6fcdd0110eb6a8e43c79059d640 Mon Sep 17 00:00:00 2001
From: itrujnara <itrujnara@wp.pl>
Date: Tue, 30 Apr 2024 13:41:45 +0200
Subject: [PATCH 17/29] Added stats aggregation

---
 bin/yml2csv.py                               | 25 +++++++++
 conf/modules.config                          | 17 ++++++
 modules.json                                 |  5 ++
 modules/local/stats2csv.nf                   | 31 +++++++++++
 modules/nf-core/csvtk/concat/environment.yml |  7 +++
 modules/nf-core/csvtk/concat/main.nf         | 43 +++++++++++++++
 modules/nf-core/csvtk/concat/meta.yml        | 49 +++++++++++++++++
 subworkflows/local/get_orthologs.nf          | 57 +++++++++++++++-----
 8 files changed, 222 insertions(+), 12 deletions(-)
 create mode 100755 bin/yml2csv.py
 create mode 100644 modules/local/stats2csv.nf
 create mode 100644 modules/nf-core/csvtk/concat/environment.yml
 create mode 100644 modules/nf-core/csvtk/concat/main.nf
 create mode 100644 modules/nf-core/csvtk/concat/meta.yml

diff --git a/bin/yml2csv.py b/bin/yml2csv.py
new file mode 100755
index 0000000..04cbd4a
--- /dev/null
+++ b/bin/yml2csv.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python3
+
+import sys
+
+import yaml
+
+
+def main() -> None:
+    if len(sys.argv) < 4:
+        print("Usage: yml2csv.py <id> <input_file> <output_file>")
+        sys.exit(1)
+
+    sample_id = sys.argv[1]
+    input_file = sys.argv[2]
+    output_file = sys.argv[3]
+
+    with open(input_file) as f:
+        data = yaml.safe_load(f)
+
+    with open(output_file, "w") as f:
+        print("id,percent_max,percent_privates,goodness", file=f)
+        print(f"{sample_id},{data['percent_max']},{data['percent_privates']},{data['goodness']}", file=f)
+
+if __name__ == "__main__":
+    main()
diff --git a/conf/modules.config b/conf/modules.config
index 367a3a3..ca1bddc 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -103,6 +103,23 @@ process {
         ]
     }
 
+    withName: 'STATS2CSV' {
+        publishDir = [
+            path: { "${params.outdir}/orthologs/stats" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+    withName: 'MERGE_STATS' {
+        ext.args = "-u NA"
+        publishDir = [
+            path: { "${params.outdir}/orthologs/stats" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
     // ----------------------
     // Sequence alignment
     // ----------------------
diff --git a/modules.json b/modules.json
index a309b53..1e87009 100644
--- a/modules.json
+++ b/modules.json
@@ -5,6 +5,11 @@
         "https://github.com/nf-core/modules.git": {
             "modules": {
                 "nf-core": {
+                    "csvtk/concat": {
+                        "branch": "master",
+                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "installed_by": ["modules"]
+                    },
                     "csvtk/join": {
                         "branch": "master",
                         "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
diff --git a/modules/local/stats2csv.nf b/modules/local/stats2csv.nf
new file mode 100644
index 0000000..362ff42
--- /dev/null
+++ b/modules/local/stats2csv.nf
@@ -0,0 +1,31 @@
+process STATS2CSV {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "conda-forge::python=3.11.0 conda-forge::pyyaml=5.4.1"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-deac90960ddeb4d14fb31faf92c0652d613b3327:10b46d090d02e9e22e206db80d14e994267520c3-0' :
+        'biocontainers/mulled-v2-deac90960ddeb4d14fb31faf92c0652d613b3327:10b46d090d02e9e22e206db80d14e994267520c3-0' }"
+
+    input:
+    tuple val(meta), path(stats)
+
+    output:
+    tuple val(meta), path("*_stats.csv"), emit: csv
+    path "versions.yml"                 , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    prefix = task.ext.prefix ?: meta.id
+    """
+    yml2csv.py ${meta.id} $stats ${prefix}_stats.csv
+
+    cat <<- END_VERSIONS > versions.yml
+    "${task.process}":
+        Python: \$(python --version | cut -d ' ' -f 2)
+        PyYAML: \$(pip show pyyaml | grep Version | cut -d ' ' -f 2)
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/csvtk/concat/environment.yml b/modules/nf-core/csvtk/concat/environment.yml
new file mode 100644
index 0000000..ed1ba26
--- /dev/null
+++ b/modules/nf-core/csvtk/concat/environment.yml
@@ -0,0 +1,7 @@
+name: csvtk_concat
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::csvtk=0.23.0
diff --git a/modules/nf-core/csvtk/concat/main.nf b/modules/nf-core/csvtk/concat/main.nf
new file mode 100644
index 0000000..16e59f6
--- /dev/null
+++ b/modules/nf-core/csvtk/concat/main.nf
@@ -0,0 +1,43 @@
+process CSVTK_CONCAT {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/csvtk:0.23.0--h9ee0642_0' :
+        'biocontainers/csvtk:0.23.0--h9ee0642_0' }"
+
+    input:
+    tuple val(meta), path(csv)
+    val in_format
+    val out_format
+
+    output:
+    tuple val(meta), path("${prefix}.${out_extension}"), emit: csv
+    path "versions.yml"                                , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args   ?: ''
+    prefix   = task.ext.prefix ?: "${meta.id}"
+    def delimiter = in_format == "tsv" ? "\t" : (in_format == "csv" ? "," : in_format)
+    def out_delimiter = out_format == "tsv" ? "\t" : (out_format == "csv" ? "," : out_format)
+    out_extension = out_format == "tsv" ? 'tsv' : 'csv'
+    """
+    csvtk \\
+        concat \\
+        $args \\
+        --num-cpus $task.cpus \\
+        --delimiter "${delimiter}" \\
+        --out-delimiter "${out_delimiter}" \\
+        --out-file ${prefix}.${out_extension} \\
+        $csv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        csvtk: \$(echo \$( csvtk version | sed -e "s/csvtk v//g" ))
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/csvtk/concat/meta.yml b/modules/nf-core/csvtk/concat/meta.yml
new file mode 100644
index 0000000..5f53229
--- /dev/null
+++ b/modules/nf-core/csvtk/concat/meta.yml
@@ -0,0 +1,49 @@
+name: csvtk_concat
+description: Concatenate two or more CSV (or TSV) tables into a single table
+keywords:
+  - concatenate
+  - tsv
+  - csv
+tools:
+  - csvtk:
+      description: A cross-platform, efficient, practical CSV/TSV toolkit
+      homepage: http://bioinf.shenwei.me/csvtk
+      documentation: http://bioinf.shenwei.me/csvtk
+      tool_dev_url: https://github.com/shenwei356/csvtk
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - csv:
+      type: file
+      description: CSV/TSV formatted files
+      pattern: "*.{csv,tsv}"
+  - in_format:
+      type: string
+      description: Input format (csv, tab, or a delimiting character)
+      pattern: "*"
+  - out_format:
+      type: string
+      description: Output format (csv, tab, or a delimiting character)
+      pattern: "*"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "version.yml"
+  - csv:
+      type: file
+      description: Concatenated CSV/TSV file
+      pattern: "*.{csv,tsv}"
+authors:
+  - "@rpetit3"
+maintainers:
+  - "@rpetit3"
diff --git a/subworkflows/local/get_orthologs.nf b/subworkflows/local/get_orthologs.nf
index db67640..df55d4d 100644
--- a/subworkflows/local/get_orthologs.nf
+++ b/subworkflows/local/get_orthologs.nf
@@ -14,6 +14,8 @@ include { MAKE_SCORE_TABLE             } from "../../modules/local/make_score_ta
 include { FILTER_HITS                  } from "../../modules/local/filter_hits"
 include { PLOT_ORTHOLOGS               } from "../../modules/local/plot_orthologs"
 include { MAKE_STATS                   } from "../../modules/local/make_stats"
+include { STATS2CSV                    } from "../../modules/local/stats2csv"
+include { CSVTK_CONCAT as MERGE_STATS  } from "../../modules/nf-core/csvtk/concat/main"
 
 workflow GET_ORTHOLOGS {
     take:
@@ -242,6 +244,8 @@ workflow GET_ORTHOLOGS {
         }
     }
 
+    // Result merging
+
     MERGE_CSV (
         ch_orthogroups.groupTuple()
     )
@@ -250,6 +254,8 @@ workflow GET_ORTHOLOGS {
         .mix(MERGE_CSV.out.versions)
         .set { ch_versions }
 
+    // Scoring and filtering
+
     MAKE_SCORE_TABLE (
         MERGE_CSV.out.csv
     )
@@ -272,6 +278,8 @@ workflow GET_ORTHOLOGS {
         .mix(FILTER_HITS.out.versions)
         .set { ch_versions }
 
+    // Plotting
+
     ch_supportsplot = ch_query.map { [it[0], []]}
     ch_vennplot     = ch_query.map { [it[0], []]}
     ch_jaccardplot  = ch_query.map { [it[0], []]}
@@ -290,6 +298,8 @@ workflow GET_ORTHOLOGS {
             .set { ch_versions }
     }
 
+    // Stats
+
     MAKE_STATS(
         MAKE_SCORE_TABLE.out.score_table
     )
@@ -298,22 +308,45 @@ workflow GET_ORTHOLOGS {
         .mix(MAKE_STATS.out.versions)
         .set { ch_versions }
 
+    STATS2CSV(
+        MAKE_STATS.out.stats
+    )
+
+    ch_versions
+        .mix(STATS2CSV.out.versions)
+        .set { ch_versions }
+
+    ch_stats = STATS2CSV.out.csv
+        .collect { it[1] }
+        .map { [[id: "all"], it] }
+
+    MERGE_STATS(
+        ch_stats,
+        "csv",
+        "csv"
+    )
+
+    ch_versions
+        .mix(MERGE_STATS.out.versions)
+        .set { ch_versions }
+
     ch_versions
         .collectFile(name: "get_orthologs_versions.yml", sort: true, newLine: true)
         .set { ch_merged_versions }
 
     emit:
-    seqinfo         = ch_query
-    id              = ch_query.map { it[1] }
-    taxid           = ch_query.map { it[2] }
-    exact           = ch_query.map { it[3] }
-    orthogroups     = ch_orthogroups
-    score_table     = MAKE_SCORE_TABLE.out.score_table
-    orthologs       = FILTER_HITS.out.filtered_hits
-    supports_plot   = ch_supportsplot
-    venn_plot       = ch_vennplot
-    jaccard_plot    = ch_jaccardplot
-    stats           = MAKE_STATS.out.stats
-    versions        = ch_merged_versions
+    seqinfo          = ch_query
+    id               = ch_query.map { it[1] }
+    taxid            = ch_query.map { it[2] }
+    exact            = ch_query.map { it[3] }
+    orthogroups      = ch_orthogroups
+    score_table      = MAKE_SCORE_TABLE.out.score_table
+    orthologs        = FILTER_HITS.out.filtered_hits
+    supports_plot    = ch_supportsplot
+    venn_plot        = ch_vennplot
+    jaccard_plot     = ch_jaccardplot
+    stats            = MAKE_STATS.out.stats
+    aggregated_stats = MERGE_STATS.out.csv
+    versions         = ch_merged_versions
 
 }

From c29075931ef35c5f57b439c53a71121419c0107d Mon Sep 17 00:00:00 2001
From: itrujnara <itrujnara@wp.pl>
Date: Tue, 30 Apr 2024 13:58:30 +0200
Subject: [PATCH 18/29] Added file name for CSV merge

---
 conf/modules.config | 1 +
 1 file changed, 1 insertion(+)

diff --git a/conf/modules.config b/conf/modules.config
index ca1bddc..a81954b 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -113,6 +113,7 @@ process {
 
     withName: 'MERGE_STATS' {
         ext.args = "-u NA"
+        ext.prefix = "aggregated_stats"
         publishDir = [
             path: { "${params.outdir}/orthologs/stats" },
             mode: params.publish_dir_mode,

From fd1957eebe07dc421f38a0c980f96b98dd1f09f5 Mon Sep 17 00:00:00 2001
From: itrujnara <itrujnara@wp.pl>
Date: Tue, 30 Apr 2024 13:58:40 +0200
Subject: [PATCH 19/29] Added config for full test

---
 conf/test_full.config | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/conf/test_full.config b/conf/test_full.config
index 87e7fee..2782daf 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -14,11 +14,14 @@ params {
     config_profile_name        = 'Full test profile'
     config_profile_description = 'Full test dataset to check pipeline function'
 
-    // Input data for full size test
-    // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA)
-    // TODO nf-core: Give any required params for the test so that command line flags are not needed
-    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv'
+    // Input data
+    input  = 'https://raw.githubusercontent.com/nf-core/test-datasets/reportho/testdata/samplesheet/samplesheet.csv'
 
-    // Genome references
-    genome = 'R64-1-1'
+    // Other parameters
+    uniprot_query  = true
+    eggnog_path    = 'http://eggnog5.embl.de/download/eggnog_5.0/per_tax_level/1/1_members.tsv.gz'
+    min_score      = 3
+    use_structures = true
+    use_iqtree     = true
+    use_fastme     = true
 }

From e591d9d4b078ee16cf9ece741bd84994672b4869 Mon Sep 17 00:00:00 2001
From: itrujnara <itrujnara@wp.pl>
Date: Tue, 30 Apr 2024 14:31:34 +0200
Subject: [PATCH 20/29] Tweaks to full test

---
 conf/test_full.config         | 13 +++++++------
 modules/local/filter_fasta.nf |  6 +++---
 nextflow.config               |  2 +-
 3 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/conf/test_full.config b/conf/test_full.config
index 2782daf..fca2aaf 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -18,10 +18,11 @@ params {
     input  = 'https://raw.githubusercontent.com/nf-core/test-datasets/reportho/testdata/samplesheet/samplesheet.csv'
 
     // Other parameters
-    uniprot_query  = true
-    eggnog_path    = 'http://eggnog5.embl.de/download/eggnog_5.0/per_tax_level/1/1_members.tsv.gz'
-    min_score      = 3
-    use_structures = true
-    use_iqtree     = true
-    use_fastme     = true
+    uniprot_query     = true
+    eggnog_path       = 'http://eggnog5.embl.de/download/eggnog_5.0/per_tax_level/1/1_members.tsv.gz'
+    eggnog_idmap_path = "http://eggnog5.embl.de/download/eggnog_5.0/id_mappings/uniprot/latest.Eukaryota.tsv.gz"
+    min_score         = 3
+    use_structures    = true
+    use_iqtree        = true
+    use_fastme        = true
 }
diff --git a/modules/local/filter_fasta.nf b/modules/local/filter_fasta.nf
index 23a8f89..ecfd20e 100644
--- a/modules/local/filter_fasta.nf
+++ b/modules/local/filter_fasta.nf
@@ -2,10 +2,10 @@ process FILTER_FASTA {
     tag "$meta.id"
     label 'process_single'
 
-    conda "conda-forge::python=3.10.0"
+    conda "conda-forge::python=3.11.0 conda-forge::biopython=1.83.0 conda-forge::requests=2.31.0"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/python:3.10' :
-        'biocontainers/python:3.10' }"
+        'https://depot.galaxyproject.org/singularity/mulled-v2-bc54124b36864a4af42a9db48b90a404b5869e7e:5258b8e5ba20587b7cbf3e942e973af5045a1e59-0' :
+        'biocontainers/mulled-v2-bc54124b36864a4af42a9db48b90a404b5869e7e:5258b8e5ba20587b7cbf3e942e973af5045a1e59-0' }"
 
     input:
     tuple val(meta), path(fasta), path(structures)
diff --git a/nextflow.config b/nextflow.config
index 8f2005d..96582d6 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -38,7 +38,7 @@ params {
     use_structures             = false
     use_iqtree                 = true
     use_fastme                 = false
-    iqtree_bootstrap           = 100
+    iqtree_bootstrap           = 1000
     fastme_bootstrap           = 100
     skip_treeplots             = false
 

From 9796e302590982dd6860a1e3ee082434a16f782b Mon Sep 17 00:00:00 2001
From: itrujnara <itrujnara@wp.pl>
Date: Tue, 30 Apr 2024 15:00:54 +0200
Subject: [PATCH 21/29] Added citations

---
 CITATIONS.md | 46 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 42 insertions(+), 4 deletions(-)

diff --git a/CITATIONS.md b/CITATIONS.md
index 4f03aaa..5c5643e 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -10,13 +10,51 @@
 
 ## Pipeline tools
 
-- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)
+- [OMA](https://omabrowser.org)
 
-  > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online].
+> Adrian M Altenhoff, Clément-Marie Train, Kimberly J Gilbert, Ishita Mediratta, Tarcisio Mendes de Farias, David Moi, Yannis Nevers, Hale-Seda Radoykova, Victor Rossier, Alex Warwick Vesztrocy, Natasha M Glover, Christophe Dessimoz, OMA orthology in 2021: website overhaul, conserved isoforms, ancestral gene order and more, Nucleic Acids Research, Volume 49, Issue D1, 8 January 2021, Pages D373–D379, https://doi.org/10.1093/nar/gkaa1007
 
-- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
+- [PANTHER](https://pantherdb.org)
 
-  > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
+> Thomas PD, Ebert D, Muruganujan A, Mushayahama T, Albou L-P, Mi H. PANTHER: Making genome-scale phylogenetics accessible to all. Protein Science. 2022; 31: 8–22. https://doi.org/10.1002/pro.4218
+
+- [OrthoInspector](https://lbgi.fr/orthoinspector)
+
+> Yannis Nevers, Arnaud Kress, Audrey Defosset, Raymond Ripp, Benjamin Linard, Julie D Thompson, Olivier Poch, Odile Lecompte, OrthoInspector 3.0: open portal for comparative genomics, Nucleic Acids Research, Volume 47, Issue D1, 08 January 2019, Pages D411–D418, https://doi.org/10.1093/nar/gky1068
+
+- [EggNOG](https://eggnog5.embl.de)
+
+> Jaime Huerta-Cepas, Damian Szklarczyk, Davide Heller, Ana Hernández-Plaza, Sofia K Forslund, Helen Cook, Daniel R Mende, Ivica Letunic, Thomas Rattei, Lars J Jensen, Christian von Mering, Peer Bork, eggNOG 5.0: a hierarchical, functionally and phylogenetically annotated orthology resource based on 5090 organisms and 2502 viruses, Nucleic Acids Research, Volume 47, Issue D1, 08 January 2019, Pages D309–D314, https://doi.org/10.1093/nar/gky1085
+
+- [UniProt](https://uniprot.org)
+
+> The UniProt Consortium, UniProt: the Universal Protein Knowledgebase in 2023, Nucleic Acids Research, Volume 51, Issue D1, 6 January 2023, Pages D523–D531, https://doi.org/10.1093/nar/gkac1052
+
+- [UniProt ID Mapping](https://uniprot.org/id-mapping)
+
+> Huang H, McGarvey PB, Suzek BE, Mazumder R, Zhang J, Chen Y, Wu CH. A comprehensive protein-centric ID mapping service for molecular data integration. Bioinformatics. 2011 Apr 15;27(8):1190-1. doi: 10.1093/bioinformatics/btr101. PMID: 21478197; PMCID: PMC3072559.
+
+- [AlphaFold](https://deepmind.google/technologies/alphafold)
+
+> Jumper, J., Evans, R., Pritzel, A. et al. Highly accurate protein structure prediction with AlphaFold. Nature 596, 583–589 (2021). https://doi.org/10.1038/s41586-021-03819-2
+
+- [AlphaFold Database](https://alphafold.ebi.ac.uk)
+
+> Mihaly Varadi, Stephen Anyango, Mandar Deshpande, Sreenath Nair, Cindy Natassia, Galabina Yordanova, David Yuan, Oana Stroe, Gemma Wood, Agata Laydon, Augustin Žídek, Tim Green, Kathryn Tunyasuvunakool, Stig Petersen, John Jumper, Ellen Clancy, Richard Green, Ankur Vora, Mira Lutfi, Michael Figurnov, Andrew Cowie, Nicole Hobbs, Pushmeet Kohli, Gerard Kleywegt, Ewan Birney, Demis Hassabis, Sameer Velankar, AlphaFold Protein Structure Database: massively expanding the structural coverage of protein-sequence space with high-accuracy models, Nucleic Acids Research, Volume 50, Issue D1, 7 January 2022, Pages D439–D444, https://doi.org/10.1093/nar/gkab1061
+
+- [T-COFFEE](https://tcoffee.org)
+
+> Notredame C, Higgins DG, Heringa J. T-Coffee: A novel method for fast and accurate multiple sequence alignment. J Mol Biol. 2000 Sep 8;302(1):205-17. doi: 10.1006/jmbi.2000.4042. PMID: 10964570.
+
+- [IQ-TREE](https://iqtree.org)
+
+> B.Q. Minh, H.A. Schmidt, O. Chernomor, D. Schrempf, M.D. Woodhams, A. von Haeseler, R. Lanfear (2020) IQ-TREE 2: New models and efficient methods for phylogenetic inference in the genomic era. Mol. Biol. Evol., 37:1530-1534. https://doi.org/10.1093/molbev/msaa015
+
+> D.T. Hoang, O. Chernomor, A. von Haeseler, B.Q. Minh, L.S. Vinh (2018) UFBoot2: Improving the ultrafast bootstrap approximation. Mol. Biol. Evol., 35:518–522. https://doi.org/10.1093/molbev/msx281
+
+- [FastME](https://atgc-montpellier.fr/fastme/)
+
+> Vincent Lefort, Richard Desper, Olivier Gascuel, FastME 2.0: A Comprehensive, Accurate, and Fast Distance-Based Phylogeny Inference Program, Molecular Biology and Evolution, Volume 32, Issue 10, October 2015, Pages 2798–2800, https://doi.org/10.1093/molbev/msv150
 
 ## Software packaging/containerisation tools
 

From 5acc84846371bc99765ff80cb2d8504076be45c1 Mon Sep 17 00:00:00 2001
From: itrujnara <itrujnara@wp.pl>
Date: Tue, 30 Apr 2024 15:01:03 +0200
Subject: [PATCH 22/29] Linting fix - default params

---
 nextflow_schema.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index 23adf3d..246036a 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -221,7 +221,7 @@
                 },
                 "iqtree_bootstrap": {
                     "type": "integer",
-                    "default": 100,
+                    "default": 1000,
                     "description": "Number of bootstrap replicates for IQ-TREE.",
                     "help_text": "If set to `0`, bootstrap will not be performed.",
                     "fa_icon": "fas fa-rotate"

From 2bac3978c80894c7a0ad04f4bdd43466549a7ad6 Mon Sep 17 00:00:00 2001
From: itrujnara <itrujnara@wp.pl>
Date: Thu, 2 May 2024 11:09:28 +0200
Subject: [PATCH 23/29] Switched parameters to be false by default

---
 conf/test.config                    |  7 ++---
 conf/test_full.config               |  2 --
 nextflow.config                     | 14 ++++-----
 nextflow_schema.json                | 46 ++++++++++++++---------------
 subworkflows/local/get_orthologs.nf | 12 ++++----
 subworkflows/local/make_trees.nf    |  4 +--
 subworkflows/local/report.nf        |  4 +--
 7 files changed, 42 insertions(+), 47 deletions(-)

diff --git a/conf/test.config b/conf/test.config
index d1106d4..fc9ded3 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -24,11 +24,8 @@ params {
 
     // Other parameters
     uniprot_query    = true
-    use_all          = false
-    use_inspector    = true
-    use_eggnog       = false
+    skip_eggnog      = true
     min_score        = 3
-    use_iqtree       = false
-    use_fastme       = true
+    skip_iqtree      = true
     fastme_bootstrap = 0
 }
diff --git a/conf/test_full.config b/conf/test_full.config
index fca2aaf..2f59347 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -23,6 +23,4 @@ params {
     eggnog_idmap_path = "http://eggnog5.embl.de/download/eggnog_5.0/id_mappings/uniprot/latest.Eukaryota.tsv.gz"
     min_score         = 3
     use_structures    = true
-    use_iqtree        = true
-    use_fastme        = true
 }
diff --git a/nextflow.config b/nextflow.config
index 96582d6..8ef9692 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -13,19 +13,19 @@ params {
     uniprot_query              = false
 
     // Ortholog options
-    use_all                    = true
+    use_all                    = false
     local_databases            = false
-    use_oma                    = true
+    skip_oma                   = false
     oma_path                   = null
     oma_uniprot_path           = null
     oma_ensembl_path           = null
     oma_refseq_path            = null
-    use_panther                = true
+    skip_panther               = false
     panther_path               = null
-    use_inspector              = true
+    skip_inspector             = false
     inspector_path             = null
     inspector_version          = 'Eukaryota2023'
-    use_eggnog                 = true
+    skip_eggnog                = false
     eggnog_path                = null
     eggnog_idmap_path          = null
     use_centroid               = false
@@ -36,8 +36,8 @@ params {
     skip_downstream            = false
     skip_report                = false
     use_structures             = false
-    use_iqtree                 = true
-    use_fastme                 = false
+    skip_iqtree                = false
+    skip_fastme                = false
     iqtree_bootstrap           = 1000
     fastme_bootstrap           = 100
     skip_treeplots             = false
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 246036a..2a3ed3a 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -52,7 +52,7 @@
             "properties": {
                 "use_all": {
                     "type": "boolean",
-                    "default": "true",
+                    "default": "false",
                     "description": "Use all ortholog search methods. Will mix online and local methods if needed. Overrides all individual database flags.",
                     "help_text": "If set to `true`, the pipeline will use all ortholog search methods.",
                     "fa_icon": "fas fa-database"
@@ -64,11 +64,11 @@
                     "help_text": "If set to `true`, the pipeline will use local databases for the analysis.",
                     "fa_icon": "fas fa-database"
                 },
-                "use_oma": {
+                "skip_oma": {
                     "type": "boolean",
-                    "default": "true",
-                    "description": "Use OMA for the ortholog search.",
-                    "help_text": "If set to `true`, the pipeline will use OMA for the ortholog search.",
+                    "default": "false",
+                    "description": "Skip using OMA for the ortholog search.",
+                    "help_text": "If set to `true`, the pipeline will not use OMA for the ortholog search.",
                     "fa_icon": "fas fa-database"
                 },
                 "oma_path": {
@@ -99,11 +99,11 @@
                     "help_text": "If `local_databases` is set to `true`, the pipeline will use this path to the OMA-RefSeq ID map.",
                     "fa_icon": "fas fa-database"
                 },
-                "use_panther": {
+                "skip_panther": {
                     "type": "boolean",
-                    "default": "true",
-                    "description": "Use PANTHER for the ortholog search.",
-                    "help_text": "If set to `true`, the pipeline will use PANTHER for the ortholog search.",
+                    "default": "false",
+                    "description": "Skip using PANTHER for the ortholog search.",
+                    "help_text": "If set to `true`, the pipeline will not use PANTHER for the ortholog search.",
                     "fa_icon": "fas fa-database"
                 },
                 "panther_path": {
@@ -113,11 +113,11 @@
                     "help_text": "If `local_databases` is set to `true`, the pipeline will use this path to the PANTHER database.",
                     "fa_icon": "fas fa-database"
                 },
-                "use_inspector": {
+                "skip_inspector": {
                     "type": "boolean",
-                    "default": "true",
-                    "description": "Use OrthoInspector for the ortholog search.",
-                    "help_text": "If set to `true`, the pipeline will use OrthoInspector for the ortholog search.",
+                    "default": "false",
+                    "description": "Skip using OrthoInspector for the ortholog search.",
+                    "help_text": "If set to `true`, the pipeline will not use OrthoInspector for the ortholog search.",
                     "fa_icon": "fas fa-database"
                 },
                 "inspector_version": {
@@ -134,11 +134,11 @@
                     "help_text": "If `local_databases` is set to `true`, the pipeline will use this path to the OrthoInspector database.",
                     "fa_icon": "fas fa-database"
                 },
-                "use_eggnog": {
+                "skip_eggnog": {
                     "type": "boolean",
-                    "default": "true",
+                    "default": "false",
                     "description": "Use EggNOG for the ortholog search.",
-                    "help_text": "If set to `true`, the pipeline will use EggNOG for the ortholog search.",
+                    "help_text": "If set to `true`, the pipeline will not use EggNOG for the ortholog search.",
                     "fa_icon": "fas fa-database"
                 },
                 "eggnog_path": {
@@ -205,18 +205,18 @@
                     "help_text": "If set to `true`, the pipeline will use AlphaFold structures for the analysis.",
                     "fa_icon": "fas fa-dna"
                 },
-                "use_iqtree": {
+                "skip_iqtree": {
                     "type": "boolean",
-                    "default": "true",
-                    "description": "Use IQ-TREE for the phylogenetic analysis.",
-                    "help_text": "If set to `true`, the pipeline will use IQ-TREE for the phylogenetic analysis.",
+                    "default": "false",
+                    "description": "Skip using IQ-TREE for the phylogenetic analysis.",
+                    "help_text": "If set to `true`, the pipeline will not use IQ-TREE for the phylogenetic analysis.",
                     "fa_icon": "fas fa-tree"
                 },
-                "use_fastme": {
+                "skip_fastme": {
                     "type": "boolean",
                     "default": "false",
-                    "description": "Use FastME for the phylogenetic analysis.",
-                    "help_text": "If set to `true`, the pipeline will use FastME for the phylogenetic analysis.",
+                    "description": "Skip using FastME for the phylogenetic analysis.",
+                    "help_text": "If set to `true`, the pipeline will not use FastME for the phylogenetic analysis.",
                     "fa_icon": "fas fa-tree"
                 },
                 "iqtree_bootstrap": {
diff --git a/subworkflows/local/get_orthologs.nf b/subworkflows/local/get_orthologs.nf
index df55d4d..115e03d 100644
--- a/subworkflows/local/get_orthologs.nf
+++ b/subworkflows/local/get_orthologs.nf
@@ -149,7 +149,7 @@ workflow GET_ORTHOLOGS {
     } else { // online/local separation is used
         // local only
         if (params.local_databases) {
-            if (params.use_oma) {
+            if (!params.skip_oma) {
                 FETCH_OMA_GROUP_LOCAL (
                     ch_query,
                     params.oma_path,
@@ -167,7 +167,7 @@ workflow GET_ORTHOLOGS {
                     .set { ch_versions }
             }
 
-            if (params.use_panther) {
+            if (!params.skip_panther) {
                 FETCH_PANTHER_GROUP_LOCAL (
                     ch_query,
                     params.panther_path
@@ -182,7 +182,7 @@ workflow GET_ORTHOLOGS {
                     .set { ch_versions }
             }
 
-            if(params.use_eggnog) {
+            if(!params.skip_eggnog) {
                 FETCH_EGGNOG_GROUP_LOCAL (
                     ch_query,
                     params.eggnog_path,
@@ -200,7 +200,7 @@ workflow GET_ORTHOLOGS {
             }
         }
         else { // online only
-            if (params.use_oma) {
+            if (!params.skip_oma) {
                 FETCH_OMA_GROUP_ONLINE (
                     ch_query
                 )
@@ -214,7 +214,7 @@ workflow GET_ORTHOLOGS {
                     .set { ch_versions }
 
             }
-            if (params.use_panther) {
+            if (!params.skip_panther) {
                 FETCH_PANTHER_GROUP_ONLINE (
                     ch_query
                 )
@@ -227,7 +227,7 @@ workflow GET_ORTHOLOGS {
                     .mix(FETCH_PANTHER_GROUP_ONLINE.out.versions)
                     .set { ch_versions }
             }
-            if (params.use_inspector) {
+            if (!params.skip_inspector) {
                 FETCH_INSPECTOR_GROUP_ONLINE (
                     ch_query,
                     params.inspector_version
diff --git a/subworkflows/local/make_trees.nf b/subworkflows/local/make_trees.nf
index 34cc4b5..b4743a0 100644
--- a/subworkflows/local/make_trees.nf
+++ b/subworkflows/local/make_trees.nf
@@ -16,7 +16,7 @@ workflow MAKE_TREES {
     ch_mlplot   = Channel.empty()
     ch_meplot   = Channel.empty()
 
-    if (params.use_iqtree) {
+    if (!params.skip_iqtree) {
         IQTREE (
             ch_alignment,
             []
@@ -44,7 +44,7 @@ workflow MAKE_TREES {
         }
     }
 
-    if (params.use_fastme) {
+    if (!params.skip_fastme) {
 
         CONVERT_PHYLIP (
             ch_alignment
diff --git a/subworkflows/local/report.nf b/subworkflows/local/report.nf
index 381c377..a1ea745 100644
--- a/subworkflows/local/report.nf
+++ b/subworkflows/local/report.nf
@@ -35,10 +35,10 @@ workflow REPORT {
         ch_strmisses = ch_seqinfo.map { [it[0], []] }
     }
 
-    if (!params.use_iqtree) {
+    if (params.skip_iqtree) {
         ch_iqtree = ch_seqinfo.map { [it[0], []] }
     }
-    if (!params.use_fastme) {
+    if (params.skip_fastme) {
         ch_fastme = ch_seqinfo.map { [it[0], []] }
     }
 

From a70c77b562d03e00b7b5a525a8adecaa8f5a79c0 Mon Sep 17 00:00:00 2001
From: itrujnara <itrujnara@wp.pl>
Date: Thu, 2 May 2024 11:28:51 +0200
Subject: [PATCH 24/29] Added copyright info to scripts

---
 bin/clustal2phylip.py        | 3 +++
 bin/csv_adorn.py             | 3 +++
 bin/ensembl2uniprot.py       | 3 +++
 bin/fetch_afdb_structures.py | 3 +++
 bin/fetch_inspector_group.py | 3 +++
 bin/fetch_oma_by_sequence.py | 3 +++
 bin/fetch_oma_group.py       | 3 +++
 bin/fetch_oma_groupid.py     | 3 +++
 bin/fetch_oma_taxid_by_id.py | 3 +++
 bin/fetch_panther_group.py   | 3 +++
 bin/fetch_sequences.py       | 3 +++
 bin/filter_fasta.py          | 3 +++
 bin/get_oma_version.py       | 3 +++
 bin/make_score_table.py      | 3 +++
 bin/make_stats.py            | 3 +++
 bin/map_uniprot.py           | 3 +++
 bin/oma2uniprot_local.py     | 3 +++
 bin/plot_orthologs.R         | 3 +++
 bin/plot_tree.R              | 3 +++
 bin/refseq2uniprot.py        | 3 +++
 bin/score_hits.py            | 3 +++
 bin/uniprot2oma_local.py     | 3 +++
 bin/uniprot2uniprot.py       | 3 +++
 bin/uniprotize_oma_local.py  | 3 +++
 bin/uniprotize_oma_online.py | 3 +++
 bin/utils.py                 | 4 ++++
 bin/yml2csv.py               | 3 +++
 27 files changed, 82 insertions(+)

diff --git a/bin/clustal2phylip.py b/bin/clustal2phylip.py
index 186fcd0..246b11a 100755
--- a/bin/clustal2phylip.py
+++ b/bin/clustal2phylip.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 
+# Written by Igor Trujnara, released under the MIT license
+# See https://opensource.org/license/mit for details
+
 import sys
 
 from Bio import SeqIO
diff --git a/bin/csv_adorn.py b/bin/csv_adorn.py
index b7801ba..2052082 100755
--- a/bin/csv_adorn.py
+++ b/bin/csv_adorn.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 
+# Written by Igor Trujnara, released under the MIT license
+# See https://opensource.org/license/mit for details
+
 import sys
 
 
diff --git a/bin/ensembl2uniprot.py b/bin/ensembl2uniprot.py
index 9097c82..2483dca 100644
--- a/bin/ensembl2uniprot.py
+++ b/bin/ensembl2uniprot.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 
+# Written by Igor Trujnara, released under the MIT license
+# See https://opensource.org/license/mit for details
+
 import sys
 
 import requests
diff --git a/bin/fetch_afdb_structures.py b/bin/fetch_afdb_structures.py
index e57d1b3..c13a6a6 100755
--- a/bin/fetch_afdb_structures.py
+++ b/bin/fetch_afdb_structures.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 
+# Written by Igor Trujnara, released under the MIT license
+# See https://opensource.org/license/mit for details
+
 import sys
 
 import requests
diff --git a/bin/fetch_inspector_group.py b/bin/fetch_inspector_group.py
index e462413..211c08a 100755
--- a/bin/fetch_inspector_group.py
+++ b/bin/fetch_inspector_group.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 
+# Written by Igor Trujnara, released under the MIT license
+# See https://opensource.org/license/mit for details
+
 import sys
 
 import requests
diff --git a/bin/fetch_oma_by_sequence.py b/bin/fetch_oma_by_sequence.py
index 636e6fc..eeab2ba 100755
--- a/bin/fetch_oma_by_sequence.py
+++ b/bin/fetch_oma_by_sequence.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 
+# Written by Igor Trujnara, released under the MIT license
+# See https://opensource.org/license/mit for details
+
 import sys
 from warnings import warn
 
diff --git a/bin/fetch_oma_group.py b/bin/fetch_oma_group.py
index 168924f..11e5cd2 100755
--- a/bin/fetch_oma_group.py
+++ b/bin/fetch_oma_group.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 
+# Written by Igor Trujnara, released under the MIT license
+# See https://opensource.org/license/mit for details
+
 import sys
 
 import requests
diff --git a/bin/fetch_oma_groupid.py b/bin/fetch_oma_groupid.py
index 7beafbd..b61898f 100755
--- a/bin/fetch_oma_groupid.py
+++ b/bin/fetch_oma_groupid.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 
+# Written by Igor Trujnara, released under the MIT license
+# See https://opensource.org/license/mit for details
+
 import sys
 
 from utils import fetch_seq
diff --git a/bin/fetch_oma_taxid_by_id.py b/bin/fetch_oma_taxid_by_id.py
index 83ef185..18f3286 100755
--- a/bin/fetch_oma_taxid_by_id.py
+++ b/bin/fetch_oma_taxid_by_id.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 
+# Written by Igor Trujnara, released under the MIT license
+# See https://opensource.org/license/mit for details
+
 import sys
 
 from utils import fetch_seq
diff --git a/bin/fetch_panther_group.py b/bin/fetch_panther_group.py
index c07034a..4d81b2e 100755
--- a/bin/fetch_panther_group.py
+++ b/bin/fetch_panther_group.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 
+# Written by Igor Trujnara, released under the MIT license
+# See https://opensource.org/license/mit for details
+
 import sys
 
 import requests
diff --git a/bin/fetch_sequences.py b/bin/fetch_sequences.py
index e392024..8f5a11c 100755
--- a/bin/fetch_sequences.py
+++ b/bin/fetch_sequences.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 
+# Written by Igor Trujnara, released under the MIT license
+# See https://opensource.org/license/mit for details
+
 import sys
 
 import requests
diff --git a/bin/filter_fasta.py b/bin/filter_fasta.py
index 6840885..b6348ca 100755
--- a/bin/filter_fasta.py
+++ b/bin/filter_fasta.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 
+# Written by Igor Trujnara, released under the MIT license
+# See https://opensource.org/license/mit for details
+
 import sys
 
 from Bio import SeqIO
diff --git a/bin/get_oma_version.py b/bin/get_oma_version.py
index d0d70f8..d75619b 100755
--- a/bin/get_oma_version.py
+++ b/bin/get_oma_version.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 
+# Written by Igor Trujnara, released under the MIT license
+# See https://opensource.org/license/mit for details
+
 import requests
 
 
diff --git a/bin/make_score_table.py b/bin/make_score_table.py
index 68efe87..ccea2df 100755
--- a/bin/make_score_table.py
+++ b/bin/make_score_table.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 
+# Written by Igor Trujnara, released under the MIT license
+# See https://opensource.org/license/mit for details
+
 import csv
 import re
 import sys
diff --git a/bin/make_stats.py b/bin/make_stats.py
index 8a51181..7a0bf26 100755
--- a/bin/make_stats.py
+++ b/bin/make_stats.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 
+# Written by Igor Trujnara, released under the MIT license
+# See https://opensource.org/license/mit for details
+
 import csv
 import sys
 
diff --git a/bin/map_uniprot.py b/bin/map_uniprot.py
index d556f73..dd74a16 100644
--- a/bin/map_uniprot.py
+++ b/bin/map_uniprot.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 
+# Written by Igor Trujnara, released under the MIT license
+# See https://opensource.org/license/mit for details
+
 import sys
 
 from ensembl2uniprot import ensembl2uniprot
diff --git a/bin/oma2uniprot_local.py b/bin/oma2uniprot_local.py
index 95b2213..19c605b 100755
--- a/bin/oma2uniprot_local.py
+++ b/bin/oma2uniprot_local.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 
+# Written by Igor Trujnara, released under the MIT license
+# See https://opensource.org/license/mit for details
+
 import gzip
 import sys
 
diff --git a/bin/plot_orthologs.R b/bin/plot_orthologs.R
index 891efd0..34c7219 100755
--- a/bin/plot_orthologs.R
+++ b/bin/plot_orthologs.R
@@ -1,5 +1,8 @@
 #!/usr/bin/env Rscript
 
+# Written by Igor Trujnara, released under the MIT license
+# See https://opensource.org/license/mit for details
+
 suppressMessages(library(ggplot2))
 suppressMessages(library(reshape2))
 suppressMessages(library(dplyr))
diff --git a/bin/plot_tree.R b/bin/plot_tree.R
index 945ff90..dc92ab6 100755
--- a/bin/plot_tree.R
+++ b/bin/plot_tree.R
@@ -1,5 +1,8 @@
 #!/usr/bin/env Rscript
 
+# Written by Igor Trujnara, released under the MIT license
+# See https://opensource.org/license/mit for details
+
 library(treeio)
 library(ggtree)
 library(ggplot2)
diff --git a/bin/refseq2uniprot.py b/bin/refseq2uniprot.py
index fa62edd..fe3ef0d 100644
--- a/bin/refseq2uniprot.py
+++ b/bin/refseq2uniprot.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 
+# Written by Igor Trujnara, released under the MIT license
+# See https://opensource.org/license/mit for details
+
 import sys
 
 import requests
diff --git a/bin/score_hits.py b/bin/score_hits.py
index aa4ccee..7ad39cc 100755
--- a/bin/score_hits.py
+++ b/bin/score_hits.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 
+# Written by Igor Trujnara, released under the MIT license
+# See https://opensource.org/license/mit for details
+
 import csv
 import sys
 
diff --git a/bin/uniprot2oma_local.py b/bin/uniprot2oma_local.py
index f816bb0..ee97ca3 100755
--- a/bin/uniprot2oma_local.py
+++ b/bin/uniprot2oma_local.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 
+# Written by Igor Trujnara, released under the MIT license
+# See https://opensource.org/license/mit for details
+
 import gzip
 import sys
 
diff --git a/bin/uniprot2uniprot.py b/bin/uniprot2uniprot.py
index a7c0e01..dbe3242 100644
--- a/bin/uniprot2uniprot.py
+++ b/bin/uniprot2uniprot.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 
+# Written by Igor Trujnara, released under the MIT license
+# See https://opensource.org/license/mit for details
+
 import sys
 
 import requests
diff --git a/bin/uniprotize_oma_local.py b/bin/uniprotize_oma_local.py
index 16317d4..3e12da9 100755
--- a/bin/uniprotize_oma_local.py
+++ b/bin/uniprotize_oma_local.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 
+# Written by Igor Trujnara, released under the MIT license
+# See https://opensource.org/license/mit for details
+
 import gzip
 import sys
 
diff --git a/bin/uniprotize_oma_online.py b/bin/uniprotize_oma_online.py
index 9b9a6df..91f26e2 100755
--- a/bin/uniprotize_oma_online.py
+++ b/bin/uniprotize_oma_online.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 
+# Written by Igor Trujnara, released under the MIT license
+# See https://opensource.org/license/mit for details
+
 import sys
 
 from map_uniprot import map_uniprot
diff --git a/bin/utils.py b/bin/utils.py
index cebe0e7..3bfc95a 100644
--- a/bin/utils.py
+++ b/bin/utils.py
@@ -1,3 +1,7 @@
+# Written by Igor Trujnara, released under the MIT license
+# See https://opensource.org/license/mit for details
+# Includes code written by UniProt contributors, published under the CC-BY 4.0 license
+
 import time
 from typing import Any
 
diff --git a/bin/yml2csv.py b/bin/yml2csv.py
index 04cbd4a..da2468d 100755
--- a/bin/yml2csv.py
+++ b/bin/yml2csv.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 
+# Written by Igor Trujnara, released under the MIT license
+# See https://opensource.org/license/mit for details
+
 import sys
 
 import yaml

From 1abec3070b9115b55057c6656123c4832c2558eb Mon Sep 17 00:00:00 2001
From: itrujnara <itrujnara@wp.pl>
Date: Thu, 2 May 2024 11:31:56 +0200
Subject: [PATCH 25/29] Missing copyright note

---
 bin/clustal2fasta.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/bin/clustal2fasta.py b/bin/clustal2fasta.py
index 8f3de57..2ccad47 100755
--- a/bin/clustal2fasta.py
+++ b/bin/clustal2fasta.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 
+# Written by Igor Trujnara, released under the MIT license
+# See https://opensource.org/license/mit for details
+
 import sys
 
 from Bio import SeqIO

From f960b85edb0a6199f9ee35512e2b7726b4f088b8 Mon Sep 17 00:00:00 2001
From: itrujnara <itrujnara@wp.pl>
Date: Thu, 2 May 2024 11:51:05 +0200
Subject: [PATCH 26/29] Removed a fulfilled TODO

---
 README.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/README.md b/README.md
index 8a9d492..0186d5b 100644
--- a/README.md
+++ b/README.md
@@ -104,8 +104,6 @@ For further information or help, don't hesitate to get in touch on the [Slack `#
 <!-- TODO nf-core: Add citation for pipeline after first release. Uncomment lines below and update Zenodo doi and badge at the top of this file. -->
 <!-- If you use nf-core/reportho for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) -->
 
-<!-- TODO nf-core: Add bibliography of tools and data used in your pipeline -->
-
 An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.
 
 You can cite the `nf-core` publication as follows:

From fbfbde648af048eb9c29c7c8d9f59dee3ff50507 Mon Sep 17 00:00:00 2001
From: Igor Trujnara <53370556+itrujnara@users.noreply.github.com>
Date: Mon, 6 May 2024 15:04:45 +0200
Subject: [PATCH 27/29] Update bin/yml2csv.py

Co-authored-by: Jose Espinosa-Carrasco <kadomu@gmail.com>
---
 bin/yml2csv.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bin/yml2csv.py b/bin/yml2csv.py
index da2468d..27842b8 100755
--- a/bin/yml2csv.py
+++ b/bin/yml2csv.py
@@ -13,8 +13,8 @@ def main() -> None:
         print("Usage: yml2csv.py <id> <input_file> <output_file>")
         sys.exit(1)
 
-    sample_id = sys.argv[1]
-    input_file = sys.argv[2]
+    sample_id   = sys.argv[1]
+    input_file  = sys.argv[2]
     output_file = sys.argv[3]
 
     with open(input_file) as f:

From cf3d0ddf905f0ce3a32f151d8b9ee9c0b5464e90 Mon Sep 17 00:00:00 2001
From: itrujnara <itrujnara@wp.pl>
Date: Mon, 6 May 2024 15:08:00 +0200
Subject: [PATCH 28/29] Added FASTA example to the usage document

---
 docs/usage.md | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/docs/usage.md b/docs/usage.md
index d2c5106..fbf1216 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -20,13 +20,20 @@ You will need to create a samplesheet with information about the samples you wou
 
 The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 2 columns to match those defined in the table below.
 
-A final samplesheet file may look something like the one below.
+With `--uniprot_query` enabled, a final samplesheet file may look something like the one below:
 
 ```csv title="samplesheet.csv"
 id,query
 BicD2,Q8TD16
 ```
 
+Without `--uniprot_query`, the `query` column holds the path to a FASTA file instead:
+
+```csv title="samplesheet.csv"
+id,query
+BicD2,/home/myuser/data/bicd2.fa
+```
+
 | Column  | Description                                                                                                                                                         |
 | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `id`    | User-defined identifier. It is used to identify output files for the protein. Can be anything descriptive, as long as it does not contain spaces.                   |

From 8fc225447667ad9b4d51db14efa6b76c982c6c0b Mon Sep 17 00:00:00 2001
From: itrujnara <itrujnara@wp.pl>
Date: Mon, 6 May 2024 15:12:35 +0200
Subject: [PATCH 29/29] Refactored Orthoinspector parameter names

---
 nextflow.config                     | 6 +++---
 nextflow_schema.json                | 6 +++---
 subworkflows/local/get_orthologs.nf | 6 +++---
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 8ef9692..6c195f4 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -22,9 +22,9 @@ params {
     oma_refseq_path            = null
     skip_panther               = false
     panther_path               = null
-    skip_inspector             = false
-    inspector_path             = null
-    inspector_version          = 'Eukaryota2023'
+    skip_orthoinspector        = false
+    orthoinspector_path        = null
+    orthoinspector_version     = 'Eukaryota2023'
     skip_eggnog                = false
     eggnog_path                = null
     eggnog_idmap_path          = null
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 2a3ed3a..2fb4c2f 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -113,21 +113,21 @@
                     "help_text": "If `local_databases` is set to `true`, the pipeline will use this path to the PANTHER database.",
                     "fa_icon": "fas fa-database"
                 },
-                "skip_inspector": {
+                "skip_orthoinspector": {
                     "type": "boolean",
                     "default": "false",
                     "description": "Skip using OrthoInspector for the ortholog search.",
                     "help_text": "If set to `true`, the pipeline will not use OrthoInspector for the ortholog search.",
                     "fa_icon": "fas fa-database"
                 },
-                "inspector_version": {
+                "orthoinspector_version": {
                     "type": "string",
                     "description": "The version of the OrthoInspector database to use.",
                     "help_text": "This SHOULD be left as the default if working with eukaryotes. Only change if working with bacteria, or an old version is required for reproducibility.",
                     "default": "Eukaryota2023",
                     "fa_icon": "fas fa-database"
                 },
-                "inspector_path": {
+                "orthoinspector_path": {
                     "type": "string",
                     "format": "path",
                     "description": "Path to the OrthoInspector database.",
diff --git a/subworkflows/local/get_orthologs.nf b/subworkflows/local/get_orthologs.nf
index 115e03d..4db3d00 100644
--- a/subworkflows/local/get_orthologs.nf
+++ b/subworkflows/local/get_orthologs.nf
@@ -121,7 +121,7 @@ workflow GET_ORTHOLOGS {
         // OrthoInspector
         FETCH_INSPECTOR_GROUP_ONLINE (
             ch_query,
-            params.inspector_version
+            params.orthoinspector_version
         )
 
         ch_orthogroups
@@ -227,10 +227,10 @@ workflow GET_ORTHOLOGS {
                     .mix(FETCH_PANTHER_GROUP_ONLINE.out.versions)
                     .set { ch_versions }
             }
-            if (!params.skip_inspector) {
+            if (!params.skip_orthoinspector) {
                 FETCH_INSPECTOR_GROUP_ONLINE (
                     ch_query,
-                    params.inspector_version
+                    params.orthoinspector_version
                 )
 
                 ch_orthogroups