From 710fd5738baf9a1315bb16c3e4266879a8ec693a Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Thu, 20 Apr 2023 15:48:35 +0200 Subject: [PATCH 1/4] add the possibility to translate refgenie asset aliases --- CHANGELOG.md | 1 + nf_core/refgenie.py | 36 ++++++++++++++++++++++++++++++++++++ tests/test_refgenie.py | 17 +++++++++++++++++ 3 files changed, 54 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9e59d084a8..bf42fa67c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,7 @@ - Use `nfcore/gitpod:dev` container in the dev branch ([#2196](https://github.com/nf-core/tools/pull/2196)) - Replace requests_mock with responses in test mocks ([#2165](https://github.com/nf-core/tools/pull/2165)). - Add warning when installing a module from an `org_path` that exists in multiple remotes in `modules.json` ([#2228](https://github.com/nf-core/tools/pull/2228)). +- Add the possibility to translate refgenie asset aliases to the ones used in a pipeline with an alias_translations.yaml file (). 
## [v2.7.2 - Mercury Eagle Patch](https://github.com/nf-core/tools/releases/tag/2.7.2) - [2022-12-19] diff --git a/nf_core/refgenie.py b/nf_core/refgenie.py index a10e4fecdf..75b4752a62 100644 --- a/nf_core/refgenie.py +++ b/nf_core/refgenie.py @@ -2,6 +2,7 @@ Update a nextflow.config file with refgenie genomes """ +import json import logging import os import re @@ -10,6 +11,7 @@ import rich import rich.traceback +import yaml import nf_core.utils @@ -45,6 +47,7 @@ def _print_nf_config(rgc): """ abg = rgc.list_assets_by_genome() genomes_str = "" + alias_translations = _get_alias_translation_file() for genome, asset_list in abg.items(): genomes_str += f" '{genome}' {{\n" for asset in asset_list: @@ -54,6 +57,9 @@ def _print_nf_config(rgc): except Exception: log.warning(f"{genome}/{asset} is incomplete, ignoring...") else: + # Translate an alias name to the alias used in the pipeline + if asset in alias_translations.keys(): + asset = alias_translations[asset] genomes_str += f' {asset.ljust(20, " ")} = "{pth}"\n' genomes_str += " }\n" @@ -100,6 +106,36 @@ def _update_nextflow_home_config(refgenie_genomes_config_file, nxf_home): log.info(f"Created new nextflow config file: {nxf_home_config}") +def _get_alias_translation_file(): + """ + Read a file containing alias translations. + + Alias translation file should be located in the same folder as the refgenie `genome_config.yaml` file, + the path is set to $REFGENIE environment variable by `refgenie init`. + Alias translation file should be named `alias_translations.yaml` + + Input file contains the name of refgenie server aliases as keys and the name of the respective nf-core pipeline aliases as values. 
+ Such as: + ensembl_gtf: gtf + star_index: star + """ + translations = {} + + if "REFGENIE" in os.environ: + refgenie_genomes_config_path = os.environ.get("REFGENIE") + else: + return translations + + refgenie_genomes_config_directory = Path(refgenie_genomes_config_path).parents[0] + try: + with open(refgenie_genomes_config_directory / "alias_translations.yaml") as yaml_file: + translations = yaml.load(yaml_file, Loader=yaml.Loader) + except FileNotFoundError: + pass + + return translations + + def update_config(rgc): """ Update the genomes.config file after a local refgenie database has been updated diff --git a/tests/test_refgenie.py b/tests/test_refgenie.py index 1ff2683416..73fbcb863f 100644 --- a/tests/test_refgenie.py +++ b/tests/test_refgenie.py @@ -7,6 +7,8 @@ import tempfile import unittest +import yaml + class TestRefgenie(unittest.TestCase): """Class for refgenie tests""" @@ -19,6 +21,7 @@ def setUp(self): self.NXF_HOME = os.path.join(self.tmp_dir, ".nextflow") self.NXF_REFGENIE_PATH = os.path.join(self.NXF_HOME, "nf-core", "refgenie_genomes.config") self.REFGENIE = os.path.join(self.tmp_dir, "genomes_config.yaml") + self.translation_file = os.path.join(self.tmp_dir, "alias_translations.yaml") # Set NXF_HOME environment variable # avoids adding includeConfig statement to config file outside the current tmpdir try: @@ -37,6 +40,10 @@ def setUp(self): with open(self.REFGENIE, "a") as fh: fh.write(f"nextflow_config: {os.path.join(self.NXF_REFGENIE_PATH)}\n") + # Add an alias translation to YAML file + with open(self.translation_file, "a") as fh: + fh.write("ensembl_gtf: gtf\n") + def tearDown(self) -> None: # Remove the tempdir again os.system(f"rm -rf {self.tmp_dir}") @@ -53,3 +60,13 @@ def test_update_refgenie_genomes_config(self): out = subprocess.check_output(shlex.split(cmd), stderr=subprocess.STDOUT) assert "Updated nf-core genomes config" in str(out) + + def test_asset_alias_translation(self): + """Test that asset aliases are translated correctly""" 
+ # Populate the config with a genome + cmd = f"refgenie pull hg38/ensembl_gtf -c {self.REFGENIE}" + subprocess.check_output(shlex.split(cmd), stderr=subprocess.STDOUT) + cmd = f"cat {self.NXF_REFGENIE_PATH}" + out = subprocess.check_output(shlex.split(cmd), stderr=subprocess.STDOUT) + assert " gtf = " in str(out) + assert " ensembl_gtf = " not in str(out) From 72920d53a1806e9a0e1505baf9e07f02631a7dde Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Thu, 20 Apr 2023 15:52:01 +0200 Subject: [PATCH 2/4] update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bf42fa67c0..fcd2c50de8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,7 +40,7 @@ - Use `nfcore/gitpod:dev` container in the dev branch ([#2196](https://github.com/nf-core/tools/pull/2196)) - Replace requests_mock with responses in test mocks ([#2165](https://github.com/nf-core/tools/pull/2165)). - Add warning when installing a module from an `org_path` that exists in multiple remotes in `modules.json` ([#2228](https://github.com/nf-core/tools/pull/2228)). -- Add the possibility to translate refgenie asset aliases to the ones used in a pipeline with an alias_translations.yaml file (). +- Add the possibility to translate refgenie asset aliases to the ones used in a pipeline with an alias_translations.yaml file ([#2242](https://github.com/nf-core/tools/pull/2242)). 
## [v2.7.2 - Mercury Eagle Patch](https://github.com/nf-core/tools/releases/tag/2.7.2) - [2022-12-19] From 832ef1e832dd8815a0cf2d487afa6d877d6aec01 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Mon, 24 Apr 2023 10:20:39 +0200 Subject: [PATCH 3/4] add log info --- nf_core/refgenie.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nf_core/refgenie.py b/nf_core/refgenie.py index 75b4752a62..177dfc78b7 100644 --- a/nf_core/refgenie.py +++ b/nf_core/refgenie.py @@ -59,6 +59,7 @@ def _print_nf_config(rgc): else: # Translate an alias name to the alias used in the pipeline if asset in alias_translations.keys(): + log.info(f"Translating refgenie asset alias {asset} to {alias_translations[asset]}.") asset = alias_translations[asset] genomes_str += f' {asset.ljust(20, " ")} = "{pth}"\n' genomes_str += " }\n" From 46a3c3845c572cd7fefc9fbe9f03f53f424477e4 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Mon, 24 Apr 2023 10:43:36 +0200 Subject: [PATCH 4/4] obtain refgenie path also from refgenconf object --- nf_core/refgenie.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/nf_core/refgenie.py b/nf_core/refgenie.py index 177dfc78b7..b666844699 100644 --- a/nf_core/refgenie.py +++ b/nf_core/refgenie.py @@ -47,7 +47,7 @@ def _print_nf_config(rgc): """ abg = rgc.list_assets_by_genome() genomes_str = "" - alias_translations = _get_alias_translation_file() + alias_translations = _get_alias_translation_file(rgc) for genome, asset_list in abg.items(): genomes_str += f" '{genome}' {{\n" for asset in asset_list: @@ -107,7 +107,7 @@ def _update_nextflow_home_config(refgenie_genomes_config_file, nxf_home): log.info(f"Created new nextflow config file: {nxf_home_config}") -def _get_alias_translation_file(): +def _get_alias_translation_file(rgc): """ Read a file containing alias translations. 
@@ -124,10 +124,12 @@ def _get_alias_translation_file(): if "REFGENIE" in os.environ: refgenie_genomes_config_path = os.environ.get("REFGENIE") + refgenie_genomes_config_directory = Path(refgenie_genomes_config_path).parents[0] + elif "genome_folder" in rgc: + refgenie_genomes_config_directory = Path(rgc["genome_folder"]) else: return translations - refgenie_genomes_config_directory = Path(refgenie_genomes_config_path).parents[0] try: with open(refgenie_genomes_config_directory / "alias_translations.yaml") as yaml_file: translations = yaml.load(yaml_file, Loader=yaml.Loader)