From e43c87b2f2df72add9e32f2984ed52791ff406ef Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Tue, 27 Apr 2021 11:22:38 +0200 Subject: [PATCH 1/2] Lint - merge markers - ignore binary files. Also add ability to ignore specific files in config. Moved binary detection function into utils. --- nf_core/create.py | 15 +++------------ nf_core/lint/merge_markers.py | 19 +++++++++++++++---- nf_core/utils.py | 17 +++++++++++++++++ 3 files changed, 35 insertions(+), 16 deletions(-) diff --git a/nf_core/create.py b/nf_core/create.py index 85c60b7bc4..db3a47d6e6 100644 --- a/nf_core/create.py +++ b/nf_core/create.py @@ -6,7 +6,6 @@ import git import jinja2 import logging -import mimetypes import os import pathlib import requests @@ -83,8 +82,6 @@ def render_template(self): loader=jinja2.PackageLoader("nf_core", "pipeline-template"), keep_trailing_newline=True ) template_dir = os.path.join(os.path.dirname(__file__), "pipeline-template") - binary_ftypes = ["image", "application/java-archive", "application/x-java-archive"] - binary_extensions = [".jpeg", ".jpg", ".png", ".zip", ".gz", ".jar", ".tar"] object_attrs = vars(self) object_attrs["nf_core_version"] = nf_core.__version__ @@ -108,15 +105,9 @@ def render_template(self): os.makedirs(os.path.dirname(output_path), exist_ok=True) try: - # Just copy certain file extensions - filename, file_extension = os.path.splitext(template_fn_path) - if file_extension in binary_extensions: - raise AttributeError(f"File extension: {file_extension}") - - # Try to detect binary files - (ftype, encoding) = mimetypes.guess_type(template_fn_path, strict=False) - if encoding is not None or (ftype is not None and any([ftype.startswith(ft) for ft in binary_ftypes])): - raise AttributeError(f"Encoding: {encoding}") + # Just copy binary files + if nf_core.utils.is_file_binary(template_fn_path): + raise AttributeError(f"Binary file: {template_fn_path}") # Got this far - render the template log.debug(f"Rendering template file: '{template_fn}'") diff --git a/nf_core/lint/merge_markers.py b/nf_core/lint/merge_markers.py index 21a689a8ea..6f0d9e3d2e 100644 --- a/nf_core/lint/merge_markers.py +++ b/nf_core/lint/merge_markers.py @@ -5,6 +5,8 @@ import io import fnmatch +import nf_core.utils + log = logging.getLogger(__name__) @@ -18,6 +20,9 @@ def merge_markers(self): """ passed = [] failed = [] + ignored = [] + + ignored_config = self.lint_config.get("merge_markers", []) ignore = [".git"] if os.path.isfile(os.path.join(self.wf_path, ".gitignore")): @@ -31,16 +36,22 @@ def merge_markers(self): dirs[:] = [d for d in dirs if not fnmatch.fnmatch(os.path.join(root, d), i)] files[:] = [f for f in files if not fnmatch.fnmatch(os.path.join(root, f), i)] for fname in files: + # File ignored in config + if os.path.relpath(os.path.join(root, fname), self.wf_path) in ignored_config: + ignored.append(f"Ignoring file `{os.path.join(root, fname)}`") + continue + # Skip binary files + if nf_core.utils.is_file_binary(os.path.join(root, fname)): + continue try: with io.open(os.path.join(root, fname), "rt", encoding="latin1") as fh: for l in fh: if ">>>>>>>" in l: - failed.append(f"Merge marker '>>>>>>>' in `{os.path.join(root, fname)}`: {l}") + failed.append(f"Merge marker '>>>>>>>' in `{os.path.join(root, fname)}`: {l[:30]}") if "<<<<<<<" in l: - failed.append(f"Merge marker '<<<<<<<' in `{os.path.join(root, fname)}`: {l}") - print(root) + failed.append(f"Merge marker '<<<<<<<' in `{os.path.join(root, fname)}`: {l[:30]}") except FileNotFoundError: log.debug(f"Could not open file {os.path.join(root, fname)} in merge_markers lint test") if len(failed) == 0: passed.append("No merge markers found in pipeline files") - return {"passed": passed, "failed": failed} + return {"passed": passed, "failed": failed, "ignored": ignored} diff --git a/nf_core/utils.py b/nf_core/utils.py index 18f2dcb581..6c47d8c7b0 100644 --- a/nf_core/utils.py +++ b/nf_core/utils.py @@ -11,6 +11,7 @@ import hashlib import json import logging +import mimetypes import os import prompt_toolkit import re @@ -550,3 +551,19 @@ def write_line_break(self, data=None): CustomDumper.add_representer(dict, CustomDumper.represent_dict_preserve_order) return CustomDumper + + +def is_file_binary(path): + """ Check file path to see if it is a binary file """ + binary_ftypes = ["image", "application/java-archive", "application/x-java-archive"] + binary_extensions = [".jpeg", ".jpg", ".png", ".zip", ".gz", ".jar", ".tar"] + + # Check common file extensions + filename, file_extension = os.path.splitext(path) + if file_extension in binary_extensions: + return True + + # Try to detect binary files + (ftype, encoding) = mimetypes.guess_type(path, strict=False) + if encoding is not None or (ftype is not None and any([ftype.startswith(ft) for ft in binary_ftypes])): + return True From 25fe58722dd3985083c3b9e9ac308657c782ba60 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Tue, 27 Apr 2021 11:24:37 +0200 Subject: [PATCH 2/2] Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f4a657d4c3..e59a4c73dc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ * Fix bug in nf-core lint config skipping for the `nextflow_config` test [[#1019](https://github.com/nf-core/tools/issues/1019)] * New `-k`/`--key` cli option for `nf-core lint` to allow you to run only named lint tests, for faster local debugging * Ignore permission errors for setting up requests cache directories to allow starting with an invalid or read-only HOME directory +* Merge markers lint test - ignore binary files, allow config to ignore specific files [[#1040](https://github.com/nf-core/tools/pull/1040)] ### Template