1 change: 1 addition & 0 deletions .gitignore
@@ -1,4 +1,5 @@
/datasets/
test_data
/conda/envs
/conf/aws.config
.nextflow*
116 changes: 58 additions & 58 deletions main.nf
100755 → 100644

Large diffs are not rendered by default.

16 changes: 16 additions & 0 deletions modules/ariba/ariba_analysis/README.md
@@ -0,0 +1,16 @@
# ariba_analysis process testing:

This process runs reads against all available (if any) ARIBA datasets.

## About testing this process:

With DSL2, each module can be tested separately using a test workflow inside the process `.nf` file. Testing requires three items:
- the local files in `test_data`
- params in `test_params.yaml`
- the `test` profile in `nextflow.config`

## How to test it:

    $ nextflow run ariba_analysis.nf -params-file test_params.yaml -profile test,docker -entry test

If you've used `bactopia conda activate`, you can also swap `docker` for `conda` to test with Conda.
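
For example, the Conda variant of the same module test looks like this (a minimal sketch; it assumes the Conda environments were already set up via `bactopia conda activate`, as noted above):

```bash
# Same module test, but resolving dependencies through Conda instead of Docker
nextflow run ariba_analysis.nf -params-file test_params.yaml -profile test,conda -entry test
```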
51 changes: 51 additions & 0 deletions modules/ariba/ariba_analysis/ariba_analysis.nf
@@ -0,0 +1,51 @@
nextflow.enable.dsl = 2

process ARIBA_ANALYSIS {
    /* Run reads against all available (if any) ARIBA datasets */
    tag "${sample} - ${dataset_name}"

    publishDir "${outdir}/${sample}/logs", mode: "${params.publish_mode}", overwrite: params.overwrite, pattern: "${task.process}/*"
    publishDir "${outdir}/${sample}/ariba", mode: "${params.publish_mode}", overwrite: params.overwrite, pattern: "${dataset_name}/*"

    input:
    tuple val(sample), val(single_end), path(fq)
    each path(dataset)

    output:
    file "${dataset_name}/*"
    file "${task.process}/*" optional true

    when:
    single_end == false && ARIBA_DATABASES.isEmpty() == false

    shell:
    dataset_tarball = dataset.getName()
    dataset_name = dataset_tarball.replace('.tar.gz', '')
    spades_options = params.spades_options ? "--spades_options '${params.spades_options}'" : ""
    noclean = params.ariba_no_clean ? "--noclean" : ""
    template "ariba_analysis.sh"

    stub:
    dataset_tarball = dataset.getName()
    dataset_name = dataset_tarball.replace('.tar.gz', '')
    """
    mkdir ${dataset_name}
    mkdir ${task.process}
    touch ${dataset_name}/${sample}
    touch ${task.process}/${sample}
    """
}

//###############
// Module testing
//###############

workflow test {
    TEST_PARAMS_CH = Channel.of([
        params.sample,
        params.single_end,
        file(params.fq)
    ])
    TEST_PARAMS_CH2 = Channel.of(file(params.card), file(params.vfdb))
    ARIBA_ANALYSIS(TEST_PARAMS_CH, TEST_PARAMS_CH2.collect())
}
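
Because the process declares a `stub:` block, the module test can also be dry-run without ARIBA or its datasets being executed. A hedged sketch, assuming a Nextflow release with stub support:

```bash
# -stub-run replaces the ariba_analysis.sh template with the stub's
# mkdir/touch placeholders, exercising only channel wiring and publishDir logic
nextflow run ariba_analysis.nf -params-file test_params.yaml -profile test,docker -entry test -stub-run
```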
95 changes: 95 additions & 0 deletions modules/ariba/ariba_analysis/bin/build-containers.sh
@@ -0,0 +1,95 @@
#!/usr/bin/env bash
# build-containers
#
# Automate the building of Bactopia related containers
VERSION=1.6.0
CONTAINER_VERSION="${VERSION%.*}.x"

function singularity_build {
    recipe=$1
    name=$2
    image=$3
    version=$4
    latest=${5:-0}

    echo "Working on ${recipe}"
    singularity build -F ${image} ${recipe}
    singularity sign ${image}
    singularity push ${image} library://rpetit3/bactopia/${name}:${version}

    if [[ "${latest}" == "1" ]]; then
        singularity push ${image} library://rpetit3/bactopia/${name}:latest
    fi
}

function docker_build {
    recipe=$1
    image=$2
    latest=${3:-0}

    echo "Working on ${recipe}"
    docker build --rm -t ${image} -f ${recipe} .
    docker push ${image}

    if [[ "${latest}" != "0" ]]; then
        docker tag ${image} ${latest}
        docker push ${latest}
    fi
}


if [[ $# == 0 ]]; then
    echo ""
    echo "build-containers.sh BACTOPIA_DIR OUTPUT_DIR"
    echo ""
    echo "Example Command"
    echo "build-containers.sh /home/bactopia/bactopia container-images/"
    echo ""
    exit
fi

BACTOPIA_DIR=$1
OUTPUT_DIR=${2:-"./"}
if [ -z "${BACTOPIA_DIR}" ]; then
    echo "Got ${#} arguments"
    echo "Must give the path to the Bactopia repository"
    exit 1
fi
MAJOR_VERSION=${3:-"0"}

mkdir -p ${OUTPUT_DIR}

# Build Bactopia containers
#singularity_build Singularity bactopia ${OUTPUT_DIR}/bactopia-${VERSION}.simg ${VERSION} 1
#docker_build Dockerfile bactopia/bactopia:${VERSION} bactopia/bactopia:latest

if [ "${MAJOR_VERSION}" == "1" ]; then
# Build Singularity
for recipe in $(ls "${BACTOPIA_DIR}/containers/singularity" | grep ".Singularity"); do
recipe_path="${BACTOPIA_DIR}/containers/singularity/${recipe}"
recipe_name=$(echo ${recipe} | sed 's/.Singularity//')
recipe_image="${OUTPUT_DIR}/${recipe_name}-${CONTAINER_VERSION}.simg"
singularity_build ${recipe_path} ${recipe_name} ${recipe_image} ${CONTAINER_VERSION}
done

# Build Docker
docker_build Dockerfile bactopia/bactopia:${CONTAINER_VERSION} bactopia/bactopia:latest
for recipe in $(ls "${BACTOPIA_DIR}/containers/docker" | grep ".Dockerfile"); do
recipe_path="${BACTOPIA_DIR}/containers/docker/${recipe}"
recipe_name=$(echo ${recipe} | sed 's/.Dockerfile//')
recipe_image="bactopia/${recipe_name}:${CONTAINER_VERSION}"
#docker_build ${recipe_path} ${recipe_image}
done

# Build Bactopia Tools containers
for tool in $(ls "${BACTOPIA_DIR}/tools"); do
recipe_path="${BACTOPIA_DIR}/tools/${tool}"
docker_file="${recipe_path}/Dockerfile"
docker_image="bactopia/tools-${tool}:${CONTAINER_VERSION}"
#docker_build ${docker_file} ${docker_image}

singularity_file="${recipe_path}/Singularity"
singularity_image="${OUTPUT_DIR}/tools-${tool}-${CONTAINER_VERSION}.simg"
singularity_build ${singularity_file} "tools-${tool}" ${singularity_image} ${CONTAINER_VERSION}
done
fi
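
One subtlety worth noting: the usage message above lists only `BACTOPIA_DIR` and `OUTPUT_DIR`, but the build loops are gated behind a third positional argument, `MAJOR_VERSION`, which defaults to `"0"`. A sketch of an invocation that actually triggers the builds:

```bash
# The trailing "1" sets MAJOR_VERSION so the Singularity/Docker loops run;
# without it the script only creates OUTPUT_DIR and exits
./build-containers.sh /home/bactopia/bactopia container-images/ 1
```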
79 changes: 79 additions & 0 deletions modules/ariba/ariba_analysis/bin/check-assembly-accession.py
@@ -0,0 +1,79 @@
#! /usr/bin/env python3
"""
Verifies that an NCBI Assembly accession is the latest version and is still available.
"""
PROGRAM = "check-assembly-accession"
VERSION = "1.6.0"


def check_assembly_version(accession):
    from Bio import Entrez
    import time
    Entrez.email = "[email protected]"
    Entrez.tool = "BactopiaCheckAssemblyAccession"

    handle = Entrez.esearch(db="assembly", term=accession, retmax="500")
    record = Entrez.read(handle, validate=False)
    time.sleep(1)  # Be kind to NCBI

    if len(record["IdList"]):
        handle = Entrez.esummary(db="assembly", id=",".join(record["IdList"]))
        record = Entrez.read(handle, validate=False)
        time.sleep(1)  # Be kind to NCBI

        records = []
        excluded = set()
        for assembly in record['DocumentSummarySet']["DocumentSummary"]:
            if assembly["ExclFromRefSeq"]:
                # PGAP can cause some assemblies to eventually become excluded from RefSeq
                # https://www.ncbi.nlm.nih.gov/assembly/help/anomnotrefseq/
                for reason in assembly["ExclFromRefSeq"]:
                    excluded.add(reason)
            else:
                records.append(assembly["AssemblyAccession"])

        if excluded:
            return [','.join(list(excluded)), True]
        else:
            return [sorted(records, reverse=True)[0], False]
    else:
        return [f'No records found for {accession}', True]


if __name__ == '__main__':
    import argparse as ap
    import sys
    parser = ap.ArgumentParser(
        prog=PROGRAM,
        conflict_handler='resolve',
        description=(
            f'{PROGRAM} (v{VERSION}) - Verifies NCBI Assembly accession is latest and still available'
        )
    )

    parser.add_argument(
        'reference', metavar="STR", type=str,
        help='NCBI Assembly accession to be tested.'
    )
    parser.add_argument('--version', action='version',
                        version=f'{PROGRAM} {VERSION}')

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    args = parser.parse_args()
    reference = args.reference.split('.')[0]
    current_accession, excluded = check_assembly_version(reference)
    if excluded:
        print(
            f'Skipping {reference}. Reason: {current_accession}',
            file=sys.stderr
        )
    else:
        print(f'Using {current_accession} for {args.reference}', file=sys.stderr)
        print(current_accession)
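
A hedged usage sketch (the accession below is only an example): the script strips the version suffix before querying NCBI, reports progress on stderr, and prints the latest accession on stdout only when the assembly has not been excluded from RefSeq:

```bash
# Example accession is hypothetical; any GCF_/GCA_ assembly accession works.
# stderr: a "Using ..." or "Skipping ..." progress message
# stdout: the latest, non-excluded accession for downstream use
./check-assembly-accession.py GCF_000008865.2
```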
109 changes: 109 additions & 0 deletions modules/ariba/ariba_analysis/bin/check-fastqs.py
@@ -0,0 +1,109 @@
#! /usr/bin/env python3
"""
Sometimes with AWS, files might fail to download but not cause an error.
This script checks to verify all expected inputs are staged.
"""
PROGRAM = "check-staging"
VERSION = "1.6.0"
import sys


def read_json(json_file):
    import json
    with open(json_file, 'rt') as json_fh:
        return json.load(json_fh)


def write_error(filename, error_msg):
    print(error_msg, file=sys.stderr)
    with open(filename, "wt") as fh_out:
        fh_out.write(error_msg)
    return 1


def check_reads(fq1, sample, min_reads, fq2=None):
    error = 0
    total_reads = fq1 + fq2 if fq2 else fq1

    if total_reads < min_reads:
        error_msg = (f"{sample} FASTQ(s) contain {total_reads} total reads. This does not\n"
                     f"exceed the required minimum of {min_reads} reads. Further analysis is\n"
                     "discontinued.\n")
        error += write_error(f'{sample}-low-read-count-error.txt', error_msg)

    if fq2:
        if fq1 != fq2:
            # Different number of reads in the pair
            error_msg = (f"{sample} FASTQs have different read counts (R1: {fq1}, R2: {fq2}). Please\n"
                         "investigate these FASTQs. Further analysis is discontinued.\n")
            error += write_error(f'{sample}-different-read-count-error.txt', error_msg)

    return error


def check_basepairs(fq1, sample, min_basepairs, fq2=None, min_proportion=None):
    error = 0
    total_bp = fq1 + fq2 if fq2 else fq1

    if total_bp < min_basepairs:
        error_msg = (f"{sample} FASTQ(s) contain {total_bp} total basepairs. This does not\n"
                     f"exceed the required minimum of {min_basepairs} bp. Further analysis is\n"
                     "discontinued.\n")
        error += write_error(f'{sample}-low-sequence-depth-error.txt', error_msg)

    if fq2:
        proportion = float(fq1) / float(fq2) if fq1 < fq2 else float(fq2) / float(fq1)
        if proportion < min_proportion:
            # One FASTQ has disproportionately more basepairs than the other
            error_msg = (f"{sample} FASTQs failed to meet the minimum shared basepair proportion ({min_proportion}).\n"
                         f"The observed proportion was {proportion:.4f}, with R1 having {fq1} bp and\n"
                         f"R2 having {fq2} bp. Further analysis is discontinued.\n")
            error += write_error(f'{sample}-low-basepair-proportion-error.txt', error_msg)

    return error


if __name__ == '__main__':
    import argparse as ap
    parser = ap.ArgumentParser(
        prog=PROGRAM,
        conflict_handler='resolve',
        description=(
            f'{PROGRAM} (v{VERSION}) - Verifies inputs for a process are available.'
        )
    )

    parser.add_argument('--sample', metavar="STR", type=str, help='Name of the input sample.')
    parser.add_argument('--fq1', metavar="STR", type=str, help='Stats for SE or R1 FASTQ in JSON format.')
    parser.add_argument('--fq2', metavar="STR", type=str, help='Stats for R2 FASTQ in JSON format.')
    parser.add_argument('--min_proportion', metavar="FLOAT", type=float,
                        help='Minimum proportion of sequenced basepairs that R1 and R2 must share.')
    parser.add_argument('--min_reads', metavar="INT", type=int, help='Minimum number of reads.')
    parser.add_argument('--min_basepairs', metavar="INT", type=int, help='Minimum number of sequenced basepairs.')
    parser.add_argument('--version', action='version', version=f'{PROGRAM} {VERSION}')

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    args = parser.parse_args()

    error = 0
    if args.fq1 and args.fq2:
        # Paired-end
        r1 = read_json(args.fq1)
        r2 = read_json(args.fq2)
        error += check_reads(r1["qc_stats"]["read_total"], args.sample, args.min_reads,
                             fq2=r2["qc_stats"]["read_total"])
        error += check_basepairs(r1["qc_stats"]["total_bp"], args.sample, args.min_basepairs,
                                 fq2=r2["qc_stats"]["total_bp"], min_proportion=args.min_proportion)
    else:
        # Single-end
        se = read_json(args.fq1)
        error += check_reads(se["qc_stats"]["read_total"], args.sample, args.min_reads)
        error += check_basepairs(se["qc_stats"]["total_bp"], args.sample, args.min_basepairs)

    sys.exit(error)
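
To close, a hedged example of how this script might be invoked for a paired-end sample; the file names and threshold values are made up, but each JSON stats file is expected to carry the `qc_stats.read_total` and `qc_stats.total_bp` fields read above:

```bash
# Hypothetical paired-end check: r1.json and r2.json each look like
#   {"qc_stats": {"read_total": 150000, "total_bp": 22500000}}
# A non-zero exit status means one or more *-error.txt files were written.
./check-fastqs.py --sample my_sample \
    --fq1 r1.json --fq2 r2.json \
    --min_reads 7472 --min_basepairs 2241820 --min_proportion 0.5
```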