Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit e3d8405

Browse files
committed
rename
1 parent cc1d967 commit e3d8405

File tree

1 file changed

+87
-0
lines changed

1 file changed

+87
-0
lines changed

extra/10_whitmans/solution.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
#!/usr/bin/env python3
2+
""" Probabilistically subset FASTA files """
3+
4+
import argparse
5+
import os
6+
import random
7+
from Bio import SeqIO
8+
9+
10+
# --------------------------------------------------
11+
def get_args():
    """Parse and validate the command-line arguments.

    Returns:
        argparse.Namespace with these attributes:
            file: list of one or more readable text file handles, already
                opened by ``argparse.FileType('r')``.
            pct: float strictly between 0 and 1 (default 0.1).
            seed: int seed for the random module, or None (default).
            outdir: str path of the output directory (default 'out').

    Side effects:
        Creates ``outdir`` (including missing parents) if it does not exist.

    Exits through ``parser.error`` (SystemExit) when ``--pct`` is not
    strictly between 0 and 1.
    """
    parser = argparse.ArgumentParser(
        description='Probabilistically subset FASTA files',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('file',
                        metavar='FILE',
                        type=argparse.FileType('r'),
                        nargs='+',
                        help='Input FASTA file(s)')

    parser.add_argument('-p',
                        '--pct',
                        help='Percent of reads',
                        metavar='reads',
                        type=float,
                        default=.1)

    parser.add_argument('-s',
                        '--seed',
                        help='Random seed value',
                        metavar='seed',
                        type=int,
                        default=None)

    parser.add_argument('-o',
                        '--outdir',
                        help='Output directory',
                        metavar='DIR',
                        type=str,
                        default='out')

    args = parser.parse_args()

    # The chained comparison also rejects NaN, which compares false both ways.
    if not 0 < args.pct < 1:
        parser.error(f'--pct "{args.pct}" must be between 0 and 1')

    # exist_ok=True avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(args.outdir, exist_ok=True)

    return args
53+
54+
55+
# --------------------------------------------------
56+
def main():
    """Randomly sample records from each input FASTA file.

    Seeds the random module with ``args.seed``, then for every input file
    writes a same-named file into ``args.outdir`` containing each record for
    which ``random.random() <= args.pct``. Prints one progress line per input
    and a final summary of how many sequences were written.

    NOTE: the output name is derived from the input's basename only, so two
    inputs sharing a basename write to the same output path and the later
    one overwrites the earlier.

    Raises:
        SystemExit: if an output path resolves to the input file itself,
            because opening it for writing would truncate the input.
    """
    args = get_args()
    random.seed(args.seed)

    total_num = 0
    for i, fh in enumerate(args.file, start=1):
        basename = os.path.basename(fh.name)
        out_file = os.path.join(args.outdir, basename)
        print(f'{i:3}: {basename}')

        # Opening the output with 'wt' truncates it; if it is the same file
        # as the input, the input would be destroyed before being read.
        if os.path.realpath(fh.name) == os.path.realpath(out_file):
            raise SystemExit(
                f'Output file "{out_file}" would overwrite input file; '
                'choose a different --outdir')

        num_taken = 0
        # Context managers close both handles even if parsing or writing
        # raises, and release each input as soon as it has been read.
        with fh, open(out_file, 'wt') as out_fh:
            for rec in SeqIO.parse(fh, 'fasta'):
                if random.random() <= args.pct:
                    num_taken += 1
                    SeqIO.write(rec, out_fh, 'fasta')

        total_num += num_taken

    num_files = len(args.file)
    print(f'Wrote {total_num:,} sequence{"" if total_num == 1 else "s"} '
          f'from {num_files:,} file{"" if num_files == 1 else "s"} '
          f'to directory "{args.outdir}"')
83+
84+
85+
# --------------------------------------------------
86+
# Run the sampler only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)