Thanks to visit codestin.com
Credit goes to code.bioconductor.org

Browse code

Add DAPAR, gcatest, iCheck, Imetagene, lfa, MEAL, metagenomeFeatures, pathVar, Prostar, SICtools, SISPA, SWATH2stats

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SICtools@109238 bc3139a8-67e5-0310-9ffc-ced21a209358

James Hester authored on 06/10/2015 17:15:15
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,191 @@
1
+#include <string.h>
2
+#include <stdlib.h>
3
+#include <sys/stat.h>
4
+#include <unistd.h>
5
+#include "knetfile.h"
6
+#include "bcf.h"
7
+
8
+#include "kseq.h"
9
+KSTREAM_INIT(gzFile, gzread, 0x10000)
10
+
11
+int bcfview(int argc, char *argv[]);
12
+int bcf_main_index(int argc, char *argv[]);
13
+
14
+#define BUF_SIZE 0x10000
15
+
16
+int bcf_cat(int n, char * const *fn)
17
+{
18
+	int i;
19
+	bcf_t *out;
20
+	uint8_t *buf;
21
+	buf = malloc(BUF_SIZE);
22
+	out = bcf_open("-", "w");
23
+	for (i = 0; i < n; ++i) {
24
+		bcf_t *in;
25
+		bcf_hdr_t *h;
26
+		off_t end;
27
+		struct stat s;
28
+		in = bcf_open(fn[i], "r");
29
+		h = bcf_hdr_read(in);
30
+		if (i == 0) bcf_hdr_write(out, h);
31
+		bcf_hdr_destroy(h);
32
+#ifdef _USE_KNETFILE
33
+		fstat(knet_fileno((knetFile*)in->fp->fp), &s);
34
+		end = s.st_size - 28;
35
+		while (knet_tell((knetFile*)in->fp->fp) < end) {
36
+			int size = knet_tell((knetFile*)in->fp->fp) + BUF_SIZE < end? BUF_SIZE : end - knet_tell((knetFile*)in->fp->fp);
37
+			knet_read(in->fp->fp, buf, size);
38
+			fwrite(buf, 1, size, out->fp->fp);
39
+		}
40
+#else
41
+		abort(); // FIXME: not implemented
42
+#endif
43
+		bcf_close(in);
44
+	}
45
+	bcf_close(out);
46
+	free(buf);
47
+	return 0;
48
+}
49
+
50
+extern double bcf_pair_freq(const bcf1_t *b0, const bcf1_t *b1, double f[4]);
51
+
52
+int bcf_main_ldpair(int argc, char *argv[])
53
+{
54
+	bcf_t *fp;
55
+	bcf_hdr_t *h;
56
+	bcf1_t *b0, *b1;
57
+	bcf_idx_t *idx;
58
+	kstring_t str;
59
+	void *str2id;
60
+	gzFile fplist;
61
+	kstream_t *ks;
62
+	int dret, lineno = 0;
63
+	if (argc < 3) {
64
+		fprintf(stderr, "Usage: bcftools ldpair <in.bcf> <in.list>\n");
65
+		return 1;
66
+	}
67
+	fplist = gzopen(argv[2], "rb");
68
+	ks = ks_init(fplist);
69
+	memset(&str, 0, sizeof(kstring_t));
70
+	fp = bcf_open(argv[1], "rb");
71
+	h = bcf_hdr_read(fp);
72
+	str2id = bcf_build_refhash(h);
73
+	idx = bcf_idx_load(argv[1]);
74
+	if (idx == 0) {
75
+		fprintf(stderr, "[%s] No bcf index is found. Abort!\n", __func__);
76
+		return 1;
77
+	}
78
+	b0 = calloc(1, sizeof(bcf1_t));
79
+	b1 = calloc(1, sizeof(bcf1_t));
80
+	while (ks_getuntil(ks, '\n', &str, &dret) >= 0) {
81
+		char *p, *q;
82
+		int k;
83
+		int tid0 = -1, tid1 = -1, pos0 = -1, pos1 = -1;
84
+		++lineno;
85
+		for (p = q = str.s, k = 0; *p; ++p) {
86
+			if (*p == ' ' || *p == '\t') {
87
+				*p = '\0';
88
+				if (k == 0) tid0 = bcf_str2id(str2id, q);
89
+				else if (k == 1) pos0 = atoi(q) - 1;
90
+				else if (k == 2) tid1 = strcmp(q, "=")? bcf_str2id(str2id, q) : tid0;
91
+				else if (k == 3) pos1 = atoi(q) - 1;
92
+				q = p + 1;
93
+				++k;
94
+			}
95
+		}
96
+		if (k == 3) pos1 = atoi(q) - 1;
97
+		if (tid0 >= 0 && tid1 >= 0 && pos0 >= 0 && pos1 >= 0) {
98
+			uint64_t off;
99
+			double r, f[4];
100
+			off = bcf_idx_query(idx, tid0, pos0);
101
+			bgzf_seek(fp->fp, off, SEEK_SET);
102
+			while (bcf_read(fp, h, b0) >= 0 && b0->pos != pos0);
103
+			off = bcf_idx_query(idx, tid1, pos1);
104
+			bgzf_seek(fp->fp, off, SEEK_SET);
105
+			while (bcf_read(fp, h, b1) >= 0 && b1->pos != pos1);
106
+			r = bcf_pair_freq(b0, b1, f);
107
+			r *= r;
108
+			printf("%s\t%d\t%s\t%d\t%.4g\t%.4g\t%.4g\t%.4g\t%.4g\n", h->ns[tid0], pos0+1, h->ns[tid1], pos1+1,
109
+				r, f[0], f[1], f[2], f[3]);
110
+		} //else fprintf(stderr, "[%s] Parse error at line %d.\n", __func__, lineno);
111
+	}
112
+	bcf_destroy(b0); bcf_destroy(b1);
113
+	bcf_idx_destroy(idx);
114
+	bcf_str2id_destroy(str2id);
115
+	bcf_hdr_destroy(h);
116
+	bcf_close(fp);
117
+	free(str.s);
118
+	ks_destroy(ks);
119
+	gzclose(fplist);
120
+	return 0;
121
+}
122
+
123
+int bcf_main_ld(int argc, char *argv[])
124
+{
125
+	bcf_t *fp;
126
+	bcf_hdr_t *h;
127
+	bcf1_t **b, *b0;
128
+	int i, j, m, n;
129
+	double f[4];
130
+	if (argc == 1) {
131
+		fprintf(stderr, "Usage: bcftools ld <in.bcf>\n");
132
+		return 1;
133
+	}
134
+	fp = bcf_open(argv[1], "rb");
135
+	h = bcf_hdr_read(fp);
136
+	// read the entire BCF
137
+	m = n = 0; b = 0;
138
+	b0 = calloc(1, sizeof(bcf1_t));
139
+	while (bcf_read(fp, h, b0) >= 0) {
140
+		if (m == n) {
141
+			m = m? m<<1 : 16;
142
+			b = realloc(b, sizeof(void*) * m);
143
+		}
144
+		b[n] = calloc(1, sizeof(bcf1_t));
145
+		bcf_cpy(b[n++], b0);
146
+	}
147
+	bcf_destroy(b0);
148
+	// compute pair-wise r^2
149
+	printf("%d\n", n); // the number of loci
150
+	for (i = 0; i < n; ++i) {
151
+		printf("%s:%d", h->ns[b[i]->tid], b[i]->pos + 1);
152
+		for (j = 0; j < i; ++j) {
153
+			double r = bcf_pair_freq(b[i], b[j], f);
154
+			printf("\t%.3f", r*r);
155
+		}
156
+		printf("\t1.000\n");
157
+	}
158
+	// free
159
+	for (i = 0; i < n; ++i) bcf_destroy(b[i]);
160
+	free(b);
161
+	bcf_hdr_destroy(h);
162
+	bcf_close(fp);
163
+	return 0;
164
+}
165
+
166
+int main(int argc, char *argv[])
167
+{
168
+	if (argc == 1) {
169
+		fprintf(stderr, "\n");
170
+		fprintf(stderr, "Program: bcftools (Tools for data in the VCF/BCF formats)\n");
171
+		fprintf(stderr, "Version: %s\n\n", BCF_VERSION);
172
+		fprintf(stderr, "Usage:   bcftools <command> <arguments>\n\n");
173
+		fprintf(stderr, "Command: view      print, extract, convert and call SNPs from BCF\n");
174
+		fprintf(stderr, "         index     index BCF\n");
175
+		fprintf(stderr, "         cat       concatenate BCFs\n");
176
+		fprintf(stderr, "         ld        compute all-pair r^2\n");
177
+		fprintf(stderr, "         ldpair    compute r^2 between requested pairs\n");
178
+		fprintf(stderr, "\n");
179
+		return 1;
180
+	}
181
+	if (strcmp(argv[1], "view") == 0) return bcfview(argc-1, argv+1);
182
+	else if (strcmp(argv[1], "index") == 0) return bcf_main_index(argc-1, argv+1);
183
+	else if (strcmp(argv[1], "ld") == 0) return bcf_main_ld(argc-1, argv+1);
184
+	else if (strcmp(argv[1], "ldpair") == 0) return bcf_main_ldpair(argc-1, argv+1);
185
+	else if (strcmp(argv[1], "cat") == 0) return bcf_cat(argc-2, argv+2); // cat is different ...
186
+	else {
187
+		fprintf(stderr, "[main] Unrecognized command.\n");
188
+		return 1;
189
+	}
190
+	return 0;
191
+}