Thanks to visit codestin.com
Credit goes to code.bioconductor.org

Browse code

Add DAPAR, gcatest, iCheck, Imetagene, lfa, MEAL, metagenomeFeatures, pathVar, Prostar, SICtools, SISPA, SWATH2stats

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SICtools@109238 bc3139a8-67e5-0310-9ffc-ced21a209358

James Hester authored on 06/10/2015 17:15:15
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,162 @@
1
+#include <stdlib.h>
2
+#include <stdint.h>
3
+#include <string.h>
4
+#include <stdio.h>
5
+#include <zlib.h>
6
+
7
+#ifdef _WIN32
8
+#define drand48() ((double)rand() / RAND_MAX)
9
+#endif
10
+
11
+#include "ksort.h"
12
+KSORT_INIT_GENERIC(uint64_t)
13
+
14
+#include "kseq.h"
15
+KSTREAM_INIT(gzFile, gzread, 8192)
16
+
17
+typedef struct {
18
+	int n, m;
19
+	uint64_t *a;
20
+	int *idx;
21
+} bed_reglist_t;
22
+
23
+#include "khash.h"
24
+KHASH_MAP_INIT_STR(reg, bed_reglist_t)
25
+
26
+#define LIDX_SHIFT 13
27
+
28
+typedef kh_reg_t reghash_t;
29
+
30
+int *bed_index_core(int n, uint64_t *a, int *n_idx)
31
+{
32
+	int i, j, m, *idx;
33
+	m = *n_idx = 0; idx = 0;
34
+	for (i = 0; i < n; ++i) {
35
+		int beg, end;
36
+		beg = a[i]>>32 >> LIDX_SHIFT; end = ((uint32_t)a[i]) >> LIDX_SHIFT;
37
+		if (m < end + 1) {
38
+			int oldm = m;
39
+			m = end + 1;
40
+			kroundup32(m);
41
+			idx = realloc(idx, m * sizeof(int));
42
+			for (j = oldm; j < m; ++j) idx[j] = -1;
43
+		}
44
+		if (beg == end) {
45
+			if (idx[beg] < 0) idx[beg] = i;
46
+		} else {
47
+			for (j = beg; j <= end; ++j)
48
+				if (idx[j] < 0) idx[j] = i;
49
+		}
50
+		*n_idx = end + 1;
51
+	}
52
+	return idx;
53
+}
54
+
55
+void bed_index(void *_h)
56
+{
57
+	reghash_t *h = (reghash_t*)_h;
58
+	khint_t k;
59
+	for (k = 0; k < kh_end(h); ++k) {
60
+		if (kh_exist(h, k)) {
61
+			bed_reglist_t *p = &kh_val(h, k);
62
+			if (p->idx) free(p->idx);
63
+			ks_introsort(uint64_t, p->n, p->a);
64
+			p->idx = bed_index_core(p->n, p->a, &p->m);
65
+		}
66
+	}
67
+}
68
+
69
+int bed_overlap_core(const bed_reglist_t *p, int beg, int end)
70
+{
71
+	int i, min_off;
72
+	if (p->n == 0) return 0;
73
+	min_off = (beg>>LIDX_SHIFT >= p->n)? p->idx[p->n-1] : p->idx[beg>>LIDX_SHIFT];
74
+	if (min_off < 0) { // TODO: this block can be improved, but speed should not matter too much here
75
+		int n = beg>>LIDX_SHIFT;
76
+		if (n > p->n) n = p->n;
77
+		for (i = n - 1; i >= 0; --i)
78
+			if (p->idx[i] >= 0) break;
79
+		min_off = i >= 0? p->idx[i] : 0;
80
+	}
81
+	for (i = min_off; i < p->n; ++i) {
82
+		if ((int)(p->a[i]>>32) >= end) break; // out of range; no need to proceed
83
+		if ((int32_t)p->a[i] > beg && (int32_t)(p->a[i]>>32) < end)
84
+			return 1; // find the overlap; return
85
+	}
86
+	return 0;
87
+}
88
+
89
+int bed_overlap(const void *_h, const char *chr, int beg, int end)
90
+{
91
+	const reghash_t *h = (const reghash_t*)_h;
92
+	khint_t k;
93
+	if (!h) return 0;
94
+	k = kh_get(reg, h, chr);
95
+	if (k == kh_end(h)) return 0;
96
+	return bed_overlap_core(&kh_val(h, k), beg, end);
97
+}
98
+
99
+void *bed_read(const char *fn)
100
+{
101
+	reghash_t *h = kh_init(reg);
102
+	gzFile fp;
103
+	kstream_t *ks;
104
+	int dret;
105
+	kstring_t *str;
106
+	// read the list
107
+	fp = strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
108
+	if (fp == 0) return 0;
109
+	str = calloc(1, sizeof(kstring_t));
110
+	ks = ks_init(fp);
111
+	while (ks_getuntil(ks, 0, str, &dret) >= 0) { // read the chr name
112
+		int beg = -1, end = -1;
113
+		bed_reglist_t *p;
114
+		khint_t k = kh_get(reg, h, str->s);
115
+		if (k == kh_end(h)) { // absent from the hash table
116
+			int ret;
117
+			char *s = strdup(str->s);
118
+			k = kh_put(reg, h, s, &ret);
119
+			memset(&kh_val(h, k), 0, sizeof(bed_reglist_t));
120
+		}
121
+		p = &kh_val(h, k);
122
+		if (dret != '\n') { // if the lines has other characters
123
+			if (ks_getuntil(ks, 0, str, &dret) > 0 && isdigit(str->s[0])) {
124
+				beg = atoi(str->s); // begin
125
+				if (dret != '\n') {
126
+					if (ks_getuntil(ks, 0, str, &dret) > 0 && isdigit(str->s[0])) {
127
+						end = atoi(str->s); // end
128
+						if (end < beg) end = -1;
129
+					}
130
+				}
131
+			}
132
+		}
133
+		if (dret != '\n') while ((dret = ks_getc(ks)) > 0 && dret != '\n'); // skip the rest of the line
134
+		if (end < 0 && beg > 0) end = beg, beg = beg - 1; // if there is only one column
135
+		if (beg >= 0 && end > beg) {
136
+			if (p->n == p->m) {
137
+				p->m = p->m? p->m<<1 : 4;
138
+				p->a = realloc(p->a, p->m * 8);
139
+			}
140
+			p->a[p->n++] = (uint64_t)beg<<32 | end;
141
+		}
142
+	}
143
+	ks_destroy(ks);
144
+	gzclose(fp);
145
+	free(str->s); free(str);
146
+	bed_index(h);
147
+	return h;
148
+}
149
+
150
+void bed_destroy(void *_h)
151
+{
152
+	reghash_t *h = (reghash_t*)_h;
153
+	khint_t k;
154
+	for (k = 0; k < kh_end(h); ++k) {
155
+		if (kh_exist(h, k)) {
156
+			free(kh_val(h, k).a);
157
+			free(kh_val(h, k).idx);
158
+			free((char*)kh_key(h, k));
159
+		}
160
+	}
161
+	kh_destroy(reg, h);
162
+}