Thanks to visit codestin.com
Credit goes to code.bioconductor.org

Browse code

Add DAPAR, gcatest, iCheck, Imetagene, lfa, MEAL, metagenomeFeatures, pathVar, Prostar, SICtools, SISPA, SWATH2stats

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SICtools@109238 bc3139a8-67e5-0310-9ffc-ced21a209358

James Hester authored on 06/10/2015 17:15:15
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,197 @@
1
+/* The MIT License
2
+
3
+   Copyright (c) 2010 Broad Institute
4
+
5
+   Permission is hereby granted, free of charge, to any person obtaining
6
+   a copy of this software and associated documentation files (the
7
+   "Software"), to deal in the Software without restriction, including
8
+   without limitation the rights to use, copy, modify, merge, publish,
9
+   distribute, sublicense, and/or sell copies of the Software, and to
10
+   permit persons to whom the Software is furnished to do so, subject to
11
+   the following conditions:
12
+
13
+   The above copyright notice and this permission notice shall be
14
+   included in all copies or substantial portions of the Software.
15
+
16
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
+   SOFTWARE.
24
+*/
25
+
26
+/* Contact: Heng Li <[email protected]> */
27
+
28
+#ifndef BCF_H
29
+#define BCF_H
30
+
31
+#define BCF_VERSION "0.1.19-44428cd"
32
+
33
+#include <stdint.h>
34
+#include <zlib.h>
35
+
36
+#ifndef BCF_LITE /* full build: the file handle is a BGZF stream declared by bgzf.h */
37
+#include "bgzf.h"
38
+typedef BGZF *bcfFile;
39
+#else /* BCF_LITE: no bgzf.h; the bgzf_* names are mapped onto zlib's gz* calls */
40
+typedef gzFile bcfFile;
41
+#define bgzf_open(fn, mode) gzopen(fn, mode)
42
+#define bgzf_fdopen(fd, mode) gzdopen(fd, mode)
43
+#define bgzf_close(fp) gzclose(fp)
44
+#define bgzf_read(fp, buf, len) gzread(fp, buf, len)
45
+#define bgzf_write(fp, buf, len) /* expands to nothing: writes are compiled out, so the call yields no value */
46
+#define bgzf_flush(fp) /* expands to nothing, like bgzf_write */
47
+#endif /* BCF_LITE */
48
+
49
+/*
50
+  A member in the structs below is said to be "primary" if its content
51
+  cannot be inferred from other members in any of the structs below; a
52
+  member is said to be "derived" if its content can be derived from
53
+  other members. For example, bcf1_t::str is primary as this comes from
54
+  the input data, while bcf1_t::info is derived as it can always be
55
+  correctly set if we know bcf1_t::str. Derived members are for quick
56
+  access to the content and must be synchronized with the primary data.
57
+ */
58
+
59
+typedef struct { // one genotype field block of a record; bcf1_t::gi is an array of these
60
+	uint32_t fmt; // format tag of the block, packed into an integer by bcf_str2int()
61
+	int len; // length of data for each individual; NOTE(review): presumably in bytes - confirm
62
+	void *data; // concatenated data; untyped in this header (void *)
63
+	// derived info: fmt, len (<-bcf1_t::fmt)
64
+} bcf_ginfo_t;
65
+
66
+typedef struct { // one BCF/VCF record
67
+	int32_t tid, pos; // refID and 0-based position
68
+	int32_t l_str, m_str; // length and the allocated size of ->str
69
+	float qual; // SNP quality
70
+	char *str; // concatenated string of variable length strings in VCF (from col.2 to col.7)
71
+	char *ref, *alt, *flt, *info, *fmt; // all of these point into ->str; they own no memory of their own
72
+	int n_gi, m_gi; // number and the allocated size of geno fields
73
+	bcf_ginfo_t *gi; // array of geno fields
74
+	int n_alleles, n_smpl; // number of alleles and samples
75
+	// derived info: ref, alt, flt, info, fmt (<-str), n_gi (<-fmt), n_alleles (<-alt), n_smpl (<-bcf_hdr_t::n_smpl); call bcf_sync() after changing ->str
76
+    uint8_t *ploidy;    // ploidy of all samples; if NULL, a ploidy of 2 is assumed.
77
+} bcf1_t;
78
+
79
+typedef struct { // BCF/VCF header: reference sequence names, sample names and header text
80
+	int32_t n_ref, n_smpl; // number of reference sequences and samples
81
+	int32_t l_nm; // length of concatenated sequence names; 0 padded
82
+	int32_t l_smpl; // length of concatenated sample names; 0 padded
83
+	int32_t l_txt; // length of header text (lines starting with ##)
84
+	char *name, *sname, *txt; // concatenated sequence names, sample names and header text
85
+	char **ns, **sns; // arrays of sequence and sample names; entries point into name and sname, respectively
86
+	// derived info: n_ref (<-name), n_smpl (<-sname), ns (<-name), sns (<-sname); bcf_hdr_sync() sets ns and sns
87
+} bcf_hdr_t;
88
+
89
+typedef struct { // handle for an open BCF or VCF file
90
+	int is_vcf; // set if the file in operation is a VCF
91
+	void *v; // auxiliary data structure for VCF
92
+	bcfFile fp; // file handle for BCF
93
+} bcf_t;
94
+
95
+struct __bcf_idx_t; // forward declaration only: the index layout is not defined in this header
96
+typedef struct __bcf_idx_t bcf_idx_t; // opaque type used through pointers by the bcf_idx_* functions; NOTE(review): identifiers containing "__" are reserved in C
97
+
98
+#ifdef __cplusplus
99
+extern "C" {
100
+#endif
101
+
102
+	// open a BCF file; BCF only (vcf_open() also handles VCF)
103
+	bcf_t *bcf_open(const char *fn, const char *mode);
104
+	// close the file
105
+	int bcf_close(bcf_t *b);
106
+	// read one record from BCF; return -1 on end-of-file, and <-1 for errors
107
+	int bcf_read(bcf_t *bp, const bcf_hdr_t *h, bcf1_t *b);
108
+	// re-synchronize the members derived from b->str; call this function whenever b->str is changed
109
+	int bcf_sync(bcf1_t *b);
110
+	// write a BCF record
111
+	int bcf_write(bcf_t *bp, const bcf_hdr_t *h, const bcf1_t *b);
112
+	// read the BCF header; BCF only
113
+	bcf_hdr_t *bcf_hdr_read(bcf_t *b);
114
+	// write the BCF header
115
+	int bcf_hdr_write(bcf_t *b, const bcf_hdr_t *h);
116
+	// set bcf_hdr_t::ns and bcf_hdr_t::sns
117
+	int bcf_hdr_sync(bcf_hdr_t *b);
118
+	// destroy the header
119
+	void bcf_hdr_destroy(bcf_hdr_t *h);
120
+	// destroy a record
121
+	int bcf_destroy(bcf1_t *b);
122
+	// BCF->VCF conversion; NOTE(review): ownership of the returned string is not stated here - confirm who frees it
123
+	char *bcf_fmt(const bcf_hdr_t *h, bcf1_t *b);
124
+	// append more info; NOTE(review): l is presumably the length of info - confirm
125
+	int bcf_append_info(bcf1_t *b, const char *info, int l);
126
+    // remove tag; NOTE(review): presumably edits string in place (it is non-const) - confirm
127
+    int remove_tag(char *string, const char *tag, char delim);
128
+    // remove info tag, string is the kstring holder of bcf1_t.str
129
+    void rm_info(kstring_t *string, const char *key); // NOTE(review): kstring_t is not declared in this header; presumably the includer must provide it (kstring.h) first - confirm
130
+	// copy record b into r
131
+	int bcf_cpy(bcf1_t *r, const bcf1_t *b);
132
+
133
+	// open a VCF file, or a BCF file if "b" is set in "mode"
134
+	bcf_t *vcf_open(const char *fn, const char *mode);
135
+	// close a VCF/BCF file
136
+	int vcf_close(bcf_t *bp);
137
+	// read the VCF/BCF header
138
+	bcf_hdr_t *vcf_hdr_read(bcf_t *bp);
139
+	// read the sequence dictionary from a separate file; required for VCF->BCF conversion
140
+	int vcf_dictread(bcf_t *bp, bcf_hdr_t *h, const char *fn);
141
+	// read a VCF/BCF record; return -1 on end-of-file and <-1 for errors
142
+	int vcf_read(bcf_t *bp, bcf_hdr_t *h, bcf1_t *b); // NOTE(review): h is non-const here, unlike in bcf_read() - presumably it may be updated; confirm
143
+	// write the VCF header
144
+	int vcf_hdr_write(bcf_t *bp, const bcf_hdr_t *h);
145
+	// write a VCF record
146
+	int vcf_write(bcf_t *bp, bcf_hdr_t *h, bcf1_t *b);
147
+
148
+	// keep the first n alleles and discard the rest
149
+	int bcf_shrink_alt(bcf1_t *b, int n);
150
+    // keep the masked alleles and discard the rest; NOTE(review): mask is presumably a bit mask with one bit per allele - confirm
151
+	void bcf_fit_alt(bcf1_t *b, int mask);
152
+	// convert GL to PL
153
+	int bcf_gl2pl(bcf1_t *b);
154
+	// test whether the site is an indel
155
+	int bcf_is_indel(const bcf1_t *b);
156
+	bcf_hdr_t *bcf_hdr_subsam(const bcf_hdr_t *h0, int n, char *const* samples, int *list); // NOTE(review): undocumented; presumably builds a header for the n named samples and fills list - confirm
157
+	int bcf_subsam(int n_smpl, int *list, bcf1_t *b); // NOTE(review): undocumented; presumably restricts record b to the samples in list - confirm
158
+	// move GT to the first FORMAT field
159
+	int bcf_fix_gt(bcf1_t *b);
160
+	// update PL generated by old samtools
161
+	int bcf_fix_pl(bcf1_t *b);
162
+	// convert PL to GLF-like 10-likelihood GL; NOTE(review): required size of gl is not stated - confirm
163
+	int bcf_gl10(const bcf1_t *b, uint8_t *gl);
164
+	// convert up to 4 INDEL alleles to GLF-like 10-likelihood GL
165
+	int bcf_gl10_indel(const bcf1_t *b, uint8_t *gl);
166
+
167
+	// string hash table; the table is handled as an opaque void * by the functions below
168
+	void *bcf_build_refhash(bcf_hdr_t *h); // NOTE(review): presumably builds the table from the reference names in h - confirm
169
+	void bcf_str2id_destroy(void *_hash);
170
+	void bcf_str2id_thorough_destroy(void *_hash);
171
+	int bcf_str2id_add(void *_hash, const char *str);
172
+	int bcf_str2id(void *_hash, const char *str);
173
+	void *bcf_str2id_init();
174
+
175
+	// indexing related functions; bcf_idx_t is opaque in this header
176
+	int bcf_idx_build(const char *fn); // NOTE(review): presumably builds an index for the file named fn - confirm
177
+	uint64_t bcf_idx_query(const bcf_idx_t *idx, int tid, int beg); // NOTE(review): meaning of the returned 64-bit value is not stated; presumably a file offset - confirm
178
+	int bcf_parse_region(void *str2id, const char *str, int *tid, int *begin, int *end); // results are returned through tid, begin and end; str2id is a string hash table
179
+	bcf_idx_t *bcf_idx_load(const char *fn);
180
+	void bcf_idx_destroy(bcf_idx_t *idx);
181
+
182
+#ifdef __cplusplus
183
+}
184
+#endif
185
+
186
/*
 * Pack the leading characters of a tag into a 32-bit integer.
 *
 * Characters are consumed left to right and shifted in as bytes, so the
 * first character ends up in the most significant used byte
 * (e.g. "PL" -> 0x504C).
 *
 * str: tag text; it does not need to be NUL-terminated if l bounds it
 * l:   maximum number of characters to read; at most 4 are ever used
 *
 * Packing stops early at a NUL byte. Returns 0 when l <= 0 or str is empty.
 */
static inline uint32_t bcf_str2int(const char *str, int l)
{
	uint32_t x = 0;
	for (int i = 0; i < l && i < 4; ++i) {
		if (str[i] == 0) return x;
		// Convert through uint8_t: where plain char is signed, a byte >= 0x80
		// would be sign-extended and set every higher bit of the result.
		x = x << 8 | (uint8_t)str[i];
	}
	return x;
}
196
+
197
+#endif