Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit dbbd34d

Browse files
committed
Revised the necessity to only remove duplicates with identical start
coord. It's not so vital with the revised formula, so within 10 works good enough and prevents some more excessive depth raises. Also (ifdef-ed out) implemented the "other" more complex method, for comparison. Cull whichever we wish before merging.
1 parent 1881d9f commit dbbd34d

File tree

1 file changed

+37
-7
lines changed

1 file changed

+37
-7
lines changed

sam.c

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ DEALINGS IN THE SOFTWARE. */
3535
#include <assert.h>
3636
#include <signal.h>
3737
#include <inttypes.h>
38+
#include <math.h>
3839

3940
// Suppress deprecation message for cigar_tab, which we initialise
4041
#include "htslib/hts_defs.h"
@@ -4155,14 +4156,16 @@ struct __bam_plp_t {
41554156
void *data;
41564157
olap_hash_t *overlaps;
41574158

4158-
uint32_t depth[MAX_AHEAD];
4159-
uint64_t last_depth_pos;
4160-
double depth_pos_fract;
4161-
41624159
// For notification of creation and destruction events
41634160
// and associated client-owned pointer.
41644161
int (*plp_construct)(void *data, const bam1_t *b, bam_pileup_cd *cd);
41654162
int (*plp_destruct )(void *data, const bam1_t *b, bam_pileup_cd *cd);
4163+
4164+
// Field used for automatic depth reduction
4165+
uint32_t depth[MAX_AHEAD]; // circular depth buffer from b->core.pos on.
4166+
uint64_t last_depth_pos; // array filed out to this pos
4167+
uint64_t max_depth_pos; // >= max depth up to this pos
4168+
double depth_pos_fract; // (Simple method)
41664169
};
41674170

41684171
bam_plp_t bam_plp_init(bam_plp_auto_f func, void *data)
@@ -4519,10 +4522,12 @@ int bam_plp_push(bam_plp_t iter, const bam1_t *b)
45194522
return 0;
45204523
} else if (iter->depth_pos_fract > 0
45214524
&& iter->tid == b->core.tid
4522-
&& iter->pos == b->core.pos) {
4525+
&& b->core.pos - iter->pos < 10) {
45234526
// Removal from depth somewhere else along read
45244527
endpos = bam_endpos(b);
45254528
uint32_t len = (uint32_t)(endpos - b->core.pos - 1);
4529+
4530+
#if 1
45264531
len = len * iter->depth_pos_fract + 0.4999;
45274532
if (b->core.pos + len < iter->last_depth_pos
45284533
&& iter->depth[(b->core.pos+len)%MAX_AHEAD] > iter->maxcnt) {
@@ -4532,6 +4537,28 @@ int bam_plp_push(bam_plp_t iter, const bam1_t *b)
45324537
overlap_remove(iter, b);
45334538
return 0;
45344539
}
4540+
#else
4541+
len = len * iter->depth_pos_fract + 0.4999;
4542+
if (len >= MAX_AHEAD) len = MAX_AHEAD-1;
4543+
int64_t pos_delta = (b->core.pos + len) - iter->max_depth_pos -1;
4544+
int last_depth = endpos > iter->last_depth_pos
4545+
? 0
4546+
: iter->depth[(iter->last_depth_pos-1)%MAX_AHEAD];
4547+
int pos_depth = iter->depth[b->core.pos % MAX_AHEAD];
4548+
4549+
#ifndef MIN
4550+
#define MIN(a,b) ((a)<(b)?(a):(b))
4551+
#endif
4552+
4553+
if (drand48() >
4554+
//(double)h / 0xffffffff >
4555+
MIN(1.0, pow((double)pos_delta / (len+.01), 2))
4556+
* MIN(1.0, pow((1-(last_depth+.1)/iter->maxcnt), 2))
4557+
* MIN(1.0, pow((iter->maxcnt+.1)/pos_depth, 2)) *2) {
4558+
overlap_remove(iter, b);
4559+
return 0;
4560+
}
4561+
#endif
45354562

45364563
// Zero newly observed iter depth elemtns.
45374564
uint64_t end_clipped = endpos, p;
@@ -4547,8 +4574,11 @@ int bam_plp_push(bam_plp_t iter, const bam1_t *b)
45474574
}
45484575

45494576
// Increment depth
4550-
for (p = b->core.pos; p < end_clipped; p++)
4551-
iter->depth[p % MAX_AHEAD]++;
4577+
for (p = b->core.pos; p < end_clipped; p++) {
4578+
if (++iter->depth[p % MAX_AHEAD] >= iter->maxcnt)
4579+
if (iter->max_depth_pos < p)
4580+
iter->max_depth_pos = p;
4581+
}
45524582
}
45534583
if (bam_copy1(&iter->tail->b, b) == NULL)
45544584
return -1;

0 commit comments

Comments
 (0)