| ... | ... |
@@ -37,14 +37,25 @@ |
| 37 | 37 |
return(tracks) |
| 38 | 38 |
} |
| 39 | 39 |
|
| 40 |
-.resize_granges <- function(gr, width, seqinfo) {
|
|
| 40 |
+.resize_granges <- function(gr, w, seqinfo) {
|
|
| 41 | 41 |
GenomeInfoDb::seqlevels(gr, pruning.mode = "coarse") <- GenomeInfoDb::seqlevels(seqinfo) |
| 42 | 42 |
GenomeInfoDb::seqinfo(gr) <- seqinfo |
| 43 |
- gr <- suppressWarnings(resize(gr, fix = 'center', width = width)) |
|
| 44 |
- w <- width |
|
| 43 |
+ if (is.null(w)) {
|
|
| 44 |
+ w <- width(gr) |> unlist() |> unique() |
|
| 45 |
+ } |
|
| 46 |
+ else {
|
|
| 47 |
+ gr <- suppressWarnings(resize(gr, fix = 'center', width = w)) |
|
| 48 |
+ } |
|
| 45 | 49 |
gr <- trim(gr) |
| 46 | 50 |
gr <- gr[width(gr) == w] |
| 47 |
- sort(gr) |
|
| 51 |
+ gr |
|
| 52 |
+} |
|
| 53 |
+ |
|
| 54 |
+.check_granges_widths <- function(features) {
|
|
| 55 |
+ ws <- lapply(features, width) |> unlist() |> unique() |
|
| 56 |
+ if (length(ws) > 1) |
|
| 57 |
+ stop("Input GRanges do not all have the same width.
|
|
| 58 |
+ Use resize() to fix width prior to coverage data extraction.") |
|
| 48 | 59 |
} |
| 49 | 60 |
|
| 50 | 61 |
#' @importFrom IRanges NumericList |
| ... | ... |
@@ -58,7 +69,9 @@ |
| 58 | 69 |
m[which.flip, ] <- rev(m[which.flip, ]) |
| 59 | 70 |
m <- as.matrix(m) |
| 60 | 71 |
} |
| 61 |
- m <- t(scale(t(m), center = center, scale = scale)) |
|
| 72 |
+ if (any(c(center, scale))) {
|
|
| 73 |
+ m <- t(scale(t(m), center = center, scale = scale)) |
|
| 74 |
+ } |
|
| 62 | 75 |
m |
| 63 | 76 |
} |
| 64 | 77 |
|
| ... | ... |
@@ -10,7 +10,7 @@ |
| 10 | 10 |
) |
| 11 | 11 |
}) |> unique() |
| 12 | 12 |
if (length(unique(sis)) > 1) |
| 13 |
- stop("More than 1 seqinfo inferred from the tracks.")
|
|
| 13 |
+ warning("EXPERIMENTAL: More than 1 seqinfo inferred from the tracks. Using `seqinfo` from the first track.")
|
|
| 14 | 14 |
si <- sis[[1]] |
| 15 | 15 |
} |
| 16 | 16 |
else {
|
| ... | ... |
@@ -32,7 +32,7 @@ |
| 32 | 32 |
n <- names(tracks) |
| 33 | 33 |
sis <- lapply(tracks, seqinfo) |> unique() |
| 34 | 34 |
if (length(sis) > 1) |
| 35 |
- stop("More than 1 seqinfo inferred from the tracks.")
|
|
| 35 |
+ warning("EXPERIMENTAL: More than 1 seqinfo inferred from the tracks. Using `seqinfo` from the first track.")
|
|
| 36 | 36 |
si <- sis[[1]] |
| 37 | 37 |
return(tracks) |
| 38 | 38 |
} |
| ... | ... |
@@ -44,21 +44,22 @@ |
| 44 | 44 |
w <- width |
| 45 | 45 |
gr <- trim(gr) |
| 46 | 46 |
gr <- gr[width(gr) == w] |
| 47 |
+ sort(gr) |
|
| 47 | 48 |
} |
| 48 | 49 |
|
| 49 | 50 |
#' @importFrom IRanges NumericList |
| 50 | 51 |
#' @importFrom stats qt |
| 51 | 52 |
|
| 52 | 53 |
.compute_cov <- function(scores, gr, center, scale, ignore.strand = TRUE) {
|
| 53 |
- scores <- as.matrix(scores) |
|
| 54 |
+ m <- as.matrix(scores) |
|
| 54 | 55 |
if (!ignore.strand) {
|
| 55 |
- scores <- data.frame(scores) |
|
| 56 |
+ m <- data.frame(m) |
|
| 56 | 57 |
which.flip <- which(as.vector(strand(gr)) == '-') |
| 57 |
- scores[which.flip, ] <- rev(scores[which.flip, ]) |
|
| 58 |
- scores <- as.matrix(scores) |
|
| 58 |
+ m[which.flip, ] <- rev(m[which.flip, ]) |
|
| 59 |
+ m <- as.matrix(m) |
|
| 59 | 60 |
} |
| 60 |
- scores <- t(scale(t(scores), center = center, scale = scale)) |
|
| 61 |
- scores |
|
| 61 |
+ m <- t(scale(t(m), center = center, scale = scale)) |
|
| 62 |
+ m |
|
| 62 | 63 |
} |
| 63 | 64 |
|
| 64 | 65 |
.summarize_cov <- function(scores) {
|
| ... | ... |
@@ -89,7 +89,7 @@ Please adjust `bin` argument." |
| 89 | 89 |
)$x |
| 90 | 90 |
} |
| 91 | 91 |
|
| 92 |
-.coarsen_mat <- function(x, bin, FUN, ...) {
|
|
| 92 |
+.coarsen_mat <- function(x, bin, FUN = mean, ...) {
|
|
| 93 | 93 |
if ({ncol(x) %% bin} != 0) stop(
|
| 94 | 94 |
"The column number of the provided matrix should be divided by the window size without remainder. |
| 95 | 95 |
Please adjust `window` argument." |
| ... | ... |
@@ -100,5 +100,5 @@ Please adjust `window` argument." |
| 100 | 100 |
by = list(rep(seq(1, length(vec)/bin), each = bin)), |
| 101 | 101 |
FUN = FUN, ... |
| 102 | 102 |
)$x |
| 103 |
- }) |
|
| 103 |
+ }) |> t() |
|
| 104 | 104 |
} |
| ... | ... |
@@ -85,7 +85,7 @@ Please adjust `bin` argument." |
| 85 | 85 |
stats::aggregate( |
| 86 | 86 |
x, |
| 87 | 87 |
by = list(rep(seq(1, length(x)/bin), each = bin)), |
| 88 |
- FUN = FUN |
|
| 88 |
+ FUN = FUN, ... |
|
| 89 | 89 |
)$x |
| 90 | 90 |
} |
| 91 | 91 |
|
| ... | ... |
@@ -98,7 +98,7 @@ Please adjust `window` argument." |
| 98 | 98 |
stats::aggregate( |
| 99 | 99 |
vec, |
| 100 | 100 |
by = list(rep(seq(1, length(vec)/bin), each = bin)), |
| 101 |
- FUN = FUN |
|
| 101 |
+ FUN = FUN, ... |
|
| 102 | 102 |
)$x |
| 103 | 103 |
}) |
| 104 | 104 |
} |
| ... | ... |
@@ -78,14 +78,22 @@ |
| 78 | 78 |
} |
| 79 | 79 |
|
| 80 | 80 |
.coarsen_vec <- function(x, bin, FUN, ...) {
|
| 81 |
+ if ({length(x) %% bin} != 0) stop(
|
|
| 82 |
+ "The length of the provided vector should be divided by the bin size without remainder. |
|
| 83 |
+Please adjust `bin` argument." |
|
| 84 |
+ ) |
|
| 81 | 85 |
stats::aggregate( |
| 82 | 86 |
x, |
| 83 | 87 |
by = list(rep(seq(1, length(x)/bin), each = bin)), |
| 84 |
- FUN = FUN, ... |
|
| 88 |
+ FUN = FUN |
|
| 85 | 89 |
)$x |
| 86 | 90 |
} |
| 87 | 91 |
|
| 88 | 92 |
.coarsen_mat <- function(x, bin, FUN, ...) {
|
| 93 |
+ if ({ncol(x) %% bin} != 0) stop(
|
|
| 94 |
+ "The column number of the provided matrix should be divided by the window size without remainder. |
|
| 95 |
+Please adjust `window` argument." |
|
| 96 |
+ ) |
|
| 89 | 97 |
apply(x, 1, function(vec) {
|
| 90 | 98 |
stats::aggregate( |
| 91 | 99 |
vec, |
| ... | ... |
@@ -77,10 +77,20 @@ |
| 77 | 77 |
) |
| 78 | 78 |
} |
| 79 | 79 |
|
| 80 |
-.coarsen <- function(x, bin, FUN, ...) {
|
|
| 80 |
+.coarsen_vec <- function(x, bin, FUN, ...) {
|
|
| 81 | 81 |
stats::aggregate( |
| 82 | 82 |
x, |
| 83 | 83 |
by = list(rep(seq(1, length(x)/bin), each = bin)), |
| 84 | 84 |
FUN = FUN, ... |
| 85 | 85 |
)$x |
| 86 | 86 |
} |
| 87 |
+ |
|
| 88 |
+.coarsen_mat <- function(x, bin, FUN, ...) {
|
|
| 89 |
+ apply(x, 1, function(vec) {
|
|
| 90 |
+ stats::aggregate( |
|
| 91 |
+ vec, |
|
| 92 |
+ by = list(rep(seq(1, length(vec)/bin), each = bin)), |
|
| 93 |
+ FUN = FUN |
|
| 94 |
+ )$x |
|
| 95 |
+ }) |
|
| 96 |
+} |
| ... | ... |
@@ -49,27 +49,7 @@ |
| 49 | 49 |
#' @importFrom IRanges NumericList |
| 50 | 50 |
#' @importFrom stats qt |
| 51 | 51 |
|
| 52 |
-.compute_cov <- function(rle, gr, center, scale, ignore.strand = TRUE) {
|
|
| 53 |
- scores <- rle[gr] |
|
| 54 |
- scores <- IRanges::NumericList(scores) |
|
| 55 |
- scores <- as.matrix(scores) |
|
| 56 |
- if (!ignore.strand) {
|
|
| 57 |
- scores <- data.frame(scores) |
|
| 58 |
- which.flip <- which(as.vector(strand(gr)) == '-') |
|
| 59 |
- scores[which.flip, ] <- rev(scores[which.flip, ]) |
|
| 60 |
- scores <- as.matrix(scores) |
|
| 61 |
- } |
|
| 62 |
- scores <- t(scale(t(scores), center = center, scale = scale)) |
|
| 63 |
- scores |
|
| 64 |
-} |
|
| 65 |
- |
|
| 66 |
-.compute_cov_bw <- function(bwf, gr, center, scale, ignore.strand = TRUE) {
|
|
| 67 |
- scores <- rtracklayer::import( |
|
| 68 |
- bwf, |
|
| 69 |
- selection = rtracklayer::BigWigSelection(ranges = gr), |
|
| 70 |
- as = "NumericList", |
|
| 71 |
- format = "bigWig" |
|
| 72 |
- ) |
|
| 52 |
+.compute_cov <- function(scores, gr, center, scale, ignore.strand = TRUE) {
|
|
| 73 | 53 |
scores <- as.matrix(scores) |
| 74 | 54 |
if (!ignore.strand) {
|
| 75 | 55 |
scores <- data.frame(scores) |
| ... | ... |
@@ -53,6 +53,12 @@ |
| 53 | 53 |
scores <- rle[gr] |
| 54 | 54 |
scores <- IRanges::NumericList(scores) |
| 55 | 55 |
scores <- as.matrix(scores) |
| 56 |
+ if (!ignore.strand) {
|
|
| 57 |
+ scores <- data.frame(scores) |
|
| 58 |
+ which.flip <- which(as.vector(strand(gr)) == '-') |
|
| 59 |
+ scores[which.flip, ] <- rev(scores[which.flip, ]) |
|
| 60 |
+ scores <- as.matrix(scores) |
|
| 61 |
+ } |
|
| 56 | 62 |
scores <- t(scale(t(scores), center = center, scale = scale)) |
| 57 | 63 |
scores |
| 58 | 64 |
} |
| ... | ... |
@@ -65,6 +71,12 @@ |
| 65 | 71 |
format = "bigWig" |
| 66 | 72 |
) |
| 67 | 73 |
scores <- as.matrix(scores) |
| 74 |
+ if (!ignore.strand) {
|
|
| 75 |
+ scores <- data.frame(scores) |
|
| 76 |
+ which.flip <- which(as.vector(strand(gr)) == '-') |
|
| 77 |
+ scores[which.flip, ] <- rev(scores[which.flip, ]) |
|
| 78 |
+ scores <- as.matrix(scores) |
|
| 79 |
+ } |
|
| 68 | 80 |
scores <- t(scale(t(scores), center = center, scale = scale)) |
| 69 | 81 |
scores |
| 70 | 82 |
} |
| ... | ... |
@@ -28,6 +28,15 @@ |
| 28 | 28 |
return(tracks) |
| 29 | 29 |
} |
| 30 | 30 |
|
| 31 |
+.set_seqinfo_bwfl <- function(tracks) {
|
|
| 32 |
+ n <- names(tracks) |
|
| 33 |
+ sis <- lapply(tracks, seqinfo) |> unique() |
|
| 34 |
+ if (length(sis) > 1) |
|
| 35 |
+ stop("More than 1 seqinfo inferred from the tracks.")
|
|
| 36 |
+ si <- sis[[1]] |
|
| 37 |
+ return(tracks) |
|
| 38 |
+} |
|
| 39 |
+ |
|
| 31 | 40 |
.resize_granges <- function(gr, width, seqinfo) {
|
| 32 | 41 |
GenomeInfoDb::seqlevels(gr, pruning.mode = "coarse") <- GenomeInfoDb::seqlevels(seqinfo) |
| 33 | 42 |
GenomeInfoDb::seqinfo(gr) <- seqinfo |
| ... | ... |
@@ -48,6 +57,18 @@ |
| 48 | 57 |
scores |
| 49 | 58 |
} |
| 50 | 59 |
|
| 60 |
+.compute_cov_bw <- function(bwf, gr, center, scale, ignore.strand = TRUE) {
|
|
| 61 |
+ scores <- rtracklayer::import( |
|
| 62 |
+ bwf, |
|
| 63 |
+ selection = rtracklayer::BigWigSelection(ranges = gr), |
|
| 64 |
+ as = "NumericList", |
|
| 65 |
+ format = "bigWig" |
|
| 66 |
+ ) |
|
| 67 |
+ scores <- as.matrix(scores) |
|
| 68 |
+ scores <- t(scale(t(scores), center = center, scale = scale)) |
|
| 69 |
+ scores |
|
| 70 |
+} |
|
| 71 |
+ |
|
| 51 | 72 |
.summarize_cov <- function(scores) {
|
| 52 | 73 |
mean <- colMeans(scores, na.rm = TRUE) |
| 53 | 74 |
median <- apply(scores, 2, median, na.rm = TRUE) |
| 1 | 1 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,73 @@ |
| 1 |
+.set_seqinfo <- function(tracks) {
|
|
| 2 |
+ n <- names(tracks) |
|
| 3 |
+ sls <- lapply(tracks, lengths) |
|
| 4 |
+ has_seqlengths <- any(!is.na(lapply(sls, sum))) |
|
| 5 |
+ if (!has_seqlengths) {
|
|
| 6 |
+ sis <- lapply(tracks, function(rle) {
|
|
| 7 |
+ Seqinfo( |
|
| 8 |
+ names(lengths(rle)), |
|
| 9 |
+ lengths(rle) |
|
| 10 |
+ ) |
|
| 11 |
+ }) |> unique() |
|
| 12 |
+ if (length(unique(sis)) > 1) |
|
| 13 |
+ stop("More than 1 seqinfo inferred from the tracks.")
|
|
| 14 |
+ si <- sis[[1]] |
|
| 15 |
+ } |
|
| 16 |
+ else {
|
|
| 17 |
+ sl <- sls[[which(!is.na(lapply(sls, sum)))[[1]]]] |
|
| 18 |
+ si <- list(Seqinfo( |
|
| 19 |
+ names(sl), |
|
| 20 |
+ sl |
|
| 21 |
+ )) |
|
| 22 |
+ } |
|
| 23 |
+ tracks <- lapply(tracks, function(rle) {
|
|
| 24 |
+ GenomeInfoDb::seqinfo(rle) <- si |
|
| 25 |
+ return(rle) |
|
| 26 |
+ }) |
|
| 27 |
+ names(tracks) <- n |
|
| 28 |
+ return(tracks) |
|
| 29 |
+} |
|
| 30 |
+ |
|
| 31 |
+.resize_granges <- function(gr, width, seqinfo) {
|
|
| 32 |
+ GenomeInfoDb::seqlevels(gr, pruning.mode = "coarse") <- GenomeInfoDb::seqlevels(seqinfo) |
|
| 33 |
+ GenomeInfoDb::seqinfo(gr) <- seqinfo |
|
| 34 |
+ gr <- suppressWarnings(resize(gr, fix = 'center', width = width)) |
|
| 35 |
+ w <- width |
|
| 36 |
+ gr <- trim(gr) |
|
| 37 |
+ gr <- gr[width(gr) == w] |
|
| 38 |
+} |
|
| 39 |
+ |
|
| 40 |
+#' @importFrom IRanges NumericList |
|
| 41 |
+#' @importFrom stats qt |
|
| 42 |
+ |
|
| 43 |
+.compute_cov <- function(rle, gr, center, scale, ignore.strand = TRUE) {
|
|
| 44 |
+ scores <- rle[gr] |
|
| 45 |
+ scores <- IRanges::NumericList(scores) |
|
| 46 |
+ scores <- as.matrix(scores) |
|
| 47 |
+ scores <- t(scale(t(scores), center = center, scale = scale)) |
|
| 48 |
+ scores |
|
| 49 |
+} |
|
| 50 |
+ |
|
| 51 |
+.summarize_cov <- function(scores) {
|
|
| 52 |
+ mean <- colMeans(scores, na.rm = TRUE) |
|
| 53 |
+ median <- apply(scores, 2, median, na.rm = TRUE) |
|
| 54 |
+ min <- apply(scores, 2, min, na.rm = TRUE) |
|
| 55 |
+ max <- apply(scores, 2, max, na.rm = TRUE) |
|
| 56 |
+ sd <- apply(scores, 2, sd, na.rm = TRUE) |
|
| 57 |
+ se <- sd/sqrt(nrow(scores)) |
|
| 58 |
+ ci_low <- mean - stats::qt(1 - (0.05 / 2), nrow(scores) - 1) * se |
|
| 59 |
+ ci_high <- mean + stats::qt(1 - (0.05 / 2), nrow(scores) - 1) * se |
|
| 60 |
+ data.frame( |
|
| 61 |
+ coord = seq(-ncol(scores)/2, ncol(scores)/2-1, 1), |
|
| 62 |
+ mean = mean, median = median, min = min, max = max, |
|
| 63 |
+ sd = sd, se = se, ci_low = ci_low, ci_high = ci_high |
|
| 64 |
+ ) |
|
| 65 |
+} |
|
| 66 |
+ |
|
| 67 |
+.coarsen <- function(x, bin, FUN, ...) {
|
|
| 68 |
+ stats::aggregate( |
|
| 69 |
+ x, |
|
| 70 |
+ by = list(rep(seq(1, length(x)/bin), each = bin)), |
|
| 71 |
+ FUN = FUN, ... |
|
| 72 |
+ )$x |
|
| 73 |
+} |