this is needed to satisfy testthat
| ... | ... |
@@ -183,7 +183,12 @@ read_matrix <- function(dataset, backed=FALSE) {
|
| 183 | 183 |
cls <- "Dense_H5ADMatrixSeed" |
| 184 | 184 |
seed <- HDF5Array::HDF5ArraySeed |
| 185 | 185 |
} else {
|
| 186 |
- return(H5Dread(dataset)) |
|
| 186 |
+ dset <- H5Dread(dataset) |
|
| 187 |
+ if (length(dim(dset)) == 1) |
|
| 188 |
+ dset <- as.vector(dset) |
|
| 189 |
+ else if (length(dim(dset)) == 2) |
|
| 190 |
+ dset <- as.matrix(dset) |
|
| 191 |
+ return(dset) |
|
| 187 | 192 |
} |
| 188 | 193 |
} |
| 189 | 194 |
if (backed) {
|
| ... | ... |
@@ -214,6 +219,10 @@ read_array <- function(attr, encoding, strict=TRUE) {
|
| 214 | 219 |
check_encodingversion(attr, encoding, "0.2.0") |
| 215 | 220 |
|
| 216 | 221 |
ret <- H5Dread(attr) |
| 222 |
+ if (length(dim(ret)) == 1) |
|
| 223 |
+ ret <- as.vector(ret) |
|
| 224 |
+ else if (length(dim(ret)) == 2) |
|
| 225 |
+ ret <- as.matrix(ret) |
|
| 217 | 226 |
# h5py saves boolean arrays as HDF5 enums |
| 218 | 227 |
if (is.factor(ret) && all(levels(ret) == c("FALSE", "TRUE"))) {
|
| 219 | 228 |
ret <- as.logical(ret) |
| ... | ... |
@@ -222,6 +222,8 @@ read_array <- function(attr, encoding, strict=TRUE) {
|
| 222 | 222 |
if (!is.null(encoding) && (endsWith(encoding, "-scalar") || encoding == "string")) {
|
| 223 | 223 |
attr(ret, "encoding-scalar") <- TRUE |
| 224 | 224 |
} |
| 225 |
+ if (length(dim(ret)) > 1) |
|
| 226 |
+ ret <- t(ret) |
|
| 225 | 227 |
ret |
| 226 | 228 |
} |
| 227 | 229 |
|
| ... | ... |
@@ -329,8 +331,6 @@ read_modality <- function(view, backed=FALSE) {
|
| 329 | 331 |
obsmnames <- h5ls(h5autoclose(view & "obsm"), recursive=FALSE)$name |
| 330 | 332 |
obsm <- lapply(obsmnames, function(space) {
|
| 331 | 333 |
elem <- read_attribute(h5autoclose(view & paste("obsm", space, sep="/")))
|
| 332 |
- if (!is.data.frame(elem) && length(dim(elem)) > 1) |
|
| 333 |
- elem <- t(elem) |
|
| 334 | 334 |
rownames(elem) <- rownames(obs) |
| 335 | 335 |
elem |
| 336 | 336 |
}) |
| ... | ... |
@@ -76,9 +76,9 @@ read_dataframe <- function(group, encoding) {
|
| 76 | 76 |
#' Helper function to convert values + labels into factors |
| 77 | 77 |
#' |
| 78 | 78 |
#' @description A helper function to convert categories into factors. |
| 79 |
-#' Assumptions: |
|
| 79 |
+#' Assumptions: |
|
| 80 | 80 |
#' - values correspond to the zero indexed categories |
| 81 |
-#' (i.e. value 0 is the first category) |
|
| 81 |
+#' (i.e. value 0 is the first category) |
|
| 82 | 82 |
#' - NA are encoded with a value -1 |
| 83 | 83 |
#' Categories not uses will be dropped. |
| 84 | 84 |
#' |
| ... | ... |
@@ -88,7 +88,7 @@ read_dataframe <- function(group, encoding) {
|
| 88 | 88 |
#' @returns factor with categorical values |
| 89 | 89 |
#' |
| 90 | 90 |
#' @keywords internal |
| 91 |
-#' @noRd |
|
| 91 |
+#' @noRd |
|
| 92 | 92 |
convert_categoricals <- function(values, categories) {
|
| 93 | 93 |
# The levels are 0 indexed integers |
| 94 | 94 |
levels <- seq_len(length(categories))-1 |
| ... | ... |
@@ -236,13 +236,7 @@ read_categorical <- function(group, encoding) {
|
| 236 | 236 |
|
| 237 | 237 |
values <- as.integer(H5Dread(h5autoclose(group & "codes"))) |
| 238 | 238 |
labels <- H5Dread(h5autoclose(group & "categories")) |
| 239 |
- n_labels <- max(values) + 1 |
|
| 240 |
- if (length(labels) > n_labels) {
|
|
| 241 |
- labels_items <- labels_items[seq_len(n_labels)] |
|
| 242 |
- } |
|
| 243 |
- |
|
| 244 |
- values[values == -1] <- NA |
|
| 245 |
- factor(values, labels=labels, ordered=ordered) |
|
| 239 |
+ convert_categoricals(values, labels) |
|
| 246 | 240 |
} |
| 247 | 241 |
|
| 248 | 242 |
#' @importFrom rhdf5 H5Dread |
| ... | ... |
@@ -1,7 +1,34 @@ |
| 1 | 1 |
#' @importFrom rhdf5 H5Aexists H5Aopen H5Aread H5Aclose H5Dread H5Dclose H5Rdereference |
| 2 |
+#' @importFrom S4Vectors DataFrame |
|
| 2 | 3 |
#' @importMethodsFrom rhdf5 & |
| 3 | 4 |
#' @importFrom methods is |
| 4 |
-read_dataframe <- function(group) {
|
|
| 5 |
+read_dataframe_020 <- function(group, encoding, strict=TRUE) {
|
|
| 6 |
+ if (strict) |
|
| 7 |
+ check_encodingversion(group, encoding, "0.2.0") |
|
| 8 |
+ indexattr <- H5Aopen(group, "_index") |
|
| 9 |
+ indexcol <- H5Aread(indexattr) |
|
| 10 |
+ H5Aclose(indexattr) |
|
| 11 |
+ |
|
| 12 |
+ orderedattr <- H5Aopen(group, "column-order") |
|
| 13 |
+ columnorder <- H5Aread(orderedattr) |
|
| 14 |
+ H5Aclose(orderedattr) |
|
| 15 |
+ |
|
| 16 |
+ col_list <- lapply(columnorder, function(name) {
|
|
| 17 |
+ read_attribute(h5autoclose(group & name)) |
|
| 18 |
+ }) |
|
| 19 |
+ names(col_list) <- columnorder |
|
| 20 |
+ index <- h5autoclose(group & indexcol) |
|
| 21 |
+ col_list[["row.names"]] <- H5Dread(index) |
|
| 22 |
+ do.call(DataFrame, args=col_list) |
|
| 23 |
+} |
|
| 24 |
+ |
|
| 25 |
+#' @importFrom rhdf5 H5Aexists H5Aopen H5Aread H5Aclose H5Dread H5Dclose H5Rdereference |
|
| 26 |
+#' @importFrom S4Vectors DataFrame |
|
| 27 |
+#' @importMethodsFrom rhdf5 & |
|
| 28 |
+#' @importFrom methods is |
|
| 29 |
+read_dataframe_010 <- function(group, encoding, strict=TRUE) {
|
|
| 30 |
+ if (strict) |
|
| 31 |
+ check_encodingversion(group, encoding, "0.1.0") |
|
| 5 | 32 |
indexcol <- "_index" |
| 6 | 33 |
if (H5Aexists(group, "_index")) {
|
| 7 | 34 |
indexattr <- H5Aopen(group, "_index") |
| ... | ... |
@@ -34,10 +61,16 @@ read_dataframe <- function(group) {
|
| 34 | 61 |
values |
| 35 | 62 |
}) |
| 36 | 63 |
names(col_list) <- columnorder |
| 37 |
- index <- group & indexcol |
|
| 38 |
- col_list[["row.names"]] <- H5Dread(index) |
|
| 39 |
- H5Dclose(index) |
|
| 40 |
- do.call(data.frame, args=col_list) |
|
| 64 |
+ col_list[["row.names"]] <- H5Dread(h5autoclose(group & indexcol)) |
|
| 65 |
+ do.call(DataFrame, args=col_list) |
|
| 66 |
+} |
|
| 67 |
+ |
|
| 68 |
+read_dataframe <- function(group, encoding) {
|
|
| 69 |
+ version <- check_encodingversion(group, encoding, c("0.1.0", "0.2.0"))
|
|
| 70 |
+ if (version == "0.1.0") |
|
| 71 |
+ read_dataframe_010(group, encoding, strict=FALSE) |
|
| 72 |
+ else |
|
| 73 |
+ read_dataframe_020(group, encoding, strict=FALSE) |
|
| 41 | 74 |
} |
| 42 | 75 |
|
| 43 | 76 |
#' Helper function to convert values + labels into factors |
| ... | ... |
@@ -100,18 +133,16 @@ read_with_index <- function(dataset) {
|
| 100 | 133 |
} |
| 101 | 134 |
} |
| 102 | 135 |
|
| 103 |
-#' @importFrom rhdf5 H5Dread H5Dclose H5Aopen H5Aread H5Aclose |
|
| 136 |
+#' @importFrom rhdf5 H5Dread H5Aopen H5Aread H5Aclose |
|
| 104 | 137 |
#' @importMethodsFrom rhdf5 & |
| 105 | 138 |
read_sparse_matrix <- function(group, encoding, backed=FALSE) {
|
| 106 |
- indices <- group & "indices" |
|
| 107 |
- indptr <- group & "indptr" |
|
| 108 |
- data <- group & "data" |
|
| 139 |
+ check_encodingversion(group, encoding, "0.1.0") |
|
| 140 |
+ indices <- h5autoclose(group & "indices") |
|
| 141 |
+ indptr <- h5autoclose(group & "indptr") |
|
| 142 |
+ data <- h5autoclose(group & "data") |
|
| 109 | 143 |
i <- as.vector(H5Dread(indices)) |
| 110 | 144 |
p <- as.vector(H5Dread(indptr)) |
| 111 | 145 |
x <- as.vector(H5Dread(data)) |
| 112 |
- H5Dclose(indices) |
|
| 113 |
- H5Dclose(indptr) |
|
| 114 |
- H5Dclose(data) |
|
| 115 | 146 |
shapeattr <- H5Aopen(group, "shape") |
| 116 | 147 |
shape <- H5Aread(shapeattr) |
| 117 | 148 |
H5Aclose(shapeattr) |
| ... | ... |
@@ -166,41 +197,113 @@ read_matrix <- function(dataset, backed=FALSE) {
|
| 166 | 197 |
} |
| 167 | 198 |
} |
| 168 | 199 |
|
| 169 |
-#' @importFrom rhdf5 H5Aopen H5Aread H5Aclose H5Aexists h5ls |
|
| 200 |
+#' @importFrom rhdf5 h5ls |
|
| 201 |
+#' @importMethodsFrom rhdf5 & |
|
| 170 | 202 |
#' @importFrom stats setNames |
| 171 |
-read_group <- function(group, read_uns=FALSE) {
|
|
| 172 |
- if (H5Aexists(group, "encoding-type")) {
|
|
| 173 |
- encattr <- H5Aopen(group, "encoding-type") |
|
| 174 |
- encoding <- H5Aread(encattr) |
|
| 175 |
- H5Aclose(encattr) |
|
| 176 |
- |
|
| 177 |
- if (encoding == "dataframe") {
|
|
| 178 |
- return(read_dataframe(group)) |
|
| 179 |
- } else if (endsWith(encoding, "matrix")) {
|
|
| 180 |
- return(read_sparse_matrix(group)) |
|
| 181 |
- } else {
|
|
| 182 |
- warning("Unknown encoding ", encoding)
|
|
| 183 |
- if (!read_uns) |
|
| 184 |
- return(invisible(NULL)) |
|
| 185 |
- } |
|
| 186 |
- } |
|
| 203 |
+read_dict <- function(group, encoding, strict=TRUE) {
|
|
| 204 |
+ if (strict) |
|
| 205 |
+ check_encodingversion(group, encoding, "0.1.0") |
|
| 187 | 206 |
|
| 188 | 207 |
objects <- h5ls(group, recursive=FALSE, datasetinfo=FALSE)$name |
| 189 | 208 |
lapply(setNames(nm=objects), function(x)read_attribute(h5autoclose(group & x))) |
| 190 | 209 |
} |
| 191 | 210 |
|
| 192 |
-#' @importFrom rhdf5 H5Iget_type H5Dread |
|
| 211 |
+#' @importFrom rhdf5 H5Dread |
|
| 212 |
+read_array <- function(attr, encoding, strict=TRUE) {
|
|
| 213 |
+ if (strict) |
|
| 214 |
+ check_encodingversion(attr, encoding, "0.2.0") |
|
| 215 |
+ |
|
| 216 |
+ ret <- H5Dread(attr) |
|
| 217 |
+ # h5py saves boolean arrays as HDF5 enums |
|
| 218 |
+ if (is.factor(ret) && all(levels(ret) == c("FALSE", "TRUE"))) {
|
|
| 219 |
+ ret <- as.logical(ret) |
|
| 220 |
+ } |
|
| 221 |
+ |
|
| 222 |
+ if (!is.null(encoding) && (endsWith(encoding, "-scalar") || encoding == "string")) {
|
|
| 223 |
+ attr(ret, "encoding-scalar") <- TRUE |
|
| 224 |
+ } |
|
| 225 |
+ ret |
|
| 226 |
+} |
|
| 227 |
+ |
|
| 228 |
+#' @importFrom rhdf5 H5Aopen H5Aread H5Aclose H5Dread |
|
| 229 |
+#' @importMethodsFrom rhdf5 & |
|
| 230 |
+read_categorical <- function(group, encoding) {
|
|
| 231 |
+ check_encodingversion(group, encoding, "0.2.0") |
|
| 232 |
+ |
|
| 233 |
+ orderedattr <- H5Aopen(group, "ordered") |
|
| 234 |
+ ordered <- as.logical(H5Aread(orderedattr)) |
|
| 235 |
+ H5Aclose(orderedattr) |
|
| 236 |
+ |
|
| 237 |
+ values <- as.integer(H5Dread(h5autoclose(group & "codes"))) |
|
| 238 |
+ labels <- H5Dread(h5autoclose(group & "categories")) |
|
| 239 |
+ n_labels <- max(values) + 1 |
|
| 240 |
+ if (length(labels) > n_labels) {
|
|
| 241 |
+ labels_items <- labels_items[seq_len(n_labels)] |
|
| 242 |
+ } |
|
| 243 |
+ |
|
| 244 |
+ values[values == -1] <- NA |
|
| 245 |
+ factor(values, labels=labels, ordered=ordered) |
|
| 246 |
+} |
|
| 247 |
+ |
|
| 248 |
+#' @importFrom rhdf5 H5Dread |
|
| 249 |
+#' @importMethodsFrom rhdf5 & |
|
| 250 |
+read_nullable <- function(group, encoding) {
|
|
| 251 |
+ check_encodingversion(group, encoding, "0.1.0") |
|
| 252 |
+ values <- H5Dread(h5autoclose(group & "values")) |
|
| 253 |
+ mask <- as.logical(H5Dread(h5autoclose(group & "mask"))) |
|
| 254 |
+ values[mask] <- NA |
|
| 255 |
+ if (endsWith(encoding, "-boolean")) {
|
|
| 256 |
+ values <- as.logical(values) |
|
| 257 |
+ } |
|
| 258 |
+ values |
|
| 259 |
+} |
|
| 260 |
+ |
|
| 261 |
+.read_funcs <- list("array"=read_array,
|
|
| 262 |
+ "csr_matrix"=read_sparse_matrix, |
|
| 263 |
+ "csc_matrix"=read_sparse_matrix, |
|
| 264 |
+ "dataframe"=read_dataframe, |
|
| 265 |
+ "dict"=read_dict, |
|
| 266 |
+ "numeric-scalar"=read_array, |
|
| 267 |
+ "string"=read_array, |
|
| 268 |
+ "categorical"=read_categorical, |
|
| 269 |
+ "string-array"=read_array, |
|
| 270 |
+ "nullable-integer"=read_nullable, |
|
| 271 |
+ "nullable-boolean"=read_nullable) |
|
| 272 |
+ |
|
| 273 |
+#' @importFrom rhdf5 H5Aexists H5Aopen H5Aread H5Aclose H5Iget_type H5Iget_name |
|
| 193 | 274 |
read_attribute <- function(attr) {
|
| 194 |
- if (H5Iget_type(attr) == "H5I_GROUP") |
|
| 195 |
- read_group(attr) |
|
| 196 |
- else {
|
|
| 197 |
- values <- H5Dread(attr) |
|
| 198 |
- # h5py saves boolean arrays as HDF5 enums |
|
| 199 |
- if (is.factor(values) && all(levels(values) == c("FALSE", "TRUE"))) {
|
|
| 200 |
- values <- as.logical(values) |
|
| 275 |
+ ret <- NULL |
|
| 276 |
+ if (H5Aexists(attr, "encoding-type")) {
|
|
| 277 |
+ encattr <- H5Aopen(attr, "encoding-type") |
|
| 278 |
+ encoding <- H5Aread(encattr) |
|
| 279 |
+ H5Aclose(encattr) |
|
| 280 |
+ |
|
| 281 |
+ func <- switch(encoding, |
|
| 282 |
+ "array"=read_array, |
|
| 283 |
+ "csr_matrix"=read_sparse_matrix, |
|
| 284 |
+ "csc_matrix"=read_sparse_matrix, |
|
| 285 |
+ "dataframe"=read_dataframe, |
|
| 286 |
+ "dict"=read_dict, |
|
| 287 |
+ "numeric-scalar"=read_array, |
|
| 288 |
+ "string"=read_array, |
|
| 289 |
+ "categorical"=read_categorical, |
|
| 290 |
+ "string-array"=read_array, |
|
| 291 |
+ "nullable-integer"=read_nullable, |
|
| 292 |
+ "nullable-boolean"=read_nullable) |
|
| 293 |
+ if (is.null(func)) {
|
|
| 294 |
+ warning("Unknown encoding ", encoding, " for element ", H5Iget_name(attr))
|
|
| 295 |
+ } else {
|
|
| 296 |
+ ret <- func(attr, encoding) |
|
| 201 | 297 |
} |
| 202 |
- values |
|
| 203 | 298 |
} |
| 299 |
+ |
|
| 300 |
+ if (is.null(ret)) {
|
|
| 301 |
+ if (H5Iget_type(attr) == "H5I_GROUP") |
|
| 302 |
+ ret <- read_dict(attr, NULL, strict=FALSE) |
|
| 303 |
+ else |
|
| 304 |
+ ret <- read_array(attr, NULL, strict=FALSE) |
|
| 305 |
+ } |
|
| 306 |
+ ret |
|
| 204 | 307 |
} |
| 205 | 308 |
|
| 206 | 309 |
#' @importFrom stats setNames |
| ... | ... |
@@ -273,7 +376,7 @@ read_modality <- function(view, backed=FALSE) {
|
| 273 | 376 |
} |
| 274 | 377 |
|
| 275 | 378 |
if ("uns" %in% viewnames)
|
| 276 |
- metadata(se) <- read_group(h5autoclose(view & "uns")) |
|
| 379 |
+ metadata(se) <- read_dict(h5autoclose(view & "uns"), NULL, strict=FALSE) |
|
| 277 | 380 |
se |
| 278 | 381 |
} |
| 279 | 382 |
|
| ... | ... |
@@ -350,9 +453,8 @@ readH5MU <- function(file, backed=FALSE) {
|
| 350 | 453 |
# if colData(MAE) has different row names than the experiments |
| 351 | 454 |
if (H5Lexists(h5, "obsmap")) {
|
| 352 | 455 |
samplemaps <- lapply(assays, function(mod) {
|
| 353 |
- cmapdset <- h5 & paste("obsmap", mod, sep="/")
|
|
| 456 |
+ cmapdset <- h5autoclose(h5 & paste("obsmap", mod, sep="/"))
|
|
| 354 | 457 |
cmap <- H5Dread(cmapdset) |
| 355 |
- H5Dclose(cmapdset) |
|
| 356 | 458 |
|
| 357 | 459 |
idx <- which(cmap > 0) |
| 358 | 460 |
data.frame(assay=mod, primary=rownames(metadata)[idx], colname=colnames(modalities[[mod]])[cmap[idx]]) |
| ... | ... |
@@ -363,7 +465,7 @@ readH5MU <- function(file, backed=FALSE) {
|
| 363 | 465 |
} |
| 364 | 466 |
|
| 365 | 467 |
if (H5Lexists(h5, "uns")) |
| 366 |
- args$metadata <- read_group(h5autoclose(h5 & "uns")) |
|
| 468 |
+ args$metadata <- read_dict(h5autoclose(h5 & "uns"), NULL, strict=FALSE) |
|
| 367 | 469 |
|
| 368 | 470 |
# Close the connection |
| 369 | 471 |
H5Fclose(h5) |
Anndata 0.7.8 exports categories with NA with a value -1,
causing the reading of mudata/anndata to fail.
This adds support for NAs.
The convert_categoricals function is added as an internal function
in order to facilitate testing of this logic.
Fixes: #6
| ... | ... |
@@ -25,11 +25,7 @@ read_dataframe <- function(group) {
|
| 25 | 25 |
} else {
|
| 26 | 26 |
labels <- H5Rdereference(labels, h5loc=col) |
| 27 | 27 |
labels_items <- H5Dread(labels) |
| 28 |
- n_labels <- length(unique(values)) |
|
| 29 |
- if (length(labels_items) > n_labels) {
|
|
| 30 |
- labels_items <- labels_items[seq_len(n_labels)] |
|
| 31 |
- } |
|
| 32 |
- values <- factor(as.integer(values), labels=labels_items) |
|
| 28 |
+ values <- convert_categoricals(values, labels_items) |
|
| 33 | 29 |
H5Dclose(labels) |
| 34 | 30 |
} |
| 35 | 31 |
H5Aclose(attr) |
| ... | ... |
@@ -44,6 +40,30 @@ read_dataframe <- function(group) {
|
| 44 | 40 |
do.call(data.frame, args=col_list) |
| 45 | 41 |
} |
| 46 | 42 |
|
| 43 |
+#' Helper function to convert values + labels into factors |
|
| 44 |
+#' |
|
| 45 |
+#' @description A helper function to convert categories into factors. |
|
| 46 |
+#' Assumptions: |
|
| 47 |
+#' - values correspond to the zero indexed categories |
|
| 48 |
+#' (i.e. value 0 is the first category) |
|
| 49 |
+#' - NA are encoded with a value -1 |
|
| 50 |
+#' Categories not uses will be dropped. |
|
| 51 |
+#' |
|
| 52 |
+#' @param values Vector of integer level numbers (zero indexed). -1 indicate NA |
|
| 53 |
+#' @param categories Labels for level numbers (zero indexed). |
|
| 54 |
+#' |
|
| 55 |
+#' @returns factor with categorical values |
|
| 56 |
+#' |
|
| 57 |
+#' @keywords internal |
|
| 58 |
+#' @noRd |
|
| 59 |
+convert_categoricals <- function(values, categories) {
|
|
| 60 |
+ # The levels are 0 indexed integers |
|
| 61 |
+ levels <- seq_len(length(categories))-1 |
|
| 62 |
+ value_factor <- factor(as.integer(values), levels, labels=categories) |
|
| 63 |
+ # Drop unused levels |
|
| 64 |
+ droplevels(value_factor) |
|
| 65 |
+} |
|
| 66 |
+ |
|
| 47 | 67 |
#' @importFrom rhdf5 H5Dread H5Aexists H5Aopen H5Aread H5Aclose |
| 48 | 68 |
read_dataframe_legacy <- function(dataset) {
|
| 49 | 69 |
table <- H5Dread(dataset) |
| ... | ... |
@@ -176,7 +176,7 @@ read_attribute <- function(attr) {
|
| 176 | 176 |
else {
|
| 177 | 177 |
values <- H5Dread(attr) |
| 178 | 178 |
# h5py saves boolean arrays as HDF5 enums |
| 179 |
- if (is.factor(values) && levels(values) == c("FALSE", "TRUE")) {
|
|
| 179 |
+ if (is.factor(values) && all(levels(values) == c("FALSE", "TRUE"))) {
|
|
| 180 | 180 |
values <- as.logical(values) |
| 181 | 181 |
} |
| 182 | 182 |
values |
| ... | ... |
@@ -15,16 +15,12 @@ read_dataframe <- function(group) {
|
| 15 | 15 |
|
| 16 | 16 |
col_list <- lapply(columnorder, function(name) {
|
| 17 | 17 |
col <- group & name |
| 18 |
- values <- H5Dread(col) |
|
| 19 |
- # h5py saves boolean arrays as HDF5 enums |
|
| 20 |
- if (is.factor(values) && levels(values) == c("FALSE", "TRUE")) {
|
|
| 21 |
- values <- as.logical(values) |
|
| 22 |
- } |
|
| 18 |
+ values <- read_attribute(col) |
|
| 23 | 19 |
if (H5Aexists(col, "categories")) {
|
| 24 | 20 |
attr <- H5Aopen(col, "categories") |
| 25 | 21 |
labels <- H5Aread(attr) |
| 26 | 22 |
if (!is(labels, "H5Ref")) {
|
| 27 |
- warning("found categories attribute for column ",
|
|
| 23 |
+ warning("found categories attribute for column ",
|
|
| 28 | 24 |
name, ", but it is not a reference") |
| 29 | 25 |
} else {
|
| 30 | 26 |
labels <- H5Rdereference(labels, h5loc=col) |
| ... | ... |
@@ -74,7 +70,7 @@ read_with_index <- function(dataset) {
|
| 74 | 70 |
encoding <- H5Aread(encattr, "encoding-type") |
| 75 | 71 |
H5Aclose(encattr) |
| 76 | 72 |
if (encoding != "dataframe") {
|
| 77 |
- warning("Unknown encoding ", encoding,
|
|
| 73 |
+ warning("Unknown encoding ", encoding,
|
|
| 78 | 74 |
" when attempting to read data frame") |
| 79 | 75 |
return(data.frame()) |
| 80 | 76 |
} |
| ... | ... |
@@ -177,8 +173,14 @@ read_group <- function(group, read_uns=FALSE) {
|
| 177 | 173 |
read_attribute <- function(attr) {
|
| 178 | 174 |
if (H5Iget_type(attr) == "H5I_GROUP") |
| 179 | 175 |
read_group(attr) |
| 180 |
- else |
|
| 181 |
- H5Dread(attr) |
|
| 176 |
+ else {
|
|
| 177 |
+ values <- H5Dread(attr) |
|
| 178 |
+ # h5py saves boolean arrays as HDF5 enums |
|
| 179 |
+ if (is.factor(values) && levels(values) == c("FALSE", "TRUE")) {
|
|
| 180 |
+ values <- as.logical(values) |
|
| 181 |
+ } |
|
| 182 |
+ values |
|
| 183 |
+ } |
|
| 182 | 184 |
} |
| 183 | 185 |
|
| 184 | 186 |
#' @importFrom stats setNames |
| ... | ... |
@@ -219,7 +219,6 @@ read_modality <- function(view, backed=FALSE) {
|
| 219 | 219 |
args$reducedDims <- obsm |
| 220 | 220 |
} |
| 221 | 221 |
|
| 222 |
- objectclass <- NULL |
|
| 223 | 222 |
if (H5Aexists(view, "origin-class")) {
|
| 224 | 223 |
originattr <- H5Aopen(view, "origin-class") |
| 225 | 224 |
objectclass <- H5Aread(originattr) |
| ... | ... |
@@ -233,9 +232,9 @@ read_modality <- function(view, backed=FALSE) {
|
| 233 | 232 |
} |
| 234 | 233 |
se <- do.call(SingleCellExperiment, args) |
| 235 | 234 |
} |
| 236 |
- } |
|
| 237 |
- if (is.null(objectclass)) |
|
| 235 |
+ } else {
|
|
| 238 | 236 |
se <- do.call(SingleCellExperiment, args) |
| 237 |
+ } |
|
| 239 | 238 |
|
| 240 | 239 |
for (cp in list(list(name="obsp", setter=`colPair<-`), list(name="varp", setter=`rowPair<-`))) {
|
| 241 | 240 |
if (cp$name %in% viewnames) {
|
Object class is saved as an attribute for the root group
of .h5ad and .h5mu files and as an attribute for individual
modalities of .h5mu files.
Unless SummarizedExperiment was saved as a modality,
a SingleCellExperiment is currently created from it.
| ... | ... |
@@ -219,7 +219,23 @@ read_modality <- function(view, backed=FALSE) {
|
| 219 | 219 |
args$reducedDims <- obsm |
| 220 | 220 |
} |
| 221 | 221 |
|
| 222 |
- se <- do.call(SingleCellExperiment, args) |
|
| 222 |
+ objectclass <- NULL |
|
| 223 |
+ if (H5Aexists(view, "origin-class")) {
|
|
| 224 |
+ originattr <- H5Aopen(view, "origin-class") |
|
| 225 |
+ objectclass <- H5Aread(originattr) |
|
| 226 |
+ H5Aclose(originattr) |
|
| 227 |
+ |
|
| 228 |
+ if (objectclass == "SummarizedExperiment") {
|
|
| 229 |
+ se <- do.call(SummarizedExperiment, args) |
|
| 230 |
+ } else {
|
|
| 231 |
+ if (objectclass != "SingleCellExperiment") {
|
|
| 232 |
+ message("Reading as SingleCellExperiment where the original object class is ", objectclass)
|
|
| 233 |
+ } |
|
| 234 |
+ se <- do.call(SingleCellExperiment, args) |
|
| 235 |
+ } |
|
| 236 |
+ } |
|
| 237 |
+ if (is.null(objectclass)) |
|
| 238 |
+ se <- do.call(SingleCellExperiment, args) |
|
| 223 | 239 |
|
| 224 | 240 |
for (cp in list(list(name="obsp", setter=`colPair<-`), list(name="varp", setter=`rowPair<-`))) {
|
| 225 | 241 |
if (cp$name %in% viewnames) {
|
| ... | ... |
@@ -24,14 +24,15 @@ read_dataframe <- function(group) {
|
| 24 | 24 |
attr <- H5Aopen(col, "categories") |
| 25 | 25 |
labels <- H5Aread(attr) |
| 26 | 26 |
if (!is(labels, "H5Ref")) {
|
| 27 |
- warning("found categories attribute for column ", name, ", but it is not a reference")
|
|
| 27 |
+ warning("found categories attribute for column ",
|
|
| 28 |
+ name, ", but it is not a reference") |
|
| 28 | 29 |
} else {
|
| 29 | 30 |
labels <- H5Rdereference(labels, h5loc=col) |
| 30 |
- labels_items <- H5Dread(labels) |
|
| 31 |
- n_labels <- length(unique(values)) |
|
| 32 |
- if (length(labels_items) > n_labels) {
|
|
| 33 |
- labels_items <- labels_items[seq_len(n_labels)] |
|
| 34 |
- } |
|
| 31 |
+ labels_items <- H5Dread(labels) |
|
| 32 |
+ n_labels <- length(unique(values)) |
|
| 33 |
+ if (length(labels_items) > n_labels) {
|
|
| 34 |
+ labels_items <- labels_items[seq_len(n_labels)] |
|
| 35 |
+ } |
|
| 35 | 36 |
values <- factor(as.integer(values), labels=labels_items) |
| 36 | 37 |
H5Dclose(labels) |
| 37 | 38 |
} |
| ... | ... |
@@ -73,7 +74,8 @@ read_with_index <- function(dataset) {
|
| 73 | 74 |
encoding <- H5Aread(encattr, "encoding-type") |
| 74 | 75 |
H5Aclose(encattr) |
| 75 | 76 |
if (encoding != "dataframe") {
|
| 76 |
- warning("Unknown encoding ", encoding, " when attempting to read data frame")
|
|
| 77 |
+ warning("Unknown encoding ", encoding,
|
|
| 78 |
+ " when attempting to read data frame") |
|
| 77 | 79 |
return(data.frame()) |
| 78 | 80 |
} |
| 79 | 81 |
read_dataframe(dataset) |
| ... | ... |
@@ -110,8 +110,7 @@ read_matrix <- function(dataset, backed=FALSE) {
|
| 110 | 110 |
if (backed) {
|
| 111 | 111 |
have_delayedarray <- requireNamespace("HDF5Array", quietly=TRUE)
|
| 112 | 112 |
if (!have_delayedarray) {
|
| 113 |
- warning("Could not load the HDF5Array package. HDF5Array is required for backed matrices. Loading matrix into memory...")
|
|
| 114 |
- backed <- FALSE |
|
| 113 |
+ stop("Could not load the HDF5Array package. HDF5Array is required for backed matrices.")
|
|
| 115 | 114 |
} |
| 116 | 115 |
} |
| 117 | 116 |
|
| ... | ... |
@@ -149,19 +149,27 @@ read_matrix <- function(dataset, backed=FALSE) {
|
| 149 | 149 |
} |
| 150 | 150 |
} |
| 151 | 151 |
|
| 152 |
-#' @importFrom rhdf5 H5Aopen H5Aread H5Aclose |
|
| 153 |
-read_group <- function(group) {
|
|
| 154 |
- encattr <- H5Aopen(group, "encoding-type") |
|
| 155 |
- encoding <- H5Aread(encattr) |
|
| 156 |
- H5Aclose(encattr) |
|
| 157 |
- if (encoding == "dataframe") {
|
|
| 158 |
- read_dataframe(group) |
|
| 159 |
- } else if (endsWith(encoding, "matrix")) {
|
|
| 160 |
- read_sparse_matrix(group) |
|
| 161 |
- } else {
|
|
| 162 |
- warning("Unknown encoding ", encoding)
|
|
| 163 |
- invisible(NULL) |
|
| 152 |
+#' @importFrom rhdf5 H5Aopen H5Aread H5Aclose H5Aexists h5ls |
|
| 153 |
+#' @importFrom stats setNames |
|
| 154 |
+read_group <- function(group, read_uns=FALSE) {
|
|
| 155 |
+ if (H5Aexists(group, "encoding-type")) {
|
|
| 156 |
+ encattr <- H5Aopen(group, "encoding-type") |
|
| 157 |
+ encoding <- H5Aread(encattr) |
|
| 158 |
+ H5Aclose(encattr) |
|
| 159 |
+ |
|
| 160 |
+ if (encoding == "dataframe") {
|
|
| 161 |
+ return(read_dataframe(group)) |
|
| 162 |
+ } else if (endsWith(encoding, "matrix")) {
|
|
| 163 |
+ return(read_sparse_matrix(group)) |
|
| 164 |
+ } else {
|
|
| 165 |
+ warning("Unknown encoding ", encoding)
|
|
| 166 |
+ if (!read_uns) |
|
| 167 |
+ return(invisible(NULL)) |
|
| 168 |
+ } |
|
| 164 | 169 |
} |
| 170 |
+ |
|
| 171 |
+ objects <- h5ls(group, recursive=FALSE, datasetinfo=FALSE)$name |
|
| 172 |
+ lapply(setNames(nm=objects), function(x)read_attribute(h5autoclose(group & x))) |
|
| 165 | 173 |
} |
| 166 | 174 |
|
| 167 | 175 |
#' @importFrom rhdf5 H5Iget_type H5Dread |
| ... | ... |
@@ -178,7 +186,7 @@ read_attribute <- function(attr) {
|
| 178 | 186 |
#' @importFrom S4Vectors SimpleList |
| 179 | 187 |
#' @importFrom SingleCellExperiment SingleCellExperiment |
| 180 | 188 |
#' @importMethodsFrom SingleCellExperiment colPair<- rowPair<- |
| 181 |
-#' @importFrom SummarizedExperiment SummarizedExperiment |
|
| 189 |
+#' @importFrom SummarizedExperiment SummarizedExperiment metadata<- |
|
| 182 | 190 |
#' @importFrom methods is |
| 183 | 191 |
read_modality <- function(view, backed=FALSE) {
|
| 184 | 192 |
X <- read_matrix(h5autoclose(view & "X"), backed=backed) |
| ... | ... |
@@ -225,6 +233,9 @@ read_modality <- function(view, backed=FALSE) {
|
| 225 | 233 |
} |
| 226 | 234 |
} |
| 227 | 235 |
} |
| 236 |
+ |
|
| 237 |
+ if ("uns" %in% viewnames)
|
|
| 238 |
+ metadata(se) <- read_group(h5autoclose(view & "uns")) |
|
| 228 | 239 |
se |
| 229 | 240 |
} |
| 230 | 241 |
|
| ... | ... |
@@ -313,6 +324,9 @@ readH5MU <- function(file, backed=FALSE) {
|
| 313 | 324 |
args$sampleMap <- sampleMap |
| 314 | 325 |
} |
| 315 | 326 |
|
| 327 |
+ if (H5Lexists(h5, "uns")) |
|
| 328 |
+ args$metadata <- read_group(h5autoclose(h5 & "uns")) |
|
| 329 |
+ |
|
| 316 | 330 |
# Close the connection |
| 317 | 331 |
H5Fclose(h5) |
| 318 | 332 |
|
| ... | ... |
@@ -242,12 +242,12 @@ read_modality <- function(view, backed=FALSE) {
|
| 242 | 242 |
#' |
| 243 | 243 |
#' @examples |
| 244 | 244 |
#' data(miniACC, package="MultiAssayExperiment") |
| 245 |
-#' WriteH5AD(miniACC[[1]], "miniacc.h5ad") |
|
| 246 |
-#' sce <- ReadH5AD("miniacc.h5ad")
|
|
| 245 |
+#' writeH5AD(miniACC[[1]], "miniacc.h5ad") |
|
| 246 |
+#' sce <- readH5AD("miniacc.h5ad")
|
|
| 247 | 247 |
#' |
| 248 | 248 |
#' @importFrom rhdf5 H5Fclose |
| 249 | 249 |
#' @export |
| 250 |
-ReadH5AD <- function(file, backed=FALSE) {
|
|
| 250 |
+readH5AD <- function(file, backed=FALSE) {
|
|
| 251 | 251 |
h5 <- H5Fopen(file, flags="H5F_ACC_RDONLY", native=FALSE) |
| 252 | 252 |
res <- read_modality(h5, backed) |
| 253 | 253 |
H5Fclose(h5) |
| ... | ... |
@@ -267,8 +267,8 @@ ReadH5AD <- function(file, backed=FALSE) {
|
| 267 | 267 |
#' |
| 268 | 268 |
#' @examples |
| 269 | 269 |
#' data(miniACC, package="MultiAssayExperiment") |
| 270 |
-#' WriteH5MU(miniACC, "miniacc.h5mu") |
|
| 271 |
-#' mae <- ReadH5MU("miniacc.h5mu")
|
|
| 270 |
+#' writeH5MU(miniACC, "miniacc.h5mu") |
|
| 271 |
+#' mae <- readH5MU("miniacc.h5mu")
|
|
| 272 | 272 |
#' |
| 273 | 273 |
#' @importFrom stats setNames |
| 274 | 274 |
#' @importFrom rhdf5 h5ls H5Fclose H5Lexists H5Dread H5Dclose |
| ... | ... |
@@ -276,7 +276,7 @@ ReadH5AD <- function(file, backed=FALSE) {
|
| 276 | 276 |
#' @importFrom MultiAssayExperiment MultiAssayExperiment |
| 277 | 277 |
#' |
| 278 | 278 |
#' @export |
| 279 |
-ReadH5MU <- function(file, backed=FALSE) {
|
|
| 279 |
+readH5MU <- function(file, backed=FALSE) {
|
|
| 280 | 280 |
# Connect to the the file |
| 281 | 281 |
h5 <- open_and_check_mudata(file) |
| 282 | 282 |
|
| ... | ... |
@@ -283,6 +283,9 @@ ReadH5MU <- function(file, backed=FALSE) {
|
| 283 | 283 |
# Check all the assays are written |
| 284 | 284 |
assays <- setNames(nm=h5ls(h5autoclose(h5 & "mod"), recursive=FALSE)$name) |
| 285 | 285 |
|
| 286 |
+ # Read modality order if available and matches available assays |
|
| 287 |
+ mod_order <- check_mod_order(h5) |
|
| 288 |
+ |
|
| 286 | 289 |
# Create global colData |
| 287 | 290 |
metadata <- read_with_index(h5autoclose(h5 & "obs")) |
| 288 | 291 |
|
| ... | ... |
@@ -290,6 +293,7 @@ ReadH5MU <- function(file, backed=FALSE) {
|
| 290 | 293 |
modalities <- lapply(assays, function(mod) {
|
| 291 | 294 |
read_modality(h5autoclose(h5 & paste("mod", mod, sep="/")), backed)
|
| 292 | 295 |
}) |
| 296 |
+ modalities <- modalities[mod_order] |
|
| 293 | 297 |
|
| 294 | 298 |
args <- list(experiments=modalities, colData=metadata) |
| 295 | 299 |
|
| ... | ... |
@@ -30,7 +30,7 @@ read_dataframe <- function(group) {
|
| 30 | 30 |
labels_items <- H5Dread(labels) |
| 31 | 31 |
n_labels <- length(unique(values)) |
| 32 | 32 |
if (length(labels_items) > n_labels) {
|
| 33 |
- labels_items <- labels_items[1:n_labels] |
|
| 33 |
+ labels_items <- labels_items[seq_len(n_labels)] |
|
| 34 | 34 |
} |
| 35 | 35 |
values <- factor(as.integer(values), labels=labels_items) |
| 36 | 36 |
H5Dclose(labels) |
| ... | ... |
@@ -27,7 +27,12 @@ read_dataframe <- function(group) {
|
| 27 | 27 |
warning("found categories attribute for column ", name, ", but it is not a reference")
|
| 28 | 28 |
} else {
|
| 29 | 29 |
labels <- H5Rdereference(labels, h5loc=col) |
| 30 |
- values <- factor(as.integer(values), labels=H5Dread(labels)) |
|
| 30 |
+ labels_items <- H5Dread(labels) |
|
| 31 |
+ n_labels <- length(unique(values)) |
|
| 32 |
+ if (length(labels_items) > n_labels) {
|
|
| 33 |
+ labels_items <- labels_items[1:n_labels] |
|
| 34 |
+ } |
|
| 35 |
+ values <- factor(as.integer(values), labels=labels_items) |
|
| 31 | 36 |
H5Dclose(labels) |
| 32 | 37 |
} |
| 33 | 38 |
H5Aclose(attr) |
all required rhdf5 patches are now in bioc-devel, so we don't need to
install it separately from Github. Because of this, we are targeting
bioc-devel anyway.
| ... | ... |
@@ -236,7 +236,8 @@ read_modality <- function(view, backed=FALSE) {
|
| 236 | 236 |
#' @return A \code{\linkS4class{SingleCellExperiment}}.
|
| 237 | 237 |
#' |
| 238 | 238 |
#' @examples |
| 239 |
-#' WriteH5AD(MultiAssayExperiment::miniACC[[1]], "miniacc.h5ad") |
|
| 239 |
+#' data(miniACC, package="MultiAssayExperiment") |
|
| 240 |
+#' WriteH5AD(miniACC[[1]], "miniacc.h5ad") |
|
| 240 | 241 |
#' sce <- ReadH5AD("miniacc.h5ad")
|
| 241 | 242 |
#' |
| 242 | 243 |
#' @importFrom rhdf5 H5Fclose |
| ... | ... |
@@ -260,7 +261,8 @@ ReadH5AD <- function(file, backed=FALSE) {
|
| 260 | 261 |
#' @return A \code{\linkS4class{MultiAssayExperiment}}
|
| 261 | 262 |
#' |
| 262 | 263 |
#' @examples |
| 263 |
-#' WriteH5MU(MultiAssayExperiment::miniACC, "miniacc.h5mu") |
|
| 264 |
+#' data(miniACC, package="MultiAssayExperiment") |
|
| 265 |
+#' WriteH5MU(miniACC, "miniacc.h5mu") |
|
| 264 | 266 |
#' mae <- ReadH5MU("miniacc.h5mu")
|
| 265 | 267 |
#' |
| 266 | 268 |
#' @importFrom stats setNames |
| ... | ... |
@@ -1,4 +1,4 @@ |
| 1 |
-#' @importFrom rhdf5 H5Aexists H5Aopen H5Aread H5Aclose H5Dread H5Dclose |
|
| 1 |
+#' @importFrom rhdf5 H5Aexists H5Aopen H5Aread H5Aclose H5Dread H5Dclose H5Rdereference |
|
| 2 | 2 |
#' @importMethodsFrom rhdf5 & |
| 3 | 3 |
#' @importFrom methods is |
| 4 | 4 |
read_dataframe <- function(group) {
|
| ... | ... |
@@ -16,12 +16,17 @@ read_dataframe <- function(group) {
|
| 16 | 16 |
col_list <- lapply(columnorder, function(name) {
|
| 17 | 17 |
col <- group & name |
| 18 | 18 |
values <- H5Dread(col) |
| 19 |
+ # h5py saves boolean arrays as HDF5 enums |
|
| 20 |
+ if (is.factor(values) && levels(values) == c("FALSE", "TRUE")) {
|
|
| 21 |
+ values <- as.logical(values) |
|
| 22 |
+ } |
|
| 19 | 23 |
if (H5Aexists(col, "categories")) {
|
| 20 | 24 |
attr <- H5Aopen(col, "categories") |
| 21 | 25 |
labels <- H5Aread(attr) |
| 22 |
- if (!is(labels, "H5IdComponent")) {
|
|
| 26 |
+ if (!is(labels, "H5Ref")) {
|
|
| 23 | 27 |
warning("found categories attribute for column ", name, ", but it is not a reference")
|
| 24 | 28 |
} else {
|
| 29 |
+ labels <- H5Rdereference(labels, h5loc=col) |
|
| 25 | 30 |
values <- factor(as.integer(values), labels=H5Dread(labels)) |
| 26 | 31 |
H5Dclose(labels) |
| 27 | 32 |
} |
| ... | ... |
@@ -20,7 +20,7 @@ read_dataframe <- function(group) {
|
| 20 | 20 |
attr <- H5Aopen(col, "categories") |
| 21 | 21 |
labels <- H5Aread(attr) |
| 22 | 22 |
if (!is(labels, "H5IdComponent")) {
|
| 23 |
- warning(paste0("found categories attribute for column ", name, ", but it is not a reference"))
|
|
| 23 |
+ warning("found categories attribute for column ", name, ", but it is not a reference")
|
|
| 24 | 24 |
} else {
|
| 25 | 25 |
values <- factor(as.integer(values), labels=H5Dread(labels)) |
| 26 | 26 |
H5Dclose(labels) |
| ... | ... |
@@ -63,7 +63,7 @@ read_with_index <- function(dataset) {
|
| 63 | 63 |
encoding <- H5Aread(encattr, "encoding-type") |
| 64 | 64 |
H5Aclose(encattr) |
| 65 | 65 |
if (encoding != "dataframe") {
|
| 66 |
- warning(paste0("Unknown encoding ", encoding, " when attempting to read data frame"))
|
|
| 66 |
+ warning("Unknown encoding ", encoding, " when attempting to read data frame")
|
|
| 67 | 67 |
return(data.frame()) |
| 68 | 68 |
} |
| 69 | 69 |
read_dataframe(dataset) |
| ... | ... |
@@ -117,7 +117,7 @@ read_matrix <- function(dataset, backed=FALSE) {
|
| 117 | 117 |
return(read_sparse_matrix(dataset, encoding)) |
| 118 | 118 |
} |
| 119 | 119 |
} else {
|
| 120 |
- warning(paste0("Unknown encoding ", encoding, "when attempting to read matrix"))
|
|
| 120 |
+ warning("Unknown encoding ", encoding, "when attempting to read matrix")
|
|
| 121 | 121 |
return(matrix()) |
| 122 | 122 |
} |
| 123 | 123 |
} else {
|
| ... | ... |
@@ -149,7 +149,7 @@ read_group <- function(group) {
|
| 149 | 149 |
} else if (endsWith(encoding, "matrix")) {
|
| 150 | 150 |
read_sparse_matrix(group) |
| 151 | 151 |
} else {
|
| 152 |
- warning(paste0("Unknown encoding ", encoding))
|
|
| 152 |
+ warning("Unknown encoding ", encoding)
|
|
| 153 | 153 |
invisible(NULL) |
| 154 | 154 |
} |
| 155 | 155 |
} |
| ... | ... |
@@ -208,7 +208,7 @@ read_modality <- function(view, backed=FALSE) {
|
| 208 | 208 |
for (name in names) {
|
| 209 | 209 |
cpair <- read_matrix(h5autoclose(view & paste(cp$name, name, sep="/"))) |
| 210 | 210 |
if (!is(cpair, "dsparseMatrix")) {
|
| 211 |
- warning(paste("Pairwise", cp$name, "matrix", name, "is not a sparse matrix. Only sparse matrices are currently supported, skipping..."))
|
|
| 211 |
+ warning("Pairwise ", cp$name, " matrix ", name, " is not a sparse matrix. Only sparse matrices are currently supported, skipping...")
|
|
| 212 | 212 |
} else {
|
| 213 | 213 |
se <- cp$setter(se, name, value=cpair) |
| 214 | 214 |
} |
| ... | ... |
@@ -230,6 +230,10 @@ read_modality <- function(view, backed=FALSE) {
|
| 230 | 230 |
#' |
| 231 | 231 |
#' @return A \code{\linkS4class{SingleCellExperiment}}.
|
| 232 | 232 |
#' |
| 233 |
+#' @examples |
|
| 234 |
+#' WriteH5AD(MultiAssayExperiment::miniACC[[1]], "miniacc.h5ad") |
|
| 235 |
+#' sce <- ReadH5AD("miniacc.h5ad")
|
|
| 236 |
+#' |
|
| 233 | 237 |
#' @importFrom rhdf5 H5Fclose |
| 234 | 238 |
#' @export |
| 235 | 239 |
ReadH5AD <- function(file, backed=FALSE) {
|
| ... | ... |
@@ -250,6 +254,10 @@ ReadH5AD <- function(file, backed=FALSE) {
|
| 250 | 254 |
#' |
| 251 | 255 |
#' @return A \code{\linkS4class{MultiAssayExperiment}}
|
| 252 | 256 |
#' |
| 257 |
+#' @examples |
|
| 258 |
+#' WriteH5MU(MultiAssayExperiment::miniACC, "miniacc.h5mu") |
|
| 259 |
+#' mae <- ReadH5MU("miniacc.h5mu")
|
|
| 260 |
+#' |
|
| 253 | 261 |
#' @importFrom stats setNames |
| 254 | 262 |
#' @importFrom rhdf5 h5ls H5Fclose H5Lexists H5Dread H5Dclose |
| 255 | 263 |
#' @importMethodsFrom rhdf5 & |
| ... | ... |
@@ -1,5 +1,6 @@ |
| 1 | 1 |
#' @importFrom rhdf5 H5Aexists H5Aopen H5Aread H5Aclose H5Dread H5Dclose |
| 2 | 2 |
#' @importMethodsFrom rhdf5 & |
| 3 |
+#' @importFrom methods is |
|
| 3 | 4 |
read_dataframe <- function(group) {
|
| 4 | 5 |
indexcol <- "_index" |
| 5 | 6 |
if (H5Aexists(group, "_index")) {
|
| ... | ... |
@@ -94,6 +95,7 @@ read_sparse_matrix <- function(group, encoding, backed=FALSE) {
|
| 94 | 95 |
} |
| 95 | 96 |
|
| 96 | 97 |
#' @importFrom rhdf5 H5Iget_type H5Iget_name H5Aexists H5Aopen H5Aread H5Aclose H5Dread H5Dclose H5Gclose H5Fget_name |
| 98 |
+#' @importFrom methods new |
|
| 97 | 99 |
read_matrix <- function(dataset, backed=FALSE) {
|
| 98 | 100 |
if (backed) {
|
| 99 | 101 |
have_delayedarray <- requireNamespace("HDF5Array", quietly=TRUE)
|
| ... | ... |
@@ -167,6 +169,7 @@ read_attribute <- function(attr) {
|
| 167 | 169 |
#' @importFrom SingleCellExperiment SingleCellExperiment |
| 168 | 170 |
#' @importMethodsFrom SingleCellExperiment colPair<- rowPair<- |
| 169 | 171 |
#' @importFrom SummarizedExperiment SummarizedExperiment |
| 172 |
+#' @importFrom methods is |
|
| 170 | 173 |
read_modality <- function(view, backed=FALSE) {
|
| 171 | 174 |
X <- read_matrix(h5autoclose(view & "X"), backed=backed) |
| 172 | 175 |
var <- read_with_index(h5autoclose(view & "var")) |
| ... | ... |
@@ -205,7 +208,7 @@ read_modality <- function(view, backed=FALSE) {
|
| 205 | 208 |
for (name in names) {
|
| 206 | 209 |
cpair <- read_matrix(h5autoclose(view & paste(cp$name, name, sep="/"))) |
| 207 | 210 |
if (!is(cpair, "dsparseMatrix")) {
|
| 208 |
- warn(paste("Pairwise", cp$name, "matrix", name, "is not a sparse matrix. Only sparse matrices are currently supported, skipping..."))
|
|
| 211 |
+ warning(paste("Pairwise", cp$name, "matrix", name, "is not a sparse matrix. Only sparse matrices are currently supported, skipping..."))
|
|
| 209 | 212 |
} else {
|
| 210 | 213 |
se <- cp$setter(se, name, value=cpair) |
| 211 | 214 |
} |
| ... | ... |
@@ -109,7 +109,7 @@ read_matrix <- function(dataset, backed=FALSE) {
|
| 109 | 109 |
H5Aclose(encattr) |
| 110 | 110 |
if (encoding %in% c("csr_matrix", "csc_matrix")) {
|
| 111 | 111 |
if (backed) {
|
| 112 |
- cls <- ifelse(encoding == "csr_matrix", "CSR_H5ADMatrixSeed", "CSC_H5ADMatrixSeed") |
|
| 112 |
+ cls <- ifelse(encoding == "csr_matrix", "CSC_H5ADMatrixSeed", "CSR_H5ADMatrixSeed") |
|
| 113 | 113 |
seed <- HDF5Array::H5SparseMatrixSeed |
| 114 | 114 |
} else {
|
| 115 | 115 |
return(read_sparse_matrix(dataset, encoding)) |
| ... | ... |
@@ -160,6 +160,7 @@ read_attribute <- function(attr) {
|
| 160 | 160 |
H5Dread(attr) |
| 161 | 161 |
} |
| 162 | 162 |
|
| 163 |
+#' @importFrom stats setNames |
|
| 163 | 164 |
#' @importFrom rhdf5 h5ls |
| 164 | 165 |
#' @importMethodsFrom rhdf5 & |
| 165 | 166 |
#' @importFrom S4Vectors SimpleList |
| ... | ... |
@@ -170,12 +171,19 @@ read_modality <- function(view, backed=FALSE) {
|
| 170 | 171 |
X <- read_matrix(h5autoclose(view & "X"), backed=backed) |
| 171 | 172 |
var <- read_with_index(h5autoclose(view & "var")) |
| 172 | 173 |
obs <- read_with_index(h5autoclose(view & "obs")) |
| 173 |
- |
|
| 174 |
- viewnames <- h5ls(view, recursive=FALSE)$name |
|
| 175 | 174 |
rownames(X) <- rownames(var) |
| 176 | 175 |
colnames(X) <- rownames(obs) |
| 177 | 176 |
|
| 178 |
- args <- list(assays=list(counts=X), rowData=var, colData=obs) |
|
| 177 |
+ viewnames <- h5ls(view, recursive=FALSE)$name |
|
| 178 |
+ layers <- list() |
|
| 179 |
+ if ("layers" %in% viewnames) {
|
|
| 180 |
+ layers <- lapply(setNames(nm=h5ls(h5autoclose(view & "layers"), recursive=FALSE)$name), function(layer) {
|
|
| 181 |
+ read_matrix(h5autoclose(view & paste("layers", layer, sep="/")), backed=backed)
|
|
| 182 |
+ }) |
|
| 183 |
+ } |
|
| 184 |
+ |
|
| 185 |
+ args <- list(assays=c(list(X), layers), rowData=var, colData=obs) |
|
| 186 |
+ |
|
| 179 | 187 |
if ("obsm" %in% viewnames) {
|
| 180 | 188 |
obsmnames <- h5ls(h5autoclose(view & "obsm"), recursive=FALSE)$name |
| 181 | 189 |
obsm <- lapply(obsmnames, function(space) {
|
| ... | ... |
@@ -222,9 +222,9 @@ read_modality <- function(view, backed=FALSE) {
|
| 222 | 222 |
#' @importFrom rhdf5 H5Fclose |
| 223 | 223 |
#' @export |
| 224 | 224 |
ReadH5AD <- function(file, backed=FALSE) {
|
| 225 |
- file <- H5Fopen(filename, flags="H5F_ACC_RDONLY", native=FALSE) |
|
| 226 |
- res <- read_modality(file, backed) |
|
| 227 |
- H5Fclose(file) |
|
| 225 |
+ h5 <- H5Fopen(file, flags="H5F_ACC_RDONLY", native=FALSE) |
|
| 226 |
+ res <- read_modality(h5, backed) |
|
| 227 |
+ H5Fclose(h5) |
|
| 228 | 228 |
res |
| 229 | 229 |
} |
| 230 | 230 |
|
| ... | ... |
@@ -164,6 +164,7 @@ read_attribute <- function(attr) {
|
| 164 | 164 |
#' @importMethodsFrom rhdf5 & |
| 165 | 165 |
#' @importFrom S4Vectors SimpleList |
| 166 | 166 |
#' @importFrom SingleCellExperiment SingleCellExperiment |
| 167 |
+#' @importMethodsFrom SingleCellExperiment colPair<- rowPair<- |
|
| 167 | 168 |
#' @importFrom SummarizedExperiment SummarizedExperiment |
| 168 | 169 |
read_modality <- function(view, backed=FALSE) {
|
| 169 | 170 |
X <- read_matrix(h5autoclose(view & "X"), backed=backed) |
| ... | ... |
@@ -173,6 +174,8 @@ read_modality <- function(view, backed=FALSE) {
|
| 173 | 174 |
viewnames <- h5ls(view, recursive=FALSE)$name |
| 174 | 175 |
rownames(X) <- rownames(var) |
| 175 | 176 |
colnames(X) <- rownames(obs) |
| 177 |
+ |
|
| 178 |
+ args <- list(assays=list(counts=X), rowData=var, colData=obs) |
|
| 176 | 179 |
if ("obsm" %in% viewnames) {
|
| 177 | 180 |
obsmnames <- h5ls(h5autoclose(view & "obsm"), recursive=FALSE)$name |
| 178 | 181 |
obsm <- lapply(obsmnames, function(space) {
|
| ... | ... |
@@ -183,16 +186,29 @@ read_modality <- function(view, backed=FALSE) {
|
| 183 | 186 |
elem |
| 184 | 187 |
}) |
| 185 | 188 |
names(obsm) <- obsmnames |
| 186 |
- se <- SingleCellExperiment(assays=SimpleList(counts=X), rowData=var, colData=obs, reducedDims=obsm) |
|
| 187 |
- } else {
|
|
| 188 |
- se <- SummarizedExperiment(assays=SimpleList(counts=X), rowData=var, colData=obs) |
|
| 189 |
+ args$reducedDims <- obsm |
|
| 189 | 190 |
} |
| 190 | 191 |
|
| 192 |
+ se <- do.call(SingleCellExperiment, args) |
|
| 193 |
+ |
|
| 194 |
+ for (cp in list(list(name="obsp", setter=`colPair<-`), list(name="varp", setter=`rowPair<-`))) {
|
|
| 195 |
+ if (cp$name %in% viewnames) {
|
|
| 196 |
+ names <- h5ls(h5autoclose(view & cp$name), recursive=FALSE)$name |
|
| 197 |
+ for (name in names) {
|
|
| 198 |
+ cpair <- read_matrix(h5autoclose(view & paste(cp$name, name, sep="/"))) |
|
| 199 |
+ if (!is(cpair, "dsparseMatrix")) {
|
|
| 200 |
+ warn(paste("Pairwise", cp$name, "matrix", name, "is not a sparse matrix. Only sparse matrices are currently supported, skipping..."))
|
|
| 201 |
+ } else {
|
|
| 202 |
+ se <- cp$setter(se, name, value=cpair) |
|
| 203 |
+ } |
|
| 204 |
+ } |
|
| 205 |
+ } |
|
| 206 |
+ } |
|
| 191 | 207 |
se |
| 192 | 208 |
} |
| 193 | 209 |
|
| 194 | 210 |
|
| 195 |
-#' Read an .h5ad file and create a \code{\linkS4class{SummarizedExperiment}}.
|
|
| 211 |
+#' Read an .h5ad file and create a \code{\linkS4class{SingleCellExperiment}}.
|
|
| 196 | 212 |
#' |
| 197 | 213 |
#' In file-backed mode, the main \code{X} matrix is not read into memory,
|
| 198 | 214 |
#' but references the HDF5 file and its required parts are read on demand. |
| ... | ... |
@@ -201,8 +217,7 @@ read_modality <- function(view, backed=FALSE) {
|
| 201 | 217 |
#' @param file Path to the .h5ad file. |
| 202 | 218 |
#' @param backed Whether to use file-backed mode. |
| 203 | 219 |
#' |
| 204 |
-#' @return A \code{\linkS4class{SingleCellExperiment}} if the .h5ad object has
|
|
| 205 |
-#' non-empty \code{.obsm}, otherwise a \code{\linkS4class{SummarizedExperiment}}.
|
|
| 220 |
+#' @return A \code{\linkS4class{SingleCellExperiment}}.
|
|
| 206 | 221 |
#' |
| 207 | 222 |
#' @importFrom rhdf5 H5Fclose |
| 208 | 223 |
#' @export |
| ... | ... |
@@ -258,7 +273,9 @@ ReadH5MU <- function(file, backed=FALSE) {
|
| 258 | 273 |
idx <- which(cmap > 0) |
| 259 | 274 |
data.frame(assay=mod, primary=rownames(metadata)[idx], colname=colnames(modalities[[mod]])[cmap[idx]]) |
| 260 | 275 |
}) |
| 261 |
- args$sampleMap <- do.call(rbind, samplemaps) |
|
| 276 |
+ sampleMap <- do.call(rbind, samplemaps) |
|
| 277 |
+ rownames(sampleMap) <- NULL |
|
| 278 |
+ args$sampleMap <- sampleMap |
|
| 262 | 279 |
} |
| 263 | 280 |
|
| 264 | 281 |
# Close the connection |
- WriteH5AD wasn't writing .obs and .var for matrices, but the reading
code assumes that they're always there
- The original MAE can have different rownames in its colData than the
rownames of the individual experiment. In this case, automatic
sampleMap construction fails, so we create a sampleMap manually from
the .obsmap
| ... | ... |
@@ -224,7 +224,8 @@ ReadH5AD <- function(file, backed=FALSE) {
|
| 224 | 224 |
#' |
| 225 | 225 |
#' @return A \code{\linkS4class{MultiAssayExperiment}}
|
| 226 | 226 |
#' |
| 227 |
-#' @importFrom rhdf5 h5ls H5Fclose |
|
| 227 |
+#' @importFrom stats setNames |
|
| 228 |
+#' @importFrom rhdf5 h5ls H5Fclose H5Lexists H5Dread H5Dclose |
|
| 228 | 229 |
#' @importMethodsFrom rhdf5 & |
| 229 | 230 |
#' @importFrom MultiAssayExperiment MultiAssayExperiment |
| 230 | 231 |
#' |
| ... | ... |
@@ -234,7 +235,7 @@ ReadH5MU <- function(file, backed=FALSE) {
|
| 234 | 235 |
h5 <- open_and_check_mudata(file) |
| 235 | 236 |
|
| 236 | 237 |
# Check all the assays are written |
| 237 |
- assays <- h5ls(h5autoclose(h5 & "mod"), recursive=FALSE)$name |
|
| 238 |
+ assays <- setNames(nm=h5ls(h5autoclose(h5 & "mod"), recursive=FALSE)$name) |
|
| 238 | 239 |
|
| 239 | 240 |
# Create global colData |
| 240 | 241 |
metadata <- read_with_index(h5autoclose(h5 & "obs")) |
| ... | ... |
@@ -243,12 +244,27 @@ ReadH5MU <- function(file, backed=FALSE) {
|
| 243 | 244 |
modalities <- lapply(assays, function(mod) {
|
| 244 | 245 |
read_modality(h5autoclose(h5 & paste("mod", mod, sep="/")), backed)
|
| 245 | 246 |
}) |
| 246 |
- names(modalities) <- assays |
|
| 247 |
+ |
|
| 248 |
+ args <- list(experiments=modalities, colData=metadata) |
|
| 249 |
+ |
|
| 250 |
+ # create a sampleMap. This is needed for a round-trip MAE -> .h5mu -> MAE |
|
| 251 |
+ # if colData(MAE) has different row names than the experiments |
|
| 252 |
+ if (H5Lexists(h5, "obsmap")) {
|
|
| 253 |
+ samplemaps <- lapply(assays, function(mod) {
|
|
| 254 |
+ cmapdset <- h5 & paste("obsmap", mod, sep="/")
|
|
| 255 |
+ cmap <- H5Dread(cmapdset) |
|
| 256 |
+ H5Dclose(cmapdset) |
|
| 257 |
+ |
|
| 258 |
+ idx <- which(cmap > 0) |
|
| 259 |
+ data.frame(assay=mod, primary=rownames(metadata)[idx], colname=colnames(modalities[[mod]])[cmap[idx]]) |
|
| 260 |
+ }) |
|
| 261 |
+ args$sampleMap <- do.call(rbind, samplemaps) |
|
| 262 |
+ } |
|
| 247 | 263 |
|
| 248 | 264 |
# Close the connection |
| 249 | 265 |
H5Fclose(h5) |
| 250 | 266 |
|
| 251 | 267 |
# Create a MAE object |
| 252 |
- MultiAssayExperiment(modalities, metadata) |
|
| 268 |
+ do.call(MultiAssayExperiment, args) |
|
| 253 | 269 |
} |
| 254 | 270 |
|
| ... | ... |
@@ -192,7 +192,17 @@ read_modality <- function(view, backed=FALSE) {
|
| 192 | 192 |
} |
| 193 | 193 |
|
| 194 | 194 |
|
| 195 |
-#' Read an .h5ad file and create a SummarizedExperiment |
|
| 195 |
+#' Read an .h5ad file and create a \code{\linkS4class{SummarizedExperiment}}.
|
|
| 196 |
+#' |
|
| 197 |
+#' In file-backed mode, the main \code{X} matrix is not read into memory,
|
|
| 198 |
+#' but references the HDF5 file and its required parts are read on demand. |
|
| 199 |
+#' This requires the HDF5Array package to be installed. |
|
| 200 |
+#' |
|
| 201 |
+#' @param file Path to the .h5ad file. |
|
| 202 |
+#' @param backed Whether to use file-backed mode. |
|
| 203 |
+#' |
|
| 204 |
+#' @return A \code{\linkS4class{SingleCellExperiment}} if the .h5ad object has
|
|
| 205 |
+#' non-empty \code{.obsm}, otherwise a \code{\linkS4class{SummarizedExperiment}}.
|
|
| 196 | 206 |
#' |
| 197 | 207 |
#' @importFrom rhdf5 H5Fclose |
| 198 | 208 |
#' @export |
| ... | ... |
@@ -203,7 +213,16 @@ ReadH5AD <- function(file, backed=FALSE) {
|
| 203 | 213 |
res |
| 204 | 214 |
} |
| 205 | 215 |
|
| 206 |
-#' Read an .h5mu file and create a MultiAssayExperiment |
|
| 216 |
+#' Read an .h5mu file and create a \code{\link{MultiAssayExperiment}}.
|
|
| 217 |
+#' |
|
| 218 |
+#' In file-backed mode, the main \code{X} matrices are not read into memory,
|
|
| 219 |
+#' but reference the HDF5 file and their required parts are read on demand. |
|
| 220 |
+#' This requires the HDF5Array package to be installed. |
|
| 221 |
+#' |
|
| 222 |
+#' @param file Path to the .h5mu file. |
|
| 223 |
+#' @param backed Whether to use file-backed mode. |
|
| 224 |
+#' |
|
| 225 |
+#' @return A \code{\linkS4class{MultiAssayExperiment}}
|
|
| 207 | 226 |
#' |
| 208 | 227 |
#' @importFrom rhdf5 h5ls H5Fclose |
| 209 | 228 |
#' @importMethodsFrom rhdf5 & |
| ... | ... |
@@ -160,13 +160,53 @@ read_attribute <- function(attr) {
|
| 160 | 160 |
H5Dread(attr) |
| 161 | 161 |
} |
| 162 | 162 |
|
| 163 |
-#' Read an .h5mu file and create a MultiAssayExperiment |
|
| 164 |
-#' |
|
| 165 |
-#' @importFrom rhdf5 h5ls H5Fclose |
|
| 163 |
+#' @importFrom rhdf5 h5ls |
|
| 166 | 164 |
#' @importMethodsFrom rhdf5 & |
| 167 | 165 |
#' @importFrom S4Vectors SimpleList |
| 168 | 166 |
#' @importFrom SingleCellExperiment SingleCellExperiment |
| 169 | 167 |
#' @importFrom SummarizedExperiment SummarizedExperiment |
| 168 |
+read_modality <- function(view, backed=FALSE) {
|
|
| 169 |
+ X <- read_matrix(h5autoclose(view & "X"), backed=backed) |
|
| 170 |
+ var <- read_with_index(h5autoclose(view & "var")) |
|
| 171 |
+ obs <- read_with_index(h5autoclose(view & "obs")) |
|
| 172 |
+ |
|
| 173 |
+ viewnames <- h5ls(view, recursive=FALSE)$name |
|
| 174 |
+ rownames(X) <- rownames(var) |
|
| 175 |
+ colnames(X) <- rownames(obs) |
|
| 176 |
+ if ("obsm" %in% viewnames) {
|
|
| 177 |
+ obsmnames <- h5ls(h5autoclose(view & "obsm"), recursive=FALSE)$name |
|
| 178 |
+ obsm <- lapply(obsmnames, function(space) {
|
|
| 179 |
+ elem <- read_attribute(h5autoclose(view & paste("obsm", space, sep="/")))
|
|
| 180 |
+ if (!is.data.frame(elem) && length(dim(elem)) > 1) |
|
| 181 |
+ elem <- t(elem) |
|
| 182 |
+ rownames(elem) <- rownames(obs) |
|
| 183 |
+ elem |
|
| 184 |
+ }) |
|
| 185 |
+ names(obsm) <- obsmnames |
|
| 186 |
+ se <- SingleCellExperiment(assays=SimpleList(counts=X), rowData=var, colData=obs, reducedDims=obsm) |
|
| 187 |
+ } else {
|
|
| 188 |
+ se <- SummarizedExperiment(assays=SimpleList(counts=X), rowData=var, colData=obs) |
|
| 189 |
+ } |
|
| 190 |
+ |
|
| 191 |
+ se |
|
| 192 |
+} |
|
| 193 |
+ |
|
| 194 |
+ |
|
| 195 |
+#' Read an .h5ad file and create a SummarizedExperiment |
|
| 196 |
+#' |
|
| 197 |
+#' @importFrom rhdf5 H5Fclose |
|
| 198 |
+#' @export |
|
| 199 |
+ReadH5AD <- function(file, backed=FALSE) {
|
|
| 200 |
+ file <- H5Fopen(filename, flags="H5F_ACC_RDONLY", native=FALSE) |
|
| 201 |
+ res <- read_modality(file, backed) |
|
| 202 |
+ H5Fclose(file) |
|
| 203 |
+ res |
|
| 204 |
+} |
|
| 205 |
+ |
|
| 206 |
+#' Read an .h5mu file and create a MultiAssayExperiment |
|
| 207 |
+#' |
|
| 208 |
+#' @importFrom rhdf5 h5ls H5Fclose |
|
| 209 |
+#' @importMethodsFrom rhdf5 & |
|
| 170 | 210 |
#' @importFrom MultiAssayExperiment MultiAssayExperiment |
| 171 | 211 |
#' |
| 172 | 212 |
#' @export |
| ... | ... |
@@ -182,48 +222,14 @@ ReadH5MU <- function(file, backed=FALSE) {
|
| 182 | 222 |
|
| 183 | 223 |
# Create an experiments list |
| 184 | 224 |
modalities <- lapply(assays, function(mod) {
|
| 185 |
- view <- h5autoclose(h5 & paste("mod", mod, sep="/"))
|
|
| 186 |
- X <- read_matrix(h5autoclose(view & "X"), backed=backed) |
|
| 187 |
- var <- read_with_index(h5autoclose(view & "var")) |
|
| 188 |
- obs <- read_with_index(h5autoclose(view & "obs")) |
|
| 189 |
- primary <- rownames(obs) |
|
| 190 |
- rownames(obs) <- paste(mod, rownames(obs), sep="-") |
|
| 191 |
- |
|
| 192 |
- viewnames <- h5ls(view, recursive=FALSE)$name |
|
| 193 |
- rownames(X) <- rownames(var) |
|
| 194 |
- colnames(X) <- rownames(obs) |
|
| 195 |
- if ("obsm" %in% viewnames) {
|
|
| 196 |
- obsmnames <- h5ls(h5autoclose(view & "obsm"), recursive=FALSE)$name |
|
| 197 |
- obsm <- lapply(obsmnames, function(space) {
|
|
| 198 |
- elem <- read_attribute(h5autoclose(view & paste("obsm", space, sep="/")))
|
|
| 199 |
- if (!is.data.frame(elem) && length(dim(elem)) > 1) |
|
| 200 |
- elem <- t(elem) |
|
| 201 |
- rownames(elem) <- rownames(obs) |
|
| 202 |
- elem |
|
| 203 |
- }) |
|
| 204 |
- names(obsm) <- obsmnames |
|
| 205 |
- se <- SingleCellExperiment(assays=SimpleList(counts=X), rowData=var, colData=obs, reducedDims=obsm) |
|
| 206 |
- } else {
|
|
| 207 |
- se <- SummarizedExperiment(assays=SimpleList(counts=X), rowData=var, colData=obs) |
|
| 208 |
- } |
|
| 209 |
- |
|
| 210 |
- list(exp=se, primary=primary) |
|
| 225 |
+ read_modality(h5autoclose(h5 & paste("mod", mod, sep="/")), backed)
|
|
| 211 | 226 |
}) |
| 212 | 227 |
names(modalities) <- assays |
| 213 | 228 |
|
| 214 |
- # Create sampleMap |
|
| 215 |
- mapping <- lapply(modalities, function(mod) {
|
|
| 216 |
- data.frame(primary=mod$primary, colname=colnames(mod$exp), stringsAsFactors=FALSE) |
|
| 217 |
- }) |
|
| 218 |
- |
|
| 219 |
- obsmap <- do.call(rbind, mapping) |
|
| 220 |
- obsmap["assay"] <- rep(assays, times=vapply(mapping, nrow, 1)) |
|
| 221 |
- obsmap <- obsmap[,c("assay", "primary", "colname")]
|
|
| 222 |
- |
|
| 223 | 229 |
# Close the connection |
| 224 | 230 |
H5Fclose(h5) |
| 225 | 231 |
|
| 226 | 232 |
# Create a MAE object |
| 227 |
- MultiAssayExperiment(lapply(modalities, function(mod)mod$exp), metadata, obsmap) |
|
| 233 |
+ MultiAssayExperiment(modalities, metadata) |
|
| 228 | 234 |
} |
| 229 | 235 |
|
| ... | ... |
@@ -31,7 +31,8 @@ read_dataframe <- function(group) {
|
| 31 | 31 |
}) |
| 32 | 32 |
names(col_list) <- columnorder |
| 33 | 33 |
index <- group & indexcol |
| 34 |
- col_list[["row.names"]] <- H5Dread(group & indexcol) |
|
| 34 |
+ col_list[["row.names"]] <- H5Dread(index) |
|
| 35 |
+ H5Dclose(index) |
|
| 35 | 36 |
do.call(data.frame, args=col_list) |
| 36 | 37 |
} |
| 37 | 38 |
|
| ... | ... |
@@ -70,12 +71,18 @@ read_with_index <- function(dataset) {
|
| 70 | 71 |
} |
| 71 | 72 |
} |
| 72 | 73 |
|
| 73 |
-#' @importFrom rhdf5 H5Dread H5Aopen H5Aread H5Aclose |
|
| 74 |
+#' @importFrom rhdf5 H5Dread H5Dclose H5Aopen H5Aread H5Aclose |
|
| 74 | 75 |
#' @importMethodsFrom rhdf5 & |
| 75 | 76 |
read_sparse_matrix <- function(group, encoding, backed=FALSE) {
|
| 76 |
- i <- as.vector(H5Dread(group & "indices")) |
|
| 77 |
- p <- as.vector(H5Dread(group & "indptr")) |
|
| 78 |
- x <- as.vector(H5Dread(group & "data")) |
|
| 77 |
+ indices <- group & "indices" |
|
| 78 |
+ indptr <- group & "indptr" |
|
| 79 |
+ data <- group & "data" |
|
| 80 |
+ i <- as.vector(H5Dread(indices)) |
|
| 81 |
+ p <- as.vector(H5Dread(indptr)) |
|
| 82 |
+ x <- as.vector(H5Dread(data)) |
|
| 83 |
+ H5Dclose(indices) |
|
| 84 |
+ H5Dclose(indptr) |
|
| 85 |
+ H5Dclose(data) |
|
| 79 | 86 |
shapeattr <- H5Aopen(group, "shape") |
| 80 | 87 |
shape <- H5Aread(shapeattr) |
| 81 | 88 |
H5Aclose(shapeattr) |
| ... | ... |
@@ -86,7 +93,7 @@ read_sparse_matrix <- function(group, encoding, backed=FALSE) {
|
| 86 | 93 |
} |
| 87 | 94 |
} |
| 88 | 95 |
|
| 89 |
-#' @importFrom rhdf5 H5Iget_type H5Iget_name H5Aexists H5Aopen H5Aread H5Aclose H5Dread H5Fget_name |
|
| 96 |
+#' @importFrom rhdf5 H5Iget_type H5Iget_name H5Aexists H5Aopen H5Aread H5Aclose H5Dread H5Dclose H5Gclose H5Fget_name |
|
| 90 | 97 |
read_matrix <- function(dataset, backed=FALSE) {
|
| 91 | 98 |
if (backed) {
|
| 92 | 99 |
have_delayedarray <- requireNamespace("HDF5Array", quietly=TRUE)
|
| ... | ... |
@@ -168,17 +175,17 @@ ReadH5MU <- function(file, backed=FALSE) {
|
| 168 | 175 |
h5 <- open_and_check_mudata(file) |
| 169 | 176 |
|
| 170 | 177 |
# Check all the assays are written |
| 171 |
- assays <- h5ls(h5 & "mod", recursive=FALSE)$name |
|
| 178 |
+ assays <- h5ls(h5autoclose(h5 & "mod"), recursive=FALSE)$name |
|
| 172 | 179 |
|
| 173 | 180 |
# Create global colData |
| 174 |
- metadata <- read_with_index(h5 & "obs") |
|
| 181 |
+ metadata <- read_with_index(h5autoclose(h5 & "obs")) |
|
| 175 | 182 |
|
| 176 | 183 |
# Create an experiments list |
| 177 | 184 |
modalities <- lapply(assays, function(mod) {
|
| 178 |
- view <- h5 & paste("mod", mod, sep="/")
|
|
| 179 |
- X <- read_matrix(view & "X", backed=backed) |
|
| 180 |
- var <- read_with_index(view & "var") |
|
| 181 |
- obs <- read_with_index(view & "obs") |
|
| 185 |
+ view <- h5autoclose(h5 & paste("mod", mod, sep="/"))
|
|
| 186 |
+ X <- read_matrix(h5autoclose(view & "X"), backed=backed) |
|
| 187 |
+ var <- read_with_index(h5autoclose(view & "var")) |
|
| 188 |
+ obs <- read_with_index(h5autoclose(view & "obs")) |
|
| 182 | 189 |
primary <- rownames(obs) |
| 183 | 190 |
rownames(obs) <- paste(mod, rownames(obs), sep="-") |
| 184 | 191 |
|
| ... | ... |
@@ -186,9 +193,9 @@ ReadH5MU <- function(file, backed=FALSE) {
|
| 186 | 193 |
rownames(X) <- rownames(var) |
| 187 | 194 |
colnames(X) <- rownames(obs) |
| 188 | 195 |
if ("obsm" %in% viewnames) {
|
| 189 |
- obsmnames <- h5ls(view & "obsm", recursive=FALSE)$name |
|
| 196 |
+ obsmnames <- h5ls(h5autoclose(view & "obsm"), recursive=FALSE)$name |
|
| 190 | 197 |
obsm <- lapply(obsmnames, function(space) {
|
| 191 |
- elem <- read_attribute(view & paste("obsm", space, sep="/"))
|
|
| 198 |
+ elem <- read_attribute(h5autoclose(view & paste("obsm", space, sep="/")))
|
|
| 192 | 199 |
if (!is.data.frame(elem) && length(dim(elem)) > 1) |
| 193 | 200 |
elem <- t(elem) |
| 194 | 201 |
rownames(elem) <- rownames(obs) |
| ... | ... |
@@ -72,7 +72,7 @@ read_with_index <- function(dataset) {
|
| 72 | 72 |
|
| 73 | 73 |
#' @importFrom rhdf5 H5Dread H5Aopen H5Aread H5Aclose |
| 74 | 74 |
#' @importMethodsFrom rhdf5 & |
| 75 |
-read_sparse_matrix <- function(group, encoding) {
|
|
| 75 |
+read_sparse_matrix <- function(group, encoding, backed=FALSE) {
|
|
| 76 | 76 |
i <- as.vector(H5Dread(group & "indices")) |
| 77 | 77 |
p <- as.vector(H5Dread(group & "indptr")) |
| 78 | 78 |
x <- as.vector(H5Dread(group & "data")) |
| ... | ... |
@@ -80,26 +80,53 @@ read_sparse_matrix <- function(group, encoding) {
|
| 80 | 80 |
shape <- H5Aread(shapeattr) |
| 81 | 81 |
H5Aclose(shapeattr) |
| 82 | 82 |
if (encoding == "csr_matrix") {
|
| 83 |
- sparseMatrix(j=i, p=p, x=x, dims=shape, repr="R", index1=FALSE) |
|
| 83 |
+ sparseMatrix(i=i, p=p, x=x, dims=rev(shape), repr="C", index1=FALSE) |
|
| 84 | 84 |
} else {
|
| 85 |
- sparseMatrix(i=i, p=p, x=x, dims=shape, repr="C", index1=FALSE) |
|
| 85 |
+ t(sparseMatrix(i=i, p=p, x=x, dims=shape, repr="C", index1=FALSE)) |
|
| 86 | 86 |
} |
| 87 | 87 |
} |
| 88 | 88 |
|
| 89 |
-#' @importFrom rhdf5 H5Iget_type H5Aexists H5Aopen H5Aread H5Aclose H5Dread |
|
| 90 |
-read_matrix <- function(dataset) {
|
|
| 89 |
+#' @importFrom rhdf5 H5Iget_type H5Iget_name H5Aexists H5Aopen H5Aread H5Aclose H5Dread H5Fget_name |
|
| 90 |
+read_matrix <- function(dataset, backed=FALSE) {
|
|
| 91 |
+ if (backed) {
|
|
| 92 |
+ have_delayedarray <- requireNamespace("HDF5Array", quietly=TRUE)
|
|
| 93 |
+ if (!have_delayedarray) {
|
|
| 94 |
+ warning("Could not load the HDF5Array package. HDF5Array is required for backed matrices. Loading matrix into memory...")
|
|
| 95 |
+ backed <- FALSE |
|
| 96 |
+ } |
|
| 97 |
+ } |
|
| 98 |
+ |
|
| 91 | 99 |
if (H5Iget_type(dataset) == "H5I_GROUP" && H5Aexists(dataset, "encoding-type")) {
|
| 92 | 100 |
encattr <- H5Aopen(dataset, "encoding-type") |
| 93 | 101 |
encoding <- H5Aread(encattr) |
| 94 | 102 |
H5Aclose(encattr) |
| 95 | 103 |
if (encoding %in% c("csr_matrix", "csc_matrix")) {
|
| 96 |
- read_sparse_matrix(dataset, encoding) |
|
| 104 |
+ if (backed) {
|
|
| 105 |
+ cls <- ifelse(encoding == "csr_matrix", "CSR_H5ADMatrixSeed", "CSC_H5ADMatrixSeed") |
|
| 106 |
+ seed <- HDF5Array::H5SparseMatrixSeed |
|
| 107 |
+ } else {
|
|
| 108 |
+ return(read_sparse_matrix(dataset, encoding)) |
|
| 109 |
+ } |
|
| 97 | 110 |
} else {
|
| 98 | 111 |
warning(paste0("Unknown encoding ", encoding, "when attempting to read matrix"))
|
| 99 |
- matrix() |
|
| 112 |
+ return(matrix()) |
|
| 100 | 113 |
} |
| 101 | 114 |
} else {
|
| 102 |
- H5Dread(dataset) |
|
| 115 |
+ if (backed) {
|
|
| 116 |
+ cls <- "Dense_H5ADMatrixSeed" |
|
| 117 |
+ seed <- HDF5Array::HDF5ArraySeed |
|
| 118 |
+ } else {
|
|
| 119 |
+ return(H5Dread(dataset)) |
|
| 120 |
+ } |
|
| 121 |
+ } |
|
| 122 |
+ if (backed) {
|
|
| 123 |
+ file <- H5Fget_name(dataset) |
|
| 124 |
+ name <- H5Iget_name(dataset) |
|
| 125 |
+ suppressWarnings({
|
|
| 126 |
+ seed <- seed(file, name) |
|
| 127 |
+ seed <- new(cls, seed) |
|
| 128 |
+ HDF5Array::H5ADMatrix(seed) |
|
| 129 |
+ }) |
|
| 103 | 130 |
} |
| 104 | 131 |
} |
| 105 | 132 |
|
| ... | ... |
@@ -136,7 +163,7 @@ read_attribute <- function(attr) {
|
| 136 | 163 |
#' @importFrom MultiAssayExperiment MultiAssayExperiment |
| 137 | 164 |
#' |
| 138 | 165 |
#' @export |
| 139 |
-ReadH5MU <- function(file) {
|
|
| 166 |
+ReadH5MU <- function(file, backed=FALSE) {
|
|
| 140 | 167 |
# Connect to the the file |
| 141 | 168 |
h5 <- open_and_check_mudata(file) |
| 142 | 169 |
|
| ... | ... |
@@ -149,26 +176,28 @@ ReadH5MU <- function(file) {
|
| 149 | 176 |
# Create an experiments list |
| 150 | 177 |
modalities <- lapply(assays, function(mod) {
|
| 151 | 178 |
view <- h5 & paste("mod", mod, sep="/")
|
| 152 |
- X <- read_matrix(view & "X") |
|
| 179 |
+ X <- read_matrix(view & "X", backed=backed) |
|
| 153 | 180 |
var <- read_with_index(view & "var") |
| 154 | 181 |
obs <- read_with_index(view & "obs") |
| 155 | 182 |
primary <- rownames(obs) |
| 156 | 183 |
rownames(obs) <- paste(mod, rownames(obs), sep="-") |
| 157 | 184 |
|
| 158 | 185 |
viewnames <- h5ls(view, recursive=FALSE)$name |
| 159 |
- colnames(X) <- rownames(var) |
|
| 160 |
- rownames(X) <- rownames(obs) |
|
| 186 |
+ rownames(X) <- rownames(var) |
|
| 187 |
+ colnames(X) <- rownames(obs) |
|
| 161 | 188 |
if ("obsm" %in% viewnames) {
|
| 162 | 189 |
obsmnames <- h5ls(view & "obsm", recursive=FALSE)$name |
| 163 | 190 |
obsm <- lapply(obsmnames, function(space) {
|
| 164 | 191 |
elem <- read_attribute(view & paste("obsm", space, sep="/"))
|
| 192 |
+ if (!is.data.frame(elem) && length(dim(elem)) > 1) |
|
| 193 |
+ elem <- t(elem) |
|
| 165 | 194 |
rownames(elem) <- rownames(obs) |
| 166 | 195 |
elem |
| 167 | 196 |
}) |
| 168 | 197 |
names(obsm) <- obsmnames |
| 169 |
- se <- SingleCellExperiment(assays=SimpleList(counts=t(X)), rowData=var, colData=obs, reducedDims=obsm) |
|
| 198 |
+ se <- SingleCellExperiment(assays=SimpleList(counts=X), rowData=var, colData=obs, reducedDims=obsm) |
|
| 170 | 199 |
} else {
|
| 171 |
- se <- SummarizedExperiment(assays=SimpleList(counts=t(X)), rowData=var, colData=obs) |
|
| 200 |
+ se <- SummarizedExperiment(assays=SimpleList(counts=X), rowData=var, colData=obs) |
|
| 172 | 201 |
} |
| 173 | 202 |
|
| 174 | 203 |
list(exp=se, primary=primary) |
- remove the workaround, it doesn't work. Therefore, this depends on the
rhdf5 version from https://github.com/ilia-kats/rhdf5/tree/attribute-references
- read-write of MultiAssayExperiments works
TODO:
- backed objects with DelayedArray
- figure out how to close all the HDF5 objects when we're done
| ... | ... |
@@ -1,4 +1,59 @@ |
| 1 |
-#' @importFrom rhdf5 H5Iget_type h5readAttributes h5ls H5Aexists H5Aopen H5Aread H5Aclose |
|
| 1 |
+#' @importFrom rhdf5 H5Aexists H5Aopen H5Aread H5Aclose H5Dread H5Dclose |
|
| 2 |
+#' @importMethodsFrom rhdf5 & |
|
| 3 |
+read_dataframe <- function(group) {
|
|
| 4 |
+ indexcol <- "_index" |
|
| 5 |
+ if (H5Aexists(group, "_index")) {
|
|
| 6 |
+ indexattr <- H5Aopen(group, "_index") |
|
| 7 |
+ indexcol <- H5Aread(indexattr) |
|
| 8 |
+ H5Aclose(indexattr) |
|
| 9 |
+ } |
|
| 10 |
+ |
|
| 11 |
+ orderedattr <- H5Aopen(group, "column-order") |
|
| 12 |
+ columnorder <- H5Aread(orderedattr) |
|
| 13 |
+ H5Aclose(orderedattr) |
|
| 14 |
+ |
|
| 15 |
+ col_list <- lapply(columnorder, function(name) {
|
|
| 16 |
+ col <- group & name |
|
| 17 |
+ values <- H5Dread(col) |
|
| 18 |
+ if (H5Aexists(col, "categories")) {
|
|
| 19 |
+ attr <- H5Aopen(col, "categories") |
|
| 20 |
+ labels <- H5Aread(attr) |
|
| 21 |
+ if (!is(labels, "H5IdComponent")) {
|
|
| 22 |
+ warning(paste0("found categories attribute for column ", name, ", but it is not a reference"))
|
|
| 23 |
+ } else {
|
|
| 24 |
+ values <- factor(as.integer(values), labels=H5Dread(labels)) |
|
| 25 |
+ H5Dclose(labels) |
|
| 26 |
+ } |
|
| 27 |
+ H5Aclose(attr) |
|
| 28 |
+ } |
|
| 29 |
+ H5Dclose(col) |
|
| 30 |
+ values |
|
| 31 |
+ }) |
|
| 32 |
+ names(col_list) <- columnorder |
|
| 33 |
+ index <- group & indexcol |
|
| 34 |
+ col_list[["row.names"]] <- H5Dread(group & indexcol) |
|
| 35 |
+ do.call(data.frame, args=col_list) |
|
| 36 |
+} |
|
| 37 |
+ |
|
| 38 |
+#' @importFrom rhdf5 H5Dread H5Aexists H5Aopen H5Aread H5Aclose |
|
| 39 |
+read_dataframe_legacy <- function(dataset) {
|
|
| 40 |
+ table <- H5Dread(dataset) |
|
| 41 |
+ |
|
| 42 |
+ indexcol <- "_index" |
|
| 43 |
+ if (H5Aexists(dataset, "_index")) {
|
|
| 44 |
+ index <- H5Aopen(dataset, "_index") |
|
| 45 |
+ indexcol <- H5Aread(index) |
|
| 46 |
+ H5Aclose(index) |
|
| 47 |
+ } |
|
| 48 |
+ |
|
| 49 |
+ if (indexcol %in% colnames(table)) {
|
|
| 50 |
+ rownames(table) <- table[,indexcol,drop=TRUE] |
|
| 51 |
+ table <- table[,!colnames(table) %in% c(indexcol),drop=FALSE] |
|
| 52 |
+ } |
|
| 53 |
+ table |
|
| 54 |
+} |
|
| 55 |
+ |
|
| 56 |
+#' @importFrom rhdf5 H5Iget_type H5Aexists H5Aopen H5Aread H5Aclose |
|
| 2 | 57 |
read_with_index <- function(dataset) {
|
| 3 | 58 |
cls <- H5Iget_type(dataset) |
| 4 | 59 |
if (cls == "H5I_GROUP" && H5Aexists(dataset, "encoding-type")) {
|
| ... | ... |
@@ -9,85 +64,36 @@ read_with_index <- function(dataset) {
|
| 9 | 64 |
warning(paste0("Unknown encoding ", encoding, " when attempting to read data frame"))
|
| 10 | 65 |
return(data.frame()) |
| 11 | 66 |
} |
| 12 |
- # Table is saved as a group rather than a dataset |
|
| 13 |
- indexcol <- "_index" |
|
| 14 |
- if (H5Aexists(dataset, "_index")) {
|
|
| 15 |
- indexattr <- H5Aopen(dataset, "_index") |
|
| 16 |
- indexcol <- H5Aread(indexattr) |
|
| 17 |
- H5Aclose(indexattr) |
|
| 18 |
- } |
|
| 19 |
- |
|
| 20 |
- columns <- h5ls(dataset, recursive=FALSE, datasetinfo=FALSE)$name |
|
| 21 |
- columns <- columns[columns != "__categories"] |
|
| 22 |
- |
|
| 23 |
- col_list <- lapply(columns, function(name) {
|
|
| 24 |
- values <- H5Dread(dataset & name) |
|
| 25 |
- col <- dataset & name |
|
| 26 |
- if (H5Aexists(col, "categories")) {
|
|
| 27 |
- attr <- H5Aopen(col, "categories") |
|
| 28 |
- if (H5is_attr_reference(attr)) {
|
|
| 29 |
- labels <- H5deref_attr_reference(attr) |
|
| 30 |
- values <- factor(as.integer(values), labels=H5Dread(labels)) |
|
| 31 |
- } else {
|
|
| 32 |
- warning(paste0("found categories attribute for column ", name, ", but it is not a reference"))
|
|
| 33 |
- } |
|
| 34 |
- H5Aclose(attr) |
|
| 35 |
- } |
|
| 36 |
- values |
|
| 37 |
- }) |
|
| 38 |
- table <- data.frame(Reduce(cbind, col_list)) |
|
| 39 |
- colnames(table) <- columns |
|
| 40 |
- |
|
| 41 |
- if (indexcol %in% colnames(table)) {
|
|
| 42 |
- rownames(table) <- table[,indexcol,drop=TRUE] |
|
| 43 |
- table <- table[,!colnames(table) %in% c(indexcol),drop=FALSE] |
|
| 44 |
- } |
|
| 45 |
- |
|
| 46 |
- # Fix column order |
|
| 47 |
- if (H5Aexists(dataset, "column-order")) {
|
|
| 48 |
- orderedattr <- H5Aopen(dataset, "column-order") |
|
| 49 |
- ordered_columns <- H5Aread(orderedattr) |
|
| 50 |
- H5Aclose(orderedattr) |
|
| 51 |
- |
|
| 52 |
- ordered_columns <- ordered_columns[ordered_columns != indexcol] |
|
| 53 |
- table <- table[,ordered_columns[ordered_columns %in% columns],drop=FALSE] |
|
| 54 |
- } |
|
| 67 |
+ read_dataframe(dataset) |
|
| 55 | 68 |
} else {
|
| 56 |
- table <- H5Dread(dataset) |
|
| 57 |
- |
|
| 58 |
- indexcol <- "_index" |
|
| 59 |
- if (H5Aexists(dataset, "_index")) {
|
|
| 60 |
- index <- H5Aopen(dataset, "_index") |
|
| 61 |
- indexcol <- H5Aread(index) |
|
| 62 |
- H5Aclose(index) |
|
| 63 |
- } |
|
| 69 |
+ read_dataframe_legacy(dataset) |
|
| 70 |
+ } |
|
| 71 |
+} |
|
| 64 | 72 |
|
| 65 |
- if (indexcol %in% colnames(table)) {
|
|
| 66 |
- rownames(table) <- table[,indexcol,drop=TRUE] |
|
| 67 |
- table <- table[,!colnames(table) %in% c(indexcol),drop=FALSE] |
|
| 68 |
- } |
|
| 73 |
+#' @importFrom rhdf5 H5Dread H5Aopen H5Aread H5Aclose |
|
| 74 |
+#' @importMethodsFrom rhdf5 & |
|
| 75 |
+read_sparse_matrix <- function(group, encoding) {
|
|
| 76 |
+ i <- as.vector(H5Dread(group & "indices")) |
|
| 77 |
+ p <- as.vector(H5Dread(group & "indptr")) |
|
| 78 |
+ x <- as.vector(H5Dread(group & "data")) |
|
| 79 |
+ shapeattr <- H5Aopen(group, "shape") |
|
| 80 |
+ shape <- H5Aread(shapeattr) |
|
| 81 |
+ H5Aclose(shapeattr) |
|
| 82 |
+ if (encoding == "csr_matrix") {
|
|
| 83 |
+ sparseMatrix(j=i, p=p, x=x, dims=shape, repr="R", index1=FALSE) |
|
| 84 |
+ } else {
|
|
| 85 |
+ sparseMatrix(i=i, p=p, x=x, dims=shape, repr="C", index1=FALSE) |
|
| 69 | 86 |
} |
| 70 |
- table |
|
| 71 | 87 |
} |
| 72 | 88 |
|
| 73 | 89 |
#' @importFrom rhdf5 H5Iget_type H5Aexists H5Aopen H5Aread H5Aclose H5Dread |
| 74 | 90 |
read_matrix <- function(dataset) {
|
| 75 | 91 |
if (H5Iget_type(dataset) == "H5I_GROUP" && H5Aexists(dataset, "encoding-type")) {
|
| 76 | 92 |
encattr <- H5Aopen(dataset, "encoding-type") |
| 77 |
- encoding <- H5Aread(dataset, "encoding") |
|
| 93 |
+ encoding <- H5Aread(encattr) |
|
| 78 | 94 |
H5Aclose(encattr) |
| 79 | 95 |
if (encoding %in% c("csr_matrix", "csc_matrix")) {
|
| 80 |
- i <- H5Dread(dataset & "indices") |
|
| 81 |
- p <- H5Dread(dataset & "indptr") |
|
| 82 |
- x <- H5Dread(dataset & "data") |
|
| 83 |
- shapeattr <- H5Aopen(dataset, "shape") |
|
| 84 |
- shape <- H5Aread(shapeattr) |
|
| 85 |
- H5Aclose(shapeattr) |
|
| 86 |
- if (encoding == "csr_matrix") {
|
|
| 87 |
- sparseMatrix(j=i, p=p, x=x, dims=shape, repr="R", index1=FALSE) |
|
| 88 |
- } else {
|
|
| 89 |
- sparseMatrix(i=i, p=p, x=x, dims=shape, repr="C", index1=FALSE) |
|
| 90 |
- } |
|
| 96 |
+ read_sparse_matrix(dataset, encoding) |
|
| 91 | 97 |
} else {
|
| 92 | 98 |
warning(paste0("Unknown encoding ", encoding, "when attempting to read matrix"))
|
| 93 | 99 |
matrix() |
| ... | ... |
@@ -97,11 +103,34 @@ read_matrix <- function(dataset) {
|
| 97 | 103 |
} |
| 98 | 104 |
} |
| 99 | 105 |
|
| 100 |
-#' @details MultiAssayExperiment-helpers |
|
| 101 |
-#' |
|
| 102 |
-#' @description Create a MultiAssayExperiment or a Seurat object from the .h5mu file |
|
| 106 |
+#' @importFrom rhdf5 H5Aopen H5Aread H5Aclose |
|
| 107 |
+read_group <- function(group) {
|
|
| 108 |
+ encattr <- H5Aopen(group, "encoding-type") |
|
| 109 |
+ encoding <- H5Aread(encattr) |
|
| 110 |
+ H5Aclose(encattr) |
|
| 111 |
+ if (encoding == "dataframe") {
|
|
| 112 |
+ read_dataframe(group) |
|
| 113 |
+ } else if (endsWith(encoding, "matrix")) {
|
|
| 114 |
+ read_sparse_matrix(group) |
|
| 115 |
+ } else {
|
|
| 116 |
+ warning(paste0("Unknown encoding ", encoding))
|
|
| 117 |
+ invisible(NULL) |
|
| 118 |
+ } |
|
| 119 |
+} |
|
| 120 |
+ |
|
| 121 |
+#' @importFrom rhdf5 H5Iget_type H5Dread |
|
| 122 |
+read_attribute <- function(attr) {
|
|
| 123 |
+ if (H5Iget_type(attr) == "H5I_GROUP") |
|
| 124 |
+ read_group(attr) |
|
| 125 |
+ else |
|
| 126 |
+ H5Dread(attr) |
|
| 127 |
+} |
|
| 128 |
+ |
|
| 129 |
+#' Read an .h5mu file and create a MultiAssayExperiment |
|
| 103 | 130 |
#' |
| 104 |
-#' @importFrom rhdf5 h5ls H5Dread |
|
| 131 |
+#' @importFrom rhdf5 h5ls H5Fclose |
|
| 132 |
+#' @importMethodsFrom rhdf5 & |
|
| 133 |
+#' @importFrom S4Vectors SimpleList |
|
| 105 | 134 |
#' @importFrom SingleCellExperiment SingleCellExperiment |
| 106 | 135 |
#' @importFrom SummarizedExperiment SummarizedExperiment |
| 107 | 136 |
#' @importFrom MultiAssayExperiment MultiAssayExperiment |
| ... | ... |
@@ -121,44 +150,34 @@ ReadH5MU <- function(file) {
|
| 121 | 150 |
modalities <- lapply(assays, function(mod) {
|
| 122 | 151 |
view <- h5 & paste("mod", mod, sep="/")
|
| 123 | 152 |
X <- read_matrix(view & "X") |
| 124 |
- |
|
| 125 | 153 |
var <- read_with_index(view & "var") |
| 126 |
- |
|
| 127 | 154 |
obs <- read_with_index(view & "obs") |
| 128 |
- if (is("obs", "data.frame"))
|
|
| 129 |
- rownames(obs) <- paste(mod, rownames(obs), sep="-") |
|
| 155 |
+ primary <- rownames(obs) |
|
| 156 |
+ rownames(obs) <- paste(mod, rownames(obs), sep="-") |
|
| 130 | 157 |
|
| 131 | 158 |
viewnames <- h5ls(view, recursive=FALSE)$name |
| 159 |
+ colnames(X) <- rownames(var) |
|
| 160 |
+ rownames(X) <- rownames(obs) |
|
| 132 | 161 |
if ("obsm" %in% viewnames) {
|
| 133 | 162 |
obsmnames <- h5ls(view & "obsm", recursive=FALSE)$name |
| 134 |
- obsm <- lapply(obsnames, function(space) {
|
|
| 135 |
- H5Dread(view & paste("obsm", space, sep="/"))
|
|
| 163 |
+ obsm <- lapply(obsmnames, function(space) {
|
|
| 164 |
+ elem <- read_attribute(view & paste("obsm", space, sep="/"))
|
|
| 165 |
+ rownames(elem) <- rownames(obs) |
|
| 166 |
+ elem |
|
| 136 | 167 |
}) |
| 137 | 168 |
names(obsm) <- obsmnames |
| 138 |
- se <- SingleCellExperiment(assays=SimpleList(counts=X), rowData=var, colData=obs, reducedDims=obsm) |
|
| 169 |
+ se <- SingleCellExperiment(assays=SimpleList(counts=t(X)), rowData=var, colData=obs, reducedDims=obsm) |
|
| 139 | 170 |
} else {
|
| 140 |
- se <- SummarizedExperiment(assays=SimpleList(counts=X), rowData=var, colData=obs) |
|
| 171 |
+ se <- SummarizedExperiment(assays=SimpleList(counts=t(X)), rowData=var, colData=obs) |
|
| 141 | 172 |
} |
| 142 | 173 |
|
| 143 |
- se |
|
| 174 |
+ list(exp=se, primary=primary) |
|
| 144 | 175 |
}) |
| 145 | 176 |
names(modalities) <- assays |
| 146 | 177 |
|
| 147 | 178 |
# Create sampleMap |
| 148 |
- mapping <- lapply(assays, function(mod) {
|
|
| 149 |
- primary <- colnames(modalities[[mod]]) |
|
| 150 |
- |
|
| 151 |
- view <- h5 & paste("mod", mod, sep="/")
|
|
| 152 |
- view_attr <- h5attributes(view[["obs"]]) |
|
| 153 |
- indexcol <- "_index" |
|
| 154 |
- if ("_index" %in% names(view_attr)) {
|
|
| 155 |
- indexcol <- view_attr$`_index` |
|
| 156 |
- } |
|
| 157 |
- obs_names <- view[['obs']]$read()[,indexcol,drop=TRUE] |
|
| 158 |
- sm <- data.frame(primary = obs_names, |
|
| 159 |
- colname = rownames(colData(modalities[[mod]])), |
|
| 160 |
- stringsAsFactors = FALSE) |
|
| 161 |
- sm |
|
| 179 |
+ mapping <- lapply(modalities, function(mod) {
|
|
| 180 |
+ data.frame(primary=mod$primary, colname=colnames(mod$exp), stringsAsFactors=FALSE) |
|
| 162 | 181 |
}) |
| 163 | 182 |
|
| 164 | 183 |
obsmap <- do.call(rbind, mapping) |
| ... | ... |
@@ -166,9 +185,9 @@ ReadH5MU <- function(file) {
|
| 166 | 185 |
obsmap <- obsmap[,c("assay", "primary", "colname")]
|
| 167 | 186 |
|
| 168 | 187 |
# Close the connection |
| 169 |
- h5$close_all() |
|
| 188 |
+ H5Fclose(h5) |
|
| 170 | 189 |
|
| 171 | 190 |
# Create a MAE object |
| 172 |
- MultiAssayExperiment(modalities, metadata, obsmap) |
|
| 191 |
+ MultiAssayExperiment(lapply(modalities, function(mod)mod$exp), metadata, obsmap) |
|
| 173 | 192 |
} |
| 174 | 193 |
|
Start port of muon.r MAE functions to rhdf5.
Includes a workaround for missing features in rhdf5. The workaround
works for file creation properties, but not for references, because the
H5Rdereference2 symbol is missing from the rhdf5.so library. This is
because the GNU linker, when linking with a static library, by default
only includes those objects from the static lib that are actually used
in the final output.
| 1 | 1 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,174 @@ |
| 1 |
+#' @importFrom rhdf5 H5Iget_type h5readAttributes h5ls H5Aexists H5Aopen H5Aread H5Aclose |
|
| 2 |
+read_with_index <- function(dataset) {
|
|
| 3 |
+ cls <- H5Iget_type(dataset) |
|
| 4 |
+ if (cls == "H5I_GROUP" && H5Aexists(dataset, "encoding-type")) {
|
|
| 5 |
+ encattr <- H5Aopen(dataset, "encoding-type") |
|
| 6 |
+ encoding <- H5Aread(encattr, "encoding-type") |
|
| 7 |
+ H5Aclose(encattr) |
|
| 8 |
+ if (encoding != "dataframe") {
|
|
| 9 |
+ warning(paste0("Unknown encoding ", encoding, " when attempting to read data frame"))
|
|
| 10 |
+ return(data.frame()) |
|
| 11 |
+ } |
|
| 12 |
+ # Table is saved as a group rather than a dataset |
|
| 13 |
+ indexcol <- "_index" |
|
| 14 |
+ if (H5Aexists(dataset, "_index")) {
|
|
| 15 |
+ indexattr <- H5Aopen(dataset, "_index") |
|
| 16 |
+ indexcol <- H5Aread(indexattr) |
|
| 17 |
+ H5Aclose(indexattr) |
|
| 18 |
+ } |
|
| 19 |
+ |
|
| 20 |
+ columns <- h5ls(dataset, recursive=FALSE, datasetinfo=FALSE)$name |
|
| 21 |
+ columns <- columns[columns != "__categories"] |
|
| 22 |
+ |
|
| 23 |
+ col_list <- lapply(columns, function(name) {
|
|
| 24 |
+ values <- H5Dread(dataset & name) |
|
| 25 |
+ col <- dataset & name |
|
| 26 |
+ if (H5Aexists(col, "categories")) {
|
|
| 27 |
+ attr <- H5Aopen(col, "categories") |
|
| 28 |
+ if (H5is_attr_reference(attr)) {
|
|
| 29 |
+ labels <- H5deref_attr_reference(attr) |
|
| 30 |
+ values <- factor(as.integer(values), labels=H5Dread(labels)) |
|
| 31 |
+ } else {
|
|
| 32 |
+ warning(paste0("found categories attribute for column ", name, ", but it is not a reference"))
|
|
| 33 |
+ } |
|
| 34 |
+ H5Aclose(attr) |
|
| 35 |
+ } |
|
| 36 |
+ values |
|
| 37 |
+ }) |
|
| 38 |
+ table <- data.frame(Reduce(cbind, col_list)) |
|
| 39 |
+ colnames(table) <- columns |
|
| 40 |
+ |
|
| 41 |
+ if (indexcol %in% colnames(table)) {
|
|
| 42 |
+ rownames(table) <- table[,indexcol,drop=TRUE] |
|
| 43 |
+ table <- table[,!colnames(table) %in% c(indexcol),drop=FALSE] |
|
| 44 |
+ } |
|
| 45 |
+ |
|
| 46 |
+ # Fix column order |
|
| 47 |
+ if (H5Aexists(dataset, "column-order")) {
|
|
| 48 |
+ orderedattr <- H5Aopen(dataset, "column-order") |
|
| 49 |
+ ordered_columns <- H5Aread(orderedattr) |
|
| 50 |
+ H5Aclose(orderedattr) |
|
| 51 |
+ |
|
| 52 |
+ ordered_columns <- ordered_columns[ordered_columns != indexcol] |
|
| 53 |
+ table <- table[,ordered_columns[ordered_columns %in% columns],drop=FALSE] |
|
| 54 |
+ } |
|
| 55 |
+ } else {
|
|
| 56 |
+ table <- H5Dread(dataset) |
|
| 57 |
+ |
|
| 58 |
+ indexcol <- "_index" |
|
| 59 |
+ if (H5Aexists(dataset, "_index")) {
|
|
| 60 |
+ index <- H5Aopen(dataset, "_index") |
|
| 61 |
+ indexcol <- H5Aread(index) |
|
| 62 |
+ H5Aclose(index) |
|
| 63 |
+ } |
|
| 64 |
+ |
|
| 65 |
+ if (indexcol %in% colnames(table)) {
|
|
| 66 |
+ rownames(table) <- table[,indexcol,drop=TRUE] |
|
| 67 |
+ table <- table[,!colnames(table) %in% c(indexcol),drop=FALSE] |
|
| 68 |
+ } |
|
| 69 |
+ } |
|
| 70 |
+ table |
|
| 71 |
+} |
|
| 72 |
+ |
|
| 73 |
+#' @importFrom rhdf5 H5Iget_type H5Aexists H5Aopen H5Aread H5Aclose H5Dread |
|
| 74 |
+read_matrix <- function(dataset) {
|
|
| 75 |
+ if (H5Iget_type(dataset) == "H5I_GROUP" && H5Aexists(dataset, "encoding-type")) {
|
|
| 76 |
+ encattr <- H5Aopen(dataset, "encoding-type") |
|
| 77 |
+ encoding <- H5Aread(dataset, "encoding") |
|
| 78 |
+ H5Aclose(encattr) |
|
| 79 |
+ if (encoding %in% c("csr_matrix", "csc_matrix")) {
|
|
| 80 |
+ i <- H5Dread(dataset & "indices") |
|
| 81 |
+ p <- H5Dread(dataset & "indptr") |
|
| 82 |
+ x <- H5Dread(dataset & "data") |
|
| 83 |
+ shapeattr <- H5Aopen(dataset, "shape") |
|
| 84 |
+ shape <- H5Aread(shapeattr) |
|
| 85 |
+ H5Aclose(shapeattr) |
|
| 86 |
+ if (encoding == "csr_matrix") {
|
|
| 87 |
+ sparseMatrix(j=i, p=p, x=x, dims=shape, repr="R", index1=FALSE) |
|
| 88 |
+ } else {
|
|
| 89 |
+ sparseMatrix(i=i, p=p, x=x, dims=shape, repr="C", index1=FALSE) |
|
| 90 |
+ } |
|
| 91 |
+ } else {
|
|
| 92 |
+ warning(paste0("Unknown encoding ", encoding, "when attempting to read matrix"))
|
|
| 93 |
+ matrix() |
|
| 94 |
+ } |
|
| 95 |
+ } else {
|
|
| 96 |
+ H5Dread(dataset) |
|
| 97 |
+ } |
|
| 98 |
+} |
|
| 99 |
+ |
|
| 100 |
+#' @details MultiAssayExperiment-helpers |
|
| 101 |
+#' |
|
| 102 |
+#' @description Create a MultiAssayExperiment or a Seurat object from the .h5mu file |
|
| 103 |
+#' |
|
| 104 |
+#' @importFrom rhdf5 h5ls H5Dread |
|
| 105 |
+#' @importFrom SingleCellExperiment SingleCellExperiment |
|
| 106 |
+#' @importFrom SummarizedExperiment SummarizedExperiment |
|
| 107 |
+#' @importFrom MultiAssayExperiment MultiAssayExperiment |
|
| 108 |
+#' |
|
| 109 |
+#' @export |
|
| 110 |
+ReadH5MU <- function(file) {
|
|
| 111 |
+ # Connect to the the file |
|
| 112 |
+ h5 <- open_and_check_mudata(file) |
|
| 113 |
+ |
|
| 114 |
+ # Check all the assays are written |
|
| 115 |
+ assays <- h5ls(h5 & "mod", recursive=FALSE)$name |
|
| 116 |
+ |
|
| 117 |
+ # Create global colData |
|
| 118 |
+ metadata <- read_with_index(h5 & "obs") |
|
| 119 |
+ |
|
| 120 |
+ # Create an experiments list |
|
| 121 |
+ modalities <- lapply(assays, function(mod) {
|
|
| 122 |
+ view <- h5 & paste("mod", mod, sep="/")
|
|
| 123 |
+ X <- read_matrix(view & "X") |
|
| 124 |
+ |
|
| 125 |
+ var <- read_with_index(view & "var") |
|
| 126 |
+ |
|
| 127 |
+ obs <- read_with_index(view & "obs") |
|
| 128 |
+ if (is("obs", "data.frame"))
|
|
| 129 |
+ rownames(obs) <- paste(mod, rownames(obs), sep="-") |
|
| 130 |
+ |
|
| 131 |
+ viewnames <- h5ls(view, recursive=FALSE)$name |
|
| 132 |
+ if ("obsm" %in% viewnames) {
|
|
| 133 |
+ obsmnames <- h5ls(view & "obsm", recursive=FALSE)$name |
|
| 134 |
+ obsm <- lapply(obsnames, function(space) {
|
|
| 135 |
+ H5Dread(view & paste("obsm", space, sep="/"))
|
|
| 136 |
+ }) |
|
| 137 |
+ names(obsm) <- obsmnames |
|
| 138 |
+ se <- SingleCellExperiment(assays=SimpleList(counts=X), rowData=var, colData=obs, reducedDims=obsm) |
|
| 139 |
+ } else {
|
|
| 140 |
+ se <- SummarizedExperiment(assays=SimpleList(counts=X), rowData=var, colData=obs) |
|
| 141 |
+ } |
|
| 142 |
+ |
|
| 143 |
+ se |
|
| 144 |
+ }) |
|
| 145 |
+ names(modalities) <- assays |
|
| 146 |
+ |
|
| 147 |
+ # Create sampleMap |
|
| 148 |
+ mapping <- lapply(assays, function(mod) {
|
|
| 149 |
+ primary <- colnames(modalities[[mod]]) |
|
| 150 |
+ |
|
| 151 |
+ view <- h5 & paste("mod", mod, sep="/")
|
|
| 152 |
+ view_attr <- h5attributes(view[["obs"]]) |
|
| 153 |
+ indexcol <- "_index" |
|
| 154 |
+ if ("_index" %in% names(view_attr)) {
|
|
| 155 |
+ indexcol <- view_attr$`_index` |
|
| 156 |
+ } |
|
| 157 |
+ obs_names <- view[['obs']]$read()[,indexcol,drop=TRUE] |
|
| 158 |
+ sm <- data.frame(primary = obs_names, |
|
| 159 |
+ colname = rownames(colData(modalities[[mod]])), |
|
| 160 |
+ stringsAsFactors = FALSE) |
|
| 161 |
+ sm |
|
| 162 |
+ }) |
|
| 163 |
+ |
|
| 164 |
+ obsmap <- do.call(rbind, mapping) |
|
| 165 |
+ obsmap["assay"] <- rep(assays, times=vapply(mapping, nrow, 1)) |
|
| 166 |
+ obsmap <- obsmap[,c("assay", "primary", "colname")]
|
|
| 167 |
+ |
|
| 168 |
+ # Close the connection |
|
| 169 |
+ h5$close_all() |
|
| 170 |
+ |
|
| 171 |
+ # Create a MAE object |
|
| 172 |
+ MultiAssayExperiment(modalities, metadata, obsmap) |
|
| 173 |
+} |
|
| 174 |
+ |