Thanks to visit codestin.com
Credit goes to code.bioconductor.org

Browse code

reading: cast 1D-arrays to vectors and 2D-vectors to matrices

this is needed to satisfy testthat

Ilia Kats authored on 12/10/2023 13:39:16
Showing 1 changed files
... ...
@@ -183,7 +183,12 @@ read_matrix <- function(dataset, backed=FALSE) {
183 183
             cls <- "Dense_H5ADMatrixSeed"
184 184
             seed <- HDF5Array::HDF5ArraySeed
185 185
         } else {
186
-            return(H5Dread(dataset))
186
+            dset <- H5Dread(dataset)
187
+            if (length(dim(dset)) == 1)
188
+                dset <- as.vector(dset)
189
+            else if (length(dim(dset)) == 2)
190
+                dset <- as.matrix(dset)
191
+            return(dset)
187 192
         }
188 193
     }
189 194
     if (backed) {
... ...
@@ -214,6 +219,10 @@ read_array <- function(attr, encoding, strict=TRUE) {
214 219
         check_encodingversion(attr, encoding, "0.2.0")
215 220
 
216 221
     ret <- H5Dread(attr)
222
+    if (length(dim(ret)) == 1)
223
+        ret <- as.vector(ret)
224
+    else if (length(dim(ret)) == 2)
225
+        ret <- as.matrix(ret)
217 226
     # h5py saves boolean arrays as HDF5 enums
218 227
     if (is.factor(ret) && all(levels(ret) == c("FALSE", "TRUE"))) {
219 228
         ret <- as.logical(ret)
Browse code

correctly handle R/HDF5 dense array storage order

Ilia Kats authored on 11/10/2023 11:25:39
Showing 1 changed files
... ...
@@ -222,6 +222,8 @@ read_array <- function(attr, encoding, strict=TRUE) {
222 222
     if (!is.null(encoding) && (endsWith(encoding, "-scalar") || encoding == "string")) {
223 223
         attr(ret, "encoding-scalar") <- TRUE
224 224
     }
225
+    if (length(dim(ret)) > 1)
226
+        ret <- t(ret)
225 227
     ret
226 228
 }
227 229
 
... ...
@@ -329,8 +331,6 @@ read_modality <- function(view, backed=FALSE) {
329 331
         obsmnames <- h5ls(h5autoclose(view & "obsm"), recursive=FALSE)$name
330 332
         obsm <- lapply(obsmnames, function(space) {
331 333
             elem <- read_attribute(h5autoclose(view & paste("obsm", space, sep="/")))
332
-            if (!is.data.frame(elem) && length(dim(elem)) > 1)
333
-                elem <- t(elem)
334 334
             rownames(elem) <- rownames(obs)
335 335
             elem
336 336
         })
Browse code

use the new convert_categoricals function

Ilia Kats authored on 08/09/2023 15:09:50
Showing 1 changed files
... ...
@@ -76,9 +76,9 @@ read_dataframe <- function(group, encoding) {
76 76
 #' Helper function to convert values + labels into factors
77 77
 #'
78 78
 #' @description  A helper function to convert categories into factors.
79
-#'  Assumptions: 
79
+#'  Assumptions:
80 80
 #'      - values correspond to the zero indexed categories
81
-#'          (i.e. value 0 is the first category) 
81
+#'          (i.e. value 0 is the first category)
82 82
 #'      - NA are encoded with a value -1
83 83
 #'  Categories not uses will be dropped.
84 84
 #'
... ...
@@ -88,7 +88,7 @@ read_dataframe <- function(group, encoding) {
88 88
 #' @returns factor with categorical values
89 89
 #'
90 90
 #' @keywords internal
91
-#' @noRd 
91
+#' @noRd
92 92
 convert_categoricals <- function(values, categories) {
93 93
     # The levels are 0 indexed integers
94 94
     levels <- seq_len(length(categories))-1
... ...
@@ -236,13 +236,7 @@ read_categorical <- function(group, encoding) {
236 236
 
237 237
     values <- as.integer(H5Dread(h5autoclose(group & "codes")))
238 238
     labels <- H5Dread(h5autoclose(group & "categories"))
239
-    n_labels <- max(values) + 1
240
-    if (length(labels) > n_labels) {
241
-        labels_items <- labels_items[seq_len(n_labels)]
242
-    }
243
-
244
-    values[values == -1] <- NA
245
-    factor(values, labels=labels, ordered=ordered)
239
+    convert_categoricals(values, labels)
246 240
 }
247 241
 
248 242
 #' @importFrom rhdf5 H5Dread
Browse code

implement reading of AnnData 0.8+ files

Ilia Kats authored on 08/09/2023 11:28:46
Showing 1 changed files
... ...
@@ -1,7 +1,34 @@
1 1
 #' @importFrom rhdf5 H5Aexists H5Aopen H5Aread H5Aclose H5Dread H5Dclose H5Rdereference
2
+#' @importFrom S4Vectors DataFrame
2 3
 #' @importMethodsFrom rhdf5 &
3 4
 #' @importFrom methods is
4
-read_dataframe <- function(group) {
5
+read_dataframe_020 <- function(group, encoding, strict=TRUE) {
6
+    if (strict)
7
+        check_encodingversion(group, encoding, "0.2.0")
8
+    indexattr <- H5Aopen(group, "_index")
9
+    indexcol <- H5Aread(indexattr)
10
+    H5Aclose(indexattr)
11
+
12
+    orderedattr <- H5Aopen(group, "column-order")
13
+    columnorder <- H5Aread(orderedattr)
14
+    H5Aclose(orderedattr)
15
+
16
+    col_list <- lapply(columnorder, function(name) {
17
+        read_attribute(h5autoclose(group & name))
18
+    })
19
+    names(col_list) <- columnorder
20
+    index <- h5autoclose(group & indexcol)
21
+    col_list[["row.names"]] <- H5Dread(index)
22
+    do.call(DataFrame, args=col_list)
23
+}
24
+
25
+#' @importFrom rhdf5 H5Aexists H5Aopen H5Aread H5Aclose H5Dread H5Dclose H5Rdereference
26
+#' @importFrom S4Vectors DataFrame
27
+#' @importMethodsFrom rhdf5 &
28
+#' @importFrom methods is
29
+read_dataframe_010 <- function(group, encoding, strict=TRUE) {
30
+    if (strict)
31
+        check_encodingversion(group, encoding, "0.1.0")
5 32
     indexcol <- "_index"
6 33
     if (H5Aexists(group, "_index")) {
7 34
         indexattr <- H5Aopen(group, "_index")
... ...
@@ -34,10 +61,16 @@ read_dataframe <- function(group) {
34 61
         values
35 62
     })
36 63
     names(col_list) <- columnorder
37
-    index <- group & indexcol
38
-    col_list[["row.names"]] <- H5Dread(index)
39
-    H5Dclose(index)
40
-    do.call(data.frame, args=col_list)
64
+    col_list[["row.names"]] <- H5Dread(h5autoclose(group & indexcol))
65
+    do.call(DataFrame, args=col_list)
66
+}
67
+
68
+read_dataframe <- function(group, encoding) {
69
+    version <- check_encodingversion(group, encoding, c("0.1.0", "0.2.0"))
70
+    if (version == "0.1.0")
71
+        read_dataframe_010(group, encoding, strict=FALSE)
72
+    else
73
+        read_dataframe_020(group, encoding, strict=FALSE)
41 74
 }
42 75
 
43 76
 #' Helper function to convert values + labels into factors
... ...
@@ -100,18 +133,16 @@ read_with_index <- function(dataset) {
100 133
     }
101 134
 }
102 135
 
103
-#' @importFrom rhdf5 H5Dread H5Dclose H5Aopen H5Aread H5Aclose
136
+#' @importFrom rhdf5 H5Dread H5Aopen H5Aread H5Aclose
104 137
 #' @importMethodsFrom rhdf5 &
105 138
 read_sparse_matrix <- function(group, encoding, backed=FALSE) {
106
-    indices <- group & "indices"
107
-    indptr <- group & "indptr"
108
-    data <- group & "data"
139
+    check_encodingversion(group, encoding, "0.1.0")
140
+    indices <- h5autoclose(group & "indices")
141
+    indptr <- h5autoclose(group & "indptr")
142
+    data <- h5autoclose(group & "data")
109 143
     i <- as.vector(H5Dread(indices))
110 144
     p <- as.vector(H5Dread(indptr))
111 145
     x <- as.vector(H5Dread(data))
112
-    H5Dclose(indices)
113
-    H5Dclose(indptr)
114
-    H5Dclose(data)
115 146
     shapeattr <- H5Aopen(group, "shape")
116 147
     shape <- H5Aread(shapeattr)
117 148
     H5Aclose(shapeattr)
... ...
@@ -166,41 +197,113 @@ read_matrix <- function(dataset, backed=FALSE) {
166 197
     }
167 198
 }
168 199
 
169
-#' @importFrom rhdf5 H5Aopen H5Aread H5Aclose H5Aexists h5ls
200
+#' @importFrom rhdf5 h5ls
201
+#' @importMethodsFrom rhdf5 &
170 202
 #' @importFrom stats setNames
171
-read_group <- function(group, read_uns=FALSE) {
172
-    if (H5Aexists(group, "encoding-type")) {
173
-        encattr <- H5Aopen(group, "encoding-type")
174
-        encoding <- H5Aread(encattr)
175
-        H5Aclose(encattr)
176
-
177
-        if (encoding == "dataframe") {
178
-            return(read_dataframe(group))
179
-        } else if (endsWith(encoding, "matrix")) {
180
-            return(read_sparse_matrix(group))
181
-        } else {
182
-            warning("Unknown encoding ", encoding)
183
-            if (!read_uns)
184
-                return(invisible(NULL))
185
-        }
186
-    }
203
+read_dict <- function(group, encoding, strict=TRUE) {
204
+    if (strict)
205
+        check_encodingversion(group, encoding, "0.1.0")
187 206
 
188 207
     objects <- h5ls(group, recursive=FALSE, datasetinfo=FALSE)$name
189 208
     lapply(setNames(nm=objects), function(x)read_attribute(h5autoclose(group & x)))
190 209
 }
191 210
 
192
-#' @importFrom rhdf5 H5Iget_type H5Dread
211
+#' @importFrom rhdf5 H5Dread
212
+read_array <- function(attr, encoding, strict=TRUE) {
213
+    if (strict)
214
+        check_encodingversion(attr, encoding, "0.2.0")
215
+
216
+    ret <- H5Dread(attr)
217
+    # h5py saves boolean arrays as HDF5 enums
218
+    if (is.factor(ret) && all(levels(ret) == c("FALSE", "TRUE"))) {
219
+        ret <- as.logical(ret)
220
+    }
221
+
222
+    if (!is.null(encoding) && (endsWith(encoding, "-scalar") || encoding == "string")) {
223
+        attr(ret, "encoding-scalar") <- TRUE
224
+    }
225
+    ret
226
+}
227
+
228
+#' @importFrom rhdf5 H5Aopen H5Aread H5Aclose H5Dread
229
+#' @importMethodsFrom rhdf5 &
230
+read_categorical <- function(group, encoding) {
231
+    check_encodingversion(group, encoding, "0.2.0")
232
+
233
+    orderedattr <- H5Aopen(group, "ordered")
234
+    ordered <- as.logical(H5Aread(orderedattr))
235
+    H5Aclose(orderedattr)
236
+
237
+    values <- as.integer(H5Dread(h5autoclose(group & "codes")))
238
+    labels <- H5Dread(h5autoclose(group & "categories"))
239
+    n_labels <- max(values) + 1
240
+    if (length(labels) > n_labels) {
241
+        labels_items <- labels_items[seq_len(n_labels)]
242
+    }
243
+
244
+    values[values == -1] <- NA
245
+    factor(values, labels=labels, ordered=ordered)
246
+}
247
+
248
+#' @importFrom rhdf5 H5Dread
249
+#' @importMethodsFrom rhdf5 &
250
+read_nullable <- function(group, encoding) {
251
+    check_encodingversion(group, encoding, "0.1.0")
252
+    values <- H5Dread(h5autoclose(group & "values"))
253
+    mask <- as.logical(H5Dread(h5autoclose(group & "mask")))
254
+    values[mask] <- NA
255
+    if (endsWith(encoding, "-boolean")) {
256
+        values <- as.logical(values)
257
+    }
258
+    values
259
+}
260
+
261
+.read_funcs <- list("array"=read_array,
262
+                    "csr_matrix"=read_sparse_matrix,
263
+                    "csc_matrix"=read_sparse_matrix,
264
+                    "dataframe"=read_dataframe,
265
+                    "dict"=read_dict,
266
+                    "numeric-scalar"=read_array,
267
+                    "string"=read_array,
268
+                    "categorical"=read_categorical,
269
+                    "string-array"=read_array,
270
+                    "nullable-integer"=read_nullable,
271
+                    "nullable-boolean"=read_nullable)
272
+
273
+#' @importFrom rhdf5 H5Aexists H5Aopen H5Aread H5Aclose H5Iget_type H5Iget_name
193 274
 read_attribute <- function(attr) {
194
-    if (H5Iget_type(attr) == "H5I_GROUP")
195
-        read_group(attr)
196
-    else {
197
-        values <- H5Dread(attr)
198
-        # h5py saves boolean arrays as HDF5 enums
199
-        if (is.factor(values) && all(levels(values) == c("FALSE", "TRUE"))) {
200
-            values <- as.logical(values)
275
+    ret <- NULL
276
+    if (H5Aexists(attr, "encoding-type")) {
277
+        encattr <- H5Aopen(attr, "encoding-type")
278
+        encoding <- H5Aread(encattr)
279
+        H5Aclose(encattr)
280
+
281
+        func <- switch(encoding,
282
+                      "array"=read_array,
283
+                      "csr_matrix"=read_sparse_matrix,
284
+                      "csc_matrix"=read_sparse_matrix,
285
+                      "dataframe"=read_dataframe,
286
+                      "dict"=read_dict,
287
+                      "numeric-scalar"=read_array,
288
+                      "string"=read_array,
289
+                      "categorical"=read_categorical,
290
+                      "string-array"=read_array,
291
+                      "nullable-integer"=read_nullable,
292
+                      "nullable-boolean"=read_nullable)
293
+        if (is.null(func)) {
294
+            warning("Unknown encoding ", encoding, " for element ", H5Iget_name(attr))
295
+        } else {
296
+            ret <- func(attr, encoding)
201 297
         }
202
-        values
203 298
     }
299
+
300
+    if (is.null(ret)) {
301
+        if (H5Iget_type(attr) == "H5I_GROUP")
302
+            ret <- read_dict(attr, NULL, strict=FALSE)
303
+        else
304
+            ret <- read_array(attr, NULL, strict=FALSE)
305
+    }
306
+    ret
204 307
 }
205 308
 
206 309
 #' @importFrom stats setNames
... ...
@@ -273,7 +376,7 @@ read_modality <- function(view, backed=FALSE) {
273 376
     }
274 377
 
275 378
     if ("uns" %in% viewnames)
276
-        metadata(se) <- read_group(h5autoclose(view & "uns"))
379
+        metadata(se) <- read_dict(h5autoclose(view & "uns"), NULL, strict=FALSE)
277 380
     se
278 381
 }
279 382
 
... ...
@@ -350,9 +453,8 @@ readH5MU <- function(file, backed=FALSE) {
350 453
     # if colData(MAE) has different row names than the experiments
351 454
     if (H5Lexists(h5, "obsmap")) {
352 455
         samplemaps <- lapply(assays, function(mod) {
353
-            cmapdset <- h5 & paste("obsmap", mod, sep="/")
456
+            cmapdset <- h5autoclose(h5 & paste("obsmap", mod, sep="/"))
354 457
             cmap <- H5Dread(cmapdset)
355
-            H5Dclose(cmapdset)
356 458
 
357 459
             idx <- which(cmap > 0)
358 460
             data.frame(assay=mod, primary=rownames(metadata)[idx], colname=colnames(modalities[[mod]])[cmap[idx]])
... ...
@@ -363,7 +465,7 @@ readH5MU <- function(file, backed=FALSE) {
363 465
     }
364 466
 
365 467
     if (H5Lexists(h5, "uns"))
366
-        args$metadata <- read_group(h5autoclose(h5 & "uns"))
468
+        args$metadata <- read_dict(h5autoclose(h5 & "uns"), NULL, strict=FALSE)
367 469
 
368 470
     # Close the connection
369 471
     H5Fclose(h5)
Browse code

Add support for categorical columns with NA

Anndata 0.7.8 exports categories with NA with a value -1,
causing the reading of mudata/anndata to fail.

This adds support for NAs.
The convert_categoricals function is added as an internal function
in order to facilitate testing of this logic.

Fixes: #6

Vito Zanotelli authored on 01/09/2023 10:32:06
Showing 1 changed files
... ...
@@ -25,11 +25,7 @@ read_dataframe <- function(group) {
25 25
             } else {
26 26
                 labels <- H5Rdereference(labels, h5loc=col)
27 27
                 labels_items <- H5Dread(labels)
28
-                n_labels <- length(unique(values))
29
-                if (length(labels_items) > n_labels) {
30
-                    labels_items <- labels_items[seq_len(n_labels)]
31
-                }
32
-                values <- factor(as.integer(values), labels=labels_items)
28
+                values <- convert_categoricals(values, labels_items)
33 29
                 H5Dclose(labels)
34 30
             }
35 31
             H5Aclose(attr)
... ...
@@ -44,6 +40,30 @@ read_dataframe <- function(group) {
44 40
     do.call(data.frame, args=col_list)
45 41
 }
46 42
 
43
+#' Helper function to convert values + labels into factors
44
+#'
45
+#' @description  A helper function to convert categories into factors.
46
+#'  Assumptions: 
47
+#'      - values correspond to the zero indexed categories
48
+#'          (i.e. value 0 is the first category) 
49
+#'      - NA are encoded with a value -1
50
+#'  Categories not uses will be dropped.
51
+#'
52
+#' @param values Vector of integer level numbers (zero indexed). -1 indicate NA
53
+#' @param categories Labels for level numbers (zero indexed).
54
+#'
55
+#' @returns factor with categorical values
56
+#'
57
+#' @keywords internal
58
+#' @noRd 
59
+convert_categoricals <- function(values, categories) {
60
+    # The levels are 0 indexed integers
61
+    levels <- seq_len(length(categories))-1
62
+    value_factor <- factor(as.integer(values), levels, labels=categories)
63
+    # Drop unused levels
64
+    droplevels(value_factor)
65
+}
66
+
47 67
 #' @importFrom rhdf5 H5Dread H5Aexists H5Aopen H5Aread H5Aclose
48 68
 read_dataframe_legacy <- function(dataset) {
49 69
     table <- H5Dread(dataset)
Browse code

work around deprecated conversion in Matrix

Ilia Kats authored on 25/08/2022 15:20:30
Showing 1 changed files
... ...
@@ -176,7 +176,7 @@ read_attribute <- function(attr) {
176 176
     else {
177 177
         values <- H5Dread(attr)
178 178
         # h5py saves boolean arrays as HDF5 enums
179
-        if (is.factor(values) && levels(values) == c("FALSE", "TRUE")) {
179
+        if (is.factor(values) && all(levels(values) == c("FALSE", "TRUE"))) {
180 180
             values <- as.logical(values)
181 181
         }
182 182
         values
Browse code

don't use multiple dispatch, as suggested in https://github.com/Bioconductor/Contributions/issues/2453

Ilia Kats authored on 15/02/2022 12:59:30
Showing 1 changed files
... ...
@@ -15,16 +15,12 @@ read_dataframe <- function(group) {
15 15
 
16 16
     col_list <- lapply(columnorder, function(name) {
17 17
         col <- group & name
18
-        values <- H5Dread(col)
19
-        # h5py saves boolean arrays as HDF5 enums
20
-        if (is.factor(values) && levels(values) == c("FALSE", "TRUE")) {
21
-            values <- as.logical(values)
22
-        }
18
+        values <- read_attribute(col)
23 19
         if (H5Aexists(col, "categories")) {
24 20
             attr <- H5Aopen(col, "categories")
25 21
             labels <- H5Aread(attr)
26 22
             if (!is(labels, "H5Ref")) {
27
-                warning("found categories attribute for column ", 
23
+                warning("found categories attribute for column ",
28 24
                         name, ", but it is not a reference")
29 25
             } else {
30 26
                 labels <- H5Rdereference(labels, h5loc=col)
... ...
@@ -74,7 +70,7 @@ read_with_index <- function(dataset) {
74 70
         encoding <- H5Aread(encattr, "encoding-type")
75 71
         H5Aclose(encattr)
76 72
         if (encoding != "dataframe") {
77
-            warning("Unknown encoding ", encoding, 
73
+            warning("Unknown encoding ", encoding,
78 74
                     " when attempting to read data frame")
79 75
             return(data.frame())
80 76
         }
... ...
@@ -177,8 +173,14 @@ read_group <- function(group, read_uns=FALSE) {
177 173
 read_attribute <- function(attr) {
178 174
     if (H5Iget_type(attr) == "H5I_GROUP")
179 175
         read_group(attr)
180
-    else
181
-        H5Dread(attr)
176
+    else {
177
+        values <- H5Dread(attr)
178
+        # h5py saves boolean arrays as HDF5 enums
179
+        if (is.factor(values) && levels(values) == c("FALSE", "TRUE")) {
180
+            values <- as.logical(values)
181
+        }
182
+        values
183
+    }
182 184
 }
183 185
 
184 186
 #' @importFrom stats setNames
Browse code

Add more tests

Danila Bredikhin authored on 31/01/2022 02:04:56
Showing 1 changed files
... ...
@@ -219,7 +219,6 @@ read_modality <- function(view, backed=FALSE) {
219 219
         args$reducedDims <- obsm
220 220
     }
221 221
 
222
-    objectclass <- NULL
223 222
     if (H5Aexists(view, "origin-class")) {
224 223
         originattr <- H5Aopen(view, "origin-class")
225 224
         objectclass <- H5Aread(originattr)
... ...
@@ -233,9 +232,9 @@ read_modality <- function(view, backed=FALSE) {
233 232
 		}
234 233
 		se <- do.call(SingleCellExperiment, args)
235 234
 	}
236
-    }
237
-    if (is.null(objectclass))
235
+    } else {
238 236
 	se <- do.call(SingleCellExperiment, args)
237
+    }
239 238
 
240 239
     for (cp in list(list(name="obsp", setter=`colPair<-`), list(name="varp", setter=`rowPair<-`))) {
241 240
         if (cp$name %in% viewnames) {
Browse code

Write down and restore object class

Object class is saved as an attribute for the root group
of .h5ad and .h5mu files and as an attribute for individual
modalities of .h5mu files.

Unless SummarizedExperiment was saved as a modality,
a SingleCellExperiment is currently created from it.

Danila Bredikhin authored on 31/01/2022 01:59:07
Showing 1 changed files
... ...
@@ -219,7 +219,23 @@ read_modality <- function(view, backed=FALSE) {
219 219
         args$reducedDims <- obsm
220 220
     }
221 221
 
222
-    se <- do.call(SingleCellExperiment, args)
222
+    objectclass <- NULL
223
+    if (H5Aexists(view, "origin-class")) {
224
+        originattr <- H5Aopen(view, "origin-class")
225
+        objectclass <- H5Aread(originattr)
226
+        H5Aclose(originattr)
227
+
228
+	if (objectclass == "SummarizedExperiment") {
229
+		se <- do.call(SummarizedExperiment, args)
230
+	} else {
231
+		if (objectclass != "SingleCellExperiment") {
232
+            		message("Reading as SingleCellExperiment where the original object class is ", objectclass)
233
+		}
234
+		se <- do.call(SingleCellExperiment, args)
235
+	}
236
+    }
237
+    if (is.null(objectclass))
238
+	se <- do.call(SingleCellExperiment, args)
223 239
 
224 240
     for (cp in list(list(name="obsp", setter=`colPair<-`), list(name="varp", setter=`rowPair<-`))) {
225 241
         if (cp$name %in% viewnames) {
Browse code

Minor code style changes

Danila Bredikhin authored on 17/01/2022 13:29:09
Showing 1 changed files
... ...
@@ -24,14 +24,15 @@ read_dataframe <- function(group) {
24 24
             attr <- H5Aopen(col, "categories")
25 25
             labels <- H5Aread(attr)
26 26
             if (!is(labels, "H5Ref")) {
27
-                warning("found categories attribute for column ", name, ", but it is not a reference")
27
+                warning("found categories attribute for column ", 
28
+                        name, ", but it is not a reference")
28 29
             } else {
29 30
                 labels <- H5Rdereference(labels, h5loc=col)
30
-	        labels_items <- H5Dread(labels)
31
-		n_labels <- length(unique(values))
32
-		if (length(labels_items) > n_labels) {
33
-			labels_items <- labels_items[seq_len(n_labels)]
34
-		}
31
+                labels_items <- H5Dread(labels)
32
+                n_labels <- length(unique(values))
33
+                if (length(labels_items) > n_labels) {
34
+                    labels_items <- labels_items[seq_len(n_labels)]
35
+                }
35 36
                 values <- factor(as.integer(values), labels=labels_items)
36 37
                 H5Dclose(labels)
37 38
             }
... ...
@@ -73,7 +74,8 @@ read_with_index <- function(dataset) {
73 74
         encoding <- H5Aread(encattr, "encoding-type")
74 75
         H5Aclose(encattr)
75 76
         if (encoding != "dataframe") {
76
-            warning("Unknown encoding ", encoding, " when attempting to read data frame")
77
+            warning("Unknown encoding ", encoding, 
78
+                    " when attempting to read data frame")
77 79
             return(data.frame())
78 80
         }
79 81
         read_dataframe(dataset)
Browse code

Error out when no HDF5Array and backed=TRUE

Danila Bredikhin authored on 10/01/2022 14:49:27
Showing 1 changed files
... ...
@@ -110,8 +110,7 @@ read_matrix <- function(dataset, backed=FALSE) {
110 110
     if (backed) {
111 111
         have_delayedarray <- requireNamespace("HDF5Array", quietly=TRUE)
112 112
         if (!have_delayedarray) {
113
-            warning("Could not load the HDF5Array package. HDF5Array is required for backed matrices. Loading matrix into memory...")
114
-            backed <- FALSE
113
+            stop("Could not load the HDF5Array package. HDF5Array is required for backed matrices.")
115 114
         }
116 115
     }
117 116
 
Browse code

first implementation of metadata read/write

Ilia Kats authored on 21/12/2021 12:28:26
Showing 1 changed files
... ...
@@ -149,19 +149,27 @@ read_matrix <- function(dataset, backed=FALSE) {
149 149
     }
150 150
 }
151 151
 
152
-#' @importFrom rhdf5 H5Aopen H5Aread H5Aclose
153
-read_group <- function(group) {
154
-    encattr <- H5Aopen(group, "encoding-type")
155
-    encoding <- H5Aread(encattr)
156
-    H5Aclose(encattr)
157
-    if (encoding == "dataframe") {
158
-        read_dataframe(group)
159
-    } else if (endsWith(encoding, "matrix")) {
160
-        read_sparse_matrix(group)
161
-    } else {
162
-        warning("Unknown encoding ", encoding)
163
-        invisible(NULL)
152
+#' @importFrom rhdf5 H5Aopen H5Aread H5Aclose H5Aexists h5ls
153
+#' @importFrom stats setNames
154
+read_group <- function(group, read_uns=FALSE) {
155
+    if (H5Aexists(group, "encoding-type")) {
156
+        encattr <- H5Aopen(group, "encoding-type")
157
+        encoding <- H5Aread(encattr)
158
+        H5Aclose(encattr)
159
+
160
+        if (encoding == "dataframe") {
161
+            return(read_dataframe(group))
162
+        } else if (endsWith(encoding, "matrix")) {
163
+            return(read_sparse_matrix(group))
164
+        } else {
165
+            warning("Unknown encoding ", encoding)
166
+            if (!read_uns)
167
+                return(invisible(NULL))
168
+        }
164 169
     }
170
+
171
+    objects <- h5ls(group, recursive=FALSE, datasetinfo=FALSE)$name
172
+    lapply(setNames(nm=objects), function(x)read_attribute(h5autoclose(group & x)))
165 173
 }
166 174
 
167 175
 #' @importFrom rhdf5 H5Iget_type H5Dread
... ...
@@ -178,7 +186,7 @@ read_attribute <- function(attr) {
178 186
 #' @importFrom S4Vectors SimpleList
179 187
 #' @importFrom SingleCellExperiment SingleCellExperiment
180 188
 #' @importMethodsFrom SingleCellExperiment colPair<- rowPair<-
181
-#' @importFrom SummarizedExperiment SummarizedExperiment
189
+#' @importFrom SummarizedExperiment SummarizedExperiment metadata<-
182 190
 #' @importFrom methods is
183 191
 read_modality <- function(view, backed=FALSE) {
184 192
     X <- read_matrix(h5autoclose(view & "X"), backed=backed)
... ...
@@ -225,6 +233,9 @@ read_modality <- function(view, backed=FALSE) {
225 233
             }
226 234
         }
227 235
     }
236
+
237
+    if ("uns" %in% viewnames)
238
+        metadata(se) <- read_group(h5autoclose(view & "uns"))
228 239
     se
229 240
 }
230 241
 
... ...
@@ -313,6 +324,9 @@ readH5MU <- function(file, backed=FALSE) {
313 324
         args$sampleMap <- sampleMap
314 325
     }
315 326
 
327
+    if (H5Lexists(h5, "uns"))
328
+        args$metadata <- read_group(h5autoclose(h5 & "uns"))
329
+
316 330
     # Close the connection
317 331
     H5Fclose(h5)
318 332
 
Browse code

lowercase function names

Ilia Kats authored on 21/12/2021 08:31:53
Showing 1 changed files
... ...
@@ -242,12 +242,12 @@ read_modality <- function(view, backed=FALSE) {
242 242
 #'
243 243
 #' @examples
244 244
 #' data(miniACC, package="MultiAssayExperiment")
245
-#' WriteH5AD(miniACC[[1]], "miniacc.h5ad")
246
-#' sce <- ReadH5AD("miniacc.h5ad")
245
+#' writeH5AD(miniACC[[1]], "miniacc.h5ad")
246
+#' sce <- readH5AD("miniacc.h5ad")
247 247
 #'
248 248
 #' @importFrom rhdf5 H5Fclose
249 249
 #' @export
250
-ReadH5AD <- function(file, backed=FALSE) {
250
+readH5AD <- function(file, backed=FALSE) {
251 251
     h5 <- H5Fopen(file, flags="H5F_ACC_RDONLY", native=FALSE)
252 252
     res <- read_modality(h5, backed)
253 253
     H5Fclose(h5)
... ...
@@ -267,8 +267,8 @@ ReadH5AD <- function(file, backed=FALSE) {
267 267
 #'
268 268
 #' @examples
269 269
 #' data(miniACC, package="MultiAssayExperiment")
270
-#' WriteH5MU(miniACC, "miniacc.h5mu")
271
-#' mae <- ReadH5MU("miniacc.h5mu")
270
+#' writeH5MU(miniACC, "miniacc.h5mu")
271
+#' mae <- readH5MU("miniacc.h5mu")
272 272
 #'
273 273
 #' @importFrom stats setNames
274 274
 #' @importFrom rhdf5 h5ls H5Fclose H5Lexists H5Dread H5Dclose
... ...
@@ -276,7 +276,7 @@ ReadH5AD <- function(file, backed=FALSE) {
276 276
 #' @importFrom MultiAssayExperiment MultiAssayExperiment
277 277
 #'
278 278
 #' @export
279
-ReadH5MU <- function(file, backed=FALSE) {
279
+readH5MU <- function(file, backed=FALSE) {
280 280
     # Connect to the the file
281 281
     h5 <- open_and_check_mudata(file)
282 282
 
Browse code

Preserve modality order

Danila Bredikhin authored on 20/12/2021 23:03:23
Showing 1 changed files
... ...
@@ -283,6 +283,9 @@ ReadH5MU <- function(file, backed=FALSE) {
283 283
     # Check all the assays are written
284 284
     assays <- setNames(nm=h5ls(h5autoclose(h5 & "mod"), recursive=FALSE)$name)
285 285
 
286
+    # Read modality order if available and matches available assays
287
+    mod_order <- check_mod_order(h5)
288
+
286 289
     # Create global colData
287 290
     metadata <- read_with_index(h5autoclose(h5 & "obs"))
288 291
 
... ...
@@ -290,6 +293,7 @@ ReadH5MU <- function(file, backed=FALSE) {
290 293
     modalities <- lapply(assays, function(mod) {
291 294
         read_modality(h5autoclose(h5 & paste("mod", mod, sep="/")), backed)
292 295
     })
296
+    modalities <- modalities[mod_order]
293 297
 
294 298
     args <- list(experiments=modalities, colData=metadata)
295 299
 
Browse code

Improve for biocCheck

Danila Bredikhin authored on 15/12/2021 14:53:35
Showing 1 changed files
... ...
@@ -30,7 +30,7 @@ read_dataframe <- function(group) {
30 30
 	        labels_items <- H5Dread(labels)
31 31
 		n_labels <- length(unique(values))
32 32
 		if (length(labels_items) > n_labels) {
33
-			labels_items <- labels_items[1:n_labels]
33
+			labels_items <- labels_items[seq_len(n_labels)]
34 34
 		}
35 35
                 values <- factor(as.integer(values), labels=labels_items)
36 36
                 H5Dclose(labels)
Browse code

Improve reading categorical metadata

Danila Bredikhin authored on 10/12/2021 01:25:53
Showing 1 changed files
... ...
@@ -27,7 +27,12 @@ read_dataframe <- function(group) {
27 27
                 warning("found categories attribute for column ", name, ", but it is not a reference")
28 28
             } else {
29 29
                 labels <- H5Rdereference(labels, h5loc=col)
30
-                values <- factor(as.integer(values), labels=H5Dread(labels))
30
+	        labels_items <- H5Dread(labels)
31
+		n_labels <- length(unique(values))
32
+		if (length(labels_items) > n_labels) {
33
+			labels_items <- labels_items[1:n_labels]
34
+		}
35
+                values <- factor(as.integer(values), labels=labels_items)
31 36
                 H5Dclose(labels)
32 37
             }
33 38
             H5Aclose(attr)
Browse code

run CI on bioc-devel only for now

all required rhdf5 patches are now in bioc-devel, so we don't need to
install it separately from Github. Because of this, we are targeting
bioc-devel anyway.

Ilia Kats authored on 01/12/2021 15:57:41
Showing 1 changed files
... ...
@@ -236,7 +236,8 @@ read_modality <- function(view, backed=FALSE) {
236 236
 #' @return A \code{\linkS4class{SingleCellExperiment}}.
237 237
 #'
238 238
 #' @examples
239
-#' WriteH5AD(MultiAssayExperiment::miniACC[[1]], "miniacc.h5ad")
239
+#' data(miniACC, package="MultiAssayExperiment")
240
+#' WriteH5AD(miniACC[[1]], "miniacc.h5ad")
240 241
 #' sce <- ReadH5AD("miniacc.h5ad")
241 242
 #'
242 243
 #' @importFrom rhdf5 H5Fclose
... ...
@@ -260,7 +261,8 @@ ReadH5AD <- function(file, backed=FALSE) {
260 261
 #' @return A \code{\linkS4class{MultiAssayExperiment}}
261 262
 #'
262 263
 #' @examples
263
-#' WriteH5MU(MultiAssayExperiment::miniACC, "miniacc.h5mu")
264
+#' data(miniACC, package="MultiAssayExperiment")
265
+#' WriteH5MU(miniACC, "miniacc.h5mu")
264 266
 #' mae <- ReadH5MU("miniacc.h5mu")
265 267
 #'
266 268
 #' @importFrom stats setNames
Browse code

updates to work with rhdf5 from bioc-devel

Ilia Kats authored on 29/11/2021 14:54:18
Showing 1 changed files
... ...
@@ -1,4 +1,4 @@
1
-#' @importFrom rhdf5 H5Aexists H5Aopen H5Aread H5Aclose H5Dread H5Dclose
1
+#' @importFrom rhdf5 H5Aexists H5Aopen H5Aread H5Aclose H5Dread H5Dclose H5Rdereference
2 2
 #' @importMethodsFrom rhdf5 &
3 3
 #' @importFrom methods is
4 4
 read_dataframe <- function(group) {
... ...
@@ -16,12 +16,17 @@ read_dataframe <- function(group) {
16 16
     col_list <- lapply(columnorder, function(name) {
17 17
         col <- group & name
18 18
         values <- H5Dread(col)
19
+        # h5py saves boolean arrays as HDF5 enums
20
+        if (is.factor(values) && levels(values) == c("FALSE", "TRUE")) {
21
+            values <- as.logical(values)
22
+        }
19 23
         if (H5Aexists(col, "categories")) {
20 24
             attr <- H5Aopen(col, "categories")
21 25
             labels <- H5Aread(attr)
22
-            if (!is(labels, "H5IdComponent")) {
26
+            if (!is(labels, "H5Ref")) {
23 27
                 warning("found categories attribute for column ", name, ", but it is not a reference")
24 28
             } else {
29
+                labels <- H5Rdereference(labels, h5loc=col)
25 30
                 values <- factor(as.integer(values), labels=H5Dread(labels))
26 31
                 H5Dclose(labels)
27 32
             }
Browse code

make BiocCheck pass

Ilia Kats authored on 27/09/2021 08:42:31
Showing 1 changed files
... ...
@@ -20,7 +20,7 @@ read_dataframe <- function(group) {
20 20
             attr <- H5Aopen(col, "categories")
21 21
             labels <- H5Aread(attr)
22 22
             if (!is(labels, "H5IdComponent")) {
23
-                warning(paste0("found categories attribute for column ", name, ", but it is not a reference"))
23
+                warning("found categories attribute for column ", name, ", but it is not a reference")
24 24
             } else {
25 25
                 values <- factor(as.integer(values), labels=H5Dread(labels))
26 26
                 H5Dclose(labels)
... ...
@@ -63,7 +63,7 @@ read_with_index <- function(dataset) {
63 63
         encoding <- H5Aread(encattr, "encoding-type")
64 64
         H5Aclose(encattr)
65 65
         if (encoding != "dataframe") {
66
-            warning(paste0("Unknown encoding ", encoding, " when attempting to read data frame"))
66
+            warning("Unknown encoding ", encoding, " when attempting to read data frame")
67 67
             return(data.frame())
68 68
         }
69 69
         read_dataframe(dataset)
... ...
@@ -117,7 +117,7 @@ read_matrix <- function(dataset, backed=FALSE) {
117 117
                 return(read_sparse_matrix(dataset, encoding))
118 118
             }
119 119
         } else {
120
-            warning(paste0("Unknown encoding ", encoding, "when attempting to read matrix"))
120
+            warning("Unknown encoding ", encoding, "when attempting to read matrix")
121 121
             return(matrix())
122 122
         }
123 123
     } else {
... ...
@@ -149,7 +149,7 @@ read_group <- function(group) {
149 149
     } else if (endsWith(encoding, "matrix")) {
150 150
         read_sparse_matrix(group)
151 151
     } else {
152
-        warning(paste0("Unknown encoding ", encoding))
152
+        warning("Unknown encoding ", encoding)
153 153
         invisible(NULL)
154 154
     }
155 155
 }
... ...
@@ -208,7 +208,7 @@ read_modality <- function(view, backed=FALSE) {
208 208
             for (name in names) {
209 209
                 cpair <- read_matrix(h5autoclose(view & paste(cp$name, name, sep="/")))
210 210
                 if (!is(cpair, "dsparseMatrix")) {
211
-                    warning(paste("Pairwise", cp$name, "matrix", name, "is not a sparse matrix. Only sparse matrices are currently supported, skipping..."))
211
+                    warning("Pairwise ", cp$name, " matrix ", name, " is not a sparse matrix. Only sparse matrices are currently supported, skipping...")
212 212
                 } else {
213 213
                     se <- cp$setter(se, name, value=cpair)
214 214
                 }
... ...
@@ -230,6 +230,10 @@ read_modality <- function(view, backed=FALSE) {
230 230
 #'
231 231
 #' @return A \code{\linkS4class{SingleCellExperiment}}.
232 232
 #'
233
+#' @examples
234
+#' WriteH5AD(MultiAssayExperiment::miniACC[[1]], "miniacc.h5ad")
235
+#' sce <- ReadH5AD("miniacc.h5ad")
236
+#'
233 237
 #' @importFrom rhdf5 H5Fclose
234 238
 #' @export
235 239
 ReadH5AD <- function(file, backed=FALSE) {
... ...
@@ -250,6 +254,10 @@ ReadH5AD <- function(file, backed=FALSE) {
250 254
 #'
251 255
 #' @return A \code{\linkS4class{MultiAssayExperiment}}
252 256
 #'
257
+#' @examples
258
+#' WriteH5MU(MultiAssayExperiment::miniACC, "miniacc.h5mu")
259
+#' mae <- ReadH5MU("miniacc.h5mu")
260
+#'
253 261
 #' @importFrom stats setNames
254 262
 #' @importFrom rhdf5 h5ls H5Fclose H5Lexists H5Dread H5Dclose
255 263
 #' @importMethodsFrom rhdf5 &
Browse code

make R CMD check pass

Ilia Kats authored on 27/09/2021 08:04:15
Showing 1 changed files
... ...
@@ -1,5 +1,6 @@
1 1
 #' @importFrom rhdf5 H5Aexists H5Aopen H5Aread H5Aclose H5Dread H5Dclose
2 2
 #' @importMethodsFrom rhdf5 &
3
+#' @importFrom methods is
3 4
 read_dataframe <- function(group) {
4 5
     indexcol <- "_index"
5 6
     if (H5Aexists(group, "_index")) {
... ...
@@ -94,6 +95,7 @@ read_sparse_matrix <- function(group, encoding, backed=FALSE) {
94 95
 }
95 96
 
96 97
 #' @importFrom rhdf5 H5Iget_type H5Iget_name H5Aexists H5Aopen H5Aread H5Aclose H5Dread H5Dclose H5Gclose H5Fget_name
98
+#' @importFrom methods new
97 99
 read_matrix <- function(dataset, backed=FALSE) {
98 100
     if (backed) {
99 101
         have_delayedarray <- requireNamespace("HDF5Array", quietly=TRUE)
... ...
@@ -167,6 +169,7 @@ read_attribute <- function(attr) {
167 169
 #' @importFrom SingleCellExperiment SingleCellExperiment
168 170
 #' @importMethodsFrom SingleCellExperiment colPair<- rowPair<-
169 171
 #' @importFrom SummarizedExperiment SummarizedExperiment
172
+#' @importFrom methods is
170 173
 read_modality <- function(view, backed=FALSE) {
171 174
     X <- read_matrix(h5autoclose(view & "X"), backed=backed)
172 175
     var <- read_with_index(h5autoclose(view & "var"))
... ...
@@ -205,7 +208,7 @@ read_modality <- function(view, backed=FALSE) {
205 208
             for (name in names) {
206 209
                 cpair <- read_matrix(h5autoclose(view & paste(cp$name, name, sep="/")))
207 210
                 if (!is(cpair, "dsparseMatrix")) {
208
-                    warn(paste("Pairwise", cp$name, "matrix", name, "is not a sparse matrix. Only sparse matrices are currently supported, skipping..."))
211
+                    warning(paste("Pairwise", cp$name, "matrix", name, "is not a sparse matrix. Only sparse matrices are currently supported, skipping..."))
209 212
                 } else {
210 213
                     se <- cp$setter(se, name, value=cpair)
211 214
                 }
Browse code

fix reading of delayed sparse matrices

Ilia Kats authored on 16/09/2021 11:12:43
Showing 1 changed files
... ...
@@ -109,7 +109,7 @@ read_matrix <- function(dataset, backed=FALSE) {
109 109
         H5Aclose(encattr)
110 110
         if (encoding %in% c("csr_matrix", "csc_matrix")) {
111 111
             if (backed) {
112
-                cls <- ifelse(encoding == "csr_matrix", "CSR_H5ADMatrixSeed", "CSC_H5ADMatrixSeed")
112
+                cls <- ifelse(encoding == "csr_matrix", "CSC_H5ADMatrixSeed", "CSR_H5ADMatrixSeed")
113 113
                 seed <- HDF5Array::H5SparseMatrixSeed
114 114
             } else {
115 115
                 return(read_sparse_matrix(dataset, encoding))
Browse code

read/write AnnData layers to different assays of SummarizedExperiment

Ilia Kats authored on 16/09/2021 07:54:49
Showing 1 changed files
... ...
@@ -160,6 +160,7 @@ read_attribute <- function(attr) {
160 160
         H5Dread(attr)
161 161
 }
162 162
 
163
+#' @importFrom stats setNames
163 164
 #' @importFrom rhdf5 h5ls
164 165
 #' @importMethodsFrom rhdf5 &
165 166
 #' @importFrom S4Vectors SimpleList
... ...
@@ -170,12 +171,19 @@ read_modality <- function(view, backed=FALSE) {
170 171
     X <- read_matrix(h5autoclose(view & "X"), backed=backed)
171 172
     var <- read_with_index(h5autoclose(view & "var"))
172 173
     obs <- read_with_index(h5autoclose(view  & "obs"))
173
-
174
-    viewnames <- h5ls(view, recursive=FALSE)$name
175 174
     rownames(X) <- rownames(var)
176 175
     colnames(X) <- rownames(obs)
177 176
 
178
-    args <- list(assays=list(counts=X), rowData=var, colData=obs)
177
+    viewnames <- h5ls(view, recursive=FALSE)$name
178
+    layers <- list()
179
+    if ("layers" %in% viewnames) {
180
+        layers <- lapply(setNames(nm=h5ls(h5autoclose(view & "layers"), recursive=FALSE)$name), function(layer) {
181
+            read_matrix(h5autoclose(view & paste("layers", layer, sep="/")), backed=backed)
182
+        })
183
+    }
184
+
185
+    args <- list(assays=c(list(X), layers), rowData=var, colData=obs)
186
+
179 187
     if ("obsm" %in% viewnames) {
180 188
         obsmnames <- h5ls(h5autoclose(view & "obsm"), recursive=FALSE)$name
181 189
         obsm <- lapply(obsmnames, function(space) {
Browse code

fix ReadH5AD

Ilia Kats authored on 15/09/2021 16:12:07
Showing 1 changed files
... ...
@@ -222,9 +222,9 @@ read_modality <- function(view, backed=FALSE) {
222 222
 #' @importFrom rhdf5 H5Fclose
223 223
 #' @export
224 224
 ReadH5AD <- function(file, backed=FALSE) {
225
-    file <- H5Fopen(filename, flags="H5F_ACC_RDONLY", native=FALSE)
226
-    res <- read_modality(file, backed)
227
-    H5Fclose(file)
225
+    h5 <- H5Fopen(file, flags="H5F_ACC_RDONLY", native=FALSE)
226
+    res <- read_modality(h5, backed)
227
+    H5Fclose(h5)
228 228
     res
229 229
 }
230 230
 
Browse code

implement read/write of obsp/varp for modalities

Ilia Kats authored on 15/09/2021 15:55:21
Showing 1 changed files
... ...
@@ -164,6 +164,7 @@ read_attribute <- function(attr) {
164 164
 #' @importMethodsFrom rhdf5 &
165 165
 #' @importFrom S4Vectors SimpleList
166 166
 #' @importFrom SingleCellExperiment SingleCellExperiment
167
+#' @importMethodsFrom SingleCellExperiment colPair<- rowPair<-
167 168
 #' @importFrom SummarizedExperiment SummarizedExperiment
168 169
 read_modality <- function(view, backed=FALSE) {
169 170
     X <- read_matrix(h5autoclose(view & "X"), backed=backed)
... ...
@@ -173,6 +174,8 @@ read_modality <- function(view, backed=FALSE) {
173 174
     viewnames <- h5ls(view, recursive=FALSE)$name
174 175
     rownames(X) <- rownames(var)
175 176
     colnames(X) <- rownames(obs)
177
+
178
+    args <- list(assays=list(counts=X), rowData=var, colData=obs)
176 179
     if ("obsm" %in% viewnames) {
177 180
         obsmnames <- h5ls(h5autoclose(view & "obsm"), recursive=FALSE)$name
178 181
         obsm <- lapply(obsmnames, function(space) {
... ...
@@ -183,16 +186,29 @@ read_modality <- function(view, backed=FALSE) {
183 186
             elem
184 187
         })
185 188
         names(obsm) <- obsmnames
186
-        se <- SingleCellExperiment(assays=SimpleList(counts=X), rowData=var, colData=obs, reducedDims=obsm)
187
-    } else {
188
-        se <- SummarizedExperiment(assays=SimpleList(counts=X), rowData=var, colData=obs)
189
+        args$reducedDims <- obsm
189 190
     }
190 191
 
192
+    se <- do.call(SingleCellExperiment, args)
193
+
194
+    for (cp in list(list(name="obsp", setter=`colPair<-`), list(name="varp", setter=`rowPair<-`))) {
195
+        if (cp$name %in% viewnames) {
196
+            names <- h5ls(h5autoclose(view & cp$name), recursive=FALSE)$name
197
+            for (name in names) {
198
+                cpair <- read_matrix(h5autoclose(view & paste(cp$name, name, sep="/")))
199
+                if (!is(cpair, "dsparseMatrix")) {
200
+                    warn(paste("Pairwise", cp$name, "matrix", name, "is not a sparse matrix. Only sparse matrices are currently supported, skipping..."))
201
+                } else {
202
+                    se <- cp$setter(se, name, value=cpair)
203
+                }
204
+            }
205
+        }
206
+    }
191 207
     se
192 208
 }
193 209
 
194 210
 
195
-#' Read an .h5ad file and create a \code{\linkS4class{SummarizedExperiment}}.
211
+#' Read an .h5ad file and create a \code{\linkS4class{SingleCellExperiment}}.
196 212
 #'
197 213
 #' In file-backed mode, the main \code{X} matrix is not read into memory,
198 214
 #' but references the HDF5 file and its required parts are read on demand.
... ...
@@ -201,8 +217,7 @@ read_modality <- function(view, backed=FALSE) {
201 217
 #' @param file Path to the .h5ad file.
202 218
 #' @param backed Whether to use file-backed mode.
203 219
 #'
204
-#' @return A \code{\linkS4class{SingleCellExperiment}} if the .h5ad object has
205
-#'     non-empty \code{.obsm}, otherwise a \code{\linkS4class{SummarizedExperiment}}.
220
+#' @return A \code{\linkS4class{SingleCellExperiment}}.
206 221
 #'
207 222
 #' @importFrom rhdf5 H5Fclose
208 223
 #' @export
... ...
@@ -258,7 +273,9 @@ ReadH5MU <- function(file, backed=FALSE) {
258 273
             idx <- which(cmap > 0)
259 274
             data.frame(assay=mod, primary=rownames(metadata)[idx], colname=colnames(modalities[[mod]])[cmap[idx]])
260 275
         })
261
-        args$sampleMap <- do.call(rbind, samplemaps)
276
+        sampleMap <- do.call(rbind, samplemaps)
277
+        rownames(sampleMap) <- NULL
278
+        args$sampleMap <- sampleMap
262 279
     }
263 280
 
264 281
     # Close the connection
Browse code

fix round-trips of MultiAssayExperiments

- WriteH5AD wasn't writing .obs and .var for matrices, but the reading
code assumes that they're always there
- The original MAE can have different rownames in its colData than the
rownames of the individual experiment. In this case, automatic
sampleMap construction fails, so we create a sampleMap manually from
the .obsmap

Ilia Kats authored on 15/09/2021 12:13:35
Showing 1 changed files
... ...
@@ -224,7 +224,8 @@ ReadH5AD <- function(file, backed=FALSE) {
224 224
 #'
225 225
 #' @return A \code{\linkS4class{MultiAssayExperiment}}
226 226
 #'
227
-#' @importFrom rhdf5 h5ls H5Fclose
227
+#' @importFrom stats setNames
228
+#' @importFrom rhdf5 h5ls H5Fclose H5Lexists H5Dread H5Dclose
228 229
 #' @importMethodsFrom rhdf5 &
229 230
 #' @importFrom MultiAssayExperiment MultiAssayExperiment
230 231
 #'
... ...
@@ -234,7 +235,7 @@ ReadH5MU <- function(file, backed=FALSE) {
234 235
     h5 <- open_and_check_mudata(file)
235 236
 
236 237
     # Check all the assays are written
237
-    assays <- h5ls(h5autoclose(h5 & "mod"), recursive=FALSE)$name
238
+    assays <- setNames(nm=h5ls(h5autoclose(h5 & "mod"), recursive=FALSE)$name)
238 239
 
239 240
     # Create global colData
240 241
     metadata <- read_with_index(h5autoclose(h5 & "obs"))
... ...
@@ -243,12 +244,27 @@ ReadH5MU <- function(file, backed=FALSE) {
243 244
     modalities <- lapply(assays, function(mod) {
244 245
         read_modality(h5autoclose(h5 & paste("mod", mod, sep="/")), backed)
245 246
     })
246
-    names(modalities) <- assays
247
+
248
+    args <- list(experiments=modalities, colData=metadata)
249
+
250
+    # create a sampleMap. This is needed for a round-trip MAE -> .h5mu -> MAE
251
+    # if colData(MAE) has different row names than the experiments
252
+    if (H5Lexists(h5, "obsmap")) {
253
+        samplemaps <- lapply(assays, function(mod) {
254
+            cmapdset <- h5 & paste("obsmap", mod, sep="/")
255
+            cmap <- H5Dread(cmapdset)
256
+            H5Dclose(cmapdset)
257
+
258
+            idx <- which(cmap > 0)
259
+            data.frame(assay=mod, primary=rownames(metadata)[idx], colname=colnames(modalities[[mod]])[cmap[idx]])
260
+        })
261
+        args$sampleMap <- do.call(rbind, samplemaps)
262
+    }
247 263
 
248 264
     # Close the connection
249 265
     H5Fclose(h5)
250 266
 
251 267
     # Create a MAE object
252
-    MultiAssayExperiment(modalities, metadata)
268
+    do.call(MultiAssayExperiment, args)
253 269
 }
254 270
 
Browse code

add some more documentation

Ilia Kats authored on 15/09/2021 09:33:54
Showing 1 changed files
... ...
@@ -192,7 +192,17 @@ read_modality <- function(view, backed=FALSE) {
192 192
 }
193 193
 
194 194
 
195
-#' Read an .h5ad file and create a SummarizedExperiment
195
+#' Read an .h5ad file and create a \code{\linkS4class{SummarizedExperiment}}.
196
+#'
197
+#' In file-backed mode, the main \code{X} matrix is not read into memory,
198
+#' but references the HDF5 file and its required parts are read on demand.
199
+#' This requires the HDF5Array package to be installed.
200
+#'
201
+#' @param file Path to the .h5ad file.
202
+#' @param backed Whether to use file-backed mode.
203
+#'
204
+#' @return A \code{\linkS4class{SingleCellExperiment}} if the .h5ad object has
205
+#'     non-empty \code{.obsm}, otherwise a \code{\linkS4class{SummarizedExperiment}}.
196 206
 #'
197 207
 #' @importFrom rhdf5 H5Fclose
198 208
 #' @export
... ...
@@ -203,7 +213,16 @@ ReadH5AD <- function(file, backed=FALSE) {
203 213
     res
204 214
 }
205 215
 
206
-#' Read an .h5mu file and create a MultiAssayExperiment
216
+#' Read an .h5mu file and create a \code{\link{MultiAssayExperiment}}.
217
+#'
218
+#' In file-backed mode, the main \code{X} matrices are not read into memory,
219
+#' but reference the HDF5 file and their required parts are read on demand.
220
+#' This requires the HDF5Array package to be installed.
221
+#'
222
+#' @param file Path to the .h5mu file.
223
+#' @param backed Whether to use file-backed mode.
224
+#'
225
+#' @return A \code{\linkS4class{MultiAssayExperiment}}
207 226
 #'
208 227
 #' @importFrom rhdf5 h5ls H5Fclose
209 228
 #' @importMethodsFrom rhdf5 &
Browse code

factor out h5ad I/O into their own methods and export those methods

Ilia Kats authored on 14/09/2021 16:19:27
Showing 1 changed files
... ...
@@ -160,13 +160,53 @@ read_attribute <- function(attr) {
160 160
         H5Dread(attr)
161 161
 }
162 162
 
163
-#' Read an .h5mu file and create a MultiAssayExperiment
164
-#'
165
-#' @importFrom rhdf5 h5ls H5Fclose
163
+#' @importFrom rhdf5 h5ls
166 164
 #' @importMethodsFrom rhdf5 &
167 165
 #' @importFrom S4Vectors SimpleList
168 166
 #' @importFrom SingleCellExperiment SingleCellExperiment
169 167
 #' @importFrom SummarizedExperiment SummarizedExperiment
168
+read_modality <- function(view, backed=FALSE) {
169
+    X <- read_matrix(h5autoclose(view & "X"), backed=backed)
170
+    var <- read_with_index(h5autoclose(view & "var"))
171
+    obs <- read_with_index(h5autoclose(view  & "obs"))
172
+
173
+    viewnames <- h5ls(view, recursive=FALSE)$name
174
+    rownames(X) <- rownames(var)
175
+    colnames(X) <- rownames(obs)
176
+    if ("obsm" %in% viewnames) {
177
+        obsmnames <- h5ls(h5autoclose(view & "obsm"), recursive=FALSE)$name
178
+        obsm <- lapply(obsmnames, function(space) {
179
+            elem <- read_attribute(h5autoclose(view & paste("obsm", space, sep="/")))
180
+            if (!is.data.frame(elem) && length(dim(elem)) > 1)
181
+                elem <- t(elem)
182
+            rownames(elem) <- rownames(obs)
183
+            elem
184
+        })
185
+        names(obsm) <- obsmnames
186
+        se <- SingleCellExperiment(assays=SimpleList(counts=X), rowData=var, colData=obs, reducedDims=obsm)
187
+    } else {
188
+        se <- SummarizedExperiment(assays=SimpleList(counts=X), rowData=var, colData=obs)
189
+    }
190
+
191
+    se
192
+}
193
+
194
+
195
+#' Read an .h5ad file and create a SummarizedExperiment
196
+#'
197
+#' @importFrom rhdf5 H5Fclose
198
+#' @export
199
+ReadH5AD <- function(file, backed=FALSE) {
200
+    file <- H5Fopen(filename, flags="H5F_ACC_RDONLY", native=FALSE)
201
+    res <- read_modality(file, backed)
202
+    H5Fclose(file)
203
+    res
204
+}
205
+
206
+#' Read an .h5mu file and create a MultiAssayExperiment
207
+#'
208
+#' @importFrom rhdf5 h5ls H5Fclose
209
+#' @importMethodsFrom rhdf5 &
170 210
 #' @importFrom MultiAssayExperiment MultiAssayExperiment
171 211
 #'
172 212
 #' @export
... ...
@@ -182,48 +222,14 @@ ReadH5MU <- function(file, backed=FALSE) {
182 222
 
183 223
     # Create an experiments list
184 224
     modalities <- lapply(assays, function(mod) {
185
-        view <- h5autoclose(h5 & paste("mod", mod, sep="/"))
186
-        X <- read_matrix(h5autoclose(view & "X"), backed=backed)
187
-        var <- read_with_index(h5autoclose(view & "var"))
188
-        obs <- read_with_index(h5autoclose(view  & "obs"))
189
-        primary <- rownames(obs)
190
-        rownames(obs) <- paste(mod, rownames(obs), sep="-")
191
-
192
-        viewnames <- h5ls(view, recursive=FALSE)$name
193
-        rownames(X) <- rownames(var)
194
-        colnames(X) <- rownames(obs)
195
-        if ("obsm" %in% viewnames) {
196
-            obsmnames <- h5ls(h5autoclose(view & "obsm"), recursive=FALSE)$name
197
-            obsm <- lapply(obsmnames, function(space) {
198
-                elem <- read_attribute(h5autoclose(view & paste("obsm", space, sep="/")))
199
-                if (!is.data.frame(elem) && length(dim(elem)) > 1)
200
-                    elem <- t(elem)
201
-                rownames(elem) <- rownames(obs)
202
-                elem
203
-            })
204
-            names(obsm) <- obsmnames
205
-            se <- SingleCellExperiment(assays=SimpleList(counts=X), rowData=var, colData=obs, reducedDims=obsm)
206
-        } else {
207
-            se <- SummarizedExperiment(assays=SimpleList(counts=X), rowData=var, colData=obs)
208
-        }
209
-
210
-        list(exp=se, primary=primary)
225
+        read_modality(h5autoclose(h5 & paste("mod", mod, sep="/")), backed)
211 226
     })
212 227
     names(modalities) <- assays
213 228
 
214
-    # Create sampleMap
215
-    mapping <- lapply(modalities, function(mod) {
216
-        data.frame(primary=mod$primary, colname=colnames(mod$exp), stringsAsFactors=FALSE)
217
-    })
218
-
219
-    obsmap <- do.call(rbind, mapping)
220
-    obsmap["assay"] <- rep(assays, times=vapply(mapping, nrow, 1))
221
-    obsmap <- obsmap[,c("assay", "primary", "colname")]
222
-
223 229
     # Close the connection
224 230
     H5Fclose(h5)
225 231
 
226 232
     # Create a MAE object
227
-    MultiAssayExperiment(lapply(modalities, function(mod)mod$exp), metadata, obsmap)
233
+    MultiAssayExperiment(modalities, metadata)
228 234
 }
229 235
 
Browse code

take care to close all HDF5 identifiers after we're done with them

Ilia Kats authored on 14/09/2021 13:41:54
Showing 1 changed files
... ...
@@ -31,7 +31,8 @@ read_dataframe <- function(group) {
31 31
     })
32 32
     names(col_list) <- columnorder
33 33
     index <- group & indexcol
34
-    col_list[["row.names"]] <- H5Dread(group & indexcol)
34
+    col_list[["row.names"]] <- H5Dread(index)
35
+    H5Dclose(index)
35 36
     do.call(data.frame, args=col_list)
36 37
 }
37 38
 
... ...
@@ -70,12 +71,18 @@ read_with_index <- function(dataset) {
70 71
     }
71 72
 }
72 73
 
73
-#' @importFrom rhdf5 H5Dread H5Aopen H5Aread H5Aclose
74
+#' @importFrom rhdf5 H5Dread H5Dclose H5Aopen H5Aread H5Aclose
74 75
 #' @importMethodsFrom rhdf5 &
75 76
 read_sparse_matrix <- function(group, encoding, backed=FALSE) {
76
-    i <- as.vector(H5Dread(group & "indices"))
77
-    p <- as.vector(H5Dread(group & "indptr"))
78
-    x <- as.vector(H5Dread(group & "data"))
77
+    indices <- group & "indices"
78
+    indptr <- group & "indptr"
79
+    data <- group & "data"
80
+    i <- as.vector(H5Dread(indices))
81
+    p <- as.vector(H5Dread(indptr))
82
+    x <- as.vector(H5Dread(data))
83
+    H5Dclose(indices)
84
+    H5Dclose(indptr)
85
+    H5Dclose(data)
79 86
     shapeattr <- H5Aopen(group, "shape")
80 87
     shape <- H5Aread(shapeattr)
81 88
     H5Aclose(shapeattr)
... ...
@@ -86,7 +93,7 @@ read_sparse_matrix <- function(group, encoding, backed=FALSE) {
86 93
     }
87 94
 }
88 95
 
89
-#' @importFrom rhdf5 H5Iget_type H5Iget_name H5Aexists H5Aopen H5Aread H5Aclose H5Dread H5Fget_name
96
+#' @importFrom rhdf5 H5Iget_type H5Iget_name H5Aexists H5Aopen H5Aread H5Aclose H5Dread H5Dclose H5Gclose H5Fget_name
90 97
 read_matrix <- function(dataset, backed=FALSE) {
91 98
     if (backed) {
92 99
         have_delayedarray <- requireNamespace("HDF5Array", quietly=TRUE)
... ...
@@ -168,17 +175,17 @@ ReadH5MU <- function(file, backed=FALSE) {
168 175
     h5 <- open_and_check_mudata(file)
169 176
 
170 177
     # Check all the assays are written
171
-    assays <- h5ls(h5 & "mod", recursive=FALSE)$name
178
+    assays <- h5ls(h5autoclose(h5 & "mod"), recursive=FALSE)$name
172 179
 
173 180
     # Create global colData
174
-    metadata <- read_with_index(h5 & "obs")
181
+    metadata <- read_with_index(h5autoclose(h5 & "obs"))
175 182
 
176 183
     # Create an experiments list
177 184
     modalities <- lapply(assays, function(mod) {
178
-        view <- h5 & paste("mod", mod, sep="/")
179
-        X <- read_matrix(view & "X", backed=backed)
180
-        var <- read_with_index(view & "var")
181
-        obs <- read_with_index(view  & "obs")
185
+        view <- h5autoclose(h5 & paste("mod", mod, sep="/"))
186
+        X <- read_matrix(h5autoclose(view & "X"), backed=backed)
187
+        var <- read_with_index(h5autoclose(view & "var"))
188
+        obs <- read_with_index(h5autoclose(view  & "obs"))
182 189
         primary <- rownames(obs)
183 190
         rownames(obs) <- paste(mod, rownames(obs), sep="-")
184 191
 
... ...
@@ -186,9 +193,9 @@ ReadH5MU <- function(file, backed=FALSE) {
186 193
         rownames(X) <- rownames(var)
187 194
         colnames(X) <- rownames(obs)
188 195
         if ("obsm" %in% viewnames) {
189
-            obsmnames <- h5ls(view & "obsm", recursive=FALSE)$name
196
+            obsmnames <- h5ls(h5autoclose(view & "obsm"), recursive=FALSE)$name
190 197
             obsm <- lapply(obsmnames, function(space) {
191
-                elem <- read_attribute(view & paste("obsm", space, sep="/"))
198
+                elem <- read_attribute(h5autoclose(view & paste("obsm", space, sep="/")))
192 199
                 if (!is.data.frame(elem) && length(dim(elem)) > 1)
193 200
                     elem <- t(elem)
194 201
                 rownames(elem) <- rownames(obs)
Browse code

implement file-backed mode with HDF5Array

Ilia Kats authored on 14/09/2021 11:58:38
Showing 1 changed files
... ...
@@ -72,7 +72,7 @@ read_with_index <- function(dataset) {
72 72
 
73 73
 #' @importFrom rhdf5 H5Dread H5Aopen H5Aread H5Aclose
74 74
 #' @importMethodsFrom rhdf5 &
75
-read_sparse_matrix <- function(group, encoding) {
75
+read_sparse_matrix <- function(group, encoding, backed=FALSE) {
76 76
     i <- as.vector(H5Dread(group & "indices"))
77 77
     p <- as.vector(H5Dread(group & "indptr"))
78 78
     x <- as.vector(H5Dread(group & "data"))
... ...
@@ -80,26 +80,53 @@ read_sparse_matrix <- function(group, encoding) {
80 80
     shape <- H5Aread(shapeattr)
81 81
     H5Aclose(shapeattr)
82 82
     if (encoding == "csr_matrix") {
83
-        sparseMatrix(j=i, p=p, x=x, dims=shape, repr="R", index1=FALSE)
83
+        sparseMatrix(i=i, p=p, x=x, dims=rev(shape), repr="C", index1=FALSE)
84 84
     } else {
85
-        sparseMatrix(i=i, p=p, x=x, dims=shape, repr="C", index1=FALSE)
85
+        t(sparseMatrix(i=i, p=p, x=x, dims=shape, repr="C", index1=FALSE))
86 86
     }
87 87
 }
88 88
 
89
-#' @importFrom rhdf5 H5Iget_type H5Aexists H5Aopen H5Aread H5Aclose H5Dread
90
-read_matrix <- function(dataset) {
89
+#' @importFrom rhdf5 H5Iget_type H5Iget_name H5Aexists H5Aopen H5Aread H5Aclose H5Dread H5Fget_name
90
+read_matrix <- function(dataset, backed=FALSE) {
91
+    if (backed) {
92
+        have_delayedarray <- requireNamespace("HDF5Array", quietly=TRUE)
93
+        if (!have_delayedarray) {
94
+            warning("Could not load the HDF5Array package. HDF5Array is required for backed matrices. Loading matrix into memory...")
95
+            backed <- FALSE
96
+        }
97
+    }
98
+
91 99
     if (H5Iget_type(dataset) == "H5I_GROUP" && H5Aexists(dataset, "encoding-type")) {
92 100
         encattr <- H5Aopen(dataset, "encoding-type")
93 101
         encoding <- H5Aread(encattr)
94 102
         H5Aclose(encattr)
95 103
         if (encoding %in% c("csr_matrix", "csc_matrix")) {
96
-            read_sparse_matrix(dataset, encoding)
104
+            if (backed) {
105
+                cls <- ifelse(encoding == "csr_matrix", "CSR_H5ADMatrixSeed", "CSC_H5ADMatrixSeed")
106
+                seed <- HDF5Array::H5SparseMatrixSeed
107
+            } else {
108
+                return(read_sparse_matrix(dataset, encoding))
109
+            }
97 110
         } else {
98 111
             warning(paste0("Unknown encoding ", encoding, "when attempting to read matrix"))
99
-            matrix()
112
+            return(matrix())
100 113
         }
101 114
     } else {
102
-        H5Dread(dataset)
115
+        if (backed) {
116
+            cls <- "Dense_H5ADMatrixSeed"
117
+            seed <- HDF5Array::HDF5ArraySeed
118
+        } else {
119
+            return(H5Dread(dataset))
120
+        }
121
+    }
122
+    if (backed) {
123
+        file <- H5Fget_name(dataset)
124
+        name <- H5Iget_name(dataset)
125
+        suppressWarnings({
126
+            seed <- seed(file, name)
127
+            seed <- new(cls, seed)
128
+            HDF5Array::H5ADMatrix(seed)
129
+        })
103 130
     }
104 131
 }
105 132
 
... ...
@@ -136,7 +163,7 @@ read_attribute <- function(attr) {
136 163
 #' @importFrom MultiAssayExperiment MultiAssayExperiment
137 164
 #'
138 165
 #' @export
139
-ReadH5MU <- function(file) {
166
+ReadH5MU <- function(file, backed=FALSE) {
140 167
     # Connect to the the file
141 168
     h5 <- open_and_check_mudata(file)
142 169
 
... ...
@@ -149,26 +176,28 @@ ReadH5MU <- function(file) {
149 176
     # Create an experiments list
150 177
     modalities <- lapply(assays, function(mod) {
151 178
         view <- h5 & paste("mod", mod, sep="/")
152
-        X <- read_matrix(view & "X")
179
+        X <- read_matrix(view & "X", backed=backed)
153 180
         var <- read_with_index(view & "var")
154 181
         obs <- read_with_index(view  & "obs")
155 182
         primary <- rownames(obs)
156 183
         rownames(obs) <- paste(mod, rownames(obs), sep="-")
157 184
 
158 185
         viewnames <- h5ls(view, recursive=FALSE)$name
159
-        colnames(X) <- rownames(var)
160
-        rownames(X) <- rownames(obs)
186
+        rownames(X) <- rownames(var)
187
+        colnames(X) <- rownames(obs)
161 188
         if ("obsm" %in% viewnames) {
162 189
             obsmnames <- h5ls(view & "obsm", recursive=FALSE)$name
163 190
             obsm <- lapply(obsmnames, function(space) {
164 191
                 elem <- read_attribute(view & paste("obsm", space, sep="/"))
192
+                if (!is.data.frame(elem) && length(dim(elem)) > 1)
193
+                    elem <- t(elem)
165 194
                 rownames(elem) <- rownames(obs)
166 195
                 elem
167 196
             })
168 197
             names(obsm) <- obsmnames
169
-            se <- SingleCellExperiment(assays=SimpleList(counts=t(X)), rowData=var, colData=obs, reducedDims=obsm)
198
+            se <- SingleCellExperiment(assays=SimpleList(counts=X), rowData=var, colData=obs, reducedDims=obsm)
170 199
         } else {
171
-            se <- SummarizedExperiment(assays=SimpleList(counts=t(X)), rowData=var, colData=obs)
200
+            se <- SummarizedExperiment(assays=SimpleList(counts=X), rowData=var, colData=obs)
172 201
         }
173 202
 
174 203
         list(exp=se, primary=primary)
Browse code

working port to rhdf5

- remove the workaround, it doesn't work. Therefore, this depends on the
rhdf5 version from https://github.com/ilia-kats/rhdf5/tree/attribute-references
- read-write of MultiAssayExperiments works

TODO:
- backed objects with DelayedArray
- figure out how to close all the HDF5 objects when we're done

Ilia Kats authored on 13/09/2021 13:02:50
Showing 1 changed files
... ...
@@ -1,4 +1,59 @@
1
-#' @importFrom rhdf5 H5Iget_type h5readAttributes h5ls H5Aexists H5Aopen H5Aread H5Aclose
1
+#' @importFrom rhdf5 H5Aexists H5Aopen H5Aread H5Aclose H5Dread H5Dclose
2
+#' @importMethodsFrom rhdf5 &
3
+read_dataframe <- function(group) {
4
+    indexcol <- "_index"
5
+    if (H5Aexists(group, "_index")) {
6
+        indexattr <- H5Aopen(group, "_index")
7
+        indexcol <- H5Aread(indexattr)
8
+        H5Aclose(indexattr)
9
+    }
10
+
11
+    orderedattr <- H5Aopen(group, "column-order")
12
+    columnorder <- H5Aread(orderedattr)
13
+    H5Aclose(orderedattr)
14
+
15
+    col_list <- lapply(columnorder, function(name) {
16
+        col <- group & name
17
+        values <- H5Dread(col)
18
+        if (H5Aexists(col, "categories")) {
19
+            attr <- H5Aopen(col, "categories")
20
+            labels <- H5Aread(attr)
21
+            if (!is(labels, "H5IdComponent")) {
22
+                warning(paste0("found categories attribute for column ", name, ", but it is not a reference"))
23
+            } else {
24
+                values <- factor(as.integer(values), labels=H5Dread(labels))
25
+                H5Dclose(labels)
26
+            }
27
+            H5Aclose(attr)
28
+        }
29
+        H5Dclose(col)
30
+        values
31
+    })
32
+    names(col_list) <- columnorder
33
+    index <- group & indexcol
34
+    col_list[["row.names"]] <- H5Dread(group & indexcol)
35
+    do.call(data.frame, args=col_list)
36
+}
37
+
38
+#' @importFrom rhdf5 H5Dread H5Aexists H5Aopen H5Aread H5Aclose
39
+read_dataframe_legacy <- function(dataset) {
40
+    table <- H5Dread(dataset)
41
+
42
+    indexcol <- "_index"
43
+    if (H5Aexists(dataset, "_index")) {
44
+        index <- H5Aopen(dataset, "_index")
45
+        indexcol <- H5Aread(index)
46
+        H5Aclose(index)
47
+    }
48
+
49
+    if (indexcol %in% colnames(table)) {
50
+        rownames(table) <- table[,indexcol,drop=TRUE]
51
+        table <- table[,!colnames(table) %in% c(indexcol),drop=FALSE]
52
+    }
53
+    table
54
+}
55
+
56
+#' @importFrom rhdf5 H5Iget_type H5Aexists H5Aopen H5Aread H5Aclose
2 57
 read_with_index <- function(dataset) {
3 58
     cls <- H5Iget_type(dataset)
4 59
     if (cls == "H5I_GROUP" && H5Aexists(dataset, "encoding-type")) {
... ...
@@ -9,85 +64,36 @@ read_with_index <- function(dataset) {
9 64
             warning(paste0("Unknown encoding ", encoding, " when attempting to read data frame"))
10 65
             return(data.frame())
11 66
         }
12
-        # Table is saved as a group rather than a dataset
13
-        indexcol <- "_index"
14
-        if (H5Aexists(dataset, "_index")) {
15
-            indexattr <- H5Aopen(dataset, "_index")
16
-            indexcol <- H5Aread(indexattr)
17
-            H5Aclose(indexattr)
18
-        }
19
-
20
-        columns <- h5ls(dataset, recursive=FALSE, datasetinfo=FALSE)$name
21
-        columns <- columns[columns != "__categories"]
22
-
23
-        col_list <- lapply(columns, function(name) {
24
-            values <- H5Dread(dataset & name)
25
-            col <- dataset & name
26
-            if (H5Aexists(col, "categories")) {
27
-                attr <- H5Aopen(col, "categories")
28
-                if (H5is_attr_reference(attr)) {
29
-                    labels <- H5deref_attr_reference(attr)
30
-                    values <- factor(as.integer(values), labels=H5Dread(labels))
31
-                } else {
32
-                    warning(paste0("found categories attribute for column ", name, ", but it is not a reference"))
33
-                }
34
-                H5Aclose(attr)
35
-            }
36
-            values
37
-        })
38
-        table <- data.frame(Reduce(cbind, col_list))
39
-        colnames(table) <- columns
40
-
41
-        if (indexcol %in% colnames(table)) {
42
-            rownames(table) <- table[,indexcol,drop=TRUE]
43
-            table <- table[,!colnames(table) %in% c(indexcol),drop=FALSE]
44
-        }
45
-
46
-        # Fix column order
47
-        if (H5Aexists(dataset, "column-order")) {
48
-            orderedattr <- H5Aopen(dataset, "column-order")
49
-            ordered_columns <- H5Aread(orderedattr)
50
-            H5Aclose(orderedattr)
51
-
52
-            ordered_columns <- ordered_columns[ordered_columns != indexcol]
53
-            table <- table[,ordered_columns[ordered_columns %in% columns],drop=FALSE]
54
-        }
67
+        read_dataframe(dataset)
55 68
     } else {
56
-        table <- H5Dread(dataset)
57
-
58
-        indexcol <- "_index"
59
-        if (H5Aexists(dataset, "_index")) {
60
-            index <- H5Aopen(dataset, "_index")
61
-            indexcol <- H5Aread(index)
62
-            H5Aclose(index)
63
-        }
69
+        read_dataframe_legacy(dataset)
70
+    }
71
+}
64 72
 
65
-        if (indexcol %in% colnames(table)) {
66
-            rownames(table) <- table[,indexcol,drop=TRUE]
67
-            table <- table[,!colnames(table) %in% c(indexcol),drop=FALSE]
68
-        }
73
+#' @importFrom rhdf5 H5Dread H5Aopen H5Aread H5Aclose
74
+#' @importMethodsFrom rhdf5 &
75
+read_sparse_matrix <- function(group, encoding) {
76
+    i <- as.vector(H5Dread(group & "indices"))
77
+    p <- as.vector(H5Dread(group & "indptr"))
78
+    x <- as.vector(H5Dread(group & "data"))
79
+    shapeattr <- H5Aopen(group, "shape")
80
+    shape <- H5Aread(shapeattr)
81
+    H5Aclose(shapeattr)
82
+    if (encoding == "csr_matrix") {
83
+        sparseMatrix(j=i, p=p, x=x, dims=shape, repr="R", index1=FALSE)
84
+    } else {
85
+        sparseMatrix(i=i, p=p, x=x, dims=shape, repr="C", index1=FALSE)
69 86
     }
70
-        table
71 87
 }
72 88
 
73 89
 #' @importFrom rhdf5 H5Iget_type H5Aexists H5Aopen H5Aread H5Aclose H5Dread
74 90
 read_matrix <- function(dataset) {
75 91
     if (H5Iget_type(dataset) == "H5I_GROUP" && H5Aexists(dataset, "encoding-type")) {
76 92
         encattr <- H5Aopen(dataset, "encoding-type")
77
-        encoding <- H5Aread(dataset, "encoding")
93
+        encoding <- H5Aread(encattr)
78 94
         H5Aclose(encattr)
79 95
         if (encoding %in% c("csr_matrix", "csc_matrix")) {
80
-            i <- H5Dread(dataset & "indices")
81
-            p <- H5Dread(dataset & "indptr")
82
-            x <- H5Dread(dataset & "data")
83
-            shapeattr <- H5Aopen(dataset, "shape")
84
-            shape <- H5Aread(shapeattr)
85
-            H5Aclose(shapeattr)
86
-            if (encoding == "csr_matrix") {
87
-                sparseMatrix(j=i, p=p, x=x, dims=shape, repr="R", index1=FALSE)
88
-            } else {
89
-                sparseMatrix(i=i, p=p, x=x, dims=shape, repr="C", index1=FALSE)
90
-            }
96
+            read_sparse_matrix(dataset, encoding)
91 97
         } else {
92 98
             warning(paste0("Unknown encoding ", encoding, "when attempting to read matrix"))
93 99
             matrix()
... ...
@@ -97,11 +103,34 @@ read_matrix <- function(dataset) {
97 103
     }
98 104
 }
99 105
 
100
-#' @details MultiAssayExperiment-helpers
101
-#'
102
-#' @description Create a MultiAssayExperiment or a Seurat object from the .h5mu file
106
+#' @importFrom rhdf5 H5Aopen H5Aread H5Aclose
107
+read_group <- function(group) {
108
+    encattr <- H5Aopen(group, "encoding-type")
109
+    encoding <- H5Aread(encattr)
110
+    H5Aclose(encattr)
111
+    if (encoding == "dataframe") {
112
+        read_dataframe(group)
113
+    } else if (endsWith(encoding, "matrix")) {
114
+        read_sparse_matrix(group)
115
+    } else {
116
+        warning(paste0("Unknown encoding ", encoding))
117
+        invisible(NULL)
118
+    }
119
+}
120
+
121
+#' @importFrom rhdf5 H5Iget_type H5Dread
122
+read_attribute <- function(attr) {
123
+    if (H5Iget_type(attr) == "H5I_GROUP")
124
+        read_group(attr)
125
+    else
126
+        H5Dread(attr)
127
+}
128
+
129
+#' Read an .h5mu file and create a MultiAssayExperiment
103 130
 #'
104
-#' @importFrom rhdf5 h5ls H5Dread
131
+#' @importFrom rhdf5 h5ls H5Fclose
132
+#' @importMethodsFrom rhdf5 &
133
+#' @importFrom S4Vectors SimpleList
105 134
 #' @importFrom SingleCellExperiment SingleCellExperiment
106 135
 #' @importFrom SummarizedExperiment SummarizedExperiment
107 136
 #' @importFrom MultiAssayExperiment MultiAssayExperiment
... ...
@@ -121,44 +150,34 @@ ReadH5MU <- function(file) {
121 150
     modalities <- lapply(assays, function(mod) {
122 151
         view <- h5 & paste("mod", mod, sep="/")
123 152
         X <- read_matrix(view & "X")
124
-
125 153
         var <- read_with_index(view & "var")
126
-
127 154
         obs <- read_with_index(view  & "obs")
128
-        if (is("obs", "data.frame"))
129
-            rownames(obs) <- paste(mod, rownames(obs), sep="-")
155
+        primary <- rownames(obs)
156
+        rownames(obs) <- paste(mod, rownames(obs), sep="-")
130 157
 
131 158
         viewnames <- h5ls(view, recursive=FALSE)$name
159
+        colnames(X) <- rownames(var)
160
+        rownames(X) <- rownames(obs)
132 161
         if ("obsm" %in% viewnames) {
133 162
             obsmnames <- h5ls(view & "obsm", recursive=FALSE)$name
134
-            obsm <- lapply(obsnames, function(space) {
135
-                H5Dread(view & paste("obsm", space, sep="/"))
163
+            obsm <- lapply(obsmnames, function(space) {
164
+                elem <- read_attribute(view & paste("obsm", space, sep="/"))
165
+                rownames(elem) <- rownames(obs)
166
+                elem
136 167
             })
137 168
             names(obsm) <- obsmnames
138
-            se <- SingleCellExperiment(assays=SimpleList(counts=X), rowData=var, colData=obs, reducedDims=obsm)
169
+            se <- SingleCellExperiment(assays=SimpleList(counts=t(X)), rowData=var, colData=obs, reducedDims=obsm)
139 170
         } else {
140
-            se <- SummarizedExperiment(assays=SimpleList(counts=X), rowData=var, colData=obs)
171
+            se <- SummarizedExperiment(assays=SimpleList(counts=t(X)), rowData=var, colData=obs)
141 172
         }
142 173
 
143
-        se
174
+        list(exp=se, primary=primary)
144 175
     })
145 176
     names(modalities) <- assays
146 177
 
147 178
     # Create sampleMap
148
-    mapping <- lapply(assays, function(mod) {
149
-        primary <- colnames(modalities[[mod]])
150
-
151
-        view <- h5 & paste("mod", mod, sep="/")
152
-        view_attr <- h5attributes(view[["obs"]])
153
-        indexcol <- "_index"
154
-        if ("_index" %in% names(view_attr)) {
155
-          indexcol <- view_attr$`_index`
156
-        }
157
-        obs_names <- view[['obs']]$read()[,indexcol,drop=TRUE]
158
-        sm <- data.frame(primary = obs_names,
159
-                         colname = rownames(colData(modalities[[mod]])),
160
-                         stringsAsFactors = FALSE)
161
-        sm
179
+    mapping <- lapply(modalities, function(mod) {
180
+        data.frame(primary=mod$primary, colname=colnames(mod$exp), stringsAsFactors=FALSE)
162 181
     })
163 182
 
164 183
     obsmap <- do.call(rbind, mapping)
... ...
@@ -166,9 +185,9 @@ ReadH5MU <- function(file) {
166 185
     obsmap <- obsmap[,c("assay", "primary", "colname")]
167 186
 
168 187
     # Close the connection
169
-    h5$close_all()
188
+    H5Fclose(h5)
170 189
 
171 190
     # Create a MAE object
172
-    MultiAssayExperiment(modalities, metadata, obsmap)
191
+    MultiAssayExperiment(lapply(modalities, function(mod)mod$exp), metadata, obsmap)
173 192
 }
174 193
 
Browse code

initial commit

Start port of muon.r MAE functions to rhdf5.
Includes a workaround for missing features in rhdf5. The workaround
works for file creation properties, but not for references, because the
H5Rdereference2 symbol is missing from the rhdf5.so library. This is
because the GNU linker, when linking with a static library, by default
only includes those objects from the static lib that are actually used
in the final output.

Ilia Kats authored on 10/09/2021 14:59:16
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,174 @@
1
+#' @importFrom rhdf5 H5Iget_type h5readAttributes h5ls H5Aexists H5Aopen H5Aread H5Aclose
2
+read_with_index <- function(dataset) {
3
+    cls <- H5Iget_type(dataset)
4
+    if (cls == "H5I_GROUP" && H5Aexists(dataset, "encoding-type")) {
5
+        encattr <- H5Aopen(dataset, "encoding-type")
6
+        encoding <- H5Aread(encattr, "encoding-type")
7
+        H5Aclose(encattr)
8
+        if (encoding != "dataframe") {
9
+            warning(paste0("Unknown encoding ", encoding, " when attempting to read data frame"))
10
+            return(data.frame())
11
+        }
12
+        # Table is saved as a group rather than a dataset
13
+        indexcol <- "_index"
14
+        if (H5Aexists(dataset, "_index")) {
15
+            indexattr <- H5Aopen(dataset, "_index")
16
+            indexcol <- H5Aread(indexattr)
17
+            H5Aclose(indexattr)
18
+        }
19
+
20
+        columns <- h5ls(dataset, recursive=FALSE, datasetinfo=FALSE)$name
21
+        columns <- columns[columns != "__categories"]
22
+
23
+        col_list <- lapply(columns, function(name) {
24
+            values <- H5Dread(dataset & name)
25
+            col <- dataset & name
26
+            if (H5Aexists(col, "categories")) {
27
+                attr <- H5Aopen(col, "categories")
28
+                if (H5is_attr_reference(attr)) {
29
+                    labels <- H5deref_attr_reference(attr)
30
+                    values <- factor(as.integer(values), labels=H5Dread(labels))
31
+                } else {
32
+                    warning(paste0("found categories attribute for column ", name, ", but it is not a reference"))
33
+                }
34
+                H5Aclose(attr)
35
+            }
36
+            values
37
+        })
38
+        table <- data.frame(Reduce(cbind, col_list))
39
+        colnames(table) <- columns
40
+
41
+        if (indexcol %in% colnames(table)) {
42
+            rownames(table) <- table[,indexcol,drop=TRUE]
43
+            table <- table[,!colnames(table) %in% c(indexcol),drop=FALSE]
44
+        }
45
+
46
+        # Fix column order
47
+        if (H5Aexists(dataset, "column-order")) {
48
+            orderedattr <- H5Aopen(dataset, "column-order")
49
+            ordered_columns <- H5Aread(orderedattr)
50
+            H5Aclose(orderedattr)
51
+
52
+            ordered_columns <- ordered_columns[ordered_columns != indexcol]
53
+            table <- table[,ordered_columns[ordered_columns %in% columns],drop=FALSE]
54
+        }
55
+    } else {
56
+        table <- H5Dread(dataset)
57
+
58
+        indexcol <- "_index"
59
+        if (H5Aexists(dataset, "_index")) {
60
+            index <- H5Aopen(dataset, "_index")
61
+            indexcol <- H5Aread(index)
62
+            H5Aclose(index)
63
+        }
64
+
65
+        if (indexcol %in% colnames(table)) {
66
+            rownames(table) <- table[,indexcol,drop=TRUE]
67
+            table <- table[,!colnames(table) %in% c(indexcol),drop=FALSE]
68
+        }
69
+    }
70
+        table
71
+}
72
+
73
+#' @importFrom rhdf5 H5Iget_type H5Aexists H5Aopen H5Aread H5Aclose H5Dread
74
+read_matrix <- function(dataset) {
75
+    if (H5Iget_type(dataset) == "H5I_GROUP" && H5Aexists(dataset, "encoding-type")) {
76
+        encattr <- H5Aopen(dataset, "encoding-type")
77
+        encoding <- H5Aread(dataset, "encoding")
78
+        H5Aclose(encattr)
79
+        if (encoding %in% c("csr_matrix", "csc_matrix")) {
80
+            i <- H5Dread(dataset & "indices")
81
+            p <- H5Dread(dataset & "indptr")
82
+            x <- H5Dread(dataset & "data")
83
+            shapeattr <- H5Aopen(dataset, "shape")
84
+            shape <- H5Aread(shapeattr)
85
+            H5Aclose(shapeattr)
86
+            if (encoding == "csr_matrix") {
87
+                sparseMatrix(j=i, p=p, x=x, dims=shape, repr="R", index1=FALSE)
88
+            } else {
89
+                sparseMatrix(i=i, p=p, x=x, dims=shape, repr="C", index1=FALSE)
90
+            }
91
+        } else {
92
+            warning(paste0("Unknown encoding ", encoding, "when attempting to read matrix"))
93
+            matrix()
94
+        }
95
+    } else {
96
+        H5Dread(dataset)
97
+    }
98
+}
99
+
100
+#' @details MultiAssayExperiment-helpers
101
+#'
102
+#' @description Create a MultiAssayExperiment or a Seurat object from the .h5mu file
103
+#'
104
+#' @importFrom rhdf5 h5ls H5Dread
105
+#' @importFrom SingleCellExperiment SingleCellExperiment
106
+#' @importFrom SummarizedExperiment SummarizedExperiment
107
+#' @importFrom MultiAssayExperiment MultiAssayExperiment
108
+#'
109
+#' @export
110
+ReadH5MU <- function(file) {
111
+    # Connect to the the file
112
+    h5 <- open_and_check_mudata(file)
113
+
114
+    # Check all the assays are written
115
+    assays <- h5ls(h5 & "mod", recursive=FALSE)$name
116
+
117
+    # Create global colData
118
+    metadata <- read_with_index(h5 & "obs")
119
+
120
+    # Create an experiments list
121
+    modalities <- lapply(assays, function(mod) {
122
+        view <- h5 & paste("mod", mod, sep="/")
123
+        X <- read_matrix(view & "X")
124
+
125
+        var <- read_with_index(view & "var")
126
+
127
+        obs <- read_with_index(view  & "obs")
128
+        if (is("obs", "data.frame"))
129
+            rownames(obs) <- paste(mod, rownames(obs), sep="-")
130
+
131
+        viewnames <- h5ls(view, recursive=FALSE)$name
132
+        if ("obsm" %in% viewnames) {
133
+            obsmnames <- h5ls(view & "obsm", recursive=FALSE)$name
134
+            obsm <- lapply(obsnames, function(space) {
135
+                H5Dread(view & paste("obsm", space, sep="/"))
136
+            })
137
+            names(obsm) <- obsmnames
138
+            se <- SingleCellExperiment(assays=SimpleList(counts=X), rowData=var, colData=obs, reducedDims=obsm)
139
+        } else {
140
+            se <- SummarizedExperiment(assays=SimpleList(counts=X), rowData=var, colData=obs)
141
+        }
142
+
143
+        se
144
+    })
145
+    names(modalities) <- assays
146
+
147
+    # Create sampleMap
148
+    mapping <- lapply(assays, function(mod) {
149
+        primary <- colnames(modalities[[mod]])
150
+
151
+        view <- h5 & paste("mod", mod, sep="/")
152
+        view_attr <- h5attributes(view[["obs"]])
153
+        indexcol <- "_index"
154
+        if ("_index" %in% names(view_attr)) {
155
+          indexcol <- view_attr$`_index`
156
+        }
157
+        obs_names <- view[['obs']]$read()[,indexcol,drop=TRUE]
158
+        sm <- data.frame(primary = obs_names,
159
+                         colname = rownames(colData(modalities[[mod]])),
160
+                         stringsAsFactors = FALSE)
161
+        sm
162
+    })
163
+
164
+    obsmap <- do.call(rbind, mapping)
165
+    obsmap["assay"] <- rep(assays, times=vapply(mapping, nrow, 1))
166
+    obsmap <- obsmap[,c("assay", "primary", "colname")]
167
+
168
+    # Close the connection
169
+    h5$close_all()
170
+
171
+    # Create a MAE object
172
+    MultiAssayExperiment(modalities, metadata, obsmap)
173
+}
174
+