Thanks to visit codestin.com
Credit goes to code.bioconductor.org

R/write_h5mu.R
3178c437
 #' @importClassesFrom Matrix Matrix
 #' @importClassesFrom DelayedArray DelayedMatrix
 setClassUnion("Matrix_OR_DelayedMatrix", c("matrix", "Matrix", "DelayedMatrix"))
 
fe9356a4
 #' @importClassesFrom S4Vectors DataFrame
 setClassUnion("data.frame_OR_DataFrame", c("data.frame", "DataFrame"))
 
57b02c9e
 #' Save an experiment to an .h5ad file.
 #'
d3abf875
 #' Note that NA values are not supported by HDF5, and therefore by h5ad. The behavior of this
 #' function if NAs are present is undefined.
 #'
57b02c9e
 #' @param object The object to save.
 #' @param file Name of the file to save to.
 #' @param overwrite Currently unused.
 #'
d8c3ad4e
 #' @returns NULL, invisibly
 #'
 #' @examples
3226b179
 #' data(miniACC, package="MultiAssayExperiment")
9e8b2a91
 #' writeH5AD(miniACC[[1]], "miniacc.h5ad")
d8c3ad4e
 #'
fa40c4f3
 #' @importFrom rhdf5 H5Iget_type H5Gcreate H5Gclose
 #' @importFrom SummarizedExperiment colData assays
 #' @importFrom S4Vectors metadata
 #' @importFrom methods hasMethod as
 #' @importFrom SingleCellExperiment altExps rowData colData colPairNames colPair rowPairNames rowPair reducedDims
 #'
51cea720
 #' @export
fa40c4f3
 writeH5AD <- function(object, file, overwrite) {
     need_finalize <- FALSE
     written_object <- FALSE
     if (is.character(file)) {
         file <- open_h5(file)
         need_finalize <- TRUE
     } else if (!(H5Iget_type(file) %in% c("H5I_FILE", "H5I_GROUP")))
         stop("file must be a character, file or group")
 
     cls <- class(object)
2b4bf31f
     if (is(object, "RangedSummarizedExperiment") && !is(object, "SingleCellExperiment")) {
fa40c4f3
         warning("Ranged data is currently unsupported. Coercing to SummarizedExperiment...")
         object <- as(object, "SummarizedExperiment")
     }
5ce6c7bc
     if (is(object, "SingleCellExperiment")) {
fa40c4f3
         write_data_frame(file, "var", rowData(object))
         obsm <- reducedDims(object)
         if (length(obsm) > 0) {
             obsmgrp <- H5Gcreate(file, "obsm")
             mapply(function(name, data) {
03679528
                 if (is(data, "data.frame_OR_DataFrame")) {
fa40c4f3
                     rownames(data) <- rownames(colData(object))
                     write_data_frame(obsmgrp, name, data)
                 } else {
                     if (length(dim(data)) == 1)
                         data <- as.vector(data)
                     else
03679528
                         data <- data
fa40c4f3
                     write_matrix(obsmgrp, name, data)
                 }
             }, names(obsm), obsm)
             H5Gclose(obsmgrp)
         }
 
         lapply(list(list(name="obsp", names=colPairNames, getter=colPair), list(name="varp", names=rowPairNames, getter=rowPair)), function(cp) {
             names <- cp$names(object)
             if (length(names) > 0) {
                 pairgrp <- H5Gcreate(file, cp$name)
                 lapply(names, function(cname) {
                     write_matrix(pairgrp, cname, cp$getter(object, cname, asSparse=TRUE))
                 })
                 H5Gclose(pairgrp)
             }
         })
         object <- as(object, "SummarizedExperiment")
         written_object <- TRUE
     }
     if (is(object, "SummarizedExperiment")) {
         write_data_frame(file, "obs", colData(object))
         rdata <- rowData(object)
         if (ncol(rdata) > 0 || !is.null(rownames(rdata)))
             write_data_frame(file, "var", rdata)
 
         assays <- assays(object)
         nassays <- length(assays)
fe59b618
         write_matrix(file, "X", assays[[1]], needTranspose=FALSE)
fa40c4f3
         if (nassays > 1) {
             layersgrp <- H5Gcreate(file, "layers")
             mapply(function(name, mat) {
fe59b618
                 write_matrix(layersgrp, name, mat, needTranspose=FALSE)
fa40c4f3
             }, names(assays[2:nassays]), assays[2:nassays])
             H5Gclose(layersgrp)
         }
 
         writeList(file, "uns", metadata(object))
 
         if (hasMethod("altExps", class(object))) {
             naltexps <- length(altExps(object))
             if (naltexps > 1) {
                 warning("Alternative experiments are currently unsupported. Construct a MultiAssayExperiment object or write them as individual H5AD files.")
             }
         }
         written_object <- TRUE
     }
     if (is(object, "Matrix_OR_DelayedMatrix")) {
fe59b618
         write_matrix(file, "X", object, needTranspose=FALSE)
5ce6c7bc
         rownames <- rownames(object)
         colnames <- colnames(object)
         if (is.null(rownames))
             rownames <- as.character(seq_len(nrow(object)))
         if (is.null(colnames))
             colnames <- as.character(seq_len(ncol(object)))
         var <- data.frame(row.names=rownames)
         obs <- data.frame(row.names=colnames)
         write_data_frame(file, "obs", obs)
         write_data_frame(file, "var", var)
fa40c4f3
         written_object <- TRUE
     }
 
     if (!written_object) {
         warning("Objects of class ", class(object), " are currently unsupported, skipping...")
     } else {
         write_object_class(file, cls[1])
         finalize_anndata_internal(file)
         if (need_finalize)
             finalize_anndata(file)
     }
51cea720
     invisible(NULL)
fa40c4f3
 }
e8c319bb
 
57b02c9e
 #' Save a \code{\linkS4class{MultiAssayExperiment}} to an .h5mu file.
e8c319bb
 #'
d3abf875
 #' Note that NA values are not supported by HDF5, and therefore by h5mu. The behavior of this
 #' function if NAs are present is undefined.
 #'
57b02c9e
 #' @param object A \code{\linkS4class{MultiAssayExperiment}}.
 #' @param file Name of the file to save to.
 #' @param overwrite Currently unused.
 #'
d8c3ad4e
 #' @returns NULL, invisibly
 #'
 #' @examples
3226b179
 #' data(miniACC, package="MultiAssayExperiment")
9e8b2a91
 #' writeH5MU(miniACC, "miniacc.h5mu")
d8c3ad4e
 #'
93c42b30
 #' @importFrom rhdf5 H5Gcreate H5Gclose
fe9356a4
 #' @importFrom MultiAssayExperiment colData experiments sampleMap metadata
e8c319bb
 #'
bcce7b15
 #' @export
fa40c4f3
 writeH5MU <- function(object, file, overwrite) {
     if (!is(object, "MultiAssayExperiment"))
         stop("Only MultiAssayExperiment objects are currently supported.")
     if (is.character(file)) {
         h5 <- open_h5(file)
         need_finalize <- TRUE
     } else
         stop("file must be a character")
bcce7b15
 
     obs <- as.data.frame(colData(object), stringsAsFactors = FALSE)
     write_data_frame(h5, "obs", obs)
 
     mods <- H5Gcreate(h5, "mod")
8effa257
     obsmgrp <- H5Gcreate(h5, "obsm")
     obsmapgrp <- H5Gcreate(h5, "obsmap")
     samplemap <- sampleMap(object)
     globalrownames <- rownames(obs)
     vars <- mapply(function(mname, mod) {
         mod_group <- H5Gcreate(mods, mname)
9e8b2a91
         writeH5AD(mod, mod_group)
bcce7b15
         H5Gclose(mod_group)
51cea720
 
8effa257
         cmap <- samplemap[samplemap$assay == mname,]
         cmaporder <- match(globalrownames, cmap$primary)
         localorder <- match(cmap$colname, colnames(mod))
d8c3ad4e
         obsmap <- vapply(cmaporder, function(o)ifelse(is.na(o), 0L, localorder[o]), 0L)
93c42b30
         writeDataset(obsmapgrp, mname, obsmap)
         writeDataset(obsmgrp, mname, as.integer(!is.na(cmaporder)))
8effa257
 
         data.frame(row.names = rownames(mod))
     }, names(object), object)
93c42b30
     writeAttribute(mods, "order", names(object))
bcce7b15
     H5Gclose(mods)
8effa257
     H5Gclose(obsmgrp)
     H5Gclose(obsmapgrp)
e8c319bb
 
bcce7b15
     var <- do.call(rbind, vars)
     write_data_frame(h5, "var", var)
e8c319bb
 
fe9356a4
     writeList(h5, "uns", metadata(object))
 
83946533
     write_object_class(h5, class(object)[1])
bcce7b15
     finalize_mudata(h5)
     invisible(NULL)
fa40c4f3
 }
e8c319bb
 
93c42b30
 #' @importFrom rhdf5 H5Gcreate H5Gclose
d2dd1d61
 #' @importFrom methods is as
fe59b618
 write_matrix <- function(parent, key, mat, needTranspose=TRUE) {
e8d5e1c6
     if (is(mat, "dgeMatrix"))
         mat <- as.matrix(mat)
93c42b30
     if (is.matrix(mat) || is.vector(mat) || is.array(mat) || is.numeric(mat) || is.integer(mat) || is.logical(mat) || is.character(mat)) { # is.vector returns false for vectors with attributes
         isscalar <- length(mat) == 1 & !is.null(attr(mat, "encoding-scalar"))
fe59b618
         naidx <- is.na(mat)
         hasna <- any(naidx)
         if (needTranspose) {
             if (is.matrix(mat))
                 mat <- t(mat)
             else if (is.array(mat))
                 mat <- aperm(mat, length(dim(mat)):1)
         }
93c42b30
         if (hasna && is.double(mat)) {
             # FIXME: extend anndata spec to handle double NAs?
fe59b618
             mat[naidx] <- NaN
             hasna <- FALSE
         } else if (hasna && is.character(mat)) {
             # FIXME: extend anndata spec to handle string NAs?
             mat[naidx] <- ""
93c42b30
             hasna <- FALSE
         }
 
         if (isscalar || !hasna) {
             writeDataset(parent, key, mat, scalar=isscalar)
             dset <- h5autoclose(parent & key)
 
             if (!isscalar)
                 writeAttribute(dset, "encoding-type", ifelse(is.character(mat), "string-array", "array"))
             else
                 writeAttribute(dset, "encoding-type", ifelse(is.character(mat), "string", "numeric-scalar"))
             writeAttribute(dset, "encoding-version", "0.2.0")
         } else {
             grp <- H5Gcreate(parent, key)
39dc0a3a
             if (is.character(mat))
                 mat[naidx] <- ""
             else
                 mat[naidx] <- as(0, type(mat))
93c42b30
             write_matrix(grp, "values", mat)
fe59b618
             write_matrix(grp, "mask", naidx)
93c42b30
             writeAttribute(grp, "encoding-type", ifelse(is.logical(mat), "nullable-boolean", "nullable-integer"))
             writeAttribute(grp, "encoding-version", "0.1.0")
             H5Gclose(grp)
         }
     } else if (is.factor(mat)) {
         grp <- H5Gcreate(parent, key)
         codes <- as.integer(mat)
b8fc6f26
         codes[is.na(mat)] <- 0L
93c42b30
         write_matrix(grp, "codes", codes - 1L)
         write_matrix(grp, "categories", levels(mat))
         writeAttribute(grp, "ordered", is.ordered(mat))
         writeAttribute(grp, "encoding-type", "categorical")
         writeAttribute(grp, "encoding-version", "0.2.0")
         H5Gclose(grp)
2b8e1156
     } else if (is(mat, "dgCMatrix") || is(mat, "dgRMatrix") || is(mat, "DelayedArray") && DelayedArray::is_sparse(mat)) {
         if (is(mat, "DelayedArray"))
0108b354
             mat <- as(mat, "RsparseMatrix")
2b8e1156
 
bcce7b15
         grp <- H5Gcreate(parent, key)
ef122c89
         writeDataset(grp, "indptr", mat@p)
         writeDataset(grp, "data", mat@x)
3365972a
         writeAttribute(grp, "shape", if (needTranspose) dim(mat) else rev(dim(mat)), scalar=FALSE)
93c42b30
         writeAttribute(grp, "encoding-version", "0.1.0")
bcce7b15
         if (is(mat, "dgCMatrix")) {
ef122c89
             writeDataset(grp, "indices", mat@i)
3365972a
             writeAttribute(grp, "encoding-type", ifelse(needTranspose, "csc_matrix", "csr_matrix"))
e8c319bb
         } else {
ef122c89
             writeDataset(grp, "indices", mat@j)
3365972a
             writeAttribute(grp, "encoding-type", ifelse(needTranspose, "csr_matrix", "csc_matrix"))
e8c319bb
         }
38e7eca8
         H5Gclose(grp)
2b8e1156
     } else if (is(mat, "DelayedArray") && requireNamespace("HDF5Array", quietly=TRUE)) {
         writeArrayToMuData(mat, parent, key)
bcce7b15
     } else {
d8c3ad4e
         stop("Writing matrices of type ", class(mat), " is not implemented.")
e8c319bb
     }
 }
 
93c42b30
 #' @importFrom rhdf5 H5Gcreate H5Gclose h5createAttribute
bcce7b15
 write_data_frame <- function(parent, key, df) {
     group <- H5Gcreate(parent, key)
 
     columns <- colnames(df)
6562f90a
     index <- rownames(df)
     if (is.null(index))
         index <- as.character(1:nrow(df))
     df[["_index"]] <- index
bcce7b15
     for (col in colnames(df)) {
93c42b30
         write_matrix(group, col, df[[col]])
e8c319bb
     }
 
bcce7b15
     # Write attributes
93c42b30
     writeAttribute(group, "_index", "_index")
     writeAttribute(group, "encoding-type", "dataframe")
     writeAttribute(group, "encoding-version", "0.2.0")
bcce7b15
     if (length(columns) > 0) {
4b2bd0b8
         writeAttribute(group, "column-order", columns, scalar=FALSE)
e8c319bb
     } else {
bcce7b15
         # When there are no columns, null buffer can't be written to a file.
         h5createAttribute(group, "column-order", dims=0)
e8c319bb
     }
bcce7b15
     H5Gclose(group)
     invisible(NULL)
 }
2b8e1156
 
fa836f93
 .registeredHDF5ArrayMethods <- new.env()
 .registeredHDF5ArrayMethods$registered <- FALSE
2b8e1156
 #' @importFrom rhdf5 h5write H5Dclose
3178c437
 #' @importFrom DelayedArray write_block start width
d2dd1d61
 #' @importFrom methods setClass
3178c437
 registerHDF5ArrayMethods <- function() {
fa836f93
     if (!.registeredHDF5ArrayMethods$registered) {
2b8e1156
         haveHDF5Array <- requireNamespace("HDF5Array", quietly=TRUE)
3178c437
         if (!haveHDF5Array)
2b8e1156
             return(FALSE)
2b9ba3aa
 
2b8e1156
         setClass("MuDataFileRealizationSink",
                  contains="HDF5RealizationSink",
                  slots=c(parent="H5IdComponent",
2b9ba3aa
                          datasetname="character"),
                  where=.registeredHDF5ArrayMethods)
2b8e1156
 
 
3178c437
         setMethod(write_block, "MuDataFileRealizationSink", function(sink, viewport, block) {
2b8e1156
             if (!is.array(block))
                 block <- as.array(block)
3178c437
             h5write(block, sink@parent, sink@datasetname, start=start(viewport), count=width(viewport))
2b8e1156
             sink
2b9ba3aa
         }, where=.registeredHDF5ArrayMethods)
2b8e1156
 
fa836f93
         .registeredHDF5ArrayMethods$registered <- TRUE
2b8e1156
     }
fa836f93
     .registeredHDF5ArrayMethods$registered
2b8e1156
 }
 
 #' @importFrom rhdf5 h5createDataset H5Fget_name H5Iget_name
d2dd1d61
 #' @importFrom methods new
2b8e1156
 MuDataFileRealizationSink <- function(dim, type, parent, key, dimnames=NULL, as.sparse=FALSE) {
     chunkdim <- HDF5Array::getHDF5DumpChunkDim(dim)
ef122c89
     h5createDataset(parent, key, dim, storage.mode=type, chunk=chunkdim, level=9, shuffle=TRUE)
2b8e1156
     file <- H5Fget_name(parent)
     path <- paste(H5Iget_name(parent), key, sep="/")
     new("MuDataFileRealizationSink", dim=dim, dimnames=dimnames, type=type, as_sparse=as.sparse,
                                      filepath=file, name=path, chunkdim=chunkdim, parent=parent, datasetname=key)
 }
 
3178c437
 #' @importFrom DelayedArray is_sparse type BLOCK_write_to_sink
2b8e1156
 writeArrayToMuData <- function(x, parent, key, verbose=NA) {
3178c437
     if (!registerHDF5ArrayMethods())
         stop("The HDF5Array packages must be installed to save DelayedArrays.")
     as.sparse <- is_sparse(x)
2b8e1156
     sink_dimnames <- dimnames(x)
3178c437
     sink <- MuDataFileRealizationSink(dim(x), type(x), parent, key, sink_dimnames, as.sparse)
2b8e1156
 
     verbose <- DelayedArray:::normarg_verbose(verbose)
3178c437
     sink <- BLOCK_write_to_sink(sink, x, verbose=verbose)
2b8e1156
     invisible(NULL)
 }
ef122c89
 
 # this is a straight port of the h5py guess_chunk function
 chunk_base <- 16 * 1024
 chunk_min <- 8 * 1024
 chunk_max <- 1024 * 1024
 guess_chunk <- function(shape, storage.mode) {
     nbytes <- switch(storage.mode, double=8, integer=4, logical=1, character=8)
     ndims <- length(shape)
 
     dset_size <- prod(shape) * nbytes
     target_size <- chunk_base * (2^log10(dset_size / 1024^2))
     if (target_size > chunk_max)
         target_size <- chunk_max
     else if (target_size < chunk_min)
         target_size <- chunk_min
 
    idx <- 0
    while(TRUE) {
        chunk_bytes <- prod(shape) * nbytes
        if ((chunk_bytes < target_size || abs(chunk_bytes - target_size) / target_size < 0.5) && chunk_bytes < chunk_max)
            break
        if (prod(shape) == 1)
            break
 
        shape[idx %% ndims + 1] <- ceiling(shape[idx %% ndims + 1] / 2)
        idx <- idx + 1
    }
    as.integer(shape)
 }
 
fe9356a4
 #' @importFrom rhdf5 h5createDataset h5writeDataset
 writeDataset <- function(parent, key, data, scalar=FALSE) {
ef122c89
     shape <- dim(data)
     if (is.null(shape))
         shape <- length(data)
fe9356a4
     if (length(shape) == 1 && shape == 1 && scalar) {
         shape <- chunksize <- NULL
         level <- 0
     } else {
         chunksize <- guess_chunk(shape, storage.mode(data))
         level <- 9
     }
     h5createDataset(parent, key, shape, storage.mode=storage.mode(data), chunk=chunksize, level=level, shuffle=TRUE, encoding="UTF-8")
006c66be
     h5writeDataset(data, parent, key, variableLengthString=TRUE, encoding="UTF-8")
ef122c89
 }
fe9356a4
 
93c42b30
 #' @importFrom rhdf5 h5writeAttribute
4b2bd0b8
 writeAttribute <- function(obj, name, value, scalar=TRUE) {
93c42b30
     if (is.logical(value))
         value <- as.integer(value) # rhdf5 hasn't implemented logical attributes yet
     args <- list(attr=value, h5obj=obj, name=name)
     if (is.character(value))
         args$variableLengthString <- TRUE
4b2bd0b8
     if (length(value) == 1 && scalar)
93c42b30
         args$asScalar <- TRUE
     do.call(h5writeAttribute, args)
 }
 
fe9356a4
 #' @importFrom rhdf5 H5Gcreate H5Gclose
 #' @importFrom methods slotNames slot
 write_elem <- function(parent, key, data) {
     if (is(data, "list_OR_List"))
         writeList(parent, key, data)
     else if (is(data, "Matrix_OR_DelayedMatrix") || is(data, "vector_OR_Vector"))
93c42b30
         write_matrix(parent, key, data)
fe9356a4
     else if (is(data, "data.frame_OR_DataFrame"))
         write_data_frame(parent, key, data)
     else if (isS4(data)) {
         grp <- H5Gcreate(parent, key)
         for (slotnm in slotNames(data)) {
             write_elem(grp, slotnm, slot(data, slotnm))
         }
39dc0a3a
         writeAttribute(grp, "encoding-type", "dict")
93c42b30
         writeAttribute(grp, "encoding-version", "0.1.0")
fe9356a4
         H5Gclose(grp)
     } else
         warning(paste("Cannot write object of class", class(data), "skipping..."))
 }
 
 #' @importFrom rhdf5 H5Gcreate H5Gclose
 writeList <- function(parent, key, data) {
     if (length(data) > 0) {
         nms <- names(data)
         if (is.null(nms))
5d324faf
             nms <- as.character(seq_along(data))
fe9356a4
         grp <- H5Gcreate(parent, key)
         mapply(function(name, data) {
             write_elem(grp, name, data)
         }, nms, data)
93c42b30
         writeAttribute(grp, "encoding-type", "dict")
         writeAttribute(grp, "encoding-version", "0.1.0")
fe9356a4
         H5Gclose(grp)
     }
 }