Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Webworker h5wasm provider (for random access to local files) #1582

@bmaranville

Description

@bmaranville

Is your feature request related to a problem?

Currently, for the h5wasm provider, the entire file must be loaded into memory before use (it is written to the MEMFS virtual file system provided by Emscripten).

This puts an upper limit of 2GB (?) on the size of files that can be used with the h5wasm provider, and can cause memory issues for users (entire file in memory).

The only advantage of this system is that file access (once loaded) is very very fast.

Requested solution or feature

Use the WORKERFS Emscripten file system and a webworker-based h5wasm provider, which allows random access to files on the user's computer without loading the entire file into memory.

Alternatives you've considered

The new File System Access API could also solve this problem: users could mount a local working folder and have random access to the files in that folder. However, this API is only fully implemented in Chrome-based browsers.

Additional context

Here is an example worker:

// h5wasm_worker.js
// Worker script: loads Comlink and h5wasm via importScripts.
importScripts("https://unpkg.com/comlink/dist/umd/comlink.js");
// NOTE(review): the package specifier below reads "[email protected]", which looks
// like an obfuscated `h5wasm@<version>` — restore the intended version before use.
importScripts("https://cdn.jsdelivr.net/npm/[email protected]/dist/iife/h5wasm.js")

// Directory that load_file() creates (if missing) and mounts WORKERFS on.
const WORKING_DIRECTORY = '/working_directory';

/**
 * Mount `file` on WORKING_DIRECTORY through WORKERFS and open it with h5wasm.
 *
 * Only a single file is used at a time: if `api.file` is set, it is closed and
 * the previous mount is removed before the new file is mounted. On success the
 * new handle is stored in `api.file`.
 *
 * @param {File} file - file to mount; its `name` becomes the path inside
 *   WORKING_DIRECTORY.
 * @returns {Promise<void>}
 * @throws Rethrows any error raised while opening the file. The new mount is
 *   released first and `api.file` is left `null`, so a later call can retry.
 */
async function load_file(file) {
  const { FS } = await h5wasm.ready;
  const { filesystems: { WORKERFS } } = FS;
  const { name: filename } = file;
  if (!FS.analyzePath(WORKING_DIRECTORY).exists) {
    FS.mkdir(WORKING_DIRECTORY);
  }
  if (api.file !== null) {
    const previous = api.file;
    // Clear the reference before closing so a failure further down never
    // leaves api.file pointing at an already-closed handle.
    api.file = null;
    try {
      previous.close();
    } finally {
      // only use a single file at a time;
      // unmount the previous filesystem
      FS.unmount(WORKING_DIRECTORY);
    }
  }

  FS.mount(WORKERFS, { files: [file] }, WORKING_DIRECTORY);
  try {
    api.file = new h5wasm.File(`${WORKING_DIRECTORY}/${filename}`);
  } catch (err) {
    // Opening failed: undo the mount, otherwise the next call would try to
    // mount on a mount point that is still in use.
    FS.unmount(WORKING_DIRECTORY);
    throw err;
  }
}

/**
 * Look up `path` in the currently loaded file.
 *
 * @param {string} [path='/'] - path handed to the open file's `get()`.
 * @returns {Promise<*>} `null` when `api.file` is `null`; otherwise whatever
 *   `api.file.get(path)` returns.
 */
async function get_entity(path = '/') {
  const { file } = api;
  return file === null ? null : file.get(path);
}

/**
 * Read the `type` property of the entity at `path`.
 *
 * @param {string} path - path forwarded to get_entity().
 * @returns {Promise<*>} `null` when get_entity() yields `null` (the same
 *   convention get_keys() and get_value() follow); otherwise `entity.type`.
 */
async function get_type(path) {
  const entity = await get_entity(path);
  if (entity === null) {
    return null;
  }
  return entity.type;
}

/**
 * Read the `attrs` property of the entity at `path`.
 *
 * @param {string} path - path forwarded to get_entity().
 * @returns {Promise<*>} `null` when get_entity() yields `null` (the same
 *   convention get_keys() and get_value() follow); otherwise `entity.attrs`.
 */
async function get_attrs(path) {
  const entity = await get_entity(path);
  if (entity === null) {
    return null;
  }
  return entity.attrs;
}

// Group functions:
/**
 * List the keys of the entity at `group_path`.
 *
 * @param {string} group_path - path forwarded to get_entity(); presumably
 *   refers to an h5wasm.Group (not checked here).
 * @returns {Promise<*>} `null` when get_entity() yields `null`; otherwise the
 *   result of calling `keys()` on the entity.
 */
async function get_keys(group_path) {
  const group = await get_entity(group_path);
  // assert group instanceof h5wasm.Group;
  return group === null ? null : group.keys();
}

// Dataset functions:
/**
 * Read the `value` property of the entity at `dataset_path`.
 *
 * @param {string} dataset_path - path forwarded to get_entity(); presumably
 *   refers to an h5wasm.Dataset (not checked here).
 * @returns {Promise<*>} `null` when get_entity() yields `null`; otherwise
 *   `entity.value`.
 */
async function get_value(dataset_path) {
  const dataset = await get_entity(dataset_path);
  // assert dataset instanceof h5wasm.Dataset;
  return dataset === null ? null : dataset.value;
}

// Object handed to Comlink.expose(); it also holds the worker's shared state.
const api = {
  // Promise taken from h5wasm.ready.
  ready: h5wasm.ready,
  // Handle of the currently open file; stays null until load_file() stores one.
  file: null,

  // Functions defined in this worker script.
  load_file,
  get_entity,
  get_type,
  get_attrs,
  get_keys,
  get_value,
};
Comlink.expose(api);

and here is example client code for interacting with the worker:

<script type="module">
  import * as Comlink from "https://unpkg.com/comlink/dist/esm/comlink.mjs";

  /**
   * Start the h5wasm worker and wire the element with id "file" to it.
   *
   * On each "change" event the first selected file is sent to the worker's
   * load_file(), then the keys at "/" are fetched and logged as an example call.
   *
   * @returns {Promise<void>}
   */
  async function init() {
    const worker = new Worker("h5wasm_worker.js");
    const h5wasm_proxy = Comlink.wrap(worker);
    const fileInput = document.getElementById("file");
    fileInput.addEventListener("change", async (event) => {
      const selectedFile = event.target.files[0];
      if (selectedFile === undefined) {
        // Nothing is selected (the list is empty), so there is nothing to load.
        return;
      }
      try {
        await h5wasm_proxy.load_file(selectedFile);
        // example api call on file:
        const keys = await h5wasm_proxy.get_keys("/");
        console.log({keys});
      } catch (err) {
        // Report worker-side failures instead of leaving an unhandled rejection.
        console.error(err);
      }
    });
  }
  init();
</script>

Metadata

Metadata

Assignees

Labels

enhancement: New feature or request

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions