From e64bf552c3df8da963075b295d27496cfe54a22e Mon Sep 17 00:00:00 2001
From: bbm
Date: Tue, 16 Dec 2025 11:25:20 -0500
Subject: [PATCH 1/5] return hsize_t (bigint) for shape, maxshape and
 total_size in metadata

---
 src/hdf5_util.cc | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/src/hdf5_util.cc b/src/hdf5_util.cc
index cdfc20c..d7ba99b 100644
--- a/src/hdf5_util.cc
+++ b/src/hdf5_util.cc
@@ -8,6 +8,7 @@
 #include "H5PLextern.h"
 #include <emscripten/bind.h>
 #include <emscripten/val.h>
+#include <emscripten/heap.h>
 
 #define ATTRIBUTE_DATA 0
 #define DATASET_DATA 1
@@ -329,10 +330,10 @@ val get_dtype_metadata(hid_t dtype)
         int ndims = H5Tget_array_ndims(dtype);
         std::vector<hsize_t> array_dims(ndims);
         H5Tget_array_dims2(dtype, &array_dims[0]);
-        int total_size = 1;
+        hsize_t total_size = 1;
         for (int i=0; i<ndims; i++) {
             total_size *= array_dims.at(i);
         }
@@ ... @@
         std::vector<hsize_t> count(rank);
         std::vector<hsize_t> block(rank);
         htri_t success = H5Sget_regular_hyperslab(dspace, nullptr, nullptr, count.data(), block.data());
-        shape = val::array();
+        shape = val::array(); // elements of type hsize_t
         for (int d = 0; d < rank; d++) {
-            int blocksize = (block.at(d) == NULL) ? 1 : block.at(d);
-            shape.set(d, (uint)(count.at(d) * blocksize));
+            hsize_t blocksize = (block.at(d) == NULL) ? 1 : block.at(d);
+            shape.set(d, (count.at(d) * blocksize));
         }
     }
     metadata.set("shape", shape);
@@ -1442,6 +1443,7 @@ EMSCRIPTEN_BINDINGS(hdf5)
     constant("H5Z_FILTER_SCALEOFFSET", H5Z_FILTER_SCALEOFFSET);
     constant("H5Z_FILTER_RESERVED", H5Z_FILTER_RESERVED);
     constant("H5Z_FILTER_MAX", H5Z_FILTER_MAX);
+    constant("MAXIMUM_MEMORY", emscripten_get_heap_max());
 
     register_vector<std::string>("vector<string>");
 }
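NOTE (illustration only, not part of the patch series): HDF5's hsize_t is an
unsigned 64-bit integer, while a JavaScript number is an IEEE-754 double that is
only exact up to 2^53 - 1; that is why shape, maxshape and total_size now cross
the wasm boundary as bigint. A standalone TypeScript sketch of the precision
loss this avoids:

    // 2**53 + 1 is not representable as a double; as a bigint it is exact.
    const as_number = 2 ** 53 + 1;     // rounds to 9007199254740992
    const as_bigint = 2n ** 53n + 1n;  // exactly 9007199254740993n
    console.log(Number.isSafeInteger(as_number));  // false
    console.log(as_bigint === 9007199254740993n); // true
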
From 75b9e23d74130b97ea1180e4db6036d30eed5aa3 Mon Sep 17 00:00:00 2001
From: bbm
Date: Tue, 16 Dec 2025 11:26:16 -0500
Subject: [PATCH 2/5] add check_malloc and handle metadata.shape and
 total_size etc. as bigint

---
 src/hdf5_hl.ts | 87 +++++++++++++++++++++++++++++++++-----------------
 1 file changed, 57 insertions(+), 30 deletions(-)

diff --git a/src/hdf5_hl.ts b/src/hdf5_hl.ts
index 84ef00c..ef649b7 100644
--- a/src/hdf5_hl.ts
+++ b/src/hdf5_hl.ts
@@ -40,6 +40,19 @@ function dirname(path: string) {
     return head;
 }
 
+function check_malloc(nbytes: bigint | number) {
+    const max_memory = Module.MAXIMUM_MEMORY;
+    if (nbytes > max_memory) {
+        throw new Error(`Requested allocation of ${nbytes} bytes exceeds maximum memory of ${max_memory} bytes`);
+    }
+    const safe_nbytes = Number(nbytes);
+    const ptr = Module._malloc(safe_nbytes);
+    if (ptr === 0) {
+        throw new Error(`Memory allocation of ${safe_nbytes} bytes failed`);
+    }
+    return ptr;
+}
+
 function get_attr(file_id: bigint, obj_name: string, attr_name: string, json_compatible: true): JSONCompatibleOutputData | null;
 function get_attr(file_id: bigint, obj_name: string, attr_name: string, json_compatible: false): OutputData | null;
 function get_attr(file_id: bigint, obj_name: string, attr_name: string, json_compatible: boolean): OutputData | JSONCompatibleOutputData | null;
@@ -49,12 +62,12 @@ function get_attr(file_id: bigint, obj_name: string, attr_name: string, json_com
         return null;
     }
 
-    let nbytes = metadata.size * metadata.total_size;
-    let data_ptr = Module._malloc(nbytes);
+    const nbytes = BigInt(metadata.size) * metadata.total_size;
+    let data_ptr = check_malloc(nbytes);
     var processed;
     try {
         Module.get_attribute_data(file_id, obj_name, attr_name, BigInt(data_ptr));
-        let data = Module.HEAPU8.slice(data_ptr, data_ptr + nbytes);
+        let data = Module.HEAPU8.slice(data_ptr, data_ptr + Number(nbytes));
        processed = process_data(data, metadata, json_compatible);
     } finally {
         if (metadata.vlen) {
@@ -146,7 +159,7 @@ function process_data(data: Uint8Array, metadata: Metadata, json_compatible: boo
     else if (type === Module.H5T_class_t.H5T_INTEGER.value || type === Module.H5T_class_t.H5T_FLOAT.value) {
         const { size, signed} = metadata;
         const accessor = getAccessor(type, size, signed);
-        output_data = new accessor(data.buffer);
+        output_data = new accessor(data.buffer as ArrayBuffer);
         if (json_compatible) {
             output_data = [...output_data];
             if (accessor === BigInt64Array || accessor === BigUint64Array) {
@@ -173,12 +186,15 @@ function process_data(data: Uint8Array, metadata: Metadata, json_compatible: boo
     else if (type === Module.H5T_class_t.H5T_ARRAY.value) {
         const { array_type } = <{array_type: Metadata}>metadata;
-        shape = (<number[]>shape).concat(array_type.shape);
+        shape = (<bigint[]>shape).concat(array_type.shape);
         array_type.shape = shape;
         // always convert ARRAY types to base JS types:
         output_data = process_data(data, array_type, true);
         if (isIterable(output_data) && typeof output_data !== "string") {
-            output_data = create_nested_array(output_data as JSONCompatibleOutputData[], array_type.shape);
+            // because process_data is called after data is already retrieved, we know that
+            // total_size < Module.MAXIMUM_MEMORY, so it's safe to convert shape to number[]
+            const output_shape = array_type.shape.map(Number);
+            output_data = create_nested_array(output_data as JSONCompatibleOutputData[], output_shape);
         }
     }
 
@@ -201,7 +217,9 @@ function process_data(data: Uint8Array, metadata: Metadata, json_compatible: boo
     else if (type === Module.H5T_class_t.H5T_REFERENCE.value) {
         const { ref_type, size } = metadata; // as { ref_type: 'object' | 'region', size: number };
         const cls = (ref_type === 'object') ? Reference : RegionReference;
-        output_data = Array.from({ length: metadata.total_size }).map((_, i) => {
+        // because process_data is called after data is already retrieved, we know that
+        // total_size < Module.MAXIMUM_MEMORY, so it's safe to convert length to number
+        output_data = Array.from({ length: Number(metadata.total_size) }).map((_, i) => {
             const ref_data = data.slice(i*size, (i+1)*size);
             return new cls(ref_data);
         });
@@ -226,7 +244,7 @@ function process_data(data: Uint8Array, metadata: Metadata, json_compatible: boo
             const data = Module.HEAPU8.slice(data_ptr, data_ptr + data_nbytes);
 
             // Process this vlen array's data according to base datatype
-            output.push(process_data(data, { ...vlen_type, shape: [length], total_size: length }, json_compatible));
+            output.push(process_data(data, { ...vlen_type, shape: [BigInt(length)], total_size: BigInt(length) }, json_compatible));
         }
         output_data = output;
 
@@ -461,7 +479,8 @@ const TypedArray_to_dtype = new Map([
  * `[i0, i1]` - select all data in the range `i0` to `i1`
  * `[i0, i1, s]` - select every `s` values in the range `i0` to `i1`
  **/
-type Slice = [] | [number|null] | [number|null,number|null] | [number|null, number|null, number|null];
+type SliceElement = bigint | number | null;
+type Slice = [] | [SliceElement] | [SliceElement, SliceElement] | [SliceElement, SliceElement, SliceElement];
 
 export type GuessableDataTypes = TypedArray | number | number[] | string | string[] | Reference | Reference[] | RegionReference | RegionReference[];
 
@@ -542,7 +561,7 @@ export class Attribute {
     name: string;
     metadata: Metadata;
     dtype: Dtype;
-    shape: number[] | null;
+    shape: bigint[] | null;
     private _value?: OutputData | null;
     private _json_value?: JSONCompatibleOutputData | null;
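NOTE (illustration only, not part of the patch): the to_array() changes in the
next hunk narrow shape to a non-null bigint[] and convert it with
shape.map(Number) before nesting. A minimal standalone model of that nesting
step (nest is a hypothetical stand-in for the library's create_nested_array):

    // Reshape a flat array into nested arrays following a converted shape.
    function nest<T>(flat: T[], shape: number[]): unknown {
        if (shape.length <= 1) return flat;
        const [n, ...rest] = shape;
        const stride = flat.length / n;  // assumes flat.length divides evenly
        return Array.from({ length: n }, (_, i) =>
            nest(flat.slice(i * stride, (i + 1) * stride), rest));
    }
    console.log(nest([1, 2, 3, 4, 5, 6], [2n, 3n].map(Number)));  // [[1, 2, 3], [4, 5, 6]]
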
@@ -573,10 +592,10 @@ export class Attribute {
     to_array(): JSONCompatibleOutputData | null {
         const { json_value, metadata } = this;
         const { shape } = metadata;
-        if (!isIterable(json_value) || typeof json_value === "string") {
-            return json_value;
-        }
-        return create_nested_array(json_value, shape);
+        if (!isIterable(json_value) || typeof json_value === "string" || shape === null) {
+            return json_value;
+        }
+        return create_nested_array(json_value, shape.map(Number));
     }
 }
 
@@ -635,7 +654,7 @@ abstract class HasAttrs {
             );
         }
         else {
-            let data_ptr = Module._malloc((prepared_data as Uint8Array).byteLength);
+            let data_ptr = check_malloc((prepared_data as Uint8Array).byteLength);
             try {
                 Module.HEAPU8.set(prepared_data as Uint8Array, data_ptr);
                 Module.create_attribute(
@@ -830,7 +849,7 @@ export class Group extends HasAttrs {
             );
         }
         else {
-            let data_ptr = Module._malloc((prepared_data as Uint8Array).byteLength);
+            let data_ptr = check_malloc((prepared_data as Uint8Array).byteLength);
             try {
                 Module.HEAPU8.set(prepared_data as Uint8Array, data_ptr);
                 Module.create_dataset(
@@ -904,14 +923,22 @@ export class File extends Group {
     }
 }
 
-const calculateHyperslabParams = (shape: number[],ranges: Slice[]) => {
+const calculateHyperslabParams = (shape: bigint[], ranges: Slice[]) => {
     const strides = shape.map((s, i) => BigInt(ranges?.[i]?.[2] ?? 1));
     const count = shape.map((s, i) => {
-        const N = BigInt((Math.min(s, ranges?.[i]?.[1] ?? s) - Math.max(0, ranges?.[i]?.[0] ?? 0)));
+        const range_upper = ranges?.[i]?.[1] ?? s;
+        const range_lower = ranges?.[i]?.[0] ?? 0n;
+        const high = (range_upper < s) ? BigInt(range_upper) : s;
+        const low = (range_lower > 0n) ? BigInt(range_lower) : 0n;
+        const N = high - low;
         const st = strides[i];
         return N / st + ((N % st) + st - 1n)/st
     });
-    const offset = shape.map((s, i) => BigInt(Math.min(s, Math.max(0, ranges?.[i]?.[0] ?? 0))));
+    const offset = shape.map((s, i) => {
+        const range_lower = ranges?.[i]?.[0] ?? 0n;
+        const low = (range_lower > 0n) ? BigInt(range_lower) : 0n;
+        return (s < low) ? s : low;
+    });
 
     return {strides, count, offset}
 }
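NOTE (illustration only, not part of the patch): the return expression in
`count` above is a pure-bigint ceiling division, needed because BigInt
division truncates toward zero. Extracted as a standalone helper (ceil_div is
a hypothetical name for illustration):

    // ceil(N / st) using only bigint arithmetic, as in `count` above.
    const ceil_div = (N: bigint, st: bigint): bigint => N / st + ((N % st) + st - 1n) / st;
    console.log(ceil_div(5n, 2n));  // 3n: a stride of 2 over 5 elements hits 0, 2, 4
    console.log(ceil_div(6n, 3n));  // 2n: a stride of 3 over 6 elements hits 0, 3
    console.log(ceil_div(0n, 4n));  // 0n: empty selection
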
s : low; + }); return {strides, count, offset} } @@ -971,12 +998,12 @@ export class Dataset extends HasAttrs { const {strides, count, offset} = calculateHyperslabParams(shape, ranges); const total_size = count.reduce((previous, current) => current * previous, 1n); - const nbytes = metadata.size * Number(total_size); - const data_ptr = Module._malloc(nbytes); + const nbytes = BigInt(metadata.size) * total_size; + const data_ptr = check_malloc(nbytes); let processed: OutputData; try { Module.get_dataset_data(this.file_id, this.path, count, offset, strides, BigInt(data_ptr)); - let data = Module.HEAPU8.slice(data_ptr, data_ptr + nbytes); + let data = Module.HEAPU8.slice(data_ptr, data_ptr + Number(nbytes)); processed = process_data(data, metadata, false); } finally { if (metadata.vlen || metadata.type === Module.H5T_class_t.H5T_VLEN.value) { @@ -1000,8 +1027,8 @@ export class Dataset extends HasAttrs { // if auto_refresh is on, getting the metadata has triggered a refresh of the dataset_id; const {strides, count, offset} = calculateHyperslabParams(shape, ranges); - const { data: prepared_data, shape: guessed_shape } = prepare_data(data, metadata, count); - let data_ptr = Module._malloc((prepared_data as Uint8Array).byteLength); + const { data: prepared_data } = prepare_data(data, metadata, count); + let data_ptr = check_malloc((prepared_data as Uint8Array).byteLength); Module.HEAPU8.set(prepared_data as Uint8Array, data_ptr); try { @@ -1028,10 +1055,10 @@ export class Dataset extends HasAttrs { to_array(): JSONCompatibleOutputData | null { const { json_value, metadata } = this; const { shape } = metadata; - if (!isIterable(json_value) || typeof json_value === "string") { + if (!isIterable(json_value) || typeof json_value === "string" || shape === null) { return json_value; } - let nested = create_nested_array(json_value, shape); + let nested = create_nested_array(json_value, shape.map(Number)); return nested; } @@ -1090,12 +1117,12 @@ export class Dataset extends HasAttrs { } // if auto_refresh is on, getting the metadata has triggered a refresh of the dataset_id; - let nbytes = metadata.size * metadata.total_size; - let data_ptr = Module._malloc(nbytes); + let nbytes = BigInt(metadata.size) * metadata.total_size; + let data_ptr = check_malloc(nbytes); let processed: OutputData; try { Module.get_dataset_data(this.file_id, this.path, null, null, null, BigInt(data_ptr)); - let data = Module.HEAPU8.slice(data_ptr, data_ptr + nbytes); + let data = Module.HEAPU8.slice(data_ptr, data_ptr + Number(nbytes)); processed = process_data(data, metadata, json_compatible); } finally { if (metadata.vlen) { @@ -1139,12 +1166,12 @@ export class DatasetRegion { } // if auto_refresh is on, getting the metadata has triggered a refresh of the dataset_id; - let nbytes = metadata.size * metadata.total_size; - let data_ptr = Module._malloc(nbytes); + let nbytes = BigInt(metadata.size) * metadata.total_size; + let data_ptr = check_malloc(nbytes); let processed: OutputData; try { Module.get_region_data(this.source_dataset.file_id, this.region_reference.ref_data, BigInt(data_ptr)); - let data = Module.HEAPU8.slice(data_ptr, data_ptr + nbytes); + let data = Module.HEAPU8.slice(data_ptr, data_ptr + Number(nbytes)); processed = process_data(data, metadata, json_compatible); } finally { if (metadata.vlen) { From 7f933f639e9f44ef8de7d41a78126d9d14f2af41 Mon Sep 17 00:00:00 2001 From: bbm Date: Tue, 16 Dec 2025 11:26:51 -0500 Subject: [PATCH 3/5] compiled types with new bigint for shape --- src/hdf5_hl.d.ts | 7 ++++--- 1 file 
From 7f933f639e9f44ef8de7d41a78126d9d14f2af41 Mon Sep 17 00:00:00 2001
From: bbm
Date: Tue, 16 Dec 2025 11:26:51 -0500
Subject: [PATCH 3/5] compiled types with new bigint for shape

---
 src/hdf5_hl.d.ts | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/hdf5_hl.d.ts b/src/hdf5_hl.d.ts
index cab8905..fa73291 100644
--- a/src/hdf5_hl.d.ts
+++ b/src/hdf5_hl.d.ts
@@ -28,7 +28,8 @@ declare type TypedArray = Int8Array | Uint8Array | Uint8ClampedArray | Int16Arra
  * `[i0, i1]` - select all data in the range `i0` to `i1`
 * `[i0, i1, s]` - select every `s` values in the range `i0` to `i1`
 **/
-declare type Slice = [] | [number | null] | [number | null, number | null] | [number | null, number | null, number | null];
+declare type SliceElement = bigint | number | null;
+declare type Slice = [] | [SliceElement] | [SliceElement, SliceElement] | [SliceElement, SliceElement, SliceElement];
 export declare type GuessableDataTypes = TypedArray | number | number[] | string | string[] | Reference | Reference[] | RegionReference | RegionReference[];
 declare enum OBJECT_TYPE {
     DATASET = "Dataset",
@@ -63,7 +64,7 @@ export declare class Attribute {
     name: string;
     metadata: Metadata;
     dtype: Dtype;
-    shape: number[] | null;
+    shape: bigint[] | null;
     private _value?;
     private _json_value?;
     constructor(file_id: bigint, path: string, name: string);
@@ -133,7 +134,7 @@ export declare class Dataset extends HasAttrs {
     refresh(): void;
     get metadata(): Metadata;
     get dtype(): Dtype;
-    get shape(): number[] | null;
+    get shape(): bigint[] | null;
     get filters(): Filter[];
     get value(): OutputData | null;
     get json_value(): JSONCompatibleOutputData | null;

From ecdddbfbb3824a1afffdb7aa096f7398d3744fef Mon Sep 17 00:00:00 2001
From: bbm
Date: Tue, 16 Dec 2025 11:27:22 -0500
Subject: [PATCH 4/5] update interface for Metadata to include bigint shape,
 maxshape, total_size

---
 src/hdf5_util_helpers.d.ts | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/hdf5_util_helpers.d.ts b/src/hdf5_util_helpers.d.ts
index bcf7fe0..b6eeadb 100644
--- a/src/hdf5_util_helpers.d.ts
+++ b/src/hdf5_util_helpers.d.ts
@@ -25,13 +25,13 @@ export interface Metadata {
     enum_type?: EnumTypeMetadata,
     vlen_type?: Metadata,
     littleEndian: boolean,
-    maxshape: number[] | null,
+    maxshape: bigint[] | null,
     ref_type?: 'object' | 'region',
-    shape: number[] | null,
+    shape: bigint[] | null,
     signed: boolean,
     size: number,
     strpad?: number,
-    total_size: number,
+    total_size: bigint,
     type: number,
     virtual_sources?: VirtualSource[],
     vlen: boolean,
@@ -93,6 +93,7 @@ export interface H5Module extends EmscriptenModule {
     H5Z_FILTER_SCALEOFFSET: 6;
     H5Z_FILTER_RESERVED: 256;
     H5Z_FILTER_MAX: 65535;
+    MAXIMUM_MEMORY: number;
     create_group(file_id: bigint, name: string, track_order?: boolean): number;
     create_vlen_str_dataset(file_id: bigint, dset_name: string, prepared_data: any, shape: bigint[], maxshape: (bigint | null)[], chunks: bigint[] | null, type: number, size: number, signed: boolean, vlen: boolean, track_order?: boolean): number;
     get_dataset_data(file_id: bigint, path: string, count: bigint[] | null, offset: bigint[] | null, strides: bigint[] | null, rdata_ptr: bigint): number;
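NOTE (illustration only, not part of the patch): Metadata.size stays a number
(bytes per element) while total_size becomes a bigint (element count).
JavaScript forbids mixed bigint/number arithmetic but allows mixed comparison,
which is exactly the pattern the BigInt(metadata.size) * metadata.total_size
computations and the nbytes > max_memory guard in check_malloc rely on:

    const size = 8;          // bytes per element (number)
    const total_size = 3n;   // element count (bigint)
    const nbytes = BigInt(size) * total_size;  // 24n; `size * total_size` would throw
    console.log(nbytes > 2 ** 31);             // false; mixed comparison is legal
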
From 4dd75b7454f7b1c47c481e756526e7723cdb0c0c Mon Sep 17 00:00:00 2001
From: bbm
Date: Tue, 16 Dec 2025 11:27:44 -0500
Subject: [PATCH 5/5] update tests to use bigint for shape, maxshape,
 total_size

---
 test/bool_test.mjs               |  6 +++---
 test/compound_and_array_test.mjs | 14 +++++++-------
 test/vlen_test.mjs               |  8 ++++----
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/test/bool_test.mjs b/test/bool_test.mjs
index d77e68e..07e5dd5 100644
--- a/test/bool_test.mjs
+++ b/test/bool_test.mjs
@@ -28,11 +28,11 @@ async function bool_test() {
       }
     },
     littleEndian: true,
-    maxshape: [2, 2],
-    shape: [2, 2],
+    maxshape: [2n, 2n],
+    shape: [2n, 2n],
     signed: true,
     size: 1,
-    total_size: 4,
+    total_size: 4n,
     type: 8,
     vlen: false,
   });

diff --git a/test/compound_and_array_test.mjs b/test/compound_and_array_test.mjs
index c728531..4b2040e 100644
--- a/test/compound_and_array_test.mjs
+++ b/test/compound_and_array_test.mjs
@@ -67,11 +67,11 @@ async function compound_array_test() {
     members: [
       {
         array_type: {
-          shape: [2, 2],
+          shape: [2n, 2n],
           littleEndian: true,
           signed: false,
           size: 8,
-          total_size: 4,
+          total_size: 4n,
           type: 1,
           vlen: false,
         },
@@ -87,12 +87,12 @@
       {
         array_type: {
           cset: 0,
-          shape: [2, 2],
+          shape: [2n, 2n],
           littleEndian: false,
           signed: false,
           size: 5,
           strpad: 1,
-          total_size: 4,
+          total_size: 4n,
           type: 3,
           vlen: false,
         },
@@ -109,11 +109,11 @@
      nmembers: 2
    },
    littleEndian: true,
-    maxshape: [2],
-    shape: [2],
+    maxshape: [2n],
+    shape: [2n],
    signed: false,
    size: 52,
-    total_size: 2,
+    total_size: 2n,
    type: 6,
    vlen: false,
  });

diff --git a/test/vlen_test.mjs b/test/vlen_test.mjs
index a6e552a..7885044 100644
--- a/test/vlen_test.mjs
+++ b/test/vlen_test.mjs
@@ -13,7 +13,7 @@ async function vlen_test() {
     maxshape: [],
     chunks: null,
     size: 8,
-    total_size: 1,
+    total_size: 1n,
     signed: true,
     littleEndian: true,
     vlen: false,
@@ -28,11 +28,11 @@ async function vlen_test() {
 
   assert.deepEqual(f.get('float32_oneD').metadata, {
     type: 9,
-    shape: [3],
-    maxshape: [3],
+    shape: [3n],
+    maxshape: [3n],
     chunks: null,
     size: 8,
-    total_size: 3,
+    total_size: 3n,
     signed: false,
     littleEndian: true,
     vlen: false,
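NOTE (illustration only, not part of the patch series): these fixture updates
assume the suite compares with strict deep equality, under which 4 and 4n are
distinct values; a minimal standalone check of that behavior:

    import assert from "node:assert/strict";
    // In strict mode deepEqual is deepStrictEqual, so number vs. bigint fails:
    assert.notDeepEqual({ total_size: 4 }, { total_size: 4n });  // passes: types differ
    assert.deepEqual({ total_size: 4n }, { total_size: 4n });    // passes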