Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit c52b4d1

Browse files
authored
Merge pull request h5py#2463 from ptim0626/write_fill_iss_2459
Expose fill time property via the high-level dataset creation interface
2 parents 5d1f995 + 3e58f85 commit c52b4d1

File tree

5 files changed

+123
-4
lines changed

5 files changed

+123
-4
lines changed

docs/high/group.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,15 @@ Reference
382382
:keyword fillvalue: This value will be used when reading
383383
uninitialized parts of the dataset.
384384

385+
:keyword fill_time: Control when to write the fill value. One of the
386+
following choices: `alloc`, write fill value before writing
387+
application data values or when the dataset is created; `never`,
388+
never write fill value; `ifset`, write fill value if it is defined.
389+
Defaults to `ifset`, which is the HDF5 library default. If the
390+
whole dataset is going to be written by the application, setting
391+
this to `never` can avoid unnecessary writing of fill value and
392+
potentially improve performance.
393+
385394
:keyword track_times: Enable dataset creation timestamps (**T**/F).
386395

387396
:keyword track_order: Track attribute creation order if

h5py/_hl/dataset.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ def make_new_dset(parent, shape=None, dtype=None, data=None, name=None,
3838
fillvalue=None, scaleoffset=None, track_times=False,
3939
external=None, track_order=None, dcpl=None, dapl=None,
4040
efile_prefix=None, virtual_prefix=None, allow_unknown_filter=False,
41-
rdcc_nslots=None, rdcc_nbytes=None, rdcc_w0=None):
41+
rdcc_nslots=None, rdcc_nbytes=None, rdcc_w0=None, *,
42+
fill_time=None):
4243
""" Return a new low-level dataset identifier """
4344

4445
# Convert data to a C-contiguous ndarray
@@ -104,7 +105,8 @@ def make_new_dset(parent, shape=None, dtype=None, data=None, name=None,
104105
dcpl = filters.fill_dcpl(
105106
dcpl or h5p.create(h5p.DATASET_CREATE), shape, dtype,
106107
chunks, compression, compression_opts, shuffle, fletcher32,
107-
maxshape, scaleoffset, external, allow_unknown_filter)
108+
maxshape, scaleoffset, external, allow_unknown_filter,
109+
fill_time=fill_time)
108110

109111
if fillvalue is not None:
110112
# prepare string-type dtypes for fillvalue

h5py/_hl/filters.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@
5252
'shuffle': h5z.FILTER_SHUFFLE,
5353
'fletcher32': h5z.FILTER_FLETCHER32,
5454
'scaleoffset': h5z.FILTER_SCALEOFFSET }
55+
_FILL_TIME_ENUM = {'alloc': h5d.FILL_TIME_ALLOC,
56+
'never': h5d.FILL_TIME_NEVER,
57+
'ifset': h5d.FILL_TIME_IFSET,
58+
}
5559

5660
DEFAULT_GZIP = 4
5761
DEFAULT_SZIP = ('nn', 8)
@@ -146,7 +150,7 @@ def __init__(self, level=DEFAULT_GZIP):
146150

147151
def fill_dcpl(plist, shape, dtype, chunks, compression, compression_opts,
148152
shuffle, fletcher32, maxshape, scaleoffset, external,
149-
allow_unknown_filter=False):
153+
allow_unknown_filter=False, *, fill_time=None):
150154
""" Generate a dataset creation property list.
151155
152156
Undocumented and subject to change without warning.
@@ -259,7 +263,14 @@ def rq_tuple(tpl, name):
259263

260264
if chunks is not None:
261265
plist.set_chunk(chunks)
262-
plist.set_fill_time(h5d.FILL_TIME_ALLOC) # prevent resize glitch
266+
267+
if fill_time is not None:
268+
if (ft := _FILL_TIME_ENUM.get(fill_time)) is not None:
269+
plist.set_fill_time(ft)
270+
else:
271+
msg = ("fill_time must be one of the following choices: 'alloc', "
272+
f"'never' or 'ifset', but it is {fill_time}.")
273+
raise ValueError(msg)
263274

264275
# scale-offset must come before shuffle and compression
265276
if scaleoffset is not None:

h5py/tests/test_dataset.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -476,6 +476,90 @@ def test_exc(self):
476476
dtype=[('a', 'i'), ('b', 'f')], fillvalue=42)
477477

478478

479+
class TestFillTime(BaseDataset):
480+
481+
"""
482+
Feature: Datasets created with specified fill time property
483+
"""
484+
485+
def test_fill_time_default(self):
486+
""" Fill time default to IFSET """
487+
dset = self.f.create_dataset('foo', (10,), fillvalue=4.0)
488+
plist = dset.id.get_create_plist()
489+
self.assertEqual(plist.get_fill_time(), h5py.h5d.FILL_TIME_IFSET)
490+
self.assertEqual(dset[0], 4.0)
491+
self.assertEqual(dset[7], 4.0)
492+
493+
@ut.skipIf('gzip' not in h5py.filters.encode, "DEFLATE is not installed")
494+
def test_compressed_default(self):
495+
""" Fill time is IFSET for compressed dataset (chunked) """
496+
dset = self.f.create_dataset('foo', (10,), compression='gzip',
497+
fillvalue=4.0)
498+
plist = dset.id.get_create_plist()
499+
self.assertEqual(plist.get_fill_time(), h5py.h5d.FILL_TIME_IFSET)
500+
self.assertEqual(dset[0], 4.0)
501+
self.assertEqual(dset[7], 4.0)
502+
503+
def test_fill_time_never(self):
504+
""" Fill time set to NEVER """
505+
dset = self.f.create_dataset('foo', (10,), fillvalue=4.0,
506+
fill_time='never')
507+
plist = dset.id.get_create_plist()
508+
self.assertEqual(plist.get_fill_time(), h5py.h5d.FILL_TIME_NEVER)
509+
# should not be equal to the explicitly set fillvalue
510+
self.assertNotEqual(dset[0], 4.0)
511+
self.assertNotEqual(dset[7], 4.0)
512+
513+
def test_fill_time_alloc(self):
514+
""" Fill time explicitly set to ALLOC """
515+
dset = self.f.create_dataset('foo', (10,), fillvalue=4.0,
516+
fill_time='alloc')
517+
plist = dset.id.get_create_plist()
518+
self.assertEqual(plist.get_fill_time(), h5py.h5d.FILL_TIME_ALLOC)
519+
520+
def test_fill_time_ifset(self):
521+
""" Fill time explicitly set to IFSET """
522+
dset = self.f.create_dataset('foo', (10,), chunks=(2,), fillvalue=4.0,
523+
fill_time='ifset')
524+
plist = dset.id.get_create_plist()
525+
self.assertEqual(plist.get_fill_time(), h5py.h5d.FILL_TIME_IFSET)
526+
527+
def test_invalid_fill_time(self):
528+
""" Choice of fill_time is 'alloc', 'never', 'ifset' """
529+
with self.assertRaises(ValueError):
530+
dset = self.f.create_dataset('foo', (10,), fill_time='fill_bad')
531+
532+
def test_non_str_fill_time(self):
533+
""" fill_time must be a string """
534+
with self.assertRaises(ValueError):
535+
dset = self.f.create_dataset('foo', (10,), fill_time=2)
536+
537+
def test_resize_chunk_fill_time_default(self):
538+
""" The resize dataset will be filled (by default fill value 0) """
539+
dset = self.f.create_dataset('foo', (50, ), maxshape=(100, ),
540+
chunks=(5, ))
541+
plist = dset.id.get_create_plist()
542+
self.assertEqual(plist.get_fill_time(), h5py.h5d.FILL_TIME_IFSET)
543+
544+
assert np.isclose(dset[:], 0.0).all()
545+
546+
dset.resize((100, ))
547+
assert np.isclose(dset[:], 0.0).all()
548+
549+
def test_resize_chunk_fill_time_never(self):
550+
""" The resize dataset won't be filled """
551+
dset = self.f.create_dataset('foo', (50, ), maxshape=(100, ),
552+
fillvalue=4.0, fill_time='never',
553+
chunks=(5, ))
554+
plist = dset.id.get_create_plist()
555+
self.assertEqual(plist.get_fill_time(), h5py.h5d.FILL_TIME_NEVER)
556+
557+
assert not np.isclose(dset[:], 4.0).any()
558+
559+
dset.resize((100, ))
560+
assert not np.isclose(dset[:], 4.0).any()
561+
562+
479563
@pytest.mark.parametrize('dt,expected', [
480564
(int, 0),
481565
(np.int32, 0),

news/expose_fill_time.rst

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
Breaking Changes and Deprecations
2+
---------------------------------
3+
4+
* Fill time for chunked storage was previously set to ``h5d.FILL_TIME_ALLOC``. Now this
5+
is handled by the HDF5 library, where the default is ``h5d.FILL_TIME_IFSET``
6+
(equivalent to ``fill_time='ifset'``). Please use ``fill_time='alloc'`` if
7+
the behaviour in previous releases is wanted.
8+
9+
Exposing HDF5 functions
10+
-----------------------
11+
12+
* Expose fill time option in dataset creation property list via the
13+
``fill_time`` parameter in ``create_dataset``.

0 commit comments

Comments
 (0)