Thanks to visit codestin.com
Credit goes to github.com

Skip to content

ENH: properly account for trailing padding in PEP3118 #7798

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions doc/release/upcoming_changes/7798.improvement.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
Trailing Padding now supported in PEP3118 buffer interface
----------------------------------------------------------
Previously, structured types with trailing padding such as
``np.dtype({'formats': ['i1'], 'names': ['a'], 'itemsize': 4})`` could not
roundtrip through the PEP3118 interface using a memoryview, as in
``a == np.array(memoryview(a))``. Now, such trailing padding is preserved.

More technically, the PEP3118 interface now interprets PEP3118 format strings
as follows: Within "T{}", in aligned @ mode, trailing padding is automatically
assumed in the same way as for C structs and numpy aligned dtypes. Outside of
T{}, trailing padding is not automatically added or assumed in inputs,
following Python's struct module, but it can be added explicitly by padding
with "x" or with unnamed zero-sized trailing elements. Zero-sized unnamed
elements, like "0i", can now be added anywhere in the format string; in @ mode
such an element adds padding bytes up to that type's alignment offset, and in
all other modes it is ignored, as described in the Python struct docs.
40 changes: 28 additions & 12 deletions numpy/core/_internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,9 @@ def _dtype_from_pep3118(spec):
return dtype

def __dtype_from_pep3118(stream, is_subdtype):
# numpy interprets pep3118 formats which include named fields as
# structured dtypes, even if not enclosed by "T{}"

field_spec = dict(
names=[],
formats=[],
Expand Down Expand Up @@ -613,8 +616,7 @@ def __dtype_from_pep3118(stream, is_subdtype):
is_padding = False

if stream.consume('T{'):
value, align = __dtype_from_pep3118(
stream, is_subdtype=True)
value, align = __dtype_from_pep3118(stream, is_subdtype=True)
elif stream.next in type_map_chars:
if stream.next == 'Z':
typechar = stream.advance(2)
Expand All @@ -638,12 +640,10 @@ def __dtype_from_pep3118(stream, is_subdtype):
else:
raise ValueError("Unknown PEP 3118 data type specifier %r" % stream.s)

#
# Native alignment may require padding
#
# Here we assume that the presence of a '@' character implicitly implies
# that the start of the array is *already* aligned.
#
extra_offset = 0
if stream.byteorder == '@':
start_padding = (-offset) % align
Expand All @@ -663,6 +663,19 @@ def __dtype_from_pep3118(stream, is_subdtype):
# Update common alignment
common_alignment = _lcm(align, common_alignment)

# Field name
if stream.consume(':'):
name = stream.consume_until(':')
else:
name = None

# struct docs explicitly say that repeat-0 elements are for padding or
# alignment. We further interpret this applies only to unnamed fields
if name is None and itemsize == 0:
offset += extra_offset
field_spec['itemsize'] = offset
continue

# Convert itemsize to sub-array
if itemsize != 1:
value = dtype((value, (itemsize,)))
Expand All @@ -671,12 +684,6 @@ def __dtype_from_pep3118(stream, is_subdtype):
if shape is not None:
value = dtype((value, shape))

# Field name
if stream.consume(':'):
name = stream.consume_until(':')
else:
name = None

if not (is_padding and name is None):
if name is not None and name in field_spec['names']:
raise RuntimeError(f"Duplicate field name '{name}' in PEP3118 format")
Expand All @@ -689,8 +696,17 @@ def __dtype_from_pep3118(stream, is_subdtype):

field_spec['itemsize'] = offset

# extra final padding for aligned types
if stream.byteorder == '@':
# extra final padding for aligned types:
# Inside of T{}, if in aligned mode, we add trailing padding like in a
# C struct so the end of the struct is aligned.
# Note that this behavior is *not* described by the PEP3118 spec, which
# does not say anything about T{} trailing padding. Note also that the Py
# struct docs say that trailing padding should *not* be implicitly added
# when outside of T{}, and the user should explicitly add a 0-sized
# trailing field to add padding, however struct does not implement T{}. So,
# here numpy is taking the initiative to specify how trailing padding works
# inside T{}, while we mimic struct outside of T{}.
if is_subdtype and stream.byteorder == '@':
field_spec['itemsize'] += (-offset) % common_alignment

# Check if this was a simple 1-item type, and unwrap it
Expand Down
33 changes: 31 additions & 2 deletions numpy/core/src/multiarray/buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,19 @@ _append_str(_tmp_string_t *s, char const *p)
return 0;
}

/*
 * Append the decimal representation of the unsigned integer n to s.
 *
 * Returns -1 if the number could not be formatted into the scratch buffer;
 * otherwise returns whatever _append_str returns for the formatted text.
 */
static int
_append_int(_tmp_string_t *s, unsigned int n)
{
    /*
     * Even a 64-bit unsigned value prints in at most 20 decimal digits, so
     * 32 bytes always leaves room for the digits and the terminating NUL.
     * The buffer is automatic (not static) so that the helper keeps no
     * state between calls and stays reentrant.
     */
    char buf[32];
    int nw = snprintf(buf, sizeof(buf), "%u", n);

    /* snprintf reports an encoding error as < 0 and truncation as >= size */
    if (nw < 0 || (size_t)nw >= sizeof(buf)) {
        return -1;
    }
    return _append_str(s, buf);
}


/*
* Append a PEP3118-formatted field name, ":name:", to str
*/
Expand Down Expand Up @@ -282,9 +295,15 @@ _buffer_format_string(PyArray_Descr *descr, _tmp_string_t *str,
);
return -1;
}
while (*offset < new_offset) {
/* add padding bytes: repeat-count plus 'x' */
if (*offset < new_offset) {
if (new_offset - (*offset) > 1) {
if (_append_int(str, new_offset - (*offset)) < 0) {
return -1;
}
}
if (_append_char(str, 'x') < 0) return -1;
++*offset;
*offset = new_offset;
}

/* Insert child item */
Expand All @@ -297,6 +316,16 @@ _buffer_format_string(PyArray_Descr *descr, _tmp_string_t *str,
/* Insert field name */
if (_append_field_name(str, name) < 0) return -1;
}

/* Add any trailing padding */
if (*offset < descr->elsize) {
if (descr->elsize - (*offset) > 1) {
if (_append_int(str, descr->elsize - (*offset)) < 0) return -1;
}
if (_append_char(str, 'x') < 0) return -1;
*offset = descr->elsize;
}

if (_append_char(str, '}') < 0) return -1;
}
else {
Expand Down
73 changes: 56 additions & 17 deletions numpy/core/tests/test_multiarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -7023,27 +7023,72 @@ def test_native_padding_2(self):
self._check('^x3T{xi}', {'f0': (({'f0': ('i', 1)}, (3,)), 1)})

def test_trailing_padding(self):
# Trailing padding should be included, *and*, the item size
# should match the alignment if in aligned mode
align = np.dtype('i').alignment
size = np.dtype('i').itemsize

def aligned(n):
return align*(1 + (n-1)//align)

base = dict(formats=['i'], names=['f0'])

self._check('ix', dict(itemsize=aligned(size + 1), **base))
self._check('ixx', dict(itemsize=aligned(size + 2), **base))
self._check('ixxx', dict(itemsize=aligned(size + 3), **base))
self._check('ixxxx', dict(itemsize=aligned(size + 4), **base))
self._check('i7x', dict(itemsize=aligned(size + 7), **base))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think these tests might remain valid with T{...}? In particular, I would expect trailing padding to be kept in a struct context, to match the behaviour of sizeof(T) in C, and what happens when structs are repeated

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PEP3118 spec is unclear about this.

One could argue the struct module has set no precedent in judging how alignment and padding work here since it doesn't implement the T{} format. We might then feel free to set the precedent here in this PR, deciding that aligned formats add trailing padding only inside T{}.

Copy link
Member

@eric-wieser eric-wieser Feb 6, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whereas the ctypes module has set the precedent of ignoring all the remarks that the struct docs make about alignment...

bbase = dict(formats=['b'], names=['f0'])

self._check('ix', dict(itemsize=size + 1, **base))
self._check('ixx', dict(itemsize=size + 2, **base))
self._check('ixxx', dict(itemsize=size + 3, **base))
self._check('ixxxx', dict(itemsize=size + 4, **base))
self._check('i7x', dict(itemsize=size + 7, **base))
self._check('ix0i', dict(itemsize=2*size, **base))
self._check('b0i', dict(itemsize=size, **bbase))

# Our interpretation of the PEP3118/struct spec is that trailing
# padding for alignment is assumed only inside of T{}.
self._check('T{ix}', dict(itemsize=aligned(size + 1), **base))
self._check('T{ixx}', dict(itemsize=aligned(size + 2), **base))
self._check('T{ixxx}', dict(itemsize=aligned(size + 3), **base))
self._check('T{ixxxx}', dict(itemsize=aligned(size + 4), **base))
self._check('T{i7x}', dict(itemsize=aligned(size + 7), **base))
self._check('T{ix0i}', dict(itemsize=2*size, **base))
self._check('T{b0i}', dict(itemsize=size, **bbase))

# check that alignment mode affects assumed trailing padding in T{}
self._check('T{=ix}', dict(itemsize=size + 1, **base))

self._check('^ix', dict(itemsize=size + 1, **base))
self._check('^ixx', dict(itemsize=size + 2, **base))
self._check('^ixxx', dict(itemsize=size + 3, **base))
self._check('^ixxxx', dict(itemsize=size + 4, **base))
self._check('^i7x', dict(itemsize=size + 7, **base))
self._check('^ixx0i', dict(itemsize=size + 2, **base))
self._check('^b0i', np.dtype('b'))

# check we can convert to memoryview and back, aligned and unaligned
arr = np.zeros(3, dtype=np.dtype('u1,i4,u1', align=True))
assert_equal(arr.dtype, np.array(memoryview(arr)).dtype)

arr = np.zeros(3, dtype=np.dtype('u1,i4,u1', align=False))
assert_equal(arr.dtype, np.array(memoryview(arr)).dtype)

a = np.empty(0, np.dtype({'formats': ['u1'], 'offsets': [0],
'names': ['x'], 'itemsize': 4}))
assert_equal(a, np.array(memoryview(a)))

# check that 0-sized elements act as padding in @ alignment and not =
# outside of T{} (see python struct docs, example at very end)
self._check('B:f0:B:f1:', [('f0', 'u1'), ('f1', 'u1')])
self._check('B:f0:0iB:f1:0i', {'names': ['f0','f1'],
'formats': ['u1','u1'],
'offsets': [0,4],
'itemsize': 8})
self._check('=B:f0:0iB:f1:0i', [('f0', 'u1'), ('f1', 'u1')])

# PEP3118 cannot support overlapping/out-of-order fields
# (update these tests if it is improved to allow this)
a = np.empty(3, dtype={'names': ['a', 'b'],
'formats': ['i4', 'i2'],
'offsets': [0, 2]})
assert_raises(ValueError, memoryview, a)
a = np.empty(3, dtype='i4,i4')[['f1', 'f0']]
assert_raises(ValueError, memoryview, a)

def test_native_padding_3(self):
dt = np.dtype(
Expand Down Expand Up @@ -7075,15 +7120,9 @@ def test_intra_padding(self):
align = np.dtype('i').alignment
size = np.dtype('i').itemsize

def aligned(n):
return (align*(1 + (n-1)//align))

self._check('(3)T{ix}', (dict(
names=['f0'],
formats=['i'],
offsets=[0],
itemsize=aligned(size + 1)
), (3,)))
expected_dtype = {'names': ['f0'], 'formats': ['i'],
'itemsize': np.dtype('i,V1', align=True).itemsize}
self._check('(3)T{ix}', (expected_dtype, (3,)))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this an exact duplicate of the above test? What am I missing?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh sorry, I fudged the rebase here

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Although perhaps using np.dtype(..., align=True) is an improvement over aligned(size + 1)


def test_char_vs_string(self):
dt = np.dtype('c')
Expand Down