Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 39 additions & 7 deletions numpy/lib/recfunctions.py
Original file line number Diff line number Diff line change
Expand Up @@ -874,16 +874,35 @@ def _get_fields_and_offsets(dt, offset=0):
scalar fields in the dtype "dt", including nested fields, in left
to right order.
"""

# counts up elements in subarrays, including nested subarrays, and returns
# base dtype and count
def count_elem(dt):
count = 1
while dt.shape != ():
for size in dt.shape:
count *= size
dt = dt.base
return dt, count

fields = []
for name in dt.names:
field = dt.fields[name]
if field[0].names is None:
count = 1
for size in field[0].shape:
count *= size
fields.append((field[0], count, field[1] + offset))
f_dt, f_offset = field[0], field[1]
f_dt, n = count_elem(f_dt)

if f_dt.names is None:
fields.append((np.dtype((f_dt, (n,))), n, f_offset + offset))
else:
fields.extend(_get_fields_and_offsets(field[0], field[1] + offset))
subfields = _get_fields_and_offsets(f_dt, f_offset + offset)
size = f_dt.itemsize

for i in range(n):
if i == 0:
# optimization: avoid list comprehension if no subarray
fields.extend(subfields)
else:
fields.extend([(d, c, o + i*size) for d, c, o in subfields])
return fields


Expand Down Expand Up @@ -948,6 +967,12 @@ def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'):

fields = _get_fields_and_offsets(arr.dtype)
n_fields = len(fields)
if n_fields == 0 and dtype is None:
raise ValueError("arr has no fields. Unable to guess dtype")
elif n_fields == 0:
# too many bugs elsewhere for this to work now
raise NotImplementedError("arr with no fields is not supported")

dts, counts, offsets = zip(*fields)
names = ['f{}'.format(n) for n in range(n_fields)]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To elaborate - easy to fix as:

dts = []
counts = []
offsets = []
names = []
for i, (dt, count, offset) in enumerate(fields):
   dts.append(dt)
   counts.append(count)
   offsets.append(offset)
   names.append('f{}'.format(i))

Copy link
Member

@eric-wieser eric-wieser Aug 21, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or as

def unzip(seq, n_items):
    """Split a sequence of fixed-length items into per-position lists.

    Behaves like ``zip(*seq)``, but the number of output sequences is given
    explicitly so the result is still well-defined when ``len(seq) == 0``.

    Parameters
    ----------
    seq : iterable of sequences
        Each item must have exactly `n_items` elements.
    n_items : int
        Number of elements in every item, and the number of lists returned.

    Returns
    -------
    arrs : tuple of list
        ``arrs[j]`` holds the j-th element of every item in `seq`, in order.
    """
    # Build each list separately: ``([],) * n_items`` would repeat a single
    # list object, so every append would show up in all "columns" at once.
    arrs = tuple([] for _ in range(n_items))
    for item in seq:
        assert len(item) == n_items
        for column, value in zip(arrs, item):
            column.append(value)
    return arrs

Copy link
Member Author

@ahaldane ahaldane Aug 21, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I originally put

    if n_fields == 0:
        if dtype is None:
            raise ValueError('could not determine dtype: '
                             'no fields or dtype specified')
        dts, counts, offsets = [], [], []
    else:
        dts, counts, offsets = zip(*fields)

and this worked fine for structured_to_unstructured. Note that if there are no fields the dtype must be given.

The problem was that after supplying a dtype, the resulting array of shape "(x, y, z, 0)" for some integers x, y, z has a 0-size axis at the end, which trips up unstructured_to_structured if you try to reverse the operation, in particular the line return arr.view(out_dtype)[..., 0]. It does not make sense to remove the last axis if the dtype is not 0-sized too, and numpy makes it difficult in that case.

Eg, this fails on that line (after adding if n_fields == 0 checks in unstructured_to_structured)

x = np.zeros(2, structured())
y = rfn.structured_to_unstructured(x, dtype=int)
z = rfn.unstructured_to_structured(y, dtype=x.dtype)

Because all this was tricky and probably not what the user intended, I thought it might be best to simply disallow mucking with 0-size arrays.

Edit: This whole comment might also be summarized as: If the structured array has no fields, what should the output dtype be? No choice really makes sense.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some further investigation shows that this view behavior is part of what makes it difficult:

>>> a = np.zeros((2, 0),dtype={'names':[], 'formats':[], 'offsets':[], 'itemsize':8})
>>> b = a.view(np.dtype([]))
>>> b
array([], shape=(2, 0),
      dtype={'names':[], 'formats':[], 'offsets':[], 'itemsize':8})
>>> b.dtype == np.dtype([])
False

In other words, view seems to have no effect here?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Edit: This whole comment might also be summarized as: If the structured array has no fields, what should the output dtype be? No choice really makes sense.

That's a really good justification - can you include it in the error message?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this view behavior is part of what makes it difficult

I'm not sure that's the example you want - the correct behavior of that should be to raise ValueError, and I doubt its failure to do that is a problem for you now.

But that is indeed a bug, and stems from the fact that there is not enough difference between:

  • np.dtype(np.void), which might mean "a byte buffer of unknown length"
  • np.dtype("V0"), which might mean "a byte buffer of length 0"
  • np.dtype([]), which might mean "a structured type with no fields"

That was one of the motivations for introducing PyDescr_ISUNSIZED, which currently fires on all 3 but should really only be true for the first one.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the correct behavior of that should be to raise ValueError

OK, that's fine. But that led me to think a little more about what is going on in structured_to_unstructured where I managed to convert a 0-field, itemsize-0 dtype array to a size-0 array. Consider:

>>> a = np.zeros(2, dtype='V0') 
>>> a.reshape((2,0))  # numpy disallow creating size-0 axis.
ValueError: cannot reshape array of size 2 into shape (2,0)
>>> a.view((int, 0))  # but I can bypass by viewing with subarray of size 0
array([], shape=(2, 0), dtype=int64)

So I was able to find a way to bypass numpy's normal restrictions for the size-0 dtype in structured_to_unstructured, but the behavior in my last comment prevented me from doing the reverse in unstructured_to_structured.

In any case, this is the mucky size-0 stuff that we can just forget about by simply disallowing fieldless structured types in this PR.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

by simply disallowing fieldless structured types in this PR.

I'm fine with that, but we should probably raise NotImplementedError as I mention above. I'll take a look at .view some other time, your example looks pretty damning

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably worth capturing this view weirdness in a new issue.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I'll add one


Expand Down Expand Up @@ -1039,6 +1064,9 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False,
if arr.shape == ():
raise ValueError('arr must have at least one dimension')
n_elem = arr.shape[-1]
if n_elem == 0:
# too many bugs elsewhere for this to work now
raise NotImplementedError("last axis with size 0 is not supported")

if dtype is None:
if names is None:
Expand All @@ -1051,7 +1079,11 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False,
raise ValueError("don't supply both dtype and names")
# sanity check of the input dtype
fields = _get_fields_and_offsets(dtype)
dts, counts, offsets = zip(*fields)
if len(fields) == 0:
dts, counts, offsets = [], [], []
else:
dts, counts, offsets = zip(*fields)

if n_elem != sum(counts):
raise ValueError('The length of the last dimension of arr must '
'be equal to the number of fields in dtype')
Expand Down
37 changes: 36 additions & 1 deletion numpy/lib/tests/test_recfunctions.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,8 @@ def test_structured_to_unstructured(self):
# including uniform fields with subarrays unpacked
d = np.array([(1, [2, 3], [[ 4, 5], [ 6, 7]]),
(8, [9, 10], [[11, 12], [13, 14]])],
dtype=[('x0', 'i4'), ('x1', ('i4', 2)), ('x2', ('i4', (2, 2)))])
dtype=[('x0', 'i4'), ('x1', ('i4', 2)),
('x2', ('i4', (2, 2)))])
dd = structured_to_unstructured(d)
ddd = unstructured_to_structured(dd, d.dtype)
assert_(dd.base is d)
Expand All @@ -262,6 +263,40 @@ def test_structured_to_unstructured(self):
assert_equal(res, np.zeros((10, 6), dtype=int))


# test nested combinations of subarrays and structured arrays, gh-13333
def subarray(dt, shape):
return np.dtype((dt, shape))

def structured(*dts):
return np.dtype([('x{}'.format(i), dt) for i, dt in enumerate(dts)])

def inspect(dt, dtype=None):
arr = np.zeros((), dt)
ret = structured_to_unstructured(arr, dtype=dtype)
backarr = unstructured_to_structured(ret, dt)
return ret.shape, ret.dtype, backarr.dtype

dt = structured(subarray(structured(np.int32, np.int32), 3))
assert_equal(inspect(dt), ((6,), np.int32, dt))

dt = structured(subarray(subarray(np.int32, 2), 2))
assert_equal(inspect(dt), ((4,), np.int32, dt))

dt = structured(np.int32)
assert_equal(inspect(dt), ((1,), np.int32, dt))

dt = structured(np.int32, subarray(subarray(np.int32, 2), 2))
assert_equal(inspect(dt), ((5,), np.int32, dt))

dt = structured()
assert_raises(ValueError, structured_to_unstructured, np.zeros(3, dt))

# these currently don't work, but we may make it work in the future
assert_raises(NotImplementedError, structured_to_unstructured,
np.zeros(3, dt), dtype=np.int32)
assert_raises(NotImplementedError, unstructured_to_structured,
np.zeros((3,0), dtype=np.int32))

def test_field_assignment_by_name(self):
a = np.ones(2, dtype=[('a', 'i4'), ('b', 'f8'), ('c', 'u1')])
newdt = [('b', 'f4'), ('c', 'u1')]
Expand Down