numpy · charris · Aug 23, 2019 · Aug 20, 2019 · eric-wieser · Aug 21, 2019
diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
@@ -874,16 +874,35 @@ def _get_fields_and_offsets(dt, offset=0):
     scalar fields in the dtype "dt", including nested fields, in left
     to right order.
     """
+
+    # counts up elements in subarrays, including nested subarrays, and returns
+    # base dtype and count
+    def count_elem(dt):
+        count = 1
+        while dt.shape != ():
+            for size in dt.shape:
+                count *= size
+            dt = dt.base
+        return dt, count
+
     fields = []
     for name in dt.names:
         field = dt.fields[name]
-        if field[0].names is None:
-            count = 1
-            for size in field[0].shape:
-                count *= size
-            fields.append((field[0], count, field[1] + offset))
+        f_dt, f_offset = field[0], field[1]
+        f_dt, n = count_elem(f_dt)
+
+        if f_dt.names is None:
+            fields.append((np.dtype((f_dt, (n,))), n, f_offset + offset))
         else:
-            fields.extend(_get_fields_and_offsets(field[0], field[1] + offset))
+            subfields = _get_fields_and_offsets(f_dt, f_offset + offset)
+            size = f_dt.itemsize
+
+            for i in range(n):
+                if i == 0:
+                    # optimization: avoid list comprehension if no subarray
+                    fields.extend(subfields)
+                else:
+                    fields.extend([(d, c, o + i*size) for d, c, o in subfields])
     return fields
 
 
@@ -948,6 +967,12 @@ def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'):
 
     fields = _get_fields_and_offsets(arr.dtype)
     n_fields = len(fields)
+    if n_fields == 0 and dtype is None:
+        raise ValueError("arr has no fields. Unable to guess dtype")
+    elif n_fields == 0:
+        # too many bugs elsewhere for this to work now
+        raise NotImplementedError("arr with no fields is not supported")
+
     dts, counts, offsets = zip(*fields)
     names = ['f{}'.format(n) for n in range(n_fields)]
 
@@ -1039,6 +1064,9 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False,
     if arr.shape == ():
         raise ValueError('arr must have at least one dimension')
     n_elem = arr.shape[-1]
+    if n_elem == 0:
+        # too many bugs elsewhere for this to work now
+        raise NotImplementedError("last axis with size 0 is not supported")
 
     if dtype is None:
         if names is None:
@@ -1051,7 +1079,11 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False,
             raise ValueError("don't supply both dtype and names")
         # sanity check of the input dtype
         fields = _get_fields_and_offsets(dtype)
-        dts, counts, offsets = zip(*fields)
+        if len(fields) == 0:
+            dts, counts, offsets = [], [], []
+        else:
+            dts, counts, offsets = zip(*fields)
+
         if n_elem != sum(counts):
             raise ValueError('The length of the last dimension of arr must '
                              'be equal to the number of fields in dtype')

diff --git a/numpy/lib/tests/test_recfunctions.py b/numpy/lib/tests/test_recfunctions.py
@@ -248,7 +248,8 @@ def test_structured_to_unstructured(self):
         # including uniform fields with subarrays unpacked
         d = np.array([(1, [2,  3], [[ 4,  5], [ 6,  7]]),
                       (8, [9, 10], [[11, 12], [13, 14]])],
-                     dtype=[('x0', 'i4'), ('x1', ('i4', 2)), ('x2', ('i4', (2, 2)))])
+                     dtype=[('x0', 'i4'), ('x1', ('i4', 2)),
+                            ('x2', ('i4', (2, 2)))])
         dd = structured_to_unstructured(d)
         ddd = unstructured_to_structured(dd, d.dtype)
         assert_(dd.base is d)
@@ -262,6 +263,40 @@ def test_structured_to_unstructured(self):
         assert_equal(res, np.zeros((10, 6), dtype=int))
 
 
+        # test nested combinations of subarrays and structured arrays, gh-13333
+        def subarray(dt, shape):
+            return np.dtype((dt, shape))
+
+        def structured(*dts):
+            return np.dtype([('x{}'.format(i), dt) for i, dt in enumerate(dts)])
+
+        def inspect(dt, dtype=None):
+            arr = np.zeros((), dt)
+            ret = structured_to_unstructured(arr, dtype=dtype)
+            backarr = unstructured_to_structured(ret, dt)
+            return ret.shape, ret.dtype, backarr.dtype
+
+        dt = structured(subarray(structured(np.int32, np.int32), 3))
+        assert_equal(inspect(dt), ((6,), np.int32, dt))
+
+        dt = structured(subarray(subarray(np.int32, 2), 2))
+        assert_equal(inspect(dt), ((4,), np.int32, dt))
+
+        dt = structured(np.int32)
+        assert_equal(inspect(dt), ((1,), np.int32, dt))
+
+        dt = structured(np.int32, subarray(subarray(np.int32, 2), 2))
+        assert_equal(inspect(dt), ((5,), np.int32, dt))
+
+        dt = structured()
+        assert_raises(ValueError, structured_to_unstructured, np.zeros(3, dt))
+
+        # these currently don't work, but we may make it work in the future
+        assert_raises(NotImplementedError, structured_to_unstructured,
+                                           np.zeros(3, dt), dtype=np.int32)
+        assert_raises(NotImplementedError, unstructured_to_structured,
+                                           np.zeros((3,0), dtype=np.int32))
+
     def test_field_assignment_by_name(self):
         a = np.ones(2, dtype=[('a', 'i4'), ('b', 'f8'), ('c', 'u1')])
         newdt = [('b', 'f4'), ('c', 'u1')]