Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit c892733

Browse files
committed
ENH: Fixups to multi-field assignment helpers
1 parent f1fba70 commit c892733

File tree

3 files changed

+77
-42
lines changed

3 files changed

+77
-42
lines changed

doc/release/1.16.0-notes.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@ Highlights
1919
New functions
2020
=============
2121

22+
* New functions in the `numpy.lib.recfunctions` module to ease the structured
23+
assignment changes: `assign_fields_by_name`, `structured_to_unstructured`,
24+
`unstructured_to_structured`, `apply_along_fields`, and `require_fields`.
25+
See the user guide at <https://docs.scipy.org/doc/numpy/user/basics.rec.html>
26+
for more info.
2227

2328
Deprecations
2429
============

numpy/lib/recfunctions.py

Lines changed: 64 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -888,7 +888,7 @@ def _get_fields_and_offsets(dt, offset=0):
888888
fields.extend(_get_fields_and_offsets(field[0], field[1] + offset))
889889
return fields
890890

891-
def structured_to_unstructured(arr, dtype=None):
891+
def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'):
892892
"""
893893
Converts an n-D structured array into an (n+1)-D unstructured array.
894894
@@ -903,9 +903,15 @@ def structured_to_unstructured(arr, dtype=None):
903903
Parameters
904904
----------
905905
arr : ndarray
906-
Structured array or dtype to convert.
906+
Structured array or dtype to convert. Cannot contain object datatype.
907907
dtype : dtype, optional
908908
The dtype of the output unstructured array
909+
copy : bool, optional
910+
See copy argument to `ndarray.astype`. If true, always return a copy.
911+
If false, and `dtype` requirements are satisfied, a view is returned.
912+
casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
913+
See casting argument of `ndarray.astype`. Controls what kind of data
914+
casting may occur.
909915
910916
Returns
911917
-------
@@ -932,39 +938,46 @@ def structured_to_unstructured(arr, dtype=None):
932938
array([ 3. , 5.5, 9. , 11. ])
933939
934940
"""
935-
if not arr.dtype.names:
941+
if arr.dtype.names is None:
936942
raise ValueError('arr must be a structured array')
937943

938944
fields = _get_fields_and_offsets(arr.dtype)
939-
n_elem = sum(f[2] for f in fields)
945+
names, dts, counts, offsets = zip(*fields)
946+
n_fields = len(names)
940947

941948
if dtype is None:
942-
out_dtype = np.result_type(*[f[1].base for f in fields])
949+
out_dtype = np.result_type(*[dt.base for dt in dts])
943950
else:
944951
out_dtype = dtype
945952

946-
out = np.empty(arr.shape + (n_elem,), dtype=out_dtype)
953+
# Use a series of views and casts to convert to an unstructured array:
947954

948-
index = 0
949-
for name, dt, count, offset in fields:
950-
if count == 1:
951-
out[...,index] = arr.getfield(dt, offset)
952-
index += 1
953-
else:
954-
out[...,index:index+count] = arr.getfield(dt, offset)
955-
index += count
955+
# first view using flattened fields (doesn't work for object arrays)
956+
# Note: dts may include a shape for subarrays
957+
flattened_fields = np.dtype({'names': names,
958+
'formats': dts,
959+
'offsets': offsets,
960+
'itemsize': arr.dtype.itemsize})
961+
arr = arr.view(flattened_fields)
956962

957-
return out
963+
# next cast to a packed format with all fields converted to new dtype
964+
packed_fields = np.dtype({'names': names,
965+
'formats': [(out_dtype, c) for c in counts]})
966+
arr = arr.astype(packed_fields, copy=copy, casting=casting)
958967

959-
def unstructured_to_structured(arr, dtype=None, names=None, align=False):
968+
# finally it is safe to view the packed fields as the unstructured type
969+
return arr.view((out_dtype, sum(counts)))
970+
971+
def unstructured_to_structured(arr, dtype=None, names=None, align=False,
972+
copy=False, casting='unsafe'):
960973
"""
961974
Converts an n-D unstructured array into an (n-1)-D structured array.
962975
963-
The last dimension of the array is converted into a structure, with
976+
The last dimension of the input array is converted into a structure, with
964977
number of field-elements equal to the size of the last dimension of the
965-
input array. By default all fields will have the same dtype as the
966-
original array, but you may supply a custom dtype with the right
967-
number of fields-elements.
978+
input array. By default all output fields have the input array's dtype, but
979+
an output structured dtype with an equal number of field-elements can be
980+
supplied instead.
968981
969982
Nested fields, as well as each element of any subarray fields, all count
970983
towards the number of field-elements.
@@ -979,7 +992,13 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False):
979992
If dtype is not supplied, this specifies the field names for the output
980993
dtype, in order. The field dtypes will be the same as the input array.
981994
align : boolean, optional
982-
If dtype is not supplied, whether to create an aligned memory layout.
995+
Whether to create an aligned memory layout.
996+
copy : bool, optional
997+
See copy argument to `ndarray.astype`. If true, always return a copy.
998+
If false, and `dtype` requirements are satisfied, a view is returned.
999+
casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
1000+
See casting argument of `ndarray.astype`. Controls what kind of data
1001+
casting may occur.
9831002
9841003
Returns
9851004
-------
@@ -1011,29 +1030,36 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False):
10111030
names = ['f{}'.format(n) for n in range(n_elem)]
10121031
out_dtype = np.dtype([(n, arr.dtype) for n in names], align=align)
10131032
fields = _get_fields_and_offsets(out_dtype)
1033+
names, dts, counts, offsets = zip(*fields)
10141034
else:
10151035
if names is not None:
10161036
raise ValueError("don't supply both dtype and names")
10171037
# sanity check of the input dtype
10181038
fields = _get_fields_and_offsets(dtype)
1019-
n_fields = sum(f[2] for f in fields)
1020-
if n_fields != n_elem:
1039+
names, dts, counts, offsets = zip(*fields)
1040+
if n_elem != sum(counts):
10211041
raise ValueError('The length of the last dimension of arr must '
10221042
'be equal to the number of fields in dtype')
10231043
out_dtype = dtype
1044+
if align and not out_dtype.isalignedstruct:
1045+
raise ValueError("align was True but dtype is not aligned")
10241046

1025-
out = np.empty(arr.shape[:-1], dtype=out_dtype)
1047+
# Use a series of views and casts to convert to a structured array:
10261048

1027-
n = 0
1028-
for name, dt, count, offset in fields:
1029-
if count == 1:
1030-
out.setfield(arr[...,n], dt, offset)
1031-
n += 1
1032-
else:
1033-
out.setfield(arr[...,n:n+count], dt, offset)
1034-
n += count
1049+
# first view as a packed structured array of one dtype
1050+
packed_fields = np.dtype({'names': names,
1051+
'formats': [(arr.dtype, c) for c in counts]})
1052+
arr = np.ascontiguousarray(arr).view(packed_fields)
10351053

1036-
return out
1054+
# next cast to an unpacked but flattened format with varied dtypes
1055+
flattened_fields = np.dtype({'names': names,
1056+
'formats': dts,
1057+
'offsets': offsets,
1058+
'itemsize': out_dtype.itemsize})
1059+
arr = arr.astype(flattened_fields, copy=copy, casting=casting)
1060+
1061+
# finally view as the final nested dtype and remove the last axis
1062+
return arr.view(out_dtype)[..., 0]
10371063

10381064
def apply_along_fields(func, arr):
10391065
"""
@@ -1066,7 +1092,7 @@ def apply_along_fields(func, arr):
10661092
array([ 3. , 5.5, 9. , 11. ])
10671093
10681094
"""
1069-
if not arr.dtype.names:
1095+
if arr.dtype.names is None:
10701096
raise ValueError('arr must be a structured array')
10711097

10721098
uarr = structured_to_unstructured(arr)
@@ -1113,15 +1139,11 @@ def assign_fields_by_name(dst, src, zero_unassigned=True):
11131139

11141140
def require_fields(array, required_dtype):
11151141
"""
1116-
Casts the array to the required dtype using assignment by field-name.
1117-
1118-
Normal structured array casting/assignment works "by position" in numpy
1119-
1.14+, meaning that the first field from the source's dtype is copied to
1120-
the first field of the destination's dtype, and so on.
1142+
Casts a structured array to a new dtype using assignment by field-name.
11211143
1122-
This function assigns by name instead, so the value of a field in the
1123-
output array is the value of the field with the same name in the source
1124-
array.
1144+
This function assigns from the old to the new array by name, so the
1145+
value of a field in the output array is the value of the field with the
1146+
same name in the source array.
11251147
11261148
If a field name in the required_dtype does not exist in the
11271149
input array, that field is set to 0 in the output array.

numpy/lib/tests/test_recfunctions.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,14 @@ def test_structured_to_unstructured(self):
233233
assert_equal(apply_along_fields(np.mean, d[['x', 'z']]),
234234
np.array([ 3. , 5.5, 9. , 11. ]))
235235

236+
# check that for uniform field dtypes we get a view, not a copy:
237+
d = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 12)],
238+
dtype=[('x', 'i4'), ('y', 'i4'), ('z', 'i4')])
239+
dd = structured_to_unstructured(d)
240+
ddd = unstructured_to_structured(dd, d.dtype)
241+
assert_(dd.base is d)
242+
assert_(ddd.base is d)
243+
236244
def test_field_assignment_by_name(self):
237245
a = np.ones(2, dtype=[('a', 'i4'), ('b', 'f8'), ('c', 'u1')])
238246
newdt = [('b', 'f4'), ('c', 'u1')]

0 commit comments

Comments
 (0)