Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit e8d1374

Browse files
committed
Merge pull request numpy#4812 from juliantaylor/align-bloat
Align bloat
2 parents b25cdd6 + 1d96a95 commit e8d1374

7 files changed

Lines changed: 78 additions & 45 deletions

File tree

numpy/core/src/multiarray/arraytypes.c.src

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3922,7 +3922,8 @@ NPY_NO_EXPORT PyArray_Descr @from@_Descr = {
39223922
/* elsize */
39233923
@num@ * sizeof(@fromtype@),
39243924
/* alignment */
3925-
@num@ * _ALIGN(@fromtype@),
3925+
@num@ * _ALIGN(@fromtype@) > NPY_MAX_COPY_ALIGNMENT ?
3926+
NPY_MAX_COPY_ALIGNMENT : @num@ * _ALIGN(@fromtype@),
39263927
/* subarray */
39273928
NULL,
39283929
/* fields */
@@ -4264,7 +4265,8 @@ set_typeinfo(PyObject *dict)
42644265
#endif
42654266
NPY_@name@,
42664267
NPY_BITSOF_@name@,
4267-
@num@ * _ALIGN(@type@),
4268+
@num@ * _ALIGN(@type@) > NPY_MAX_COPY_ALIGNMENT ?
4269+
NPY_MAX_COPY_ALIGNMENT : @num@ * _ALIGN(@type@),
42684270
(PyObject *) &Py@Name@ArrType_Type));
42694271
Py_DECREF(s);
42704272

numpy/core/src/multiarray/common.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -676,7 +676,7 @@ _IsAligned(PyArrayObject *ap)
676676

677677
/* alignment 1 types should have an efficient alignment for copy loops */
678678
if (PyArray_ISFLEXIBLE(ap) || PyArray_ISSTRING(ap)) {
679-
alignment = 16;
679+
alignment = NPY_MAX_COPY_ALIGNMENT;
680680
}
681681

682682
if (alignment == 1) {

numpy/core/src/multiarray/ctors.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1054,12 +1054,12 @@ PyArray_NewFromDescr_int(PyTypeObject *subtype, PyArray_Descr *descr, int nd,
10541054
fa->data = data;
10551055

10561056
/*
1057-
* If the strides were provided to the function, need to
1058-
* update the flags to get the right CONTIGUOUS, ALIGN properties
1057+
* Always update the flags to get the right CONTIGUOUS, ALIGN properties:
1058+
* data that is not owned and input strides may not be aligned, and on some
1059+
* platforms (debian sparc) malloc does not provide enough alignment for
1060+
* long double types.
10591061
*/
1060-
if (strides != NULL) {
1061-
PyArray_UpdateFlags((PyArrayObject *)fa, NPY_ARRAY_UPDATE_ALL);
1062-
}
1062+
PyArray_UpdateFlags((PyArrayObject *)fa, NPY_ARRAY_UPDATE_ALL);
10631063

10641064
/*
10651065
* call the __array_finalize__

numpy/core/src/multiarray/shape.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -776,7 +776,8 @@ PyArray_Transpose(PyArrayObject *ap, PyArray_Dims *permute)
776776
PyArray_DIMS(ret)[i] = PyArray_DIMS(ap)[permutation[i]];
777777
PyArray_STRIDES(ret)[i] = PyArray_STRIDES(ap)[permutation[i]];
778778
}
779-
PyArray_UpdateFlags(ret, NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_F_CONTIGUOUS);
779+
PyArray_UpdateFlags(ret, NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_F_CONTIGUOUS |
780+
NPY_ARRAY_ALIGNED);
780781
return (PyObject *)ret;
781782
}
782783

numpy/core/src/private/npy_config.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,17 @@
1010
#undef HAVE_HYPOT
1111
#endif
1212

13+
/*
14+
* Largest alignment the copy loops might require.
15+
* Required because string, void and complex types might get copied using larger
16+
* instructions than required to operate on them. E.g. complex float is copied
17+
* in 8 byte moves but arithmetic on it only loads in 4 byte moves.
18+
* The sparc platform may need that alignment for long doubles.
19+
* amd64 is not harmed much by the bloat as the system provides 16 byte
20+
* alignment by default.
21+
*/
22+
#define NPY_MAX_COPY_ALIGNMENT 16
23+
1324
/* Safe to use ldexp and frexp for long double for MSVC builds */
1425
#if (NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE) || defined(_MSC_VER)
1526
#ifdef HAVE_LDEXP

numpy/core/tests/test_numeric.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from decimal import Decimal
66
import warnings
77
import itertools
8+
import platform
89

910
import numpy as np
1011
from numpy.core import *
@@ -931,6 +932,7 @@ def test_nonzero_twodim(self):
931932
assert_equal(np.nonzero(x['a']), ([0, 1, 1, 2], [2, 0, 1, 1]))
932933
assert_equal(np.nonzero(x['b']), ([0, 0, 1, 2, 2], [0, 2, 0, 1, 2]))
933934

935+
assert_(not x['a'].T.flags.aligned)
934936
assert_equal(np.count_nonzero(x['a'].T), 4)
935937
assert_equal(np.count_nonzero(x['b'].T), 5)
936938
assert_equal(np.nonzero(x['a'].T), ([0, 1, 1, 2], [1, 1, 2, 0]))
@@ -1048,7 +1050,15 @@ def test_array_equiv(self):
10481050
def assert_array_strict_equal(x, y):
10491051
assert_array_equal(x, y)
10501052
# Check flags
1051-
assert_(x.flags == y.flags)
1053+
if 'sparc' not in platform.platform().lower():
1054+
assert_(x.flags == y.flags)
1055+
else:
1056+
# sparc arrays may not be aligned for long double types
1057+
assert_(x.flags.owndata == y.flags.owndata)
1058+
assert_(x.flags.writeable == y.flags.writeable)
1059+
assert_(x.flags.c_contiguous == y.flags.c_contiguous)
1060+
assert_(x.flags.f_contiguous == y.flags.f_contiguous)
1061+
assert_(x.flags.updateifcopy == y.flags.updateifcopy)
10521062
# check endianness
10531063
assert_(x.dtype.isnative == y.dtype.isnative)
10541064

numpy/f2py/tests/test_array_from_pyobj.py

Lines changed: 44 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import os
55
import sys
66
import copy
7+
import platform
78

89
import nose
910

@@ -81,37 +82,45 @@ def is_intent_exact(self,*names):
8182

8283
intent = Intent()
8384

84-
class Type(object):
85-
_type_names = ['BOOL', 'BYTE', 'UBYTE', 'SHORT', 'USHORT', 'INT', 'UINT',
86-
'LONG', 'ULONG', 'LONGLONG', 'ULONGLONG',
87-
'FLOAT', 'DOUBLE', 'LONGDOUBLE', 'CFLOAT', 'CDOUBLE',
88-
'CLONGDOUBLE']
89-
_type_cache = {}
90-
91-
_cast_dict = {'BOOL':['BOOL']}
92-
_cast_dict['BYTE'] = _cast_dict['BOOL'] + ['BYTE']
93-
_cast_dict['UBYTE'] = _cast_dict['BOOL'] + ['UBYTE']
94-
_cast_dict['BYTE'] = ['BYTE']
95-
_cast_dict['UBYTE'] = ['UBYTE']
96-
_cast_dict['SHORT'] = _cast_dict['BYTE'] + ['UBYTE', 'SHORT']
97-
_cast_dict['USHORT'] = _cast_dict['UBYTE'] + ['BYTE', 'USHORT']
98-
_cast_dict['INT'] = _cast_dict['SHORT'] + ['USHORT', 'INT']
99-
_cast_dict['UINT'] = _cast_dict['USHORT'] + ['SHORT', 'UINT']
100-
101-
_cast_dict['LONG'] = _cast_dict['INT'] + ['LONG']
102-
_cast_dict['ULONG'] = _cast_dict['UINT'] + ['ULONG']
103-
104-
_cast_dict['LONGLONG'] = _cast_dict['LONG'] + ['LONGLONG']
105-
_cast_dict['ULONGLONG'] = _cast_dict['ULONG'] + ['ULONGLONG']
106-
107-
_cast_dict['FLOAT'] = _cast_dict['SHORT'] + ['USHORT', 'FLOAT']
108-
_cast_dict['DOUBLE'] = _cast_dict['INT'] + ['UINT', 'FLOAT', 'DOUBLE']
109-
_cast_dict['LONGDOUBLE'] = _cast_dict['LONG'] + ['ULONG', 'FLOAT', 'DOUBLE', 'LONGDOUBLE']
110-
111-
_cast_dict['CFLOAT'] = _cast_dict['FLOAT'] + ['CFLOAT']
85+
_type_names = ['BOOL', 'BYTE', 'UBYTE', 'SHORT', 'USHORT', 'INT', 'UINT',
86+
'LONG', 'ULONG', 'LONGLONG', 'ULONGLONG',
87+
'FLOAT', 'DOUBLE', 'CFLOAT']
88+
89+
_cast_dict = {'BOOL':['BOOL']}
90+
_cast_dict['BYTE'] = _cast_dict['BOOL'] + ['BYTE']
91+
_cast_dict['UBYTE'] = _cast_dict['BOOL'] + ['UBYTE']
92+
_cast_dict['BYTE'] = ['BYTE']
93+
_cast_dict['UBYTE'] = ['UBYTE']
94+
_cast_dict['SHORT'] = _cast_dict['BYTE'] + ['UBYTE', 'SHORT']
95+
_cast_dict['USHORT'] = _cast_dict['UBYTE'] + ['BYTE', 'USHORT']
96+
_cast_dict['INT'] = _cast_dict['SHORT'] + ['USHORT', 'INT']
97+
_cast_dict['UINT'] = _cast_dict['USHORT'] + ['SHORT', 'UINT']
98+
99+
_cast_dict['LONG'] = _cast_dict['INT'] + ['LONG']
100+
_cast_dict['ULONG'] = _cast_dict['UINT'] + ['ULONG']
101+
102+
_cast_dict['LONGLONG'] = _cast_dict['LONG'] + ['LONGLONG']
103+
_cast_dict['ULONGLONG'] = _cast_dict['ULONG'] + ['ULONGLONG']
104+
105+
_cast_dict['FLOAT'] = _cast_dict['SHORT'] + ['USHORT', 'FLOAT']
106+
_cast_dict['DOUBLE'] = _cast_dict['INT'] + ['UINT', 'FLOAT', 'DOUBLE']
107+
108+
_cast_dict['CFLOAT'] = _cast_dict['FLOAT'] + ['CFLOAT']
109+
110+
# (debian) sparc system malloc does not provide the alignment required by
111+
# 16 byte long double types. This means the inout intent cannot be satisfied and
112+
# several tests fail, as the alignment flag can be randomly true or false.
113+
# When numpy gains an aligned allocator the tests could be enabled again.
114+
if 'sparc' not in platform.platform().lower():
115+
_type_names.extend(['LONGDOUBLE', 'CDOUBLE', 'CLONGDOUBLE'])
116+
_cast_dict['LONGDOUBLE'] = _cast_dict['LONG'] + \
117+
['ULONG', 'FLOAT', 'DOUBLE', 'LONGDOUBLE']
118+
_cast_dict['CLONGDOUBLE'] = _cast_dict['LONGDOUBLE'] + \
119+
['CFLOAT', 'CDOUBLE', 'CLONGDOUBLE']
112120
_cast_dict['CDOUBLE'] = _cast_dict['DOUBLE'] + ['CFLOAT', 'CDOUBLE']
113-
_cast_dict['CLONGDOUBLE'] = _cast_dict['LONGDOUBLE'] + ['CFLOAT', 'CDOUBLE', 'CLONGDOUBLE']
114121

122+
class Type(object):
123+
_type_cache = {}
115124

116125
def __new__(cls, name):
117126
if isinstance(name, dtype):
@@ -138,23 +147,23 @@ def _init(self, name):
138147
self.dtypechar = typeinfo[self.NAME][0]
139148

140149
def cast_types(self):
141-
return [self.__class__(_m) for _m in self._cast_dict[self.NAME]]
150+
return [self.__class__(_m) for _m in _cast_dict[self.NAME]]
142151

143152
def all_types(self):
144-
return [self.__class__(_m) for _m in self._type_names]
153+
return [self.__class__(_m) for _m in _type_names]
145154

146155
def smaller_types(self):
147156
bits = typeinfo[self.NAME][3]
148157
types = []
149-
for name in self._type_names:
158+
for name in _type_names:
150159
if typeinfo[name][3]<bits:
151160
types.append(Type(name))
152161
return types
153162

154163
def equal_types(self):
155164
bits = typeinfo[self.NAME][3]
156165
types = []
157-
for name in self._type_names:
166+
for name in _type_names:
158167
if name==self.NAME: continue
159168
if typeinfo[name][3]==bits:
160169
types.append(Type(name))
@@ -163,7 +172,7 @@ def equal_types(self):
163172
def larger_types(self):
164173
bits = typeinfo[self.NAME][3]
165174
types = []
166-
for name in self._type_names:
175+
for name in _type_names:
167176
if typeinfo[name][3]>bits:
168177
types.append(Type(name))
169178
return types
@@ -532,7 +541,7 @@ def test_inplace_from_casttype(self):
532541
assert_(obj.dtype.type is self.type.dtype) # obj type is changed inplace!
533542

534543

535-
for t in Type._type_names:
544+
for t in _type_names:
536545
exec('''\
537546
class test_%s_gen(unittest.TestCase,
538547
_test_shared_memory

0 commit comments

Comments
 (0)