Merge pull request numpy#4812 from juliantaylor/align-bloat

charris · charris · commit e8d137409801 · 2014-07-03T16:56:08.000-06:00
Align bloat
diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src
@@ -3922,7 +3922,8 @@ NPY_NO_EXPORT PyArray_Descr @from@_Descr = {
     /* elsize */
     @num@ * sizeof(@fromtype@),
     /* alignment */
-    @num@ * _ALIGN(@fromtype@),
+    @num@ * _ALIGN(@fromtype@) > NPY_MAX_COPY_ALIGNMENT ?
+        NPY_MAX_COPY_ALIGNMENT : @num@ * _ALIGN(@fromtype@),
     /* subarray */
     NULL,
     /* fields */
@@ -4264,7 +4265,8 @@ set_typeinfo(PyObject *dict)
 #endif
                 NPY_@name@,
                 NPY_BITSOF_@name@,
-                @num@ * _ALIGN(@type@),
+                @num@ * _ALIGN(@type@) > NPY_MAX_COPY_ALIGNMENT ?
+                    NPY_MAX_COPY_ALIGNMENT : @num@ * _ALIGN(@type@),
                 (PyObject *) &Py@Name@ArrType_Type));
     Py_DECREF(s);
 
diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c
@@ -676,7 +676,7 @@ _IsAligned(PyArrayObject *ap)
 
     /* alignment 1 types should have a efficient alignment for copy loops */
     if (PyArray_ISFLEXIBLE(ap) || PyArray_ISSTRING(ap)) {
-        alignment = 16;
+        alignment = NPY_MAX_COPY_ALIGNMENT;
     }
 
     if (alignment == 1) {
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
@@ -1054,12 +1054,12 @@ PyArray_NewFromDescr_int(PyTypeObject *subtype, PyArray_Descr *descr, int nd,
     fa->data = data;
 
     /*
-     * If the strides were provided to the function, need to
-     * update the flags to get the right CONTIGUOUS, ALIGN properties
+     * Always update the flags to get the right CONTIGUOUS, ALIGN properties:
+     * not owned data and input strides may not be aligned, and on some
+     * platforms (debian sparc) malloc does not provide enough alignment for
+     * long double types.
      */
-    if (strides != NULL) {
-        PyArray_UpdateFlags((PyArrayObject *)fa, NPY_ARRAY_UPDATE_ALL);
-    }
+    PyArray_UpdateFlags((PyArrayObject *)fa, NPY_ARRAY_UPDATE_ALL);
 
     /*
      * call the __array_finalize__
diff --git a/numpy/core/src/multiarray/shape.c b/numpy/core/src/multiarray/shape.c
@@ -776,7 +776,8 @@ PyArray_Transpose(PyArrayObject *ap, PyArray_Dims *permute)
         PyArray_DIMS(ret)[i] = PyArray_DIMS(ap)[permutation[i]];
         PyArray_STRIDES(ret)[i] = PyArray_STRIDES(ap)[permutation[i]];
     }
-    PyArray_UpdateFlags(ret, NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_F_CONTIGUOUS);
+    PyArray_UpdateFlags(ret, NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_F_CONTIGUOUS |
+                        NPY_ARRAY_ALIGNED);
     return (PyObject *)ret;
 }
 
diff --git a/numpy/core/src/private/npy_config.h b/numpy/core/src/private/npy_config.h
@@ -10,6 +10,17 @@
 #undef HAVE_HYPOT
 #endif
 
+/*
+ * largest alignment the copy loops might require
+ * required as string, void and complex types might get copied using larger
+ * instructions than required to operate on them. E.g. complex float is copied
+ * in 8 byte moves but arithmetic on them only loads in 4 byte moves.
+ * the sparc platform may need that alignment for long doubles.
+ * amd64 is not harmed much by the bloat as the system provides 16 byte
+ * alignment by default.
+ */
+#define NPY_MAX_COPY_ALIGNMENT 16
+
 /* Safe to use ldexp and frexp for long double for MSVC builds */
 #if (NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE) || defined(_MSC_VER)
     #ifdef HAVE_LDEXP
diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py
@@ -5,6 +5,7 @@
 from decimal import Decimal
 import warnings
 import itertools
+import platform
 
 import numpy as np
 from numpy.core import *
@@ -931,6 +932,7 @@ def test_nonzero_twodim(self):
         assert_equal(np.nonzero(x['a']), ([0, 1, 1, 2], [2, 0, 1, 1]))
         assert_equal(np.nonzero(x['b']), ([0, 0, 1, 2, 2], [0, 2, 0, 1, 2]))
 
+        assert_(not x['a'].T.flags.aligned)
         assert_equal(np.count_nonzero(x['a'].T), 4)
         assert_equal(np.count_nonzero(x['b'].T), 5)
         assert_equal(np.nonzero(x['a'].T), ([0, 1, 1, 2], [1, 1, 2, 0]))
@@ -1048,7 +1050,15 @@ def test_array_equiv(self):
 def assert_array_strict_equal(x, y):
     assert_array_equal(x, y)
     # Check flags
-    assert_(x.flags == y.flags)
+    if 'sparc' not in platform.platform().lower():
+        assert_(x.flags == y.flags)
+    else:
+        # sparc arrays may not be aligned for long double types
+        assert_(x.flags.owndata == y.flags.owndata)
+        assert_(x.flags.writeable == y.flags.writeable)
+        assert_(x.flags.c_contiguous == y.flags.c_contiguous)
+        assert_(x.flags.f_contiguous == y.flags.f_contiguous)
+        assert_(x.flags.updateifcopy == y.flags.updateifcopy)
     # check endianness
     assert_(x.dtype.isnative == y.dtype.isnative)
 
diff --git a/numpy/f2py/tests/test_array_from_pyobj.py b/numpy/f2py/tests/test_array_from_pyobj.py
@@ -4,6 +4,7 @@
 import os
 import sys
 import copy
+import platform
 
 import nose
 
@@ -81,37 +82,45 @@ def is_intent_exact(self,*names):
 
 intent = Intent()
 
-class Type(object):
-    _type_names = ['BOOL', 'BYTE', 'UBYTE', 'SHORT', 'USHORT', 'INT', 'UINT',
-                   'LONG', 'ULONG', 'LONGLONG', 'ULONGLONG',
-                   'FLOAT', 'DOUBLE', 'LONGDOUBLE', 'CFLOAT', 'CDOUBLE',
-                   'CLONGDOUBLE']
-    _type_cache = {}
-
-    _cast_dict = {'BOOL':['BOOL']}
-    _cast_dict['BYTE'] = _cast_dict['BOOL'] + ['BYTE']
-    _cast_dict['UBYTE'] = _cast_dict['BOOL'] + ['UBYTE']
-    _cast_dict['BYTE'] = ['BYTE']
-    _cast_dict['UBYTE'] = ['UBYTE']
-    _cast_dict['SHORT'] = _cast_dict['BYTE'] + ['UBYTE', 'SHORT']
-    _cast_dict['USHORT'] = _cast_dict['UBYTE'] + ['BYTE', 'USHORT']
-    _cast_dict['INT'] = _cast_dict['SHORT'] + ['USHORT', 'INT']
-    _cast_dict['UINT'] = _cast_dict['USHORT'] + ['SHORT', 'UINT']
-
-    _cast_dict['LONG'] = _cast_dict['INT'] + ['LONG']
-    _cast_dict['ULONG'] = _cast_dict['UINT'] + ['ULONG']
-
-    _cast_dict['LONGLONG'] = _cast_dict['LONG'] + ['LONGLONG']
-    _cast_dict['ULONGLONG'] = _cast_dict['ULONG'] + ['ULONGLONG']
-
-    _cast_dict['FLOAT'] = _cast_dict['SHORT'] + ['USHORT', 'FLOAT']
-    _cast_dict['DOUBLE'] = _cast_dict['INT'] + ['UINT', 'FLOAT', 'DOUBLE']
-    _cast_dict['LONGDOUBLE'] = _cast_dict['LONG'] + ['ULONG', 'FLOAT', 'DOUBLE', 'LONGDOUBLE']
-
-    _cast_dict['CFLOAT'] = _cast_dict['FLOAT'] + ['CFLOAT']
+_type_names = ['BOOL', 'BYTE', 'UBYTE', 'SHORT', 'USHORT', 'INT', 'UINT',
+               'LONG', 'ULONG', 'LONGLONG', 'ULONGLONG',
+               'FLOAT', 'DOUBLE', 'CFLOAT']
+
+_cast_dict = {'BOOL':['BOOL']}
+_cast_dict['BYTE'] = _cast_dict['BOOL'] + ['BYTE']
+_cast_dict['UBYTE'] = _cast_dict['BOOL'] + ['UBYTE']
+_cast_dict['BYTE'] = ['BYTE']
+_cast_dict['UBYTE'] = ['UBYTE']
+_cast_dict['SHORT'] = _cast_dict['BYTE'] + ['UBYTE', 'SHORT']
+_cast_dict['USHORT'] = _cast_dict['UBYTE'] + ['BYTE', 'USHORT']
+_cast_dict['INT'] = _cast_dict['SHORT'] + ['USHORT', 'INT']
+_cast_dict['UINT'] = _cast_dict['USHORT'] + ['SHORT', 'UINT']
+
+_cast_dict['LONG'] = _cast_dict['INT'] + ['LONG']
+_cast_dict['ULONG'] = _cast_dict['UINT'] + ['ULONG']
+
+_cast_dict['LONGLONG'] = _cast_dict['LONG'] + ['LONGLONG']
+_cast_dict['ULONGLONG'] = _cast_dict['ULONG'] + ['ULONGLONG']
+
+_cast_dict['FLOAT'] = _cast_dict['SHORT'] + ['USHORT', 'FLOAT']
+_cast_dict['DOUBLE'] = _cast_dict['INT'] + ['UINT', 'FLOAT', 'DOUBLE']
+
+_cast_dict['CFLOAT'] = _cast_dict['FLOAT'] + ['CFLOAT']
+
+# (debian) sparc system malloc does not provide the alignment required by
+# 16 byte long double types. This means the inout intent cannot be satisfied
+# and several tests fail, as the alignment flag can be randomly true or false.
+# When numpy gains an aligned allocator the tests could be enabled again.
+if 'sparc' not in platform.platform().lower():
+    _type_names.extend(['LONGDOUBLE', 'CDOUBLE', 'CLONGDOUBLE'])
+    _cast_dict['LONGDOUBLE'] = _cast_dict['LONG'] + \
+                               ['ULONG', 'FLOAT', 'DOUBLE', 'LONGDOUBLE']
+    _cast_dict['CLONGDOUBLE'] = _cast_dict['LONGDOUBLE'] + \
+                               ['CFLOAT', 'CDOUBLE', 'CLONGDOUBLE']
     _cast_dict['CDOUBLE'] = _cast_dict['DOUBLE'] + ['CFLOAT', 'CDOUBLE']
-    _cast_dict['CLONGDOUBLE'] = _cast_dict['LONGDOUBLE'] + ['CFLOAT', 'CDOUBLE', 'CLONGDOUBLE']
 
+class Type(object):
+    _type_cache = {}
 
     def __new__(cls, name):
         if isinstance(name, dtype):
@@ -138,23 +147,23 @@ def _init(self, name):
         self.dtypechar = typeinfo[self.NAME][0]
 
     def cast_types(self):
-        return [self.__class__(_m) for _m in self._cast_dict[self.NAME]]
+        return [self.__class__(_m) for _m in _cast_dict[self.NAME]]
 
     def all_types(self):
-        return [self.__class__(_m) for _m in self._type_names]
+        return [self.__class__(_m) for _m in _type_names]
 
     def smaller_types(self):
         bits = typeinfo[self.NAME][3]
         types = []
-        for name in self._type_names:
+        for name in _type_names:
             if typeinfo[name][3]<bits:
                 types.append(Type(name))
         return types
 
     def equal_types(self):
         bits = typeinfo[self.NAME][3]
         types = []
-        for name in self._type_names:
+        for name in _type_names:
             if name==self.NAME: continue
             if typeinfo[name][3]==bits:
                 types.append(Type(name))
@@ -163,7 +172,7 @@ def equal_types(self):
     def larger_types(self):
         bits = typeinfo[self.NAME][3]
         types = []
-        for name in self._type_names:
+        for name in _type_names:
             if typeinfo[name][3]>bits:
                 types.append(Type(name))
         return types
@@ -532,7 +541,7 @@ def test_inplace_from_casttype(self):
             assert_(obj.dtype.type is self.type.dtype) # obj type is changed inplace!
 
 
-for t in Type._type_names:
+for t in _type_names:
     exec('''\
 class test_%s_gen(unittest.TestCase,
               _test_shared_memory

Original file line number	Diff line number	Diff line change
`@@ -676,7 +676,7 @@ _IsAligned(PyArrayObject *ap)`
`676`	`676`
`677`	`677`	`/* alignment 1 types should have a efficient alignment for copy loops */`
`678`	`678`	`if (PyArray_ISFLEXIBLE(ap) \|\| PyArray_ISSTRING(ap)) {`
`679`		`- alignment = 16;`
	`679`	`+ alignment = NPY_MAX_COPY_ALIGNMENT;`
`680`	`680`	`}`
`681`	`681`
`682`	`682`	`if (alignment == 1) {`
Original file line number	Diff line number	Diff line change
`@@ -776,7 +776,8 @@ PyArray_Transpose(PyArrayObject *ap, PyArray_Dims *permute)`
`776`	`776`	`PyArray_DIMS(ret)[i] = PyArray_DIMS(ap)[permutation[i]];`
`777`	`777`	`PyArray_STRIDES(ret)[i] = PyArray_STRIDES(ap)[permutation[i]];`
`778`	`778`	`}`
`779`		`- PyArray_UpdateFlags(ret, NPY_ARRAY_C_CONTIGUOUS \| NPY_ARRAY_F_CONTIGUOUS);`
	`779`	`+ PyArray_UpdateFlags(ret, NPY_ARRAY_C_CONTIGUOUS \| NPY_ARRAY_F_CONTIGUOUS \|`
	`780`	`+ NPY_ARRAY_ALIGNED);`
`780`	`781`	`return (PyObject *)ret;`
`781`	`782`	`}`
`782`	`783`