diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py
index a464562c45da..57b53e74d053 100644
--- a/numpy/core/numeric.py
+++ b/numpy/core/numeric.py
@@ -43,7 +43,8 @@
            'Inf', 'inf', 'infty', 'Infinity',
            'nan', 'NaN', 'False_', 'True_', 'bitwise_not',
            'CLIP', 'RAISE', 'WRAP', 'MAXDIMS', 'BUFSIZE', 'ALLOW_THREADS',
-           'ComplexWarning', 'may_share_memory', 'full', 'full_like']
+           'ComplexWarning', 'may_share_memory', 'full', 'full_like',
+           'get_data_alignment', 'set_data_alignment']
 
 if sys.version_info[0] < 3:
     __all__.extend(['getbuffer', 'newbuffer'])
@@ -2476,6 +2477,43 @@ def array_equiv(a1, a2):
     return bool(asarray(a1 == a2).all())
 
 
+def get_data_alignment():
+    """
+    Get the guaranteed alignment of array data allocated by NumPy.
+
+    Returns
+    -------
+    res : int
+        A power of two representing the current alignment, in bytes, enforced
+        when an array's data is allocated.
+
+    See Also
+    --------
+    set_data_alignment : Change the alignment used for future allocations.
+
+    """
+    return multiarray._get_alignment()
+
+
+def set_data_alignment(align):
+    """
+    Set the guaranteed alignment of array data allocated by NumPy.
+
+    Parameters
+    ----------
+    align : int
+        A power of two representing the alignment, in bytes, to enforce
+        in future data allocations.
+
+    Raises
+    ------
+    ValueError
+        If the alignment is invalid (too small, or not a power of two).
+
+    """
+    multiarray._set_alignment(align)
+
+
 _errdict = {"ignore":ERR_IGNORE,
             "warn":ERR_WARN,
             "raise":ERR_RAISE,
diff --git a/numpy/core/src/multiarray/alloc.c b/numpy/core/src/multiarray/alloc.c
index 77eb0416bc34..be187d03dedc 100644
--- a/numpy/core/src/multiarray/alloc.c
+++ b/numpy/core/src/multiarray/alloc.c
@@ -8,6 +8,7 @@
 #include "numpy/arrayobject.h"
 #include
 #include "npy_config.h"
+#include "templ_common.h" /* for npy_mul_with_overflow_intp */
 
 #include
 
@@ -59,6 +60,20 @@ _npy_free_cache(void * p, npy_uintp nelem, npy_uint msz,
     dealloc(p);
 }
 
+/*
+ * Release every pointer cached in the first msz buckets via dealloc()
+ */
+static void
+_npy_clear_cache(npy_uint msz, cache_bucket * cache, void (*dealloc)(void *))
+{
+    npy_intp i, nelem;
+    for (nelem = 0; nelem < msz; nelem++) {
+        for (i = 0; i < cache[nelem].available; i++) {
+            dealloc(cache[nelem].ptrs[i]);
+        }
+        cache[nelem].available = 0;
+    }
+}
 
 /*
  * array data cache, sz is number of bytes to allocate
@@ -162,15 +177,97 @@ PyDataMem_SetEventHook(PyDataMem_EventHookFunc *newhook,
     return temp;
 }
 
+
+/* Smallest accepted alignment, and the default (see datamem_align) */
+#define MIN_ALIGN 16
+
+static size_t datamem_align = MIN_ALIGN;
+static size_t datamem_align_mask = MIN_ALIGN - 1;
+
+/*
+ * Size to request from the system allocator for `size` usable bytes:
+ * adds room for the stored base pointer plus worst-case alignment padding.
+ */
+static NPY_INLINE size_t
+get_aligned_size(size_t size)
+{
+    return size + sizeof(void *) + datamem_align_mask;
+}
+
+/*
+ * Round ptr up to the current alignment, leaving sizeof(void *) before it.
+ */
+static NPY_INLINE void *
+get_aligned_pointer(void *ptr)
+{
+    /* Skip sizeof(void *) first so the base pointer fits just before */
+    npy_intp aligned_ptr = ((npy_intp) ptr + sizeof(void *) + datamem_align_mask)
+        & ~datamem_align_mask;
+    return (void *) aligned_ptr;
+}
+
+/*
+ * Remember the base allocation start ahead of the aligned memory area.
+ */
+static NPY_INLINE void
+store_base_pointer(void *aligned_ptr, void *ptr)
+{
+    ((void **) aligned_ptr)[-1] = ptr;  /* slot just below the data */
+}
+
+/*
+ * Given an aligned pointer, get the start of the base allocation.
+ */
+static NPY_INLINE void *
+get_base_pointer(void *aligned_ptr)
+{
+    return ((void **) aligned_ptr)[-1];
+}
+
+/* Internal API for querying and changing the current alignment */
+
+NPY_NO_EXPORT size_t
+npy_datamem_get_align(void)
+{
+    return datamem_align;
+}
+
+NPY_NO_EXPORT int
+npy_datamem_set_align(size_t align)
+{
+    size_t align_mask = align - 1;
+    if (align < MIN_ALIGN) {
+        /* Too small */
+        return -1;
+    }
+    if ((align ^ align_mask) != (align | align_mask)) {
+        /* align and align - 1 share a bit: not a power of two */
+        return -1;
+    }
+    if (align > datamem_align) {
+        /* Alignment has increased, free all cached data areas as they may
+           not be aligned anymore. */
+        _npy_clear_cache(NBUCKETS, datacache, &PyDataMem_FREE);
+    }
+    datamem_align = align;
+    datamem_align_mask = align_mask;
+    return 0;
+}
+
 /*NUMPY_API
  * Allocates memory for array data.
  */
 NPY_NO_EXPORT void *
 PyDataMem_NEW(size_t size)
 {
-    void *result;
+    void *base_result, *result = NULL;
+
+    base_result = malloc(get_aligned_size(size));
+    if (base_result != NULL) {
+        result = get_aligned_pointer(base_result);
+        store_base_pointer(result, base_result);
+    }
 
-    result = malloc(size);
     if (_PyDataMem_eventhook != NULL) {
         NPY_ALLOW_C_API_DEF
         NPY_ALLOW_C_API
@@ -187,16 +284,24 @@ PyDataMem_NEW(size_t size)
  * Allocates zeroed memory for array data.
  */
 NPY_NO_EXPORT void *
-PyDataMem_NEW_ZEROED(size_t size, size_t elsize)
+PyDataMem_NEW_ZEROED(size_t nelems, size_t elsize)
 {
-    void *result;
+    void *base_result, *result = NULL;
+    size_t size;  /* NOTE(review): confirm the helper accepts a size_t * */
+
+    if (!npy_mul_with_overflow_intp(&size, nelems, elsize)) {
+        base_result = calloc(get_aligned_size(size), 1);
+        if (base_result != NULL) {
+            result = get_aligned_pointer(base_result);
+            store_base_pointer(result, base_result);
+        }
+    }
 
-    result = calloc(size, elsize);
     if (_PyDataMem_eventhook != NULL) {
         NPY_ALLOW_C_API_DEF
         NPY_ALLOW_C_API
         if (_PyDataMem_eventhook != NULL) {
-            (*_PyDataMem_eventhook)(NULL, result, size * elsize,
+            (*_PyDataMem_eventhook)(NULL, result, nelems * elsize,
                                     _PyDataMem_eventhook_user_data);
         }
         NPY_DISABLE_C_API
@@ -210,7 +315,9 @@ PyDataMem_NEW_ZEROED(size_t size, size_t elsize)
 NPY_NO_EXPORT void
 PyDataMem_FREE(void *ptr)
 {
-    free(ptr);
+    if (ptr != NULL) {
+        free(get_base_pointer(ptr));
+    }
     if (_PyDataMem_eventhook != NULL) {
         NPY_ALLOW_C_API_DEF
         NPY_ALLOW_C_API
@@ -228,9 +335,39 @@ PyDataMem_FREE(void *ptr)
 NPY_NO_EXPORT void *
 PyDataMem_RENEW(void *ptr, size_t size)
 {
-    void *result;
+    void *base_result, *result = NULL;
+    void *base_ptr = NULL;
+    size_t offset = 0;
+    size_t alloc_size = get_aligned_size(size);
 
-    result = realloc(ptr, size);
+    if (ptr != NULL) {
+        base_ptr = get_base_pointer(ptr);
+        offset = (size_t) ((char *) ptr - (char *) base_ptr);
+        /* The block may date from a time when the alignment was larger:
+           keep room for the data at its present offset, so that
+           realloc() cannot truncate it. */
+        if (alloc_size < offset + size) {
+            alloc_size = offset + size;
+        }
+    }
+    /* With ptr == NULL this behaves like malloc(), as realloc() does */
+    base_result = realloc(base_ptr, alloc_size);
+    if (base_result != NULL) {
+        if (base_result == base_ptr) {
+            result = ptr;
+        }
+        else {
+            size_t new_offset;
+            result = get_aligned_pointer(base_result);
+            /* If the offset from base pointer changed, we must move
+               the data area ourselves (a fresh allocation has no data) */
+            new_offset = (size_t) ((char *) result - (char *) base_result);
+            if (ptr != NULL && new_offset != offset) {
+                memmove(result, (const char *) base_result + offset, size);
+            }
+            store_base_pointer(result, base_result);
+        }
+    }
     if (_PyDataMem_eventhook != NULL) {
         NPY_ALLOW_C_API_DEF
         NPY_ALLOW_C_API
diff --git a/numpy/core/src/multiarray/alloc.h b/numpy/core/src/multiarray/alloc.h
index 8f6b167d0380..24b194253ad9 100644
--- 
a/numpy/core/src/multiarray/alloc.h
+++ b/numpy/core/src/multiarray/alloc.h
@@ -19,4 +19,11 @@ npy_alloc_cache_dim(npy_uintp sz);
 NPY_NO_EXPORT void
 npy_free_cache_dim(void * p, npy_uintp sd);
 
+NPY_NO_EXPORT size_t
+npy_datamem_get_align(void);
+
+NPY_NO_EXPORT int
+npy_datamem_set_align(size_t);
+
+
 #endif
diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c
index 79e471ceeb48..9439678cd1be 100644
--- a/numpy/core/src/multiarray/multiarraymodule.c
+++ b/numpy/core/src/multiarray/multiarraymodule.c
@@ -58,6 +58,7 @@ NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0;
 #include "multiarraymodule.h"
 #include "cblasfuncs.h"
 #include "vdot.h"
+#include "alloc.h"
 #include "templ_common.h" /* for npy_mul_with_overflow_intp */
 #include "compiled_base.h"
 
@@ -3865,6 +3866,34 @@ array_may_share_memory(PyObject *NPY_UNUSED(ignored), PyObject *args)
     }
 }
 
+/*
+ * multiarray._get_alignment(): return the current data alignment in bytes.
+ */
+static PyObject *
+get_alignment(PyObject *NPY_UNUSED(ignored), PyObject *NPY_UNUSED(args))
+{
+    return PyLong_FromSize_t(npy_datamem_get_align());
+}
+
+/*
+ * multiarray._set_alignment(align): change the data alignment.
+ * Raises ValueError if align is negative or is rejected by
+ * npy_datamem_set_align().
+ */
+static PyObject *
+set_alignment(PyObject *NPY_UNUSED(ignored), PyObject *args)
+{
+    int align;
+    if (!PyArg_ParseTuple(args, "i", &align)) {
+        return NULL;
+    }
+    if (align < 0 || npy_datamem_set_align((size_t) align)) {
+        PyErr_SetString(PyExc_ValueError, "invalid value for alignment");
+        return NULL;
+    }
+    Py_RETURN_NONE;
+}
+
 static struct PyMethodDef array_module_methods[] = {
     {"_get_ndarray_c_version",
         (PyCFunction)array__get_ndarray_c_version,
@@ -4011,6 +4040,12 @@ static struct PyMethodDef array_module_methods[] = {
     {"test_interrupt",
         (PyCFunction)test_interrupt,
         METH_VARARGS, NULL},
+    {"_set_alignment",
+        (PyCFunction)set_alignment,
+        METH_VARARGS, NULL},
+    {"_get_alignment",
+        (PyCFunction)get_alignment,
+        METH_NOARGS, NULL},
     {"_insert", (PyCFunction)arr_insert,
         METH_VARARGS | METH_KEYWORDS,
         "Insert vals sequentially into equivalent 1-d positions "
diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py
index 
b0d6770527f5..2c33908113e0 100644
--- a/numpy/core/tests/test_multiarray.py
+++ b/numpy/core/tests/test_multiarray.py
@@ -4987,5 +4987,32 @@ def test_collections_hashable(self):
         self.assertFalse(isinstance(x, collections.Hashable))
 
 
+class TestAlignment(TestCase):
+
+    def check_alignment(self, align):
+        for size in (0, 1, 7, 12, 135, 777):  # empty, tiny and odd sizes
+            e = np.empty(size, dtype='int8')
+            z = np.zeros(size, dtype='int8')
+            self.assertEqual(e.ctypes.data & (align - 1), 0, (size, align))
+            self.assertEqual(z.ctypes.data & (align - 1), 0, (size, align))
+
+    def test_get_alignment(self):
+        a = get_data_alignment()
+        # The default is expected to be one of these small powers of two.
+        self.assertTrue(a in (8, 16, 32), a)
+
+    def test_set_alignment(self):
+        old_align = get_data_alignment()
+        self.check_alignment(old_align)
+        try:
+            aligns = [16, 32, 64, 512]
+            for a in aligns:
+                set_data_alignment(a)
+                self.assertEqual(get_data_alignment(), a)
+                self.check_alignment(a)
+        finally:  # restore the alignment in effect before the test
+            set_data_alignment(old_align)
+
+
 if __name__ == "__main__":
     run_module_suite()