Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Fix #5312: use an aligned allocator. #5457

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 10 commits into from
40 changes: 39 additions & 1 deletion numpy/core/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@
'Inf', 'inf', 'infty', 'Infinity',
'nan', 'NaN', 'False_', 'True_', 'bitwise_not',
'CLIP', 'RAISE', 'WRAP', 'MAXDIMS', 'BUFSIZE', 'ALLOW_THREADS',
'ComplexWarning', 'may_share_memory', 'full', 'full_like']
'ComplexWarning', 'may_share_memory', 'full', 'full_like',
'get_data_alignment', 'set_data_alignment']

if sys.version_info[0] < 3:
__all__.extend(['getbuffer', 'newbuffer'])
Expand Down Expand Up @@ -2476,6 +2477,43 @@ def array_equiv(a1, a2):
return bool(asarray(a1 == a2).all())


def get_data_alignment():
    """
    Return the alignment NumPy guarantees for array data it allocates.

    Returns
    -------
    res : int
        The current alignment in bytes (a power of two) that is enforced
        whenever the data area of an array is allocated.

    See Also
    --------
    set_data_alignment

    """
    alignment = multiarray._get_alignment()
    return alignment


def set_data_alignment(align):
    """
    Change the alignment NumPy guarantees for array data it allocates.

    Parameters
    ----------
    align : int
        Alignment in bytes, which must be a power of two, to enforce for
        data allocations made from now on.

    Raises
    ------
    ValueError
        If `align` is invalid (too small, or not a power of two).

    See Also
    --------
    get_data_alignment

    """
    apply_alignment = multiarray._set_alignment
    apply_alignment(align)


_errdict = {"ignore":ERR_IGNORE,
"warn":ERR_WARN,
"raise":ERR_RAISE,
Expand Down
143 changes: 134 additions & 9 deletions numpy/core/src/multiarray/alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "numpy/arrayobject.h"
#include <numpy/npy_common.h>
#include "npy_config.h"
#include "templ_common.h" /* for npy_mul_with_overflow_intp */

#include <assert.h>

Expand Down Expand Up @@ -59,6 +60,20 @@ _npy_free_cache(void * p, npy_uintp nelem, npy_uint msz,
dealloc(p);
}

/*
 * Empty the first `msz` buckets of `cache`: every pointer currently held
 * in a bucket is handed to `dealloc`, then the bucket is marked as holding
 * no entries.
 */
static void
_npy_clear_cache(npy_uint msz, cache_bucket * cache, void (*dealloc)(void *))
{
    npy_intp bucket_idx;

    for (bucket_idx = 0; bucket_idx < msz; bucket_idx++) {
        cache_bucket *bucket = &cache[bucket_idx];
        npy_intp slot = 0;

        /* Release the stored pointers in the order they are stored */
        while (slot < bucket->available) {
            dealloc(bucket->ptrs[slot]);
            slot++;
        }
        bucket->available = 0;
    }
}

/*
* array data cache, sz is number of bytes to allocate
Expand Down Expand Up @@ -162,15 +177,97 @@ PyDataMem_SetEventHook(PyDataMem_EventHookFunc *newhook,
return temp;
}


/*
 * A minimum valid alignment for common data types.
 * npy_datamem_set_align() rejects any requested alignment below this value.
 */
#define MIN_ALIGN 16

/* Current alignment, in bytes, as reported by npy_datamem_get_align() */
static size_t datamem_align = MIN_ALIGN;
/*
 * Always datamem_align - 1; used both as the padding added to allocation
 * sizes and as the bit mask for rounding addresses up to the alignment.
 */
static size_t datamem_align_mask = MIN_ALIGN - 1;

/*
 * Get a safe size for an aligned allocation, taking into account the
 * overhead of storing the base pointer and the padding needed to reach
 * the next aligned address.
 *
 * If adding the overhead to `size` would wrap around size_t, the largest
 * size_t value is returned instead, so that the subsequent allocation
 * fails rather than silently producing an undersized buffer.
 */
static NPY_INLINE size_t
get_aligned_size(size_t size)
{
    const size_t overhead = sizeof(void *) + datamem_align_mask;

    if (size > (size_t) -1 - overhead) {
        /* size + overhead would overflow: request an unsatisfiable size */
        return (size_t) -1;
    }
    return size + overhead;
}

/*
* Align the given pointer to the guaranteed alignment.
*/
static NPY_INLINE void *
get_aligned_pointer(void *ptr)
{
/* Ensure a pointer can fit in the space before */
npy_intp aligned_ptr = ((npy_intp) ptr + sizeof(void *) + datamem_align_mask)
& ~datamem_align_mask;
return (void *) aligned_ptr;
}

/*
 * Remember the base allocation start ahead of the aligned memory area.
 *
 * `ptr` is written into the pointer-sized slot immediately preceding
 * `aligned_ptr`, where get_base_pointer() reads it back.
 * Returns `aligned_ptr` (the function is declared to return a pointer,
 * so it must return a value).
 */
static NPY_INLINE void *
store_base_pointer(void *aligned_ptr, void *ptr)
{
    ((void **) aligned_ptr)[-1] = ptr;
    return aligned_ptr;
}

/*
 * Recover the start of the underlying allocation from an aligned pointer
 * by reading the pointer-sized slot located just before it.
 */
static NPY_INLINE void *
get_base_pointer(void *aligned_ptr)
{
    void **slots = (void **) aligned_ptr;

    return *(slots - 1);
}

/* Internal API for querying and changing the current alignment */

/* Report the alignment, in bytes, currently stored in datamem_align. */
NPY_NO_EXPORT size_t
npy_datamem_get_align(void)
{
    const size_t current_align = datamem_align;

    return current_align;
}

/*
 * Change the alignment used for data allocations.
 *
 * Returns 0 on success, or -1 (leaving the current alignment untouched)
 * when `align` is smaller than MIN_ALIGN or is not a power of two.
 */
NPY_NO_EXPORT int
npy_datamem_set_align(size_t align)
{
    const size_t mask = align - 1;

    /*
     * Reject values that are too small, then values that are not a power
     * of two (a power of two shares no set bit with its predecessor).
     */
    if (align < MIN_ALIGN || (align & mask) != 0) {
        return -1;
    }
    if (datamem_align < align) {
        /* The alignment grows: cached data areas may not satisfy it
           anymore, so release all of them. */
        _npy_clear_cache(NBUCKETS, datacache, &PyDataMem_FREE);
    }
    datamem_align_mask = mask;
    datamem_align = align;
    return 0;
}

/*NUMPY_API
* Allocates memory for array data.
*/
NPY_NO_EXPORT void *
PyDataMem_NEW(size_t size)
{
void *result;
void *base_result, *result = NULL;

base_result = malloc(get_aligned_size(size));
if (base_result != NULL) {
result = get_aligned_pointer(base_result);
store_base_pointer(result, base_result);
}

result = malloc(size);
if (_PyDataMem_eventhook != NULL) {
NPY_ALLOW_C_API_DEF
NPY_ALLOW_C_API
Expand All @@ -187,16 +284,24 @@ PyDataMem_NEW(size_t size)
* Allocates zeroed memory for array data.
*/
NPY_NO_EXPORT void *
PyDataMem_NEW_ZEROED(size_t size, size_t elsize)
PyDataMem_NEW_ZEROED(size_t nelems, size_t elsize)
{
void *result;
void *base_result, *result = NULL;
size_t size;

if (!npy_mul_with_overflow_intp(&size, nelems, elsize)) {
base_result = calloc(get_aligned_size(size), 1);
if (base_result != NULL) {
result = get_aligned_pointer(base_result);
store_base_pointer(result, base_result);
}
}

result = calloc(size, elsize);
if (_PyDataMem_eventhook != NULL) {
NPY_ALLOW_C_API_DEF
NPY_ALLOW_C_API
if (_PyDataMem_eventhook != NULL) {
(*_PyDataMem_eventhook)(NULL, result, size * elsize,
(*_PyDataMem_eventhook)(NULL, result, nelems * elsize,
_PyDataMem_eventhook_user_data);
}
NPY_DISABLE_C_API
Expand All @@ -210,7 +315,9 @@ PyDataMem_NEW_ZEROED(size_t size, size_t elsize)
NPY_NO_EXPORT void
PyDataMem_FREE(void *ptr)
{
free(ptr);
if (ptr != NULL) {
free(get_base_pointer(ptr));
}
if (_PyDataMem_eventhook != NULL) {
NPY_ALLOW_C_API_DEF
NPY_ALLOW_C_API
Expand All @@ -228,9 +335,27 @@ PyDataMem_FREE(void *ptr)
NPY_NO_EXPORT void *
PyDataMem_RENEW(void *ptr, size_t size)
{
void *result;
void *base_result, *result = NULL;
void *base_ptr = get_base_pointer(ptr);

result = realloc(ptr, size);
base_result = realloc(base_ptr, get_aligned_size(size));
if (base_result != NULL) {
if (base_result == base_ptr) {
result = ptr;
}
else {
size_t offset = (npy_intp) ptr - (npy_intp) base_ptr;
size_t new_offset;
result = get_aligned_pointer(base_result);
/* If the offset from base pointer changed, we must move
the data area ourselves */
new_offset = (npy_intp) result - (npy_intp) base_result;
if (new_offset != offset) {
memmove(result, (const char *) base_result + offset, size);
}
store_base_pointer(result, base_result);
}
}
if (_PyDataMem_eventhook != NULL) {
NPY_ALLOW_C_API_DEF
NPY_ALLOW_C_API
Expand Down
7 changes: 7 additions & 0 deletions numpy/core/src/multiarray/alloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,11 @@ npy_alloc_cache_dim(npy_uintp sz);
NPY_NO_EXPORT void
npy_free_cache_dim(void * p, npy_uintp sd);

/* Return the alignment, in bytes, currently used for data allocations. */
NPY_NO_EXPORT size_t
npy_datamem_get_align(void);

/*
 * Set the alignment, in bytes, used for data allocations.
 * Returns 0 on success and -1 if the value is too small or not a power
 * of two.
 */
NPY_NO_EXPORT int
npy_datamem_set_align(size_t);


#endif
27 changes: 27 additions & 0 deletions numpy/core/src/multiarray/multiarraymodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0;
#include "multiarraymodule.h"
#include "cblasfuncs.h"
#include "vdot.h"
#include "alloc.h"
#include "templ_common.h" /* for npy_mul_with_overflow_intp */
#include "compiled_base.h"

Expand Down Expand Up @@ -3865,6 +3866,26 @@ array_may_share_memory(PyObject *NPY_UNUSED(ignored), PyObject *args)
}
}

/*
 * Python-level `_get_alignment()`: return the current data alignment, in
 * bytes, as a Python integer (new reference, or NULL with an exception
 * set if the integer cannot be created).
 */
static PyObject *
get_alignment(PyObject *NPY_UNUSED(ignored), PyObject *NPY_UNUSED(args))
{
    /* npy_datamem_get_align() yields a size_t, so use the unsigned
       converter rather than the Py_ssize_t one. */
    return PyLong_FromSize_t(npy_datamem_get_align());
}

/*
 * Python-level `_set_alignment(align)`: set the data alignment in bytes.
 *
 * Returns None on success. Returns NULL with an exception set when the
 * argument cannot be parsed as a C int, or with ValueError when the value
 * is not an acceptable alignment.
 */
static PyObject *
set_alignment(PyObject *NPY_UNUSED(ignored), PyObject *args)
{
    int align;

    if (!PyArg_ParseTuple(args, "i", &align)) {
        return NULL;
    }
    /*
     * Reject non-positive values before converting to size_t: a negative
     * int would otherwise wrap to a huge unsigned value, which can even
     * be a power of two (e.g. INT_MIN when size_t is 32 bits wide).
     */
    if (align <= 0 || npy_datamem_set_align((size_t) align) != 0) {
        PyErr_SetString(PyExc_ValueError, "invalid value for alignment");
        return NULL;
    }
    Py_RETURN_NONE;
}

static struct PyMethodDef array_module_methods[] = {
{"_get_ndarray_c_version",
(PyCFunction)array__get_ndarray_c_version,
Expand Down Expand Up @@ -4011,6 +4032,12 @@ static struct PyMethodDef array_module_methods[] = {
{"test_interrupt",
(PyCFunction)test_interrupt,
METH_VARARGS, NULL},
{"_set_alignment",
(PyCFunction)set_alignment,
METH_VARARGS, NULL},
{"_get_alignment",
(PyCFunction)get_alignment,
METH_NOARGS, NULL},
{"_insert", (PyCFunction)arr_insert,
METH_VARARGS | METH_KEYWORDS,
"Insert vals sequentially into equivalent 1-d positions "
Expand Down
27 changes: 27 additions & 0 deletions numpy/core/tests/test_multiarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -4987,5 +4987,32 @@ def test_collections_hashable(self):
self.assertFalse(isinstance(x, collections.Hashable))


class TestAlignment(TestCase):
    """Tests for querying and changing the alignment of array data."""

    def check_alignment(self, align):
        # The data address of freshly created arrays of various sizes must
        # be a multiple of `align`, i.e. have none of the low bits set.
        low_bits = align - 1
        for size in (0, 1, 7, 12, 135, 777):
            arrays = (np.empty(size, dtype='int8'),
                      np.zeros(size, dtype='int8'))
            for arr in arrays:
                self.assertEqual(arr.ctypes.data & low_bits, 0, (size, align))

    def test_get_alignment(self):
        current = get_data_alignment()
        # Default alignment should probably be one of those.
        plausible = (8, 16, 32)
        self.assertTrue(current in plausible, current)

    def test_set_alignment(self):
        saved = get_data_alignment()
        self.check_alignment(saved)
        try:
            for requested in [16, 32, 64, 512]:
                set_data_alignment(requested)
                self.assertEqual(get_data_alignment(), requested)
                self.check_alignment(requested)
        finally:
            # Always restore the alignment that was active before the test
            set_data_alignment(saved)


if __name__ == "__main__":
run_module_suite()