diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py index baf5285f0859..81643cbeedec 100644 --- a/numpy/add_newdocs.py +++ b/numpy/add_newdocs.py @@ -1193,6 +1193,7 @@ add_newdoc('numpy.core', 'dot', """ dot(a, b) + dot(a, b, out) Dot product of two arrays. @@ -1209,6 +1210,13 @@ First argument. b : array_like Second argument. + out : ndarray, optional + Output argument. This must have the exact kind that would be returned + if it was not used. In particular, it must have the right type, must be + C-contiguous, and its dtype must be the dtype that would be returned + for `dot(a,b)`. This is a performance feature. Therefore, if these + conditions are not met, an exception is raised, instead of attempting + to be flexible. Returns ------- @@ -1216,6 +1224,7 @@ Returns the dot product of `a` and `b`. If `a` and `b` are both scalars or both 1-D arrays then a scalar is returned; otherwise an array is returned. + If `out` is given, then it is returned. Raises ------ diff --git a/numpy/core/blasdot/_dotblas.c b/numpy/core/blasdot/_dotblas.c index 1dc24d8ea889..6f22e3a9b5be 100644 --- a/numpy/core/blasdot/_dotblas.c +++ b/numpy/core/blasdot/_dotblas.c @@ -213,10 +213,10 @@ _bad_strides(PyArrayObject *ap) * NB: The first argument is not conjugated.; */ static PyObject * -dotblas_matrixproduct(PyObject *NPY_UNUSED(dummy), PyObject *args) +dotblas_matrixproduct(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject* kwargs) { PyObject *op1, *op2; - PyArrayObject *ap1 = NULL, *ap2 = NULL, *ret = NULL; + PyArrayObject *ap1 = NULL, *ap2 = NULL, *out = NULL, *ret = NULL; int j, l, lda, ldb, ldc; int typenum, nd; npy_intp ap1stride = 0; @@ -230,8 +230,10 @@ dotblas_matrixproduct(PyObject *NPY_UNUSED(dummy), PyObject *args) PyTypeObject *subtype; PyArray_Descr *dtype; MatrixShape ap1shape, ap2shape; + char* kwords[] = {"a", "b", "out", NULL }; - if (!PyArg_ParseTuple(args, "OO", &op1, &op2)) { + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|O", kwords, + &op1, &op2, &out)) { return NULL; } @@ -246,7 +248,10 @@ dotblas_matrixproduct(PyObject *NPY_UNUSED(dummy), PyObject *args) /* This function doesn't handle other types */ if ((typenum != PyArray_DOUBLE && typenum != PyArray_CDOUBLE && typenum != PyArray_FLOAT && typenum != PyArray_CFLOAT)) { - return PyArray_Return((PyArrayObject *)PyArray_MatrixProduct(op1, op2)); + return PyArray_Return((PyArrayObject *)PyArray_MatrixProduct3( + (PyObject *)op1, + (PyObject *)op2, + (PyObject *)out)); } dtype = PyArray_DescrFromType(typenum); @@ -279,8 +284,9 @@ dotblas_matrixproduct(PyObject *NPY_UNUSED(dummy), PyObject *args) Py_DECREF(tmp1); Py_DECREF(tmp2); } - ret = (PyArrayObject *)PyArray_MatrixProduct((PyObject *)ap1, - (PyObject *)ap2); + ret = (PyArrayObject *)PyArray_MatrixProduct3((PyObject *)ap1, + (PyObject *)ap2, + (PyObject *)out); Py_DECREF(ap1); Py_DECREF(ap2); return PyArray_Return(ret); @@ -418,10 +424,34 @@ dotblas_matrixproduct(PyObject *NPY_UNUSED(dummy), PyObject *args) subtype = Py_TYPE(ap1); } - ret = (PyArrayObject *)PyArray_New(subtype, nd, dimensions, - typenum, NULL, NULL, 0, 0, - (PyObject *) - (prior2 > prior1 ? ap2 : ap1)); + if (out) { + int d; + /* verify that out is usable */ + if (Py_TYPE(out) != subtype || + PyArray_NDIM(out) != nd || + PyArray_TYPE(out) != typenum || + !PyArray_ISCARRAY(out)) { + + PyErr_SetString(PyExc_ValueError, + "output array is not acceptable " + "(must have the right type, nr dimensions, and be a C-Array)"); + goto fail; + } + for (d = 0; d != nd; ++d) { + if (dimensions[d] != PyArray_DIM(out, d)) { + PyErr_SetString(PyExc_ValueError, + "output array has wrong dimensions"); + goto fail; + } + } + Py_INCREF(out); + ret = out; + } else { + ret = (PyArrayObject *)PyArray_New(subtype, nd, dimensions, + typenum, NULL, NULL, 0, 0, + (PyObject *) + (prior2 > prior1 ? ap2 : ap1)); + } if (ret == NULL) { goto fail; @@ -1167,7 +1197,7 @@ static PyObject *dotblas_vdot(PyObject *NPY_UNUSED(dummy), PyObject *args) { } static struct PyMethodDef dotblas_module_methods[] = { - {"dot", (PyCFunction)dotblas_matrixproduct, 1, NULL}, + {"dot", (PyCFunction)dotblas_matrixproduct, METH_VARARGS|METH_KEYWORDS, NULL}, {"inner", (PyCFunction)dotblas_innerproduct, 1, NULL}, {"vdot", (PyCFunction)dotblas_vdot, 1, NULL}, {"alterdot", (PyCFunction)dotblas_alterdot, 1, NULL}, diff --git a/numpy/core/code_generators/numpy_api.py b/numpy/core/code_generators/numpy_api.py index 9474a131a5c1..f89aebec318d 100644 --- a/numpy/core/code_generators/numpy_api.py +++ b/numpy/core/code_generators/numpy_api.py @@ -252,6 +252,7 @@ 'PyArray_TimedeltaToTimedeltaStruct': 218, 'PyArray_DatetimeStructToDatetime': 219, 'PyArray_TimedeltaStructToTimedelta': 220, + 'PyArray_MatrixProduct3': 222, } ufunc_types_api = { diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index d4dba719c2ba..f5a6b6d79434 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -578,7 +578,7 @@ PyArray_CanCoerceScalar(int thistype, int neededtype, * priority of ap1 and ap2 into account. */ static PyArrayObject * -new_array_for_sum(PyArrayObject *ap1, PyArrayObject *ap2, +new_array_for_sum(PyArrayObject* out, PyArrayObject *ap1, PyArrayObject *ap2, int nd, intp dimensions[], int typenum) { PyArrayObject *ret; @@ -597,6 +597,28 @@ new_array_for_sum(PyArrayObject *ap1, PyArrayObject *ap2, prior1 = prior2 = 0.0; subtype = Py_TYPE(ap1); } + if (out) { + int d; + /* verify that out is usable */ + if (Py_TYPE(out) != subtype || + PyArray_NDIM(out) != nd || + PyArray_TYPE(out) != typenum || + !PyArray_ISCARRAY(out)) { + PyErr_SetString(PyExc_ValueError, + "output array is not acceptable " + "(must have the right type, nr dimensions, and be a C-Array)"); + return 0; + } + for (d = 0; d != nd; ++d) { + if (dimensions[d] != PyArray_DIM(out, d)) { + PyErr_SetString(PyExc_ValueError, + "output array has wrong dimensions"); + return 0; + } + } + Py_INCREF(out); + return out; + } ret = (PyArrayObject *)PyArray_New(subtype, nd, dimensions, typenum, NULL, NULL, 0, 0, @@ -666,7 +688,7 @@ PyArray_InnerProduct(PyObject *op1, PyObject *op2) * Need to choose an output array that can hold a sum * -- use priority to determine which subtype. */ - ret = new_array_for_sum(ap1, ap2, nd, dimensions, typenum); + ret = new_array_for_sum(NULL, ap1, ap2, nd, dimensions, typenum); if (ret == NULL) { goto fail; } @@ -713,13 +735,12 @@ PyArray_InnerProduct(PyObject *op1, PyObject *op2) return NULL; } - /*NUMPY_API - *Numeric.matrixproduct(a,v) + * Numeric.matrixproduct(a,v,out) * just like inner product but does the swapaxes stuff on the fly */ NPY_NO_EXPORT PyObject * -PyArray_MatrixProduct(PyObject *op1, PyObject *op2) +PyArray_MatrixProduct3(PyObject *op1, PyObject *op2, PyObject* out) { PyArrayObject *ap1, *ap2, *ret = NULL; PyArrayIterObject *it1, *it2; @@ -788,7 +809,7 @@ PyArray_MatrixProduct(PyObject *op1, PyObject *op2) is1 = ap1->strides[ap1->nd-1]; is2 = ap2->strides[matchDim]; /* Choose which subtype to return */ - ret = new_array_for_sum(ap1, ap2, nd, dimensions, typenum); + ret = new_array_for_sum(out, ap1, ap2, nd, dimensions, typenum); if (ret == NULL) { goto fail; } @@ -845,6 +866,16 @@ PyArray_MatrixProduct(PyObject *op1, PyObject *op2) return NULL; } +/*NUMPY_API + *Numeric.matrixproduct(a,v) + * just like inner product but does the swapaxes stuff on the fly + */ +NPY_NO_EXPORT PyObject * +PyArray_MatrixProduct(PyObject *op1, PyObject *op2) +{ + return PyArray_MatrixProduct3(op1, op2, NULL); +} + /*NUMPY_API * Fast Copy and Transpose */ @@ -968,7 +999,7 @@ _pyarray_correlate(PyArrayObject *ap1, PyArrayObject *ap2, int typenum, * Need to choose an output array that can hold a sum * -- use priority to determine which subtype. */ - ret = new_array_for_sum(ap1, ap2, 1, &length, typenum); + ret = new_array_for_sum(NULL, ap1, ap2, 1, &length, typenum); if (ret == NULL) { return NULL; } @@ -1850,14 +1881,15 @@ array_innerproduct(PyObject *NPY_UNUSED(dummy), PyObject *args) } static PyObject * -array_matrixproduct(PyObject *NPY_UNUSED(dummy), PyObject *args) +array_matrixproduct(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject* kwds) { - PyObject *v, *a; + PyObject *v, *a, *o = NULL; + char* kwlist[] = {"a", "b", "out", NULL }; - if (!PyArg_ParseTuple(args, "OO", &a, &v)) { + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|O", kwlist, &a, &v, &o)) { return NULL; } - return _ARET(PyArray_MatrixProduct(a, v)); + return _ARET(PyArray_MatrixProduct3(a, v, o)); } static PyObject * @@ -2766,7 +2798,7 @@ static struct PyMethodDef array_module_methods[] = { METH_VARARGS, NULL}, {"dot", (PyCFunction)array_matrixproduct, - METH_VARARGS, NULL}, + METH_VARARGS | METH_KEYWORDS, NULL}, {"_fastCopyAndTranspose", (PyCFunction)array_fastCopyAndTranspose, METH_VARARGS, NULL}, diff --git a/numpy/core/tests/test_blasdot.py b/numpy/core/tests/test_blasdot.py index 3c04759d55bd..93a78c0bf5b1 100644 --- a/numpy/core/tests/test_blasdot.py +++ b/numpy/core/tests/test_blasdot.py @@ -1,5 +1,8 @@ +import numpy as np +import sys from numpy.core import zeros, float64 -from numpy.testing import dec, TestCase, assert_almost_equal, assert_ +from numpy.testing import dec, TestCase, assert_almost_equal, assert_, \ + assert_raises, assert_array_equal, assert_allclose, assert_equal from numpy.core.multiarray import inner as inner_ DECPREC = 14 @@ -26,3 +29,63 @@ def test_blasdot_used(): assert_(inner is _dotblas.inner) assert_(alterdot is _dotblas.alterdot) assert_(restoredot is _dotblas.restoredot) + + +def test_dot_2args(): + from numpy.core import dot + + a = np.array([[1, 2], [3, 4]], dtype=float) + b = np.array([[1, 0], [1, 1]], dtype=float) + c = np.array([[3, 2], [7, 4]], dtype=float) + + d = dot(a, b) + assert_allclose(c, d) + +def test_dot_3args(): + np.random.seed(22) + f = np.random.random_sample((1024, 16)) + v = np.random.random_sample((16, 32)) + + r = np.empty((1024, 32)) + for i in xrange(12): + np.dot(f,v,r) + assert_equal(sys.getrefcount(r), 2) + r2 = np.dot(f,v) + assert_array_equal(r2, r) + assert_(r is np.dot(f,v,r)) + + v = v[:,0].copy() # v.shape == (16,) + r = r[:,0].copy() # r.shape == (1024,) + r2 = np.dot(f,v) + assert_(r is np.dot(f,v,r)) + assert_array_equal(r2, r) + +def test_dot_3args_errors(): + np.random.seed(22) + f = np.random.random_sample((1024, 16)) + v = np.random.random_sample((16, 32)) + + r = np.empty((1024, 31)) + assert_raises(ValueError, np.dot, f, v, r) + + r = np.empty((1024,)) + assert_raises(ValueError, np.dot, f, v, r) + + r = np.empty((32,)) + assert_raises(ValueError, np.dot, f, v, r) + + r = np.empty((32, 1024)) + assert_raises(ValueError, np.dot, f, v, r) + assert_raises(ValueError, np.dot, f, v, r.T) + + r = np.empty((1024, 64)) + assert_raises(ValueError, np.dot, f, v, r[:,::2]) + assert_raises(ValueError, np.dot, f, v, r[:,:32]) + + r = np.empty((1024, 32), dtype=np.float32) + assert_raises(ValueError, np.dot, f, v, r) + + r = np.empty((1024, 32), dtype=int) + assert_raises(ValueError, np.dot, f, v, r) + + diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index b3bf209a15d9..85a4a06916a6 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -1274,6 +1274,68 @@ def __array_finalize__(self, obj): res = dat.var(1) assert res.info == dat.info +class TestDot(TestCase): + def test_dot_2args(self): + from numpy.core.multiarray import dot + + a = np.array([[1, 2], [3, 4]], dtype=float) + b = np.array([[1, 0], [1, 1]], dtype=float) + c = np.array([[3, 2], [7, 4]], dtype=float) + + d = dot(a, b) + assert_allclose(c, d) + + def test_dot_3args(self): + from numpy.core.multiarray import dot + + np.random.seed(22) + f = np.random.random_sample((1024, 16)) + v = np.random.random_sample((16, 32)) + + r = np.empty((1024, 32)) + for i in xrange(12): + dot(f,v,r) + assert_equal(sys.getrefcount(r), 2) + r2 = dot(f,v) + assert_array_equal(r2, r) + assert_(r is dot(f,v,r)) + + v = v[:,0].copy() # v.shape == (16,) + r = r[:,0].copy() # r.shape == (1024,) + r2 = dot(f,v) + assert_(r is dot(f,v,r)) + assert_array_equal(r2, r) + + def test_dot_3args_errors(self): + from numpy.core.multiarray import dot + + np.random.seed(22) + f = np.random.random_sample((1024, 16)) + v = np.random.random_sample((16, 32)) + + r = np.empty((1024, 31)) + assert_raises(ValueError, dot, f, v, r) + + r = np.empty((1024,)) + assert_raises(ValueError, dot, f, v, r) + + r = np.empty((32,)) + assert_raises(ValueError, dot, f, v, r) + + r = np.empty((32, 1024)) + assert_raises(ValueError, dot, f, v, r) + assert_raises(ValueError, dot, f, v, r.T) + + r = np.empty((1024, 64)) + assert_raises(ValueError, dot, f, v, r[:,::2]) + assert_raises(ValueError, dot, f, v, r[:,:32]) + + r = np.empty((1024, 32), dtype=np.float32) + assert_raises(ValueError, dot, f, v, r) + + r = np.empty((1024, 32), dtype=int) + assert_raises(ValueError, dot, f, v, r) + class TestSummarization(TestCase): def test_1d(self): @@ -1329,23 +1391,23 @@ class TestNeighborhoodIter(TestCase): def _test_simple2d(self, dt): # Test zero and one padding for simple data type x = np.array([[0, 1], [2, 3]], dtype=dt) - r = [np.array([[0, 0, 0], [0, 0, 1]], dtype=dt), - np.array([[0, 0, 0], [0, 1, 0]], dtype=dt), - np.array([[0, 0, 1], [0, 2, 3]], dtype=dt), + r = [np.array([[0, 0, 0], [0, 0, 1]], dtype=dt), + np.array([[0, 0, 0], [0, 1, 0]], dtype=dt), + np.array([[0, 0, 1], [0, 2, 3]], dtype=dt), np.array([[0, 1, 0], [2, 3, 0]], dtype=dt)] l = test_neighborhood_iterator(x, [-1, 0, -1, 1], x[0], NEIGH_MODE['zero']) assert_array_equal(l, r) - r = [np.array([[1, 1, 1], [1, 0, 1]], dtype=dt), - np.array([[1, 1, 1], [0, 1, 1]], dtype=dt), - np.array([[1, 0, 1], [1, 2, 3]], dtype=dt), + r = [np.array([[1, 1, 1], [1, 0, 1]], dtype=dt), + np.array([[1, 1, 1], [0, 1, 1]], dtype=dt), + np.array([[1, 0, 1], [1, 2, 3]], dtype=dt), np.array([[0, 1, 1], [2, 3, 1]], dtype=dt)] l = test_neighborhood_iterator(x, [-1, 0, -1, 1], x[0], NEIGH_MODE['one']) assert_array_equal(l, r) - r = [np.array([[4, 4, 4], [4, 0, 1]], dtype=dt), - np.array([[4, 4, 4], [0, 1, 4]], dtype=dt), - np.array([[4, 0, 1], [4, 2, 3]], dtype=dt), + r = [np.array([[4, 4, 4], [4, 0, 1]], dtype=dt), + np.array([[4, 4, 4], [0, 1, 4]], dtype=dt), + np.array([[4, 0, 1], [4, 2, 3]], dtype=dt), np.array([[0, 1, 4], [2, 3, 4]], dtype=dt)] l = test_neighborhood_iterator(x, [-1, 0, -1, 1], 4, NEIGH_MODE['constant']) assert_array_equal(l, r) @@ -1362,9 +1424,9 @@ def test_simple2d_object(self): def _test_mirror2d(self, dt): x = np.array([[0, 1], [2, 3]], dtype=dt) - r = [np.array([[0, 0, 1], [0, 0, 1]], dtype=dt), - np.array([[0, 1, 1], [0, 1, 1]], dtype=dt), - np.array([[0, 0, 1], [2, 2, 3]], dtype=dt), + r = [np.array([[0, 0, 1], [0, 0, 1]], dtype=dt), + np.array([[0, 1, 1], [0, 1, 1]], dtype=dt), + np.array([[0, 0, 1], [2, 2, 3]], dtype=dt), np.array([[0, 1, 1], [2, 3, 3]], dtype=dt)] l = test_neighborhood_iterator(x, [-1, 0, -1, 1], x[0], NEIGH_MODE['mirror']) assert_array_equal(l, r)