From 2ebabb78f314e6ed706499e81099132921d32dea Mon Sep 17 00:00:00 2001 From: mattip Date: Wed, 11 Jun 2025 19:43:52 +0300 Subject: [PATCH 1/3] BUG: fix matmul with transposed out arg --- numpy/_core/src/umath/matmul.c.src | 2 +- numpy/_core/tests/test_multiarray.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/numpy/_core/src/umath/matmul.c.src b/numpy/_core/src/umath/matmul.c.src index d9be7b1d6826..02c4fde56bf2 100644 --- a/numpy/_core/src/umath/matmul.c.src +++ b/numpy/_core/src/umath/matmul.c.src @@ -596,7 +596,7 @@ NPY_NO_EXPORT void * Use transpose equivalence: * matmul(a, b, o) == matmul(b.T, a.T, o.T) */ - if (o_f_blasable) { + if (o_transpose) { @TYPE@_matmul_matrixmatrix( ip2_, is2_p_, is2_n_, ip1_, is1_n_, is1_m_, diff --git a/numpy/_core/tests/test_multiarray.py b/numpy/_core/tests/test_multiarray.py index 7603449ba28e..34740963f6d8 100644 --- a/numpy/_core/tests/test_multiarray.py +++ b/numpy/_core/tests/test_multiarray.py @@ -7272,6 +7272,10 @@ def test_out_contiguous(self): assert_array_equal(c, tgt_mv) c = self.matmul(v, a.T, out=out[:, 0, 0]) assert_array_equal(c, tgt_mv) + # issue 29164 + out_f = np.zeros((10, 4), dtype=float) + c = self.matmul(a, b, out=out_f[::-2, ::-2]) + assert_array_equal(c, tgt) # test out contiguous in only last dim out = np.ones((10, 2), dtype=float) From ece0bdb1f8ef91b6716777aa65b5c1c2ce659d60 Mon Sep 17 00:00:00 2001 From: mattip Date: Wed, 11 Jun 2025 19:58:24 +0300 Subject: [PATCH 2/3] DOC: add release note --- doc/release/upcoming_changes/23752.performance.rst | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 doc/release/upcoming_changes/23752.performance.rst diff --git a/doc/release/upcoming_changes/23752.performance.rst b/doc/release/upcoming_changes/23752.performance.rst new file mode 100644 index 000000000000..37ed1ee34dd8 --- /dev/null +++ b/doc/release/upcoming_changes/23752.performance.rst @@ -0,0 +1,6 @@ +Improve matmul performance when operands are non-contiguous 
+----------------------------------------------------------- + +Enable using BLAS for matmul even when operands are non-contiguous by copying +if needed. This performance enhancement's original implementation had a bug +that was fixed for v2.3.1 From 6100fba37b5f3c9584437d91d5c8c3c219021622 Mon Sep 17 00:00:00 2001 From: mattip Date: Thu, 12 Jun 2025 08:35:22 +0300 Subject: [PATCH 3/3] fixes from review --- doc/release/upcoming_changes/23752.performance.rst | 6 ------ doc/release/upcoming_changes/29179.change.rst | 4 ++++ doc/source/release/2.3.0-notes.rst | 6 ++++++ numpy/_core/tests/test_multiarray.py | 10 ++++++---- 4 files changed, 16 insertions(+), 10 deletions(-) delete mode 100644 doc/release/upcoming_changes/23752.performance.rst create mode 100644 doc/release/upcoming_changes/29179.change.rst diff --git a/doc/release/upcoming_changes/23752.performance.rst b/doc/release/upcoming_changes/23752.performance.rst deleted file mode 100644 index 37ed1ee34dd8..000000000000 --- a/doc/release/upcoming_changes/23752.performance.rst +++ /dev/null @@ -1,6 +0,0 @@ -Improve matmul performance when operands are non-contiguous ------------------------------------------------------------ - -Enable using BLAS for matmul even when operands are non-contiguous by copying -if needed. This performance enhancement's original implementation had a bug -that was fixed for v2.3.1 diff --git a/doc/release/upcoming_changes/29179.change.rst b/doc/release/upcoming_changes/29179.change.rst new file mode 100644 index 000000000000..12eb6804d3dd --- /dev/null +++ b/doc/release/upcoming_changes/29179.change.rst @@ -0,0 +1,4 @@ +Fix bug in ``matmul`` for non-contiguous out kwarg parameter +------------------------------------------------------------ +In some cases, if ``out`` was non-contiguous, ``np.matmul`` would cause +memory corruption or a c-level assert. This was new to v2.3.0 and fixed in v2.3.1. 
diff --git a/doc/source/release/2.3.0-notes.rst b/doc/source/release/2.3.0-notes.rst index faad9ffcc8eb..4c3c923b3b5e 100644 --- a/doc/source/release/2.3.0-notes.rst +++ b/doc/source/release/2.3.0-notes.rst @@ -414,6 +414,12 @@ the best performance. (`gh-28769 <https://github.com/numpy/numpy/pull/28769>`__) +Performance improvements for ``np.matmul`` +------------------------------------------ +Enable using BLAS for ``matmul`` even when operands are non-contiguous by copying +if needed. + +(`gh-23752 <https://github.com/numpy/numpy/pull/23752>`__) Changes ======= diff --git a/numpy/_core/tests/test_multiarray.py b/numpy/_core/tests/test_multiarray.py index 34740963f6d8..acf053b41490 100644 --- a/numpy/_core/tests/test_multiarray.py +++ b/numpy/_core/tests/test_multiarray.py @@ -7272,10 +7272,6 @@ def test_out_contiguous(self): assert_array_equal(c, tgt_mv) c = self.matmul(v, a.T, out=out[:, 0, 0]) assert_array_equal(c, tgt_mv) - # issue 29164 - out_f = np.zeros((10, 4), dtype=float) - c = self.matmul(a, b, out=out_f[::-2, ::-2]) - assert_array_equal(c, tgt) # test out contiguous in only last dim out = np.ones((10, 2), dtype=float) @@ -7321,6 +7317,12 @@ def test_dot_equivalent(self, args): r3 = np.matmul(args[0].copy(), args[1].copy()) assert_equal(r1, r3) + # matrix matrix, issue 29164 + if [len(args[0].shape), len(args[1].shape)] == [2, 2]: + out_f = np.zeros((r2.shape[0] * 2, r2.shape[1] * 2), order='F') + r4 = np.matmul(*args, out=out_f[::2, ::2]) + assert_equal(r2, r4) + def test_matmul_object(self): import fractions