From 5cb44c56fb324a8b173823987939ac0cf1757835 Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Tue, 12 Dec 2023 13:08:01 -0800 Subject: [PATCH 1/8] Update x86-simd-sort to latest Marks explicit template specializations inline --- numpy/_core/src/npysort/x86-simd-sort | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/_core/src/npysort/x86-simd-sort b/numpy/_core/src/npysort/x86-simd-sort index 8187e9a6d322..7060e3c76899 160000 --- a/numpy/_core/src/npysort/x86-simd-sort +++ b/numpy/_core/src/npysort/x86-simd-sort @@ -1 +1 @@ -Subproject commit 8187e9a6d322b3d0f202c0c8bc0c0c76fc9a6ec4 +Subproject commit 7060e3c768992441aa6454a6f9320a9fe1f870da From 894a1e3eae226ae3d23ae65ba8d155f0adc63d3c Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Tue, 12 Dec 2023 13:27:36 -0800 Subject: [PATCH 2/8] CI: Update SDE version to 9.27 and run tests on SPR --- .github/workflows/linux_simd.yml | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/.github/workflows/linux_simd.yml b/.github/workflows/linux_simd.yml index 0b8f082b5494..2764c32868c9 100644 --- a/.github/workflows/linux_simd.yml +++ b/.github/workflows/linux_simd.yml @@ -162,7 +162,7 @@ jobs: - name: Install Intel SDE run: | - curl -o /tmp/sde.tar.xz https://downloadmirror.intel.com/784319/sde-external-9.24.0-2023-07-13-lin.tar.xz + curl -o /tmp/sde.tar.xz https://downloadmirror.intel.com/788820/sde-external-9.27.0-2023-09-13-lin.tar.xz.sig mkdir /tmp/sde && tar -xvf /tmp/sde.tar.xz -C /tmp/sde/ sudo mv /tmp/sde/* /opt/sde && sudo ln -s /opt/sde/sde64 /usr/bin/sde @@ -198,15 +198,13 @@ jobs: sde -spr -- python -c "import numpy; numpy.show_config()" && sde -spr -- python -m pytest $NUMPY_SITE/numpy/_core/tests/test_simd* - # Can't run on SDE just yet: see https://github.com/numpy/numpy/issues/23545#issuecomment-1659047365 - # - #- name: linalg/ufunc/umath tests (SPR) - # run: | - # export NUMPY_SITE=$(realpath 
build-install/usr/lib/python*/site-packages/) - # export PYTHONPATH="$PYTHONPATH:$NUMPY_SITE" - # cd build-install && - # sde -spr -- python -c "import numpy; numpy.show_config()" && - # sde -spr -- python -m pytest $NUMPY_SITE/numpy/_core/tests/test_umath* \ - # $NUMPY_SITE/numpy/_core/tests/test_ufunc.py \ - # $NUMPY_SITE/numpy/linalg/tests/test_* + - name: linalg/ufunc/umath tests (SPR) + run: | + export NUMPY_SITE=$(realpath build-install/usr/lib/python*/site-packages/) + export PYTHONPATH="$PYTHONPATH:$NUMPY_SITE" + cd build-install && + sde -spr -- python -c "import numpy; numpy.show_config()" && + sde -spr -- python -m pytest $NUMPY_SITE/numpy/_core/tests/test_umath* \ + $NUMPY_SITE/numpy/_core/tests/test_ufunc.py \ + $NUMPY_SITE/numpy/linalg/tests/test_* From b743ee0b85bdb4b1cf3ee947299b9981d9f2202f Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Tue, 12 Dec 2023 13:36:59 -0800 Subject: [PATCH 3/8] Fix SDE file extension name and update cpu-baseline=avx512_spr --- .github/workflows/linux_simd.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/linux_simd.yml b/.github/workflows/linux_simd.yml index 2764c32868c9..26c37e2c0012 100644 --- a/.github/workflows/linux_simd.yml +++ b/.github/workflows/linux_simd.yml @@ -162,7 +162,7 @@ jobs: - name: Install Intel SDE run: | - curl -o /tmp/sde.tar.xz https://downloadmirror.intel.com/788820/sde-external-9.27.0-2023-09-13-lin.tar.xz.sig + curl -o /tmp/sde.tar.xz https://downloadmirror.intel.com/788820/sde-external-9.27.0-2023-09-13-lin.tar.xz mkdir /tmp/sde && tar -xvf /tmp/sde.tar.xz -C /tmp/sde/ sudo mv /tmp/sde/* /opt/sde && sudo ln -s /opt/sde/sde64 /usr/bin/sde @@ -176,7 +176,7 @@ jobs: python -m pip install pytest pytest-xdist hypothesis typing_extensions - name: Build - run: spin build -- -Dallow-noblas=true -Dcpu-baseline=avx512f -Dtest-simd='BASELINE,AVX512_KNL,AVX512_KNM,AVX512_SKX,AVX512_CLX,AVX512_CNL,AVX512_ICL,AVX512_SPR' + run: spin build -- 
-Dallow-noblas=true -Dcpu-baseline=avx512_spr -Dtest-simd='BASELINE,AVX512_KNL,AVX512_KNM,AVX512_SKX,AVX512_CLX,AVX512_CNL,AVX512_ICL,AVX512_SPR' - name: Meson Log if: always() From 0000e49c29965830e2d3189ff501969b7903af3d Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Tue, 12 Dec 2023 20:41:37 -0800 Subject: [PATCH 4/8] BUG: fix dispatch for avx512_qsort and qselect float16 functions --- .../npysort/x86_simd_qsort_16bit.dispatch.cpp | 47 ++----------------- 1 file changed, 3 insertions(+), 44 deletions(-) diff --git a/numpy/_core/src/npysort/x86_simd_qsort_16bit.dispatch.cpp b/numpy/_core/src/npysort/x86_simd_qsort_16bit.dispatch.cpp index c210a9135356..11377f4a35a3 100644 --- a/numpy/_core/src/npysort/x86_simd_qsort_16bit.dispatch.cpp +++ b/numpy/_core/src/npysort/x86_simd_qsort_16bit.dispatch.cpp @@ -3,34 +3,9 @@ #if defined(NPY_HAVE_AVX512_SPR) #include "x86-simd-sort/src/avx512fp16-16bit-qsort.hpp" -/* - * Wrapper function declarations to avoid multiple definitions of - * avx512_qsort and avx512_qsort - */ -void avx512_qsort_uint16(uint16_t*, npy_intp); -void avx512_qsort_int16(int16_t*, npy_intp); -void avx512_qselect_uint16(uint16_t*, npy_intp, npy_intp); -void avx512_qselect_int16(int16_t*, npy_intp, npy_intp); - + #include "x86-simd-sort/src/avx512-16bit-qsort.hpp" #elif defined(NPY_HAVE_AVX512_ICL) #include "x86-simd-sort/src/avx512-16bit-qsort.hpp" -/* Wrapper function defintions here: */ -void avx512_qsort_uint16(uint16_t* arr, npy_intp size) -{ - avx512_qsort(arr, size); -} -void avx512_qsort_int16(int16_t* arr, npy_intp size) -{ - avx512_qsort(arr, size); -} -void avx512_qselect_uint16(uint16_t* arr, npy_intp kth, npy_intp size) -{ - avx512_qselect(arr, kth, size, true); -} -void avx512_qselect_int16(int16_t* arr, npy_intp kth, npy_intp size) -{ - avx512_qselect(arr, kth, size, true); -} #endif namespace np { namespace qsort_simd { @@ -50,20 +25,12 @@ template<> void NPY_CPU_DISPATCH_CURFX(QSelect)(Half *arr, npy_intp num, npy_int 
template<> void NPY_CPU_DISPATCH_CURFX(QSelect)(uint16_t *arr, npy_intp num, npy_intp kth) { -#if defined(NPY_HAVE_AVX512_SPR) - avx512_qselect_uint16(arr, kth, num); -#else - avx512_qselect(arr, kth, num); -#endif + avx512_qselect(arr, num, kth); } template<> void NPY_CPU_DISPATCH_CURFX(QSelect)(int16_t *arr, npy_intp num, npy_intp kth) { -#if defined(NPY_HAVE_AVX512_SPR) - avx512_qselect_int16(arr, kth, num); -#else - avx512_qselect(arr, kth, num); -#endif + avx512_qselect(arr, num, kth); } /* @@ -79,19 +46,11 @@ template<> void NPY_CPU_DISPATCH_CURFX(QSort)(Half *arr, npy_intp size) } template<> void NPY_CPU_DISPATCH_CURFX(QSort)(uint16_t *arr, npy_intp size) { -#if defined(NPY_HAVE_AVX512_SPR) - avx512_qsort_uint16(arr, size); -#else avx512_qsort(arr, size); -#endif } template<> void NPY_CPU_DISPATCH_CURFX(QSort)(int16_t *arr, npy_intp size) { -#if defined(NPY_HAVE_AVX512_SPR) - avx512_qsort_int16(arr, size); -#else avx512_qsort(arr, size); -#endif } #endif // NPY_HAVE_AVX512_ICL || SPR From 7a29f66ce96516f3a6e13c5f22147e47e233b3ba Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Tue, 12 Dec 2023 21:11:14 -0800 Subject: [PATCH 5/8] Add TGL tests and build with cpu-baseline of avx512f --- .github/workflows/linux_simd.yml | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/.github/workflows/linux_simd.yml b/.github/workflows/linux_simd.yml index 26c37e2c0012..092731116f2f 100644 --- a/.github/workflows/linux_simd.yml +++ b/.github/workflows/linux_simd.yml @@ -176,7 +176,7 @@ jobs: python -m pip install pytest pytest-xdist hypothesis typing_extensions - name: Build - run: spin build -- -Dallow-noblas=true -Dcpu-baseline=avx512_spr -Dtest-simd='BASELINE,AVX512_KNL,AVX512_KNM,AVX512_SKX,AVX512_CLX,AVX512_CNL,AVX512_ICL,AVX512_SPR' + run: spin build -- -Dallow-noblas=true -Dcpu-baseline=avx512f -Dtest-simd='BASELINE,AVX512_KNL,AVX512_KNM,AVX512_SKX,AVX512_CLX,AVX512_CNL,AVX512_ICL,AVX512_SPR' - name: Meson Log if: always() @@ 
-198,6 +198,17 @@ jobs: sde -spr -- python -c "import numpy; numpy.show_config()" && sde -spr -- python -m pytest $NUMPY_SITE/numpy/_core/tests/test_simd* + - name: linalg/ufunc/umath tests (TGL) + run: | + export NUMPY_SITE=$(realpath build-install/usr/lib/python*/site-packages/) + export PYTHONPATH="$PYTHONPATH:$NUMPY_SITE" + cd build-install && + sde -tgl -- python -c "import numpy; numpy.show_config()" && + sde -tgl -- python -m pytest $NUMPY_SITE/numpy/_core/tests/test_umath* \ + $NUMPY_SITE/numpy/_core/tests/test_ufunc.py \ + $NUMPY_SITE/numpy/_core/tests/test_multiarray.py \ + $NUMPY_SITE/numpy/linalg/tests/test_* + - name: linalg/ufunc/umath tests (SPR) run: | export NUMPY_SITE=$(realpath build-install/usr/lib/python*/site-packages/) @@ -206,5 +217,5 @@ jobs: sde -spr -- python -c "import numpy; numpy.show_config()" && sde -spr -- python -m pytest $NUMPY_SITE/numpy/_core/tests/test_umath* \ $NUMPY_SITE/numpy/_core/tests/test_ufunc.py \ + $NUMPY_SITE/numpy/_core/tests/test_multiarray.py \ $NUMPY_SITE/numpy/linalg/tests/test_* - From f01ee53765d23a339ae5cfcb02e0fbf57ae190c0 Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Wed, 13 Dec 2023 09:22:57 -0800 Subject: [PATCH 6/8] BUG: fix order of arguments to avx512_qselect --- numpy/_core/src/npysort/x86_simd_qsort_16bit.dispatch.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/numpy/_core/src/npysort/x86_simd_qsort_16bit.dispatch.cpp b/numpy/_core/src/npysort/x86_simd_qsort_16bit.dispatch.cpp index 11377f4a35a3..8222fc77cae3 100644 --- a/numpy/_core/src/npysort/x86_simd_qsort_16bit.dispatch.cpp +++ b/numpy/_core/src/npysort/x86_simd_qsort_16bit.dispatch.cpp @@ -25,12 +25,12 @@ template<> void NPY_CPU_DISPATCH_CURFX(QSelect)(Half *arr, npy_intp num, npy_int template<> void NPY_CPU_DISPATCH_CURFX(QSelect)(uint16_t *arr, npy_intp num, npy_intp kth) { - avx512_qselect(arr, num, kth); + avx512_qselect(arr, kth, num); } template<> void NPY_CPU_DISPATCH_CURFX(QSelect)(int16_t *arr, 
npy_intp num, npy_intp kth) { - avx512_qselect(arr, num, kth); + avx512_qselect(arr, kth, num); } /* From ed7f19b29a743c80dadd206a63b8e33592cbc9b1 Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Wed, 13 Dec 2023 12:28:42 -0800 Subject: [PATCH 7/8] CI: Split CI jobs on TGL and SPR --- .github/workflows/linux_simd.yml | 39 ++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/.github/workflows/linux_simd.yml b/.github/workflows/linux_simd.yml index 092731116f2f..bbad7090b14f 100644 --- a/.github/workflows/linux_simd.yml +++ b/.github/workflows/linux_simd.yml @@ -148,7 +148,7 @@ jobs: - uses: ./.github/meson_actions name: Build/Test - intel_sde: + intel_sde_avx512: needs: [baseline_only] runs-on: ubuntu-latest steps: @@ -209,7 +209,42 @@ jobs: $NUMPY_SITE/numpy/_core/tests/test_multiarray.py \ $NUMPY_SITE/numpy/linalg/tests/test_* - - name: linalg/ufunc/umath tests (SPR) + + intel_sde_spr: + needs: [baseline_only] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + submodules: recursive + fetch-depth: 0 + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 + with: + python-version: '3.11' + + - name: Install Intel SDE + run: | + curl -o /tmp/sde.tar.xz https://downloadmirror.intel.com/788820/sde-external-9.27.0-2023-09-13-lin.tar.xz + mkdir /tmp/sde && tar -xvf /tmp/sde.tar.xz -C /tmp/sde/ + sudo mv /tmp/sde/* /opt/sde && sudo ln -s /opt/sde/sde64 /usr/bin/sde + + - name: Install dependencies + run: | + sudo apt update + sudo apt install -y g++-13 + sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-13 1 + sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-13 1 + python -m pip install -r build_requirements.txt + python -m pip install pytest pytest-xdist hypothesis typing_extensions + + - name: Build + run: spin build -- -Dallow-noblas=true -Dcpu-baseline=avx512_spr + + - name: Meson Log + if: 
always() + run: cat build/meson-logs/meson-log.txt + + - name: linalg/ufunc/umath tests on Intel SPR run: | export NUMPY_SITE=$(realpath build-install/usr/lib/python*/site-packages/) export PYTHONPATH="$PYTHONPATH:$NUMPY_SITE" From bde29643a418542b0fec8c10e6b809e8292f2d71 Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Thu, 14 Dec 2023 09:01:19 -0800 Subject: [PATCH 8/8] Move SIMD SPR tests to intel_sde_spr job --- .github/workflows/linux_simd.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/linux_simd.yml b/.github/workflows/linux_simd.yml index bbad7090b14f..b87d773f4b4e 100644 --- a/.github/workflows/linux_simd.yml +++ b/.github/workflows/linux_simd.yml @@ -190,14 +190,6 @@ jobs: sde -knm -- python -c "import numpy; numpy.show_config()" && sde -knm -- python -m pytest $NUMPY_SITE/numpy/_core/tests/test_simd* - - name: SIMD tests (SPR) - run: | - export NUMPY_SITE=$(realpath build-install/usr/lib/python*/site-packages/) - export PYTHONPATH="$PYTHONPATH:$NUMPY_SITE" - cd build-install && - sde -spr -- python -c "import numpy; numpy.show_config()" && - sde -spr -- python -m pytest $NUMPY_SITE/numpy/_core/tests/test_simd* - - name: linalg/ufunc/umath tests (TGL) run: | export NUMPY_SITE=$(realpath build-install/usr/lib/python*/site-packages/) @@ -244,6 +236,14 @@ jobs: if: always() run: cat build/meson-logs/meson-log.txt + - name: SIMD tests (SPR) + run: | + export NUMPY_SITE=$(realpath build-install/usr/lib/python*/site-packages/) + export PYTHONPATH="$PYTHONPATH:$NUMPY_SITE" + cd build-install && + sde -spr -- python -c "import numpy; numpy.show_config()" && + sde -spr -- python -m pytest $NUMPY_SITE/numpy/_core/tests/test_simd* + - name: linalg/ufunc/umath tests on Intel SPR run: | export NUMPY_SITE=$(realpath build-install/usr/lib/python*/site-packages/)