Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit a46b082

Browse files
author
Raghuveer Devulapalli
committed
AVX2 now includes FMA3
1 parent f313286 commit a46b082

2 files changed

Lines changed: 8 additions & 5 deletions

File tree

meson_cpu/x86/meson.build

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,14 @@ FMA3 = mod_features.new(
5959
'FMA3', 24, implies: F16C, args: '-mfma',
6060
test_code: files(source_root + '/numpy/distutils/checks/cpu_fma3.c')[0]
6161
)
62+
# match this to HWY_AVX2
6263
AVX2 = mod_features.new(
63-
'AVX2', 25, implies: F16C, args: '-march=skylake',
64+
'AVX2', 25, implies: FMA3, args: ['-mavx2', '-maes', '-mpclmul', '-mbmi', '-mbmi2'],
6465
test_code: files(source_root + '/numpy/distutils/checks/cpu_avx2.c')[0]
6566
)
6667
# 25-40 left as margin for any extra features
6768
AVX512F = mod_features.new(
68-
'AVX512F', 40, implies: [FMA3, AVX2],
69+
'AVX512F', 40, implies: [AVX2],
6970
# Disables mmx because of stack corruption that may happen during mask
7071
# conversions.
7172
# TODO (seiko2plus): provide more clarification
@@ -93,7 +94,7 @@ AVX512_KNM = mod_features.new(
9394
)
9495
AVX512_SKX = mod_features.new(
9596
'AVX512_SKX', 50, implies: AVX512CD,
96-
args: ['-march=skylake-avx512'],
97+
args: ['-mavx512vl', '-mavx512bw', '-mavx512dq'],
9798
group: ['AVX512VL', 'AVX512BW', 'AVX512DQ'],
9899
test_code: files(source_root + '/numpy/distutils/checks/cpu_avx512_skx.c')[0],
99100
extra_tests: {

numpy/_core/src/common/npy_cpu_features.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,8 @@ npy__cpu_init_features(void)
474474
// third call to the cpuid to get extended AVX2 & AVX512 feature bits
475475
npy__cpu_cpuid(reg, 7);
476476
npy__cpu_have[NPY_CPU_FEATURE_AVX2] = (reg[1] & (1 << 5)) != 0;
477+
npy__cpu_have[NPY_CPU_FEATURE_AVX2] = npy__cpu_have[NPY_CPU_FEATURE_AVX2] &&
478+
npy__cpu_have[NPY_CPU_FEATURE_FMA3];
477479
if (!npy__cpu_have[NPY_CPU_FEATURE_AVX2])
478480
return;
479481
// detect AVX2 & FMA3
@@ -641,7 +643,7 @@ static void
641643
npy__cpu_init_features(void)
642644
{
643645
memset(npy__cpu_have, 0, sizeof(npy__cpu_have[0]) * NPY_CPU_FEATURE_MAX);
644-
646+
645647
unsigned int hwcap = getauxval(AT_HWCAP);
646648
if ((hwcap & HWCAP_S390_VX) == 0) {
647649
return;
@@ -653,7 +655,7 @@ npy__cpu_init_features(void)
653655
npy__cpu_have[NPY_CPU_FEATURE_VXE2] = 1;
654656
return;
655657
}
656-
658+
657659
npy__cpu_have[NPY_CPU_FEATURE_VXE] = (hwcap & HWCAP_S390_VXE) != 0;
658660

659661
npy__cpu_have[NPY_CPU_FEATURE_VX] = 1;

0 commit comments

Comments
 (0)