Thanks for visiting codestin.com. Credit goes to github.com.
We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent c0b6a6e, commit dd4320f (Copy full SHA for dd4320f)
cp-algo/util/simd.hpp
@@ -43,10 +43,11 @@ namespace cp_algo {
43
}
44
45
[[gnu::always_inline]] inline u64x4 montgomery_reduce(u64x4 x, uint32_t mod, uint32_t imod) {
46
- auto x_ninv = u64x4(u32x8(x) * (u32x8() + imod));
47
#ifdef __AVX2__
+ auto x_ninv = u64x4(_mm256_mul_epu32(__m256i(x), __m256i() + imod));
48
x += u64x4(_mm256_mul_epu32(__m256i(x_ninv), __m256i() + mod));
49
#else
50
+ auto x_ninv = x * imod;
51
x += low32(x_ninv) * mod;
52
#endif
53
return x >> 32;
0 commit comments