Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 58fc8cb

Browse files
author
git apple-llvm automerger
committed
Merge commit '46d4c3b1f64d' from llvm.org/main into next
2 parents cafec40 + 46d4c3b commit 58fc8cb

File tree

2 files changed

+9
-8
lines changed

2 files changed

+9
-8
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

+7-6
Original file line numberDiff line numberDiff line change
@@ -39827,7 +39827,6 @@ static SDValue combineX86ShuffleChain(
3982739827

3982839828
// If we're inserting the low subvector, an insert-subvector 'concat'
3982939829
// pattern is quicker than VPERM2X128.
39830-
// TODO: Add AVX2 support instead of VPERMQ/VPERMPD.
3983139830
if (BaseMask[0] == 0 && (BaseMask[1] == 0 || BaseMask[1] == 2) &&
3983239831
!Subtarget.hasAVX2()) {
3983339832
if (Depth == 0 && RootOpc == ISD::INSERT_SUBVECTOR)
@@ -39838,15 +39837,15 @@ static SDValue combineX86ShuffleChain(
3983839837
return insertSubVector(Lo, Hi, NumRootElts / 2, DAG, DL, 128);
3983939838
}
3984039839

39841-
if (Depth == 0 && RootOpc == X86ISD::VPERM2X128)
39842-
return SDValue(); // Nothing to do!
39843-
39844-
// If we have AVX2, prefer to use VPERMQ/VPERMPD for unary shuffles unless
39845-
// we need to use the zeroing feature.
39840+
// Don't lower to VPERM2X128 here if we have AVX2+, prefer to use
39841+
// VPERMQ/VPERMPD for unary shuffles unless we need to use the zeroing
39842+
// feature.
3984639843
// Prefer blends for sequential shuffles unless we are optimizing for size.
3984739844
if (UnaryShuffle &&
3984839845
!(Subtarget.hasAVX2() && isUndefOrInRange(Mask, 0, 2)) &&
3984939846
(OptForSize || !isSequentialOrUndefOrZeroInRange(Mask, 0, 2, 0))) {
39847+
if (Depth == 0 && RootOpc == X86ISD::VPERM2X128)
39848+
return SDValue(); // Nothing to do!
3985039849
unsigned PermMask = 0;
3985139850
PermMask |= ((Mask[0] < 0 ? 0x8 : (Mask[0] & 1)) << 0);
3985239851
PermMask |= ((Mask[1] < 0 ? 0x8 : (Mask[1] & 1)) << 4);
@@ -39864,6 +39863,8 @@ static SDValue combineX86ShuffleChain(
3986439863
"Unexpected shuffle sentinel value");
3986539864
// Prefer blends to X86ISD::VPERM2X128.
3986639865
if (!((Mask[0] == 0 && Mask[1] == 3) || (Mask[0] == 2 && Mask[1] == 1))) {
39866+
if (Depth == 0 && RootOpc == X86ISD::VPERM2X128)
39867+
return SDValue(); // Nothing to do!
3986739868
unsigned PermMask = 0;
3986839869
PermMask |= ((Mask[0] & 3) << 0);
3986939870
PermMask |= ((Mask[1] & 3) << 4);

llvm/test/CodeGen/X86/matrix-multiply.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ define <4 x double> @test_mul2x2_f64(<4 x double> %a0, <4 x double> %a1) nounwin
131131
; AVX2-LABEL: test_mul2x2_f64:
132132
; AVX2: # %bb.0: # %entry
133133
; AVX2-NEXT: vshufpd {{.*#+}} ymm2 = ymm1[1,1,3,3]
134-
; AVX2-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm0[2,3,2,3]
134+
; AVX2-NEXT: vpermpd {{.*#+}} ymm3 = ymm0[2,3,2,3]
135135
; AVX2-NEXT: vmulpd %ymm2, %ymm3, %ymm2
136136
; AVX2-NEXT: vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
137137
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
@@ -142,7 +142,7 @@ define <4 x double> @test_mul2x2_f64(<4 x double> %a0, <4 x double> %a1) nounwin
142142
; AVX512-LABEL: test_mul2x2_f64:
143143
; AVX512: # %bb.0: # %entry
144144
; AVX512-NEXT: vshufpd {{.*#+}} ymm2 = ymm1[1,1,3,3]
145-
; AVX512-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm0[2,3,2,3]
145+
; AVX512-NEXT: vpermpd {{.*#+}} ymm3 = ymm0[2,3,2,3]
146146
; AVX512-NEXT: vmulpd %ymm2, %ymm3, %ymm2
147147
; AVX512-NEXT: vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
148148
; AVX512-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]

0 commit comments

Comments
 (0)