Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 81d3b6e

Browse files
RKSimon authored and tru committed
[X86] Only fold AND/ANDNP back to VSELECT if we know the predicated mask select is legal (#156663)
By only checking type legality we didn't account for 128/256-bit ops being run on non-AVX512VL targets, or vXi8/i16 ops being run on non-AVX512BW targets. This check is cropping up in several places now and I intend to hoist it out into a common helper, but this initial fix needs to be as clean as possible to be backported to 21.X. Fixes #156256 (cherry picked from commit 86879d4)
1 parent 41df6d5 commit 81d3b6e

File tree

2 files changed

+29
-0
lines changed

2 files changed

+29
-0
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51775,6 +51775,8 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
5177551775
SDValue X, Y;
5177651776
EVT CondVT = VT.changeVectorElementType(MVT::i1);
5177751777
if (TLI.isTypeLegal(VT) && TLI.isTypeLegal(CondVT) &&
51778+
(VT.is512BitVector() || Subtarget.hasVLX()) &&
51779+
(VT.getScalarSizeInBits() >= 32 || Subtarget.hasBWI()) &&
5177851780
sd_match(N, m_And(m_Value(X),
5177951781
m_OneUse(m_SExt(m_AllOf(
5178051782
m_Value(Y), m_SpecificVT(CondVT),
@@ -55329,6 +55331,8 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
5532955331
SDValue Src = N0.getOperand(0);
5533055332
EVT SrcVT = Src.getValueType();
5533155333
if (Src.getOpcode() == ISD::SETCC && SrcVT.getScalarType() == MVT::i1 &&
55334+
(VT.is512BitVector() || Subtarget.hasVLX()) &&
55335+
(VT.getScalarSizeInBits() >= 32 || Subtarget.hasBWI()) &&
5533255336
TLI.isTypeLegal(SrcVT) && N0.hasOneUse() && Src.hasOneUse())
5533355337
return DAG.getSelect(DL, VT, DAG.getNOT(DL, Src, SrcVT), N1,
5533455338
getZeroVector(VT, Subtarget, DAG, DL));

llvm/test/CodeGen/X86/pr156256.ll

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefix=AVX512
3+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefix=AVX512VL
4+
5+
; Regression test for issue #156256: an unsigned <16 x i32> compare is
; sign-extended to <16 x i16> and ANDed with a splat constant. Both RUN
; configurations enable AVX512F/DQ (one also VL) but neither enables AVX512BW;
; the expected output keeps a plain vpand/vpandd rather than a masked select.
define <16 x i16> @PR156256(<16 x i32> %a, <16 x i32> %b) {
6+
; AVX512-LABEL: PR156256:
7+
; AVX512: # %bb.0:
8+
; AVX512-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
9+
; AVX512-NEXT: vpmovm2d %k0, %zmm0
10+
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
11+
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
12+
; AVX512-NEXT: retq
13+
;
14+
; AVX512VL-LABEL: PR156256:
15+
; AVX512VL: # %bb.0:
16+
; AVX512VL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
17+
; AVX512VL-NEXT: vpmovm2d %k0, %zmm0
18+
; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0
19+
; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
20+
; AVX512VL-NEXT: retq
21+
%icmp = icmp ugt <16 x i32> %a, %b
22+
%sext = sext <16 x i1> %icmp to <16 x i16>
23+
%and = and <16 x i16> %sext, splat (i16 16256)
24+
ret <16 x i16> %and
25+
}

0 commit comments

Comments
 (0)