[AggressiveInstcombine] Fold away shift in or reduction chain.#137875
[AggressiveInstcombine] Fold away shift in or reduction chain.#137875davemgreen merged 1 commit intollvm:mainfrom
Conversation
|
@llvm/pr-subscribers-llvm-transforms Author: David Green (davemgreen) ChangesIf we have This is also true of ne, and true for longer or chains. Thinking out loud, this is kind of like a "any-bit is demanded" combine. I'm not sure if that exists already. Full diff: https://github.com/llvm/llvm-project/pull/137875.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index b7b0bb7361359..d2aa957b10b78 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -5046,6 +5046,29 @@ static Instruction *foldICmpOrXX(ICmpInst &I, const SimplifyQuery &Q,
return nullptr;
}
+static Value *foldShiftAwayFromOrChain(Instruction &I,
+ InstCombiner::BuilderTy &Builder) {
+ if (I.getOpcode() != Instruction::Or)
+ return nullptr;
+ Value *A, *B;
+ if (match(&I, m_c_Or(m_CombineOr(m_NSWShl(m_Value(A), m_Value()),
+ m_NUWShl(m_Value(A), m_Value())),
+ m_Value(B))))
+ return Builder.CreateOr(A, B);
+
+ Value *Op0 = I.getOperand(0);
+ if (isa<Instruction>(Op0))
+ if (auto *X = foldShiftAwayFromOrChain(*cast<Instruction>(Op0), Builder))
+ Op0 = X;
+ Value *Op1 = I.getOperand(1);
+ if (isa<Instruction>(Op1))
+ if (auto *X = foldShiftAwayFromOrChain(*cast<Instruction>(Op1), Builder))
+ Op1 = X;
+ if (Op0 != I.getOperand(0) || Op1 != I.getOperand(1))
+ return Builder.CreateOr(Op0, Op1);
+ return nullptr;
+}
+
static Instruction *foldICmpXorXX(ICmpInst &I, const SimplifyQuery &Q,
InstCombinerImpl &IC) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1), *A;
@@ -7742,6 +7765,11 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
}
}
+ // icmp eq/ne or(shl(a), b), 0 -> icmp eq/ne or(a, b)
+ if (I.isEquality() && match(Op1, m_Zero()) && isa<Instruction>(Op0))
+ if (auto *Res = foldShiftAwayFromOrChain(*cast<Instruction>(Op0), Builder))
+ return new ICmpInst(I.getPredicate(), Res, Op1);
+
return Changed ? &I : nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll b/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
index 993325f6ff0b0..baaa754e52894 100644
--- a/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
@@ -409,3 +409,112 @@ define i1 @PR38139(i8 %arg) {
%r = icmp ne i8 %masked, %arg
ret i1 %r
}
+
+define i1 @remove_shift_nuw_ab(i8 %a, i8 %b, i8 %s) {
+; CHECK-LABEL: @remove_shift_nuw_ab(
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[T:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[IC:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT: ret i1 [[IC]]
+;
+ %t = shl nuw i8 %a, %s
+ %or = or i8 %t, %b
+ %ic = icmp eq i8 %or, 0
+ ret i1 %ic
+}
+
+define i1 @remove_shift_nuw_ba(i8 %a, i8 %b, i8 %s) {
+; CHECK-LABEL: @remove_shift_nuw_ba(
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[B:%.*]], [[T:%.*]]
+; CHECK-NEXT: [[IC:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT: ret i1 [[IC]]
+;
+ %t = shl nuw i8 %a, %s
+ %or = or i8 %b, %t
+ %ic = icmp eq i8 %or, 0
+ ret i1 %ic
+}
+
+define i1 @remove_shift_nsw(i8 %a, i8 %b, i8 %s) {
+; CHECK-LABEL: @remove_shift_nsw(
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[T:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[IC:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT: ret i1 [[IC]]
+;
+ %t = shl nsw i8 %a, %s
+ %or = or i8 %t, %b
+ %ic = icmp eq i8 %or, 0
+ ret i1 %ic
+}
+
+define i1 @remove_shift_nuw_ne(i8 %a, i8 %b, i8 %s) {
+; CHECK-LABEL: @remove_shift_nuw_ne(
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[T:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[IC:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT: ret i1 [[IC]]
+;
+ %t = shl nuw i8 %a, %s
+ %or = or i8 %t, %b
+ %ic = icmp eq i8 %or, 0
+ ret i1 %ic
+}
+
+define i1 @remove_shift_nsw_ne(i8 %a, i8 %b, i8 %s) {
+; CHECK-LABEL: @remove_shift_nsw_ne(
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[T:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[IC:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT: ret i1 [[IC]]
+;
+ %t = shl nsw i8 %a, %s
+ %or = or i8 %t, %b
+ %ic = icmp eq i8 %or, 0
+ ret i1 %ic
+}
+
+define i1 @remove_shift_wraps(i8 %a, i8 %b, i8 %s) {
+; CHECK-LABEL: @remove_shift_wraps(
+; CHECK-NEXT: [[T:%.*]] = shl i8 [[A:%.*]], [[S:%.*]]
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[T]], [[B:%.*]]
+; CHECK-NEXT: [[IC:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT: ret i1 [[IC]]
+;
+ %t = shl i8 %a, %s
+ %or = or i8 %t, %b
+ %ic = icmp eq i8 %or, 0
+ ret i1 %ic
+}
+
+define i1 @remove_shift_chain_d(i8 %a, i8 %b, i8 %c, i8 %d, i8 %s) {
+; CHECK-LABEL: @remove_shift_chain_d(
+; CHECK-NEXT: [[OR1:%.*]] = or i8 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[OR2:%.*]] = or i8 [[C:%.*]], [[DT:%.*]]
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[OR1]], [[OR2]]
+; CHECK-NEXT: [[IC:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT: ret i1 [[IC]]
+;
+ %dt = shl nuw i8 %d, %s
+ %or1 = or i8 %a, %b
+ %or2 = or i8 %c, %dt
+ %or = or i8 %or1, %or2
+ %ic = icmp eq i8 %or, 0
+ ret i1 %ic
+}
+
+define i1 @remove_shift_chain_abcd(i8 %a, i8 %b, i8 %c, i8 %d, i8 %s) {
+; CHECK-LABEL: @remove_shift_chain_abcd(
+; CHECK-NEXT: [[OR1:%.*]] = or i8 [[AT:%.*]], [[BT:%.*]]
+; CHECK-NEXT: [[OR2:%.*]] = or i8 [[CT:%.*]], [[DT:%.*]]
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[OR1]], [[OR2]]
+; CHECK-NEXT: [[IC:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT: ret i1 [[IC]]
+;
+ %at = shl nuw i8 %a, %s
+ %bt = shl nuw i8 %b, 2
+ %ct = shl nuw i8 %c, 1
+ %dt = shl nuw i8 %d, %s
+ %or1 = or i8 %at, %bt
+ %or2 = or i8 %ct, %dt
+ %or = or i8 %or1, %or2
+ %ic = icmp eq i8 %or, 0
+ ret i1 %ic
+}
+
|
b2d83d9 to
4bc6794
Compare
dtcxzyw
left a comment
There was a problem hiding this comment.
I am fine with this fold itself. But it prevents AggressiveInstCombine from merging consecutive loads: https://godbolt.org/z/aq94MeM3n
We should not perform this fold when the shift's LHS is zext(load).
If you think this workaround is too fragile, an alternative solution is to convert (load iN p) | (load iN p+N/8) == 0 into (load i2N p) == 0 in AggressiveInstCombine.
4bc6794 to
fc1184a
Compare
|
I've tried to move this into aggressive instcombine to allow it to check for trees of loads first. The transform is looking up through trees already, so maybe that is a better place for it. Let me know what you think. |
…37875 If we have icmp eq or(a, shl(b)), 0 then the shift can be removed so long as it is nuw or nsw. It is still comparing that some bits are non-zero. https://alive2.llvm.org/ce/z/nhrBVX. This is also true of ne, and true for longer or chains.
fc1184a to
57ff4e8
Compare
If we have
icmp eq or(a, shl(b)), 0then the shift can be removed so long as it is nuw or nsw. It is still comparing that some bits are non-zero.https://alive2.llvm.org/ce/z/nhrBVX.
This is also true of ne, and true for longer or chains.