-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[AggressiveInstcombine] Fold away shift in or reduction chain. #137875
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-transforms Author: David Green (davemgreen) ChangesIf we have This is also true of ne, and true for longer or chains. Thinking out loud, this is kind of like a "any-bit is demanded" combine. I'm not sure if that exists already. Full diff: https://github.com/llvm/llvm-project/pull/137875.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index b7b0bb7361359..d2aa957b10b78 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -5046,6 +5046,29 @@ static Instruction *foldICmpOrXX(ICmpInst &I, const SimplifyQuery &Q,
return nullptr;
}
+static Value *foldShiftAwayFromOrChain(Instruction &I,
+ InstCombiner::BuilderTy &Builder) {
+ if (I.getOpcode() != Instruction::Or)
+ return nullptr;
+ Value *A, *B;
+ if (match(&I, m_c_Or(m_CombineOr(m_NSWShl(m_Value(A), m_Value()),
+ m_NUWShl(m_Value(A), m_Value())),
+ m_Value(B))))
+ return Builder.CreateOr(A, B);
+
+ Value *Op0 = I.getOperand(0);
+ if (isa<Instruction>(Op0))
+ if (auto *X = foldShiftAwayFromOrChain(*cast<Instruction>(Op0), Builder))
+ Op0 = X;
+ Value *Op1 = I.getOperand(1);
+ if (isa<Instruction>(Op1))
+ if (auto *X = foldShiftAwayFromOrChain(*cast<Instruction>(Op1), Builder))
+ Op1 = X;
+ if (Op0 != I.getOperand(0) || Op1 != I.getOperand(1))
+ return Builder.CreateOr(Op0, Op1);
+ return nullptr;
+}
+
static Instruction *foldICmpXorXX(ICmpInst &I, const SimplifyQuery &Q,
InstCombinerImpl &IC) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1), *A;
@@ -7742,6 +7765,11 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
}
}
+ // icmp eq/ne or(shl(a), b), 0 -> icmp eq/ne or(a, b)
+ if (I.isEquality() && match(Op1, m_Zero()) && isa<Instruction>(Op0))
+ if (auto *Res = foldShiftAwayFromOrChain(*cast<Instruction>(Op0), Builder))
+ return new ICmpInst(I.getPredicate(), Res, Op1);
+
return Changed ? &I : nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll b/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
index 993325f6ff0b0..baaa754e52894 100644
--- a/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
@@ -409,3 +409,112 @@ define i1 @PR38139(i8 %arg) {
%r = icmp ne i8 %masked, %arg
ret i1 %r
}
+
+define i1 @remove_shift_nuw_ab(i8 %a, i8 %b, i8 %s) {
+; CHECK-LABEL: @remove_shift_nuw_ab(
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[T:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[IC:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT: ret i1 [[IC]]
+;
+ %t = shl nuw i8 %a, %s
+ %or = or i8 %t, %b
+ %ic = icmp eq i8 %or, 0
+ ret i1 %ic
+}
+
+define i1 @remove_shift_nuw_ba(i8 %a, i8 %b, i8 %s) {
+; CHECK-LABEL: @remove_shift_nuw_ba(
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[B:%.*]], [[T:%.*]]
+; CHECK-NEXT: [[IC:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT: ret i1 [[IC]]
+;
+ %t = shl nuw i8 %a, %s
+ %or = or i8 %b, %t
+ %ic = icmp eq i8 %or, 0
+ ret i1 %ic
+}
+
+define i1 @remove_shift_nsw(i8 %a, i8 %b, i8 %s) {
+; CHECK-LABEL: @remove_shift_nsw(
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[T:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[IC:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT: ret i1 [[IC]]
+;
+ %t = shl nsw i8 %a, %s
+ %or = or i8 %t, %b
+ %ic = icmp eq i8 %or, 0
+ ret i1 %ic
+}
+
+define i1 @remove_shift_nuw_ne(i8 %a, i8 %b, i8 %s) {
+; CHECK-LABEL: @remove_shift_nuw_ne(
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[T:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[IC:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT: ret i1 [[IC]]
+;
+ %t = shl nuw i8 %a, %s
+ %or = or i8 %t, %b
+ %ic = icmp eq i8 %or, 0
+ ret i1 %ic
+}
+
+define i1 @remove_shift_nsw_ne(i8 %a, i8 %b, i8 %s) {
+; CHECK-LABEL: @remove_shift_nsw_ne(
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[T:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[IC:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT: ret i1 [[IC]]
+;
+ %t = shl nsw i8 %a, %s
+ %or = or i8 %t, %b
+ %ic = icmp eq i8 %or, 0
+ ret i1 %ic
+}
+
+define i1 @remove_shift_wraps(i8 %a, i8 %b, i8 %s) {
+; CHECK-LABEL: @remove_shift_wraps(
+; CHECK-NEXT: [[T:%.*]] = shl i8 [[A:%.*]], [[S:%.*]]
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[T]], [[B:%.*]]
+; CHECK-NEXT: [[IC:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT: ret i1 [[IC]]
+;
+ %t = shl i8 %a, %s
+ %or = or i8 %t, %b
+ %ic = icmp eq i8 %or, 0
+ ret i1 %ic
+}
+
+define i1 @remove_shift_chain_d(i8 %a, i8 %b, i8 %c, i8 %d, i8 %s) {
+; CHECK-LABEL: @remove_shift_chain_d(
+; CHECK-NEXT: [[OR1:%.*]] = or i8 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[OR2:%.*]] = or i8 [[C:%.*]], [[DT:%.*]]
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[OR1]], [[OR2]]
+; CHECK-NEXT: [[IC:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT: ret i1 [[IC]]
+;
+ %dt = shl nuw i8 %d, %s
+ %or1 = or i8 %a, %b
+ %or2 = or i8 %c, %dt
+ %or = or i8 %or1, %or2
+ %ic = icmp eq i8 %or, 0
+ ret i1 %ic
+}
+
+define i1 @remove_shift_chain_abcd(i8 %a, i8 %b, i8 %c, i8 %d, i8 %s) {
+; CHECK-LABEL: @remove_shift_chain_abcd(
+; CHECK-NEXT: [[OR1:%.*]] = or i8 [[AT:%.*]], [[BT:%.*]]
+; CHECK-NEXT: [[OR2:%.*]] = or i8 [[CT:%.*]], [[DT:%.*]]
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[OR1]], [[OR2]]
+; CHECK-NEXT: [[IC:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT: ret i1 [[IC]]
+;
+ %at = shl nuw i8 %a, %s
+ %bt = shl nuw i8 %b, 2
+ %ct = shl nuw i8 %c, 1
+ %dt = shl nuw i8 %d, %s
+ %or1 = or i8 %at, %bt
+ %or2 = or i8 %ct, %dt
+ %or = or i8 %or1, %or2
+ %ic = icmp eq i8 %or, 0
+ ret i1 %ic
+}
+
|
b2d83d9
to
4bc6794
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am fine with this fold itself. But it prevents AggressiveInstCombine from merging consecutive loads: https://godbolt.org/z/aq94MeM3n
We should not perform this fold when the shift's LHS is zext(load)
.
If you think this workaround is too fragile, an alternative solution is to convert (load iN p) | (load iN p+N/8) == 0
into (load i2N p) == 0
in AggressiveInstCombine.
4bc6794
to
fc1184a
Compare
I've tried to move this into aggressive instcombine to allow it to check for trees of loads first. The transform is looking up through trees already, so maybe that is a better place for it. Let me know what you think. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Thank you!
…37875 If we have icmp eq or(a, shl(b)), 0 then the shift can be removed so long as it is nuw or nsw. It is still comparing that some bits are non-zero. https://alive2.llvm.org/ce/z/nhrBVX. This is also true of ne, and true for longer or chains.
fc1184a
to
57ff4e8
Compare
If we have
icmp eq or(a, shl(b)), 0
then the shift can be removed so long as it is nuw or nsw. It is still comparing that some bits are non-zero.https://alive2.llvm.org/ce/z/nhrBVX.
This is also true of ne, and true for longer or chains.