-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[Instcombine] Fold away shift in or reduction chain. #137875
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
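@llvm/pr-subscribers-llvm-transforms Author: David Green (davemgreen) Changes: If we have `icmp eq or(a, shl(b)), 0` then the shift can be removed so long as it is nuw or nsw. It is still comparing that some bits are non-zero. https://alive2.llvm.org/ce/z/nhrBVX. This is also true of ne, and true for longer or chains. Thinking out loud, this is kind of like a "any-bit is demanded" combine. I'm not sure if that exists already. Full diff: https://github.com/llvm/llvm-project/pull/137875.diff 2 Files Affected: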
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index b7b0bb7361359..d2aa957b10b78 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -5046,6 +5046,29 @@ static Instruction *foldICmpOrXX(ICmpInst &I, const SimplifyQuery &Q,
return nullptr;
}
+static Value *foldShiftAwayFromOrChain(Instruction &I, // Rebuilds the or-tree rooted at I with an nsw/nuw shl operand replaced by that shl's shifted value.
+ InstCombiner::BuilderTy &Builder) { // Returns the new value created through Builder, or nullptr when nothing was rewritten.
+ if (I.getOpcode() != Instruction::Or) // Only `or` instructions are handled; anything else ends the walk.
+ return nullptr;
+ Value *A, *B;
+ if (match(&I, m_c_Or(m_CombineOr(m_NSWShl(m_Value(A), m_Value()),
+ m_NUWShl(m_Value(A), m_Value())),
+ m_Value(B)))) // Either operand is a shl flagged nsw or nuw: A is the shifted value, B the other operand; the shift amount is ignored.
+ return Builder.CreateOr(A, B); // Emit or(A, B); B is used as-is and is not searched further in this call.
+ // No direct match on I: recurse into each operand that is itself an instruction.
+ Value *Op0 = I.getOperand(0);
+ if (isa<Instruction>(Op0))
+ if (auto *X = foldShiftAwayFromOrChain(*cast<Instruction>(Op0), Builder))
+ Op0 = X;
+ Value *Op1 = I.getOperand(1);
+ if (isa<Instruction>(Op1))
+ if (auto *X = foldShiftAwayFromOrChain(*cast<Instruction>(Op1), Builder))
+ Op1 = X;
+ if (Op0 != I.getOperand(0) || Op1 != I.getOperand(1)) // A recursive call replaced at least one operand.
+ return Builder.CreateOr(Op0, Op1);
+ return nullptr;
+} // NOTE(review): the result is not equal to I in general; the visible caller only uses it under an icmp eq/ne against zero.
+
static Instruction *foldICmpXorXX(ICmpInst &I, const SimplifyQuery &Q,
InstCombinerImpl &IC) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1), *A;
@@ -7742,6 +7765,11 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
}
}
+ // icmp eq/ne or(shl nuw/nsw(a, s), b), 0 -> icmp eq/ne or(a, b), 0
+ if (I.isEquality() && match(Op1, m_Zero()) && isa<Instruction>(Op0))
+ if (auto *Res = foldShiftAwayFromOrChain(*cast<Instruction>(Op0), Builder))
+ return new ICmpInst(I.getPredicate(), Res, Op1);
+
return Changed ? &I : nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll b/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
index 993325f6ff0b0..baaa754e52894 100644
--- a/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
@@ -409,3 +409,112 @@ define i1 @PR38139(i8 %arg) {
%r = icmp ne i8 %masked, %arg
ret i1 %r
}
+
+define i1 @remove_shift_nuw_ab(i8 %a, i8 %b, i8 %s) { ; shl nuw feeds the first operand of the or; result compared eq 0
+; CHECK-LABEL: @remove_shift_nuw_ab(
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[T:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[IC:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT: ret i1 [[IC]]
+;
+ %t = shl nuw i8 %a, %s ; the expected output above contains no shl
+ %or = or i8 %t, %b
+ %ic = icmp eq i8 %or, 0
+ ret i1 %ic
+}
+
+define i1 @remove_shift_nuw_ba(i8 %a, i8 %b, i8 %s) { ; commuted form: shl nuw feeds the second operand of the or
+; CHECK-LABEL: @remove_shift_nuw_ba(
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[B:%.*]], [[T:%.*]]
+; CHECK-NEXT: [[IC:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT: ret i1 [[IC]]
+;
+ %t = shl nuw i8 %a, %s ; the expected output above contains no shl
+ %or = or i8 %b, %t
+ %ic = icmp eq i8 %or, 0
+ ret i1 %ic
+}
+
+define i1 @remove_shift_nsw(i8 %a, i8 %b, i8 %s) { ; same shape as the nuw test but the shl carries nsw
+; CHECK-LABEL: @remove_shift_nsw(
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[T:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[IC:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT: ret i1 [[IC]]
+;
+ %t = shl nsw i8 %a, %s ; the expected output above contains no shl
+ %or = or i8 %t, %b
+ %ic = icmp eq i8 %or, 0
+ ret i1 %ic
+}
+
+define i1 @remove_shift_nuw_ne(i8 %a, i8 %b, i8 %s) { ; ne-predicate variant of the shl nuw case
+; CHECK-LABEL: @remove_shift_nuw_ne(
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[T:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[IC:%.*]] = icmp ne i8 [[OR]], 0
+; CHECK-NEXT: ret i1 [[IC]]
+;
+ %t = shl nuw i8 %a, %s ; the expected output above contains no shl
+ %or = or i8 %t, %b
+ %ic = icmp ne i8 %or, 0 ; ne, as the test name says (was eq, duplicating remove_shift_nuw_ab)
+ ret i1 %ic
+}
+
+define i1 @remove_shift_nsw_ne(i8 %a, i8 %b, i8 %s) { ; ne-predicate variant of the shl nsw case
+; CHECK-LABEL: @remove_shift_nsw_ne(
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[T:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[IC:%.*]] = icmp ne i8 [[OR]], 0
+; CHECK-NEXT: ret i1 [[IC]]
+;
+ %t = shl nsw i8 %a, %s ; the expected output above contains no shl
+ %or = or i8 %t, %b
+ %ic = icmp ne i8 %or, 0 ; ne, as the test name says (was eq, duplicating remove_shift_nsw)
+ ret i1 %ic
+}
+
+define i1 @remove_shift_wraps(i8 %a, i8 %b, i8 %s) { ; negative test: the shl has neither nuw nor nsw
+; CHECK-LABEL: @remove_shift_wraps(
+; CHECK-NEXT: [[T:%.*]] = shl i8 [[A:%.*]], [[S:%.*]]
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[T]], [[B:%.*]]
+; CHECK-NEXT: [[IC:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT: ret i1 [[IC]]
+;
+ %t = shl i8 %a, %s ; unflagged shl; the expected output above still contains it
+ %or = or i8 %t, %b
+ %ic = icmp eq i8 %or, 0
+ ret i1 %ic
+}
+
+define i1 @remove_shift_chain_d(i8 %a, i8 %b, i8 %c, i8 %d, i8 %s) { ; tree of three ors where only %d is shifted
+; CHECK-LABEL: @remove_shift_chain_d(
+; CHECK-NEXT: [[OR1:%.*]] = or i8 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[OR2:%.*]] = or i8 [[C:%.*]], [[DT:%.*]]
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[OR1]], [[OR2]]
+; CHECK-NEXT: [[IC:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT: ret i1 [[IC]]
+;
+ %dt = shl nuw i8 %d, %s ; the shl sits one or-level below the compared value
+ %or1 = or i8 %a, %b
+ %or2 = or i8 %c, %dt
+ %or = or i8 %or1, %or2
+ %ic = icmp eq i8 %or, 0 ; the expected output above contains no shl
+ ret i1 %ic
+}
+
+define i1 @remove_shift_chain_abcd(i8 %a, i8 %b, i8 %c, i8 %d, i8 %s) { ; tree of three ors where all four leaves are shl nuw
+; CHECK-LABEL: @remove_shift_chain_abcd(
+; CHECK-NEXT: [[OR1:%.*]] = or i8 [[AT:%.*]], [[BT:%.*]]
+; CHECK-NEXT: [[OR2:%.*]] = or i8 [[CT:%.*]], [[DT:%.*]]
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[OR1]], [[OR2]]
+; CHECK-NEXT: [[IC:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT: ret i1 [[IC]]
+;
+ %at = shl nuw i8 %a, %s ; variable shift amount
+ %bt = shl nuw i8 %b, 2 ; constant shift amount
+ %ct = shl nuw i8 %c, 1 ; constant shift amount
+ %dt = shl nuw i8 %d, %s ; variable shift amount
+ %or1 = or i8 %at, %bt
+ %or2 = or i8 %ct, %dt
+ %or = or i8 %or1, %or2
+ %ic = icmp eq i8 %or, 0 ; the expected output above contains no shl
+ ret i1 %ic
+}
+
|
If we have `icmp eq or(a, shl(b)), 0` then the shift can be removed so long as it is nuw or nsw. It is still comparing the same bits against 0. https://alive2.llvm.org/ce/z/nhrBVX. This is also true of ne, and true of longer or chains. Thinking out loud, this is kind of like a "are any bits demanded" combine.
@@ -7742,6 +7765,11 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { | |||
} | |||
} | |||
|
|||
// icmp eq/ne or(shl nuw/nsw(a, s), b), 0 -> icmp eq/ne or(a, b), 0 | |||
if (I.isEquality() && match(Op1, m_Zero()) && isa<Instruction>(Op0)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be better to move this fold into InstCombinerImpl::foldICmpWithZero
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks - I wasn't sure which of foldICmpEquality, foldICmpBinOp, foldICmpWithZero, foldICmpWithConstant or.. foldICmpInstWithConstant (?) it would live best in. I've moved it to the end of foldICmpWithZero (and had to move the function earlier in the file).
b2d83d9
to
4bc6794
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am fine with this fold itself. But it prevents AggressiveInstCombine from merging consecutive loads: https://godbolt.org/z/aq94MeM3n
We should not perform this fold when the shift's LHS is zext(load)
.
If you think this workaround is too fragile, an alternative solution is to convert (load iN p) | (load iN p+N/8) == 0
into (load i2N p) == 0
in AggressiveInstCombine.
If we have
icmp eq or(a, shl(b)), 0
then the shift can be removed so long as it is nuw or nsw. It is still comparing that some bits are non-zero. https://alive2.llvm.org/ce/z/nhrBVX.
This is also true of ne, and true for longer or chains.
Thinking out loud, this is kind of like a "any-bit is demanded" combine. I'm not sure if that exists already.