Thanks to visit codestin.com
Credit goes to github.com

Skip to content

[AggressiveInstcombine] Fold away shift in or reduction chain. #137875

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from

Conversation

davemgreen
Copy link
Collaborator

@davemgreen davemgreen commented Apr 29, 2025

If we have icmp eq or(a, shl(b)), 0 then the shift can be removed so long as it is nuw or nsw. It is still comparing that some bits are non-zero.
https://alive2.llvm.org/ce/z/nhrBVX.

This is also true of ne, and true for longer or chains.

@llvmbot
Copy link
Member

llvmbot commented Apr 29, 2025

@llvm/pr-subscribers-llvm-transforms

Author: David Green (davemgreen)

Changes

If we have icmp eq or(a, shl(b)), 0 then the shift can be removed so long as it is nuw or nsw. It is still comparing that some bits are non-zero.
https://alive2.llvm.org/ce/z/nhrBVX.

This is also true of ne, and true for longer or chains.

Thinking out loud, this is kind of like a "any-bit is demanded" combine. I'm not sure if that exists already.


Full diff: https://github.com/llvm/llvm-project/pull/137875.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp (+28)
  • (modified) llvm/test/Transforms/InstCombine/icmp-of-or-x.ll (+109)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index b7b0bb7361359..d2aa957b10b78 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -5046,6 +5046,29 @@ static Instruction *foldICmpOrXX(ICmpInst &I, const SimplifyQuery &Q,
   return nullptr;
 }
 
+static Value *foldShiftAwayFromOrChain(Instruction &I,
+                                       InstCombiner::BuilderTy &Builder) {
+  if (I.getOpcode() != Instruction::Or)
+    return nullptr;
+  Value *A, *B;
+  if (match(&I, m_c_Or(m_CombineOr(m_NSWShl(m_Value(A), m_Value()),
+                                   m_NUWShl(m_Value(A), m_Value())),
+                       m_Value(B))))
+    return Builder.CreateOr(A, B);
+
+  Value *Op0 = I.getOperand(0);
+  if (isa<Instruction>(Op0))
+    if (auto *X = foldShiftAwayFromOrChain(*cast<Instruction>(Op0), Builder))
+      Op0 = X;
+  Value *Op1 = I.getOperand(1);
+  if (isa<Instruction>(Op1))
+    if (auto *X = foldShiftAwayFromOrChain(*cast<Instruction>(Op1), Builder))
+      Op1 = X;
+  if (Op0 != I.getOperand(0) || Op1 != I.getOperand(1))
+    return Builder.CreateOr(Op0, Op1);
+  return nullptr;
+}
+
 static Instruction *foldICmpXorXX(ICmpInst &I, const SimplifyQuery &Q,
                                   InstCombinerImpl &IC) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1), *A;
@@ -7742,6 +7765,11 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
     }
   }
 
+  // icmp eq/ne or(shl(a), b), 0 -> icmp eq/ne or(a, b)
+  if (I.isEquality() && match(Op1, m_Zero()) && isa<Instruction>(Op0))
+    if (auto *Res = foldShiftAwayFromOrChain(*cast<Instruction>(Op0), Builder))
+      return new ICmpInst(I.getPredicate(), Res, Op1);
+
   return Changed ? &I : nullptr;
 }
 
diff --git a/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll b/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
index 993325f6ff0b0..baaa754e52894 100644
--- a/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
@@ -409,3 +409,112 @@ define i1 @PR38139(i8 %arg) {
   %r = icmp ne i8 %masked, %arg
   ret i1 %r
 }
+
+define i1 @remove_shift_nuw_ab(i8 %a, i8 %b, i8 %s) {
+; CHECK-LABEL: @remove_shift_nuw_ab(
+; CHECK-NEXT:    [[OR:%.*]] = or i8 [[T:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[IC:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT:    ret i1 [[IC]]
+;
+  %t = shl nuw i8 %a, %s
+  %or = or i8 %t, %b
+  %ic = icmp eq i8 %or, 0
+  ret i1 %ic
+}
+
+define i1 @remove_shift_nuw_ba(i8 %a, i8 %b, i8 %s) {
+; CHECK-LABEL: @remove_shift_nuw_ba(
+; CHECK-NEXT:    [[OR:%.*]] = or i8 [[B:%.*]], [[T:%.*]]
+; CHECK-NEXT:    [[IC:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT:    ret i1 [[IC]]
+;
+  %t = shl nuw i8 %a, %s
+  %or = or i8 %b, %t
+  %ic = icmp eq i8 %or, 0
+  ret i1 %ic
+}
+
+define i1 @remove_shift_nsw(i8 %a, i8 %b, i8 %s) {
+; CHECK-LABEL: @remove_shift_nsw(
+; CHECK-NEXT:    [[OR:%.*]] = or i8 [[T:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[IC:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT:    ret i1 [[IC]]
+;
+  %t = shl nsw i8 %a, %s
+  %or = or i8 %t, %b
+  %ic = icmp eq i8 %or, 0
+  ret i1 %ic
+}
+
+define i1 @remove_shift_nuw_ne(i8 %a, i8 %b, i8 %s) {
+; CHECK-LABEL: @remove_shift_nuw_ne(
+; CHECK-NEXT:    [[OR:%.*]] = or i8 [[T:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[IC:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT:    ret i1 [[IC]]
+;
+  %t = shl nuw i8 %a, %s
+  %or = or i8 %t, %b
+  %ic = icmp eq i8 %or, 0
+  ret i1 %ic
+}
+
+define i1 @remove_shift_nsw_ne(i8 %a, i8 %b, i8 %s) {
+; CHECK-LABEL: @remove_shift_nsw_ne(
+; CHECK-NEXT:    [[OR:%.*]] = or i8 [[T:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[IC:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT:    ret i1 [[IC]]
+;
+  %t = shl nsw i8 %a, %s
+  %or = or i8 %t, %b
+  %ic = icmp eq i8 %or, 0
+  ret i1 %ic
+}
+
+define i1 @remove_shift_wraps(i8 %a, i8 %b, i8 %s) {
+; CHECK-LABEL: @remove_shift_wraps(
+; CHECK-NEXT:    [[T:%.*]] = shl i8 [[A:%.*]], [[S:%.*]]
+; CHECK-NEXT:    [[OR:%.*]] = or i8 [[T]], [[B:%.*]]
+; CHECK-NEXT:    [[IC:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT:    ret i1 [[IC]]
+;
+  %t = shl i8 %a, %s
+  %or = or i8 %t, %b
+  %ic = icmp eq i8 %or, 0
+  ret i1 %ic
+}
+
+define i1 @remove_shift_chain_d(i8 %a, i8 %b, i8 %c, i8 %d, i8 %s) {
+; CHECK-LABEL: @remove_shift_chain_d(
+; CHECK-NEXT:    [[OR1:%.*]] = or i8 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[OR2:%.*]] = or i8 [[C:%.*]], [[DT:%.*]]
+; CHECK-NEXT:    [[OR:%.*]] = or i8 [[OR1]], [[OR2]]
+; CHECK-NEXT:    [[IC:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT:    ret i1 [[IC]]
+;
+  %dt = shl nuw i8 %d, %s
+  %or1 = or i8 %a, %b
+  %or2 = or i8 %c, %dt
+  %or = or i8 %or1, %or2
+  %ic = icmp eq i8 %or, 0
+  ret i1 %ic
+}
+
+define i1 @remove_shift_chain_abcd(i8 %a, i8 %b, i8 %c, i8 %d, i8 %s) {
+; CHECK-LABEL: @remove_shift_chain_abcd(
+; CHECK-NEXT:    [[OR1:%.*]] = or i8 [[AT:%.*]], [[BT:%.*]]
+; CHECK-NEXT:    [[OR2:%.*]] = or i8 [[CT:%.*]], [[DT:%.*]]
+; CHECK-NEXT:    [[OR:%.*]] = or i8 [[OR1]], [[OR2]]
+; CHECK-NEXT:    [[IC:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT:    ret i1 [[IC]]
+;
+  %at = shl nuw i8 %a, %s
+  %bt = shl nuw i8 %b, 2
+  %ct = shl nuw i8 %c, 1
+  %dt = shl nuw i8 %d, %s
+  %or1 = or i8 %at, %bt
+  %or2 = or i8 %ct, %dt
+  %or = or i8 %or1, %or2
+  %ic = icmp eq i8 %or, 0
+  ret i1 %ic
+}
+

Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am fine with this fold itself. But it prevents AggressiveInstCombine from merging consecutive loads: https://godbolt.org/z/aq94MeM3n

We should not perform this fold when the shift's LHS is zext(load).
If you think this workaround is too fragile, an alternative solution is to convert (load iN p) | (load iN p+N/8) == 0 into (load i2N p) == 0 in AggressiveInstCombine.

@davemgreen davemgreen force-pushed the gh-ic-orshiftchain branch from 4bc6794 to fc1184a Compare May 8, 2025 10:43
@davemgreen
Copy link
Collaborator Author

I've tried to move this into aggressive instcombine to allow it to check for trees of loads first. The transform is looking up through trees already, so maybe that is a better place for it. Let me know what you think.

@davemgreen davemgreen changed the title [Instcombine] Fold away shift in or reduction chain. [AggressiveInstcombine] Fold away shift in or reduction chain. May 8, 2025
Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM. Thank you!

…37875

If we have icmp eq or(a, shl(b)), 0 then the shift can be removed so long as it
is nuw or nsw. It is still comparing that some bits are non-zero.
https://alive2.llvm.org/ce/z/nhrBVX.

This is also true of ne, and true for longer or chains.
@davemgreen davemgreen force-pushed the gh-ic-orshiftchain branch from fc1184a to 57ff4e8 Compare May 9, 2025 11:26
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants