-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[SimplifyCFG] Relax cttz
cost check in simplifySwitchOfPowersOfTwo
#145159
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[SimplifyCFG] Relax cttz
cost check in simplifySwitchOfPowersOfTwo
#145159
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Antonio Frighetto (antoniofrighetto) ChangesWe should be able to allow Full diff: https://github.com/llvm/llvm-project/pull/145159.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index eb52c1b7e6fba..f4f15c8809c94 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7198,8 +7198,10 @@ static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
/// will be transformed to:
/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
///
-/// This transformation allows better lowering and could allow transforming into
-/// a lookup table.
+/// This transformation allows better lowering and may transform the switch
+/// instruction into a sequence of bit manipulation and a smaller
+/// log2(C)-indexed value table (instead of traditionally emitting a load of the
+/// address of the jump target, and indirectly jump to it).
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
const DataLayout &DL,
const TargetTransformInfo &TTI) {
@@ -7211,26 +7213,18 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
!DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
return false;
- const auto CttzIntrinsicCost = TTI.getIntrinsicInstrCost(
- IntrinsicCostAttributes(Intrinsic::cttz, CondTy,
- {Condition, ConstantInt::getTrue(Context)}),
- TTI::TCK_SizeAndLatency);
-
- if (CttzIntrinsicCost > TTI::TCC_Basic)
- // Inserting intrinsic is too expensive.
+ // Ensure trailing zeroes count intrinsic emission is not too expensive.
+ IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
+ {Condition, ConstantInt::getTrue(Context)});
+ if (TTI.getIntrinsicInstrCost(Attrs, TTI::TCK_SizeAndLatency) >
+ TTI::TCC_Basic * 2)
return false;
// Only bother with this optimization if there are more than 3 switch cases.
- // SDAG will only bother creating jump tables for 4 or more cases.
+ // SDAG will start emitting jump tables for 4 or more cases.
if (SI->getNumCases() < 4)
return false;
- // We perform this optimization only for switches with
- // unreachable default case.
- // This assumtion will save us from checking if `Condition` is a power of two.
- if (!SI->defaultDestUnreachable())
- return false;
-
// Check that switch cases are powers of two.
SmallVector<uint64_t, 4> Values;
for (const auto &Case : SI->cases()) {
diff --git a/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll b/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll
index 2ac94afd95910..37e771fc33b64 100644
--- a/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll
+++ b/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll
@@ -60,30 +60,44 @@ return:
ret i32 %p
}
-; Check that switch's of powers of two range is not reduced if default case is reachable
+; Check that switch's of powers of two range is not reduced if default case is reachable,
+; unless Zbb extension is on.
define i32 @switch_of_powers_reachable_default(i32 %x) {
-; CHECK-LABEL: @switch_of_powers_reachable_default(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: switch i32 [[X:%.*]], label [[RETURN:%.*]] [
-; CHECK-NEXT: i32 1, label [[BB1:%.*]]
-; CHECK-NEXT: i32 8, label [[BB2:%.*]]
-; CHECK-NEXT: i32 16, label [[BB3:%.*]]
-; CHECK-NEXT: i32 32, label [[BB4:%.*]]
-; CHECK-NEXT: i32 64, label [[BB5:%.*]]
-; CHECK-NEXT: ]
-; CHECK: bb1:
-; CHECK-NEXT: br label [[RETURN]]
-; CHECK: bb2:
-; CHECK-NEXT: br label [[RETURN]]
-; CHECK: bb3:
-; CHECK-NEXT: br label [[RETURN]]
-; CHECK: bb4:
-; CHECK-NEXT: br label [[RETURN]]
-; CHECK: bb5:
-; CHECK-NEXT: br label [[RETURN]]
-; CHECK: return:
-; CHECK-NEXT: [[P:%.*]] = phi i32 [ 3, [[BB1]] ], [ 2, [[BB2]] ], [ 1, [[BB3]] ], [ 0, [[BB4]] ], [ 42, [[BB5]] ], [ -1, [[ENTRY:%.*]] ]
-; CHECK-NEXT: ret i32 [[P]]
+; RV64I-LABEL: @switch_of_powers_reachable_default(
+; RV64I-NEXT: entry:
+; RV64I-NEXT: switch i32 [[X:%.*]], label [[RETURN:%.*]] [
+; RV64I-NEXT: i32 1, label [[BB1:%.*]]
+; RV64I-NEXT: i32 8, label [[BB2:%.*]]
+; RV64I-NEXT: i32 16, label [[BB3:%.*]]
+; RV64I-NEXT: i32 32, label [[BB4:%.*]]
+; RV64I-NEXT: i32 64, label [[BB5:%.*]]
+; RV64I-NEXT: ]
+; RV64I: bb1:
+; RV64I-NEXT: br label [[RETURN]]
+; RV64I: bb2:
+; RV64I-NEXT: br label [[RETURN]]
+; RV64I: bb3:
+; RV64I-NEXT: br label [[RETURN]]
+; RV64I: bb4:
+; RV64I-NEXT: br label [[RETURN]]
+; RV64I: bb5:
+; RV64I-NEXT: br label [[RETURN]]
+; RV64I: return:
+; RV64I-NEXT: [[P:%.*]] = phi i32 [ 3, [[BB1]] ], [ 2, [[BB2]] ], [ 1, [[BB3]] ], [ 0, [[BB4]] ], [ 42, [[BB5]] ], [ -1, [[ENTRY:%.*]] ]
+; RV64I-NEXT: ret i32 [[P]]
+;
+; RV64ZBB-LABEL: @switch_of_powers_reachable_default(
+; RV64ZBB-NEXT: entry:
+; RV64ZBB-NEXT: [[TMP0:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true)
+; RV64ZBB-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 7
+; RV64ZBB-NEXT: br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; RV64ZBB: switch.lookup:
+; RV64ZBB-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers_reachable_default, i32 0, i32 [[TMP0]]
+; RV64ZBB-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; RV64ZBB-NEXT: br label [[RETURN]]
+; RV64ZBB: return:
+; RV64ZBB-NEXT: [[P:%.*]] = phi i32 [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ], [ -1, [[ENTRY:%.*]] ]
+; RV64ZBB-NEXT: ret i32 [[P]]
;
entry:
switch i32 %x, label %default_case [
|
Only relaxing cost for now, need to take a better look on permitting reachable default cases too (currently miscompiles https://alive2.llvm.org/ce/z/wG8GoT). |
Okay, I think we miss some non-zero power-of-two checks here... |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think relaxing this to 2 is fine. Can you add an x86 test?
We should be able to allow `simplifySwitchOfPowersOfTwo` transform to take place, as, on recent X86 targets, the weighted latency-size appears to be 2. This favours computing trailing zeroes and indexing into a smaller value table, over generating a jump table with an indirect branch (which overall should be more efficient).
c12c49d
to
930ae93
Compare
Added, thanks, shall we take the opportunity extend this to cover reachable default cases too, or better this done separately? |
Separately. |
We should be able to allow
simplifySwitchOfPowersOfTwo
transform to take place, as, on recent X86 targets, the weighted latency-size appears to be 2. This favours computing trailing zeroes and indexing into a smaller value table, over generating a jump table with an indirect branch (which overall should be more efficient).