[RISCV] Improve fixed vector handling in isCtpopFast. #158380
base: main
Conversation
Previously we considered fixed vectors fast if Zvbb or Zbb is enabled. Zbb only helps if the vector type will end up being scalarized.
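As a minimal illustration of the distinction the description draws (the function name and element count below are hypothetical, not taken from the patch's tests): with only +zbb and no vector extension, a fixed-vector ctpop is scalarized during legalization, so each lane can use the scalar cpop instruction; with +v but without +zvbb the type is legal for RVV, so it stays a vector and must be expanded without a vector popcount instruction, which is why it should no longer be reported as fast.

```llvm
; Hypothetical reduced example. With -mattr=+zbb (and no +v) this vector is
; scalarized, so each element can become a scalar cpop. With -mattr=+v but
; without +zvbb it remains a vector and is expanded without cpop.v.
define <4 x i32> @ctpop_v4i32(<4 x i32> %x) {
  %c = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %x)
  ret <4 x i32> %c
}

declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
```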
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-risc-v
Author: Craig Topper (topperc)
Changes: Previously we considered fixed vectors fast if Zvbb or Zbb is enabled. Zbb only helps if the vector type will end up being scalarized.
Full diff: https://github.com/llvm/llvm-project/pull/158380.diff
3 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f9b484b98739f..b3c1082184162 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2752,7 +2752,7 @@ bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
case MVT::i8:
case MVT::i16:
case MVT::i32:
- return true;
+ return Subtarget.hasVInstructions();
case MVT::i64:
return Subtarget.hasVInstructionsI64();
case MVT::f16:
@@ -24840,12 +24840,17 @@ bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
}
bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
- if (VT.isScalableVector())
- return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
- if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
- return true;
- return Subtarget.hasCPOPLike() &&
- (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
+ if (VT.isVector()) {
+ EVT SVT = VT.getVectorElementType();
+ // If the element type is legal we can use cpop.v if it is enabled.
+ if (isLegalElementTypeForRVV(SVT))
+ return Subtarget.hasStdExtZvbb();
+ // If it will be scalarized, we might be able to use cpop.
+ return VT.isFixedLengthVector() && Subtarget.hasCPOPLike() &&
+ (SVT == MVT::i32 || SVT == MVT::i64);
+ }
+
+ return Subtarget.hasCPOPLike() && (VT == MVT::i32 || VT == MVT::i64);
}
unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
index 44b9331fd2caf..474708383b4c1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
-; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
-; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
+; RUN: llc -mtriple=riscv32 -mattr=+zbb,+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+zbb,+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+zbb,+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
+; RUN: llc -mtriple=riscv64 -mattr=+zbb,+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
define void @ctpop_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: ctpop_v16i8:
diff --git a/llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll b/llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll
index 9e4a10d9eb864..ed795c223ffaa 100644
--- a/llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll
@@ -18,8 +18,9 @@ define <4 x i1> @test_ult_2(<4 x i64> %x) {
;
; FAST-LABEL: define <4 x i1> @test_ult_2(
; FAST-SAME: <4 x i64> [[X:%.*]]) #[[ATTR0:[0-9]+]] {
-; FAST-NEXT: [[CTPOP:%.*]] = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> [[X]])
-; FAST-NEXT: [[CMP1:%.*]] = icmp ult <4 x i64> [[CTPOP]], splat (i64 2)
+; FAST-NEXT: [[TMP1:%.*]] = add <4 x i64> [[X]], splat (i64 -1)
+; FAST-NEXT: [[TMP2:%.*]] = and <4 x i64> [[X]], [[TMP1]]
+; FAST-NEXT: [[CMP1:%.*]] = icmp eq <4 x i64> [[TMP2]], zeroinitializer
; FAST-NEXT: ret <4 x i1> [[CMP1]]
;
%ctpop = call <4 x i64> @llvm.ctpop(<4 x i64> %x)
@@ -37,8 +38,9 @@ define <4 x i1> @test_ugt_1(<4 x i64> %x) {
;
; FAST-LABEL: define <4 x i1> @test_ugt_1(
; FAST-SAME: <4 x i64> [[X:%.*]]) #[[ATTR0]] {
-; FAST-NEXT: [[CTPOP:%.*]] = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> [[X]])
-; FAST-NEXT: [[CMP1:%.*]] = icmp ugt <4 x i64> [[CTPOP]], splat (i64 1)
+; FAST-NEXT: [[TMP1:%.*]] = add <4 x i64> [[X]], splat (i64 -1)
+; FAST-NEXT: [[TMP2:%.*]] = and <4 x i64> [[X]], [[TMP1]]
+; FAST-NEXT: [[CMP1:%.*]] = icmp ne <4 x i64> [[TMP2]], zeroinitializer
; FAST-NEXT: ret <4 x i1> [[CMP1]]
;
%ctpop = call <4 x i64> @llvm.ctpop(<4 x i64> %x)
@@ -56,8 +58,9 @@ define <4 x i1> @test_eq_1(<4 x i64> %x) {
;
; FAST-LABEL: define <4 x i1> @test_eq_1(
; FAST-SAME: <4 x i64> [[X:%.*]]) #[[ATTR0]] {
-; FAST-NEXT: [[CTPOP:%.*]] = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> [[X]])
-; FAST-NEXT: [[CMP1:%.*]] = icmp eq <4 x i64> [[CTPOP]], splat (i64 1)
+; FAST-NEXT: [[TMP1:%.*]] = add <4 x i64> [[X]], splat (i64 -1)
+; FAST-NEXT: [[TMP2:%.*]] = xor <4 x i64> [[X]], [[TMP1]]
+; FAST-NEXT: [[CMP1:%.*]] = icmp ugt <4 x i64> [[TMP2]], [[TMP1]]
; FAST-NEXT: ret <4 x i1> [[CMP1]]
;
%ctpop = call <4 x i64> @llvm.ctpop(<4 x i64> %x)
@@ -75,8 +78,9 @@ define <4 x i1> @test_ne_1(<4 x i64> %x) {
;
; FAST-LABEL: define <4 x i1> @test_ne_1(
; FAST-SAME: <4 x i64> [[X:%.*]]) #[[ATTR0]] {
-; FAST-NEXT: [[CTPOP:%.*]] = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> [[X]])
-; FAST-NEXT: [[CMP1:%.*]] = icmp ne <4 x i64> [[CTPOP]], splat (i64 1)
+; FAST-NEXT: [[TMP1:%.*]] = add <4 x i64> [[X]], splat (i64 -1)
+; FAST-NEXT: [[TMP2:%.*]] = xor <4 x i64> [[X]], [[TMP1]]
+; FAST-NEXT: [[CMP1:%.*]] = icmp ule <4 x i64> [[TMP2]], [[TMP1]]
; FAST-NEXT: ret <4 x i1> [[CMP1]]
;
%ctpop = call <4 x i64> @llvm.ctpop(<4 x i64> %x)
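The updated FAST check lines above are the branchless power-of-two tests that CodeGenPrepare emits once ctpop on <4 x i64> is no longer reported as fast without Zvbb. As a sketch of the identity behind the ult-2 and ugt-1 cases (scalar form, shown only for illustration): x & (x - 1) clears the lowest set bit, so the result is zero exactly when x has at most one bit set.

```llvm
; Illustrative scalar equivalent of the unfolded comparison in the tests
; above: the and-with-decrement is zero iff ctpop(x) < 2.
define i1 @has_at_most_one_bit(i64 %x) {
  %m1 = add i64 %x, -1
  %and = and i64 %x, %m1
  %cmp = icmp eq i64 %and, 0
  ret i1 %cmp
}
```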
Do we really want to be considering scalarized vector operations fast at all? What's the implication of just requiring zvbb for this? The prior code really looks more like an oversight than an intentional fallthrough.
It has test coverage in rv32zbb.ll and rv64zbb.ll, which don't have +v command lines, so I think it was semi-intentional. But I'm happy to remove it.