diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f9b484b98739f..b3c1082184162 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2752,7 +2752,7 @@ bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
   case MVT::i8:
   case MVT::i16:
   case MVT::i32:
-    return true;
+    return Subtarget.hasVInstructions();
   case MVT::i64:
     return Subtarget.hasVInstructionsI64();
   case MVT::f16:
@@ -24840,12 +24840,17 @@ bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
 }
 
 bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
-  if (VT.isScalableVector())
-    return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
-  if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
-    return true;
-  return Subtarget.hasCPOPLike() &&
-         (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
+  if (VT.isVector()) {
+    EVT SVT = VT.getVectorElementType();
+    // If the element type is legal we can use cpop.v if it is enabled.
+    if (isLegalElementTypeForRVV(SVT))
+      return Subtarget.hasStdExtZvbb();
+    // If it will be scalarized, we might be able to use cpop.
+    return VT.isFixedLengthVector() && Subtarget.hasCPOPLike() &&
+           (SVT == MVT::i32 || SVT == MVT::i64);
+  }
+
+  return Subtarget.hasCPOPLike() && (VT == MVT::i32 || VT == MVT::i64);
 }
 
 unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
index 44b9331fd2caf..474708383b4c1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
-; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
-; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
+; RUN: llc -mtriple=riscv32 -mattr=+zbb,+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+zbb,+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+zbb,+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
+; RUN: llc -mtriple=riscv64 -mattr=+zbb,+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
 
 define void @ctpop_v16i8(ptr %x, ptr %y) {
 ; CHECK-LABEL: ctpop_v16i8:
diff --git a/llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll b/llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll
index 9e4a10d9eb864..ed795c223ffaa 100644
--- a/llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll
@@ -18,8 +18,9 @@ define <4 x i1> @test_ult_2(<4 x i64> %x) {
 ;
 ; FAST-LABEL: define <4 x i1> @test_ult_2(
 ; FAST-SAME: <4 x i64> [[X:%.*]]) #[[ATTR0:[0-9]+]] {
-; FAST-NEXT:    [[CTPOP:%.*]] = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> [[X]])
-; FAST-NEXT:    [[CMP1:%.*]] = icmp ult <4 x i64> [[CTPOP]], splat (i64 2)
+; FAST-NEXT:    [[TMP1:%.*]] = add <4 x i64> [[X]], splat (i64 -1)
+; FAST-NEXT:    [[TMP2:%.*]] = and <4 x i64> [[X]], [[TMP1]]
+; FAST-NEXT:    [[CMP1:%.*]] = icmp eq <4 x i64> [[TMP2]], zeroinitializer
 ; FAST-NEXT:    ret <4 x i1> [[CMP1]]
 ;
   %ctpop = call <4 x i64> @llvm.ctpop(<4 x i64> %x)
@@ -37,8 +38,9 @@ define <4 x i1> @test_ugt_1(<4 x i64> %x) {
 ;
 ; FAST-LABEL: define <4 x i1> @test_ugt_1(
 ; FAST-SAME: <4 x i64> [[X:%.*]]) #[[ATTR0]] {
-; FAST-NEXT:    [[CTPOP:%.*]] = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> [[X]])
-; FAST-NEXT:    [[CMP1:%.*]] = icmp ugt <4 x i64> [[CTPOP]], splat (i64 1)
+; FAST-NEXT:    [[TMP1:%.*]] = add <4 x i64> [[X]], splat (i64 -1)
+; FAST-NEXT:    [[TMP2:%.*]] = and <4 x i64> [[X]], [[TMP1]]
+; FAST-NEXT:    [[CMP1:%.*]] = icmp ne <4 x i64> [[TMP2]], zeroinitializer
 ; FAST-NEXT:    ret <4 x i1> [[CMP1]]
 ;
   %ctpop = call <4 x i64> @llvm.ctpop(<4 x i64> %x)
@@ -56,8 +58,9 @@ define <4 x i1> @test_eq_1(<4 x i64> %x) {
 ;
 ; FAST-LABEL: define <4 x i1> @test_eq_1(
 ; FAST-SAME: <4 x i64> [[X:%.*]]) #[[ATTR0]] {
-; FAST-NEXT:    [[CTPOP:%.*]] = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> [[X]])
-; FAST-NEXT:    [[CMP1:%.*]] = icmp eq <4 x i64> [[CTPOP]], splat (i64 1)
+; FAST-NEXT:    [[TMP1:%.*]] = add <4 x i64> [[X]], splat (i64 -1)
+; FAST-NEXT:    [[TMP2:%.*]] = xor <4 x i64> [[X]], [[TMP1]]
+; FAST-NEXT:    [[CMP1:%.*]] = icmp ugt <4 x i64> [[TMP2]], [[TMP1]]
 ; FAST-NEXT:    ret <4 x i1> [[CMP1]]
 ;
   %ctpop = call <4 x i64> @llvm.ctpop(<4 x i64> %x)
@@ -75,8 +78,9 @@ define <4 x i1> @test_ne_1(<4 x i64> %x) {
 ;
 ; FAST-LABEL: define <4 x i1> @test_ne_1(
 ; FAST-SAME: <4 x i64> [[X:%.*]]) #[[ATTR0]] {
-; FAST-NEXT:    [[CTPOP:%.*]] = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> [[X]])
-; FAST-NEXT:    [[CMP1:%.*]] = icmp ne <4 x i64> [[CTPOP]], splat (i64 1)
+; FAST-NEXT:    [[TMP1:%.*]] = add <4 x i64> [[X]], splat (i64 -1)
+; FAST-NEXT:    [[TMP2:%.*]] = xor <4 x i64> [[X]], [[TMP1]]
+; FAST-NEXT:    [[CMP1:%.*]] = icmp ule <4 x i64> [[TMP2]], [[TMP1]]
 ; FAST-NEXT:    ret <4 x i1> [[CMP1]]
 ;
   %ctpop = call <4 x i64> @llvm.ctpop(<4 x i64> %x)
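
For reference, the FAST check lines above encode the standard power-of-two bit tricks that CodeGenPrepare emits once isCtpopFast returns false for these vector types. A minimal scalar sketch of the two identities, illustrative only and not part of the patch (the function names are hypothetical):

; Illustrative sketch (not from the patch): scalar forms of the unfolds
; the FAST check lines above expect. Function names are hypothetical.

; ctpop(x) u< 2, i.e. "x is zero or a power of two":
; clearing the lowest set bit must leave zero.
define i1 @is_pow2_or_zero(i64 %x) {
  %m = add i64 %x, -1        ; x - 1
  %a = and i64 %x, %m        ; x & (x - 1) clears the lowest set bit
  %c = icmp eq i64 %a, 0     ; zero iff at most one bit was set
  ret i1 %c
}

; ctpop(x) == 1, i.e. "x is exactly a power of two":
; (x ^ (x - 1)) u> (x - 1) holds iff exactly one bit is set.
define i1 @is_pow2(i64 %x) {
  %m = add i64 %x, -1        ; x - 1
  %t = xor i64 %x, %m        ; sets all bits up to and including the lowest set bit
  %c = icmp ugt i64 %t, %m   ; false for x == 0, where x - 1 is all-ones
  ret i1 %c
}

The ugt/ne/ule variants in the test diff are the corresponding negations of these two comparisons.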