Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 5a03b84

Browse files
authored
[AArch64][CostModel] Model sve costs for ctpop (#192428)
Targets that support SVE prefer SVE for ctpop on fixed-length vectors. Update the cost model to reflect this.
1 parent 1bcdc4b commit 5a03b84

3 files changed

Lines changed: 198 additions & 44 deletions

File tree

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -807,14 +807,25 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
807807
{ISD::CTPOP, MVT::v4i32, 3},
808808
{ISD::CTPOP, MVT::v8i16, 2},
809809
{ISD::CTPOP, MVT::v16i8, 1},
810-
{ISD::CTPOP, MVT::i64, 4},
810+
{ISD::CTPOP, MVT::i64, 4},
811811
{ISD::CTPOP, MVT::v2i32, 3},
812812
{ISD::CTPOP, MVT::v4i16, 2},
813-
{ISD::CTPOP, MVT::v8i8, 1},
814-
{ISD::CTPOP, MVT::i32, 5},
813+
{ISD::CTPOP, MVT::v8i8, 1},
814+
{ISD::CTPOP, MVT::i32, 5},
815+
// SVE types (for targets that prefer SVE over NEON for fixed-length vectors)
816+
{ISD::CTPOP, MVT::nxv2i64, 1},
817+
{ISD::CTPOP, MVT::nxv4i32, 1},
818+
{ISD::CTPOP, MVT::nxv8i16, 1},
819+
{ISD::CTPOP, MVT::nxv16i8, 1},
815820
};
816821
auto LT = getTypeLegalizationCost(RetTy);
817822
MVT MTy = LT.second;
823+
824+
// When SVE is available, CNT is used for both fixed-length and scalable vectors.
825+
if (ST->isSVEorStreamingSVEAvailable() && MTy.isFixedLengthVector())
826+
MTy = MVT::getScalableVectorVT(MTy.getVectorElementType(),
827+
128 / MTy.getScalarSizeInBits());
828+
818829
if (const auto *Entry = CostTableLookup(CtpopCostTbl, ISD::CTPOP, MTy)) {
819830
// Extra cost of +1 when illegal vector types are legalized by promoting
820831
// the integer type.

llvm/test/Analysis/CostModel/AArch64/ctpop.ll

Lines changed: 146 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2-
; RUN: opt < %s -mtriple=aarch64 -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output | FileCheck %s
3-
2+
; RUN: opt < %s -mtriple=aarch64 -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK,BASE
3+
; RUN: opt < %s -mtriple=aarch64 -mattr=+sve -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK,SVE
4+
; RUN: opt < %s -mtriple=aarch64 -mattr=+sve -aarch64-sve-vector-bits-min=256 -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK,SVE-256
45
; Verify the cost of scalar ctpop instructions.
56

67
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
@@ -49,72 +50,136 @@ declare i8 @llvm.ctpop.i8(i8)
4950
; Verify the cost of vector ctpop instructions.
5051

5152
define <2 x i64> @test_ctpop_v2i64(<2 x i64> %a) {
52-
; CHECK-LABEL: 'test_ctpop_v2i64'
53-
; CHECK-NEXT: Cost Model: Found costs of 4 for: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
54-
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %ctpop
53+
; BASE-LABEL: 'test_ctpop_v2i64'
54+
; BASE-NEXT: Cost Model: Found costs of 4 for: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
55+
; BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %ctpop
56+
;
57+
; SVE-LABEL: 'test_ctpop_v2i64'
58+
; SVE-NEXT: Cost Model: Found costs of 1 for: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
59+
; SVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %ctpop
60+
;
61+
; SVE-256-LABEL: 'test_ctpop_v2i64'
62+
; SVE-256-NEXT: Cost Model: Found costs of 1 for: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
63+
; SVE-256-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %ctpop
5564
;
5665
%ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
5766
ret <2 x i64> %ctpop
5867
}
5968

6069
define <2 x i32> @test_ctpop_v2i32(<2 x i32> %a) {
61-
; CHECK-LABEL: 'test_ctpop_v2i32'
62-
; CHECK-NEXT: Cost Model: Found costs of 3 for: %ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
63-
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i32> %ctpop
70+
; BASE-LABEL: 'test_ctpop_v2i32'
71+
; BASE-NEXT: Cost Model: Found costs of 3 for: %ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
72+
; BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i32> %ctpop
73+
;
74+
; SVE-LABEL: 'test_ctpop_v2i32'
75+
; SVE-NEXT: Cost Model: Found costs of 1 for: %ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
76+
; SVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i32> %ctpop
77+
;
78+
; SVE-256-LABEL: 'test_ctpop_v2i32'
79+
; SVE-256-NEXT: Cost Model: Found costs of 1 for: %ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
80+
; SVE-256-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i32> %ctpop
6481
;
6582
%ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
6683
ret <2 x i32> %ctpop
6784
}
6885

6986
define <4 x i32> @test_ctpop_v4i32(<4 x i32> %a) {
70-
; CHECK-LABEL: 'test_ctpop_v4i32'
71-
; CHECK-NEXT: Cost Model: Found costs of 3 for: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
72-
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %ctpop
87+
; BASE-LABEL: 'test_ctpop_v4i32'
88+
; BASE-NEXT: Cost Model: Found costs of 3 for: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
89+
; BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %ctpop
90+
;
91+
; SVE-LABEL: 'test_ctpop_v4i32'
92+
; SVE-NEXT: Cost Model: Found costs of 1 for: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
93+
; SVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %ctpop
94+
;
95+
; SVE-256-LABEL: 'test_ctpop_v4i32'
96+
; SVE-256-NEXT: Cost Model: Found costs of 1 for: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
97+
; SVE-256-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %ctpop
7398
;
7499
%ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
75100
ret <4 x i32> %ctpop
76101
}
77102

78103
define <2 x i16> @test_ctpop_v2i16(<2 x i16> %a) {
79-
; CHECK-LABEL: 'test_ctpop_v2i16'
80-
; CHECK-NEXT: Cost Model: Found costs of 4 for: %ctpop = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> %a)
81-
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i16> %ctpop
104+
; BASE-LABEL: 'test_ctpop_v2i16'
105+
; BASE-NEXT: Cost Model: Found costs of 4 for: %ctpop = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> %a)
106+
; BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i16> %ctpop
107+
;
108+
; SVE-LABEL: 'test_ctpop_v2i16'
109+
; SVE-NEXT: Cost Model: Found costs of 2 for: %ctpop = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> %a)
110+
; SVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i16> %ctpop
111+
;
112+
; SVE-256-LABEL: 'test_ctpop_v2i16'
113+
; SVE-256-NEXT: Cost Model: Found costs of 2 for: %ctpop = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> %a)
114+
; SVE-256-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i16> %ctpop
82115
;
83116
%ctpop = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> %a)
84117
ret <2 x i16> %ctpop
85118
}
86119

87120
define <4 x i16> @test_ctpop_v4i16(<4 x i16> %a) {
88-
; CHECK-LABEL: 'test_ctpop_v4i16'
89-
; CHECK-NEXT: Cost Model: Found costs of 2 for: %ctpop = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %a)
90-
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i16> %ctpop
121+
; BASE-LABEL: 'test_ctpop_v4i16'
122+
; BASE-NEXT: Cost Model: Found costs of 2 for: %ctpop = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %a)
123+
; BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i16> %ctpop
124+
;
125+
; SVE-LABEL: 'test_ctpop_v4i16'
126+
; SVE-NEXT: Cost Model: Found costs of 1 for: %ctpop = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %a)
127+
; SVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i16> %ctpop
128+
;
129+
; SVE-256-LABEL: 'test_ctpop_v4i16'
130+
; SVE-256-NEXT: Cost Model: Found costs of 1 for: %ctpop = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %a)
131+
; SVE-256-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i16> %ctpop
91132
;
92133
%ctpop = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %a)
93134
ret <4 x i16> %ctpop
94135
}
95136

96137
define <8 x i16> @test_ctpop_v8i16(<8 x i16> %a) {
97-
; CHECK-LABEL: 'test_ctpop_v8i16'
98-
; CHECK-NEXT: Cost Model: Found costs of 2 for: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
99-
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %ctpop
138+
; BASE-LABEL: 'test_ctpop_v8i16'
139+
; BASE-NEXT: Cost Model: Found costs of 2 for: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
140+
; BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %ctpop
141+
;
142+
; SVE-LABEL: 'test_ctpop_v8i16'
143+
; SVE-NEXT: Cost Model: Found costs of 1 for: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
144+
; SVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %ctpop
145+
;
146+
; SVE-256-LABEL: 'test_ctpop_v8i16'
147+
; SVE-256-NEXT: Cost Model: Found costs of 1 for: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
148+
; SVE-256-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %ctpop
100149
;
101150
%ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
102151
ret <8 x i16> %ctpop
103152
}
104153

105154
define <2 x i8> @test_ctpop_v2i8(<2 x i8> %a) {
106-
; CHECK-LABEL: 'test_ctpop_v2i8'
107-
; CHECK-NEXT: Cost Model: Found costs of 4 for: %ctpop = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> %a)
108-
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i8> %ctpop
155+
; BASE-LABEL: 'test_ctpop_v2i8'
156+
; BASE-NEXT: Cost Model: Found costs of 4 for: %ctpop = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> %a)
157+
; BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i8> %ctpop
158+
;
159+
; SVE-LABEL: 'test_ctpop_v2i8'
160+
; SVE-NEXT: Cost Model: Found costs of 2 for: %ctpop = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> %a)
161+
; SVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i8> %ctpop
162+
;
163+
; SVE-256-LABEL: 'test_ctpop_v2i8'
164+
; SVE-256-NEXT: Cost Model: Found costs of 2 for: %ctpop = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> %a)
165+
; SVE-256-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i8> %ctpop
109166
;
110167
%ctpop = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> %a)
111168
ret <2 x i8> %ctpop
112169
}
113170

114171
define <4 x i8> @test_ctpop_v4i8(<4 x i8> %a) {
115-
; CHECK-LABEL: 'test_ctpop_v4i8'
116-
; CHECK-NEXT: Cost Model: Found costs of 3 for: %ctpop = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> %a)
117-
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i8> %ctpop
172+
; BASE-LABEL: 'test_ctpop_v4i8'
173+
; BASE-NEXT: Cost Model: Found costs of 3 for: %ctpop = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> %a)
174+
; BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i8> %ctpop
175+
;
176+
; SVE-LABEL: 'test_ctpop_v4i8'
177+
; SVE-NEXT: Cost Model: Found costs of 2 for: %ctpop = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> %a)
178+
; SVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i8> %ctpop
179+
;
180+
; SVE-256-LABEL: 'test_ctpop_v4i8'
181+
; SVE-256-NEXT: Cost Model: Found costs of 2 for: %ctpop = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> %a)
182+
; SVE-256-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i8> %ctpop
118183
;
119184
%ctpop = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> %a)
120185
ret <4 x i8> %ctpop
@@ -139,36 +204,68 @@ define <16 x i8> @test_ctpop_v16i8(<16 x i8> %a) {
139204
}
140205

141206
define <4 x i64> @test_ctpop_v4i64(<4 x i64> %a) {
142-
; CHECK-LABEL: 'test_ctpop_v4i64'
143-
; CHECK-NEXT: Cost Model: Found costs of 8 for: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
144-
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %ctpop
207+
; BASE-LABEL: 'test_ctpop_v4i64'
208+
; BASE-NEXT: Cost Model: Found costs of 8 for: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
209+
; BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %ctpop
210+
;
211+
; SVE-LABEL: 'test_ctpop_v4i64'
212+
; SVE-NEXT: Cost Model: Found costs of 2 for: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
213+
; SVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %ctpop
214+
;
215+
; SVE-256-LABEL: 'test_ctpop_v4i64'
216+
; SVE-256-NEXT: Cost Model: Found costs of 1 for: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
217+
; SVE-256-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %ctpop
145218
;
146219
%ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
147220
ret <4 x i64> %ctpop
148221
}
149222

150223
define <8 x i32> @test_ctpop_v8i32(<8 x i32> %a) {
151-
; CHECK-LABEL: 'test_ctpop_v8i32'
152-
; CHECK-NEXT: Cost Model: Found costs of 6 for: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
153-
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %ctpop
224+
; BASE-LABEL: 'test_ctpop_v8i32'
225+
; BASE-NEXT: Cost Model: Found costs of 6 for: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
226+
; BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %ctpop
227+
;
228+
; SVE-LABEL: 'test_ctpop_v8i32'
229+
; SVE-NEXT: Cost Model: Found costs of 2 for: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
230+
; SVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %ctpop
231+
;
232+
; SVE-256-LABEL: 'test_ctpop_v8i32'
233+
; SVE-256-NEXT: Cost Model: Found costs of 1 for: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
234+
; SVE-256-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %ctpop
154235
;
155236
%ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
156237
ret <8 x i32> %ctpop
157238
}
158239

159240
define <16 x i16> @test_ctpop_v16i16(<16 x i16> %a) {
160-
; CHECK-LABEL: 'test_ctpop_v16i16'
161-
; CHECK-NEXT: Cost Model: Found costs of 4 for: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
162-
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %ctpop
241+
; BASE-LABEL: 'test_ctpop_v16i16'
242+
; BASE-NEXT: Cost Model: Found costs of 4 for: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
243+
; BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %ctpop
244+
;
245+
; SVE-LABEL: 'test_ctpop_v16i16'
246+
; SVE-NEXT: Cost Model: Found costs of 2 for: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
247+
; SVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %ctpop
248+
;
249+
; SVE-256-LABEL: 'test_ctpop_v16i16'
250+
; SVE-256-NEXT: Cost Model: Found costs of 1 for: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
251+
; SVE-256-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %ctpop
163252
;
164253
%ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
165254
ret <16 x i16> %ctpop
166255
}
167256

168257
define <32 x i8> @test_ctpop_v32i8(<32 x i8> %a) {
169-
; CHECK-LABEL: 'test_ctpop_v32i8'
170-
; CHECK-NEXT: Cost Model: Found costs of 2 for: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
171-
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %ctpop
258+
; BASE-LABEL: 'test_ctpop_v32i8'
259+
; BASE-NEXT: Cost Model: Found costs of 2 for: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
260+
; BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %ctpop
261+
;
262+
; SVE-LABEL: 'test_ctpop_v32i8'
263+
; SVE-NEXT: Cost Model: Found costs of 2 for: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
264+
; SVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %ctpop
265+
;
266+
; SVE-256-LABEL: 'test_ctpop_v32i8'
267+
; SVE-256-NEXT: Cost Model: Found costs of 1 for: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
268+
; SVE-256-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %ctpop
172269
;
173270
%ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
174271
ret <32 x i8> %ctpop
@@ -184,9 +281,17 @@ define i64 @test_ctpop_noneon_i64(i64 %a) "target-features"="-fp-armv8,-neon" {
184281
}
185282

186283
define <2 x i64> @test_ctpop_noneon_v2i64(<2 x i64> %a) "target-features"="-fp-armv8,-neon" {
187-
; CHECK-LABEL: 'test_ctpop_noneon_v2i64'
188-
; CHECK-NEXT: Cost Model: Found costs of 24 for: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
189-
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %ctpop
284+
; BASE-LABEL: 'test_ctpop_noneon_v2i64'
285+
; BASE-NEXT: Cost Model: Found costs of 24 for: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
286+
; BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %ctpop
287+
;
288+
; SVE-LABEL: 'test_ctpop_noneon_v2i64'
289+
; SVE-NEXT: Cost Model: Found costs of 24 for: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
290+
; SVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %ctpop
291+
;
292+
; SVE-256-LABEL: 'test_ctpop_noneon_v2i64'
293+
; SVE-256-NEXT: Cost Model: Found costs of 12 for: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
294+
; SVE-256-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %ctpop
190295
;
191296
%ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
192297
ret <2 x i64> %ctpop

0 commit comments

Comments
 (0)