1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
19#include "RISCVRegisterInfo.h"
21#include "RISCVSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
39#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/IntrinsicsRISCV.h"
46#include "llvm/Support/Debug.h"
52#include <optional>
53
54using namespace llvm;
55
56#define DEBUG_TYPE "riscv-lower"
57
58STATISTIC(NumTailCalls, "Number of tail calls");
59
61 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
62 cl::desc("Give the maximum size (in number of nodes) of the web of "
63 "instructions that we will consider for VW expansion"),
64 cl::init(18));
65
66static cl::opt<bool>
67 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
68 cl::desc("Allow the formation of VW_W operations (e.g., "
69 "VWADD_W) with splat constants"),
70 cl::init(false));
71
73 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
74 cl::desc("Set the minimum number of repetitions of a divisor to allow "
75 "transformation to multiplications by the reciprocal"),
76 cl::init(2));
77
78static cl::opt<int>
80 cl::desc("Give the maximum number of instructions that we will "
81 "use for creating a floating-point immediate value"),
82 cl::init(3));
83
84static cl::opt<bool>
85 ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden,
86 cl::desc("Swap add and addi in cases where the add may "
87 "be combined with a shift"),
88 cl::init(true));
89
91 const RISCVSubtarget &STI)
92 : TargetLowering(TM), Subtarget(STI) {
93
94 RISCVABI::ABI ABI = Subtarget.getTargetABI();
95 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
96
97 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
98 !Subtarget.hasStdExtF()) {
99 errs() << "Hard-float 'f' ABI can't be used for a target that "
100 "doesn't support the F instruction set extension (ignoring "
101 "target-abi)\n";
102 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
103 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
104 !Subtarget.hasStdExtD()) {
105 errs() << "Hard-float 'd' ABI can't be used for a target that "
106 "doesn't support the D instruction set extension (ignoring "
107 "target-abi)\n";
108 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
109 }
110
111 switch (ABI) {
112 default:
113 reportFatalUsageError("Don't know how to lower this ABI");
122 break;
123 }
124
125 MVT XLenVT = Subtarget.getXLenVT();
126
127 // Set up the register classes.
128 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
129
130 if (Subtarget.hasStdExtZfhmin())
131 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
132 if (Subtarget.hasStdExtZfbfmin() || Subtarget.hasVendorXAndesBFHCvt())
133 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
134 if (Subtarget.hasStdExtF())
135 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
136 if (Subtarget.hasStdExtD())
137 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
138 if (Subtarget.hasStdExtZhinxmin())
139 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
140 if (Subtarget.hasStdExtZfinx())
141 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
142 if (Subtarget.hasStdExtZdinx()) {
143 if (Subtarget.is64Bit())
144 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
145 else
146 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
147 }
148
149 static const MVT::SimpleValueType BoolVecVTs[] = {
150 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
151 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
152 static const MVT::SimpleValueType IntVecVTs[] = {
153 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
154 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
155 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
156 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
157 MVT::nxv4i64, MVT::nxv8i64};
158 static const MVT::SimpleValueType F16VecVTs[] = {
159 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
160 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
161 static const MVT::SimpleValueType BF16VecVTs[] = {
162 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
163 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
164 static const MVT::SimpleValueType F32VecVTs[] = {
165 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
166 static const MVT::SimpleValueType F64VecVTs[] = {
167 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
168 static const MVT::SimpleValueType VecTupleVTs[] = {
169 MVT::riscv_nxv1i8x2, MVT::riscv_nxv1i8x3, MVT::riscv_nxv1i8x4,
170 MVT::riscv_nxv1i8x5, MVT::riscv_nxv1i8x6, MVT::riscv_nxv1i8x7,
171 MVT::riscv_nxv1i8x8, MVT::riscv_nxv2i8x2, MVT::riscv_nxv2i8x3,
172 MVT::riscv_nxv2i8x4, MVT::riscv_nxv2i8x5, MVT::riscv_nxv2i8x6,
173 MVT::riscv_nxv2i8x7, MVT::riscv_nxv2i8x8, MVT::riscv_nxv4i8x2,
174 MVT::riscv_nxv4i8x3, MVT::riscv_nxv4i8x4, MVT::riscv_nxv4i8x5,
175 MVT::riscv_nxv4i8x6, MVT::riscv_nxv4i8x7, MVT::riscv_nxv4i8x8,
176 MVT::riscv_nxv8i8x2, MVT::riscv_nxv8i8x3, MVT::riscv_nxv8i8x4,
177 MVT::riscv_nxv8i8x5, MVT::riscv_nxv8i8x6, MVT::riscv_nxv8i8x7,
178 MVT::riscv_nxv8i8x8, MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
179 MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};
180
181 if (Subtarget.hasVInstructions()) {
182 auto addRegClassForRVV = [this](MVT VT) {
183 // Disable the smallest fractional LMUL types if ELEN is less than
184 // RVVBitsPerBlock.
185 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
186 if (VT.getVectorMinNumElements() < MinElts)
187 return;
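 // For example, with ELEN == 32 this gives MinElts == RVVBitsPerBlock / 32
 // == 2, so the nxv1 types (which would need an even smaller fractional
 // LMUL) never get a register class.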
188
189 unsigned Size = VT.getSizeInBits().getKnownMinValue();
190 const TargetRegisterClass *RC;
191 if (Size <= RISCV::RVVBitsPerBlock)
192 RC = &RISCV::VRRegClass;
193 else if (Size == 2 * RISCV::RVVBitsPerBlock)
194 RC = &RISCV::VRM2RegClass;
195 else if (Size == 4 * RISCV::RVVBitsPerBlock)
196 RC = &RISCV::VRM4RegClass;
197 else if (Size == 8 * RISCV::RVVBitsPerBlock)
198 RC = &RISCV::VRM8RegClass;
199 else
200 llvm_unreachable("Unexpected size");
201
202 addRegisterClass(VT, RC);
203 };
204
205 for (MVT VT : BoolVecVTs)
206 addRegClassForRVV(VT);
207 for (MVT VT : IntVecVTs) {
208 if (VT.getVectorElementType() == MVT::i64 &&
209 !Subtarget.hasVInstructionsI64())
210 continue;
211 addRegClassForRVV(VT);
212 }
213
214 if (Subtarget.hasVInstructionsF16Minimal() ||
215 Subtarget.hasVendorXAndesVPackFPH())
216 for (MVT VT : F16VecVTs)
217 addRegClassForRVV(VT);
218
219 if (Subtarget.hasVInstructionsBF16Minimal() ||
220 Subtarget.hasVendorXAndesVBFHCvt())
221 for (MVT VT : BF16VecVTs)
222 addRegClassForRVV(VT);
223
224 if (Subtarget.hasVInstructionsF32())
225 for (MVT VT : F32VecVTs)
226 addRegClassForRVV(VT);
227
228 if (Subtarget.hasVInstructionsF64())
229 for (MVT VT : F64VecVTs)
230 addRegClassForRVV(VT);
231
232 if (Subtarget.useRVVForFixedLengthVectors()) {
233 auto addRegClassForFixedVectors = [this](MVT VT) {
234 MVT ContainerVT = getContainerForFixedLengthVector(VT);
235 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
236 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
237 addRegisterClass(VT, TRI.getRegClass(RCID));
238 };
240 if (useRVVForFixedLengthVectorVT(VT))
241 addRegClassForFixedVectors(VT);
242
244 if (useRVVForFixedLengthVectorVT(VT))
245 addRegClassForFixedVectors(VT);
246 }
247
248 addRegisterClass(MVT::riscv_nxv1i8x2, &RISCV::VRN2M1RegClass);
249 addRegisterClass(MVT::riscv_nxv1i8x3, &RISCV::VRN3M1RegClass);
250 addRegisterClass(MVT::riscv_nxv1i8x4, &RISCV::VRN4M1RegClass);
251 addRegisterClass(MVT::riscv_nxv1i8x5, &RISCV::VRN5M1RegClass);
252 addRegisterClass(MVT::riscv_nxv1i8x6, &RISCV::VRN6M1RegClass);
253 addRegisterClass(MVT::riscv_nxv1i8x7, &RISCV::VRN7M1RegClass);
254 addRegisterClass(MVT::riscv_nxv1i8x8, &RISCV::VRN8M1RegClass);
255 addRegisterClass(MVT::riscv_nxv2i8x2, &RISCV::VRN2M1RegClass);
256 addRegisterClass(MVT::riscv_nxv2i8x3, &RISCV::VRN3M1RegClass);
257 addRegisterClass(MVT::riscv_nxv2i8x4, &RISCV::VRN4M1RegClass);
258 addRegisterClass(MVT::riscv_nxv2i8x5, &RISCV::VRN5M1RegClass);
259 addRegisterClass(MVT::riscv_nxv2i8x6, &RISCV::VRN6M1RegClass);
260 addRegisterClass(MVT::riscv_nxv2i8x7, &RISCV::VRN7M1RegClass);
261 addRegisterClass(MVT::riscv_nxv2i8x8, &RISCV::VRN8M1RegClass);
262 addRegisterClass(MVT::riscv_nxv4i8x2, &RISCV::VRN2M1RegClass);
263 addRegisterClass(MVT::riscv_nxv4i8x3, &RISCV::VRN3M1RegClass);
264 addRegisterClass(MVT::riscv_nxv4i8x4, &RISCV::VRN4M1RegClass);
265 addRegisterClass(MVT::riscv_nxv4i8x5, &RISCV::VRN5M1RegClass);
266 addRegisterClass(MVT::riscv_nxv4i8x6, &RISCV::VRN6M1RegClass);
267 addRegisterClass(MVT::riscv_nxv4i8x7, &RISCV::VRN7M1RegClass);
268 addRegisterClass(MVT::riscv_nxv4i8x8, &RISCV::VRN8M1RegClass);
269 addRegisterClass(MVT::riscv_nxv8i8x2, &RISCV::VRN2M1RegClass);
270 addRegisterClass(MVT::riscv_nxv8i8x3, &RISCV::VRN3M1RegClass);
271 addRegisterClass(MVT::riscv_nxv8i8x4, &RISCV::VRN4M1RegClass);
272 addRegisterClass(MVT::riscv_nxv8i8x5, &RISCV::VRN5M1RegClass);
273 addRegisterClass(MVT::riscv_nxv8i8x6, &RISCV::VRN6M1RegClass);
274 addRegisterClass(MVT::riscv_nxv8i8x7, &RISCV::VRN7M1RegClass);
275 addRegisterClass(MVT::riscv_nxv8i8x8, &RISCV::VRN8M1RegClass);
276 addRegisterClass(MVT::riscv_nxv16i8x2, &RISCV::VRN2M2RegClass);
277 addRegisterClass(MVT::riscv_nxv16i8x3, &RISCV::VRN3M2RegClass);
278 addRegisterClass(MVT::riscv_nxv16i8x4, &RISCV::VRN4M2RegClass);
279 addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
280 }
281
282 // Compute derived properties from the register classes.
284
286
288 MVT::i1, Promote);
289 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
291 MVT::i1, Promote);
292
293 // TODO: add all necessary setOperationAction calls.
294 setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Custom);
295
296 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
297 setOperationAction(ISD::BR_CC, XLenVT, Expand);
298 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
300
305 if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
308 }
309
310 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
311
312 setOperationAction(ISD::VASTART, MVT::Other, Custom);
313 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
314
315 if (!Subtarget.hasVendorXTHeadBb() && !Subtarget.hasVendorXqcibm() &&
316 !Subtarget.hasVendorXAndesPerf())
318
320
321 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
322 !Subtarget.hasVendorXqcibm() && !Subtarget.hasVendorXAndesPerf() &&
323 !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
324 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
325
326 if (Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit()) {
327 setOperationAction(ISD::LOAD, MVT::i64, Custom);
328 setOperationAction(ISD::STORE, MVT::i64, Custom);
329 }
330
331 if (Subtarget.is64Bit()) {
333
334 setOperationAction(ISD::LOAD, MVT::i32, Custom);
336 MVT::i32, Custom);
338 if (!Subtarget.hasStdExtZbb())
341 Custom);
343 }
344 if (!Subtarget.hasStdExtZmmul()) {
346 } else if (Subtarget.is64Bit()) {
349 } else {
351 }
352
353 if (!Subtarget.hasStdExtM()) {
355 Expand);
356 } else if (Subtarget.is64Bit()) {
358 {MVT::i8, MVT::i16, MVT::i32}, Custom);
359 }
360
363 Expand);
364
366 Custom);
367
368 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
369 if (Subtarget.is64Bit())
371 } else if (Subtarget.hasVendorXTHeadBb()) {
372 if (Subtarget.is64Bit())
375 } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
377 } else {
379 }
380
382 Subtarget.hasREV8Like() ? Legal : Expand);
383
384 if ((Subtarget.hasVendorXCVbitmanip() || Subtarget.hasVendorXqcibm()) &&
385 !Subtarget.is64Bit()) {
387 } else {
388 // Zbkb can use rev8+brev8 to implement bitreverse.
390 Subtarget.hasStdExtZbkb() ? Custom : Expand);
391 if (Subtarget.hasStdExtZbkb())
393 }
394
395 if (Subtarget.hasStdExtZbb() ||
396 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
398 Legal);
399 }
400
401 if (Subtarget.hasCTZLike()) {
402 if (Subtarget.is64Bit())
404 } else {
406 }
407
408 if (!Subtarget.hasCPOPLike()) {
409 // TODO: These should be set to LibCall, but this currently breaks
410 // the Linux kernel build. See #101786. It also lacks i128 test coverage.
411 if (Subtarget.is64Bit())
413 else
416 }
417
418 if (Subtarget.hasCLZLike()) {
419 // We need the custom lowering to make sure that the resulting sequence
420 // for the 32bit case is efficient on 64bit targets.
421 // Use default promotion for i32 without Zbb.
422 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbb())
424 } else {
426 }
427
428 if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) {
430 } else if (Subtarget.hasShortForwardBranchOpt()) {
431 // We can use PseudoCCSUB to implement ABS.
433 } else if (Subtarget.is64Bit()) {
435 }
436
437 if (!Subtarget.useCCMovInsn() && !Subtarget.hasVendorXTHeadCondMov())
439
440 if (Subtarget.hasVendorXqcia() && !Subtarget.is64Bit()) {
447 }
448
449 static const unsigned FPLegalNodeTypes[] = {
450 ISD::FMINNUM, ISD::FMAXNUM, ISD::FMINIMUMNUM,
451 ISD::FMAXIMUMNUM, ISD::LRINT, ISD::LLRINT,
452 ISD::LROUND, ISD::LLROUND, ISD::STRICT_LRINT,
457
458 static const ISD::CondCode FPCCToExpand[] = {
462
463 static const unsigned FPOpToExpand[] = {
464 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
465 ISD::FREM};
466
467 static const unsigned FPRndMode[] = {
468 ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
469 ISD::FROUNDEVEN};
470
471 static const unsigned ZfhminZfbfminPromoteOps[] = {
472 ISD::FMINNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM,
473 ISD::FMINIMUMNUM, ISD::FADD, ISD::FSUB,
478 ISD::SETCC, ISD::FCEIL, ISD::FFLOOR,
479 ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
480 ISD::FROUNDEVEN, ISD::FCANONICALIZE};
481
482 if (Subtarget.hasStdExtZfbfmin()) {
483 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
487 setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
488 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
490 setOperationAction(ISD::FABS, MVT::bf16, Custom);
491 setOperationAction(ISD::FNEG, MVT::bf16, Custom);
495 }
496
497 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
498 if (Subtarget.hasStdExtZfhOrZhinx()) {
499 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
500 setOperationAction(FPRndMode, MVT::f16,
501 Subtarget.hasStdExtZfa() ? Legal : Custom);
503 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16,
504 Subtarget.hasStdExtZfa() ? Legal : Custom);
505 if (Subtarget.hasStdExtZfa())
507 } else {
508 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
509 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Promote);
510 for (auto Op : {ISD::LROUND, ISD::LLROUND, ISD::LRINT, ISD::LLRINT,
513 setOperationAction(Op, MVT::f16, Custom);
514 setOperationAction(ISD::FABS, MVT::f16, Custom);
515 setOperationAction(ISD::FNEG, MVT::f16, Custom);
519 }
520
521 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
522
525 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
528 setOperationAction(ISD::BR_CC, MVT::f16, Expand);
529
531 ISD::FNEARBYINT, MVT::f16,
532 Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);
533 setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
534 ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
535 ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
536 ISD::FLOG10, ISD::FLDEXP, ISD::FFREXP},
537 MVT::f16, Promote);
538
539 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
540 // complete support for all operations in LegalizeDAG.
545 MVT::f16, Promote);
546
547 // We need to custom promote this.
548 if (Subtarget.is64Bit())
549 setOperationAction(ISD::FPOWI, MVT::i32, Custom);
550 }
551
552 if (Subtarget.hasStdExtFOrZfinx()) {
553 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
554 setOperationAction(FPRndMode, MVT::f32,
555 Subtarget.hasStdExtZfa() ? Legal : Custom);
556 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
559 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
560 setOperationAction(FPOpToExpand, MVT::f32, Expand);
561 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
562 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
563 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
564 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
566 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
567 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
568 Subtarget.isSoftFPABI() ? LibCall : Custom);
569 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
570 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);
571 setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, Custom);
572 setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, Custom);
573
574 if (Subtarget.hasStdExtZfa()) {
576 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
577 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal);
578 } else {
579 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
580 }
581 }
582
583 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
584 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
585
586 if (Subtarget.hasStdExtDOrZdinx()) {
587 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
588
589 if (!Subtarget.is64Bit())
590 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
591
592 if (Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
593 !Subtarget.is64Bit()) {
594 setOperationAction(ISD::LOAD, MVT::f64, Custom);
595 setOperationAction(ISD::STORE, MVT::f64, Custom);
596 }
597
598 if (Subtarget.hasStdExtZfa()) {
600 setOperationAction(FPRndMode, MVT::f64, Legal);
601 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
602 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Legal);
603 } else {
604 if (Subtarget.is64Bit())
605 setOperationAction(FPRndMode, MVT::f64, Custom);
606
607 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);
608 }
609
612 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
615 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
616 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
617 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
618 setOperationAction(FPOpToExpand, MVT::f64, Expand);
619 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
620 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
621 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
622 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
624 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
625 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
626 Subtarget.isSoftFPABI() ? LibCall : Custom);
627 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
628 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
629 setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, Custom);
630 setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, Expand);
631 }
632
633 if (Subtarget.is64Bit()) {
636 MVT::i32, Custom);
637 setOperationAction(ISD::LROUND, MVT::i32, Custom);
638 }
639
640 if (Subtarget.hasStdExtFOrZfinx()) {
642 Custom);
643
644 // f16/bf16 require custom handling.
646 Custom);
648 Custom);
649
651 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
652 setOperationAction(ISD::GET_FPENV, XLenVT, Custom);
653 setOperationAction(ISD::SET_FPENV, XLenVT, Custom);
654 setOperationAction(ISD::RESET_FPENV, MVT::Other, Custom);
655 setOperationAction(ISD::GET_FPMODE, XLenVT, Custom);
656 setOperationAction(ISD::SET_FPMODE, XLenVT, Custom);
657 setOperationAction(ISD::RESET_FPMODE, MVT::Other, Custom);
658 }
659
662 XLenVT, Custom);
663
665
666 if (Subtarget.is64Bit())
668
669 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
670 // Unfortunately this can't be determined just from the ISA naming string.
671 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
672 Subtarget.is64Bit() ? Legal : Custom);
673 setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64,
674 Subtarget.is64Bit() ? Legal : Custom);
675
676 if (Subtarget.is64Bit()) {
677 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
678 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
679 }
680
681 setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
683 if (Subtarget.is64Bit())
685
686 if (Subtarget.hasVendorXMIPSCBOP())
687 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
688 else if (Subtarget.hasStdExtZicbop())
689 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
690
691 if (Subtarget.hasStdExtA()) {
692 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
693 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
695 else
697 } else if (Subtarget.hasForcedAtomics()) {
698 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
699 } else {
701 }
702
703 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
704
706
707 if (getTargetMachine().getTargetTriple().isOSLinux()) {
708 // Custom lowering of llvm.clear_cache.
710 }
711
712 if (Subtarget.hasVInstructions()) {
714
715 setOperationAction(ISD::VSCALE, XLenVT, Custom);
716
717 // RVV intrinsics may have illegal operands.
718 // We also need to custom legalize vmv.x.s.
721 {MVT::i8, MVT::i16}, Custom);
722 if (Subtarget.is64Bit())
724 MVT::i32, Custom);
725 else
727 MVT::i64, Custom);
728
730 MVT::Other, Custom);
731
732 static const unsigned IntegerVPOps[] = {
733 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
734 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
735 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
736 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
737 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
738 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
739 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
740 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
741 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
742 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
743 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
744 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
745 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
746 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
747 ISD::EXPERIMENTAL_VP_SPLAT};
748
749 static const unsigned FloatingPointVPOps[] = {
750 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
751 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
752 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
753 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
754 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
755 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
756 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
757 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
758 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
759 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
760 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
761 ISD::VP_LLRINT, ISD::VP_REDUCE_FMINIMUM,
762 ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
763
764 static const unsigned IntegerVecReduceOps[] = {
765 ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
766 ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
767 ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN};
768
769 static const unsigned FloatingPointVecReduceOps[] = {
770 ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,
771 ISD::VECREDUCE_FMAX, ISD::VECREDUCE_FMINIMUM, ISD::VECREDUCE_FMAXIMUM};
772
773 static const unsigned FloatingPointLibCallOps[] = {
774 ISD::FREM, ISD::FPOW, ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
775 ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2, ISD::FLOG10};
776
777 if (!Subtarget.is64Bit()) {
778 // We must custom-lower certain vXi64 operations on RV32 due to the vector
779 // element type being illegal.
781 MVT::i64, Custom);
782
783 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
784
785 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
786 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
787 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
788 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
789 MVT::i64, Custom);
790 }
791
792 for (MVT VT : BoolVecVTs) {
793 if (!isTypeLegal(VT))
794 continue;
795
797
798 // Mask VTs are custom-expanded into a series of standard nodes
802 VT, Custom);
803
805 Custom);
806
808 setOperationAction({ISD::SELECT_CC, ISD::VSELECT, ISD::VP_SELECT}, VT,
809 Expand);
810 setOperationAction(ISD::VP_MERGE, VT, Custom);
811
812 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
813 Custom);
814
815 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
816
818 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
819 Custom);
820
822 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
823 Custom);
824
825 // RVV has native int->float & float->int conversions where the
826 // element type sizes are within one power-of-two of each other. Any
827 // wider distances between type sizes have to be lowered as sequences
828 // which progressively narrow the gap in stages.
833 VT, Custom);
835 Custom);
836
837 // Expand all extending loads to types larger than this, and truncating
838 // stores from types larger than this.
840 setTruncStoreAction(VT, OtherVT, Expand);
842 OtherVT, Expand);
843 }
844
845 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
846 ISD::VP_TRUNCATE, ISD::VP_SETCC},
847 VT, Custom);
848
851
853
854 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
855 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
856 setOperationAction(ISD::EXPERIMENTAL_VP_SPLAT, VT, Custom);
857
860 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
861 }
862
863 for (MVT VT : IntVecVTs) {
864 if (!isTypeLegal(VT))
865 continue;
866
869
870 // Vectors implement MULHS/MULHU.
872
873 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
874 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
876
878 Legal);
879
881
882 // Custom-lower extensions and truncations from/to mask types.
884 VT, Custom);
885
886 // RVV has native int->float & float->int conversions where the
887 // element type sizes are within one power-of-two of each other. Any
888 // wider distances between type sizes have to be lowered as sequences
889 // which progressively narrow the gap in stages.
894 VT, Custom);
896 Custom);
900 VT, Legal);
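 // For example, converting nxv2i8 to nxv2f64 (element sizes more than one
 // power of two apart) is lowered as a sequence of conversions, each of
 // which narrows the size gap by one power of two.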
901
902 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
903 // nodes which truncate by one power of two at a time.
906 Custom);
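 // For example, truncating nxv2i64 to nxv2i8 becomes three
 // TRUNCATE_VECTOR_VL nodes: i64 -> i32 -> i16 -> i8.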
907
908 // Custom-lower insert/extract operations to simplify patterns.
910 Custom);
911
912 // Custom-lower reduction operations to set up the corresponding custom
913 // nodes' operands.
914 setOperationAction(IntegerVecReduceOps, VT, Custom);
915
916 setOperationAction(IntegerVPOps, VT, Custom);
917
918 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
919
920 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
921 VT, Custom);
922
924 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
925 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
926 VT, Custom);
927 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
928
931 VT, Custom);
932
935
937
939 setTruncStoreAction(VT, OtherVT, Expand);
941 OtherVT, Expand);
942 }
943
946
947 // Splice
949
950 if (Subtarget.hasStdExtZvkb()) {
952 setOperationAction(ISD::VP_BSWAP, VT, Custom);
953 } else {
954 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
956 }
957
958 if (Subtarget.hasStdExtZvbb()) {
960 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
961 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
962 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
963 VT, Custom);
964 } else {
965 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
967 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
968 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
969 VT, Expand);
970
971 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
972 // range of f32.
973 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
974 if (isTypeLegal(FloatVT)) {
976 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
977 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
978 VT, Custom);
979 }
980 }
981
983 }
984
985 for (MVT VT : VecTupleVTs) {
986 if (!isTypeLegal(VT))
987 continue;
988
989 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
990 }
991
992 // Expand various CCs to best match the RVV ISA, which natively supports UNE
993 // but no other unordered comparisons, and supports all ordered comparisons
994 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
995 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
996 // and we pattern-match those back to the "original", swapping operands once
997 // more. This way we catch both operations and both "vf" and "fv" forms with
998 // fewer patterns.
999 static const ISD::CondCode VFPCCToExpand[] = {
1003 };
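 // For example, SETOGT is expanded into SETOLT with the operands swapped,
 // and the patterns swap the operands back, so the same less-than patterns
 // cover GT/OGT/GE/OGE and both scalar-operand orders.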
1004
1005 // TODO: support more ops.
1006 static const unsigned ZvfhminZvfbfminPromoteOps[] = {
1007 ISD::FMINNUM,
1008 ISD::FMAXNUM,
1009 ISD::FMINIMUMNUM,
1010 ISD::FMAXIMUMNUM,
1011 ISD::FADD,
1012 ISD::FSUB,
1013 ISD::FMUL,
1014 ISD::FMA,
1015 ISD::FDIV,
1016 ISD::FSQRT,
1017 ISD::FCEIL,
1018 ISD::FTRUNC,
1019 ISD::FFLOOR,
1020 ISD::FROUND,
1021 ISD::FROUNDEVEN,
1022 ISD::FRINT,
1023 ISD::FNEARBYINT,
1025 ISD::SETCC,
1026 ISD::FMAXIMUM,
1027 ISD::FMINIMUM,
1034 ISD::VECREDUCE_FMIN,
1035 ISD::VECREDUCE_FMAX,
1036 ISD::VECREDUCE_FMINIMUM,
1037 ISD::VECREDUCE_FMAXIMUM};
1038
1039 // TODO: support more vp ops.
1040 static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
1041 ISD::VP_FADD,
1042 ISD::VP_FSUB,
1043 ISD::VP_FMUL,
1044 ISD::VP_FDIV,
1045 ISD::VP_FMA,
1046 ISD::VP_REDUCE_FMIN,
1047 ISD::VP_REDUCE_FMAX,
1048 ISD::VP_SQRT,
1049 ISD::VP_FMINNUM,
1050 ISD::VP_FMAXNUM,
1051 ISD::VP_FCEIL,
1052 ISD::VP_FFLOOR,
1053 ISD::VP_FROUND,
1054 ISD::VP_FROUNDEVEN,
1055 ISD::VP_FROUNDTOZERO,
1056 ISD::VP_FRINT,
1057 ISD::VP_FNEARBYINT,
1058 ISD::VP_SETCC,
1059 ISD::VP_FMINIMUM,
1060 ISD::VP_FMAXIMUM,
1061 ISD::VP_REDUCE_FMINIMUM,
1062 ISD::VP_REDUCE_FMAXIMUM};
1063
1064 // Sets common operation actions on RVV floating-point vector types.
1065 const auto SetCommonVFPActions = [&](MVT VT) {
1067 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
1068 // sizes are within one power-of-two of each other. Therefore conversions
1069 // between vXf16 and vXf64 must be lowered as sequences which convert via
1070 // vXf32.
1071 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
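 // For example, an nxv2f16 -> nxv2f64 FP_EXTEND is emitted as
 // nxv2f16 -> nxv2f32 -> nxv2f64, and the matching FP_ROUND goes through
 // nxv2f32 in the opposite direction.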
1072 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1073 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1074 // Custom-lower insert/extract operations to simplify patterns.
1076 Custom);
1077 // Expand various condition codes (explained above).
1078 setCondCodeAction(VFPCCToExpand, VT, Expand);
1079
1081 {ISD::FMINNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM, ISD::FMINIMUMNUM}, VT,
1082 Legal);
1083 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom);
1084
1085 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
1086 ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,
1088 VT, Custom);
1089
1090 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1091
1092 // Expand FP operations that need libcalls.
1093 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1094
1096
1097 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1098
1099 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
1100 VT, Custom);
1101
1103 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1104 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1105 VT, Custom);
1106 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1107
1110
1113 VT, Custom);
1114
1117
1119 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1120 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1121
1122 setOperationAction(FloatingPointVPOps, VT, Custom);
1123
1125 Custom);
1128 VT, Legal);
1133 VT, Custom);
1134
1136 };
1137
1138 // Sets common extload/truncstore actions on RVV floating-point vector
1139 // types.
1140 const auto SetCommonVFPExtLoadTruncStoreActions =
1141 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1142 for (auto SmallVT : SmallerVTs) {
1143 setTruncStoreAction(VT, SmallVT, Expand);
1144 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1145 }
1146 };
1147
1148 // Sets common actions for f16 and bf16 for when there's only
1149 // zvfhmin/zvfbfmin and we need to promote to f32 for most operations.
1150 const auto SetCommonPromoteToF32Actions = [&](MVT VT) {
1151 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1153 Custom);
1154 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1155 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1156 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1157 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1158 Custom);
1160 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1166 VT, Custom);
1167 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1168 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1169 MVT EltVT = VT.getVectorElementType();
1170 if (isTypeLegal(EltVT))
1171 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
1173 VT, Custom);
1174 else
1175 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
1176 EltVT, Custom);
1177 setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
1178 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1179 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1180 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1181 ISD::VP_SCATTER},
1182 VT, Custom);
1183 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1184
1185 setOperationAction(ISD::FNEG, VT, Expand);
1186 setOperationAction(ISD::FABS, VT, Expand);
1188
1189 // Expand FP operations that need libcalls.
1190 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1191
1192 // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
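 // (Promoting nxv32f16/nxv32bf16 would require nxv32f32, an LMUL-16 type
 // that does not exist, so these LMUL_8 types are split in half before
 // being promoted.)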
1193 if (getLMUL(VT) == RISCVVType::LMUL_8) {
1194 setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
1195 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1196 } else {
1197 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1198 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1199 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1200 }
1201 };
1202
1203 if (Subtarget.hasVInstructionsF16()) {
1204 for (MVT VT : F16VecVTs) {
1205 if (!isTypeLegal(VT))
1206 continue;
1207 SetCommonVFPActions(VT);
1208 }
1209 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1210 for (MVT VT : F16VecVTs) {
1211 if (!isTypeLegal(VT))
1212 continue;
1213 SetCommonPromoteToF32Actions(VT);
1214 }
1215 }
1216
1217 if (Subtarget.hasVInstructionsBF16Minimal()) {
1218 for (MVT VT : BF16VecVTs) {
1219 if (!isTypeLegal(VT))
1220 continue;
1221 SetCommonPromoteToF32Actions(VT);
1222 }
1223 }
1224
1225 if (Subtarget.hasVInstructionsF32()) {
1226 for (MVT VT : F32VecVTs) {
1227 if (!isTypeLegal(VT))
1228 continue;
1229 SetCommonVFPActions(VT);
1230 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1231 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1232 }
1233 }
1234
1235 if (Subtarget.hasVInstructionsF64()) {
1236 for (MVT VT : F64VecVTs) {
1237 if (!isTypeLegal(VT))
1238 continue;
1239 SetCommonVFPActions(VT);
1240 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1241 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1242 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1243 }
1244 }
1245
1246 if (Subtarget.useRVVForFixedLengthVectors()) {
1248 if (!useRVVForFixedLengthVectorVT(VT))
1249 continue;
1250
1251 // By default everything must be expanded.
1252 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1255 setTruncStoreAction(VT, OtherVT, Expand);
1257 OtherVT, Expand);
1258 }
1259
1260 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1261 // expansion to a build_vector of 0s.
1263
1264 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1266 Custom);
1267
1270 Custom);
1271
1273 VT, Custom);
1274
1276 VT, Custom);
1277
1279
1280 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1281
1283
1285
1288 Custom);
1289
1290 setOperationAction(ISD::BITCAST, VT, Custom);
1291
1293 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
1294 Custom);
1295
1297 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1298 Custom);
1299
1301 {
1310 },
1311 VT, Custom);
1313 Custom);
1314
1316
1317 // Operations below are different for between masks and other vectors.
1318 if (VT.getVectorElementType() == MVT::i1) {
1319 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1320 ISD::OR, ISD::XOR},
1321 VT, Custom);
1322
1323 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1324 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1325 VT, Custom);
1326
1327 setOperationAction(ISD::VP_MERGE, VT, Custom);
1328
1329 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1330 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1331 continue;
1332 }
1333
1334 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1335 // it before type legalization for i64 vectors on RV32. It will then be
1336 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1337 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1338 // improvements first.
1339 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1342
1343 // Lower BUILD_VECTOR with i64 type to VID on RV32 if possible.
1345 }
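 // For example, on RV32 a splat of an i64 scalar is first emitted as
 // SPLAT_VECTOR and, once the scalar is split during type legalization,
 // becomes a SPLAT_VECTOR_PARTS node taking the low and high i32 halves.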
1346
1348 {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
1349
1350 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1351 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1352 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1353 ISD::VP_SCATTER},
1354 VT, Custom);
1355 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1356
1360 VT, Custom);
1361
1364
1366
1367 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1368 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1370
1374 VT, Custom);
1375
1377
1380
1381 // Custom-lower reduction operations to set up the corresponding custom
1382 // nodes' operands.
1383 setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
1384 ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
1385 ISD::VECREDUCE_UMIN},
1386 VT, Custom);
1387
1388 setOperationAction(IntegerVPOps, VT, Custom);
1389
1390 if (Subtarget.hasStdExtZvkb())
1392
1393 if (Subtarget.hasStdExtZvbb()) {
1396 VT, Custom);
1397 } else {
1398 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
1399 // range of f32.
1400 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1401 if (isTypeLegal(FloatVT))
1404 Custom);
1405 }
1406
1408 }
1409
1411 // There are no extending loads or truncating stores.
1412 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1413 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1414 setTruncStoreAction(VT, InnerVT, Expand);
1415 }
1416
1417 if (!useRVVForFixedLengthVectorVT(VT))
1418 continue;
1419
1420 // By default everything must be expanded.
1421 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1423
1424 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1425 // expansion to a build_vector of 0s.
1427
1432 VT, Custom);
1433 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1434 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1435
1437 VT, Custom);
1438
1439 setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
1440 ISD::MGATHER, ISD::MSCATTER},
1441 VT, Custom);
1442 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER,
1443 ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1444 ISD::EXPERIMENTAL_VP_STRIDED_STORE},
1445 VT, Custom);
1446 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1447
1448 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1450 Custom);
1451
1452 if (VT.getVectorElementType() == MVT::f16 &&
1453 !Subtarget.hasVInstructionsF16()) {
1454 setOperationAction(ISD::BITCAST, VT, Custom);
1455 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1457 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1458 Custom);
1459 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
1460 Custom);
1461 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1462 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1463 if (Subtarget.hasStdExtZfhmin()) {
1465 } else {
1466 // We need to custom legalize f16 build vectors if Zfhmin isn't
1467 // available.
1469 }
1470 setOperationAction(ISD::FNEG, VT, Expand);
1471 setOperationAction(ISD::FABS, VT, Expand);
1473 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1474 // Don't promote f16 vector operations to f32 if f32 vector type is
1475 // not legal.
1476 // TODO: could split the f16 vector into two vectors and do promotion.
1477 if (!isTypeLegal(F32VecVT))
1478 continue;
1479 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1480 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1481 continue;
1482 }
1483
1484 if (VT.getVectorElementType() == MVT::bf16) {
1485 setOperationAction(ISD::BITCAST, VT, Custom);
1486 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1487 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1488 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1489 if (Subtarget.hasStdExtZfbfmin()) {
1491 } else {
1492 // We need to custom legalize bf16 build vectors if Zfbfmin isn't
1493 // available.
1495 }
1497 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1498 Custom);
1499 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1500 // Don't promote bf16 vector operations to f32 if the f32 vector type is
1501 // not legal.
1502 // TODO: could split the bf16 vector into two vectors and do promotion.
1503 if (!isTypeLegal(F32VecVT))
1504 continue;
1505 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1506 // TODO: Promote VP ops to fp32.
1507 continue;
1508 }
1509
1511 Custom);
1512
1514 ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
1515 ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,
1516 ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM, ISD::IS_FPCLASS,
1517 ISD::FMAXIMUM, ISD::FMINIMUM},
1518 VT, Custom);
1519
1520 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
1521 ISD::FROUNDEVEN, ISD::FRINT, ISD::LRINT,
1522 ISD::LLRINT, ISD::LROUND, ISD::LLROUND,
1523 ISD::FNEARBYINT},
1524 VT, Custom);
1525
1526 setCondCodeAction(VFPCCToExpand, VT, Expand);
1527
1530
1531 setOperationAction(ISD::BITCAST, VT, Custom);
1532
1533 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1534
1535 setOperationAction(FloatingPointVPOps, VT, Custom);
1536
1543 VT, Custom);
1544 }
1545
1546 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1547 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32}, Custom);
1548 if (Subtarget.is64Bit())
1549 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
1550 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1551 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
1552 if (Subtarget.hasStdExtZfbfmin())
1553 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
1554 if (Subtarget.hasStdExtFOrZfinx())
1555 setOperationAction(ISD::BITCAST, MVT::f32, Custom);
1556 if (Subtarget.hasStdExtDOrZdinx())
1557 setOperationAction(ISD::BITCAST, MVT::f64, Custom);
1558 }
1559 }
1560
1561 if (Subtarget.hasStdExtA())
1562 setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
1563
1564 if (Subtarget.hasForcedAtomics()) {
1565 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1567 {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
1568 ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
1569 ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
1570 ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
1571 XLenVT, LibCall);
1572 }
1573
1574 if (Subtarget.hasVendorXTHeadMemIdx()) {
1575 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1576 setIndexedLoadAction(im, MVT::i8, Legal);
1577 setIndexedStoreAction(im, MVT::i8, Legal);
1578 setIndexedLoadAction(im, MVT::i16, Legal);
1579 setIndexedStoreAction(im, MVT::i16, Legal);
1580 setIndexedLoadAction(im, MVT::i32, Legal);
1581 setIndexedStoreAction(im, MVT::i32, Legal);
1582
1583 if (Subtarget.is64Bit()) {
1584 setIndexedLoadAction(im, MVT::i64, Legal);
1585 setIndexedStoreAction(im, MVT::i64, Legal);
1586 }
1587 }
1588 }
1589
1590 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1594
1598 }
1599
1600 // zve32x is broken for partial_reduce_umla, but let's not make it worse.
1601 if (Subtarget.hasStdExtZvqdotq() && Subtarget.getELen() >= 64) {
1602 static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
1603 ISD::PARTIAL_REDUCE_UMLA,
1604 ISD::PARTIAL_REDUCE_SUMLA};
1605 setPartialReduceMLAAction(MLAOps, MVT::nxv1i32, MVT::nxv4i8, Custom);
1606 setPartialReduceMLAAction(MLAOps, MVT::nxv2i32, MVT::nxv8i8, Custom);
1607 setPartialReduceMLAAction(MLAOps, MVT::nxv4i32, MVT::nxv16i8, Custom);
1608 setPartialReduceMLAAction(MLAOps, MVT::nxv8i32, MVT::nxv32i8, Custom);
1609 setPartialReduceMLAAction(MLAOps, MVT::nxv16i32, MVT::nxv64i8, Custom);
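 // Each i32 accumulator element sums four i8 products, hence the 4:1
 // element-count ratio between the accumulator and input types above.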
1610
1611 if (Subtarget.useRVVForFixedLengthVectors()) {
1613 if (VT.getVectorElementType() != MVT::i32 ||
1614 !useRVVForFixedLengthVectorVT(VT))
1615 continue;
1616 ElementCount EC = VT.getVectorElementCount();
1617 MVT ArgVT = MVT::getVectorVT(MVT::i8, EC.multiplyCoefficientBy(4));
1618 setPartialReduceMLAAction(MLAOps, VT, ArgVT, Custom);
1619 }
1620 }
1621 }
1622
1623 // Customize load and store operations for bf16 if Zfh isn't enabled.
1624 if (Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh()) {
1625 setOperationAction(ISD::LOAD, MVT::bf16, Custom);
1626 setOperationAction(ISD::STORE, MVT::bf16, Custom);
1627 }
1628
1629 // Function alignments.
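 // With Zca, instructions may be 2 bytes wide, so 2-byte alignment suffices;
 // otherwise the 4-byte base instruction size dictates the minimum alignment.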
1630 const Align FunctionAlignment(Subtarget.hasStdExtZca() ? 2 : 4);
1631 setMinFunctionAlignment(FunctionAlignment);
1632 // Set preferred alignments.
1633 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
1634 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
1635
1641
1642 if (Subtarget.hasStdExtFOrZfinx())
1643 setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM, ISD::FMUL});
1644
1645 if (Subtarget.hasStdExtZbb())
1647
1648 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1649 Subtarget.hasVInstructions())
1651
1652 if (Subtarget.hasStdExtZbkb())
1654
1655 if (Subtarget.hasStdExtFOrZfinx())
1658 if (Subtarget.hasVInstructions())
1660 {ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
1661 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA,
1662 ISD::SRL, ISD::SHL, ISD::STORE,
1664 ISD::VP_STORE, ISD::VP_TRUNCATE, ISD::EXPERIMENTAL_VP_REVERSE,
1668 ISD::VSELECT, ISD::VECREDUCE_ADD});
1669
1670 if (Subtarget.hasVendorXTHeadMemPair())
1671 setTargetDAGCombine({ISD::LOAD, ISD::STORE});
1672 if (Subtarget.useRVVForFixedLengthVectors())
1673 setTargetDAGCombine(ISD::BITCAST);
1674
1675 // Disable strict node mutation.
1676 IsStrictFPEnabled = true;
1677 EnableExtLdPromotion = true;
1678
1679 // Let the subtarget decide if a predictable select is more expensive than the
1680 // corresponding branch. This information is used in CGP/SelectOpt to decide
1681 // when to convert selects into branches.
1682 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1683
1684 MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
1685 MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
1686
1687 MaxGluedStoresPerMemcpy = Subtarget.getMaxGluedStoresPerMemcpy();
1688 MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
1689 MaxStoresPerMemcpy = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/false);
1690
1692 Subtarget.getMaxStoresPerMemmove(/*OptSize=*/true);
1693 MaxStoresPerMemmove = Subtarget.getMaxStoresPerMemmove(/*OptSize=*/false);
1694
1695 MaxLoadsPerMemcmpOptSize = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/true);
1696 MaxLoadsPerMemcmp = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/false);
1697}
1698
1700 LLVMContext &Context,
1701 EVT VT) const {
1702 if (!VT.isVector())
1703 return getPointerTy(DL);
1704 if (Subtarget.hasVInstructions() &&
1705 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1706 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1708}
1709
1711 return Subtarget.getXLenVT();
1712}
1713
1714// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1715bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1716 unsigned VF,
1717 bool IsScalable) const {
1718 if (!Subtarget.hasVInstructions())
1719 return true;
1720
1721 if (!IsScalable)
1722 return true;
1723
1724 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1725 return true;
1726
1727 // Don't allow VF=1 if those types aren't legal.
1728 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1729 return true;
1730
1731 // VLEN=32 support is incomplete.
1732 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1733 return true;
1734
1735 // The maximum VF is for the smallest element width with LMUL=8.
1736 // VF must be a power of 2.
1737 unsigned MaxVF = RISCV::RVVBytesPerBlock * 8;
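 // For example, RVVBytesPerBlock == 8 gives MaxVF == 64, matching nxv64i8
 // (i8 elements at LMUL=8).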
1738 return VF > MaxVF || !isPowerOf2_32(VF);
1739}
1740
1742 return !Subtarget.hasVInstructions() ||
1743 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1744}
1745
1747 const CallInst &I,
1748 MachineFunction &MF,
1749 unsigned Intrinsic) const {
1750 auto &DL = I.getDataLayout();
1751
1752 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1753 bool IsUnitStrided, bool UsePtrVal = false) {
1754 Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
1755 // We can't use ptrVal if the intrinsic can access memory before the
1756 // pointer. This means we can't use it for strided or indexed intrinsics.
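 // (A strided access with a negative stride, for instance, can read or
 // write below the base pointer.)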
1757 if (UsePtrVal)
1758 Info.ptrVal = I.getArgOperand(PtrOp);
1759 else
1760 Info.fallbackAddressSpace =
1761 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1762 Type *MemTy;
1763 if (IsStore) {
1764 // Store value is the first operand.
1765 MemTy = I.getArgOperand(0)->getType();
1766 } else {
1767 // Use the return type. If it's a segment load, the return type is a struct.
1768 MemTy = I.getType();
1769 if (MemTy->isStructTy())
1770 MemTy = MemTy->getStructElementType(0);
1771 }
1772 if (!IsUnitStrided)
1773 MemTy = MemTy->getScalarType();
1774
1775 Info.memVT = getValueType(DL, MemTy);
1776 if (MemTy->isTargetExtTy()) {
1777 // RISC-V vector tuple type's alignment type should be its element type.
1778 if (cast<TargetExtType>(MemTy)->getName() == "riscv.vector.tuple")
1779 MemTy = Type::getIntNTy(
1780 MemTy->getContext(),
1781 1 << cast<ConstantInt>(I.getArgOperand(I.arg_size() - 1))
1782 ->getZExtValue());
1783 Info.align = DL.getABITypeAlign(MemTy);
1784 } else {
1785 Info.align = Align(DL.getTypeStoreSize(MemTy->getScalarType()));
1786 }
1787 Info.size = MemoryLocation::UnknownSize;
1788 Info.flags |=
1790 return true;
1791 };
1792
1793 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1795
1797 switch (Intrinsic) {
1798 default:
1799 return false;
1800 case Intrinsic::riscv_masked_atomicrmw_xchg:
1801 case Intrinsic::riscv_masked_atomicrmw_add:
1802 case Intrinsic::riscv_masked_atomicrmw_sub:
1803 case Intrinsic::riscv_masked_atomicrmw_nand:
1804 case Intrinsic::riscv_masked_atomicrmw_max:
1805 case Intrinsic::riscv_masked_atomicrmw_min:
1806 case Intrinsic::riscv_masked_atomicrmw_umax:
1807 case Intrinsic::riscv_masked_atomicrmw_umin:
1808 case Intrinsic::riscv_masked_cmpxchg:
1809 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
1810 // narrow atomic operation. These will be expanded to an LR/SC loop that
1811 // reads/writes to/from an aligned 4 byte location. And, or, shift, etc.
1812 // will be used to modify the appropriate part of the 4 byte data and
1813 // preserve the rest.
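 // For example, a masked i8 atomicrmw add becomes an LR.W/SC.W loop on the
 // aligned word containing the byte, updating only that byte's bits.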
1814 Info.opc = ISD::INTRINSIC_W_CHAIN;
1815 Info.memVT = MVT::i32;
1816 Info.ptrVal = I.getArgOperand(0);
1817 Info.offset = 0;
1818 Info.align = Align(4);
1821 return true;
1822 case Intrinsic::riscv_seg2_load_mask:
1823 case Intrinsic::riscv_seg3_load_mask:
1824 case Intrinsic::riscv_seg4_load_mask:
1825 case Intrinsic::riscv_seg5_load_mask:
1826 case Intrinsic::riscv_seg6_load_mask:
1827 case Intrinsic::riscv_seg7_load_mask:
1828 case Intrinsic::riscv_seg8_load_mask:
1829 case Intrinsic::riscv_sseg2_load_mask:
1830 case Intrinsic::riscv_sseg3_load_mask:
1831 case Intrinsic::riscv_sseg4_load_mask:
1832 case Intrinsic::riscv_sseg5_load_mask:
1833 case Intrinsic::riscv_sseg6_load_mask:
1834 case Intrinsic::riscv_sseg7_load_mask:
1835 case Intrinsic::riscv_sseg8_load_mask:
1836 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1837 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1838 case Intrinsic::riscv_seg2_store_mask:
1839 case Intrinsic::riscv_seg3_store_mask:
1840 case Intrinsic::riscv_seg4_store_mask:
1841 case Intrinsic::riscv_seg5_store_mask:
1842 case Intrinsic::riscv_seg6_store_mask:
1843 case Intrinsic::riscv_seg7_store_mask:
1844 case Intrinsic::riscv_seg8_store_mask:
1845 // Operands are (vec, ..., vec, ptr, mask, vl)
1846 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1847 /*IsStore*/ true,
1848 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1849 case Intrinsic::riscv_sseg2_store_mask:
1850 case Intrinsic::riscv_sseg3_store_mask:
1851 case Intrinsic::riscv_sseg4_store_mask:
1852 case Intrinsic::riscv_sseg5_store_mask:
1853 case Intrinsic::riscv_sseg6_store_mask:
1854 case Intrinsic::riscv_sseg7_store_mask:
1855 case Intrinsic::riscv_sseg8_store_mask:
1856 // Operands are (vec, ..., vec, ptr, offset, mask, vl)
1857 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1858 /*IsStore*/ true,
1859 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1860 case Intrinsic::riscv_vlm:
1861 return SetRVVLoadStoreInfo(/*PtrOp*/ 0,
1862 /*IsStore*/ false,
1863 /*IsUnitStrided*/ true,
1864 /*UsePtrVal*/ true);
1865 case Intrinsic::riscv_vle:
1866 case Intrinsic::riscv_vle_mask:
1867 case Intrinsic::riscv_vleff:
1868 case Intrinsic::riscv_vleff_mask:
1869 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1870 /*IsStore*/ false,
1871 /*IsUnitStrided*/ true,
1872 /*UsePtrVal*/ true);
1873 case Intrinsic::riscv_vsm:
1874 case Intrinsic::riscv_vse:
1875 case Intrinsic::riscv_vse_mask:
1876 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1877 /*IsStore*/ true,
1878 /*IsUnitStrided*/ true,
1879 /*UsePtrVal*/ true);
1880 case Intrinsic::riscv_vlse:
1881 case Intrinsic::riscv_vlse_mask:
1882 case Intrinsic::riscv_vloxei:
1883 case Intrinsic::riscv_vloxei_mask:
1884 case Intrinsic::riscv_vluxei:
1885 case Intrinsic::riscv_vluxei_mask:
1886 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1887 /*IsStore*/ false,
1888 /*IsUnitStrided*/ false);
1889 case Intrinsic::riscv_vsse:
1890 case Intrinsic::riscv_vsse_mask:
1891 case Intrinsic::riscv_vsoxei:
1892 case Intrinsic::riscv_vsoxei_mask:
1893 case Intrinsic::riscv_vsuxei:
1894 case Intrinsic::riscv_vsuxei_mask:
1895 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1896 /*IsStore*/ true,
1897 /*IsUnitStrided*/ false);
1898 case Intrinsic::riscv_vlseg2:
1899 case Intrinsic::riscv_vlseg3:
1900 case Intrinsic::riscv_vlseg4:
1901 case Intrinsic::riscv_vlseg5:
1902 case Intrinsic::riscv_vlseg6:
1903 case Intrinsic::riscv_vlseg7:
1904 case Intrinsic::riscv_vlseg8:
1905 case Intrinsic::riscv_vlseg2ff:
1906 case Intrinsic::riscv_vlseg3ff:
1907 case Intrinsic::riscv_vlseg4ff:
1908 case Intrinsic::riscv_vlseg5ff:
1909 case Intrinsic::riscv_vlseg6ff:
1910 case Intrinsic::riscv_vlseg7ff:
1911 case Intrinsic::riscv_vlseg8ff:
1912 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1913 /*IsStore*/ false,
1914 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1915 case Intrinsic::riscv_vlseg2_mask:
1916 case Intrinsic::riscv_vlseg3_mask:
1917 case Intrinsic::riscv_vlseg4_mask:
1918 case Intrinsic::riscv_vlseg5_mask:
1919 case Intrinsic::riscv_vlseg6_mask:
1920 case Intrinsic::riscv_vlseg7_mask:
1921 case Intrinsic::riscv_vlseg8_mask:
1922 case Intrinsic::riscv_vlseg2ff_mask:
1923 case Intrinsic::riscv_vlseg3ff_mask:
1924 case Intrinsic::riscv_vlseg4ff_mask:
1925 case Intrinsic::riscv_vlseg5ff_mask:
1926 case Intrinsic::riscv_vlseg6ff_mask:
1927 case Intrinsic::riscv_vlseg7ff_mask:
1928 case Intrinsic::riscv_vlseg8ff_mask:
1929 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1930 /*IsStore*/ false,
1931 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1932 case Intrinsic::riscv_vlsseg2:
1933 case Intrinsic::riscv_vlsseg3:
1934 case Intrinsic::riscv_vlsseg4:
1935 case Intrinsic::riscv_vlsseg5:
1936 case Intrinsic::riscv_vlsseg6:
1937 case Intrinsic::riscv_vlsseg7:
1938 case Intrinsic::riscv_vlsseg8:
1939 case Intrinsic::riscv_vloxseg2:
1940 case Intrinsic::riscv_vloxseg3:
1941 case Intrinsic::riscv_vloxseg4:
1942 case Intrinsic::riscv_vloxseg5:
1943 case Intrinsic::riscv_vloxseg6:
1944 case Intrinsic::riscv_vloxseg7:
1945 case Intrinsic::riscv_vloxseg8:
1946 case Intrinsic::riscv_vluxseg2:
1947 case Intrinsic::riscv_vluxseg3:
1948 case Intrinsic::riscv_vluxseg4:
1949 case Intrinsic::riscv_vluxseg5:
1950 case Intrinsic::riscv_vluxseg6:
1951 case Intrinsic::riscv_vluxseg7:
1952 case Intrinsic::riscv_vluxseg8:
1953 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1954 /*IsStore*/ false,
1955 /*IsUnitStrided*/ false);
1956 case Intrinsic::riscv_vlsseg2_mask:
1957 case Intrinsic::riscv_vlsseg3_mask:
1958 case Intrinsic::riscv_vlsseg4_mask:
1959 case Intrinsic::riscv_vlsseg5_mask:
1960 case Intrinsic::riscv_vlsseg6_mask:
1961 case Intrinsic::riscv_vlsseg7_mask:
1962 case Intrinsic::riscv_vlsseg8_mask:
1963 case Intrinsic::riscv_vloxseg2_mask:
1964 case Intrinsic::riscv_vloxseg3_mask:
1965 case Intrinsic::riscv_vloxseg4_mask:
1966 case Intrinsic::riscv_vloxseg5_mask:
1967 case Intrinsic::riscv_vloxseg6_mask:
1968 case Intrinsic::riscv_vloxseg7_mask:
1969 case Intrinsic::riscv_vloxseg8_mask:
1970 case Intrinsic::riscv_vluxseg2_mask:
1971 case Intrinsic::riscv_vluxseg3_mask:
1972 case Intrinsic::riscv_vluxseg4_mask:
1973 case Intrinsic::riscv_vluxseg5_mask:
1974 case Intrinsic::riscv_vluxseg6_mask:
1975 case Intrinsic::riscv_vluxseg7_mask:
1976 case Intrinsic::riscv_vluxseg8_mask:
1977 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 6,
1978 /*IsStore*/ false,
1979 /*IsUnitStrided*/ false);
1980 case Intrinsic::riscv_vsseg2:
1981 case Intrinsic::riscv_vsseg3:
1982 case Intrinsic::riscv_vsseg4:
1983 case Intrinsic::riscv_vsseg5:
1984 case Intrinsic::riscv_vsseg6:
1985 case Intrinsic::riscv_vsseg7:
1986 case Intrinsic::riscv_vsseg8:
1987 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1988 /*IsStore*/ true,
1989 /*IsUnitStrided*/ false);
1990 case Intrinsic::riscv_vsseg2_mask:
1991 case Intrinsic::riscv_vsseg3_mask:
1992 case Intrinsic::riscv_vsseg4_mask:
1993 case Intrinsic::riscv_vsseg5_mask:
1994 case Intrinsic::riscv_vsseg6_mask:
1995 case Intrinsic::riscv_vsseg7_mask:
1996 case Intrinsic::riscv_vsseg8_mask:
1997 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1998 /*IsStore*/ true,
1999 /*IsUnitStrided*/ false);
2000 case Intrinsic::riscv_vssseg2:
2001 case Intrinsic::riscv_vssseg3:
2002 case Intrinsic::riscv_vssseg4:
2003 case Intrinsic::riscv_vssseg5:
2004 case Intrinsic::riscv_vssseg6:
2005 case Intrinsic::riscv_vssseg7:
2006 case Intrinsic::riscv_vssseg8:
2007 case Intrinsic::riscv_vsoxseg2:
2008 case Intrinsic::riscv_vsoxseg3:
2009 case Intrinsic::riscv_vsoxseg4:
2010 case Intrinsic::riscv_vsoxseg5:
2011 case Intrinsic::riscv_vsoxseg6:
2012 case Intrinsic::riscv_vsoxseg7:
2013 case Intrinsic::riscv_vsoxseg8:
2014 case Intrinsic::riscv_vsuxseg2:
2015 case Intrinsic::riscv_vsuxseg3:
2016 case Intrinsic::riscv_vsuxseg4:
2017 case Intrinsic::riscv_vsuxseg5:
2018 case Intrinsic::riscv_vsuxseg6:
2019 case Intrinsic::riscv_vsuxseg7:
2020 case Intrinsic::riscv_vsuxseg8:
2021 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2022 /*IsStore*/ true,
2023 /*IsUnitStrided*/ false);
2024 case Intrinsic::riscv_vssseg2_mask:
2025 case Intrinsic::riscv_vssseg3_mask:
2026 case Intrinsic::riscv_vssseg4_mask:
2027 case Intrinsic::riscv_vssseg5_mask:
2028 case Intrinsic::riscv_vssseg6_mask:
2029 case Intrinsic::riscv_vssseg7_mask:
2030 case Intrinsic::riscv_vssseg8_mask:
2031 case Intrinsic::riscv_vsoxseg2_mask:
2032 case Intrinsic::riscv_vsoxseg3_mask:
2033 case Intrinsic::riscv_vsoxseg4_mask:
2034 case Intrinsic::riscv_vsoxseg5_mask:
2035 case Intrinsic::riscv_vsoxseg6_mask:
2036 case Intrinsic::riscv_vsoxseg7_mask:
2037 case Intrinsic::riscv_vsoxseg8_mask:
2038 case Intrinsic::riscv_vsuxseg2_mask:
2039 case Intrinsic::riscv_vsuxseg3_mask:
2040 case Intrinsic::riscv_vsuxseg4_mask:
2041 case Intrinsic::riscv_vsuxseg5_mask:
2042 case Intrinsic::riscv_vsuxseg6_mask:
2043 case Intrinsic::riscv_vsuxseg7_mask:
2044 case Intrinsic::riscv_vsuxseg8_mask:
2045 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
2046 /*IsStore*/ true,
2047 /*IsUnitStrided*/ false);
2048 }
2049}
2050
2052 const AddrMode &AM, Type *Ty,
2053 unsigned AS,
2054 Instruction *I) const {
2055 // No global is ever allowed as a base.
2056 if (AM.BaseGV)
2057 return false;
2058
2059 // None of our addressing modes allows a scalable offset
2060 if (AM.ScalableOffset)
2061 return false;
2062
2063 // RVV instructions only support register addressing.
2064 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
2065 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
2066
2067 // Require a 12-bit signed offset.
2068 if (!isInt<12>(AM.BaseOffs))
2069 return false;
2070
2071 switch (AM.Scale) {
2072 case 0: // "r+i" or just "i", depending on HasBaseReg.
2073 break;
2074 case 1:
2075 if (!AM.HasBaseReg) // allow "r+i".
2076 break;
2077 return false; // disallow "r+r" or "r+r+i".
2078 default:
2079 return false;
2080 }
2081
2082 return true;
2083}
2084
2086 return isInt<12>(Imm);
2087}
2088
2090 return isInt<12>(Imm);
2091}
2092
2093// On RV32, 64-bit integers are split into their high and low parts and held
2094// in two different registers, so the trunc is free since the low register can
2095// just be used.
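// For example, on RV32 an i64 value occupies two GPRs holding its low and
// high halves; truncating it to i32 simply reads the register with the low
// half, so no instruction is needed.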
2096// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
2097// isTruncateFree?
2099 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
2100 return false;
2101 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
2102 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
2103 return (SrcBits == 64 && DestBits == 32);
2104}
2105
2107 // We consider i64->i32 free on RV64 since we have good selection of W
2108 // instructions that make promoting operations back to i64 free in many cases.
2109 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
2110 !DstVT.isInteger())
2111 return false;
2112 unsigned SrcBits = SrcVT.getSizeInBits();
2113 unsigned DestBits = DstVT.getSizeInBits();
2114 return (SrcBits == 64 && DestBits == 32);
2115}
2116
2118 EVT SrcVT = Val.getValueType();
2120 // The truncate is free: the shift and the truncate combine into vnsrl/vnsra.
2120 if (Subtarget.hasVInstructions() &&
2121 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
2122 SrcVT.isVector() && VT2.isVector()) {
2123 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
2124 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
2125 if (SrcBits == DestBits * 2) {
2126 return true;
2127 }
2128 }
2129 return TargetLowering::isTruncateFree(Val, VT2);
2130}
2131
2133 // Zexts are free if they can be combined with a load.
2134 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
2135 // poorly with type legalization of compares preferring sext.
2136 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
2137 EVT MemVT = LD->getMemoryVT();
2138 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
2139 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
2140 LD->getExtensionType() == ISD::ZEXTLOAD))
2141 return true;
2142 }
2143
2144 return TargetLowering::isZExtFree(Val, VT2);
2145}
2146
2148 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
2149}
2150
2152 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
2153}
2154
2156 return Subtarget.hasCTZLike();
2157}
2158
2160 return Subtarget.hasCLZLike();
2161}
2162
2164 const Instruction &AndI) const {
2165 // We expect to be able to match a bit extraction instruction if the Zbs
2166 // extension is supported and the mask is a power of two. However, we
2167 // conservatively return false if the mask would fit in an ANDI instruction,
2168 // on the basis that it's possible the sinking+duplication of the AND in
2169 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
2170 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
2171 if (!Subtarget.hasBEXTILike())
2172 return false;
2174 if (!Mask)
2175 return false;
2176 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
2177}
2178
2180 EVT VT = Y.getValueType();
2181
2182 if (VT.isVector())
2183 return false;
2184
2185 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
2186 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
2187}
2188
2190 EVT VT = Y.getValueType();
2191
2192 if (!VT.isVector())
2193 return hasAndNotCompare(Y);
2194
2195 return Subtarget.hasStdExtZvkb();
2196}
2197
2199 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
2200 if (Subtarget.hasStdExtZbs())
2201 return X.getValueType().isScalarInteger();
2202 auto *C = dyn_cast<ConstantSDNode>(Y);
2203 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
2204 if (Subtarget.hasVendorXTHeadBs())
2205 return C != nullptr;
2206 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
2207 return C && C->getAPIntValue().ule(10);
2208}
2209
2211 unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X,
2212 SDValue Y) const {
2213 if (SelectOpcode != ISD::VSELECT)
2214 return false;
2215
2216 // Only enable for rvv.
2217 if (!VT.isVector() || !Subtarget.hasVInstructions())
2218 return false;
2219
2220 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
2221 return false;
2222
2223 return true;
2224}
2225
2227 Type *Ty) const {
2228 assert(Ty->isIntegerTy());
2229
2230 unsigned BitSize = Ty->getIntegerBitWidth();
2231 if (BitSize > Subtarget.getXLen())
2232 return false;
2233
2234 // Fast path, assume 32-bit immediates are cheap.
2235 int64_t Val = Imm.getSExtValue();
2236 if (isInt<32>(Val))
2237 return true;
2238
2239 // A constant pool entry may be more aligned than the load we're trying to
2240 // replace. If we don't support unaligned scalar mem, prefer the constant
2241 // pool.
2242 // TODO: Can the caller pass down the alignment?
2243 if (!Subtarget.enableUnalignedScalarMem())
2244 return true;
2245
2246 // Prefer to keep the load if it would require many instructions.
2247 // This uses the same threshold we use for constant pools but doesn't
2248 // check useConstantPoolForLargeInts.
2249 // TODO: Should we keep the load only when we're definitely going to emit a
2250 // constant pool?
2251
2253 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2254}
2255
2259 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2260 SelectionDAG &DAG) const {
2261 // One interesting pattern that we'd want to form is 'bit extract':
2262 // ((1 >> Y) & 1) ==/!= 0
2263 // But we also need to be careful not to try to reverse that fold.
2264
2265 // Is this '((1 >> Y) & 1)'?
2266 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2267 return false; // Keep the 'bit extract' pattern.
2268
2269 // Will this be '((1 >> Y) & 1)' after the transform?
2270 if (NewShiftOpcode == ISD::SRL && CC->isOne())
2271 return true; // Do form the 'bit extract' pattern.
2272
2273 // If 'X' is a constant, and we transform, then we will immediately
2274 // try to undo the fold, thus causing endless combine loop.
2275 // So only do the transform if X is not a constant. This matches the default
2276 // implementation of this function.
2277 return !XC;
2278}
2279
2281 unsigned Opc = VecOp.getOpcode();
2282
2283 // Assume target opcodes can't be scalarized.
2284 // TODO - do we have any exceptions?
2285 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
2286 return false;
2287
2288 // If the vector op is not supported, try to convert to scalar.
2289 EVT VecVT = VecOp.getValueType();
2291 return true;
2292
2293 // If the vector op is supported, but the scalar op is not, the transform may
2294 // not be worthwhile.
2295 // Permit a vector binary operation to be converted to a scalar binary
2296 // operation that is custom lowered with an illegal type.
2297 EVT ScalarVT = VecVT.getScalarType();
2298 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2299 isOperationCustom(Opc, ScalarVT);
2300}
2301
2303 const GlobalAddressSDNode *GA) const {
2304 // In order to maximise the opportunity for common subexpression elimination,
2305 // keep a separate ADD node for the global address offset instead of folding
2306 // it in the global address node. Later peephole optimisations may choose to
2307 // fold it back in when profitable.
2308 return false;
2309}
2310
2311// Returns 0-31 if the fli instruction is available for the type and this is
2312 // a legal FP immediate for the type. Returns -1 otherwise.
2314 if (!Subtarget.hasStdExtZfa())
2315 return -1;
2316
2317 bool IsSupportedVT = false;
2318 if (VT == MVT::f16) {
2319 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2320 } else if (VT == MVT::f32) {
2321 IsSupportedVT = true;
2322 } else if (VT == MVT::f64) {
2323 assert(Subtarget.hasStdExtD() && "Expect D extension");
2324 IsSupportedVT = true;
2325 }
2326
2327 if (!IsSupportedVT)
2328 return -1;
2329
2330 return RISCVLoadFPImm::getLoadFPImm(Imm);
2331}
2332
2334 bool ForCodeSize) const {
2335 bool IsLegalVT = false;
2336 if (VT == MVT::f16)
2337 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2338 else if (VT == MVT::f32)
2339 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2340 else if (VT == MVT::f64)
2341 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2342 else if (VT == MVT::bf16)
2343 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2344
2345 if (!IsLegalVT)
2346 return false;
2347
2348 if (getLegalZfaFPImm(Imm, VT) >= 0)
2349 return true;
2350
2351 // Some constants can be produced by fli+fneg.
2352 if (Imm.isNegative() && getLegalZfaFPImm(-Imm, VT) >= 0)
2353 return true;
2354
2355 // Cannot create a 64 bit floating-point immediate value for rv32.
2356 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2357 // td can handle +0.0 or -0.0 already.
2358 // -0.0 can be created by fmv + fneg.
2359 return Imm.isZero();
2360 }
2361
2362 // Special case: fmv + fneg
2363 if (Imm.isNegZero())
2364 return true;
2365
2366 // Building an integer and then converting requires a fmv at the end of
2367 // the integer sequence. The fmv is not required for Zfinx.
2368 const int FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
2369 const int Cost =
2370 FmvCost + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
2371 Subtarget.getXLen(), Subtarget);
2372 return Cost <= FPImmCost;
2373}
2374
2375// TODO: This is very conservative.
2377 unsigned Index) const {
2379 return false;
2380
2381 // Extracts from index 0 are just subreg extracts.
2382 if (Index == 0)
2383 return true;
2384
2385 // Only support extracting a fixed from a fixed vector for now.
2386 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2387 return false;
2388
2389 EVT EltVT = ResVT.getVectorElementType();
2390 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2391
2392 // The smallest type we can slide is i8.
2393 // TODO: We can extract index 0 from a mask vector without a slide.
2394 if (EltVT == MVT::i1)
2395 return false;
2396
2397 unsigned ResElts = ResVT.getVectorNumElements();
2398 unsigned SrcElts = SrcVT.getVectorNumElements();
2399
2400 unsigned MinVLen = Subtarget.getRealMinVLen();
2401 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2402
2403 // If we're extracting only data from the first VLEN bits of the source
2404 // then we can always do this with an m1 vslidedown.vx. Restricting the
2405 // Index ensures we can use a vslidedown.vi.
2406 // TODO: We can generalize this when the exact VLEN is known.
2407 if (Index + ResElts <= MinVLMAX && Index < 31)
2408 return true;
2409
2410 // Conservatively only handle extracting half of a vector.
2411 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2412 // the upper half of a vector until we have more test coverage.
2413 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2414 // a cheap extract. However, this case is important in practice for
2415 // shuffled extracts of longer vectors. How should we resolve this?
2416 return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
2417}
2418
2420 CallingConv::ID CC,
2421 EVT VT) const {
2422 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2423 // We might still end up using a GPR but that will be decided based on ABI.
2424 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2425 !Subtarget.hasStdExtZfhminOrZhinxmin())
2426 return MVT::f32;
2427
2428 MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2429
2430 return PartVT;
2431}
2432
2433unsigned
2435 std::optional<MVT> RegisterVT) const {
2436 // Pair inline assembly operand
2437 if (VT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && RegisterVT &&
2438 *RegisterVT == MVT::Untyped)
2439 return 1;
2440
2441 return TargetLowering::getNumRegisters(Context, VT, RegisterVT);
2442}
2443
2445 CallingConv::ID CC,
2446 EVT VT) const {
2447 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2448 // We might still end up using a GPR but that will be decided based on ABI.
2449 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2450 !Subtarget.hasStdExtZfhminOrZhinxmin())
2451 return 1;
2452
2453 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2454}
2455
2457 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2458 unsigned &NumIntermediates, MVT &RegisterVT) const {
2460 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2461
2462 return NumRegs;
2463}
2464
2465// Changes the condition code and swaps operands if necessary, so the SetCC
2466// operation matches one of the comparisons supported directly by branches
2467// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2468// with 1/-1.
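// For example (illustrative), on RV64 a single-bit test such as
//   (X & 0x1000) != 0
// that cannot be matched by ANDI is rewritten below as
//   (X << 51) < 0   (signed compare with 0)
// so that the tested bit ends up in the sign position.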
2470 ISD::CondCode &CC, SelectionDAG &DAG,
2471 const RISCVSubtarget &Subtarget) {
2472 // If this is a single bit test that can't be handled by ANDI, shift the
2473 // bit to be tested to the MSB and perform a signed compare with 0.
2474 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2475 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2476 isa<ConstantSDNode>(LHS.getOperand(1)) &&
2477 // XAndesPerf supports branch on test bit.
2478 !Subtarget.hasVendorXAndesPerf()) {
2479 uint64_t Mask = LHS.getConstantOperandVal(1);
2480 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2481 unsigned ShAmt = 0;
2482 if (isPowerOf2_64(Mask)) {
2483 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2484 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2485 } else {
2486 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2487 }
2488
2489 LHS = LHS.getOperand(0);
2490 if (ShAmt != 0)
2491 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2492 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2493 return;
2494 }
2495 }
2496
2497 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2498 int64_t C = RHSC->getSExtValue();
2499 switch (CC) {
2500 default: break;
2501 case ISD::SETGT:
2502 // Convert X > -1 to X >= 0.
2503 if (C == -1) {
2504 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2505 CC = ISD::SETGE;
2506 return;
2507 }
2508 if ((Subtarget.hasVendorXqcicm() || Subtarget.hasVendorXqcicli()) &&
2509 C != INT64_MAX && isInt<5>(C + 1)) {
2510 // We have a conditional move instruction for SETGE but not SETGT.
2511 // Convert X > C to X >= C + 1, if (C + 1) is a 5-bit signed immediate.
2512 RHS = DAG.getSignedConstant(C + 1, DL, RHS.getValueType());
2513 CC = ISD::SETGE;
2514 return;
2515 }
2516 if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isInt<16>(C + 1)) {
2517 // We have a branch immediate instruction for SETGE but not SETGT.
2518 // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit signed immediate.
2519 RHS = DAG.getSignedConstant(C + 1, DL, RHS.getValueType());
2520 CC = ISD::SETGE;
2521 return;
2522 }
2523 break;
2524 case ISD::SETLT:
2525 // Convert X < 1 to 0 >= X.
2526 if (C == 1) {
2527 RHS = LHS;
2528 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2529 CC = ISD::SETGE;
2530 return;
2531 }
2532 break;
2533 case ISD::SETUGT:
2534 if ((Subtarget.hasVendorXqcicm() || Subtarget.hasVendorXqcicli()) &&
2535 C != INT64_MAX && isUInt<5>(C + 1)) {
2536 // We have a conditional move instruction for SETUGE but not SETUGT.
2537 // Convert X > C to X >= C + 1, if (C + 1) is a 5-bit unsigned immediate.
2538 RHS = DAG.getConstant(C + 1, DL, RHS.getValueType());
2539 CC = ISD::SETUGE;
2540 return;
2541 }
2542 if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isUInt<16>(C + 1)) {
2543 // We have a branch immediate instruction for SETUGE but not SETUGT.
2544 // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit unsigned
2545 // immediate.
2546 RHS = DAG.getConstant(C + 1, DL, RHS.getValueType());
2547 CC = ISD::SETUGE;
2548 return;
2549 }
2550 break;
2551 }
2552 }
2553
2554 switch (CC) {
2555 default:
2556 break;
2557 case ISD::SETGT:
2558 case ISD::SETLE:
2559 case ISD::SETUGT:
2560 case ISD::SETULE:
2562 std::swap(LHS, RHS);
2563 break;
2564 }
2565}
2566
2568 if (VT.isRISCVVectorTuple()) {
2569 if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
2570 VT.SimpleTy <= MVT::riscv_nxv1i8x8)
2571 return RISCVVType::LMUL_F8;
2572 if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
2573 VT.SimpleTy <= MVT::riscv_nxv2i8x8)
2574 return RISCVVType::LMUL_F4;
2575 if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
2576 VT.SimpleTy <= MVT::riscv_nxv4i8x8)
2577 return RISCVVType::LMUL_F2;
2578 if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
2579 VT.SimpleTy <= MVT::riscv_nxv8i8x8)
2580 return RISCVVType::LMUL_1;
2581 if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
2582 VT.SimpleTy <= MVT::riscv_nxv16i8x4)
2583 return RISCVVType::LMUL_2;
2584 if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
2585 return RISCVVType::LMUL_4;
2586 llvm_unreachable("Invalid vector tuple type LMUL.");
2587 }
2588
2589 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2590 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2591 if (VT.getVectorElementType() == MVT::i1)
2592 KnownSize *= 8;
2593
2594 switch (KnownSize) {
2595 default:
2596 llvm_unreachable("Invalid LMUL.");
2597 case 8:
2598 return RISCVVType::LMUL_F8;
2599 case 16:
2600 return RISCVVType::LMUL_F4;
2601 case 32:
2602 return RISCVVType::LMUL_F2;
2603 case 64:
2604 return RISCVVType::LMUL_1;
2605 case 128:
2606 return RISCVVType::LMUL_2;
2607 case 256:
2608 return RISCVVType::LMUL_4;
2609 case 512:
2610 return RISCVVType::LMUL_8;
2611 }
2612}
2613
2615 switch (LMul) {
2616 default:
2617 llvm_unreachable("Invalid LMUL.");
2621 case RISCVVType::LMUL_1:
2622 return RISCV::VRRegClassID;
2623 case RISCVVType::LMUL_2:
2624 return RISCV::VRM2RegClassID;
2625 case RISCVVType::LMUL_4:
2626 return RISCV::VRM4RegClassID;
2627 case RISCVVType::LMUL_8:
2628 return RISCV::VRM8RegClassID;
2629 }
2630}
2631
2632unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2633 RISCVVType::VLMUL LMUL = getLMUL(VT);
2634 if (LMUL == RISCVVType::LMUL_F8 || LMUL == RISCVVType::LMUL_F4 ||
2635 LMUL == RISCVVType::LMUL_F2 || LMUL == RISCVVType::LMUL_1) {
2636 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2637 "Unexpected subreg numbering");
2638 return RISCV::sub_vrm1_0 + Index;
2639 }
2640 if (LMUL == RISCVVType::LMUL_2) {
2641 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2642 "Unexpected subreg numbering");
2643 return RISCV::sub_vrm2_0 + Index;
2644 }
2645 if (LMUL == RISCVVType::LMUL_4) {
2646 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2647 "Unexpected subreg numbering");
2648 return RISCV::sub_vrm4_0 + Index;
2649 }
2650 llvm_unreachable("Invalid vector type.");
2651}
2652
2654 if (VT.isRISCVVectorTuple()) {
2655 unsigned NF = VT.getRISCVVectorTupleNumFields();
2656 unsigned RegsPerField =
2657 std::max(1U, (unsigned)VT.getSizeInBits().getKnownMinValue() /
2658 (NF * RISCV::RVVBitsPerBlock));
2659 switch (RegsPerField) {
2660 case 1:
2661 if (NF == 2)
2662 return RISCV::VRN2M1RegClassID;
2663 if (NF == 3)
2664 return RISCV::VRN3M1RegClassID;
2665 if (NF == 4)
2666 return RISCV::VRN4M1RegClassID;
2667 if (NF == 5)
2668 return RISCV::VRN5M1RegClassID;
2669 if (NF == 6)
2670 return RISCV::VRN6M1RegClassID;
2671 if (NF == 7)
2672 return RISCV::VRN7M1RegClassID;
2673 if (NF == 8)
2674 return RISCV::VRN8M1RegClassID;
2675 break;
2676 case 2:
2677 if (NF == 2)
2678 return RISCV::VRN2M2RegClassID;
2679 if (NF == 3)
2680 return RISCV::VRN3M2RegClassID;
2681 if (NF == 4)
2682 return RISCV::VRN4M2RegClassID;
2683 break;
2684 case 4:
2685 assert(NF == 2);
2686 return RISCV::VRN2M4RegClassID;
2687 default:
2688 break;
2689 }
2690 llvm_unreachable("Invalid vector tuple type RegClass.");
2691 }
2692
2693 if (VT.getVectorElementType() == MVT::i1)
2694 return RISCV::VRRegClassID;
2695 return getRegClassIDForLMUL(getLMUL(VT));
2696}
2697
2698// Attempt to decompose a subvector insert/extract between VecVT and
2699// SubVecVT via subregister indices. Returns the subregister index that
2700// can perform the subvector insert/extract with the given element index, as
2701// well as the index corresponding to any leftover subvectors that must be
2702// further inserted/extracted within the register class for SubVecVT.
2703std::pair<unsigned, unsigned>
2705 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2706 const RISCVRegisterInfo *TRI) {
2707 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2708 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2709 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2710 "Register classes not ordered");
2711 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2712 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2713
2714 // If VecVT is a vector tuple type, either it's a tuple type with the same
2715 // RegClass as SubVecVT, or SubVecVT is actually a subvector of VecVT.
2716 if (VecVT.isRISCVVectorTuple()) {
2717 if (VecRegClassID == SubRegClassID)
2718 return {RISCV::NoSubRegister, 0};
2719
2720 assert(SubVecVT.isScalableVector() &&
2721 "Only allow scalable vector subvector.");
2722 assert(getLMUL(VecVT) == getLMUL(SubVecVT) &&
2723 "Invalid vector tuple insert/extract for vector and subvector with "
2724 "different LMUL.");
2725 return {getSubregIndexByMVT(VecVT, InsertExtractIdx), 0};
2726 }
2727
2728 // Try to compose a subregister index that takes us from the incoming
2729 // LMUL>1 register class down to the outgoing one. At each step we half
2730 // the LMUL:
2731 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2732 // Note that this is not guaranteed to find a subregister index, such as
2733 // when we are extracting from one VR type to another.
2734 unsigned SubRegIdx = RISCV::NoSubRegister;
2735 for (const unsigned RCID :
2736 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2737 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2738 VecVT = VecVT.getHalfNumVectorElementsVT();
2739 bool IsHi =
2740 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2741 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2742 getSubregIndexByMVT(VecVT, IsHi));
2743 if (IsHi)
2744 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2745 }
2746 return {SubRegIdx, InsertExtractIdx};
2747}
2748
2749// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2750// stores for those types.
2751bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2752 return !Subtarget.useRVVForFixedLengthVectors() ||
2753 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2754}
2755
2757 if (!ScalarTy.isSimple())
2758 return false;
2759 switch (ScalarTy.getSimpleVT().SimpleTy) {
2760 case MVT::iPTR:
2761 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2762 case MVT::i8:
2763 case MVT::i16:
2764 case MVT::i32:
2765 return Subtarget.hasVInstructions();
2766 case MVT::i64:
2767 return Subtarget.hasVInstructionsI64();
2768 case MVT::f16:
2769 return Subtarget.hasVInstructionsF16Minimal();
2770 case MVT::bf16:
2771 return Subtarget.hasVInstructionsBF16Minimal();
2772 case MVT::f32:
2773 return Subtarget.hasVInstructionsF32();
2774 case MVT::f64:
2775 return Subtarget.hasVInstructionsF64();
2776 default:
2777 return false;
2778 }
2779}
2780
2781
2783 return NumRepeatedDivisors;
2784}
2785
2787 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2788 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2789 "Unexpected opcode");
2790 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2791 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2793 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2794 if (!II)
2795 return SDValue();
2796 return Op.getOperand(II->VLOperand + 1 + HasChain);
2797}
2798
2800 const RISCVSubtarget &Subtarget) {
2801 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2802 if (!Subtarget.useRVVForFixedLengthVectors())
2803 return false;
2804
2805 // We only support a set of vector types with a consistent maximum fixed size
2806 // across all supported vector element types to avoid legalization issues.
2807 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2808 // fixed-length vector type we support is 1024 bytes.
2809 if (VT.getVectorNumElements() > 1024 || VT.getFixedSizeInBits() > 1024 * 8)
2810 return false;
2811
2812 unsigned MinVLen = Subtarget.getRealMinVLen();
2813
2814 MVT EltVT = VT.getVectorElementType();
2815
2816 // Don't use RVV for vectors we cannot scalarize if required.
2817 switch (EltVT.SimpleTy) {
2818 // i1 is supported but has different rules.
2819 default:
2820 return false;
2821 case MVT::i1:
2822 // Masks can only use a single register.
2823 if (VT.getVectorNumElements() > MinVLen)
2824 return false;
2825 MinVLen /= 8;
2826 break;
2827 case MVT::i8:
2828 case MVT::i16:
2829 case MVT::i32:
2830 break;
2831 case MVT::i64:
2832 if (!Subtarget.hasVInstructionsI64())
2833 return false;
2834 break;
2835 case MVT::f16:
2836 if (!Subtarget.hasVInstructionsF16Minimal())
2837 return false;
2838 break;
2839 case MVT::bf16:
2840 if (!Subtarget.hasVInstructionsBF16Minimal())
2841 return false;
2842 break;
2843 case MVT::f32:
2844 if (!Subtarget.hasVInstructionsF32())
2845 return false;
2846 break;
2847 case MVT::f64:
2848 if (!Subtarget.hasVInstructionsF64())
2849 return false;
2850 break;
2851 }
2852
2853 // Reject elements larger than ELEN.
2854 if (EltVT.getSizeInBits() > Subtarget.getELen())
2855 return false;
2856
2857 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2858 // Don't use RVV for types that don't fit.
2859 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2860 return false;
2861
2862 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2863 // the base fixed length RVV support in place.
2864 if (!VT.isPow2VectorType())
2865 return false;
2866
2867 return true;
2868}
2869
2870bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2871 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2872}
2873
2874// Return the largest legal scalable vector type that matches VT's element type.
2876 const RISCVSubtarget &Subtarget) {
2877 // This may be called before legal types are setup.
2878 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2879 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2880 "Expected legal fixed length vector!");
2881
2882 unsigned MinVLen = Subtarget.getRealMinVLen();
2883 unsigned MaxELen = Subtarget.getELen();
2884
2885 MVT EltVT = VT.getVectorElementType();
2886 switch (EltVT.SimpleTy) {
2887 default:
2888 llvm_unreachable("unexpected element type for RVV container");
2889 case MVT::i1:
2890 case MVT::i8:
2891 case MVT::i16:
2892 case MVT::i32:
2893 case MVT::i64:
2894 case MVT::bf16:
2895 case MVT::f16:
2896 case MVT::f32:
2897 case MVT::f64: {
2898 // We prefer to use LMUL=1 for VLEN-sized types. Use fractional LMULs for
2899 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2900 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
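// For example (illustrative, assuming ELEN=64 and a minimum VLEN of 128):
// v8i16 (128 bits) is contained in nxv4i16 (LMUL=1), while v4i16 (64 bits)
// is contained in nxv2i16 (LMUL=1/2).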
2901 unsigned NumElts =
2903 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2904 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2905 return MVT::getScalableVectorVT(EltVT, NumElts);
2906 }
2907 }
2908}
2909
2911 const RISCVSubtarget &Subtarget) {
2913 Subtarget);
2914}
2915
2917 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2918}
2919
2920// Grow V to consume an entire RVV register.
2922 const RISCVSubtarget &Subtarget) {
2923 assert(VT.isScalableVector() &&
2924 "Expected to convert into a scalable vector!");
2925 assert(V.getValueType().isFixedLengthVector() &&
2926 "Expected a fixed length vector operand!");
2927 SDLoc DL(V);
2928 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), V, 0);
2929}
2930
2931// Shrink V so it's just big enough to maintain a VT's worth of data.
2933 const RISCVSubtarget &Subtarget) {
2935 "Expected to convert into a fixed length vector!");
2936 assert(V.getValueType().isScalableVector() &&
2937 "Expected a scalable vector operand!");
2938 SDLoc DL(V);
2939 return DAG.getExtractSubvector(DL, VT, V, 0);
2940}
2941
2942 /// Return the mask type suitable for masking the provided
2943/// vector type. This is simply an i1 element type vector of the same
2944/// (possibly scalable) length.
2945static MVT getMaskTypeFor(MVT VecVT) {
2946 assert(VecVT.isVector());
2948 return MVT::getVectorVT(MVT::i1, EC);
2949}
2950
2951 /// Creates an all-ones mask suitable for masking a vector of type VecVT with
2952 /// vector length VL.
2953static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2954 SelectionDAG &DAG) {
2955 MVT MaskVT = getMaskTypeFor(VecVT);
2956 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2957}
2958
2959static std::pair<SDValue, SDValue>
2961 const RISCVSubtarget &Subtarget) {
2962 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2963 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2964 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2965 return {Mask, VL};
2966}
2967
2968static std::pair<SDValue, SDValue>
2969getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2970 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2971 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2972 SDValue VL = DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2973 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2974 return {Mask, VL};
2975}
2976
2977// Gets the two common "VL" operands: an all-ones mask and the vector length.
2978// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2979// the vector type that the fixed-length vector is contained in. Otherwise if
2980// VecVT is scalable, then ContainerVT should be the same as VecVT.
2981static std::pair<SDValue, SDValue>
2982getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2983 const RISCVSubtarget &Subtarget) {
2984 if (VecVT.isFixedLengthVector())
2985 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2986 Subtarget);
2987 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2988 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2989}
2990
2992 SelectionDAG &DAG) const {
2993 assert(VecVT.isScalableVector() && "Expected scalable vector");
2994 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2995 VecVT.getVectorElementCount());
2996}
2997
2998std::pair<unsigned, unsigned>
3000 const RISCVSubtarget &Subtarget) {
3001 assert(VecVT.isScalableVector() && "Expected scalable vector");
3002
3003 unsigned EltSize = VecVT.getScalarSizeInBits();
3004 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
3005
3006 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
3007 unsigned MaxVLMAX =
3008 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
3009
3010 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
3011 unsigned MinVLMAX =
3012 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
3013
3014 return std::make_pair(MinVLMAX, MaxVLMAX);
3015}
3016
3017// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
3018 // of either are (currently) supported. This can get us into an infinite loop
3019// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
3020// as a ..., etc.
3021// Until either (or both) of these can reliably lower any node, reporting that
3022// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
3023// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
3024// which is not desirable.
3026 EVT VT, unsigned DefinedValues) const {
3027 return false;
3028}
3029
3031 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
3032 // implementation-defined.
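// For example (illustrative): with DLEN == VLEN (DLenFactor == 1) an LMUL=4
// type costs 4 and an LMUL=1/2 type costs 1; with DLEN == VLEN/2 those costs
// become 8 and 1 respectively.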
3033 if (!VT.isVector())
3035 unsigned DLenFactor = Subtarget.getDLenFactor();
3036 unsigned Cost;
3037 if (VT.isScalableVector()) {
3038 unsigned LMul;
3039 bool Fractional;
3040 std::tie(LMul, Fractional) =
3042 if (Fractional)
3043 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
3044 else
3045 Cost = (LMul * DLenFactor);
3046 } else {
3047 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
3048 }
3049 return Cost;
3050}
3051
3052
3053 /// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
3054 /// may be quadratic in the number of vregs implied by LMUL, and is assumed to
3055 /// be so by default. VRGatherCostModel reflects the available options. Note
3056 /// that the operands (index and possibly mask) are handled separately.
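/// For example (illustrative): if getLMULCost(VT) is 4, the default quadratic
/// model returns 16, while the NLog2N model returns 4 * log2(4) = 8.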
3058 auto LMULCost = getLMULCost(VT);
3059 bool Log2CostModel =
3060 Subtarget.getVRGatherCostModel() == llvm::RISCVSubtarget::NLog2N;
3061 if (Log2CostModel && LMULCost.isValid()) {
3062 unsigned Log = Log2_64(LMULCost.getValue());
3063 if (Log > 0)
3064 return LMULCost * Log;
3065 }
3066 return LMULCost * LMULCost;
3067}
3068
3069/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
3070/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
3071/// or may track the vrgather.vv cost. It is implementation-dependent.
3075
3076/// Return the cost of a vslidedown.vx or vslideup.vx instruction
3077/// for the type VT. (This does not cover the vslide1up or vslide1down
3078/// variants.) Slides may be linear in the number of vregs implied by LMUL,
3079/// or may track the vrgather.vv cost. It is implementation-dependent.
3083
3084/// Return the cost of a vslidedown.vi or vslideup.vi instruction
3085/// for the type VT. (This does not cover the vslide1up or vslide1down
3086/// variants.) Slides may be linear in the number of vregs implied by LMUL,
3087/// or may track the vrgather.vv cost. It is implementation-dependent.
3091
3093 const RISCVSubtarget &Subtarget) {
3094 // f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
3095 // bf16 conversions are always promoted to f32.
3096 if ((Op.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3097 Op.getValueType() == MVT::bf16) {
3098 bool IsStrict = Op->isStrictFPOpcode();
3099
3100 SDLoc DL(Op);
3101 if (IsStrict) {
3102 SDValue Val = DAG.getNode(Op.getOpcode(), DL, {MVT::f32, MVT::Other},
3103 {Op.getOperand(0), Op.getOperand(1)});
3104 return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
3105 {Op.getValueType(), MVT::Other},
3106 {Val.getValue(1), Val.getValue(0),
3107 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
3108 }
3109 return DAG.getNode(
3110 ISD::FP_ROUND, DL, Op.getValueType(),
3111 DAG.getNode(Op.getOpcode(), DL, MVT::f32, Op.getOperand(0)),
3112 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
3113 }
3114
3115 // Other operations are legal.
3116 return Op;
3117}
3118
3120 const RISCVSubtarget &Subtarget) {
3121 // RISC-V FP-to-int conversions saturate to the destination register size, but
3122 // don't produce 0 for nan. We can use a conversion instruction and fix the
3123 // nan case with a compare and a select.
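// For example (scalar case, illustrative): an i32 fp_to_sint_sat of an f32
// value x lowers to roughly an fcvt.w.s with the RTZ rounding mode plus a
// NaN check (x != x) that selects 0 instead of the converted value.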
3124 SDValue Src = Op.getOperand(0);
3125
3126 MVT DstVT = Op.getSimpleValueType();
3127 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
3128
3129 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
3130
3131 if (!DstVT.isVector()) {
3132 // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
3133 // the result.
3134 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3135 Src.getValueType() == MVT::bf16) {
3136 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
3137 }
3138
3139 unsigned Opc;
3140 if (SatVT == DstVT)
3141 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
3142 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
3143 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
3144 else
3145 return SDValue();
3146 // FIXME: Support other SatVTs by clamping before or after the conversion.
3147
3148 SDLoc DL(Op);
3149 SDValue FpToInt = DAG.getNode(
3150 Opc, DL, DstVT, Src,
3152
3153 if (Opc == RISCVISD::FCVT_WU_RV64)
3154 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
3155
3156 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
3157 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
3159 }
3160
3161 // Vectors.
3162
3163 MVT DstEltVT = DstVT.getVectorElementType();
3164 MVT SrcVT = Src.getSimpleValueType();
3165 MVT SrcEltVT = SrcVT.getVectorElementType();
3166 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
3167 unsigned DstEltSize = DstEltVT.getSizeInBits();
3168
3169 // Only handle saturating to the destination type.
3170 if (SatVT != DstEltVT)
3171 return SDValue();
3172
3173 MVT DstContainerVT = DstVT;
3174 MVT SrcContainerVT = SrcVT;
3175 if (DstVT.isFixedLengthVector()) {
3176 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
3177 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3178 assert(DstContainerVT.getVectorElementCount() ==
3179 SrcContainerVT.getVectorElementCount() &&
3180 "Expected same element count");
3181 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3182 }
3183
3184 SDLoc DL(Op);
3185
3186 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
3187
3188 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
3189 {Src, Src, DAG.getCondCode(ISD::SETNE),
3190 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
3191
3192 // Need to widen by more than 1 step, promote the FP type, then do a widening
3193 // convert.
3194 if (DstEltSize > (2 * SrcEltSize)) {
3195 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
3196 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
3197 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
3198 }
3199
3200 MVT CvtContainerVT = DstContainerVT;
3201 MVT CvtEltVT = DstEltVT;
3202 if (SrcEltSize > (2 * DstEltSize)) {
3203 CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
3204 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3205 }
3206
3207 unsigned RVVOpc =
3208 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
3209 SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
3210
3211 while (CvtContainerVT != DstContainerVT) {
3212 CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
3213 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3214 // Rounding mode here is arbitrary since we aren't shifting out any bits.
3215 unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
3216 : RISCVISD::TRUNCATE_VECTOR_VL_USAT;
3217 Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
3218 }
3219
3220 SDValue SplatZero = DAG.getNode(
3221 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
3222 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
3223 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
3224 Res, DAG.getUNDEF(DstContainerVT), VL);
3225
3226 if (DstVT.isFixedLengthVector())
3227 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3228
3229 return Res;
3230}
3231
3233 const RISCVSubtarget &Subtarget) {
3234 bool IsStrict = Op->isStrictFPOpcode();
3235 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3236
3237 // f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
3238 // bf16 conversions are always promoted to f32.
3239 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3240 SrcVal.getValueType() == MVT::bf16) {
3241 SDLoc DL(Op);
3242 if (IsStrict) {
3243 SDValue Ext =
3244 DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
3245 {Op.getOperand(0), SrcVal});
3246 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
3247 {Ext.getValue(1), Ext.getValue(0)});
3248 }
3249 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
3250 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, SrcVal));
3251 }
3252
3253 // Other operations are legal.
3254 return Op;
3255}
3256
3258 switch (Opc) {
3259 case ISD::FROUNDEVEN:
3261 case ISD::VP_FROUNDEVEN:
3262 return RISCVFPRndMode::RNE;
3263 case ISD::FTRUNC:
3264 case ISD::STRICT_FTRUNC:
3265 case ISD::VP_FROUNDTOZERO:
3266 return RISCVFPRndMode::RTZ;
3267 case ISD::FFLOOR:
3268 case ISD::STRICT_FFLOOR:
3269 case ISD::VP_FFLOOR:
3270 return RISCVFPRndMode::RDN;
3271 case ISD::FCEIL:
3272 case ISD::STRICT_FCEIL:
3273 case ISD::VP_FCEIL:
3274 return RISCVFPRndMode::RUP;
3275 case ISD::FROUND:
3276 case ISD::LROUND:
3277 case ISD::LLROUND:
3278 case ISD::STRICT_FROUND:
3279 case ISD::STRICT_LROUND:
3281 case ISD::VP_FROUND:
3282 return RISCVFPRndMode::RMM;
3283 case ISD::FRINT:
3284 case ISD::LRINT:
3285 case ISD::LLRINT:
3286 case ISD::STRICT_FRINT:
3287 case ISD::STRICT_LRINT:
3288 case ISD::STRICT_LLRINT:
3289 case ISD::VP_FRINT:
3290 case ISD::VP_LRINT:
3291 case ISD::VP_LLRINT:
3292 return RISCVFPRndMode::DYN;
3293 }
3294
3296}
3297
3298 // Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND,
3299 // VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3300 // the integer domain and back, taking care to avoid converting values that
3301 // are NaN or already correct.
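// For example (illustrative), FFLOOR on an f64 vector becomes roughly:
// vfabs, a mask of |x| < 2^52 (larger values and NaNs are left untouched),
// vfcvt.x.f.v with static rounding mode RDN, vfcvt.f.x.v, and a final
// vfsgnj to restore the original sign.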
3302static SDValue
3304 const RISCVSubtarget &Subtarget) {
3305 MVT VT = Op.getSimpleValueType();
3306 assert(VT.isVector() && "Unexpected type");
3307
3308 SDLoc DL(Op);
3309
3310 SDValue Src = Op.getOperand(0);
3311
3312 // Freeze the source since we are increasing the number of uses.
3313 Src = DAG.getFreeze(Src);
3314
3315 MVT ContainerVT = VT;
3316 if (VT.isFixedLengthVector()) {
3317 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3318 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3319 }
3320
3321 SDValue Mask, VL;
3322 if (Op->isVPOpcode()) {
3323 Mask = Op.getOperand(1);
3324 if (VT.isFixedLengthVector())
3325 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3326 Subtarget);
3327 VL = Op.getOperand(2);
3328 } else {
3329 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3330 }
3331
3332 // We do the conversion on the absolute value and fix the sign at the end.
3333 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3334
3335 // Determine the largest integer that can be represented exactly. This and
3336 // values larger than it don't have any fractional bits so don't need to
3337 // be converted.
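// (For f32 this threshold is 2^23; for f64 it is 2^52.)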
3338 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3339 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3340 APFloat MaxVal = APFloat(FltSem);
3341 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3342 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3343 SDValue MaxValNode =
3344 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3345 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3346 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3347
3348 // If abs(Src) was larger than MaxVal or nan, keep it.
3349 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3350 Mask =
3351 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3352 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3353 Mask, Mask, VL});
3354
3355 // Truncate to integer and convert back to FP.
3356 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3357 MVT XLenVT = Subtarget.getXLenVT();
3358 SDValue Truncated;
3359
3360 switch (Op.getOpcode()) {
3361 default:
3362 llvm_unreachable("Unexpected opcode");
3363 case ISD::FRINT:
3364 case ISD::VP_FRINT:
3365 case ISD::FCEIL:
3366 case ISD::VP_FCEIL:
3367 case ISD::FFLOOR:
3368 case ISD::VP_FFLOOR:
3369 case ISD::FROUND:
3370 case ISD::FROUNDEVEN:
3371 case ISD::VP_FROUND:
3372 case ISD::VP_FROUNDEVEN:
3373 case ISD::VP_FROUNDTOZERO: {
3376 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3377 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3378 break;
3379 }
3380 case ISD::FTRUNC:
3381 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3382 Mask, VL);
3383 break;
3384 case ISD::FNEARBYINT:
3385 case ISD::VP_FNEARBYINT:
3386 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3387 Mask, VL);
3388 break;
3389 }
3390
3391 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3392 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3393 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3394 Mask, VL);
3395
3396 // Restore the original sign so that -0.0 is preserved.
3397 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3398 Src, Src, Mask, VL);
3399
3400 if (!VT.isFixedLengthVector())
3401 return Truncated;
3402
3403 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3404}
3405
3406 // Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND,
3407 // STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaNs in the source
3408 // to qNaNs and then converting the new source to integer and back to FP.
3409static SDValue
3411 const RISCVSubtarget &Subtarget) {
3412 SDLoc DL(Op);
3413 MVT VT = Op.getSimpleValueType();
3414 SDValue Chain = Op.getOperand(0);
3415 SDValue Src = Op.getOperand(1);
3416
3417 MVT ContainerVT = VT;
3418 if (VT.isFixedLengthVector()) {
3419 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3420 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3421 }
3422
3423 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3424
3425 // Freeze the source since we are increasing the number of uses.
3426 Src = DAG.getFreeze(Src);
3427
3428 // Convert sNaN to qNaN by computing x + x for each unordered element x in Src.
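// (x + x quiets any signaling NaN: it produces a quiet NaN and raises the
// invalid-operation flag, matching the strict-FP semantics needed here.)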
3429 MVT MaskVT = Mask.getSimpleValueType();
3430 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
3431 DAG.getVTList(MaskVT, MVT::Other),
3432 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3433 DAG.getUNDEF(MaskVT), Mask, VL});
3434 Chain = Unorder.getValue(1);
3435 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
3436 DAG.getVTList(ContainerVT, MVT::Other),
3437 {Chain, Src, Src, Src, Unorder, VL});
3438 Chain = Src.getValue(1);
3439
3440 // We do the conversion on the absolute value and fix the sign at the end.
3441 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3442
3443 // Determine the largest integer that can be represented exactly. This and
3444 // values larger than it don't have any fractional bits so don't need to
3445 // be converted.
3446 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3447 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3448 APFloat MaxVal = APFloat(FltSem);
3449 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3450 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3451 SDValue MaxValNode =
3452 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3453 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3454 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3455
3456 // If abs(Src) was larger than MaxVal or nan, keep it.
3457 Mask = DAG.getNode(
3458 RISCVISD::SETCC_VL, DL, MaskVT,
3459 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3460
3461 // Truncate to integer and convert back to FP.
3462 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3463 MVT XLenVT = Subtarget.getXLenVT();
3464 SDValue Truncated;
3465
3466 switch (Op.getOpcode()) {
3467 default:
3468 llvm_unreachable("Unexpected opcode");
3469 case ISD::STRICT_FCEIL:
3470 case ISD::STRICT_FFLOOR:
3471 case ISD::STRICT_FROUND:
3475 Truncated = DAG.getNode(
3476 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3477 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3478 break;
3479 }
3480 case ISD::STRICT_FTRUNC:
3481 Truncated =
3482 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
3483 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3484 break;
3486 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
3487 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3488 Mask, VL);
3489 break;
3490 }
3491 Chain = Truncated.getValue(1);
3492
3493 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3494 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3495 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3496 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3497 Truncated, Mask, VL);
3498 Chain = Truncated.getValue(1);
3499 }
3500
3501 // Restore the original sign so that -0.0 is preserved.
3502 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3503 Src, Src, Mask, VL);
3504
3505 if (VT.isFixedLengthVector())
3506 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3507 return DAG.getMergeValues({Truncated, Chain}, DL);
3508}
3509
3510static SDValue
3512 const RISCVSubtarget &Subtarget) {
3513 MVT VT = Op.getSimpleValueType();
3514 if (VT.isVector())
3515 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3516
3517 if (DAG.shouldOptForSize())
3518 return SDValue();
3519
3520 SDLoc DL(Op);
3521 SDValue Src = Op.getOperand(0);
3522
3523 // Create an integer the size of the mantissa with the MSB set. This and all
3524 // values larger than it don't have any fractional bits so don't need to be
3525 // converted.
3526 const fltSemantics &FltSem = VT.getFltSemantics();
3527 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3528 APFloat MaxVal = APFloat(FltSem);
3529 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3530 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3531 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3532
3534 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3535 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3536}
3537
3538// Expand vector [L]LRINT and [L]LROUND by converting to the integer domain.
3540 const RISCVSubtarget &Subtarget) {
3541 SDLoc DL(Op);
3542 MVT DstVT = Op.getSimpleValueType();
3543 SDValue Src = Op.getOperand(0);
3544 MVT SrcVT = Src.getSimpleValueType();
3545 assert(SrcVT.isVector() && DstVT.isVector() &&
3546 !(SrcVT.isFixedLengthVector() ^ DstVT.isFixedLengthVector()) &&
3547 "Unexpected type");
3548
3549 MVT DstContainerVT = DstVT;
3550 MVT SrcContainerVT = SrcVT;
3551
3552 if (DstVT.isFixedLengthVector()) {
3553 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
3554 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3555 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3556 }
3557
3558 auto [Mask, VL] = getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
3559
3560 // [b]f16 -> f32
3561 MVT SrcElemType = SrcVT.getVectorElementType();
3562 if (SrcElemType == MVT::f16 || SrcElemType == MVT::bf16) {
3563 MVT F32VT = SrcContainerVT.changeVectorElementType(MVT::f32);
3564 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, F32VT, Src, Mask, VL);
3565 }
3566
3567 SDValue Res =
3568 DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, DstContainerVT, Src, Mask,
3569 DAG.getTargetConstant(matchRoundingOp(Op.getOpcode()), DL,
3570 Subtarget.getXLenVT()),
3571 VL);
3572
3573 if (!DstVT.isFixedLengthVector())
3574 return Res;
3575
3576 return convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3577}
3578
3579 static SDValue
3580 getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
3581               const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3582               SDValue Offset, SDValue Mask, SDValue VL,
3583               unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3584   if (Passthru.isUndef())
3585     Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
3586   SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3587 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3588 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3589}
3590
3591static SDValue
3592getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3593 EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3594             SDValue VL,
3595             unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3596   if (Passthru.isUndef())
3597     Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
3598   SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3599 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3600 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3601}
3602
3603 struct VIDSequence {
3604   int64_t StepNumerator;
3605   unsigned StepDenominator;
3606   int64_t Addend;
3607};
3608
3609 static std::optional<APInt> getExactInteger(const APFloat &APF,
3610                                             unsigned BitWidth) {
3611 // We will use a SINT_TO_FP to materialize this constant so we should use a
3612 // signed APSInt here.
3613 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3614  // We use an arbitrary rounding mode here. If a floating-point value is an
3615  // exact integer (e.g., 1.0), the rounding mode does not affect the output. If
3616 // the rounding mode changes the output value, then it is not an exact
3617 // integer.
3618  RoundingMode ArbitraryRM = RoundingMode::TowardZero;
3619  bool IsExact;
3620  // If it is out of signed integer range, it will return an invalid operation.
3621  // If it is not an exact integer, IsExact is false.
3622  if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3623       APFloatBase::opInvalidOp) ||
3624      !IsExact)
3625 return std::nullopt;
3626 return ValInt.extractBits(BitWidth, 0);
3627}
3628
3629// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3630// to the (non-zero) step S and start value X. This can be then lowered as the
3631// RVV sequence (VID * S) + X, for example.
3632// The step S is represented as an integer numerator divided by a positive
3633// denominator. Note that the implementation currently only identifies
3634// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3635// cannot detect 2/3, for example.
3636// Note that this method will also match potentially unappealing index
3637 // sequences, like <i32 0, i32 50939494>; it is left to the caller to
3638// determine whether this is worth generating code for.
3639//
3640// EltSizeInBits is the size of the type that the sequence will be calculated
3641// in, i.e. SEW for build_vectors or XLEN for address calculations.
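// For example, <1, 3, 5, 7> is matched with StepNumerator = 2,
// StepDenominator = 1 and Addend = 1, while <0, 0, 1, 1> is matched with
// StepNumerator = 1, StepDenominator = 2 and Addend = 0.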
3642static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3643 unsigned EltSizeInBits) {
3644 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3645   if (!cast<BuildVectorSDNode>(Op)->isConstant())
3646     return std::nullopt;
3647 bool IsInteger = Op.getValueType().isInteger();
3648
3649 std::optional<unsigned> SeqStepDenom;
3650 std::optional<APInt> SeqStepNum;
3651 std::optional<APInt> SeqAddend;
3652 std::optional<std::pair<APInt, unsigned>> PrevElt;
3653 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3654
3655 // First extract the ops into a list of constant integer values. This may not
3656 // be possible for floats if they're not all representable as integers.
3657 SmallVector<std::optional<APInt>> Elts(Op.getNumOperands());
3658 const unsigned OpSize = Op.getScalarValueSizeInBits();
3659 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3660 if (Elt.isUndef()) {
3661 Elts[Idx] = std::nullopt;
3662 continue;
3663 }
3664 if (IsInteger) {
3665 Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3666 } else {
3667 auto ExactInteger =
3668 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3669 if (!ExactInteger)
3670 return std::nullopt;
3671 Elts[Idx] = *ExactInteger;
3672 }
3673 }
3674
3675 for (auto [Idx, Elt] : enumerate(Elts)) {
3676 // Assume undef elements match the sequence; we just have to be careful
3677 // when interpolating across them.
3678 if (!Elt)
3679 continue;
3680
3681 if (PrevElt) {
3682 // Calculate the step since the last non-undef element, and ensure
3683 // it's consistent across the entire sequence.
3684 unsigned IdxDiff = Idx - PrevElt->second;
3685 APInt ValDiff = *Elt - PrevElt->first;
3686
3687       // A zero value difference means that we're somewhere in the middle
3688 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3689 // step change before evaluating the sequence.
3690 if (ValDiff == 0)
3691 continue;
3692
3693 int64_t Remainder = ValDiff.srem(IdxDiff);
3694 // Normalize the step if it's greater than 1.
3695 if (Remainder != ValDiff.getSExtValue()) {
3696 // The difference must cleanly divide the element span.
3697 if (Remainder != 0)
3698 return std::nullopt;
3699 ValDiff = ValDiff.sdiv(IdxDiff);
3700 IdxDiff = 1;
3701 }
3702
3703 if (!SeqStepNum)
3704 SeqStepNum = ValDiff;
3705 else if (ValDiff != SeqStepNum)
3706 return std::nullopt;
3707
3708 if (!SeqStepDenom)
3709 SeqStepDenom = IdxDiff;
3710 else if (IdxDiff != *SeqStepDenom)
3711 return std::nullopt;
3712 }
3713
3714 // Record this non-undef element for later.
3715 if (!PrevElt || PrevElt->first != *Elt)
3716 PrevElt = std::make_pair(*Elt, Idx);
3717 }
3718
3719 // We need to have logged a step for this to count as a legal index sequence.
3720 if (!SeqStepNum || !SeqStepDenom)
3721 return std::nullopt;
3722
3723 // Loop back through the sequence and validate elements we might have skipped
3724 // while waiting for a valid step. While doing this, log any sequence addend.
3725 for (auto [Idx, Elt] : enumerate(Elts)) {
3726 if (!Elt)
3727 continue;
3728 APInt ExpectedVal =
3729 (APInt(EltSizeInBits, Idx, /*isSigned=*/false, /*implicitTrunc=*/true) *
3730 *SeqStepNum)
3731 .sdiv(*SeqStepDenom);
3732
3733 APInt Addend = *Elt - ExpectedVal;
3734 if (!SeqAddend)
3735 SeqAddend = Addend;
3736 else if (Addend != SeqAddend)
3737 return std::nullopt;
3738 }
3739
3740 assert(SeqAddend && "Must have an addend if we have a step");
3741
3742 return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
3743 SeqAddend->getSExtValue()};
3744}
3745
3746// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3747// and lower it as a VRGATHER_VX_VL from the source vector.
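// For example, a splat of (extract_vector_elt %src, %i) can be lowered
// roughly as (vrgather.vx vd, %src, %i), avoiding a round trip of the
// element through a scalar register.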
3748static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3749 SelectionDAG &DAG,
3750 const RISCVSubtarget &Subtarget) {
3751 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3752 return SDValue();
3753 SDValue Src = SplatVal.getOperand(0);
3754 // Don't perform this optimization for i1 vectors, or if the element types are
3755  // different.
3756 // FIXME: Support i1 vectors, maybe by promoting to i8?
3757 MVT EltTy = VT.getVectorElementType();
3758 if (EltTy == MVT::i1 ||
3759 !DAG.getTargetLoweringInfo().isTypeLegal(Src.getValueType()))
3760 return SDValue();
3761 MVT SrcVT = Src.getSimpleValueType();
3762 if (EltTy != SrcVT.getVectorElementType())
3763 return SDValue();
3764 SDValue Idx = SplatVal.getOperand(1);
3765 // The index must be a legal type.
3766 if (Idx.getValueType() != Subtarget.getXLenVT())
3767 return SDValue();
3768
3769 // Check that we know Idx lies within VT
3770 if (!TypeSize::isKnownLE(SrcVT.getSizeInBits(), VT.getSizeInBits())) {
3771 auto *CIdx = dyn_cast<ConstantSDNode>(Idx);
3772 if (!CIdx || CIdx->getZExtValue() >= VT.getVectorMinNumElements())
3773 return SDValue();
3774 }
3775
3776 // Convert fixed length vectors to scalable
3777 MVT ContainerVT = VT;
3778 if (VT.isFixedLengthVector())
3779 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3780
3781 MVT SrcContainerVT = SrcVT;
3782 if (SrcVT.isFixedLengthVector()) {
3783 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3784 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3785 }
3786
3787 // Put Vec in a VT sized vector
3788 if (SrcContainerVT.getVectorMinNumElements() <
3789 ContainerVT.getVectorMinNumElements())
3790 Src = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Src, 0);
3791 else
3792 Src = DAG.getExtractSubvector(DL, ContainerVT, Src, 0);
3793
3794 // We checked that Idx fits inside VT earlier
3795 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3796 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Src,
3797 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3798 if (VT.isFixedLengthVector())
3799 Gather = convertFromScalableVector(VT, Gather, DAG, Subtarget);
3800 return Gather;
3801}
3802
3803 static SDValue lowerBuildVectorViaVID(SDValue Op, SelectionDAG &DAG,
3804                                       const RISCVSubtarget &Subtarget) {
3805 MVT VT = Op.getSimpleValueType();
3806 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3807
3808 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3809
3810 SDLoc DL(Op);
3811 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3812
3813 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3814 int64_t StepNumerator = SimpleVID->StepNumerator;
3815 unsigned StepDenominator = SimpleVID->StepDenominator;
3816 int64_t Addend = SimpleVID->Addend;
3817
3818 assert(StepNumerator != 0 && "Invalid step");
3819 bool Negate = false;
3820 int64_t SplatStepVal = StepNumerator;
3821 unsigned StepOpcode = ISD::MUL;
3822 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3823 // anyway as the shift of 63 won't fit in uimm5.
3824 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3825 isPowerOf2_64(std::abs(StepNumerator))) {
3826 Negate = StepNumerator < 0;
3827 StepOpcode = ISD::SHL;
3828 SplatStepVal = Log2_64(std::abs(StepNumerator));
3829 }
3830
3831 // Only emit VIDs with suitably-small steps. We use imm5 as a threshold
3832 // since it's the immediate value many RVV instructions accept. There is
3833     // no vmul.vi instruction, so ensure the multiply constant can fit in a
3834     // single addi instruction. For the addend, we allow up to 32 bits.
3835 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3836 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3837 isPowerOf2_32(StepDenominator) &&
3838 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<32>(Addend)) {
3839       MVT VIDVT =
3840           VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3841       MVT VIDContainerVT =
3842 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3843 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3844 // Convert right out of the scalable type so we can use standard ISD
3845 // nodes for the rest of the computation. If we used scalable types with
3846 // these, we'd lose the fixed-length vector info and generate worse
3847 // vsetvli code.
3848 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3849 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3850 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3851 SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
3852 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3853 }
3854 if (StepDenominator != 1) {
3855 SDValue SplatStep =
3856 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3857 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3858 }
3859 if (Addend != 0 || Negate) {
3860 SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
3861 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3862 VID);
3863 }
3864 if (VT.isFloatingPoint()) {
3865 // TODO: Use vfwcvt to reduce register pressure.
3866 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3867 }
3868 return VID;
3869 }
3870 }
3871
3872 return SDValue();
3873}
3874
3875/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3876/// which constitute a large proportion of the elements. In such cases we can
3877/// splat a vector with the dominant element and make up the shortfall with
3878/// INSERT_VECTOR_ELTs. Returns SDValue if not profitable.
3879/// Note that this includes vectors of 2 elements by association. The
3880/// upper-most element is the "dominant" one, allowing us to use a splat to
3881/// "insert" the upper element, and an insert of the lower element at position
3882/// 0, which improves codegen.
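/// For example, <a, b, a, a> can be lowered as a splat of a followed by a
/// single insert of b at index 1.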
3883 static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
3884                                                  const RISCVSubtarget &Subtarget) {
3885 MVT VT = Op.getSimpleValueType();
3886 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3887
3888 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3889
3890 SDLoc DL(Op);
3891 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3892
3893 MVT XLenVT = Subtarget.getXLenVT();
3894 unsigned NumElts = Op.getNumOperands();
3895
3896 SDValue DominantValue;
3897 unsigned MostCommonCount = 0;
3898 DenseMap<SDValue, unsigned> ValueCounts;
3899 unsigned NumUndefElts =
3900 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3901
3902 // Track the number of scalar loads we know we'd be inserting, estimated as
3903 // any non-zero floating-point constant. Other kinds of element are either
3904 // already in registers or are materialized on demand. The threshold at which
3905 // a vector load is more desirable than several scalar materialization and
3906 // vector-insertion instructions is not known.
3907 unsigned NumScalarLoads = 0;
3908
3909 for (SDValue V : Op->op_values()) {
3910 if (V.isUndef())
3911 continue;
3912
3913 unsigned &Count = ValueCounts[V];
3914 if (0 == Count)
3915 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3916 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3917
3918 // Is this value dominant? In case of a tie, prefer the highest element as
3919 // it's cheaper to insert near the beginning of a vector than it is at the
3920 // end.
3921 if (++Count >= MostCommonCount) {
3922 DominantValue = V;
3923 MostCommonCount = Count;
3924 }
3925 }
3926
3927 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3928 unsigned NumDefElts = NumElts - NumUndefElts;
3929 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3930
3931 // Don't perform this optimization when optimizing for size, since
3932 // materializing elements and inserting them tends to cause code bloat.
3933 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3934 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3935 ((MostCommonCount > DominantValueCountThreshold) ||
3936 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3937 // Start by splatting the most common element.
3938 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3939
3940 DenseSet<SDValue> Processed{DominantValue};
3941
3942 // We can handle an insert into the last element (of a splat) via
3943 // v(f)slide1down. This is slightly better than the vslideup insert
3944 // lowering as it avoids the need for a vector group temporary. It
3945 // is also better than using vmerge.vx as it avoids the need to
3946 // materialize the mask in a vector register.
3947 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3948 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3949 LastOp != DominantValue) {
3950 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3951 auto OpCode =
3952 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3953 if (!VT.isFloatingPoint())
3954 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3955 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3956 LastOp, Mask, VL);
3957 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3958 Processed.insert(LastOp);
3959 }
3960
3961 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3962 for (const auto &OpIdx : enumerate(Op->ops())) {
3963 const SDValue &V = OpIdx.value();
3964 if (V.isUndef() || !Processed.insert(V).second)
3965 continue;
3966 if (ValueCounts[V] == 1) {
3967 Vec = DAG.getInsertVectorElt(DL, Vec, V, OpIdx.index());
3968 } else {
3969 // Blend in all instances of this value using a VSELECT, using a
3970 // mask where each bit signals whether that element is the one
3971 // we're after.
3972         SmallVector<SDValue> Ops;
3973         transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3974 return DAG.getConstant(V == V1, DL, XLenVT);
3975 });
3976 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3977 DAG.getBuildVector(SelMaskTy, DL, Ops),
3978 DAG.getSplatBuildVector(VT, DL, V), Vec);
3979 }
3980 }
3981
3982 return Vec;
3983 }
3984
3985 return SDValue();
3986}
3987
3988 static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
3989                                            const RISCVSubtarget &Subtarget) {
3990 MVT VT = Op.getSimpleValueType();
3991 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3992
3993 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3994
3995 SDLoc DL(Op);
3996 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3997
3998 MVT XLenVT = Subtarget.getXLenVT();
3999 unsigned NumElts = Op.getNumOperands();
4000
4001 if (VT.getVectorElementType() == MVT::i1) {
4002 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
4003 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
4004 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
4005 }
4006
4007 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
4008 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
4009 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
4010 }
4011
4012 // Lower constant mask BUILD_VECTORs via an integer vector type, in
4013 // scalar integer chunks whose bit-width depends on the number of mask
4014 // bits and XLEN.
4015 // First, determine the most appropriate scalar integer type to use. This
4016 // is at most XLenVT, but may be shrunk to a smaller vector element type
4017 // according to the size of the final vector - use i8 chunks rather than
4018 // XLenVT if we're producing a v8i1. This results in more consistent
4019 // codegen across RV32 and RV64.
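// For example, the v8i1 constant <1,0,1,1,0,0,1,0> can be materialized as
// the single i8 constant 0b01001101 in a v1i8 build_vector and then bitcast
// back to v8i1.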
4020 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
4021 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
4022 // If we have to use more than one INSERT_VECTOR_ELT then this
4023 // optimization is likely to increase code size; avoid performing it in
4024 // such a case. We can use a load from a constant pool in this case.
4025 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
4026 return SDValue();
4027 // Now we can create our integer vector type. Note that it may be larger
4028 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
4029 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
4030 MVT IntegerViaVecVT =
4031 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
4032 IntegerViaVecElts);
4033
4034 uint64_t Bits = 0;
4035 unsigned BitPos = 0, IntegerEltIdx = 0;
4036 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
4037
4038 for (unsigned I = 0; I < NumElts;) {
4039 SDValue V = Op.getOperand(I);
4040 bool BitValue = !V.isUndef() && V->getAsZExtVal();
4041 Bits |= ((uint64_t)BitValue << BitPos);
4042 ++BitPos;
4043 ++I;
4044
4045 // Once we accumulate enough bits to fill our scalar type or process the
4046 // last element, insert into our vector and clear our accumulated data.
4047 if (I % NumViaIntegerBits == 0 || I == NumElts) {
4048 if (NumViaIntegerBits <= 32)
4049 Bits = SignExtend64<32>(Bits);
4050 SDValue Elt = DAG.getSignedConstant(Bits, DL, XLenVT);
4051 Elts[IntegerEltIdx] = Elt;
4052 Bits = 0;
4053 BitPos = 0;
4054 IntegerEltIdx++;
4055 }
4056 }
4057
4058 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
4059
4060 if (NumElts < NumViaIntegerBits) {
4061 // If we're producing a smaller vector than our minimum legal integer
4062 // type, bitcast to the equivalent (known-legal) mask type, and extract
4063 // our final mask.
4064 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
4065 Vec = DAG.getBitcast(MVT::v8i1, Vec);
4066 Vec = DAG.getExtractSubvector(DL, VT, Vec, 0);
4067 } else {
4068 // Else we must have produced an integer type with the same size as the
4069 // mask type; bitcast for the final result.
4070 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
4071 Vec = DAG.getBitcast(VT, Vec);
4072 }
4073
4074 return Vec;
4075 }
4076
4077   if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4078     unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4079 : RISCVISD::VMV_V_X_VL;
4080 if (!VT.isFloatingPoint())
4081 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4082 Splat =
4083 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4084 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4085 }
4086
4087 // Try and match index sequences, which we can lower to the vid instruction
4088 // with optional modifications. An all-undef vector is matched by
4089 // getSplatValue, above.
4090 if (SDValue Res = lowerBuildVectorViaVID(Op, DAG, Subtarget))
4091 return Res;
4092
4093 // For very small build_vectors, use a single scalar insert of a constant.
4094 // TODO: Base this on constant rematerialization cost, not size.
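// For example, a v4i8 constant <1, 2, 3, 4> can be materialized as the single
// i32 constant 0x04030201 inserted into element 0 of an undef vector and then
// bitcast back to v4i8.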
4095 const unsigned EltBitSize = VT.getScalarSizeInBits();
4096 if (VT.getSizeInBits() <= 32 &&
4097       ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
4098     MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
4099 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
4100 "Unexpected sequence type");
4101 // If we can use the original VL with the modified element type, this
4102 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
4103 // be moved into InsertVSETVLI?
4104 unsigned ViaVecLen =
4105 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
4106 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
4107
4108 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
4109 uint64_t SplatValue = 0;
4110 // Construct the amalgamated value at this larger vector type.
4111 for (const auto &OpIdx : enumerate(Op->op_values())) {
4112 const auto &SeqV = OpIdx.value();
4113 if (!SeqV.isUndef())
4114 SplatValue |=
4115 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
4116 }
4117
4118 // On RV64, sign-extend from 32 to 64 bits where possible in order to
4119     // achieve better constant materialization.
4120 // On RV32, we need to sign-extend to use getSignedConstant.
4121 if (ViaIntVT == MVT::i32)
4122 SplatValue = SignExtend64<32>(SplatValue);
4123
4124 SDValue Vec = DAG.getInsertVectorElt(
4125 DL, DAG.getUNDEF(ViaVecVT),
4126 DAG.getSignedConstant(SplatValue, DL, XLenVT), 0);
4127 if (ViaVecLen != 1)
4128 Vec = DAG.getExtractSubvector(DL, MVT::getVectorVT(ViaIntVT, 1), Vec, 0);
4129 return DAG.getBitcast(VT, Vec);
4130 }
4131
4132
4133 // Attempt to detect "hidden" splats, which only reveal themselves as splats
4134 // when re-interpreted as a vector with a larger element type. For example,
4135 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
4136 // could be instead splat as
4137 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
4138 // TODO: This optimization could also work on non-constant splats, but it
4139 // would require bit-manipulation instructions to construct the splat value.
4140 SmallVector<SDValue> Sequence;
4141 const auto *BV = cast<BuildVectorSDNode>(Op);
4142 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
4143       ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
4144       BV->getRepeatedSequence(Sequence) &&
4145 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
4146 unsigned SeqLen = Sequence.size();
4147 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
4148 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
4149 ViaIntVT == MVT::i64) &&
4150 "Unexpected sequence type");
4151
4152 // If we can use the original VL with the modified element type, this
4153 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
4154 // be moved into InsertVSETVLI?
4155 const unsigned RequiredVL = NumElts / SeqLen;
4156 const unsigned ViaVecLen =
4157 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
4158 NumElts : RequiredVL;
4159 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
4160
4161 unsigned EltIdx = 0;
4162 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
4163 uint64_t SplatValue = 0;
4164 // Construct the amalgamated value which can be splatted as this larger
4165 // vector type.
4166 for (const auto &SeqV : Sequence) {
4167 if (!SeqV.isUndef())
4168 SplatValue |=
4169 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
4170 EltIdx++;
4171 }
4172
4173 // On RV64, sign-extend from 32 to 64 bits where possible in order to
4174     // achieve better constant materialization.
4175 // On RV32, we need to sign-extend to use getSignedConstant.
4176 if (ViaIntVT == MVT::i32)
4177 SplatValue = SignExtend64<32>(SplatValue);
4178
4179 // Since we can't introduce illegal i64 types at this stage, we can only
4180 // perform an i64 splat on RV32 if it is its own sign-extended value. That
4181 // way we can use RVV instructions to splat.
4182 assert((ViaIntVT.bitsLE(XLenVT) ||
4183 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
4184 "Unexpected bitcast sequence");
4185 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
4186 SDValue ViaVL =
4187 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
4188 MVT ViaContainerVT =
4189 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
4190 SDValue Splat =
4191 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
4192 DAG.getUNDEF(ViaContainerVT),
4193 DAG.getSignedConstant(SplatValue, DL, XLenVT), ViaVL);
4194 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
4195 if (ViaVecLen != RequiredVL)
4196         Splat = DAG.getExtractSubvector(
4197             DL, MVT::getVectorVT(ViaIntVT, RequiredVL), Splat, 0);
4198 return DAG.getBitcast(VT, Splat);
4199 }
4200 }
4201
4202 // If the number of signbits allows, see if we can lower as a <N x i8>.
4203 // Our main goal here is to reduce LMUL (and thus work) required to
4204 // build the constant, but we will also narrow if the resulting
4205 // narrow vector is known to materialize cheaply.
4206 // TODO: We really should be costing the smaller vector. There are
4207 // profitable cases this misses.
4208 if (EltBitSize > 8 && VT.isInteger() &&
4209 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
4210 DAG.ComputeMaxSignificantBits(Op) <= 8) {
4211 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
4212 DL, Op->ops());
4213 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
4214 Source, DAG, Subtarget);
4215 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
4216 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4217 }
4218
4219 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4220 return Res;
4221
4222 // For constant vectors, use generic constant pool lowering. Otherwise,
4223 // we'd have to materialize constants in GPRs just to move them into the
4224 // vector.
4225 return SDValue();
4226}
4227
4228static unsigned getPACKOpcode(unsigned DestBW,
4229 const RISCVSubtarget &Subtarget) {
4230 switch (DestBW) {
4231 default:
4232 llvm_unreachable("Unsupported pack size");
4233 case 16:
4234 return RISCV::PACKH;
4235 case 32:
4236 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
4237 case 64:
4238 assert(Subtarget.is64Bit());
4239 return RISCV::PACK;
4240 }
4241}
4242
4243/// Double the element size of the build vector to reduce the number
4244/// of vslide1down in the build vector chain. In the worst case, this
4245/// trades three scalar operations for 1 vector operation. Scalar
4246/// operations are generally lower latency, and for out-of-order cores
4247/// we also benefit from additional parallelism.
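/// For example, a v8i8 build_vector can be rebuilt as a v4i16 build_vector,
/// with each i16 element holding (B << 8) | A for an adjacent pair (A, B);
/// this halves the number of scalar-to-vector slides needed.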
4248 static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG,
4249                                           const RISCVSubtarget &Subtarget) {
4250 SDLoc DL(Op);
4251 MVT VT = Op.getSimpleValueType();
4252 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4253 MVT ElemVT = VT.getVectorElementType();
4254 if (!ElemVT.isInteger())
4255 return SDValue();
4256
4257 // TODO: Relax these architectural restrictions, possibly with costing
4258 // of the actual instructions required.
4259 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
4260 return SDValue();
4261
4262 unsigned NumElts = VT.getVectorNumElements();
4263 unsigned ElemSizeInBits = ElemVT.getSizeInBits();
4264 if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
4265 NumElts % 2 != 0)
4266 return SDValue();
4267
4268 // Produce [B,A] packed into a type twice as wide. Note that all
4269 // scalars are XLenVT, possibly masked (see below).
4270 MVT XLenVT = Subtarget.getXLenVT();
4271 SDValue Mask = DAG.getConstant(
4272 APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
4273 auto pack = [&](SDValue A, SDValue B) {
4274 // Bias the scheduling of the inserted operations to near the
4275 // definition of the element - this tends to reduce register
4276 // pressure overall.
4277 SDLoc ElemDL(B);
4278 if (Subtarget.hasStdExtZbkb())
4279 // Note that we're relying on the high bits of the result being
4280 // don't care. For PACKW, the result is *sign* extended.
4281 return SDValue(
4282 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
4283 ElemDL, XLenVT, A, B),
4284 0);
4285
4286 A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
4287 B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
4288 SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
4289 return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
4290 DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt),
4292 };
4293
4294 SmallVector<SDValue> NewOperands;
4295 NewOperands.reserve(NumElts / 2);
4296 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
4297 NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
4298 assert(NumElts == NewOperands.size() * 2);
4299 MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
4300 MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
4301 return DAG.getNode(ISD::BITCAST, DL, VT,
4302 DAG.getBuildVector(WideVecVT, DL, NewOperands));
4303}
4304
4305 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
4306                                  const RISCVSubtarget &Subtarget) {
4307 MVT VT = Op.getSimpleValueType();
4308 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4309
4310 MVT EltVT = VT.getVectorElementType();
4311 MVT XLenVT = Subtarget.getXLenVT();
4312
4313 SDLoc DL(Op);
4314
4315 // Proper support for f16 requires Zvfh. bf16 always requires special
4316 // handling. We need to cast the scalar to integer and create an integer
4317 // build_vector.
4318 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || EltVT == MVT::bf16) {
4319 MVT IVT = VT.changeVectorElementType(MVT::i16);
4320 SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
4321 for (const auto &[I, U] : enumerate(Op->ops())) {
4322 SDValue Elem = U.get();
4323 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4324 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4325 // Called by LegalizeDAG, we need to use XLenVT operations since we
4326 // can't create illegal types.
4327 if (auto *C = dyn_cast<ConstantFPSDNode>(Elem)) {
4328 // Manually constant fold so the integer build_vector can be lowered
4329 // better. Waiting for DAGCombine will be too late.
4330 APInt V =
4331 C->getValueAPF().bitcastToAPInt().sext(XLenVT.getSizeInBits());
4332 NewOps[I] = DAG.getConstant(V, DL, XLenVT);
4333 } else {
4334 NewOps[I] = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Elem);
4335 }
4336 } else {
4337 // Called by scalar type legalizer, we can use i16.
4338 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
4339 }
4340 }
4341 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, IVT, NewOps);
4342 return DAG.getBitcast(VT, Res);
4343 }
4344
4345 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
4346       ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
4347     return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
4348
4349 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4350
4351 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4352
4353 if (VT.getVectorElementType() == MVT::i1) {
4354 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4355 // vector type, we have a legal equivalently-sized i8 type, so we can use
4356 // that.
4357 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
4358 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
4359
4360 SDValue WideVec;
4361     if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4362       // For a splat, perform a scalar truncate before creating the wider
4363 // vector.
4364 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
4365 DAG.getConstant(1, DL, Splat.getValueType()));
4366 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
4367 } else {
4368 SmallVector<SDValue, 8> Ops(Op->op_values());
4369 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
4370 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
4371 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
4372 }
4373
4374 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
4375 }
4376
4377   if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4378     if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
4379 return Gather;
4380
4381 // Prefer vmv.s.x/vfmv.s.f if legal to reduce work and register
4382 // pressure at high LMUL.
4383 if (all_of(Op->ops().drop_front(),
4384 [](const SDUse &U) { return U.get().isUndef(); })) {
4385 unsigned Opc =
4386 VT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
4387 if (!VT.isFloatingPoint())
4388 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4389 Splat = DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4390 Splat, VL);
4391 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4392 }
4393
4394 unsigned Opc =
4395 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
4396 if (!VT.isFloatingPoint())
4397 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4398 Splat =
4399 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4400 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4401 }
4402
4403 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4404 return Res;
4405
4406 // If we're compiling for an exact VLEN value, we can split our work per
4407 // register in the register group.
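// For example, with a known VLEN of 128, a v8i64 build_vector (an m4 value)
// can be built as four v2i64 (m1) build_vectors, each inserted into its own
// register of the group.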
4408 if (const auto VLen = Subtarget.getRealVLen();
4409 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4410 MVT ElemVT = VT.getVectorElementType();
4411 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4412 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4413 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4414 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4415 assert(M1VT == RISCVTargetLowering::getM1VT(M1VT));
4416
4417 // The following semantically builds up a fixed length concat_vector
4418 // of the component build_vectors. We eagerly lower to scalable and
4419 // insert_subvector here to avoid DAG combining it back to a large
4420 // build_vector.
4421 SmallVector<SDValue> BuildVectorOps(Op->ops());
4422 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4423 SDValue Vec = DAG.getUNDEF(ContainerVT);
4424 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4425 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4426 SDValue SubBV =
4427 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4428 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4429 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4430 Vec = DAG.getInsertSubvector(DL, Vec, SubBV, InsertIdx);
4431 }
4432 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4433 }
4434
4435 // If we're about to resort to vslide1down (or stack usage), pack our
4436 // elements into the widest scalar type we can. This will force a VL/VTYPE
4437 // toggle, but reduces the critical path, the number of vslide1down ops
4438 // required, and possibly enables scalar folds of the values.
4439 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4440 return Res;
4441
4442 // For m1 vectors, if we have non-undef values in both halves of our vector,
4443 // split the vector into low and high halves, build them separately, then
4444 // use a vselect to combine them. For long vectors, this cuts the critical
4445 // path of the vslide1down sequence in half, and gives us an opportunity
4446 // to special case each half independently. Note that we don't change the
4447 // length of the sub-vectors here, so if both fallback to the generic
4448 // vslide1down path, we should be able to fold the vselect into the final
4449 // vslidedown (for the undef tail) for the first half w/ masking.
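// For example, an m1-sized build_vector with all 16 lanes defined can be
// built as two build_vectors, one holding lanes 0-7 (the rest undef) and one
// holding lanes 8-15, combined with a vselect that picks the first vector for
// the low lanes.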
4450 unsigned NumElts = VT.getVectorNumElements();
4451 unsigned NumUndefElts =
4452 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4453 unsigned NumDefElts = NumElts - NumUndefElts;
4454 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4455 ContainerVT.bitsLE(RISCVTargetLowering::getM1VT(ContainerVT))) {
4456 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4457 SmallVector<SDValue> MaskVals;
4458 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4459 SubVecAOps.reserve(NumElts);
4460 SubVecBOps.reserve(NumElts);
4461 for (const auto &[Idx, U] : enumerate(Op->ops())) {
4462 SDValue Elem = U.get();
4463 if (Idx < NumElts / 2) {
4464 SubVecAOps.push_back(Elem);
4465 SubVecBOps.push_back(UndefElem);
4466 } else {
4467 SubVecAOps.push_back(UndefElem);
4468 SubVecBOps.push_back(Elem);
4469 }
4470 bool SelectMaskVal = (Idx < NumElts / 2);
4471 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4472 }
4473 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4474 MaskVals.size() == NumElts);
4475
4476 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4477 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4478 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4479 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4480 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4481 }
4482
4483 // Cap the cost at a value linear to the number of elements in the vector.
4484 // The default lowering is to use the stack. The vector store + scalar loads
4485   // is linear in VL. However, at high LMULs vslide1down and vslidedown end up
4486   // being (at least) linear in LMUL. As a result, using the vslidedown
4487   // lowering for every element ends up being VL*LMUL.
4488 // TODO: Should we be directly costing the stack alternative? Doing so might
4489 // give us a more accurate upper bound.
4490 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4491
4492 // TODO: unify with TTI getSlideCost.
4493 InstructionCost PerSlideCost = 1;
4494 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4495 default: break;
4496 case RISCVVType::LMUL_2:
4497 PerSlideCost = 2;
4498 break;
4499 case RISCVVType::LMUL_4:
4500 PerSlideCost = 4;
4501 break;
4502 case RISCVVType::LMUL_8:
4503 PerSlideCost = 8;
4504 break;
4505 }
4506
4507 // TODO: Should we be using the build instseq then cost + evaluate scheme
4508 // we use for integer constants here?
4509 unsigned UndefCount = 0;
4510 for (const SDValue &V : Op->ops()) {
4511 if (V.isUndef()) {
4512 UndefCount++;
4513 continue;
4514 }
4515 if (UndefCount) {
4516 LinearBudget -= PerSlideCost;
4517 UndefCount = 0;
4518 }
4519 LinearBudget -= PerSlideCost;
4520 }
4521 if (UndefCount) {
4522 LinearBudget -= PerSlideCost;
4523 }
4524
4525 if (LinearBudget < 0)
4526 return SDValue();
4527
4528 assert((!VT.isFloatingPoint() ||
4529 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4530 "Illegal type which will result in reserved encoding");
4531
4532 const unsigned Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
4533
4534 // General case: splat the first operand and slide other operands down one
4535 // by one to form a vector. Alternatively, if every operand is an
4536 // extraction from element 0 of a vector, we use that vector from the last
4537 // extraction as the start value and slide up instead of slide down. Such that
4538 // (1) we can avoid the initial splat (2) we can turn those vslide1up into
4539 // vslideup of 1 later and eliminate the vector to scalar movement, which is
4540 // something we cannot do with vslide1down/vslidedown.
4541 // Of course, using vslide1up/vslideup might increase the register pressure,
4542 // and that's why we conservatively limit to cases where every operand is an
4543 // extraction from the first element.
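// For example, <a, b, c, d> with no undef elements and no common extract
// source is lowered as: splat a, then vslide1down b, vslide1down c,
// vslide1down d.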
4544 SmallVector<SDValue> Operands(Op->op_begin(), Op->op_end());
4545 SDValue EVec;
4546 bool SlideUp = false;
4547 auto getVSlide = [&](EVT ContainerVT, SDValue Passthru, SDValue Vec,
4548 SDValue Offset, SDValue Mask, SDValue VL) -> SDValue {
4549 if (SlideUp)
4550 return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
4551 Mask, VL, Policy);
4552 return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
4553 Mask, VL, Policy);
4554 };
4555
4556   // The reason we don't use all_of here is that we're also capturing EVec
4557 // from the last non-undef operand. If the std::execution_policy of the
4558 // underlying std::all_of is anything but std::sequenced_policy we might
4559 // capture the wrong EVec.
4560 for (SDValue V : Operands) {
4561 using namespace SDPatternMatch;
4562 SlideUp = V.isUndef() || sd_match(V, m_ExtractElt(m_Value(EVec), m_Zero()));
4563 if (!SlideUp)
4564 break;
4565 }
4566
4567 // Do not slideup if the element type of EVec is different.
4568 if (SlideUp) {
4569 MVT EVecEltVT = EVec.getSimpleValueType().getVectorElementType();
4570 MVT ContainerEltVT = ContainerVT.getVectorElementType();
4571 if (EVecEltVT != ContainerEltVT)
4572 SlideUp = false;
4573 }
4574
4575 if (SlideUp) {
4576 MVT EVecContainerVT = EVec.getSimpleValueType();
4577 // Make sure the original vector has scalable vector type.
4578 if (EVecContainerVT.isFixedLengthVector()) {
4579 EVecContainerVT =
4580 getContainerForFixedLengthVector(DAG, EVecContainerVT, Subtarget);
4581 EVec = convertToScalableVector(EVecContainerVT, EVec, DAG, Subtarget);
4582 }
4583
4584 // Adapt EVec's type into ContainerVT.
4585 if (EVecContainerVT.getVectorMinNumElements() <
4586 ContainerVT.getVectorMinNumElements())
4587 EVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), EVec, 0);
4588 else
4589 EVec = DAG.getExtractSubvector(DL, ContainerVT, EVec, 0);
4590
4591 // Reverse the elements as we're going to slide up from the last element.
4592 std::reverse(Operands.begin(), Operands.end());
4593 }
4594
4595 SDValue Vec;
4596 UndefCount = 0;
4597 for (SDValue V : Operands) {
4598 if (V.isUndef()) {
4599 UndefCount++;
4600 continue;
4601 }
4602
4603 // Start our sequence with either a TA splat or extract source in the
4604 // hopes that hardware is able to recognize there's no dependency on the
4605 // prior value of our temporary register.
4606 if (!Vec) {
4607 if (SlideUp) {
4608 Vec = EVec;
4609 } else {
4610 Vec = DAG.getSplatVector(VT, DL, V);
4611 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4612 }
4613
4614 UndefCount = 0;
4615 continue;
4616 }
4617
4618 if (UndefCount) {
4619 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4620 Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
4621 VL);
4622 UndefCount = 0;
4623 }
4624
4625 unsigned Opcode;
4626 if (VT.isFloatingPoint())
4627 Opcode = SlideUp ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VFSLIDE1DOWN_VL;
4628 else
4629 Opcode = SlideUp ? RISCVISD::VSLIDE1UP_VL : RISCVISD::VSLIDE1DOWN_VL;
4630
4631 if (!VT.isFloatingPoint())
4632 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4633 Vec = DAG.getNode(Opcode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4634 V, Mask, VL);
4635 }
4636 if (UndefCount) {
4637 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4638 Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
4639 VL);
4640 }
4641 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4642}
4643
4644static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4645                                    SDValue Lo, SDValue Hi, SDValue VL,
4646                                    SelectionDAG &DAG) {
4647 if (!Passthru)
4648 Passthru = DAG.getUNDEF(VT);
4649   if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4650     int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4651 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4652 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4653 // node in order to try and match RVV vector/scalar instructions.
4654 if ((LoC >> 31) == HiC)
4655 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4656
4657 // Use vmv.v.x with EEW=32. Use either a vsetivli or vsetvli to change
4658 // VL. This can temporarily increase VL if VL less than VLMAX.
4659 if (LoC == HiC) {
4660 SDValue NewVL;
4661 if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4662 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4663 else
4664 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4665 MVT InterVT =
4666 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4667 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4668 DAG.getUNDEF(InterVT), Lo, NewVL);
4669 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4670 }
4671 }
4672
4673 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4674 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4675 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4676 Hi.getConstantOperandVal(1) == 31)
4677 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4678
4679 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4680 // even if it might be sign extended.
4681 if (Hi.isUndef())
4682 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4683
4684 // Fall back to a stack store and stride x0 vector load.
4685 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4686 Hi, VL);
4687}
4688
4689// Called by type legalization to handle splat of i64 on RV32.
4690// FIXME: We can optimize this when the type has sign or zero bits in one
4691// of the halves.
4692static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4693 SDValue Scalar, SDValue VL,
4694 SelectionDAG &DAG) {
4695 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4696 SDValue Lo, Hi;
4697 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4698 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4699}
4700
4701// This function lowers a splat of a scalar operand Splat with the vector
4702// length VL. It ensures the final sequence is type legal, which is useful when
4703// lowering a splat after type legalization.
4704static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4705 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4706 const RISCVSubtarget &Subtarget) {
4707 bool HasPassthru = Passthru && !Passthru.isUndef();
4708 if (!HasPassthru && !Passthru)
4709 Passthru = DAG.getUNDEF(VT);
4710
4711 MVT EltVT = VT.getVectorElementType();
4712 MVT XLenVT = Subtarget.getXLenVT();
4713
4714 if (VT.isFloatingPoint()) {
4715 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
4716 EltVT == MVT::bf16) {
4717 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4718 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
4719 Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar);
4720 else
4721 Scalar = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Scalar);
4722 MVT IVT = VT.changeVectorElementType(MVT::i16);
4723 Passthru = DAG.getNode(ISD::BITCAST, DL, IVT, Passthru);
4724 SDValue Splat =
4725 lowerScalarSplat(Passthru, Scalar, VL, IVT, DL, DAG, Subtarget);
4726 return DAG.getNode(ISD::BITCAST, DL, VT, Splat);
4727 }
4728 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4729 }
4730
4731 // Simplest case is that the operand needs to be promoted to XLenVT.
4732 if (Scalar.getValueType().bitsLE(XLenVT)) {
4733 // If the operand is a constant, sign extend to increase our chances
4734 // of being able to use a .vi instruction. ANY_EXTEND would become a
4735     // zero extend and the simm5 check in isel would fail.
4736 // FIXME: Should we ignore the upper bits in isel instead?
4737 unsigned ExtOpc =
4738         isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4739     Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4740 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4741 }
4742
4743 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4744 "Unexpected scalar for splat lowering!");
4745
4746 if (isOneConstant(VL) && isNullConstant(Scalar))
4747 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4748 DAG.getConstant(0, DL, XLenVT), VL);
4749
4750 // Otherwise use the more complicated splatting algorithm.
4751 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4752}
4753
4754// This function lowers an insert of a scalar operand Scalar into lane
4755// 0 of the vector regardless of the value of VL. The contents of the
4756// remaining lanes of the result vector are unspecified. VL is assumed
4757// to be non-zero.
4758 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4759                                  const SDLoc &DL, SelectionDAG &DAG,
4760 const RISCVSubtarget &Subtarget) {
4761 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4762
4763 const MVT XLenVT = Subtarget.getXLenVT();
4764 SDValue Passthru = DAG.getUNDEF(VT);
4765
4766 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4767 isNullConstant(Scalar.getOperand(1))) {
4768 SDValue ExtractedVal = Scalar.getOperand(0);
4769 // The element types must be the same.
4770 if (ExtractedVal.getValueType().getVectorElementType() ==
4771 VT.getVectorElementType()) {
4772 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4773 MVT ExtractedContainerVT = ExtractedVT;
4774 if (ExtractedContainerVT.isFixedLengthVector()) {
4775 ExtractedContainerVT = getContainerForFixedLengthVector(
4776 DAG, ExtractedContainerVT, Subtarget);
4777 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4778 ExtractedVal, DAG, Subtarget);
4779 }
4780 if (ExtractedContainerVT.bitsLE(VT))
4781 return DAG.getInsertSubvector(DL, Passthru, ExtractedVal, 0);
4782 return DAG.getExtractSubvector(DL, VT, ExtractedVal, 0);
4783 }
4784 }
4785
4786 if (VT.isFloatingPoint())
4787 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
4788 VL);
4789
4790 // Avoid the tricky legalization cases by falling back to using the
4791 // splat code which already handles it gracefully.
4792 if (!Scalar.getValueType().bitsLE(XLenVT))
4793 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4794 DAG.getConstant(1, DL, XLenVT),
4795 VT, DL, DAG, Subtarget);
4796
4797 // If the operand is a constant, sign extend to increase our chances
4798 // of being able to use a .vi instruction. ANY_EXTEND would become a
4799   // zero extend and the simm5 check in isel would fail.
4800 // FIXME: Should we ignore the upper bits in isel instead?
4801 unsigned ExtOpc =
4802       isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4803   Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4804 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
4805 VL);
4806}
4807
4808/// If concat_vector(V1,V2) could be folded away to some existing
4809/// vector source, return it. Note that the source may be larger
4810 /// than the requested concat_vector (i.e. an extract_subvector
4811/// might be required.)
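/// For example, (concat_vectors (extract_subvector %src, 0),
/// (extract_subvector %src, N)) where the two extracts cover the low and high
/// halves of %src folds to %src itself.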
4812 static SDValue foldConcatVector(SDValue V1, SDValue V2) {
4813   EVT VT = V1.getValueType();
4814 assert(VT == V2.getValueType() && "argument types must match");
4815   // Both inputs must be extracts.
4816 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4817       V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4818     return SDValue();
4819
4820 // Extracting from the same source.
4821 SDValue Src = V1.getOperand(0);
4822 if (Src != V2.getOperand(0) ||
4823 VT.isScalableVector() != Src.getValueType().isScalableVector())
4824 return SDValue();
4825
4826 // The extracts must extract the two halves of the source.
4827 if (V1.getConstantOperandVal(1) != 0 ||
4828       V2.getConstantOperandVal(1) != VT.getVectorMinNumElements())
4829     return SDValue();
4830
4831 return Src;
4832}
4833
4834// Can this shuffle be performed on exactly one (possibly larger) input?
4836
4837 if (V2.isUndef())
4838 return V1;
4839
4840 unsigned NumElts = VT.getVectorNumElements();
4841 // Src needs to have twice the number of elements.
4842 // TODO: Update shuffle lowering to add the extract subvector
4843 if (SDValue Src = foldConcatVector(V1, V2);
4844 Src && Src.getValueType().getVectorNumElements() == (NumElts * 2))
4845 return Src;
4846
4847 return SDValue();
4848}
4849
4850/// Is this shuffle interleaving contiguous elements from one vector into the
4851/// even elements and contiguous elements from another vector into the odd
4852/// elements. \p EvenSrc will contain the element that should be in the first
4853/// even element. \p OddSrc will contain the element that should be in the first
4854/// odd element. These can be the first element in a source or the element half
4855/// way through the source.
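/// For example, for a v8i8 shuffle the mask <0,8,1,9,2,10,3,11> is matched
/// with EvenSrc == 0 and OddSrc == 8, i.e. the low halves of the two source
/// vectors.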
4856static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4857 int &OddSrc, const RISCVSubtarget &Subtarget) {
4858 // We need to be able to widen elements to the next larger integer type or
4859 // use the zip2a instruction at e64.
4860 if (VT.getScalarSizeInBits() >= Subtarget.getELen() &&
4861 !Subtarget.hasVendorXRivosVizip())
4862 return false;
4863
4864 int Size = Mask.size();
4865 int NumElts = VT.getVectorNumElements();
4866 assert(Size == (int)NumElts && "Unexpected mask size");
4867
4868 SmallVector<unsigned, 2> StartIndexes;
4869 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4870 return false;
4871
4872 EvenSrc = StartIndexes[0];
4873 OddSrc = StartIndexes[1];
4874
4875 // One source should be low half of first vector.
4876 if (EvenSrc != 0 && OddSrc != 0)
4877 return false;
4878
4879 // Subvectors will be subtracted from either at the start of the two input
4880 // vectors, or at the start and middle of the first vector if it's a unary
4881 // interleave.
4882 // In both cases, HalfNumElts will be extracted.
4883 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4884 // we'll create an illegal extract_subvector.
4885 // FIXME: We could support other values using a slidedown first.
4886 int HalfNumElts = NumElts / 2;
4887 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4888}
4889
4890/// Is this mask representing a masked combination of two slides?
4891 static bool isMaskedSlidePair(ArrayRef<int> Mask,
4892                               std::array<std::pair<int, int>, 2> &SrcInfo) {
4893 if (!llvm::isMaskedSlidePair(Mask, Mask.size(), SrcInfo))
4894 return false;
4895
4896 // Avoid matching vselect idioms
4897 if (SrcInfo[0].second == 0 && SrcInfo[1].second == 0)
4898 return false;
4899 // Prefer vslideup as the second instruction, and identity
4900 // only as the initial instruction.
4901 if ((SrcInfo[0].second > 0 && SrcInfo[1].second < 0) ||
4902 SrcInfo[1].second == 0)
4903 std::swap(SrcInfo[0], SrcInfo[1]);
4904 assert(SrcInfo[0].first != -1 && "Must find one slide");
4905 return true;
4906}
4907
4908// Exactly matches the semantics of a previously existing custom matcher
4909// to allow migration to new matcher without changing output.
4910static bool isElementRotate(const std::array<std::pair<int, int>, 2> &SrcInfo,
4911 unsigned NumElts) {
4912 if (SrcInfo[1].first == -1)
4913 return true;
4914 return SrcInfo[0].second < 0 && SrcInfo[1].second > 0 &&
4915 SrcInfo[1].second - SrcInfo[0].second == (int)NumElts;
4916}
4917
4918static bool isAlternating(const std::array<std::pair<int, int>, 2> &SrcInfo,
4919 ArrayRef<int> Mask, unsigned Factor,
4920 bool RequiredPolarity) {
4921 int NumElts = Mask.size();
4922 for (const auto &[Idx, M] : enumerate(Mask)) {
4923 if (M < 0)
4924 continue;
4925 int Src = M >= NumElts;
4926 int Diff = (int)Idx - (M % NumElts);
4927 bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
4928 assert(C != (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
4929 "Must match exactly one of the two slides");
4930 if (RequiredPolarity != (C == (Idx / Factor) % 2))
4931 return false;
4932 }
4933 return true;
4934}
4935
4936/// Given a shuffle which can be represented as a pair of two slides,
4937/// see if it is a zipeven idiom. Zipeven is:
4938/// vs2: a0 a1 a2 a3
4939/// vs1: b0 b1 b2 b3
4940/// vd: a0 b0 a2 b2
4941static bool isZipEven(const std::array<std::pair<int, int>, 2> &SrcInfo,
4942 ArrayRef<int> Mask, unsigned &Factor) {
4943 Factor = SrcInfo[1].second;
4944 return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
4945 Mask.size() % Factor == 0 &&
4946 isAlternating(SrcInfo, Mask, Factor, true);
4947}
4948
4949/// Given a shuffle which can be represented as a pair of two slides,
4950/// see if it is a zipodd idiom. Zipodd is:
4951/// vs2: a0 a1 a2 a3
4952/// vs1: b0 b1 b2 b3
4953/// vd: a1 b1 a3 b3
4954/// Note that the operand order is swapped due to the way we canonicalize
4955 /// the slides, so SrcInfo[0] is vs1, and SrcInfo[1] is vs2.
4956static bool isZipOdd(const std::array<std::pair<int, int>, 2> &SrcInfo,
4957 ArrayRef<int> Mask, unsigned &Factor) {
4958 Factor = -SrcInfo[1].second;
4959 return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
4960 Mask.size() % Factor == 0 &&
4961 isAlternating(SrcInfo, Mask, Factor, false);
4962}
4963
4964// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
4965// 2, 4, or 8, and the integer type Factor-times larger than VT's
4966// element type must be a legal element type.
4967// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0)
4968// -> [p, q, r, s] (Factor=2, Index=1)
4969static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT,
4970 SDValue Src, unsigned Factor,
4971 unsigned Index, SelectionDAG &DAG) {
4972 unsigned EltBits = VT.getScalarSizeInBits();
4973 ElementCount SrcEC = Src.getValueType().getVectorElementCount();
4974 MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor),
4975 SrcEC.divideCoefficientBy(Factor));
4976 MVT ResVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits),
4977 SrcEC.divideCoefficientBy(Factor));
4978 Src = DAG.getBitcast(WideSrcVT, Src);
4979
4980 unsigned Shift = Index * EltBits;
4981 SDValue Res = DAG.getNode(ISD::SRL, DL, WideSrcVT, Src,
4982 DAG.getConstant(Shift, DL, WideSrcVT));
4983 Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT, Res);
4984 MVT CastVT = ResVT.changeVectorElementType(VT.getVectorElementType());
4985 Res = DAG.getBitcast(CastVT, Res);
4986 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), Res, 0);
4987}
4988
4989/// Match a single source shuffle which is an identity except that some
4990/// particular element is repeated. This can be lowered as a masked
4991/// vrgather.vi/vx. Note that the two source form of this is handled
4992/// by the recursive splitting logic and doesn't need special handling.
4993static SDValue lowerVECTOR_SHUFFLEAsVRGatherVX(ShuffleVectorSDNode *SVN,
4994 const RISCVSubtarget &Subtarget,
4995 SelectionDAG &DAG) {
4996
4997 SDLoc DL(SVN);
4998 MVT VT = SVN->getSimpleValueType(0);
4999 SDValue V1 = SVN->getOperand(0);
5000 assert(SVN->getOperand(1).isUndef());
5001 ArrayRef<int> Mask = SVN->getMask();
5002 const unsigned NumElts = VT.getVectorNumElements();
5003 MVT XLenVT = Subtarget.getXLenVT();
5004
5005 std::optional<int> SplatIdx;
5006 for (auto [I, M] : enumerate(Mask)) {
5007 if (M == -1 || I == (unsigned)M)
5008 continue;
5009 if (SplatIdx && *SplatIdx != M)
5010 return SDValue();
5011 SplatIdx = M;
5012 }
5013
5014 if (!SplatIdx)
5015 return SDValue();
5016
5017 SmallVector<SDValue> MaskVals;
5018 for (int MaskIndex : Mask) {
5019 bool SelectMaskVal = MaskIndex == *SplatIdx;
5020 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5021 }
5022 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5023 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5024 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5025 SDValue Splat = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT),
5026 SmallVector<int>(NumElts, *SplatIdx));
5027 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, Splat, V1);
5028}
5029
5030// Lower the following shuffle to vslidedown.
5031// a)
5032// t49: v8i8 = extract_subvector t13, Constant:i64<0>
5033// t109: v8i8 = extract_subvector t13, Constant:i64<8>
5034// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
5035// b)
5036// t69: v16i16 = extract_subvector t68, Constant:i64<0>
5037// t23: v8i16 = extract_subvector t69, Constant:i64<0>
5038// t29: v4i16 = extract_subvector t23, Constant:i64<4>
5039// t26: v8i16 = extract_subvector t69, Constant:i64<8>
5040// t30: v4i16 = extract_subvector t26, Constant:i64<0>
5041// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
5042static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
5043 SDValue V1, SDValue V2,
5044 ArrayRef<int> Mask,
5045 const RISCVSubtarget &Subtarget,
5046 SelectionDAG &DAG) {
5047 auto findNonEXTRACT_SUBVECTORParent =
5048 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
5049 uint64_t Offset = 0;
5050 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
5051 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
5052 // a scalable vector. But we don't want to match the case.
5053 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
5054 Offset += Parent.getConstantOperandVal(1);
5055 Parent = Parent.getOperand(0);
5056 }
5057 return std::make_pair(Parent, Offset);
5058 };
5059
5060 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
5061 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
5062
5063 // Extracting from the same source.
5064 SDValue Src = V1Src;
5065 if (Src != V2Src)
5066 return SDValue();
5067
5068 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
5069 SmallVector<int, 16> NewMask(Mask);
5070 for (size_t i = 0; i != NewMask.size(); ++i) {
5071 if (NewMask[i] == -1)
5072 continue;
5073
5074 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
5075 NewMask[i] = NewMask[i] + V1IndexOffset;
5076 } else {
5077 // Minus NewMask.size() is needed. Otherwise, the b case would be
5078 // <5,6,7,12> instead of <5,6,7,8>.
5079 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
5080 }
5081 }
5082
5083 // First index must be known and non-zero. It will be used as the slidedown
5084 // amount.
5085 if (NewMask[0] <= 0)
5086 return SDValue();
5087
5088 // NewMask must also be contiguous.
5089 for (unsigned i = 1; i != NewMask.size(); ++i)
5090 if (NewMask[i - 1] + 1 != NewMask[i])
5091 return SDValue();
5092
5093 MVT XLenVT = Subtarget.getXLenVT();
5094 MVT SrcVT = Src.getSimpleValueType();
5095 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
5096 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
5097 SDValue Slidedown =
5098 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
5099 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
5100 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
5101 return DAG.getExtractSubvector(
5102 DL, VT, convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget), 0);
5103}
5104
5105// Because vslideup leaves the destination elements at the start intact, we can
5106// use it to perform shuffles that insert subvectors:
5107//
5108// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
5109// ->
5110// vsetvli zero, 8, e8, mf2, ta, ma
5111// vslideup.vi v8, v9, 4
5112//
5113// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
5114// ->
5115// vsetvli zero, 5, e8, mf2, tu, ma
5116// vslideup.vi v8, v9, 2
5117static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
5118 SDValue V1, SDValue V2,
5119 ArrayRef<int> Mask,
5120 const RISCVSubtarget &Subtarget,
5121 SelectionDAG &DAG) {
5122 unsigned NumElts = VT.getVectorNumElements();
5123 int NumSubElts, Index;
5124 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
5125 Index))
5126 return SDValue();
5127
5128 bool OpsSwapped = Mask[Index] < (int)NumElts;
5129 SDValue InPlace = OpsSwapped ? V2 : V1;
5130 SDValue ToInsert = OpsSwapped ? V1 : V2;
5131
5132 MVT XLenVT = Subtarget.getXLenVT();
5133 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5134 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
5135 // We slide up by the index that the subvector is being inserted at, and set
5136 // VL to the index + the number of elements being inserted.
5137 unsigned Policy =
5138 RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED | RISCVVType::MASK_AGNOSTIC;
5139 // If we're adding a suffix to the in place vector, i.e. inserting right
5140 // up to the very end of it, then we don't actually care about the tail.
5141 if (NumSubElts + Index >= (int)NumElts)
5142 Policy |= RISCVVType::TAIL_AGNOSTIC;
5143
5144 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
5145 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
5146 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
5147
5148 SDValue Res;
5149 // If we're inserting into the lowest elements, use a tail undisturbed
5150 // vmv.v.v.
5151 if (Index == 0)
5152 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
5153 VL);
5154 else
5155 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
5156 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
5157 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5158}
5159
5160/// Match v(f)slide1up/down idioms. These operations involve sliding
5161/// N-1 elements to make room for an inserted scalar at one end.
5162static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
5163 SDValue V1, SDValue V2,
5164 ArrayRef<int> Mask,
5165 const RISCVSubtarget &Subtarget,
5166 SelectionDAG &DAG) {
5167 bool OpsSwapped = false;
5168 if (!isa<BuildVectorSDNode>(V1)) {
5169 if (!isa<BuildVectorSDNode>(V2))
5170 return SDValue();
5171 std::swap(V1, V2);
5172 OpsSwapped = true;
5173 }
5174 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
5175 if (!Splat)
5176 return SDValue();
5177
5178 // Return true if the mask could describe a slide of Mask.size() - 1
5179 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
5180 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
5181 const unsigned S = (Offset > 0) ? 0 : -Offset;
5182 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
5183 for (unsigned i = S; i != E; ++i)
5184 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
5185 return false;
5186 return true;
5187 };
5188
5189 const unsigned NumElts = VT.getVectorNumElements();
5190 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
5191 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
5192 return SDValue();
5193
5194 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
5195 // Inserted lane must come from the splat; an undef scalar is legal but not profitable.
5196 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
5197 return SDValue();
5198
5199 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5200 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5201
5202 // zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
5203 // vslide1{down,up}.vx instead.
5204 if (VT.getVectorElementType() == MVT::bf16 ||
5205 (VT.getVectorElementType() == MVT::f16 &&
5206 !Subtarget.hasVInstructionsF16())) {
5207 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
5208 Splat =
5209 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Splat);
5210 V2 = DAG.getBitcast(
5211 IntVT, convertToScalableVector(ContainerVT, V2, DAG, Subtarget));
5212 SDValue Vec = DAG.getNode(
5213 IsVSlidedown ? RISCVISD::VSLIDE1DOWN_VL : RISCVISD::VSLIDE1UP_VL, DL,
5214 IntVT, DAG.getUNDEF(IntVT), V2, Splat, TrueMask, VL);
5215 Vec = DAG.getBitcast(ContainerVT, Vec);
5216 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5217 }
5218
5219 auto OpCode = IsVSlidedown ?
5220 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
5221 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
5222 if (!VT.isFloatingPoint())
5223 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
5224 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
5225 DAG.getUNDEF(ContainerVT),
5226 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
5227 Splat, TrueMask, VL);
5228 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5229}
5230
5231/// Match a mask which "spreads" the leading elements of a vector evenly
5232/// across the result. Factor is the spread amount, and Index is the
5233/// offset applied (on success, Index < Factor). This is the inverse
5234/// of a deinterleave with the same Factor and Index. This is analogous
5235/// to an interleave, except that all but one lane is undef.
5236bool RISCVTargetLowering::isSpreadMask(ArrayRef<int> Mask, unsigned Factor,
5237 unsigned &Index) {
5238 SmallVector<bool> LaneIsUndef(Factor, true);
5239 for (unsigned i = 0; i < Mask.size(); i++)
5240 LaneIsUndef[i % Factor] &= (Mask[i] == -1);
5241
5242 bool Found = false;
5243 for (unsigned i = 0; i < Factor; i++) {
5244 if (LaneIsUndef[i])
5245 continue;
5246 if (Found)
5247 return false;
5248 Index = i;
5249 Found = true;
5250 }
5251 if (!Found)
5252 return false;
5253
5254 for (unsigned i = 0; i < Mask.size() / Factor; i++) {
5255 unsigned j = i * Factor + Index;
5256 if (Mask[j] != -1 && (unsigned)Mask[j] != i)
5257 return false;
5258 }
5259 return true;
5260}
5261
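// Lower a zip/unzip idiom to the corresponding XRivosVizip VL node. The
// operands are bitcast to integers and, for fixed-length types, converted to
// scalable containers; a single-source unzip whose container is wider than
// one register is narrowed to operate on a smaller type.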
5262static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1,
5263 const SDLoc &DL, SelectionDAG &DAG,
5264 const RISCVSubtarget &Subtarget) {
5265 assert(RISCVISD::RI_VZIPEVEN_VL == Opc || RISCVISD::RI_VZIPODD_VL == Opc ||
5266 RISCVISD::RI_VZIP2A_VL == Opc || RISCVISD::RI_VZIP2B_VL == Opc ||
5267 RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc);
5268 assert(Op0.getSimpleValueType() == Op1.getSimpleValueType());
5269
5270 MVT VT = Op0.getSimpleValueType();
5271 MVT IntVT = VT.changeVectorElementTypeToInteger();
5272 Op0 = DAG.getBitcast(IntVT, Op0);
5273 Op1 = DAG.getBitcast(IntVT, Op1);
5274
5275 MVT ContainerVT = IntVT;
5276 if (VT.isFixedLengthVector()) {
5277 ContainerVT = getContainerForFixedLengthVector(DAG, IntVT, Subtarget);
5278 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
5279 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
5280 }
5281
5282 MVT InnerVT = ContainerVT;
5283 auto [Mask, VL] = getDefaultVLOps(IntVT, InnerVT, DL, DAG, Subtarget);
5284 if (Op1.isUndef() &&
5285 ContainerVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVT)) &&
5286 (RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc)) {
5287 InnerVT = ContainerVT.getHalfNumVectorElementsVT();
5288 VL = DAG.getConstant(VT.getVectorNumElements() / 2, DL,
5289 Subtarget.getXLenVT());
5290 Mask = getAllOnesMask(InnerVT, VL, DL, DAG);
5291 unsigned HighIdx = InnerVT.getVectorElementCount().getKnownMinValue();
5292 Op1 = DAG.getExtractSubvector(DL, InnerVT, Op0, HighIdx);
5293 Op0 = DAG.getExtractSubvector(DL, InnerVT, Op0, 0);
5294 }
5295
5296 SDValue Passthru = DAG.getUNDEF(InnerVT);
5297 SDValue Res = DAG.getNode(Opc, DL, InnerVT, Op0, Op1, Passthru, Mask, VL);
5298 if (InnerVT.bitsLT(ContainerVT))
5299 Res = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Res, 0);
5300 if (IntVT.isFixedLengthVector())
5301 Res = convertFromScalableVector(IntVT, Res, DAG, Subtarget);
5302 Res = DAG.getBitcast(VT, Res);
5303 return Res;
5304}
5305
5306// Given a vector a, b, c, d return a vector Factor times longer
5307// with Factor-1 undefs between elements. Ex:
5308// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
5309// undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
5310static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
5311 const SDLoc &DL, SelectionDAG &DAG) {
5312
5313 MVT VT = V.getSimpleValueType();
5314 unsigned EltBits = VT.getScalarSizeInBits();
5315 ElementCount EC = VT.getVectorElementCount();
5316 V = DAG.getBitcast(VT.changeTypeToInteger(), V);
5317
5318 MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
5319
5320 SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
5321 // TODO: On rv32, the constant becomes a splat_vector_parts which does not
5322 // allow the SHL to fold away if Index is 0.
5323 if (Index != 0)
5324 Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
5325 DAG.getConstant(EltBits * Index, DL, WideVT));
5326 // Make sure to use original element type
5327 MVT ResultVT = MVT::getVectorVT(VT.getVectorElementType(),
5328 EC.multiplyCoefficientBy(Factor));
5329 return DAG.getBitcast(ResultVT, Result);
5330}
5331
5332// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
5333// to create an interleaved vector of <[vscale x] n*2 x ty>.
5334// This requires that the size of ty is less than the subtarget's maximum ELEN.
5335static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
5336 const SDLoc &DL, SelectionDAG &DAG,
5337 const RISCVSubtarget &Subtarget) {
5338
5339 // FIXME: Not only does this optimize the code, it fixes some correctness
5340 // issues because MIR does not have freeze.
5341 if (EvenV.isUndef())
5342 return getWideningSpread(OddV, 2, 1, DL, DAG);
5343 if (OddV.isUndef())
5344 return getWideningSpread(EvenV, 2, 0, DL, DAG);
5345
5346 MVT VecVT = EvenV.getSimpleValueType();
5347 MVT VecContainerVT = VecVT; // <vscale x n x ty>
5348 // Convert fixed vectors to scalable if needed
5349 if (VecContainerVT.isFixedLengthVector()) {
5350 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
5351 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
5352 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
5353 }
5354
5355 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
5356
5357 // We're working with a vector of the same size as the resulting
5358 // interleaved vector, but with half the number of elements and
5359 // twice the SEW (Hence the restriction on not using the maximum
5360 // ELEN)
5361 MVT WideVT =
5362 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
5363 VecVT.getVectorElementCount());
5364 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
5365 if (WideContainerVT.isFixedLengthVector())
5366 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
5367
5368 // Bitcast the input vectors to integers in case they are FP
5369 VecContainerVT = VecContainerVT.changeTypeToInteger();
5370 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
5371 OddV = DAG.getBitcast(VecContainerVT, OddV);
5372
5373 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
5374 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
5375
5376 SDValue Interleaved;
5377 if (Subtarget.hasStdExtZvbb()) {
5378 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
5379 SDValue OffsetVec =
5380 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
5381 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
5382 OffsetVec, Passthru, Mask, VL);
5383 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
5384 Interleaved, EvenV, Passthru, Mask, VL);
5385 } else {
5386 // FIXME: We should freeze the odd vector here. We already handled the case
5387 // of provably undef/poison above.
5388
5389 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
5390 // vwaddu.vv
5391 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
5392 OddV, Passthru, Mask, VL);
5393
5394 // Then multiply OddV by (2^VecVT.getScalarSizeInBits()) - 1, the all-ones value
5395 SDValue AllOnesVec = DAG.getSplatVector(
5396 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
5397 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
5398 OddV, AllOnesVec, Passthru, Mask, VL);
5399
5400 // Add the two together so we get
5401 // (OddV * 0xff...ff) + (OddV + EvenV)
5402 // = (OddV * 0x100...00) + EvenV
5403 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
5404 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
5405 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
5406 Interleaved, OddsMul, Passthru, Mask, VL);
5407 }
5408
5409 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
5410 MVT ResultContainerVT = MVT::getVectorVT(
5411 VecVT.getVectorElementType(), // Make sure to use original type
5412 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
5413 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
5414
5415 // Convert back to a fixed vector if needed
5416 MVT ResultVT =
5417 MVT::getVectorVT(VecVT.getVectorElementType(),
5418 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
5419 if (ResultVT.isFixedLengthVector())
5420 Interleaved =
5421 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
5422
5423 return Interleaved;
5424}
5425
5426// If we have a vector of bits that we want to reverse, we can use a vbrev on a
5427// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
5428static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
5429 SelectionDAG &DAG,
5430 const RISCVSubtarget &Subtarget) {
5431 SDLoc DL(SVN);
5432 MVT VT = SVN->getSimpleValueType(0);
5433 SDValue V = SVN->getOperand(0);
5434 unsigned NumElts = VT.getVectorNumElements();
5435
5436 assert(VT.getVectorElementType() == MVT::i1);
5437
5438 if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
5439 SVN->getMask().size()) ||
5440 !SVN->getOperand(1).isUndef())
5441 return SDValue();
5442
5443 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
5444 EVT ViaVT = EVT::getVectorVT(
5445 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
5446 EVT ViaBitVT =
5447 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
5448
5449 // If we don't have zvbb or the larger element type > ELEN, the operation will
5450 // be illegal.
5451 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
5452 ViaVT) ||
5453 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
5454 return SDValue();
5455
5456 // If the bit vector doesn't fit exactly into the larger element type, we need
5457 // to insert it into the larger vector and then shift up the reversed bits
5458 // afterwards to get rid of the gap introduced.
5459 if (ViaEltSize > NumElts)
5460 V = DAG.getInsertSubvector(DL, DAG.getUNDEF(ViaBitVT), V, 0);
5461
5462 SDValue Res =
5463 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
5464
5465 // Shift up the reversed bits if the vector didn't exactly fit into the larger
5466 // element type.
5467 if (ViaEltSize > NumElts)
5468 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
5469 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
5470
5471 Res = DAG.getBitcast(ViaBitVT, Res);
5472
5473 if (ViaEltSize > NumElts)
5474 Res = DAG.getExtractSubvector(DL, VT, Res, 0);
5475 return Res;
5476}
5477
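// Check whether Mask can instead be expressed as a rotate on groups of
// elements reinterpreted as a single wider integer element, and whether that
// wider RotateVT is legal on this subtarget.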
5478static bool isLegalBitRotate(ArrayRef<int> Mask, EVT VT,
5479 const RISCVSubtarget &Subtarget,
5480 MVT &RotateVT, unsigned &RotateAmt) {
5481 unsigned NumElts = VT.getVectorNumElements();
5482 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5483 unsigned NumSubElts;
5484 if (!ShuffleVectorInst::isBitRotateMask(Mask, EltSizeInBits, 2,
5485 NumElts, NumSubElts, RotateAmt))
5486 return false;
5487 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
5488 NumElts / NumSubElts);
5489
5490 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
5491 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
5492}
5493
5494// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
5495// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
5496// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
5497static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
5498 SelectionDAG &DAG,
5499 const RISCVSubtarget &Subtarget) {
5500 SDLoc DL(SVN);
5501
5502 EVT VT = SVN->getValueType(0);
5503 unsigned RotateAmt;
5504 MVT RotateVT;
5505 if (!isLegalBitRotate(SVN->getMask(), VT, Subtarget, RotateVT, RotateAmt))
5506 return SDValue();
5507
5508 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
5509
5510 SDValue Rotate;
5511 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
5512 // so canonicalize to vrev8.
5513 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
5514 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
5515 else
5516 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
5517 DAG.getConstant(RotateAmt, DL, RotateVT));
5518
5519 return DAG.getBitcast(VT, Rotate);
5520}
5521
5522// If compiling with an exactly known VLEN, see if we can split a
5523// shuffle on m2 or larger into a small number of m1 sized shuffles
5524// which write each destination register exactly once.
5525static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
5526 SelectionDAG &DAG,
5527 const RISCVSubtarget &Subtarget) {
5528 SDLoc DL(SVN);
5529 MVT VT = SVN->getSimpleValueType(0);
5530 SDValue V1 = SVN->getOperand(0);
5531 SDValue V2 = SVN->getOperand(1);
5532 ArrayRef<int> Mask = SVN->getMask();
5533
5534 // If we don't know exact data layout, not much we can do. If this
5535 // is already m1 or smaller, no point in splitting further.
5536 const auto VLen = Subtarget.getRealVLen();
5537 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
5538 return SDValue();
5539
5540 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
5541 // expansion for.
5542 unsigned RotateAmt;
5543 MVT RotateVT;
5544 if (isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))
5545 return SDValue();
5546
5547 MVT ElemVT = VT.getVectorElementType();
5548 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
5549
5550 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5551 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
5552 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
5553 assert(M1VT == RISCVTargetLowering::getM1VT(M1VT));
5554 unsigned NumOpElts = M1VT.getVectorMinNumElements();
5555 unsigned NumElts = ContainerVT.getVectorMinNumElements();
5556 unsigned NumOfSrcRegs = NumElts / NumOpElts;
5557 unsigned NumOfDestRegs = NumElts / NumOpElts;
5558 // The following semantically builds up a fixed length concat_vector
5559 // of the component shuffle_vectors. We eagerly lower to scalable here
5560 // to avoid DAG combining it back to a large shuffle_vector again.
5561 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5562 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
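// For each destination m1 register, record the sequence of (first source
// register, optional second source register, sub-mask) shuffles needed to
// produce it.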
5563 SmallVector<SmallVector<std::tuple<unsigned, unsigned, SmallVector<int>>>>
5564 Operands;
5565 processShuffleMasks(
5566 Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs,
5567 [&]() { Operands.emplace_back(); },
5568 [&](ArrayRef<int> SrcSubMask, unsigned SrcVecIdx, unsigned DstVecIdx) {
5569 Operands.emplace_back().emplace_back(SrcVecIdx, UINT_MAX,
5570 SmallVector<int>(SrcSubMask));
5571 },
5572 [&](ArrayRef<int> SrcSubMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
5573 if (NewReg)
5574 Operands.emplace_back();
5575 Operands.back().emplace_back(Idx1, Idx2, SmallVector<int>(SrcSubMask));
5576 });
5577 assert(Operands.size() == NumOfDestRegs && "Whole vector must be processed");
5578 // Note: check that we do not emit too many shuffles here to prevent code
5579 // size explosion.
5580 // TODO: investigate, if it can be improved by extra analysis of the masks to
5581 // check if the code is more profitable.
5582 unsigned NumShuffles = std::accumulate(
5583 Operands.begin(), Operands.end(), 0u,
5584 [&](unsigned N,
5585 ArrayRef<std::tuple<unsigned, unsigned, SmallVector<int>>> Data) {
5586 if (Data.empty())
5587 return N;
5588 N += Data.size();
5589 for (const auto &P : Data) {
5590 unsigned Idx2 = std::get<1>(P);
5591 ArrayRef<int> Mask = std::get<2>(P);
5592 if (Idx2 != UINT_MAX)
5593 ++N;
5594 else if (ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
5595 --N;
5596 }
5597 return N;
5598 });
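// Thresholds: when splitting into more than two destination registers, allow
// at most one shuffle per destination register on average; otherwise allow at
// most three shuffles in total.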
5599 if ((NumOfDestRegs > 2 && NumShuffles > NumOfDestRegs) ||
5600 (NumOfDestRegs <= 2 && NumShuffles >= 4))
5601 return SDValue();
5602 auto ExtractValue = [&, &DAG = DAG](SDValue SrcVec, unsigned ExtractIdx) {
5603 SDValue SubVec = DAG.getExtractSubvector(DL, M1VT, SrcVec, ExtractIdx);
5604 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
5605 return SubVec;
5606 };
5607 auto PerformShuffle = [&, &DAG = DAG](SDValue SubVec1, SDValue SubVec2,
5608 ArrayRef<int> Mask) {
5609 SDValue SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec1, SubVec2, Mask);
5610 return SubVec;
5611 };
5612 SDValue Vec = DAG.getUNDEF(ContainerVT);
5613 for (auto [I, Data] : enumerate(Operands)) {
5614 if (Data.empty())
5615 continue;
5616 SmallDenseMap<unsigned, SDValue, 4> Values;
5617 for (unsigned I : seq<unsigned>(Data.size())) {
5618 const auto &[Idx1, Idx2, _] = Data[I];
5619 // If the shuffle contains a permutation of an odd number of elements,
5620 // Idx1 might be used already in the first iteration.
5621 //
5622 // Idx1 = shuffle Idx1, Idx2
5623 // Idx1 = shuffle Idx1, Idx3
5624 SDValue &V = Values.try_emplace(Idx1).first->getSecond();
5625 if (!V)
5626 V = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
5627 (Idx1 % NumOfSrcRegs) * NumOpElts);
5628 if (Idx2 != UINT_MAX) {
5629 SDValue &V = Values.try_emplace(Idx2).first->getSecond();
5630 if (!V)
5631 V = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
5632 (Idx2 % NumOfSrcRegs) * NumOpElts);
5633 }
5634 }
5635 SDValue V;
5636 for (const auto &[Idx1, Idx2, Mask] : Data) {
5637 SDValue V1 = Values.at(Idx1);
5638 SDValue V2 = Idx2 == UINT_MAX ? V1 : Values.at(Idx2);
5639 V = PerformShuffle(V1, V2, Mask);
5640 Values[Idx1] = V;
5641 }
5642
5643 unsigned InsertIdx = I * NumOpElts;
5644 V = convertToScalableVector(M1VT, V, DAG, Subtarget);
5645 Vec = DAG.getInsertSubvector(DL, Vec, V, InsertIdx);
5646 }
5647 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5648}
5649
5650// Matches a subset of compress masks with a contiguous prefix of output
5651// elements. This could be extended to allow gaps by deciding which
5652// source elements to spuriously demand.
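// e.g. <0, 2, 3, -1> is such a mask: source elements 0, 2 and 3 are packed
// into the leading output lanes.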
5653static bool isCompressMask(ArrayRef<int> Mask) {
5654 int Last = -1;
5655 bool SawUndef = false;
5656 for (const auto &[Idx, M] : enumerate(Mask)) {
5657 if (M == -1) {
5658 SawUndef = true;
5659 continue;
5660 }
5661 if (SawUndef)
5662 return false;
5663 if (Idx > (unsigned)M)
5664 return false;
5665 if (M <= Last)
5666 return false;
5667 Last = M;
5668 }
5669 return true;
5670}
5671
5672/// Given a shuffle where the indices are disjoint between the two sources,
5673/// e.g.:
5674///
5675/// t2:v4i8 = vector_shuffle t0:v4i8, t1:v4i8, <2, 7, 1, 4>
5676///
5677/// Merge the two sources into one and do a single source shuffle:
5678///
5679/// t2:v4i8 = vselect t1:v4i8, t0:v4i8, <0, 1, 0, 1>
5680/// t3:v4i8 = vector_shuffle t2:v4i8, undef, <2, 3, 1, 0>
5681///
5682/// A vselect will either be merged into a masked instruction or be lowered as a
5683/// vmerge.vvm, which is cheaper than a vrgather.vv.
5684static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN,
5685 SelectionDAG &DAG,
5686 const RISCVSubtarget &Subtarget) {
5687 MVT VT = SVN->getSimpleValueType(0);
5688 MVT XLenVT = Subtarget.getXLenVT();
5689 SDLoc DL(SVN);
5690
5691 const ArrayRef<int> Mask = SVN->getMask();
5692
5693 // Work out which source each lane will come from.
5694 SmallVector<int, 16> Srcs(Mask.size(), -1);
5695
5696 for (int Idx : Mask) {
5697 if (Idx == -1)
5698 continue;
5699 unsigned SrcIdx = Idx % Mask.size();
5700 int Src = (uint32_t)Idx < Mask.size() ? 0 : 1;
5701 if (Srcs[SrcIdx] == -1)
5702 // Mark this source as using this lane.
5703 Srcs[SrcIdx] = Src;
5704 else if (Srcs[SrcIdx] != Src)
5705 // The other source is using this lane: not disjoint.
5706 return SDValue();
5707 }
5708
5709 SmallVector<SDValue> SelectMaskVals;
5710 for (int Lane : Srcs) {
5711 if (Lane == -1)
5712 SelectMaskVals.push_back(DAG.getUNDEF(XLenVT));
5713 else
5714 SelectMaskVals.push_back(DAG.getConstant(Lane ? 0 : 1, DL, XLenVT));
5715 }
5716 MVT MaskVT = VT.changeVectorElementType(MVT::i1);
5717 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, SelectMaskVals);
5718 SDValue Select = DAG.getNode(ISD::VSELECT, DL, VT, SelectMask,
5719 SVN->getOperand(0), SVN->getOperand(1));
5720
5721 // Move all indices relative to the first source.
5722 SmallVector<int> NewMask(Mask.size());
5723 for (unsigned I = 0; I < Mask.size(); I++) {
5724 if (Mask[I] == -1)
5725 NewMask[I] = -1;
5726 else
5727 NewMask[I] = Mask[I] % Mask.size();
5728 }
5729
5730 return DAG.getVectorShuffle(VT, DL, Select, DAG.getUNDEF(VT), NewMask);
5731}
5732
5733/// Is this mask local (i.e. elements only move within their local span), and
5734/// repeating (that is, the same rearrangement is being done within each span)?
5735static bool isLocalRepeatingShuffle(ArrayRef<int> Mask, int Span) {
5736 // Require a prefix from the original mask until the consumer code
5737 // is adjusted to rewrite the mask instead of just taking a prefix.
5738 for (auto [I, M] : enumerate(Mask)) {
5739 if (M == -1)
5740 continue;
5741 if ((M / Span) != (int)(I / Span))
5742 return false;
5743 int SpanIdx = I % Span;
5744 int Expected = M % Span;
5745 if (Mask[SpanIdx] != Expected)
5746 return false;
5747 }
5748 return true;
5749}
5750
5751/// Is this mask only using elements from the first span of the input?
5752static bool isLowSourceShuffle(ArrayRef<int> Mask, int Span) {
5753 return all_of(Mask, [&](const auto &Idx) { return Idx == -1 || Idx < Span; });
5754}
5755
5756/// Return true for a mask which performs an arbitrary shuffle within the first
5757/// span, and then repeats that same result across all remaining spans. Note
5758/// that this doesn't check if all the inputs come from a single span!
5759static bool isSpanSplatShuffle(ArrayRef<int> Mask, int Span) {
5760 // Require a prefix from the original mask until the consumer code
5761 // is adjusted to rewrite the mask instead of just taking a prefix.
5762 for (auto [I, M] : enumerate(Mask)) {
5763 if (M == -1)
5764 continue;
5765 int SpanIdx = I % Span;
5766 if (Mask[SpanIdx] != M)
5767 return false;
5768 }
5769 return true;
5770}
5771
5772/// Try to widen element type to get a new mask value for a better permutation
5773/// sequence. This doesn't try to inspect the widened mask for profitability;
5774/// we speculate the widened form is equal or better. This has the effect of
5775/// reducing mask constant sizes - allowing cheaper materialization sequences
5776/// - and index sequence sizes - reducing register pressure and materialization
5777/// cost, at the cost of (possibly) an extra VTYPE toggle.
5778static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
5779 SDLoc DL(Op);
5780 MVT VT = Op.getSimpleValueType();
5781 MVT ScalarVT = VT.getVectorElementType();
5782 unsigned ElementSize = ScalarVT.getFixedSizeInBits();
5783 SDValue V0 = Op.getOperand(0);
5784 SDValue V1 = Op.getOperand(1);
5785 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
5786
5787 // Avoid wasted work leading to isTypeLegal check failing below
5788 if (ElementSize > 32)
5789 return SDValue();
5790
5791 SmallVector<int, 8> NewMask;
5792 if (!widenShuffleMaskElts(Mask, NewMask))
5793 return SDValue();
5794
5795 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(ElementSize * 2)
5796 : MVT::getIntegerVT(ElementSize * 2);
5797 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
5798 if (!DAG.getTargetLoweringInfo().isTypeLegal(NewVT))
5799 return SDValue();
5800 V0 = DAG.getBitcast(NewVT, V0);
5801 V1 = DAG.getBitcast(NewVT, V1);
5802 return DAG.getBitcast(VT, DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
5803}
5804
5805static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
5806 const RISCVSubtarget &Subtarget) {
5807 SDValue V1 = Op.getOperand(0);
5808 SDValue V2 = Op.getOperand(1);
5809 SDLoc DL(Op);
5810 MVT XLenVT = Subtarget.getXLenVT();
5811 MVT VT = Op.getSimpleValueType();
5812 unsigned NumElts = VT.getVectorNumElements();
5813 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
5814
5815 if (VT.getVectorElementType() == MVT::i1) {
5816 // Lower to a vror.vi of a larger element type if possible before we promote
5817 // i1s to i8s.
5818 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5819 return V;
5820 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
5821 return V;
5822
5823 // Promote i1 shuffle to i8 shuffle.
5824 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
5825 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
5826 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
5827 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
5828 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
5829 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
5830 ISD::SETNE);
5831 }
5832
5833 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5834
5835 // Store the return value in a single variable instead of structured bindings
5836 // so that we can pass it to the GetSlide lambda below; lambdas cannot
5837 // capture structured bindings until C++20.
5838 auto TrueMaskVL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5839 auto [TrueMask, VL] = TrueMaskVL;
5840
5841 if (SVN->isSplat()) {
5842 const int Lane = SVN->getSplatIndex();
5843 if (Lane >= 0) {
5844 MVT SVT = VT.getVectorElementType();
5845
5846 // Turn splatted vector load into a strided load with an X0 stride.
5847 SDValue V = V1;
5848 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
5849 // with undef.
5850 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
5851 int Offset = Lane;
5852 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
5853 int OpElements =
5854 V.getOperand(0).getSimpleValueType().getVectorNumElements();
5855 V = V.getOperand(Offset / OpElements);
5856 Offset %= OpElements;
5857 }
5858
5859 // We need to ensure the load isn't atomic or volatile.
5860 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
5861 auto *Ld = cast<LoadSDNode>(V);
5862 Offset *= SVT.getStoreSize();
5863 SDValue NewAddr = DAG.getMemBasePlusOffset(
5864 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
5865
5866 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
5867 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
5868 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5869 SDValue IntID =
5870 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
5871 SDValue Ops[] = {Ld->getChain(),
5872 IntID,
5873 DAG.getUNDEF(ContainerVT),
5874 NewAddr,
5875 DAG.getRegister(RISCV::X0, XLenVT),
5876 VL};
5877 SDValue NewLoad = DAG.getMemIntrinsicNode(
5878 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
5879 DAG.getMachineFunction().getMachineMemOperand(
5880 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
5881 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5882 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5883 }
5884
5885 MVT SplatVT = ContainerVT;
5886
5887 // f16 with zvfhmin and bf16 need to use an integer scalar load.
5888 if (SVT == MVT::bf16 ||
5889 (SVT == MVT::f16 && !Subtarget.hasStdExtZfh())) {
5890 SVT = MVT::i16;
5891 SplatVT = ContainerVT.changeVectorElementType(SVT);
5892 }
5893
5894 // Otherwise use a scalar load and splat. This will give the best
5895 // opportunity to fold a splat into the operation. ISel can turn it into
5896 // the x0 strided load if we aren't able to fold away the select.
5897 if (SVT.isFloatingPoint())
5898 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
5899 Ld->getPointerInfo().getWithOffset(Offset),
5900 Ld->getBaseAlign(), Ld->getMemOperand()->getFlags());
5901 else
5902 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
5903 Ld->getPointerInfo().getWithOffset(Offset), SVT,
5904 Ld->getBaseAlign(),
5905 Ld->getMemOperand()->getFlags());
5906 DAG.makeEquivalentMemoryOrdering(Ld, V);
5907
5908 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
5909 : RISCVISD::VMV_V_X_VL;
5910 SDValue Splat =
5911 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
5912 Splat = DAG.getBitcast(ContainerVT, Splat);
5913 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
5914 }
5915
5916 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5917 assert(Lane < (int)NumElts && "Unexpected lane!");
5918 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
5919 V1, DAG.getConstant(Lane, DL, XLenVT),
5920 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5921 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5922 }
5923 }
5924
5925 // For exact VLEN m2 or greater, try to split to m1 operations if we
5926 // can split cleanly.
5927 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5928 return V;
5929
5930 ArrayRef<int> Mask = SVN->getMask();
5931
5932 if (SDValue V =
5933 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5934 return V;
5935
5936 if (SDValue V =
5937 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5938 return V;
5939
5940 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5941 // available.
5942 if (Subtarget.hasStdExtZvkb())
5943 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5944 return V;
5945
5946 if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef() &&
5947 NumElts != 2)
5948 return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
5949
5950 // If this is a deinterleave(2,4,8) and we can widen the vector, then we can
5951 // use shift and truncate to perform the shuffle.
5952 // TODO: For Factor=6, we can perform the first step of the deinterleave via
5953 // shift-and-trunc reducing total cost for everything except an mf8 result.
5954 // TODO: For Factor=4,8, we can do the same when the ratio isn't high enough
5955 // to do the entire operation.
5956 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5957 const unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5958 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5959 for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
5960 unsigned Index = 0;
5961 if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) &&
5962 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
5963 if (SDValue Src = getSingleShuffleSrc(VT, V1, V2))
5964 return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG);
5965 if (1 < count_if(Mask,
5966 [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
5967 1 < count_if(Mask, [&Mask](int Idx) {
5968 return Idx >= (int)Mask.size();
5969 })) {
5970 // Narrow each source and concatenate them.
5971 // FIXME: For small LMUL it is better to concatenate first.
5972 MVT EltVT = VT.getVectorElementType();
5973 auto EltCnt = VT.getVectorElementCount();
5974 MVT SubVT =
5975 MVT::getVectorVT(EltVT, EltCnt.divideCoefficientBy(Factor));
5976
5977 SDValue Lo =
5978 getDeinterleaveShiftAndTrunc(DL, SubVT, V1, Factor, Index, DAG);
5979 SDValue Hi =
5980 getDeinterleaveShiftAndTrunc(DL, SubVT, V2, Factor, Index, DAG);
5981
5982 SDValue Concat =
5983 DAG.getNode(ISD::CONCAT_VECTORS, DL,
5984 SubVT.getDoubleNumVectorElementsVT(), Lo, Hi);
5985 if (Factor == 2)
5986 return Concat;
5987
5988 SDValue Vec = DAG.getUNDEF(VT);
5989 return DAG.getInsertSubvector(DL, Vec, Concat, 0);
5990 }
5991 }
5992 }
5993 }
5994
5995 // If this is a deinterleave(2), try using vunzip{a,b}. This mostly catches
5996 // e64 which can't match above.
5997 unsigned Index = 0;
5998 if (Subtarget.hasVendorXRivosVizip() &&
5999 ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, 2, Index) &&
6000 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
6001 unsigned Opc =
6002 Index == 0 ? RISCVISD::RI_VUNZIP2A_VL : RISCVISD::RI_VUNZIP2B_VL;
6003 if (V2.isUndef())
6004 return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
6005 if (auto VLEN = Subtarget.getRealVLen();
6006 VLEN && VT.getSizeInBits().getKnownMinValue() % *VLEN == 0)
6007 return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
6008 if (SDValue Src = foldConcatVector(V1, V2)) {
6009 EVT NewVT = VT.getDoubleNumVectorElementsVT();
6010 Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
6011 SDValue Res =
6012 lowerVZIP(Opc, Src, DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
6013 return DAG.getExtractSubvector(DL, VT, Res, 0);
6014 }
6015 // Deinterleave each source and concatenate them, or concat first, then
6016 // deinterleave.
6017 if (1 < count_if(Mask,
6018 [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
6019 1 < count_if(Mask,
6020 [&Mask](int Idx) { return Idx >= (int)Mask.size(); })) {
6021
6022 const unsigned EltSize = VT.getScalarSizeInBits();
6023 const unsigned MinVLMAX = Subtarget.getRealMinVLen() / EltSize;
6024 if (NumElts < MinVLMAX) {
6025 MVT ConcatVT = VT.getDoubleNumVectorElementsVT();
6026 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);
6027 SDValue Res =
6028 lowerVZIP(Opc, Concat, DAG.getUNDEF(ConcatVT), DL, DAG, Subtarget);
6029 return DAG.getExtractSubvector(DL, VT, Res, 0);
6030 }
6031
6032 SDValue Lo = lowerVZIP(Opc, V1, DAG.getUNDEF(VT), DL, DAG, Subtarget);
6033 SDValue Hi = lowerVZIP(Opc, V2, DAG.getUNDEF(VT), DL, DAG, Subtarget);
6034
6035 MVT SubVT = VT.getHalfNumVectorElementsVT();
6036 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT,
6037 DAG.getExtractSubvector(DL, SubVT, Lo, 0),
6038 DAG.getExtractSubvector(DL, SubVT, Hi, 0));
6039 }
6040 }
6041
6042 if (SDValue V =
6043 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
6044 return V;
6045
6046 // Detect an interleave shuffle and lower to
6047 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
6048 int EvenSrc, OddSrc;
6049 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget) &&
6050 !(NumElts == 2 &&
6051 ShuffleVectorInst::isSingleSourceMask(Mask, Mask.size()))) {
6052 // Extract the halves of the vectors.
6053 MVT HalfVT = VT.getHalfNumVectorElementsVT();
6054
6055 // Recognize if one half is actually undef; the matching above will
6056 // otherwise reuse the even stream for the undef one. This improves
6057 // spread(2) shuffles.
6058 bool LaneIsUndef[2] = { true, true};
6059 for (const auto &[Idx, M] : enumerate(Mask))
6060 LaneIsUndef[Idx % 2] &= (M == -1);
6061
6062 int Size = Mask.size();
6063 SDValue EvenV, OddV;
6064 if (LaneIsUndef[0]) {
6065 EvenV = DAG.getUNDEF(HalfVT);
6066 } else {
6067 assert(EvenSrc >= 0 && "Undef source?");
6068 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
6069 EvenV = DAG.getExtractSubvector(DL, HalfVT, EvenV, EvenSrc % Size);
6070 }
6071
6072 if (LaneIsUndef[1]) {
6073 OddV = DAG.getUNDEF(HalfVT);
6074 } else {
6075 assert(OddSrc >= 0 && "Undef source?");
6076 OddV = (OddSrc / Size) == 0 ? V1 : V2;
6077 OddV = DAG.getExtractSubvector(DL, HalfVT, OddV, OddSrc % Size);
6078 }
6079
6080 // Prefer vzip2a if available.
6081 // TODO: Extend to matching zip2b if EvenSrc and OddSrc allow.
6082 if (Subtarget.hasVendorXRivosVizip()) {
6083 EvenV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), EvenV, 0);
6084 OddV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), OddV, 0);
6085 return lowerVZIP(RISCVISD::RI_VZIP2A_VL, EvenV, OddV, DL, DAG, Subtarget);
6086 }
6087 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
6088 }
6089
6090 // Recognize a pattern which can be handled via a pair of vslideup/vslidedown
6091 // instructions (in any combination) with masking on the second instruction.
6092 // Also handles masked slides into an identity source, and single slides
6093 // without masking. Avoid matching bit rotates (which are not also element
6094 // rotates) as slide pairs. This is a performance heuristic, not a
6095 // functional check.
6096 std::array<std::pair<int, int>, 2> SrcInfo;
6097 unsigned RotateAmt;
6098 MVT RotateVT;
6099 if (::isMaskedSlidePair(Mask, SrcInfo) &&
6100 (isElementRotate(SrcInfo, NumElts) ||
6101 !isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))) {
6102 SDValue Sources[2];
6103 auto GetSourceFor = [&](const std::pair<int, int> &Info) {
6104 int SrcIdx = Info.first;
6105 assert(SrcIdx == 0 || SrcIdx == 1);
6106 SDValue &Src = Sources[SrcIdx];
6107 if (!Src) {
6108 SDValue SrcV = SrcIdx == 0 ? V1 : V2;
6109 Src = convertToScalableVector(ContainerVT, SrcV, DAG, Subtarget);
6110 }
6111 return Src;
6112 };
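// Emit one (possibly masked) slide from the given source. A slide amount of
// zero means the source is used unmodified; that case may only appear as the
// first of the two operations, so the mask must be all-true.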
6113 auto GetSlide = [&](const std::pair<int, int> &Src, SDValue Mask,
6114 SDValue Passthru) {
6115 auto [TrueMask, VL] = TrueMaskVL;
6116 SDValue SrcV = GetSourceFor(Src);
6117 int SlideAmt = Src.second;
6118 if (SlideAmt == 0) {
6119 // Should never be second operation
6120 assert(Mask == TrueMask);
6121 return SrcV;
6122 }
6123 if (SlideAmt < 0)
6124 return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
6125 DAG.getConstant(-SlideAmt, DL, XLenVT), Mask, VL,
6126 RISCVVType::TAIL_AGNOSTIC);
6127 return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
6128 DAG.getConstant(SlideAmt, DL, XLenVT), Mask, VL,
6129 RISCVVType::TAIL_AGNOSTIC);
6130 };
6131
6132 if (SrcInfo[1].first == -1) {
6133 SDValue Res = DAG.getUNDEF(ContainerVT);
6134 Res = GetSlide(SrcInfo[0], TrueMask, Res);
6135 return convertFromScalableVector(VT, Res, DAG, Subtarget);
6136 }
6137
6138 if (Subtarget.hasVendorXRivosVizip()) {
6139 bool TryWiden = false;
6140 unsigned Factor;
6141 if (isZipEven(SrcInfo, Mask, Factor)) {
6142 if (Factor == 1) {
6143 SDValue Src1 = SrcInfo[0].first == 0 ? V1 : V2;
6144 SDValue Src2 = SrcInfo[1].first == 0 ? V1 : V2;
6145 return lowerVZIP(RISCVISD::RI_VZIPEVEN_VL, Src1, Src2, DL, DAG,
6146 Subtarget);
6147 }
6148 TryWiden = true;
6149 }
6150 if (isZipOdd(SrcInfo, Mask, Factor)) {
6151 if (Factor == 1) {
6152 SDValue Src1 = SrcInfo[1].first == 0 ? V1 : V2;
6153 SDValue Src2 = SrcInfo[0].first == 0 ? V1 : V2;
6154 return lowerVZIP(RISCVISD::RI_VZIPODD_VL, Src1, Src2, DL, DAG,
6155 Subtarget);
6156 }
6157 TryWiden = true;
6158 }
6159 // If we found a widening opportunity which would let us form a
6160 // zipeven or zipodd, use the generic code to widen the shuffle
6161 // and recurse through this logic.
6162 if (TryWiden)
6163 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6164 return V;
6165 }
6166
6167 // Build the mask. Note that vslideup unconditionally preserves elements
6168 // below the slide amount in the destination, and thus those elements are
6169 // undefined in the mask. If the mask ends up all true (or undef), it
6170 // will be folded away by general logic.
6171 SmallVector<SDValue> MaskVals;
6172 for (const auto &[Idx, M] : enumerate(Mask)) {
6173 if (M < 0 ||
6174 (SrcInfo[1].second > 0 && Idx < (unsigned)SrcInfo[1].second)) {
6175 MaskVals.push_back(DAG.getUNDEF(XLenVT));
6176 continue;
6177 }
6178 int Src = M >= (int)NumElts;
6179 int Diff = (int)Idx - (M % NumElts);
6180 bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
6181 assert(C ^ (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
6182 "Must match exactly one of the two slides");
6183 MaskVals.push_back(DAG.getConstant(C, DL, XLenVT));
6184 }
6185 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
6186 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6187 SDValue SelectMask = convertToScalableVector(
6188 ContainerVT.changeVectorElementType(MVT::i1),
6189 DAG.getBuildVector(MaskVT, DL, MaskVals), DAG, Subtarget);
6190
6191 SDValue Res = DAG.getUNDEF(ContainerVT);
6192 Res = GetSlide(SrcInfo[0], TrueMask, Res);
6193 Res = GetSlide(SrcInfo[1], SelectMask, Res);
6194 return convertFromScalableVector(VT, Res, DAG, Subtarget);
6195 }
6196
6197 // Handle any remaining single source shuffles
6198 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
6199 if (V2.isUndef()) {
6200 // We might be able to express the shuffle as a bitrotate. But even if we
6201 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
6202 // shifts and a vor will have a higher throughput than a vrgather.
6203 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
6204 return V;
6205
6206 if (SDValue V = lowerVECTOR_SHUFFLEAsVRGatherVX(SVN, Subtarget, DAG))
6207 return V;
6208
6209 // Match a spread(4,8) which can be done via extend and shift. Spread(2)
6210 // is fully covered in interleave(2) above, so it is ignored here.
6211 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
6212 unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
6213 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
6214 for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
6215 unsigned Index;
6216 if (RISCVTargetLowering::isSpreadMask(Mask, Factor, Index)) {
6217 MVT NarrowVT =
6218 MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor);
6219 SDValue Src = DAG.getExtractSubvector(DL, NarrowVT, V1, 0);
6220 return getWideningSpread(Src, Factor, Index, DL, DAG);
6221 }
6222 }
6223 }
6224
6225 // If only a prefix of the source elements influence a prefix of the
6226 // destination elements, try to see if we can reduce the required LMUL
6227 unsigned MinVLen = Subtarget.getRealMinVLen();
6228 unsigned MinVLMAX = MinVLen / VT.getScalarSizeInBits();
6229 if (NumElts > MinVLMAX) {
6230 unsigned MaxIdx = 0;
6231 for (auto [I, M] : enumerate(Mask)) {
6232 if (M == -1)
6233 continue;
6234 MaxIdx = std::max(std::max((unsigned)I, (unsigned)M), MaxIdx);
6235 }
6236 unsigned NewNumElts =
6237 std::max((uint64_t)MinVLMAX, PowerOf2Ceil(MaxIdx + 1));
6238 if (NewNumElts != NumElts) {
6239 MVT NewVT = MVT::getVectorVT(VT.getVectorElementType(), NewNumElts);
6240 V1 = DAG.getExtractSubvector(DL, NewVT, V1, 0);
6241 SDValue Res = DAG.getVectorShuffle(NewVT, DL, V1, DAG.getUNDEF(NewVT),
6242 Mask.take_front(NewNumElts));
6243 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), Res, 0);
6244 }
6245 }
6246
6247 // Before hitting generic lowering fallbacks, try to widen the mask
6248 // to a wider SEW.
6249 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6250 return V;
6251
6252 // Can we generate a vcompress instead of a vrgather? These scale better
6253 // at high LMUL, at the cost of not being able to fold a following select
6254 // into them. The mask constants are also smaller than the index vector
6255 // constants, and thus easier to materialize.
6256 if (isCompressMask(Mask)) {
6257 SmallVector<SDValue> MaskVals(NumElts,
6258 DAG.getConstant(false, DL, XLenVT));
6259 for (auto Idx : Mask) {
6260 if (Idx == -1)
6261 break;
6262 assert(Idx >= 0 && (unsigned)Idx < NumElts);
6263 MaskVals[Idx] = DAG.getConstant(true, DL, XLenVT);
6264 }
6265 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6266 SDValue CompressMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
6267 return DAG.getNode(ISD::VECTOR_COMPRESS, DL, VT, V1, CompressMask,
6268 DAG.getUNDEF(VT));
6269 }
6270
6271 if (VT.getScalarSizeInBits() == 8 &&
6272 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
6273 // On such a vector we're unable to use i8 as the index type.
6274 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
6275 // may involve vector splitting if we're already at LMUL=8, or our
6276 // user-supplied maximum fixed-length LMUL.
6277 return SDValue();
6278 }
6279
6280 // Base case for the two operand recursion below - handle the worst case
6281 // single source shuffle.
6282 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
6283 MVT IndexVT = VT.changeTypeToInteger();
6284 // Since we can't introduce illegal index types at this stage, use i16 and
6285 // vrgatherei16 if the corresponding index type for plain vrgather is greater
6286 // than XLenVT.
6287 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
6288 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
6289 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
6290 }
6291
6292 // If the mask allows, we can do all the index computation in 16 bits. This
6293 // requires less work and less register pressure at high LMUL, and creates
6294 // smaller constants which may be cheaper to materialize.
6295 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
6296 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
6297 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
6298 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
6299 }
6300
6301 MVT IndexContainerVT =
6302 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
6303
6304 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
6305 SmallVector<SDValue> GatherIndicesLHS;
6306 for (int MaskIndex : Mask) {
6307 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
6308 GatherIndicesLHS.push_back(IsLHSIndex
6309 ? DAG.getConstant(MaskIndex, DL, XLenVT)
6310 : DAG.getUNDEF(XLenVT));
6311 }
6312 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
6313 LHSIndices =
6314 convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
6315 // At m1 and less, there's no point trying any of the high LMUL splitting
6316 // techniques. TODO: Should we reconsider this for DLEN < VLEN?
6317 if (NumElts <= MinVLMAX) {
6318 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
6319 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6320 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6321 }
6322
6323 const MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
6324 EVT SubIndexVT = M1VT.changeVectorElementType(IndexVT.getScalarType());
6325 auto [InnerTrueMask, InnerVL] =
6326 getDefaultScalableVLOps(M1VT, DL, DAG, Subtarget);
6327 int N =
6328 ContainerVT.getVectorMinNumElements() / M1VT.getVectorMinNumElements();
6329 assert(isPowerOf2_32(N) && N <= 8);
6330
6331 // If we have a locally repeating mask, then we can reuse the first
6332 // register in the index register group for all registers within the
6333 // source register group. TODO: This generalizes to m2, and m4.
6334 if (isLocalRepeatingShuffle(Mask, MinVLMAX)) {
6335 SDValue SubIndex = DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6336 SDValue Gather = DAG.getUNDEF(ContainerVT);
6337 for (int i = 0; i < N; i++) {
6338 unsigned SubIdx = M1VT.getVectorMinNumElements() * i;
6339 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, SubIdx);
6340 SDValue SubVec =
6341 DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6342 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6343 Gather = DAG.getInsertSubvector(DL, Gather, SubVec, SubIdx);
6344 }
6345 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6346 }
6347
6348 // If we have a shuffle which only uses the first register in our source
6349 // register group, and repeats the same index across all spans, we can
6350 // use a single vrgather (and possibly some register moves).
6351 // TODO: This can be generalized for m2 or m4, or for any shuffle for
6352 // which we can do a linear number of shuffles to form an m1 which
6353 // contains all the output elements.
6354 if (isLowSourceShuffle(Mask, MinVLMAX) &&
6355 isSpanSplatShuffle(Mask, MinVLMAX)) {
6356 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, 0);
6357 SDValue SubIndex = DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6358 SDValue SubVec = DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6359 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6360 SDValue Gather = DAG.getUNDEF(ContainerVT);
6361 for (int i = 0; i < N; i++)
6362 Gather = DAG.getInsertSubvector(DL, Gather, SubVec,
6363 M1VT.getVectorMinNumElements() * i);
6364 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6365 }
6366
6367 // If we have a shuffle which only uses the first register in our
6368 // source register group, we can do a linear number of m1 vrgathers
6369 // reusing the same source register (but with different indices)
6370 // TODO: This can be generalized for m2 or m4, or for any shuffle
6371 // for which we can do a vslidedown followed by this expansion.
6372 if (isLowSourceShuffle(Mask, MinVLMAX)) {
6373 SDValue SlideAmt =
6374 DAG.getElementCount(DL, XLenVT, M1VT.getVectorElementCount());
6375 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, 0);
6376 SDValue Gather = DAG.getUNDEF(ContainerVT);
6377 for (int i = 0; i < N; i++) {
6378 if (i != 0)
6379 LHSIndices = getVSlidedown(DAG, Subtarget, DL, IndexContainerVT,
6380 DAG.getUNDEF(IndexContainerVT), LHSIndices,
6381 SlideAmt, TrueMask, VL);
6382 SDValue SubIndex =
6383 DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6384 SDValue SubVec =
6385 DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6386 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6387 Gather = DAG.getInsertSubvector(DL, Gather, SubVec,
6388 M1VT.getVectorMinNumElements() * i);
6389 }
6390 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6391 }
6392
6393 // Fallback to generic vrgather if we can't find anything better.
6394 // On many machines, this will be O(LMUL^2)
6395 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
6396 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6397 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6398 }
6399
6400 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
6401 // merged with a second vrgather.
6402 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
6403
6404 // Now construct the mask that will be used by the blended vrgather operation.
6405 // Construct the appropriate indices into each vector.
6406 for (int MaskIndex : Mask) {
6407 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
6408 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
6409 ? MaskIndex : -1);
6410 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
6411 }
6412
6413 // If the mask indices are disjoint between the two sources, we can lower it
6414 // as a vselect + a single source vrgather.vv. Don't do this if we think the
6415 // operands may end up being lowered to something cheaper than a vrgather.vv.
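 // For example, with 4 elements the mask <2,7,5,0> needs lanes {0,2} of V1 and
 // lanes {1,3} of V2; since those lane sets are disjoint, the two sources can
 // be merged lane-wise with a vselect and then shuffled with one vrgather.vv
 // using the single-source mask <2,3,1,0>.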
6416 if (!DAG.isSplatValue(V2) && !DAG.isSplatValue(V1) &&
6417 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskLHS) &&
6418 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskRHS) &&
6419 !ShuffleVectorInst::isIdentityMask(ShuffleMaskLHS, NumElts) &&
6420 !ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts))
6421 if (SDValue V = lowerDisjointIndicesShuffle(SVN, DAG, Subtarget))
6422 return V;
6423
6424 // Before hitting generic lowering fallbacks, try to widen the mask
6425 // to a wider SEW.
6426 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6427 return V;
6428
6429 // Try to pick a profitable operand order.
6430 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
6431 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
6432
6433 // Recursively invoke lowering for each operand if we had two
6434 // independent single source shuffles, and then combine the result via a
6435 // vselect. Note that the vselect will likely be folded back into the
6436 // second permute (vrgather, or other) by the post-isel combine.
6437 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
6438 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
6439
6440 SmallVector<SDValue> MaskVals;
6441 for (int MaskIndex : Mask) {
6442 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
6443 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
6444 }
6445
6446 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
6447 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6448 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
6449
6450 if (SwapOps)
6451 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
6452 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
6453}
6454
6455 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
6456 // Only support legal VTs for other shuffles for now.
6457 if (!isTypeLegal(VT))
6458 return false;
6459
6460 // Support splats for any type. These should type legalize well.
6461 if (ShuffleVectorSDNode::isSplatMask(M))
6462 return true;
6463
6464 const unsigned NumElts = M.size();
6465 MVT SVT = VT.getSimpleVT();
6466
6467 // Not for i1 vectors.
6468 if (SVT.getScalarType() == MVT::i1)
6469 return false;
6470
6471 std::array<std::pair<int, int>, 2> SrcInfo;
6472 int Dummy1, Dummy2;
6473 return ShuffleVectorInst::isReverseMask(M, NumElts) ||
6474 (::isMaskedSlidePair(M, SrcInfo) &&
6475 isElementRotate(SrcInfo, NumElts)) ||
6476 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
6477}
6478
6479// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
6480// the exponent.
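// For example, for i32 x = 16 the CTTZ path isolates the lowest set bit
// (x & -x = 16), converts it to f32 (biased exponent 131), shifts the exponent
// down and subtracts the bias: 131 - 127 = 4 = cttz(16). The CTLZ path instead
// subtracts the exponent from 127 + 31 = 158, giving 158 - 131 = 27 = ctlz(16).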
6481SDValue
6482RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
6483 SelectionDAG &DAG) const {
6484 MVT VT = Op.getSimpleValueType();
6485 unsigned EltSize = VT.getScalarSizeInBits();
6486 SDValue Src = Op.getOperand(0);
6487 SDLoc DL(Op);
6488 MVT ContainerVT = VT;
6489
6490 SDValue Mask, VL;
6491 if (Op->isVPOpcode()) {
6492 Mask = Op.getOperand(1);
6493 if (VT.isFixedLengthVector())
6494 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6495 Subtarget);
6496 VL = Op.getOperand(2);
6497 }
6498
6499 // We choose an FP type that can represent the value if possible. Otherwise,
6500 // we use a round-towards-zero conversion so the resulting exponent is correct.
6501 // TODO: Use f16 for i8 when possible?
6502 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
6503 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
6504 FloatEltVT = MVT::f32;
6505 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
6506
6507 // Legal types should have been checked in the RISCVTargetLowering
6508 // constructor.
6509 // TODO: Splitting may make sense in some cases.
6510 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
6511 "Expected legal float type!");
6512
6513 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
6514 // The trailing zero count is equal to log2 of this single bit value.
6515 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
6516 SDValue Neg = DAG.getNegative(Src, DL, VT);
6517 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
6518 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
6519 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
6520 Src, Mask, VL);
6521 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
6522 }
6523
6524 // We have a legal FP type, convert to it.
6525 SDValue FloatVal;
6526 if (FloatVT.bitsGT(VT)) {
6527 if (Op->isVPOpcode())
6528 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
6529 else
6530 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
6531 } else {
6532 // Use RTZ to avoid rounding influencing exponent of FloatVal.
6533 if (VT.isFixedLengthVector()) {
6534 ContainerVT = getContainerForFixedLengthVector(VT);
6535 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
6536 }
6537 if (!Op->isVPOpcode())
6538 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6539 SDValue RTZRM =
6540 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
6541 MVT ContainerFloatVT =
6542 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
6543 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
6544 Src, Mask, RTZRM, VL);
6545 if (VT.isFixedLengthVector())
6546 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
6547 }
6548 // Bitcast to integer and shift the exponent to the LSB.
6549 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
6550 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
6551 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
6552
6553 SDValue Exp;
6554 // Restore to the original type. The truncate after the SRL is there so a vnsrl can be selected.
6555 if (Op->isVPOpcode()) {
6556 Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
6557 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
6558 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
6559 } else {
6560 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
6561 DAG.getConstant(ShiftAmt, DL, IntVT));
6562 if (IntVT.bitsLT(VT))
6563 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
6564 else if (IntVT.bitsGT(VT))
6565 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
6566 }
6567
6568 // The exponent contains log2 of the value in biased form.
6569 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
6570 // For trailing zeros, we just need to subtract the bias.
6571 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
6572 return DAG.getNode(ISD::SUB, DL, VT, Exp,
6573 DAG.getConstant(ExponentBias, DL, VT));
6574 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
6575 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
6576 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
6577
6578 // For leading zeros, we need to remove the bias and convert from log2 to
6579 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
6580 unsigned Adjust = ExponentBias + (EltSize - 1);
6581 SDValue Res;
6582 if (Op->isVPOpcode())
6583 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
6584 Mask, VL);
6585 else
6586 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
6587
6588 // For a zero input, the result above equals Adjust, which is greater than
6589 // EltSize. Hence, we can clamp with min(Res, EltSize) for CTLZ.
6590 if (Op.getOpcode() == ISD::CTLZ)
6591 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
6592 else if (Op.getOpcode() == ISD::VP_CTLZ)
6593 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
6594 DAG.getConstant(EltSize, DL, VT), Mask, VL);
6595 return Res;
6596}
6597
6598SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
6599 SelectionDAG &DAG) const {
6600 SDLoc DL(Op);
6601 MVT XLenVT = Subtarget.getXLenVT();
6602 SDValue Source = Op->getOperand(0);
6603 MVT SrcVT = Source.getSimpleValueType();
6604 SDValue Mask = Op->getOperand(1);
6605 SDValue EVL = Op->getOperand(2);
6606
6607 if (SrcVT.isFixedLengthVector()) {
6608 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
6609 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
6610 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6611 Subtarget);
6612 SrcVT = ContainerVT;
6613 }
6614
6615 // Convert to boolean vector.
6616 if (SrcVT.getScalarType() != MVT::i1) {
6617 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
6618 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
6619 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
6620 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
6621 DAG.getUNDEF(SrcVT), Mask, EVL});
6622 }
6623
6624 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
6625 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
6626 // In this case, we can interpret poison as -1, so there is nothing further to do.
6627 return Res;
6628
6629 // Convert -1 to VL.
6630 SDValue SetCC =
6631 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
6632 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
6633 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
6634}
6635
6636// While RVV has alignment restrictions, we should always be able to load as a
6637// legal equivalently-sized byte-typed vector instead. This method is
6638 // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
6639// the load is already correctly-aligned, it returns SDValue().
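// For example, an underaligned load of <vscale x 1 x i64> is re-expressed as a
// load of <vscale x 8 x i8> followed by a bitcast back to the original type.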
6640SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
6641 SelectionDAG &DAG) const {
6642 auto *Load = cast<LoadSDNode>(Op);
6643 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
6644
6645 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
6646 Load->getMemoryVT(),
6647 *Load->getMemOperand()))
6648 return SDValue();
6649
6650 SDLoc DL(Op);
6651 MVT VT = Op.getSimpleValueType();
6652 unsigned EltSizeBits = VT.getScalarSizeInBits();
6653 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6654 "Unexpected unaligned RVV load type");
6655 MVT NewVT =
6656 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
6657 assert(NewVT.isValid() &&
6658 "Expecting equally-sized RVV vector types to be legal");
6659 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
6660 Load->getPointerInfo(), Load->getBaseAlign(),
6661 Load->getMemOperand()->getFlags());
6662 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
6663}
6664
6665// While RVV has alignment restrictions, we should always be able to store as a
6666// legal equivalently-sized byte-typed vector instead. This method is
6667 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
6668// returns SDValue() if the store is already correctly aligned.
6669SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
6670 SelectionDAG &DAG) const {
6671 auto *Store = cast<StoreSDNode>(Op);
6672 assert(Store && Store->getValue().getValueType().isVector() &&
6673 "Expected vector store");
6674
6675 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
6676 Store->getMemoryVT(),
6677 *Store->getMemOperand()))
6678 return SDValue();
6679
6680 SDLoc DL(Op);
6681 SDValue StoredVal = Store->getValue();
6682 MVT VT = StoredVal.getSimpleValueType();
6683 unsigned EltSizeBits = VT.getScalarSizeInBits();
6684 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6685 "Unexpected unaligned RVV store type");
6686 MVT NewVT =
6687 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
6688 assert(NewVT.isValid() &&
6689 "Expecting equally-sized RVV vector types to be legal");
6690 StoredVal = DAG.getBitcast(NewVT, StoredVal);
6691 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
6692 Store->getPointerInfo(), Store->getBaseAlign(),
6693 Store->getMemOperand()->getFlags());
6694}
6695
6696 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
6697 const RISCVSubtarget &Subtarget) {
6698 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
6699
6700 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
6701
6702 // All simm32 constants should be handled by isel.
6703 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2, making
6704 // this check redundant, but small immediates are common, so checking for them
6705 // early improves compile time.
6706 if (isInt<32>(Imm))
6707 return Op;
6708
6709 // We only need to cost the immediate, if constant pool lowering is enabled.
6710 if (!Subtarget.useConstantPoolForLargeInts())
6711 return Op;
6712
6713 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
6714 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
6715 return Op;
6716
6717 // Optimizations below are disabled for opt size. If we're optimizing for
6718 // size, use a constant pool.
6719 if (DAG.shouldOptForSize())
6720 return SDValue();
6721
6722 // Special case. See if we can build the constant as (ADD (SLLI X, C), X);
6723 // do that if it will avoid a constant pool, even though it requires an
6724 // extra temporary register.
6725 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
6726 // low and high 32 bits are the same and bit 31 and 63 are set.
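 // For example, with Zba the constant 0x8000000180000001 (identical halves,
 // bits 31 and 63 set) can be built as X = 0x80000001 (lui+addi, sign
 // extended) followed by ADD_UW X, (SLLI X, 32), avoiding a constant pool.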
6727 unsigned ShiftAmt, AddOpc;
6728 RISCVMatInt::InstSeq SeqLo =
6729 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
6730 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
6731 return Op;
6732
6733 return SDValue();
6734}
6735
6736SDValue RISCVTargetLowering::lowerConstantFP(SDValue Op,
6737 SelectionDAG &DAG) const {
6738 MVT VT = Op.getSimpleValueType();
6739 const APFloat &Imm = cast<ConstantFPSDNode>(Op)->getValueAPF();
6740
6741 // Can this constant be selected by a Zfa FLI instruction?
6742 bool Negate = false;
6743 int Index = getLegalZfaFPImm(Imm, VT);
6744
6745 // If the constant is negative, try negating.
6746 if (Index < 0 && Imm.isNegative()) {
6747 Index = getLegalZfaFPImm(-Imm, VT);
6748 Negate = true;
6749 }
6750
6751 // If we couldn't find a FLI lowering, fall back to generic code.
6752 if (Index < 0)
6753 return SDValue();
6754
6755 // Emit an FLI+FNEG. We use a custom node to hide from constant folding.
6756 SDLoc DL(Op);
6757 SDValue Const =
6758 DAG.getNode(RISCVISD::FLI, DL, VT,
6759 DAG.getTargetConstant(Index, DL, Subtarget.getXLenVT()));
6760 if (!Negate)
6761 return Const;
6762
6763 return DAG.getNode(ISD::FNEG, DL, VT, Const);
6764}
6765
6766 static SDValue LowerPREFETCH(SDValue Op, const RISCVSubtarget &Subtarget,
6767 SelectionDAG &DAG) {
6768
6769 unsigned IsData = Op.getConstantOperandVal(4);
6770
6771 // On mips-p8700 (XMIPSCBOP) only data prefetches are supported for now, so
6771 // drop instruction prefetches.
6772 if (Subtarget.hasVendorXMIPSCBOP() && !IsData)
6773 return Op.getOperand(0);
6774 return Op;
6775}
6776
6777 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
6778 const RISCVSubtarget &Subtarget) {
6779 SDLoc dl(Op);
6780 AtomicOrdering FenceOrdering =
6781 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
6782 SyncScope::ID FenceSSID =
6783 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
6784
6785 if (Subtarget.hasStdExtZtso()) {
6786 // The only fence that needs an instruction is a sequentially-consistent
6787 // cross-thread fence.
6788 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
6789 FenceSSID == SyncScope::System)
6790 return Op;
6791
6792 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6793 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6794 }
6795
6796 // singlethread fences only synchronize with signal handlers on the same
6797 // thread and thus only need to preserve instruction order, not actually
6798 // enforce memory ordering.
6799 if (FenceSSID == SyncScope::SingleThread)
6800 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6801 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6802
6803 return Op;
6804}
6805
6806SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
6807 SelectionDAG &DAG) const {
6808 SDLoc DL(Op);
6809 MVT VT = Op.getSimpleValueType();
6810 MVT XLenVT = Subtarget.getXLenVT();
6811 unsigned Check = Op.getConstantOperandVal(1);
6812 unsigned TDCMask = 0;
6813 if (Check & fcSNan)
6814 TDCMask |= RISCV::FPMASK_Signaling_NaN;
6815 if (Check & fcQNan)
6816 TDCMask |= RISCV::FPMASK_Quiet_NaN;
6817 if (Check & fcPosInf)
6818 TDCMask |= RISCV::FPMASK_Positive_Infinity;
6819 if (Check & fcNegInf)
6820 TDCMask |= RISCV::FPMASK_Negative_Infinity;
6821 if (Check & fcPosNormal)
6822 TDCMask |= RISCV::FPMASK_Positive_Normal;
6823 if (Check & fcNegNormal)
6824 TDCMask |= RISCV::FPMASK_Negative_Normal;
6825 if (Check & fcPosSubnormal)
6826 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
6827 if (Check & fcNegSubnormal)
6828 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
6829 if (Check & fcPosZero)
6830 TDCMask |= RISCV::FPMASK_Positive_Zero;
6831 if (Check & fcNegZero)
6832 TDCMask |= RISCV::FPMASK_Negative_Zero;
6833
6834 bool IsOneBitMask = isPowerOf2_32(TDCMask);
6835
6836 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
6837
6838 if (VT.isVector()) {
6839 SDValue Op0 = Op.getOperand(0);
6840 MVT VT0 = Op.getOperand(0).getSimpleValueType();
6841
6842 if (VT.isScalableVector()) {
6843 MVT DstVT = VT0.changeVectorElementTypeToInteger();
6844 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
6845 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6846 Mask = Op.getOperand(2);
6847 VL = Op.getOperand(3);
6848 }
6849 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
6850 VL, Op->getFlags());
6851 if (IsOneBitMask)
6852 return DAG.getSetCC(DL, VT, FPCLASS,
6853 DAG.getConstant(TDCMask, DL, DstVT),
6854 ISD::SETEQ);
6855 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
6856 DAG.getConstant(TDCMask, DL, DstVT));
6857 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
6858 ISD::SETNE);
6859 }
6860
6861 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
6862 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6863 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
6864 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
6865 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6866 Mask = Op.getOperand(2);
6867 MVT MaskContainerVT =
6868 getContainerForFixedLengthVector(Mask.getSimpleValueType());
6869 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
6870 VL = Op.getOperand(3);
6871 }
6872 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
6873
6874 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
6875 Mask, VL, Op->getFlags());
6876
6877 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6878 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
6879 if (IsOneBitMask) {
6880 SDValue VMSEQ =
6881 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6882 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
6883 DAG.getUNDEF(ContainerVT), Mask, VL});
6884 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
6885 }
6886 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
6887 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
6888
6889 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
6890 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6891 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
6892
6893 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6894 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
6895 DAG.getUNDEF(ContainerVT), Mask, VL});
6896 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
6897 }
6898
6899 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
6900 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
6901 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
6902 ISD::SETNE);
6903 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6904}
6905
6906// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
6907// operations propagate nans.
6908 static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
6909 const RISCVSubtarget &Subtarget) {
6910 SDLoc DL(Op);
6911 MVT VT = Op.getSimpleValueType();
6912
6913 SDValue X = Op.getOperand(0);
6914 SDValue Y = Op.getOperand(1);
6915
6916 if (!VT.isVector()) {
6917 MVT XLenVT = Subtarget.getXLenVT();
6918
6919 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
6920 // ensures that when one input is a nan, the other will also be a nan
6921 // allowing the nan to propagate. If both inputs are nan, this will swap the
6922 // inputs which is harmless.
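 // For example, for fmaximum(NaN, 1.0) X fails the ordered self-compare, so
 // NewY becomes X (NaN) and the fmax below sees two NaNs and returns NaN;
 // without the swap it would return 1.0.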
6923
6924 SDValue NewY = Y;
6925 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
6926 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
6927 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
6928 }
6929
6930 SDValue NewX = X;
6931 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
6932 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
6933 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
6934 }
6935
6936 unsigned Opc =
6937 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
6938 return DAG.getNode(Opc, DL, VT, NewX, NewY);
6939 }
6940
6941 // Check for known-non-NaN inputs before converting fixed-length vectors to scalable ones.
6942 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
6943 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
6944
6945 MVT ContainerVT = VT;
6946 if (VT.isFixedLengthVector()) {
6947 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
6948 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
6949 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
6950 }
6951
6952 SDValue Mask, VL;
6953 if (Op->isVPOpcode()) {
6954 Mask = Op.getOperand(2);
6955 if (VT.isFixedLengthVector())
6956 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6957 Subtarget);
6958 VL = Op.getOperand(3);
6959 } else {
6960 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6961 }
6962
6963 SDValue NewY = Y;
6964 if (!XIsNeverNan) {
6965 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6966 {X, X, DAG.getCondCode(ISD::SETOEQ),
6967 DAG.getUNDEF(ContainerVT), Mask, VL});
6968 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
6969 DAG.getUNDEF(ContainerVT), VL);
6970 }
6971
6972 SDValue NewX = X;
6973 if (!YIsNeverNan) {
6974 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6975 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
6976 DAG.getUNDEF(ContainerVT), Mask, VL});
6977 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
6978 DAG.getUNDEF(ContainerVT), VL);
6979 }
6980
6981 unsigned Opc =
6982 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
6983 ? RISCVISD::VFMAX_VL
6984 : RISCVISD::VFMIN_VL;
6985 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
6986 DAG.getUNDEF(ContainerVT), Mask, VL);
6987 if (VT.isFixedLengthVector())
6988 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
6989 return Res;
6990}
6991
6992 static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG,
6993 const RISCVSubtarget &Subtarget) {
6994 bool IsFABS = Op.getOpcode() == ISD::FABS;
6995 assert((IsFABS || Op.getOpcode() == ISD::FNEG) &&
6996 "Wrong opcode for lowering FABS or FNEG.");
6997
6998 MVT XLenVT = Subtarget.getXLenVT();
6999 MVT VT = Op.getSimpleValueType();
7000 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
7001
7002 SDLoc DL(Op);
7003 SDValue Fmv =
7004 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op.getOperand(0));
7005
7006 APInt Mask = IsFABS ? APInt::getSignedMaxValue(16) : APInt::getSignMask(16);
7007 Mask = Mask.sext(Subtarget.getXLen());
7008
7009 unsigned LogicOpc = IsFABS ? ISD::AND : ISD::XOR;
7010 SDValue Logic =
7011 DAG.getNode(LogicOpc, DL, XLenVT, Fmv, DAG.getConstant(Mask, DL, XLenVT));
7012 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, Logic);
7013}
7014
7015 static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG,
7016 const RISCVSubtarget &Subtarget) {
7017 assert(Op.getOpcode() == ISD::FCOPYSIGN && "Unexpected opcode");
7018
7019 MVT XLenVT = Subtarget.getXLenVT();
7020 MVT VT = Op.getSimpleValueType();
7021 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
7022
7023 SDValue Mag = Op.getOperand(0);
7024 SDValue Sign = Op.getOperand(1);
7025
7026 SDLoc DL(Op);
7027
7028 // Get sign bit into an integer value.
7029 unsigned SignSize = Sign.getValueSizeInBits();
7030 SDValue SignAsInt = [&]() {
7031 if (SignSize == Subtarget.getXLen())
7032 return DAG.getNode(ISD::BITCAST, DL, XLenVT, Sign);
7033 switch (SignSize) {
7034 case 16:
7035 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Sign);
7036 case 32:
7037 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, Sign);
7038 case 64: {
7039 assert(XLenVT == MVT::i32 && "Unexpected type");
7040 // Copy the upper word to integer.
7041 SignSize = 32;
7042 return DAG.getNode(RISCVISD::SplitF64, DL, {MVT::i32, MVT::i32}, Sign)
7043 .getValue(1);
7044 }
7045 default:
7046 llvm_unreachable("Unexpected sign size");
7047 }
7048 }();
7049
7050 // Get the signbit at the right position for MagAsInt.
7051 if (int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits())
7052 SignAsInt = DAG.getNode(ShiftAmount > 0 ? ISD::SRL : ISD::SHL, DL, XLenVT,
7053 SignAsInt,
7054 DAG.getConstant(std::abs(ShiftAmount), DL, XLenVT));
7055
7056 // Mask the sign bit and any bits above it. The extra bits will be dropped
7057 // when we convert back to FP.
7058 SDValue SignMask = DAG.getConstant(
7059 APInt::getSignMask(16).sext(Subtarget.getXLen()), DL, XLenVT);
7060 SDValue SignBit = DAG.getNode(ISD::AND, DL, XLenVT, SignAsInt, SignMask);
7061
7062 // Transform Mag value to integer, and clear the sign bit.
7063 SDValue MagAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Mag);
7064 SDValue ClearSignMask = DAG.getConstant(
7065 APInt::getSignedMaxValue(16).sext(Subtarget.getXLen()), DL, XLenVT);
7066 SDValue ClearedSign =
7067 DAG.getNode(ISD::AND, DL, XLenVT, MagAsInt, ClearSignMask);
7068
7069 SDValue CopiedSign = DAG.getNode(ISD::OR, DL, XLenVT, ClearedSign, SignBit,
7070 SDNodeFlags::Disjoint);
7071
7072 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, CopiedSign);
7073}
7074
7075 /// Get the RISC-V target-specific VL op for a given SDNode.
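/// For example, ISD::ADD and ISD::VP_ADD both map to RISCVISD::ADD_VL, while
/// ISD::AND on an i1 vector maps to RISCVISD::VMAND_VL rather than AND_VL.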
7076static unsigned getRISCVVLOp(SDValue Op) {
7077#define OP_CASE(NODE) \
7078 case ISD::NODE: \
7079 return RISCVISD::NODE##_VL;
7080#define VP_CASE(NODE) \
7081 case ISD::VP_##NODE: \
7082 return RISCVISD::NODE##_VL;
7083 // clang-format off
7084 switch (Op.getOpcode()) {
7085 default:
7086 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
7087 OP_CASE(ADD)
7088 OP_CASE(SUB)
7089 OP_CASE(MUL)
7090 OP_CASE(MULHS)
7091 OP_CASE(MULHU)
7092 OP_CASE(SDIV)
7093 OP_CASE(SREM)
7094 OP_CASE(UDIV)
7095 OP_CASE(UREM)
7096 OP_CASE(SHL)
7097 OP_CASE(SRA)
7098 OP_CASE(SRL)
7099 OP_CASE(ROTL)
7100 OP_CASE(ROTR)
7101 OP_CASE(BSWAP)
7102 OP_CASE(CTTZ)
7103 OP_CASE(CTLZ)
7104 OP_CASE(CTPOP)
7105 OP_CASE(BITREVERSE)
7106 OP_CASE(SADDSAT)
7107 OP_CASE(UADDSAT)
7108 OP_CASE(SSUBSAT)
7109 OP_CASE(USUBSAT)
7110 OP_CASE(AVGFLOORS)
7111 OP_CASE(AVGFLOORU)
7112 OP_CASE(AVGCEILS)
7113 OP_CASE(AVGCEILU)
7114 OP_CASE(FADD)
7115 OP_CASE(FSUB)
7116 OP_CASE(FMUL)
7117 OP_CASE(FDIV)
7118 OP_CASE(FNEG)
7119 OP_CASE(FABS)
7120 OP_CASE(FCOPYSIGN)
7121 OP_CASE(FSQRT)
7122 OP_CASE(SMIN)
7123 OP_CASE(SMAX)
7124 OP_CASE(UMIN)
7125 OP_CASE(UMAX)
7126 OP_CASE(STRICT_FADD)
7127 OP_CASE(STRICT_FSUB)
7128 OP_CASE(STRICT_FMUL)
7129 OP_CASE(STRICT_FDIV)
7130 OP_CASE(STRICT_FSQRT)
7131 VP_CASE(ADD) // VP_ADD
7132 VP_CASE(SUB) // VP_SUB
7133 VP_CASE(MUL) // VP_MUL
7134 VP_CASE(SDIV) // VP_SDIV
7135 VP_CASE(SREM) // VP_SREM
7136 VP_CASE(UDIV) // VP_UDIV
7137 VP_CASE(UREM) // VP_UREM
7138 VP_CASE(SHL) // VP_SHL
7139 VP_CASE(FADD) // VP_FADD
7140 VP_CASE(FSUB) // VP_FSUB
7141 VP_CASE(FMUL) // VP_FMUL
7142 VP_CASE(FDIV) // VP_FDIV
7143 VP_CASE(FNEG) // VP_FNEG
7144 VP_CASE(FABS) // VP_FABS
7145 VP_CASE(SMIN) // VP_SMIN
7146 VP_CASE(SMAX) // VP_SMAX
7147 VP_CASE(UMIN) // VP_UMIN
7148 VP_CASE(UMAX) // VP_UMAX
7149 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
7150 VP_CASE(SETCC) // VP_SETCC
7151 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
7152 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
7153 VP_CASE(BITREVERSE) // VP_BITREVERSE
7154 VP_CASE(SADDSAT) // VP_SADDSAT
7155 VP_CASE(UADDSAT) // VP_UADDSAT
7156 VP_CASE(SSUBSAT) // VP_SSUBSAT
7157 VP_CASE(USUBSAT) // VP_USUBSAT
7158 VP_CASE(BSWAP) // VP_BSWAP
7159 VP_CASE(CTLZ) // VP_CTLZ
7160 VP_CASE(CTTZ) // VP_CTTZ
7161 VP_CASE(CTPOP) // VP_CTPOP
7162 case ISD::CTLZ_ZERO_UNDEF:
7163 case ISD::VP_CTLZ_ZERO_UNDEF:
7164 return RISCVISD::CTLZ_VL;
7165 case ISD::CTTZ_ZERO_UNDEF:
7166 case ISD::VP_CTTZ_ZERO_UNDEF:
7167 return RISCVISD::CTTZ_VL;
7168 case ISD::FMA:
7169 case ISD::VP_FMA:
7170 return RISCVISD::VFMADD_VL;
7171 case ISD::STRICT_FMA:
7172 return RISCVISD::STRICT_VFMADD_VL;
7173 case ISD::AND:
7174 case ISD::VP_AND:
7175 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7176 return RISCVISD::VMAND_VL;
7177 return RISCVISD::AND_VL;
7178 case ISD::OR:
7179 case ISD::VP_OR:
7180 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7181 return RISCVISD::VMOR_VL;
7182 return RISCVISD::OR_VL;
7183 case ISD::XOR:
7184 case ISD::VP_XOR:
7185 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7186 return RISCVISD::VMXOR_VL;
7187 return RISCVISD::XOR_VL;
7188 case ISD::ANY_EXTEND:
7189 case ISD::ZERO_EXTEND:
7190 return RISCVISD::VZEXT_VL;
7191 case ISD::SIGN_EXTEND:
7192 return RISCVISD::VSEXT_VL;
7193 case ISD::SETCC:
7194 return RISCVISD::SETCC_VL;
7195 case ISD::VSELECT:
7196 return RISCVISD::VMERGE_VL;
7197 case ISD::VP_SELECT:
7198 case ISD::VP_MERGE:
7199 return RISCVISD::VMERGE_VL;
7200 case ISD::VP_SRA:
7201 return RISCVISD::SRA_VL;
7202 case ISD::VP_SRL:
7203 return RISCVISD::SRL_VL;
7204 case ISD::VP_SQRT:
7205 return RISCVISD::FSQRT_VL;
7206 case ISD::VP_SIGN_EXTEND:
7207 return RISCVISD::VSEXT_VL;
7208 case ISD::VP_ZERO_EXTEND:
7209 return RISCVISD::VZEXT_VL;
7210 case ISD::VP_FP_TO_SINT:
7211 return RISCVISD::VFCVT_RTZ_X_F_VL;
7212 case ISD::VP_FP_TO_UINT:
7213 return RISCVISD::VFCVT_RTZ_XU_F_VL;
7214 case ISD::FMINNUM:
7215 case ISD::FMINIMUMNUM:
7216 case ISD::VP_FMINNUM:
7217 return RISCVISD::VFMIN_VL;
7218 case ISD::FMAXNUM:
7219 case ISD::FMAXIMUMNUM:
7220 case ISD::VP_FMAXNUM:
7221 return RISCVISD::VFMAX_VL;
7222 case ISD::LRINT:
7223 case ISD::VP_LRINT:
7224 case ISD::LLRINT:
7225 case ISD::VP_LLRINT:
7226 return RISCVISD::VFCVT_RM_X_F_VL;
7227 }
7228 // clang-format on
7229#undef OP_CASE
7230#undef VP_CASE
7231}
7232
7233 static bool isPromotedOpNeedingSplit(SDValue Op,
7234 const RISCVSubtarget &Subtarget) {
7235 return (Op.getValueType() == MVT::nxv32f16 &&
7236 (Subtarget.hasVInstructionsF16Minimal() &&
7237 !Subtarget.hasVInstructionsF16())) ||
7238 Op.getValueType() == MVT::nxv32bf16;
7239}
7240
7241 static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
7242 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
7243 SDLoc DL(Op);
7244
7245 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7246 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7247
7248 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7249 if (!Op.getOperand(j).getValueType().isVector()) {
7250 LoOperands[j] = Op.getOperand(j);
7251 HiOperands[j] = Op.getOperand(j);
7252 continue;
7253 }
7254 std::tie(LoOperands[j], HiOperands[j]) =
7255 DAG.SplitVector(Op.getOperand(j), DL);
7256 }
7257
7258 SDValue LoRes =
7259 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
7260 SDValue HiRes =
7261 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
7262
7263 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
7264}
7265
7267 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
7268 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
7269 SDLoc DL(Op);
7270
7271 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7272 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7273
7274 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7275 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
7276 std::tie(LoOperands[j], HiOperands[j]) =
7277 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
7278 continue;
7279 }
7280 if (!Op.getOperand(j).getValueType().isVector()) {
7281 LoOperands[j] = Op.getOperand(j);
7282 HiOperands[j] = Op.getOperand(j);
7283 continue;
7284 }
7285 std::tie(LoOperands[j], HiOperands[j]) =
7286 DAG.SplitVector(Op.getOperand(j), DL);
7287 }
7288
7289 SDValue LoRes =
7290 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
7291 SDValue HiRes =
7292 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
7293
7294 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
7295}
7296
7297 static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
7298 SDLoc DL(Op);
7299
7300 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
7301 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
7302 auto [EVLLo, EVLHi] =
7303 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
7304
7305 SDValue ResLo =
7306 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7307 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
7308 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7309 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
7310}
7311
7313
7314 assert(Op->isStrictFPOpcode());
7315
7316 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
7317
7318 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
7319 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
7320
7321 SDLoc DL(Op);
7322
7323 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7324 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7325
7326 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7327 if (!Op.getOperand(j).getValueType().isVector()) {
7328 LoOperands[j] = Op.getOperand(j);
7329 HiOperands[j] = Op.getOperand(j);
7330 continue;
7331 }
7332 std::tie(LoOperands[j], HiOperands[j]) =
7333 DAG.SplitVector(Op.getOperand(j), DL);
7334 }
7335
7336 SDValue LoRes =
7337 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
7338 HiOperands[0] = LoRes.getValue(1);
7339 SDValue HiRes =
7340 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
7341
7342 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
7343 LoRes.getValue(0), HiRes.getValue(0));
7344 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
7345}
7346
7347SDValue
7348RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Load(SDValue Op,
7349 SelectionDAG &DAG) const {
7350 assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
7351 "Unexpected bfloat16 load lowering");
7352
7353 SDLoc DL(Op);
7354 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
7355 EVT MemVT = LD->getMemoryVT();
7356 SDValue Load = DAG.getExtLoad(
7357 ISD::ZEXTLOAD, DL, Subtarget.getXLenVT(), LD->getChain(),
7358 LD->getBasePtr(),
7359 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
7360 LD->getMemOperand());
7361 // Use a mask to make the bf16 NaN-boxing valid when we don't have the flh
7362 // instruction. -65536 is treated as a small (negative) immediate and can
7363 // therefore be materialized directly with a single lui.
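 // For example, a loaded bf16 bit pattern of 0x3F80 (1.0) has all bits above
 // bit 15 set by the OR below, which is the NaN-boxed form the FP move expects.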
7364 SDValue mask = DAG.getSignedConstant(-65536, DL, Subtarget.getXLenVT());
7365 SDValue OrSixteenOne =
7366 DAG.getNode(ISD::OR, DL, Load.getValueType(), {Load, mask});
7367 SDValue ConvertedResult =
7368 DAG.getNode(RISCVISD::NDS_FMV_BF16_X, DL, MVT::bf16, OrSixteenOne);
7369 return DAG.getMergeValues({ConvertedResult, Load.getValue(1)}, DL);
7370}
7371
7372SDValue
7373RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Store(SDValue Op,
7374 SelectionDAG &DAG) const {
7375 assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
7376 "Unexpected bfloat16 store lowering");
7377
7378 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
7379 SDLoc DL(Op);
7380 SDValue FMV = DAG.getNode(RISCVISD::NDS_FMV_X_ANYEXTBF16, DL,
7381 Subtarget.getXLenVT(), ST->getValue());
7382 return DAG.getTruncStore(
7383 ST->getChain(), DL, FMV, ST->getBasePtr(),
7384 EVT::getIntegerVT(*DAG.getContext(), ST->getMemoryVT().getSizeInBits()),
7385 ST->getMemOperand());
7386}
7387
7388 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
7389 SelectionDAG &DAG) const {
7390 switch (Op.getOpcode()) {
7391 default:
7392 reportFatalInternalError(
7393 "Unimplemented RISCVTargetLowering::LowerOperation Case");
7394 case ISD::PREFETCH:
7395 return LowerPREFETCH(Op, Subtarget, DAG);
7396 case ISD::ATOMIC_FENCE:
7397 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
7398 case ISD::GlobalAddress:
7399 return lowerGlobalAddress(Op, DAG);
7400 case ISD::BlockAddress:
7401 return lowerBlockAddress(Op, DAG);
7402 case ISD::ConstantPool:
7403 return lowerConstantPool(Op, DAG);
7404 case ISD::JumpTable:
7405 return lowerJumpTable(Op, DAG);
7406 case ISD::GlobalTLSAddress:
7407 return lowerGlobalTLSAddress(Op, DAG);
7408 case ISD::Constant:
7409 return lowerConstant(Op, DAG, Subtarget);
7410 case ISD::ConstantFP:
7411 return lowerConstantFP(Op, DAG);
7412 case ISD::SELECT:
7413 return lowerSELECT(Op, DAG);
7414 case ISD::BRCOND:
7415 return lowerBRCOND(Op, DAG);
7416 case ISD::VASTART:
7417 return lowerVASTART(Op, DAG);
7418 case ISD::FRAMEADDR:
7419 return lowerFRAMEADDR(Op, DAG);
7420 case ISD::RETURNADDR:
7421 return lowerRETURNADDR(Op, DAG);
7422 case ISD::SHL_PARTS:
7423 return lowerShiftLeftParts(Op, DAG);
7424 case ISD::SRA_PARTS:
7425 return lowerShiftRightParts(Op, DAG, true);
7426 case ISD::SRL_PARTS:
7427 return lowerShiftRightParts(Op, DAG, false);
7428 case ISD::ROTL:
7429 case ISD::ROTR:
7430 if (Op.getValueType().isFixedLengthVector()) {
7431 assert(Subtarget.hasStdExtZvkb());
7432 return lowerToScalableOp(Op, DAG);
7433 }
7434 assert(Subtarget.hasVendorXTHeadBb() &&
7435 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
7436 "Unexpected custom legalization");
7437 // XTHeadBb only supports rotate by constant.
7438 if (!isa<ConstantSDNode>(Op.getOperand(1)))
7439 return SDValue();
7440 return Op;
7441 case ISD::BITCAST: {
7442 SDLoc DL(Op);
7443 EVT VT = Op.getValueType();
7444 SDValue Op0 = Op.getOperand(0);
7445 EVT Op0VT = Op0.getValueType();
7446 MVT XLenVT = Subtarget.getXLenVT();
7447 if (Op0VT == MVT::i16 &&
7448 ((VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
7449 (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
7450 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
7451 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, NewOp0);
7452 }
7453 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
7454 Subtarget.hasStdExtFOrZfinx()) {
7455 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
7456 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
7457 }
7458 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() &&
7459 Subtarget.hasStdExtDOrZdinx()) {
7460 SDValue Lo, Hi;
7461 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
7462 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
7463 }
7464
7465 // Consider other scalar<->scalar casts as legal if the types are legal.
7466 // Otherwise expand them.
7467 if (!VT.isVector() && !Op0VT.isVector()) {
7468 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
7469 return Op;
7470 return SDValue();
7471 }
7472
7473 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
7474 "Unexpected types");
7475
7476 if (VT.isFixedLengthVector()) {
7477 // We can handle fixed length vector bitcasts with a simple replacement
7478 // in isel.
7479 if (Op0VT.isFixedLengthVector())
7480 return Op;
7481 // When bitcasting from scalar to fixed-length vector, insert the scalar
7482 // into a one-element vector of the result type, and perform a vector
7483 // bitcast.
7484 if (!Op0VT.isVector()) {
7485 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
7486 if (!isTypeLegal(BVT))
7487 return SDValue();
7488 return DAG.getBitcast(
7489 VT, DAG.getInsertVectorElt(DL, DAG.getUNDEF(BVT), Op0, 0));
7490 }
7491 return SDValue();
7492 }
7493 // Custom-legalize bitcasts from fixed-length vector types to scalar types
7494 // thus: bitcast the vector to a one-element vector type whose element type
7495 // is the same as the result type, and extract the first element.
7496 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
7497 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
7498 if (!isTypeLegal(BVT))
7499 return SDValue();
7500 SDValue BVec = DAG.getBitcast(BVT, Op0);
7501 return DAG.getExtractVectorElt(DL, VT, BVec, 0);
7502 }
7503 return SDValue();
7504 }
7505 case ISD::INTRINSIC_WO_CHAIN:
7506 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
7507 case ISD::INTRINSIC_W_CHAIN:
7508 return LowerINTRINSIC_W_CHAIN(Op, DAG);
7509 case ISD::INTRINSIC_VOID:
7510 return LowerINTRINSIC_VOID(Op, DAG);
7511 case ISD::IS_FPCLASS:
7512 return LowerIS_FPCLASS(Op, DAG);
7513 case ISD::BITREVERSE: {
7514 MVT VT = Op.getSimpleValueType();
7515 if (VT.isFixedLengthVector()) {
7516 assert(Subtarget.hasStdExtZvbb());
7517 return lowerToScalableOp(Op, DAG);
7518 }
7519 SDLoc DL(Op);
7520 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
7521 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
7522 // Expand bitreverse to a bswap(rev8) followed by brev8.
7523 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
7524 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
7525 }
7526 case ISD::TRUNCATE:
7529 // Only custom-lower vector truncates
7530 if (!Op.getSimpleValueType().isVector())
7531 return Op;
7532 return lowerVectorTruncLike(Op, DAG);
7533 case ISD::ANY_EXTEND:
7534 case ISD::ZERO_EXTEND:
7535 if (Op.getOperand(0).getValueType().isVector() &&
7536 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7537 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
7538 if (Op.getValueType().isScalableVector())
7539 return Op;
7540 return lowerToScalableOp(Op, DAG);
7541 case ISD::SIGN_EXTEND:
7542 if (Op.getOperand(0).getValueType().isVector() &&
7543 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7544 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
7545 if (Op.getValueType().isScalableVector())
7546 return Op;
7547 return lowerToScalableOp(Op, DAG);
7548 case ISD::SPLAT_VECTOR_PARTS:
7549 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
7550 case ISD::INSERT_VECTOR_ELT:
7551 return lowerINSERT_VECTOR_ELT(Op, DAG);
7552 case ISD::EXTRACT_VECTOR_ELT:
7553 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
7554 case ISD::SCALAR_TO_VECTOR: {
7555 MVT VT = Op.getSimpleValueType();
7556 SDLoc DL(Op);
7557 SDValue Scalar = Op.getOperand(0);
7558 if (VT.getVectorElementType() == MVT::i1) {
7559 MVT WideVT = VT.changeVectorElementType(MVT::i8);
7560 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
7561 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
7562 }
7563 MVT ContainerVT = VT;
7564 if (VT.isFixedLengthVector())
7565 ContainerVT = getContainerForFixedLengthVector(VT);
7566 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
7567
7568 SDValue V;
7569 if (VT.isFloatingPoint()) {
7570 V = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, ContainerVT,
7571 DAG.getUNDEF(ContainerVT), Scalar, VL);
7572 } else {
7573 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
7574 V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
7575 DAG.getUNDEF(ContainerVT), Scalar, VL);
7576 }
7577 if (VT.isFixedLengthVector())
7578 V = convertFromScalableVector(VT, V, DAG, Subtarget);
7579 return V;
7580 }
7581 case ISD::VSCALE: {
7582 MVT XLenVT = Subtarget.getXLenVT();
7583 MVT VT = Op.getSimpleValueType();
7584 SDLoc DL(Op);
7585 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7586 // We define our scalable vector types for lmul=1 to use a 64 bit known
7587 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
7588 // vscale as VLENB / 8.
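 // For example, for vscale * 4 on a VLEN=128 machine (VLENB = 16, vscale = 2):
 // Log2(4) = 2 < 3, so we emit VLENB >> 1 = 8, which equals vscale * 4.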
7589 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
7590 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
7591 reportFatalInternalError("Support for VLEN==32 is incomplete.");
7592 // We assume VLENB is a multiple of 8. We manually choose the best shift
7593 // here because SimplifyDemandedBits isn't always able to simplify it.
7594 uint64_t Val = Op.getConstantOperandVal(0);
7595 if (isPowerOf2_64(Val)) {
7596 uint64_t Log2 = Log2_64(Val);
7597 if (Log2 < 3) {
7598 SDNodeFlags Flags;
7599 Flags.setExact(true);
7600 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
7601 DAG.getConstant(3 - Log2, DL, XLenVT), Flags);
7602 } else if (Log2 > 3) {
7603 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
7604 DAG.getConstant(Log2 - 3, DL, XLenVT));
7605 }
7606 } else if ((Val % 8) == 0) {
7607 // If the multiplier is a multiple of 8, scale it down to avoid needing
7608 // to shift the VLENB value.
7609 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
7610 DAG.getConstant(Val / 8, DL, XLenVT));
7611 } else {
7612 SDNodeFlags Flags;
7613 Flags.setExact(true);
7614 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
7615 DAG.getConstant(3, DL, XLenVT), Flags);
7616 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
7617 DAG.getConstant(Val, DL, XLenVT));
7618 }
7619 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
7620 }
7621 case ISD::FPOWI: {
7622 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
7623 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
7624 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
7625 Op.getOperand(1).getValueType() == MVT::i32) {
7626 SDLoc DL(Op);
7627 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7628 SDValue Powi =
7629 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
7630 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
7631 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7632 }
7633 return SDValue();
7634 }
7635 case ISD::FMAXIMUM:
7636 case ISD::FMINIMUM:
7637 if (isPromotedOpNeedingSplit(Op, Subtarget))
7638 return SplitVectorOp(Op, DAG);
7639 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7640 case ISD::FP_EXTEND:
7641 case ISD::FP_ROUND:
7642 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7643 case ISD::STRICT_FP_ROUND:
7644 case ISD::STRICT_FP_EXTEND:
7645 return lowerStrictFPExtendOrRoundLike(Op, DAG);
7646 case ISD::SINT_TO_FP:
7647 case ISD::UINT_TO_FP:
7648 if (Op.getValueType().isVector() &&
7649 ((Op.getValueType().getScalarType() == MVT::f16 &&
7650 (Subtarget.hasVInstructionsF16Minimal() &&
7651 !Subtarget.hasVInstructionsF16())) ||
7652 Op.getValueType().getScalarType() == MVT::bf16)) {
7653 if (isPromotedOpNeedingSplit(Op, Subtarget))
7654 return SplitVectorOp(Op, DAG);
7655 // int -> f32
7656 SDLoc DL(Op);
7657 MVT NVT =
7658 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7659 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7660 // f32 -> [b]f16
7661 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7662 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7663 }
7664 [[fallthrough]];
7665 case ISD::FP_TO_SINT:
7666 case ISD::FP_TO_UINT:
7667 if (SDValue Op1 = Op.getOperand(0);
7668 Op1.getValueType().isVector() &&
7669 ((Op1.getValueType().getScalarType() == MVT::f16 &&
7670 (Subtarget.hasVInstructionsF16Minimal() &&
7671 !Subtarget.hasVInstructionsF16())) ||
7672 Op1.getValueType().getScalarType() == MVT::bf16)) {
7673 if (isPromotedOpNeedingSplit(Op1, Subtarget))
7674 return SplitVectorOp(Op, DAG);
7675 // [b]f16 -> f32
7676 SDLoc DL(Op);
7677 MVT NVT = MVT::getVectorVT(MVT::f32,
7678 Op1.getValueType().getVectorElementCount());
7679 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7680 // f32 -> int
7681 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
7682 }
7683 [[fallthrough]];
7684 case ISD::STRICT_FP_TO_SINT:
7685 case ISD::STRICT_FP_TO_UINT:
7686 case ISD::STRICT_SINT_TO_FP:
7687 case ISD::STRICT_UINT_TO_FP: {
7688 // RVV can only do fp<->int conversions to types half/double the size as
7689 // the source. We custom-lower any conversions that do two hops into
7690 // sequences.
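 // For example, i8 -> f64 is lowered as i8 -> i32 (extend) followed by a
 // widening i32 -> f64 convert, and f64 -> i8 as a narrowing f64 -> i32
 // convert followed by a truncate.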
7691 MVT VT = Op.getSimpleValueType();
7692 if (VT.isScalarInteger())
7693 return lowerFP_TO_INT(Op, DAG, Subtarget);
7694 bool IsStrict = Op->isStrictFPOpcode();
7695 SDValue Src = Op.getOperand(0 + IsStrict);
7696 MVT SrcVT = Src.getSimpleValueType();
7697 if (SrcVT.isScalarInteger())
7698 return lowerINT_TO_FP(Op, DAG, Subtarget);
7699 if (!VT.isVector())
7700 return Op;
7701 SDLoc DL(Op);
7702 MVT EltVT = VT.getVectorElementType();
7703 MVT SrcEltVT = SrcVT.getVectorElementType();
7704 unsigned EltSize = EltVT.getSizeInBits();
7705 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
7706 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
7707 "Unexpected vector element types");
7708
7709 bool IsInt2FP = SrcEltVT.isInteger();
7710 // Widening conversions
7711 if (EltSize > (2 * SrcEltSize)) {
7712 if (IsInt2FP) {
7713 // Do a regular integer sign/zero extension then convert to float.
7714 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
7715 VT.getVectorElementCount());
7716 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
7717 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
7718 ? ISD::ZERO_EXTEND
7719 : ISD::SIGN_EXTEND;
7720 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
7721 if (IsStrict)
7722 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
7723 Op.getOperand(0), Ext);
7724 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
7725 }
7726 // FP2Int
7727 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
7728 // Do one doubling fp_extend then complete the operation by converting
7729 // to int.
7730 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
7731 if (IsStrict) {
7732 auto [FExt, Chain] =
7733 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
7734 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
7735 }
7736 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
7737 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
7738 }
7739
7740 // Narrowing conversions
7741 if (SrcEltSize > (2 * EltSize)) {
7742 if (IsInt2FP) {
7743 // One narrowing int_to_fp, then an fp_round.
7744 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
7745 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
7746 if (IsStrict) {
7747 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
7748 DAG.getVTList(InterimFVT, MVT::Other),
7749 Op.getOperand(0), Src);
7750 SDValue Chain = Int2FP.getValue(1);
7751 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
7752 }
7753 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
7754 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
7755 }
7756 // FP2Int
7757 // One narrowing fp_to_int, then truncate the integer. If the float isn't
7758 // representable by the integer, the result is poison.
7759 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
7760 VT.getVectorElementCount());
7761 if (IsStrict) {
7762 SDValue FP2Int =
7763 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
7764 Op.getOperand(0), Src);
7765 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7766 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
7767 }
7768 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
7769 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7770 }
7771
7772 // Scalable vectors can exit here. Patterns will handle equally-sized
7773 // conversions halving/doubling ones.
7774 if (!VT.isFixedLengthVector())
7775 return Op;
7776
7777 // For fixed-length vectors we lower to a custom "VL" node.
7778 unsigned RVVOpc = 0;
7779 switch (Op.getOpcode()) {
7780 default:
7781 llvm_unreachable("Impossible opcode");
7782 case ISD::FP_TO_SINT:
7783 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
7784 break;
7785 case ISD::FP_TO_UINT:
7786 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
7787 break;
7788 case ISD::SINT_TO_FP:
7789 RVVOpc = RISCVISD::SINT_TO_FP_VL;
7790 break;
7791 case ISD::UINT_TO_FP:
7792 RVVOpc = RISCVISD::UINT_TO_FP_VL;
7793 break;
7794 case ISD::STRICT_FP_TO_SINT:
7795 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
7796 break;
7797 case ISD::STRICT_FP_TO_UINT:
7798 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
7799 break;
7800 case ISD::STRICT_SINT_TO_FP:
7801 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
7802 break;
7803 case ISD::STRICT_UINT_TO_FP:
7804 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
7805 break;
7806 }
7807
7808 MVT ContainerVT = getContainerForFixedLengthVector(VT);
7809 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
7810 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
7811 "Expected same element count");
7812
7813 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7814
7815 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
7816 if (IsStrict) {
7817 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
7818 Op.getOperand(0), Src, Mask, VL);
7819 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
7820 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
7821 }
7822 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
7823 return convertFromScalableVector(VT, Src, DAG, Subtarget);
7824 }
7825 case ISD::FP_TO_SINT_SAT:
7826 case ISD::FP_TO_UINT_SAT:
7827 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
7828 case ISD::FP_TO_BF16: {
7829 // Custom lower to ensure the libcall return is passed in an FPR on hard
7830 // float ABIs.
7831 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
7832 SDLoc DL(Op);
7833 MakeLibCallOptions CallOptions;
7834 RTLIB::Libcall LC =
7835 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
7836 SDValue Res =
7837 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
7838 if (Subtarget.is64Bit())
7839 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7840 return DAG.getBitcast(MVT::i32, Res);
7841 }
7842 case ISD::BF16_TO_FP: {
7843 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
7844 MVT VT = Op.getSimpleValueType();
7845 SDLoc DL(Op);
7846 Op = DAG.getNode(
7847 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
7848 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
7849 SDValue Res = Subtarget.is64Bit()
7850 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
7851 : DAG.getBitcast(MVT::f32, Op);
7852 // fp_extend if the target VT is bigger than f32.
7853 if (VT != MVT::f32)
7854 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
7855 return Res;
7856 }
7857 case ISD::STRICT_FP_TO_FP16:
7858 case ISD::FP_TO_FP16: {
7859 // Custom lower to ensure the libcall return is passed in an FPR on hard
7860 // float ABIs.
7861 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7862 SDLoc DL(Op);
7863 MakeLibCallOptions CallOptions;
7864 bool IsStrict = Op->isStrictFPOpcode();
7865 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7866 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7867 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
7868 SDValue Res;
7869 std::tie(Res, Chain) =
7870 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
7871 if (Subtarget.is64Bit())
7872 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7873 SDValue Result = DAG.getBitcast(MVT::i32, IsStrict ? Res.getValue(0) : Res);
7874 if (IsStrict)
7875 return DAG.getMergeValues({Result, Chain}, DL);
7876 return Result;
7877 }
7878 case ISD::STRICT_FP16_TO_FP:
7879 case ISD::FP16_TO_FP: {
7880 // Custom lower to ensure the libcall argument is passed in an FPR on hard
7881 // float ABIs.
7882 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7883 SDLoc DL(Op);
7884 MakeLibCallOptions CallOptions;
7885 bool IsStrict = Op->isStrictFPOpcode();
7886 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7887 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7888 SDValue Arg = Subtarget.is64Bit()
7889 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op0)
7890 : DAG.getBitcast(MVT::f32, Op0);
7891 SDValue Res;
7892 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
7893 CallOptions, DL, Chain);
7894 if (IsStrict)
7895 return DAG.getMergeValues({Res, Chain}, DL);
7896 return Res;
7897 }
7898 case ISD::FTRUNC:
7899 case ISD::FCEIL:
7900 case ISD::FFLOOR:
7901 case ISD::FNEARBYINT:
7902 case ISD::FRINT:
7903 case ISD::FROUND:
7904 case ISD::FROUNDEVEN:
7905 if (isPromotedOpNeedingSplit(Op, Subtarget))
7906 return SplitVectorOp(Op, DAG);
7907 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7908 case ISD::LRINT:
7909 case ISD::LLRINT:
7910 case ISD::LROUND:
7911 case ISD::LLROUND: {
7912 if (Op.getValueType().isVector())
7913 return lowerVectorXRINT_XROUND(Op, DAG, Subtarget);
7914 assert(Op.getOperand(0).getValueType() == MVT::f16 &&
7915 "Unexpected custom legalisation");
7916 SDLoc DL(Op);
7917 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7918 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
7919 }
7920 case ISD::STRICT_LRINT:
7921 case ISD::STRICT_LLRINT:
7922 case ISD::STRICT_LROUND:
7923 case ISD::STRICT_LLROUND: {
7924 assert(Op.getOperand(1).getValueType() == MVT::f16 &&
7925 "Unexpected custom legalisation");
7926 SDLoc DL(Op);
7927 SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
7928 {Op.getOperand(0), Op.getOperand(1)});
7929 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
7930 {Ext.getValue(1), Ext.getValue(0)});
7931 }
7932 case ISD::VECREDUCE_ADD:
7933 case ISD::VECREDUCE_UMAX:
7934 case ISD::VECREDUCE_SMAX:
7935 case ISD::VECREDUCE_UMIN:
7936 case ISD::VECREDUCE_SMIN:
7937 return lowerVECREDUCE(Op, DAG);
7938 case ISD::VECREDUCE_AND:
7939 case ISD::VECREDUCE_OR:
7940 case ISD::VECREDUCE_XOR:
7941 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7942 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
7943 return lowerVECREDUCE(Op, DAG);
7944 case ISD::VECREDUCE_FADD:
7945 case ISD::VECREDUCE_SEQ_FADD:
7946 case ISD::VECREDUCE_FMIN:
7947 case ISD::VECREDUCE_FMAX:
7948 case ISD::VECREDUCE_FMAXIMUM:
7949 case ISD::VECREDUCE_FMINIMUM:
7950 return lowerFPVECREDUCE(Op, DAG);
7951 case ISD::VP_REDUCE_ADD:
7952 case ISD::VP_REDUCE_UMAX:
7953 case ISD::VP_REDUCE_SMAX:
7954 case ISD::VP_REDUCE_UMIN:
7955 case ISD::VP_REDUCE_SMIN:
7956 case ISD::VP_REDUCE_FADD:
7957 case ISD::VP_REDUCE_SEQ_FADD:
7958 case ISD::VP_REDUCE_FMIN:
7959 case ISD::VP_REDUCE_FMAX:
7960 case ISD::VP_REDUCE_FMINIMUM:
7961 case ISD::VP_REDUCE_FMAXIMUM:
7962 if (isPromotedOpNeedingSplit(Op.getOperand(1), Subtarget))
7963 return SplitVectorReductionOp(Op, DAG);
7964 return lowerVPREDUCE(Op, DAG);
7965 case ISD::VP_REDUCE_AND:
7966 case ISD::VP_REDUCE_OR:
7967 case ISD::VP_REDUCE_XOR:
7968 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
7969 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
7970 return lowerVPREDUCE(Op, DAG);
7971 case ISD::VP_CTTZ_ELTS:
7972 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
7973 return lowerVPCttzElements(Op, DAG);
7974 case ISD::UNDEF: {
7975 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
7976 return convertFromScalableVector(Op.getSimpleValueType(),
7977 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
7978 }
7979 case ISD::INSERT_SUBVECTOR:
7980 return lowerINSERT_SUBVECTOR(Op, DAG);
7981 case ISD::EXTRACT_SUBVECTOR:
7982 return lowerEXTRACT_SUBVECTOR(Op, DAG);
7983 case ISD::VECTOR_DEINTERLEAVE:
7984 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
7985 case ISD::VECTOR_INTERLEAVE:
7986 return lowerVECTOR_INTERLEAVE(Op, DAG);
7987 case ISD::STEP_VECTOR:
7988 return lowerSTEP_VECTOR(Op, DAG);
7989 case ISD::VECTOR_REVERSE:
7990 return lowerVECTOR_REVERSE(Op, DAG);
7991 case ISD::VECTOR_SPLICE:
7992 return lowerVECTOR_SPLICE(Op, DAG);
7993 case ISD::BUILD_VECTOR: {
7994 MVT VT = Op.getSimpleValueType();
7995 MVT EltVT = VT.getVectorElementType();
7996 if (!Subtarget.is64Bit() && EltVT == MVT::i64)
7997 return lowerBuildVectorViaVID(Op, DAG, Subtarget);
7998 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
7999 }
8000 case ISD::SPLAT_VECTOR: {
8001 MVT VT = Op.getSimpleValueType();
8002 MVT EltVT = VT.getVectorElementType();
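// With only Zvfhmin/Zvfbfmin there is no vector splat of an f16/bf16 scalar,
// so splat the raw 16-bit pattern as an i16 vector and bitcast back.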
8003 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
8004 EltVT == MVT::bf16) {
8005 SDLoc DL(Op);
8006 SDValue Elt;
8007 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
8008 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
8009 Elt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(),
8010 Op.getOperand(0));
8011 else
8012 Elt = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Op.getOperand(0));
8013 MVT IVT = VT.changeVectorElementType(MVT::i16);
8014 return DAG.getNode(ISD::BITCAST, DL, VT,
8015 DAG.getNode(ISD::SPLAT_VECTOR, DL, IVT, Elt));
8016 }
8017
8018 if (EltVT == MVT::i1)
8019 return lowerVectorMaskSplat(Op, DAG);
8020 return SDValue();
8021 }
8022 case ISD::VECTOR_SHUFFLE:
8023 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
8024 case ISD::CONCAT_VECTORS: {
8025 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
8026 // better than going through the stack, as the default expansion does.
8027 SDLoc DL(Op);
8028 MVT VT = Op.getSimpleValueType();
8029 MVT ContainerVT = VT;
8030 if (VT.isFixedLengthVector())
8031 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
8032
8033 // Recursively split concat_vectors with more than 2 operands:
8034 //
8035 // concat_vector op1, op2, op3, op4
8036 // ->
8037 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
8038 //
8039 // This reduces the length of the chain of vslideups and allows us to
8040 // perform the vslideups at a smaller LMUL, limited to MF2.
8041 if (Op.getNumOperands() > 2 &&
8042 ContainerVT.bitsGE(RISCVTargetLowering::getM1VT(ContainerVT))) {
8043 MVT HalfVT = VT.getHalfNumVectorElementsVT();
8044 assert(isPowerOf2_32(Op.getNumOperands()));
8045 size_t HalfNumOps = Op.getNumOperands() / 2;
8046 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
8047 Op->ops().take_front(HalfNumOps));
8048 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
8049 Op->ops().drop_front(HalfNumOps));
8050 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
8051 }
8052
8053 unsigned NumOpElts =
8054 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
8055 SDValue Vec = DAG.getUNDEF(VT);
8056 for (const auto &OpIdx : enumerate(Op->ops())) {
8057 SDValue SubVec = OpIdx.value();
8058 // Don't insert undef subvectors.
8059 if (SubVec.isUndef())
8060 continue;
8061 Vec = DAG.getInsertSubvector(DL, Vec, SubVec, OpIdx.index() * NumOpElts);
8062 }
8063 return Vec;
8064 }
8065 case ISD::LOAD: {
8066 auto *Load = cast<LoadSDNode>(Op);
8067 EVT VT = Load->getValueType(0);
8068 if (VT == MVT::f64) {
8069 assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
8070 !Subtarget.is64Bit() && "Unexpected custom legalisation");
8071
8072 // Replace a double precision load with two i32 loads and a BuildPairF64.
8073 SDLoc DL(Op);
8074 SDValue BasePtr = Load->getBasePtr();
8075 SDValue Chain = Load->getChain();
8076
8077 SDValue Lo =
8078 DAG.getLoad(MVT::i32, DL, Chain, BasePtr, Load->getPointerInfo(),
8079 Load->getBaseAlign(), Load->getMemOperand()->getFlags());
8080 BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
8081 SDValue Hi = DAG.getLoad(
8082 MVT::i32, DL, Chain, BasePtr, Load->getPointerInfo().getWithOffset(4),
8083 Load->getBaseAlign(), Load->getMemOperand()->getFlags());
8084 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
8085 Hi.getValue(1));
8086
8087 SDValue Pair = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
8088 return DAG.getMergeValues({Pair, Chain}, DL);
8089 }
8090
8091 if (VT == MVT::bf16)
8092 return lowerXAndesBfHCvtBFloat16Load(Op, DAG);
8093
8094 // Handle normal vector tuple load.
8095 if (VT.isRISCVVectorTuple()) {
8096 SDLoc DL(Op);
8097 MVT XLenVT = Subtarget.getXLenVT();
8098 unsigned NF = VT.getRISCVVectorTupleNumFields();
8099 unsigned Sz = VT.getSizeInBits().getKnownMinValue();
8100 unsigned NumElts = Sz / (NF * 8);
8101 int Log2LMUL = Log2_64(NumElts) - 3;
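// Each of the NF fields is Sz/NF bits, i.e. NumElts bytes per vscale. One
// vector register covers 8 bytes per vscale, so log2(NumElts) - 3 is the
// LMUL (register-group size) of a single field.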
8102
8103 auto Flag = SDNodeFlags();
8104 Flag.setNoUnsignedWrap(true);
8105 SDValue Ret = DAG.getUNDEF(VT);
8106 SDValue BasePtr = Load->getBasePtr();
8107 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
8108 VROffset =
8109 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
8110 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
8111 SmallVector<SDValue, 8> OutChains;
8112
8113 // Load NF vector registers and combine them to a vector tuple.
8114 for (unsigned i = 0; i < NF; ++i) {
8115 SDValue LoadVal = DAG.getLoad(
8116 MVT::getScalableVectorVT(MVT::i8, NumElts), DL, Load->getChain(),
8117 BasePtr, MachinePointerInfo(Load->getAddressSpace()), Align(8));
8118 OutChains.push_back(LoadVal.getValue(1));
8119 Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Ret, LoadVal,
8120 DAG.getTargetConstant(i, DL, MVT::i32));
8121 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
8122 }
8123 return DAG.getMergeValues(
8124 {Ret, DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains)}, DL);
8125 }
8126
8127 if (auto V = expandUnalignedRVVLoad(Op, DAG))
8128 return V;
8129 if (Op.getValueType().isFixedLengthVector())
8130 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
8131 return Op;
8132 }
8133 case ISD::STORE: {
8134 auto *Store = cast<StoreSDNode>(Op);
8135 SDValue StoredVal = Store->getValue();
8136 EVT VT = StoredVal.getValueType();
8137 if (VT == MVT::f64) {
8138 assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
8139 !Subtarget.is64Bit() && "Unexpected custom legalisation");
8140
8141 // Replace a double precision store with a SplitF64 and i32 stores.
8142 SDLoc DL(Op);
8143 SDValue BasePtr = Store->getBasePtr();
8144 SDValue Chain = Store->getChain();
8145 SDValue Split = DAG.getNode(RISCVISD::SplitF64, DL,
8146 DAG.getVTList(MVT::i32, MVT::i32), StoredVal);
8147
8148 SDValue Lo = DAG.getStore(Chain, DL, Split.getValue(0), BasePtr,
8149 Store->getPointerInfo(), Store->getBaseAlign(),
8150 Store->getMemOperand()->getFlags());
8151 BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
8152 SDValue Hi = DAG.getStore(Chain, DL, Split.getValue(1), BasePtr,
8153 Store->getPointerInfo().getWithOffset(4),
8154 Store->getBaseAlign(),
8155 Store->getMemOperand()->getFlags());
8156 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
8157 }
8158 if (VT == MVT::i64) {
8159 assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
8160 "Unexpected custom legalisation");
8161 if (Store->isTruncatingStore())
8162 return SDValue();
8163
8164 if (!Subtarget.enableUnalignedScalarMem() && Store->getAlign() < 8)
8165 return SDValue();
8166
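// Split the i64 value into its lo/hi i32 halves and emit a single paired
// store (Zilsd SD) as a memory intrinsic node that keeps the original
// memory operand.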
8167 SDLoc DL(Op);
8168 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
8169 DAG.getTargetConstant(0, DL, MVT::i32));
8170 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
8171 DAG.getTargetConstant(1, DL, MVT::i32));
8172
8173 return DAG.getMemIntrinsicNode(
8174 RISCVISD::SD_RV32, DL, DAG.getVTList(MVT::Other),
8175 {Store->getChain(), Lo, Hi, Store->getBasePtr()}, MVT::i64,
8176 Store->getMemOperand());
8177 }
8178
8179 if (VT == MVT::bf16)
8180 return lowerXAndesBfHCvtBFloat16Store(Op, DAG);
8181
8182 // Handle normal vector tuple store.
8183 if (VT.isRISCVVectorTuple()) {
8184 SDLoc DL(Op);
8185 MVT XLenVT = Subtarget.getXLenVT();
8186 unsigned NF = VT.getRISCVVectorTupleNumFields();
8187 unsigned Sz = VT.getSizeInBits().getKnownMinValue();
8188 unsigned NumElts = Sz / (NF * 8);
8189 int Log2LMUL = Log2_64(NumElts) - 3;
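// Same register-group arithmetic as the tuple-load case above: VROffset
// below is VLENB scaled by the LMUL of one field and is used to step the
// base pointer between fields.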
8190
8191 auto Flag = SDNodeFlags();
8192 Flag.setNoUnsignedWrap(true);
8193 SDValue Ret;
8194 SDValue Chain = Store->getChain();
8195 SDValue BasePtr = Store->getBasePtr();
8196 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
8197 VROffset =
8198 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
8199 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
8200
8201 // Extract subregisters in a vector tuple and store them individually.
8202 for (unsigned i = 0; i < NF; ++i) {
8203 auto Extract =
8204 DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL,
8205 MVT::getScalableVectorVT(MVT::i8, NumElts), StoredVal,
8206 DAG.getTargetConstant(i, DL, MVT::i32));
8207 Ret = DAG.getStore(Chain, DL, Extract, BasePtr,
8208 MachinePointerInfo(Store->getAddressSpace()),
8209 Store->getBaseAlign(),
8210 Store->getMemOperand()->getFlags());
8211 Chain = Ret.getValue(0);
8212 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
8213 }
8214 return Ret;
8215 }
8216
8217 if (auto V = expandUnalignedRVVStore(Op, DAG))
8218 return V;
8219 if (Op.getOperand(1).getValueType().isFixedLengthVector())
8220 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
8221 return Op;
8222 }
8223 case ISD::MLOAD:
8224 case ISD::VP_LOAD:
8225 return lowerMaskedLoad(Op, DAG);
8226 case ISD::VP_LOAD_FF:
8227 return lowerLoadFF(Op, DAG);
8228 case ISD::MSTORE:
8229 case ISD::VP_STORE:
8230 return lowerMaskedStore(Op, DAG);
8231 case ISD::VECTOR_COMPRESS:
8232 return lowerVectorCompress(Op, DAG);
8233 case ISD::SELECT_CC: {
8234 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
8235 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
8236 // into separate SETCC+SELECT just like LegalizeDAG.
8237 SDValue Tmp1 = Op.getOperand(0);
8238 SDValue Tmp2 = Op.getOperand(1);
8239 SDValue True = Op.getOperand(2);
8240 SDValue False = Op.getOperand(3);
8241 EVT VT = Op.getValueType();
8242 SDValue CC = Op.getOperand(4);
8243 EVT CmpVT = Tmp1.getValueType();
8244 EVT CCVT =
8245 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
8246 SDLoc DL(Op);
8247 SDValue Cond =
8248 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
8249 return DAG.getSelect(DL, VT, Cond, True, False);
8250 }
8251 case ISD::SETCC: {
8252 MVT OpVT = Op.getOperand(0).getSimpleValueType();
8253 if (OpVT.isScalarInteger()) {
8254 MVT VT = Op.getSimpleValueType();
8255 SDValue LHS = Op.getOperand(0);
8256 SDValue RHS = Op.getOperand(1);
8257 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
8258 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
8259 "Unexpected CondCode");
8260
8261 SDLoc DL(Op);
8262
8263 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
8264 // convert this to the equivalent of (set(u)ge X, C+1) by using
8265 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
8266 // in a register.
8267 if (isa<ConstantSDNode>(RHS)) {
8268 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
8269 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
8270 // If this is an unsigned compare and the constant is -1, incrementing
8271 // the constant would change behavior. The result should be false.
8272 if (CCVal == ISD::SETUGT && Imm == -1)
8273 return DAG.getConstant(0, DL, VT);
8274 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
8275 CCVal = ISD::getSetCCSwappedOperands(CCVal);
8276 SDValue SetCC = DAG.getSetCC(
8277 DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
8278 return DAG.getLogicalNOT(DL, SetCC, VT);
8279 }
8280 // Lower (setugt X, 2047) as (setne (srl X, 11), 0).
8281 if (CCVal == ISD::SETUGT && Imm == 2047) {
8282 SDValue Shift = DAG.getNode(ISD::SRL, DL, OpVT, LHS,
8283 DAG.getShiftAmountConstant(11, OpVT, DL));
8284 return DAG.getSetCC(DL, VT, Shift, DAG.getConstant(0, DL, OpVT),
8285 ISD::SETNE);
8286 }
8287 }
8288
8289 // Not a constant we could handle, swap the operands and condition code to
8290 // SETLT/SETULT.
8291 CCVal = ISD::getSetCCSwappedOperands(CCVal);
8292 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
8293 }
8294
8295 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
8296 return SplitVectorOp(Op, DAG);
8297
8298 return lowerToScalableOp(Op, DAG);
8299 }
8300 case ISD::ADD:
8301 case ISD::SUB:
8302 case ISD::MUL:
8303 case ISD::MULHS:
8304 case ISD::MULHU:
8305 case ISD::AND:
8306 case ISD::OR:
8307 case ISD::XOR:
8308 case ISD::SDIV:
8309 case ISD::SREM:
8310 case ISD::UDIV:
8311 case ISD::UREM:
8312 case ISD::BSWAP:
8313 case ISD::CTPOP:
8314 case ISD::VSELECT:
8315 return lowerToScalableOp(Op, DAG);
8316 case ISD::SHL:
8317 case ISD::SRA:
8318 case ISD::SRL:
8319 if (Op.getSimpleValueType().isFixedLengthVector())
8320 return lowerToScalableOp(Op, DAG);
8321 // This can be called for an i32 shift amount that needs to be promoted.
8322 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
8323 "Unexpected custom legalisation");
8324 return SDValue();
8325 case ISD::FABS:
8326 case ISD::FNEG:
8327 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
8328 return lowerFABSorFNEG(Op, DAG, Subtarget);
8329 [[fallthrough]];
8330 case ISD::FADD:
8331 case ISD::FSUB:
8332 case ISD::FMUL:
8333 case ISD::FDIV:
8334 case ISD::FSQRT:
8335 case ISD::FMA:
8336 case ISD::FMINNUM:
8337 case ISD::FMAXNUM:
8338 case ISD::FMINIMUMNUM:
8339 case ISD::FMAXIMUMNUM:
8340 if (isPromotedOpNeedingSplit(Op, Subtarget))
8341 return SplitVectorOp(Op, DAG);
8342 [[fallthrough]];
8343 case ISD::AVGFLOORS:
8344 case ISD::AVGFLOORU:
8345 case ISD::AVGCEILS:
8346 case ISD::AVGCEILU:
8347 case ISD::SMIN:
8348 case ISD::SMAX:
8349 case ISD::UMIN:
8350 case ISD::UMAX:
8351 case ISD::UADDSAT:
8352 case ISD::USUBSAT:
8353 case ISD::SADDSAT:
8354 case ISD::SSUBSAT:
8355 return lowerToScalableOp(Op, DAG);
8356 case ISD::ABDS:
8357 case ISD::ABDU: {
8358 SDLoc dl(Op);
8359 EVT VT = Op->getValueType(0);
8360 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
8361 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
8362 bool IsSigned = Op->getOpcode() == ISD::ABDS;
8363
8364 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
8365 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
8366 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
8367 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
8368 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
8369 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
8370 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
8371 }
8372 case ISD::ABS:
8373 case ISD::VP_ABS:
8374 return lowerABS(Op, DAG);
8375 case ISD::CTLZ:
8376 case ISD::CTLZ_ZERO_UNDEF:
8377 case ISD::CTTZ:
8378 case ISD::CTTZ_ZERO_UNDEF:
8379 if (Subtarget.hasStdExtZvbb())
8380 return lowerToScalableOp(Op, DAG);
8381 assert(Op.getOpcode() != ISD::CTTZ);
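// Without Zvbb, fall back to an expansion that derives the count from the
// exponent of a floating-point conversion; plain CTTZ never reaches this
// path.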
8382 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8383 case ISD::FCOPYSIGN:
8384 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
8385 return lowerFCOPYSIGN(Op, DAG, Subtarget);
8386 if (isPromotedOpNeedingSplit(Op, Subtarget))
8387 return SplitVectorOp(Op, DAG);
8388 return lowerToScalableOp(Op, DAG);
8389 case ISD::STRICT_FADD:
8390 case ISD::STRICT_FSUB:
8391 case ISD::STRICT_FMUL:
8392 case ISD::STRICT_FDIV:
8393 case ISD::STRICT_FSQRT:
8394 case ISD::STRICT_FMA:
8395 if (isPromotedOpNeedingSplit(Op, Subtarget))
8396 return SplitStrictFPVectorOp(Op, DAG);
8397 return lowerToScalableOp(Op, DAG);
8398 case ISD::STRICT_FSETCC:
8399 case ISD::STRICT_FSETCCS:
8400 return lowerVectorStrictFSetcc(Op, DAG);
8401 case ISD::STRICT_FCEIL:
8402 case ISD::STRICT_FRINT:
8403 case ISD::STRICT_FFLOOR:
8404 case ISD::STRICT_FTRUNC:
8405 case ISD::STRICT_FNEARBYINT:
8406 case ISD::STRICT_FROUND:
8407 case ISD::STRICT_FROUNDEVEN:
8408 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
8409 case ISD::MGATHER:
8410 case ISD::VP_GATHER:
8411 return lowerMaskedGather(Op, DAG);
8412 case ISD::MSCATTER:
8413 case ISD::VP_SCATTER:
8414 return lowerMaskedScatter(Op, DAG);
8415 case ISD::GET_ROUNDING:
8416 return lowerGET_ROUNDING(Op, DAG);
8417 case ISD::SET_ROUNDING:
8418 return lowerSET_ROUNDING(Op, DAG);
8419 case ISD::GET_FPENV:
8420 return lowerGET_FPENV(Op, DAG);
8421 case ISD::SET_FPENV:
8422 return lowerSET_FPENV(Op, DAG);
8423 case ISD::RESET_FPENV:
8424 return lowerRESET_FPENV(Op, DAG);
8425 case ISD::GET_FPMODE:
8426 return lowerGET_FPMODE(Op, DAG);
8427 case ISD::SET_FPMODE:
8428 return lowerSET_FPMODE(Op, DAG);
8429 case ISD::RESET_FPMODE:
8430 return lowerRESET_FPMODE(Op, DAG);
8431 case ISD::EH_DWARF_CFA:
8432 return lowerEH_DWARF_CFA(Op, DAG);
8433 case ISD::VP_MERGE:
8434 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
8435 return lowerVPMergeMask(Op, DAG);
8436 [[fallthrough]];
8437 case ISD::VP_SELECT:
8438 case ISD::VP_ADD:
8439 case ISD::VP_SUB:
8440 case ISD::VP_MUL:
8441 case ISD::VP_SDIV:
8442 case ISD::VP_UDIV:
8443 case ISD::VP_SREM:
8444 case ISD::VP_UREM:
8445 case ISD::VP_UADDSAT:
8446 case ISD::VP_USUBSAT:
8447 case ISD::VP_SADDSAT:
8448 case ISD::VP_SSUBSAT:
8449 case ISD::VP_LRINT:
8450 case ISD::VP_LLRINT:
8451 return lowerVPOp(Op, DAG);
8452 case ISD::VP_AND:
8453 case ISD::VP_OR:
8454 case ISD::VP_XOR:
8455 return lowerLogicVPOp(Op, DAG);
8456 case ISD::VP_FADD:
8457 case ISD::VP_FSUB:
8458 case ISD::VP_FMUL:
8459 case ISD::VP_FDIV:
8460 case ISD::VP_FNEG:
8461 case ISD::VP_FABS:
8462 case ISD::VP_SQRT:
8463 case ISD::VP_FMA:
8464 case ISD::VP_FMINNUM:
8465 case ISD::VP_FMAXNUM:
8466 case ISD::VP_FCOPYSIGN:
8467 if (isPromotedOpNeedingSplit(Op, Subtarget))
8468 return SplitVPOp(Op, DAG);
8469 [[fallthrough]];
8470 case ISD::VP_SRA:
8471 case ISD::VP_SRL:
8472 case ISD::VP_SHL:
8473 return lowerVPOp(Op, DAG);
8474 case ISD::VP_IS_FPCLASS:
8475 return LowerIS_FPCLASS(Op, DAG);
8476 case ISD::VP_SIGN_EXTEND:
8477 case ISD::VP_ZERO_EXTEND:
8478 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
8479 return lowerVPExtMaskOp(Op, DAG);
8480 return lowerVPOp(Op, DAG);
8481 case ISD::VP_TRUNCATE:
8482 return lowerVectorTruncLike(Op, DAG);
8483 case ISD::VP_FP_EXTEND:
8484 case ISD::VP_FP_ROUND:
8485 return lowerVectorFPExtendOrRoundLike(Op, DAG);
8486 case ISD::VP_SINT_TO_FP:
8487 case ISD::VP_UINT_TO_FP:
8488 if (Op.getValueType().isVector() &&
8489 ((Op.getValueType().getScalarType() == MVT::f16 &&
8490 (Subtarget.hasVInstructionsF16Minimal() &&
8491 !Subtarget.hasVInstructionsF16())) ||
8492 Op.getValueType().getScalarType() == MVT::bf16)) {
8493 if (isPromotedOpNeedingSplit(Op, Subtarget))
8494 return SplitVectorOp(Op, DAG);
8495 // int -> f32
8496 SDLoc DL(Op);
8497 MVT NVT =
8498 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
8499 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
8500 // f32 -> [b]f16
8501 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
8502 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
8503 }
8504 [[fallthrough]];
8505 case ISD::VP_FP_TO_SINT:
8506 case ISD::VP_FP_TO_UINT:
8507 if (SDValue Op1 = Op.getOperand(0);
8508 Op1.getValueType().isVector() &&
8509 ((Op1.getValueType().getScalarType() == MVT::f16 &&
8510 (Subtarget.hasVInstructionsF16Minimal() &&
8511 !Subtarget.hasVInstructionsF16())) ||
8512 Op1.getValueType().getScalarType() == MVT::bf16)) {
8513 if (isPromotedOpNeedingSplit(Op1, Subtarget))
8514 return SplitVectorOp(Op, DAG);
8515 // [b]f16 -> f32
8516 SDLoc DL(Op);
8517 MVT NVT = MVT::getVectorVT(MVT::f32,
8518 Op1.getValueType().getVectorElementCount());
8519 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
8520 // f32 -> int
8521 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
8522 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
8523 }
8524 return lowerVPFPIntConvOp(Op, DAG);
8525 case ISD::VP_SETCC:
8526 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
8527 return SplitVPOp(Op, DAG);
8528 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
8529 return lowerVPSetCCMaskOp(Op, DAG);
8530 [[fallthrough]];
8531 case ISD::VP_SMIN:
8532 case ISD::VP_SMAX:
8533 case ISD::VP_UMIN:
8534 case ISD::VP_UMAX:
8535 case ISD::VP_BITREVERSE:
8536 case ISD::VP_BSWAP:
8537 return lowerVPOp(Op, DAG);
8538 case ISD::VP_CTLZ:
8539 case ISD::VP_CTLZ_ZERO_UNDEF:
8540 if (Subtarget.hasStdExtZvbb())
8541 return lowerVPOp(Op, DAG);
8542 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8543 case ISD::VP_CTTZ:
8544 case ISD::VP_CTTZ_ZERO_UNDEF:
8545 if (Subtarget.hasStdExtZvbb())
8546 return lowerVPOp(Op, DAG);
8547 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8548 case ISD::VP_CTPOP:
8549 return lowerVPOp(Op, DAG);
8550 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
8551 return lowerVPStridedLoad(Op, DAG);
8552 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
8553 return lowerVPStridedStore(Op, DAG);
8554 case ISD::VP_FCEIL:
8555 case ISD::VP_FFLOOR:
8556 case ISD::VP_FRINT:
8557 case ISD::VP_FNEARBYINT:
8558 case ISD::VP_FROUND:
8559 case ISD::VP_FROUNDEVEN:
8560 case ISD::VP_FROUNDTOZERO:
8561 if (isPromotedOpNeedingSplit(Op, Subtarget))
8562 return SplitVPOp(Op, DAG);
8563 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
8564 case ISD::VP_FMAXIMUM:
8565 case ISD::VP_FMINIMUM:
8566 if (isPromotedOpNeedingSplit(Op, Subtarget))
8567 return SplitVPOp(Op, DAG);
8568 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
8569 case ISD::EXPERIMENTAL_VP_SPLICE:
8570 return lowerVPSpliceExperimental(Op, DAG);
8571 case ISD::EXPERIMENTAL_VP_REVERSE:
8572 return lowerVPReverseExperimental(Op, DAG);
8573 case ISD::EXPERIMENTAL_VP_SPLAT:
8574 return lowerVPSplatExperimental(Op, DAG);
8575 case ISD::CLEAR_CACHE: {
8576 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
8577 "llvm.clear_cache only needs custom lower on Linux targets");
8578 SDLoc DL(Op);
8579 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8580 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
8581 Op.getOperand(2), Flags, DL);
8582 }
8583 case ISD::DYNAMIC_STACKALLOC:
8584 return lowerDYNAMIC_STACKALLOC(Op, DAG);
8585 case ISD::INIT_TRAMPOLINE:
8586 return lowerINIT_TRAMPOLINE(Op, DAG);
8587 case ISD::ADJUST_TRAMPOLINE:
8588 return lowerADJUST_TRAMPOLINE(Op, DAG);
8589 case ISD::PARTIAL_REDUCE_UMLA:
8590 case ISD::PARTIAL_REDUCE_SMLA:
8591 case ISD::PARTIAL_REDUCE_SUMLA:
8592 return lowerPARTIAL_REDUCE_MLA(Op, DAG);
8593 }
8594}
8595
8596SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
8597 SDValue Start, SDValue End,
8598 SDValue Flags, SDLoc DL) const {
8599 MakeLibCallOptions CallOptions;
8600 std::pair<SDValue, SDValue> CallResult =
8601 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
8602 {Start, End, Flags}, CallOptions, DL, InChain);
8603
8604 // This function returns void so only the out chain matters.
8605 return CallResult.second;
8606}
8607
8608SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op,
8609 SelectionDAG &DAG) const {
8610 if (!Subtarget.is64Bit())
8611 llvm::reportFatalUsageError("Trampolines only implemented for RV64");
8612
8613 // Create an MCCodeEmitter to encode instructions.
8614 TargetLoweringObjectFile *TLO = getTargetMachine().getObjFileLowering();
8615 assert(TLO);
8616 MCContext &MCCtx = TLO->getContext();
8617
8618 std::unique_ptr<MCCodeEmitter> CodeEmitter(
8619 createRISCVMCCodeEmitter(*getTargetMachine().getMCInstrInfo(), MCCtx));
8620
8621 SDValue Root = Op.getOperand(0);
8622 SDValue Trmp = Op.getOperand(1); // trampoline
8623 SDLoc dl(Op);
8624
8625 const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
8626
8627 // We store in the trampoline buffer the following instructions and data.
8628 // Offset:
8629 // 0: auipc t2, 0
8630 // 4: ld t0, 24(t2)
8631 // 8: ld t2, 16(t2)
8632 // 12: jalr t0
8633 // 16: <StaticChainOffset>
8634 // 24: <FunctionAddressOffset>
8635 // 32:
8636 // Offset with branch control flow protection enabled:
8637 // 0: lpad <imm20>
8638 // 4: auipc t3, 0
8639 // 8: ld t2, 28(t3)
8640 // 12: ld t3, 20(t3)
8641 // 16: jalr t2
8642 // 20: <StaticChainOffset>
8643 // 28: <FunctionAddressOffset>
8644 // 36:
8645
8646 const bool HasCFBranch =
8647 Subtarget.hasStdExtZicfilp() &&
8648 DAG.getMachineFunction().getFunction().getParent()->getModuleFlag(
8649 "cf-protection-branch");
8650 const unsigned StaticChainIdx = HasCFBranch ? 5 : 4;
8651 const unsigned StaticChainOffset = StaticChainIdx * 4;
8652 const unsigned FunctionAddressOffset = StaticChainOffset + 8;
8653
8654 const MCSubtargetInfo *STI = getTargetMachine().getMCSubtargetInfo();
8655 assert(STI);
8656 auto GetEncoding = [&](const MCInst &MC) {
8657 SmallVector<char, 32> CB;
8658 SmallVector<MCFixup> Fixups;
8659 CodeEmitter->encodeInstruction(MC, CB, Fixups, *STI);
8660 uint32_t Encoding = support::endian::read32le(CB.data());
8661 return Encoding;
8662 };
8663
8664 SmallVector<SDValue> OutChains;
8665
8666 SmallVector<uint32_t> Encodings;
8667 if (!HasCFBranch) {
8668 Encodings.append(
8669 {// auipc t2, 0
8670 // Loads the current PC into t2.
8671 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X7).addImm(0)),
8672 // ld t0, 24(t2)
8673 // Loads the function address into t0. Note that we are using offsets
8674 // pc-relative to the first instruction of the trampoline.
8675 GetEncoding(MCInstBuilder(RISCV::LD)
8676 .addReg(RISCV::X5)
8677 .addReg(RISCV::X7)
8678 .addImm(FunctionAddressOffset)),
8679 // ld t2, 16(t2)
8680 // Load the value of the static chain.
8681 GetEncoding(MCInstBuilder(RISCV::LD)
8682 .addReg(RISCV::X7)
8683 .addReg(RISCV::X7)
8684 .addImm(StaticChainOffset)),
8685 // jalr t0
8686 // Jump to the function.
8687 GetEncoding(MCInstBuilder(RISCV::JALR)
8688 .addReg(RISCV::X0)
8689 .addReg(RISCV::X5)
8690 .addImm(0))});
8691 } else {
8692 Encodings.append(
8693 {// auipc x0, <imm20> (lpad <imm20>)
8694 // Landing pad.
8695 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X0).addImm(0)),
8696 // auipc t3, 0
8697 // Loads the current PC into t3.
8698 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X28).addImm(0)),
8699 // ld t2, (FunctionAddressOffset - 4)(t3)
8700 // Loads the function address into t2. Note that we are using offsets
8701 // pc-relative to the SECOND instruction of the trampoline.
8702 GetEncoding(MCInstBuilder(RISCV::LD)
8703 .addReg(RISCV::X7)
8704 .addReg(RISCV::X28)
8705 .addImm(FunctionAddressOffset - 4)),
8706 // ld t3, (StaticChainOffset - 4)(t3)
8707 // Load the value of the static chain.
8708 GetEncoding(MCInstBuilder(RISCV::LD)
8709 .addReg(RISCV::X28)
8710 .addReg(RISCV::X28)
8711 .addImm(StaticChainOffset - 4)),
8712 // jalr t2
8713 // Software-guarded jump to the function.
8714 GetEncoding(MCInstBuilder(RISCV::JALR)
8715 .addReg(RISCV::X0)
8716 .addReg(RISCV::X7)
8717 .addImm(0))});
8718 }
8719
8720 // Store encoded instructions.
8721 for (auto [Idx, Encoding] : llvm::enumerate(Encodings)) {
8722 SDValue Addr = Idx > 0 ? DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
8723 DAG.getConstant(Idx * 4, dl, MVT::i64))
8724 : Trmp;
8725 OutChains.push_back(DAG.getTruncStore(
8726 Root, dl, DAG.getConstant(Encoding, dl, MVT::i64), Addr,
8727 MachinePointerInfo(TrmpAddr, Idx * 4), MVT::i32));
8728 }
8729
8730 // Now store the variable part of the trampoline.
8731 SDValue FunctionAddress = Op.getOperand(2);
8732 SDValue StaticChain = Op.getOperand(3);
8733
8734 // Store the given static chain and function pointer in the trampoline buffer.
8735 struct OffsetValuePair {
8736 const unsigned Offset;
8737 const SDValue Value;
8738 SDValue Addr = SDValue(); // Used to cache the address.
8739 } OffsetValues[] = {
8740 {StaticChainOffset, StaticChain},
8741 {FunctionAddressOffset, FunctionAddress},
8742 };
8743 for (auto &OffsetValue : OffsetValues) {
8744 SDValue Addr =
8745 DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
8746 DAG.getConstant(OffsetValue.Offset, dl, MVT::i64));
8747 OffsetValue.Addr = Addr;
8748 OutChains.push_back(
8749 DAG.getStore(Root, dl, OffsetValue.Value, Addr,
8750 MachinePointerInfo(TrmpAddr, OffsetValue.Offset)));
8751 }
8752
8753 assert(OutChains.size() == StaticChainIdx + 2 &&
8754 "Size of OutChains mismatch");
8755 SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
8756
8757 // The end of the trampoline's instructions is the same as the static chain
8758 // address computed earlier.
8759 SDValue EndOfTrmp = OffsetValues[0].Addr;
8760
8761 // Call clear cache on the trampoline instructions.
8762 SDValue Chain = DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken,
8763 Trmp, EndOfTrmp);
8764
8765 return Chain;
8766}
8767
8768SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op,
8769 SelectionDAG &DAG) const {
8770 if (!Subtarget.is64Bit())
8771 llvm::reportFatalUsageError("Trampolines only implemented for RV64");
8772
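// Nothing to adjust: the static chain and the function pointer live inside
// the trampoline buffer itself, so the trampoline address is usable as-is.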
8773 return Op.getOperand(0);
8774}
8775
8776SDValue RISCVTargetLowering::lowerPARTIAL_REDUCE_MLA(SDValue Op,
8777 SelectionDAG &DAG) const {
8778 // Currently, only the vqdot and vqdotu case (from zvqdotq) should be legal.
8779 // TODO: There are many other sub-cases we could potentially lower, are
8780 // any of them worthwhile? Ex: via vredsum, vwredsum, vwwmaccu, etc..
8781 SDLoc DL(Op);
8782 MVT VT = Op.getSimpleValueType();
8783 SDValue Accum = Op.getOperand(0);
8784 assert(Accum.getSimpleValueType() == VT &&
8785 VT.getVectorElementType() == MVT::i32);
8786 SDValue A = Op.getOperand(1);
8787 SDValue B = Op.getOperand(2);
8788 MVT ArgVT = A.getSimpleValueType();
8789 assert(ArgVT == B.getSimpleValueType() &&
8790 ArgVT.getVectorElementType() == MVT::i8);
8791 (void)ArgVT;
8792
8793 // The zvqdotq pseudos are defined with sources and destination both
8794 // being i32. This cast is needed for correctness to avoid incorrect
8795 // .vx matching of i8 splats.
8796 A = DAG.getBitcast(VT, A);
8797 B = DAG.getBitcast(VT, B);
8798
8799 MVT ContainerVT = VT;
8800 if (VT.isFixedLengthVector()) {
8801 ContainerVT = getContainerForFixedLengthVector(VT);
8802 Accum = convertToScalableVector(ContainerVT, Accum, DAG, Subtarget);
8803 A = convertToScalableVector(ContainerVT, A, DAG, Subtarget);
8804 B = convertToScalableVector(ContainerVT, B, DAG, Subtarget);
8805 }
8806
8807 unsigned Opc;
8808 switch (Op.getOpcode()) {
8809 case ISD::PARTIAL_REDUCE_SMLA:
8810 Opc = RISCVISD::VQDOT_VL;
8811 break;
8812 case ISD::PARTIAL_REDUCE_UMLA:
8813 Opc = RISCVISD::VQDOTU_VL;
8814 break;
8815 case ISD::PARTIAL_REDUCE_SUMLA:
8816 Opc = RISCVISD::VQDOTSU_VL;
8817 break;
8818 default:
8819 llvm_unreachable("Unexpected opcode");
8820 }
8821 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8822 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, {A, B, Accum, Mask, VL});
8823 if (VT.isFixedLengthVector())
8824 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
8825 return Res;
8826}
8827
8828 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
8829 SelectionDAG &DAG, unsigned Flags) {
8830 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
8831}
8832
8833 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
8834 SelectionDAG &DAG, unsigned Flags) {
8835 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
8836 Flags);
8837}
8838
8839 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
8840 SelectionDAG &DAG, unsigned Flags) {
8841 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
8842 N->getOffset(), Flags);
8843}
8844
8845 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
8846 SelectionDAG &DAG, unsigned Flags) {
8847 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
8848}
8849
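// For the large code model, the symbol's full address is placed in a constant
// pool entry and loaded through a PC-relative LLA of that entry.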
8850 static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL,
8851 EVT Ty, SelectionDAG &DAG) {
8852 RISCVConstantPoolValue *CPV = RISCVConstantPoolValue::Create(N->getGlobal());
8853 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
8854 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
8855 return DAG.getLoad(
8856 Ty, DL, DAG.getEntryNode(), LC,
8857 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
8858 }
8859
8860 static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL,
8861 EVT Ty, SelectionDAG &DAG) {
8862 RISCVConstantPoolValue *CPV =
8863 RISCVConstantPoolValue::Create(*DAG.getContext(), N->getSymbol());
8864 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
8865 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
8866 return DAG.getLoad(
8867 Ty, DL, DAG.getEntryNode(), LC,
8868 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
8869 }
8870
8871template <class NodeTy>
8872SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
8873 bool IsLocal, bool IsExternWeak) const {
8874 SDLoc DL(N);
8875 EVT Ty = getPointerTy(DAG.getDataLayout());
8876
8877 // When HWASAN is used and tagging of global variables is enabled
8878 // they should be accessed via the GOT, since the tagged address of a global
8879 // is incompatible with existing code models. This also applies to non-pic
8880 // mode.
8881 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
8882 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8883 if (IsLocal && !Subtarget.allowTaggedGlobals())
8884 // Use PC-relative addressing to access the symbol. This generates the
8885 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
8886 // %pcrel_lo(auipc)).
8887 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8888
8889 // Use PC-relative addressing to access the GOT for this symbol, then load
8890 // the address from the GOT. This generates the pattern (PseudoLGA sym),
8891 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
8892 SDValue Load =
8893 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
8894 MachineFunction &MF = DAG.getMachineFunction();
8895 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8896 MachinePointerInfo::getGOT(MF),
8897 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
8898 MachineMemOperand::MOInvariant,
8899 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
8900 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8901 return Load;
8902 }
8903
8904 switch (getTargetMachine().getCodeModel()) {
8905 default:
8906 reportFatalUsageError("Unsupported code model for lowering");
8907 case CodeModel::Small: {
8908 // Generate a sequence for accessing addresses within the first 2 GiB of
8909 // address space.
8910 if (Subtarget.hasVendorXqcili()) {
8911 // Use QC.E.LI to generate the address, as this is easier to relax than
8912 // LUI/ADDI.
8913 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8914 return DAG.getNode(RISCVISD::QC_E_LI, DL, Ty, Addr);
8915 }
8916
8917 // This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
8918 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
8919 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
8920 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
8921 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
8922 }
8923 case CodeModel::Medium: {
8924 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8925 if (IsExternWeak) {
8926 // An extern weak symbol may be undefined, i.e. have value 0, which may
8927 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
8928 // symbol. This generates the pattern (PseudoLGA sym), which expands to
8929 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
8930 SDValue Load =
8931 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
8932 MachineFunction &MF = DAG.getMachineFunction();
8933 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8934 MachinePointerInfo::getGOT(MF),
8935 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
8936 MachineMemOperand::MOInvariant,
8937 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
8938 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8939 return Load;
8940 }
8941
8942 // Generate a sequence for accessing addresses within any 2GiB range within
8943 // the address space. This generates the pattern (PseudoLLA sym), which
8944 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
8945 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8946 }
8947 case CodeModel::Large: {
8948 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N))
8949 return getLargeGlobalAddress(G, DL, Ty, DAG);
8950
8951 // Use PC-relative addressing for other node types.
8952 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8953 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8954 }
8955 }
8956}
8957
8958SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
8959 SelectionDAG &DAG) const {
8960 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
8961 assert(N->getOffset() == 0 && "unexpected offset in global node");
8962 const GlobalValue *GV = N->getGlobal();
8963 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
8964}
8965
8966SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
8967 SelectionDAG &DAG) const {
8968 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
8969
8970 return getAddr(N, DAG);
8971}
8972
8973SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
8974 SelectionDAG &DAG) const {
8975 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
8976
8977 return getAddr(N, DAG);
8978}
8979
8980SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
8981 SelectionDAG &DAG) const {
8982 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
8983
8984 return getAddr(N, DAG);
8985}
8986
8987SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
8988 SelectionDAG &DAG,
8989 bool UseGOT) const {
8990 SDLoc DL(N);
8991 EVT Ty = getPointerTy(DAG.getDataLayout());
8992 const GlobalValue *GV = N->getGlobal();
8993 MVT XLenVT = Subtarget.getXLenVT();
8994
8995 if (UseGOT) {
8996 // Use PC-relative addressing to access the GOT for this TLS symbol, then
8997 // load the address from the GOT and add the thread pointer. This generates
8998 // the pattern (PseudoLA_TLS_IE sym), which expands to
8999 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
9000 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9001 SDValue Load =
9002 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
9003 MachineFunction &MF = DAG.getMachineFunction();
9004 MachineMemOperand *MemOp = MF.getMachineMemOperand(
9005 MachinePointerInfo::getGOT(MF),
9006 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
9007 MachineMemOperand::MOInvariant,
9008 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
9009 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
9010
9011 // Add the thread pointer.
9012 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
9013 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
9014 }
9015
9016 // Generate a sequence for accessing the address relative to the thread
9017 // pointer, with the appropriate adjustment for the thread pointer offset.
9018 // This generates the pattern
9019 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
9020 SDValue AddrHi =
9021 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
9022 SDValue AddrAdd =
9023 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
9024 SDValue AddrLo =
9025 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
9026
9027 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
9028 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
9029 SDValue MNAdd =
9030 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
9031 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
9032}
9033
9034SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
9035 SelectionDAG &DAG) const {
9036 SDLoc DL(N);
9037 EVT Ty = getPointerTy(DAG.getDataLayout());
9038 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
9039 const GlobalValue *GV = N->getGlobal();
9040
9041 // Use a PC-relative addressing mode to access the global dynamic GOT address.
9042 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
9043 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
9044 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9045 SDValue Load =
9046 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
9047
9048 // Prepare argument list to generate call.
9049 ArgListTy Args;
9050 Args.emplace_back(Load, CallTy);
9051
9052 // Setup call to __tls_get_addr.
9053 TargetLowering::CallLoweringInfo CLI(DAG);
9054 CLI.setDebugLoc(DL)
9055 .setChain(DAG.getEntryNode())
9056 .setLibCallee(CallingConv::C, CallTy,
9057 DAG.getExternalSymbol("__tls_get_addr", Ty),
9058 std::move(Args));
9059
9060 return LowerCallTo(CLI).first;
9061}
9062
9063SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
9064 SelectionDAG &DAG) const {
9065 SDLoc DL(N);
9066 EVT Ty = getPointerTy(DAG.getDataLayout());
9067 const GlobalValue *GV = N->getGlobal();
9068
9069 // Use a PC-relative addressing mode to access the global dynamic GOT address.
9070 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
9071 //
9072 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
9073 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
9074 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
9075 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
9076 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9077 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
9078}
9079
9080SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
9081 SelectionDAG &DAG) const {
9082 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
9083 assert(N->getOffset() == 0 && "unexpected offset in global node");
9084
9085 if (DAG.getTarget().useEmulatedTLS())
9086 return LowerToTLSEmulatedModel(N, DAG);
9087
9088 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
9089
9090 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
9091 CallingConv::GHC)
9092 reportFatalUsageError("In GHC calling convention TLS is not supported");
9093
9094 SDValue Addr;
9095 switch (Model) {
9096 case TLSModel::LocalExec:
9097 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
9098 break;
9099 case TLSModel::InitialExec:
9100 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
9101 break;
9102 case TLSModel::LocalDynamic:
9103 case TLSModel::GeneralDynamic:
9104 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
9105 : getDynamicTLSAddr(N, DAG);
9106 break;
9107 }
9108
9109 return Addr;
9110}
9111
9112// Return true if Val is equal to (setcc LHS, RHS, CC).
9113// Return false if Val is the inverse of (setcc LHS, RHS, CC).
9114// Otherwise, return std::nullopt.
9115static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
9116 ISD::CondCode CC, SDValue Val) {
9117 assert(Val->getOpcode() == ISD::SETCC);
9118 SDValue LHS2 = Val.getOperand(0);
9119 SDValue RHS2 = Val.getOperand(1);
9120 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
9121
9122 if (LHS == LHS2 && RHS == RHS2) {
9123 if (CC == CC2)
9124 return true;
9125 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
9126 return false;
9127 } else if (LHS == RHS2 && RHS == LHS2) {
9128 CC2 = ISD::getSetCCSwappedOperands(CC2);
9129 if (CC == CC2)
9130 return true;
9131 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
9132 return false;
9133 }
9134
9135 return std::nullopt;
9136}
9137
9138 static bool isSimm12Constant(SDValue V) {
9139 return isa<ConstantSDNode>(V) && V->getAsAPIntVal().isSignedIntN(12);
9140}
9141
9142 static SDValue lowerSelectToBinOp(SDNode *N, SelectionDAG &DAG,
9143 const RISCVSubtarget &Subtarget) {
9144 SDValue CondV = N->getOperand(0);
9145 SDValue TrueV = N->getOperand(1);
9146 SDValue FalseV = N->getOperand(2);
9147 MVT VT = N->getSimpleValueType(0);
9148 SDLoc DL(N);
9149
9150 if (!Subtarget.hasConditionalMoveFusion()) {
9151 // (select c, -1, y) -> -c | y
9152 if (isAllOnesConstant(TrueV)) {
9153 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9154 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
9155 }
9156 // (select c, y, -1) -> (c-1) | y
9157 if (isAllOnesConstant(FalseV)) {
9158 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
9159 DAG.getAllOnesConstant(DL, VT));
9160 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
9161 }
9162
9163 const bool HasCZero = VT.isScalarInteger() && Subtarget.hasCZEROLike();
9164
9165 // (select c, 0, y) -> (c-1) & y
9166 if (isNullConstant(TrueV) && (!HasCZero || isSimm12Constant(FalseV))) {
9167 SDValue Neg =
9168 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
9169 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
9170 }
9171 if (isNullConstant(FalseV)) {
9172 // (select c, (1 << ShAmount) + 1, 0) -> (c << ShAmount) + c
9173 if (auto *TrueC = dyn_cast<ConstantSDNode>(TrueV)) {
9174 uint64_t TrueM1 = TrueC->getZExtValue() - 1;
9175 if (isPowerOf2_64(TrueM1)) {
9176 unsigned ShAmount = Log2_64(TrueM1);
9177 if (Subtarget.hasShlAdd(ShAmount))
9178 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, CondV,
9179 DAG.getConstant(ShAmount, DL, VT), CondV);
9180 }
9181 }
9182 // (select c, y, 0) -> -c & y
9183 if (!HasCZero || isSimm12Constant(TrueV)) {
9184 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9185 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
9186 }
9187 }
9188 }
9189
9190 // select c, ~x, x --> xor -c, x
9191 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
9192 const APInt &TrueVal = TrueV->getAsAPIntVal();
9193 const APInt &FalseVal = FalseV->getAsAPIntVal();
9194 if (~TrueVal == FalseVal) {
9195 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9196 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
9197 }
9198 }
9199
9200 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
9201 // when both truev and falsev are also setcc.
9202 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
9203 FalseV.getOpcode() == ISD::SETCC) {
9204 SDValue LHS = CondV.getOperand(0);
9205 SDValue RHS = CondV.getOperand(1);
9206 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9207
9208 // (select x, x, y) -> x | y
9209 // (select !x, x, y) -> x & y
9210 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
9211 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
9212 DAG.getFreeze(FalseV));
9213 }
9214 // (select x, y, x) -> x & y
9215 // (select !x, y, x) -> x | y
9216 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
9217 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
9218 DAG.getFreeze(TrueV), FalseV);
9219 }
9220 }
9221
9222 return SDValue();
9223}
9224
9225// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
9226// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
9227// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
9228// being `0` or `-1`. In such cases we can replace `select` with `and`.
9229// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
9230// than `c0`?
9231static SDValue
9233 const RISCVSubtarget &Subtarget) {
9234 if (Subtarget.hasShortForwardBranchOpt())
9235 return SDValue();
9236
9237 unsigned SelOpNo = 0;
9238 SDValue Sel = BO->getOperand(0);
9239 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
9240 SelOpNo = 1;
9241 Sel = BO->getOperand(1);
9242 }
9243
9244 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
9245 return SDValue();
9246
9247 unsigned ConstSelOpNo = 1;
9248 unsigned OtherSelOpNo = 2;
9249 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
9250 ConstSelOpNo = 2;
9251 OtherSelOpNo = 1;
9252 }
9253 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
9254 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
9255 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
9256 return SDValue();
9257
9258 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
9259 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
9260 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
9261 return SDValue();
9262
9263 SDLoc DL(Sel);
9264 EVT VT = BO->getValueType(0);
9265
9266 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
9267 if (SelOpNo == 1)
9268 std::swap(NewConstOps[0], NewConstOps[1]);
9269
9270 SDValue NewConstOp =
9271 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
9272 if (!NewConstOp)
9273 return SDValue();
9274
9275 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
9276 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
9277 return SDValue();
9278
9279 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
9280 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
9281 if (SelOpNo == 1)
9282 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
9283 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
9284
9285 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
9286 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
9287 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
9288}
9289
9290SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
9291 SDValue CondV = Op.getOperand(0);
9292 SDValue TrueV = Op.getOperand(1);
9293 SDValue FalseV = Op.getOperand(2);
9294 SDLoc DL(Op);
9295 MVT VT = Op.getSimpleValueType();
9296 MVT XLenVT = Subtarget.getXLenVT();
9297
9298 // Lower vector SELECTs to VSELECTs by splatting the condition.
9299 if (VT.isVector()) {
9300 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
9301 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
9302 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
9303 }
9304
9305 // Try some other optimizations before falling back to generic lowering.
9306 if (SDValue V = lowerSelectToBinOp(Op.getNode(), DAG, Subtarget))
9307 return V;
9308
9309 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
9310 // nodes to implement the SELECT. Performing the lowering here allows for
9311 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
9312 // sequence or RISCVISD::SELECT_CC node (branch-based select).
9313 if (Subtarget.hasCZEROLike() && VT.isScalarInteger()) {
9314
9315 // (select c, t, 0) -> (czero_eqz t, c)
9316 if (isNullConstant(FalseV))
9317 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
9318 // (select c, 0, f) -> (czero_nez f, c)
9319 if (isNullConstant(TrueV))
9320 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
9321
9322 // Check whether a given operation is a 'NOT'; if so, return the negated
9323 // operand.
9324 auto getNotOperand = [](const SDValue &Op) -> std::optional<const SDValue> {
9325 using namespace llvm::SDPatternMatch;
9326 SDValue Xor;
9327 if (sd_match(Op, m_OneUse(m_Not(m_Value(Xor))))) {
9328 return Xor;
9329 }
9330 return std::nullopt;
9331 };
9332 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
9333 // (select c, (and f, ~x), f) -> (andn f, (czero_eqz x, c))
9334 if (TrueV.getOpcode() == ISD::AND &&
9335 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV)) {
9336 auto NotOperand = (TrueV.getOperand(0) == FalseV)
9337 ? getNotOperand(TrueV.getOperand(1))
9338 : getNotOperand(TrueV.getOperand(0));
9339 if (NotOperand) {
9340 SDValue CMOV =
9341 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, *NotOperand, CondV);
9342 SDValue NOT = DAG.getNOT(DL, CMOV, VT);
9343 return DAG.getNode(ISD::AND, DL, VT, FalseV, NOT);
9344 }
9345 return DAG.getNode(
9346 ISD::OR, DL, VT, TrueV,
9347 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
9348 }
9349
9350 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
9351 // (select c, t, (and t, ~x)) -> (andn t, (czero_nez x, c))
9352 if (FalseV.getOpcode() == ISD::AND &&
9353 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV)) {
9354 auto NotOperand = (FalseV.getOperand(0) == TrueV)
9355 ? getNotOperand(FalseV.getOperand(1))
9356 : getNotOperand(FalseV.getOperand(0));
9357 if (NotOperand) {
9358 SDValue CMOV =
9359 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, *NotOperand, CondV);
9360 SDValue NOT = DAG.getNOT(DL, CMOV, VT);
9361 return DAG.getNode(ISD::AND, DL, VT, TrueV, NOT);
9362 }
9363 return DAG.getNode(
9364 ISD::OR, DL, VT, FalseV,
9365 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
9366 }
9367
9368 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
9369 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
9370 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
9371 const APInt &TrueVal = TrueV->getAsAPIntVal();
9372 const APInt &FalseVal = FalseV->getAsAPIntVal();
9373
9374 // Prefer these over Zicond to avoid materializing an immediate:
9375 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
9376 // (select (x > -1), z, y) -> x >> (XLEN - 1) & (y - z) + z
9377 if (CondV.getOpcode() == ISD::SETCC &&
9378 CondV.getOperand(0).getValueType() == VT && CondV.hasOneUse()) {
9379 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9380 if ((CCVal == ISD::SETLT && isNullConstant(CondV.getOperand(1))) ||
9381 (CCVal == ISD::SETGT && isAllOnesConstant(CondV.getOperand(1)))) {
9382 int64_t TrueImm = TrueVal.getSExtValue();
9383 int64_t FalseImm = FalseVal.getSExtValue();
9384 if (CCVal == ISD::SETGT)
9385 std::swap(TrueImm, FalseImm);
9386 if (isInt<12>(TrueImm) && isInt<12>(FalseImm) &&
9387 isInt<12>(TrueImm - FalseImm)) {
9388 SDValue SRA =
9389 DAG.getNode(ISD::SRA, DL, VT, CondV.getOperand(0),
9390 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
9391 SDValue AND =
9392 DAG.getNode(ISD::AND, DL, VT, SRA,
9393 DAG.getSignedConstant(TrueImm - FalseImm, DL, VT));
9394 return DAG.getNode(ISD::ADD, DL, VT, AND,
9395 DAG.getSignedConstant(FalseImm, DL, VT));
9396 }
9397 }
9398 }
9399
9400 // Use SHL/ADDI (and possibly XORI) to avoid having to materialize
9401 // a constant in a register.
9402 if ((TrueVal - FalseVal).isPowerOf2() && FalseVal.isSignedIntN(12)) {
9403 SDValue Log2 = DAG.getConstant((TrueVal - FalseVal).logBase2(), DL, VT);
9404 SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
9405 return DAG.getNode(ISD::ADD, DL, VT, FalseV, BitDiff);
9406 }
9407 if ((FalseVal - TrueVal).isPowerOf2() && TrueVal.isSignedIntN(12)) {
9408 SDValue Log2 = DAG.getConstant((FalseVal - TrueVal).logBase2(), DL, VT);
9409 CondV = DAG.getLogicalNOT(DL, CondV, CondV->getValueType(0));
9410 SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
9411 return DAG.getNode(ISD::ADD, DL, VT, TrueV, BitDiff);
9412 }
9413
9414 auto getCost = [&](const APInt &Delta, const APInt &Addend) {
9415 const int DeltaCost = RISCVMatInt::getIntMatCost(
9416 Delta, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
9417 // Does the addend fold into an ADDI
9418 if (Addend.isSignedIntN(12))
9419 return DeltaCost;
9420 const int AddendCost = RISCVMatInt::getIntMatCost(
9421 Addend, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
9422 return AddendCost + DeltaCost;
9423 };
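// Pick the CZERO form whose pair of constants (the difference placed in a
// register and the addend) is cheaper to materialize.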
9424 bool IsCZERO_NEZ = getCost(FalseVal - TrueVal, TrueVal) <=
9425 getCost(TrueVal - FalseVal, FalseVal);
9426 SDValue LHSVal = DAG.getConstant(
9427 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
9428 SDValue CMOV =
9429 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9430 DL, VT, LHSVal, CondV);
9431 return DAG.getNode(ISD::ADD, DL, VT, CMOV, IsCZERO_NEZ ? TrueV : FalseV);
9432 }
9433
9434 // (select c, c1, t) -> (add (czero_nez t - c1, c), c1)
9435 // (select c, t, c1) -> (add (czero_eqz t - c1, c), c1)
9436 if (isa<ConstantSDNode>(TrueV) != isa<ConstantSDNode>(FalseV)) {
9437 bool IsCZERO_NEZ = isa<ConstantSDNode>(TrueV);
9438 SDValue ConstVal = IsCZERO_NEZ ? TrueV : FalseV;
9439 SDValue RegV = IsCZERO_NEZ ? FalseV : TrueV;
9440 int64_t RawConstVal = cast<ConstantSDNode>(ConstVal)->getSExtValue();
9441 // Fall back to XORI if Const == -0x800
9442 if (RawConstVal == -0x800) {
9443 SDValue XorOp = DAG.getNode(ISD::XOR, DL, VT, RegV, ConstVal);
9444 SDValue CMOV =
9445 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9446 DL, VT, XorOp, CondV);
9447 return DAG.getNode(ISD::XOR, DL, VT, CMOV, ConstVal);
9448 }
9449 // This is efficient only if the constant and its negation fit into `ADDI`.
9450 // Prefer Add/Sub over Xor since they can be compressed for small immediates.
9451 if (isInt<12>(RawConstVal)) {
9452 SDValue SubOp = DAG.getNode(ISD::SUB, DL, VT, RegV, ConstVal);
9453 SDValue CMOV =
9454 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9455 DL, VT, SubOp, CondV);
9456 return DAG.getNode(ISD::ADD, DL, VT, CMOV, ConstVal);
9457 }
9458 }
9459
9460 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
9461 // Unless we have the short forward branch optimization.
9462 if (!Subtarget.hasConditionalMoveFusion())
9463 return DAG.getNode(
9464 ISD::OR, DL, VT,
9465 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
9466 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV),
9468 }
9469
9470 if (Op.hasOneUse()) {
9471 unsigned UseOpc = Op->user_begin()->getOpcode();
9472 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
9473 SDNode *BinOp = *Op->user_begin();
9474 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
9475 DAG, Subtarget)) {
9476 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
9477 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
9478 // may return a constant node and cause a crash in lowerSELECT.
9479 if (NewSel.getOpcode() == ISD::SELECT)
9480 return lowerSELECT(NewSel, DAG);
9481 return NewSel;
9482 }
9483 }
9484 }
9485
9486 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
9487 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
9488 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
9489 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
9490 if (FPTV && FPFV) {
9491 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
9492 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
9493 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
9494 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
9495 DAG.getConstant(1, DL, XLenVT));
9496 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
9497 }
9498 }
9499
9500 // If the condition is not an integer SETCC which operates on XLenVT, we need
9501 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
9502 // (select condv, truev, falsev)
9503 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
9504 if (CondV.getOpcode() != ISD::SETCC ||
9505 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
9506 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
9507 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
9508
9509 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
9510
9511 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
9512 }
9513
9514 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
9515 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
9516 // advantage of the integer compare+branch instructions. i.e.:
9517 // (select (setcc lhs, rhs, cc), truev, falsev)
9518 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
9519 SDValue LHS = CondV.getOperand(0);
9520 SDValue RHS = CondV.getOperand(1);
9521 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9522
9523 // Special case for a select of 2 constants that have a difference of 1.
9524 // Normally this is done by DAGCombine, but if the select is introduced by
9525 // type legalization or op legalization, we miss it. We restrict to the
9526 // SETLT case for now because that is what signed saturating add/sub need.
9527 // FIXME: We don't need the condition to be SETLT or even a SETCC,
9528 // but we would probably want to swap the true/false values if the condition
9529 // is SETGE/SETLE to avoid an XORI.
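// For example, (select (setlt x, y), 6, 5) has TrueVal - 1 == FalseVal, so it
// becomes (add (setlt x, y), 5), i.e. roughly "slt a0, a0, a1; addi a0, a0, 5".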
9530 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
9531 CCVal == ISD::SETLT) {
9532 const APInt &TrueVal = TrueV->getAsAPIntVal();
9533 const APInt &FalseVal = FalseV->getAsAPIntVal();
9534 if (TrueVal - 1 == FalseVal)
9535 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
9536 if (TrueVal + 1 == FalseVal)
9537 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
9538 }
9539
9540 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
9541 // 1 < x ? x : 1 -> 0 < x ? x : 1
9542 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
9543 RHS == TrueV && LHS == FalseV) {
9544 LHS = DAG.getConstant(0, DL, VT);
9545 // 0 <u x is the same as x != 0.
9546 if (CCVal == ISD::SETULT) {
9547 std::swap(LHS, RHS);
9548 CCVal = ISD::SETNE;
9549 }
9550 }
9551
9552 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
9553 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
9554 RHS == FalseV) {
9555 RHS = DAG.getConstant(0, DL, VT);
9556 }
9557
9558 SDValue TargetCC = DAG.getCondCode(CCVal);
9559
9560 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
9561 // (select (setcc lhs, rhs, CC), constant, falsev)
9562 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
9563 std::swap(TrueV, FalseV);
9564 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
9565 }
9566
9567 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
9568 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
9569}
9570
9571SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
9572 SDValue CondV = Op.getOperand(1);
9573 SDLoc DL(Op);
9574 MVT XLenVT = Subtarget.getXLenVT();
9575
9576 if (CondV.getOpcode() == ISD::SETCC &&
9577 CondV.getOperand(0).getValueType() == XLenVT) {
9578 SDValue LHS = CondV.getOperand(0);
9579 SDValue RHS = CondV.getOperand(1);
9580 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9581
9582 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
9583
9584 SDValue TargetCC = DAG.getCondCode(CCVal);
9585 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
9586 LHS, RHS, TargetCC, Op.getOperand(2));
9587 }
9588
9589 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
9590 CondV, DAG.getConstant(0, DL, XLenVT),
9591 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
9592}
9593
9594SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
9595 MachineFunction &MF = DAG.getMachineFunction();
9596 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
9597
9598 SDLoc DL(Op);
9599 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
9600 getPointerTy(MF.getDataLayout()));
9601
9602 // vastart just stores the address of the VarArgsFrameIndex slot into the
9603 // memory location argument.
9604 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
9605 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
9606 MachinePointerInfo(SV));
9607}
9608
9609SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
9610 SelectionDAG &DAG) const {
9611 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
9612 MachineFunction &MF = DAG.getMachineFunction();
9613 MachineFrameInfo &MFI = MF.getFrameInfo();
9614 MFI.setFrameAddressIsTaken(true);
9615 Register FrameReg = RI.getFrameRegister(MF);
9616 int XLenInBytes = Subtarget.getXLen() / 8;
9617
9618 EVT VT = Op.getValueType();
9619 SDLoc DL(Op);
9620 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
9621 unsigned Depth = Op.getConstantOperandVal(0);
9622 while (Depth--) {
9623 int Offset = -(XLenInBytes * 2);
9624 SDValue Ptr = DAG.getNode(
9625 ISD::ADD, DL, VT, FrameAddr,
9626 DAG.getSignedConstant(Offset, DL, VT));
9627 FrameAddr =
9628 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
9629 }
9630 return FrameAddr;
9631}
9632
9633SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
9634 SelectionDAG &DAG) const {
9635 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
9636 MachineFunction &MF = DAG.getMachineFunction();
9637 MachineFrameInfo &MFI = MF.getFrameInfo();
9638 MFI.setReturnAddressIsTaken(true);
9639 MVT XLenVT = Subtarget.getXLenVT();
9640 int XLenInBytes = Subtarget.getXLen() / 8;
9641
9642 EVT VT = Op.getValueType();
9643 SDLoc DL(Op);
9644 unsigned Depth = Op.getConstantOperandVal(0);
9645 if (Depth) {
9646 int Off = -XLenInBytes;
9647 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
9648 SDValue Offset = DAG.getSignedConstant(Off, DL, VT);
9649 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
9650 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
9651 MachinePointerInfo());
9652 }
9653
9654 // Return the value of the return address register, marking it an implicit
9655 // live-in.
9656 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
9657 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
9658}
9659
9660SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
9661 SelectionDAG &DAG) const {
9662 SDLoc DL(Op);
9663 SDValue Lo = Op.getOperand(0);
9664 SDValue Hi = Op.getOperand(1);
9665 SDValue Shamt = Op.getOperand(2);
9666 EVT VT = Lo.getValueType();
9667
9668 // if Shamt-XLEN < 0: // Shamt < XLEN
9669 // Lo = Lo << Shamt
9670 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
9671 // else:
9672 // Lo = 0
9673 // Hi = Lo << (Shamt-XLEN)
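// For example, on RV32 shifting {Lo, Hi} left by 4 yields Lo << 4 and
// (Hi << 4) | (Lo >>u 28), while shifting by 40 takes the else branch and
// yields Lo = 0 and Hi = (original Lo) << 8.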
9674
9675 SDValue Zero = DAG.getConstant(0, DL, VT);
9676 SDValue One = DAG.getConstant(1, DL, VT);
9677 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
9678 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
9679 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
9680 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
9681
9682 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
9683 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
9684 SDValue ShiftRightLo =
9685 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
9686 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
9687 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
9688 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
9689
9690 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
9691
9692 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
9693 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
9694
9695 SDValue Parts[2] = {Lo, Hi};
9696 return DAG.getMergeValues(Parts, DL);
9697}
9698
9699SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
9700 bool IsSRA) const {
9701 SDLoc DL(Op);
9702 SDValue Lo = Op.getOperand(0);
9703 SDValue Hi = Op.getOperand(1);
9704 SDValue Shamt = Op.getOperand(2);
9705 EVT VT = Lo.getValueType();
9706
9707 // SRA expansion:
9708 // if Shamt-XLEN < 0: // Shamt < XLEN
9709 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
9710 // Hi = Hi >>s Shamt
9711 // else:
9712 // Lo = Hi >>s (Shamt-XLEN);
9713 // Hi = Hi >>s (XLEN-1)
9714 //
9715 // SRL expansion:
9716 // if Shamt-XLEN < 0: // Shamt < XLEN
9717 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
9718 // Hi = Hi >>u Shamt
9719 // else:
9720 // Lo = Hi >>u (Shamt-XLEN);
9721 // Hi = 0;
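// For example, on RV32 an arithmetic right shift of {Lo, Hi} by 36 takes the
// else branch and yields Lo = Hi >>s 4 and Hi = Hi >>s 31 (the sign fill).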
9722
9723 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
9724
9725 SDValue Zero = DAG.getConstant(0, DL, VT);
9726 SDValue One = DAG.getConstant(1, DL, VT);
9727 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
9728 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
9729 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
9730 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
9731
9732 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
9733 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
9734 SDValue ShiftLeftHi =
9735 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
9736 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
9737 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
9738 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
9739 SDValue HiFalse =
9740 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
9741
9742 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
9743
9744 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
9745 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
9746
9747 SDValue Parts[2] = {Lo, Hi};
9748 return DAG.getMergeValues(Parts, DL);
9749}
9750
9751// Lower splats of i1 types to SETCC. For each mask vector type, we have a
9752// legal equivalently-sized i8 type, so we can use that as a go-between.
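// As a rough sketch, splatting a non-constant i1 value held in a0 becomes:
//   andi     a0, a0, 1
//   vmv.v.x  v9, a0        ; splat into the equivalently-sized i8 vector
//   vmsne.vi v0, v9, 0     ; compare against zero to form the mask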
9753SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
9754 SelectionDAG &DAG) const {
9755 SDLoc DL(Op);
9756 MVT VT = Op.getSimpleValueType();
9757 SDValue SplatVal = Op.getOperand(0);
9758 // All-zeros or all-ones splats are handled specially.
9759 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
9760 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
9761 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
9762 }
9763 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
9764 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
9765 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
9766 }
9767 MVT InterVT = VT.changeVectorElementType(MVT::i8);
9768 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
9769 DAG.getConstant(1, DL, SplatVal.getValueType()));
9770 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
9771 SDValue Zero = DAG.getConstant(0, DL, InterVT);
9772 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
9773}
9774
9775// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
9776// illegal (currently only vXi64 RV32).
9777// FIXME: We could also catch non-constant sign-extended i32 values and lower
9778// them to VMV_V_X_VL.
9779SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
9780 SelectionDAG &DAG) const {
9781 SDLoc DL(Op);
9782 MVT VecVT = Op.getSimpleValueType();
9783 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
9784 "Unexpected SPLAT_VECTOR_PARTS lowering");
9785
9786 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
9787 SDValue Lo = Op.getOperand(0);
9788 SDValue Hi = Op.getOperand(1);
9789
9790 MVT ContainerVT = VecVT;
9791 if (VecVT.isFixedLengthVector())
9792 ContainerVT = getContainerForFixedLengthVector(VecVT);
9793
9794 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
9795
9796 SDValue Res =
9797 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
9798
9799 if (VecVT.isFixedLengthVector())
9800 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
9801
9802 return Res;
9803}
9804
9805// Custom-lower extensions from mask vectors by using a vselect either with 1
9806// for zero/any-extension or -1 for sign-extension:
9807// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
9808// Note that any-extension is lowered identically to zero-extension.
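// As a rough sketch, a sign-extension from nxv2i1 to nxv2i32 becomes a merge
// of -1 over a zero splat under the mask:
//   vmv.v.i    v8, 0
//   vmerge.vim v8, v8, -1, v0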
9809SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
9810 int64_t ExtTrueVal) const {
9811 SDLoc DL(Op);
9812 MVT VecVT = Op.getSimpleValueType();
9813 SDValue Src = Op.getOperand(0);
9814 // Only custom-lower extensions from mask types
9815 assert(Src.getValueType().isVector() &&
9816 Src.getValueType().getVectorElementType() == MVT::i1);
9817
9818 if (VecVT.isScalableVector()) {
9819 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
9820 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, VecVT);
9821 if (Src.getOpcode() == ISD::XOR &&
9822 ISD::isConstantSplatVectorAllOnes(Src.getOperand(1).getNode()))
9823 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src.getOperand(0), SplatZero,
9824 SplatTrueVal);
9825 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
9826 }
9827
9828 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
9829 MVT I1ContainerVT =
9830 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
9831
9832 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
9833
9834 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
9835
9836 MVT XLenVT = Subtarget.getXLenVT();
9837 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
9838 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, XLenVT);
9839
9840 if (Src.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
9841 SDValue Xor = Src.getOperand(0);
9842 if (Xor.getOpcode() == RISCVISD::VMXOR_VL) {
9843 SDValue ScalableOnes = Xor.getOperand(1);
9844 if (ScalableOnes.getOpcode() == ISD::INSERT_SUBVECTOR &&
9845 ScalableOnes.getOperand(0).isUndef() &&
9847 ScalableOnes.getOperand(1).getNode())) {
9848 CC = Xor.getOperand(0);
9849 std::swap(SplatZero, SplatTrueVal);
9850 }
9851 }
9852 }
9853
9854 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9855 DAG.getUNDEF(ContainerVT), SplatZero, VL);
9856 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9857 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
9858 SDValue Select =
9859 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
9860 SplatZero, DAG.getUNDEF(ContainerVT), VL);
9861
9862 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
9863}
9864
9865// Custom-lower truncations from vectors to mask vectors by using a mask and a
9866// setcc operation:
9867// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
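// As a rough sketch, truncating nxv2i32 to nxv2i1 becomes:
//   vand.vi  v8, v8, 1
//   vmsne.vi v0, v8, 0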
9868SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
9869 SelectionDAG &DAG) const {
9870 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
9871 SDLoc DL(Op);
9872 EVT MaskVT = Op.getValueType();
9873 // Only expect to custom-lower truncations to mask types
9874 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
9875 "Unexpected type for vector mask lowering");
9876 SDValue Src = Op.getOperand(0);
9877 MVT VecVT = Src.getSimpleValueType();
9878 SDValue Mask, VL;
9879 if (IsVPTrunc) {
9880 Mask = Op.getOperand(1);
9881 VL = Op.getOperand(2);
9882 }
9883 // If this is a fixed vector, we need to convert it to a scalable vector.
9884 MVT ContainerVT = VecVT;
9885
9886 if (VecVT.isFixedLengthVector()) {
9887 ContainerVT = getContainerForFixedLengthVector(VecVT);
9888 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
9889 if (IsVPTrunc) {
9890 MVT MaskContainerVT =
9891 getContainerForFixedLengthVector(Mask.getSimpleValueType());
9892 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
9893 }
9894 }
9895
9896 if (!IsVPTrunc) {
9897 std::tie(Mask, VL) =
9898 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9899 }
9900
9901 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
9902 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
9903
9904 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9905 DAG.getUNDEF(ContainerVT), SplatOne, VL);
9906 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9907 DAG.getUNDEF(ContainerVT), SplatZero, VL);
9908
9909 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
9910 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
9911 DAG.getUNDEF(ContainerVT), Mask, VL);
9912 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
9913 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
9914 DAG.getUNDEF(MaskContainerVT), Mask, VL});
9915 if (MaskVT.isFixedLengthVector())
9916 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
9917 return Trunc;
9918}
9919
9920SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
9921 SelectionDAG &DAG) const {
9922 unsigned Opc = Op.getOpcode();
9923 bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
9924 SDLoc DL(Op);
9925
9926 MVT VT = Op.getSimpleValueType();
9927 // Only custom-lower vector truncates
9928 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
9929
9930 // Truncates to mask types are handled differently
9931 if (VT.getVectorElementType() == MVT::i1)
9932 return lowerVectorMaskTruncLike(Op, DAG);
9933
9934 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
9935 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
9936 // truncate by one power of two at a time.
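// For example, truncating nxv2i64 to nxv2i8 takes three steps
// (i64 -> i32 -> i16 -> i8), each of which is typically selected to a
// narrowing shift such as vnsrl.wi with a shift amount of 0.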
9937 MVT DstEltVT = VT.getVectorElementType();
9938
9939 SDValue Src = Op.getOperand(0);
9940 MVT SrcVT = Src.getSimpleValueType();
9941 MVT SrcEltVT = SrcVT.getVectorElementType();
9942
9943 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
9944 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
9945 "Unexpected vector truncate lowering");
9946
9947 MVT ContainerVT = SrcVT;
9948 SDValue Mask, VL;
9949 if (IsVPTrunc) {
9950 Mask = Op.getOperand(1);
9951 VL = Op.getOperand(2);
9952 }
9953 if (SrcVT.isFixedLengthVector()) {
9954 ContainerVT = getContainerForFixedLengthVector(SrcVT);
9955 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
9956 if (IsVPTrunc) {
9957 MVT MaskVT = getMaskTypeFor(ContainerVT);
9958 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9959 }
9960 }
9961
9962 SDValue Result = Src;
9963 if (!IsVPTrunc) {
9964 std::tie(Mask, VL) =
9965 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
9966 }
9967
9968 unsigned NewOpc;
9969 if (Opc == ISD::TRUNCATE_SSAT_S)
9970 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
9971 else if (Opc == ISD::TRUNCATE_USAT_U)
9972 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
9973 else
9974 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
9975
9976 do {
9977 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
9978 MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
9979 Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
9980 } while (SrcEltVT != DstEltVT);
9981
9982 if (SrcVT.isFixedLengthVector())
9983 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
9984
9985 return Result;
9986}
9987
9988SDValue
9989RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
9990 SelectionDAG &DAG) const {
9991 SDLoc DL(Op);
9992 SDValue Chain = Op.getOperand(0);
9993 SDValue Src = Op.getOperand(1);
9994 MVT VT = Op.getSimpleValueType();
9995 MVT SrcVT = Src.getSimpleValueType();
9996 MVT ContainerVT = VT;
9997 if (VT.isFixedLengthVector()) {
9998 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
9999 ContainerVT =
10000 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
10001 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
10002 }
10003
10004 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
10005
10006 // RVV can only widen/truncate fp to types double/half the size of the source.
10007 if ((VT.getVectorElementType() == MVT::f64 &&
10008 (SrcVT.getVectorElementType() == MVT::f16 ||
10009 SrcVT.getVectorElementType() == MVT::bf16)) ||
10010 ((VT.getVectorElementType() == MVT::f16 ||
10011 VT.getVectorElementType() == MVT::bf16) &&
10012 SrcVT.getVectorElementType() == MVT::f64)) {
10013 // For double rounding, the intermediate rounding should be round-to-odd.
10014 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
10015 ? RISCVISD::STRICT_FP_EXTEND_VL
10016 : RISCVISD::STRICT_VFNCVT_ROD_VL;
10017 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
10018 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
10019 Chain, Src, Mask, VL);
10020 Chain = Src.getValue(1);
10021 }
10022
10023 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
10024 ? RISCVISD::STRICT_FP_EXTEND_VL
10025 : RISCVISD::STRICT_FP_ROUND_VL;
10026 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
10027 Chain, Src, Mask, VL);
10028 if (VT.isFixedLengthVector()) {
10029 // StrictFP operations have two result values. Their lowered result should
10030 // have the same result count.
10031 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
10032 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
10033 }
10034 return Res;
10035}
10036
10037SDValue
10038RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
10039 SelectionDAG &DAG) const {
10040 bool IsVP =
10041 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
10042 bool IsExtend =
10043 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
10044 // RVV can only truncate fp to types half the size of the source. We
10045 // custom-lower f64->f16 rounds via RVV's round-to-odd float
10046 // conversion instruction.
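// As a rough sketch, an f64 -> f16 round is split into vfncvt.rod.f.f.w
// (f64 -> f32, round-to-odd) followed by vfncvt.f.f.w (f32 -> f16), so that
// only the final step rounds to nearest.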
10047 SDLoc DL(Op);
10048 MVT VT = Op.getSimpleValueType();
10049
10050 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
10051
10052 SDValue Src = Op.getOperand(0);
10053 MVT SrcVT = Src.getSimpleValueType();
10054
10055 bool IsDirectExtend =
10056 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
10057 (SrcVT.getVectorElementType() != MVT::f16 &&
10058 SrcVT.getVectorElementType() != MVT::bf16));
10059 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
10060 VT.getVectorElementType() != MVT::bf16) ||
10061 SrcVT.getVectorElementType() != MVT::f64);
10062
10063 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
10064
10065 // We have regular SD node patterns for direct non-VL extends.
10066 if (VT.isScalableVector() && IsDirectConv && !IsVP)
10067 return Op;
10068
10069 // Prepare any fixed-length vector operands.
10070 MVT ContainerVT = VT;
10071 SDValue Mask, VL;
10072 if (IsVP) {
10073 Mask = Op.getOperand(1);
10074 VL = Op.getOperand(2);
10075 }
10076 if (VT.isFixedLengthVector()) {
10077 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
10078 ContainerVT =
10079 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
10080 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
10081 if (IsVP) {
10082 MVT MaskVT = getMaskTypeFor(ContainerVT);
10083 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10084 }
10085 }
10086
10087 if (!IsVP)
10088 std::tie(Mask, VL) =
10089 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
10090
10091 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
10092
10093 if (IsDirectConv) {
10094 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
10095 if (VT.isFixedLengthVector())
10096 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
10097 return Src;
10098 }
10099
10100 unsigned InterConvOpc =
10101 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
10102
10103 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
10104 SDValue IntermediateConv =
10105 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
10106 SDValue Result =
10107 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
10108 if (VT.isFixedLengthVector())
10109 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10110 return Result;
10111}
10112
10113// Given a scalable vector type and an index into it, returns the type for the
10114// smallest subvector that the index fits in. This can be used to reduce LMUL
10115// for operations like vslidedown.
10116//
10117// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
10118static std::optional<MVT>
10119getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
10120 const RISCVSubtarget &Subtarget) {
10121 assert(VecVT.isScalableVector());
10122 const unsigned EltSize = VecVT.getScalarSizeInBits();
10123 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
10124 const unsigned MinVLMAX = VectorBitsMin / EltSize;
10125 MVT SmallerVT;
10126 if (MaxIdx < MinVLMAX)
10127 SmallerVT = RISCVTargetLowering::getM1VT(VecVT);
10128 else if (MaxIdx < MinVLMAX * 2)
10128 SmallerVT =
10129 RISCVTargetLowering::getM1VT(VecVT)
10130 .getDoubleNumVectorElementsVT();
10131 else if (MaxIdx < MinVLMAX * 4)
10132 SmallerVT = RISCVTargetLowering::getM1VT(VecVT)
10133 .getDoubleNumVectorElementsVT()
10134 .getDoubleNumVectorElementsVT();
10135 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
10136 return std::nullopt;
10137 return SmallerVT;
10138}
10139
10141 auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
10142 if (!IdxC || isNullConstant(Idx))
10143 return false;
10144 return isUInt<5>(IdxC->getZExtValue());
10145}
10146
10147// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
10148// first position of a vector, and that vector is slid up to the insert index.
10149// By limiting the active vector length to index+1 and merging with the
10150// original vector (with an undisturbed tail policy for elements >= VL), we
10151// achieve the desired result of leaving all elements untouched except the one
10152// at VL-1, which is replaced with the desired value.
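// As a rough sketch, inserting a scalar into element 5 of a v8i32 vector moves
// the value into element 0 of a temporary (vmv.s.x) and then performs
// "vslideup.vi v8, v9, 5" with VL = 6 and a tail-undisturbed policy, so
// elements 6 and 7 are left untouched.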
10153SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
10154 SelectionDAG &DAG) const {
10155 SDLoc DL(Op);
10156 MVT VecVT = Op.getSimpleValueType();
10157 MVT XLenVT = Subtarget.getXLenVT();
10158 SDValue Vec = Op.getOperand(0);
10159 SDValue Val = Op.getOperand(1);
10160 MVT ValVT = Val.getSimpleValueType();
10161 SDValue Idx = Op.getOperand(2);
10162
10163 if (VecVT.getVectorElementType() == MVT::i1) {
10164 // FIXME: For now we just promote to an i8 vector and insert into that,
10165 // but this is probably not optimal.
10166 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10167 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
10168 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
10169 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
10170 }
10171
10172 if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
10173 ValVT == MVT::bf16) {
10174 // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
10175 MVT IntVT = VecVT.changeTypeToInteger();
10176 SDValue IntInsert = DAG.getNode(
10177 ISD::INSERT_VECTOR_ELT, DL, IntVT, DAG.getBitcast(IntVT, Vec),
10178 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Val), Idx);
10179 return DAG.getBitcast(VecVT, IntInsert);
10180 }
10181
10182 MVT ContainerVT = VecVT;
10183 // If the operand is a fixed-length vector, convert to a scalable one.
10184 if (VecVT.isFixedLengthVector()) {
10185 ContainerVT = getContainerForFixedLengthVector(VecVT);
10186 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10187 }
10188
10189 // If we know the index we're going to insert at, we can shrink Vec so that
10190 // we're performing the scalar inserts and slideup on a smaller LMUL.
10191 SDValue OrigVec = Vec;
10192 std::optional<unsigned> AlignedIdx;
10193 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
10194 const unsigned OrigIdx = IdxC->getZExtValue();
10195 // Do we know an upper bound on LMUL?
10196 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
10197 DL, DAG, Subtarget)) {
10198 ContainerVT = *ShrunkVT;
10199 AlignedIdx = 0;
10200 }
10201
10202 // If we're compiling for an exact VLEN value, we can always perform
10203 // the insert in m1 as we can determine the register corresponding to
10204 // the index in the register group.
10205 const MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
10206 if (auto VLEN = Subtarget.getRealVLen(); VLEN && ContainerVT.bitsGT(M1VT)) {
10207 EVT ElemVT = VecVT.getVectorElementType();
10208 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
10209 unsigned RemIdx = OrigIdx % ElemsPerVReg;
10210 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
10211 AlignedIdx = SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
10212 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
10213 ContainerVT = M1VT;
10214 }
10215
10216 if (AlignedIdx)
10217 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, *AlignedIdx);
10218 }
10219
10220 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
10221 // Even i64-element vectors on RV32 can be lowered without scalar
10222 // legalization if the most-significant 32 bits of the value are not affected
10223 // by the sign-extension of the lower 32 bits.
10224 // TODO: We could also catch sign extensions of a 32-bit value.
10225 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
10226 const auto *CVal = cast<ConstantSDNode>(Val);
10227 if (isInt<32>(CVal->getSExtValue())) {
10228 IsLegalInsert = true;
10229 Val = DAG.getSignedConstant(CVal->getSExtValue(), DL, MVT::i32);
10230 }
10231 }
10232
10233 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10234
10235 SDValue ValInVec;
10236
10237 if (IsLegalInsert) {
10238 unsigned Opc =
10239 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
10240 if (isNullConstant(Idx)) {
10241 if (!VecVT.isFloatingPoint())
10242 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
10243 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
10244
10245 if (AlignedIdx)
10246 Vec = DAG.getInsertSubvector(DL, OrigVec, Vec, *AlignedIdx);
10247 if (!VecVT.isFixedLengthVector())
10248 return Vec;
10249 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
10250 }
10251
10252 // Use ri.vinsert.v.x if available.
10253 if (Subtarget.hasVendorXRivosVisni() && VecVT.isInteger() &&
10255 // Tail policy applies to elements past VLMAX (by assumption Idx < VLMAX)
10256 SDValue PolicyOp =
10258 Vec = DAG.getNode(RISCVISD::RI_VINSERT_VL, DL, ContainerVT, Vec, Val, Idx,
10259 VL, PolicyOp);
10260 if (AlignedIdx)
10261 Vec = DAG.getInsertSubvector(DL, OrigVec, Vec, *AlignedIdx);
10262 if (!VecVT.isFixedLengthVector())
10263 return Vec;
10264 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
10265 }
10266
10267 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
10268 } else {
10269 // On RV32, i64-element vectors must be specially handled to place the
10270 // value at element 0, by using two vslide1down instructions in sequence on
10271 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
10272 // this.
10273 SDValue ValLo, ValHi;
10274 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
10275 MVT I32ContainerVT =
10276 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
10277 SDValue I32Mask =
10278 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
10279 // Limit the active VL to two.
10280 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
10281 // If the Idx is 0 we can insert directly into the vector.
10282 if (isNullConstant(Idx)) {
10283 // First slide in the lo value, then the hi in above it. We use slide1down
10284 // to avoid the register group overlap constraint of vslide1up.
10285 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10286 Vec, Vec, ValLo, I32Mask, InsertI64VL);
10287 // If the source vector is undef don't pass along the tail elements from
10288 // the previous slide1down.
10289 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
10290 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10291 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
10292 // Bitcast back to the right container type.
10293 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
10294
10295 if (AlignedIdx)
10296 ValInVec = DAG.getInsertSubvector(DL, OrigVec, ValInVec, *AlignedIdx);
10297 if (!VecVT.isFixedLengthVector())
10298 return ValInVec;
10299 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
10300 }
10301
10302 // First slide in the lo value, then the hi in above it. We use slide1down
10303 // to avoid the register group overlap constraint of vslide1up.
10304 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10305 DAG.getUNDEF(I32ContainerVT),
10306 DAG.getUNDEF(I32ContainerVT), ValLo,
10307 I32Mask, InsertI64VL);
10308 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10309 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
10310 I32Mask, InsertI64VL);
10311 // Bitcast back to the right container type.
10312 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
10313 }
10314
10315 // Now that the value is in a vector, slide it into position.
10316 SDValue InsertVL =
10317 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
10318
10319 // Use tail agnostic policy if Idx is the last index of Vec.
10320 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10321 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
10322 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
10323 Policy = RISCVVType::TAIL_AGNOSTIC;
10324 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
10325 Idx, Mask, InsertVL, Policy);
10326
10327 if (AlignedIdx)
10328 Slideup = DAG.getInsertSubvector(DL, OrigVec, Slideup, *AlignedIdx);
10329 if (!VecVT.isFixedLengthVector())
10330 return Slideup;
10331 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
10332}
10333
10334// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
10335// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
10336// types this is done using VMV_X_S to allow us to glean information about the
10337// sign bits of the result.
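// As a rough sketch, extracting element 2 of an nxv4i32 vector becomes
// "vslidedown.vi v9, v8, 2" with VL = 1 followed by "vmv.x.s a0, v9".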
10338SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
10339 SelectionDAG &DAG) const {
10340 SDLoc DL(Op);
10341 SDValue Idx = Op.getOperand(1);
10342 SDValue Vec = Op.getOperand(0);
10343 EVT EltVT = Op.getValueType();
10344 MVT VecVT = Vec.getSimpleValueType();
10345 MVT XLenVT = Subtarget.getXLenVT();
10346
10347 if (VecVT.getVectorElementType() == MVT::i1) {
10348 // Use vfirst.m to extract the first bit.
10349 if (isNullConstant(Idx)) {
10350 MVT ContainerVT = VecVT;
10351 if (VecVT.isFixedLengthVector()) {
10352 ContainerVT = getContainerForFixedLengthVector(VecVT);
10353 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10354 }
10355 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10356 SDValue Vfirst =
10357 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
10358 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
10359 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10360 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
10361 }
10362 if (VecVT.isFixedLengthVector()) {
10363 unsigned NumElts = VecVT.getVectorNumElements();
10364 if (NumElts >= 8) {
10365 MVT WideEltVT;
10366 unsigned WidenVecLen;
10367 SDValue ExtractElementIdx;
10368 SDValue ExtractBitIdx;
10369 unsigned MaxEEW = Subtarget.getELen();
10370 MVT LargestEltVT = MVT::getIntegerVT(
10371 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
10372 if (NumElts <= LargestEltVT.getSizeInBits()) {
10373 assert(isPowerOf2_32(NumElts) &&
10374 "the number of elements should be power of 2");
10375 WideEltVT = MVT::getIntegerVT(NumElts);
10376 WidenVecLen = 1;
10377 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
10378 ExtractBitIdx = Idx;
10379 } else {
10380 WideEltVT = LargestEltVT;
10381 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
10382 // extract element index = index / element width
10383 ExtractElementIdx = DAG.getNode(
10384 ISD::SRL, DL, XLenVT, Idx,
10385 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
10386 // mask bit index = index % element width
10387 ExtractBitIdx = DAG.getNode(
10388 ISD::AND, DL, XLenVT, Idx,
10389 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
10390 }
10391 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
10392 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
10393 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
10394 Vec, ExtractElementIdx);
10395 // Extract the bit from GPR.
10396 SDValue ShiftRight =
10397 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
10398 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
10399 DAG.getConstant(1, DL, XLenVT));
10400 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
10401 }
10402 }
10403 // Otherwise, promote to an i8 vector and extract from that.
10404 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10405 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
10406 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
10407 }
10408
10409 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
10410 EltVT == MVT::bf16) {
10411 // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
10412 MVT IntVT = VecVT.changeTypeToInteger();
10413 SDValue IntVec = DAG.getBitcast(IntVT, Vec);
10414 SDValue IntExtract =
10415 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, IntVec, Idx);
10416 return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract);
10417 }
10418
10419 // If this is a fixed vector, we need to convert it to a scalable vector.
10420 MVT ContainerVT = VecVT;
10421 if (VecVT.isFixedLengthVector()) {
10422 ContainerVT = getContainerForFixedLengthVector(VecVT);
10423 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10424 }
10425
10426 // If we're compiling for an exact VLEN value and we have a known
10427 // constant index, we can always perform the extract in m1 (or
10428 // smaller) as we can determine the register corresponding to
10429 // the index in the register group.
10430 const auto VLen = Subtarget.getRealVLen();
10431 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
10432 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
10433 MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
10434 unsigned OrigIdx = IdxC->getZExtValue();
10435 EVT ElemVT = VecVT.getVectorElementType();
10436 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
10437 unsigned RemIdx = OrigIdx % ElemsPerVReg;
10438 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
10439 unsigned ExtractIdx =
10440 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
10441 Vec = DAG.getExtractSubvector(DL, M1VT, Vec, ExtractIdx);
10442 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
10443 ContainerVT = M1VT;
10444 }
10445
10446 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
10447 // contains our index.
10448 std::optional<uint64_t> MaxIdx;
10449 if (VecVT.isFixedLengthVector())
10450 MaxIdx = VecVT.getVectorNumElements() - 1;
10451 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
10452 MaxIdx = IdxC->getZExtValue();
10453 if (MaxIdx) {
10454 if (auto SmallerVT =
10455 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
10456 ContainerVT = *SmallerVT;
10457 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, 0);
10458 }
10459 }
10460
10461 // Use ri.vextract.x.v if available.
10462 // TODO: Avoid index 0 and just use the vmv.x.s
10463 if (Subtarget.hasVendorXRivosVisni() && EltVT.isInteger() &&
10465 SDValue Elt = DAG.getNode(RISCVISD::RI_VEXTRACT, DL, XLenVT, Vec, Idx);
10466 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt);
10467 }
10468
10469 // If after narrowing, the required slide is still greater than LMUL2,
10470 // fallback to generic expansion and go through the stack. This is done
10471 // for a subtle reason: extracting *all* elements out of a vector is
10472 // widely expected to be linear in vector size, but because vslidedown
10473 // is linear in LMUL, performing N extracts using vslidedown becomes
10474 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
10475 // seems to have the same problem (the store is linear in LMUL), but the
10476 // generic expansion *memoizes* the store, and thus for many extracts of
10477 // the same vector we end up with one store and a bunch of loads.
10478 // TODO: We don't have the same code for insert_vector_elt because we
10479 // have BUILD_VECTOR and handle the degenerate case there. Should we
10480 // consider adding an inverse BUILD_VECTOR node?
10481 MVT LMUL2VT =
10482 RISCVTargetLowering::getM1VT(ContainerVT).getDoubleNumVectorElementsVT();
10483 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
10484 return SDValue();
10485
10486 // If the index is 0, the vector is already in the right position.
10487 if (!isNullConstant(Idx)) {
10488 // Use a VL of 1 to avoid processing more elements than we need.
10489 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
10490 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10491 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
10492 }
10493
10494 if (!EltVT.isInteger()) {
10495 // Floating-point extracts are handled in TableGen.
10496 return DAG.getExtractVectorElt(DL, EltVT, Vec, 0);
10497 }
10498
10499 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
10500 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
10501}
10502
10503// Some RVV intrinsics may claim that they want an integer operand to be
10504// promoted or expanded.
10505 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
10506 const RISCVSubtarget &Subtarget) {
10507 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
10508 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
10509 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
10510 "Unexpected opcode");
10511
10512 if (!Subtarget.hasVInstructions())
10513 return SDValue();
10514
10515 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
10516 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
10517 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
10518
10519 SDLoc DL(Op);
10520
10521 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
10522 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
10523 if (!II || !II->hasScalarOperand())
10524 return SDValue();
10525
10526 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
10527 assert(SplatOp < Op.getNumOperands());
10528
10529 SmallVector<SDValue, 8> Operands(Op->ops());
10530 SDValue &ScalarOp = Operands[SplatOp];
10531 MVT OpVT = ScalarOp.getSimpleValueType();
10532 MVT XLenVT = Subtarget.getXLenVT();
10533
10534 // If this isn't a scalar, or its type is XLenVT we're done.
10535 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
10536 return SDValue();
10537
10538 // Simplest case is that the operand needs to be promoted to XLenVT.
10539 if (OpVT.bitsLT(XLenVT)) {
10540 // If the operand is a constant, sign extend to increase our chances
10541 // of being able to use a .vi instruction. ANY_EXTEND would become a
10542 // zero extend and the simm5 check in isel would fail.
10543 // FIXME: Should we ignore the upper bits in isel instead?
10544 unsigned ExtOpc =
10545 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
10546 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
10547 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10548 }
10549
10550 // Use the previous operand to get the vXi64 VT. The result might be a mask
10551 // VT for compares. Using the previous operand assumes that the previous
10552 // operand will never have a smaller element size than a scalar operand and
10553 // that a widening operation never uses SEW=64.
10554 // NOTE: If this fails the below assert, we can probably just find the
10555 // element count from any operand or result and use it to construct the VT.
10556 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
10557 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
10558
10559 // The more complex case is when the scalar is larger than XLenVT.
10560 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
10561 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
10562
10563 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
10564 // instruction to sign-extend since SEW>XLEN.
10565 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
10566 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
10567 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10568 }
10569
10570 switch (IntNo) {
10571 case Intrinsic::riscv_vslide1up:
10572 case Intrinsic::riscv_vslide1down:
10573 case Intrinsic::riscv_vslide1up_mask:
10574 case Intrinsic::riscv_vslide1down_mask: {
10575 // We need to special case these when the scalar is larger than XLen.
10576 unsigned NumOps = Op.getNumOperands();
10577 bool IsMasked = NumOps == 7;
10578
10579 // Convert the vector source to the equivalent nxvXi32 vector.
10580 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
10581 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
10582 SDValue ScalarLo, ScalarHi;
10583 std::tie(ScalarLo, ScalarHi) =
10584 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
10585
10586 // Double the VL since we halved SEW.
10587 SDValue AVL = getVLOperand(Op);
10588 SDValue I32VL;
10589
10590 // Optimize for constant AVL
10591 if (isa<ConstantSDNode>(AVL)) {
10592 const auto [MinVLMAX, MaxVLMAX] =
10594
10595 uint64_t AVLInt = AVL->getAsZExtVal();
10596 if (AVLInt <= MinVLMAX) {
10597 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
10598 } else if (AVLInt >= 2 * MaxVLMAX) {
10599 // Just set vl to VLMAX in this situation
10600 I32VL = DAG.getRegister(RISCV::X0, XLenVT);
10601 } else {
10602 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
10603 // is related to the hardware implementation.
10604 // So let the following code handle it.
10605 }
10606 }
10607 if (!I32VL) {
10609 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
10610 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
10611 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
10612 SDValue SETVL =
10613 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
10614 // Use a vsetvli instruction to get the actually used length, which is
10615 // related to the hardware implementation.
10616 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
10617 SEW, LMUL);
10618 I32VL =
10619 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
10620 }
10621
10622 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
10623
10624 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
10625 // instructions.
10626 SDValue Passthru;
10627 if (IsMasked)
10628 Passthru = DAG.getUNDEF(I32VT);
10629 else
10630 Passthru = DAG.getBitcast(I32VT, Operands[1]);
10631
10632 if (IntNo == Intrinsic::riscv_vslide1up ||
10633 IntNo == Intrinsic::riscv_vslide1up_mask) {
10634 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
10635 ScalarHi, I32Mask, I32VL);
10636 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
10637 ScalarLo, I32Mask, I32VL);
10638 } else {
10639 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
10640 ScalarLo, I32Mask, I32VL);
10641 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
10642 ScalarHi, I32Mask, I32VL);
10643 }
10644
10645 // Convert back to nxvXi64.
10646 Vec = DAG.getBitcast(VT, Vec);
10647
10648 if (!IsMasked)
10649 return Vec;
10650 // Apply mask after the operation.
10651 SDValue Mask = Operands[NumOps - 3];
10652 SDValue MaskedOff = Operands[1];
10653 // Assume Policy operand is the last operand.
10654 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
10655 // We don't need to select maskedoff if it's undef.
10656 if (MaskedOff.isUndef())
10657 return Vec;
10658 // TAMU
10659 if (Policy == RISCVVType::TAIL_AGNOSTIC)
10660 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
10661 DAG.getUNDEF(VT), AVL);
10662 // TUMA or TUMU: Currently we always emit the tumu policy regardless of tuma.
10663 // This is fine because vmerge does not care about the mask policy.
10664 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
10665 MaskedOff, AVL);
10666 }
10667 }
10668
10669 // We need to convert the scalar to a splat vector.
10670 SDValue VL = getVLOperand(Op);
10671 assert(VL.getValueType() == XLenVT);
10672 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
10673 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10674}
10675
10676// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
10677// scalable vector llvm.get.vector.length for now.
10678//
10679// We need to convert from a scalable VF to a vsetvli with VLMax equal to
10680// (vscale * VF). The vscale and VF are independent of element width. We use
10681// SEW=8 for the vsetvli because it is the only element width that supports all
10682// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
10684 // (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
10684// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
10685// SEW and LMUL are better for the surrounding vector instructions.
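// For example, a scalable VF of 2 maps to SEW=8 with LMUL=mf4, since VLMax is
// then (VLEN/8)/4 = vscale * 2, so the call becomes roughly
// "vsetvli a0, a0, e8, mf4, ta, ma" (a sketch; the exact policy bits may vary).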
10686 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
10687 const RISCVSubtarget &Subtarget) {
10688 MVT XLenVT = Subtarget.getXLenVT();
10689
10690 // The smallest LMUL is only valid for the smallest element width.
10691 const unsigned ElementWidth = 8;
10692
10693 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
10694 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
10695 // We don't support VF==1 with ELEN==32.
10696 [[maybe_unused]] unsigned MinVF =
10697 RISCV::RVVBitsPerBlock / Subtarget.getELen();
10698
10699 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
10700 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
10701 "Unexpected VF");
10702
10703 bool Fractional = VF < LMul1VF;
10704 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
10705 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
10706 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
10707
10708 SDLoc DL(N);
10709
10710 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
10711 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
10712
10713 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
10714
10715 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
10716 SDValue Res =
10717 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
10718 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
10719}
10720
10721 static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
10722 const RISCVSubtarget &Subtarget) {
10723 SDValue Op0 = N->getOperand(1);
10724 MVT OpVT = Op0.getSimpleValueType();
10725 MVT ContainerVT = OpVT;
10726 if (OpVT.isFixedLengthVector()) {
10727 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
10728 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
10729 }
10730 MVT XLenVT = Subtarget.getXLenVT();
10731 SDLoc DL(N);
10732 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
10733 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
10734 if (isOneConstant(N->getOperand(2)))
10735 return Res;
10736
10737 // Convert -1 to VL.
10738 SDValue Setcc =
10739 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
10740 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
10741 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
10742}
10743
10744static inline void promoteVCIXScalar(SDValue Op,
10745 SmallVectorImpl<SDValue> &Operands,
10746 SelectionDAG &DAG) {
10747 const RISCVSubtarget &Subtarget =
10748 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
10749
10750 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
10751 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
10752 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
10753 SDLoc DL(Op);
10754
10755 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
10756 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
10757 if (!II || !II->hasScalarOperand())
10758 return;
10759
10760 unsigned SplatOp = II->ScalarOperand + 1;
10761 assert(SplatOp < Op.getNumOperands());
10762
10763 SDValue &ScalarOp = Operands[SplatOp];
10764 MVT OpVT = ScalarOp.getSimpleValueType();
10765 MVT XLenVT = Subtarget.getXLenVT();
10766
10767 // The code below is partially copied from lowerVectorIntrinsicScalars.
10768 // If this isn't a scalar, or its type is XLenVT we're done.
10769 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
10770 return;
10771
10772 // Manually emit the promote operation for the scalar operand.
10773 if (OpVT.bitsLT(XLenVT)) {
10774 unsigned ExtOpc =
10775 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
10776 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
10777 }
10778}
10779
10780static void processVCIXOperands(SDValue OrigOp,
10781 SmallVectorImpl<SDValue> &Operands,
10782 SelectionDAG &DAG) {
10783 promoteVCIXScalar(OrigOp, Operands, DAG);
10784 const RISCVSubtarget &Subtarget =
10785 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
10786 for (SDValue &V : Operands) {
10787 EVT ValType = V.getValueType();
10788 if (ValType.isVector() && ValType.isFloatingPoint()) {
10789 MVT InterimIVT =
10790 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
10791 ValType.getVectorElementCount());
10792 V = DAG.getBitcast(InterimIVT, V);
10793 }
10794 if (ValType.isFixedLengthVector()) {
10795 MVT OpContainerVT = getContainerForFixedLengthVector(
10796 DAG, V.getSimpleValueType(), Subtarget);
10797 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
10798 }
10799 }
10800}
10801
10802// LMUL * VLEN should be greater than or equal to EGS * SEW
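// For example, with EGS=4 and SEW=32 (EGW=128) a minimum VLEN of 128 is valid
// for LMUL >= 1 (128 * 1 >= 128) but not for LMUL=1/2 (64 < 128).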
10803static inline bool isValidEGW(int EGS, EVT VT,
10804 const RISCVSubtarget &Subtarget) {
10805 return (Subtarget.getRealMinVLen() *
10806 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
10807 EGS * VT.getScalarSizeInBits();
10808}
10809
10810SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10811 SelectionDAG &DAG) const {
10812 unsigned IntNo = Op.getConstantOperandVal(0);
10813 SDLoc DL(Op);
10814 MVT XLenVT = Subtarget.getXLenVT();
10815
10816 switch (IntNo) {
10817 default:
10818 break; // Don't custom lower most intrinsics.
10819 case Intrinsic::riscv_tuple_insert: {
10820 SDValue Vec = Op.getOperand(1);
10821 SDValue SubVec = Op.getOperand(2);
10822 SDValue Index = Op.getOperand(3);
10823
10824 return DAG.getNode(RISCVISD::TUPLE_INSERT, DL, Op.getValueType(), Vec,
10825 SubVec, Index);
10826 }
10827 case Intrinsic::riscv_tuple_extract: {
10828 SDValue Vec = Op.getOperand(1);
10829 SDValue Index = Op.getOperand(2);
10830
10831 return DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, Op.getValueType(), Vec,
10832 Index);
10833 }
10834 case Intrinsic::thread_pointer: {
10835 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10836 return DAG.getRegister(RISCV::X4, PtrVT);
10837 }
10838 case Intrinsic::riscv_orc_b:
10839 case Intrinsic::riscv_brev8:
10840 case Intrinsic::riscv_sha256sig0:
10841 case Intrinsic::riscv_sha256sig1:
10842 case Intrinsic::riscv_sha256sum0:
10843 case Intrinsic::riscv_sha256sum1:
10844 case Intrinsic::riscv_sm3p0:
10845 case Intrinsic::riscv_sm3p1: {
10846 unsigned Opc;
10847 switch (IntNo) {
10848 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
10849 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
10850 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
10851 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
10852 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
10853 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
10854 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
10855 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
10856 }
10857
10858 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
10859 }
10860 case Intrinsic::riscv_sm4ks:
10861 case Intrinsic::riscv_sm4ed: {
10862 unsigned Opc =
10863 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
10864
10865 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
10866 Op.getOperand(3));
10867 }
10868 case Intrinsic::riscv_zip:
10869 case Intrinsic::riscv_unzip: {
10870 unsigned Opc =
10871 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
10872 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
10873 }
10874 case Intrinsic::riscv_mopr:
10875 return DAG.getNode(RISCVISD::MOP_R, DL, XLenVT, Op.getOperand(1),
10876 Op.getOperand(2));
10877
10878 case Intrinsic::riscv_moprr: {
10879 return DAG.getNode(RISCVISD::MOP_RR, DL, XLenVT, Op.getOperand(1),
10880 Op.getOperand(2), Op.getOperand(3));
10881 }
10882 case Intrinsic::riscv_clmul:
10883 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
10884 Op.getOperand(2));
10885 case Intrinsic::riscv_clmulh:
10886 case Intrinsic::riscv_clmulr: {
10887 unsigned Opc =
10888 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
10889 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
10890 }
10891 case Intrinsic::experimental_get_vector_length:
10892 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
10893 case Intrinsic::experimental_cttz_elts:
10894 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
10895 case Intrinsic::riscv_vmv_x_s: {
10896 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
10897 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
10898 }
10899 case Intrinsic::riscv_vfmv_f_s:
10900 return DAG.getExtractVectorElt(DL, Op.getValueType(), Op.getOperand(1), 0);
10901 case Intrinsic::riscv_vmv_v_x:
10902 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
10903 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
10904 Subtarget);
10905 case Intrinsic::riscv_vfmv_v_f:
10906 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
10907 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
10908 case Intrinsic::riscv_vmv_s_x: {
10909 SDValue Scalar = Op.getOperand(2);
10910
10911 if (Scalar.getValueType().bitsLE(XLenVT)) {
10912 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
10913 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
10914 Op.getOperand(1), Scalar, Op.getOperand(3));
10915 }
10916
10917 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
10918
10919 // This is an i64 value that lives in two scalar registers. We have to
10920 // insert this in a convoluted way. First we build a vXi64 splat containing
10921 // the two values that we assemble using some bit math. Next we'll use
10922 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
10923 // to merge element 0 from our splat into the source vector.
10924 // FIXME: This is probably not the best way to do this, but it is
10925 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
10926 // point.
10927 // sw lo, (a0)
10928 // sw hi, 4(a0)
10929 // vlse vX, (a0)
10930 //
10931 // vid.v vVid
10932 // vmseq.vx mMask, vVid, 0
10933 // vmerge.vvm vDest, vSrc, vVal, mMask
10934 MVT VT = Op.getSimpleValueType();
10935 SDValue Vec = Op.getOperand(1);
10936 SDValue VL = getVLOperand(Op);
10937
10938 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
10939 if (Op.getOperand(1).isUndef())
10940 return SplattedVal;
10941 SDValue SplattedIdx =
10942 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
10943 DAG.getConstant(0, DL, MVT::i32), VL);
10944
10945 MVT MaskVT = getMaskTypeFor(VT);
10946 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
10947 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
10948 SDValue SelectCond =
10949 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
10950 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
10951 DAG.getUNDEF(MaskVT), Mask, VL});
10952 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
10953 Vec, DAG.getUNDEF(VT), VL);
10954 }
10955 case Intrinsic::riscv_vfmv_s_f:
10956 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
10957 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
10958 // EGS * EEW >= 128 bits
10959 case Intrinsic::riscv_vaesdf_vv:
10960 case Intrinsic::riscv_vaesdf_vs:
10961 case Intrinsic::riscv_vaesdm_vv:
10962 case Intrinsic::riscv_vaesdm_vs:
10963 case Intrinsic::riscv_vaesef_vv:
10964 case Intrinsic::riscv_vaesef_vs:
10965 case Intrinsic::riscv_vaesem_vv:
10966 case Intrinsic::riscv_vaesem_vs:
10967 case Intrinsic::riscv_vaeskf1:
10968 case Intrinsic::riscv_vaeskf2:
10969 case Intrinsic::riscv_vaesz_vs:
10970 case Intrinsic::riscv_vsm4k:
10971 case Intrinsic::riscv_vsm4r_vv:
10972 case Intrinsic::riscv_vsm4r_vs: {
10973 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
10974 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
10975 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
10976 reportFatalUsageError("EGW should be greater than or equal to 4 * SEW.");
10977 return Op;
10978 }
10979 // EGS * EEW >= 256 bits
10980 case Intrinsic::riscv_vsm3c:
10981 case Intrinsic::riscv_vsm3me: {
10982 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
10983 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
10984 reportFatalUsageError("EGW should be greater than or equal to 8 * SEW.");
10985 return Op;
10986 }
10987 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
10988 case Intrinsic::riscv_vsha2ch:
10989 case Intrinsic::riscv_vsha2cl:
10990 case Intrinsic::riscv_vsha2ms: {
10991 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
10992 !Subtarget.hasStdExtZvknhb())
10993 reportFatalUsageError("SEW=64 needs Zvknhb to be enabled.");
10994 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
10995 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
10996 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
10997 reportFatalUsageError("EGW should be greater than or equal to 4 * SEW.");
10998 return Op;
10999 }
11000 case Intrinsic::riscv_sf_vc_v_x:
11001 case Intrinsic::riscv_sf_vc_v_i:
11002 case Intrinsic::riscv_sf_vc_v_xv:
11003 case Intrinsic::riscv_sf_vc_v_iv:
11004 case Intrinsic::riscv_sf_vc_v_vv:
11005 case Intrinsic::riscv_sf_vc_v_fv:
11006 case Intrinsic::riscv_sf_vc_v_xvv:
11007 case Intrinsic::riscv_sf_vc_v_ivv:
11008 case Intrinsic::riscv_sf_vc_v_vvv:
11009 case Intrinsic::riscv_sf_vc_v_fvv:
11010 case Intrinsic::riscv_sf_vc_v_xvw:
11011 case Intrinsic::riscv_sf_vc_v_ivw:
11012 case Intrinsic::riscv_sf_vc_v_vvw:
11013 case Intrinsic::riscv_sf_vc_v_fvw: {
11014 MVT VT = Op.getSimpleValueType();
11015
11016 SmallVector<SDValue> Operands{Op->op_values()};
11017 processVCIXOperands(Op, Operands, DAG);
11018
11019 MVT RetVT = VT;
11020 if (VT.isFixedLengthVector())
11021 RetVT = getContainerForFixedLengthVector(VT);
11022 else if (VT.isFloatingPoint())
11023 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
11024 VT.getVectorElementCount());
11025
11026 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
11027
11028 if (VT.isFixedLengthVector())
11029 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
11030 else if (VT.isFloatingPoint())
11031 NewNode = DAG.getBitcast(VT, NewNode);
11032
11033 if (Op == NewNode)
11034 break;
11035
11036 return NewNode;
11037 }
11038 }
11039
11040 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11041}
11042
11043static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
11044 unsigned Type) {
11045 SDLoc DL(Op);
11046 SmallVector<SDValue> Operands{Op->op_values()};
11047 Operands.erase(Operands.begin() + 1);
11048
11049 const RISCVSubtarget &Subtarget =
11050 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
11051 MVT VT = Op.getSimpleValueType();
11052 MVT RetVT = VT;
11053 MVT FloatVT = VT;
11054
11055 if (VT.isFloatingPoint()) {
11056 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
11057 VT.getVectorElementCount());
11058 FloatVT = RetVT;
11059 }
11060 if (VT.isFixedLengthVector())
11061 RetVT = getContainerForFixedLengthVector(DAG, RetVT,
11062 Subtarget);
11063
11064 processVCIXOperands(Op, Operands, DAG);
11065
11066 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
11067 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
11068 SDValue Chain = NewNode.getValue(1);
11069
11070 if (VT.isFixedLengthVector())
11071 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
11072 if (VT.isFloatingPoint())
11073 NewNode = DAG.getBitcast(VT, NewNode);
11074
11075 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
11076
11077 return NewNode;
11078}
11079
11080static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
11081 unsigned Type) {
11082 SmallVector<SDValue> Operands{Op->op_values()};
11083 Operands.erase(Operands.begin() + 1);
11084 processVCIXOperands(Op, Operands, DAG);
11085
11086 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
11087}
11088
11089static SDValue
11090lowerFixedVectorSegLoadIntrinsics(unsigned IntNo, SDValue Op,
11091 const RISCVSubtarget &Subtarget,
11092 SelectionDAG &DAG) {
11093 bool IsStrided;
11094 switch (IntNo) {
11095 case Intrinsic::riscv_seg2_load_mask:
11096 case Intrinsic::riscv_seg3_load_mask:
11097 case Intrinsic::riscv_seg4_load_mask:
11098 case Intrinsic::riscv_seg5_load_mask:
11099 case Intrinsic::riscv_seg6_load_mask:
11100 case Intrinsic::riscv_seg7_load_mask:
11101 case Intrinsic::riscv_seg8_load_mask:
11102 IsStrided = false;
11103 break;
11104 case Intrinsic::riscv_sseg2_load_mask:
11105 case Intrinsic::riscv_sseg3_load_mask:
11106 case Intrinsic::riscv_sseg4_load_mask:
11107 case Intrinsic::riscv_sseg5_load_mask:
11108 case Intrinsic::riscv_sseg6_load_mask:
11109 case Intrinsic::riscv_sseg7_load_mask:
11110 case Intrinsic::riscv_sseg8_load_mask:
11111 IsStrided = true;
11112 break;
11113 default:
11114 llvm_unreachable("unexpected intrinsic ID");
11115 };
11116
11117 static const Intrinsic::ID VlsegInts[7] = {
11118 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
11119 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
11120 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
11121 Intrinsic::riscv_vlseg8_mask};
11122 static const Intrinsic::ID VlssegInts[7] = {
11123 Intrinsic::riscv_vlsseg2_mask, Intrinsic::riscv_vlsseg3_mask,
11124 Intrinsic::riscv_vlsseg4_mask, Intrinsic::riscv_vlsseg5_mask,
11125 Intrinsic::riscv_vlsseg6_mask, Intrinsic::riscv_vlsseg7_mask,
11126 Intrinsic::riscv_vlsseg8_mask};
11127
11128 SDLoc DL(Op);
11129 unsigned NF = Op->getNumValues() - 1;
11130 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
11131 MVT XLenVT = Subtarget.getXLenVT();
11132 MVT VT = Op->getSimpleValueType(0);
11133 MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
11134 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
11135 ContainerVT.getScalarSizeInBits();
11136 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
11137
11138 // Operands: (chain, int_id, pointer, mask, vl) or
11139 // (chain, int_id, pointer, offset, mask, vl)
11140 SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
11141 SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
11142 MVT MaskVT = Mask.getSimpleValueType();
11143 MVT MaskContainerVT =
11144 ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
11145 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
11146
11147 SDValue IntID = DAG.getTargetConstant(
11148 IsStrided ? VlssegInts[NF - 2] : VlsegInts[NF - 2], DL, XLenVT);
11149 auto *Load = cast<MemIntrinsicSDNode>(Op);
11150
11151 SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other});
11152 SmallVector<SDValue, 9> Ops = {
11153 Load->getChain(),
11154 IntID,
11155 DAG.getUNDEF(VecTupTy),
11156 Op.getOperand(2),
11157 Mask,
11158 VL,
11159 DAG.getTargetConstant(
11160 RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC, DL, XLenVT),
11161 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
11162 // Insert the stride operand.
11163 if (IsStrided)
11164 Ops.insert(std::next(Ops.begin(), 4), Op.getOperand(3));
11165
11166 SDValue Result =
11167 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11168 Load->getMemoryVT(), Load->getMemOperand());
11169 SmallVector<SDValue, 9> Results;
11170 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
11171 SDValue SubVec = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT,
11172 Result.getValue(0),
11173 DAG.getTargetConstant(RetIdx, DL, MVT::i32));
11174 Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget));
11175 }
11176 Results.push_back(Result.getValue(1));
11177 return DAG.getMergeValues(Results, DL);
11178}
11179
11180SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
11181 SelectionDAG &DAG) const {
11182 unsigned IntNo = Op.getConstantOperandVal(1);
11183 switch (IntNo) {
11184 default:
11185 break;
11186 case Intrinsic::riscv_seg2_load_mask:
11187 case Intrinsic::riscv_seg3_load_mask:
11188 case Intrinsic::riscv_seg4_load_mask:
11189 case Intrinsic::riscv_seg5_load_mask:
11190 case Intrinsic::riscv_seg6_load_mask:
11191 case Intrinsic::riscv_seg7_load_mask:
11192 case Intrinsic::riscv_seg8_load_mask:
11193 case Intrinsic::riscv_sseg2_load_mask:
11194 case Intrinsic::riscv_sseg3_load_mask:
11195 case Intrinsic::riscv_sseg4_load_mask:
11196 case Intrinsic::riscv_sseg5_load_mask:
11197 case Intrinsic::riscv_sseg6_load_mask:
11198 case Intrinsic::riscv_sseg7_load_mask:
11199 case Intrinsic::riscv_sseg8_load_mask:
11200 return lowerFixedVectorSegLoadIntrinsics(IntNo, Op, Subtarget, DAG);
11201
11202 case Intrinsic::riscv_sf_vc_v_x_se:
11203 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
11204 case Intrinsic::riscv_sf_vc_v_i_se:
11205 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
11206 case Intrinsic::riscv_sf_vc_v_xv_se:
11207 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
11208 case Intrinsic::riscv_sf_vc_v_iv_se:
11209 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
11210 case Intrinsic::riscv_sf_vc_v_vv_se:
11211 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
11212 case Intrinsic::riscv_sf_vc_v_fv_se:
11213 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
11214 case Intrinsic::riscv_sf_vc_v_xvv_se:
11215 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
11216 case Intrinsic::riscv_sf_vc_v_ivv_se:
11217 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
11218 case Intrinsic::riscv_sf_vc_v_vvv_se:
11219 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
11220 case Intrinsic::riscv_sf_vc_v_fvv_se:
11221 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
11222 case Intrinsic::riscv_sf_vc_v_xvw_se:
11223 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
11224 case Intrinsic::riscv_sf_vc_v_ivw_se:
11225 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
11226 case Intrinsic::riscv_sf_vc_v_vvw_se:
11227 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
11228 case Intrinsic::riscv_sf_vc_v_fvw_se:
11229 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
11230 }
11231
11232 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11233}
11234
11235static SDValue
11236lowerFixedVectorSegStoreIntrinsics(unsigned IntNo, SDValue Op,
11237 const RISCVSubtarget &Subtarget,
11238 SelectionDAG &DAG) {
11239 bool IsStrided;
11240 switch (IntNo) {
11241 case Intrinsic::riscv_seg2_store_mask:
11242 case Intrinsic::riscv_seg3_store_mask:
11243 case Intrinsic::riscv_seg4_store_mask:
11244 case Intrinsic::riscv_seg5_store_mask:
11245 case Intrinsic::riscv_seg6_store_mask:
11246 case Intrinsic::riscv_seg7_store_mask:
11247 case Intrinsic::riscv_seg8_store_mask:
11248 IsStrided = false;
11249 break;
11250 case Intrinsic::riscv_sseg2_store_mask:
11251 case Intrinsic::riscv_sseg3_store_mask:
11252 case Intrinsic::riscv_sseg4_store_mask:
11253 case Intrinsic::riscv_sseg5_store_mask:
11254 case Intrinsic::riscv_sseg6_store_mask:
11255 case Intrinsic::riscv_sseg7_store_mask:
11256 case Intrinsic::riscv_sseg8_store_mask:
11257 IsStrided = true;
11258 break;
11259 default:
11260 llvm_unreachable("unexpected intrinsic ID");
11261 }
11262
11263 SDLoc DL(Op);
11264 static const Intrinsic::ID VssegInts[] = {
11265 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
11266 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
11267 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
11268 Intrinsic::riscv_vsseg8_mask};
11269 static const Intrinsic::ID VsssegInts[] = {
11270 Intrinsic::riscv_vssseg2_mask, Intrinsic::riscv_vssseg3_mask,
11271 Intrinsic::riscv_vssseg4_mask, Intrinsic::riscv_vssseg5_mask,
11272 Intrinsic::riscv_vssseg6_mask, Intrinsic::riscv_vssseg7_mask,
11273 Intrinsic::riscv_vssseg8_mask};
11274
11275 // Operands: (chain, int_id, vec*, ptr, mask, vl) or
11276 // (chain, int_id, vec*, ptr, stride, mask, vl)
11277 unsigned NF = Op->getNumOperands() - (IsStrided ? 6 : 5);
11278 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
11279 MVT XLenVT = Subtarget.getXLenVT();
11280 MVT VT = Op->getOperand(2).getSimpleValueType();
11281 MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
11282 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
11283 ContainerVT.getScalarSizeInBits();
11284 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
11285
11286 SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
11287 SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
11288 MVT MaskVT = Mask.getSimpleValueType();
11289 MVT MaskContainerVT =
11290 ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
11291 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
11292
11293 SDValue IntID = DAG.getTargetConstant(
11294 IsStrided ? VsssegInts[NF - 2] : VssegInts[NF - 2], DL, XLenVT);
11295 SDValue Ptr = Op->getOperand(NF + 2);
11296
11297 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
11298
11299 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
11300 for (unsigned i = 0; i < NF; i++)
11301 StoredVal = DAG.getNode(
11302 RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
11303 convertToScalableVector(ContainerVT, FixedIntrinsic->getOperand(2 + i),
11304 DAG, Subtarget),
11305 DAG.getTargetConstant(i, DL, MVT::i32));
11306
11307 SmallVector<SDValue, 8> Ops = {
11308 FixedIntrinsic->getChain(),
11309 IntID,
11310 StoredVal,
11311 Ptr,
11312 Mask,
11313 VL,
11314 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
11315 // Insert the stride operand.
11316 if (IsStrided)
11317 Ops.insert(std::next(Ops.begin(), 4),
11318 Op.getOperand(Op.getNumOperands() - 3));
11319
11320 return DAG.getMemIntrinsicNode(
11321 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
11322 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
11323}
11324
11325SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11326 SelectionDAG &DAG) const {
11327 unsigned IntNo = Op.getConstantOperandVal(1);
11328 switch (IntNo) {
11329 default:
11330 break;
11331 case Intrinsic::riscv_seg2_store_mask:
11332 case Intrinsic::riscv_seg3_store_mask:
11333 case Intrinsic::riscv_seg4_store_mask:
11334 case Intrinsic::riscv_seg5_store_mask:
11335 case Intrinsic::riscv_seg6_store_mask:
11336 case Intrinsic::riscv_seg7_store_mask:
11337 case Intrinsic::riscv_seg8_store_mask:
11338 case Intrinsic::riscv_sseg2_store_mask:
11339 case Intrinsic::riscv_sseg3_store_mask:
11340 case Intrinsic::riscv_sseg4_store_mask:
11341 case Intrinsic::riscv_sseg5_store_mask:
11342 case Intrinsic::riscv_sseg6_store_mask:
11343 case Intrinsic::riscv_sseg7_store_mask:
11344 case Intrinsic::riscv_sseg8_store_mask:
11345 return lowerFixedVectorSegStoreIntrinsics(IntNo, Op, Subtarget, DAG);
11346
11347 case Intrinsic::riscv_sf_vc_xv_se:
11348 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
11349 case Intrinsic::riscv_sf_vc_iv_se:
11350 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
11351 case Intrinsic::riscv_sf_vc_vv_se:
11352 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
11353 case Intrinsic::riscv_sf_vc_fv_se:
11354 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
11355 case Intrinsic::riscv_sf_vc_xvv_se:
11356 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
11357 case Intrinsic::riscv_sf_vc_ivv_se:
11358 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
11359 case Intrinsic::riscv_sf_vc_vvv_se:
11360 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
11361 case Intrinsic::riscv_sf_vc_fvv_se:
11362 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
11363 case Intrinsic::riscv_sf_vc_xvw_se:
11364 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
11365 case Intrinsic::riscv_sf_vc_ivw_se:
11366 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
11367 case Intrinsic::riscv_sf_vc_vvw_se:
11368 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
11369 case Intrinsic::riscv_sf_vc_fvw_se:
11370 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
11371 }
11372
11373 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11374}
11375
11376static unsigned getRVVReductionOp(unsigned ISDOpcode) {
11377 switch (ISDOpcode) {
11378 default:
11379 llvm_unreachable("Unhandled reduction");
11380 case ISD::VP_REDUCE_ADD:
11381 case ISD::VECREDUCE_ADD:
11382 return RISCVISD::VECREDUCE_ADD_VL;
11383 case ISD::VP_REDUCE_UMAX:
11384 case ISD::VECREDUCE_UMAX:
11385 return RISCVISD::VECREDUCE_UMAX_VL;
11386 case ISD::VP_REDUCE_SMAX:
11387 case ISD::VECREDUCE_SMAX:
11388 return RISCVISD::VECREDUCE_SMAX_VL;
11389 case ISD::VP_REDUCE_UMIN:
11390 case ISD::VECREDUCE_UMIN:
11391 return RISCVISD::VECREDUCE_UMIN_VL;
11392 case ISD::VP_REDUCE_SMIN:
11393 case ISD::VECREDUCE_SMIN:
11394 return RISCVISD::VECREDUCE_SMIN_VL;
11395 case ISD::VP_REDUCE_AND:
11396 case ISD::VECREDUCE_AND:
11397 return RISCVISD::VECREDUCE_AND_VL;
11398 case ISD::VP_REDUCE_OR:
11399 case ISD::VECREDUCE_OR:
11400 return RISCVISD::VECREDUCE_OR_VL;
11401 case ISD::VP_REDUCE_XOR:
11402 case ISD::VECREDUCE_XOR:
11403 return RISCVISD::VECREDUCE_XOR_VL;
11404 case ISD::VP_REDUCE_FADD:
11405 return RISCVISD::VECREDUCE_FADD_VL;
11406 case ISD::VP_REDUCE_SEQ_FADD:
11407 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
11408 case ISD::VP_REDUCE_FMAX:
11409 case ISD::VP_REDUCE_FMAXIMUM:
11410 return RISCVISD::VECREDUCE_FMAX_VL;
11411 case ISD::VP_REDUCE_FMIN:
11412 case ISD::VP_REDUCE_FMINIMUM:
11413 return RISCVISD::VECREDUCE_FMIN_VL;
11414 }
11415
11416}
11417
11418SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
11419 SelectionDAG &DAG,
11420 bool IsVP) const {
11421 SDLoc DL(Op);
11422 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
11423 MVT VecVT = Vec.getSimpleValueType();
11424 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
11425 Op.getOpcode() == ISD::VECREDUCE_OR ||
11426 Op.getOpcode() == ISD::VECREDUCE_XOR ||
11427 Op.getOpcode() == ISD::VP_REDUCE_AND ||
11428 Op.getOpcode() == ISD::VP_REDUCE_OR ||
11429 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
11430 "Unexpected reduction lowering");
11431
11432 MVT XLenVT = Subtarget.getXLenVT();
11433
11434 MVT ContainerVT = VecVT;
11435 if (VecVT.isFixedLengthVector()) {
11436 ContainerVT = getContainerForFixedLengthVector(VecVT);
11437 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11438 }
11439
11440 SDValue Mask, VL;
11441 if (IsVP) {
11442 Mask = Op.getOperand(2);
11443 VL = Op.getOperand(3);
11444 } else {
11445 std::tie(Mask, VL) =
11446 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11447 }
11448
11449 ISD::CondCode CC;
11450 switch (Op.getOpcode()) {
11451 default:
11452 llvm_unreachable("Unhandled reduction");
11453 case ISD::VECREDUCE_AND:
11454 case ISD::VP_REDUCE_AND: {
11455 // vcpop ~x == 0
11456 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11457 if (IsVP || VecVT.isFixedLengthVector())
11458 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
11459 else
11460 Vec = DAG.getNode(ISD::XOR, DL, ContainerVT, Vec, TrueMask);
11461 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11462 CC = ISD::SETEQ;
11463 break;
11464 }
11465 case ISD::VECREDUCE_OR:
11466 case ISD::VP_REDUCE_OR:
11467 // vcpop x != 0
11468 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11469 CC = ISD::SETNE;
11470 break;
11471 case ISD::VECREDUCE_XOR:
11472 case ISD::VP_REDUCE_XOR: {
11473 // ((vcpop x) & 1) != 0
11474 SDValue One = DAG.getConstant(1, DL, XLenVT);
11475 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11476 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
11477 CC = ISD::SETNE;
11478 break;
11479 }
11480 }
11481
11482 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11483 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
11484 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
11485
11486 if (!IsVP)
11487 return SetCC;
11488
11489 // Now include the start value in the operation.
11490 // Note that we must return the start value when no elements are operated
11491 // upon. The vcpop instructions we've emitted in each case above will return
11492 // 0 for an inactive vector, and so we've already received the neutral value:
11493 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
11494 // can simply include the start value.
11495 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
11496 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
11497}
11498
11499static bool isNonZeroAVL(SDValue AVL) {
11500 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
11501 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
11502 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
11503 (ImmAVL && ImmAVL->getZExtValue() >= 1);
11504}
11505
11506/// Helper to lower a reduction sequence of the form:
11507/// scalar = reduce_op vec, scalar_start
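/// The start value is inserted into element 0 of an LMUL<=1 vector, the RVV
/// reduction is performed on an LMUL=1 operand, and the scalar result is then
/// read back out of element 0.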
11508static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
11509 SDValue StartValue, SDValue Vec, SDValue Mask,
11510 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
11511 const RISCVSubtarget &Subtarget) {
11512 const MVT VecVT = Vec.getSimpleValueType();
11513 const MVT M1VT = RISCVTargetLowering::getM1VT(VecVT);
11514 const MVT XLenVT = Subtarget.getXLenVT();
11515 const bool NonZeroAVL = isNonZeroAVL(VL);
11516
11517 // The reduction needs an LMUL1 input; do the splat at either LMUL1
11518 // or the original VT if fractional.
11519 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
11520 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
11521 // prove it is non-zero. For the AVL=0 case, we need the scalar to
11522 // be the result of the reduction operation.
11523 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
11524 SDValue InitialValue =
11525 lowerScalarInsert(StartValue, InnerVL, InnerVT, DL, DAG, Subtarget);
11526 if (M1VT != InnerVT)
11527 InitialValue =
11528 DAG.getInsertSubvector(DL, DAG.getUNDEF(M1VT), InitialValue, 0);
11529 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
11530 SDValue Policy = DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
11531 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
11532 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
11533 return DAG.getExtractVectorElt(DL, ResVT, Reduction, 0);
11534}
11535
11536SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
11537 SelectionDAG &DAG) const {
11538 SDLoc DL(Op);
11539 SDValue Vec = Op.getOperand(0);
11540 EVT VecEVT = Vec.getValueType();
11541
11542 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
11543
11544 // Due to ordering in legalize types we may have a vector type that needs to
11545 // be split. Do that manually so we can get down to a legal type.
11546 while (getTypeAction(*DAG.getContext(), VecEVT) ==
11547 TargetLowering::TypeSplitVector) {
11548 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
11549 VecEVT = Lo.getValueType();
11550 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
11551 }
11552
11553 // TODO: The type may need to be widened rather than split. Or widened before
11554 // it can be split.
11555 if (!isTypeLegal(VecEVT))
11556 return SDValue();
11557
11558 MVT VecVT = VecEVT.getSimpleVT();
11559 MVT VecEltVT = VecVT.getVectorElementType();
11560 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
11561
11562 MVT ContainerVT = VecVT;
11563 if (VecVT.isFixedLengthVector()) {
11564 ContainerVT = getContainerForFixedLengthVector(VecVT);
11565 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11566 }
11567
11568 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11569
11570 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
11571 switch (BaseOpc) {
11572 case ISD::AND:
11573 case ISD::OR:
11574 case ISD::UMAX:
11575 case ISD::UMIN:
11576 case ISD::SMAX:
11577 case ISD::SMIN:
11578 StartV = DAG.getExtractVectorElt(DL, VecEltVT, Vec, 0);
11579 }
11580 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
11581 Mask, VL, DL, DAG, Subtarget);
11582}
11583
11584// Given a reduction op, this function returns the matching reduction opcode,
11585// the vector SDValue and the scalar SDValue required to lower this to a
11586// RISCVISD node.
11587static std::tuple<unsigned, SDValue, SDValue>
11588getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
11589 const RISCVSubtarget &Subtarget) {
11590 SDLoc DL(Op);
11591 auto Flags = Op->getFlags();
11592 unsigned Opcode = Op.getOpcode();
11593 switch (Opcode) {
11594 default:
11595 llvm_unreachable("Unhandled reduction");
11596 case ISD::VECREDUCE_FADD: {
11597 // Use positive zero if we can. It is cheaper to materialize.
11598 SDValue Zero =
11599 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
11600 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
11601 }
11602 case ISD::VECREDUCE_SEQ_FADD:
11603 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
11604 Op.getOperand(0));
11605 case ISD::VECREDUCE_FMINIMUM:
11606 case ISD::VECREDUCE_FMAXIMUM:
11607 case ISD::VECREDUCE_FMIN:
11608 case ISD::VECREDUCE_FMAX: {
11609 SDValue Front = DAG.getExtractVectorElt(DL, EltVT, Op.getOperand(0), 0);
11610 unsigned RVVOpc =
11611 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
11612 ? RISCVISD::VECREDUCE_FMIN_VL
11613 : RISCVISD::VECREDUCE_FMAX_VL;
11614 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
11615 }
11616 }
11617}
11618
11619SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
11620 SelectionDAG &DAG) const {
11621 SDLoc DL(Op);
11622 MVT VecEltVT = Op.getSimpleValueType();
11623
11624 unsigned RVVOpcode;
11625 SDValue VectorVal, ScalarVal;
11626 std::tie(RVVOpcode, VectorVal, ScalarVal) =
11627 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
11628 MVT VecVT = VectorVal.getSimpleValueType();
11629
11630 MVT ContainerVT = VecVT;
11631 if (VecVT.isFixedLengthVector()) {
11632 ContainerVT = getContainerForFixedLengthVector(VecVT);
11633 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
11634 }
11635
11636 MVT ResVT = Op.getSimpleValueType();
11637 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11638 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
11639 VL, DL, DAG, Subtarget);
11640 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
11641 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
11642 return Res;
11643
11644 if (Op->getFlags().hasNoNaNs())
11645 return Res;
11646
11647 // Force the output to NaN if any element is NaN.
11648 SDValue IsNan =
11649 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
11650 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
11651 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
11652 MVT XLenVT = Subtarget.getXLenVT();
11653 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
11654 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
11655 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
11656 return DAG.getSelect(
11657 DL, ResVT, NoNaNs, Res,
11658 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
11659}
11660
11661SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
11662 SelectionDAG &DAG) const {
11663 SDLoc DL(Op);
11664 unsigned Opc = Op.getOpcode();
11665 SDValue Start = Op.getOperand(0);
11666 SDValue Vec = Op.getOperand(1);
11667 EVT VecEVT = Vec.getValueType();
11668 MVT XLenVT = Subtarget.getXLenVT();
11669
11670 // TODO: The type may need to be widened rather than split. Or widened before
11671 // it can be split.
11672 if (!isTypeLegal(VecEVT))
11673 return SDValue();
11674
11675 MVT VecVT = VecEVT.getSimpleVT();
11676 unsigned RVVOpcode = getRVVReductionOp(Opc);
11677
11678 if (VecVT.isFixedLengthVector()) {
11679 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
11680 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11681 }
11682
11683 SDValue VL = Op.getOperand(3);
11684 SDValue Mask = Op.getOperand(2);
11685 SDValue Res =
11686 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
11687 Vec, Mask, VL, DL, DAG, Subtarget);
11688 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
11689 Op->getFlags().hasNoNaNs())
11690 return Res;
11691
11692 // Propagate NaNs.
11693 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
11694 // Check if any of the elements in Vec is NaN.
11695 SDValue IsNaN = DAG.getNode(
11696 RISCVISD::SETCC_VL, DL, PredVT,
11697 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
11698 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
11699 // Check if the start value is NaN.
11700 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
11701 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
11702 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
11703 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
11704 MVT ResVT = Res.getSimpleValueType();
11705 return DAG.getSelect(
11706 DL, ResVT, NoNaNs, Res,
11707 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
11708}
11709
11710SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
11711 SelectionDAG &DAG) const {
11712 SDValue Vec = Op.getOperand(0);
11713 SDValue SubVec = Op.getOperand(1);
11714 MVT VecVT = Vec.getSimpleValueType();
11715 MVT SubVecVT = SubVec.getSimpleValueType();
11716
11717 SDLoc DL(Op);
11718 MVT XLenVT = Subtarget.getXLenVT();
11719 unsigned OrigIdx = Op.getConstantOperandVal(2);
11720 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
11721
11722 if (OrigIdx == 0 && Vec.isUndef())
11723 return Op;
11724
11725 // We don't have the ability to slide mask vectors up indexed by their i1
11726 // elements; the smallest we can do is i8. Often we are able to bitcast to
11727 // equivalent i8 vectors. Note that when inserting a fixed-length vector
11728 // into a scalable one, we might not necessarily have enough scalable
11729 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
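  // For example, inserting an nxv16i1 mask at element 16 of an nxv64i1 mask
  // can instead be performed as inserting an nxv2i8 vector at element 2 of an
  // nxv8i8 vector.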
11730 if (SubVecVT.getVectorElementType() == MVT::i1) {
11731 if (VecVT.getVectorMinNumElements() >= 8 &&
11732 SubVecVT.getVectorMinNumElements() >= 8) {
11733 assert(OrigIdx % 8 == 0 && "Invalid index");
11734 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
11735 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
11736 "Unexpected mask vector lowering");
11737 OrigIdx /= 8;
11738 SubVecVT =
11739 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
11740 SubVecVT.isScalableVector());
11741 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
11742 VecVT.isScalableVector());
11743 Vec = DAG.getBitcast(VecVT, Vec);
11744 SubVec = DAG.getBitcast(SubVecVT, SubVec);
11745 } else {
11746 // We can't slide this mask vector up indexed by its i1 elements.
11747 // This poses a problem when we wish to insert a scalable vector which
11748 // can't be re-expressed as a larger type. Just choose the slow path and
11749 // extend to a larger type, then truncate back down.
11750 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
11751 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
11752 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
11753 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
11754 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
11755 Op.getOperand(2));
11756 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
11757 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
11758 }
11759 }
11760
11761 // If the subvector is a fixed-length type and we don't know VLEN
11762 // exactly, we cannot use subregister manipulation to simplify the codegen; we
11763 // don't know which register of a LMUL group contains the specific subvector
11764 // as we only know the minimum register size. Therefore we must slide the
11765 // vector group up the full amount.
11766 const auto VLen = Subtarget.getRealVLen();
11767 if (SubVecVT.isFixedLengthVector() && !VLen) {
11768 MVT ContainerVT = VecVT;
11769 if (VecVT.isFixedLengthVector()) {
11770 ContainerVT = getContainerForFixedLengthVector(VecVT);
11771 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11772 }
11773
11774 SubVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), SubVec, 0);
11775
11776 SDValue Mask =
11777 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
11778 // Set the vector length to only the number of elements we care about. Note
11779 // that for slideup this includes the offset.
11780 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
11781 SDValue VL = DAG.getConstant(EndIndex, DL, XLenVT);
11782
11783 // Use tail agnostic policy if we're inserting over Vec's tail.
11784 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
11785 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
11786 Policy = RISCVVType::TAIL_AGNOSTIC;
11787
11788 // If we're inserting into the lowest elements, use a tail undisturbed
11789 // vmv.v.v.
11790 if (OrigIdx == 0) {
11791 SubVec =
11792 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
11793 } else {
11794 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
11795 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
11796 SlideupAmt, Mask, VL, Policy);
11797 }
11798
11799 if (VecVT.isFixedLengthVector())
11800 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
11801 return DAG.getBitcast(Op.getValueType(), SubVec);
11802 }
11803
11804 MVT ContainerVecVT = VecVT;
11805 if (VecVT.isFixedLengthVector()) {
11806 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
11807 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
11808 }
11809
11810 MVT ContainerSubVecVT = SubVecVT;
11811 if (SubVecVT.isFixedLengthVector()) {
11812 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
11813 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
11814 }
11815
11816 unsigned SubRegIdx;
11817 ElementCount RemIdx;
11818 // insert_subvector scales the index by vscale if the subvector is scalable,
11819 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
11820 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
11821 if (SubVecVT.isFixedLengthVector()) {
11822 assert(VLen);
11823 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
11824 auto Decompose =
11826 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
11827 SubRegIdx = Decompose.first;
11828 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
11829 (OrigIdx % Vscale));
11830 } else {
11831 auto Decompose =
11833 ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
11834 SubRegIdx = Decompose.first;
11835 RemIdx = ElementCount::getScalable(Decompose.second);
11836 }
11837
11838 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
11839 assert(isPowerOf2_64(
11840 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
11841 bool ExactlyVecRegSized =
11842 Subtarget.expandVScale(SubVecVT.getSizeInBits())
11843 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
11844
11845 // 1. If the Idx has been completely eliminated and this subvector's size is
11846 // a vector register or a multiple thereof, or the surrounding elements are
11847 // undef, then this is a subvector insert which naturally aligns to a vector
11848 // register. These can easily be handled using subregister manipulation.
11849 // 2. If the subvector isn't an exact multiple of a valid register group size,
11850 // then the insertion must preserve the undisturbed elements of the register.
11851 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
11852 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
11853 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
11854 // of that LMUL=1 type back into the larger vector (resolving to another
11855 // subregister operation). See below for how our VSLIDEUP works. We go via a
11856 // LMUL=1 type to avoid allocating a large register group to hold our
11857 // subvector.
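  // As an illustration of case 2: inserting an nxv1i32 subvector into an
  // nxv8i32 vector extracts the LMUL=1 register (nxv2i32) containing the
  // insertion point, slides the subvector into place within it, and inserts
  // that register back into the LMUL=4 register group.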
11858 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
11859 if (SubVecVT.isFixedLengthVector()) {
11860 // We may get NoSubRegister if inserting at index 0 and the subvec
11861 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
11862 if (SubRegIdx == RISCV::NoSubRegister) {
11863 assert(OrigIdx == 0);
11864 return Op;
11865 }
11866
11867 // Use an insert_subvector that will resolve to an insert subreg.
11868 assert(VLen);
11869 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
11870 SDValue Insert =
11871 DAG.getInsertSubvector(DL, Vec, SubVec, OrigIdx / Vscale);
11872 if (VecVT.isFixedLengthVector())
11873 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
11874 return Insert;
11875 }
11876 return Op;
11877 }
11878
11879 // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
11880 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
11881 // (in our case undisturbed). This means we can set up a subvector insertion
11882 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
11883 // size of the subvector.
11884 MVT InterSubVT = ContainerVecVT;
11885 SDValue AlignedExtract = Vec;
11886 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
11887 if (SubVecVT.isFixedLengthVector()) {
11888 assert(VLen);
11889 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
11890 }
11891 if (ContainerVecVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVecVT))) {
11892 InterSubVT = RISCVTargetLowering::getM1VT(ContainerVecVT);
11893 // Extract a subvector equal to the nearest full vector register type. This
11894 // should resolve to an EXTRACT_SUBREG instruction.
11895 AlignedExtract = DAG.getExtractSubvector(DL, InterSubVT, Vec, AlignedIdx);
11896 }
11897
11898 SubVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(InterSubVT), SubVec, 0);
11899
11900 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
11901
11902 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
11903 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
11904
11905 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
11906 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
11907 if (Subtarget.expandVScale(EndIndex) ==
11908 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
11909 Policy = RISCVVType::TAIL_AGNOSTIC;
11910
11911 // If we're inserting into the lowest elements, use a tail undisturbed
11912 // vmv.v.v.
11913 if (RemIdx.isZero()) {
11914 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
11915 SubVec, VL);
11916 } else {
11917 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
11918
11919 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
11920 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
11921
11922 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
11923 SlideupAmt, Mask, VL, Policy);
11924 }
11925
11926 // If required, insert this subvector back into the correct vector register.
11927 // This should resolve to an INSERT_SUBREG instruction.
11928 if (ContainerVecVT.bitsGT(InterSubVT))
11929 SubVec = DAG.getInsertSubvector(DL, Vec, SubVec, AlignedIdx);
11930
11931 if (VecVT.isFixedLengthVector())
11932 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
11933
11934 // We might have bitcast from a mask type: cast back to the original type if
11935 // required.
11936 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
11937}
11938
11939SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
11940 SelectionDAG &DAG) const {
11941 SDValue Vec = Op.getOperand(0);
11942 MVT SubVecVT = Op.getSimpleValueType();
11943 MVT VecVT = Vec.getSimpleValueType();
11944
11945 SDLoc DL(Op);
11946 MVT XLenVT = Subtarget.getXLenVT();
11947 unsigned OrigIdx = Op.getConstantOperandVal(1);
11948 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
11949
11950 // With an index of 0 this is a cast-like subvector, which can be performed
11951 // with subregister operations.
11952 if (OrigIdx == 0)
11953 return Op;
11954
11955 // We don't have the ability to slide mask vectors down indexed by their i1
11956 // elements; the smallest we can do is i8. Often we are able to bitcast to
11957 // equivalent i8 vectors. Note that when extracting a fixed-length vector
11958 // from a scalable one, we might not necessarily have enough scalable
11959 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
11960 if (SubVecVT.getVectorElementType() == MVT::i1) {
11961 if (VecVT.getVectorMinNumElements() >= 8 &&
11962 SubVecVT.getVectorMinNumElements() >= 8) {
11963 assert(OrigIdx % 8 == 0 && "Invalid index");
11964 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
11965 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
11966 "Unexpected mask vector lowering");
11967 OrigIdx /= 8;
11968 SubVecVT =
11969 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
11970 SubVecVT.isScalableVector());
11971 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
11972 VecVT.isScalableVector());
11973 Vec = DAG.getBitcast(VecVT, Vec);
11974 } else {
11975 // We can't slide this mask vector down, indexed by its i1 elements.
11976 // This poses a problem when we wish to extract a scalable vector which
11977 // can't be re-expressed as a larger type. Just choose the slow path and
11978 // extend to a larger type, then truncate back down.
11979 // TODO: We could probably improve this when extracting certain fixed-length
11980 // subvectors from fixed-length vectors, where we can extract as i8 and shift
11981 // the correct element right to reach the desired subvector.
11982 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
11983 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
11984 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
11985 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
11986 Op.getOperand(1));
11987 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
11988 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
11989 }
11990 }
11991
11992 const auto VLen = Subtarget.getRealVLen();
11993
11994 // If the subvector is a fixed-length type and we don't know VLEN
11995 // exactly, we cannot use subregister manipulation to simplify the codegen; we
11996 // don't know which register of a LMUL group contains the specific subvector
11997 // as we only know the minimum register size. Therefore we must slide the
11998 // vector group down the full amount.
11999 if (SubVecVT.isFixedLengthVector() && !VLen) {
12000 MVT ContainerVT = VecVT;
12001 if (VecVT.isFixedLengthVector()) {
12002 ContainerVT = getContainerForFixedLengthVector(VecVT);
12003 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12004 }
12005
12006 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
12007 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
12008 if (auto ShrunkVT =
12009 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
12010 ContainerVT = *ShrunkVT;
12011 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, 0);
12012 }
12013
12014 SDValue Mask =
12015 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
12016 // Set the vector length to only the number of elements we care about. This
12017 // avoids sliding down elements we're going to discard straight away.
12018 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
12019 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
12020 SDValue Slidedown =
12021 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12022 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
12023 // Now we can use a cast-like subvector extract to get the result.
12024 Slidedown = DAG.getExtractSubvector(DL, SubVecVT, Slidedown, 0);
12025 return DAG.getBitcast(Op.getValueType(), Slidedown);
12026 }
12027
12028 if (VecVT.isFixedLengthVector()) {
12029 VecVT = getContainerForFixedLengthVector(VecVT);
12030 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
12031 }
12032
12033 MVT ContainerSubVecVT = SubVecVT;
12034 if (SubVecVT.isFixedLengthVector())
12035 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
12036
12037 unsigned SubRegIdx;
12038 ElementCount RemIdx;
12039 // extract_subvector scales the index by vscale if the subvector is scalable,
12040 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
12041 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
12042 if (SubVecVT.isFixedLengthVector()) {
12043 assert(VLen);
12044 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12045 auto Decompose =
12047 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
12048 SubRegIdx = Decompose.first;
12049 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
12050 (OrigIdx % Vscale));
12051 } else {
12052 auto Decompose =
12054 VecVT, ContainerSubVecVT, OrigIdx, TRI);
12055 SubRegIdx = Decompose.first;
12056 RemIdx = ElementCount::getScalable(Decompose.second);
12057 }
12058
12059 // If the Idx has been completely eliminated then this is a subvector extract
12060 // which naturally aligns to a vector register. These can easily be handled
12061 // using subregister manipulation. We use an extract_subvector that will
12062 // resolve to an extract subreg.
12063 if (RemIdx.isZero()) {
12064 if (SubVecVT.isFixedLengthVector()) {
12065 assert(VLen);
12066 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12067 Vec =
12068 DAG.getExtractSubvector(DL, ContainerSubVecVT, Vec, OrigIdx / Vscale);
12069 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
12070 }
12071 return Op;
12072 }
12073
12074 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
12075 // was > M1 then the index would need to be a multiple of VLMAX, and so would
12076 // divide exactly.
12077 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
12078 getLMUL(ContainerSubVecVT) == RISCVVType::LMUL_1);
12079
12080 // If the vector type is an LMUL-group type, extract a subvector equal to the
12081 // nearest full vector register type.
12082 MVT InterSubVT = VecVT;
12083 if (VecVT.bitsGT(RISCVTargetLowering::getM1VT(VecVT))) {
12084 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
12085 // we should have successfully decomposed the extract into a subregister.
12086 // We use an extract_subvector that will resolve to a subreg extract.
12087 assert(SubRegIdx != RISCV::NoSubRegister);
12088 (void)SubRegIdx;
12089 unsigned Idx = OrigIdx - RemIdx.getKnownMinValue();
12090 if (SubVecVT.isFixedLengthVector()) {
12091 assert(VLen);
12092 Idx /= *VLen / RISCV::RVVBitsPerBlock;
12093 }
12094 InterSubVT = RISCVTargetLowering::getM1VT(VecVT);
12095 Vec = DAG.getExtractSubvector(DL, InterSubVT, Vec, Idx);
12096 }
12097
12098 // Slide this vector register down by the desired number of elements in order
12099 // to place the desired subvector starting at element 0.
12100 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
12101 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
12102 if (SubVecVT.isFixedLengthVector())
12103 VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
12104 SDValue Slidedown =
12105 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
12106 Vec, SlidedownAmt, Mask, VL);
12107
12108 // Now the vector is in the right position, extract our final subvector. This
12109 // should resolve to a COPY.
12110 Slidedown = DAG.getExtractSubvector(DL, SubVecVT, Slidedown, 0);
12111
12112 // We might have bitcast from a mask type: cast back to the original type if
12113 // required.
12114 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
12115}
12116
12117// Widen a vector's operands to i8, then truncate its results back to the
12118// original type, typically i1. All operand and result types must be the same.
12119static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
12120 SelectionDAG &DAG) {
12121 MVT VT = N.getSimpleValueType();
12122 MVT WideVT = VT.changeVectorElementType(MVT::i8);
12123 SmallVector<SDValue, 4> WideOps;
12124 for (SDValue Op : N->ops()) {
12125 assert(Op.getSimpleValueType() == VT &&
12126 "Operands and result must be same type");
12127 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
12128 }
12129
12130 unsigned NumVals = N->getNumValues();
12131
12132 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
12133 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
12134 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
12135 SmallVector<SDValue, 4> TruncVals;
12136 for (unsigned I = 0; I < NumVals; I++) {
12137 TruncVals.push_back(
12138 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
12139 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
12140 }
12141
12142 if (TruncVals.size() > 1)
12143 return DAG.getMergeValues(TruncVals, DL);
12144 return TruncVals.front();
12145}
12146
12147SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
12148 SelectionDAG &DAG) const {
12149 SDLoc DL(Op);
12150 MVT VecVT = Op.getSimpleValueType();
12151
12152 const unsigned Factor = Op->getNumValues();
12153 assert(Factor <= 8);
12154
12155 // i1 element vectors need to be widened to e8.
12156 if (VecVT.getVectorElementType() == MVT::i1)
12157 return widenVectorOpsToi8(Op, DL, DAG);
12158
12159 // Convert to scalable vectors first.
12160 if (VecVT.isFixedLengthVector()) {
12161 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
12162 SmallVector<SDValue, 8> Ops(Factor);
12163 for (unsigned i = 0U; i < Factor; ++i)
12164 Ops[i] = convertToScalableVector(ContainerVT, Op.getOperand(i), DAG,
12165 Subtarget);
12166
12167 SmallVector<EVT, 8> VTs(Factor, ContainerVT);
12168 SDValue NewDeinterleave =
12169 DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, DAG.getVTList(VTs), Ops);
12170
12171 SmallVector<SDValue, 8> Res(Factor);
12172 for (unsigned i = 0U; i < Factor; ++i)
12173 Res[i] = convertFromScalableVector(VecVT, NewDeinterleave.getValue(i),
12174 DAG, Subtarget);
12175 return DAG.getMergeValues(Res, DL);
12176 }
12177
12178 // If concatenating would exceed LMUL=8, we need to split.
12179 if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
12180 (8 * RISCV::RVVBitsPerBlock)) {
12181 SmallVector<SDValue, 8> Ops(Factor * 2);
12182 for (unsigned i = 0; i != Factor; ++i) {
12183 auto [OpLo, OpHi] = DAG.SplitVectorOperand(Op.getNode(), i);
12184 Ops[i * 2] = OpLo;
12185 Ops[i * 2 + 1] = OpHi;
12186 }
12187
12188 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
12189
12190 SDValue Lo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, DAG.getVTList(VTs),
12191 ArrayRef(Ops).slice(0, Factor));
12192 SDValue Hi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, DAG.getVTList(VTs),
12193 ArrayRef(Ops).slice(Factor, Factor));
12194
12195 SmallVector<SDValue, 8> Res(Factor);
12196 for (unsigned i = 0; i != Factor; ++i)
12197 Res[i] = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo.getValue(i),
12198 Hi.getValue(i));
12199
12200 return DAG.getMergeValues(Res, DL);
12201 }
12202
12203 if (Subtarget.hasVendorXRivosVizip() && Factor == 2) {
12204 MVT VT = Op->getSimpleValueType(0);
12205 SDValue V1 = Op->getOperand(0);
12206 SDValue V2 = Op->getOperand(1);
12207
12208 // For fractional LMUL, check if we can use a higher LMUL
12209 // instruction to avoid a vslidedown.
12210 if (SDValue Src = foldConcatVector(V1, V2);
12211 Src && RISCVTargetLowering::getM1VT(VT).bitsGT(VT)) {
12212 EVT NewVT = VT.getDoubleNumVectorElementsVT();
12213 Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
12214 // Freeze the source so we can increase its use count.
12215 Src = DAG.getFreeze(Src);
12216 SDValue Even = lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, Src,
12217 DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
12218 SDValue Odd = lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, Src,
12219 DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
12220 Even = DAG.getExtractSubvector(DL, VT, Even, 0);
12221 Odd = DAG.getExtractSubvector(DL, VT, Odd, 0);
12222 return DAG.getMergeValues({Even, Odd}, DL);
12223 }
12224
12225 // Freeze the sources so we can increase their use count.
12226 V1 = DAG.getFreeze(V1);
12227 V2 = DAG.getFreeze(V2);
12228 SDValue Even =
12229 lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, V1, V2, DL, DAG, Subtarget);
12230 SDValue Odd =
12231 lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, V1, V2, DL, DAG, Subtarget);
12232 return DAG.getMergeValues({Even, Odd}, DL);
12233 }
12234
12235 SmallVector<SDValue, 8> Ops(Op->op_values());
12236
12237 // Concatenate the vectors as one vector to deinterleave
12238 MVT ConcatVT =
12239      MVT::getVectorVT(VecVT.getVectorElementType(),
12240                       VecVT.getVectorElementCount().multiplyCoefficientBy(
12241                           PowerOf2Ceil(Factor)));
12242 if (Ops.size() < PowerOf2Ceil(Factor))
12243 Ops.append(PowerOf2Ceil(Factor) - Factor, DAG.getUNDEF(VecVT));
12244 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, Ops);
12245
12246 if (Factor == 2) {
12247 // We can deinterleave through vnsrl.wi if the element type is smaller than
12248 // ELEN
12249 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
12250 SDValue Even = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 0, DAG);
12251 SDValue Odd = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 1, DAG);
12252 return DAG.getMergeValues({Even, Odd}, DL);
12253 }
12254
12255    // For the even/odd masks, use a vmv.v.x of an i8 constant to fill the
12256    // largest possible mask vector, then extract the required subvector. Doing
12257    // this (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask
12258    // creation to be rematerialized during register allocation to reduce
12259    // register pressure if needed.
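    // Illustrative example (not from the original source): splatting the byte
    // 0b01010101 and bitcasting the nxv8i8 result to nxv64i1 produces the mask
    // 1,0,1,0,... (bit 0 of every byte is set), which selects the even
    // elements; 0b10101010 likewise produces 0,1,0,1,... for the odd elements.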
12260
12261 MVT MaskVT = ConcatVT.changeVectorElementType(MVT::i1);
12262
12263 SDValue EvenSplat = DAG.getConstant(0b01010101, DL, MVT::nxv8i8);
12264 EvenSplat = DAG.getBitcast(MVT::nxv64i1, EvenSplat);
12265 SDValue EvenMask = DAG.getExtractSubvector(DL, MaskVT, EvenSplat, 0);
12266
12267 SDValue OddSplat = DAG.getConstant(0b10101010, DL, MVT::nxv8i8);
12268 OddSplat = DAG.getBitcast(MVT::nxv64i1, OddSplat);
12269 SDValue OddMask = DAG.getExtractSubvector(DL, MaskVT, OddSplat, 0);
12270
12271 // vcompress the even and odd elements into two separate vectors
12272 SDValue EvenWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
12273 EvenMask, DAG.getUNDEF(ConcatVT));
12274 SDValue OddWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
12275 OddMask, DAG.getUNDEF(ConcatVT));
12276
12277    // Extract the result half of the vcompress for even and odd
12278 SDValue Even = DAG.getExtractSubvector(DL, VecVT, EvenWide, 0);
12279 SDValue Odd = DAG.getExtractSubvector(DL, VecVT, OddWide, 0);
12280
12281 return DAG.getMergeValues({Even, Odd}, DL);
12282 }
12283
12284  // Store with a unit-stride store and load it back with a segmented load.
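  // Illustrative example (not from the original source), for Factor == 3: the
  // unit-stride store writes the concatenated (still interleaved) data
  //   a0 b0 c0 a1 b1 c1 ...
  // to the stack slot, and the vlseg3 load below then returns the
  // deinterleaved vectors {a0, a1, ...}, {b0, b1, ...} and {c0, c1, ...}.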
12285 MVT XLenVT = Subtarget.getXLenVT();
12286 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
12287 SDValue Passthru = DAG.getUNDEF(ConcatVT);
12288
12289 // Allocate a stack slot.
12290 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
12291  SDValue StackPtr =
12292      DAG.CreateStackTemporary(ConcatVT.getStoreSize(), Alignment);
12293 auto &MF = DAG.getMachineFunction();
12294 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12295 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12296
12297 SDValue StoreOps[] = {DAG.getEntryNode(),
12298 DAG.getTargetConstant(Intrinsic::riscv_vse, DL, XLenVT),
12299 Concat, StackPtr, VL};
12300
12301 SDValue Chain = DAG.getMemIntrinsicNode(
12302 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), StoreOps,
12303      ConcatVT.getVectorElementType(), PtrInfo, Alignment,
12304      MachineMemOperand::MOStore);
12305
12306 static const Intrinsic::ID VlsegIntrinsicsIds[] = {
12307 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
12308 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
12309 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
12310 Intrinsic::riscv_vlseg8_mask};
12311
12312 SDValue LoadOps[] = {
12313 Chain,
12314 DAG.getTargetConstant(VlsegIntrinsicsIds[Factor - 2], DL, XLenVT),
12315 Passthru,
12316 StackPtr,
12317 Mask,
12318 VL,
12319      DAG.getTargetConstant(
12320          RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC, DL, XLenVT),
12321      DAG.getTargetConstant(Log2_64(VecVT.getScalarSizeInBits()), DL, XLenVT)};
12322
12323 unsigned Sz =
12324 Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
12325 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, Factor);
12326
12327  SDValue Load = DAG.getMemIntrinsicNode(
12328      ISD::INTRINSIC_W_CHAIN, DL, DAG.getVTList({VecTupTy, MVT::Other}),
12329      LoadOps, ConcatVT.getVectorElementType(), PtrInfo, Alignment,
12330      MachineMemOperand::MOLoad);
12331
12332 SmallVector<SDValue, 8> Res(Factor);
12333
12334 for (unsigned i = 0U; i < Factor; ++i)
12335 Res[i] = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, VecVT, Load,
12336 DAG.getTargetConstant(i, DL, MVT::i32));
12337
12338 return DAG.getMergeValues(Res, DL);
12339}
12340
12341SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
12342 SelectionDAG &DAG) const {
12343 SDLoc DL(Op);
12344 MVT VecVT = Op.getSimpleValueType();
12345
12346 const unsigned Factor = Op.getNumOperands();
12347 assert(Factor <= 8);
12348
12349 // i1 vectors need to be widened to i8
12350 if (VecVT.getVectorElementType() == MVT::i1)
12351 return widenVectorOpsToi8(Op, DL, DAG);
12352
12353 // Convert to scalable vectors first.
12354 if (VecVT.isFixedLengthVector()) {
12355 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
12356    SmallVector<SDValue, 8> Ops(Factor);
12357    for (unsigned i = 0U; i < Factor; ++i)
12358 Ops[i] = convertToScalableVector(ContainerVT, Op.getOperand(i), DAG,
12359 Subtarget);
12360
12361 SmallVector<EVT, 8> VTs(Factor, ContainerVT);
12362 SDValue NewInterleave = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs, Ops);
12363
12364 SmallVector<SDValue, 8> Res(Factor);
12365 for (unsigned i = 0U; i < Factor; ++i)
12366 Res[i] = convertFromScalableVector(VecVT, NewInterleave.getValue(i), DAG,
12367 Subtarget);
12368 return DAG.getMergeValues(Res, DL);
12369 }
12370
12371 MVT XLenVT = Subtarget.getXLenVT();
12372 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
12373
12374  // If the combined width would exceed LMUL=8, we need to split and reassemble.
12375 if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
12376 (8 * RISCV::RVVBitsPerBlock)) {
12377 SmallVector<SDValue, 8> Ops(Factor * 2);
12378 for (unsigned i = 0; i != Factor; ++i) {
12379 auto [OpLo, OpHi] = DAG.SplitVectorOperand(Op.getNode(), i);
12380 Ops[i] = OpLo;
12381 Ops[i + Factor] = OpHi;
12382 }
12383
12384 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
12385
12386 SDValue Res[] = {DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs,
12387 ArrayRef(Ops).take_front(Factor)),
12388                     DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs,
12389                                 ArrayRef(Ops).drop_front(Factor))};
12390
12391 SmallVector<SDValue, 8> Concats(Factor);
12392 for (unsigned i = 0; i != Factor; ++i) {
12393 unsigned IdxLo = 2 * i;
12394 unsigned IdxHi = 2 * i + 1;
12395 Concats[i] = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
12396 Res[IdxLo / Factor].getValue(IdxLo % Factor),
12397 Res[IdxHi / Factor].getValue(IdxHi % Factor));
12398 }
12399
12400 return DAG.getMergeValues(Concats, DL);
12401 }
12402
12403 SDValue Interleaved;
12404
12405 // Spill to the stack using a segment store for simplicity.
12406 if (Factor != 2) {
12407 EVT MemVT =
12408        EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(),
12409                         VecVT.getVectorElementCount() * Factor);
12410
12411 // Allocate a stack slot.
12412 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
12413    SDValue StackPtr =
12414        DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
12415 EVT PtrVT = StackPtr.getValueType();
12416 auto &MF = DAG.getMachineFunction();
12417 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12418 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12419
12420 static const Intrinsic::ID IntrIds[] = {
12421 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
12422 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
12423 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
12424 Intrinsic::riscv_vsseg8_mask,
12425 };
12426
12427 unsigned Sz =
12428 Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
12429 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, Factor);
12430
12431 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
12432 for (unsigned i = 0; i < Factor; i++)
12433 StoredVal =
12434 DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
12435 Op.getOperand(i), DAG.getTargetConstant(i, DL, MVT::i32));
12436
12437 SDValue Ops[] = {DAG.getEntryNode(),
12438 DAG.getTargetConstant(IntrIds[Factor - 2], DL, XLenVT),
12439 StoredVal,
12440 StackPtr,
12441 Mask,
12442 VL,
12443                     DAG.getTargetConstant(Log2_64(VecVT.getScalarSizeInBits()),
12444                                           DL, XLenVT)};
12445
12446 SDValue Chain = DAG.getMemIntrinsicNode(
12447 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
12448        VecVT.getVectorElementType(), PtrInfo, Alignment,
12449        MachineMemOperand::MOStore);
12450
12451 SmallVector<SDValue, 8> Loads(Factor);
12452
12453    SDValue Increment =
12454        DAG.getVScale(DL, PtrVT,
12455 APInt(PtrVT.getFixedSizeInBits(),
12456 VecVT.getStoreSize().getKnownMinValue()));
12457 for (unsigned i = 0; i != Factor; ++i) {
12458 if (i != 0)
12459 StackPtr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, Increment);
12460
12461 Loads[i] = DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
12462 }
12463
12464 return DAG.getMergeValues(Loads, DL);
12465 }
12466
12467 // Use ri.vzip2{a,b} if available
12468 // TODO: Figure out the best lowering for the spread variants
12469 if (Subtarget.hasVendorXRivosVizip() && !Op.getOperand(0).isUndef() &&
12470 !Op.getOperand(1).isUndef()) {
12471 // Freeze the sources so we can increase their use count.
12472 SDValue V1 = DAG.getFreeze(Op->getOperand(0));
12473 SDValue V2 = DAG.getFreeze(Op->getOperand(1));
12474 SDValue Lo = lowerVZIP(RISCVISD::RI_VZIP2A_VL, V1, V2, DL, DAG, Subtarget);
12475 SDValue Hi = lowerVZIP(RISCVISD::RI_VZIP2B_VL, V1, V2, DL, DAG, Subtarget);
12476 return DAG.getMergeValues({Lo, Hi}, DL);
12477 }
12478
12479 // If the element type is smaller than ELEN, then we can interleave with
12480 // vwaddu.vv and vwmaccu.vx
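  // A sketch of the widening trick (not from the original source), for SEW=8:
  //   zext(a) + zext(b) + zext(b) * 255 = zext(a) + zext(b) * 256
  // so each 16-bit element of the result holds a[i] in its low byte and b[i]
  // in its high byte, i.e. the interleaved pair {a[i], b[i]} when the result
  // is reinterpreted at the original element width.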
12481 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
12482 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
12483 DAG, Subtarget);
12484 } else {
12485    // Otherwise, fall back to using vrgatherei16.vv
12486 MVT ConcatVT =
12487        MVT::getVectorVT(VecVT.getVectorElementType(),
12488                         VecVT.getVectorElementCount().multiplyCoefficientBy(2));
12489    SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
12490 Op.getOperand(0), Op.getOperand(1));
12491
12492 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
12493
12494 // 0 1 2 3 4 5 6 7 ...
12495 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
12496
12497 // 1 1 1 1 1 1 1 1 ...
12498 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
12499
12500 // 1 0 1 0 1 0 1 0 ...
12501 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
12502 OddMask = DAG.getSetCC(
12503 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
12504        DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
12505        ISD::SETNE);
12506
12507 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
12508
12509 // Build up the index vector for interleaving the concatenated vector
12510 // 0 0 1 1 2 2 3 3 ...
12511 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
12512 // 0 n 1 n+1 2 n+2 3 n+3 ...
12513 Idx =
12514 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
12515
12516 // Then perform the interleave
12517 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
12518 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
12519 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
12520 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
12521 }
12522
12523 // Extract the two halves from the interleaved result
12524 SDValue Lo = DAG.getExtractSubvector(DL, VecVT, Interleaved, 0);
12525 SDValue Hi = DAG.getExtractSubvector(DL, VecVT, Interleaved,
12526 VecVT.getVectorMinNumElements());
12527
12528 return DAG.getMergeValues({Lo, Hi}, DL);
12529}
12530
12531// Lower step_vector to the vid instruction. Any non-identity step value must
12532// be accounted for by manual expansion.
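// For example (illustrative): step_vector with a step of 4 can be emitted as
//   vid.v v8
//   vsll.vi v8, v8, 2
// while a non-power-of-two step such as 6 needs a vid.v followed by a
// multiply with the splatted step value.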
12533SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
12534 SelectionDAG &DAG) const {
12535 SDLoc DL(Op);
12536 MVT VT = Op.getSimpleValueType();
12537 assert(VT.isScalableVector() && "Expected scalable vector");
12538 MVT XLenVT = Subtarget.getXLenVT();
12539 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
12540 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
12541 uint64_t StepValImm = Op.getConstantOperandVal(0);
12542 if (StepValImm != 1) {
12543 if (isPowerOf2_64(StepValImm)) {
12544 SDValue StepVal =
12545 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
12546 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
12547 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
12548 } else {
12549 SDValue StepVal = lowerScalarSplat(
12550 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
12551 VL, VT, DL, DAG, Subtarget);
12552 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
12553 }
12554 }
12555 return StepVec;
12556}
12557
12558// Implement vector_reverse using vrgather.vv with indices determined by
12559// subtracting the id of each element from (VLMAX-1). This will convert
12560// the indices like so:
12561// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
12562// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
12563SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
12564 SelectionDAG &DAG) const {
12565 SDLoc DL(Op);
12566 MVT VecVT = Op.getSimpleValueType();
12567 if (VecVT.getVectorElementType() == MVT::i1) {
12568 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
12569 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
12570 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
12571 return DAG.getSetCC(DL, VecVT, Op2,
12572 DAG.getConstant(0, DL, Op2.getValueType()), ISD::SETNE);
12573 }
12574
12575 MVT ContainerVT = VecVT;
12576 SDValue Vec = Op.getOperand(0);
12577 if (VecVT.isFixedLengthVector()) {
12578 ContainerVT = getContainerForFixedLengthVector(VecVT);
12579 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12580 }
12581
12582 MVT XLenVT = Subtarget.getXLenVT();
12583 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
12584
12585 // On some uarchs vrgather.vv will read from every input register for each
12586  // output register, regardless of the indices. However, to reverse a vector,
12587 // each output register only needs to read from one register. So decompose it
12588 // into LMUL * M1 vrgather.vvs, so we get O(LMUL) performance instead of
12589 // O(LMUL^2).
12590 //
12591 // vsetvli a1, zero, e64, m4, ta, ma
12592 // vrgatherei16.vv v12, v8, v16
12593 // ->
12594 // vsetvli a1, zero, e64, m1, ta, ma
12595 // vrgather.vv v15, v8, v16
12596 // vrgather.vv v14, v9, v16
12597 // vrgather.vv v13, v10, v16
12598 // vrgather.vv v12, v11, v16
12599 if (ContainerVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVT)) &&
12600 ContainerVT.getVectorElementCount().isKnownMultipleOf(2)) {
12601 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
12602 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, Lo.getSimpleValueType(), Lo);
12603 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, Hi.getSimpleValueType(), Hi);
12604 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ContainerVT, Hi, Lo);
12605
12606 // Fixed length vectors might not fit exactly into their container, and so
12607 // leave a gap in the front of the vector after being reversed. Slide this
12608 // away.
12609 //
12610 // x x x x 3 2 1 0 <- v4i16 @ vlen=128
12611 // 0 1 2 3 x x x x <- reverse
12612 // x x x x 0 1 2 3 <- vslidedown.vx
12613 if (VecVT.isFixedLengthVector()) {
12614 SDValue Offset = DAG.getNode(
12615 ISD::SUB, DL, XLenVT,
12616 DAG.getElementCount(DL, XLenVT, ContainerVT.getVectorElementCount()),
12617 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()));
12618 Concat =
12619 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12620 DAG.getUNDEF(ContainerVT), Concat, Offset, Mask, VL);
12621 Concat = convertFromScalableVector(VecVT, Concat, DAG, Subtarget);
12622 }
12623 return Concat;
12624 }
12625
12626 unsigned EltSize = ContainerVT.getScalarSizeInBits();
12627 unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
12628 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
12629 unsigned MaxVLMAX =
12630 VecVT.isFixedLengthVector()
12631 ? VecVT.getVectorNumElements()
12632 : RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
12633
12634 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
12635 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
12636
12637 // If this is SEW=8 and VLMAX is potentially more than 256, we need
12638 // to use vrgatherei16.vv.
12639 if (MaxVLMAX > 256 && EltSize == 8) {
12640    // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
12641 // Reverse each half, then reassemble them in reverse order.
12642    // NOTE: It's also possible that after splitting, VLMAX no longer
12643 // requires vrgatherei16.vv.
12644 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
12645 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
12646 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
12647 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
12648 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
12649 // Reassemble the low and high pieces reversed.
12650 // FIXME: This is a CONCAT_VECTORS.
12651 SDValue Res = DAG.getInsertSubvector(DL, DAG.getUNDEF(VecVT), Hi, 0);
12652 return DAG.getInsertSubvector(DL, Res, Lo,
12653 LoVT.getVectorMinNumElements());
12654 }
12655
12656 // Just promote the int type to i16 which will double the LMUL.
12657 IntVT = MVT::getVectorVT(MVT::i16, ContainerVT.getVectorElementCount());
12658 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12659 }
12660
12661 // At LMUL > 1, do the index computation in 16 bits to reduce register
12662 // pressure.
12663 if (IntVT.getScalarType().bitsGT(MVT::i16) &&
12664 IntVT.bitsGT(RISCVTargetLowering::getM1VT(IntVT))) {
12665 assert(isUInt<16>(MaxVLMAX - 1)); // Largest VLMAX is 65536 @ zvl65536b
12666 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12667 IntVT = IntVT.changeVectorElementType(MVT::i16);
12668 }
12669
12670 // Calculate VLMAX-1 for the desired SEW.
12671 SDValue VLMinus1 = DAG.getNode(
12672 ISD::SUB, DL, XLenVT,
12673 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()),
12674 DAG.getConstant(1, DL, XLenVT));
12675
12676 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
12677 bool IsRV32E64 =
12678 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
12679 SDValue SplatVL;
12680 if (!IsRV32E64)
12681 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
12682 else
12683 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
12684 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
12685
12686 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
12687 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
12688 DAG.getUNDEF(IntVT), Mask, VL);
12689
12690 SDValue Gather = DAG.getNode(GatherOpc, DL, ContainerVT, Vec, Indices,
12691 DAG.getUNDEF(ContainerVT), Mask, VL);
12692 if (VecVT.isFixedLengthVector())
12693 Gather = convertFromScalableVector(VecVT, Gather, DAG, Subtarget);
12694 return Gather;
12695}
12696
12697SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
12698 SelectionDAG &DAG) const {
12699 SDLoc DL(Op);
12700 SDValue V1 = Op.getOperand(0);
12701 SDValue V2 = Op.getOperand(1);
12702 MVT XLenVT = Subtarget.getXLenVT();
12703 MVT VecVT = Op.getSimpleValueType();
12704
12705 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
12706
12707 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
12708 SDValue DownOffset, UpOffset;
12709 if (ImmValue >= 0) {
12710 // The operand is a TargetConstant, we need to rebuild it as a regular
12711 // constant.
12712 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
12713 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
12714 } else {
12715 // The operand is a TargetConstant, we need to rebuild it as a regular
12716 // constant rather than negating the original operand.
12717 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
12718 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
12719 }
12720
12721 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
12722
12723 SDValue SlideDown = getVSlidedown(
12724 DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1, DownOffset, TrueMask,
12725 Subtarget.hasVLDependentLatency() ? UpOffset
12726 : DAG.getRegister(RISCV::X0, XLenVT));
12727 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
12728                     TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
12729                     RISCVVType::TAIL_AGNOSTIC);
12730}
12731
12732SDValue
12733RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
12734 SelectionDAG &DAG) const {
12735 SDLoc DL(Op);
12736 auto *Load = cast<LoadSDNode>(Op);
12737
12738  assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
12739 Load->getMemoryVT(),
12740 *Load->getMemOperand()) &&
12741 "Expecting a correctly-aligned load");
12742
12743 MVT VT = Op.getSimpleValueType();
12744 MVT XLenVT = Subtarget.getXLenVT();
12745 MVT ContainerVT = getContainerForFixedLengthVector(VT);
12746
12747 // If we know the exact VLEN and our fixed length vector completely fills
12748 // the container, use a whole register load instead.
12749 const auto [MinVLMAX, MaxVLMAX] =
12750 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
12751 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
12752 RISCVTargetLowering::getM1VT(ContainerVT).bitsLE(ContainerVT)) {
12753 MachineMemOperand *MMO = Load->getMemOperand();
12754 SDValue NewLoad =
12755 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
12756 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
12757 MMO->getAAInfo(), MMO->getRanges());
12758 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
12759 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
12760 }
12761
12762 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
12763
12764 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
12765 SDValue IntID = DAG.getTargetConstant(
12766 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
12767 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
12768 if (!IsMaskOp)
12769 Ops.push_back(DAG.getUNDEF(ContainerVT));
12770 Ops.push_back(Load->getBasePtr());
12771 Ops.push_back(VL);
12772 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12773  SDValue NewLoad =
12774      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
12775 Load->getMemoryVT(), Load->getMemOperand());
12776
12777 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
12778 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
12779}
12780
12781SDValue
12782RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
12783 SelectionDAG &DAG) const {
12784 SDLoc DL(Op);
12785 auto *Store = cast<StoreSDNode>(Op);
12786
12787  assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
12788 Store->getMemoryVT(),
12789 *Store->getMemOperand()) &&
12790 "Expecting a correctly-aligned store");
12791
12792 SDValue StoreVal = Store->getValue();
12793 MVT VT = StoreVal.getSimpleValueType();
12794 MVT XLenVT = Subtarget.getXLenVT();
12795
12796  // If the size is less than a byte, we need to pad with zeros to make a byte.
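  // For example (illustrative): a v4i1 value is widened to v8i1 by inserting
  // it into a zero vector, so a full byte is stored and the upper four bits
  // are zero.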
12797 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
12798 VT = MVT::v8i1;
12799 StoreVal =
12800 DAG.getInsertSubvector(DL, DAG.getConstant(0, DL, VT), StoreVal, 0);
12801 }
12802
12803 MVT ContainerVT = getContainerForFixedLengthVector(VT);
12804
12805 SDValue NewValue =
12806 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
12807
12808 // If we know the exact VLEN and our fixed length vector completely fills
12809 // the container, use a whole register store instead.
12810 const auto [MinVLMAX, MaxVLMAX] =
12811 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
12812 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
12813 RISCVTargetLowering::getM1VT(ContainerVT).bitsLE(ContainerVT)) {
12814 MachineMemOperand *MMO = Store->getMemOperand();
12815 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
12816 MMO->getPointerInfo(), MMO->getBaseAlign(),
12817 MMO->getFlags(), MMO->getAAInfo());
12818 }
12819
12820 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
12821
12822 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
12823 SDValue IntID = DAG.getTargetConstant(
12824 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
12825 return DAG.getMemIntrinsicNode(
12826 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
12827 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
12828 Store->getMemoryVT(), Store->getMemOperand());
12829}
12830
12831SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
12832 SelectionDAG &DAG) const {
12833 SDLoc DL(Op);
12834 MVT VT = Op.getSimpleValueType();
12835
12836 const auto *MemSD = cast<MemSDNode>(Op);
12837 EVT MemVT = MemSD->getMemoryVT();
12838 MachineMemOperand *MMO = MemSD->getMemOperand();
12839 SDValue Chain = MemSD->getChain();
12840 SDValue BasePtr = MemSD->getBasePtr();
12841
12842 SDValue Mask, PassThru, VL;
12843 bool IsExpandingLoad = false;
12844 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
12845 Mask = VPLoad->getMask();
12846 PassThru = DAG.getUNDEF(VT);
12847 VL = VPLoad->getVectorLength();
12848 } else {
12849 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
12850 Mask = MLoad->getMask();
12851 PassThru = MLoad->getPassThru();
12852 IsExpandingLoad = MLoad->isExpandingLoad();
12853 }
12854
12855 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12856
12857 MVT XLenVT = Subtarget.getXLenVT();
12858
12859 MVT ContainerVT = VT;
12860 if (VT.isFixedLengthVector()) {
12861 ContainerVT = getContainerForFixedLengthVector(VT);
12862 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
12863 if (!IsUnmasked) {
12864 MVT MaskVT = getMaskTypeFor(ContainerVT);
12865 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12866 }
12867 }
12868
12869 if (!VL)
12870 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12871
12872 SDValue ExpandingVL;
12873 if (!IsUnmasked && IsExpandingLoad) {
12874 ExpandingVL = VL;
12875 VL =
12876 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
12877 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
12878 }
12879
12880 unsigned IntID = IsUnmasked || IsExpandingLoad ? Intrinsic::riscv_vle
12881 : Intrinsic::riscv_vle_mask;
12882 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12883 if (IntID == Intrinsic::riscv_vle)
12884 Ops.push_back(DAG.getUNDEF(ContainerVT));
12885 else
12886 Ops.push_back(PassThru);
12887 Ops.push_back(BasePtr);
12888 if (IntID == Intrinsic::riscv_vle_mask)
12889 Ops.push_back(Mask);
12890 Ops.push_back(VL);
12891 if (IntID == Intrinsic::riscv_vle_mask)
12892 Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));
12893
12894 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12895
12896 SDValue Result =
12897 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12898 Chain = Result.getValue(1);
12899 if (ExpandingVL) {
12900 MVT IndexVT = ContainerVT;
12901 if (ContainerVT.isFloatingPoint())
12902 IndexVT = ContainerVT.changeVectorElementTypeToInteger();
12903
12904 MVT IndexEltVT = IndexVT.getVectorElementType();
12905 bool UseVRGATHEREI16 = false;
12906    // If the index vector is an i8 vector and the element count exceeds 256,
12907    // we should change the element type of the index vector to i16 to avoid
12908    // overflow.
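    // For example (illustrative): with 512 elements, viota can produce index
    // values up to 511, which do not fit in i8; switching to i16 indices and
    // vrgatherei16 avoids the wrap-around.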
12909 if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
12910 // FIXME: We need to do vector splitting manually for LMUL=8 cases.
12911 assert(getLMUL(IndexVT) != RISCVVType::LMUL_8);
12912 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
12913 UseVRGATHEREI16 = true;
12914 }
12915
12916 SDValue Iota =
12917 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
12918 DAG.getConstant(Intrinsic::riscv_viota, DL, XLenVT),
12919 DAG.getUNDEF(IndexVT), Mask, ExpandingVL);
12920 Result =
12921 DAG.getNode(UseVRGATHEREI16 ? RISCVISD::VRGATHEREI16_VV_VL
12922 : RISCVISD::VRGATHER_VV_VL,
12923 DL, ContainerVT, Result, Iota, PassThru, Mask, ExpandingVL);
12924 }
12925
12926 if (VT.isFixedLengthVector())
12927 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12928
12929 return DAG.getMergeValues({Result, Chain}, DL);
12930}
12931
12932SDValue RISCVTargetLowering::lowerLoadFF(SDValue Op, SelectionDAG &DAG) const {
12933 SDLoc DL(Op);
12934 MVT VT = Op->getSimpleValueType(0);
12935
12936 const auto *VPLoadFF = cast<VPLoadFFSDNode>(Op);
12937 EVT MemVT = VPLoadFF->getMemoryVT();
12938 MachineMemOperand *MMO = VPLoadFF->getMemOperand();
12939 SDValue Chain = VPLoadFF->getChain();
12940 SDValue BasePtr = VPLoadFF->getBasePtr();
12941
12942 SDValue Mask = VPLoadFF->getMask();
12943 SDValue VL = VPLoadFF->getVectorLength();
12944
12945 MVT XLenVT = Subtarget.getXLenVT();
12946
12947 MVT ContainerVT = VT;
12948 if (VT.isFixedLengthVector()) {
12949 ContainerVT = getContainerForFixedLengthVector(VT);
12950 MVT MaskVT = getMaskTypeFor(ContainerVT);
12951 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12952 }
12953
12954 unsigned IntID = Intrinsic::riscv_vleff_mask;
12955 SDValue Ops[] = {
12956 Chain,
12957 DAG.getTargetConstant(IntID, DL, XLenVT),
12958 DAG.getUNDEF(ContainerVT),
12959 BasePtr,
12960 Mask,
12961      VL,
12962      DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT)};
12963
12964 SDVTList VTs = DAG.getVTList({ContainerVT, Op->getValueType(1), MVT::Other});
12965
12966 SDValue Result =
12967 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12968 SDValue OutVL = Result.getValue(1);
12969 Chain = Result.getValue(2);
12970
12971 if (VT.isFixedLengthVector())
12972 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12973
12974 return DAG.getMergeValues({Result, OutVL, Chain}, DL);
12975}
12976
12977SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
12978 SelectionDAG &DAG) const {
12979 SDLoc DL(Op);
12980
12981 const auto *MemSD = cast<MemSDNode>(Op);
12982 EVT MemVT = MemSD->getMemoryVT();
12983 MachineMemOperand *MMO = MemSD->getMemOperand();
12984 SDValue Chain = MemSD->getChain();
12985 SDValue BasePtr = MemSD->getBasePtr();
12986 SDValue Val, Mask, VL;
12987
12988 bool IsCompressingStore = false;
12989 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
12990 Val = VPStore->getValue();
12991 Mask = VPStore->getMask();
12992 VL = VPStore->getVectorLength();
12993 } else {
12994 const auto *MStore = cast<MaskedStoreSDNode>(Op);
12995 Val = MStore->getValue();
12996 Mask = MStore->getMask();
12997 IsCompressingStore = MStore->isCompressingStore();
12998 }
12999
13000 bool IsUnmasked =
13001 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
13002
13003 MVT VT = Val.getSimpleValueType();
13004 MVT XLenVT = Subtarget.getXLenVT();
13005
13006 MVT ContainerVT = VT;
13007 if (VT.isFixedLengthVector()) {
13008 ContainerVT = getContainerForFixedLengthVector(VT);
13009
13010 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
13011 if (!IsUnmasked || IsCompressingStore) {
13012 MVT MaskVT = getMaskTypeFor(ContainerVT);
13013 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13014 }
13015 }
13016
13017 if (!VL)
13018 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
13019
13020 if (IsCompressingStore) {
13021 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
13022 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
13023 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
13024 VL =
13025 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
13026 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
13027 }
13028
13029 unsigned IntID =
13030 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
13031 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
13032 Ops.push_back(Val);
13033 Ops.push_back(BasePtr);
13034 if (!IsUnmasked)
13035 Ops.push_back(Mask);
13036 Ops.push_back(VL);
13037
13038  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
13039 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
13040}
13041
13042SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
13043 SelectionDAG &DAG) const {
13044 SDLoc DL(Op);
13045 SDValue Val = Op.getOperand(0);
13046 SDValue Mask = Op.getOperand(1);
13047 SDValue Passthru = Op.getOperand(2);
13048
13049 MVT VT = Val.getSimpleValueType();
13050 MVT XLenVT = Subtarget.getXLenVT();
13051 MVT ContainerVT = VT;
13052 if (VT.isFixedLengthVector()) {
13053 ContainerVT = getContainerForFixedLengthVector(VT);
13054 MVT MaskVT = getMaskTypeFor(ContainerVT);
13055 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
13056 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13057 Passthru = convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
13058 }
13059
13060 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
13061 SDValue Res =
13062 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
13063 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
13064 Passthru, Val, Mask, VL);
13065
13066 if (VT.isFixedLengthVector())
13067 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
13068
13069 return Res;
13070}
13071
13072SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
13073 SelectionDAG &DAG) const {
13074 unsigned Opc = Op.getOpcode();
13075 SDLoc DL(Op);
13076 SDValue Chain = Op.getOperand(0);
13077 SDValue Op1 = Op.getOperand(1);
13078 SDValue Op2 = Op.getOperand(2);
13079 SDValue CC = Op.getOperand(3);
13080 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
13081 MVT VT = Op.getSimpleValueType();
13082 MVT InVT = Op1.getSimpleValueType();
13083
13084  // RVV VMFEQ/VMFNE are quiet compares that do not signal on qNaN, so we expand
13085  // strict_fsetccs with an OEQ/UNE condition code.
13086 if (Opc == ISD::STRICT_FSETCCS) {
13087    // Expand strict_fsetccs(x, y, oeq) to
13088    // (and strict_fsetccs(x, y, ole), strict_fsetccs(y, x, ole))
13089 SDVTList VTList = Op->getVTList();
13090 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
13091 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
13092 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
13093 Op2, OLECCVal);
13094 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
13095 Op1, OLECCVal);
13096 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
13097 Tmp1.getValue(1), Tmp2.getValue(1));
13098 // Tmp1 and Tmp2 might be the same node.
13099 if (Tmp1 != Tmp2)
13100 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
13101 return DAG.getMergeValues({Tmp1, OutChain}, DL);
13102 }
13103
13104 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
13105 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
13106 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
13107 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
13108 Op2, OEQCCVal);
13109 SDValue Res = DAG.getNOT(DL, OEQ, VT);
13110 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
13111 }
13112 }
13113
13114 MVT ContainerInVT = InVT;
13115 if (InVT.isFixedLengthVector()) {
13116 ContainerInVT = getContainerForFixedLengthVector(InVT);
13117 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
13118 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
13119 }
13120 MVT MaskVT = getMaskTypeFor(ContainerInVT);
13121
13122 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
13123
13124 SDValue Res;
13125 if (Opc == ISD::STRICT_FSETCC &&
13126 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
13127 CCVal == ISD::SETOLE)) {
13128    // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
13129    // is only active when both input elements are ordered (i.e. x == x holds).
13130 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
13131 SDValue OrderMask1 = DAG.getNode(
13132 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
13133 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
13134 True, VL});
13135 SDValue OrderMask2 = DAG.getNode(
13136 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
13137 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
13138 True, VL});
13139 Mask =
13140 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
13141 // Use Mask as the passthru operand to let the result be 0 if either of the
13142 // inputs is unordered.
13143 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
13144 DAG.getVTList(MaskVT, MVT::Other),
13145 {Chain, Op1, Op2, CC, Mask, Mask, VL});
13146 } else {
13147 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
13148 : RISCVISD::STRICT_FSETCCS_VL;
13149 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
13150 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
13151 }
13152
13153 if (VT.isFixedLengthVector()) {
13154 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
13155 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
13156 }
13157 return Res;
13158}
13159
13160// Lower vector ABS to smax(X, sub(0, X)).
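// For example (illustrative): abs(-3) = smax(-3, 0 - (-3)) = smax(-3, 3) = 3.
// The most negative value maps to itself, since sub(0, INT_MIN) wraps back to
// INT_MIN, matching the usual ISD::ABS semantics.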
13161SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
13162 SDLoc DL(Op);
13163 MVT VT = Op.getSimpleValueType();
13164 SDValue X = Op.getOperand(0);
13165
13166 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
13167 "Unexpected type for ISD::ABS");
13168
13169 MVT ContainerVT = VT;
13170 if (VT.isFixedLengthVector()) {
13171 ContainerVT = getContainerForFixedLengthVector(VT);
13172 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
13173 }
13174
13175 SDValue Mask, VL;
13176 if (Op->getOpcode() == ISD::VP_ABS) {
13177 Mask = Op->getOperand(1);
13178 if (VT.isFixedLengthVector())
13179 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
13180 Subtarget);
13181 VL = Op->getOperand(2);
13182 } else
13183 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
13184
13185 SDValue SplatZero = DAG.getNode(
13186 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
13187 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
13188 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
13189 DAG.getUNDEF(ContainerVT), Mask, VL);
13190 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
13191 DAG.getUNDEF(ContainerVT), Mask, VL);
13192
13193 if (VT.isFixedLengthVector())
13194 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
13195 return Max;
13196}
13197
13198SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
13199 SelectionDAG &DAG) const {
13200 const auto &TSInfo =
13201 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
13202
13203 unsigned NewOpc = getRISCVVLOp(Op);
13204 bool HasPassthruOp = TSInfo.hasPassthruOp(NewOpc);
13205 bool HasMask = TSInfo.hasMaskOp(NewOpc);
13206
13207 MVT VT = Op.getSimpleValueType();
13208 MVT ContainerVT = getContainerForFixedLengthVector(VT);
13209
13210  // Create list of operands by converting existing ones to scalable types.
13211  SmallVector<SDValue, 6> Ops;
13212 for (const SDValue &V : Op->op_values()) {
13213 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
13214
13215 // Pass through non-vector operands.
13216 if (!V.getValueType().isVector()) {
13217 Ops.push_back(V);
13218 continue;
13219 }
13220
13221 // "cast" fixed length vector to a scalable vector.
13222 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
13223 "Only fixed length vectors are supported!");
13224 MVT VContainerVT = ContainerVT.changeVectorElementType(
13225 V.getSimpleValueType().getVectorElementType());
13226 Ops.push_back(convertToScalableVector(VContainerVT, V, DAG, Subtarget));
13227 }
13228
13229 SDLoc DL(Op);
13230 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
13231 if (HasPassthruOp)
13232 Ops.push_back(DAG.getUNDEF(ContainerVT));
13233 if (HasMask)
13234 Ops.push_back(Mask);
13235 Ops.push_back(VL);
13236
13237 // StrictFP operations have two result values. Their lowered result should
13238  // have the same result count.
13239 if (Op->isStrictFPOpcode()) {
13240 SDValue ScalableRes =
13241 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
13242 Op->getFlags());
13243 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
13244 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
13245 }
13246
13247 SDValue ScalableRes =
13248 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
13249 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
13250}
13251
13252// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
13253// * Operands of each node are assumed to be in the same order.
13254// * The EVL operand is promoted from i32 to i64 on RV64.
13255// * Fixed-length vectors are converted to their scalable-vector container
13256// types.
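// An illustrative sketch (not from the original source): with a 128-bit
// minimum VLEN, a vp.add on v4i32 becomes RISCVISD::ADD_VL on its nxv2i32
// container, with an undef passthru inserted ahead of the converted mask and
// the mask and EVL operands passed through in the same relative order.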
13257SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
13258 const auto &TSInfo =
13259 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
13260
13261 unsigned RISCVISDOpc = getRISCVVLOp(Op);
13262 bool HasPassthruOp = TSInfo.hasPassthruOp(RISCVISDOpc);
13263
13264 SDLoc DL(Op);
13265  MVT VT = Op.getSimpleValueType();
13266  SmallVector<SDValue, 16> Ops;
13267
13268 MVT ContainerVT = VT;
13269 if (VT.isFixedLengthVector())
13270 ContainerVT = getContainerForFixedLengthVector(VT);
13271
13272 for (const auto &OpIdx : enumerate(Op->ops())) {
13273 SDValue V = OpIdx.value();
13274 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
13275    // Add a dummy passthru value before the mask, or, if there isn't a mask,
13276    // before the EVL.
13277 if (HasPassthruOp) {
13278 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
13279 if (MaskIdx) {
13280 if (*MaskIdx == OpIdx.index())
13281 Ops.push_back(DAG.getUNDEF(ContainerVT));
13282 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
13283 OpIdx.index()) {
13284 if (Op.getOpcode() == ISD::VP_MERGE) {
13285 // For VP_MERGE, copy the false operand instead of an undef value.
13286 Ops.push_back(Ops.back());
13287 } else {
13288 assert(Op.getOpcode() == ISD::VP_SELECT);
13289 // For VP_SELECT, add an undef value.
13290 Ops.push_back(DAG.getUNDEF(ContainerVT));
13291 }
13292 }
13293 }
13294 // VFCVT_RM_X_F_VL requires a rounding mode to be injected before the VL.
13295 if (RISCVISDOpc == RISCVISD::VFCVT_RM_X_F_VL &&
13296 ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == OpIdx.index())
13297      Ops.push_back(DAG.getTargetConstant(RISCVFPRndMode::DYN, DL,
13298                                          Subtarget.getXLenVT()));
13299 // Pass through operands which aren't fixed-length vectors.
13300 if (!V.getValueType().isFixedLengthVector()) {
13301 Ops.push_back(V);
13302 continue;
13303 }
13304 // "cast" fixed length vector to a scalable vector.
13305 MVT OpVT = V.getSimpleValueType();
13306 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
13307 assert(useRVVForFixedLengthVectorVT(OpVT) &&
13308 "Only fixed length vectors are supported!");
13309 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
13310 }
13311
13312 if (!VT.isFixedLengthVector())
13313 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
13314
13315 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
13316
13317 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
13318}
13319
13320SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
13321 SelectionDAG &DAG) const {
13322 SDLoc DL(Op);
13323 MVT VT = Op.getSimpleValueType();
13324
13325 SDValue Src = Op.getOperand(0);
13326 // NOTE: Mask is dropped.
13327 SDValue VL = Op.getOperand(2);
13328
13329 MVT ContainerVT = VT;
13330 if (VT.isFixedLengthVector()) {
13331 ContainerVT = getContainerForFixedLengthVector(VT);
13332 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
13333 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
13334 }
13335
13336 MVT XLenVT = Subtarget.getXLenVT();
13337 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
13338 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13339 DAG.getUNDEF(ContainerVT), Zero, VL);
13340
13341 SDValue SplatValue = DAG.getSignedConstant(
13342 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
13343 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13344 DAG.getUNDEF(ContainerVT), SplatValue, VL);
13345
13346 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
13347 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
13348 if (!VT.isFixedLengthVector())
13349 return Result;
13350 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13351}
13352
13353SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
13354 SelectionDAG &DAG) const {
13355 SDLoc DL(Op);
13356 MVT VT = Op.getSimpleValueType();
13357
13358 SDValue Op1 = Op.getOperand(0);
13359 SDValue Op2 = Op.getOperand(1);
13360 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
13361 // NOTE: Mask is dropped.
13362 SDValue VL = Op.getOperand(4);
13363
13364 MVT ContainerVT = VT;
13365 if (VT.isFixedLengthVector()) {
13366 ContainerVT = getContainerForFixedLengthVector(VT);
13367 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13368 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13369 }
13370
13371  SDValue Result;
13372 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
13373
13374 switch (Condition) {
13375 default:
13376 break;
13377 // X != Y --> (X^Y)
13378 case ISD::SETNE:
13379 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
13380 break;
13381 // X == Y --> ~(X^Y)
13382 case ISD::SETEQ: {
13383 SDValue Temp =
13384 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
13385 Result =
13386 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
13387 break;
13388 }
13389 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
13390 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
13391 case ISD::SETGT:
13392 case ISD::SETULT: {
13393 SDValue Temp =
13394 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
13395 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
13396 break;
13397 }
13398 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
13399 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
13400 case ISD::SETLT:
13401 case ISD::SETUGT: {
13402 SDValue Temp =
13403 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
13404 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
13405 break;
13406 }
13407 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
13408 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
13409 case ISD::SETGE:
13410 case ISD::SETULE: {
13411 SDValue Temp =
13412 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
13413 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
13414 break;
13415 }
13416 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
13417 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
13418 case ISD::SETLE:
13419 case ISD::SETUGE: {
13420 SDValue Temp =
13421 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
13422 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
13423 break;
13424 }
13425 }
13426
13427 if (!VT.isFixedLengthVector())
13428 return Result;
13429 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13430}
13431
13432// Lower Floating-Point/Integer Type-Convert VP SDNodes
13433SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
13434 SelectionDAG &DAG) const {
13435 SDLoc DL(Op);
13436
13437 SDValue Src = Op.getOperand(0);
13438 SDValue Mask = Op.getOperand(1);
13439 SDValue VL = Op.getOperand(2);
13440 unsigned RISCVISDOpc = getRISCVVLOp(Op);
13441
13442 MVT DstVT = Op.getSimpleValueType();
13443 MVT SrcVT = Src.getSimpleValueType();
13444 if (DstVT.isFixedLengthVector()) {
13445 DstVT = getContainerForFixedLengthVector(DstVT);
13446 SrcVT = getContainerForFixedLengthVector(SrcVT);
13447 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
13448 MVT MaskVT = getMaskTypeFor(DstVT);
13449 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13450 }
13451
13452 unsigned DstEltSize = DstVT.getScalarSizeInBits();
13453 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
13454
13455  SDValue Result;
13456  if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
13457 if (SrcVT.isInteger()) {
13458 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
13459
13460 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
13461 ? RISCVISD::VSEXT_VL
13462 : RISCVISD::VZEXT_VL;
13463
13464 // Do we need to do any pre-widening before converting?
13465 if (SrcEltSize == 1) {
13466 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
13467 MVT XLenVT = Subtarget.getXLenVT();
13468 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
13469 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
13470 DAG.getUNDEF(IntVT), Zero, VL);
13471 SDValue One = DAG.getSignedConstant(
13472 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
13473 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
13474 DAG.getUNDEF(IntVT), One, VL);
13475 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
13476 ZeroSplat, DAG.getUNDEF(IntVT), VL);
13477 } else if (DstEltSize > (2 * SrcEltSize)) {
13478 // Widen before converting.
13479 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
13480 DstVT.getVectorElementCount());
13481 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
13482 }
13483
13484 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
13485 } else {
13486 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
13487 "Wrong input/output vector types");
13488
13489 // Convert f16 to f32 then convert f32 to i64.
13490 if (DstEltSize > (2 * SrcEltSize)) {
13491 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
13492 MVT InterimFVT =
13493 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
13494 Src =
13495 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
13496 }
13497
13498 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
13499 }
13500 } else { // Narrowing + Conversion
13501 if (SrcVT.isInteger()) {
13502 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
13503      // First do a narrowing conversion to an FP type half the size, then
13504      // round to a smaller FP type if needed.
13505
13506 MVT InterimFVT = DstVT;
13507 if (SrcEltSize > (2 * DstEltSize)) {
13508 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
13509 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
13510 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
13511 }
13512
13513 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
13514
13515 if (InterimFVT != DstVT) {
13516 Src = Result;
13517 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
13518 }
13519 } else {
13520 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
13521 "Wrong input/output vector types");
13522 // First do a narrowing conversion to an integer half the size, then
13523 // truncate if needed.
13524
13525 if (DstEltSize == 1) {
13526 // First convert to the same size integer, then convert to mask using
13527 // setcc.
13528 assert(SrcEltSize >= 16 && "Unexpected FP type!");
13529 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
13530 DstVT.getVectorElementCount());
13531 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
13532
13533 // Compare the integer result to 0. The integer should be 0 or 1/-1,
13534 // otherwise the conversion was undefined.
13535 MVT XLenVT = Subtarget.getXLenVT();
13536 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
13537 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
13538 DAG.getUNDEF(InterimIVT), SplatZero, VL);
13539 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
13540 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
13541 DAG.getUNDEF(DstVT), Mask, VL});
13542 } else {
13543 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
13544 DstVT.getVectorElementCount());
13545
13546 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
13547
13548 while (InterimIVT != DstVT) {
13549 SrcEltSize /= 2;
13550 Src = Result;
13551 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
13552 DstVT.getVectorElementCount());
13553 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
13554 Src, Mask, VL);
13555 }
13556 }
13557 }
13558 }
13559
13560 MVT VT = Op.getSimpleValueType();
13561 if (!VT.isFixedLengthVector())
13562 return Result;
13563 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13564}
13565
13566SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
13567 SelectionDAG &DAG) const {
13568 SDLoc DL(Op);
13569 MVT VT = Op.getSimpleValueType();
13570 MVT XLenVT = Subtarget.getXLenVT();
13571
13572 SDValue Mask = Op.getOperand(0);
13573 SDValue TrueVal = Op.getOperand(1);
13574 SDValue FalseVal = Op.getOperand(2);
13575 SDValue VL = Op.getOperand(3);
13576
13577 // Use default legalization if a vector of EVL type would be legal.
13578  EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), VL.getValueType(),
13579                                  VT.getVectorElementCount());
13580 if (isTypeLegal(EVLVecVT))
13581 return SDValue();
13582
13583 MVT ContainerVT = VT;
13584 if (VT.isFixedLengthVector()) {
13585 ContainerVT = getContainerForFixedLengthVector(VT);
13586 Mask = convertToScalableVector(ContainerVT, Mask, DAG, Subtarget);
13587 TrueVal = convertToScalableVector(ContainerVT, TrueVal, DAG, Subtarget);
13588 FalseVal = convertToScalableVector(ContainerVT, FalseVal, DAG, Subtarget);
13589 }
13590
13591 // Promote to a vector of i8.
13592 MVT PromotedVT = ContainerVT.changeVectorElementType(MVT::i8);
13593
13594 // Promote TrueVal and FalseVal using VLMax.
13595 // FIXME: Is there a better way to do this?
13596 SDValue VLMax = DAG.getRegister(RISCV::X0, XLenVT);
13597 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
13598 DAG.getUNDEF(PromotedVT),
13599 DAG.getConstant(1, DL, XLenVT), VLMax);
13600 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
13601 DAG.getUNDEF(PromotedVT),
13602 DAG.getConstant(0, DL, XLenVT), VLMax);
13603 TrueVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, TrueVal, SplatOne,
13604 SplatZero, DAG.getUNDEF(PromotedVT), VL);
13605 // Any element past VL uses FalseVal, so use VLMax
13606 FalseVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, FalseVal,
13607 SplatOne, SplatZero, DAG.getUNDEF(PromotedVT), VLMax);
13608
13609 // VP_MERGE the two promoted values.
13610 SDValue VPMerge = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, Mask,
13611 TrueVal, FalseVal, FalseVal, VL);
13612
13613 // Convert back to mask.
13614 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
13615 SDValue Result = DAG.getNode(
13616 RISCVISD::SETCC_VL, DL, ContainerVT,
13617 {VPMerge, DAG.getConstant(0, DL, PromotedVT), DAG.getCondCode(ISD::SETNE),
13618 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), TrueMask, VLMax});
13619
13620 if (VT.isFixedLengthVector())
13621 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
13622 return Result;
13623}
13624
13625SDValue
13626RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
13627 SelectionDAG &DAG) const {
13628 using namespace SDPatternMatch;
13629
13630 SDLoc DL(Op);
13631
13632 SDValue Op1 = Op.getOperand(0);
13633 SDValue Op2 = Op.getOperand(1);
13634 SDValue Offset = Op.getOperand(2);
13635 SDValue Mask = Op.getOperand(3);
13636 SDValue EVL1 = Op.getOperand(4);
13637 SDValue EVL2 = Op.getOperand(5);
13638
13639 const MVT XLenVT = Subtarget.getXLenVT();
13640 MVT VT = Op.getSimpleValueType();
13641 MVT ContainerVT = VT;
13642 if (VT.isFixedLengthVector()) {
13643 ContainerVT = getContainerForFixedLengthVector(VT);
13644 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13645 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13646 MVT MaskVT = getMaskTypeFor(ContainerVT);
13647 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13648 }
13649
13650 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
13651 if (IsMaskVector) {
13652 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
13653
13654 // Expand input operands
13655 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13656 DAG.getUNDEF(ContainerVT),
13657 DAG.getConstant(1, DL, XLenVT), EVL1);
13658 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13659 DAG.getUNDEF(ContainerVT),
13660 DAG.getConstant(0, DL, XLenVT), EVL1);
13661 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
13662 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
13663
13664 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13665 DAG.getUNDEF(ContainerVT),
13666 DAG.getConstant(1, DL, XLenVT), EVL2);
13667 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13668 DAG.getUNDEF(ContainerVT),
13669 DAG.getConstant(0, DL, XLenVT), EVL2);
13670 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
13671 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
13672 }
13673
13674 auto getVectorFirstEle = [](SDValue Vec) {
13675 SDValue FirstEle;
13676 if (sd_match(Vec, m_InsertElt(m_Value(), m_Value(FirstEle), m_Zero())))
13677 return FirstEle;
13678
13679 if (Vec.getOpcode() == ISD::SPLAT_VECTOR ||
13680        Vec.getOpcode() == ISD::BUILD_VECTOR)
13681      return Vec.getOperand(0);
13682
13683 return SDValue();
13684 };
13685
13686 if (!IsMaskVector && isNullConstant(Offset) && isOneConstant(EVL1))
13687 if (auto FirstEle = getVectorFirstEle(Op->getOperand(0))) {
13688 MVT EltVT = ContainerVT.getVectorElementType();
13689      SDValue Result;
13690      if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
13691 EltVT == MVT::bf16) {
13692 EltVT = EltVT.changeTypeToInteger();
13693 ContainerVT = ContainerVT.changeVectorElementType(EltVT);
13694 Op2 = DAG.getBitcast(ContainerVT, Op2);
13695 FirstEle =
13696 DAG.getAnyExtOrTrunc(DAG.getBitcast(EltVT, FirstEle), DL, XLenVT);
13697 }
13698 Result = DAG.getNode(EltVT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL
13699 : RISCVISD::VSLIDE1UP_VL,
13700 DL, ContainerVT, DAG.getUNDEF(ContainerVT), Op2,
13701 FirstEle, Mask, EVL2);
13702 Result = DAG.getBitcast(
13703          VT.isFixedLengthVector() ? getContainerForFixedLengthVector(VT) : VT,
13704          Result);
13705 return VT.isFixedLengthVector()
13706 ? convertFromScalableVector(VT, Result, DAG, Subtarget)
13707 : Result;
13708 }
13709
13710 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
13711 SDValue DownOffset, UpOffset;
13712 if (ImmValue >= 0) {
13713 // The operand is a TargetConstant, we need to rebuild it as a regular
13714 // constant.
13715 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
13716 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
13717 } else {
13718 // The operand is a TargetConstant, we need to rebuild it as a regular
13719 // constant rather than negating the original operand.
13720 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
13721 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
13722 }
13723
13724 if (ImmValue != 0)
13725 Op1 = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
13726 DAG.getUNDEF(ContainerVT), Op1, DownOffset, Mask,
13727 Subtarget.hasVLDependentLatency() ? UpOffset : EVL2);
13728 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, Op1, Op2,
13729 UpOffset, Mask, EVL2, RISCVVType::TAIL_AGNOSTIC);
13730
13731 if (IsMaskVector) {
13732 // Truncate Result back to a mask vector (Result has same EVL as Op2)
13733 Result = DAG.getNode(
13734 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
13735 {Result, DAG.getConstant(0, DL, ContainerVT),
13736 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
13737 Mask, EVL2});
13738 }
13739
13740 if (!VT.isFixedLengthVector())
13741 return Result;
13742 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13743}
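// Illustrative sketch (assuming a positive constant offset OFF and a non-i1
// element type): the splice above is composed of two slides,
//   tmp    = vslidedown(Op1, OFF)           ; drop the first OFF elements
//   result = vslideup(tmp, Op2, EVL1 - OFF) ; append Op2 starting there
// so result[i] = Op1[i + OFF] for i < EVL1 - OFF, and
// result[i] = Op2[i - (EVL1 - OFF)] for the remaining elements.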
13744
13745SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
13746 SelectionDAG &DAG) const {
13747 SDLoc DL(Op);
13748 SDValue Val = Op.getOperand(0);
13749 SDValue Mask = Op.getOperand(1);
13750 SDValue VL = Op.getOperand(2);
13751 MVT VT = Op.getSimpleValueType();
13752
13753 MVT ContainerVT = VT;
13754 if (VT.isFixedLengthVector()) {
13755 ContainerVT = getContainerForFixedLengthVector(VT);
13756 MVT MaskVT = getMaskTypeFor(ContainerVT);
13757 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13758 }
13759
13760  SDValue Result;
13761  if (VT.getScalarType() == MVT::i1) {
13762 if (auto *C = dyn_cast<ConstantSDNode>(Val)) {
13763 Result =
13764 DAG.getNode(C->isZero() ? RISCVISD::VMCLR_VL : RISCVISD::VMSET_VL, DL,
13765 ContainerVT, VL);
13766 } else {
13767 MVT WidenVT = ContainerVT.changeVectorElementType(MVT::i8);
13768 SDValue LHS =
13769 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, WidenVT, DAG.getUNDEF(WidenVT),
13770 DAG.getZExtOrTrunc(Val, DL, Subtarget.getXLenVT()), VL);
13771 SDValue RHS = DAG.getConstant(0, DL, WidenVT);
13772 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
13773 {LHS, RHS, DAG.getCondCode(ISD::SETNE),
13774 DAG.getUNDEF(ContainerVT), Mask, VL});
13775 }
13776 } else {
13777 Result =
13778 lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
13779 }
13780
13781 if (!VT.isFixedLengthVector())
13782 return Result;
13783 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13784}
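// Illustrative sketch: an i1 splat of a non-constant scalar %x is widened to
// i8 and compared against zero, roughly
//   v = vmv.v.x (zext %x)   ; splat 0 or 1 into an i8 vector
//   m = vmsne.vi v, 0       ; convert back to a mask
// while constant true/false splats map directly to vmset.m / vmclr.m.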
13785
13786SDValue
13787RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
13788 SelectionDAG &DAG) const {
13789 SDLoc DL(Op);
13790 MVT VT = Op.getSimpleValueType();
13791 MVT XLenVT = Subtarget.getXLenVT();
13792
13793 SDValue Op1 = Op.getOperand(0);
13794 SDValue Mask = Op.getOperand(1);
13795 SDValue EVL = Op.getOperand(2);
13796
13797 MVT ContainerVT = VT;
13798 if (VT.isFixedLengthVector()) {
13799 ContainerVT = getContainerForFixedLengthVector(VT);
13800 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13801 MVT MaskVT = getMaskTypeFor(ContainerVT);
13802 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13803 }
13804
13805 MVT GatherVT = ContainerVT;
13806 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
13807 // Check if we are working with mask vectors
13808 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
13809 if (IsMaskVector) {
13810 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
13811
13812 // Expand input operand
13813 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
13814 DAG.getUNDEF(IndicesVT),
13815 DAG.getConstant(1, DL, XLenVT), EVL);
13816 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
13817 DAG.getUNDEF(IndicesVT),
13818 DAG.getConstant(0, DL, XLenVT), EVL);
13819 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
13820 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
13821 }
13822
13823 unsigned EltSize = GatherVT.getScalarSizeInBits();
13824 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
13825 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
13826 unsigned MaxVLMAX =
13827 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
13828
13829 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
13830 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
13831 // to use vrgatherei16.vv.
13832 // TODO: It's also possible to use vrgatherei16.vv for other types to
13833 // decrease register width for the index calculation.
13834 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
13835 if (MaxVLMAX > 256 && EltSize == 8) {
13836 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
13837 // Split the vector in half and reverse each half using a full register
13838 // reverse.
13839 // Swap the halves and concatenate them.
13840 // Slide the concatenated result by (VLMax - VL).
13841 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
13842 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
13843 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
13844
13845 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
13846 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
13847
13848 // Reassemble the low and high pieces reversed.
13849 // NOTE: this Result is unmasked (because we do not need masks for
13850 // shuffles). If in the future this has to change, we can use a SELECT_VL
13851 // between Result and UNDEF using the mask originally passed to VP_REVERSE
13852 SDValue Result =
13853 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
13854
13855 // Slide off any elements from past EVL that were reversed into the low
13856 // elements.
13857 unsigned MinElts = GatherVT.getVectorMinNumElements();
13858 SDValue VLMax =
13859 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
13860 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
13861
13862 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
13863 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
13864
13865 if (IsMaskVector) {
13866 // Truncate Result back to a mask vector
13867 Result =
13868 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
13869 {Result, DAG.getConstant(0, DL, GatherVT),
13870                       DAG.getCondCode(ISD::SETNE),
13871                       DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
13872 }
13873
13874 if (!VT.isFixedLengthVector())
13875 return Result;
13876 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13877 }
13878
13879 // Just promote the int type to i16 which will double the LMUL.
13880 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
13881 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
13882 }
13883
13884 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
13885 SDValue VecLen =
13886 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
13887 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
13888 DAG.getUNDEF(IndicesVT), VecLen, EVL);
13889 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
13890 DAG.getUNDEF(IndicesVT), Mask, EVL);
13891 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
13892 DAG.getUNDEF(GatherVT), Mask, EVL);
13893
13894 if (IsMaskVector) {
13895 // Truncate Result back to a mask vector
13896 Result = DAG.getNode(
13897 RISCVISD::SETCC_VL, DL, ContainerVT,
13898 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
13899 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
13900 }
13901
13902 if (!VT.isFixedLengthVector())
13903 return Result;
13904 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13905}
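// Illustrative sketch (the non-split path): the reverse is a single gather
// whose index vector maps i -> (EVL - 1) - i, built roughly as
//   vid = vid.v                    ; 0, 1, 2, ...
//   idx = splat(EVL - 1) - vid     ; EVL-1, EVL-2, ...
//   res = vrgather.vv src, idx     ; or vrgatherei16.vv when promoted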
13906
13907SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
13908 SelectionDAG &DAG) const {
13909 MVT VT = Op.getSimpleValueType();
13910 if (VT.getVectorElementType() != MVT::i1)
13911 return lowerVPOp(Op, DAG);
13912
13913 // It is safe to drop mask parameter as masked-off elements are undef.
13914 SDValue Op1 = Op->getOperand(0);
13915 SDValue Op2 = Op->getOperand(1);
13916 SDValue VL = Op->getOperand(3);
13917
13918 MVT ContainerVT = VT;
13919 const bool IsFixed = VT.isFixedLengthVector();
13920 if (IsFixed) {
13921 ContainerVT = getContainerForFixedLengthVector(VT);
13922 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13923 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13924 }
13925
13926 SDLoc DL(Op);
13927 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
13928 if (!IsFixed)
13929 return Val;
13930 return convertFromScalableVector(VT, Val, DAG, Subtarget);
13931}
13932
13933SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
13934 SelectionDAG &DAG) const {
13935 SDLoc DL(Op);
13936 MVT XLenVT = Subtarget.getXLenVT();
13937 MVT VT = Op.getSimpleValueType();
13938 MVT ContainerVT = VT;
13939 if (VT.isFixedLengthVector())
13940 ContainerVT = getContainerForFixedLengthVector(VT);
13941
13942 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
13943
13944 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
13945 // Check if the mask is known to be all ones
13946 SDValue Mask = VPNode->getMask();
13947 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
13948
13949 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
13950 : Intrinsic::riscv_vlse_mask,
13951 DL, XLenVT);
13952 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
13953 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
13954 VPNode->getStride()};
13955 if (!IsUnmasked) {
13956 if (VT.isFixedLengthVector()) {
13957 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
13958 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13959 }
13960 Ops.push_back(Mask);
13961 }
13962 Ops.push_back(VPNode->getVectorLength());
13963 if (!IsUnmasked) {
13964 SDValue Policy =
13965        DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
13966    Ops.push_back(Policy);
13967 }
13968
13969 SDValue Result =
13970      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
13971                              VPNode->getMemoryVT(), VPNode->getMemOperand());
13972 SDValue Chain = Result.getValue(1);
13973
13974 if (VT.isFixedLengthVector())
13975 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
13976
13977 return DAG.getMergeValues({Result, Chain}, DL);
13978}
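// For reference, the operand list assembled above follows the riscv.vlse /
// riscv.vlse.mask intrinsic layout, roughly
//   (chain, intrinsic-id, passthru, baseptr, stride [, mask], vl [, policy])
// with the mask and tail-agnostic policy operands present only in the masked
// form.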
13979
13980SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
13981 SelectionDAG &DAG) const {
13982 SDLoc DL(Op);
13983 MVT XLenVT = Subtarget.getXLenVT();
13984
13985 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
13986 SDValue StoreVal = VPNode->getValue();
13987 MVT VT = StoreVal.getSimpleValueType();
13988 MVT ContainerVT = VT;
13989 if (VT.isFixedLengthVector()) {
13990 ContainerVT = getContainerForFixedLengthVector(VT);
13991 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
13992 }
13993
13994 // Check if the mask is known to be all ones
13995 SDValue Mask = VPNode->getMask();
13996 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
13997
13998 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
13999 : Intrinsic::riscv_vsse_mask,
14000 DL, XLenVT);
14001 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
14002 VPNode->getBasePtr(), VPNode->getStride()};
14003 if (!IsUnmasked) {
14004 if (VT.isFixedLengthVector()) {
14005 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
14006 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14007 }
14008 Ops.push_back(Mask);
14009 }
14010 Ops.push_back(VPNode->getVectorLength());
14011
14012 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
14013 Ops, VPNode->getMemoryVT(),
14014 VPNode->getMemOperand());
14015}
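// For reference, the store side is symmetric but simpler, roughly
//   (chain, intrinsic-id, storeval, baseptr, stride [, mask], vl)
// for riscv.vsse / riscv.vsse.mask; there is no passthru or policy operand
// because the store produces no vector result.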
14016
14017// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
14018// matched to a RVV indexed load. The RVV indexed load instructions only
14019// support the "unsigned unscaled" addressing mode; indices are implicitly
14020// zero-extended or truncated to XLEN and are treated as byte offsets. Any
14021// signed or scaled indexing is extended to the XLEN value type and scaled
14022// accordingly.
14023SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
14024 SelectionDAG &DAG) const {
14025 SDLoc DL(Op);
14026 MVT VT = Op.getSimpleValueType();
14027
14028 const auto *MemSD = cast<MemSDNode>(Op.getNode());
14029 EVT MemVT = MemSD->getMemoryVT();
14030 MachineMemOperand *MMO = MemSD->getMemOperand();
14031 SDValue Chain = MemSD->getChain();
14032 SDValue BasePtr = MemSD->getBasePtr();
14033
14034 [[maybe_unused]] ISD::LoadExtType LoadExtType;
14035 SDValue Index, Mask, PassThru, VL;
14036
14037 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
14038 Index = VPGN->getIndex();
14039 Mask = VPGN->getMask();
14040 PassThru = DAG.getUNDEF(VT);
14041 VL = VPGN->getVectorLength();
14042 // VP doesn't support extending loads.
14043    LoadExtType = ISD::NON_EXTLOAD;
14044  } else {
14045 // Else it must be a MGATHER.
14046 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
14047 Index = MGN->getIndex();
14048 Mask = MGN->getMask();
14049 PassThru = MGN->getPassThru();
14050 LoadExtType = MGN->getExtensionType();
14051 }
14052
14053 MVT IndexVT = Index.getSimpleValueType();
14054 MVT XLenVT = Subtarget.getXLenVT();
14055
14056  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
14057         "Unexpected VTs!");
14058 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
14059 // Targets have to explicitly opt-in for extending vector loads.
14060 assert(LoadExtType == ISD::NON_EXTLOAD &&
14061 "Unexpected extending MGATHER/VP_GATHER");
14062
14063 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
14064 // the selection of the masked intrinsics doesn't do this for us.
14065 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14066
14067 MVT ContainerVT = VT;
14068 if (VT.isFixedLengthVector()) {
14069 ContainerVT = getContainerForFixedLengthVector(VT);
14070 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
14071 ContainerVT.getVectorElementCount());
14072
14073 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
14074
14075 if (!IsUnmasked) {
14076 MVT MaskVT = getMaskTypeFor(ContainerVT);
14077 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14078 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
14079 }
14080 }
14081
14082 if (!VL)
14083 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
14084
14085 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
14086 IndexVT = IndexVT.changeVectorElementType(XLenVT);
14087 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
14088 }
14089
14090 unsigned IntID =
14091 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
14092 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
14093 if (IsUnmasked)
14094 Ops.push_back(DAG.getUNDEF(ContainerVT));
14095 else
14096 Ops.push_back(PassThru);
14097 Ops.push_back(BasePtr);
14098 Ops.push_back(Index);
14099 if (!IsUnmasked)
14100 Ops.push_back(Mask);
14101 Ops.push_back(VL);
14102 if (!IsUnmasked)
14103 Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));
14104
14105 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
14106 SDValue Result =
14107 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
14108 Chain = Result.getValue(1);
14109
14110 if (VT.isFixedLengthVector())
14111 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
14112
14113 return DAG.getMergeValues({Result, Chain}, DL);
14114}
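// Illustrative sketch (made-up values): a gather of i32 elements from
// %base + 4*i reaches this point with its index vector already expressed as
// byte offsets {0, 4, 8, ...}; the code above then forms roughly
//   riscv.vluxei[.mask](passthru, %base, byte-offsets [, mask], vl [, policy])
// where the offsets are implicitly zero-extended or truncated to XLEN.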
14115
14116// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
14117// matched to a RVV indexed store. The RVV indexed store instructions only
14118// support the "unsigned unscaled" addressing mode; indices are implicitly
14119// zero-extended or truncated to XLEN and are treated as byte offsets. Any
14120// signed or scaled indexing is extended to the XLEN value type and scaled
14121// accordingly.
14122SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
14123 SelectionDAG &DAG) const {
14124 SDLoc DL(Op);
14125 const auto *MemSD = cast<MemSDNode>(Op.getNode());
14126 EVT MemVT = MemSD->getMemoryVT();
14127 MachineMemOperand *MMO = MemSD->getMemOperand();
14128 SDValue Chain = MemSD->getChain();
14129 SDValue BasePtr = MemSD->getBasePtr();
14130
14131 [[maybe_unused]] bool IsTruncatingStore = false;
14132 SDValue Index, Mask, Val, VL;
14133
14134 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
14135 Index = VPSN->getIndex();
14136 Mask = VPSN->getMask();
14137 Val = VPSN->getValue();
14138 VL = VPSN->getVectorLength();
14139 // VP doesn't support truncating stores.
14140 IsTruncatingStore = false;
14141 } else {
14142 // Else it must be a MSCATTER.
14143 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
14144 Index = MSN->getIndex();
14145 Mask = MSN->getMask();
14146 Val = MSN->getValue();
14147 IsTruncatingStore = MSN->isTruncatingStore();
14148 }
14149
14150 MVT VT = Val.getSimpleValueType();
14151 MVT IndexVT = Index.getSimpleValueType();
14152 MVT XLenVT = Subtarget.getXLenVT();
14153
14154  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
14155         "Unexpected VTs!");
14156 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
14157 // Targets have to explicitly opt-in for extending vector loads and
14158 // truncating vector stores.
14159 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
14160
14161 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
14162 // the selection of the masked intrinsics doesn't do this for us.
14163 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14164
14165 MVT ContainerVT = VT;
14166 if (VT.isFixedLengthVector()) {
14167 ContainerVT = getContainerForFixedLengthVector(VT);
14168 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
14169 ContainerVT.getVectorElementCount());
14170
14171 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
14172 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
14173
14174 if (!IsUnmasked) {
14175 MVT MaskVT = getMaskTypeFor(ContainerVT);
14176 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14177 }
14178 }
14179
14180 if (!VL)
14181 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
14182
14183 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
14184 IndexVT = IndexVT.changeVectorElementType(XLenVT);
14185 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
14186 }
14187
14188 unsigned IntID =
14189 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
14190 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
14191 Ops.push_back(Val);
14192 Ops.push_back(BasePtr);
14193 Ops.push_back(Index);
14194 if (!IsUnmasked)
14195 Ops.push_back(Mask);
14196 Ops.push_back(VL);
14197
14198  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
14199                                 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
14200}
14201
14202SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
14203 SelectionDAG &DAG) const {
14204 const MVT XLenVT = Subtarget.getXLenVT();
14205 SDLoc DL(Op);
14206 SDValue Chain = Op->getOperand(0);
14207 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
14208 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14209 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14210
14211  // The encoding used for the rounding mode in RISC-V differs from that used
14212  // by FLT_ROUNDS. To convert between them, the RISC-V rounding mode is used
14213  // as an index into a table consisting of a sequence of 4-bit fields, each
14214  // holding the corresponding FLT_ROUNDS mode.
14215  static const int Table =
14216      (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
14217      (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
14218      (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
14219      (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
14220      (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
14221
14222 SDValue Shift =
14223 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
14224 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
14225 DAG.getConstant(Table, DL, XLenVT), Shift);
14226 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
14227 DAG.getConstant(7, DL, XLenVT));
14228
14229 return DAG.getMergeValues({Masked, Chain}, DL);
14230}
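// Worked example (assuming the standard frm encoding RNE=0, RTZ=1, RDN=2,
// RUP=3, RMM=4): for frm == 1 (round toward zero) the lookup computes
//   (Table >> (1 * 4)) & 7
// which yields the FLT_ROUNDS value for toward-zero, i.e. 0.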
14231
14232SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
14233 SelectionDAG &DAG) const {
14234 const MVT XLenVT = Subtarget.getXLenVT();
14235 SDLoc DL(Op);
14236 SDValue Chain = Op->getOperand(0);
14237 SDValue RMValue = Op->getOperand(1);
14238 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
14239
14240  // The encoding used for the rounding mode in RISC-V differs from that used
14241  // by FLT_ROUNDS. To convert between them, the C rounding mode is used as an
14242  // index into a table consisting of a sequence of 4-bit fields, each holding
14243  // the corresponding RISC-V mode.
14244  static const unsigned Table =
14245      (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
14246      (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
14247      (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
14248      (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
14249      (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
14250
14251 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
14252
14253 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
14254 DAG.getConstant(2, DL, XLenVT));
14255 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
14256 DAG.getConstant(Table, DL, XLenVT), Shift);
14257 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
14258 DAG.getConstant(0x7, DL, XLenVT));
14259 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14260 RMValue);
14261}
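// Worked example (inverse of the above): an FLT_ROUNDS value of 0 (toward
// zero) selects the low 4-bit field of this table, which holds the RISC-V
// frm encoding RTZ (1); that value is then written to the frm CSR.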
14262
14263SDValue RISCVTargetLowering::lowerGET_FPENV(SDValue Op,
14264 SelectionDAG &DAG) const {
14265 const MVT XLenVT = Subtarget.getXLenVT();
14266 SDLoc DL(Op);
14267 SDValue Chain = Op->getOperand(0);
14268 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14269 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14270 return DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14271}
14272
14273SDValue RISCVTargetLowering::lowerSET_FPENV(SDValue Op,
14274 SelectionDAG &DAG) const {
14275 const MVT XLenVT = Subtarget.getXLenVT();
14276 SDLoc DL(Op);
14277 SDValue Chain = Op->getOperand(0);
14278 SDValue EnvValue = Op->getOperand(1);
14279 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14280
14281 EnvValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EnvValue);
14282 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14283 EnvValue);
14284}
14285
14286SDValue RISCVTargetLowering::lowerRESET_FPENV(SDValue Op,
14287 SelectionDAG &DAG) const {
14288 const MVT XLenVT = Subtarget.getXLenVT();
14289 SDLoc DL(Op);
14290 SDValue Chain = Op->getOperand(0);
14291 SDValue EnvValue = DAG.getRegister(RISCV::X0, XLenVT);
14292 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14293
14294 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14295 EnvValue);
14296}
14297
14300
14301SDValue RISCVTargetLowering::lowerGET_FPMODE(SDValue Op,
14302 SelectionDAG &DAG) const {
14303 const MVT XLenVT = Subtarget.getXLenVT();
14304 SDLoc DL(Op);
14305 SDValue Chain = Op->getOperand(0);
14306 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14307 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14308 SDValue Result = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14309 Chain = Result.getValue(1);
14310 return DAG.getMergeValues({Result, Chain}, DL);
14311}
14312
14313SDValue RISCVTargetLowering::lowerSET_FPMODE(SDValue Op,
14314 SelectionDAG &DAG) const {
14315 const MVT XLenVT = Subtarget.getXLenVT();
14316 const uint64_t ModeMaskValue = Subtarget.is64Bit() ? ModeMask64 : ModeMask32;
14317 SDLoc DL(Op);
14318 SDValue Chain = Op->getOperand(0);
14319 SDValue EnvValue = Op->getOperand(1);
14320 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14321 SDValue ModeMask = DAG.getConstant(ModeMaskValue, DL, XLenVT);
14322
14323 EnvValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EnvValue);
14324 EnvValue = DAG.getNode(ISD::AND, DL, XLenVT, EnvValue, ModeMask);
14325 Chain = DAG.getNode(RISCVISD::CLEAR_CSR, DL, MVT::Other, Chain, SysRegNo,
14326 ModeMask);
14327 return DAG.getNode(RISCVISD::SET_CSR, DL, MVT::Other, Chain, SysRegNo,
14328 EnvValue);
14329}
14330
14331SDValue RISCVTargetLowering::lowerRESET_FPMODE(SDValue Op,
14332 SelectionDAG &DAG) const {
14333 const MVT XLenVT = Subtarget.getXLenVT();
14334 const uint64_t ModeMaskValue = Subtarget.is64Bit() ? ModeMask64 : ModeMask32;
14335 SDLoc DL(Op);
14336 SDValue Chain = Op->getOperand(0);
14337 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14338 SDValue ModeMask = DAG.getConstant(ModeMaskValue, DL, XLenVT);
14339
14340 return DAG.getNode(RISCVISD::CLEAR_CSR, DL, MVT::Other, Chain, SysRegNo,
14341 ModeMask);
14342}
14343
14344SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
14345 SelectionDAG &DAG) const {
14346 MachineFunction &MF = DAG.getMachineFunction();
14347
14348 bool isRISCV64 = Subtarget.is64Bit();
14349 EVT PtrVT = getPointerTy(DAG.getDataLayout());
14350
14351 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
14352 return DAG.getFrameIndex(FI, PtrVT);
14353}
14354
14355// Returns the opcode of the target-specific SDNode that implements the 32-bit
14356// form of the given Opcode.
14357static unsigned getRISCVWOpcode(unsigned Opcode) {
14358 switch (Opcode) {
14359 default:
14360 llvm_unreachable("Unexpected opcode");
14361 case ISD::SHL:
14362 return RISCVISD::SLLW;
14363 case ISD::SRA:
14364 return RISCVISD::SRAW;
14365 case ISD::SRL:
14366 return RISCVISD::SRLW;
14367 case ISD::SDIV:
14368 return RISCVISD::DIVW;
14369 case ISD::UDIV:
14370 return RISCVISD::DIVUW;
14371 case ISD::UREM:
14372 return RISCVISD::REMUW;
14373 case ISD::ROTL:
14374 return RISCVISD::ROLW;
14375 case ISD::ROTR:
14376 return RISCVISD::RORW;
14377 }
14378}
14379
14380// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
14381// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
14382// otherwise be promoted to i64, making it difficult to select the
14383// SLLW/DIVUW/.../*W later on because the fact that the operation was
14384// originally of type i8/i16/i32 is lost.
14385static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
14386                                   unsigned ExtOpc = ISD::ANY_EXTEND) {
14387 SDLoc DL(N);
14388 unsigned WOpcode = getRISCVWOpcode(N->getOpcode());
14389 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
14390 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
14391 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
14392 // ReplaceNodeResults requires we maintain the same type for the return value.
14393 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
14394}
14395
14396// Converts the given 32-bit operation to an i64 operation with sign-extension
14397// semantics to reduce the number of sign-extension instructions.
14398static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
14399  SDLoc DL(N);
14400 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14401 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14402 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
14403 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
14404 DAG.getValueType(MVT::i32));
14405 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
14406}
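// Illustrative sketch of how these two helpers are used: on RV64 an i32 add
// is rebuilt as
//   trunc(sext_inreg(add(anyext(a), anyext(b)), i32))
// which later selects to a single addw, while an i32 sdiv becomes
//   trunc(RISCVISD::DIVW(anyext(a), anyext(b)))
// selecting to divw. The extension kind actually used depends on the ExtOpc
// argument chosen by the caller.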
14407
14408void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
14409                                             SmallVectorImpl<SDValue> &Results,
14410                                             SelectionDAG &DAG) const {
14411 SDLoc DL(N);
14412 switch (N->getOpcode()) {
14413 default:
14414 llvm_unreachable("Don't know how to custom type legalize this operation!");
14415  case ISD::STRICT_FP_TO_SINT:
14416  case ISD::STRICT_FP_TO_UINT:
14417  case ISD::FP_TO_SINT:
14418 case ISD::FP_TO_UINT: {
14419 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14420 "Unexpected custom legalisation");
14421 bool IsStrict = N->isStrictFPOpcode();
14422 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
14423 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
14424 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
14425 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
14426        TargetLowering::TypeSoftenFloat) {
14427      if (!isTypeLegal(Op0.getValueType()))
14428 return;
14429 if (IsStrict) {
14430 SDValue Chain = N->getOperand(0);
14431 // In absence of Zfh, promote f16 to f32, then convert.
14432 if (Op0.getValueType() == MVT::f16 &&
14433 !Subtarget.hasStdExtZfhOrZhinx()) {
14434 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
14435 {Chain, Op0});
14436 Chain = Op0.getValue(1);
14437 }
14438 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
14439 : RISCVISD::STRICT_FCVT_WU_RV64;
14440 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
14441 SDValue Res = DAG.getNode(
14442 Opc, DL, VTs, Chain, Op0,
14443 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
14444 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14445 Results.push_back(Res.getValue(1));
14446 return;
14447 }
14448 // For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
14449 // convert.
14450 if ((Op0.getValueType() == MVT::f16 &&
14451 !Subtarget.hasStdExtZfhOrZhinx()) ||
14452 Op0.getValueType() == MVT::bf16)
14453 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
14454
14455 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14456 SDValue Res =
14457 DAG.getNode(Opc, DL, MVT::i64, Op0,
14458 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
14459 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14460 return;
14461 }
14462 // If the FP type needs to be softened, emit a library call using the 'si'
14463 // version. If we left it to default legalization we'd end up with 'di'. If
14464 // the FP type doesn't need to be softened just let generic type
14465 // legalization promote the result type.
14466 RTLIB::Libcall LC;
14467 if (IsSigned)
14468 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
14469 else
14470 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
14471 MakeLibCallOptions CallOptions;
14472 EVT OpVT = Op0.getValueType();
14473 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0));
14474 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
14475 SDValue Result;
14476 std::tie(Result, Chain) =
14477 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
14478 Results.push_back(Result);
14479 if (IsStrict)
14480 Results.push_back(Chain);
14481 break;
14482 }
14483 case ISD::LROUND: {
14484 SDValue Op0 = N->getOperand(0);
14485 EVT Op0VT = Op0.getValueType();
14486 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
14487        TargetLowering::TypeSoftenFloat) {
14488      if (!isTypeLegal(Op0VT))
14489 return;
14490
14491 // In absence of Zfh, promote f16 to f32, then convert.
14492 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
14493 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
14494
14495 SDValue Res =
14496 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
14497 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
14498 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14499 return;
14500 }
14501 // If the FP type needs to be softened, emit a library call to lround. We'll
14502 // need to truncate the result. We assume any value that doesn't fit in i32
14503 // is allowed to return an unspecified value.
14504 RTLIB::Libcall LC =
14505 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
14506 MakeLibCallOptions CallOptions;
14507 EVT OpVT = Op0.getValueType();
14508 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
14509 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
14510 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
14511 Results.push_back(Result);
14512 break;
14513 }
14514 case ISD::READCYCLECOUNTER:
14515 case ISD::READSTEADYCOUNTER: {
14516 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
14517 "has custom type legalization on riscv32");
14518
14519 SDValue LoCounter, HiCounter;
14520 MVT XLenVT = Subtarget.getXLenVT();
14521 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
14522 LoCounter = DAG.getTargetConstant(RISCVSysReg::cycle, DL, XLenVT);
14523 HiCounter = DAG.getTargetConstant(RISCVSysReg::cycleh, DL, XLenVT);
14524 } else {
14525 LoCounter = DAG.getTargetConstant(RISCVSysReg::time, DL, XLenVT);
14526 HiCounter = DAG.getTargetConstant(RISCVSysReg::timeh, DL, XLenVT);
14527 }
14528 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
14529 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
14530 N->getOperand(0), LoCounter, HiCounter);
14531
14532 Results.push_back(
14533 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
14534 Results.push_back(RCW.getValue(2));
14535 break;
14536 }
14537 case ISD::LOAD: {
14538 if (!ISD::isNON_EXTLoad(N))
14539 return;
14540
14541 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
14542 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
14543    LoadSDNode *Ld = cast<LoadSDNode>(N);
14544
14545 if (N->getValueType(0) == MVT::i64) {
14546 assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
14547 "Unexpected custom legalisation");
14548
14549 if (!Subtarget.enableUnalignedScalarMem() && Ld->getAlign() < 8)
14550 return;
14551
14552 SDLoc DL(N);
14553 SDValue Result = DAG.getMemIntrinsicNode(
14554 RISCVISD::LD_RV32, DL,
14555 DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
14556 {Ld->getChain(), Ld->getBasePtr()}, MVT::i64, Ld->getMemOperand());
14557 SDValue Lo = Result.getValue(0);
14558 SDValue Hi = Result.getValue(1);
14559 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
14560 Results.append({Pair, Result.getValue(2)});
14561 return;
14562 }
14563
14564 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14565 "Unexpected custom legalisation");
14566
14567 SDLoc dl(N);
14568 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
14569 Ld->getBasePtr(), Ld->getMemoryVT(),
14570 Ld->getMemOperand());
14571 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
14572 Results.push_back(Res.getValue(1));
14573 return;
14574 }
14575 case ISD::MUL: {
14576 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
14577 unsigned XLen = Subtarget.getXLen();
14578 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
14579 if (Size > XLen) {
14580 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
14581 SDValue LHS = N->getOperand(0);
14582 SDValue RHS = N->getOperand(1);
14583 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
14584
14585 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
14586 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
14587 // We need exactly one side to be unsigned.
14588 if (LHSIsU == RHSIsU)
14589 return;
14590
14591 auto MakeMULPair = [&](SDValue S, SDValue U) {
14592 MVT XLenVT = Subtarget.getXLenVT();
14593 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
14594 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
14595 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
14596 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
14597 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
14598 };
14599
14600 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
14601 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
14602
14603 // The other operand should be signed, but still prefer MULH when
14604 // possible.
14605 if (RHSIsU && LHSIsS && !RHSIsS)
14606 Results.push_back(MakeMULPair(LHS, RHS));
14607 else if (LHSIsU && RHSIsS && !LHSIsS)
14608 Results.push_back(MakeMULPair(RHS, LHS));
14609
14610 return;
14611 }
14612 [[fallthrough]];
14613 }
14614 case ISD::ADD:
14615 case ISD::SUB:
14616 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14617 "Unexpected custom legalisation");
14618 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
14619 break;
14620 case ISD::SHL:
14621 case ISD::SRA:
14622 case ISD::SRL:
14623 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14624 "Unexpected custom legalisation");
14625 if (N->getOperand(1).getOpcode() != ISD::Constant) {
14626 // If we can use a BSET instruction, allow default promotion to apply.
14627 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
14628 isOneConstant(N->getOperand(0)))
14629 break;
14630 Results.push_back(customLegalizeToWOp(N, DAG));
14631 break;
14632 }
14633
14634 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
14635 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
14636 // shift amount.
14637 if (N->getOpcode() == ISD::SHL) {
14638 SDLoc DL(N);
14639 SDValue NewOp0 =
14640 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14641 SDValue NewOp1 =
14642 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
14643 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
14644 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
14645 DAG.getValueType(MVT::i32));
14646 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
14647 }
14648
14649 break;
14650 case ISD::ROTL:
14651 case ISD::ROTR:
14652 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14653 "Unexpected custom legalisation");
14654 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
14655 Subtarget.hasVendorXTHeadBb()) &&
14656 "Unexpected custom legalization");
14657 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
14658 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
14659 return;
14660 Results.push_back(customLegalizeToWOp(N, DAG));
14661 break;
14662 case ISD::CTTZ:
14663  case ISD::CTTZ_ZERO_UNDEF:
14664  case ISD::CTLZ:
14665 case ISD::CTLZ_ZERO_UNDEF: {
14666 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14667 "Unexpected custom legalisation");
14668
14669 SDValue NewOp0 =
14670 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14671 bool IsCTZ =
14672 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
14673 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
14674 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
14675 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14676 return;
14677 }
14678 case ISD::SDIV:
14679 case ISD::UDIV:
14680 case ISD::UREM: {
14681 MVT VT = N->getSimpleValueType(0);
14682 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
14683 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
14684 "Unexpected custom legalisation");
14685 // Don't promote division/remainder by constant since we should expand those
14686 // to multiply by magic constant.
14687 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
14688 if (N->getOperand(1).getOpcode() == ISD::Constant &&
14689 !isIntDivCheap(N->getValueType(0), Attr))
14690 return;
14691
14692 // If the input is i32, use ANY_EXTEND since the W instructions don't read
14693 // the upper 32 bits. For other types we need to sign or zero extend
14694 // based on the opcode.
14695 unsigned ExtOpc = ISD::ANY_EXTEND;
14696 if (VT != MVT::i32)
14697 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
14698                                           : ISD::ZERO_EXTEND;
14699
14700 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
14701 break;
14702 }
14703 case ISD::SADDO: {
14704 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14705 "Unexpected custom legalisation");
14706
14707 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
14708 // use the default legalization.
14709 if (!isa<ConstantSDNode>(N->getOperand(1)))
14710 return;
14711
14712 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
14713 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
14714 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
14715 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
14716 DAG.getValueType(MVT::i32));
14717
14718 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
14719
14720 // For an addition, the result should be less than one of the operands (LHS)
14721 // if and only if the other operand (RHS) is negative, otherwise there will
14722 // be overflow.
14723 // For a subtraction, the result should be less than one of the operands
14724 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
14725 // otherwise there will be overflow.
14726 EVT OType = N->getValueType(1);
14727 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
14728 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
14729
14730 SDValue Overflow =
14731 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
14732 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14733 Results.push_back(Overflow);
14734 return;
14735 }
14736 case ISD::UADDO:
14737 case ISD::USUBO: {
14738 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14739 "Unexpected custom legalisation");
14740 bool IsAdd = N->getOpcode() == ISD::UADDO;
14741 // Create an ADDW or SUBW.
14742 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14743 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14744 SDValue Res =
14745 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
14746 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
14747 DAG.getValueType(MVT::i32));
14748
14749 SDValue Overflow;
14750 if (IsAdd && isOneConstant(RHS)) {
14751 // Special case uaddo X, 1 overflowed if the addition result is 0.
14752 // The general case (X + C) < C is not necessarily beneficial. Although we
14753 // reduce the live range of X, we may introduce the materialization of
14754      // constant C, especially when the setcc result is used by a branch. We
14755      // have no compare-with-constant-and-branch instructions.
14756 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
14757 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
14758 } else if (IsAdd && isAllOnesConstant(RHS)) {
14759 // Special case uaddo X, -1 overflowed if X != 0.
14760 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
14761 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
14762 } else {
14763 // Sign extend the LHS and perform an unsigned compare with the ADDW
14764 // result. Since the inputs are sign extended from i32, this is equivalent
14765 // to comparing the lower 32 bits.
14766 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
14767 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
14768 IsAdd ? ISD::SETULT : ISD::SETUGT);
14769 }
14770
14771 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14772 Results.push_back(Overflow);
14773 return;
14774 }
14775 case ISD::UADDSAT:
14776 case ISD::USUBSAT: {
14777 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14778 !Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
14779 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
14780 // promotion for UADDO/USUBO.
14781 Results.push_back(expandAddSubSat(N, DAG));
14782 return;
14783 }
14784 case ISD::SADDSAT:
14785 case ISD::SSUBSAT: {
14786 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14787 "Unexpected custom legalisation");
14788 Results.push_back(expandAddSubSat(N, DAG));
14789 return;
14790 }
14791 case ISD::ABS: {
14792 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14793 "Unexpected custom legalisation");
14794
14795 if (Subtarget.hasStdExtZbb()) {
14796 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
14797 // This allows us to remember that the result is sign extended. Expanding
14798 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
14799 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
14800 N->getOperand(0));
14801 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
14802 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
14803 return;
14804 }
14805
14806 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
14807 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14808
14809    // Freeze the source so we can increase its use count.
14810 Src = DAG.getFreeze(Src);
14811
14812 // Copy sign bit to all bits using the sraiw pattern.
14813 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
14814 DAG.getValueType(MVT::i32));
14815 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
14816 DAG.getConstant(31, DL, MVT::i64));
14817
14818 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
14819 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
14820
14821 // NOTE: The result is only required to be anyextended, but sext is
14822 // consistent with type legalization of sub.
14823 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
14824 DAG.getValueType(MVT::i32));
14825 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
14826 return;
14827 }
14828 case ISD::BITCAST: {
14829 EVT VT = N->getValueType(0);
14830 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
14831 SDValue Op0 = N->getOperand(0);
14832 EVT Op0VT = Op0.getValueType();
14833 MVT XLenVT = Subtarget.getXLenVT();
14834 if (VT == MVT::i16 &&
14835 ((Op0VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
14836 (Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
14837 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
14838 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
14839 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
14840 Subtarget.hasStdExtFOrZfinx()) {
14841 SDValue FPConv =
14842 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
14843 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
14844 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() &&
14845 Subtarget.hasStdExtDOrZdinx()) {
14846 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
14847 DAG.getVTList(MVT::i32, MVT::i32), Op0);
14848 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
14849 NewReg.getValue(0), NewReg.getValue(1));
14850 Results.push_back(RetReg);
14851 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
14852 isTypeLegal(Op0VT)) {
14853 // Custom-legalize bitcasts from fixed-length vector types to illegal
14854 // scalar types in order to improve codegen. Bitcast the vector to a
14855 // one-element vector type whose element type is the same as the result
14856 // type, and extract the first element.
14857 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
14858 if (isTypeLegal(BVT)) {
14859 SDValue BVec = DAG.getBitcast(BVT, Op0);
14860 Results.push_back(DAG.getExtractVectorElt(DL, VT, BVec, 0));
14861 }
14862 }
14863 break;
14864 }
14865 case ISD::BITREVERSE: {
14866 assert(N->getValueType(0) == MVT::i8 && Subtarget.hasStdExtZbkb() &&
14867 "Unexpected custom legalisation");
14868 MVT XLenVT = Subtarget.getXLenVT();
14869 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
14870 SDValue NewRes = DAG.getNode(RISCVISD::BREV8, DL, XLenVT, NewOp);
14871 // ReplaceNodeResults requires we maintain the same type for the return
14872 // value.
14873 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, NewRes));
14874 break;
14875 }
14876 case RISCVISD::BREV8:
14877 case RISCVISD::ORC_B: {
14878 MVT VT = N->getSimpleValueType(0);
14879 MVT XLenVT = Subtarget.getXLenVT();
14880 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
14881 "Unexpected custom legalisation");
14882 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
14883 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
14884 "Unexpected extension");
14885 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
14886 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
14887 // ReplaceNodeResults requires we maintain the same type for the return
14888 // value.
14889 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
14890 break;
14891 }
14892  case ISD::EXTRACT_VECTOR_ELT: {
14893    // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
14894 // type is illegal (currently only vXi64 RV32).
14895 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
14896 // transferred to the destination register. We issue two of these from the
14897 // upper- and lower- halves of the SEW-bit vector element, slid down to the
14898 // first element.
14899 SDValue Vec = N->getOperand(0);
14900 SDValue Idx = N->getOperand(1);
14901
14902 // The vector type hasn't been legalized yet so we can't issue target
14903 // specific nodes if it needs legalization.
14904 // FIXME: We would manually legalize if it's important.
14905 if (!isTypeLegal(Vec.getValueType()))
14906 return;
14907
14908 MVT VecVT = Vec.getSimpleValueType();
14909
14910 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
14911 VecVT.getVectorElementType() == MVT::i64 &&
14912 "Unexpected EXTRACT_VECTOR_ELT legalization");
14913
14914 // If this is a fixed vector, we need to convert it to a scalable vector.
14915 MVT ContainerVT = VecVT;
14916 if (VecVT.isFixedLengthVector()) {
14917 ContainerVT = getContainerForFixedLengthVector(VecVT);
14918 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
14919 }
14920
14921 MVT XLenVT = Subtarget.getXLenVT();
14922
14923 // Use a VL of 1 to avoid processing more elements than we need.
14924 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
14925
14926 // Unless the index is known to be 0, we must slide the vector down to get
14927 // the desired element into index 0.
14928 if (!isNullConstant(Idx)) {
14929 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
14930 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
14931 }
14932
14933 // Extract the lower XLEN bits of the correct vector element.
14934 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
14935
14936 // To extract the upper XLEN bits of the vector element, shift the first
14937 // element right by 32 bits and re-extract the lower XLEN bits.
14938 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
14939 DAG.getUNDEF(ContainerVT),
14940 DAG.getConstant(32, DL, XLenVT), VL);
14941 SDValue LShr32 =
14942 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
14943 DAG.getUNDEF(ContainerVT), Mask, VL);
14944
14945 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
14946
14947 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
14948 break;
14949 }
14950  case ISD::INTRINSIC_WO_CHAIN: {
14951    unsigned IntNo = N->getConstantOperandVal(0);
14952 switch (IntNo) {
14953 default:
14954      llvm_unreachable(
14955          "Don't know how to custom type legalize this intrinsic!");
14956 case Intrinsic::experimental_get_vector_length: {
14957 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
14958 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14959 return;
14960 }
14961 case Intrinsic::experimental_cttz_elts: {
14962 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
14963 Results.push_back(
14964 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
14965 return;
14966 }
14967 case Intrinsic::riscv_orc_b:
14968 case Intrinsic::riscv_brev8:
14969 case Intrinsic::riscv_sha256sig0:
14970 case Intrinsic::riscv_sha256sig1:
14971 case Intrinsic::riscv_sha256sum0:
14972 case Intrinsic::riscv_sha256sum1:
14973 case Intrinsic::riscv_sm3p0:
14974 case Intrinsic::riscv_sm3p1: {
14975 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
14976 return;
14977 unsigned Opc;
14978 switch (IntNo) {
14979 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
14980 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
14981 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
14982 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
14983 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
14984 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
14985 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
14986 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
14987 }
14988
14989 SDValue NewOp =
14990 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14991 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
14992 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14993 return;
14994 }
14995 case Intrinsic::riscv_sm4ks:
14996 case Intrinsic::riscv_sm4ed: {
14997 unsigned Opc =
14998 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
14999 SDValue NewOp0 =
15000 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15001 SDValue NewOp1 =
15002 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15003 SDValue Res =
15004 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
15005 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15006 return;
15007 }
15008 case Intrinsic::riscv_mopr: {
15009 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15010 return;
15011 SDValue NewOp =
15012 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15013 SDValue Res = DAG.getNode(
15014 RISCVISD::MOP_R, DL, MVT::i64, NewOp,
15015 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
15016 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15017 return;
15018 }
15019 case Intrinsic::riscv_moprr: {
15020 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15021 return;
15022 SDValue NewOp0 =
15023 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15024 SDValue NewOp1 =
15025 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15026 SDValue Res = DAG.getNode(
15027 RISCVISD::MOP_RR, DL, MVT::i64, NewOp0, NewOp1,
15028 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
15029 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15030 return;
15031 }
15032 case Intrinsic::riscv_clmul: {
15033 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15034 return;
15035
15036 SDValue NewOp0 =
15037 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15038 SDValue NewOp1 =
15039 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15040 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
15041 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15042 return;
15043 }
15044 case Intrinsic::riscv_clmulh:
15045 case Intrinsic::riscv_clmulr: {
15046 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15047 return;
15048
15049 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
15050 // to the full 128-bit clmul result of multiplying two xlen values.
15051 // Perform clmulr or clmulh on the shifted values. Finally, extract the
15052 // upper 32 bits.
15053 //
15054 // The alternative is to mask the inputs to 32 bits and use clmul, but
15055 // that requires two shifts to mask each input without zext.w.
15056 // FIXME: If the inputs are known zero extended or could be freely
15057 // zero extended, the mask form would be better.
15058 SDValue NewOp0 =
15059 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15060 SDValue NewOp1 =
15061 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15062 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
15063 DAG.getConstant(32, DL, MVT::i64));
15064 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
15065 DAG.getConstant(32, DL, MVT::i64));
15066 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
15067 : RISCVISD::CLMULR;
15068 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
15069 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
15070 DAG.getConstant(32, DL, MVT::i64));
15071 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15072 return;
15073 }
15074 case Intrinsic::riscv_vmv_x_s: {
15075 EVT VT = N->getValueType(0);
15076 MVT XLenVT = Subtarget.getXLenVT();
15077 if (VT.bitsLT(XLenVT)) {
15078 // Simple case just extract using vmv.x.s and truncate.
15079 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
15080 Subtarget.getXLenVT(), N->getOperand(1));
15081 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
15082 return;
15083 }
15084
15085 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
15086 "Unexpected custom legalization");
15087
15088 // We need to do the move in two steps.
15089 SDValue Vec = N->getOperand(1);
15090 MVT VecVT = Vec.getSimpleValueType();
15091
15092 // First extract the lower XLEN bits of the element.
15093 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
15094
15095 // To extract the upper XLEN bits of the vector element, shift the first
15096 // element right by 32 bits and re-extract the lower XLEN bits.
15097 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
15098
15099 SDValue ThirtyTwoV =
15100 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
15101 DAG.getConstant(32, DL, XLenVT), VL);
15102 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
15103 DAG.getUNDEF(VecVT), Mask, VL);
15104 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
15105
15106 Results.push_back(
15107 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
15108 break;
15109 }
15110 }
15111 break;
15112 }
15113 case ISD::VECREDUCE_ADD:
15114 case ISD::VECREDUCE_AND:
15115 case ISD::VECREDUCE_OR:
15116 case ISD::VECREDUCE_XOR:
15117 case ISD::VECREDUCE_SMAX:
15118 case ISD::VECREDUCE_UMAX:
15119 case ISD::VECREDUCE_SMIN:
15120 case ISD::VECREDUCE_UMIN:
15121 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
15122 Results.push_back(V);
15123 break;
15124 case ISD::VP_REDUCE_ADD:
15125 case ISD::VP_REDUCE_AND:
15126 case ISD::VP_REDUCE_OR:
15127 case ISD::VP_REDUCE_XOR:
15128 case ISD::VP_REDUCE_SMAX:
15129 case ISD::VP_REDUCE_UMAX:
15130 case ISD::VP_REDUCE_SMIN:
15131 case ISD::VP_REDUCE_UMIN:
15132 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
15133 Results.push_back(V);
15134 break;
15135 case ISD::GET_ROUNDING: {
15136 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
15137 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
15138 Results.push_back(Res.getValue(0));
15139 Results.push_back(Res.getValue(1));
15140 break;
15141 }
15142 }
15143}
15144
15145/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
15146/// which corresponds to it.
15147static unsigned getVecReduceOpcode(unsigned Opc) {
15148 switch (Opc) {
15149 default:
15150 llvm_unreachable("Unhandled binary to transform reduction");
15151 case ISD::ADD:
15152 return ISD::VECREDUCE_ADD;
15153 case ISD::UMAX:
15154 return ISD::VECREDUCE_UMAX;
15155 case ISD::SMAX:
15156 return ISD::VECREDUCE_SMAX;
15157 case ISD::UMIN:
15158 return ISD::VECREDUCE_UMIN;
15159 case ISD::SMIN:
15160 return ISD::VECREDUCE_SMIN;
15161 case ISD::AND:
15162 return ISD::VECREDUCE_AND;
15163 case ISD::OR:
15164 return ISD::VECREDUCE_OR;
15165 case ISD::XOR:
15166 return ISD::VECREDUCE_XOR;
15167 case ISD::FADD:
15168 // Note: This is the associative form of the generic reduction opcode.
15169 return ISD::VECREDUCE_FADD;
15170 case ISD::FMAXNUM:
15171 return ISD::VECREDUCE_FMAX;
15172 case ISD::FMINNUM:
15173 return ISD::VECREDUCE_FMIN;
15174 }
15175}
15176
15177/// Perform two related transforms whose purpose is to incrementally recognize
15178/// an explode_vector followed by scalar reduction as a vector reduction node.
15179/// This exists to recover from a deficiency in SLP which can't handle
15180/// forests with multiple roots sharing common nodes. In some cases, one
15181/// of the trees will be vectorized, and the other will remain (unprofitably)
15182/// scalarized.
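/// For example, (add (add (extractelt V, 0), (extractelt V, 1)), (extractelt V, 2))
/// is first rewritten to an add of (vecreduce_add over the first two elements of V)
/// and (extractelt V, 2), and is then folded into a vecreduce_add over the first
/// three elements of V.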
15183static SDValue
15184 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
15185 const RISCVSubtarget &Subtarget) {
15186
15187 // This transform needs to run before all integer types have been legalized
15188 // to i64 (so that the vector element type matches the add type), and while
15189 // it's safe to introduce odd sized vector types.
15190 if (DAG.NewNodesMustHaveLegalTypes)
15191 return SDValue();
15192
15193 // Without V, this transform isn't useful. We could form the (illegal)
15194 // operations and let them be scalarized again, but there's really no point.
15195 if (!Subtarget.hasVInstructions())
15196 return SDValue();
15197
15198 const SDLoc DL(N);
15199 const EVT VT = N->getValueType(0);
15200 const unsigned Opc = N->getOpcode();
15201
15202 if (!VT.isInteger()) {
15203 switch (Opc) {
15204 default:
15205 return SDValue();
15206 case ISD::FADD:
15207 // For FADD, we only handle the case with reassociation allowed. We
15208 // could handle strict reduction order, but at the moment, there's no
15209 // known reason to, and the complexity isn't worth it.
15210 if (!N->getFlags().hasAllowReassociation())
15211 return SDValue();
15212 break;
15213 case ISD::FMAXNUM:
15214 case ISD::FMINNUM:
15215 break;
15216 }
15217 }
15218
15219 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
15220 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
15221 "Inconsistent mappings");
15222 SDValue LHS = N->getOperand(0);
15223 SDValue RHS = N->getOperand(1);
15224
15225 if (!LHS.hasOneUse() || !RHS.hasOneUse())
15226 return SDValue();
15227
15228 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15229 std::swap(LHS, RHS);
15230
15231 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15232 !isa<ConstantSDNode>(RHS.getOperand(1)))
15233 return SDValue();
15234
15235 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
15236 SDValue SrcVec = RHS.getOperand(0);
15237 EVT SrcVecVT = SrcVec.getValueType();
15238 assert(SrcVecVT.getVectorElementType() == VT);
15239 if (SrcVecVT.isScalableVector())
15240 return SDValue();
15241
15242 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
15243 return SDValue();
15244
15245 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
15246 // reduce_op (extract_subvector [2 x VT] from V). This will form the
15247 // root of our reduction tree. TODO: We could extend this to any two
15248 // adjacent aligned constant indices if desired.
15249 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15250 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
15251 uint64_t LHSIdx =
15252 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
15253 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
15254 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
15255 SDValue Vec = DAG.getExtractSubvector(DL, ReduceVT, SrcVec, 0);
15256 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
15257 }
15258 }
15259
15260 // Match (binop (reduce (extract_subvector V, 0),
15261 // (extract_vector_elt V, sizeof(SubVec))))
15262 // into a reduction of one more element from the original vector V.
15263 if (LHS.getOpcode() != ReduceOpc)
15264 return SDValue();
15265
15266 SDValue ReduceVec = LHS.getOperand(0);
15267 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
15268 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
15269 isNullConstant(ReduceVec.getOperand(1)) &&
15270 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
15271 // For illegal types (e.g. 3xi32), most will be combined again into a
15272 // wider (hopefully legal) type. If this is a terminal state, we are
15273 // relying on type legalization here to produce something reasonable
15274 // and this lowering quality could probably be improved. (TODO)
15275 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
15276 SDValue Vec = DAG.getExtractSubvector(DL, ReduceVT, SrcVec, 0);
15277 return DAG.getNode(ReduceOpc, DL, VT, Vec,
15278 ReduceVec->getFlags() & N->getFlags());
15279 }
15280
15281 return SDValue();
15282}
15283
15284
15285// Try to fold (<bop> x, (reduction.<bop> vec, start))
15286 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
15287 const RISCVSubtarget &Subtarget) {
15288 auto BinOpToRVVReduce = [](unsigned Opc) {
15289 switch (Opc) {
15290 default:
15291 llvm_unreachable("Unhandled binary to transform reduction");
15292 case ISD::ADD:
15293 return RISCVISD::VECREDUCE_ADD_VL;
15294 case ISD::UMAX:
15295 return RISCVISD::VECREDUCE_UMAX_VL;
15296 case ISD::SMAX:
15297 return RISCVISD::VECREDUCE_SMAX_VL;
15298 case ISD::UMIN:
15299 return RISCVISD::VECREDUCE_UMIN_VL;
15300 case ISD::SMIN:
15301 return RISCVISD::VECREDUCE_SMIN_VL;
15302 case ISD::AND:
15303 return RISCVISD::VECREDUCE_AND_VL;
15304 case ISD::OR:
15305 return RISCVISD::VECREDUCE_OR_VL;
15306 case ISD::XOR:
15307 return RISCVISD::VECREDUCE_XOR_VL;
15308 case ISD::FADD:
15309 return RISCVISD::VECREDUCE_FADD_VL;
15310 case ISD::FMAXNUM:
15311 return RISCVISD::VECREDUCE_FMAX_VL;
15312 case ISD::FMINNUM:
15313 return RISCVISD::VECREDUCE_FMIN_VL;
15314 }
15315 };
15316
15317 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
15318 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15319 isNullConstant(V.getOperand(1)) &&
15320 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
15321 };
15322
15323 unsigned Opc = N->getOpcode();
15324 unsigned ReduceIdx;
15325 if (IsReduction(N->getOperand(0), Opc))
15326 ReduceIdx = 0;
15327 else if (IsReduction(N->getOperand(1), Opc))
15328 ReduceIdx = 1;
15329 else
15330 return SDValue();
15331
15332 // Skip if FADD disallows reassociation but the combiner needs it.
15333 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
15334 return SDValue();
15335
15336 SDValue Extract = N->getOperand(ReduceIdx);
15337 SDValue Reduce = Extract.getOperand(0);
15338 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
15339 return SDValue();
15340
15341 SDValue ScalarV = Reduce.getOperand(2);
15342 EVT ScalarVT = ScalarV.getValueType();
15343 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
15344 ScalarV.getOperand(0)->isUndef() &&
15345 isNullConstant(ScalarV.getOperand(2)))
15346 ScalarV = ScalarV.getOperand(1);
15347
15348 // Make sure that ScalarV is a splat with VL=1.
15349 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
15350 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
15351 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
15352 return SDValue();
15353
15354 if (!isNonZeroAVL(ScalarV.getOperand(2)))
15355 return SDValue();
15356
15357 // Check that the scalar of ScalarV is the neutral element.
15358 // TODO: Deal with values other than the neutral element.
15359 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
15360 0))
15361 return SDValue();
15362
15363 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
15364 // FIXME: We might be able to improve this if operand 0 is undef.
15365 if (!isNonZeroAVL(Reduce.getOperand(5)))
15366 return SDValue();
15367
15368 SDValue NewStart = N->getOperand(1 - ReduceIdx);
15369
15370 SDLoc DL(N);
15371 SDValue NewScalarV =
15372 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
15373 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
15374
15375 // If we looked through an INSERT_SUBVECTOR we need to restore it.
15376 if (ScalarVT != ScalarV.getValueType())
15377 NewScalarV =
15378 DAG.getInsertSubvector(DL, DAG.getUNDEF(ScalarVT), NewScalarV, 0);
15379
15380 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
15381 NewScalarV, Reduce.getOperand(3),
15382 Reduce.getOperand(4), Reduce.getOperand(5)};
15383 SDValue NewReduce =
15384 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
15385 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
15386 Extract.getOperand(1));
15387}
15388
15389// Optimize (add (shl x, c0), (shl y, c1)) ->
15390 // (SLLI (SH*ADD x, y), c0), if c1-c0 equals [1|2|3].
15391 // or
15392 // (SLLI (QC.SHLADD x, y, c1 - c0), c0), if 4 <= (c1-c0) <= 31.
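// For example, with c0 = 1 and c1 = 3, (add (shl x, 1), (shl y, 3)) becomes a
// sh2add of y and x followed by a slli by 1, since 2*x + 8*y == ((y << 2) + x) << 1.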
15393 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
15394 const RISCVSubtarget &Subtarget) {
15395 // Perform this optimization only in the zba/xandesperf/xqciac/xtheadba
15396 // extensions.
15397 if (!Subtarget.hasShlAdd(3))
15398 return SDValue();
15399
15400 // Skip for vector types and larger types.
15401 EVT VT = N->getValueType(0);
15402 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
15403 return SDValue();
15404
15405 // The two operand nodes must be SHL and have no other use.
15406 SDValue N0 = N->getOperand(0);
15407 SDValue N1 = N->getOperand(1);
15408 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
15409 !N0->hasOneUse() || !N1->hasOneUse())
15410 return SDValue();
15411
15412 // Check c0 and c1.
15413 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
15414 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
15415 if (!N0C || !N1C)
15416 return SDValue();
15417 int64_t C0 = N0C->getSExtValue();
15418 int64_t C1 = N1C->getSExtValue();
15419 if (C0 <= 0 || C1 <= 0)
15420 return SDValue();
15421
15422 int64_t Diff = std::abs(C0 - C1);
15423 if (!Subtarget.hasShlAdd(Diff))
15424 return SDValue();
15425
15426 // Build nodes.
15427 SDLoc DL(N);
15428 int64_t Bits = std::min(C0, C1);
15429 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
15430 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
15431 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
15432 DAG.getConstant(Diff, DL, VT), NS);
15433 return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
15434}
15435
15436// Check if this SDValue is an add immediate that is fed by a shift of 1, 2,
15437// or 3.
15438 static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other,
15439 SelectionDAG &DAG) {
15440 using namespace llvm::SDPatternMatch;
15441
15442 // Looking for a reg-reg add and not an addi.
15443 if (isa<ConstantSDNode>(N->getOperand(1)))
15444 return SDValue();
15445
15446 // Based on testing it seems that performance degrades if the ADDI has
15447 // more than 2 uses.
15448 if (AddI->use_size() > 2)
15449 return SDValue();
15450
15451 APInt AddVal;
15452 SDValue SHLVal;
15453 if (!sd_match(AddI, m_Add(m_Value(SHLVal), m_ConstInt(AddVal))))
15454 return SDValue();
15455
15456 APInt VShift;
15457 if (!sd_match(SHLVal, m_OneUse(m_Shl(m_Value(), m_ConstInt(VShift)))))
15458 return SDValue();
15459
15460 if (VShift.slt(1) || VShift.sgt(3))
15461 return SDValue();
15462
15463 SDLoc DL(N);
15464 EVT VT = N->getValueType(0);
15465 // The shift must be positive but the add can be signed.
15466 uint64_t ShlConst = VShift.getZExtValue();
15467 int64_t AddConst = AddVal.getSExtValue();
15468
15469 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, SHLVal->getOperand(0),
15470 DAG.getConstant(ShlConst, DL, VT), Other);
15471 return DAG.getNode(ISD::ADD, DL, VT, SHADD,
15472 DAG.getSignedConstant(AddConst, DL, VT));
15473}
15474
15475// Optimize (add (add (shl x, c0), c1), y) ->
15476 // (ADDI (SH*ADD y, x), c1), if c0 equals [1|2|3].
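// For example, (add (add (shl x, 2), 37), y) is rewritten so that (x << 2) + y is
// formed with a single sh2add and the constant 37 is folded into a trailing ADDI.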
15477 static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG,
15478 const RISCVSubtarget &Subtarget) {
15479 // Perform this optimization only in the zba extension.
15480 if (!ReassocShlAddiAdd || !Subtarget.hasShlAdd(3))
15481 return SDValue();
15482
15483 // Skip for vector types and larger types.
15484 EVT VT = N->getValueType(0);
15485 if (VT != Subtarget.getXLenVT())
15486 return SDValue();
15487
15488 SDValue AddI = N->getOperand(0);
15489 SDValue Other = N->getOperand(1);
15490 if (SDValue V = combineShlAddIAddImpl(N, AddI, Other, DAG))
15491 return V;
15492 if (SDValue V = combineShlAddIAddImpl(N, Other, AddI, DAG))
15493 return V;
15494 return SDValue();
15495}
15496
15497// Combine a constant select operand into its use:
15498//
15499// (and (select cond, -1, c), x)
15500// -> (select cond, x, (and x, c)) [AllOnes=1]
15501// (or (select cond, 0, c), x)
15502// -> (select cond, x, (or x, c)) [AllOnes=0]
15503// (xor (select cond, 0, c), x)
15504// -> (select cond, x, (xor x, c)) [AllOnes=0]
15505// (add (select cond, 0, c), x)
15506// -> (select cond, x, (add x, c)) [AllOnes=0]
15507// (sub x, (select cond, 0, c))
15508// -> (select cond, x, (sub x, c)) [AllOnes=0]
15509 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
15510 SelectionDAG &DAG, bool AllOnes,
15511 const RISCVSubtarget &Subtarget) {
15512 EVT VT = N->getValueType(0);
15513
15514 // Skip vectors.
15515 if (VT.isVector())
15516 return SDValue();
15517
15518 if (!Subtarget.hasConditionalMoveFusion()) {
15519 // (select cond, x, (and x, c)) has custom lowering with Zicond.
15520 if (!Subtarget.hasCZEROLike() || N->getOpcode() != ISD::AND)
15521 return SDValue();
15522
15523 // Maybe harmful when condition code has multiple use.
15524 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
15525 return SDValue();
15526
15527 // Maybe harmful when VT is wider than XLen.
15528 if (VT.getSizeInBits() > Subtarget.getXLen())
15529 return SDValue();
15530 }
15531
15532 if ((Slct.getOpcode() != ISD::SELECT &&
15533 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
15534 !Slct.hasOneUse())
15535 return SDValue();
15536
15537 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
15538 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
15539 };
15540
15541 bool SwapSelectOps;
15542 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
15543 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
15544 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
15545 SDValue NonConstantVal;
15546 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
15547 SwapSelectOps = false;
15548 NonConstantVal = FalseVal;
15549 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
15550 SwapSelectOps = true;
15551 NonConstantVal = TrueVal;
15552 } else
15553 return SDValue();
15554
15555 // Slct is now known to be the desired identity constant when CC is true.
15556 TrueVal = OtherOp;
15557 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
15558 // Unless SwapSelectOps says the condition should be false.
15559 if (SwapSelectOps)
15560 std::swap(TrueVal, FalseVal);
15561
15562 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
15563 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
15564 {Slct.getOperand(0), Slct.getOperand(1),
15565 Slct.getOperand(2), TrueVal, FalseVal});
15566
15567 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
15568 {Slct.getOperand(0), TrueVal, FalseVal});
15569}
15570
15571// Attempt combineSelectAndUse on each operand of a commutative operator N.
15572 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
15573 bool AllOnes,
15574 const RISCVSubtarget &Subtarget) {
15575 SDValue N0 = N->getOperand(0);
15576 SDValue N1 = N->getOperand(1);
15577 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
15578 return Result;
15579 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
15580 return Result;
15581 return SDValue();
15582}
15583
15584// Transform (add (mul x, c0), c1) ->
15585// (add (mul (add x, c1/c0), c0), c1%c0).
15586// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
15587// that should be excluded is when c0*(c1/c0) is simm12, which will lead
15588// to an infinite loop in DAGCombine if transformed.
15589// Or transform (add (mul x, c0), c1) ->
15590// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
15591// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
15592// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
15593// lead to an infinite loop in DAGCombine if transformed.
15594// Or transform (add (mul x, c0), c1) ->
15595// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
15596// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
15597// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
15598// lead to an infinite loop in DAGCombine if transformed.
15599// Or transform (add (mul x, c0), c1) ->
15600// (mul (add x, c1/c0), c0).
15601// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
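// For example, with c0 = 100 and c1 = 4098: c1 is not a simm12, but c1/c0 = 40 and
// c1%c0 = 98 are, and c0*(c1/c0) = 4000 is not, so the transform produces
// (add (mul (add x, 40), 100), 98).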
15602 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
15603 const RISCVSubtarget &Subtarget) {
15604 // Skip for vector types and larger types.
15605 EVT VT = N->getValueType(0);
15606 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
15607 return SDValue();
15608 // The first operand node must be a MUL and have no other use.
15609 SDValue N0 = N->getOperand(0);
15610 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
15611 return SDValue();
15612 // Check if c0 and c1 match above conditions.
15613 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
15614 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
15615 if (!N0C || !N1C)
15616 return SDValue();
15617 // If N0C has multiple uses it's possible one of the cases in
15618 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
15619 // in an infinite loop.
15620 if (!N0C->hasOneUse())
15621 return SDValue();
15622 int64_t C0 = N0C->getSExtValue();
15623 int64_t C1 = N1C->getSExtValue();
15624 int64_t CA, CB;
15625 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
15626 return SDValue();
15627 // Search for proper CA (non-zero) and CB that both are simm12.
15628 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
15629 !isInt<12>(C0 * (C1 / C0))) {
15630 CA = C1 / C0;
15631 CB = C1 % C0;
15632 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
15633 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
15634 CA = C1 / C0 + 1;
15635 CB = C1 % C0 - C0;
15636 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
15637 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
15638 CA = C1 / C0 - 1;
15639 CB = C1 % C0 + C0;
15640 } else
15641 return SDValue();
15642 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
15643 SDLoc DL(N);
15644 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
15645 DAG.getSignedConstant(CA, DL, VT));
15646 SDValue New1 =
15647 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getSignedConstant(C0, DL, VT));
15648 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getSignedConstant(CB, DL, VT));
15649}
15650
15651// add (zext, zext) -> zext (add (zext, zext))
15652// sub (zext, zext) -> sext (sub (zext, zext))
15653// mul (zext, zext) -> zext (mul (zext, zext))
15654// sdiv (zext, zext) -> zext (sdiv (zext, zext))
15655// udiv (zext, zext) -> zext (udiv (zext, zext))
15656// srem (zext, zext) -> zext (srem (zext, zext))
15657// urem (zext, zext) -> zext (urem (zext, zext))
15658//
15659 // where the sum of the extend widths matches, and the range of the bin op
15660// fits inside the width of the narrower bin op. (For profitability on rvv, we
15661// use a power of two for both inner and outer extend.)
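// For example, (add (zext nxv1i8 %a to nxv1i32), (zext nxv1i8 %b to nxv1i32)) becomes
// (zext (add (zext %a to nxv1i16), (zext %b to nxv1i16)) to nxv1i32).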
15662 static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
15663
15664 EVT VT = N->getValueType(0);
15665 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
15666 return SDValue();
15667
15668 SDValue N0 = N->getOperand(0);
15669 SDValue N1 = N->getOperand(1);
15670 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
15671 return SDValue();
15672 if (!N0.hasOneUse() || !N1.hasOneUse())
15673 return SDValue();
15674
15675 SDValue Src0 = N0.getOperand(0);
15676 SDValue Src1 = N1.getOperand(0);
15677 EVT SrcVT = Src0.getValueType();
15678 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
15679 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
15680 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
15681 return SDValue();
15682
15683 LLVMContext &C = *DAG.getContext();
15684 EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
15685 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
15686
15687 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
15688 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
15689
15690 // Src0 and Src1 are zero extended, so they're always positive if signed.
15691 //
15692 // sub can produce a negative from two positive operands, so it needs sign
15693 // extended. Other nodes produce a positive from two positive operands, so
15694 // zero extend instead.
15695 unsigned OuterExtend =
15696 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
15697
15698 return DAG.getNode(
15699 OuterExtend, SDLoc(N), VT,
15700 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
15701}
15702
15703// Try to turn (add (xor bool, 1) -1) into (neg bool).
15704 static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
15705 SDValue N0 = N->getOperand(0);
15706 SDValue N1 = N->getOperand(1);
15707 EVT VT = N->getValueType(0);
15708 SDLoc DL(N);
15709
15710 // RHS should be -1.
15711 if (!isAllOnesConstant(N1))
15712 return SDValue();
15713
15714 // Look for (xor X, 1).
15715 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
15716 return SDValue();
15717
15718 // First xor input should be 0 or 1.
15719 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15720 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
15721 return SDValue();
15722
15723 // Emit a negate of the setcc.
15724 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
15725 N0.getOperand(0));
15726}
15727
15728 static SDValue performADDCombine(SDNode *N,
15729 TargetLowering::DAGCombinerInfo &DCI,
15730 const RISCVSubtarget &Subtarget) {
15731 SelectionDAG &DAG = DCI.DAG;
15732 if (SDValue V = combineAddOfBooleanXor(N, DAG))
15733 return V;
15734 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
15735 return V;
15736 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) {
15737 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
15738 return V;
15739 if (SDValue V = combineShlAddIAdd(N, DAG, Subtarget))
15740 return V;
15741 }
15742 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
15743 return V;
15744 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
15745 return V;
15746 if (SDValue V = combineBinOpOfZExt(N, DAG))
15747 return V;
15748
15749 // fold (add (select lhs, rhs, cc, 0, y), x) ->
15750 // (select lhs, rhs, cc, x, (add x, y))
15751 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
15752}
15753
15754// Try to turn a sub boolean RHS and constant LHS into an addi.
15755 static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
15756 SDValue N0 = N->getOperand(0);
15757 SDValue N1 = N->getOperand(1);
15758 EVT VT = N->getValueType(0);
15759 SDLoc DL(N);
15760
15761 // Require a constant LHS.
15762 auto *N0C = dyn_cast<ConstantSDNode>(N0);
15763 if (!N0C)
15764 return SDValue();
15765
15766 // All our optimizations involve subtracting 1 from the immediate and forming
15767 // an ADDI. Make sure the new immediate is valid for an ADDI.
15768 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
15769 if (!ImmValMinus1.isSignedIntN(12))
15770 return SDValue();
15771
15772 SDValue NewLHS;
15773 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
15774 // (sub constant, (setcc x, y, eq/neq)) ->
15775 // (add (setcc x, y, neq/eq), constant - 1)
15776 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
15777 EVT SetCCOpVT = N1.getOperand(0).getValueType();
15778 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
15779 return SDValue();
15780 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
15781 NewLHS =
15782 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
15783 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
15784 N1.getOperand(0).getOpcode() == ISD::SETCC) {
15785 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
15786 // Since setcc returns a bool the xor is equivalent to 1-setcc.
15787 NewLHS = N1.getOperand(0);
15788 } else
15789 return SDValue();
15790
15791 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
15792 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
15793}
15794
15795// Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is
15796// potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X)
15797// is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is
15798// valid with Y=3, while 0b0000_1000_0000_0100 is not.
15799 static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
15800 const RISCVSubtarget &Subtarget) {
15801 if (!Subtarget.hasStdExtZbb())
15802 return SDValue();
15803
15804 EVT VT = N->getValueType(0);
15805
15806 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
15807 return SDValue();
15808
15809 SDValue N0 = N->getOperand(0);
15810 SDValue N1 = N->getOperand(1);
15811
15812 if (N0->getOpcode() != ISD::SHL)
15813 return SDValue();
15814
15815 auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(N0.getOperand(1));
15816 if (!ShAmtCLeft)
15817 return SDValue();
15818 unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();
15819
15820 if (ShiftedAmount >= 8)
15821 return SDValue();
15822
15823 SDValue LeftShiftOperand = N0->getOperand(0);
15824 SDValue RightShiftOperand = N1;
15825
15826 if (ShiftedAmount != 0) { // Right operand must be a right shift.
15827 if (N1->getOpcode() != ISD::SRL)
15828 return SDValue();
15829 auto *ShAmtCRight = dyn_cast<ConstantSDNode>(N1.getOperand(1));
15830 if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
15831 return SDValue();
15832 RightShiftOperand = N1.getOperand(0);
15833 }
15834
15835 // At least one shift should have a single use.
15836 if (!N0.hasOneUse() && (ShiftedAmount == 0 || !N1.hasOneUse()))
15837 return SDValue();
15838
15839 if (LeftShiftOperand != RightShiftOperand)
15840 return SDValue();
15841
15842 APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0x1));
15843 Mask <<= ShiftedAmount;
15844 // Check that X has indeed the right shape (only the Y-th bit can be set in
15845 // every byte).
15846 if (!DAG.MaskedValueIsZero(LeftShiftOperand, ~Mask))
15847 return SDValue();
15848
15849 return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, LeftShiftOperand);
15850}
15851
15852 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
15853 const RISCVSubtarget &Subtarget) {
15854 if (SDValue V = combineSubOfBoolean(N, DAG))
15855 return V;
15856
15857 EVT VT = N->getValueType(0);
15858 SDValue N0 = N->getOperand(0);
15859 SDValue N1 = N->getOperand(1);
15860 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
15861 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
15862 isNullConstant(N1.getOperand(1)) &&
15863 N1.getValueType() == N1.getOperand(0).getValueType()) {
15864 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
15865 if (CCVal == ISD::SETLT) {
15866 SDLoc DL(N);
15867 unsigned ShAmt = N0.getValueSizeInBits() - 1;
15868 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
15869 DAG.getConstant(ShAmt, DL, VT));
15870 }
15871 }
15872
15873 if (SDValue V = combineBinOpOfZExt(N, DAG))
15874 return V;
15875 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
15876 return V;
15877
15878 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
15879 // (select lhs, rhs, cc, x, (sub x, y))
15880 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
15881}
15882
15883// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
15884// Legalizing setcc can introduce xors like this. Doing this transform reduces
15885// the number of xors and may allow the xor to fold into a branch condition.
15886 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
15887 SDValue N0 = N->getOperand(0);
15888 SDValue N1 = N->getOperand(1);
15889 bool IsAnd = N->getOpcode() == ISD::AND;
15890
15891 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
15892 return SDValue();
15893
15894 if (!N0.hasOneUse() || !N1.hasOneUse())
15895 return SDValue();
15896
15897 SDValue N01 = N0.getOperand(1);
15898 SDValue N11 = N1.getOperand(1);
15899
15900 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
15901 // (xor X, -1) based on the upper bits of the other operand being 0. If the
15902 // operation is And, allow one of the Xors to use -1.
15903 if (isOneConstant(N01)) {
15904 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
15905 return SDValue();
15906 } else if (isOneConstant(N11)) {
15907 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
15908 if (!(IsAnd && isAllOnesConstant(N01)))
15909 return SDValue();
15910 } else
15911 return SDValue();
15912
15913 EVT VT = N->getValueType(0);
15914
15915 SDValue N00 = N0.getOperand(0);
15916 SDValue N10 = N1.getOperand(0);
15917
15918 // The LHS of the xors needs to be 0/1.
15919 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15920 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
15921 return SDValue();
15922
15923 // Invert the opcode and insert a new xor.
15924 SDLoc DL(N);
15925 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
15926 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
15927 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
15928}
15929
15930// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
15931// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
15932 // value to an unsigned value. This will be lowered to vmax and a series of
15933// vnclipu instructions later. This can be extended to other truncated types
15934// other than i8 by replacing 256 and 255 with the equivalent constants for the
15935// type.
15936 static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
15937 EVT VT = N->getValueType(0);
15938 SDValue N0 = N->getOperand(0);
15939 EVT SrcVT = N0.getValueType();
15940
15941 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15942 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
15943 return SDValue();
15944
15945 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
15946 return SDValue();
15947
15948 SDValue Cond = N0.getOperand(0);
15949 SDValue True = N0.getOperand(1);
15950 SDValue False = N0.getOperand(2);
15951
15952 if (Cond.getOpcode() != ISD::SETCC)
15953 return SDValue();
15954
15955 // FIXME: Support the version of this pattern with the select operands
15956 // swapped.
15957 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
15958 if (CCVal != ISD::SETULT)
15959 return SDValue();
15960
15961 SDValue CondLHS = Cond.getOperand(0);
15962 SDValue CondRHS = Cond.getOperand(1);
15963
15964 if (CondLHS != True)
15965 return SDValue();
15966
15967 unsigned ScalarBits = VT.getScalarSizeInBits();
15968
15969 // FIXME: Support other constants.
15970 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
15971 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
15972 return SDValue();
15973
15974 if (False.getOpcode() != ISD::SIGN_EXTEND)
15975 return SDValue();
15976
15977 False = False.getOperand(0);
15978
15979 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
15980 return SDValue();
15981
15982 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
15983 if (!FalseRHSC || !FalseRHSC->isZero())
15984 return SDValue();
15985
15986 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
15987 if (CCVal2 != ISD::SETGT)
15988 return SDValue();
15989
15990 // Emit the signed to unsigned saturation pattern.
15991 SDLoc DL(N);
15992 SDValue Max =
15993 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
15994 SDValue Min =
15995 DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
15996 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
15997 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
15998}
15999
16000 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
16001 const RISCVSubtarget &Subtarget) {
16002 SDValue N0 = N->getOperand(0);
16003 EVT VT = N->getValueType(0);
16004
16005 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
16006 // extending X. This is safe since we only need the LSB after the shift and
16007 // shift amounts larger than 31 would produce poison. If we wait until
16008 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
16009 // to use a BEXT instruction.
16010 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
16011 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
16012 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
16013 SDLoc DL(N0);
16014 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
16015 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
16016 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
16017 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
16018 }
16019
16020 return combineTruncSelectToSMaxUSat(N, DAG);
16021}
16022
16023// InstCombinerImpl::transformZExtICmp will narrow a zext of an icmp with a
16024// truncation. But RVV doesn't have truncation instructions for more than twice
16025// the bitwidth.
16026//
16027// E.g. trunc <vscale x 1 x i64> %x to <vscale x 1 x i8> will generate:
16028//
16029// vsetvli a0, zero, e32, m2, ta, ma
16030// vnsrl.wi v12, v8, 0
16031// vsetvli zero, zero, e16, m1, ta, ma
16032// vnsrl.wi v8, v12, 0
16033// vsetvli zero, zero, e8, mf2, ta, ma
16034// vnsrl.wi v8, v8, 0
16035//
16036// So reverse the combine so we generate an vmseq/vmsne again:
16037//
16038// and (lshr (trunc X), ShAmt), 1
16039// -->
16040// zext (icmp ne (and X, (1 << ShAmt)), 0)
16041//
16042// and (lshr (not (trunc X)), ShAmt), 1
16043// -->
16044// zext (icmp eq (and X, (1 << ShAmt)), 0)
16045 static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG,
16046 const RISCVSubtarget &Subtarget) {
16047 using namespace SDPatternMatch;
16048 SDLoc DL(N);
16049
16050 if (!Subtarget.hasVInstructions())
16051 return SDValue();
16052
16053 EVT VT = N->getValueType(0);
16054 if (!VT.isVector())
16055 return SDValue();
16056
16057 APInt ShAmt;
16058 SDValue Inner;
16059 if (!sd_match(N, m_And(m_OneUse(m_Srl(m_Value(Inner), m_ConstInt(ShAmt))),
16060 m_One())))
16061 return SDValue();
16062
16063 SDValue X;
16064 bool IsNot;
16065 if (sd_match(Inner, m_Not(m_Trunc(m_Value(X)))))
16066 IsNot = true;
16067 else if (sd_match(Inner, m_Trunc(m_Value(X))))
16068 IsNot = false;
16069 else
16070 return SDValue();
16071
16072 EVT WideVT = X.getValueType();
16073 if (VT.getScalarSizeInBits() >= WideVT.getScalarSizeInBits() / 2)
16074 return SDValue();
16075
16076 SDValue Res =
16077 DAG.getNode(ISD::AND, DL, WideVT, X,
16078 DAG.getConstant(1ULL << ShAmt.getZExtValue(), DL, WideVT));
16079 Res = DAG.getSetCC(DL,
16080 EVT::getVectorVT(*DAG.getContext(), MVT::i1,
16081 WideVT.getVectorElementCount()),
16082 Res, DAG.getConstant(0, DL, WideVT),
16083 IsNot ? ISD::SETEQ : ISD::SETNE);
16084 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
16085}
16086
16087 static SDValue reduceANDOfAtomicLoad(SDNode *N,
16088 TargetLowering::DAGCombinerInfo &DCI) {
16089 SelectionDAG &DAG = DCI.DAG;
16090 if (N->getOpcode() != ISD::AND)
16091 return SDValue();
16092
16093 SDValue N0 = N->getOperand(0);
16094 if (N0.getOpcode() != ISD::ATOMIC_LOAD)
16095 return SDValue();
16096 if (!N0.hasOneUse())
16097 return SDValue();
16098
16099 AtomicSDNode *ALoad = cast<AtomicSDNode>(N0.getNode());
16100 if (isStrongerThanMonotonic(ALoad->getSuccessOrdering()))
16101 return SDValue();
16102
16103 EVT LoadedVT = ALoad->getMemoryVT();
16104 ConstantSDNode *MaskConst = dyn_cast<ConstantSDNode>(N->getOperand(1));
16105 if (!MaskConst)
16106 return SDValue();
16107 uint64_t Mask = MaskConst->getZExtValue();
16108 uint64_t ExpectedMask = maskTrailingOnes<uint64_t>(LoadedVT.getSizeInBits());
16109 if (Mask != ExpectedMask)
16110 return SDValue();
16111
16112 SDValue ZextLoad = DAG.getAtomicLoad(
16113 ISD::ZEXTLOAD, SDLoc(N), ALoad->getMemoryVT(), N->getValueType(0),
16114 ALoad->getChain(), ALoad->getBasePtr(), ALoad->getMemOperand());
16115 DCI.CombineTo(N, ZextLoad);
16116 DAG.ReplaceAllUsesOfValueWith(SDValue(N0.getNode(), 1), ZextLoad.getValue(1));
16117 DCI.recursivelyDeleteUnusedNodes(N0.getNode());
16118 return SDValue(N, 0);
16119}
16120
16121 // Combines two comparison operations and a logic operation into one selection
16122 // operation (min, max) and a logic operation. Returns the newly constructed node
16123 // if the conditions for the optimization are satisfied.
16124 static SDValue performANDCombine(SDNode *N,
16125 TargetLowering::DAGCombinerInfo &DCI,
16126 const RISCVSubtarget &Subtarget) {
16127 SelectionDAG &DAG = DCI.DAG;
16128
16129 SDValue N0 = N->getOperand(0);
16130 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
16131 // extending X. This is safe since we only need the LSB after the shift and
16132 // shift amounts larger than 31 would produce poison. If we wait until
16133 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
16134 // to use a BEXT instruction.
16135 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
16136 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
16137 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
16138 N0.hasOneUse()) {
16139 SDLoc DL(N);
16140 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
16141 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
16142 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
16143 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
16144 DAG.getConstant(1, DL, MVT::i64));
16145 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
16146 }
16147
16148 if (SDValue V = reverseZExtICmpCombine(N, DAG, Subtarget))
16149 return V;
16150
16151 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16152 return V;
16153 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16154 return V;
16155 if (SDValue V = reduceANDOfAtomicLoad(N, DCI))
16156 return V;
16157
16158 if (DCI.isAfterLegalizeDAG())
16159 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
16160 return V;
16161
16162 // fold (and (select lhs, rhs, cc, -1, y), x) ->
16163 // (select lhs, rhs, cc, x, (and x, y))
16164 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
16165}
16166
16167// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
16168// FIXME: Generalize to other binary operators with same operand.
16169 static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
16170 SelectionDAG &DAG) {
16171 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
16172
16173 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
16174 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
16175 !N0.hasOneUse() || !N1.hasOneUse())
16176 return SDValue();
16177
16178 // Should have the same condition.
16179 SDValue Cond = N0.getOperand(1);
16180 if (Cond != N1.getOperand(1))
16181 return SDValue();
16182
16183 SDValue TrueV = N0.getOperand(0);
16184 SDValue FalseV = N1.getOperand(0);
16185
16186 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
16187 TrueV.getOperand(1) != FalseV.getOperand(1) ||
16188 !isOneConstant(TrueV.getOperand(1)) ||
16189 !TrueV.hasOneUse() || !FalseV.hasOneUse())
16190 return SDValue();
16191
16192 EVT VT = N->getValueType(0);
16193 SDLoc DL(N);
16194
16195 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
16196 Cond);
16197 SDValue NewN1 =
16198 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0), Cond);
16199 SDValue NewOr =
16200 DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1, SDNodeFlags::Disjoint);
16201 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
16202}
16203
16204// (xor X, (xor (and X, C2), Y))
16205// ->(qc_insb X, (sra Y, ShAmt), Width, ShAmt)
16206// where C2 is a shifted mask with width = Width and shift = ShAmt
16207// qc_insb might become qc.insb or qc.insbi depending on the operands.
16208 static SDValue combineXorToBitfieldInsert(SDNode *N, SelectionDAG &DAG,
16209 const RISCVSubtarget &Subtarget) {
16210 if (!Subtarget.hasVendorXqcibm())
16211 return SDValue();
16212
16213 using namespace SDPatternMatch;
16214 SDValue Base, Inserted;
16215 APInt CMask;
16216 if (!sd_match(N, m_Xor(m_Value(Base),
16217 m_OneUse(m_Xor(m_OneUse(m_And(m_Deferred(Base),
16218 m_ConstInt(CMask))),
16219 m_Value(Inserted))))))
16220 return SDValue();
16221
16222 if (N->getValueType(0) != MVT::i32)
16223 return SDValue();
16224 unsigned Width, ShAmt;
16225 if (!CMask.isShiftedMask(ShAmt, Width))
16226 return SDValue();
16227
16228 // Check if all zero bits in CMask are also zero in Inserted
16229 if (!DAG.MaskedValueIsZero(Inserted, ~CMask))
16230 return SDValue();
16231
16232 SDLoc DL(N);
16233
16234 // `Inserted` needs to be right shifted before it is put into the
16235 // instruction.
16236 Inserted = DAG.getNode(ISD::SRA, DL, MVT::i32, Inserted,
16237 DAG.getShiftAmountConstant(ShAmt, MVT::i32, DL));
16238
16239 SDValue Ops[] = {Base, Inserted, DAG.getConstant(Width, DL, MVT::i32),
16240 DAG.getConstant(ShAmt, DL, MVT::i32)};
16241 return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
16242}
16243
16244 static SDValue combineOrToBitfieldInsert(SDNode *N, SelectionDAG &DAG,
16245 const RISCVSubtarget &Subtarget) {
16246 if (!Subtarget.hasVendorXqcibm())
16247 return SDValue();
16248
16249 using namespace SDPatternMatch;
16250
16251 SDValue X;
16252 APInt MaskImm;
16253 if (!sd_match(N, m_Or(m_OneUse(m_Value(X)), m_ConstInt(MaskImm))))
16254 return SDValue();
16255
16256 unsigned ShAmt, Width;
16257 if (!MaskImm.isShiftedMask(ShAmt, Width) || MaskImm.isSignedIntN(12))
16258 return SDValue();
16259
16260 if (N->getValueType(0) != MVT::i32)
16261 return SDValue();
16262
16263 // If Zbs is enabled and the mask is a single set bit, we can use BSETI, which
16264 // can be compressed to C_BSETI when Xqcibm is enabled.
16265 if (Width == 1 && Subtarget.hasStdExtZbs())
16266 return SDValue();
16267
16268 // If C1 is a shifted mask (but can't be formed as an ORI),
16269 // use a bitfield insert of -1.
16270 // Transform (or x, C1)
16271 // -> (qc.insbi x, -1, width, shift)
16272 SDLoc DL(N);
16273
16274 SDValue Ops[] = {X, DAG.getSignedConstant(-1, DL, MVT::i32),
16275 DAG.getConstant(Width, DL, MVT::i32),
16276 DAG.getConstant(ShAmt, DL, MVT::i32)};
16277 return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
16278}
16279
16280// Generate a QC_INSB/QC_INSBI from 'or (and X, MaskImm), OrImm' iff the value
16281// being inserted only sets known zero bits.
16282 static SDValue combineOrAndToBitfieldInsert(SDNode *N, SelectionDAG &DAG,
16283 const RISCVSubtarget &Subtarget) {
16284 // Supported only in Xqcibm for now.
16285 if (!Subtarget.hasVendorXqcibm())
16286 return SDValue();
16287
16288 using namespace SDPatternMatch;
16289
16290 SDValue Inserted;
16291 APInt MaskImm, OrImm;
16292 if (!sd_match(
16293 N, m_SpecificVT(MVT::i32, m_Or(m_OneUse(m_And(m_Value(Inserted),
16294 m_ConstInt(MaskImm))),
16295 m_ConstInt(OrImm)))))
16296 return SDValue();
16297
16298 // Compute the Known Zero for the AND as this allows us to catch more general
16299 // cases than just looking for AND with imm.
16300 KnownBits Known = DAG.computeKnownBits(N->getOperand(0));
16301
16302 // The bits being inserted must only set those bits that are known to be
16303 // zero.
16304 if (!OrImm.isSubsetOf(Known.Zero)) {
16305 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
16306 // currently handle this case.
16307 return SDValue();
16308 }
16309
16310 unsigned ShAmt, Width;
16311 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
16312 if (!Known.Zero.isShiftedMask(ShAmt, Width))
16313 return SDValue();
16314
16315 // QC_INSB(I) dst, src, #width, #shamt.
16316 SDLoc DL(N);
16317
16318 SDValue ImmNode =
16319 DAG.getSignedConstant(OrImm.getSExtValue() >> ShAmt, DL, MVT::i32);
16320
16321 SDValue Ops[] = {Inserted, ImmNode, DAG.getConstant(Width, DL, MVT::i32),
16322 DAG.getConstant(ShAmt, DL, MVT::i32)};
16323 return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
16324}
16325
16326 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
16327 const RISCVSubtarget &Subtarget) {
16328 SelectionDAG &DAG = DCI.DAG;
16329
16330 if (SDValue V = combineOrToBitfieldInsert(N, DAG, Subtarget))
16331 return V;
16332 if (SDValue V = combineOrAndToBitfieldInsert(N, DAG, Subtarget))
16333 return V;
16334 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16335 return V;
16336 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16337 return V;
16338
16339 if (DCI.isAfterLegalizeDAG())
16340 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
16341 return V;
16342
16343 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
16344 // We may be able to pull a common operation out of the true and false value.
16345 SDValue N0 = N->getOperand(0);
16346 SDValue N1 = N->getOperand(1);
16347 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
16348 return V;
16349 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
16350 return V;
16351
16352 // fold (or (select cond, 0, y), x) ->
16353 // (select cond, x, (or x, y))
16354 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
16355}
16356
16357 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
16358 const RISCVSubtarget &Subtarget) {
16359 SDValue N0 = N->getOperand(0);
16360 SDValue N1 = N->getOperand(1);
16361
16362 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
16363 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
16364 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
16365 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
16366 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
16367 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
16368 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
16369 SDLoc DL(N);
16370 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
16371 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
16372 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
16373 SDValue Not = DAG.getNOT(DL, Shl, MVT::i64);
16374 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Not);
16375 }
16376
16377 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
16378 // NOTE: Assumes ROL being legal means ROLW is legal.
16379 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16380 if (N0.getOpcode() == RISCVISD::SLLW &&
16381 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
16382 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
16383 SDLoc DL(N);
16384 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
16385 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
16386 }
16387
16388 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
16389 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
16390 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
16391 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
16392 if (ConstN00 && CC == ISD::SETLT) {
16393 EVT VT = N0.getValueType();
16394 SDLoc DL(N0);
16395 const APInt &Imm = ConstN00->getAPIntValue();
16396 if ((Imm + 1).isSignedIntN(12))
16397 return DAG.getSetCC(DL, VT, N0.getOperand(1),
16398 DAG.getConstant(Imm + 1, DL, VT), CC);
16399 }
16400 }
16401
16402 if (SDValue V = combineXorToBitfieldInsert(N, DAG, Subtarget))
16403 return V;
16404
16405 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16406 return V;
16407 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16408 return V;
16409
16410 // fold (xor (select cond, 0, y), x) ->
16411 // (select cond, x, (xor x, y))
16412 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
16413}
16414
16415// Try to expand a multiply to a sequence of shifts and add/subs,
16416 // for a machine without a native mul instruction.
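// For example, MulAmt = 7 has non-adjacent form 8 - 1: the loop first subtracts X
// and then adds (shl X, 3), so 7*X is computed as (X << 3) - X.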
16417 static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG,
16418 uint64_t MulAmt) {
16419 SDLoc DL(N);
16420 EVT VT = N->getValueType(0);
16421 const uint64_t BitWidth = VT.getFixedSizeInBits();
16422
16423 SDValue Result = DAG.getConstant(0, DL, N->getValueType(0));
16424 SDValue N0 = N->getOperand(0);
16425
16426 // Find the Non-adjacent form of the multiplier.
16427 for (uint64_t E = MulAmt, I = 0; E && I < BitWidth; ++I, E >>= 1) {
16428 if (E & 1) {
16429 bool IsAdd = (E & 3) == 1;
16430 E -= IsAdd ? 1 : -1;
16431 SDValue ShiftVal = DAG.getNode(ISD::SHL, DL, VT, N0,
16432 DAG.getShiftAmountConstant(I, VT, DL));
16433 ISD::NodeType AddSubOp = IsAdd ? ISD::ADD : ISD::SUB;
16434 Result = DAG.getNode(AddSubOp, DL, VT, Result, ShiftVal);
16435 }
16436 }
16437
16438 return Result;
16439}
16440
16441// X * (2^N +/- 2^M) -> (add/sub (shl X, C1), (shl X, C2))
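// For example, MulAmt = 10 = 8 + 2 gives (add (shl X, 3), (shl X, 1)), while
// MulAmt = 12 (low bit 4, and 12 + 4 = 16) gives (sub (shl X, 4), (shl X, 2)).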
16442 static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
16443 uint64_t MulAmt) {
16444 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
16445 ISD::NodeType Op;
16446 uint64_t ShiftAmt1;
16447 if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
16448 Op = ISD::SUB;
16449 ShiftAmt1 = MulAmt + MulAmtLowBit;
16450 } else if (isPowerOf2_64(MulAmt - MulAmtLowBit)) {
16451 Op = ISD::ADD;
16452 ShiftAmt1 = MulAmt - MulAmtLowBit;
16453 } else {
16454 return SDValue();
16455 }
16456 EVT VT = N->getValueType(0);
16457 SDLoc DL(N);
16458 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16459 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
16460 SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16461 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
16462 return DAG.getNode(Op, DL, VT, Shift1, Shift2);
16463}
16464
16465// Try to expand a scalar multiply to a faster sequence.
16466 static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
16467 TargetLowering::DAGCombinerInfo &DCI,
16468 const RISCVSubtarget &Subtarget) {
16469
16470 EVT VT = N->getValueType(0);
16471
16472 // LI + MUL is usually smaller than the alternative sequence.
16473 if (DAG.getMachineFunction().getFunction().hasMinSize())
16474 return SDValue();
16475
16476 if (VT != Subtarget.getXLenVT())
16477 return SDValue();
16478
16479 bool ShouldExpandMul =
16480 (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) ||
16481 !Subtarget.hasStdExtZmmul();
16482 if (!ShouldExpandMul)
16483 return SDValue();
16484
16485 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
16486 if (!CNode)
16487 return SDValue();
16488 uint64_t MulAmt = CNode->getZExtValue();
16489
16490 // Don't do this if the Xqciac extension is enabled and the MulAmt is a simm12.
16491 if (Subtarget.hasVendorXqciac() && isInt<12>(CNode->getSExtValue()))
16492 return SDValue();
16493
16494 // WARNING: The code below is knowingly incorrect with regards to undef semantics.
16495 // We're adding additional uses of X here, and in principle, we should be freezing
16496 // X before doing so. However, adding freeze here causes real regressions, and no
16497 // other target properly freezes X in these cases either.
16498 SDValue X = N->getOperand(0);
16499
16500 if (Subtarget.hasShlAdd(3)) {
16501 for (uint64_t Divisor : {3, 5, 9}) {
16502 if (MulAmt % Divisor != 0)
16503 continue;
16504 uint64_t MulAmt2 = MulAmt / Divisor;
16505 // 3/5/9 * 2^N -> shl (shXadd X, X), N
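// For example, 40 = 5 * 8 -> (shl (SH2ADD X, X), 3).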
16506 if (isPowerOf2_64(MulAmt2)) {
16507 SDLoc DL(N);
16508 SDValue X = N->getOperand(0);
16509 // Put the shift first if we can fold a zext into the
16510 // shift forming a slli.uw.
16511 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
16512 X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
16513 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
16514 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
16515 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
16516 DAG.getConstant(Log2_64(Divisor - 1), DL, VT),
16517 Shl);
16518 }
16519 // Otherwise, put the shl second so that it can fold with the following
16520 // instructions (e.g. sext or add).
16521 SDValue Mul359 =
16522 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16523 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
16524 return DAG.getNode(ISD::SHL, DL, VT, Mul359,
16525 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
16526 }
16527
16528 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
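// For example, 45 = 5 * 9 -> (SH3ADD (SH2ADD X, X), (SH2ADD X, X)).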
16529 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
16530 SDLoc DL(N);
16531 SDValue Mul359 =
16532 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16533 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
16534 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
16535 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
16536 Mul359);
16537 }
16538 }
16539
16540 // If this is a power 2 + 2/4/8, we can use a shift followed by a single
16541 // shXadd. First check if this is a sum of two powers of 2 because that's
16542 // easy. Then count how many zeros are up to the first bit.
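// For example, 34 = 32 + 2 -> (SH1ADD X, (shl X, 5)), i.e. (X << 1) + (X << 5).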
16543 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
16544 unsigned ScaleShift = llvm::countr_zero(MulAmt);
16545 if (ScaleShift >= 1 && ScaleShift < 4) {
16546 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
16547 SDLoc DL(N);
16548 SDValue Shift1 =
16549 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
16550 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16551 DAG.getConstant(ScaleShift, DL, VT), Shift1);
16552 }
16553 }
16554
16555 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
16556 // This is the two-instruction form; there are also three-instruction
16557 // variants we could implement, e.g.
16558 // (2^(1,2,3) * 3,5,9 + 1) << C2
16559 // 2^(C1>3) * 3,5,9 +/- 1
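// For example, 19 = 2 * 9 + 1 -> (SH1ADD (SH3ADD X, X), X).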
16560 for (uint64_t Divisor : {3, 5, 9}) {
16561 uint64_t C = MulAmt - 1;
16562 if (C <= Divisor)
16563 continue;
16564 unsigned TZ = llvm::countr_zero(C);
16565 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
16566 SDLoc DL(N);
16567 SDValue Mul359 =
16568 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16569 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
16570 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
16571 DAG.getConstant(TZ, DL, VT), X);
16572 }
16573 }
16574
16575 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
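// For example, 69 = 64 + 4 + 1 -> (add (shl X, 6), (SH2ADD X, X)).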
16576 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
16577 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
16578 if (ScaleShift >= 1 && ScaleShift < 4) {
16579 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
16580 SDLoc DL(N);
16581 SDValue Shift1 =
16582 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
16583 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
16584 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16585 DAG.getConstant(ScaleShift, DL, VT), X));
16586 }
16587 }
16588
16589    // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, X))
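    // e.g. MulAmt == 29 == 32 - 3 can become (sub (shl X, 5), (sh1add X, X)).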
16590 for (uint64_t Offset : {3, 5, 9}) {
16591 if (isPowerOf2_64(MulAmt + Offset)) {
16592 unsigned ShAmt = Log2_64(MulAmt + Offset);
16593 if (ShAmt >= VT.getSizeInBits())
16594 continue;
16595 SDLoc DL(N);
16596 SDValue Shift1 =
16597 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShAmt, DL, VT));
16598 SDValue Mul359 =
16599 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16600 DAG.getConstant(Log2_64(Offset - 1), DL, VT), X);
16601 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
16602 }
16603 }
16604
16605 for (uint64_t Divisor : {3, 5, 9}) {
16606 if (MulAmt % Divisor != 0)
16607 continue;
16608 uint64_t MulAmt2 = MulAmt / Divisor;
16609 // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
16610 // of 25 which happen to be quite common.
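      // e.g. MulAmt == 100 == 5 * 5 * 4 can become
      // (shl (sh2add (sh2add X, X), (sh2add X, X)), 2).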
16611 for (uint64_t Divisor2 : {3, 5, 9}) {
16612 if (MulAmt2 % Divisor2 != 0)
16613 continue;
16614 uint64_t MulAmt3 = MulAmt2 / Divisor2;
16615 if (isPowerOf2_64(MulAmt3)) {
16616 SDLoc DL(N);
16617 SDValue Mul359A =
16618 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16619 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
16620 SDValue Mul359B = DAG.getNode(
16621 RISCVISD::SHL_ADD, DL, VT, Mul359A,
16622 DAG.getConstant(Log2_64(Divisor2 - 1), DL, VT), Mul359A);
16623 return DAG.getNode(ISD::SHL, DL, VT, Mul359B,
16624 DAG.getConstant(Log2_64(MulAmt3), DL, VT));
16625 }
16626 }
16627 }
16628 }
16629
16630 if (SDValue V = expandMulToAddOrSubOfShl(N, DAG, MulAmt))
16631 return V;
16632
16633 if (!Subtarget.hasStdExtZmmul())
16634 return expandMulToNAFSequence(N, DAG, MulAmt);
16635
16636 return SDValue();
16637}
16638
16639// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
16640// (bitcast (sra (v2Xi16 (bitcast X)), 15))
16641// Same for other equivalent types with other equivalent constants.
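// The and/mul pair copies the sign bit of each 16-bit half into every bit of
// that half, which is exactly an arithmetic shift right by 15 on the narrower
// element type, so the whole expression collapses to a single shift between
// the bitcasts.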
16642static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG) {
16643  EVT VT = N->getValueType(0);
16644 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16645
16646 // Do this for legal vectors unless they are i1 or i8 vectors.
16647 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
16648 return SDValue();
16649
16650 if (N->getOperand(0).getOpcode() != ISD::AND ||
16651 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
16652 return SDValue();
16653
16654 SDValue And = N->getOperand(0);
16655 SDValue Srl = And.getOperand(0);
16656
16657 APInt V1, V2, V3;
16658 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
16659 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
16660      !ISD::isConstantSplatVector(Srl.getOperand(1).getNode(), V3))
16661    return SDValue();
16662
16663 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
16664 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
16665 V3 != (HalfSize - 1))
16666 return SDValue();
16667
16668 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
16669 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
16670 VT.getVectorElementCount() * 2);
16671 SDLoc DL(N);
16672 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
16673 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
16674 DAG.getConstant(HalfSize - 1, DL, HalfVT));
16675 return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
16676}
16677
16678static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
16679                                 TargetLowering::DAGCombinerInfo &DCI,
16680                                 const RISCVSubtarget &Subtarget) {
16681 EVT VT = N->getValueType(0);
16682 if (!VT.isVector())
16683 return expandMul(N, DAG, DCI, Subtarget);
16684
16685 SDLoc DL(N);
16686 SDValue N0 = N->getOperand(0);
16687 SDValue N1 = N->getOperand(1);
16688 SDValue MulOper;
16689 unsigned AddSubOpc;
16690
16691 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
16692 // (mul x, add (y, 1)) -> (add x, (mul x, y))
16693 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
16694 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
16695 auto IsAddSubWith1 = [&](SDValue V) -> bool {
16696 AddSubOpc = V->getOpcode();
16697 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
16698 SDValue Opnd = V->getOperand(1);
16699 MulOper = V->getOperand(0);
16700 if (AddSubOpc == ISD::SUB)
16701 std::swap(Opnd, MulOper);
16702 if (isOneOrOneSplat(Opnd))
16703 return true;
16704 }
16705 return false;
16706 };
16707
16708 if (IsAddSubWith1(N0)) {
16709 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
16710 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
16711 }
16712
16713 if (IsAddSubWith1(N1)) {
16714 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
16715 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
16716 }
16717
16718 if (SDValue V = combineBinOpOfZExt(N, DAG))
16719 return V;
16720
16721  if (SDValue V = combineVectorMulToSraBitcast(N, DAG))
16722    return V;
16723
16724 return SDValue();
16725}
16726
16727/// According to the property that indexed load/store instructions zero-extend
16728/// their indices, try to narrow the type of the index operand.
16729static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
16730 if (isIndexTypeSigned(IndexType))
16731 return false;
16732
16733 if (!N->hasOneUse())
16734 return false;
16735
16736 EVT VT = N.getValueType();
16737 SDLoc DL(N);
16738
16739 // In general, what we're doing here is seeing if we can sink a truncate to
16740 // a smaller element type into the expression tree building our index.
16741 // TODO: We can generalize this and handle a bunch more cases if useful.
16742
16743 // Narrow a buildvector to the narrowest element type. This requires less
16744 // work and less register pressure at high LMUL, and creates smaller constants
16745 // which may be cheaper to materialize.
16746 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
16747 KnownBits Known = DAG.computeKnownBits(N);
16748 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
16749 LLVMContext &C = *DAG.getContext();
16750 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
16751 if (ResultVT.bitsLT(VT.getVectorElementType())) {
16752 N = DAG.getNode(ISD::TRUNCATE, DL,
16753 VT.changeVectorElementType(ResultVT), N);
16754 return true;
16755 }
16756 }
16757
16758 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
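  // e.g. an index built as (shl (zext vXi8 to vXi64), 2) uses at most 10 bits,
  // so it can instead be built with i16 elements and left to the indexed
  // access to zero-extend.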
16759 if (N.getOpcode() != ISD::SHL)
16760 return false;
16761
16762 SDValue N0 = N.getOperand(0);
16763 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
16764 N0.getOpcode() != RISCVISD::VZEXT_VL)
16765 return false;
16766 if (!N0->hasOneUse())
16767 return false;
16768
16769 APInt ShAmt;
16770 SDValue N1 = N.getOperand(1);
16771 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
16772 return false;
16773
16774 SDValue Src = N0.getOperand(0);
16775 EVT SrcVT = Src.getValueType();
16776 unsigned SrcElen = SrcVT.getScalarSizeInBits();
16777 unsigned ShAmtV = ShAmt.getZExtValue();
16778 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
16779 NewElen = std::max(NewElen, 8U);
16780
16781 // Skip if NewElen is not narrower than the original extended type.
16782 if (NewElen >= N0.getValueType().getScalarSizeInBits())
16783 return false;
16784
16785 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
16786 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
16787
16788 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
16789 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
16790 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
16791 return true;
16792}
16793
16794/// Try to map an integer comparison with size > XLEN to vector instructions
16795/// before type legalization splits it up into chunks.
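/// E.g. on RV64 with vector support, an i256 equality compare can be lowered
/// as a v32i8 SETNE whose result is OR-reduced, instead of being split into
/// four XLen-sized pieces.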
16796static SDValue
16797combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
16798                                const SDLoc &DL, SelectionDAG &DAG,
16799 const RISCVSubtarget &Subtarget) {
16800 assert(ISD::isIntEqualitySetCC(CC) && "Bad comparison predicate");
16801
16802 if (!Subtarget.hasVInstructions())
16803 return SDValue();
16804
16805 MVT XLenVT = Subtarget.getXLenVT();
16806 EVT OpVT = X.getValueType();
16807 // We're looking for an oversized integer equality comparison.
16808 if (!OpVT.isScalarInteger())
16809 return SDValue();
16810
16811 unsigned OpSize = OpVT.getSizeInBits();
16812 // The size should be larger than XLen and smaller than the maximum vector
16813 // size.
16814 if (OpSize <= Subtarget.getXLen() ||
16815 OpSize > Subtarget.getRealMinVLen() *
16816               Subtarget.getMaxLMULForFixedLengthVectors())
16817    return SDValue();
16818
16819 // Don't perform this combine if constructing the vector will be expensive.
16820 auto IsVectorBitCastCheap = [](SDValue X) {
16822 return isa<ConstantSDNode>(X) || X.getValueType().isVector() ||
16823 X.getOpcode() == ISD::LOAD;
16824 };
16825 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
16826 return SDValue();
16827
16828  if (DAG.getMachineFunction().getFunction().hasFnAttribute(
16829          Attribute::NoImplicitFloat))
16830 return SDValue();
16831
16832 // Bail out for non-byte-sized types.
16833 if (!OpVT.isByteSized())
16834 return SDValue();
16835
16836 unsigned VecSize = OpSize / 8;
16837 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, VecSize);
16838 EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, VecSize);
16839
16840 SDValue VecX = DAG.getBitcast(VecVT, X);
16841 SDValue VecY = DAG.getBitcast(VecVT, Y);
16842 SDValue Mask = DAG.getAllOnesConstant(DL, CmpVT);
16843 SDValue VL = DAG.getConstant(VecSize, DL, XLenVT);
16844
16845 SDValue Cmp = DAG.getNode(ISD::VP_SETCC, DL, CmpVT, VecX, VecY,
16846 DAG.getCondCode(ISD::SETNE), Mask, VL);
16847 return DAG.getSetCC(DL, VT,
16848 DAG.getNode(ISD::VP_REDUCE_OR, DL, XLenVT,
16849 DAG.getConstant(0, DL, XLenVT), Cmp, Mask,
16850 VL),
16851 DAG.getConstant(0, DL, XLenVT), CC);
16852}
16853
16854static SDValue performSETCCCombine(SDNode *N,
16855                                   TargetLowering::DAGCombinerInfo &DCI,
16856                                 const RISCVSubtarget &Subtarget) {
16857 SelectionDAG &DAG = DCI.DAG;
16858 SDLoc dl(N);
16859 SDValue N0 = N->getOperand(0);
16860 SDValue N1 = N->getOperand(1);
16861 EVT VT = N->getValueType(0);
16862 EVT OpVT = N0.getValueType();
16863
16864 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
16865 // Looking for an equality compare.
16866 if (!isIntEqualitySetCC(Cond))
16867 return SDValue();
16868
16869 if (SDValue V =
16870 combineVectorSizedSetCCEquality(VT, N0, N1, Cond, dl, DAG, Subtarget))
16871 return V;
16872
16873 if (DCI.isAfterLegalizeDAG() && isa<ConstantSDNode>(N1) &&
16874 N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
16875      isa<ConstantSDNode>(N0.getOperand(1))) {
16876    const APInt &AndRHSC = N0.getConstantOperandAPInt(1);
16877 // (X & -(1 << C)) == 0 -> (X >> C) == 0 if the AND constant can't use ANDI.
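    // e.g. on RV64, (seteq (and X, 0xfffffffffffff000), 0) becomes
    // (seteq (srl X, 12), 0), turning lui+and+seqz into srli+seqz.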
16878 if (isNullConstant(N1) && !isInt<12>(AndRHSC.getSExtValue()) &&
16879 AndRHSC.isNegatedPowerOf2()) {
16880 unsigned ShiftBits = AndRHSC.countr_zero();
16881 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, N0.getOperand(0),
16882 DAG.getConstant(ShiftBits, dl, OpVT));
16883 return DAG.getSetCC(dl, VT, Shift, N1, Cond);
16884 }
16885
16886 // Similar to above but handling the lower 32 bits by using sraiw. Allow
16887 // comparing with constants other than 0 if the constant can be folded into
16888 // addi or xori after shifting.
16889 uint64_t N1Int = cast<ConstantSDNode>(N1)->getZExtValue();
16890 uint64_t AndRHSInt = AndRHSC.getZExtValue();
16891 if (OpVT == MVT::i64 && isUInt<32>(AndRHSInt) &&
16892 isPowerOf2_32(-uint32_t(AndRHSInt)) && (N1Int & AndRHSInt) == N1Int) {
16893 unsigned ShiftBits = llvm::countr_zero(AndRHSInt);
16894 int64_t NewC = SignExtend64<32>(N1Int) >> ShiftBits;
16895 if (NewC >= -2048 && NewC <= 2048) {
16896 SDValue SExt =
16897 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, OpVT, N0.getOperand(0),
16898 DAG.getValueType(MVT::i32));
16899 SDValue Shift = DAG.getNode(ISD::SRA, dl, OpVT, SExt,
16900 DAG.getConstant(ShiftBits, dl, OpVT));
16901 return DAG.getSetCC(dl, VT, Shift,
16902 DAG.getSignedConstant(NewC, dl, OpVT), Cond);
16903 }
16904 }
16905 }
16906
16907 // Replace (seteq (i64 (and X, 0xffffffff)), C1) with
16908 // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
16909 // bit 31. Same for setne. C1' may be cheaper to materialize and the
16910 // sext_inreg can become a sext.w instead of a shift pair.
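  // e.g. (seteq (and X, 0xffffffff), 0x80000000) becomes
  // (seteq (sext_inreg X, i32), 0xffffffff80000000); the new constant is a
  // single lui on RV64 and sext.w replaces the slli+srli pair.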
16911 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
16912 return SDValue();
16913
16914 // RHS needs to be a constant.
16915 auto *N1C = dyn_cast<ConstantSDNode>(N1);
16916 if (!N1C)
16917 return SDValue();
16918
16919 // LHS needs to be (and X, 0xffffffff).
16920 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
16921      !isa<ConstantSDNode>(N0.getOperand(1)) ||
16922      N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
16923 return SDValue();
16924
16925  // Don't do this if the sign bit is provably zero; it will be turned back into
16926 // an AND.
16927 APInt SignMask = APInt::getOneBitSet(64, 31);
16928 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
16929 return SDValue();
16930
16931 const APInt &C1 = N1C->getAPIntValue();
16932
16933 // If the constant is larger than 2^32 - 1 it is impossible for both sides
16934 // to be equal.
16935 if (C1.getActiveBits() > 32)
16936 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
16937
16938 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
16939 N0.getOperand(0), DAG.getValueType(MVT::i32));
16940 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
16941 dl, OpVT), Cond);
16942}
16943
16944static SDValue
16945performSIGN_EXTEND_INREGCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
16946                                  const RISCVSubtarget &Subtarget) {
16947 SelectionDAG &DAG = DCI.DAG;
16948 SDValue Src = N->getOperand(0);
16949 EVT VT = N->getValueType(0);
16950 EVT SrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
16951 unsigned Opc = Src.getOpcode();
16952 SDLoc DL(N);
16953
16954 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
16955 // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
16956 if (Opc == RISCVISD::FMV_X_ANYEXTH && SrcVT.bitsGE(MVT::i16) &&
16957 Subtarget.hasStdExtZfhmin())
16958 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, DL, VT, Src.getOperand(0));
16959
16960 // Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32
16961 if (Opc == ISD::SHL && Subtarget.is64Bit() && SrcVT == MVT::i32 &&
16962 VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) &&
16963 DAG.computeKnownBits(Src.getOperand(1)).countMaxActiveBits() <= 5)
16964 return DAG.getNode(RISCVISD::SLLW, DL, VT, Src.getOperand(0),
16965 Src.getOperand(1));
16966
16967 // Fold (sext_inreg (setcc), i1) -> (sub 0, (setcc))
16968 if (Opc == ISD::SETCC && SrcVT == MVT::i1 && DCI.isAfterLegalizeDAG())
16969 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src);
16970
16971 // Fold (sext_inreg (xor (setcc), -1), i1) -> (add (setcc), -1)
16972 if (Opc == ISD::XOR && SrcVT == MVT::i1 &&
16973 isAllOnesConstant(Src.getOperand(1)) &&
16974 Src.getOperand(0).getOpcode() == ISD::SETCC && DCI.isAfterLegalizeDAG())
16975 return DAG.getNode(ISD::ADD, DL, VT, Src.getOperand(0),
16976 DAG.getAllOnesConstant(DL, VT));
16977
16978 return SDValue();
16979}
16980
16981namespace {
16982// Forward declaration of the structure holding the necessary information to
16983// apply a combine.
16984struct CombineResult;
16985
16986enum ExtKind : uint8_t {
16987 ZExt = 1 << 0,
16988 SExt = 1 << 1,
16989 FPExt = 1 << 2,
16990 BF16Ext = 1 << 3
16991};
16992/// Helper class for folding sign/zero extensions.
16993/// In particular, this class is used for the following combines:
16994/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
16995/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
16996/// mul | mul_vl -> vwmul(u) | vwmul_su
16997/// shl | shl_vl -> vwsll
16998/// fadd -> vfwadd | vfwadd_w
16999/// fsub -> vfwsub | vfwsub_w
17000/// fmul -> vfwmul
17001/// An object of this class represents an operand of the operation we want to
17002/// combine.
17003/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
17004/// NodeExtensionHelper for `a` and one for `b`.
17005///
17006/// This class abstracts away how the extension is materialized and
17007/// how its number of users affect the combines.
17008///
17009/// In particular:
17010/// - VWADD_W is conceptually == add(op0, sext(op1))
17011/// - VWADDU_W == add(op0, zext(op1))
17012/// - VWSUB_W == sub(op0, sext(op1))
17013/// - VWSUBU_W == sub(op0, zext(op1))
17014/// - VFWADD_W == fadd(op0, fpext(op1))
17015/// - VFWSUB_W == fsub(op0, fpext(op1))
17016/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
17017/// zext|sext(smaller_value).
17018struct NodeExtensionHelper {
17019 /// Records if this operand is like being zero extended.
17020 bool SupportsZExt;
17021 /// Records if this operand is like being sign extended.
17022 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
17023 /// instance, a splat constant (e.g., 3), would support being both sign and
17024 /// zero extended.
17025 bool SupportsSExt;
17026 /// Records if this operand is like being floating point extended.
17027 bool SupportsFPExt;
17028 /// Records if this operand is extended from bf16.
17029 bool SupportsBF16Ext;
17030 /// This boolean captures whether we care if this operand would still be
17031 /// around after the folding happens.
17032 bool EnforceOneUse;
17033 /// Original value that this NodeExtensionHelper represents.
17034 SDValue OrigOperand;
17035
17036 /// Get the value feeding the extension or the value itself.
17037 /// E.g., for zext(a), this would return a.
17038 SDValue getSource() const {
17039 switch (OrigOperand.getOpcode()) {
17040 case ISD::ZERO_EXTEND:
17041 case ISD::SIGN_EXTEND:
17042 case RISCVISD::VSEXT_VL:
17043 case RISCVISD::VZEXT_VL:
17044 case RISCVISD::FP_EXTEND_VL:
17045 return OrigOperand.getOperand(0);
17046 default:
17047 return OrigOperand;
17048 }
17049 }
17050
17051 /// Check if this instance represents a splat.
17052 bool isSplat() const {
17053 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
17054 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
17055 }
17056
17057 /// Get the extended opcode.
17058 unsigned getExtOpc(ExtKind SupportsExt) const {
17059 switch (SupportsExt) {
17060 case ExtKind::SExt:
17061 return RISCVISD::VSEXT_VL;
17062 case ExtKind::ZExt:
17063 return RISCVISD::VZEXT_VL;
17064 case ExtKind::FPExt:
17065 case ExtKind::BF16Ext:
17066 return RISCVISD::FP_EXTEND_VL;
17067 }
17068 llvm_unreachable("Unknown ExtKind enum");
17069 }
17070
17071 /// Get or create a value that can feed \p Root with the given extension \p
17072  /// SupportsExt. If \p SupportsExt is std::nullopt, this returns the source of
17073  /// this
17073 /// operand. \see ::getSource().
17074 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
17075 const RISCVSubtarget &Subtarget,
17076 std::optional<ExtKind> SupportsExt) const {
17077 if (!SupportsExt.has_value())
17078 return OrigOperand;
17079
17080 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
17081
17082 SDValue Source = getSource();
17083 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
17084 if (Source.getValueType() == NarrowVT)
17085 return Source;
17086
17087 unsigned ExtOpc = getExtOpc(*SupportsExt);
17088
17089 // If we need an extension, we should be changing the type.
17090 SDLoc DL(OrigOperand);
17091 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
17092 switch (OrigOperand.getOpcode()) {
17093 case ISD::ZERO_EXTEND:
17094 case ISD::SIGN_EXTEND:
17095 case RISCVISD::VSEXT_VL:
17096 case RISCVISD::VZEXT_VL:
17097 case RISCVISD::FP_EXTEND_VL:
17098 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
17099 case ISD::SPLAT_VECTOR:
17100 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
17101 case RISCVISD::VMV_V_X_VL:
17102 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
17103 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
17104 case RISCVISD::VFMV_V_F_VL:
17105 Source = Source.getOperand(1);
17106 assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
17107 Source = Source.getOperand(0);
17108 assert(Source.getValueType() == NarrowVT.getVectorElementType());
17109 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
17110 DAG.getUNDEF(NarrowVT), Source, VL);
17111 default:
17112 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
17113 // and that operand should already have the right NarrowVT so no
17114 // extension should be required at this point.
17115 llvm_unreachable("Unsupported opcode");
17116 }
17117 }
17118
17119 /// Helper function to get the narrow type for \p Root.
17120 /// The narrow type is the type of \p Root where we divided the size of each
17121 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
17122 /// \pre Both the narrow type and the original type should be legal.
17123 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
17124 MVT VT = Root->getSimpleValueType(0);
17125
17126 // Determine the narrow size.
17127 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
17128
17129 MVT EltVT = SupportsExt == ExtKind::BF16Ext ? MVT::bf16
17130 : SupportsExt == ExtKind::FPExt
17131 ? MVT::getFloatingPointVT(NarrowSize)
17132 : MVT::getIntegerVT(NarrowSize);
17133
17134 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
17135 "Trying to extend something we can't represent");
17136 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
17137 return NarrowVT;
17138 }
17139
17140 /// Get the opcode to materialize:
17141 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
17142 static unsigned getSExtOpcode(unsigned Opcode) {
17143 switch (Opcode) {
17144 case ISD::ADD:
17145 case RISCVISD::ADD_VL:
17146 case RISCVISD::VWADD_W_VL:
17147 case RISCVISD::VWADDU_W_VL:
17148 case ISD::OR:
17149 case RISCVISD::OR_VL:
17150 return RISCVISD::VWADD_VL;
17151 case ISD::SUB:
17152 case RISCVISD::SUB_VL:
17153 case RISCVISD::VWSUB_W_VL:
17154 case RISCVISD::VWSUBU_W_VL:
17155 return RISCVISD::VWSUB_VL;
17156 case ISD::MUL:
17157 case RISCVISD::MUL_VL:
17158 return RISCVISD::VWMUL_VL;
17159 default:
17160 llvm_unreachable("Unexpected opcode");
17161 }
17162 }
17163
17164 /// Get the opcode to materialize:
17165 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
17166 static unsigned getZExtOpcode(unsigned Opcode) {
17167 switch (Opcode) {
17168 case ISD::ADD:
17169 case RISCVISD::ADD_VL:
17170 case RISCVISD::VWADD_W_VL:
17171 case RISCVISD::VWADDU_W_VL:
17172 case ISD::OR:
17173 case RISCVISD::OR_VL:
17174 return RISCVISD::VWADDU_VL;
17175 case ISD::SUB:
17176 case RISCVISD::SUB_VL:
17177 case RISCVISD::VWSUB_W_VL:
17178 case RISCVISD::VWSUBU_W_VL:
17179 return RISCVISD::VWSUBU_VL;
17180 case ISD::MUL:
17181 case RISCVISD::MUL_VL:
17182 return RISCVISD::VWMULU_VL;
17183 case ISD::SHL:
17184 case RISCVISD::SHL_VL:
17185 return RISCVISD::VWSLL_VL;
17186 default:
17187 llvm_unreachable("Unexpected opcode");
17188 }
17189 }
17190
17191 /// Get the opcode to materialize:
17192 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
17193 static unsigned getFPExtOpcode(unsigned Opcode) {
17194 switch (Opcode) {
17195 case RISCVISD::FADD_VL:
17196 case RISCVISD::VFWADD_W_VL:
17197 return RISCVISD::VFWADD_VL;
17198 case RISCVISD::FSUB_VL:
17199 case RISCVISD::VFWSUB_W_VL:
17200 return RISCVISD::VFWSUB_VL;
17201 case RISCVISD::FMUL_VL:
17202 return RISCVISD::VFWMUL_VL;
17203 case RISCVISD::VFMADD_VL:
17204 return RISCVISD::VFWMADD_VL;
17205 case RISCVISD::VFMSUB_VL:
17206 return RISCVISD::VFWMSUB_VL;
17207 case RISCVISD::VFNMADD_VL:
17208 return RISCVISD::VFWNMADD_VL;
17209 case RISCVISD::VFNMSUB_VL:
17210 return RISCVISD::VFWNMSUB_VL;
17211 default:
17212 llvm_unreachable("Unexpected opcode");
17213 }
17214 }
17215
17216 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
17217 /// newOpcode(a, b).
17218 static unsigned getSUOpcode(unsigned Opcode) {
17219 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
17220 "SU is only supported for MUL");
17221 return RISCVISD::VWMULSU_VL;
17222 }
17223
17224 /// Get the opcode to materialize
17225 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
17226 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
17227 switch (Opcode) {
17228 case ISD::ADD:
17229 case RISCVISD::ADD_VL:
17230 case ISD::OR:
17231 case RISCVISD::OR_VL:
17232 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
17233 : RISCVISD::VWADDU_W_VL;
17234 case ISD::SUB:
17235 case RISCVISD::SUB_VL:
17236 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
17237 : RISCVISD::VWSUBU_W_VL;
17238 case RISCVISD::FADD_VL:
17239 return RISCVISD::VFWADD_W_VL;
17240 case RISCVISD::FSUB_VL:
17241 return RISCVISD::VFWSUB_W_VL;
17242 default:
17243 llvm_unreachable("Unexpected opcode");
17244 }
17245 }
17246
17247 using CombineToTry = std::function<std::optional<CombineResult>(
17248 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
17249 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
17250 const RISCVSubtarget &)>;
17251
17252 /// Check if this node needs to be fully folded or extended for all users.
17253 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
17254
17255 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
17256 const RISCVSubtarget &Subtarget) {
17257 unsigned Opc = OrigOperand.getOpcode();
17258 MVT VT = OrigOperand.getSimpleValueType();
17259
17260 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
17261 "Unexpected Opcode");
17262
17263    // The passthru must be undef for tail agnostic.
17264 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
17265 return;
17266
17267 // Get the scalar value.
17268 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
17269 : OrigOperand.getOperand(1);
17270
17271 // See if we have enough sign bits or zero bits in the scalar to use a
17272 // widening opcode by splatting to smaller element size.
17273 unsigned EltBits = VT.getScalarSizeInBits();
17274 unsigned ScalarBits = Op.getValueSizeInBits();
17275 // If we're not getting all bits from the element, we need special handling.
17276 if (ScalarBits < EltBits) {
17277 // This should only occur on RV32.
17278 assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
17279 !Subtarget.is64Bit() && "Unexpected splat");
17280 // vmv.v.x sign extends narrow inputs.
17281 SupportsSExt = true;
17282
17283 // If the input is positive, then sign extend is also zero extend.
17284 if (DAG.SignBitIsZero(Op))
17285 SupportsZExt = true;
17286
17287 EnforceOneUse = false;
17288 return;
17289 }
17290
17291 unsigned NarrowSize = EltBits / 2;
17292 // If the narrow type cannot be expressed with a legal VMV,
17293 // this is not a valid candidate.
17294 if (NarrowSize < 8)
17295 return;
17296
17297 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
17298 SupportsSExt = true;
17299
17300 if (DAG.MaskedValueIsZero(Op,
17301 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
17302 SupportsZExt = true;
17303
17304 EnforceOneUse = false;
17305 }
17306
17307 bool isSupportedFPExtend(MVT NarrowEltVT, const RISCVSubtarget &Subtarget) {
17308 return (NarrowEltVT == MVT::f32 ||
17309 (NarrowEltVT == MVT::f16 && Subtarget.hasVInstructionsF16()));
17310 }
17311
17312 bool isSupportedBF16Extend(MVT NarrowEltVT, const RISCVSubtarget &Subtarget) {
17313 return NarrowEltVT == MVT::bf16 && Subtarget.hasStdExtZvfbfwma();
17314 }
17315
17316 /// Helper method to set the various fields of this struct based on the
17317 /// type of \p Root.
17318 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
17319 const RISCVSubtarget &Subtarget) {
17320 SupportsZExt = false;
17321 SupportsSExt = false;
17322 SupportsFPExt = false;
17323 SupportsBF16Ext = false;
17324 EnforceOneUse = true;
17325 unsigned Opc = OrigOperand.getOpcode();
17326 // For the nodes we handle below, we end up using their inputs directly: see
17327 // getSource(). However since they either don't have a passthru or we check
17328 // that their passthru is undef, we can safely ignore their mask and VL.
17329 switch (Opc) {
17330 case ISD::ZERO_EXTEND:
17331 case ISD::SIGN_EXTEND: {
17332 MVT VT = OrigOperand.getSimpleValueType();
17333 if (!VT.isVector())
17334 break;
17335
17336 SDValue NarrowElt = OrigOperand.getOperand(0);
17337 MVT NarrowVT = NarrowElt.getSimpleValueType();
17338 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
17339 if (NarrowVT.getVectorElementType() == MVT::i1)
17340 break;
17341
17342 SupportsZExt = Opc == ISD::ZERO_EXTEND;
17343 SupportsSExt = Opc == ISD::SIGN_EXTEND;
17344 break;
17345 }
17346 case RISCVISD::VZEXT_VL:
17347 SupportsZExt = true;
17348 break;
17349 case RISCVISD::VSEXT_VL:
17350 SupportsSExt = true;
17351 break;
17352 case RISCVISD::FP_EXTEND_VL: {
17353 MVT NarrowEltVT =
17354          OrigOperand.getOperand(0).getSimpleValueType().getVectorElementType();
17355      if (isSupportedFPExtend(NarrowEltVT, Subtarget))
17356 SupportsFPExt = true;
17357 if (isSupportedBF16Extend(NarrowEltVT, Subtarget))
17358 SupportsBF16Ext = true;
17359
17360 break;
17361 }
17362 case ISD::SPLAT_VECTOR:
17363 case RISCVISD::VMV_V_X_VL:
17364 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
17365 break;
17366 case RISCVISD::VFMV_V_F_VL: {
17367 MVT VT = OrigOperand.getSimpleValueType();
17368
17369 if (!OrigOperand.getOperand(0).isUndef())
17370 break;
17371
17372 SDValue Op = OrigOperand.getOperand(1);
17373 if (Op.getOpcode() != ISD::FP_EXTEND)
17374 break;
17375
17376 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
17377 unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
17378 if (NarrowSize != ScalarBits)
17379 break;
17380
17381 if (isSupportedFPExtend(Op.getOperand(0).getSimpleValueType(), Subtarget))
17382 SupportsFPExt = true;
17383 if (isSupportedBF16Extend(Op.getOperand(0).getSimpleValueType(),
17384 Subtarget))
17385 SupportsBF16Ext = true;
17386 break;
17387 }
17388 default:
17389 break;
17390 }
17391 }
17392
17393 /// Check if \p Root supports any extension folding combines.
17394 static bool isSupportedRoot(const SDNode *Root,
17395 const RISCVSubtarget &Subtarget) {
17396 switch (Root->getOpcode()) {
17397 case ISD::ADD:
17398 case ISD::SUB:
17399 case ISD::MUL: {
17400 return Root->getValueType(0).isScalableVector();
17401 }
17402 case ISD::OR: {
17403 return Root->getValueType(0).isScalableVector() &&
17404 Root->getFlags().hasDisjoint();
17405 }
17406 // Vector Widening Integer Add/Sub/Mul Instructions
17407 case RISCVISD::ADD_VL:
17408 case RISCVISD::MUL_VL:
17409 case RISCVISD::VWADD_W_VL:
17410 case RISCVISD::VWADDU_W_VL:
17411 case RISCVISD::SUB_VL:
17412 case RISCVISD::VWSUB_W_VL:
17413 case RISCVISD::VWSUBU_W_VL:
17414 // Vector Widening Floating-Point Add/Sub/Mul Instructions
17415 case RISCVISD::FADD_VL:
17416 case RISCVISD::FSUB_VL:
17417 case RISCVISD::FMUL_VL:
17418 case RISCVISD::VFWADD_W_VL:
17419 case RISCVISD::VFWSUB_W_VL:
17420 return true;
17421 case RISCVISD::OR_VL:
17422 return Root->getFlags().hasDisjoint();
17423 case ISD::SHL:
17424 return Root->getValueType(0).isScalableVector() &&
17425 Subtarget.hasStdExtZvbb();
17426 case RISCVISD::SHL_VL:
17427 return Subtarget.hasStdExtZvbb();
17428 case RISCVISD::VFMADD_VL:
17429 case RISCVISD::VFNMSUB_VL:
17430 case RISCVISD::VFNMADD_VL:
17431 case RISCVISD::VFMSUB_VL:
17432 return true;
17433 default:
17434 return false;
17435 }
17436 }
17437
17438 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
17439 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
17440 const RISCVSubtarget &Subtarget) {
17441 assert(isSupportedRoot(Root, Subtarget) &&
17442           "Trying to build a helper with an "
17443 "unsupported root");
17444 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
17446 OrigOperand = Root->getOperand(OperandIdx);
17447
17448 unsigned Opc = Root->getOpcode();
17449 switch (Opc) {
17450 // We consider
17451 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
17452 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
17453 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
17454 case RISCVISD::VWADD_W_VL:
17455 case RISCVISD::VWADDU_W_VL:
17456 case RISCVISD::VWSUB_W_VL:
17457 case RISCVISD::VWSUBU_W_VL:
17458 case RISCVISD::VFWADD_W_VL:
17459 case RISCVISD::VFWSUB_W_VL:
17460 // Operand 1 can't be changed.
17461 if (OperandIdx == 1)
17462 break;
17463 [[fallthrough]];
17464 default:
17465 fillUpExtensionSupport(Root, DAG, Subtarget);
17466 break;
17467 }
17468 }
17469
17470 /// Helper function to get the Mask and VL from \p Root.
17471 static std::pair<SDValue, SDValue>
17472 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
17473 const RISCVSubtarget &Subtarget) {
17474 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
17475 switch (Root->getOpcode()) {
17476 case ISD::ADD:
17477 case ISD::SUB:
17478 case ISD::MUL:
17479 case ISD::OR:
17480 case ISD::SHL: {
17481 SDLoc DL(Root);
17482 MVT VT = Root->getSimpleValueType(0);
17483 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
17484 }
17485 default:
17486 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
17487 }
17488 }
17489
17490 /// Helper function to check if \p N is commutative with respect to the
17491 /// foldings that are supported by this class.
17492 static bool isCommutative(const SDNode *N) {
17493 switch (N->getOpcode()) {
17494 case ISD::ADD:
17495 case ISD::MUL:
17496 case ISD::OR:
17497 case RISCVISD::ADD_VL:
17498 case RISCVISD::MUL_VL:
17499 case RISCVISD::OR_VL:
17500 case RISCVISD::FADD_VL:
17501 case RISCVISD::FMUL_VL:
17502 case RISCVISD::VFMADD_VL:
17503 case RISCVISD::VFNMSUB_VL:
17504 case RISCVISD::VFNMADD_VL:
17505 case RISCVISD::VFMSUB_VL:
17506 return true;
17507 case RISCVISD::VWADD_W_VL:
17508 case RISCVISD::VWADDU_W_VL:
17509 case ISD::SUB:
17510 case RISCVISD::SUB_VL:
17511 case RISCVISD::VWSUB_W_VL:
17512 case RISCVISD::VWSUBU_W_VL:
17513 case RISCVISD::VFWADD_W_VL:
17514 case RISCVISD::FSUB_VL:
17515 case RISCVISD::VFWSUB_W_VL:
17516 case ISD::SHL:
17517 case RISCVISD::SHL_VL:
17518 return false;
17519 default:
17520 llvm_unreachable("Unexpected opcode");
17521 }
17522 }
17523
17524 /// Get a list of combine to try for folding extensions in \p Root.
17525 /// Note that each returned CombineToTry function doesn't actually modify
17526 /// anything. Instead they produce an optional CombineResult that if not None,
17527 /// need to be materialized for the combine to be applied.
17528 /// \see CombineResult::materialize.
17529 /// If the related CombineToTry function returns std::nullopt, that means the
17530 /// combine didn't match.
17531 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
17532};
17533
17534/// Helper structure that holds all the necessary information to materialize a
17535/// combine that does some extension folding.
17536struct CombineResult {
17537 /// Opcode to be generated when materializing the combine.
17538 unsigned TargetOpcode;
17539 // No value means no extension is needed.
17540 std::optional<ExtKind> LHSExt;
17541 std::optional<ExtKind> RHSExt;
17542 /// Root of the combine.
17543 SDNode *Root;
17544 /// LHS of the TargetOpcode.
17545 NodeExtensionHelper LHS;
17546 /// RHS of the TargetOpcode.
17547 NodeExtensionHelper RHS;
17548
17549 CombineResult(unsigned TargetOpcode, SDNode *Root,
17550 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
17551 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
17552 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
17553 LHS(LHS), RHS(RHS) {}
17554
17555 /// Return a value that uses TargetOpcode and that can be used to replace
17556 /// Root.
17557 /// The actual replacement is *not* done in that method.
17558 SDValue materialize(SelectionDAG &DAG,
17559 const RISCVSubtarget &Subtarget) const {
17560 SDValue Mask, VL, Passthru;
17561 std::tie(Mask, VL) =
17562 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
17563 switch (Root->getOpcode()) {
17564 default:
17565 Passthru = Root->getOperand(2);
17566 break;
17567 case ISD::ADD:
17568 case ISD::SUB:
17569 case ISD::MUL:
17570 case ISD::OR:
17571 case ISD::SHL:
17572 Passthru = DAG.getUNDEF(Root->getValueType(0));
17573 break;
17574 }
17575 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
17576 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
17577 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
17578 Passthru, Mask, VL);
17579 }
17580};
17581
17582/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
17583/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
17584/// are zext) and LHS and RHS can be folded into Root.
17585/// AllowExtMask defines which form `ext` can take in this pattern.
17586///
17587/// \note If the pattern can match with both zext and sext, the returned
17588/// CombineResult will feature the zext result.
17589///
17590/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17591/// can be used to apply the pattern.
17592static std::optional<CombineResult>
17593canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
17594 const NodeExtensionHelper &RHS,
17595 uint8_t AllowExtMask, SelectionDAG &DAG,
17596 const RISCVSubtarget &Subtarget) {
17597 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
17598 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
17599 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
17600 /*RHSExt=*/{ExtKind::ZExt});
17601 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
17602 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
17603 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
17604 /*RHSExt=*/{ExtKind::SExt});
17605 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
17606 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
17607 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
17608 /*RHSExt=*/{ExtKind::FPExt});
17609 if ((AllowExtMask & ExtKind::BF16Ext) && LHS.SupportsBF16Ext &&
17610 RHS.SupportsBF16Ext)
17611 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
17612 Root, LHS, /*LHSExt=*/{ExtKind::BF16Ext}, RHS,
17613 /*RHSExt=*/{ExtKind::BF16Ext});
17614 return std::nullopt;
17615}
17616
17617/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
17618/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
17619/// are zext) and LHS and RHS can be folded into Root.
17620///
17621/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17622/// can be used to apply the pattern.
17623static std::optional<CombineResult>
17624canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
17625 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17626 const RISCVSubtarget &Subtarget) {
17627 return canFoldToVWWithSameExtensionImpl(
17628 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
17629 Subtarget);
17630}
17631
17632/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
17633///
17634/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17635/// can be used to apply the pattern.
17636static std::optional<CombineResult>
17637canFoldToVWWithSameExtZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17638 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17639 const RISCVSubtarget &Subtarget) {
17640 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
17641 Subtarget);
17642}
17643
17644/// Check if \p Root follows a pattern Root(bf16ext(LHS), bf16ext(RHS))
17645///
17646/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17647/// can be used to apply the pattern.
17648static std::optional<CombineResult>
17649canFoldToVWWithSameExtBF16(SDNode *Root, const NodeExtensionHelper &LHS,
17650 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17651 const RISCVSubtarget &Subtarget) {
17652 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::BF16Ext, DAG,
17653 Subtarget);
17654}
17655
17656/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
17657///
17658/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17659/// can be used to apply the pattern.
17660static std::optional<CombineResult>
17661canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
17662 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17663 const RISCVSubtarget &Subtarget) {
17664 if (RHS.SupportsFPExt)
17665 return CombineResult(
17666 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
17667 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
17668
17669 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
17670 // sext/zext?
17671 // Control this behavior behind an option (AllowSplatInVW_W) for testing
17672 // purposes.
17673 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
17674 return CombineResult(
17675 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
17676 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
17677 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
17678 return CombineResult(
17679 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
17680 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
17681 return std::nullopt;
17682}
17683
17684/// Check if \p Root follows a pattern Root(sext(LHS), RHS)
17685///
17686/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17687/// can be used to apply the pattern.
17688static std::optional<CombineResult>
17689canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17690 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17691 const RISCVSubtarget &Subtarget) {
17692 if (LHS.SupportsSExt)
17693 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
17694 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
17695 /*RHSExt=*/std::nullopt);
17696 return std::nullopt;
17697}
17698
17699/// Check if \p Root follows a pattern Root(zext(LHS), RHS)
17700///
17701/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17702/// can be used to apply the pattern.
17703static std::optional<CombineResult>
17704canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17705 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17706 const RISCVSubtarget &Subtarget) {
17707 if (LHS.SupportsZExt)
17708 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
17709 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
17710 /*RHSExt=*/std::nullopt);
17711 return std::nullopt;
17712}
17713
17714/// Check if \p Root follows a pattern Root(fpext(LHS), RHS)
17715///
17716/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17717/// can be used to apply the pattern.
17718static std::optional<CombineResult>
17719canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17720 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17721 const RISCVSubtarget &Subtarget) {
17722 if (LHS.SupportsFPExt)
17723 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
17724 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
17725 /*RHSExt=*/std::nullopt);
17726 return std::nullopt;
17727}
17728
17729/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
17730///
17731/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17732/// can be used to apply the pattern.
17733static std::optional<CombineResult>
17734canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
17735 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17736 const RISCVSubtarget &Subtarget) {
17737
17738 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
17739 return std::nullopt;
17740 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
17741 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
17742 /*RHSExt=*/{ExtKind::ZExt});
17743}
17744
17745SmallVector<NodeExtensionHelper::CombineToTry>
17746NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
17747 SmallVector<CombineToTry> Strategies;
17748 switch (Root->getOpcode()) {
17749 case ISD::ADD:
17750 case ISD::SUB:
17751 case ISD::OR:
17752 case RISCVISD::ADD_VL:
17753 case RISCVISD::SUB_VL:
17754 case RISCVISD::OR_VL:
17755 case RISCVISD::FADD_VL:
17756 case RISCVISD::FSUB_VL:
17757 // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
17758 Strategies.push_back(canFoldToVWWithSameExtension);
17759    // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
17760 Strategies.push_back(canFoldToVW_W);
17761 break;
17762 case RISCVISD::FMUL_VL:
17763 case RISCVISD::VFMADD_VL:
17764 case RISCVISD::VFMSUB_VL:
17765 case RISCVISD::VFNMADD_VL:
17766 case RISCVISD::VFNMSUB_VL:
17767 Strategies.push_back(canFoldToVWWithSameExtension);
17768 if (Root->getOpcode() == RISCVISD::VFMADD_VL)
17769 Strategies.push_back(canFoldToVWWithSameExtBF16);
17770 break;
17771 case ISD::MUL:
17772 case RISCVISD::MUL_VL:
17773 // mul -> vwmul(u)
17774 Strategies.push_back(canFoldToVWWithSameExtension);
17775 // mul -> vwmulsu
17776 Strategies.push_back(canFoldToVW_SU);
17777 break;
17778 case ISD::SHL:
17779 case RISCVISD::SHL_VL:
17780 // shl -> vwsll
17781 Strategies.push_back(canFoldToVWWithSameExtZEXT);
17782 break;
17783 case RISCVISD::VWADD_W_VL:
17784 case RISCVISD::VWSUB_W_VL:
17785 // vwadd_w|vwsub_w -> vwadd|vwsub
17786 Strategies.push_back(canFoldToVWWithSEXT);
17787 break;
17788 case RISCVISD::VWADDU_W_VL:
17789 case RISCVISD::VWSUBU_W_VL:
17790 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
17791 Strategies.push_back(canFoldToVWWithZEXT);
17792 break;
17793 case RISCVISD::VFWADD_W_VL:
17794 case RISCVISD::VFWSUB_W_VL:
17795 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
17796 Strategies.push_back(canFoldToVWWithFPEXT);
17797 break;
17798 default:
17799 llvm_unreachable("Unexpected opcode");
17800 }
17801 return Strategies;
17802}
17803} // End anonymous namespace.
17804
17806 // TODO: Extend this to other binops using generic identity logic
17807 assert(N->getOpcode() == RISCVISD::ADD_VL);
17808 SDValue A = N->getOperand(0);
17809 SDValue B = N->getOperand(1);
17810 SDValue Passthru = N->getOperand(2);
17811 if (!Passthru.isUndef())
17812    // TODO: This could be a vmerge instead
17813 return SDValue();
17814
17815  if (ISD::isConstantSplatVectorAllZeros(B.getNode()))
17816    return A;
17817 // Peek through fixed to scalable
17818 if (B.getOpcode() == ISD::INSERT_SUBVECTOR && B.getOperand(0).isUndef() &&
17819 ISD::isConstantSplatVectorAllZeros(B.getOperand(1).getNode()))
17820 return A;
17821 return SDValue();
17822}
17823
17824/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
17825/// The supported combines are:
17826/// add | add_vl | or disjoint | or_vl disjoint -> vwadd(u) | vwadd(u)_w
17827/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
17828/// mul | mul_vl -> vwmul(u) | vwmul_su
17829/// shl | shl_vl -> vwsll
17830/// fadd_vl -> vfwadd | vfwadd_w
17831/// fsub_vl -> vfwsub | vfwsub_w
17832/// fmul_vl -> vfwmul
17833/// vwadd_w(u) -> vwadd(u)
17834/// vwsub_w(u) -> vwsub(u)
17835/// vfwadd_w -> vfwadd
17836/// vfwsub_w -> vfwsub
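/// E.g. (add_vl (vsext_vl a), (vsext_vl b)) is rewritten to (vwadd_vl a, b)
/// when the extends have no other users, so both explicit extends disappear.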
17837static SDValue combineOp_VLToVWOp_VL(SDNode *N,
17838                                     TargetLowering::DAGCombinerInfo &DCI,
17839                                     const RISCVSubtarget &Subtarget) {
17840 SelectionDAG &DAG = DCI.DAG;
17841 if (DCI.isBeforeLegalize())
17842 return SDValue();
17843
17844 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
17845 return SDValue();
17846
17847 SmallVector<SDNode *> Worklist;
17848 SmallPtrSet<SDNode *, 8> Inserted;
17849 Worklist.push_back(N);
17850 Inserted.insert(N);
17851 SmallVector<CombineResult> CombinesToApply;
17852
17853 while (!Worklist.empty()) {
17854 SDNode *Root = Worklist.pop_back_val();
17855
17856 NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
17857 NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
17858 auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
17859 &Inserted](const NodeExtensionHelper &Op) {
17860 if (Op.needToPromoteOtherUsers()) {
17861 for (SDUse &Use : Op.OrigOperand->uses()) {
17862 SDNode *TheUser = Use.getUser();
17863 if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
17864 return false;
17865 // We only support the first 2 operands of FMA.
17866 if (Use.getOperandNo() >= 2)
17867 return false;
17868 if (Inserted.insert(TheUser).second)
17869 Worklist.push_back(TheUser);
17870 }
17871 }
17872 return true;
17873 };
17874
17875    // Control the compile time by limiting the number of nodes we look at in
17876 // total.
17877 if (Inserted.size() > ExtensionMaxWebSize)
17878 return SDValue();
17879
17880    SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
17881        NodeExtensionHelper::getSupportedFoldings(Root);
17882
17883 assert(!FoldingStrategies.empty() && "Nothing to be folded");
17884 bool Matched = false;
17885 for (int Attempt = 0;
17886 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
17887 ++Attempt) {
17888
17889 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
17890 FoldingStrategies) {
17891 std::optional<CombineResult> Res =
17892 FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
17893 if (Res) {
17894 Matched = true;
17895 CombinesToApply.push_back(*Res);
17896 // All the inputs that are extended need to be folded, otherwise
17897          // we would be leaving the old input (since it may still be used),
17898 // and the new one.
17899 if (Res->LHSExt.has_value())
17900 if (!AppendUsersIfNeeded(LHS))
17901 return SDValue();
17902 if (Res->RHSExt.has_value())
17903 if (!AppendUsersIfNeeded(RHS))
17904 return SDValue();
17905 break;
17906 }
17907 }
17908 std::swap(LHS, RHS);
17909 }
17910 // Right now we do an all or nothing approach.
17911 if (!Matched)
17912 return SDValue();
17913 }
17914 // Store the value for the replacement of the input node separately.
17915 SDValue InputRootReplacement;
17916 // We do the RAUW after we materialize all the combines, because some replaced
17917 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
17918 // some of these nodes may appear in the NodeExtensionHelpers of some of the
17919 // yet-to-be-visited CombinesToApply roots.
17920  SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
17921  ValuesToReplace.reserve(CombinesToApply.size());
17922 for (CombineResult Res : CombinesToApply) {
17923 SDValue NewValue = Res.materialize(DAG, Subtarget);
17924 if (!InputRootReplacement) {
17925 assert(Res.Root == N &&
17926 "First element is expected to be the current node");
17927 InputRootReplacement = NewValue;
17928 } else {
17929 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
17930 }
17931 }
17932 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
17933 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
17934 DCI.AddToWorklist(OldNewValues.second.getNode());
17935 }
17936 return InputRootReplacement;
17937}
17938
17939// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
17940// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
17941// y will be the Passthru and cond will be the Mask.
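// For masked-off lanes the original computes y + 0 == y, which the masked
// widening op reproduces by using y as the passthru.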
17942static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
17943  unsigned Opc = N->getOpcode();
17944 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
17945 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
17946
17947 SDValue Y = N->getOperand(0);
17948 SDValue MergeOp = N->getOperand(1);
17949 unsigned MergeOpc = MergeOp.getOpcode();
17950
17951 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
17952 return SDValue();
17953
17954 SDValue X = MergeOp->getOperand(1);
17955
17956 if (!MergeOp.hasOneUse())
17957 return SDValue();
17958
17959 // Passthru should be undef
17960 SDValue Passthru = N->getOperand(2);
17961 if (!Passthru.isUndef())
17962 return SDValue();
17963
17964 // Mask should be all ones
17965 SDValue Mask = N->getOperand(3);
17966 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
17967 return SDValue();
17968
17969 // False value of MergeOp should be all zeros
17970 SDValue Z = MergeOp->getOperand(2);
17971
17972 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
17973 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
17974 Z = Z.getOperand(1);
17975
17976 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
17977 return SDValue();
17978
17979 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
17980 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
17981 N->getFlags());
17982}
17983
17984static SDValue performVWADDSUBW_VLCombine(SDNode *N,
17985                                          TargetLowering::DAGCombinerInfo &DCI,
17986                                  const RISCVSubtarget &Subtarget) {
17987 [[maybe_unused]] unsigned Opc = N->getOpcode();
17988 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
17989 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
17990
17991 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17992 return V;
17993
17994 return combineVWADDSUBWSelect(N, DCI.DAG);
17995}
17996
17997// Helper function for performMemPairCombine.
17998// Try to combine the memory loads/stores LSNode1 and LSNode2
17999// into a single memory pair operation.
18000static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
18001                                 LSBaseSDNode *LSNode2, SDValue BasePtr,
18002 uint64_t Imm) {
18003  SmallPtrSet<const SDNode *, 32> Visited;
18004  SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
18005
18006 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
18007 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
18008 return SDValue();
18009
18010  MachineFunction &MF = DAG.getMachineFunction();
18011  const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
18012
18013 // The new operation has twice the width.
18014 MVT XLenVT = Subtarget.getXLenVT();
18015 EVT MemVT = LSNode1->getMemoryVT();
18016 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
18017 MachineMemOperand *MMO = LSNode1->getMemOperand();
18018  MachineMemOperand *NewMMO = MF.getMachineMemOperand(
18019      MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
18020
18021 if (LSNode1->getOpcode() == ISD::LOAD) {
18022 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
18023 unsigned Opcode;
18024 if (MemVT == MVT::i32)
18025 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
18026 else
18027 Opcode = RISCVISD::TH_LDD;
18028
18029 SDValue Res = DAG.getMemIntrinsicNode(
18030 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
18031 {LSNode1->getChain(), BasePtr,
18032 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
18033 NewMemVT, NewMMO);
18034
18035 SDValue Node1 =
18036 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
18037 SDValue Node2 =
18038 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
18039
18040 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
18041 return Node1;
18042 } else {
18043 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
18044
18045 SDValue Res = DAG.getMemIntrinsicNode(
18046 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
18047 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
18048 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
18049 NewMemVT, NewMMO);
18050
18051 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
18052 return Res;
18053 }
18054}
18055
18056// Try to combine two adjacent loads/stores to a single pair instruction from
18057// the XTHeadMemPair vendor extension.
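// e.g. two i64 loads from base and base+8 become a single th.ldd, and two
// adjacent i32/i64 stores similarly become th.swd / th.sdd.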
18058static SDValue performMemPairCombine(SDNode *N,
18059                                     TargetLowering::DAGCombinerInfo &DCI) {
18060  SelectionDAG &DAG = DCI.DAG;
18061  MachineFunction &MF = DAG.getMachineFunction();
18062  const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
18063
18064 // Target does not support load/store pair.
18065 if (!Subtarget.hasVendorXTHeadMemPair())
18066 return SDValue();
18067
18068 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
18069 EVT MemVT = LSNode1->getMemoryVT();
18070 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
18071
18072 // No volatile, indexed or atomic loads/stores.
18073 if (!LSNode1->isSimple() || LSNode1->isIndexed())
18074 return SDValue();
18075
18076 // Function to get a base + constant representation from a memory value.
18077 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
18078 if (Ptr->getOpcode() == ISD::ADD)
18079 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
18080 return {Ptr->getOperand(0), C1->getZExtValue()};
18081 return {Ptr, 0};
18082 };
18083
18084 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
18085
18086 SDValue Chain = N->getOperand(0);
18087 for (SDUse &Use : Chain->uses()) {
18088 if (Use.getUser() != N && Use.getResNo() == 0 &&
18089 Use.getUser()->getOpcode() == N->getOpcode()) {
18090      LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
18091
18092 // No volatile, indexed or atomic loads/stores.
18093 if (!LSNode2->isSimple() || LSNode2->isIndexed())
18094 continue;
18095
18096 // Check if LSNode1 and LSNode2 have the same type and extension.
18097 if (LSNode1->getOpcode() == ISD::LOAD)
18098 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
18099            cast<LoadSDNode>(LSNode1)->getExtensionType())
18100          continue;
18101
18102 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
18103 continue;
18104
18105 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
18106
18107    // Check if the base pointer is the same for both instructions.
18108 if (Base1 != Base2)
18109 continue;
18110
18111 // Check if the offsets match the XTHeadMemPair encoding constraints.
18112 bool Valid = false;
18113 if (MemVT == MVT::i32) {
18114 // Check for adjacent i32 values and a 2-bit index.
18115 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
18116 Valid = true;
18117 } else if (MemVT == MVT::i64) {
18118 // Check for adjacent i64 values and a 2-bit index.
18119 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
18120 Valid = true;
18121 }
18122
18123 if (!Valid)
18124 continue;
18125
18126 // Try to combine.
18127 if (SDValue Res =
18128 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
18129 return Res;
18130 }
18131 }
18132
18133 return SDValue();
18134}
18135
18136// Fold
18137// (fp_to_int (froundeven X)) -> fcvt X, rne
18138// (fp_to_int (ftrunc X)) -> fcvt X, rtz
18139// (fp_to_int (ffloor X)) -> fcvt X, rdn
18140// (fp_to_int (fceil X)) -> fcvt X, rup
18141// (fp_to_int (fround X)) -> fcvt X, rmm
18142// (fp_to_int (frint X)) -> fcvt X
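18143static SDValue
18144performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,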
18145 const RISCVSubtarget &Subtarget) {
18146 SelectionDAG &DAG = DCI.DAG;
18147 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18148 MVT XLenVT = Subtarget.getXLenVT();
18149
18150 SDValue Src = N->getOperand(0);
18151
18152 // Don't do this for strict-fp Src.
18153 if (Src->isStrictFPOpcode())
18154 return SDValue();
18155
18156 // Ensure the FP type is legal.
18157 if (!TLI.isTypeLegal(Src.getValueType()))
18158 return SDValue();
18159
18160 // Don't do this for f16 with Zfhmin and not Zfh.
18161 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
18162 return SDValue();
18163
18164 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
18165 // If the result is invalid, we didn't find a foldable instruction.
18166 if (FRM == RISCVFPRndMode::Invalid)
18167 return SDValue();
18168
18169 SDLoc DL(N);
18170 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
18171 EVT VT = N->getValueType(0);
18172
18173 if (VT.isVector() && TLI.isTypeLegal(VT)) {
18174 MVT SrcVT = Src.getSimpleValueType();
18175 MVT SrcContainerVT = SrcVT;
18176 MVT ContainerVT = VT.getSimpleVT();
18177 SDValue XVal = Src.getOperand(0);
18178
18179 // For widening and narrowing conversions we just combine it into a
18180 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
18181 // end up getting lowered to their appropriate pseudo instructions based on
18182 // their operand types.
18183 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
18184 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
18185 return SDValue();
18186
18187 // Make fixed-length vectors scalable first
18188 if (SrcVT.isFixedLengthVector()) {
18189 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
18190 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
18191 ContainerVT =
18192 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
18193 }
18194
18195 auto [Mask, VL] =
18196 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
18197
18198 SDValue FpToInt;
18199 if (FRM == RISCVFPRndMode::RTZ) {
18200 // Use the dedicated trunc static rounding mode if we're truncating so we
18201 // don't need to generate calls to fsrmi/fsrm
18202 unsigned Opc =
18203 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
18204 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
18205 } else {
18206 unsigned Opc =
18207 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
18208 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
18209 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
18210 }
18211
18212 // If converted from fixed-length to scalable, convert back
18213 if (VT.isFixedLengthVector())
18214 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
18215
18216 return FpToInt;
18217 }
18218
18219 // Only handle XLen or i32 types. Other types narrower than XLen will
18220 // eventually be legalized to XLenVT.
18221 if (VT != MVT::i32 && VT != XLenVT)
18222 return SDValue();
18223
18224 unsigned Opc;
18225 if (VT == XLenVT)
18226 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
18227 else
18228 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
18229
18230 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
18231 DAG.getTargetConstant(FRM, DL, XLenVT));
18232 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
18233}
18234
18235// Fold
18236// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
18237// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
18238// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
18239// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
18240// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
18241// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
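18243static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,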
18244 const RISCVSubtarget &Subtarget) {
18245 SelectionDAG &DAG = DCI.DAG;
18246 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18247 MVT XLenVT = Subtarget.getXLenVT();
18248
18249 // Only handle XLen types. Other types narrower than XLen will eventually be
18250 // legalized to XLenVT.
18251 EVT DstVT = N->getValueType(0);
18252 if (DstVT != XLenVT)
18253 return SDValue();
18254
18255 SDValue Src = N->getOperand(0);
18256
18257 // Don't do this for strict-fp Src.
18258 if (Src->isStrictFPOpcode())
18259 return SDValue();
18260
18261 // Ensure the FP type is also legal.
18262 if (!TLI.isTypeLegal(Src.getValueType()))
18263 return SDValue();
18264
18265 // Don't do this for f16 with Zfhmin and not Zfh.
18266 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
18267 return SDValue();
18268
18269 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
18270
18271 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
18272 if (FRM == RISCVFPRndMode::Invalid)
18273 return SDValue();
18274
18275 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
18276
18277 unsigned Opc;
18278 if (SatVT == DstVT)
18279 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
18280 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
18281 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
18282 else
18283 return SDValue();
18284 // FIXME: Support other SatVTs by clamping before or after the conversion.
18285
18286 Src = Src.getOperand(0);
18287
18288 SDLoc DL(N);
18289 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
18290 DAG.getTargetConstant(FRM, DL, XLenVT));
18291
18292 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
18293 // extend.
18294 if (Opc == RISCVISD::FCVT_WU_RV64)
18295 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
18296
18297 // RISC-V FP-to-int conversions saturate to the destination register size, but
18298 // don't produce 0 for nan.
18299 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
18300 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
18301}
18302
18303// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
18304// smaller than XLenVT.
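18305static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,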
18306 const RISCVSubtarget &Subtarget) {
18307 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
18308
18309 SDValue Src = N->getOperand(0);
18310 if (Src.getOpcode() != ISD::BSWAP)
18311 return SDValue();
18312
18313 EVT VT = N->getValueType(0);
18314 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
18316 return SDValue();
18317
18318 SDLoc DL(N);
18319 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
18320}
18321
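18322static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG,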
18323 const RISCVSubtarget &Subtarget) {
18324 // Fold:
18325 // vp.reverse(vp.load(ADDR, MASK)) -> vp.strided.load(ADDR, -1, MASK)
18326
18327 // Check if its first operand is a vp.load.
18328 auto *VPLoad = dyn_cast<VPLoadSDNode>(N->getOperand(0));
18329 if (!VPLoad)
18330 return SDValue();
18331
18332 EVT LoadVT = VPLoad->getValueType(0);
18333 // We do not have a strided_load version for masks, and the evl of vp.reverse
18334 // and vp.load should always be the same.
18335 if (!LoadVT.getVectorElementType().isByteSized() ||
18336 N->getOperand(2) != VPLoad->getVectorLength() ||
18337 !N->getOperand(0).hasOneUse())
18338 return SDValue();
18339
18340 // Check if the mask of the outer vp.reverse is all 1's.
18341 if (!isOneOrOneSplat(N->getOperand(1)))
18342 return SDValue();
18343
18344 SDValue LoadMask = VPLoad->getMask();
18345 // If the mask is all ones, then the load is unmasked and can be reversed.
18346 if (!isOneOrOneSplat(LoadMask)) {
18347 // If the mask is not all ones, we can reverse the load if the mask was also
18348 // reversed by an unmasked vp.reverse with the same EVL.
18349 if (LoadMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
18350 !isOneOrOneSplat(LoadMask.getOperand(1)) ||
18351 LoadMask.getOperand(2) != VPLoad->getVectorLength())
18352 return SDValue();
18353 LoadMask = LoadMask.getOperand(0);
18354 }
18355
18356 // Base = LoadAddr + (NumElem - 1) * ElemWidthByte
18357 SDLoc DL(N);
18358 MVT XLenVT = Subtarget.getXLenVT();
18359 SDValue NumElem = VPLoad->getVectorLength();
18360 uint64_t ElemWidthByte = VPLoad->getValueType(0).getScalarSizeInBits() / 8;
18361
18362 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
18363 DAG.getConstant(1, DL, XLenVT));
18364 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
18365 DAG.getConstant(ElemWidthByte, DL, XLenVT));
18366 SDValue Base = DAG.getNode(ISD::ADD, DL, XLenVT, VPLoad->getBasePtr(), Temp2);
18367 SDValue Stride = DAG.getSignedConstant(-ElemWidthByte, DL, XLenVT);
18368
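18369 MachineFunction &MF = DAG.getMachineFunction();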
18370 MachinePointerInfo PtrInfo(VPLoad->getAddressSpace());
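18371 MachineMemOperand *MMO = MF.getMachineMemOperand(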
18372 PtrInfo, VPLoad->getMemOperand()->getFlags(),
18373 LocationSize::beforeOrAfterPointer(), VPLoad->getAlign());
18374
18375 SDValue Ret = DAG.getStridedLoadVP(
18376 LoadVT, DL, VPLoad->getChain(), Base, Stride, LoadMask,
18377 VPLoad->getVectorLength(), MMO, VPLoad->isExpandingLoad());
18378
18379 DAG.ReplaceAllUsesOfValueWith(SDValue(VPLoad, 1), Ret.getValue(1));
18380
18381 return Ret;
18382}
18383
18385 const RISCVSubtarget &Subtarget) {
18386 // Fold:
18387 // vp.store(vp.reverse(VAL), ADDR, MASK) -> vp.strided.store(VAL, NEW_ADDR,
18388 // -1, MASK)
18389 auto *VPStore = cast<VPStoreSDNode>(N);
18390
18391 if (VPStore->getValue().getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE)
18392 return SDValue();
18393
18394 SDValue VPReverse = VPStore->getValue();
18395 EVT ReverseVT = VPReverse->getValueType(0);
18396
18397 // We do not have a strided_store version for masks, and the evl of vp.reverse
18398 // and vp.store should always be the same.
18399 if (!ReverseVT.getVectorElementType().isByteSized() ||
18400 VPStore->getVectorLength() != VPReverse.getOperand(2) ||
18401 !VPReverse.hasOneUse())
18402 return SDValue();
18403
18404 SDValue StoreMask = VPStore->getMask();
18405 // If the mask is all ones, then the store is unmasked and can be reversed.
18406 if (!isOneOrOneSplat(StoreMask)) {
18407 // If the mask is not all ones, we can reverse the store if the mask was
18408 // also reversed by an unmasked vp.reverse with the same EVL.
18409 if (StoreMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
18410 !isOneOrOneSplat(StoreMask.getOperand(1)) ||
18411 StoreMask.getOperand(2) != VPStore->getVectorLength())
18412 return SDValue();
18413 StoreMask = StoreMask.getOperand(0);
18414 }
18415
18416 // Base = StoreAddr + (NumElem - 1) * ElemWidthByte
18417 SDLoc DL(N);
18418 MVT XLenVT = Subtarget.getXLenVT();
18419 SDValue NumElem = VPStore->getVectorLength();
18420 uint64_t ElemWidthByte = VPReverse.getValueType().getScalarSizeInBits() / 8;
18421
18422 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
18423 DAG.getConstant(1, DL, XLenVT));
18424 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
18425 DAG.getConstant(ElemWidthByte, DL, XLenVT));
18426 SDValue Base =
18427 DAG.getNode(ISD::ADD, DL, XLenVT, VPStore->getBasePtr(), Temp2);
18428 SDValue Stride = DAG.getSignedConstant(-ElemWidthByte, DL, XLenVT);
18429
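18430 MachineFunction &MF = DAG.getMachineFunction();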
18431 MachinePointerInfo PtrInfo(VPStore->getAddressSpace());
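18432 MachineMemOperand *MMO = MF.getMachineMemOperand(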
18433 PtrInfo, VPStore->getMemOperand()->getFlags(),
18434 LocationSize::beforeOrAfterPointer(), VPStore->getAlign());
18435
18436 return DAG.getStridedStoreVP(
18437 VPStore->getChain(), DL, VPReverse.getOperand(0), Base,
18438 VPStore->getOffset(), Stride, StoreMask, VPStore->getVectorLength(),
18439 VPStore->getMemoryVT(), MMO, VPStore->getAddressingMode(),
18440 VPStore->isTruncatingStore(), VPStore->isCompressingStore());
18441}
18442
18443// Peephole avgceil pattern.
18444// %1 = zext <N x i8> %a to <N x i32>
18445// %2 = zext <N x i8> %b to <N x i32>
18446// %3 = add nuw nsw <N x i32> %1, splat (i32 1)
18447// %4 = add nuw nsw <N x i32> %3, %2
18448// %5 = lshr <N x i32> %4, splat (i32 1)
18449// %6 = trunc <N x i32> %5 to <N x i8>
18451 const RISCVSubtarget &Subtarget) {
18452 EVT VT = N->getValueType(0);
18453
18454 // Ignore fixed vectors.
18455 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18456 if (!VT.isScalableVector() || !TLI.isTypeLegal(VT))
18457 return SDValue();
18458
18459 SDValue In = N->getOperand(0);
18460 SDValue Mask = N->getOperand(1);
18461 SDValue VL = N->getOperand(2);
18462
18463 // Input should be a vp_srl with same mask and VL.
18464 if (In.getOpcode() != ISD::VP_SRL || In.getOperand(2) != Mask ||
18465 In.getOperand(3) != VL)
18466 return SDValue();
18467
18468 // Shift amount should be 1.
18469 if (!isOneOrOneSplat(In.getOperand(1)))
18470 return SDValue();
18471
18472 // Shifted value should be a vp_add with same mask and VL.
18473 SDValue LHS = In.getOperand(0);
18474 if (LHS.getOpcode() != ISD::VP_ADD || LHS.getOperand(2) != Mask ||
18475 LHS.getOperand(3) != VL)
18476 return SDValue();
18477
18478 SDValue Operands[3];
18479
18480 // Matches another VP_ADD with same VL and Mask.
18481 auto FindAdd = [&](SDValue V, SDValue Other) {
18482 if (V.getOpcode() != ISD::VP_ADD || V.getOperand(2) != Mask ||
18483 V.getOperand(3) != VL)
18484 return false;
18485
18486 Operands[0] = Other;
18487 Operands[1] = V.getOperand(1);
18488 Operands[2] = V.getOperand(0);
18489 return true;
18490 };
18491
18492 // We need to find another VP_ADD in one of the operands.
18493 SDValue LHS0 = LHS.getOperand(0);
18494 SDValue LHS1 = LHS.getOperand(1);
18495 if (!FindAdd(LHS0, LHS1) && !FindAdd(LHS1, LHS0))
18496 return SDValue();
18497
18498 // Now we have three operands of two additions. Check that one of them is a
18499 // constant vector with ones.
18500 auto I = llvm::find_if(Operands,
18501 [](const SDValue &Op) { return isOneOrOneSplat(Op); });
18502 if (I == std::end(Operands))
18503 return SDValue();
18504 // We found a vector with ones; move it to the end of the Operands array.
18505 std::swap(*I, Operands[2]);
18506
18507 // Make sure the other 2 operands can be promoted from the result type.
18508 for (SDValue Op : drop_end(Operands)) {
18509 if (Op.getOpcode() != ISD::VP_ZERO_EXTEND || Op.getOperand(1) != Mask ||
18510 Op.getOperand(2) != VL)
18511 return SDValue();
18512 // Input must be the same size or smaller than our result.
18513 if (Op.getOperand(0).getScalarValueSizeInBits() > VT.getScalarSizeInBits())
18514 return SDValue();
18515 }
18516
18517 // Pattern is detected.
18518 // Rebuild the zero extends in case the inputs are smaller than our result.
18519 SDValue NewOp0 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[0]), VT,
18520 Operands[0].getOperand(0), Mask, VL);
18521 SDValue NewOp1 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[1]), VT,
18522 Operands[1].getOperand(0), Mask, VL);
18523 // Build a AVGCEILU_VL which will be selected as a VAADDU with RNU rounding
18524 // mode.
18525 SDLoc DL(N);
18526 return DAG.getNode(RISCVISD::AVGCEILU_VL, DL, VT,
18527 {NewOp0, NewOp1, DAG.getUNDEF(VT), Mask, VL});
18528}
18529
18530// Convert from one FMA opcode to another based on whether we are negating the
18531// multiply result and/or the accumulator.
18532// NOTE: Only supports RVV operations with VL.
18533static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
18534 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
18535 if (NegMul) {
18536 // clang-format off
18537 switch (Opcode) {
18538 default: llvm_unreachable("Unexpected opcode");
18539 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
18540 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
18541 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
18542 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
18543 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
18544 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
18545 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
18546 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
18547 }
18548 // clang-format on
18549 }
18550
18551 // Negating the accumulator changes ADD<->SUB.
18552 if (NegAcc) {
18553 // clang-format off
18554 switch (Opcode) {
18555 default: llvm_unreachable("Unexpected opcode");
18556 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
18557 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
18558 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
18559 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
18560 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
18561 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
18562 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
18563 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
18564 }
18565 // clang-format on
18566 }
18567
18568 return Opcode;
18569}
18570
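18571static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {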
18572 // Fold FNEG_VL into FMA opcodes.
18573 // The first operand of strict-fp is chain.
18574 bool IsStrict =
18575 DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode());
18576 unsigned Offset = IsStrict ? 1 : 0;
18577 SDValue A = N->getOperand(0 + Offset);
18578 SDValue B = N->getOperand(1 + Offset);
18579 SDValue C = N->getOperand(2 + Offset);
18580 SDValue Mask = N->getOperand(3 + Offset);
18581 SDValue VL = N->getOperand(4 + Offset);
18582
18583 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
18584 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
18585 V.getOperand(2) == VL) {
18586 // Return the negated input.
18587 V = V.getOperand(0);
18588 return true;
18589 }
18590
18591 return false;
18592 };
18593
18594 bool NegA = invertIfNegative(A);
18595 bool NegB = invertIfNegative(B);
18596 bool NegC = invertIfNegative(C);
18597
18598 // If no operands are negated, we're done.
18599 if (!NegA && !NegB && !NegC)
18600 return SDValue();
18601
18602 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
18603 if (IsStrict)
18604 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
18605 {N->getOperand(0), A, B, C, Mask, VL});
18606 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
18607 VL);
18608}
18609
18612 const RISCVSubtarget &Subtarget) {
18613 SelectionDAG &DAG = DCI.DAG;
18614
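18615 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))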
18616 return V;
18617
18618 // FIXME: Ignore strict opcodes for now.
18619 if (DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode()))
18620 return SDValue();
18621
18622 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18623}
18624
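18625static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,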
18626 const RISCVSubtarget &Subtarget) {
18627 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
18628
18629 EVT VT = N->getValueType(0);
18630
18631 if (VT != Subtarget.getXLenVT())
18632 return SDValue();
18633
18634 if (!isa<ConstantSDNode>(N->getOperand(1)))
18635 return SDValue();
18636 uint64_t ShAmt = N->getConstantOperandVal(1);
18637
18638 SDValue N0 = N->getOperand(0);
18639
18640 // Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
18641 // (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
18642 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
18643 unsigned ExtSize =
18644 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
18645 if (ShAmt < ExtSize && N0.getOperand(0).getOpcode() == ISD::SHL &&
18646 N0.getOperand(0).hasOneUse() &&
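18647 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {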
18648 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
18649 if (LShAmt < ExtSize) {
18650 unsigned Size = VT.getSizeInBits();
18651 SDLoc ShlDL(N0.getOperand(0));
18652 SDValue Shl =
18653 DAG.getNode(ISD::SHL, ShlDL, VT, N0.getOperand(0).getOperand(0),
18654 DAG.getConstant(LShAmt + (Size - ExtSize), ShlDL, VT));
18655 SDLoc DL(N);
18656 return DAG.getNode(ISD::SRA, DL, VT, Shl,
18657 DAG.getConstant(ShAmt + (Size - ExtSize), DL, VT));
18658 }
18659 }
18660 }
18661
18662 if (ShAmt > 32 || VT != MVT::i64)
18663 return SDValue();
18664
18665 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
18666 // FIXME: Should this be a generic combine? There's a similar combine on X86.
18667 //
18668 // Also try these folds where an add or sub is in the middle.
18669 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), i32), C)
18670 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), i32), C)
18671 SDValue Shl;
18672 ConstantSDNode *AddC = nullptr;
18673
18674 // We might have an ADD or SUB between the SRA and SHL.
18675 bool IsAdd = N0.getOpcode() == ISD::ADD;
18676 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
18677 // Other operand needs to be a constant we can modify.
18678 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
18679 if (!AddC)
18680 return SDValue();
18681
18682 // AddC needs to have at least 32 trailing zeros.
18683 if (llvm::countr_zero(AddC->getZExtValue()) < 32)
18684 return SDValue();
18685
18686 // All users should be a shift by constant less than or equal to 32. This
18687 // ensures we'll do this optimization for each of them to produce an
18688 // add/sub+sext_inreg they can all share.
18689 for (SDNode *U : N0->users()) {
18690 if (U->getOpcode() != ISD::SRA ||
18691 !isa<ConstantSDNode>(U->getOperand(1)) ||
18692 U->getConstantOperandVal(1) > 32)
18693 return SDValue();
18694 }
18695
18696 Shl = N0.getOperand(IsAdd ? 0 : 1);
18697 } else {
18698 // Not an ADD or SUB.
18699 Shl = N0;
18700 }
18701
18702 // Look for a shift left by 32.
18703 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
18704 Shl.getConstantOperandVal(1) != 32)
18705 return SDValue();
18706
18707 // If we didn't look through an add/sub, then the shl should have one use.
18708 // If we did look through an add/sub, the sext_inreg we create is free so
18709 // we're only creating 2 new instructions. It's enough to only remove the
18710 // original sra+add/sub.
18711 if (!AddC && !Shl.hasOneUse())
18712 return SDValue();
18713
18714 SDLoc DL(N);
18715 SDValue In = Shl.getOperand(0);
18716
18717 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
18718 // constant.
18719 if (AddC) {
18720 SDValue ShiftedAddC =
18721 DAG.getConstant(AddC->getZExtValue() >> 32, DL, MVT::i64);
18722 if (IsAdd)
18723 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
18724 else
18725 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
18726 }
18727
18728 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
18729 DAG.getValueType(MVT::i32));
18730 if (ShAmt == 32)
18731 return SExt;
18732
18733 return DAG.getNode(
18734 ISD::SHL, DL, MVT::i64, SExt,
18735 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
18736}
18737
18738// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y), Z) if
18739// the result is used as the condition of a br_cc or select_cc we can invert,
18740// inverting the setcc is free, and Z is 0/1. Caller will invert the
18741// br_cc/select_cc.
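18742static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {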
18743 bool IsAnd = Cond.getOpcode() == ISD::AND;
18744 if (!IsAnd && Cond.getOpcode() != ISD::OR)
18745 return SDValue();
18746
18747 if (!Cond.hasOneUse())
18748 return SDValue();
18749
18750 SDValue Setcc = Cond.getOperand(0);
18751 SDValue Xor = Cond.getOperand(1);
18752 // Canonicalize setcc to LHS.
18753 if (Setcc.getOpcode() != ISD::SETCC)
18754 std::swap(Setcc, Xor);
18755 // LHS should be a setcc and RHS should be an xor.
18756 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
18757 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
18758 return SDValue();
18759
18760 // If the condition is an And, SimplifyDemandedBits may have changed
18761 // (xor Z, 1) to (not Z).
18762 SDValue Xor1 = Xor.getOperand(1);
18763 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
18764 return SDValue();
18765
18766 EVT VT = Cond.getValueType();
18767 SDValue Xor0 = Xor.getOperand(0);
18768
18769 // The LHS of the xor needs to be 0/1.
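18770 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);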
18771 if (!DAG.MaskedValueIsZero(Xor0, Mask))
18772 return SDValue();
18773
18774 // We can only invert integer setccs.
18775 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
18776 if (!SetCCOpVT.isScalarInteger())
18777 return SDValue();
18778
18779 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
18780 if (ISD::isIntEqualitySetCC(CCVal)) {
18781 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
18782 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
18783 Setcc.getOperand(1), CCVal);
18784 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
18785 // Invert (setlt 0, X) by converting to (setlt X, 1).
18786 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
18787 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
18788 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
18789 // Invert (setlt X, 1) by converting to (setlt 0, X).
18790 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
18791 DAG.getConstant(0, SDLoc(Setcc), VT),
18792 Setcc.getOperand(0), CCVal);
18793 } else
18794 return SDValue();
18795
18796 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
18797 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
18798}
18799
18800// Perform common combines for BR_CC and SELECT_CC conditions.
18801static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
18802 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
18803 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
18804
18805 // Since an arithmetic right shift always preserves the sign bit,
18806 // the shift can be omitted.
18807 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
18808 // setge (sra X, N), 0 -> setge X, 0
18809 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
18810 LHS.getOpcode() == ISD::SRA) {
18811 LHS = LHS.getOperand(0);
18812 return true;
18813 }
18814
18815 if (!ISD::isIntEqualitySetCC(CCVal))
18816 return false;
18817
18818 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
18819 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
18820 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
18821 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
18822 // If we're looking for eq 0 instead of ne 0, we need to invert the
18823 // condition.
18824 bool Invert = CCVal == ISD::SETEQ;
18825 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
18826 if (Invert)
18827 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
18828
18829 RHS = LHS.getOperand(1);
18830 LHS = LHS.getOperand(0);
18831 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
18832
18833 CC = DAG.getCondCode(CCVal);
18834 return true;
18835 }
18836
18837 // If XOR is reused and has an immediate that will fit in XORI,
18838 // do not fold.
18839 auto isXorImmediate = [](const SDValue &Op) -> bool {
18840 if (const auto *XorCnst = dyn_cast<ConstantSDNode>(Op))
18841 return isInt<12>(XorCnst->getSExtValue());
18842 return false;
18843 };
18844 // Fold (X(i1) ^ 1) == 0 -> X != 0
18845 auto singleBitOp = [&DAG](const SDValue &VarOp,
18846 const SDValue &ConstOp) -> bool {
18847 if (const auto *XorCnst = dyn_cast<ConstantSDNode>(ConstOp)) {
18848 const APInt Mask = APInt::getBitsSetFrom(VarOp.getValueSizeInBits(), 1);
18849 return (XorCnst->getSExtValue() == 1) &&
18850 DAG.MaskedValueIsZero(VarOp, Mask);
18851 }
18852 return false;
18853 };
18854 auto onlyUsedBySelectOrBR = [](const SDValue &Op) -> bool {
18855 for (const SDNode *UserNode : Op->users()) {
18856 const unsigned Opcode = UserNode->getOpcode();
18857 if (Opcode != RISCVISD::SELECT_CC && Opcode != RISCVISD::BR_CC)
18858 return false;
18859 }
18860 return true;
18861 };
18862 auto isFoldableXorEq = [isXorImmediate, singleBitOp, onlyUsedBySelectOrBR](
18863 const SDValue &LHS, const SDValue &RHS) -> bool {
18864 return LHS.getOpcode() == ISD::XOR && isNullConstant(RHS) &&
18865 (!isXorImmediate(LHS.getOperand(1)) ||
18866 singleBitOp(LHS.getOperand(0), LHS.getOperand(1)) ||
18867 onlyUsedBySelectOrBR(LHS));
18868 };
18869 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
18870 if (isFoldableXorEq(LHS, RHS)) {
18871 RHS = LHS.getOperand(1);
18872 LHS = LHS.getOperand(0);
18873 return true;
18874 }
18875 // Fold ((sext (xor X, C)), 0, eq/ne) -> (sext(X), C, eq/ne)
18876 if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG) {
18877 const SDValue LHS0 = LHS.getOperand(0);
18878 if (isFoldableXorEq(LHS0, RHS) && isa<ConstantSDNode>(LHS0.getOperand(1))) {
18879 // SEXT(XOR(X, Y)) -> XOR(SEXT(X), SEXT(Y))
18880 RHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(),
18881 LHS0.getOperand(1), LHS.getOperand(1));
18882 LHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(),
18883 LHS0.getOperand(0), LHS.getOperand(1));
18884 return true;
18885 }
18886 }
18887
18888 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
18889 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
18890 LHS.getOperand(1).getOpcode() == ISD::Constant) {
18891 SDValue LHS0 = LHS.getOperand(0);
18892 if (LHS0.getOpcode() == ISD::AND &&
18893 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
18894 uint64_t Mask = LHS0.getConstantOperandVal(1);
18895 uint64_t ShAmt = LHS.getConstantOperandVal(1);
18896 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
18897 // XAndesPerf supports branch on test bit.
18898 if (Subtarget.hasVendorXAndesPerf()) {
18899 LHS =
18900 DAG.getNode(ISD::AND, DL, LHS.getValueType(), LHS0.getOperand(0),
18901 DAG.getConstant(Mask, DL, LHS.getValueType()));
18902 return true;
18903 }
18904
18905 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
18906 CC = DAG.getCondCode(CCVal);
18907
18908 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
18909 LHS = LHS0.getOperand(0);
18910 if (ShAmt != 0)
18911 LHS =
18912 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
18913 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
18914 return true;
18915 }
18916 }
18917 }
18918
18919 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
18920 // This can occur when legalizing some floating point comparisons.
18921 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
18922 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
18923 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
18924 CC = DAG.getCondCode(CCVal);
18925 RHS = DAG.getConstant(0, DL, LHS.getValueType());
18926 return true;
18927 }
18928
18929 if (isNullConstant(RHS)) {
18930 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
18931 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
18932 CC = DAG.getCondCode(CCVal);
18933 LHS = NewCond;
18934 return true;
18935 }
18936 }
18937
18938 return false;
18939}
18940
18941// Fold
18942// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
18943// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
18944// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
18945// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
18946// (select C, (rotl Y, X), Y) -> (rotl Y, (select C, X, 0)).
18947// (select C, (rotr Y, X), Y) -> (rotr Y, (select C, X, 0)).
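18948static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,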
18949 SDValue TrueVal, SDValue FalseVal,
18950 bool Swapped) {
18951 bool Commutative = true;
18952 unsigned Opc = TrueVal.getOpcode();
18953 switch (Opc) {
18954 default:
18955 return SDValue();
18956 case ISD::SHL:
18957 case ISD::SRA:
18958 case ISD::SRL:
18959 case ISD::SUB:
18960 case ISD::ROTL:
18961 case ISD::ROTR:
18962 Commutative = false;
18963 break;
18964 case ISD::ADD:
18965 case ISD::OR:
18966 case ISD::XOR:
18967 case ISD::UMIN:
18968 case ISD::UMAX:
18969 break;
18970 }
18971
18972 if (!TrueVal.hasOneUse())
18973 return SDValue();
18974
18975 unsigned OpToFold;
18976 if (FalseVal == TrueVal.getOperand(0))
18977 OpToFold = 0;
18978 else if (Commutative && FalseVal == TrueVal.getOperand(1))
18979 OpToFold = 1;
18980 else
18981 return SDValue();
18982
18983 EVT VT = N->getValueType(0);
18984 SDLoc DL(N);
18985 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
18986 EVT OtherOpVT = OtherOp.getValueType();
18987 SDValue IdentityOperand =
18988 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
18989 if (!Commutative)
18990 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
18991 assert(IdentityOperand && "No identity operand!");
18992
18993 if (Swapped)
18994 std::swap(OtherOp, IdentityOperand);
18995 SDValue NewSel =
18996 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
18997 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
18998}
18999
19000// This tries to get rid of the `select` and `icmp` that are used to handle
19001// targets that do not support `cttz(0)`/`ctlz(0)`.
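19002static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {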
19003 SDValue Cond = N->getOperand(0);
19004
19005 // This represents either CTTZ or CTLZ instruction.
19006 SDValue CountZeroes;
19007
19008 SDValue ValOnZero;
19009
19010 if (Cond.getOpcode() != ISD::SETCC)
19011 return SDValue();
19012
19013 if (!isNullConstant(Cond->getOperand(1)))
19014 return SDValue();
19015
19016 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
19017 if (CCVal == ISD::CondCode::SETEQ) {
19018 CountZeroes = N->getOperand(2);
19019 ValOnZero = N->getOperand(1);
19020 } else if (CCVal == ISD::CondCode::SETNE) {
19021 CountZeroes = N->getOperand(1);
19022 ValOnZero = N->getOperand(2);
19023 } else {
19024 return SDValue();
19025 }
19026
19027 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
19028 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
19029 CountZeroes = CountZeroes.getOperand(0);
19030
19031 if (CountZeroes.getOpcode() != ISD::CTTZ &&
19032 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
19033 CountZeroes.getOpcode() != ISD::CTLZ &&
19034 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
19035 return SDValue();
19036
19037 if (!isNullConstant(ValOnZero))
19038 return SDValue();
19039
19040 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
19041 if (Cond->getOperand(0) != CountZeroesArgument)
19042 return SDValue();
19043
19044 unsigned BitWidth = CountZeroes.getValueSizeInBits();
19045 if (!isPowerOf2_32(BitWidth))
19046 return SDValue();
19047
19048 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
19049 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
19050 CountZeroes.getValueType(), CountZeroesArgument);
19051 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
19052 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
19053 CountZeroes.getValueType(), CountZeroesArgument);
19054 }
19055
19056 SDValue BitWidthMinusOne =
19057 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
19058
19059 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
19060 CountZeroes, BitWidthMinusOne);
19061 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
19062}
19063
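19064static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,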
19065 const RISCVSubtarget &Subtarget) {
19066 SDValue Cond = N->getOperand(0);
19067 SDValue True = N->getOperand(1);
19068 SDValue False = N->getOperand(2);
19069 SDLoc DL(N);
19070 EVT VT = N->getValueType(0);
19071 EVT CondVT = Cond.getValueType();
19072
19073 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
19074 return SDValue();
19075
19076 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
19077 // BEXTI, where C is power of 2.
19078 if (Subtarget.hasBEXTILike() && VT.isScalarInteger() &&
19079 (Subtarget.hasCZEROLike() || Subtarget.hasVendorXTHeadCondMov())) {
19080 SDValue LHS = Cond.getOperand(0);
19081 SDValue RHS = Cond.getOperand(1);
19082 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
19083 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
19084 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
19085 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
19086 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
19087 return DAG.getSelect(DL, VT,
19088 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
19089 False, True);
19090 }
19091 }
19092 return SDValue();
19093}
19094
19095static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC) {
19096 if (!TrueVal.hasOneUse() || !FalseVal.hasOneUse())
19097 return false;
19098
19099 SwapCC = false;
19100 if (TrueVal.getOpcode() == ISD::SUB && FalseVal.getOpcode() == ISD::ADD) {
19101 std::swap(TrueVal, FalseVal);
19102 SwapCC = true;
19103 }
19104
19105 if (TrueVal.getOpcode() != ISD::ADD || FalseVal.getOpcode() != ISD::SUB)
19106 return false;
19107
19108 SDValue A = FalseVal.getOperand(0);
19109 SDValue B = FalseVal.getOperand(1);
19110 // Add is commutative, so check both orders
19111 return ((TrueVal.getOperand(0) == A && TrueVal.getOperand(1) == B) ||
19112 (TrueVal.getOperand(1) == A && TrueVal.getOperand(0) == B));
19113}
19114
19115/// Convert vselect CC, (add a, b), (sub a, b) to add a, (vselect CC, -b, b).
19116/// This allows us match a vadd.vv fed by a masked vrsub, which reduces
19117/// register pressure over the add followed by masked vsub sequence.
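19118static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {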
19119 SDLoc DL(N);
19120 EVT VT = N->getValueType(0);
19121 SDValue CC = N->getOperand(0);
19122 SDValue TrueVal = N->getOperand(1);
19123 SDValue FalseVal = N->getOperand(2);
19124
19125 bool SwapCC;
19126 if (!matchSelectAddSub(TrueVal, FalseVal, SwapCC))
19127 return SDValue();
19128
19129 SDValue Sub = SwapCC ? TrueVal : FalseVal;
19130 SDValue A = Sub.getOperand(0);
19131 SDValue B = Sub.getOperand(1);
19132
19133 // Arrange the select such that we can match a masked
19134 // vrsub.vi to perform the conditional negate
19135 SDValue NegB = DAG.getNegative(B, DL, VT);
19136 if (!SwapCC)
19137 CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
19138 SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
19139 return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
19140}
19141
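19142static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,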
19143 const RISCVSubtarget &Subtarget) {
19144 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
19145 return Folded;
19146
19147 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
19148 return V;
19149
19150 if (Subtarget.hasConditionalMoveFusion())
19151 return SDValue();
19152
19153 SDValue TrueVal = N->getOperand(1);
19154 SDValue FalseVal = N->getOperand(2);
19155 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
19156 return V;
19157 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
19158}
19159
19160/// If we have a build_vector where each lane is binop X, C, where C
19161/// is a constant (but not necessarily the same constant on all lanes),
19162/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
19163/// We assume that materializing a constant build vector will be no more
19164/// expensive than performing O(n) binops.
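19165static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,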
19166 const RISCVSubtarget &Subtarget,
19167 const RISCVTargetLowering &TLI) {
19168 SDLoc DL(N);
19169 EVT VT = N->getValueType(0);
19170
19171 assert(!VT.isScalableVector() && "unexpected build vector");
19172
19173 if (VT.getVectorNumElements() == 1)
19174 return SDValue();
19175
19176 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
19177 if (!TLI.isBinOp(Opcode))
19178 return SDValue();
19179
19180 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
19181 return SDValue();
19182
19183 // This BUILD_VECTOR involves an implicit truncation, and sinking
19184 // truncates through binops is non-trivial.
19185 if (N->op_begin()->getValueType() != VT.getVectorElementType())
19186 return SDValue();
19187
19188 SmallVector<SDValue> LHSOps;
19189 SmallVector<SDValue> RHSOps;
19190 for (SDValue Op : N->ops()) {
19191 if (Op.isUndef()) {
19192 // We can't form a divide or remainder from undef.
19193 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
19194 return SDValue();
19195
19196 LHSOps.push_back(Op);
19197 RHSOps.push_back(Op);
19198 continue;
19199 }
19200
19201 // TODO: We can handle operations which have a neutral rhs value
19202 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
19203 // of profit in a more explicit manner.
19204 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
19205 return SDValue();
19206
19207 LHSOps.push_back(Op.getOperand(0));
19208 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
19209 !isa<ConstantFPSDNode>(Op.getOperand(1)))
19210 return SDValue();
19211 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
19212 // have different LHS and RHS types.
19213 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
19214 return SDValue();
19215
19216 RHSOps.push_back(Op.getOperand(1));
19217 }
19218
19219 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
19220 DAG.getBuildVector(VT, DL, RHSOps));
19221}
19222
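19223static MVT getQDOTXResultType(MVT OpVT) {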
19224 ElementCount OpEC = OpVT.getVectorElementCount();
19225 assert(OpEC.isKnownMultipleOf(4) && OpVT.getVectorElementType() == MVT::i8);
19226 return MVT::getVectorVT(MVT::i32, OpEC.divideCoefficientBy(4));
19227}
19228
19229/// Given fixed length vectors A and B with equal element types, but possibly
19230/// different number of elements, return A + B where either A or B is zero
19231/// padded to the larger number of elements.
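19232static SDValue getZeroPaddedAdd(const SDLoc &DL, SDValue A, SDValue B,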
19233 SelectionDAG &DAG) {
19234 // NOTE: Manually doing the extract/add/insert scheme produces
19235 // significantly better codegen than the naive pad with zeros
19236 // and add scheme.
19237 EVT AVT = A.getValueType();
19238 EVT BVT = B.getValueType();
19241 std::swap(A, B);
19242 std::swap(AVT, BVT);
19243 }
19244
19245 SDValue BPart = DAG.getExtractSubvector(DL, AVT, B, 0);
19246 SDValue Res = DAG.getNode(ISD::ADD, DL, AVT, A, BPart);
19247 return DAG.getInsertSubvector(DL, B, Res, 0);
19248}
19249
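19250static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL,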
19251 SelectionDAG &DAG,
19252 const RISCVSubtarget &Subtarget,
19253 const RISCVTargetLowering &TLI) {
19254 using namespace SDPatternMatch;
19255 // Note: We intentionally do not check the legality of the reduction type.
19256 // We want to handle the m4/m8 *src* types, and thus need to let illegal
19257 // intermediate types flow through here.
19258 if (InVec.getValueType().getVectorElementType() != MVT::i32 ||
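19259 !InVec.getValueType().getVectorElementCount().isKnownMultipleOf(4))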
19260 return SDValue();
19261
19262 // Recurse through adds/disjoint ors (since generic dag canonicalizes to that
19263 // form).
19264 SDValue A, B;
19265 if (sd_match(InVec, m_AddLike(m_Value(A), m_Value(B)))) {
19266 SDValue AOpt = foldReduceOperandViaVQDOT(A, DL, DAG, Subtarget, TLI);
19267 SDValue BOpt = foldReduceOperandViaVQDOT(B, DL, DAG, Subtarget, TLI);
19268 if (AOpt || BOpt) {
19269 if (AOpt)
19270 A = AOpt;
19271 if (BOpt)
19272 B = BOpt;
19273 // From here, we're doing A + B with mixed types, implicitly zero
19274 // padded to the wider type. Note that we *don't* need the result
19275 // type to be the original VT, and in fact prefer narrower ones
19276 // if possible.
19277 return getZeroPaddedAdd(DL, A, B, DAG);
19278 }
19279 }
19280
19281 // zext a <--> partial_reduce_umla 0, a, 1
19282 // sext a <--> partial_reduce_smla 0, a, 1
19283 if (InVec.getOpcode() == ISD::ZERO_EXTEND ||
19284 InVec.getOpcode() == ISD::SIGN_EXTEND) {
19285 SDValue A = InVec.getOperand(0);
19286 EVT OpVT = A.getValueType();
19287 if (OpVT.getVectorElementType() != MVT::i8 || !TLI.isTypeLegal(OpVT))
19288 return SDValue();
19289
19290 MVT ResVT = getQDOTXResultType(A.getSimpleValueType());
19291 SDValue B = DAG.getConstant(0x1, DL, OpVT);
19292 bool IsSigned = InVec.getOpcode() == ISD::SIGN_EXTEND;
19293 unsigned Opc =
19294 IsSigned ? ISD::PARTIAL_REDUCE_SMLA : ISD::PARTIAL_REDUCE_UMLA;
19295 return DAG.getNode(Opc, DL, ResVT, {DAG.getConstant(0, DL, ResVT), A, B});
19296 }
19297
19298 // mul (sext a, sext b) -> partial_reduce_smla 0, a, b
19299 // mul (zext a, zext b) -> partial_reduce_umla 0, a, b
19300 // mul (sext a, zext b) -> partial_reduce_ssmla 0, a, b
19301 // mul (zext a, sext b) -> partial_reduce_smla 0, b, a (swapped)
19302 if (!sd_match(InVec, m_Mul(m_Value(A), m_Value(B))))
19303 return SDValue();
19304
19305 if (!ISD::isExtOpcode(A.getOpcode()))
19306 return SDValue();
19307
19308 EVT OpVT = A.getOperand(0).getValueType();
19309 if (OpVT.getVectorElementType() != MVT::i8 ||
19310 OpVT != B.getOperand(0).getValueType() ||
19311 !TLI.isTypeLegal(A.getValueType()))
19312 return SDValue();
19313
19314 unsigned Opc;
19315 if (A.getOpcode() == ISD::SIGN_EXTEND && B.getOpcode() == ISD::SIGN_EXTEND)
19316 Opc = ISD::PARTIAL_REDUCE_SMLA;
19317 else if (A.getOpcode() == ISD::ZERO_EXTEND &&
19318 B.getOpcode() == ISD::ZERO_EXTEND)
19319 Opc = ISD::PARTIAL_REDUCE_UMLA;
19320 else if (A.getOpcode() == ISD::SIGN_EXTEND &&
19321 B.getOpcode() == ISD::ZERO_EXTEND)
19322 Opc = ISD::PARTIAL_REDUCE_SUMLA;
19323 else if (A.getOpcode() == ISD::ZERO_EXTEND &&
19324 B.getOpcode() == ISD::SIGN_EXTEND) {
19325 Opc = ISD::PARTIAL_REDUCE_SUMLA;
19326 std::swap(A, B);
19327 } else
19328 return SDValue();
19329
19330 MVT ResVT = getQDOTXResultType(OpVT.getSimpleVT());
19331 return DAG.getNode(
19332 Opc, DL, ResVT,
19333 {DAG.getConstant(0, DL, ResVT), A.getOperand(0), B.getOperand(0)});
19334}
19335
19337 const RISCVSubtarget &Subtarget,
19338 const RISCVTargetLowering &TLI) {
19339 if (!Subtarget.hasStdExtZvqdotq())
19340 return SDValue();
19341
19342 SDLoc DL(N);
19343 EVT VT = N->getValueType(0);
19344 SDValue InVec = N->getOperand(0);
19345 if (SDValue V = foldReduceOperandViaVQDOT(InVec, DL, DAG, Subtarget, TLI))
19346 return DAG.getNode(ISD::VECREDUCE_ADD, DL, VT, V);
19347 return SDValue();
19348}
19349
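19350static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,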
19351 const RISCVSubtarget &Subtarget,
19352 const RISCVTargetLowering &TLI) {
19353 SDValue InVec = N->getOperand(0);
19354 SDValue InVal = N->getOperand(1);
19355 SDValue EltNo = N->getOperand(2);
19356 SDLoc DL(N);
19357
19358 EVT VT = InVec.getValueType();
19359 if (VT.isScalableVector())
19360 return SDValue();
19361
19362 if (!InVec.hasOneUse())
19363 return SDValue();
19364
19365 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
19366 // move the insert_vector_elts into the arms of the binop. Note that
19367 // the new RHS must be a constant.
19368 const unsigned InVecOpcode = InVec->getOpcode();
19369 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
19370 InVal.hasOneUse()) {
19371 SDValue InVecLHS = InVec->getOperand(0);
19372 SDValue InVecRHS = InVec->getOperand(1);
19373 SDValue InValLHS = InVal->getOperand(0);
19374 SDValue InValRHS = InVal->getOperand(1);
19375
19377 return SDValue();
19378 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
19379 return SDValue();
19380 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
19381 // have different LHS and RHS types.
19382 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
19383 return SDValue();
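19384 SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,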
19385 InVecLHS, InValLHS, EltNo);
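19386 SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,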
19387 InVecRHS, InValRHS, EltNo);
19388 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
19389 }
19390
19391 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
19392 // move the insert_vector_elt to the source operand of the concat_vector.
19393 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
19394 return SDValue();
19395
19396 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
19397 if (!IndexC)
19398 return SDValue();
19399 unsigned Elt = IndexC->getZExtValue();
19400
19401 EVT ConcatVT = InVec.getOperand(0).getValueType();
19402 if (ConcatVT.getVectorElementType() != InVal.getValueType())
19403 return SDValue();
19404 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
19405 unsigned NewIdx = Elt % ConcatNumElts;
19406
19407 unsigned ConcatOpIdx = Elt / ConcatNumElts;
19408 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
19409 ConcatOp = DAG.getInsertVectorElt(DL, ConcatOp, InVal, NewIdx);
19410
19411 SmallVector<SDValue> ConcatOps(InVec->ops());
19412 ConcatOps[ConcatOpIdx] = ConcatOp;
19413 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19414}
19415
19416// If we're concatenating a series of vector loads like
19417// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
19418// Then we can turn this into a strided load by widening the vector elements
19419// vlse32 p, stride=n
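19420static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,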
19421 const RISCVSubtarget &Subtarget,
19422 const RISCVTargetLowering &TLI) {
19423 SDLoc DL(N);
19424 EVT VT = N->getValueType(0);
19425
19426 // Only perform this combine on legal MVTs.
19427 if (!TLI.isTypeLegal(VT))
19428 return SDValue();
19429
19430 // TODO: Potentially extend this to scalable vectors
19431 if (VT.isScalableVector())
19432 return SDValue();
19433
19434 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
19435 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
19436 !SDValue(BaseLd, 0).hasOneUse())
19437 return SDValue();
19438
19439 EVT BaseLdVT = BaseLd->getValueType(0);
19440
19441 // Go through the loads and check that they're strided
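19442 SmallVector<LoadSDNode *> Lds;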
19443 Lds.push_back(BaseLd);
19444 Align Align = BaseLd->getAlign();
19445 for (SDValue Op : N->ops().drop_front()) {
19446 auto *Ld = dyn_cast<LoadSDNode>(Op);
19447 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
19448 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
19449 Ld->getValueType(0) != BaseLdVT)
19450 return SDValue();
19451
19452 Lds.push_back(Ld);
19453
19454 // The common alignment is the most restrictive (smallest) of all the loads
19455 Align = std::min(Align, Ld->getAlign());
19456 }
19457
19458 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
19459 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
19460 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
19461 // If the load ptrs can be decomposed into a common (Base + Index) with a
19462 // common constant stride, then return the constant stride.
19463 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
19464 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
19465 if (BIO1.equalBaseIndex(BIO2, DAG))
19466 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
19467
19468 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
19469 SDValue P1 = Ld1->getBasePtr();
19470 SDValue P2 = Ld2->getBasePtr();
19471 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
19472 return {{P2.getOperand(1), false}};
19473 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
19474 return {{P1.getOperand(1), true}};
19475
19476 return std::nullopt;
19477 };
19478
19479 // Get the distance between the first and second loads
19480 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
19481 if (!BaseDiff)
19482 return SDValue();
19483
19484 // Check all the loads are the same distance apart
19485 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
19486 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
19487 return SDValue();
19488
19489 // TODO: At this point, we've successfully matched a generalized gather
19490 // load. Maybe we should emit that, and then move the specialized
19491 // matchers above and below into a DAG combine?
19492
19493 // Get the widened scalar type, e.g. v4i8 -> i64
19494 unsigned WideScalarBitWidth =
19495 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
19496 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
19497
19498 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
19499 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
19500 if (!TLI.isTypeLegal(WideVecVT))
19501 return SDValue();
19502
19503 // Check that the operation is legal
19504 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
19505 return SDValue();
19506
19507 auto [StrideVariant, MustNegateStride] = *BaseDiff;
19508 SDValue Stride =
19509 std::holds_alternative<SDValue>(StrideVariant)
19510 ? std::get<SDValue>(StrideVariant)
19511 : DAG.getSignedConstant(std::get<int64_t>(StrideVariant), DL,
19512 Lds[0]->getOffset().getValueType());
19513 if (MustNegateStride)
19514 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
19515
19516 SDValue AllOneMask =
19517 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
19518 DAG.getConstant(1, DL, MVT::i1));
19519
19520 uint64_t MemSize;
19521 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
19522 ConstStride && ConstStride->getSExtValue() >= 0)
19523 // total size = (elsize * n) + (stride - elsize) * (n-1)
19524 // = elsize + stride * (n-1)
19525 MemSize = WideScalarVT.getSizeInBits() +
19526 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
19527 else
19528 // If Stride isn't constant, then we can't know how much it will load
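19529 MemSize = MemoryLocation::UnknownSize;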
19530
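19531 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(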
19532 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
19533 Align);
19534
19535 SDValue StridedLoad = DAG.getStridedLoadVP(
19536 WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
19537 AllOneMask,
19538 DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);
19539
19540 for (SDValue Ld : N->ops())
19541 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
19542
19543 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
19544}
19545
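19546static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG,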
19547 const RISCVSubtarget &Subtarget,
19548 const RISCVTargetLowering &TLI) {
19549 SDLoc DL(N);
19550 EVT VT = N->getValueType(0);
19551 const unsigned ElementSize = VT.getScalarSizeInBits();
19552 const unsigned NumElts = VT.getVectorNumElements();
19553 SDValue V1 = N->getOperand(0);
19554 SDValue V2 = N->getOperand(1);
19555 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
19556 MVT XLenVT = Subtarget.getXLenVT();
19557
19558 // Recognize a disguised select of add/sub.
19559 bool SwapCC;
19560 if (ShuffleVectorInst::isSelectMask(Mask, NumElts) &&
19561 matchSelectAddSub(V1, V2, SwapCC)) {
19562 SDValue Sub = SwapCC ? V1 : V2;
19563 SDValue A = Sub.getOperand(0);
19564 SDValue B = Sub.getOperand(1);
19565
19566 SmallVector<SDValue> MaskVals;
19567 for (int MaskIndex : Mask) {
19568 bool SelectMaskVal = (MaskIndex < (int)NumElts);
19569 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
19570 }
19571 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
19572 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
19573 SDValue CC = DAG.getBuildVector(MaskVT, DL, MaskVals);
19574
19575 // Arrange the select such that we can match a masked
19576 // vrsub.vi to perform the conditional negate
19577 SDValue NegB = DAG.getNegative(B, DL, VT);
19578 if (!SwapCC)
19579 CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
19580 SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
19581 return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
19582 }
19583
19584 // Custom legalize <N x i128> or <N x i256> to <M x ELEN>. This runs
19585 // during the combine phase before type legalization, and relies on
19586 // DAGCombine not undoing the transform if isShuffleMaskLegal returns false
19587 // for the source mask.
19588 if (TLI.isTypeLegal(VT) || ElementSize <= Subtarget.getELen() ||
19589 !isPowerOf2_64(ElementSize) || VT.getVectorNumElements() % 2 != 0 ||
19590 VT.isFloatingPoint() || TLI.isShuffleMaskLegal(Mask, VT))
19591 return SDValue();
19592
19593 SmallVector<int, 8> NewMask;
19594 narrowShuffleMaskElts(2, Mask, NewMask);
19595
19596 LLVMContext &C = *DAG.getContext();
19597 EVT NewEltVT = EVT::getIntegerVT(C, ElementSize / 2);
19598 EVT NewVT = EVT::getVectorVT(C, NewEltVT, VT.getVectorNumElements() * 2);
19599 SDValue Res = DAG.getVectorShuffle(NewVT, DL, DAG.getBitcast(NewVT, V1),
19600 DAG.getBitcast(NewVT, V2), NewMask);
19601 return DAG.getBitcast(VT, Res);
19602}
19603
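19604static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,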
19605 const RISCVSubtarget &Subtarget) {
19606 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
19607
19608 if (N->getValueType(0).isFixedLengthVector())
19609 return SDValue();
19610
19611 SDValue Addend = N->getOperand(0);
19612 SDValue MulOp = N->getOperand(1);
19613
19614 if (N->getOpcode() == RISCVISD::ADD_VL) {
19615 SDValue AddPassthruOp = N->getOperand(2);
19616 if (!AddPassthruOp.isUndef())
19617 return SDValue();
19618 }
19619
19620 auto IsVWMulOpc = [](unsigned Opc) {
19621 switch (Opc) {
19622 case RISCVISD::VWMUL_VL:
19623 case RISCVISD::VWMULU_VL:
19624 case RISCVISD::VWMULSU_VL:
19625 return true;
19626 default:
19627 return false;
19628 }
19629 };
19630
19631 if (!IsVWMulOpc(MulOp.getOpcode()))
19632 std::swap(Addend, MulOp);
19633
19634 if (!IsVWMulOpc(MulOp.getOpcode()))
19635 return SDValue();
19636
19637 SDValue MulPassthruOp = MulOp.getOperand(2);
19638
19639 if (!MulPassthruOp.isUndef())
19640 return SDValue();
19641
19642 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
19643 const RISCVSubtarget &Subtarget) {
19644 if (N->getOpcode() == ISD::ADD) {
19645 SDLoc DL(N);
19646 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
19647 Subtarget);
19648 }
19649 return std::make_pair(N->getOperand(3), N->getOperand(4));
19650 }(N, DAG, Subtarget);
19651
19652 SDValue MulMask = MulOp.getOperand(3);
19653 SDValue MulVL = MulOp.getOperand(4);
19654
19655 if (AddMask != MulMask || AddVL != MulVL)
19656 return SDValue();
19657
19658 const auto &TSInfo =
19659 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
19660 unsigned Opc = TSInfo.getMAccOpcode(MulOp.getOpcode());
19661
19662 SDLoc DL(N);
19663 EVT VT = N->getValueType(0);
19664 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
19665 AddVL};
19666 return DAG.getNode(Opc, DL, VT, Ops);
19667}
19668
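// Fold (add x, (vqdot[u|su] a, b, accum)) into a vqdot whose accumulator also
// carries x: the existing accumulator is first added to x with an ADD_VL and
// the dot-product node is rebuilt with that sum as its accumulator operand.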
19669 static SDValue combineVqdotAccum(SDNode *N, SelectionDAG &DAG,
19670 const RISCVSubtarget &Subtarget) {
19671
19672 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
19673
19674 if (!N->getValueType(0).isVector())
19675 return SDValue();
19676
19677 SDValue Addend = N->getOperand(0);
19678 SDValue DotOp = N->getOperand(1);
19679
19680 if (N->getOpcode() == RISCVISD::ADD_VL) {
19681 SDValue AddPassthruOp = N->getOperand(2);
19682 if (!AddPassthruOp.isUndef())
19683 return SDValue();
19684 }
19685
19686 auto IsVqdotqOpc = [](unsigned Opc) {
19687 switch (Opc) {
19688 case RISCVISD::VQDOT_VL:
19689 case RISCVISD::VQDOTU_VL:
19690 case RISCVISD::VQDOTSU_VL:
19691 return true;
19692 default:
19693 return false;
19694 }
19695 };
19696
19697 if (!IsVqdotqOpc(DotOp.getOpcode()))
19698 std::swap(Addend, DotOp);
19699
19700 if (!IsVqdotqOpc(DotOp.getOpcode()))
19701 return SDValue();
19702
19703 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
19704 const RISCVSubtarget &Subtarget) {
19705 if (N->getOpcode() == ISD::ADD) {
19706 SDLoc DL(N);
19707 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
19708 Subtarget);
19709 }
19710 return std::make_pair(N->getOperand(3), N->getOperand(4));
19711 }(N, DAG, Subtarget);
19712
19713 SDValue MulVL = DotOp.getOperand(4);
19714 if (AddVL != MulVL)
19715 return SDValue();
19716
19717 if (AddMask.getOpcode() != RISCVISD::VMSET_VL ||
19718 AddMask.getOperand(0) != MulVL)
19719 return SDValue();
19720
19721 SDValue AccumOp = DotOp.getOperand(2);
19722 SDLoc DL(N);
19723 EVT VT = N->getValueType(0);
19724 Addend = DAG.getNode(RISCVISD::ADD_VL, DL, VT, Addend, AccumOp,
19725 DAG.getUNDEF(VT), AddMask, AddVL);
19726
19727 SDValue Ops[] = {DotOp.getOperand(0), DotOp.getOperand(1), Addend,
19728 DotOp.getOperand(3), DotOp->getOperand(4)};
19729 return DAG.getNode(DotOp->getOpcode(), DL, VT, Ops);
19730}
19731
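// Indices that are XLenVT wide wrap identically under signed and unsigned
// address arithmetic, so a signed index vector can be sign-extended to XLenVT
// and then simply reinterpreted as unsigned.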
19732 static bool
19733 legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
19734 ISD::MemIndexType &IndexType,
19735 RISCVTargetLowering::DAGCombinerInfo &DCI) {
19736 if (!DCI.isBeforeLegalize())
19737 return false;
19738
19739 SelectionDAG &DAG = DCI.DAG;
19740 const MVT XLenVT =
19741 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
19742
19743 const EVT IndexVT = Index.getValueType();
19744
19745 // RISC-V indexed loads only support the "unsigned unscaled" addressing
19746 // mode, so anything else must be manually legalized.
19747 if (!isIndexTypeSigned(IndexType))
19748 return false;
19749
19750 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
19751 // Any index legalization should first promote to XLenVT, so we don't lose
19752 // bits when scaling. This may create an illegal index type so we let
19753 // LLVM's legalization take care of the splitting.
19754 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
19755 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
19756 IndexVT.changeVectorElementType(XLenVT), Index);
19757 }
19758 IndexType = ISD::UNSIGNED_SCALED;
19759 return true;
19760}
19761
19762/// Match the index vector of a scatter or gather node as the shuffle mask
19763/// which performs the rearrangement if possible. Will only match if
19764/// all lanes are touched, and thus replacing the scatter or gather with
19765/// a unit strided access and shuffle is legal.
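/// E.g. a gather of i32 elements with byte offsets {12, 8, 4, 0} touches every
/// lane exactly once and is equivalent to a unit-strided load followed by the
/// shuffle <3, 2, 1, 0>.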
19766static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
19767 SmallVector<int> &ShuffleMask) {
19768 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
19769 return false;
19770 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
19771 return false;
19772
19773 const unsigned ElementSize = VT.getScalarStoreSize();
19774 const unsigned NumElems = VT.getVectorNumElements();
19775
19776 // Create the shuffle mask and check that all lanes are active.
19777 assert(ShuffleMask.empty());
19778 BitVector ActiveLanes(NumElems);
19779 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
19780 // TODO: We've found an active bit of UB, and could be
19781 // more aggressive here if desired.
19782 if (Index->getOperand(i)->isUndef())
19783 return false;
19784 uint64_t C = Index->getConstantOperandVal(i);
19785 if (C % ElementSize != 0)
19786 return false;
19787 C = C / ElementSize;
19788 if (C >= NumElems)
19789 return false;
19790 ShuffleMask.push_back(C);
19791 ActiveLanes.set(C);
19792 }
19793 return ActiveLanes.all();
19794}
19795
19796/// Match the index of a gather or scatter operation as an operation
19797/// with twice the element width and half the number of elements. This is
19798/// generally profitable (if legal) because these operations are linear
19799 /// in VL, so even if we cause some extra VTYPE/VL toggles, we still
19800/// come out ahead.
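/// E.g. an i32 gather with byte offsets {0, 4, 16, 20} can instead be done as
/// an i64 gather with offsets {0, 16}, halving the number of indexed accesses.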
19801static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
19802 Align BaseAlign, const RISCVSubtarget &ST) {
19803 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
19804 return false;
19805 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
19806 return false;
19807
19808 // Attempt a doubling. If we can use an element type 4x or 8x in
19809 // size, this will happen via multiple iterations of the transform.
19810 const unsigned NumElems = VT.getVectorNumElements();
19811 if (NumElems % 2 != 0)
19812 return false;
19813
19814 const unsigned ElementSize = VT.getScalarStoreSize();
19815 const unsigned WiderElementSize = ElementSize * 2;
19816 if (WiderElementSize > ST.getELen()/8)
19817 return false;
19818
19819 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
19820 return false;
19821
19822 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
19823 // TODO: We've found an active bit of UB, and could be
19824 // more aggressive here if desired.
19825 if (Index->getOperand(i)->isUndef())
19826 return false;
19827 // TODO: This offset check is too strict if we support fully
19828 // misaligned memory operations.
19829 uint64_t C = Index->getConstantOperandVal(i);
19830 if (i % 2 == 0) {
19831 if (C % WiderElementSize != 0)
19832 return false;
19833 continue;
19834 }
19835 uint64_t Last = Index->getConstantOperandVal(i-1);
19836 if (C != Last + ElementSize)
19837 return false;
19838 }
19839 return true;
19840}
19841
19842// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
19843 // This is beneficial for the cases where X and Y are both the same value
19844// type of low precision vectors. Since the truncate would be lowered into
19845// n-levels TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
19846// restriction, such pattern would be expanded into a series of "vsetvli"
19847// and "vnsrl" instructions later to reach this point.
19848 static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) {
19849 SDValue Mask = N->getOperand(1);
19850 SDValue VL = N->getOperand(2);
19851
19852 bool IsVLMAX = isAllOnesConstant(VL) ||
19853 (isa<RegisterSDNode>(VL) &&
19854 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
19855 if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
19856 Mask.getOperand(0) != VL)
19857 return SDValue();
19858
19859 auto IsTruncNode = [&](SDValue V) {
19860 return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
19861 V.getOperand(1) == Mask && V.getOperand(2) == VL;
19862 };
19863
19864 SDValue Op = N->getOperand(0);
19865
19866 // We need to first find the inner level of TRUNCATE_VECTOR_VL node
19867 // to distinguish such pattern.
19868 while (IsTruncNode(Op)) {
19869 if (!Op.hasOneUse())
19870 return SDValue();
19871 Op = Op.getOperand(0);
19872 }
19873
19874 if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
19875 return SDValue();
19876
19877 SDValue N0 = Op.getOperand(0);
19878 SDValue N1 = Op.getOperand(1);
19879 if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
19880 N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
19881 return SDValue();
19882
19883 SDValue N00 = N0.getOperand(0);
19884 SDValue N10 = N1.getOperand(0);
19885 if (!N00.getValueType().isVector() ||
19886 N00.getValueType() != N10.getValueType() ||
19887 N->getValueType(0) != N10.getValueType())
19888 return SDValue();
19889
19890 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
19891 SDValue SMin =
19892 DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
19893 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
19894 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
19895}
19896
19897// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
19898// maximum value for the truncated type.
19899// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
19900// is the signed maximum value for the truncated type and C2 is the signed
19901// minimum value.
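// E.g. when truncating to i8 lanes, (umin X, 255) maps to vnclipu and
// (smin (smax X, -128), 127) maps to vnclip.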
19902 static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
19903 const RISCVSubtarget &Subtarget) {
19904 assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
19905
19906 MVT VT = N->getSimpleValueType(0);
19907
19908 SDValue Mask = N->getOperand(1);
19909 SDValue VL = N->getOperand(2);
19910
19911 auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
19912 APInt &SplatVal) {
19913 if (V.getOpcode() != Opc &&
19914 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
19915 V.getOperand(3) == Mask && V.getOperand(4) == VL))
19916 return SDValue();
19917
19918 SDValue Op = V.getOperand(1);
19919
19920 // Peek through conversion between fixed and scalable vectors.
19921 if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
19922 isNullConstant(Op.getOperand(2)) &&
19923 Op.getOperand(1).getValueType().isFixedLengthVector() &&
19924 Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
19925 Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
19926 isNullConstant(Op.getOperand(1).getOperand(1)))
19927 Op = Op.getOperand(1).getOperand(0);
19928
19929 if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
19930 return V.getOperand(0);
19931
19932 if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
19933 Op.getOperand(2) == VL) {
19934 if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
19935 SplatVal =
19936 Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
19937 return V.getOperand(0);
19938 }
19939 }
19940
19941 return SDValue();
19942 };
19943
19944 SDLoc DL(N);
19945
19946 auto DetectUSatPattern = [&](SDValue V) {
19947 APInt LoC, HiC;
19948
19949 // Simple case, V is a UMIN.
19950 if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
19951 if (HiC.isMask(VT.getScalarSizeInBits()))
19952 return UMinOp;
19953
19954 // If we have an SMAX that removes negative numbers first, then we can match
19955 // SMIN instead of UMIN.
19956 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19957 if (SDValue SMaxOp =
19958 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19959 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
19960 return SMinOp;
19961
19962 // If we have an SMIN before an SMAX and the SMAX constant is less than or
19963 // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
19964 // first.
19965 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19966 if (SDValue SMinOp =
19967 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19968 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
19969 HiC.uge(LoC))
19970 return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
19971 V.getOperand(1), DAG.getUNDEF(V.getValueType()),
19972 Mask, VL);
19973
19974 return SDValue();
19975 };
19976
19977 auto DetectSSatPattern = [&](SDValue V) {
19978 unsigned NumDstBits = VT.getScalarSizeInBits();
19979 unsigned NumSrcBits = V.getScalarValueSizeInBits();
19980 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
19981 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
19982
19983 APInt HiC, LoC;
19984 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19985 if (SDValue SMaxOp =
19986 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19987 if (HiC == SignedMax && LoC == SignedMin)
19988 return SMaxOp;
19989
19990 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19991 if (SDValue SMinOp =
19992 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19993 if (HiC == SignedMax && LoC == SignedMin)
19994 return SMinOp;
19995
19996 return SDValue();
19997 };
19998
19999 SDValue Src = N->getOperand(0);
20000
20001 // Look through multiple layers of truncates.
20002 while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
20003 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
20004 Src.hasOneUse())
20005 Src = Src.getOperand(0);
20006
20007 SDValue Val;
20008 unsigned ClipOpc;
20009 if ((Val = DetectUSatPattern(Src)))
20010 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
20011 else if ((Val = DetectSSatPattern(Src)))
20012 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
20013 else
20014 return SDValue();
20015
20016 MVT ValVT = Val.getSimpleValueType();
20017
20018 do {
20019 MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
20020 ValVT = ValVT.changeVectorElementType(ValEltVT);
20021 Val = DAG.getNode(ClipOpc, DL, ValVT, Val, Mask, VL);
20022 } while (ValVT != VT);
20023
20024 return Val;
20025}
20026
20027// Convert
20028// (iX ctpop (bitcast (vXi1 A)))
20029// ->
20030// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
20031// and
20032// (iN reduce.add (zext (vXi1 A to vXiN))
20033// ->
20034// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
20035// FIXME: It's complicated to match all the variations of this after type
20036// legalization so we only handle the pre-type legalization pattern, but that
20037// requires the fixed vector type to be legal.
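// E.g. (ctpop (i8 bitcast (v8i1 M))), possibly zero-extended, counts the set
// bits of the mask, which is exactly what vcpop.m computes on M, so the scalar
// popcount disappears.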
20038 static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG,
20039 const RISCVSubtarget &Subtarget) {
20040 unsigned Opc = N->getOpcode();
20041 assert((Opc == ISD::CTPOP || Opc == ISD::VECREDUCE_ADD) &&
20042 "Unexpected opcode");
20043 EVT VT = N->getValueType(0);
20044 if (!VT.isScalarInteger())
20045 return SDValue();
20046
20047 SDValue Src = N->getOperand(0);
20048
20049 if (Opc == ISD::CTPOP) {
20050 // Peek through zero_extend. It doesn't change the count.
20051 if (Src.getOpcode() == ISD::ZERO_EXTEND)
20052 Src = Src.getOperand(0);
20053
20054 if (Src.getOpcode() != ISD::BITCAST)
20055 return SDValue();
20056 Src = Src.getOperand(0);
20057 } else if (Opc == ISD::VECREDUCE_ADD) {
20058 if (Src.getOpcode() != ISD::ZERO_EXTEND)
20059 return SDValue();
20060 Src = Src.getOperand(0);
20061 }
20062
20063 EVT SrcEVT = Src.getValueType();
20064 if (!SrcEVT.isSimple())
20065 return SDValue();
20066
20067 MVT SrcMVT = SrcEVT.getSimpleVT();
20068 // Make sure the input is an i1 vector.
20069 if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
20070 return SDValue();
20071
20072 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20073 if (!TLI.isTypeLegal(SrcMVT))
20074 return SDValue();
20075
20076 // Check that destination type is large enough to hold result without
20077 // overflow.
20078 if (Opc == ISD::VECREDUCE_ADD) {
20079 unsigned EltSize = SrcMVT.getScalarSizeInBits();
20080 unsigned MinSize = SrcMVT.getSizeInBits().getKnownMinValue();
20081 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
20082 unsigned MaxVLMAX = SrcMVT.isFixedLengthVector()
20083 ? SrcMVT.getVectorNumElements()
20084 : RISCVTargetLowering::computeVLMAX(
20085 VectorBitsMax, EltSize, MinSize);
20086 if (VT.getFixedSizeInBits() < Log2_32(MaxVLMAX) + 1)
20087 return SDValue();
20088 }
20089
20090 MVT ContainerVT = SrcMVT;
20091 if (SrcMVT.isFixedLengthVector()) {
20092 ContainerVT = getContainerForFixedLengthVector(DAG, SrcMVT, Subtarget);
20093 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
20094 }
20095
20096 SDLoc DL(N);
20097 auto [Mask, VL] = getDefaultVLOps(SrcMVT, ContainerVT, DL, DAG, Subtarget);
20098
20099 MVT XLenVT = Subtarget.getXLenVT();
20100 SDValue Pop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Src, Mask, VL);
20101 return DAG.getZExtOrTrunc(Pop, DL, VT);
20102}
20103
20104 static SDValue performSHLCombine(SDNode *N,
20105 TargetLowering::DAGCombinerInfo &DCI,
20106 const RISCVSubtarget &Subtarget) {
20107 // (shl (zext x), y) -> (vwsll x, y)
20108 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20109 return V;
20110
20111 // (shl (sext x), C) -> (vwmulsu x, 1u << C)
20112 // (shl (zext x), C) -> (vwmulu x, 1u << C)
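// Shifting the extended value left by C is the same as multiplying the narrow
// source by (1 << C), so a single widening multiply produces the wide result
// directly (C must be smaller than the narrow element width).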
20113
20114 if (!DCI.isAfterLegalizeDAG())
20115 return SDValue();
20116
20117 SDValue LHS = N->getOperand(0);
20118 if (!LHS.hasOneUse())
20119 return SDValue();
20120 unsigned Opcode;
20121 switch (LHS.getOpcode()) {
20122 case ISD::SIGN_EXTEND:
20123 case RISCVISD::VSEXT_VL:
20124 Opcode = RISCVISD::VWMULSU_VL;
20125 break;
20126 case ISD::ZERO_EXTEND:
20127 case RISCVISD::VZEXT_VL:
20128 Opcode = RISCVISD::VWMULU_VL;
20129 break;
20130 default:
20131 return SDValue();
20132 }
20133
20134 SDValue RHS = N->getOperand(1);
20135 APInt ShAmt;
20136 uint64_t ShAmtInt;
20137 if (ISD::isConstantSplatVector(RHS.getNode(), ShAmt))
20138 ShAmtInt = ShAmt.getZExtValue();
20139 else if (RHS.getOpcode() == RISCVISD::VMV_V_X_VL &&
20140 RHS.getOperand(1).getOpcode() == ISD::Constant)
20141 ShAmtInt = RHS.getConstantOperandVal(1);
20142 else
20143 return SDValue();
20144
20145 // Better foldings:
20146 // (shl (sext x), 1) -> (vwadd x, x)
20147 // (shl (zext x), 1) -> (vwaddu x, x)
20148 if (ShAmtInt <= 1)
20149 return SDValue();
20150
20151 SDValue NarrowOp = LHS.getOperand(0);
20152 MVT NarrowVT = NarrowOp.getSimpleValueType();
20153 uint64_t NarrowBits = NarrowVT.getScalarSizeInBits();
20154 if (ShAmtInt >= NarrowBits)
20155 return SDValue();
20156 MVT VT = N->getSimpleValueType(0);
20157 if (NarrowBits * 2 != VT.getScalarSizeInBits())
20158 return SDValue();
20159
20160 SelectionDAG &DAG = DCI.DAG;
20161 SDLoc DL(N);
20162 SDValue Passthru, Mask, VL;
20163 switch (N->getOpcode()) {
20164 case ISD::SHL:
20165 Passthru = DAG.getUNDEF(VT);
20166 std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
20167 break;
20168 case RISCVISD::SHL_VL:
20169 Passthru = N->getOperand(2);
20170 Mask = N->getOperand(3);
20171 VL = N->getOperand(4);
20172 break;
20173 default:
20174 llvm_unreachable("Expected SHL");
20175 }
20176 return DAG.getNode(Opcode, DL, VT, NarrowOp,
20177 DAG.getConstant(1ULL << ShAmtInt, SDLoc(RHS), NarrowVT),
20178 Passthru, Mask, VL);
20179}
20180
20181 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
20182 DAGCombinerInfo &DCI) const {
20183 SelectionDAG &DAG = DCI.DAG;
20184 const MVT XLenVT = Subtarget.getXLenVT();
20185 SDLoc DL(N);
20186
20187 // Helper to call SimplifyDemandedBits on an operand of N where only some low
20188 // bits are demanded. N will be added to the Worklist if it was not deleted.
20189 // Caller should return SDValue(N, 0) if this returns true.
20190 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
20191 SDValue Op = N->getOperand(OpNo);
20192 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
20193 if (!SimplifyDemandedBits(Op, Mask, DCI))
20194 return false;
20195
20196 if (N->getOpcode() != ISD::DELETED_NODE)
20197 DCI.AddToWorklist(N);
20198 return true;
20199 };
20200
20201 switch (N->getOpcode()) {
20202 default:
20203 break;
20204 case RISCVISD::SplitF64: {
20205 SDValue Op0 = N->getOperand(0);
20206 // If the input to SplitF64 is just BuildPairF64 then the operation is
20207 // redundant. Instead, use BuildPairF64's operands directly.
20208 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
20209 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
20210
20211 if (Op0->isUndef()) {
20212 SDValue Lo = DAG.getUNDEF(MVT::i32);
20213 SDValue Hi = DAG.getUNDEF(MVT::i32);
20214 return DCI.CombineTo(N, Lo, Hi);
20215 }
20216
20217 // It's cheaper to materialise two 32-bit integers than to load a double
20218 // from the constant pool and transfer it to integer registers through the
20219 // stack.
20220 if (auto *C = dyn_cast<ConstantFPSDNode>(Op0)) {
20221 APInt V = C->getValueAPF().bitcastToAPInt();
20222 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
20223 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
20224 return DCI.CombineTo(N, Lo, Hi);
20225 }
20226
20227 // This is a target-specific version of a DAGCombine performed in
20228 // DAGCombiner::visitBITCAST. It performs the equivalent of:
20229 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
20230 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
20231 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
20232 !Op0.getNode()->hasOneUse() || Subtarget.hasStdExtZdinx())
20233 break;
20234 SDValue NewSplitF64 =
20235 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
20236 Op0.getOperand(0));
20237 SDValue Lo = NewSplitF64.getValue(0);
20238 SDValue Hi = NewSplitF64.getValue(1);
20239 APInt SignBit = APInt::getSignMask(32);
20240 if (Op0.getOpcode() == ISD::FNEG) {
20241 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
20242 DAG.getConstant(SignBit, DL, MVT::i32));
20243 return DCI.CombineTo(N, Lo, NewHi);
20244 }
20245 assert(Op0.getOpcode() == ISD::FABS);
20246 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
20247 DAG.getConstant(~SignBit, DL, MVT::i32));
20248 return DCI.CombineTo(N, Lo, NewHi);
20249 }
20250 case RISCVISD::SLLW:
20251 case RISCVISD::SRAW:
20252 case RISCVISD::SRLW:
20253 case RISCVISD::RORW:
20254 case RISCVISD::ROLW: {
20255 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
20256 if (SimplifyDemandedLowBitsHelper(0, 32) ||
20257 SimplifyDemandedLowBitsHelper(1, 5))
20258 return SDValue(N, 0);
20259
20260 break;
20261 }
20262 case RISCVISD::CLZW:
20263 case RISCVISD::CTZW: {
20264 // Only the lower 32 bits of the first operand are read
20265 if (SimplifyDemandedLowBitsHelper(0, 32))
20266 return SDValue(N, 0);
20267 break;
20268 }
20269 case RISCVISD::FMV_W_X_RV64: {
20270 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
20271 // conversion is unnecessary and can be replaced with the
20272 // FMV_X_ANYEXTW_RV64 operand.
20273 SDValue Op0 = N->getOperand(0);
20274 if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
20275 return Op0.getOperand(0);
20276 break;
20277 }
20278 case RISCVISD::FMV_X_ANYEXTH:
20279 case RISCVISD::FMV_X_ANYEXTW_RV64: {
20280 SDLoc DL(N);
20281 SDValue Op0 = N->getOperand(0);
20282 MVT VT = N->getSimpleValueType(0);
20283
20284 // Constant fold.
20285 if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op0)) {
20286 APInt Val = CFP->getValueAPF().bitcastToAPInt().sext(VT.getSizeInBits());
20287 return DAG.getConstant(Val, DL, VT);
20288 }
20289
20290 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
20291 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
20292 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
20293 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
20294 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
20295 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
20296 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
20297 assert(Op0.getOperand(0).getValueType() == VT &&
20298 "Unexpected value type!");
20299 return Op0.getOperand(0);
20300 }
20301
20302 if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() &&
20303 cast<LoadSDNode>(Op0)->isSimple()) {
20304 MVT IVT = MVT::getIntegerVT(Op0.getValueSizeInBits());
20305 auto *LN0 = cast<LoadSDNode>(Op0);
20306 SDValue Load =
20307 DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(),
20308 LN0->getBasePtr(), IVT, LN0->getMemOperand());
20309 DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
20310 return Load;
20311 }
20312
20313 // This is a target-specific version of a DAGCombine performed in
20314 // DAGCombiner::visitBITCAST. It performs the equivalent of:
20315 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
20316 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
20317 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
20318 !Op0.getNode()->hasOneUse())
20319 break;
20320 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
20321 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
20322 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
20323 if (Op0.getOpcode() == ISD::FNEG)
20324 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
20325 DAG.getConstant(SignBit, DL, VT));
20326
20327 assert(Op0.getOpcode() == ISD::FABS);
20328 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
20329 DAG.getConstant(~SignBit, DL, VT));
20330 }
20331 case ISD::ABS: {
20332 EVT VT = N->getValueType(0);
20333 SDValue N0 = N->getOperand(0);
20334 // abs (sext) -> zext (abs)
20335 // abs (zext) -> zext (handled elsewhere)
20336 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
20337 SDValue Src = N0.getOperand(0);
20338 SDLoc DL(N);
20339 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
20340 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
20341 }
20342 break;
20343 }
20344 case ISD::ADD: {
20345 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20346 return V;
20347 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
20348 return V;
20349 if (SDValue V = combineVqdotAccum(N, DAG, Subtarget))
20350 return V;
20351 return performADDCombine(N, DCI, Subtarget);
20352 }
20353 case ISD::SUB: {
20354 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20355 return V;
20356 return performSUBCombine(N, DAG, Subtarget);
20357 }
20358 case ISD::AND:
20359 return performANDCombine(N, DCI, Subtarget);
20360 case ISD::OR: {
20361 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20362 return V;
20363 return performORCombine(N, DCI, Subtarget);
20364 }
20365 case ISD::XOR:
20366 return performXORCombine(N, DAG, Subtarget);
20367 case ISD::MUL:
20368 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20369 return V;
20370 return performMULCombine(N, DAG, DCI, Subtarget);
20371 case ISD::SDIV:
20372 case ISD::UDIV:
20373 case ISD::SREM:
20374 case ISD::UREM:
20375 if (SDValue V = combineBinOpOfZExt(N, DAG))
20376 return V;
20377 break;
20378 case ISD::FMUL: {
20379 using namespace SDPatternMatch;
20380 SDLoc DL(N);
20381 EVT VT = N->getValueType(0);
20382 SDValue X, Y;
20383 // InstCombine canonicalizes fneg (fmul x, y) -> fmul x, (fneg y), see
20384 // hoistFNegAboveFMulFDiv.
20385 // Undo this and sink the fneg so we match more fmsub/fnmadd patterns.
20386 if (sd_match(N, m_FMul(m_Value(X), m_OneUse(m_FNeg(m_Value(Y))))))
20387 return DAG.getNode(ISD::FNEG, DL, VT,
20388 DAG.getNode(ISD::FMUL, DL, VT, X, Y));
20389
20390 // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
20391 SDValue N0 = N->getOperand(0);
20392 SDValue N1 = N->getOperand(1);
20393 if (N0->getOpcode() != ISD::FCOPYSIGN)
20394 std::swap(N0, N1);
20395 if (N0->getOpcode() != ISD::FCOPYSIGN)
20396 return SDValue();
20397 auto *C = dyn_cast<ConstantFPSDNode>(N0->getOperand(0));
20398 if (!C || !C->getValueAPF().isExactlyValue(+1.0))
20399 return SDValue();
20400 if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
20401 return SDValue();
20402 SDValue Sign = N0->getOperand(1);
20403 if (Sign.getValueType() != VT)
20404 return SDValue();
20405 return DAG.getNode(RISCVISD::FSGNJX, DL, VT, N1, N0->getOperand(1));
20406 }
20407 case ISD::FADD:
20408 case ISD::UMAX:
20409 case ISD::UMIN:
20410 case ISD::SMAX:
20411 case ISD::SMIN:
20412 case ISD::FMAXNUM:
20413 case ISD::FMINNUM: {
20414 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
20415 return V;
20416 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
20417 return V;
20418 return SDValue();
20419 }
20420 case ISD::SETCC:
20421 return performSETCCCombine(N, DCI, Subtarget);
20422 case ISD::SIGN_EXTEND_INREG:
20423 return performSIGN_EXTEND_INREGCombine(N, DCI, Subtarget);
20424 case ISD::ZERO_EXTEND:
20425 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
20426 // type legalization. This is safe because fp_to_uint produces poison if
20427 // it overflows.
20428 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
20429 SDValue Src = N->getOperand(0);
20430 if (Src.getOpcode() == ISD::FP_TO_UINT &&
20431 isTypeLegal(Src.getOperand(0).getValueType()))
20432 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
20433 Src.getOperand(0));
20434 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
20435 isTypeLegal(Src.getOperand(1).getValueType())) {
20436 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
20437 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
20438 Src.getOperand(0), Src.getOperand(1));
20439 DCI.CombineTo(N, Res);
20440 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
20441 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
20442 return SDValue(N, 0); // Return N so it doesn't get rechecked.
20443 }
20444 }
20445 return SDValue();
20446 case RISCVISD::TRUNCATE_VECTOR_VL:
20447 if (SDValue V = combineTruncOfSraSext(N, DAG))
20448 return V;
20449 return combineTruncToVnclip(N, DAG, Subtarget);
20450 case ISD::VP_TRUNCATE:
20451 return performVP_TRUNCATECombine(N, DAG, Subtarget);
20452 case ISD::TRUNCATE:
20453 return performTRUNCATECombine(N, DAG, Subtarget);
20454 case ISD::SELECT:
20455 return performSELECTCombine(N, DAG, Subtarget);
20456 case ISD::VSELECT:
20457 return performVSELECTCombine(N, DAG);
20458 case RISCVISD::CZERO_EQZ:
20459 case RISCVISD::CZERO_NEZ: {
20460 SDValue Val = N->getOperand(0);
20461 SDValue Cond = N->getOperand(1);
20462
20463 unsigned Opc = N->getOpcode();
20464
20465 // czero_eqz x, x -> x
20466 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
20467 return Val;
20468
20469 unsigned InvOpc =
20470 Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
20471
20472 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
20473 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
20474 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
20475 SDValue NewCond = Cond.getOperand(0);
20476 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
20477 if (DAG.MaskedValueIsZero(NewCond, Mask))
20478 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
20479 }
20480 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
20481 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
20482 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
20483 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
20484 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
20485 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
20486 if (ISD::isIntEqualitySetCC(CCVal))
20487 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
20488 N->getValueType(0), Val, Cond.getOperand(0));
20489 }
20490 return SDValue();
20491 }
20492 case RISCVISD::SELECT_CC: {
20493 // Transform
20494 SDValue LHS = N->getOperand(0);
20495 SDValue RHS = N->getOperand(1);
20496 SDValue CC = N->getOperand(2);
20497 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
20498 SDValue TrueV = N->getOperand(3);
20499 SDValue FalseV = N->getOperand(4);
20500 SDLoc DL(N);
20501 EVT VT = N->getValueType(0);
20502
20503 // If the True and False values are the same, we don't need a select_cc.
20504 if (TrueV == FalseV)
20505 return TrueV;
20506
20507 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
20508 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
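// x >> (XLEN - 1) is all-ones when x is negative and zero otherwise, so the
// AND keeps (y - z) only in the negative case and the final add of z selects
// between y and z without a branch.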
20509 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
20510 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
20511 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
20512 if (CCVal == ISD::CondCode::SETGE)
20513 std::swap(TrueV, FalseV);
20514
20515 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
20516 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
20517 // Only handle simm12; a constant outside this range would have to be
20518 // materialized in a register anyway.
20519 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
20520 isInt<12>(TrueSImm - FalseSImm)) {
20521 SDValue SRA =
20522 DAG.getNode(ISD::SRA, DL, VT, LHS,
20523 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
20524 SDValue AND =
20525 DAG.getNode(ISD::AND, DL, VT, SRA,
20526 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
20527 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
20528 }
20529
20530 if (CCVal == ISD::CondCode::SETGE)
20531 std::swap(TrueV, FalseV);
20532 }
20533
20534 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
20535 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
20536 {LHS, RHS, CC, TrueV, FalseV});
20537
20538 if (!Subtarget.hasConditionalMoveFusion()) {
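// In the folds below, the setcc result c is turned into an all-ones or
// all-zeros mask by negating it, so the select collapses to a single OR or
// AND with that mask.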
20539 // (select c, -1, y) -> -c | y
20540 if (isAllOnesConstant(TrueV)) {
20541 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
20542 SDValue Neg = DAG.getNegative(C, DL, VT);
20543 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
20544 }
20545 // (select c, y, -1) -> -!c | y
20546 if (isAllOnesConstant(FalseV)) {
20547 SDValue C =
20548 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
20549 SDValue Neg = DAG.getNegative(C, DL, VT);
20550 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
20551 }
20552
20553 // (select c, 0, y) -> -!c & y
20554 if (isNullConstant(TrueV)) {
20555 SDValue C =
20556 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
20557 SDValue Neg = DAG.getNegative(C, DL, VT);
20558 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
20559 }
20560 // (select c, y, 0) -> -c & y
20561 if (isNullConstant(FalseV)) {
20562 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
20563 SDValue Neg = DAG.getNegative(C, DL, VT);
20564 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
20565 }
20566 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
20567 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
20568 if (((isOneConstant(FalseV) && LHS == TrueV &&
20569 CCVal == ISD::CondCode::SETNE) ||
20570 (isOneConstant(TrueV) && LHS == FalseV &&
20571 CCVal == ISD::CondCode::SETEQ)) &&
20572 isNullConstant(RHS)) {
20573 // freeze it to be safe.
20574 LHS = DAG.getFreeze(LHS);
20575 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
20576 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
20577 }
20578 }
20579
20580 // If both true/false are an xor with 1, pull through the select.
20581 // This can occur after op legalization if both operands are setccs that
20582 // require an xor to invert.
20583 // FIXME: Generalize to other binary ops with identical operand?
20584 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
20585 TrueV.getOperand(1) == FalseV.getOperand(1) &&
20586 isOneConstant(TrueV.getOperand(1)) &&
20587 TrueV.hasOneUse() && FalseV.hasOneUse()) {
20588 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
20589 TrueV.getOperand(0), FalseV.getOperand(0));
20590 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
20591 }
20592
20593 return SDValue();
20594 }
20595 case RISCVISD::BR_CC: {
20596 SDValue LHS = N->getOperand(1);
20597 SDValue RHS = N->getOperand(2);
20598 SDValue CC = N->getOperand(3);
20599 SDLoc DL(N);
20600
20601 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
20602 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
20603 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
20604
20605 return SDValue();
20606 }
20607 case ISD::BITREVERSE:
20608 return performBITREVERSECombine(N, DAG, Subtarget);
20609 case ISD::FP_TO_SINT:
20610 case ISD::FP_TO_UINT:
20611 return performFP_TO_INTCombine(N, DCI, Subtarget);
20612 case ISD::FP_TO_SINT_SAT:
20613 case ISD::FP_TO_UINT_SAT:
20614 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
20615 case ISD::FCOPYSIGN: {
20616 EVT VT = N->getValueType(0);
20617 if (!VT.isVector())
20618 break;
20619 // There is a form of VFSGNJ which injects the negated sign of its second
20620 // operand. Try to bubble any FNEG up after the extend/round to produce
20621 // this optimized pattern. Avoid modifying cases where the input is an
20622 // FP_ROUND with TRUNC=1.
20623 SDValue In2 = N->getOperand(1);
20624 // Avoid cases where the extend/round has multiple uses, as duplicating
20625 // those is typically more expensive than removing a fneg.
20626 if (!In2.hasOneUse())
20627 break;
20628 if (In2.getOpcode() != ISD::FP_EXTEND &&
20629 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
20630 break;
20631 In2 = In2.getOperand(0);
20632 if (In2.getOpcode() != ISD::FNEG)
20633 break;
20634 SDLoc DL(N);
20635 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
20636 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
20637 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
20638 }
20639 case ISD::MGATHER: {
20640 const auto *MGN = cast<MaskedGatherSDNode>(N);
20641 const EVT VT = N->getValueType(0);
20642 SDValue Index = MGN->getIndex();
20643 SDValue ScaleOp = MGN->getScale();
20644 ISD::MemIndexType IndexType = MGN->getIndexType();
20645 assert(!MGN->isIndexScaled() &&
20646 "Scaled gather/scatter should not be formed");
20647
20648 SDLoc DL(N);
20649 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20650 return DAG.getMaskedGather(
20651 N->getVTList(), MGN->getMemoryVT(), DL,
20652 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
20653 MGN->getBasePtr(), Index, ScaleOp},
20654 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
20655
20656 if (narrowIndex(Index, IndexType, DAG))
20657 return DAG.getMaskedGather(
20658 N->getVTList(), MGN->getMemoryVT(), DL,
20659 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
20660 MGN->getBasePtr(), Index, ScaleOp},
20661 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
20662
20663 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
20664 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
20665 // The sequence will be XLenVT, not the type of Index. Tell
20666 // isSimpleVIDSequence this so we avoid overflow.
20667 if (std::optional<VIDSequence> SimpleVID =
20668 isSimpleVIDSequence(Index, Subtarget.getXLen());
20669 SimpleVID && SimpleVID->StepDenominator == 1) {
20670 const int64_t StepNumerator = SimpleVID->StepNumerator;
20671 const int64_t Addend = SimpleVID->Addend;
20672
20673 // Note: We don't need to check alignment here since (by assumption
20674 // from the existence of the gather), our offsets must be sufficiently
20675 // aligned.
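// An index sequence of the form Addend + i * StepNumerator describes evenly
// spaced addresses, so the gather becomes a strided load: the Addend is
// folded into the base pointer and StepNumerator becomes the byte stride.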
20676
20677 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
20678 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
20679 assert(IndexType == ISD::UNSIGNED_SCALED);
20680 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
20681 DAG.getSignedConstant(Addend, DL, PtrVT));
20682
20683 SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
20684 VT.getVectorElementCount());
20685 SDValue StridedLoad = DAG.getStridedLoadVP(
20686 VT, DL, MGN->getChain(), BasePtr,
20687 DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(),
20688 EVL, MGN->getMemOperand());
20689 SDValue Select = DAG.getSelect(DL, VT, MGN->getMask(), StridedLoad,
20690 MGN->getPassThru());
20691 return DAG.getMergeValues({Select, SDValue(StridedLoad.getNode(), 1)},
20692 DL);
20693 }
20694 }
20695
20696 SmallVector<int> ShuffleMask;
20697 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
20698 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
20699 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
20700 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
20701 MGN->getMask(), DAG.getUNDEF(VT),
20702 MGN->getMemoryVT(), MGN->getMemOperand(),
20703 ISD::UNINDEXED, ISD::NON_EXTLOAD);
20704 SDValue Shuffle =
20705 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
20706 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
20707 }
20708
20709 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
20710 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
20711 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
20712 SmallVector<SDValue> NewIndices;
20713 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
20714 NewIndices.push_back(Index.getOperand(i));
20715 EVT IndexVT = Index.getValueType()
20716 .getHalfNumVectorElementsVT(*DAG.getContext());
20717 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
20718
20719 unsigned ElementSize = VT.getScalarStoreSize();
20720 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
20721 auto EltCnt = VT.getVectorElementCount();
20722 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
20723 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
20724 EltCnt.divideCoefficientBy(2));
20725 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
20726 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
20727 EltCnt.divideCoefficientBy(2));
20728 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
20729
20730 SDValue Gather =
20731 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
20732 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
20733 Index, ScaleOp},
20734 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
20735 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
20736 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
20737 }
20738 break;
20739 }
20740 case ISD::MSCATTER: {
20741 const auto *MSN = cast<MaskedScatterSDNode>(N);
20742 SDValue Index = MSN->getIndex();
20743 SDValue ScaleOp = MSN->getScale();
20744 ISD::MemIndexType IndexType = MSN->getIndexType();
20745 assert(!MSN->isIndexScaled() &&
20746 "Scaled gather/scatter should not be formed");
20747
20748 SDLoc DL(N);
20749 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20750 return DAG.getMaskedScatter(
20751 N->getVTList(), MSN->getMemoryVT(), DL,
20752 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
20753 Index, ScaleOp},
20754 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
20755
20756 if (narrowIndex(Index, IndexType, DAG))
20757 return DAG.getMaskedScatter(
20758 N->getVTList(), MSN->getMemoryVT(), DL,
20759 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
20760 Index, ScaleOp},
20761 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
20762
20763 EVT VT = MSN->getValue()->getValueType(0);
20764 SmallVector<int> ShuffleMask;
20765 if (!MSN->isTruncatingStore() &&
20766 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
20767 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
20768 DAG.getUNDEF(VT), ShuffleMask);
20769 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
20770 DAG.getUNDEF(XLenVT), MSN->getMask(),
20771 MSN->getMemoryVT(), MSN->getMemOperand(),
20772 ISD::UNINDEXED, false);
20773 }
20774 break;
20775 }
20776 case ISD::VP_GATHER: {
20777 const auto *VPGN = cast<VPGatherSDNode>(N);
20778 SDValue Index = VPGN->getIndex();
20779 SDValue ScaleOp = VPGN->getScale();
20780 ISD::MemIndexType IndexType = VPGN->getIndexType();
20781 assert(!VPGN->isIndexScaled() &&
20782 "Scaled gather/scatter should not be formed");
20783
20784 SDLoc DL(N);
20785 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20786 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
20787 {VPGN->getChain(), VPGN->getBasePtr(), Index,
20788 ScaleOp, VPGN->getMask(),
20789 VPGN->getVectorLength()},
20790 VPGN->getMemOperand(), IndexType);
20791
20792 if (narrowIndex(Index, IndexType, DAG))
20793 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
20794 {VPGN->getChain(), VPGN->getBasePtr(), Index,
20795 ScaleOp, VPGN->getMask(),
20796 VPGN->getVectorLength()},
20797 VPGN->getMemOperand(), IndexType);
20798
20799 break;
20800 }
20801 case ISD::VP_SCATTER: {
20802 const auto *VPSN = cast<VPScatterSDNode>(N);
20803 SDValue Index = VPSN->getIndex();
20804 SDValue ScaleOp = VPSN->getScale();
20805 ISD::MemIndexType IndexType = VPSN->getIndexType();
20806 assert(!VPSN->isIndexScaled() &&
20807 "Scaled gather/scatter should not be formed");
20808
20809 SDLoc DL(N);
20810 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20811 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
20812 {VPSN->getChain(), VPSN->getValue(),
20813 VPSN->getBasePtr(), Index, ScaleOp,
20814 VPSN->getMask(), VPSN->getVectorLength()},
20815 VPSN->getMemOperand(), IndexType);
20816
20817 if (narrowIndex(Index, IndexType, DAG))
20818 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
20819 {VPSN->getChain(), VPSN->getValue(),
20820 VPSN->getBasePtr(), Index, ScaleOp,
20821 VPSN->getMask(), VPSN->getVectorLength()},
20822 VPSN->getMemOperand(), IndexType);
20823 break;
20824 }
20825 case RISCVISD::SHL_VL:
20826 if (SDValue V = performSHLCombine(N, DCI, Subtarget))
20827 return V;
20828 [[fallthrough]];
20829 case RISCVISD::SRA_VL:
20830 case RISCVISD::SRL_VL: {
20831 SDValue ShAmt = N->getOperand(1);
20832 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
20833 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
20834 SDLoc DL(N);
20835 SDValue VL = N->getOperand(4);
20836 EVT VT = N->getValueType(0);
20837 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
20838 ShAmt.getOperand(1), VL);
20839 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
20840 N->getOperand(2), N->getOperand(3), N->getOperand(4));
20841 }
20842 break;
20843 }
20844 case ISD::SRA:
20845 if (SDValue V = performSRACombine(N, DAG, Subtarget))
20846 return V;
20847 [[fallthrough]];
20848 case ISD::SRL:
20849 case ISD::SHL: {
20850 if (N->getOpcode() == ISD::SHL) {
20851 if (SDValue V = performSHLCombine(N, DCI, Subtarget))
20852 return V;
20853 }
20854 SDValue ShAmt = N->getOperand(1);
20855 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
20856 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
20857 SDLoc DL(N);
20858 EVT VT = N->getValueType(0);
20859 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
20860 ShAmt.getOperand(1),
20861 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
20862 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
20863 }
20864 break;
20865 }
20866 case RISCVISD::ADD_VL:
20867 if (SDValue V = simplifyOp_VL(N))
20868 return V;
20869 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20870 return V;
20871 if (SDValue V = combineVqdotAccum(N, DAG, Subtarget))
20872 return V;
20873 return combineToVWMACC(N, DAG, Subtarget);
20874 case RISCVISD::VWADD_W_VL:
20875 case RISCVISD::VWADDU_W_VL:
20876 case RISCVISD::VWSUB_W_VL:
20877 case RISCVISD::VWSUBU_W_VL:
20878 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
20879 case RISCVISD::OR_VL:
20880 case RISCVISD::SUB_VL:
20881 case RISCVISD::MUL_VL:
20882 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
20883 case RISCVISD::VFMADD_VL:
20884 case RISCVISD::VFNMADD_VL:
20885 case RISCVISD::VFMSUB_VL:
20886 case RISCVISD::VFNMSUB_VL:
20887 case RISCVISD::STRICT_VFMADD_VL:
20888 case RISCVISD::STRICT_VFNMADD_VL:
20889 case RISCVISD::STRICT_VFMSUB_VL:
20890 case RISCVISD::STRICT_VFNMSUB_VL:
20891 return performVFMADD_VLCombine(N, DCI, Subtarget);
20892 case RISCVISD::FADD_VL:
20893 case RISCVISD::FSUB_VL:
20894 case RISCVISD::FMUL_VL:
20895 case RISCVISD::VFWADD_W_VL:
20896 case RISCVISD::VFWSUB_W_VL:
20897 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
20898 case ISD::LOAD:
20899 case ISD::STORE: {
20900 if (DCI.isAfterLegalizeDAG())
20901 if (SDValue V = performMemPairCombine(N, DCI))
20902 return V;
20903
20904 if (N->getOpcode() != ISD::STORE)
20905 break;
20906
20907 auto *Store = cast<StoreSDNode>(N);
20908 SDValue Chain = Store->getChain();
20909 EVT MemVT = Store->getMemoryVT();
20910 SDValue Val = Store->getValue();
20911 SDLoc DL(N);
20912
20913 bool IsScalarizable =
20914 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
20915 Store->isSimple() &&
20916 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
20917 isPowerOf2_64(MemVT.getSizeInBits()) &&
20918 MemVT.getSizeInBits() <= Subtarget.getXLen();
20919
20920 // If sufficiently aligned we can scalarize stores of constant vectors of
20921 // any power-of-two size up to XLen bits, provided that they aren't too
20922 // expensive to materialize.
20923 // vsetivli zero, 2, e8, m1, ta, ma
20924 // vmv.v.i v8, 4
20925 // vse64.v v8, (a0)
20926 // ->
20927 // li a1, 1028
20928 // sh a1, 0(a0)
20929 if (DCI.isBeforeLegalize() && IsScalarizable &&
20930 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
20931 // Get the constant vector bits
20932 APInt NewC(Val.getValueSizeInBits(), 0);
20933 uint64_t EltSize = Val.getScalarValueSizeInBits();
20934 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
20935 if (Val.getOperand(i).isUndef())
20936 continue;
20937 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
20938 i * EltSize);
20939 }
20940 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
20941
20942 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
20943 true) <= 2 &&
20944 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
20945 NewVT, *Store->getMemOperand())) {
20946 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
20947 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
20948 Store->getPointerInfo(), Store->getBaseAlign(),
20949 Store->getMemOperand()->getFlags());
20950 }
20951 }
20952
20953 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
20954 // vsetivli zero, 2, e16, m1, ta, ma
20955 // vle16.v v8, (a0)
20956 // vse16.v v8, (a1)
20957 if (auto *L = dyn_cast<LoadSDNode>(Val);
20958 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
20959 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
20960 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
20961 L->getMemoryVT() == MemVT) {
20962 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
20963 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
20964 NewVT, *Store->getMemOperand()) &&
20965 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
20966 NewVT, *L->getMemOperand())) {
20967 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
20968 L->getPointerInfo(), L->getBaseAlign(),
20969 L->getMemOperand()->getFlags());
20970 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
20971 Store->getPointerInfo(), Store->getBaseAlign(),
20972 Store->getMemOperand()->getFlags());
20973 }
20974 }
20975
20976 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
20977 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
20978 // any illegal types.
20979 if ((Val.getOpcode() == RISCVISD::VMV_X_S ||
20980 (DCI.isAfterLegalizeDAG() &&
20981 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20982 isNullConstant(Val.getOperand(1)))) &&
20983 Val.hasOneUse()) {
20984 SDValue Src = Val.getOperand(0);
20985 MVT VecVT = Src.getSimpleValueType();
20986 // VecVT should be scalable and memory VT should match the element type.
20987 if (!Store->isIndexed() && VecVT.isScalableVector() &&
20988 MemVT == VecVT.getVectorElementType()) {
20989 SDLoc DL(N);
20990 MVT MaskVT = getMaskTypeFor(VecVT);
20991 return DAG.getStoreVP(
20992 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
20993 DAG.getConstant(1, DL, MaskVT),
20994 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
20995 Store->getMemOperand(), Store->getAddressingMode(),
20996 Store->isTruncatingStore(), /*IsCompress*/ false);
20997 }
20998 }
20999
21000 break;
21001 }
21002 case ISD::SPLAT_VECTOR: {
21003 EVT VT = N->getValueType(0);
21004 // Only perform this combine on legal MVT types.
21005 if (!isTypeLegal(VT))
21006 break;
21007 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
21008 DAG, Subtarget))
21009 return Gather;
21010 break;
21011 }
21012 case ISD::BUILD_VECTOR:
21013 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
21014 return V;
21015 break;
21016 case ISD::CONCAT_VECTORS:
21017 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
21018 return V;
21019 break;
21020 case ISD::VECTOR_SHUFFLE:
21021 if (SDValue V = performVECTOR_SHUFFLECombine(N, DAG, Subtarget, *this))
21022 return V;
21023 break;
21024 case ISD::INSERT_VECTOR_ELT:
21025 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
21026 return V;
21027 break;
21028 case RISCVISD::VFMV_V_F_VL: {
21029 const MVT VT = N->getSimpleValueType(0);
21030 SDValue Passthru = N->getOperand(0);
21031 SDValue Scalar = N->getOperand(1);
21032 SDValue VL = N->getOperand(2);
21033
21034 // If VL is 1, we can use vfmv.s.f.
21035 if (isOneConstant(VL))
21036 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
21037 break;
21038 }
21039 case RISCVISD::VMV_V_X_VL: {
21040 const MVT VT = N->getSimpleValueType(0);
21041 SDValue Passthru = N->getOperand(0);
21042 SDValue Scalar = N->getOperand(1);
21043 SDValue VL = N->getOperand(2);
21044
21045 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
21046 // scalar input.
21047 unsigned ScalarSize = Scalar.getValueSizeInBits();
21048 unsigned EltWidth = VT.getScalarSizeInBits();
21049 if (ScalarSize > EltWidth && Passthru.isUndef())
21050 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
21051 return SDValue(N, 0);
21052
21053 // If VL is 1 and the scalar value won't benefit from immediate, we can
21054 // use vmv.s.x.
21055 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
21056 if (isOneConstant(VL) &&
21057 (!Const || Const->isZero() ||
21058 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
21059 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
21060
21061 break;
21062 }
21063 case RISCVISD::VFMV_S_F_VL: {
21064 SDValue Src = N->getOperand(1);
21065 // Try to remove vector->scalar->vector if the scalar->vector is inserting
21066 // into an undef vector.
21067 // TODO: Could use a vslide or vmv.v.v for non-undef.
21068 if (N->getOperand(0).isUndef() &&
21069 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21070 isNullConstant(Src.getOperand(1)) &&
21071 Src.getOperand(0).getValueType().isScalableVector()) {
21072 EVT VT = N->getValueType(0);
21073 SDValue EVSrc = Src.getOperand(0);
21074 EVT EVSrcVT = EVSrc.getValueType();
21076 // Widths match, just return the original vector.
21077 if (EVSrcVT == VT)
21078 return EVSrc;
21079 SDLoc DL(N);
21080 // Width is narrower; use insert_subvector.
21081 if (EVSrcVT.getVectorMinNumElements() < VT.getVectorMinNumElements()) {
21082 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
21083 EVSrc,
21084 DAG.getConstant(0, DL, Subtarget.getXLenVT()));
21085 }
21086 // Width is wider; use extract_subvector.
21087 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, EVSrc,
21088 DAG.getConstant(0, DL, Subtarget.getXLenVT()));
21089 }
21090 [[fallthrough]];
21091 }
21092 case RISCVISD::VMV_S_X_VL: {
21093 const MVT VT = N->getSimpleValueType(0);
21094 SDValue Passthru = N->getOperand(0);
21095 SDValue Scalar = N->getOperand(1);
21096 SDValue VL = N->getOperand(2);
21097
21098 // The vmv.s.x instruction copies the scalar integer register to element 0
21099 // of the destination vector register. If SEW < XLEN, the least-significant
21100 // bits are copied and the upper XLEN-SEW bits are ignored.
21101 unsigned ScalarSize = Scalar.getValueSizeInBits();
21102 unsigned EltWidth = VT.getScalarSizeInBits();
21103 if (ScalarSize > EltWidth && SimplifyDemandedLowBitsHelper(1, EltWidth))
21104 return SDValue(N, 0);
21105
21106 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
21107 Scalar.getOperand(0).getValueType() == N->getValueType(0))
21108 return Scalar.getOperand(0);
21109
21110 // Use M1 or smaller to avoid over-constraining register allocation.
21111 const MVT M1VT = RISCVTargetLowering::getM1VT(VT);
21112 if (M1VT.bitsLT(VT)) {
21113 SDValue M1Passthru = DAG.getExtractSubvector(DL, M1VT, Passthru, 0);
21114 SDValue Result =
21115 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
21116 Result = DAG.getInsertSubvector(DL, Passthru, Result, 0);
21117 return Result;
21118 }
21119
21120 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
21121 // higher would involve overly constraining the register allocator for
21122 // no purpose.
21123 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
21124 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
21125 VT.bitsLE(RISCVTargetLowering::getM1VT(VT)) && Passthru.isUndef())
21126 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
21127
21128 break;
21129 }
21130 case RISCVISD::VMV_X_S: {
21131 SDValue Vec = N->getOperand(0);
21132 MVT VecVT = N->getOperand(0).getSimpleValueType();
21133 const MVT M1VT = RISCVTargetLowering::getM1VT(VecVT);
21134 if (M1VT.bitsLT(VecVT)) {
21135 Vec = DAG.getExtractSubvector(DL, M1VT, Vec, 0);
21136 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
21137 }
21138 break;
21139 }
21140 case ISD::INTRINSIC_VOID:
21141 case ISD::INTRINSIC_W_CHAIN:
21142 case ISD::INTRINSIC_WO_CHAIN: {
21143 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
21144 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
21145 switch (IntNo) {
21146 // By default we do not combine any intrinsic.
21147 default:
21148 return SDValue();
21149 case Intrinsic::riscv_vcpop:
21150 case Intrinsic::riscv_vcpop_mask:
21151 case Intrinsic::riscv_vfirst:
21152 case Intrinsic::riscv_vfirst_mask: {
21153 SDValue VL = N->getOperand(2);
21154 if (IntNo == Intrinsic::riscv_vcpop_mask ||
21155 IntNo == Intrinsic::riscv_vfirst_mask)
21156 VL = N->getOperand(3);
21157 if (!isNullConstant(VL))
21158 return SDValue();
21159 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
21160 SDLoc DL(N);
21161 EVT VT = N->getValueType(0);
21162 if (IntNo == Intrinsic::riscv_vfirst ||
21163 IntNo == Intrinsic::riscv_vfirst_mask)
21164 return DAG.getAllOnesConstant(DL, VT);
21165 return DAG.getConstant(0, DL, VT);
21166 }
21167 case Intrinsic::riscv_vsseg2_mask:
21168 case Intrinsic::riscv_vsseg3_mask:
21169 case Intrinsic::riscv_vsseg4_mask:
21170 case Intrinsic::riscv_vsseg5_mask:
21171 case Intrinsic::riscv_vsseg6_mask:
21172 case Intrinsic::riscv_vsseg7_mask:
21173 case Intrinsic::riscv_vsseg8_mask: {
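      // If the stored tuple is a single field inserted into an otherwise
      // undef tuple, and the target has no optimized segment store, lower the
      // segment store to a single strided store (vsse) of that field.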
21174 SDValue Tuple = N->getOperand(2);
21175 unsigned NF = Tuple.getValueType().getRISCVVectorTupleNumFields();
21176
21177 if (Subtarget.hasOptimizedSegmentLoadStore(NF) || !Tuple.hasOneUse() ||
21178 Tuple.getOpcode() != RISCVISD::TUPLE_INSERT ||
21179 !Tuple.getOperand(0).isUndef())
21180 return SDValue();
21181
21182 SDValue Val = Tuple.getOperand(1);
21183 unsigned Idx = Tuple.getConstantOperandVal(2);
21184
21185 unsigned SEW = Val.getValueType().getScalarSizeInBits();
21186 assert(Log2_64(SEW) == N->getConstantOperandVal(6) &&
21187 "Type mismatch without bitcast?");
21188 unsigned Stride = SEW / 8 * NF;
21189 unsigned Offset = SEW / 8 * Idx;
21190
21191 SDValue Ops[] = {
21192 /*Chain=*/N->getOperand(0),
21193 /*IntID=*/
21194 DAG.getTargetConstant(Intrinsic::riscv_vsse_mask, DL, XLenVT),
21195 /*StoredVal=*/Val,
21196 /*Ptr=*/
21197 DAG.getNode(ISD::ADD, DL, XLenVT, N->getOperand(3),
21198 DAG.getConstant(Offset, DL, XLenVT)),
21199 /*Stride=*/DAG.getConstant(Stride, DL, XLenVT),
21200 /*Mask=*/N->getOperand(4),
21201 /*VL=*/N->getOperand(5)};
21202
21203 auto *OldMemSD = cast<MemIntrinsicSDNode>(N);
21204 // Match getTgtMemIntrinsic for non-unit stride case
21205 EVT MemVT = OldMemSD->getMemoryVT().getScalarType();
21206 MachineFunction &MF = DAG.getMachineFunction();
21207 MachineMemOperand *MMO = MF.getMachineMemOperand(
21208     OldMemSD->getMemOperand(), Offset, MemoryLocation::UnknownSize);
21209
21210 SDVTList VTs = DAG.getVTList(MVT::Other);
21211 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VTs, Ops, MemVT,
21212 MMO);
21213 }
21214 }
21215 }
21216 case ISD::EXPERIMENTAL_VP_REVERSE:
21217 return performVP_REVERSECombine(N, DAG, Subtarget);
21218 case ISD::VP_STORE:
21219 return performVP_STORECombine(N, DAG, Subtarget);
21220 case ISD::BITCAST: {
21221 assert(Subtarget.useRVVForFixedLengthVectors());
21222 SDValue N0 = N->getOperand(0);
21223 EVT VT = N->getValueType(0);
21224 EVT SrcVT = N0.getValueType();
21225 if (VT.isRISCVVectorTuple() && N0->getOpcode() == ISD::SPLAT_VECTOR) {
21226 unsigned NF = VT.getRISCVVectorTupleNumFields();
21227 unsigned NumScalElts = VT.getSizeInBits().getKnownMinValue() / (NF * 8);
21228 SDValue EltVal = DAG.getConstant(0, DL, Subtarget.getXLenVT());
21229 MVT ScalTy = MVT::getScalableVectorVT(MVT::getIntegerVT(8), NumScalElts);
21230
21231 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, ScalTy, EltVal);
21232
21233 SDValue Result = DAG.getUNDEF(VT);
21234 for (unsigned i = 0; i < NF; ++i)
21235 Result = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Result, Splat,
21236 DAG.getTargetConstant(i, DL, MVT::i32));
21237 return Result;
21238 }
21239 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
21240 // type, widen both sides to avoid a trip through memory.
21241 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
21242 VT.isScalarInteger()) {
21243 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
21244 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
21245 Ops[0] = N0;
21246 SDLoc DL(N);
21247 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
21248 N0 = DAG.getBitcast(MVT::i8, N0);
21249 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
21250 }
21251
21252 return SDValue();
21253 }
21254 case ISD::VECREDUCE_ADD:
21255 if (SDValue V = performVECREDUCECombine(N, DAG, Subtarget, *this))
21256 return V;
21257 [[fallthrough]];
21258 case ISD::CTPOP:
21259 if (SDValue V = combineToVCPOP(N, DAG, Subtarget))
21260 return V;
21261 break;
21262 case RISCVISD::VRGATHER_VX_VL: {
21263 // Note this assumes that out-of-bounds indices produce poison and can
21264 // thus be replaced without having to prove them in bounds.
21265 EVT VT = N->getValueType(0);
21266 SDValue Src = N->getOperand(0);
21267 SDValue Idx = N->getOperand(1);
21268 SDValue Passthru = N->getOperand(2);
21269 SDValue VL = N->getOperand(4);
21270
21271 // Warning: Unlike most cases where we strip an insert_subvector, this
21272 // one does not require the first operand to be undef.
21273 if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
21274 isNullConstant(Src.getOperand(2)))
21275 Src = Src.getOperand(1);
21276
21277 switch (Src.getOpcode()) {
21278 default:
21279 break;
21280 case RISCVISD::VMV_V_X_VL:
21281 case RISCVISD::VFMV_V_F_VL:
21282 // Drop a redundant vrgather_vx.
21283 // TODO: Remove the type restriction if we find a motivating
21284 // test case?
21285 if (Passthru.isUndef() && VL == Src.getOperand(2) &&
21286 Src.getValueType() == VT)
21287 return Src;
21288 break;
21289 case RISCVISD::VMV_S_X_VL:
21290 case RISCVISD::VFMV_S_F_VL:
21291 // If this use only demands lane zero from the source vmv.s.x, and
21292 // doesn't have a passthru, then this vrgather.vi/vx is equivalent to
21293 // a vmv.v.x. Note that there can be other uses of the original
21294 // vmv.s.x and thus we can't eliminate it. (vfmv.s.f is analogous)
21295 if (isNullConstant(Idx) && Passthru.isUndef() &&
21296 VL == Src.getOperand(2)) {
21297 unsigned Opc =
21298 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
21299 return DAG.getNode(Opc, DL, VT, DAG.getUNDEF(VT), Src.getOperand(1),
21300 VL);
21301 }
21302 break;
21303 }
21304 break;
21305 }
21306 case RISCVISD::TUPLE_EXTRACT: {
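    // If only one field of a masked segment load is ever used and the target
    // has no optimized segment load, replace the vlseg with a single strided
    // load (vlse) of that field.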
21307 EVT VT = N->getValueType(0);
21308 SDValue Tuple = N->getOperand(0);
21309 unsigned Idx = N->getConstantOperandVal(1);
21310 if (!Tuple.hasOneUse() || Tuple.getOpcode() != ISD::INTRINSIC_W_CHAIN)
21311 break;
21312
21313 unsigned NF = 0;
21314 switch (Tuple.getConstantOperandVal(1)) {
21315 default:
21316 break;
21317 case Intrinsic::riscv_vlseg2_mask:
21318 case Intrinsic::riscv_vlseg3_mask:
21319 case Intrinsic::riscv_vlseg4_mask:
21320 case Intrinsic::riscv_vlseg5_mask:
21321 case Intrinsic::riscv_vlseg6_mask:
21322 case Intrinsic::riscv_vlseg7_mask:
21323 case Intrinsic::riscv_vlseg8_mask:
21324 NF = Tuple.getValueType().getRISCVVectorTupleNumFields();
21325 break;
21326 }
21327
21328 if (!NF || Subtarget.hasOptimizedSegmentLoadStore(NF))
21329 break;
21330
21331 unsigned SEW = VT.getScalarSizeInBits();
21332 assert(Log2_64(SEW) == Tuple.getConstantOperandVal(7) &&
21333 "Type mismatch without bitcast?");
21334 unsigned Stride = SEW / 8 * NF;
21335 unsigned Offset = SEW / 8 * Idx;
21336
21337 SDValue Ops[] = {
21338 /*Chain=*/Tuple.getOperand(0),
21339 /*IntID=*/DAG.getTargetConstant(Intrinsic::riscv_vlse_mask, DL, XLenVT),
21340 /*Passthru=*/Tuple.getOperand(2),
21341 /*Ptr=*/
21342 DAG.getNode(ISD::ADD, DL, XLenVT, Tuple.getOperand(3),
21343 DAG.getConstant(Offset, DL, XLenVT)),
21344 /*Stride=*/DAG.getConstant(Stride, DL, XLenVT),
21345 /*Mask=*/Tuple.getOperand(4),
21346 /*VL=*/Tuple.getOperand(5),
21347 /*Policy=*/Tuple.getOperand(6)};
21348
21349 auto *TupleMemSD = cast<MemIntrinsicSDNode>(Tuple);
21350 // Match getTgtMemIntrinsic for non-unit stride case
21351 EVT MemVT = TupleMemSD->getMemoryVT().getScalarType();
21352 MachineFunction &MF = DAG.getMachineFunction();
21353 MachineMemOperand *MMO = MF.getMachineMemOperand(
21354     TupleMemSD->getMemOperand(), Offset, MemoryLocation::UnknownSize);
21355
21356 SDVTList VTs = DAG.getVTList({VT, MVT::Other});
21357 SDValue Result = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
21358                                          Ops, MemVT, MMO);
21359 DAG.ReplaceAllUsesOfValueWith(Tuple.getValue(1), Result.getValue(1));
21360 return Result.getValue(0);
21361 }
21362 case RISCVISD::TUPLE_INSERT: {
21363 // tuple_insert tuple, undef, idx -> tuple
21364 if (N->getOperand(1).isUndef())
21365 return N->getOperand(0);
21366 break;
21367 }
21368 case RISCVISD::VSLIDE1UP_VL:
21369 case RISCVISD::VFSLIDE1UP_VL: {
21370 using namespace SDPatternMatch;
21371 SDValue SrcVec;
21372 SDLoc DL(N);
21373 MVT VT = N->getSimpleValueType(0);
21374 // If the scalar we're sliding in was extracted from the first element of a
21375 // vector, we can use that vector as the passthru in a normal slideup of 1.
21376 // This saves us an extract_element instruction (i.e. vfmv.f.s, vmv.x.s).
21377 if (!N->getOperand(0).isUndef() ||
21378 !sd_match(N->getOperand(2),
21379 m_AnyOf(m_ExtractElt(m_Value(SrcVec), m_Zero()),
21380 m_Node(RISCVISD::VMV_X_S, m_Value(SrcVec)))))
21381 break;
21382
21383 MVT SrcVecVT = SrcVec.getSimpleValueType();
21384 if (SrcVecVT.getVectorElementType() != VT.getVectorElementType())
21385 break;
21386 // Adapt the value type of source vector.
21387 if (SrcVecVT.isFixedLengthVector()) {
21388 SrcVecVT = getContainerForFixedLengthVector(SrcVecVT);
21389 SrcVec = convertToScalableVector(SrcVecVT, SrcVec, DAG, Subtarget);
21390 }
21391 if (SrcVecVT.getVectorMinNumElements() < VT.getVectorMinNumElements())
21392   SrcVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), SrcVec, 0);
21393 else
21394 SrcVec = DAG.getExtractSubvector(DL, VT, SrcVec, 0);
21395
21396 return getVSlideup(DAG, Subtarget, DL, VT, SrcVec, N->getOperand(1),
21397 DAG.getConstant(1, DL, XLenVT), N->getOperand(3),
21398 N->getOperand(4));
21399 }
21400 }
21401
21402 return SDValue();
21403}
21404
21405 bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
21406     EVT XVT, unsigned KeptBits) const {
21407 // For vectors, we don't have a preference.
21408 if (XVT.isVector())
21409 return false;
21410
21411 if (XVT != MVT::i32 && XVT != MVT::i64)
21412 return false;
21413
21414 // We can use sext.w for RV64 or an srai 31 on RV32.
21415 if (KeptBits == 32 || KeptBits == 64)
21416 return true;
21417
21418 // With Zbb we can use sext.h/sext.b.
21419 return Subtarget.hasStdExtZbb() &&
21420 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
21421 KeptBits == 16);
21422}
21423
21424 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
21425     const SDNode *N, CombineLevel Level) const {
21426 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
21427 N->getOpcode() == ISD::SRL) &&
21428 "Expected shift op");
21429
21430 // The following folds are only desirable if `(OP _, c1 << c2)` can be
21431 // materialised in fewer instructions than `(OP _, c1)`:
21432 //
21433 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
21434 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
21435 SDValue N0 = N->getOperand(0);
21436 EVT Ty = N0.getValueType();
21437
21438 // Loads and stores can fold a constant offset into their addressing mode,
21439 // so if the AddNode is only used by loads/stores, the folding above can
21440 // still be performed.
21441 auto isUsedByLdSt = [](const SDNode *X, const SDNode *User) {
21442 for (SDNode *Use : X->users()) {
21443 // This use is the one we're on right now. Skip it.
21444 if (Use == User || Use->getOpcode() == ISD::SELECT)
21445 continue;
21446 if (!isa<StoreSDNode>(Use) && !isa<LoadSDNode>(Use))
21447   return false;
21448 }
21449 return true;
21450 };
21451
21452 if (Ty.isScalarInteger() &&
21453 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
21454 if (N0.getOpcode() == ISD::ADD && !N0->hasOneUse())
21455 return isUsedByLdSt(N0.getNode(), N);
21456
21457 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
21458 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
21459
21460 // Bail if we might break a sh{1,2,3}add/qc.shladd pattern.
21461 if (C2 && Subtarget.hasShlAdd(C2->getZExtValue()) && N->hasOneUse() &&
21462 N->user_begin()->getOpcode() == ISD::ADD &&
21463 !isUsedByLdSt(*N->user_begin(), nullptr) &&
21464 !isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
21465 return false;
21466
21467 if (C1 && C2) {
21468 const APInt &C1Int = C1->getAPIntValue();
21469 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
21470
21471 // We can materialise `c1 << c2` into an add immediate, so it's "free",
21472 // and the combine should happen, to potentially allow further combines
21473 // later.
21474 if (ShiftedC1Int.getSignificantBits() <= 64 &&
21475 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
21476 return true;
21477
21478 // We can materialise `c1` in an add immediate, so it's "free", and the
21479 // combine should be prevented.
21480 if (C1Int.getSignificantBits() <= 64 &&
21481     isLegalAddImmediate(C1Int.getSExtValue()))
21482   return false;
21483
21484 // Neither constant will fit into an immediate, so find materialisation
21485 // costs.
21486 int C1Cost =
21487 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
21488 /*CompressionCost*/ true);
21489 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
21490 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
21491 /*CompressionCost*/ true);
21492
21493 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
21494 // combine should be prevented.
21495 if (C1Cost < ShiftedC1Cost)
21496 return false;
21497 }
21498 }
21499
21500 if (!N0->hasOneUse())
21501 return false;
21502
21503 if (N0->getOpcode() == ISD::SIGN_EXTEND &&
21504 N0->getOperand(0)->getOpcode() == ISD::ADD &&
21505 !N0->getOperand(0)->hasOneUse())
21506 return isUsedByLdSt(N0->getOperand(0).getNode(), N0.getNode());
21507
21508 return true;
21509}
21510
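// Try to replace the immediate operand of an AND/OR/XOR with a mask that is
// cheaper to materialize, taking advantage of bits the caller does not demand.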
21511 bool RISCVTargetLowering::targetShrinkDemandedConstant(
21512     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
21513 TargetLoweringOpt &TLO) const {
21514 // Delay this optimization as late as possible.
21515 if (!TLO.LegalOps)
21516 return false;
21517
21518 EVT VT = Op.getValueType();
21519 if (VT.isVector())
21520 return false;
21521
21522 unsigned Opcode = Op.getOpcode();
21523 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
21524 return false;
21525
21526 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
21527 if (!C)
21528 return false;
21529
21530 const APInt &Mask = C->getAPIntValue();
21531
21532 // Clear all non-demanded bits initially.
21533 APInt ShrunkMask = Mask & DemandedBits;
21534
21535 // Try to make a smaller immediate by setting undemanded bits.
21536
21537 APInt ExpandedMask = Mask | ~DemandedBits;
21538
21539 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
21540 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
21541 };
21542 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
21543 if (NewMask == Mask)
21544 return true;
21545 SDLoc DL(Op);
21546 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
21547 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
21548 Op.getOperand(0), NewC);
21549 return TLO.CombineTo(Op, NewOp);
21550 };
21551
21552 // If the shrunk mask fits in sign extended 12 bits, let the target
21553 // independent code apply it.
21554 if (ShrunkMask.isSignedIntN(12))
21555 return false;
21556
21557 // And has a few special cases for zext.
21558 if (Opcode == ISD::AND) {
21559 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
21560 // otherwise use SLLI + SRLI.
21561 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
21562 if (IsLegalMask(NewMask))
21563 return UseMask(NewMask);
21564
21565 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
21566 if (VT == MVT::i64) {
21567 APInt NewMask = APInt(64, 0xffffffff);
21568 if (IsLegalMask(NewMask))
21569 return UseMask(NewMask);
21570 }
21571 }
21572
21573 // For the remaining optimizations, we need to be able to make a negative
21574 // number through a combination of mask and undemanded bits.
21575 if (!ExpandedMask.isNegative())
21576 return false;
21577
21578 // Compute the minimum number of bits needed to represent the negative number.
21579 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
21580
21581 // Try to make a 12 bit negative immediate. If that fails try to make a 32
21582 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
21583 // If we can't create a simm12, we shouldn't change opaque constants.
21584 APInt NewMask = ShrunkMask;
21585 if (MinSignedBits <= 12)
21586 NewMask.setBitsFrom(11);
21587 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
21588 NewMask.setBitsFrom(31);
21589 else
21590 return false;
21591
21592 // Check that our new mask is a subset of the demanded mask.
21593 assert(IsLegalMask(NewMask));
21594 return UseMask(NewMask);
21595}
21596
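// Evaluate a GREV (generalized bit-reverse) or GORC (generalized bit-or-combine)
// permutation on a constant. A shift amount of 7 corresponds to the ratified
// brev8 and orc.b instructions.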
21597static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
21598 static const uint64_t GREVMasks[] = {
21599 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
21600 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
21601
21602 for (unsigned Stage = 0; Stage != 6; ++Stage) {
21603 unsigned Shift = 1 << Stage;
21604 if (ShAmt & Shift) {
21605 uint64_t Mask = GREVMasks[Stage];
21606 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
21607 if (IsGORC)
21608 Res |= x;
21609 x = Res;
21610 }
21611 }
21612
21613 return x;
21614}
21615
21616 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
21617                                                         KnownBits &Known,
21618 const APInt &DemandedElts,
21619 const SelectionDAG &DAG,
21620 unsigned Depth) const {
21621 unsigned BitWidth = Known.getBitWidth();
21622 unsigned Opc = Op.getOpcode();
21627 "Should use MaskedValueIsZero if you don't know whether Op"
21628 " is a target node!");
21629
21630 Known.resetAll();
21631 switch (Opc) {
21632 default: break;
21633 case RISCVISD::SELECT_CC: {
21634 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
21635 // If we don't know any bits, early out.
21636 if (Known.isUnknown())
21637 break;
21638 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
21639
21640 // Only known if known in both the LHS and RHS.
21641 Known = Known.intersectWith(Known2);
21642 break;
21643 }
21644 case RISCVISD::VCPOP_VL: {
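    // vcpop.m counts at most VL mask bits, so the result can never exceed the
    // largest value the VL operand may take.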
21645 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(2), Depth + 1);
21646 Known.Zero.setBitsFrom(Known2.countMaxActiveBits());
21647 break;
21648 }
21649 case RISCVISD::CZERO_EQZ:
21650 case RISCVISD::CZERO_NEZ:
21651 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21652 // Result is either all zero or operand 0. We can propagate zeros, but not
21653 // ones.
21654 Known.One.clearAllBits();
21655 break;
21656 case RISCVISD::REMUW: {
21657 KnownBits Known2;
21658 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21659 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21660 // We only care about the lower 32 bits.
21661 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
21662 // Restore the original width by sign extending.
21663 Known = Known.sext(BitWidth);
21664 break;
21665 }
21666 case RISCVISD::DIVUW: {
21667 KnownBits Known2;
21668 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21669 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21670 // We only care about the lower 32 bits.
21671 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
21672 // Restore the original width by sign extending.
21673 Known = Known.sext(BitWidth);
21674 break;
21675 }
21676 case RISCVISD::SLLW: {
21677 KnownBits Known2;
21678 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21679 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21680 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
21681 // Restore the original width by sign extending.
21682 Known = Known.sext(BitWidth);
21683 break;
21684 }
21685 case RISCVISD::SRLW: {
21686 KnownBits Known2;
21687 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21688 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21689 Known = KnownBits::lshr(Known.trunc(32), Known2.trunc(5).zext(32));
21690 // Restore the original width by sign extending.
21691 Known = Known.sext(BitWidth);
21692 break;
21693 }
21694 case RISCVISD::SRAW: {
21695 KnownBits Known2;
21696 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21697 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21698 Known = KnownBits::ashr(Known.trunc(32), Known2.trunc(5).zext(32));
21699 // Restore the original width by sign extending.
21700 Known = Known.sext(BitWidth);
21701 break;
21702 }
21703 case RISCVISD::SHL_ADD: {
21704 KnownBits Known2;
21705 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21706 unsigned ShAmt = Op.getConstantOperandVal(1);
21707 Known <<= ShAmt;
21708 Known.Zero.setLowBits(ShAmt); // the <<= operator left these bits unknown
21709 Known2 = DAG.computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
21710 Known = KnownBits::add(Known, Known2);
21711 break;
21712 }
21713 case RISCVISD::CTZW: {
21714 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21715 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
21716 unsigned LowBits = llvm::bit_width(PossibleTZ);
21717 Known.Zero.setBitsFrom(LowBits);
21718 break;
21719 }
21720 case RISCVISD::CLZW: {
21721 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21722 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
21723 unsigned LowBits = llvm::bit_width(PossibleLZ);
21724 Known.Zero.setBitsFrom(LowBits);
21725 break;
21726 }
21727 case RISCVISD::BREV8:
21728 case RISCVISD::ORC_B: {
21729 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
21730 // control value of 7 is equivalent to brev8 and orc.b.
21731 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21732 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
21733 // To compute zeros for ORC_B, we need to invert the value and invert it
21734 // back after. This inverting is harmless for BREV8.
21735 Known.Zero =
21736 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
21737 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
21738 break;
21739 }
21740 case RISCVISD::READ_VLENB: {
21741 // We can use the minimum and maximum VLEN values to bound VLENB. We
21742 // know VLEN must be a power of two.
21743 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
21744 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
21745 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
21746 Known.Zero.setLowBits(Log2_32(MinVLenB));
21747 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
21748 if (MaxVLenB == MinVLenB)
21749 Known.One.setBit(Log2_32(MinVLenB));
21750 break;
21751 }
21752 case RISCVISD::FCLASS: {
21753 // fclass will only set one of the low 10 bits.
21754 Known.Zero.setBitsFrom(10);
21755 break;
21756 }
21757 case ISD::INTRINSIC_WO_CHAIN:
21758 case ISD::INTRINSIC_W_CHAIN: {
21759   unsigned IntNo =
21760 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
21761 switch (IntNo) {
21762 default:
21763 // We can't do anything for most intrinsics.
21764 break;
21765 case Intrinsic::riscv_vsetvli:
21766 case Intrinsic::riscv_vsetvlimax: {
21767 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
21768 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
21769 RISCVVType::VLMUL VLMUL =
21770 static_cast<RISCVVType::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
21771 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
21772 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
21773 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
21774 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
21775
21776 // The result of vsetvli must not be larger than AVL.
21777 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
21778 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
21779
21780 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
21781 if (BitWidth > KnownZeroFirstBit)
21782 Known.Zero.setBitsFrom(KnownZeroFirstBit);
21783 break;
21784 }
21785 }
21786 break;
21787 }
21788 }
21789}
21790
21791 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
21792     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
21793 unsigned Depth) const {
21794 switch (Op.getOpcode()) {
21795 default:
21796 break;
21797 case RISCVISD::SELECT_CC: {
21798 unsigned Tmp =
21799 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
21800 if (Tmp == 1) return 1; // Early out.
21801 unsigned Tmp2 =
21802 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
21803 return std::min(Tmp, Tmp2);
21804 }
21805 case RISCVISD::CZERO_EQZ:
21806 case RISCVISD::CZERO_NEZ:
21807 // Output is either all zero or operand 0. We can propagate sign bit count
21808 // from operand 0.
21809 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
21810 case RISCVISD::ABSW: {
21811 // We expand this at isel to negw+max. The result will have 33 sign bits
21812 // if the input has at least 33 sign bits.
21813 unsigned Tmp =
21814 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
21815 if (Tmp < 33) return 1;
21816 return 33;
21817 }
21818 case RISCVISD::SRAW: {
21819 unsigned Tmp =
21820 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
21821 // sraw produces at least 33 sign bits. If the input already has more than
21822 // 33 sign bits, sraw will preserve them.
21823 // TODO: A more precise answer could be calculated depending on known bits
21824 // in the shift amount.
21825 return std::max(Tmp, 33U);
21826 }
21827 case RISCVISD::SLLW:
21828 case RISCVISD::SRLW:
21829 case RISCVISD::DIVW:
21830 case RISCVISD::DIVUW:
21831 case RISCVISD::REMUW:
21832 case RISCVISD::ROLW:
21833 case RISCVISD::RORW:
21834 case RISCVISD::FCVT_W_RV64:
21835 case RISCVISD::FCVT_WU_RV64:
21836 case RISCVISD::STRICT_FCVT_W_RV64:
21837 case RISCVISD::STRICT_FCVT_WU_RV64:
21838 // TODO: As the result is sign-extended, this is conservatively correct.
21839 return 33;
21840 case RISCVISD::VMV_X_S: {
21841 // The number of sign bits of the scalar result is computed by obtaining the
21842 // element type of the input vector operand, subtracting its width from the
21843 // XLEN, and then adding one (sign bit within the element type). If the
21844 // element type is wider than XLen, the least-significant XLEN bits are
21845 // taken.
21846 unsigned XLen = Subtarget.getXLen();
21847 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
21848 if (EltBits <= XLen)
21849 return XLen - EltBits + 1;
21850 break;
21851 }
21852 case ISD::INTRINSIC_W_CHAIN: {
21853   unsigned IntNo = Op.getConstantOperandVal(1);
21854 switch (IntNo) {
21855 default:
21856 break;
21857 case Intrinsic::riscv_masked_atomicrmw_xchg:
21858 case Intrinsic::riscv_masked_atomicrmw_add:
21859 case Intrinsic::riscv_masked_atomicrmw_sub:
21860 case Intrinsic::riscv_masked_atomicrmw_nand:
21861 case Intrinsic::riscv_masked_atomicrmw_max:
21862 case Intrinsic::riscv_masked_atomicrmw_min:
21863 case Intrinsic::riscv_masked_atomicrmw_umax:
21864 case Intrinsic::riscv_masked_atomicrmw_umin:
21865 case Intrinsic::riscv_masked_cmpxchg:
21866 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
21867 // narrow atomic operation. These are implemented using atomic
21868 // operations at the minimum supported atomicrmw/cmpxchg width whose
21869 // result is then sign extended to XLEN. With +A, the minimum width is
21870 // 32 for both RV64 and RV32.
21872 assert(Subtarget.hasStdExtA());
21873 return Op.getValueSizeInBits() - 31;
21874 }
21875 break;
21876 }
21877 }
21878
21879 return 1;
21880}
21881
21882 bool RISCVTargetLowering::SimplifyDemandedBitsForTargetNode(
21883     SDValue Op, const APInt &OriginalDemandedBits,
21884 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
21885 unsigned Depth) const {
21886 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
21887
21888 switch (Op.getOpcode()) {
21889 case RISCVISD::BREV8:
21890 case RISCVISD::ORC_B: {
21891 KnownBits Known2;
21892 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
21893 // For BREV8, we need to do BREV8 on the demanded bits.
21894 // For ORC_B, any bit in the output demands all bits from the same byte.
21895 // So we need to do ORC_B on the demanded bits.
21896 APInt DemandedBits =
21897     APInt(BitWidth, computeGREVOrGORC(OriginalDemandedBits.getZExtValue(),
21898 7, IsGORC));
21899 if (SimplifyDemandedBits(Op.getOperand(0), DemandedBits,
21900 OriginalDemandedElts, Known2, TLO, Depth + 1))
21901 return true;
21902
21903 // To compute zeros for ORC_B, we need to invert the value and invert it
21904 // back after. This inverting is harmless for BREV8.
21905 Known.Zero = ~computeGREVOrGORC(~Known2.Zero.getZExtValue(), 7, IsGORC);
21906 Known.One = computeGREVOrGORC(Known2.One.getZExtValue(), 7, IsGORC);
21907 return false;
21908 }
21909 }
21910
21911 return TargetLowering::SimplifyDemandedBitsForTargetNode(
21912     Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
21913}
21914
21915 bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
21916     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
21917 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
21918
21919 // TODO: Add more target nodes.
21920 switch (Op.getOpcode()) {
21921 case RISCVISD::SLLW:
21922 case RISCVISD::SRAW:
21923 case RISCVISD::SRLW:
21924 case RISCVISD::RORW:
21925 case RISCVISD::ROLW:
21926 // Only the lower 5 bits of RHS are read, guaranteeing the rotate/shift
21927 // amount is in bounds.
21928 return false;
21929 case RISCVISD::SELECT_CC:
21930 // Integer comparisons cannot create poison.
21931 assert(Op.getOperand(0).getValueType().isInteger() &&
21932 "RISCVISD::SELECT_CC only compares integers");
21933 return false;
21934 }
21935 return TargetLowering::canCreateUndefOrPoisonForTargetNode(
21936     Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
21937}
21938
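// If Ld is a normal load whose address resolves directly to a constant pool
// entry (either an LLA of it or a HI/ADD_LO pair), return the constant being
// loaded.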
21939const Constant *
21940 RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
21941   assert(Ld && "Unexpected null LoadSDNode");
21942 if (!ISD::isNormalLoad(Ld))
21943 return nullptr;
21944
21945 SDValue Ptr = Ld->getBasePtr();
21946
21947 // Only constant pools with no offset are supported.
21948 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
21949 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
21950 if (!CNode || CNode->isMachineConstantPoolEntry() ||
21951 CNode->getOffset() != 0)
21952 return nullptr;
21953
21954 return CNode;
21955 };
21956
21957 // Simple case, LLA.
21958 if (Ptr.getOpcode() == RISCVISD::LLA) {
21959 auto *CNode = GetSupportedConstantPool(Ptr.getOperand(0));
21960 if (!CNode || CNode->getTargetFlags() != 0)
21961 return nullptr;
21962
21963 return CNode->getConstVal();
21964 }
21965
21966 // Look for a HI and ADD_LO pair.
21967 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
21968 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
21969 return nullptr;
21970
21971 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
21972 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
21973
21974 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
21975 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
21976 return nullptr;
21977
21978 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
21979 return nullptr;
21980
21981 return CNodeLo->getConstVal();
21982}
21983
21984 static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
21985                                                     MachineBasicBlock *BB) {
21986 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
21987
21988 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
21989 // Should the count have wrapped while it was being read, we need to try
21990 // again.
21991 // For example:
21992 // ```
21993 // read:
21994 // csrrs x3, counterh # load high word of counter
21995 // csrrs x2, counter # load low word of counter
21996 // csrrs x4, counterh # load high word of counter
21997 // bne x3, x4, read # check if high word reads match, otherwise try again
21998 // ```
21999
22000 MachineFunction &MF = *BB->getParent();
22001 const BasicBlock *LLVMBB = BB->getBasicBlock();
22002 MachineFunction::iterator It = ++BB->getIterator();
22003
22004 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
22005 MF.insert(It, LoopMBB);
22006
22007 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
22008 MF.insert(It, DoneMBB);
22009
22010 // Transfer the remainder of BB and its successor edges to DoneMBB.
22011 DoneMBB->splice(DoneMBB->begin(), BB,
22012 std::next(MachineBasicBlock::iterator(MI)), BB->end());
22013 DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
22014
22015 BB->addSuccessor(LoopMBB);
22016
22017 MachineRegisterInfo &RegInfo = MF.getRegInfo();
22018 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
22019 Register LoReg = MI.getOperand(0).getReg();
22020 Register HiReg = MI.getOperand(1).getReg();
22021 int64_t LoCounter = MI.getOperand(2).getImm();
22022 int64_t HiCounter = MI.getOperand(3).getImm();
22023 DebugLoc DL = MI.getDebugLoc();
22024
22025 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
22026 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
22027 .addImm(HiCounter)
22028 .addReg(RISCV::X0);
22029 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
22030 .addImm(LoCounter)
22031 .addReg(RISCV::X0);
22032 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
22033 .addImm(HiCounter)
22034 .addReg(RISCV::X0);
22035
22036 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
22037 .addReg(HiReg)
22038 .addReg(ReadAgainReg)
22039 .addMBB(LoopMBB);
22040
22041 LoopMBB->addSuccessor(LoopMBB);
22042 LoopMBB->addSuccessor(DoneMBB);
22043
22044 MI.eraseFromParent();
22045
22046 return DoneMBB;
22047}
22048
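// Split an f64 value into a pair of 32-bit GPRs by spilling the FPR64 to a
// stack slot and reloading the low and high words with two LW instructions.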
22049 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
22050                                              MachineBasicBlock *BB,
22051                                              const RISCVSubtarget &Subtarget) {
22052 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
22053
22054 MachineFunction &MF = *BB->getParent();
22055 DebugLoc DL = MI.getDebugLoc();
22056 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
22057 const TargetRegisterInfo *RI = Subtarget.getRegisterInfo();
22058 Register LoReg = MI.getOperand(0).getReg();
22059 Register HiReg = MI.getOperand(1).getReg();
22060 Register SrcReg = MI.getOperand(2).getReg();
22061
22062 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
22063 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
22064
22065 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
22066 RI, Register());
22067 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
22068 MachineMemOperand *MMOLo =
22069     MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
22070 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
22071     MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
22072 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
22073 .addFrameIndex(FI)
22074 .addImm(0)
22075 .addMemOperand(MMOLo);
22076 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
22077 .addFrameIndex(FI)
22078 .addImm(4)
22079 .addMemOperand(MMOHi);
22080 MI.eraseFromParent(); // The pseudo instruction is gone now.
22081 return BB;
22082}
22083
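// Build an f64 value from a pair of 32-bit GPRs by storing both words to a
// stack slot with SW and reloading the combined value into an FPR64.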
22084 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
22085                                                  MachineBasicBlock *BB,
22086                                                  const RISCVSubtarget &Subtarget) {
22087 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
22088 "Unexpected instruction");
22089
22090 MachineFunction &MF = *BB->getParent();
22091 DebugLoc DL = MI.getDebugLoc();
22092 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
22093 const TargetRegisterInfo *RI = Subtarget.getRegisterInfo();
22094 Register DstReg = MI.getOperand(0).getReg();
22095 Register LoReg = MI.getOperand(1).getReg();
22096 Register HiReg = MI.getOperand(2).getReg();
22097
22098 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
22099 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
22100
22101 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
22102 MachineMemOperand *MMOLo =
22103     MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
22104 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
22105     MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
22106 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
22107 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
22108 .addFrameIndex(FI)
22109 .addImm(0)
22110 .addMemOperand(MMOLo);
22111 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
22112 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
22113 .addFrameIndex(FI)
22114 .addImm(4)
22115 .addMemOperand(MMOHi);
22116 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
22117 MI.eraseFromParent(); // The pseudo instruction is gone now.
22118 return BB;
22119}
22120
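// Lower a quiet FP compare pseudo: run the signaling FLT/FLE with FFLAGS saved
// and restored around it so any spurious invalid flag is discarded, then issue
// a dummy FEQ so that signaling NaNs still raise the invalid exception.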
22121 static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
22122                                         unsigned RelOpcode, unsigned EqOpcode,
22123 const RISCVSubtarget &Subtarget) {
22124 DebugLoc DL = MI.getDebugLoc();
22125 Register DstReg = MI.getOperand(0).getReg();
22126 Register Src1Reg = MI.getOperand(1).getReg();
22127 Register Src2Reg = MI.getOperand(2).getReg();
22128 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
22129 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22130 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
22131
22132 // Save the current FFLAGS.
22133 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
22134
22135 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
22136 .addReg(Src1Reg)
22137 .addReg(Src2Reg);
22138 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
22139   MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
22140
22141 // Restore the FFLAGS.
22142 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
22143 .addReg(SavedFFlags, RegState::Kill);
22144
22145 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
22146 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
22147 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
22148 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
22149 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
22150   MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);
22151
22152 // Erase the pseudoinstruction.
22153 MI.eraseFromParent();
22154 return BB;
22155}
22156
22157static MachineBasicBlock *
22158 EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
22159                           MachineBasicBlock *ThisMBB,
22160 const RISCVSubtarget &Subtarget) {
22161 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
22162 // Without this, custom-inserter would have generated:
22163 //
22164 // A
22165 // | \
22166 // | B
22167 // | /
22168 // C
22169 // | \
22170 // | D
22171 // | /
22172 // E
22173 //
22174 // A: X = ...; Y = ...
22175 // B: empty
22176 // C: Z = PHI [X, A], [Y, B]
22177 // D: empty
22178 // E: PHI [X, C], [Z, D]
22179 //
22180 // If we lower both Select_FPRX_ in a single step, we can instead generate:
22181 //
22182 // A
22183 // | \
22184 // | C
22185 // | /|
22186 // |/ |
22187 // | |
22188 // | D
22189 // | /
22190 // E
22191 //
22192 // A: X = ...; Y = ...
22193 // D: empty
22194 // E: PHI [X, A], [X, C], [Y, D]
22195
22196 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
22197 const DebugLoc &DL = First.getDebugLoc();
22198 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
22199 MachineFunction *F = ThisMBB->getParent();
22200 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
22201 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
22202 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
22203 MachineFunction::iterator It = ++ThisMBB->getIterator();
22204 F->insert(It, FirstMBB);
22205 F->insert(It, SecondMBB);
22206 F->insert(It, SinkMBB);
22207
22208 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
22209 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
22210                 std::next(MachineBasicBlock::iterator(First)),
22211                 ThisMBB->end());
22212 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
22213
22214 // Fallthrough block for ThisMBB.
22215 ThisMBB->addSuccessor(FirstMBB);
22216 // Fallthrough block for FirstMBB.
22217 FirstMBB->addSuccessor(SecondMBB);
22218 ThisMBB->addSuccessor(SinkMBB);
22219 FirstMBB->addSuccessor(SinkMBB);
22220 // This is fallthrough.
22221 SecondMBB->addSuccessor(SinkMBB);
22222
22223 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
22224 Register FLHS = First.getOperand(1).getReg();
22225 Register FRHS = First.getOperand(2).getReg();
22226 // Insert appropriate branch.
22227 BuildMI(FirstMBB, DL, TII.get(RISCVCC::getBrCond(FirstCC, First.getOpcode())))
22228 .addReg(FLHS)
22229 .addReg(FRHS)
22230 .addMBB(SinkMBB);
22231
22232 Register SLHS = Second.getOperand(1).getReg();
22233 Register SRHS = Second.getOperand(2).getReg();
22234 Register Op1Reg4 = First.getOperand(4).getReg();
22235 Register Op1Reg5 = First.getOperand(5).getReg();
22236
22237 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
22238 // Insert appropriate branch.
22239 BuildMI(ThisMBB, DL,
22240 TII.get(RISCVCC::getBrCond(SecondCC, Second.getOpcode())))
22241 .addReg(SLHS)
22242 .addReg(SRHS)
22243 .addMBB(SinkMBB);
22244
22245 Register DestReg = Second.getOperand(0).getReg();
22246 Register Op2Reg4 = Second.getOperand(4).getReg();
22247 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
22248 .addReg(Op2Reg4)
22249 .addMBB(ThisMBB)
22250 .addReg(Op1Reg4)
22251 .addMBB(FirstMBB)
22252 .addReg(Op1Reg5)
22253 .addMBB(SecondMBB);
22254
22255 // Now remove the Select_FPRX_s.
22256 First.eraseFromParent();
22257 Second.eraseFromParent();
22258 return SinkMBB;
22259}
22260
22261 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
22262                                            MachineBasicBlock *BB,
22263                                            const RISCVSubtarget &Subtarget) {
22264 // To "insert" Select_* instructions, we actually have to insert the triangle
22265 // control-flow pattern. The incoming instructions know the destination vreg
22266 // to set, the condition code register to branch on, the true/false values to
22267 // select between, and the condcode to use to select the appropriate branch.
22268 //
22269 // We produce the following control flow:
22270 // HeadMBB
22271 // | \
22272 // | IfFalseMBB
22273 // | /
22274 // TailMBB
22275 //
22276 // When we find a sequence of selects we attempt to optimize their emission
22277 // by sharing the control flow. Currently we only handle cases where we have
22278 // multiple selects with the exact same condition (same LHS, RHS and CC).
22279 // The selects may be interleaved with other instructions if the other
22280 // instructions meet some requirements we deem safe:
22281 // - They are not pseudo instructions.
22282 // - They are debug instructions; otherwise:
22283 // - They do not have side-effects, do not access memory and their inputs do
22284 // not depend on the results of the select pseudo-instructions.
22285 // - They don't adjust the stack.
22286 // The TrueV/FalseV operands of the selects cannot depend on the result of
22287 // previous selects in the sequence.
22288 // These conditions could be further relaxed. See the X86 target for a
22289 // related approach and more information.
22290 //
22291 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
22292 // is checked here and handled by a separate function -
22293 // EmitLoweredCascadedSelect.
22294
22295 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
22296 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
22297 MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
22298 Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
22299 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
22300 Next->getOperand(5).isKill())
22301 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
22302
22303 Register LHS = MI.getOperand(1).getReg();
22304 Register RHS;
22305 if (MI.getOperand(2).isReg())
22306 RHS = MI.getOperand(2).getReg();
22307 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
22308
22309 SmallVector<MachineInstr *, 4> SelectDebugValues;
22310 SmallSet<Register, 4> SelectDests;
22311 SelectDests.insert(MI.getOperand(0).getReg());
22312
22313 MachineInstr *LastSelectPseudo = &MI;
22314 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
22315
22316 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
22317 SequenceMBBI != E; ++SequenceMBBI) {
22318 if (SequenceMBBI->isDebugInstr())
22319 continue;
22320 if (RISCVInstrInfo::isSelectPseudo(*SequenceMBBI)) {
22321 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
22322 !SequenceMBBI->getOperand(2).isReg() ||
22323 SequenceMBBI->getOperand(2).getReg() != RHS ||
22324 SequenceMBBI->getOperand(3).getImm() != CC ||
22325 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
22326 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
22327 break;
22328 LastSelectPseudo = &*SequenceMBBI;
22329 SequenceMBBI->collectDebugValues(SelectDebugValues);
22330 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
22331 continue;
22332 }
22333 if (SequenceMBBI->hasUnmodeledSideEffects() ||
22334 SequenceMBBI->mayLoadOrStore() ||
22335 SequenceMBBI->usesCustomInsertionHook() ||
22336 TII.isFrameInstr(*SequenceMBBI) ||
22337 SequenceMBBI->isStackAligningInlineAsm())
22338 break;
22339 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
22340 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
22341 }))
22342 break;
22343 }
22344
22345 const BasicBlock *LLVM_BB = BB->getBasicBlock();
22346 DebugLoc DL = MI.getDebugLoc();
22347 MachineFunction::iterator I = ++BB->getIterator();
22348
22349 MachineBasicBlock *HeadMBB = BB;
22350 MachineFunction *F = BB->getParent();
22351 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
22352 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
22353
22354 F->insert(I, IfFalseMBB);
22355 F->insert(I, TailMBB);
22356
22357 // Set the call frame size on entry to the new basic blocks.
22358 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
22359 IfFalseMBB->setCallFrameSize(CallFrameSize);
22360 TailMBB->setCallFrameSize(CallFrameSize);
22361
22362 // Transfer debug instructions associated with the selects to TailMBB.
22363 for (MachineInstr *DebugInstr : SelectDebugValues) {
22364 TailMBB->push_back(DebugInstr->removeFromParent());
22365 }
22366
22367 // Move all instructions after the sequence to TailMBB.
22368 TailMBB->splice(TailMBB->end(), HeadMBB,
22369 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
22370 // Update machine-CFG edges by transferring all successors of the current
22371 // block to the new block which will contain the Phi nodes for the selects.
22372 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
22373 // Set the successors for HeadMBB.
22374 HeadMBB->addSuccessor(IfFalseMBB);
22375 HeadMBB->addSuccessor(TailMBB);
22376
22377 // Insert appropriate branch.
22378 if (MI.getOperand(2).isImm())
22379 BuildMI(HeadMBB, DL, TII.get(RISCVCC::getBrCond(CC, MI.getOpcode())))
22380 .addReg(LHS)
22381 .addImm(MI.getOperand(2).getImm())
22382 .addMBB(TailMBB);
22383 else
22384 BuildMI(HeadMBB, DL, TII.get(RISCVCC::getBrCond(CC, MI.getOpcode())))
22385 .addReg(LHS)
22386 .addReg(RHS)
22387 .addMBB(TailMBB);
22388
22389 // IfFalseMBB just falls through to TailMBB.
22390 IfFalseMBB->addSuccessor(TailMBB);
22391
22392 // Create PHIs for all of the select pseudo-instructions.
22393 auto SelectMBBI = MI.getIterator();
22394 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
22395 auto InsertionPoint = TailMBB->begin();
22396 while (SelectMBBI != SelectEnd) {
22397 auto Next = std::next(SelectMBBI);
22398 if (RISCVInstrInfo::isSelectPseudo(*SelectMBBI)) {
22399 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
22400 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
22401 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
22402 .addReg(SelectMBBI->getOperand(4).getReg())
22403 .addMBB(HeadMBB)
22404 .addReg(SelectMBBI->getOperand(5).getReg())
22405 .addMBB(IfFalseMBB);
22406 SelectMBBI->eraseFromParent();
22407 }
22408 SelectMBBI = Next;
22409 }
22410
22411 F->getProperties().resetNoPHIs();
22412 return TailMBB;
22413}
22414
22415// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
22416static const RISCV::RISCVMaskedPseudoInfo *
22417lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVVType::VLMUL LMul, unsigned SEW) {
22418 const RISCVVInversePseudosTable::PseudoInfo *Inverse =
22419     RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
22420 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
22421 const RISCV::RISCVMaskedPseudoInfo *Masked =
22422     RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
22423 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
22424 return Masked;
22425}
22426
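// Lower PseudoVFROUND_NOEXCEPT_V_*_MASK by converting to integer and back with
// the dynamic rounding mode, saving and restoring FFLAGS around the sequence so
// that no floating-point exception flags are left set.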
22427 static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
22428                                                     MachineBasicBlock *BB,
22429                                                     unsigned CVTXOpc) {
22430 DebugLoc DL = MI.getDebugLoc();
22431
22432 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
22433
22434 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
22435 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22436
22437 // Save the old value of FFLAGS.
22438 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
22439
22440 assert(MI.getNumOperands() == 7);
22441
22442 // Emit a VFCVT_X_F
22443 const TargetRegisterInfo *TRI =
22444     BB->getParent()->getSubtarget().getRegisterInfo();
22445 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
22446 Register Tmp = MRI.createVirtualRegister(RC);
22447 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
22448 .add(MI.getOperand(1))
22449 .add(MI.getOperand(2))
22450 .add(MI.getOperand(3))
22451 .add(MachineOperand::CreateImm(7)) // frm = DYN
22452 .add(MI.getOperand(4))
22453 .add(MI.getOperand(5))
22454 .add(MI.getOperand(6))
22455 .add(MachineOperand::CreateReg(RISCV::FRM,
22456 /*IsDef*/ false,
22457 /*IsImp*/ true));
22458
22459 // Emit a VFCVT_F_X
22460 RISCVVType::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
22461 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
22462 // There is no E8 variant for VFCVT_F_X.
22463 assert(Log2SEW >= 4);
22464 unsigned CVTFOpc =
22465 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
22466 ->MaskedPseudo;
22467
22468 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
22469 .add(MI.getOperand(0))
22470 .add(MI.getOperand(1))
22471 .addReg(Tmp)
22472 .add(MI.getOperand(3))
22473 .add(MachineOperand::CreateImm(7)) // frm = DYN
22474 .add(MI.getOperand(4))
22475 .add(MI.getOperand(5))
22476 .add(MI.getOperand(6))
22477 .add(MachineOperand::CreateReg(RISCV::FRM,
22478 /*IsDef*/ false,
22479 /*IsImp*/ true));
22480
22481 // Restore FFLAGS.
22482 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
22483 .addReg(SavedFFLAGS, RegState::Kill);
22484
22485 // Erase the pseudoinstruction.
22486 MI.eraseFromParent();
22487 return BB;
22488}
22489
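// Lower PseudoFROUND_*: if |Src| is at least the Max operand (the smallest
// value whose fractional part is guaranteed to be zero), return Src unchanged;
// otherwise round it by converting to an integer and back using the requested
// rounding mode and restore the original sign bit.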
22490 static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
22491                                      const RISCVSubtarget &Subtarget) {
22492 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
22493 const TargetRegisterClass *RC;
22494 switch (MI.getOpcode()) {
22495 default:
22496 llvm_unreachable("Unexpected opcode");
22497 case RISCV::PseudoFROUND_H:
22498 CmpOpc = RISCV::FLT_H;
22499 F2IOpc = RISCV::FCVT_W_H;
22500 I2FOpc = RISCV::FCVT_H_W;
22501 FSGNJOpc = RISCV::FSGNJ_H;
22502 FSGNJXOpc = RISCV::FSGNJX_H;
22503 RC = &RISCV::FPR16RegClass;
22504 break;
22505 case RISCV::PseudoFROUND_H_INX:
22506 CmpOpc = RISCV::FLT_H_INX;
22507 F2IOpc = RISCV::FCVT_W_H_INX;
22508 I2FOpc = RISCV::FCVT_H_W_INX;
22509 FSGNJOpc = RISCV::FSGNJ_H_INX;
22510 FSGNJXOpc = RISCV::FSGNJX_H_INX;
22511 RC = &RISCV::GPRF16RegClass;
22512 break;
22513 case RISCV::PseudoFROUND_S:
22514 CmpOpc = RISCV::FLT_S;
22515 F2IOpc = RISCV::FCVT_W_S;
22516 I2FOpc = RISCV::FCVT_S_W;
22517 FSGNJOpc = RISCV::FSGNJ_S;
22518 FSGNJXOpc = RISCV::FSGNJX_S;
22519 RC = &RISCV::FPR32RegClass;
22520 break;
22521 case RISCV::PseudoFROUND_S_INX:
22522 CmpOpc = RISCV::FLT_S_INX;
22523 F2IOpc = RISCV::FCVT_W_S_INX;
22524 I2FOpc = RISCV::FCVT_S_W_INX;
22525 FSGNJOpc = RISCV::FSGNJ_S_INX;
22526 FSGNJXOpc = RISCV::FSGNJX_S_INX;
22527 RC = &RISCV::GPRF32RegClass;
22528 break;
22529 case RISCV::PseudoFROUND_D:
22530 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
22531 CmpOpc = RISCV::FLT_D;
22532 F2IOpc = RISCV::FCVT_L_D;
22533 I2FOpc = RISCV::FCVT_D_L;
22534 FSGNJOpc = RISCV::FSGNJ_D;
22535 FSGNJXOpc = RISCV::FSGNJX_D;
22536 RC = &RISCV::FPR64RegClass;
22537 break;
22538 case RISCV::PseudoFROUND_D_INX:
22539 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
22540 CmpOpc = RISCV::FLT_D_INX;
22541 F2IOpc = RISCV::FCVT_L_D_INX;
22542 I2FOpc = RISCV::FCVT_D_L_INX;
22543 FSGNJOpc = RISCV::FSGNJ_D_INX;
22544 FSGNJXOpc = RISCV::FSGNJX_D_INX;
22545 RC = &RISCV::GPRRegClass;
22546 break;
22547 }
22548
22549 const BasicBlock *BB = MBB->getBasicBlock();
22550 DebugLoc DL = MI.getDebugLoc();
22551 MachineFunction::iterator I = ++MBB->getIterator();
22552
22553 MachineFunction *F = MBB->getParent();
22554 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
22555 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
22556
22557 F->insert(I, CvtMBB);
22558 F->insert(I, DoneMBB);
22559 // Move all instructions after the sequence to DoneMBB.
22560 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
22561 MBB->end());
22562 // Update machine-CFG edges by transferring all successors of the current
22563 // block to the new block which will contain the PHI node merging the results.
22564 DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
22565 // Set the successors for MBB.
22566 MBB->addSuccessor(CvtMBB);
22567 MBB->addSuccessor(DoneMBB);
22568
22569 Register DstReg = MI.getOperand(0).getReg();
22570 Register SrcReg = MI.getOperand(1).getReg();
22571 Register MaxReg = MI.getOperand(2).getReg();
22572 int64_t FRM = MI.getOperand(3).getImm();
22573
22574 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
22575 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
22576
22577 Register FabsReg = MRI.createVirtualRegister(RC);
22578 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
22579
22580 // Compare the FP value to the max value.
22581 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22582 auto MIB =
22583 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
22584 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
22585   MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
22586
22587 // Insert branch.
22588 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
22589 .addReg(CmpReg)
22590 .addReg(RISCV::X0)
22591 .addMBB(DoneMBB);
22592
22593 CvtMBB->addSuccessor(DoneMBB);
22594
22595 // Convert to integer.
22596 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22597 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
22598 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
22599   MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
22600
22601 // Convert back to FP.
22602 Register I2FReg = MRI.createVirtualRegister(RC);
22603 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
22604 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
22605   MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
22606
22607 // Restore the sign bit.
22608 Register CvtReg = MRI.createVirtualRegister(RC);
22609 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
22610
22611 // Merge the results.
22612 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
22613 .addReg(SrcReg)
22614 .addMBB(MBB)
22615 .addReg(CvtReg)
22616 .addMBB(CvtMBB);
22617
22618 MI.eraseFromParent();
22619 return DoneMBB;
22620}
22621
22622 MachineBasicBlock *RISCVTargetLowering::EmitInstrWithCustomInserter(
22623     MachineInstr &MI, MachineBasicBlock *BB) const {
22625 switch (MI.getOpcode()) {
22626 default:
22627 llvm_unreachable("Unexpected instr type to insert");
22628 case RISCV::ReadCounterWide:
22629 assert(!Subtarget.is64Bit() &&
22630 "ReadCounterWide is only to be used on riscv32");
22631 return emitReadCounterWidePseudo(MI, BB);
22632 case RISCV::Select_GPR_Using_CC_GPR:
22633 case RISCV::Select_GPR_Using_CC_Imm5_Zibi:
22634 case RISCV::Select_GPR_Using_CC_SImm5_CV:
22635 case RISCV::Select_GPRNoX0_Using_CC_SImm5NonZero_QC:
22636 case RISCV::Select_GPRNoX0_Using_CC_UImm5NonZero_QC:
22637 case RISCV::Select_GPRNoX0_Using_CC_SImm16NonZero_QC:
22638 case RISCV::Select_GPRNoX0_Using_CC_UImm16NonZero_QC:
22639 case RISCV::Select_GPR_Using_CC_UImmLog2XLen_NDS:
22640 case RISCV::Select_GPR_Using_CC_UImm7_NDS:
22641 case RISCV::Select_FPR16_Using_CC_GPR:
22642 case RISCV::Select_FPR16INX_Using_CC_GPR:
22643 case RISCV::Select_FPR32_Using_CC_GPR:
22644 case RISCV::Select_FPR32INX_Using_CC_GPR:
22645 case RISCV::Select_FPR64_Using_CC_GPR:
22646 case RISCV::Select_FPR64INX_Using_CC_GPR:
22647 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
22648 return emitSelectPseudo(MI, BB, Subtarget);
22649 case RISCV::BuildPairF64Pseudo:
22650 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
22651 case RISCV::SplitF64Pseudo:
22652 return emitSplitF64Pseudo(MI, BB, Subtarget);
22653 case RISCV::PseudoQuietFLE_H:
22654 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
22655 case RISCV::PseudoQuietFLE_H_INX:
22656 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
22657 case RISCV::PseudoQuietFLT_H:
22658 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
22659 case RISCV::PseudoQuietFLT_H_INX:
22660 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
22661 case RISCV::PseudoQuietFLE_S:
22662 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
22663 case RISCV::PseudoQuietFLE_S_INX:
22664 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
22665 case RISCV::PseudoQuietFLT_S:
22666 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
22667 case RISCV::PseudoQuietFLT_S_INX:
22668 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
22669 case RISCV::PseudoQuietFLE_D:
22670 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
22671 case RISCV::PseudoQuietFLE_D_INX:
22672 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
22673 case RISCV::PseudoQuietFLE_D_IN32X:
22674 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
22675 Subtarget);
22676 case RISCV::PseudoQuietFLT_D:
22677 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
22678 case RISCV::PseudoQuietFLT_D_INX:
22679 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
22680 case RISCV::PseudoQuietFLT_D_IN32X:
22681 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
22682 Subtarget);
22683
22684 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
22685 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
22686 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
22687 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
22688 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
22689 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
22690 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
22691 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
22692 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
22693 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
22694 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
22695 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
22696 case RISCV::PseudoFROUND_H:
22697 case RISCV::PseudoFROUND_H_INX:
22698 case RISCV::PseudoFROUND_S:
22699 case RISCV::PseudoFROUND_S_INX:
22700 case RISCV::PseudoFROUND_D:
22701 case RISCV::PseudoFROUND_D_INX:
22702 case RISCV::PseudoFROUND_D_IN32X:
22703 return emitFROUND(MI, BB, Subtarget);
22704 case RISCV::PROBED_STACKALLOC_DYN:
22705 return emitDynamicProbedAlloc(MI, BB);
22706 case TargetOpcode::STATEPOINT:
22707 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
22708 // while the jal call instruction (to which the statepoint is eventually
22709 // lowered) has an implicit def. This def is early-clobber as it will be set at
22710 // the moment of the call and earlier than any use is read.
22711 // Add this implicit dead def here as a workaround.
22712 MI.addOperand(*MI.getMF(),
22713 MachineOperand::CreateReg(
22714 RISCV::X1, /*isDef*/ true,
22715 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
22716 /*isUndef*/ false, /*isEarlyClobber*/ true));
22717 [[fallthrough]];
22718 case TargetOpcode::STACKMAP:
22719 case TargetOpcode::PATCHPOINT:
22720 if (!Subtarget.is64Bit())
22721 reportFatalUsageError("STACKMAP, PATCHPOINT and STATEPOINT are only "
22722 "supported on 64-bit targets");
22723 return emitPatchPoint(MI, BB);
22724 }
22725}
22726
22727 void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
22728 SDNode *Node) const {
22729 // If instruction defines FRM operand, conservatively set it as non-dead to
22730 // express data dependency with FRM users and prevent incorrect instruction
22731 // reordering.
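// For example, a scalar or vector FP pseudo whose rounding-mode operand is
// RISCVFPRndMode::DYN receives an implicit FRM use below, so a later write
// to the FRM CSR cannot be moved across it.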
22732 if (auto *FRMDef = MI.findRegisterDefOperand(RISCV::FRM, /*TRI=*/nullptr)) {
22733 FRMDef->setIsDead(false);
22734 return;
22735 }
22736 // Add FRM dependency to any instructions with dynamic rounding mode.
22737 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
22738 if (Idx < 0) {
22739 // Vector pseudos have FRM index indicated by TSFlags.
22740 Idx = RISCVII::getFRMOpNum(MI.getDesc());
22741 if (Idx < 0)
22742 return;
22743 }
22744 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
22745 return;
22746 // If the instruction already reads FRM, don't add another read.
22747 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
22748 return;
22749 MI.addOperand(
22750 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
22751}
22752
22753void RISCVTargetLowering::analyzeInputArgs(
22754 MachineFunction &MF, CCState &CCInfo,
22755 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
22756 RISCVCCAssignFn Fn) const {
22757 for (const auto &[Idx, In] : enumerate(Ins)) {
22758 MVT ArgVT = In.VT;
22759 ISD::ArgFlagsTy ArgFlags = In.Flags;
22760
22761 if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, IsRet,
22762 In.OrigTy)) {
22763 LLVM_DEBUG(dbgs() << "InputArg #" << Idx << " has unhandled type "
22764 << ArgVT << '\n');
22765 llvm_unreachable(nullptr);
22766 }
22767 }
22768}
22769
22770void RISCVTargetLowering::analyzeOutputArgs(
22771 MachineFunction &MF, CCState &CCInfo,
22772 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
22773 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
22774 for (const auto &[Idx, Out] : enumerate(Outs)) {
22775 MVT ArgVT = Out.VT;
22776 ISD::ArgFlagsTy ArgFlags = Out.Flags;
22777
22778 if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, IsRet,
22779 Out.OrigTy)) {
22780 LLVM_DEBUG(dbgs() << "OutputArg #" << Idx << " has unhandled type "
22781 << ArgVT << "\n");
22782 llvm_unreachable(nullptr);
22783 }
22784 }
22785}
22786
22787// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
22788// values.
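// For example, an f32 value that arrived in a 64-bit GPR (LocVT i64) is
// moved back into an FPR with FMV_W_X_RV64, while a BCvt location is handled
// with a plain bitcast.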
22789 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
22790 const CCValAssign &VA, const SDLoc &DL,
22791 const RISCVSubtarget &Subtarget) {
22792 if (VA.needsCustom()) {
22793 if (VA.getLocVT().isInteger() &&
22794 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
22795 return DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
22796 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
22797 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
22798 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
22799 return convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
22800 llvm_unreachable("Unexpected Custom handling.");
22801 }
22802
22803 switch (VA.getLocInfo()) {
22804 default:
22805 llvm_unreachable("Unexpected CCValAssign::LocInfo");
22806 case CCValAssign::Full:
22807 break;
22808 case CCValAssign::BCvt:
22809 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
22810 break;
22811 }
22812 return Val;
22813}
22814
22815// The caller is responsible for loading the full value if the argument is
22816// passed with CCValAssign::Indirect.
22817 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
22818 const CCValAssign &VA, const SDLoc &DL,
22819 const ISD::InputArg &In,
22820 const RISCVTargetLowering &TLI) {
22821 MachineFunction &MF = DAG.getMachineFunction();
22822 MachineRegisterInfo &RegInfo = MF.getRegInfo();
22823 EVT LocVT = VA.getLocVT();
22824 SDValue Val;
22825 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
22826 Register VReg = RegInfo.createVirtualRegister(RC);
22827 RegInfo.addLiveIn(VA.getLocReg(), VReg);
22828 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
22829
22830 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
22831 if (In.isOrigArg()) {
22832 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
22833 if (OrigArg->getType()->isIntegerTy()) {
22834 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
22835 // An input zero extended from i31 can also be considered sign extended.
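// e.g. an i8 argument that the ABI zero-extends has bit 31 clear, so it is
// also sign-extended from 32 bits and can safely be recorded here.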
22836 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
22837 (BitWidth < 32 && In.Flags.isZExt())) {
22838 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
22839 RVFI->addSExt32Register(VReg);
22840 }
22841 }
22842 }
22843
22844 if (VA.getLocInfo() == CCValAssign::Indirect)
22845 return Val;
22846
22847 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
22848}
22849
22850 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
22851 const CCValAssign &VA, const SDLoc &DL,
22852 const RISCVSubtarget &Subtarget) {
22853 EVT LocVT = VA.getLocVT();
22854
22855 if (VA.needsCustom()) {
22856 if (LocVT.isInteger() &&
22857 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
22858 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
22859 if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32)
22860 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
22861 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
22862 return convertToScalableVector(LocVT, Val, DAG, Subtarget);
22863 llvm_unreachable("Unexpected Custom handling.");
22864 }
22865
22866 switch (VA.getLocInfo()) {
22867 default:
22868 llvm_unreachable("Unexpected CCValAssign::LocInfo");
22869 case CCValAssign::Full:
22870 break;
22871 case CCValAssign::BCvt:
22872 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
22873 break;
22874 }
22875 return Val;
22876}
22877
22878// The caller is responsible for loading the full value if the argument is
22879// passed with CCValAssign::Indirect.
22880 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
22881 const CCValAssign &VA, const SDLoc &DL) {
22882 MachineFunction &MF = DAG.getMachineFunction();
22883 MachineFrameInfo &MFI = MF.getFrameInfo();
22884 EVT LocVT = VA.getLocVT();
22885 EVT ValVT = VA.getValVT();
22886 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
22887 if (VA.getLocInfo() == CCValAssign::Indirect) {
22888 // When the value is a scalable vector, we save the pointer which points to
22889 // the scalable vector value in the stack. The ValVT will be the pointer
22890 // type, instead of the scalable vector type.
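// For example, a <vscale x 4 x i32> argument assigned Indirect only has its
// XLEN-sized pointer read from the fixed stack object here; the caller of
// this helper reloads the vector value through that pointer.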
22891 ValVT = LocVT;
22892 }
22893 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
22894 /*IsImmutable=*/true);
22895 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
22896 SDValue Val;
22897
22898 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
22899 switch (VA.getLocInfo()) {
22900 default:
22901 llvm_unreachable("Unexpected CCValAssign::LocInfo");
22902 case CCValAssign::Full:
22903 case CCValAssign::Indirect:
22904 case CCValAssign::BCvt:
22905 break;
22906 }
22907 Val = DAG.getExtLoad(
22908 ExtType, DL, LocVT, Chain, FIN,
22909 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
22910 return Val;
22911}
22912
22913 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
22914 const CCValAssign &VA,
22915 const CCValAssign &HiVA,
22916 const SDLoc &DL) {
22917 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
22918 "Unexpected VA");
22919 MachineFunction &MF = DAG.getMachineFunction();
22920 MachineFrameInfo &MFI = MF.getFrameInfo();
22921 MachineRegisterInfo &RegInfo = MF.getRegInfo();
22922 
22923 assert(VA.isRegLoc() && "Expected register VA assignment");
22924
22925 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
22926 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
22927 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
22928 SDValue Hi;
22929 if (HiVA.isMemLoc()) {
22930 // Second half of f64 is passed on the stack.
22931 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
22932 /*IsImmutable=*/true);
22933 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
22934 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
22935 MachinePointerInfo::getFixedStack(MF, FI));
22936 } else {
22937 // Second half of f64 is passed in another GPR.
22938 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
22939 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
22940 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
22941 }
22942 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
22943}
22944
22945// Transform physical registers into virtual registers.
22946 SDValue RISCVTargetLowering::LowerFormalArguments(
22947 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
22948 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
22949 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
22950
22951 MachineFunction &MF = DAG.getMachineFunction();
22952 
22953 switch (CallConv) {
22954 default:
22955 reportFatalUsageError("Unsupported calling convention");
22956 case CallingConv::C:
22957 case CallingConv::Fast:
22960 case CallingConv::GRAAL:
22962#define CC_VLS_CASE(ABI_VLEN) case CallingConv::RISCV_VLSCall_##ABI_VLEN:
22963 CC_VLS_CASE(32)
22964 CC_VLS_CASE(64)
22965 CC_VLS_CASE(128)
22966 CC_VLS_CASE(256)
22967 CC_VLS_CASE(512)
22968 CC_VLS_CASE(1024)
22969 CC_VLS_CASE(2048)
22970 CC_VLS_CASE(4096)
22971 CC_VLS_CASE(8192)
22972 CC_VLS_CASE(16384)
22973 CC_VLS_CASE(32768)
22974 CC_VLS_CASE(65536)
22975#undef CC_VLS_CASE
22976 break;
22977 case CallingConv::GHC:
22978 if (Subtarget.hasStdExtE())
22979 reportFatalUsageError("GHC calling convention is not supported on RVE!");
22980 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
22981 reportFatalUsageError("GHC calling convention requires the (Zfinx/F) and "
22982 "(Zdinx/D) instruction set extensions");
22983 }
22984
22985 const Function &Func = MF.getFunction();
22986 if (Func.hasFnAttribute("interrupt")) {
22987 if (!Func.arg_empty())
22989 "Functions with the interrupt attribute cannot have arguments!");
22990
22991 StringRef Kind =
22992 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
22993
22994 constexpr StringLiteral SupportedInterruptKinds[] = {
22995 "machine",
22996 "supervisor",
22997 "rnmi",
22998 "qci-nest",
22999 "qci-nonest",
23000 "SiFive-CLIC-preemptible",
23001 "SiFive-CLIC-stack-swap",
23002 "SiFive-CLIC-preemptible-stack-swap",
23003 };
23004 if (!llvm::is_contained(SupportedInterruptKinds, Kind))
23006 "Function interrupt attribute argument not supported!");
23007
23008 if (Kind.starts_with("qci-") && !Subtarget.hasVendorXqciint())
23010 "'qci-*' interrupt kinds require Xqciint extension");
23011
23012 if (Kind.starts_with("SiFive-CLIC-") && !Subtarget.hasVendorXSfmclic())
23014 "'SiFive-CLIC-*' interrupt kinds require XSfmclic extension");
23015
23016 if (Kind == "rnmi" && !Subtarget.hasStdExtSmrnmi())
23017 reportFatalUsageError("'rnmi' interrupt kind requires Srnmi extension");
23018 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
23019 if (Kind.starts_with("SiFive-CLIC-preemptible") && TFI->hasFP(MF))
23020 reportFatalUsageError("'SiFive-CLIC-preemptible' interrupt kinds cannot "
23021 "have a frame pointer");
23022 }
23023
23024 EVT PtrVT = getPointerTy(DAG.getDataLayout());
23025 MVT XLenVT = Subtarget.getXLenVT();
23026 unsigned XLenInBytes = Subtarget.getXLen() / 8;
23027 // Used with vargs to accumulate store chains.
23028 std::vector<SDValue> OutChains;
23029
23030 // Assign locations to all of the incoming arguments.
23031 SmallVector<CCValAssign, 16> ArgLocs;
23032 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
23033
23034 if (CallConv == CallingConv::GHC)
23035 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
23036 else
23037 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
23038 CallConv == CallingConv::Fast ? CC_RISCV_FastCC
23039 : CC_RISCV);
23040
23041 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
23042 CCValAssign &VA = ArgLocs[i];
23043 SDValue ArgValue;
23044 // Passing f64 on RV32D with a soft float ABI must be handled as a special
23045 // case.
23046 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23047 assert(VA.needsCustom());
23048 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
23049 } else if (VA.isRegLoc())
23050 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
23051 else
23052 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
23053
23054 if (VA.getLocInfo() == CCValAssign::Indirect) {
23055 // If the original argument was split and passed by reference (e.g. i128
23056 // on RV32), we need to load all parts of it here (using the same
23057 // address). Vectors may be partly split to registers and partly to the
23058 // stack, in which case the base address is partly offset and subsequent
23059 // stores are relative to that.
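// For example, an i128 argument on RV32 is split into four XLEN-sized parts
// that share one OrigArgIndex; the loop below reloads the remaining parts
// from the same base address at their respective PartOffsets.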
23060 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
23061 MachinePointerInfo()));
23062 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
23063 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
23064 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
23065 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
23066 CCValAssign &PartVA = ArgLocs[i + 1];
23067 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
23068 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
23069 if (PartVA.getValVT().isScalableVector())
23070 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
23071 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
23072 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
23073 MachinePointerInfo()));
23074 ++i;
23075 ++InsIdx;
23076 }
23077 continue;
23078 }
23079 InVals.push_back(ArgValue);
23080 }
23081
23082 if (any_of(ArgLocs,
23083 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
23084 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
23085
23086 if (IsVarArg) {
23087 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
23088 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
23089 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
23090 MachineFrameInfo &MFI = MF.getFrameInfo();
23091 MachineRegisterInfo &RegInfo = MF.getRegInfo();
23092 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
23093 
23094 // Size of the vararg save area. For now, the varargs save area is either
23095 // zero or large enough to hold a0-a7.
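// e.g. a varargs ilp32 function with two named integer arguments saves a2-a7
// here, giving a save area of 6 * 4 = 24 bytes.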
23096 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
23097 int FI;
23098
23099 // If all registers are allocated, then all varargs must be passed on the
23100 // stack and we don't need to save any argregs.
23101 if (VarArgsSaveSize == 0) {
23102 int VaArgOffset = CCInfo.getStackSize();
23103 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
23104 } else {
23105 int VaArgOffset = -VarArgsSaveSize;
23106 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
23107
23108 // If saving an odd number of registers then create an extra stack slot to
23109 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
23110 // offsets to even-numbered registers remain 2*XLEN-aligned.
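// e.g. on RV32 with three named arguments, a3-a7 (20 bytes) are saved and
// the extra 4-byte slot below restores 8-byte alignment of the save area.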
23111 if (Idx % 2) {
23112 MFI.CreateFixedObject(
23113 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
23114 VarArgsSaveSize += XLenInBytes;
23115 }
23116
23117 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
23118
23119 // Copy the integer registers that may have been used for passing varargs
23120 // to the vararg save area.
23121 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
23122 const Register Reg = RegInfo.createVirtualRegister(RC);
23123 RegInfo.addLiveIn(ArgRegs[I], Reg);
23124 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
23125 SDValue Store = DAG.getStore(
23126 Chain, DL, ArgValue, FIN,
23127 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
23128 OutChains.push_back(Store);
23129 FIN =
23130 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
23131 }
23132 }
23133
23134 // Record the frame index of the first variable argument
23135 // which is a value necessary to VASTART.
23136 RVFI->setVarArgsFrameIndex(FI);
23137 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
23138 }
23139
23140 // All stores are grouped in one node to allow the matching between
23141 // the size of Ins and InVals. This only happens for vararg functions.
23142 if (!OutChains.empty()) {
23143 OutChains.push_back(Chain);
23144 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
23145 }
23146
23147 return Chain;
23148}
23149
23150/// isEligibleForTailCallOptimization - Check whether the call is eligible
23151/// for tail call optimization.
23152/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
23153bool RISCVTargetLowering::isEligibleForTailCallOptimization(
23154 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
23155 const SmallVector<CCValAssign, 16> &ArgLocs) const {
23156
23157 auto CalleeCC = CLI.CallConv;
23158 auto &Outs = CLI.Outs;
23159 auto &Caller = MF.getFunction();
23160 auto CallerCC = Caller.getCallingConv();
23161
23162 // Exception-handling functions need a special set of instructions to
23163 // indicate a return to the hardware. Tail-calling another function would
23164 // probably break this.
23165 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
23166 // should be expanded as new function attributes are introduced.
23167 if (Caller.hasFnAttribute("interrupt"))
23168 return false;
23169
23170 // Do not tail call opt if the stack is used to pass parameters.
23171 if (CCInfo.getStackSize() != 0)
23172 return false;
23173
23174 // Do not tail call opt if any parameters need to be passed indirectly.
23175 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
23176 // passed indirectly. So the address of the value will be passed in a
23177 // register, or if not available, then the address is put on the stack. In
23178 // order to pass indirectly, stack space often needs to be allocated to hold
23179 // the value. In that case the CCInfo.getStackSize() != 0 check above is not
23180 // enough, and we also need to check whether any of the CCValAssigns in
23181 // ArgLocs are CCValAssign::Indirect.
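// e.g. an i256 argument is spilled to the caller's stack and only its
// address is passed, so the caller's frame cannot simply be reused for a
// tail call.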
23182 for (auto &VA : ArgLocs)
23183 if (VA.getLocInfo() == CCValAssign::Indirect)
23184 return false;
23185
23186 // Do not tail call opt if either caller or callee uses struct return
23187 // semantics.
23188 auto IsCallerStructRet = Caller.hasStructRetAttr();
23189 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
23190 if (IsCallerStructRet || IsCalleeStructRet)
23191 return false;
23192
23193 // The callee has to preserve all registers the caller needs to preserve.
23194 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
23195 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
23196 if (CalleeCC != CallerCC) {
23197 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
23198 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
23199 return false;
23200 }
23201
23202 // Byval parameters hand the function a pointer directly into the stack area
23203 // we want to reuse during a tail call. Working around this *is* possible
23204 // but less efficient and uglier in LowerCall.
23205 for (auto &Arg : Outs)
23206 if (Arg.Flags.isByVal())
23207 return false;
23208
23209 return true;
23210}
23211
23212 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
23213 return DAG.getDataLayout().getPrefTypeAlign(
23214 VT.getTypeForEVT(*DAG.getContext()));
23215}
23216
23217// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
23218// and output parameter nodes.
23219 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
23220 SmallVectorImpl<SDValue> &InVals) const {
23221 SelectionDAG &DAG = CLI.DAG;
23222 SDLoc &DL = CLI.DL;
23223 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
23224 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
23225 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
23226 SDValue Chain = CLI.Chain;
23227 SDValue Callee = CLI.Callee;
23228 bool &IsTailCall = CLI.IsTailCall;
23229 CallingConv::ID CallConv = CLI.CallConv;
23230 bool IsVarArg = CLI.IsVarArg;
23231 EVT PtrVT = getPointerTy(DAG.getDataLayout());
23232 MVT XLenVT = Subtarget.getXLenVT();
23233 const CallBase *CB = CLI.CB;
23234
23235 MachineFunction &MF = DAG.getMachineFunction();
23236 MachineFunction::CallSiteInfo CSInfo;
23237 
23238 // Set type id for call site info.
23239 if (MF.getTarget().Options.EmitCallGraphSection && CB && CB->isIndirectCall())
23240 CSInfo = MachineFunction::CallSiteInfo(*CB);
23241
23242 // Analyze the operands of the call, assigning locations to each operand.
23243 SmallVector<CCValAssign, 16> ArgLocs;
23244 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
23245
23246 if (CallConv == CallingConv::GHC) {
23247 if (Subtarget.hasStdExtE())
23248 reportFatalUsageError("GHC calling convention is not supported on RVE!");
23249 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
23250 } else
23251 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
23252 CallConv == CallingConv::Fast ? CC_RISCV_FastCC
23253 : CC_RISCV);
23254
23255 // Check if it's really possible to do a tail call.
23256 if (IsTailCall)
23257 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
23258
23259 if (IsTailCall)
23260 ++NumTailCalls;
23261 else if (CLI.CB && CLI.CB->isMustTailCall())
23262 reportFatalInternalError("failed to perform tail call elimination on a "
23263 "call site marked musttail");
23264
23265 // Get a count of how many bytes are to be pushed on the stack.
23266 unsigned NumBytes = ArgCCInfo.getStackSize();
23267
23268 // Create local copies for byval args
23269 SmallVector<SDValue, 8> ByValArgs;
23270 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
23271 ISD::ArgFlagsTy Flags = Outs[i].Flags;
23272 if (!Flags.isByVal())
23273 continue;
23274
23275 SDValue Arg = OutVals[i];
23276 unsigned Size = Flags.getByValSize();
23277 Align Alignment = Flags.getNonZeroByValAlign();
23278
23279 int FI =
23280 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
23281 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
23282 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
23283
23284 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
23285 /*IsVolatile=*/false,
23286 /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,
23287 MachinePointerInfo(), MachinePointerInfo());
23288 ByValArgs.push_back(FIPtr);
23289 }
23290
23291 if (!IsTailCall)
23292 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
23293
23294 // Copy argument values to their designated locations.
23295 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
23296 SmallVector<SDValue, 8> MemOpChains;
23297 SDValue StackPtr;
23298 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
23299 ++i, ++OutIdx) {
23300 CCValAssign &VA = ArgLocs[i];
23301 SDValue ArgValue = OutVals[OutIdx];
23302 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
23303
23304 // Handle passing f64 on RV32D with a soft float ABI as a special case.
23305 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23306 assert(VA.isRegLoc() && "Expected register VA assignment");
23307 assert(VA.needsCustom());
23308 SDValue SplitF64 = DAG.getNode(
23309 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
23310 SDValue Lo = SplitF64.getValue(0);
23311 SDValue Hi = SplitF64.getValue(1);
23312
23313 Register RegLo = VA.getLocReg();
23314 RegsToPass.push_back(std::make_pair(RegLo, Lo));
23315
23316 // Get the CCValAssign for the Hi part.
23317 CCValAssign &HiVA = ArgLocs[++i];
23318
23319 if (HiVA.isMemLoc()) {
23320 // Second half of f64 is passed on the stack.
23321 if (!StackPtr.getNode())
23322 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
23323 SDValue Address =
23324 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
23325 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
23326 // Emit the store.
23327 MemOpChains.push_back(DAG.getStore(
23328 Chain, DL, Hi, Address,
23329 MachinePointerInfo::getStack(MF, HiVA.getLocMemOffset())));
23330 } else {
23331 // Second half of f64 is passed in another GPR.
23332 Register RegHigh = HiVA.getLocReg();
23333 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
23334 }
23335 continue;
23336 }
23337
23338 // Promote the value if needed.
23339 // For now, only handle fully promoted and indirect arguments.
23340 if (VA.getLocInfo() == CCValAssign::Indirect) {
23341 // Store the argument in a stack slot and pass its address.
23342 Align StackAlign =
23343 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
23344 getPrefTypeAlign(ArgValue.getValueType(), DAG));
23345 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
23346 // If the original argument was split (e.g. i128), we need
23347 // to store the required parts of it here (and pass just one address).
23348 // Vectors may be partly split to registers and partly to the stack, in
23349 // which case the base address is partly offset and subsequent stores are
23350 // relative to that.
23351 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
23352 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
23353 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
23354 // Calculate the total size to store. We don't have access to what we're
23355 // actually storing other than performing the loop and collecting the
23356 // info.
23357 SmallVector<std::pair<SDValue, SDValue>> Parts;
23358 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
23359 SDValue PartValue = OutVals[OutIdx + 1];
23360 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
23361 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
23362 EVT PartVT = PartValue.getValueType();
23363 if (PartVT.isScalableVector())
23364 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
23365 StoredSize += PartVT.getStoreSize();
23366 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
23367 Parts.push_back(std::make_pair(PartValue, Offset));
23368 ++i;
23369 ++OutIdx;
23370 }
23371 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
23372 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
23373 MemOpChains.push_back(
23374 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
23375 MachinePointerInfo::getFixedStack(MF, FI)));
23376 for (const auto &Part : Parts) {
23377 SDValue PartValue = Part.first;
23378 SDValue PartOffset = Part.second;
23379 SDValue Address =
23380 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
23381 MemOpChains.push_back(
23382 DAG.getStore(Chain, DL, PartValue, Address,
23383 MachinePointerInfo::getFixedStack(MF, FI)));
23384 }
23385 ArgValue = SpillSlot;
23386 } else {
23387 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
23388 }
23389
23390 // Use local copy if it is a byval arg.
23391 if (Flags.isByVal())
23392 ArgValue = ByValArgs[j++];
23393
23394 if (VA.isRegLoc()) {
23395 // Queue up the argument copies and emit them at the end.
23396 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
23397
23398 const TargetOptions &Options = DAG.getTarget().Options;
23399 if (Options.EmitCallSiteInfo)
23400 CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), i);
23401 } else {
23402 assert(VA.isMemLoc() && "Argument not register or memory");
23403 assert(!IsTailCall && "Tail call not allowed if stack is used "
23404 "for passing parameters");
23405
23406 // Work out the address of the stack slot.
23407 if (!StackPtr.getNode())
23408 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
23409 SDValue Address =
23410 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
23411 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
23412 
23413 // Emit the store.
23414 MemOpChains.push_back(
23415 DAG.getStore(Chain, DL, ArgValue, Address,
23416 MachinePointerInfo::getStack(MF, VA.getLocMemOffset())));
23417 }
23418 }
23419
23420 // Join the stores, which are independent of one another.
23421 if (!MemOpChains.empty())
23422 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
23423
23424 SDValue Glue;
23425
23426 // Build a sequence of copy-to-reg nodes, chained and glued together.
23427 for (auto &Reg : RegsToPass) {
23428 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
23429 Glue = Chain.getValue(1);
23430 }
23431
23432 // Validate that none of the argument registers have been marked as
23433 // reserved; if so, report an error. Do the same for the return address if
23434 // this is not a tail call.
23435 validateCCReservedRegs(RegsToPass, MF);
23436 if (!IsTailCall && MF.getSubtarget().isRegisterReservedByUser(RISCV::X1))
23437 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
23438 MF.getFunction(),
23439 "Return address register required, but has been reserved."});
23440
23441 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
23442 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
23443 // split it and then direct call can be matched by PseudoCALL.
23444 bool CalleeIsLargeExternalSymbol = false;
23445 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
23446 if (auto *S = dyn_cast<GlobalAddressSDNode>(Callee))
23447 Callee = getLargeGlobalAddress(S, DL, PtrVT, DAG);
23448 else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
23449 Callee = getLargeExternalSymbol(S, DL, PtrVT, DAG);
23450 CalleeIsLargeExternalSymbol = true;
23451 }
23452 } else if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
23453 const GlobalValue *GV = S->getGlobal();
23454 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
23455 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
23456 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
23457 }
23458
23459 // The first call operand is the chain and the second is the target address.
23460 SmallVector<SDValue, 8> Ops;
23461 Ops.push_back(Chain);
23462 Ops.push_back(Callee);
23463
23464 // Add argument registers to the end of the list so that they are
23465 // known live into the call.
23466 for (auto &Reg : RegsToPass)
23467 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
23468
23469 // Add a register mask operand representing the call-preserved registers.
23470 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
23471 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
23472 assert(Mask && "Missing call preserved mask for calling convention");
23473 Ops.push_back(DAG.getRegisterMask(Mask));
23474
23475 // Glue the call to the argument copies, if any.
23476 if (Glue.getNode())
23477 Ops.push_back(Glue);
23478
23479 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
23480 "Unexpected CFI type for a direct call");
23481
23482 // Emit the call.
23483 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
23484
23485 // Use a software-guarded branch for large code model non-indirect calls. A
23486 // tail call to an external symbol will have a null CLI.CB, so we need
23487 // another way to determine the call site type.
23488 bool NeedSWGuarded = false;
23489 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
23490 Subtarget.hasStdExtZicfilp() &&
23491 ((CLI.CB && !CLI.CB->isIndirectCall()) || CalleeIsLargeExternalSymbol))
23492 NeedSWGuarded = true;
23493
23494 if (IsTailCall) {
23495 MF.getFrameInfo().setHasTailCall();
23496 unsigned CallOpc =
23497 NeedSWGuarded ? RISCVISD::SW_GUARDED_TAIL : RISCVISD::TAIL;
23498 SDValue Ret = DAG.getNode(CallOpc, DL, NodeTys, Ops);
23499 if (CLI.CFIType)
23500 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
23501 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
23502 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
23503 return Ret;
23504 }
23505
23506 unsigned CallOpc = NeedSWGuarded ? RISCVISD::SW_GUARDED_CALL : RISCVISD::CALL;
23507 Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
23508 if (CLI.CFIType)
23509 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
23510
23511 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
23512 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
23513 Glue = Chain.getValue(1);
23514
23515 // Mark the end of the call, which is glued to the call itself.
23516 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
23517 Glue = Chain.getValue(1);
23518
23519 // Assign locations to each value returned by this call.
23520 SmallVector<CCValAssign, 16> RVLocs;
23521 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
23522 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
23523
23524 // Copy all of the result registers out of their specified physreg.
23525 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
23526 auto &VA = RVLocs[i];
23527 // Copy the value out
23528 SDValue RetValue =
23529 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
23530 // Glue the RetValue to the end of the call sequence
23531 Chain = RetValue.getValue(1);
23532 Glue = RetValue.getValue(2);
23533
23534 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23535 assert(VA.needsCustom());
23536 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
23537 MVT::i32, Glue);
23538 Chain = RetValue2.getValue(1);
23539 Glue = RetValue2.getValue(2);
23540 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
23541 RetValue2);
23542 } else
23543 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
23544
23545 InVals.push_back(RetValue);
23546 }
23547
23548 return Chain;
23549}
23550
23551 bool RISCVTargetLowering::CanLowerReturn(
23552 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
23553 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
23554 const Type *RetTy) const {
23555 SmallVector<CCValAssign, 16> RVLocs;
23556 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
23557
23558 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
23559 MVT VT = Outs[i].VT;
23560 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
23561 if (CC_RISCV(i, VT, VT, CCValAssign::Full, ArgFlags, CCInfo,
23562 /*IsRet=*/true, Outs[i].OrigTy))
23563 return false;
23564 }
23565 return true;
23566}
23567
23568SDValue
23569 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
23570 bool IsVarArg,
23571 const SmallVectorImpl<ISD::OutputArg> &Outs,
23572 const SmallVectorImpl<SDValue> &OutVals,
23573 const SDLoc &DL, SelectionDAG &DAG) const {
23574 MachineFunction &MF = DAG.getMachineFunction();
23575 
23576 // Stores the assignment of the return value to a location.
23577 SmallVector<CCValAssign, 16> RVLocs;
23578 
23579 // Info about the registers and stack slot.
23580 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
23581 *DAG.getContext());
23582
23583 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
23584 nullptr, CC_RISCV);
23585
23586 if (CallConv == CallingConv::GHC && !RVLocs.empty())
23587 reportFatalUsageError("GHC functions return void only");
23588
23589 SDValue Glue;
23590 SmallVector<SDValue, 4> RetOps(1, Chain);
23591
23592 // Copy the result values into the output registers.
23593 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
23594 SDValue Val = OutVals[OutIdx];
23595 CCValAssign &VA = RVLocs[i];
23596 assert(VA.isRegLoc() && "Can only return in registers!");
23597
23598 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23599 // Handle returning f64 on RV32D with a soft float ABI.
23600 assert(VA.isRegLoc() && "Expected return via registers");
23601 assert(VA.needsCustom());
23602 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
23603 DAG.getVTList(MVT::i32, MVT::i32), Val);
23604 SDValue Lo = SplitF64.getValue(0);
23605 SDValue Hi = SplitF64.getValue(1);
23606 Register RegLo = VA.getLocReg();
23607 Register RegHi = RVLocs[++i].getLocReg();
23608
23609 if (Subtarget.isRegisterReservedByUser(RegLo) ||
23610 Subtarget.isRegisterReservedByUser(RegHi))
23611 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
23612 MF.getFunction(),
23613 "Return value register required, but has been reserved."});
23614
23615 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
23616 Glue = Chain.getValue(1);
23617 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
23618 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
23619 Glue = Chain.getValue(1);
23620 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
23621 } else {
23622 // Handle a 'normal' return.
23623 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
23624 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
23625
23626 if (Subtarget.isRegisterReservedByUser(VA.getLocReg()))
23627 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
23628 MF.getFunction(),
23629 "Return value register required, but has been reserved."});
23630
23631 // Guarantee that all emitted copies are stuck together.
23632 Glue = Chain.getValue(1);
23633 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
23634 }
23635 }
23636
23637 RetOps[0] = Chain; // Update chain.
23638
23639 // Add the glue node if we have it.
23640 if (Glue.getNode()) {
23641 RetOps.push_back(Glue);
23642 }
23643
23644 if (any_of(RVLocs,
23645 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
23646 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
23647 
23648 unsigned RetOpc = RISCVISD::RET_GLUE;
23649 // Interrupt service routines use different return instructions.
23650 const Function &Func = DAG.getMachineFunction().getFunction();
23651 if (Func.hasFnAttribute("interrupt")) {
23652 if (!Func.getReturnType()->isVoidTy())
23654 "Functions with the interrupt attribute must have void return type!");
23655
23657 StringRef Kind =
23658 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
23659
23660 if (Kind == "supervisor")
23661 RetOpc = RISCVISD::SRET_GLUE;
23662 else if (Kind == "rnmi") {
23663 assert(Subtarget.hasFeature(RISCV::FeatureStdExtSmrnmi) &&
23664 "Need Smrnmi extension for rnmi");
23665 RetOpc = RISCVISD::MNRET_GLUE;
23666 } else if (Kind == "qci-nest" || Kind == "qci-nonest") {
23667 assert(Subtarget.hasFeature(RISCV::FeatureVendorXqciint) &&
23668 "Need Xqciint for qci-(no)nest");
23669 RetOpc = RISCVISD::QC_C_MILEAVERET_GLUE;
23670 } else
23671 RetOpc = RISCVISD::MRET_GLUE;
23672 }
23673
23674 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
23675}
23676
23677void RISCVTargetLowering::validateCCReservedRegs(
23678 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
23679 MachineFunction &MF) const {
23680 const Function &F = MF.getFunction();
23681
23682 if (llvm::any_of(Regs, [this](auto Reg) {
23683 return Subtarget.isRegisterReservedByUser(Reg.first);
23684 }))
23685 F.getContext().diagnose(DiagnosticInfoUnsupported{
23686 F, "Argument register required, but has been reserved."});
23687}
23688
23689// Check if the result of the node is only used as a return value, as
23690// otherwise we can't perform a tail-call.
23691 bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
23692 if (N->getNumValues() != 1)
23693 return false;
23694 if (!N->hasNUsesOfValue(1, 0))
23695 return false;
23696
23697 SDNode *Copy = *N->user_begin();
23698
23699 if (Copy->getOpcode() == ISD::BITCAST) {
23700 return isUsedByReturnOnly(Copy, Chain);
23701 }
23702
23703 // TODO: Handle additional opcodes in order to support tail-calling libcalls
23704 // with soft float ABIs.
23705 if (Copy->getOpcode() != ISD::CopyToReg) {
23706 return false;
23707 }
23708
23709 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
23710 // isn't safe to perform a tail call.
23711 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
23712 return false;
23713
23714 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
23715 bool HasRet = false;
23716 for (SDNode *Node : Copy->users()) {
23717 if (Node->getOpcode() != RISCVISD::RET_GLUE)
23718 return false;
23719 HasRet = true;
23720 }
23721 if (!HasRet)
23722 return false;
23723
23724 Chain = Copy->getOperand(0);
23725 return true;
23726}
23727
23728 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
23729 return CI->isTailCall();
23730}
23731
23732/// getConstraintType - Given a constraint letter, return the type of
23733/// constraint it is for this target.
23734 RISCVTargetLowering::ConstraintType
23735 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
23736 if (Constraint.size() == 1) {
23737 switch (Constraint[0]) {
23738 default:
23739 break;
23740 case 'f':
23741 case 'R':
23742 return C_RegisterClass;
23743 case 'I':
23744 case 'J':
23745 case 'K':
23746 return C_Immediate;
23747 case 'A':
23748 return C_Memory;
23749 case 's':
23750 case 'S': // A symbolic address
23751 return C_Other;
23752 }
23753 } else {
23754 if (Constraint == "vr" || Constraint == "vd" || Constraint == "vm")
23755 return C_RegisterClass;
23756 if (Constraint == "cr" || Constraint == "cR" || Constraint == "cf")
23757 return C_RegisterClass;
23758 }
23759 return TargetLowering::getConstraintType(Constraint);
23760}
23761
23762std::pair<unsigned, const TargetRegisterClass *>
23763 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
23764 StringRef Constraint,
23765 MVT VT) const {
23766 // First, see if this is a constraint that directly corresponds to a RISC-V
23767 // register class.
23768 if (Constraint.size() == 1) {
23769 switch (Constraint[0]) {
23770 case 'r':
23771 // TODO: Support fixed vectors up to XLen for P extension?
23772 if (VT.isVector())
23773 break;
23774 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
23775 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
23776 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
23777 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
23778 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23779 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
23780 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
23781 case 'f':
23782 if (VT == MVT::f16) {
23783 if (Subtarget.hasStdExtZfhmin())
23784 return std::make_pair(0U, &RISCV::FPR16RegClass);
23785 if (Subtarget.hasStdExtZhinxmin())
23786 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
23787 } else if (VT == MVT::f32) {
23788 if (Subtarget.hasStdExtF())
23789 return std::make_pair(0U, &RISCV::FPR32RegClass);
23790 if (Subtarget.hasStdExtZfinx())
23791 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
23792 } else if (VT == MVT::f64) {
23793 if (Subtarget.hasStdExtD())
23794 return std::make_pair(0U, &RISCV::FPR64RegClass);
23795 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23796 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
23797 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
23798 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
23799 }
23800 break;
23801 case 'R':
23802 if (((VT == MVT::i64 || VT == MVT::f64) && !Subtarget.is64Bit()) ||
23803 (VT == MVT::i128 && Subtarget.is64Bit()))
23804 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
23805 break;
23806 default:
23807 break;
23808 }
23809 } else if (Constraint == "vr") {
23810 for (const auto *RC :
23811 {&RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
23812 &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN3M1RegClass,
23813 &RISCV::VRN4M1RegClass, &RISCV::VRN5M1RegClass,
23814 &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass,
23815 &RISCV::VRN8M1RegClass, &RISCV::VRN2M2RegClass,
23816 &RISCV::VRN3M2RegClass, &RISCV::VRN4M2RegClass,
23817 &RISCV::VRN2M4RegClass}) {
23818 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
23819 return std::make_pair(0U, RC);
23820
23821 if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
23822 MVT ContainerVT = getContainerForFixedLengthVector(VT);
23823 if (TRI->isTypeLegalForClass(*RC, ContainerVT))
23824 return std::make_pair(0U, RC);
23825 }
23826 }
23827 } else if (Constraint == "vd") {
23828 for (const auto *RC :
23829 {&RISCV::VRNoV0RegClass, &RISCV::VRM2NoV0RegClass,
23830 &RISCV::VRM4NoV0RegClass, &RISCV::VRM8NoV0RegClass,
23831 &RISCV::VRN2M1NoV0RegClass, &RISCV::VRN3M1NoV0RegClass,
23832 &RISCV::VRN4M1NoV0RegClass, &RISCV::VRN5M1NoV0RegClass,
23833 &RISCV::VRN6M1NoV0RegClass, &RISCV::VRN7M1NoV0RegClass,
23834 &RISCV::VRN8M1NoV0RegClass, &RISCV::VRN2M2NoV0RegClass,
23835 &RISCV::VRN3M2NoV0RegClass, &RISCV::VRN4M2NoV0RegClass,
23836 &RISCV::VRN2M4NoV0RegClass}) {
23837 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
23838 return std::make_pair(0U, RC);
23839
23840 if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
23841 MVT ContainerVT = getContainerForFixedLengthVector(VT);
23842 if (TRI->isTypeLegalForClass(*RC, ContainerVT))
23843 return std::make_pair(0U, RC);
23844 }
23845 }
23846 } else if (Constraint == "vm") {
23847 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
23848 return std::make_pair(0U, &RISCV::VMV0RegClass);
23849
23850 if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
23851 MVT ContainerVT = getContainerForFixedLengthVector(VT);
23852 // VT here might be coerced to a vector with i8 elements, so we need to
23853 // check against the M1 register class here instead of VMV0RegClass.
23854 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, ContainerVT))
23855 return std::make_pair(0U, &RISCV::VMV0RegClass);
23856 }
23857 } else if (Constraint == "cr") {
23858 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
23859 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
23860 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
23861 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
23862 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23863 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
23864 if (!VT.isVector())
23865 return std::make_pair(0U, &RISCV::GPRCRegClass);
23866 } else if (Constraint == "cR") {
23867 if (((VT == MVT::i64 || VT == MVT::f64) && !Subtarget.is64Bit()) ||
23868 (VT == MVT::i128 && Subtarget.is64Bit()))
23869 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
23870 } else if (Constraint == "cf") {
23871 if (VT == MVT::f16) {
23872 if (Subtarget.hasStdExtZfhmin())
23873 return std::make_pair(0U, &RISCV::FPR16CRegClass);
23874 if (Subtarget.hasStdExtZhinxmin())
23875 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
23876 } else if (VT == MVT::f32) {
23877 if (Subtarget.hasStdExtF())
23878 return std::make_pair(0U, &RISCV::FPR32CRegClass);
23879 if (Subtarget.hasStdExtZfinx())
23880 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
23881 } else if (VT == MVT::f64) {
23882 if (Subtarget.hasStdExtD())
23883 return std::make_pair(0U, &RISCV::FPR64CRegClass);
23884 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23885 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
23886 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
23887 return std::make_pair(0U, &RISCV::GPRCRegClass);
23888 }
23889 }
23890
23891 // Clang will correctly decode the usage of register name aliases into their
23892 // official names. However, other frontends like `rustc` do not. This allows
23893 // users of these frontends to use the ABI names for registers in LLVM-style
23894 // register constraints.
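// e.g. an inline asm constraint written as "{a0}" or "{fp}" resolves below
// to RISCV::X10 and RISCV::X8 respectively.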
23895 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
23896 .Case("{zero}", RISCV::X0)
23897 .Case("{ra}", RISCV::X1)
23898 .Case("{sp}", RISCV::X2)
23899 .Case("{gp}", RISCV::X3)
23900 .Case("{tp}", RISCV::X4)
23901 .Case("{t0}", RISCV::X5)
23902 .Case("{t1}", RISCV::X6)
23903 .Case("{t2}", RISCV::X7)
23904 .Cases("{s0}", "{fp}", RISCV::X8)
23905 .Case("{s1}", RISCV::X9)
23906 .Case("{a0}", RISCV::X10)
23907 .Case("{a1}", RISCV::X11)
23908 .Case("{a2}", RISCV::X12)
23909 .Case("{a3}", RISCV::X13)
23910 .Case("{a4}", RISCV::X14)
23911 .Case("{a5}", RISCV::X15)
23912 .Case("{a6}", RISCV::X16)
23913 .Case("{a7}", RISCV::X17)
23914 .Case("{s2}", RISCV::X18)
23915 .Case("{s3}", RISCV::X19)
23916 .Case("{s4}", RISCV::X20)
23917 .Case("{s5}", RISCV::X21)
23918 .Case("{s6}", RISCV::X22)
23919 .Case("{s7}", RISCV::X23)
23920 .Case("{s8}", RISCV::X24)
23921 .Case("{s9}", RISCV::X25)
23922 .Case("{s10}", RISCV::X26)
23923 .Case("{s11}", RISCV::X27)
23924 .Case("{t3}", RISCV::X28)
23925 .Case("{t4}", RISCV::X29)
23926 .Case("{t5}", RISCV::X30)
23927 .Case("{t6}", RISCV::X31)
23928 .Default(RISCV::NoRegister);
23929 if (XRegFromAlias != RISCV::NoRegister)
23930 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
23931
23932 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
23933 // TableGen record rather than the AsmName to choose registers for InlineAsm
23934 // constraints, plus we want to match those names to the widest floating point
23935 // register type available, manually select floating point registers here.
23936 //
23937 // The second case is the ABI name of the register, so that frontends can also
23938 // use the ABI names in register constraint lists.
23939 if (Subtarget.hasStdExtF()) {
23940 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
23941 .Cases("{f0}", "{ft0}", RISCV::F0_F)
23942 .Cases("{f1}", "{ft1}", RISCV::F1_F)
23943 .Cases("{f2}", "{ft2}", RISCV::F2_F)
23944 .Cases("{f3}", "{ft3}", RISCV::F3_F)
23945 .Cases("{f4}", "{ft4}", RISCV::F4_F)
23946 .Cases("{f5}", "{ft5}", RISCV::F5_F)
23947 .Cases("{f6}", "{ft6}", RISCV::F6_F)
23948 .Cases("{f7}", "{ft7}", RISCV::F7_F)
23949 .Cases("{f8}", "{fs0}", RISCV::F8_F)
23950 .Cases("{f9}", "{fs1}", RISCV::F9_F)
23951 .Cases("{f10}", "{fa0}", RISCV::F10_F)
23952 .Cases("{f11}", "{fa1}", RISCV::F11_F)
23953 .Cases("{f12}", "{fa2}", RISCV::F12_F)
23954 .Cases("{f13}", "{fa3}", RISCV::F13_F)
23955 .Cases("{f14}", "{fa4}", RISCV::F14_F)
23956 .Cases("{f15}", "{fa5}", RISCV::F15_F)
23957 .Cases("{f16}", "{fa6}", RISCV::F16_F)
23958 .Cases("{f17}", "{fa7}", RISCV::F17_F)
23959 .Cases("{f18}", "{fs2}", RISCV::F18_F)
23960 .Cases("{f19}", "{fs3}", RISCV::F19_F)
23961 .Cases("{f20}", "{fs4}", RISCV::F20_F)
23962 .Cases("{f21}", "{fs5}", RISCV::F21_F)
23963 .Cases("{f22}", "{fs6}", RISCV::F22_F)
23964 .Cases("{f23}", "{fs7}", RISCV::F23_F)
23965 .Cases("{f24}", "{fs8}", RISCV::F24_F)
23966 .Cases("{f25}", "{fs9}", RISCV::F25_F)
23967 .Cases("{f26}", "{fs10}", RISCV::F26_F)
23968 .Cases("{f27}", "{fs11}", RISCV::F27_F)
23969 .Cases("{f28}", "{ft8}", RISCV::F28_F)
23970 .Cases("{f29}", "{ft9}", RISCV::F29_F)
23971 .Cases("{f30}", "{ft10}", RISCV::F30_F)
23972 .Cases("{f31}", "{ft11}", RISCV::F31_F)
23973 .Default(RISCV::NoRegister);
23974 if (FReg != RISCV::NoRegister) {
23975 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
23976 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
23977 unsigned RegNo = FReg - RISCV::F0_F;
23978 unsigned DReg = RISCV::F0_D + RegNo;
23979 return std::make_pair(DReg, &RISCV::FPR64RegClass);
23980 }
23981 if (VT == MVT::f32 || VT == MVT::Other)
23982 return std::make_pair(FReg, &RISCV::FPR32RegClass);
23983 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
23984 unsigned RegNo = FReg - RISCV::F0_F;
23985 unsigned HReg = RISCV::F0_H + RegNo;
23986 return std::make_pair(HReg, &RISCV::FPR16RegClass);
23987 }
23988 }
23989 }
23990
23991 if (Subtarget.hasVInstructions()) {
23992 Register VReg = StringSwitch<Register>(Constraint.lower())
23993 .Case("{v0}", RISCV::V0)
23994 .Case("{v1}", RISCV::V1)
23995 .Case("{v2}", RISCV::V2)
23996 .Case("{v3}", RISCV::V3)
23997 .Case("{v4}", RISCV::V4)
23998 .Case("{v5}", RISCV::V5)
23999 .Case("{v6}", RISCV::V6)
24000 .Case("{v7}", RISCV::V7)
24001 .Case("{v8}", RISCV::V8)
24002 .Case("{v9}", RISCV::V9)
24003 .Case("{v10}", RISCV::V10)
24004 .Case("{v11}", RISCV::V11)
24005 .Case("{v12}", RISCV::V12)
24006 .Case("{v13}", RISCV::V13)
24007 .Case("{v14}", RISCV::V14)
24008 .Case("{v15}", RISCV::V15)
24009 .Case("{v16}", RISCV::V16)
24010 .Case("{v17}", RISCV::V17)
24011 .Case("{v18}", RISCV::V18)
24012 .Case("{v19}", RISCV::V19)
24013 .Case("{v20}", RISCV::V20)
24014 .Case("{v21}", RISCV::V21)
24015 .Case("{v22}", RISCV::V22)
24016 .Case("{v23}", RISCV::V23)
24017 .Case("{v24}", RISCV::V24)
24018 .Case("{v25}", RISCV::V25)
24019 .Case("{v26}", RISCV::V26)
24020 .Case("{v27}", RISCV::V27)
24021 .Case("{v28}", RISCV::V28)
24022 .Case("{v29}", RISCV::V29)
24023 .Case("{v30}", RISCV::V30)
24024 .Case("{v31}", RISCV::V31)
24025 .Default(RISCV::NoRegister);
24026 if (VReg != RISCV::NoRegister) {
24027 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
24028 return std::make_pair(VReg, &RISCV::VMRegClass);
24029 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
24030 return std::make_pair(VReg, &RISCV::VRRegClass);
24031 for (const auto *RC :
24032 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
24033 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
24034 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
24035 return std::make_pair(VReg, RC);
24036 }
24037 }
24038 }
24039 }
24040
24041 std::pair<Register, const TargetRegisterClass *> Res =
24042 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
24043 
24044 // If we picked one of the Zfinx register classes, remap it to the GPR class.
24045 // FIXME: When Zfinx is supported in CodeGen this will need to take the
24046 // Subtarget into account.
24047 if (Res.second == &RISCV::GPRF16RegClass ||
24048 Res.second == &RISCV::GPRF32RegClass ||
24049 Res.second == &RISCV::GPRPairRegClass)
24050 return std::make_pair(Res.first, &RISCV::GPRRegClass);
24051
24052 return Res;
24053}
24054
24055 InlineAsm::ConstraintCode
24056 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
24057 // Currently only support length 1 constraints.
24058 if (ConstraintCode.size() == 1) {
24059 switch (ConstraintCode[0]) {
24060 case 'A':
24061 return InlineAsm::ConstraintCode::A;
24062 default:
24063 break;
24064 }
24065 }
24066
24067 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
24068}
24069
24070 void RISCVTargetLowering::LowerAsmOperandForConstraint(
24071 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
24072 SelectionDAG &DAG) const {
24073 // Currently only support length 1 constraints.
24074 if (Constraint.size() == 1) {
24075 switch (Constraint[0]) {
24076 case 'I':
24077 // Validate & create a 12-bit signed immediate operand.
24078 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
24079 uint64_t CVal = C->getSExtValue();
24080 if (isInt<12>(CVal))
24081 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
24082 Subtarget.getXLenVT()));
24083 }
24084 return;
24085 case 'J':
24086 // Validate & create an integer zero operand.
24087 if (isNullConstant(Op))
24088 Ops.push_back(
24089 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
24090 return;
24091 case 'K':
24092 // Validate & create a 5-bit unsigned immediate operand.
24093 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
24094 uint64_t CVal = C->getZExtValue();
24095 if (isUInt<5>(CVal))
24096 Ops.push_back(
24097 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
24098 }
24099 return;
24100 case 'S':
24101 TargetLowering::LowerAsmOperandForConstraint(Op, "s", Ops, DAG);
24102 return;
24103 default:
24104 break;
24105 }
24106 }
24107 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
24108}
24109
24110 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
24111 Instruction *Inst,
24112 AtomicOrdering Ord) const {
24113 if (Subtarget.hasStdExtZtso()) {
24114 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
24115 return Builder.CreateFence(Ord);
24116 return nullptr;
24117 }
24118
24119 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
24120 return Builder.CreateFence(Ord);
24121 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
24122 return Builder.CreateFence(AtomicOrdering::Release);
24123 return nullptr;
24124}
24125
24126 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
24127 Instruction *Inst,
24128 AtomicOrdering Ord) const {
24129 if (Subtarget.hasStdExtZtso()) {
24130 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
24131 return Builder.CreateFence(Ord);
24132 return nullptr;
24133 }
24134
24135 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
24136 return Builder.CreateFence(AtomicOrdering::Acquire);
24137 if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&
24138 Ord == AtomicOrdering::SequentiallyConsistent)
24139 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
24140 return nullptr;
24141}
24142
24143 TargetLowering::AtomicExpansionKind
24144 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
24145 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
24146 // point operations can't be used in an lr/sc sequence without breaking the
24147 // forward-progress guarantee.
24148 if (AI->isFloatingPointOperation() ||
24149 AI->getOperation() == AtomicRMWInst::UIncWrap ||
24150 AI->getOperation() == AtomicRMWInst::UDecWrap ||
24151 AI->getOperation() == AtomicRMWInst::USubCond ||
24152 AI->getOperation() == AtomicRMWInst::USubSat)
24153 return AtomicExpansionKind::CmpXChg;
24154 
24155 // Don't expand forced atomics, we want to have __sync libcalls instead.
24156 if (Subtarget.hasForcedAtomics())
24157 return AtomicExpansionKind::None;
24158 
24159 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
24160 if (AI->getOperation() == AtomicRMWInst::Nand) {
24161 if (Subtarget.hasStdExtZacas() &&
24162 (Size >= 32 || Subtarget.hasStdExtZabha()))
24163 return AtomicExpansionKind::CmpXChg;
24164 if (Size < 32)
24165 return AtomicExpansionKind::MaskedIntrinsic;
24166 }
24167
24168 if (Size < 32 && !Subtarget.hasStdExtZabha())
24169 return AtomicExpansionKind::MaskedIntrinsic;
24170 
24171 return AtomicExpansionKind::None;
24172 }
24173
24174static Intrinsic::ID
24175 getIntrinsicForMaskedAtomicRMWBinOp(AtomicRMWInst::BinOp BinOp) {
24176 switch (BinOp) {
24177 default:
24178 llvm_unreachable("Unexpected AtomicRMW BinOp");
24179 case AtomicRMWInst::Xchg:
24180 return Intrinsic::riscv_masked_atomicrmw_xchg;
24181 case AtomicRMWInst::Add:
24182 return Intrinsic::riscv_masked_atomicrmw_add;
24183 case AtomicRMWInst::Sub:
24184 return Intrinsic::riscv_masked_atomicrmw_sub;
24185 case AtomicRMWInst::Nand:
24186 return Intrinsic::riscv_masked_atomicrmw_nand;
24187 case AtomicRMWInst::Max:
24188 return Intrinsic::riscv_masked_atomicrmw_max;
24189 case AtomicRMWInst::Min:
24190 return Intrinsic::riscv_masked_atomicrmw_min;
24191 case AtomicRMWInst::UMax:
24192 return Intrinsic::riscv_masked_atomicrmw_umax;
24193 case AtomicRMWInst::UMin:
24194 return Intrinsic::riscv_masked_atomicrmw_umin;
24195 }
24196}
24197
24198 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
24199 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
24200 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
24201 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
24202 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
24203 // mask, as this produces better code than the LR/SC loop emitted by
24204 // int_riscv_masked_atomicrmw_xchg.
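// e.g. "atomicrmw xchg ptr %p, i8 0 monotonic" becomes an AtomicRMWInst::And
// with the inverted byte mask on the aligned word, which can lower to a
// single amoand.w rather than an LR/SC loop.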
24205 if (AI->getOperation() == AtomicRMWInst::Xchg &&
24206 isa<ConstantInt>(AI->getValOperand())) {
24207 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
24208 if (CVal->isZero())
24209 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
24210 Builder.CreateNot(Mask, "Inv_Mask"),
24211 AI->getAlign(), Ord);
24212 if (CVal->isMinusOne())
24213 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
24214 AI->getAlign(), Ord);
24215 }
24216
24217 unsigned XLen = Subtarget.getXLen();
24218 Value *Ordering =
24219 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
24220 Type *Tys[] = {Builder.getIntNTy(XLen), AlignedAddr->getType()};
24221 Function *LrwOpScwLoop = Intrinsic::getOrInsertDeclaration(
24222 AI->getModule(),
24223 getIntrinsicForMaskedAtomicRMWBinOp(AI->getOperation()), Tys);
24224 
24225 if (XLen == 64) {
24226 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
24227 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
24228 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
24229 }
24230
24231 Value *Result;
24232
24233 // Must pass the shift amount needed to sign extend the loaded value prior
24234 // to performing a signed comparison for min/max. ShiftAmt is the number of
24235 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
24236 // is the number of bits to left+right shift the value in order to
24237 // sign-extend.
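// e.g. for an i8 atomicrmw min on RV32 with the byte at bit offset 16,
// ShiftAmt is 16 and SextShamt is 32 - 8 - 16 = 8: the LR/SC loop shifts the
// loaded field left and then arithmetically right by 8 to sign-extend it.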
24238 if (AI->getOperation() == AtomicRMWInst::Min ||
24240 const DataLayout &DL = AI->getDataLayout();
24241 unsigned ValWidth =
24242 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
24243 Value *SextShamt =
24244 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
24245 Result = Builder.CreateCall(LrwOpScwLoop,
24246 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
24247 } else {
24248 Result =
24249 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
24250 }
24251
24252 if (XLen == 64)
24253 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
24254 return Result;
24255}
24256
24257 TargetLowering::AtomicExpansionKind
24258 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
24259 AtomicCmpXchgInst *CI) const {
24260 // Don't expand forced atomics, we want to have __sync libcalls instead.
24261 if (Subtarget.hasForcedAtomics())
24262 return AtomicExpansionKind::None;
24263 
24264 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
24265 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
24266 (Size == 8 || Size == 16))
24267 return AtomicExpansionKind::MaskedIntrinsic;
24268 return AtomicExpansionKind::None;
24269}
24270
24271 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
24272 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
24273 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
24274 unsigned XLen = Subtarget.getXLen();
24275 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
24276 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg;
24277 if (XLen == 64) {
24278 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
24279 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
24280 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
24281 }
24282 Type *Tys[] = {Builder.getIntNTy(XLen), AlignedAddr->getType()};
24283 Value *Result = Builder.CreateIntrinsic(
24284 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
24285 if (XLen == 64)
24286 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
24287 return Result;
24288}
24289
24290bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
24291 EVT DataVT) const {
24292 // We have indexed loads for all supported EEW types. Indices are always
24293 // zero extended.
24294 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
24295 isTypeLegal(Extend.getValueType()) &&
24296 isTypeLegal(Extend.getOperand(0).getValueType()) &&
24297 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
24298}
24299
24300bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
24301 EVT VT) const {
24302 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
24303 return false;
24304
24305 switch (FPVT.getSimpleVT().SimpleTy) {
24306 case MVT::f16:
24307 return Subtarget.hasStdExtZfhmin();
24308 case MVT::f32:
24309 return Subtarget.hasStdExtF();
24310 case MVT::f64:
24311 return Subtarget.hasStdExtD();
24312 default:
24313 return false;
24314 }
24315}
24316
24317unsigned RISCVTargetLowering::getJumpTableEncoding() const {
24318 // If we are using the small code model, we can reduce the size of a jump
24319 // table entry to 4 bytes.
24320 if (Subtarget.is64Bit() && !isPositionIndependent() &&
24321 getTargetMachine().getCodeModel() == CodeModel::Small) {
24322 return MachineJumpTableInfo::EK_Custom32;
24323 }
24324 return TargetLowering::getJumpTableEncoding();
24325}
24326
24327const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
24328 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
24329 unsigned uid, MCContext &Ctx) const {
24330 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
24331 getTargetMachine().getCodeModel() == CodeModel::Small);
24332 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
24333}
24334
24335bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
24336 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
24337 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
24338 // a power of two as well.
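// For example, VLEN == 128 gives vscale == 128/64 == 2, and VLEN == 256
// gives vscale == 4; both are powers of two.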
24339 // FIXME: This doesn't work for zve32, but that's already broken
24340 // elsewhere for the same reason.
24341 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
24342 static_assert(RISCV::RVVBitsPerBlock == 64,
24343 "RVVBitsPerBlock changed, audit needed");
24344 return true;
24345}
24346
24347bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
24348 SDValue &Offset,
24349 ISD::MemIndexedMode &AM,
24350 SelectionDAG &DAG) const {
24351 // Target does not support indexed loads.
24352 if (!Subtarget.hasVendorXTHeadMemIdx())
24353 return false;
24354
24355 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
24356 return false;
24357
24358 Base = Op->getOperand(0);
24359 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
24360 int64_t RHSC = RHS->getSExtValue();
24361 if (Op->getOpcode() == ISD::SUB)
24362 RHSC = -(uint64_t)RHSC;
24363
24364 // The constants that can be encoded in the THeadMemIdx instructions
24365 // are of the form (sign_extend(imm5) << imm2).
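// For example, an offset of 120 (15 << 3) is encodable, while 124 (31 << 2)
// is not, since 31 does not fit in a signed 5-bit immediate.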
24366 bool isLegalIndexedOffset = false;
24367 for (unsigned i = 0; i < 4; i++)
24368 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
24369 isLegalIndexedOffset = true;
24370 break;
24371 }
24372
24373 if (!isLegalIndexedOffset)
24374 return false;
24375
24376 Offset = Op->getOperand(1);
24377 return true;
24378 }
24379
24380 return false;
24381}
24382
24383bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
24384 SDValue &Offset,
24385 ISD::MemIndexedMode &AM,
24386 SelectionDAG &DAG) const {
24387 EVT VT;
24388 SDValue Ptr;
24389 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
24390 VT = LD->getMemoryVT();
24391 Ptr = LD->getBasePtr();
24392 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
24393 VT = ST->getMemoryVT();
24394 Ptr = ST->getBasePtr();
24395 } else
24396 return false;
24397
24398 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
24399 return false;
24400
24401 AM = ISD::PRE_INC;
24402 return true;
24403}
24404
24405bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
24406 SDValue &Base,
24407 SDValue &Offset,
24408 ISD::MemIndexedMode &AM,
24409 SelectionDAG &DAG) const {
24410 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
24411 if (Op->getOpcode() != ISD::ADD)
24412 return false;
24413
24414 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N))
24415 Base = LS->getBasePtr();
24416 else
24417 return false;
24418
24419 if (Base == Op->getOperand(0))
24420 Offset = Op->getOperand(1);
24421 else if (Base == Op->getOperand(1))
24422 Offset = Op->getOperand(0);
24423 else
24424 return false;
24425
24426 AM = ISD::POST_INC;
24427 return true;
24428 }
24429
24430 EVT VT;
24431 SDValue Ptr;
24432 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
24433 VT = LD->getMemoryVT();
24434 Ptr = LD->getBasePtr();
24435 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
24436 VT = ST->getMemoryVT();
24437 Ptr = ST->getBasePtr();
24438 } else
24439 return false;
24440
24441 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
24442 return false;
24443 // Post-indexing updates the base, so it's not a valid transform
24444 // if that's not the same as the load's pointer.
24445 if (Ptr != Base)
24446 return false;
24447
24448 AM = ISD::POST_INC;
24449 return true;
24450}
24451
24452bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
24453 EVT VT) const {
24454 EVT SVT = VT.getScalarType();
24455
24456 if (!SVT.isSimple())
24457 return false;
24458
24459 switch (SVT.getSimpleVT().SimpleTy) {
24460 case MVT::f16:
24461 return VT.isVector() ? Subtarget.hasVInstructionsF16()
24462 : Subtarget.hasStdExtZfhOrZhinx();
24463 case MVT::f32:
24464 return Subtarget.hasStdExtFOrZfinx();
24465 case MVT::f64:
24466 return Subtarget.hasStdExtDOrZdinx();
24467 default:
24468 break;
24469 }
24470
24471 return false;
24472}
24473
24474ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const {
24475 // Zacas will use amocas.w which does not require extension.
24476 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
24477}
24478
24479Register RISCVTargetLowering::getExceptionPointerRegister(
24480 const Constant *PersonalityFn) const {
24481 return RISCV::X10;
24482}
24483
24484Register RISCVTargetLowering::getExceptionSelectorRegister(
24485 const Constant *PersonalityFn) const {
24486 return RISCV::X11;
24487}
24488
24489bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
24490 // Return false to suppress the unnecessary extensions if the LibCall
24491 // arguments or return value is a float narrower than XLEN on a soft FP ABI.
24492 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
24493 Type.getSizeInBits() < Subtarget.getXLen()))
24494 return false;
24495
24496 return true;
24497}
24498
24499bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(Type *Ty,
24500 bool IsSigned) const {
24501 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
24502 return true;
24503
24504 return IsSigned;
24505}
24506
24507bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
24508 SDValue C) const {
24509 // Check integral scalar types.
24510 if (!VT.isScalarInteger())
24511 return false;
24512
24513 // Omit the optimization if the subtarget has the Zmmul extension and the
24514 // data size exceeds XLen.
24515 const bool HasZmmul = Subtarget.hasStdExtZmmul();
24516 if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen())
24517 return false;
24518
24519 auto *ConstNode = cast<ConstantSDNode>(C);
24520 const APInt &Imm = ConstNode->getAPIntValue();
24521
24522 // Don't do this if the Xqciac extension is enabled and the Imm is a simm12.
24523 if (Subtarget.hasVendorXqciac() && Imm.isSignedIntN(12))
24524 return false;
24525
24526 // Break the MUL to a SLLI and an ADD/SUB.
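// For example, x * 9 becomes (x << 3) + x and x * 7 becomes (x << 3) - x.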
24527 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
24528 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
24529 return true;
24530
24531 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
24532 if (Subtarget.hasShlAdd(3) && !Imm.isSignedIntN(12) &&
24533 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
24534 (Imm - 8).isPowerOf2()))
24535 return true;
24536
24537 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
24538 // a pair of LUI/ADDI.
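// For example, x * 4608 (4608 == 9 << 9) becomes (x << 12) + (x << 9).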
24539 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
24540 ConstNode->hasOneUse()) {
24541 APInt ImmS = Imm.ashr(Imm.countr_zero());
24542 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
24543 (1 - ImmS).isPowerOf2())
24544 return true;
24545 }
24546
24547 return false;
24548}
24549
24550bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
24551 SDValue ConstNode) const {
24552 // Let the DAGCombiner decide for vectors.
24553 EVT VT = AddNode.getValueType();
24554 if (VT.isVector())
24555 return true;
24556
24557 // Let the DAGCombiner decide for larger types.
24558 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
24559 return true;
24560
24561 // It is worse if c1 is simm12 while c1*c2 is not.
24562 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
24563 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
24564 const APInt &C1 = C1Node->getAPIntValue();
24565 const APInt &C2 = C2Node->getAPIntValue();
24566 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
24567 return false;
24568
24569 // Default to true and let the DAGCombiner decide.
24570 return true;
24571}
24572
24573bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
24574 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
24575 unsigned *Fast) const {
24576 if (!VT.isVector()) {
24577 if (Fast)
24578 *Fast = Subtarget.enableUnalignedScalarMem();
24579 return Subtarget.enableUnalignedScalarMem();
24580 }
24581
24582 // All vector implementations must support element alignment
24583 EVT ElemVT = VT.getVectorElementType();
24584 if (Alignment >= ElemVT.getStoreSize()) {
24585 if (Fast)
24586 *Fast = 1;
24587 return true;
24588 }
24589
24590 // Note: We lower an unmasked unaligned vector access to an equally sized
24591 // e8 element type access. Given this, we effectively support all unmasked
24592 // misaligned accesses. TODO: Work through the codegen implications of
24593 // allowing such accesses to be formed, and considered fast.
24594 if (Fast)
24595 *Fast = Subtarget.enableUnalignedVectorMem();
24596 return Subtarget.enableUnalignedVectorMem();
24597}
24598
24599EVT RISCVTargetLowering::getOptimalMemOpType(
24600 LLVMContext &Context, const MemOp &Op,
24601 const AttributeList &FuncAttributes) const {
24602 if (!Subtarget.hasVInstructions())
24603 return MVT::Other;
24604
24605 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
24606 return MVT::Other;
24607
24608 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
24609 // has an expansion threshold, and we want the number of hardware memory
24610 // operations to correspond roughly to that threshold. LMUL>1 operations
24611 // are typically expanded linearly internally, and thus correspond to more
24612 // than one actual memory operation. Note that store merging and load
24613 // combining will typically form larger LMUL operations from the LMUL1
24614 // operations emitted here, and that's okay because combining isn't
24615 // introducing new memory operations; it's just merging existing ones.
24616 // NOTE: We limit to 1024 bytes to avoid creating an invalid MVT.
24617 const unsigned MinVLenInBytes =
24618 std::min(Subtarget.getRealMinVLen() / 8, 1024U);
24619
24620 if (Op.size() < MinVLenInBytes)
24621 // TODO: Figure out short memops. For the moment, do the default thing
24622 // which ends up using scalar sequences.
24623 return MVT::Other;
24624
24625 // If the minimum VLEN is less than RISCV::RVVBitsPerBlock we don't support
24626 // fixed vectors.
24627 if (MinVLenInBytes <= RISCV::RVVBytesPerBlock)
24628 return MVT::Other;
24629
24630 // Prefer i8 for non-zero memset as it allows us to avoid materializing
24631 // a large scalar constant and instead use vmv.v.x/i to do the
24632 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
24633 // maximize the chance we can encode the size in the vsetvli.
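// For example, with VLEN == 128 and ELEN == 64 this returns v2i64 for a
// sufficiently aligned 16-byte memcpy, but v16i8 for a non-zero 16-byte
// memset.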
24634 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
24635 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
24636
24637 // Do we have sufficient alignment for our preferred VT? If not, revert
24638 // to largest size allowed by our alignment criteria.
24639 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
24640 Align RequiredAlign(PreferredVT.getStoreSize());
24641 if (Op.isFixedDstAlign())
24642 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
24643 if (Op.isMemcpy())
24644 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
24645 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
24646 }
24647 return MVT::getVectorVT(PreferredVT, MinVLenInBytes / PreferredVT.getStoreSize());
24648}
24649
24650bool RISCVTargetLowering::splitValueIntoRegisterParts(
24651 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
24652 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
24653 bool IsABIRegCopy = CC.has_value();
24654 EVT ValueVT = Val.getValueType();
24655
24656 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
24657 if ((ValueVT == PairVT ||
24658 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
24659 ValueVT == MVT::f64)) &&
24660 NumParts == 1 && PartVT == MVT::Untyped) {
24661 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
24662 MVT XLenVT = Subtarget.getXLenVT();
24663 if (ValueVT == MVT::f64)
24664 Val = DAG.getBitcast(MVT::i64, Val);
24665 auto [Lo, Hi] = DAG.SplitScalar(Val, DL, XLenVT, XLenVT);
24666 // Always creating an MVT::Untyped part, so always use
24667 // RISCVISD::BuildGPRPair.
24668 Parts[0] = DAG.getNode(RISCVISD::BuildGPRPair, DL, PartVT, Lo, Hi);
24669 return true;
24670 }
24671
24672 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
24673 PartVT == MVT::f32) {
24674 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
24675 // nan, and cast to f32.
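// For example, an f16 value with bit pattern 0xABCD is passed as the f32 bit
// pattern 0xFFFFABCD.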
24676 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
24677 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
24678 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
24679 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
24680 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
24681 Parts[0] = Val;
24682 return true;
24683 }
24684
24685 if (ValueVT.isRISCVVectorTuple() && PartVT.isRISCVVectorTuple()) {
24686#ifndef NDEBUG
24687 unsigned ValNF = ValueVT.getRISCVVectorTupleNumFields();
24688 [[maybe_unused]] unsigned ValLMUL =
24689 divideCeil(ValueVT.getSizeInBits().getKnownMinValue(),
24690 ValNF * RISCV::RVVBitsPerBlock);
24691 unsigned PartNF = PartVT.getRISCVVectorTupleNumFields();
24692 [[maybe_unused]] unsigned PartLMUL =
24693 divideCeil(PartVT.getSizeInBits().getKnownMinValue(),
24694 PartNF * RISCV::RVVBitsPerBlock);
24695 assert(ValNF == PartNF && ValLMUL == PartLMUL &&
24696 "RISC-V vector tuple type only accepts same register class type "
24697 "TUPLE_INSERT");
24698#endif
24699
24700 Val = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, PartVT, DAG.getUNDEF(PartVT),
24701 Val, DAG.getTargetConstant(0, DL, MVT::i32));
24702 Parts[0] = Val;
24703 return true;
24704 }
24705
24706 if ((ValueVT.isScalableVector() || ValueVT.isFixedLengthVector()) &&
24707 PartVT.isScalableVector()) {
24708 if (ValueVT.isFixedLengthVector()) {
24709 ValueVT = getContainerForFixedLengthVector(ValueVT.getSimpleVT());
24710 Val = convertToScalableVector(ValueVT, Val, DAG, Subtarget);
24711 }
24712 LLVMContext &Context = *DAG.getContext();
24713 EVT ValueEltVT = ValueVT.getVectorElementType();
24714 EVT PartEltVT = PartVT.getVectorElementType();
24715 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
24716 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
24717 if (PartVTBitSize % ValueVTBitSize == 0) {
24718 assert(PartVTBitSize >= ValueVTBitSize);
24719 // If the element types are different, bitcast to the same element type of
24720 // PartVT first.
24721 // For example, say we want to copy a <vscale x 1 x i8> value to
24722 // <vscale x 4 x i16>.
24723 // We need to convert <vscale x 1 x i8> to <vscale x 8 x i8> by insert
24724 // subvector, then we can bitcast to <vscale x 4 x i16>.
24725 if (ValueEltVT != PartEltVT) {
24726 if (PartVTBitSize > ValueVTBitSize) {
24727 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
24728 assert(Count != 0 && "The number of element should not be zero.");
24729 EVT SameEltTypeVT =
24730 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
24731 Val = DAG.getInsertSubvector(DL, DAG.getUNDEF(SameEltTypeVT), Val, 0);
24732 }
24733 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
24734 } else {
24735 Val = DAG.getInsertSubvector(DL, DAG.getUNDEF(PartVT), Val, 0);
24736 }
24737 Parts[0] = Val;
24738 return true;
24739 }
24740 }
24741
24742 return false;
24743}
24744
24745SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
24746 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
24747 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
24748 bool IsABIRegCopy = CC.has_value();
24749
24750 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
24751 if ((ValueVT == PairVT ||
24752 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
24753 ValueVT == MVT::f64)) &&
24754 NumParts == 1 && PartVT == MVT::Untyped) {
24755 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
24756 MVT XLenVT = Subtarget.getXLenVT();
24757
24758 SDValue Val = Parts[0];
24759 // Always starting with an MVT::Untyped part, so always use
24760 // RISCVISD::SplitGPRPair
24761 Val = DAG.getNode(RISCVISD::SplitGPRPair, DL, DAG.getVTList(XLenVT, XLenVT),
24762 Val);
24763 Val = DAG.getNode(ISD::BUILD_PAIR, DL, PairVT, Val.getValue(0),
24764 Val.getValue(1));
24765 if (ValueVT == MVT::f64)
24766 Val = DAG.getBitcast(ValueVT, Val);
24767 return Val;
24768 }
24769
24770 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
24771 PartVT == MVT::f32) {
24772 SDValue Val = Parts[0];
24773
24774 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
24775 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
24776 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
24777 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
24778 return Val;
24779 }
24780
24781 if ((ValueVT.isScalableVector() || ValueVT.isFixedLengthVector()) &&
24782 PartVT.isScalableVector()) {
24783 LLVMContext &Context = *DAG.getContext();
24784 SDValue Val = Parts[0];
24785 EVT ValueEltVT = ValueVT.getVectorElementType();
24786 EVT PartEltVT = PartVT.getVectorElementType();
24787 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
24788 if (ValueVT.isFixedLengthVector())
24789 ValueVTBitSize = getContainerForFixedLengthVector(ValueVT.getSimpleVT())
24790 .getSizeInBits()
24791 .getKnownMinValue();
24792 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
24793 if (PartVTBitSize % ValueVTBitSize == 0) {
24794 assert(PartVTBitSize >= ValueVTBitSize);
24795 EVT SameEltTypeVT = ValueVT;
24796 // If the element types are different, convert it to the same element type
24797 // of PartVT.
24798 // For example, say we want to copy a <vscale x 1 x i8> value from
24799 // <vscale x 4 x i16>.
24800 // We need to convert <vscale x 4 x i16> to <vscale x 8 x i8> first,
24801 // then we can extract <vscale x 1 x i8>.
24802 if (ValueEltVT != PartEltVT) {
24803 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
24804 assert(Count != 0 && "The number of element should not be zero.");
24805 SameEltTypeVT =
24806 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
24807 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
24808 }
24809 if (ValueVT.isFixedLengthVector())
24810 Val = convertFromScalableVector(ValueVT, Val, DAG, Subtarget);
24811 else
24812 Val = DAG.getExtractSubvector(DL, ValueVT, Val, 0);
24813 return Val;
24814 }
24815 }
24816 return SDValue();
24817}
24818
24819bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
24820 // When aggressively optimizing for code size, we prefer to use a div
24821 // instruction, as it is usually smaller than the alternative sequence.
24822 // TODO: Add vector division?
24823 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
24824 return OptSize && !VT.isVector();
24825}
24826
24827bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
24828 // Scalarizing zero_ext and sign_ext might stop them from matching a widening
24829 // instruction in some situations.
24830 unsigned Opc = N->getOpcode();
24831 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
24832 return false;
24833 return true;
24834}
24835
24836static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
24837 Module *M = IRB.GetInsertBlock()->getModule();
24838 Function *ThreadPointerFunc = Intrinsic::getOrInsertDeclaration(
24839 M, Intrinsic::thread_pointer, IRB.getPtrTy());
24840 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
24841 IRB.CreateCall(ThreadPointerFunc), Offset);
24842}
24843
24844Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
24845 // Fuchsia provides a fixed TLS slot for the stack cookie.
24846 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
24847 if (Subtarget.isTargetFuchsia())
24848 return useTpOffset(IRB, -0x10);
24849
24850 // Android provides a fixed TLS slot for the stack cookie. See the definition
24851 // of TLS_SLOT_STACK_GUARD in
24852 // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
24853 if (Subtarget.isTargetAndroid())
24854 return useTpOffset(IRB, -0x18);
24855
24856 Module *M = IRB.GetInsertBlock()->getModule();
24857
24858 if (M->getStackProtectorGuard() == "tls") {
24859 // Users must specify the offset explicitly
24860 int Offset = M->getStackProtectorGuardOffset();
24861 return useTpOffset(IRB, Offset);
24862 }
24863
24864 return TargetLowering::getIRStackGuard(IRB);
24865}
24866
24867bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
24868 Align Alignment) const {
24869 if (!Subtarget.hasVInstructions())
24870 return false;
24871
24872 // Only support fixed vectors if we know the minimum vector size.
24873 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
24874 return false;
24875
24876 EVT ScalarType = DataType.getScalarType();
24877 if (!isLegalElementTypeForRVV(ScalarType))
24878 return false;
24879
24880 if (!Subtarget.enableUnalignedVectorMem() &&
24881 Alignment < ScalarType.getStoreSize())
24882 return false;
24883
24884 return true;
24885}
24886
24887MachineInstr *
24888RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
24889 MachineBasicBlock::iterator &MBBI,
24890 const TargetInstrInfo *TII) const {
24891 assert(MBBI->isCall() && MBBI->getCFIType() &&
24892 "Invalid call instruction for a KCFI check");
24893 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
24894 MBBI->getOpcode()));
24895
24896 MachineOperand &Target = MBBI->getOperand(0);
24897 Target.setIsRenamable(false);
24898
24899 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
24900 .addReg(Target.getReg())
24901 .addImm(MBBI->getCFIType())
24902 .getInstr();
24903}
24904
24905#define GET_REGISTER_MATCHER
24906#include "RISCVGenAsmMatcher.inc"
24907
24908Register
24909RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
24910 const MachineFunction &MF) const {
24911 Register Reg = MatchRegisterAltName(RegName);
24912 if (!Reg)
24913 Reg = MatchRegisterName(RegName);
24914 if (!Reg)
24915 return Reg;
24916
24917 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
24918 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
24919 reportFatalUsageError(Twine("Trying to obtain non-reserved register \"" +
24920 StringRef(RegName) + "\"."));
24921 return Reg;
24922}
24923
24924MachineMemOperand::Flags
24925RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
24926 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
24927
24928 if (NontemporalInfo == nullptr)
24929 return MachineMemOperand::MONone;
24930
24931 // 1 (the default value) -> __RISCV_NTLH_ALL
24932 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
24933 // 3 -> __RISCV_NTLH_ALL_PRIVATE
24934 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
24935 // 5 -> __RISCV_NTLH_ALL
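// For example, metadata value 3 maps to level 3 - 2 == 1, which sets only
// MONontemporalBit0, while value 5 maps to 0b11 and sets both bits.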
24936 int NontemporalLevel = 5;
24937 const MDNode *RISCVNontemporalInfo =
24938 I.getMetadata("riscv-nontemporal-domain");
24939 if (RISCVNontemporalInfo != nullptr)
24940 NontemporalLevel =
24941 cast<ConstantInt>(
24942 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
24943 ->getValue())
24944 ->getZExtValue();
24945
24946 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
24947 "RISC-V target doesn't support this non-temporal domain.");
24948
24949 NontemporalLevel -= 2;
24950 MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
24951 if (NontemporalLevel & 0b1)
24952 Flags |= MONontemporalBit0;
24953 if (NontemporalLevel & 0b10)
24954 Flags |= MONontemporalBit1;
24955
24956 return Flags;
24957}
24958
24959MachineMemOperand::Flags
24960RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
24961
24962 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
24963 MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
24964 TargetFlags |= (NodeFlags & MONontemporalBit0);
24965 TargetFlags |= (NodeFlags & MONontemporalBit1);
24966 return TargetFlags;
24967}
24968
24969bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
24970 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
24971 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
24972}
24973
24974bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
24975 if (VT.isVector()) {
24976 EVT SVT = VT.getVectorElementType();
24977 // If the element type is legal we can use cpop.v if it is enabled.
24978 if (isLegalElementTypeForRVV(SVT))
24979 return Subtarget.hasStdExtZvbb();
24980 // Don't consider it fast if the type needs to be legalized or scalarized.
24981 return false;
24982 }
24983
24984 return Subtarget.hasCPOPLike() && (VT == MVT::i32 || VT == MVT::i64);
24985}
24986
24987unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
24988 ISD::CondCode Cond) const {
24989 return isCtpopFast(VT) ? 0 : 1;
24990}
24991
24992bool RISCVTargetLowering::shouldInsertFencesForAtomic(
24993 const Instruction *I) const {
24994 if (Subtarget.hasStdExtZalasr()) {
24995 if (Subtarget.hasStdExtZtso()) {
24996 // Zalasr + TSO means that atomic_load_acquire and atomic_store_release
24997 // should be lowered to plain load/store. The easiest way to do this is
24998 // to say we should insert fences for them, and the fence insertion code
24999 // will just not insert any fences
25000 auto *LI = dyn_cast<LoadInst>(I);
25001 auto *SI = dyn_cast<StoreInst>(I);
25002 if ((LI &&
25003 (LI->getOrdering() == AtomicOrdering::SequentiallyConsistent)) ||
25004 (SI &&
25005 (SI->getOrdering() == AtomicOrdering::SequentiallyConsistent))) {
25006 // Here, this is a load or store which is seq_cst, and needs a .aq or
25007 // .rl therefore we shouldn't try to insert fences
25008 return false;
25009 }
25010 // Here, we are a TSO inst that isn't a seq_cst load/store
25011 return isa<LoadInst>(I) || isa<StoreInst>(I);
25012 }
25013 return false;
25014 }
25015 // Note that one specific case requires fence insertion for an
25016 // AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather
25017 // than this hook due to limitations in the interface here.
25018 return isa<LoadInst>(I) || isa<StoreInst>(I);
25019}
25020
25021bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
25022
25023 // GISel support is in progress or complete for these opcodes.
25024 unsigned Op = Inst.getOpcode();
25025 if (Op == Instruction::Add || Op == Instruction::Sub ||
25026 Op == Instruction::And || Op == Instruction::Or ||
25027 Op == Instruction::Xor || Op == Instruction::InsertElement ||
25028 Op == Instruction::ShuffleVector || Op == Instruction::Load ||
25029 Op == Instruction::Freeze || Op == Instruction::Store)
25030 return false;
25031
25032 if (auto *II = dyn_cast<IntrinsicInst>(&Inst)) {
25033 // Mark RVV intrinsic as supported.
25034 if (RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(II->getIntrinsicID()))
25035 return false;
25036 }
25037
25038 if (Inst.getType()->isScalableTy())
25039 return true;
25040
25041 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
25042 if (Inst.getOperand(i)->getType()->isScalableTy() &&
25043 !isa<ReturnInst>(&Inst))
25044 return true;
25045
25046 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
25047 if (AI->getAllocatedType()->isScalableTy())
25048 return true;
25049 }
25050
25051 return false;
25052}
25053
25054SDValue
25055RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
25056 SelectionDAG &DAG,
25057 SmallVectorImpl<SDNode *> &Created) const {
25058 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
25059 if (isIntDivCheap(N->getValueType(0), Attr))
25060 return SDValue(N, 0); // Lower SDIV as SDIV
25061
25062 // Only perform this transform if short forward branch opt is supported.
25063 if (!Subtarget.hasShortForwardBranchOpt())
25064 return SDValue();
25065 EVT VT = N->getValueType(0);
25066 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
25067 return SDValue();
25068
25069 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
25070 if (Divisor.sgt(2048) || Divisor.slt(-2048))
25071 return SDValue();
25072 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
25073}
25074
25075bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
25076 EVT VT, const APInt &AndMask) const {
25077 if (Subtarget.hasCZEROLike() || Subtarget.hasVendorXTHeadCondMov())
25078 return !Subtarget.hasBEXTILike() && AndMask.ugt(1024);
25079 return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
25080}
25081
25082unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
25083 return Subtarget.getMinimumJumpTableEntries();
25084}
25085
25086SDValue RISCVTargetLowering::expandIndirectJTBranch(const SDLoc &dl,
25087 SDValue Value, SDValue Addr,
25088 int JTI,
25089 SelectionDAG &DAG) const {
25090 if (Subtarget.hasStdExtZicfilp()) {
25091 // When Zicfilp enabled, we need to use software guarded branch for jump
25092 // table branch.
25093 SDValue Chain = Value;
25094 // Jump table debug info is only needed if CodeView is enabled.
25095 if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF())
25096 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
25097 return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, Chain, Addr);
25098 }
25099 return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
25100}
25101
25102// If an output pattern produces multiple instructions, tablegen may pick an
25103// arbitrary type from an instruction's destination register class to use for the
25104// VT of that MachineSDNode. This VT may be used to look up the representative
25105// register class. If the type isn't legal, the default implementation will
25106// not find a register class.
25107//
25108// Some integer types smaller than XLen are listed in the GPR register class to
25109// support isel patterns for GISel, but are not legal in SelectionDAG. The
25110// arbitrary type tablegen picks may be one of these smaller types.
25111//
25112// f16 and bf16 are both valid for the FPR16 or GPRF16 register class. It's
25113// possible for tablegen to pick bf16 as the arbitrary type for an f16 pattern.
25114std::pair<const TargetRegisterClass *, uint8_t>
25115RISCVTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
25116 MVT VT) const {
25117 switch (VT.SimpleTy) {
25118 default:
25119 break;
25120 case MVT::i8:
25121 case MVT::i16:
25122 case MVT::i32:
25123 return std::make_pair(&RISCV::GPRRegClass, 1);
25124 case MVT::bf16:
25125 case MVT::f16:
25126 return std::make_pair(&RISCV::FPR16RegClass, 1);
25127 }
25128
25129 return TargetLowering::findRepresentativeClass(TRI, VT);
25130}
25131
25132namespace llvm::RISCVVIntrinsicsTable {
25133
25134#define GET_RISCVVIntrinsicsTable_IMPL
25135#include "RISCVGenSearchableTables.inc"
25136
25137} // namespace llvm::RISCVVIntrinsicsTable
25138
25139bool RISCVTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
25140
25141 // If the function specifically requests inline stack probes, emit them.
25142 if (MF.getFunction().hasFnAttribute("probe-stack"))
25143 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
25144 "inline-asm";
25145
25146 return false;
25147}
25148
25149unsigned RISCVTargetLowering::getStackProbeSize(const MachineFunction &MF,
25150 Align StackAlign) const {
25151 // The default stack probe size is 4096 if the function has no
25152 // stack-probe-size attribute.
25153 const Function &Fn = MF.getFunction();
25154 unsigned StackProbeSize =
25155 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
25156 // Round down to the stack alignment.
25157 StackProbeSize = alignDown(StackProbeSize, StackAlign.value());
25158 return StackProbeSize ? StackProbeSize : StackAlign.value();
25159}
25160
25161SDValue RISCVTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
25162 SelectionDAG &DAG) const {
25163 MachineFunction &MF = DAG.getMachineFunction();
25164 if (!hasInlineStackProbe(MF))
25165 return SDValue();
25166
25167 MVT XLenVT = Subtarget.getXLenVT();
25168 // Get the inputs.
25169 SDValue Chain = Op.getOperand(0);
25170 SDValue Size = Op.getOperand(1);
25171
25172 MaybeAlign Align =
25173 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
25174 SDLoc dl(Op);
25175 EVT VT = Op.getValueType();
25176
25177 // Construct the new SP value in a GPR.
25178 SDValue SP = DAG.getCopyFromReg(Chain, dl, RISCV::X2, XLenVT);
25179 Chain = SP.getValue(1);
25180 SP = DAG.getNode(ISD::SUB, dl, XLenVT, SP, Size);
25181 if (Align)
25182 SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
25183 DAG.getSignedConstant(-Align->value(), dl, VT));
25184
25185 // Set the real SP to the new value with a probing loop.
25186 Chain = DAG.getNode(RISCVISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP);
25187 return DAG.getMergeValues({SP, Chain}, dl);
25188}
25189
25190MachineBasicBlock *
25191RISCVTargetLowering::emitDynamicProbedAlloc(MachineInstr &MI,
25192 MachineBasicBlock *MBB) const {
25193 MachineFunction &MF = *MBB->getParent();
25194 MachineBasicBlock::iterator MBBI = MI.getIterator();
25195 DebugLoc DL = MBB->findDebugLoc(MBBI);
25196 Register TargetReg = MI.getOperand(0).getReg();
25197
25198 const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
25199 bool IsRV64 = Subtarget.is64Bit();
25200 Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
25201 const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
25202 uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
25203
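// The probing loop built below looks roughly like:
//   li     scratch, ProbeSize
// LoopTest:
//   sub    sp, sp, scratch
//   s[d|w] zero, 0(sp)
//   blt    TargetReg, sp, LoopTest
// Exit:
//   mv     sp, TargetReg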
25204 MachineFunction::iterator MBBInsertPoint = std::next(MBB->getIterator());
25205 MachineBasicBlock *LoopTestMBB =
25206 MF.CreateMachineBasicBlock(MBB->getBasicBlock());
25207 MF.insert(MBBInsertPoint, LoopTestMBB);
25208 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
25209 MF.insert(MBBInsertPoint, ExitMBB);
25210 Register SPReg = RISCV::X2;
25211 Register ScratchReg =
25212 MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass);
25213
25214 // ScratchReg = ProbeSize
25215 TII->movImm(*MBB, MBBI, DL, ScratchReg, ProbeSize, MachineInstr::NoFlags);
25216
25217 // LoopTest:
25218 // SUB SP, SP, ProbeSize
25219 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB), SPReg)
25220 .addReg(SPReg)
25221 .addReg(ScratchReg);
25222
25223 // s[d|w] zero, 0(sp)
25224 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
25225 TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
25226 .addReg(RISCV::X0)
25227 .addReg(SPReg)
25228 .addImm(0);
25229
25230 // BLT TargetReg, SP, LoopTest
25231 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BLT))
25232 .addReg(TargetReg)
25233 .addReg(SPReg)
25234 .addMBB(LoopTestMBB);
25235
25236 // Adjust with: MV SP, TargetReg.
25237 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(RISCV::ADDI), SPReg)
25238 .addReg(TargetReg)
25239 .addImm(0);
25240
25241 ExitMBB->splice(ExitMBB->end(), MBB, std::next(MBBI), MBB->end());
25242 ExitMBB->transferSuccessorsAndUpdatePHIs(MBB);
25243
25244 LoopTestMBB->addSuccessor(ExitMBB);
25245 LoopTestMBB->addSuccessor(LoopTestMBB);
25246 MBB->addSuccessor(LoopTestMBB);
25247
25248 MI.eraseFromParent();
25249 MF.getInfo<RISCVMachineFunctionInfo>()->setDynamicAllocation();
25250 return ExitMBB->begin()->getParent();
25251}
25252
25253ArrayRef<MCPhysReg> RISCVTargetLowering::getRoundingControlRegisters() const {
25254 if (Subtarget.hasStdExtFOrZfinx()) {
25255 static const MCPhysReg RCRegs[] = {RISCV::FRM, RISCV::FFLAGS};
25256 return RCRegs;
25257 }
25258 return {};
25259}
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
If the operand is a bitwise AND with a constant RHS, and the shift has a constant RHS and is the only...
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isConstant(const MachineInstr &MI)
AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static InstructionCost getCost(Instruction &Inst, TTI::TargetCostKind CostKind, TargetTransformInfo &TTI, TargetLibraryInfo &TLI)
Definition CostModel.cpp:74
#define Check(C,...)
#define DEBUG_TYPE
#define im(i)
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define CC_VLS_CASE(ABIVlen)
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define G(x, y, z)
Definition MD5.cpp:56
mir Rename Register Operands
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
This file provides utility analysis objects describing memory locations.
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static StringRef getName(Value *V)
static constexpr MCPhysReg SPReg
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue OrigOp, MutableArrayRef< SDValue > Operands, SelectionDAG &DAG)
static bool isLowSourceShuffle(ArrayRef< int > Mask, int Span)
Is this mask only using elements from the first span of the input?
static bool isZipOdd(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned &Factor)
Given a shuffle which can be represented as a pair of two slides, see if it is a zipodd idiom.
static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performVECREDUCECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< APInt > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static SDValue performVP_TRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2)
static unsigned getPACKOpcode(unsigned DestBW, const RISCVSubtarget &Subtarget)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static bool isLegalBitRotate(ArrayRef< int > Mask, EVT VT, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static SDValue simplifyOp_VL(SDNode *N)
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isAlternating(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned Factor, bool RequiredPolarity)
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(3))
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVVType::VLMUL LMul, unsigned SEW)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static SDValue combineOp_VLToVWOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary or FMA operation to its equivalent VW or VW_W form.
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< bool > ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden, cl::desc("Swap add and addi in cases where the add may " "be combined with a shift"), cl::init(true))
static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Given a shuffle where the indices are disjoint between the two sources, e.g.:
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static void promoteVCIXScalar(SDValue Op, MutableArrayRef< SDValue > Operands, SelectionDAG &DAG)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue reduceANDOfAtomicLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFixedVectorSegLoadIntrinsics(unsigned IntNo, SDValue Op, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG)
static bool isLocalRepeatingShuffle(ArrayRef< int > Mask, int Span)
Is this mask local (i.e.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static bool isSpanSplatShuffle(ArrayRef< int > Mask, int Span)
Return true for a mask which performs an arbitrary shuffle within the first span, and then repeats th...
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static SDValue combineOrToBitfieldInsert(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue Op, SelectionDAG &DAG, unsigned Type)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isPromotedOpNeedingSplit(SDValue Op, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt)
static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVLOperand(SDValue Op)
static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
const uint64_t ModeMask64
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other, SelectionDAG &DAG)
static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT, SDValue Src, unsigned Factor, unsigned Index, SelectionDAG &DAG)
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineXorToBitfieldInsert(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static bool isValidVisniInsertExtractIndex(SDValue Idx)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static SDValue getZeroPaddedAdd(const SDLoc &DL, SDValue A, SDValue B, SelectionDAG &DAG)
Given fixed length vectors A and B with equal element types, but possibly different number of element...
const uint32_t ModeMask32
static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static SDValue combineVqdotAccum(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaVID(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue performVFMADD_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerFixedVectorSegStoreIntrinsics(unsigned IntNo, SDValue Op, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue getVCIXISDNodeWCHAIN(SDValue Op, SelectionDAG &DAG, unsigned Type)
static SDValue lowerVectorXRINT_XROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isSimm12Constant(SDValue V)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG)
static bool isElementRotate(const std::array< std::pair< int, int >, 2 > &SrcInfo, unsigned NumElts)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVRGatherVX(ShuffleVectorSDNode *SVN, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match a single source shuffle which is an identity except that some particular element is repeated.
static bool isNonZeroAVL(SDValue AVL)
static SDValue lowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MVT getQDOTXResultType(MVT OpVT)
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index, const SDLoc &DL, SelectionDAG &DAG)
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue foldConcatVector(SDValue V1, SDValue V2)
If concat_vector(V1,V2) could be folded away to some existing vector source, return it.
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed by a scalar reduction as a vector reduction node.
static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Double the element size of the build vector to reduce the number of vslide1down in the build vector chain.
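The packing idea, shown on plain scalars as a hedged illustration (the lowering itself operates on SelectionDAG nodes; little-endian lane packing is assumed): combining two adjacent narrow elements into one lane of twice the width halves the number of vslide1down-style steps needed to feed the vector.

#include <cstdint>
#include <vector>

// Sketch: pack pairs of 16-bit elements into 32-bit lanes, so an N x i16
// build_vector becomes an N/2 x i32 build_vector (bitcast back afterwards).
// Odd element counts would need a padding step, omitted here.
static std::vector<uint32_t> packPairs(const std::vector<uint16_t> &Elts) {
  std::vector<uint32_t> Packed;
  for (size_t I = 0; I + 1 < Elts.size(); I += 2)
    Packed.push_back((uint32_t)Elts[I] |             // low half: element 2i
                     ((uint32_t)Elts[I + 1] << 16)); // high half: element 2i+1
  return Packed;
}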
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try to optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large proportion of the elements.
static bool isCompressMask(ArrayRef< int > Mask)
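For context, a compress shuffle packs a subset of the source elements to the front of the result while keeping their relative order, which is what vcompress.vm produces. A minimal sketch of such a mask check (an assumption about the mask shape, not the in-tree predicate):

#include "llvm/ADT/ArrayRef.h"

// Sketch: <0, 2, 5, -1, -1, -1, -1, -1> is a compress mask (elements 0, 2
// and 5 packed to the front, tail undef); <2, 0, ...> is not, because the
// selected indices must stay in their original order.
static bool looksLikeCompressMask(llvm::ArrayRef<int> Mask) {
  int Last = -1;
  bool SeenUndef = false;
  for (int M : Mask) {
    if (M < 0) {        // undef lanes may only form the tail
      SeenUndef = true;
      continue;
    }
    if (SeenUndef || M <= Last)
      return false;     // defined lane after undef, or order violated
    Last = M;
  }
  return true;
}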
static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isZipEven(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned &Factor)
Given a shuffle which can be represented as a pair of two slides, see if it is a zipeven idiom.
static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try to map an integer comparison with size > XLEN to vector instructions before type legalization splits it up into chunks.
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily the same constant in each lane).
#define OP_CASE(NODE)
static SDValue combineOrAndToBitfieldInsert(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static LLT getMaskTypeFor(LLT VecTy)
Return the mask type suitable for masking the provided vector type.
static unsigned getRISCVWOpcode(unsigned Opcode)
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metrics from passes.
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static constexpr int Concat[]
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1347
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1332
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1109
Class for arbitrary precision integers.
Definition APInt.h:78
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:449
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1385
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1330
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1201
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:209
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:329
LLVM_ABI APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Definition APInt.cpp:1644
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1396
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:435
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
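A small usage example of getSplat (illustrative values only):

#include "llvm/ADT/APInt.h"
#include <cassert>

// Broadcast the 8-bit pattern 0xAB over 32 bits; the result is 0xABABABAB.
static void splatExample() {
  llvm::APInt Byte(8, 0xAB);
  llvm::APInt Splat = llvm::APInt::getSplat(32, Byte);
  assert(Splat.getZExtValue() == 0xABABABABu);
  (void)Splat;
}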
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1531
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:510
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition APInt.cpp:1736
bool isMask(unsigned numBits) const
Definition APInt.h:488
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:334
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:306
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1130
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:296
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1388
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:286
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:239
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1221
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
an instruction to allocate memory on the stack
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
An instruction that atomically checks whether a specified value is in a memory location, and, if it is, stores a new value there.
An instruction that atomically reads a memory location, combines it with another value, and then stores the result back.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
bool isFloatingPointOperation() const
BinOp getOperation() const
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
This is an SDNode representing atomic operations.
const SDValue & getBasePtr() const
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
static LLVM_ABI BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does not have a module.
bool test(unsigned Idx) const
Definition BitVector.h:461
BitVector & set()
Definition BitVector.h:351
bool all() const
all - Returns true if all bits are set.
Definition BitVector.h:175
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set, or Regs.size() if they are all allocated.
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed values into this state.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals into this state.
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
int64_t getLocMemOffset() const
Base class for all callable instructions (InvokeInst and CallInst); holds everything related to calling a function.
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly not equal, like -0.0 and 0.0.
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:226
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:479
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:124
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:229
unsigned size() const
Definition DenseMap.h:108
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition DenseMap.h:205
Implements a dense probed hash-table based set.
Definition DenseSet.h:269
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition TypeSize.h:312
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Tagged union holding either a T or a Error.
Definition Error.h:485
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:762
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:774
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function.
Definition Function.h:270
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
Argument * getArg(unsigned i) const
Definition Function.h:884
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:727
Helper struct to store a base, index and offset that forms an address.
bool isDSOLocal() const
bool hasExternalWeakLinkage() const
Module * getParent()
Get the module that this global value is contained inside of...
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition IRBuilder.h:1936
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:201
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2508
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition IRBuilder.h:605
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition IRBuilder.h:552
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not belong to a module.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
MCContext & getContext() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Metadata node.
Definition Metadata.h:1077
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1441
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
bool isRISCVVectorTuple() const
Return true if this is a RISCV vector tuple type where the runtime length is machine dependent.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
static MVT getRISCVVectorTupleVT(unsigned Sz, unsigned NFields)
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
unsigned getRISCVVectorTupleNumFields() const
Given a RISC-V vector tuple type, return the num_fields.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted to an integer type with the same bitwidth.
static auto fp_fixedlen_vector_valuetypes()
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before 'Where'.
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual register for it.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJumpTableEntry hook.
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified register class.
This is an abstract virtual class for memory operations.
Align getAlign() const
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
Metadata * getModuleFlag(StringRef Key) const
Return the corresponding value if Key appears in module flags, otherwise return null.
Definition Module.cpp:353
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition ArrayRef.h:303
A RISCV-specific constant pool value.
static RISCVConstantPoolValue * Create(const GlobalValue *GV)
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
bool hasStdExtZfhOrZhinx() const
bool hasShlAdd(int64_t ShAmt) const
unsigned getRealMinVLen() const
bool useRVVForFixedLengthVectors() const
bool hasVInstructionsBF16Minimal() const
bool hasVInstructionsF16Minimal() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool hasVInstructionsF16() const
unsigned getMaxBuildIntsCost() const
bool hasVInstructions() const
bool isRegisterReservedByUser(Register i) const override
std::optional< unsigned > getRealVLen() const
bool useConstantPoolForLargeInts() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
bool hasBEXTILike() const
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
bool hasCZEROLike() const
unsigned getELen() const
unsigned getFLen() const
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
ArrayRef< MCPhysReg > getRoundingControlRegisters() const override
Returns a 0 terminated array of rounding control registers that can be attached into strict FP call.
static MVT getM1VT(MVT VT)
Given a vector (either fixed or scalable), return the scalable vector corresponding to a vector regis...
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that location.
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) saturation patterns.
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array, into the specified DAG.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' flag.
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool preferScalarizeSplat(SDNode *N) const override
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integers.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of the specified type.
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 and (X & Y) != Y ---> (~X & Y) != 0.
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a scalar operation.
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
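For background, the RISC-V vector specification defines VLMAX = LMUL * VLEN / SEW. A worked example of that spec-level formula (this is not a restatement of the helper's parameters; the meaning of MinSize is not described here):

// Spec-level formula, not this helper's implementation:
//   VLMAX = LMUL * VLEN / SEW
// e.g. VLEN = 128 bits, SEW = 32 bits, LMUL = 2  =>  VLMAX = 8 elements.
static unsigned vlmaxFromSpec(unsigned VLenBits, unsigned SEWBits,
                              unsigned LMul) {
  return (VLenBits / SEWBits) * LMul;
}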
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset, memcpy, and memmove lowering.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an EH pad.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineBasicBlock * emitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand flags to them.
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether a given truncation down into KeptBits would be truncating or not.
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this function.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a landing pad.
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array, into the specified DAG.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (touches memory).
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized integers.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
static unsigned getRegClassIDForLMUL(RISCVVType::VLMUL LMul)
unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const override
Return the number of registers for a given MVT, for inline assembly.
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
static bool isSpreadMask(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Match a mask which "spreads" the leading elements of a vector evenly across the result.
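As an illustration of the mask shape described above (a sketch, not the in-tree matcher; the signature differs in that the spread Index is taken as an input here):

#include "llvm/ADT/ArrayRef.h"

// Sketch: for Factor = 2, Index = 0 an 8-lane spread mask is
// <0, -1, 1, -1, 2, -1, 3, -1>; for Index = 1 it is <-1, 0, -1, 1, ...>.
static bool looksLikeSpreadMask(llvm::ArrayRef<int> Mask, unsigned Factor,
                                unsigned Index) {
  for (unsigned I = 0, E = (unsigned)Mask.size(); I != E; ++I) {
    if (I % Factor == Index) {
      if (Mask[I] >= 0 && (unsigned)Mask[I] != I / Factor)
        return false; // defined lane must read source element I / Factor
    } else if (Mask[I] >= 0) {
      return false;   // off-stride lanes must be undef
    }
  }
  return true;
}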
static RISCVVType::VLMUL getLMUL(MVT VT)
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X, SDValue Y) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation functions.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
std::optional< APInt > bitcastToAPInt() const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
iterator_range< user_iterator > users()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
virtual bool isTargetStrictFPOpcode(unsigned Opcode) const
Returns true if a node with the given target-specific opcode has strict floating-point semantics.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representation.
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
LLVM_ABI SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
LLVM_ABI SDValue getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO)
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding=false)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SDValue.
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-zero for example).
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands, and they produce a value and a token chain.
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step, Step * 2, Step * 3, ...>.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
LLVM_ABI SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
LLVM_ABI SDValue getStridedStoreVP(SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not necessarily identical pieces.
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
LLVM_ABI SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
const SelectionDAGTargetInfo & getSelectionDAGInfo() const
LLVM_ABI SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getInsertVectorElt(const SDLoc &DL, SDValue Vec, SDValue Elt, unsigned Idx)
Insert Elt into Vec at offset Idx.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
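A minimal sketch of the getSelectCC helper, assuming DAG, DL, and two i64 SDValues LHS and RHS are in scope (placeholder names):
  // Yields LHS when LHS < RHS (signed), otherwise RHS, i.e. a signed minimum.
  SDValue Min = DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, ISD::SETLT);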
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
LLVM_ABI std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
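A small sketch of MaskedValueIsZero, assuming DAG and an i64 SDValue Op are in scope (placeholder names):
  // True if the low 12 bits of Op are known to be zero.
  APInt Low12 = APInt::getLowBitsSet(64, 12);
  bool LowBitsClear = DAG.MaskedValueIsZero(Op, Low12);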
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
LLVM_ABI SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
LLVM_ABI SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
static LLVM_ABI bool isSelectMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
static LLVM_ABI bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
static LLVM_ABI bool isSingleSourceMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static LLVM_ABI bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
static LLVM_ABI bool isSplatMask(ArrayRef< int > Mask)
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:181
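A small usage sketch of SmallSet; the inline capacity of 4 below is arbitrary:
  #include "llvm/ADT/SmallSet.h"

  llvm::SmallSet<unsigned, 4> Seen;
  Seen.insert(3);                    // no effect if 3 is already present
  bool AlreadySeen = Seen.count(3);  // count() returns 1 if present, 0 otherwise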
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
pointer data()
Return a pointer to the vector's buffer, even if empty().
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:854
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
LLVM_ABI std::string lower() const
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
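A small usage sketch of StringSwitch, assuming a StringRef Name is in scope; the extension names are purely illustrative:
  #include "llvm/ADT/StringSwitch.h"

  bool IsFPExt = llvm::StringSwitch<bool>(Name)
                     .Cases("f", "d", true)
                     .Case("zfh", true)
                     .Default(false);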
Information about stack frame layout on the target.
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
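A sketch of setOperationAction as it would appear inside a TargetLowering subclass constructor; the operation/type/action choices below are illustrative, not taken from this file:
  // Let the legalizer expand CTPOP on i64 and call a custom hook for BITCAST on f16.
  setOperationAction(ISD::CTPOP, MVT::i64, Expand);
  setOperationAction(ISD::BITCAST, MVT::f16, Custom);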
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual unsigned getMinimumJumpTableEntries() const
Return lower limit for number of blocks in a jump table.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
unsigned MaxGluedStoresPerMemcpy
Specify max number of store instructions to glue in inlined memcpy.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const
Return the number of registers that this ValueType will eventually require.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setPartialReduceMLAAction(unsigned Opc, MVT AccVT, MVT InputVT, LegalizeAction Action)
Indicate how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type InputVT should be treate...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
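A sketch of setLoadExtAction and the related setTruncStoreAction listed above, again as they would appear inside a TargetLowering subclass constructor, with illustrative types and actions:
  // Zero-extending i32 loads into i64 are fine; f32->f16 truncating stores are not.
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i64, MVT::i32, Legal);
  setTruncStoreAction(MVT::f32, MVT::f16, Expand);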
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual MVT getVPExplicitVectorLengthTy() const
Returns the type to be used for the EVL/AVL operand of VP nodes: ISD::VP_ADD, ISD::VP_SUB,...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
const MCSubtargetInfo * getMCSubtargetInfo() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
TargetOptions Options
unsigned EmitCallGraphSection
Emit section containing call graph metadata.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual bool isRegisterReservedByUser(Register R) const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Target - Wrapper for Target specific information.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition Triple.h:774
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI Type * getStructElementType(unsigned N) const
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:62
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:261
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
bool isTargetExtTy() const
Return true if this is a target extension type.
Definition Type.h:203
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:194
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:181
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:230
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition TypeSize.h:256
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
constexpr bool isZero() const
Definition TypeSize.h:154
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
self_iterator getIterator()
Definition ilist_node.h:123
#define INT64_MIN
Definition DataTypes.h:74
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
@ PreserveMost
Used for runtime calls that preserve most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:801
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:774
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:765
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:738
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:431
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:706
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:478
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:656
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:773
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor to...
Definition ISDOpcodes.h:622
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition ISDOpcodes.h:682
@ FCANONICALIZE
Returns the platform-specific canonical encoding of a floating-point number.
Definition ISDOpcodes.h:528
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:535
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:778
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:663
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:343
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined; 0 Round to 0; 1 Round to nearest, ties to even; 2 Round to ...
Definition ISDOpcodes.h:952
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:563
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:793
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:379
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:718
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition ISDOpcodes.h:627
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:787
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:477
@ STRICT_FROUNDEVEN
Definition ISDOpcodes.h:457
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:145
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:471
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:493
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:470
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:908
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:498
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:701
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:420
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition ISDOpcodes.h:672
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition ISDOpcodes.h:648
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:941
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition ISDOpcodes.h:690
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:451
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:927
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:815
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:521
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor ...
Definition ISDOpcodes.h:611
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition ISDOpcodes.h:853
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:713
@ TRUNCATE_USAT_U
Definition ISDOpcodes.h:857
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:543
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isExtOpcode(unsigned Opcode)
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
LLVM_ABI std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
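A small sketch of the two condition-code helpers above:
  // Inverting x < y gives x >= y; swapping its operands gives y > x.
  ISD::CondCode Inv = ISD::getSetCCInverse(ISD::SETLT, MVT::i64);  // ISD::SETGE
  ISD::CondCode Swp = ISD::getSetCCSwappedOperands(ISD::SETLT);    // ISD::SETGT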
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
LLVM_ABI bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
LLVM_ABI bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
match_combine_or< BinaryOp_match< LHS, RHS, Instruction::Add >, DisjointOr_match< LHS, RHS > > m_AddLike(const LHS &L, const RHS &R)
Match either "add" or "or disjoint".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
unsigned getBrCond(CondCode CC, unsigned SelectOpc=0)
static RISCVVType::VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
SmallVector< Inst, 8 > InstSeq
Definition RISCVMatInt.h:43
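A small sketch of generateInstSeq, assuming an MCSubtargetInfo STI is in scope (placeholder name); the constant is arbitrary:
  // Seq describes the LUI/ADDI/... sequence needed to materialize the value.
  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(0x12345, STI);
  unsigned NumInsts = Seq.size();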
static VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned decodeVSEW(unsigned VSEW)
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
static unsigned encodeSEW(unsigned SEW)
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
static constexpr unsigned FPMASK_Positive_Infinity
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
static constexpr unsigned RVVBytesPerBlock
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
BinaryOpc_match< LHS, RHS > m_Srl(const LHS &L, const RHS &R)
auto m_SpecificVT(EVT RefVT, const Pattern &P)
Match a specific ValueType.
Or< Preds... > m_AnyOf(const Preds &...preds)
auto m_Node(unsigned Opcode, const OpndPreds &...preds)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
ConstantInt_match m_ConstInt()
Match any integer constants or splat of an integer constant.
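A small sketch of the SDPatternMatch helpers above, assuming an SDNode *N and a SelectionDAG *DAG are in scope (placeholder names):
  using namespace llvm::SDPatternMatch;
  // Matches (srl X, C) for any operand X and any integer-constant shift amount.
  bool IsSrlByConst = sd_match(N, DAG, m_Srl(m_Value(), m_ConstInt()));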
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
initializer< Ty > init(const Ty &Val)
uint32_t read32le(const void *P)
Definition Endian.h:428
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1705
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
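For example, RISC-V I-type immediates are signed 12-bit values, which this helper checks directly:
  static_assert(llvm::isInt<12>(2047), "largest 12-bit signed value");
  static_assert(!llvm::isInt<12>(2048), "one past the largest 12-bit signed value");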
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2452
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
bool isStrongerThanMonotonic(AtomicOrdering AO)
MCCodeEmitter * createRISCVMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx)
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:289
static const MachineMemOperand::Flags MONontemporalBit0
bool RISCVCCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
RISCVCCAssignFn - This target-specific function extends the default CCValAssign with additional infor...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and congruent to Skew modulo Align.
Definition MathExtras.h:557
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1589
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition Error.cpp:177
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:348
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:396
int countr_zero(T Val)
Count the number of 0s from the least significant bit towards the most significant bit, stopping at the first 1.
Definition bit.h:186
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:1948
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1712
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:270
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
bool CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:325
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:405
@ Other
Any other memory.
Definition ModRef.h:68
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
CombineLevel
Definition DAGCombine.h:15
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
LLVM_ABI bool isMaskedSlidePair(ArrayRef< int > Mask, int NumElts, std::array< std::pair< int, int >, 2 > &SrcInfo)
Does this shuffle mask represent either one slide shuffle or a pair of two slide shuffles,...
@ Xor
Bitwise or logical XOR of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1941
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1877
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:583
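A one-line example of SignExtend64 for a 12-bit field:
  int64_t Imm = llvm::SignExtend64<12>(0xFFF);  // all 12 bits set, so Imm == -1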
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:208
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:86
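A one-line example of maskTrailingOnes:
  uint32_t LowFour = llvm::maskTrailingOnes<uint32_t>(4);  // 0xF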
constexpr bool isShiftedUInt(uint64_t x)
Checks if an unsigned integer is an N-bit number shifted left by S.
Definition MathExtras.h:207
LLVM_ABI bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
auto mask(ShuffFunc S, unsigned Length, OptArgs... args) -> MaskT
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
#define NC
Definition regutils.h:42
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:324
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:85
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
uint64_t getScalarStoreSize() const
Definition ValueTypes.h:402
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:243
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
unsigned getRISCVVectorTupleNumFields() const
Given a RISCV vector tuple type, return the num_fields.
Definition ValueTypes.h:364
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition ValueTypes.h:430
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
bool isRISCVVectorTuple() const
Return true if this is a RISC-V vector tuple value type.
Definition ValueTypes.h:179
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isFixedLengthVector() const
Definition ValueTypes.h:181
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition ValueTypes.h:419
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool bitsGE(EVT VT) const
Return true if this has no fewer bits than VT.
Definition ValueTypes.h:292
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:308
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
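Taken together, the EVT queries listed above are typically used as in this minimal sketch (describeVT is a made-up name for illustration):
#include <cassert>
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;
// Hypothetical helper: inspect a fixed-length <4 x i32> vector type.
void describeVT(LLVMContext &Ctx) {
  EVT VT = EVT::getVectorVT(Ctx, MVT::i32, 4);
  assert(VT.isVector() && VT.isFixedLengthVector() && !VT.isScalableVector());
  EVT EltVT = VT.getVectorElementType();        // i32
  unsigned NumElts = VT.getVectorNumElements(); // 4
  TypeSize Bytes = VT.getStoreSize();           // 16 bytes
  (void)EltVT; (void)NumElts; (void)Bytes;
}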
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static LLVM_ABI KnownBits ashr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for ashr(LHS, RHS).
static LLVM_ABI KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:66
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition KnownBits.h:274
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition KnownBits.h:161
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:172
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
static LLVM_ABI KnownBits lshr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for lshr(LHS, RHS).
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:296
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:311
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:180
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from addition of LHS and RHS.
Definition KnownBits.h:347
static LLVM_ABI KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition KnownBits.h:280
static LLVM_ABI KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
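A minimal sketch of how these KnownBits facts compose (the values are made up, and makeConstant is another KnownBits helper not listed above): add a known constant to a value whose top nibble is known zero and query how many bits the result can occupy.
#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;
void knownBitsExample() {
  KnownBits Val(8);
  Val.Zero.setHighBits(4);                        // bits 4..7 known zero, so Val <= 0x0F
  KnownBits CstOne = KnownBits::makeConstant(APInt(8, 1));
  KnownBits Sum = KnownBits::add(Val, CstOne);    // conservative bits of Val + 1
  unsigned MaxActive = Sum.countMaxActiveBits();  // 5: the sum never exceeds 0x10
  (void)MaxActive;
}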
Matching combinators.
SmallVector< ArgRegPair, 1 > ArgRegPairs
Vector of call argument and its forwarding register.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
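A minimal sketch of these factory helpers (buildSpillInfo is a hypothetical name; MF and FI are assumed to come from surrounding lowering code):
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
using namespace llvm;
// Hypothetical helper: describe a fixed stack slot, then the same slot 8 bytes in.
MachinePointerInfo buildSpillInfo(MachineFunction &MF, int FI) {
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  return PtrInfo.getWithOffset(8);
}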
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:117
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI bool recursivelyDeleteUnusedNodes(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
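A minimal sketch of the combiner callbacks listed here (replaceWith is a hypothetical wrapper, not a function in this file):
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;
// Hypothetical wrapper: hand a simplified value back to the DAG combiner.
static SDValue replaceWith(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
                           SDValue Simplified) {
  // AddTo=true re-queues the users of N so follow-up combines can fire.
  return DCI.CombineTo(N, Simplified, /*AddTo=*/true);
}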
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...