1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
18#include "LoongArchSubtarget.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
28#include "llvm/IR/IRBuilder.h"
30#include "llvm/IR/IntrinsicsLoongArch.h"
32#include "llvm/Support/Debug.h"
37
38using namespace llvm;
39
40#define DEBUG_TYPE "loongarch-isel-lowering"
41
42STATISTIC(NumTailCalls, "Number of tail calls");
43
44static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
45 cl::desc("Trap on integer division by zero."),
46 cl::init(false));
47
48 LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
49 const LoongArchSubtarget &STI)
50 : TargetLowering(TM), Subtarget(STI) {
51
52 MVT GRLenVT = Subtarget.getGRLenVT();
53
54 // Set up the register classes.
55
56 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
57 if (Subtarget.hasBasicF())
58 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
59 if (Subtarget.hasBasicD())
60 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
61
62 static const MVT::SimpleValueType LSXVTs[] = {
63 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
64 static const MVT::SimpleValueType LASXVTs[] = {
65 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
66
67 if (Subtarget.hasExtLSX())
68 for (MVT VT : LSXVTs)
69 addRegisterClass(VT, &LoongArch::LSX128RegClass);
70
71 if (Subtarget.hasExtLASX())
72 for (MVT VT : LASXVTs)
73 addRegisterClass(VT, &LoongArch::LASX256RegClass);
74
75 // Set operations for LA32 and LA64.
76
78 MVT::i1, Promote);
79
86
89 GRLenVT, Custom);
90
92
93 setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
94 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
95 setOperationAction(ISD::VASTART, MVT::Other, Custom);
96 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
97
98 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
99 setOperationAction(ISD::TRAP, MVT::Other, Legal);
100
104
105 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
106
107 // BITREV/REVB requires the 32S feature.
108 if (STI.has32S()) {
109 // Expand bitreverse.i16 with native-width bitrev and shift for now, until
110 // we know which of sll and revb.2h is faster.
113
114 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
115 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
116 // and i32 could still be byte-swapped relatively cheaply.
118 } else {
126 }
127
128 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
129 setOperationAction(ISD::BR_CC, GRLenVT, Expand);
130 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
134
137
138 // Set operations for LA64 only.
139
140 if (Subtarget.is64Bit()) {
147 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
158
162 Custom);
163 setOperationAction(ISD::LROUND, MVT::i32, Custom);
164 }
165
166 // Set operations for LA32 only.
167
168 if (!Subtarget.is64Bit()) {
174 if (Subtarget.hasBasicD())
175 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
176 }
177
178 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
179
180 static const ISD::CondCode FPCCToExpand[] = {
183
184 // Set operations for 'F' feature.
185
186 if (Subtarget.hasBasicF()) {
187 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
188 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
189 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
190 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
191 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
192
194 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
196 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
197 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
198 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
199 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
204 setOperationAction(ISD::FSIN, MVT::f32, Expand);
205 setOperationAction(ISD::FCOS, MVT::f32, Expand);
206 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
207 setOperationAction(ISD::FPOW, MVT::f32, Expand);
209 setOperationAction(ISD::FP16_TO_FP, MVT::f32,
210 Subtarget.isSoftFPABI() ? LibCall : Custom);
211 setOperationAction(ISD::FP_TO_FP16, MVT::f32,
212 Subtarget.isSoftFPABI() ? LibCall : Custom);
213 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
214 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
215 Subtarget.isSoftFPABI() ? LibCall : Custom);
216
217 if (Subtarget.is64Bit())
218 setOperationAction(ISD::FRINT, MVT::f32, Legal);
219
220 if (!Subtarget.hasBasicD()) {
222 if (Subtarget.is64Bit()) {
225 }
226 }
227 }
228
229 // Set operations for 'D' feature.
230
231 if (Subtarget.hasBasicD()) {
232 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
233 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
234 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
235 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
236 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
237 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
238 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
239
241 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
245 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
246 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
247 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
249 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
251 setOperationAction(ISD::FSIN, MVT::f64, Expand);
252 setOperationAction(ISD::FCOS, MVT::f64, Expand);
253 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
254 setOperationAction(ISD::FPOW, MVT::f64, Expand);
256 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
257 setOperationAction(ISD::FP_TO_FP16, MVT::f64,
258 Subtarget.isSoftFPABI() ? LibCall : Custom);
259 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
260 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
261 Subtarget.isSoftFPABI() ? LibCall : Custom);
262
263 if (Subtarget.is64Bit())
264 setOperationAction(ISD::FRINT, MVT::f64, Legal);
265 }
266
267 // Set operations for 'LSX' feature.
268
269 if (Subtarget.hasExtLSX()) {
270 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
271 // Expand all truncating stores and extending loads.
272 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
273 setTruncStoreAction(VT, InnerVT, Expand);
276 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
277 }
278 // By default everything must be expanded. Then we will selectively turn
279 // on ones that can be effectively codegen'd.
280 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
281 setOperationAction(Op, VT, Expand);
282 }
283
284 for (MVT VT : LSXVTs) {
285 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
286 setOperationAction(ISD::BITCAST, VT, Legal);
288
292
297 }
298 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
301 Legal);
303 VT, Legal);
310 Expand);
318 }
319 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
321 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
323 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
326 }
327 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
331 setOperationAction(ISD::FSQRT, VT, Legal);
332 setOperationAction(ISD::FNEG, VT, Legal);
335 VT, Expand);
337 }
339 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
340 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
341 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
342 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
343
344 for (MVT VT :
345 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
346 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
348 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
349 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
350 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
351 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
352 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
353 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
354 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
355 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
356 }
357 }
358
359 // Set operations for 'LASX' feature.
360
361 if (Subtarget.hasExtLASX()) {
362 for (MVT VT : LASXVTs) {
363 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
364 setOperationAction(ISD::BITCAST, VT, Legal);
366
372
376 }
377 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
380 Legal);
382 VT, Legal);
389 Expand);
397 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
398 }
399 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
401 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
403 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
406 }
407 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
411 setOperationAction(ISD::FSQRT, VT, Legal);
412 setOperationAction(ISD::FNEG, VT, Legal);
415 VT, Expand);
417 }
418 }
419
420 // Set DAG combine for LA32 and LA64.
421
426
427 // Set DAG combine for 'LSX' feature.
428
429 if (Subtarget.hasExtLSX()) {
431 setTargetDAGCombine(ISD::BITCAST);
432 }
433
434 // Set DAG combine for 'LASX' feature.
435
436 if (Subtarget.hasExtLASX())
438
439 // Compute derived properties from the register classes.
440 computeRegisterProperties(Subtarget.getRegisterInfo());
441
443
446
447 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
448
450
451 // Function alignments.
453 // Set preferred alignments.
454 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
455 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
456 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
457
458 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
459 if (Subtarget.hasLAMCAS())
460 setMinCmpXchgSizeInBits(8);
461
462 if (Subtarget.hasSCQ()) {
463 setMaxAtomicSizeInBitsSupported(128);
464 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
465 }
466}
467
468 bool LoongArchTargetLowering::isOffsetFoldingLegal(
469 const GlobalAddressSDNode *GA) const {
470 // In order to maximise the opportunity for common subexpression elimination,
471 // keep a separate ADD node for the global address offset instead of folding
472 // it in the global address node. Later peephole optimisations may choose to
473 // fold it back in when profitable.
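// For instance, accesses to (gv + 8) and (gv + 12) can then share a single
// materialisation of gv's address, with the two offsets kept as separate ADDs.
// (Illustrative example, not part of the upstream comment.)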
474 return false;
475}
476
477 SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
478 SelectionDAG &DAG) const {
479 switch (Op.getOpcode()) {
480 case ISD::ATOMIC_FENCE:
481 return lowerATOMIC_FENCE(Op, DAG);
482 case ISD::EH_DWARF_CFA:
483 return lowerEH_DWARF_CFA(Op, DAG);
484 case ISD::GlobalAddress:
485 return lowerGlobalAddress(Op, DAG);
486 case ISD::GlobalTLSAddress:
487 return lowerGlobalTLSAddress(Op, DAG);
488 case ISD::INTRINSIC_WO_CHAIN:
489 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
490 case ISD::INTRINSIC_W_CHAIN:
491 return lowerINTRINSIC_W_CHAIN(Op, DAG);
492 case ISD::INTRINSIC_VOID:
493 return lowerINTRINSIC_VOID(Op, DAG);
494 case ISD::BlockAddress:
495 return lowerBlockAddress(Op, DAG);
496 case ISD::JumpTable:
497 return lowerJumpTable(Op, DAG);
498 case ISD::SHL_PARTS:
499 return lowerShiftLeftParts(Op, DAG);
500 case ISD::SRA_PARTS:
501 return lowerShiftRightParts(Op, DAG, true);
502 case ISD::SRL_PARTS:
503 return lowerShiftRightParts(Op, DAG, false);
504 case ISD::ConstantPool:
505 return lowerConstantPool(Op, DAG);
506 case ISD::FP_TO_SINT:
507 return lowerFP_TO_SINT(Op, DAG);
508 case ISD::BITCAST:
509 return lowerBITCAST(Op, DAG);
510 case ISD::UINT_TO_FP:
511 return lowerUINT_TO_FP(Op, DAG);
512 case ISD::SINT_TO_FP:
513 return lowerSINT_TO_FP(Op, DAG);
514 case ISD::VASTART:
515 return lowerVASTART(Op, DAG);
516 case ISD::FRAMEADDR:
517 return lowerFRAMEADDR(Op, DAG);
518 case ISD::RETURNADDR:
519 return lowerRETURNADDR(Op, DAG);
520 case ISD::WRITE_REGISTER:
521 return lowerWRITE_REGISTER(Op, DAG);
522 case ISD::INSERT_VECTOR_ELT:
523 return lowerINSERT_VECTOR_ELT(Op, DAG);
524 case ISD::EXTRACT_VECTOR_ELT:
525 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
526 case ISD::BUILD_VECTOR:
527 return lowerBUILD_VECTOR(Op, DAG);
528 case ISD::CONCAT_VECTORS:
529 return lowerCONCAT_VECTORS(Op, DAG);
530 case ISD::VECTOR_SHUFFLE:
531 return lowerVECTOR_SHUFFLE(Op, DAG);
532 case ISD::BITREVERSE:
533 return lowerBITREVERSE(Op, DAG);
534 case ISD::SCALAR_TO_VECTOR:
535 return lowerSCALAR_TO_VECTOR(Op, DAG);
536 case ISD::PREFETCH:
537 return lowerPREFETCH(Op, DAG);
538 case ISD::SELECT:
539 return lowerSELECT(Op, DAG);
540 case ISD::BRCOND:
541 return lowerBRCOND(Op, DAG);
542 case ISD::FP_TO_FP16:
543 return lowerFP_TO_FP16(Op, DAG);
544 case ISD::FP16_TO_FP:
545 return lowerFP16_TO_FP(Op, DAG);
546 case ISD::FP_TO_BF16:
547 return lowerFP_TO_BF16(Op, DAG);
548 case ISD::BF16_TO_FP:
549 return lowerBF16_TO_FP(Op, DAG);
550 case ISD::VECREDUCE_ADD:
551 return lowerVECREDUCE_ADD(Op, DAG);
552 case ISD::VECREDUCE_AND:
553 case ISD::VECREDUCE_OR:
554 case ISD::VECREDUCE_XOR:
555 case ISD::VECREDUCE_SMAX:
556 case ISD::VECREDUCE_SMIN:
557 case ISD::VECREDUCE_UMAX:
558 case ISD::VECREDUCE_UMIN:
559 return lowerVECREDUCE(Op, DAG);
560 }
561 return SDValue();
562}
563
564// Lower vecreduce_add using vhaddw instructions.
565// For example:
566// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
567// can be lowered to:
568// VHADDW_D_W vr0, vr0, vr0
569// VHADDW_Q_D vr0, vr0, vr0
570// VPICKVE2GR_D a0, vr0, 0
571// ADDI_W a0, a0, 0
572SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
573 SelectionDAG &DAG) const {
574
575 SDLoc DL(Op);
576 MVT OpVT = Op.getSimpleValueType();
577 SDValue Val = Op.getOperand(0);
578
579 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
580 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
581
582 unsigned LegalVecSize = 128;
583 bool isLASX256Vector =
584 Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;
585
586 // Ensure the operand type is legal, widening it if necessary.
587 while (!isTypeLegal(Val.getSimpleValueType())) {
588 Val = DAG.WidenVector(Val, DL);
589 }
590
591 // NumEles determines the iteration count; v4i32 for LSX and v8i32 for
592 // LASX should iterate the same number of times.
593 if (isLASX256Vector) {
594 NumEles /= 2;
595 LegalVecSize = 256;
596 }
597
598 for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
599 MVT IntTy = MVT::getIntegerVT(EleBits);
600 MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
601 Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
602 }
603
604 if (isLASX256Vector) {
605 SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
606 DAG.getConstant(2, DL, MVT::i64));
607 Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
608 }
609
610 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
611 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
612}
613
614// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
615// For example:
616// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
617// can be lowered to:
618// VBSRL_V vr1, vr0, 8
619// VMAX_W vr0, vr1, vr0
620// VBSRL_V vr1, vr0, 4
621// VMAX_W vr0, vr1, vr0
622// VPICKVE2GR_W a0, vr0, 0
623// For a 256-bit vector, the operation is illegal and is split into two
624// 128-bit vectors by default, which are then processed here.
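// In the loop below, the VBSRL amount `i * EleBits / 16` is half of the
// remaining `i` elements expressed in bytes (i * EleBits / 8 / 2), so each
// step folds the upper half of the remaining elements onto the lower half.
// (Explanatory note; for v4i32 this gives byte shifts of 8 and then 4,
// matching the sequence above.)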
625SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
626 SelectionDAG &DAG) const {
627 SDLoc DL(Op);
628
629 MVT OpVT = Op.getSimpleValueType();
630 SDValue Val = Op.getOperand(0);
631
632 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
633 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
634
635 // Ensure the operand type is legal, widening it if necessary.
636 while (!isTypeLegal(Val.getSimpleValueType())) {
637 Val = DAG.WidenVector(Val, DL);
638 }
639
640 unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
641 MVT VecTy = Val.getSimpleValueType();
642
643 for (int i = NumEles; i > 1; i /= 2) {
644 SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, MVT::i64);
645 SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
646 Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
647 }
648
649 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
650 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
651}
652
653SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
654 SelectionDAG &DAG) const {
655 unsigned IsData = Op.getConstantOperandVal(4);
656
657 // We don't support non-data prefetch.
658 // Just preserve the chain.
659 if (!IsData)
660 return Op.getOperand(0);
661
662 return Op;
663}
664
665// Return true if Val is equal to (setcc LHS, RHS, CC).
666// Return false if Val is the inverse of (setcc LHS, RHS, CC).
667// Otherwise, return std::nullopt.
668static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
669 ISD::CondCode CC, SDValue Val) {
670 assert(Val->getOpcode() == ISD::SETCC);
671 SDValue LHS2 = Val.getOperand(0);
672 SDValue RHS2 = Val.getOperand(1);
673 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
674
675 if (LHS == LHS2 && RHS == RHS2) {
676 if (CC == CC2)
677 return true;
678 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
679 return false;
680 } else if (LHS == RHS2 && RHS == LHS2) {
681 CC2 = ISD::getSetCCSwappedOperands(CC2);
682 if (CC == CC2)
683 return true;
684 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
685 return false;
686 }
687
688 return std::nullopt;
689}
690
691 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
692 const LoongArchSubtarget &Subtarget) {
693 SDValue CondV = N->getOperand(0);
694 SDValue TrueV = N->getOperand(1);
695 SDValue FalseV = N->getOperand(2);
696 MVT VT = N->getSimpleValueType(0);
697 SDLoc DL(N);
698
699 // (select c, -1, y) -> -c | y
700 if (isAllOnesConstant(TrueV)) {
701 SDValue Neg = DAG.getNegative(CondV, DL, VT);
702 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
703 }
704 // (select c, y, -1) -> (c-1) | y
705 if (isAllOnesConstant(FalseV)) {
706 SDValue Neg =
707 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
708 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
709 }
710
711 // (select c, 0, y) -> (c-1) & y
712 if (isNullConstant(TrueV)) {
713 SDValue Neg =
714 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
715 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
716 }
717 // (select c, y, 0) -> -c & y
718 if (isNullConstant(FalseV)) {
719 SDValue Neg = DAG.getNegative(CondV, DL, VT);
720 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
721 }
722
723 // select c, ~x, x --> xor -c, x
724 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
725 const APInt &TrueVal = TrueV->getAsAPIntVal();
726 const APInt &FalseVal = FalseV->getAsAPIntVal();
727 if (~TrueVal == FalseVal) {
728 SDValue Neg = DAG.getNegative(CondV, DL, VT);
729 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
730 }
731 }
732
733 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
734 // when both truev and falsev are also setcc.
735 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
736 FalseV.getOpcode() == ISD::SETCC) {
737 SDValue LHS = CondV.getOperand(0);
738 SDValue RHS = CondV.getOperand(1);
739 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
740
741 // (select x, x, y) -> x | y
742 // (select !x, x, y) -> x & y
743 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
744 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
745 DAG.getFreeze(FalseV));
746 }
747 // (select x, y, x) -> x & y
748 // (select !x, y, x) -> x | y
749 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
750 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
751 DAG.getFreeze(TrueV), FalseV);
752 }
753 }
754
755 return SDValue();
756}
757
758// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
759// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
760// For now we only consider the transformation profitable if `binOp(c0, c1)` ends up
761// being `0` or `-1`. In such cases we can replace `select` with `and`.
762// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
763// than `c0`?
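// Illustrative example (constants chosen here, not taken from a test case):
//   (and (select cond, X, 0xf00), 0xff)
// becomes
//   (select cond, (and X, 0xff), (and 0xf00, 0xff))
// and since (and 0xf00, 0xff) == 0, the result has the form
// (select cond, y, 0), which combineSelectToBinOp then turns into -cond & y.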
764static SDValue
765 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
766 const LoongArchSubtarget &Subtarget) {
767 unsigned SelOpNo = 0;
768 SDValue Sel = BO->getOperand(0);
769 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
770 SelOpNo = 1;
771 Sel = BO->getOperand(1);
772 }
773
774 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
775 return SDValue();
776
777 unsigned ConstSelOpNo = 1;
778 unsigned OtherSelOpNo = 2;
779 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
780 ConstSelOpNo = 2;
781 OtherSelOpNo = 1;
782 }
783 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
784 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
785 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
786 return SDValue();
787
788 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
789 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
790 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
791 return SDValue();
792
793 SDLoc DL(Sel);
794 EVT VT = BO->getValueType(0);
795
796 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
797 if (SelOpNo == 1)
798 std::swap(NewConstOps[0], NewConstOps[1]);
799
800 SDValue NewConstOp =
801 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
802 if (!NewConstOp)
803 return SDValue();
804
805 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
806 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
807 return SDValue();
808
809 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
810 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
811 if (SelOpNo == 1)
812 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
813 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
814
815 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
816 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
817 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
818}
819
820// Changes the condition code and swaps operands if necessary, so the SetCC
821// operation matches one of the comparisons supported directly by branches
822// in the LoongArch ISA. May adjust compares to favor compare with 0 over
823// compare with 1/-1.
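// Illustrative example (not taken from the code below): on LA64 the
// single-bit test
//   (setcc (and x, 0x1000), 0, seteq)
// is rewritten as
//   (setcc (shl x, 51), 0, setge)
// i.e. bit 12 is moved to the sign bit (51 = 64 - 1 - 12) and tested with a
// signed compare against zero.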
824 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
825 ISD::CondCode &CC, SelectionDAG &DAG) {
826 // If this is a single bit test that can't be handled by ANDI, shift the
827 // bit to be tested to the MSB and perform a signed compare with 0.
828 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
829 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
830 isa<ConstantSDNode>(LHS.getOperand(1))) {
831 uint64_t Mask = LHS.getConstantOperandVal(1);
832 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
833 unsigned ShAmt = 0;
834 if (isPowerOf2_64(Mask)) {
835 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
836 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
837 } else {
838 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
839 }
840
841 LHS = LHS.getOperand(0);
842 if (ShAmt != 0)
843 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
844 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
845 return;
846 }
847 }
848
849 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
850 int64_t C = RHSC->getSExtValue();
851 switch (CC) {
852 default:
853 break;
854 case ISD::SETGT:
855 // Convert X > -1 to X >= 0.
856 if (C == -1) {
857 RHS = DAG.getConstant(0, DL, RHS.getValueType());
858 CC = ISD::SETGE;
859 return;
860 }
861 break;
862 case ISD::SETLT:
863 // Convert X < 1 to 0 >= X.
864 if (C == 1) {
865 RHS = LHS;
866 LHS = DAG.getConstant(0, DL, RHS.getValueType());
867 CC = ISD::SETGE;
868 return;
869 }
870 break;
871 }
872 }
873
874 switch (CC) {
875 default:
876 break;
877 case ISD::SETGT:
878 case ISD::SETLE:
879 case ISD::SETUGT:
880 case ISD::SETULE:
881 CC = ISD::getSetCCSwappedOperands(CC);
882 std::swap(LHS, RHS);
883 break;
884 }
885}
886
887SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
888 SelectionDAG &DAG) const {
889 SDValue CondV = Op.getOperand(0);
890 SDValue TrueV = Op.getOperand(1);
891 SDValue FalseV = Op.getOperand(2);
892 SDLoc DL(Op);
893 MVT VT = Op.getSimpleValueType();
894 MVT GRLenVT = Subtarget.getGRLenVT();
895
896 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
897 return V;
898
899 if (Op.hasOneUse()) {
900 unsigned UseOpc = Op->user_begin()->getOpcode();
901 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
902 SDNode *BinOp = *Op->user_begin();
903 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
904 DAG, Subtarget)) {
905 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
906 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
907 // may return a constant node and cause a crash in lowerSELECT.
908 if (NewSel.getOpcode() == ISD::SELECT)
909 return lowerSELECT(NewSel, DAG);
910 return NewSel;
911 }
912 }
913 }
914
915 // If the condition is not an integer SETCC which operates on GRLenVT, we need
916 // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
917 // (select condv, truev, falsev)
918 // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
919 if (CondV.getOpcode() != ISD::SETCC ||
920 CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
921 SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
922 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
923
924 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
925
926 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
927 }
928
929 // If the CondV is the output of a SETCC node which operates on GRLenVT
930 // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
931 // to take advantage of the integer compare+branch instructions. i.e.: (select
932 // (setcc lhs, rhs, cc), truev, falsev)
933 // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
934 SDValue LHS = CondV.getOperand(0);
935 SDValue RHS = CondV.getOperand(1);
936 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
937
938 // Special case for a select of 2 constants that have a difference of 1.
939 // Normally this is done by DAGCombine, but if the select is introduced by
940 // type legalization or op legalization, we miss it. Restricting to SETLT
941 // case for now because that is what signed saturating add/sub need.
942 // FIXME: We don't need the condition to be SETLT or even a SETCC,
943 // but we would probably want to swap the true/false values if the condition
944 // is SETGE/SETLE to avoid an XORI.
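// e.g. (select (setcc a, b, setlt), 5, 4) becomes (add (setcc a, b, setlt), 4):
// the boolean is 1 when a < b, giving 5, and 0 otherwise, giving 4.
// (Illustrative constants, not from the surrounding code.)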
945 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
946 CCVal == ISD::SETLT) {
947 const APInt &TrueVal = TrueV->getAsAPIntVal();
948 const APInt &FalseVal = FalseV->getAsAPIntVal();
949 if (TrueVal - 1 == FalseVal)
950 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
951 if (TrueVal + 1 == FalseVal)
952 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
953 }
954
955 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
956 // 1 < x ? x : 1 -> 0 < x ? x : 1
957 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
958 RHS == TrueV && LHS == FalseV) {
959 LHS = DAG.getConstant(0, DL, VT);
960 // 0 <u x is the same as x != 0.
961 if (CCVal == ISD::SETULT) {
962 std::swap(LHS, RHS);
963 CCVal = ISD::SETNE;
964 }
965 }
966
967 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
968 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
969 RHS == FalseV) {
970 RHS = DAG.getConstant(0, DL, VT);
971 }
972
973 SDValue TargetCC = DAG.getCondCode(CCVal);
974
975 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
976 // (select (setcc lhs, rhs, CC), constant, falsev)
977 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
978 std::swap(TrueV, FalseV);
979 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
980 }
981
982 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
983 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
984}
985
986SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
987 SelectionDAG &DAG) const {
988 SDValue CondV = Op.getOperand(1);
989 SDLoc DL(Op);
990 MVT GRLenVT = Subtarget.getGRLenVT();
991
992 if (CondV.getOpcode() == ISD::SETCC) {
993 if (CondV.getOperand(0).getValueType() == GRLenVT) {
994 SDValue LHS = CondV.getOperand(0);
995 SDValue RHS = CondV.getOperand(1);
996 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
997
998 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
999
1000 SDValue TargetCC = DAG.getCondCode(CCVal);
1001 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1002 Op.getOperand(0), LHS, RHS, TargetCC,
1003 Op.getOperand(2));
1004 } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
1005 return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
1006 Op.getOperand(0), CondV, Op.getOperand(2));
1007 }
1008 }
1009
1010 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1011 Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
1012 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
1013}
1014
1015SDValue
1016LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1017 SelectionDAG &DAG) const {
1018 SDLoc DL(Op);
1019 MVT OpVT = Op.getSimpleValueType();
1020
1021 SDValue Vector = DAG.getUNDEF(OpVT);
1022 SDValue Val = Op.getOperand(0);
1023 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
1024
1025 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
1026}
1027
1028SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
1029 SelectionDAG &DAG) const {
1030 EVT ResTy = Op->getValueType(0);
1031 SDValue Src = Op->getOperand(0);
1032 SDLoc DL(Op);
1033
1034 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
1035 unsigned int OrigEltNum = ResTy.getVectorNumElements();
1036 unsigned int NewEltNum = NewVT.getVectorNumElements();
1037
1038 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
1039
1040 SmallVector<SDValue, 8> Ops;
1041 for (unsigned int i = 0; i < NewEltNum; i++) {
1042 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
1043 DAG.getConstant(i, DL, MVT::i64));
1044 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
1045 ? (unsigned)LoongArchISD::BITREV_8B
1046 : (unsigned)ISD::BITREVERSE;
1047 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
1048 }
1049 SDValue Res =
1050 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
1051
1052 switch (ResTy.getSimpleVT().SimpleTy) {
1053 default:
1054 return SDValue();
1055 case MVT::v16i8:
1056 case MVT::v32i8:
1057 return Res;
1058 case MVT::v8i16:
1059 case MVT::v16i16:
1060 case MVT::v4i32:
1061 case MVT::v8i32: {
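// Bit-reversing each i64 chunk above also reversed the order of the narrower
// elements within that chunk, so build a mask that walks each chunk backwards
// (e.g. <3, 2, 1, 0, 7, 6, 5, 4> for v8i16) to restore the element order.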
1062 SmallVector<int, 32> Mask;
1063 for (unsigned int i = 0; i < NewEltNum; i++)
1064 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
1065 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
1066 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
1067 }
1068 }
1069}
1070
1071// Widen element type to get a new mask value (if possible).
1072// For example:
1073// shufflevector <4 x i32> %a, <4 x i32> %b,
1074// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1075// is equivalent to:
1076// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1077// can be lowered to:
1078// VPACKOD_D vr0, vr0, vr1
1079 static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1080 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1081 unsigned EltBits = VT.getScalarSizeInBits();
1082
1083 if (EltBits > 32 || EltBits == 1)
1084 return SDValue();
1085
1086 SmallVector<int, 8> NewMask;
1087 if (widenShuffleMaskElts(Mask, NewMask)) {
1088 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
1089 : MVT::getIntegerVT(EltBits * 2);
1090 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
1091 if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
1092 SDValue NewV1 = DAG.getBitcast(NewVT, V1);
1093 SDValue NewV2 = DAG.getBitcast(NewVT, V2);
1094 return DAG.getBitcast(
1095 VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
1096 }
1097 }
1098
1099 return SDValue();
1100}
1101
1102/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
1103/// instruction.
1104// The function matches elements from one of the input vectors shuffled to the
1105// left or right with zeroable elements 'shifted in'. It handles both the
1106// strictly bit-wise element shifts and the byte shift across an entire 128-bit
1107// lane.
1108// Mostly copied from X86.
1109static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
1110 unsigned ScalarSizeInBits, ArrayRef<int> Mask,
1111 int MaskOffset, const APInt &Zeroable) {
1112 int Size = Mask.size();
1113 unsigned SizeInBits = Size * ScalarSizeInBits;
1114
1115 auto CheckZeros = [&](int Shift, int Scale, bool Left) {
1116 for (int i = 0; i < Size; i += Scale)
1117 for (int j = 0; j < Shift; ++j)
1118 if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
1119 return false;
1120
1121 return true;
1122 };
1123
1124 auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
1125 int Step = 1) {
1126 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
1127 if (!(Mask[i] == -1 || Mask[i] == Low))
1128 return false;
1129 return true;
1130 };
1131
1132 auto MatchShift = [&](int Shift, int Scale, bool Left) {
1133 for (int i = 0; i != Size; i += Scale) {
1134 unsigned Pos = Left ? i + Shift : i;
1135 unsigned Low = Left ? i : i + Shift;
1136 unsigned Len = Scale - Shift;
1137 if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
1138 return -1;
1139 }
1140
1141 int ShiftEltBits = ScalarSizeInBits * Scale;
1142 bool ByteShift = ShiftEltBits > 64;
1143 Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
1144 : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
1145 int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
1146
1147 // Normalize the scale for byte shifts to still produce an i64 element
1148 // type.
1149 Scale = ByteShift ? Scale / 2 : Scale;
1150
1151 // We need to round trip through the appropriate type for the shift.
1152 MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
1153 ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
1154 : MVT::getVectorVT(ShiftSVT, Size / Scale);
1155 return (int)ShiftAmt;
1156 };
1157
1158 unsigned MaxWidth = 128;
1159 for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
1160 for (int Shift = 1; Shift != Scale; ++Shift)
1161 for (bool Left : {true, false})
1162 if (CheckZeros(Shift, Scale, Left)) {
1163 int ShiftAmt = MatchShift(Shift, Scale, Left);
1164 if (0 < ShiftAmt)
1165 return ShiftAmt;
1166 }
1167
1168 // no match
1169 return -1;
1170}
1171
1172/// Lower VECTOR_SHUFFLE as shift (if possible).
1173///
1174/// For example:
1175/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1176/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1177/// is lowered to:
1178/// (VBSLL_V $v0, $v0, 4)
1179///
1180/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1181/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1182/// is lowered to:
1183/// (VSLLI_D $v0, $v0, 32)
1184 static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef<int> Mask,
1185 MVT VT, SDValue V1, SDValue V2,
1186 SelectionDAG &DAG,
1187 const LoongArchSubtarget &Subtarget,
1188 const APInt &Zeroable) {
1189 int Size = Mask.size();
1190 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1191
1192 MVT ShiftVT;
1193 SDValue V = V1;
1194 unsigned Opcode;
1195
1196 // Try to match shuffle against V1 shift.
1197 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1198 Mask, 0, Zeroable);
1199
1200 // If V1 failed, try to match shuffle against V2 shift.
1201 if (ShiftAmt < 0) {
1202 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1203 Mask, Size, Zeroable);
1204 V = V2;
1205 }
1206
1207 if (ShiftAmt < 0)
1208 return SDValue();
1209
1210 assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1211 "Illegal integer vector type");
1212 V = DAG.getBitcast(ShiftVT, V);
1213 V = DAG.getNode(Opcode, DL, ShiftVT, V,
1214 DAG.getConstant(ShiftAmt, DL, Subtarget.getGRLenVT()));
1215 return DAG.getBitcast(VT, V);
1216}
1217
1218/// Determine whether a range fits a regular pattern of values.
1219/// This function accounts for the possibility of jumping over the End iterator.
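/// For example, with Mask = <0, 8, 1, 9, 2, 10, 3, 11> (a v8i16 VILVL-style
/// mask), fitsRegularPattern(Begin, 2, End, 0, 1) checks elements 0, 2, 4, 6
/// against 0, 1, 2, 3, while fitsRegularPattern(Begin + 1, 2, End, 8, 1)
/// checks elements 1, 3, 5, 7 against 8, 9, 10, 11; undef (-1) entries always
/// match. (Illustrative mask, mirroring the VILVL lowering further down.)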
1220template <typename ValType>
1221static bool
1222 fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
1223 unsigned CheckStride,
1224 typename SmallVectorImpl<ValType>::const_iterator End,
1225 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1226 auto &I = Begin;
1227
1228 while (I != End) {
1229 if (*I != -1 && *I != ExpectedIndex)
1230 return false;
1231 ExpectedIndex += ExpectedIndexStride;
1232
1233 // Incrementing past End is undefined behaviour so we must increment one
1234 // step at a time and check for End at each step.
1235 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1236 ; // Empty loop body.
1237 }
1238 return true;
1239}
1240
1241/// Compute whether each element of a shuffle is zeroable.
1242///
1243/// A "zeroable" vector shuffle element is one which can be lowered to zero.
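/// For example, shuffling %0 with zeroinitializer using mask <4, 0, 1, 2>
/// marks element 0 as zeroable, since it reads from the all-zero second
/// operand; this is what lets the shift-based lowerings above treat it as
/// shifted-in zero bits. (Illustrative mask, matching the VBSLL example above.)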
1244 static void computeZeroableShuffleElements(ArrayRef<int> Mask, SDValue V1,
1245 SDValue V2, APInt &KnownUndef,
1246 APInt &KnownZero) {
1247 int Size = Mask.size();
1248 KnownUndef = KnownZero = APInt::getZero(Size);
1249
1250 V1 = peekThroughBitcasts(V1);
1251 V2 = peekThroughBitcasts(V2);
1252
1253 bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
1254 bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
1255
1256 int VectorSizeInBits = V1.getValueSizeInBits();
1257 int ScalarSizeInBits = VectorSizeInBits / Size;
1258 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1259 (void)ScalarSizeInBits;
1260
1261 for (int i = 0; i < Size; ++i) {
1262 int M = Mask[i];
1263 if (M < 0) {
1264 KnownUndef.setBit(i);
1265 continue;
1266 }
1267 if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1268 KnownZero.setBit(i);
1269 continue;
1270 }
1271 }
1272}
1273
1274/// Test whether a shuffle mask is equivalent within each sub-lane.
1275///
1276/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1277/// non-trivial to compute in the face of undef lanes. The representation is
1278/// suitable for use with existing 128-bit shuffles as entries from the second
1279/// vector have been remapped to [LaneSize, 2*LaneSize).
1280static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1281 ArrayRef<int> Mask,
1282 SmallVectorImpl<int> &RepeatedMask) {
1283 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1284 RepeatedMask.assign(LaneSize, -1);
1285 int Size = Mask.size();
1286 for (int i = 0; i < Size; ++i) {
1287 assert(Mask[i] == -1 || Mask[i] >= 0);
1288 if (Mask[i] < 0)
1289 continue;
1290 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1291 // This entry crosses lanes, so there is no way to model this shuffle.
1292 return false;
1293
1294 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1295 // Adjust second vector indices to start at LaneSize instead of Size.
1296 int LocalM =
1297 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1298 if (RepeatedMask[i % LaneSize] < 0)
1299 // This is the first non-undef entry in this slot of a 128-bit lane.
1300 RepeatedMask[i % LaneSize] = LocalM;
1301 else if (RepeatedMask[i % LaneSize] != LocalM)
1302 // Found a mismatch with the repeated mask.
1303 return false;
1304 }
1305 return true;
1306}
1307
1308/// Attempts to match vector shuffle as byte rotation.
1309 static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
1310 ArrayRef<int> Mask) {
1311
1312 SDValue Lo, Hi;
1313 SmallVector<int, 16> RepeatedMask;
1314
1315 if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
1316 return -1;
1317
1318 int NumElts = RepeatedMask.size();
1319 int Rotation = 0;
1320 int Scale = 16 / NumElts;
1321
1322 for (int i = 0; i < NumElts; ++i) {
1323 int M = RepeatedMask[i];
1324 assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1325 "Unexpected mask index.");
1326 if (M < 0)
1327 continue;
1328
1329 // Determine where a rotated vector would have started.
1330 int StartIdx = i - (M % NumElts);
1331 if (StartIdx == 0)
1332 return -1;
1333
1334 // If we found the tail of a vector the rotation must be the missing
1335 // front. If we found the head of a vector, it must be how much of the
1336 // head.
1337 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1338
1339 if (Rotation == 0)
1340 Rotation = CandidateRotation;
1341 else if (Rotation != CandidateRotation)
1342 return -1;
1343
1344 // Compute which value this mask is pointing at.
1345 SDValue MaskV = M < NumElts ? V1 : V2;
1346
1347 // Compute which of the two target values this index should be assigned
1348 // to. This reflects whether the high elements are remaining or the low
1349 // elements are remaining.
1350 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1351
1352 // Either set up this value if we've not encountered it before, or check
1353 // that it remains consistent.
1354 if (!TargetV)
1355 TargetV = MaskV;
1356 else if (TargetV != MaskV)
1357 return -1;
1358 }
1359
1360 // Check that we successfully analyzed the mask, and normalize the results.
1361 assert(Rotation != 0 && "Failed to locate a viable rotation!");
1362 assert((Lo || Hi) && "Failed to find a rotated input vector!");
1363 if (!Lo)
1364 Lo = Hi;
1365 else if (!Hi)
1366 Hi = Lo;
1367
1368 V1 = Lo;
1369 V2 = Hi;
1370
1371 return Rotation * Scale;
1372}
1373
1374/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1375///
1376/// For example:
1377/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1378/// <2 x i32> <i32 3, i32 0>
1379/// is lowered to:
1380/// (VBSRL_V $v1, $v1, 8)
1381/// (VBSLL_V $v0, $v0, 8)
1382/// (VOR_V $v0, $V0, $v1)
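/// For that mask, matchShuffleAsByteRotate returns a rotation of one element,
/// i.e. 8 bytes for v2i64, so the low input is shifted left by 16 - 8 = 8
/// bytes, the high input is shifted right by 8 bytes, and the two halves are
/// OR'ed together. (Derivation of the example above, not extra functionality.)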
1383static SDValue
1384 lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1385 SDValue V1, SDValue V2, SelectionDAG &DAG,
1386 const LoongArchSubtarget &Subtarget) {
1387
1388 SDValue Lo = V1, Hi = V2;
1389 int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
1390 if (ByteRotation <= 0)
1391 return SDValue();
1392
1393 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
1394 Lo = DAG.getBitcast(ByteVT, Lo);
1395 Hi = DAG.getBitcast(ByteVT, Hi);
1396
1397 int LoByteShift = 16 - ByteRotation;
1398 int HiByteShift = ByteRotation;
1399 MVT GRLenVT = Subtarget.getGRLenVT();
1400
1401 SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
1402 DAG.getConstant(LoByteShift, DL, GRLenVT));
1403 SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
1404 DAG.getConstant(HiByteShift, DL, GRLenVT));
1405 return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
1406}
1407
1408/// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
1409///
1410/// For example:
1411/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1412/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1413/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1414/// is lowered to:
1415/// (VREPLI $v1, 0)
1416/// (VILVL $v0, $v1, $v0)
1417 static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL,
1418 ArrayRef<int> Mask, MVT VT,
1419 SDValue V1, SDValue V2,
1420 SelectionDAG &DAG,
1421 const APInt &Zeroable) {
1422 int Bits = VT.getSizeInBits();
1423 int EltBits = VT.getScalarSizeInBits();
1424 int NumElements = VT.getVectorNumElements();
1425
1426 if (Zeroable.isAllOnes())
1427 return DAG.getConstant(0, DL, VT);
1428
1429 // Define a helper function to check a particular ext-scale and lower to it if
1430 // valid.
1431 auto Lower = [&](int Scale) -> SDValue {
1432 SDValue InputV;
1433 bool AnyExt = true;
1434 int Offset = 0;
1435 for (int i = 0; i < NumElements; i++) {
1436 int M = Mask[i];
1437 if (M < 0)
1438 continue;
1439 if (i % Scale != 0) {
1440 // Each of the extended elements need to be zeroable.
1441 if (!Zeroable[i])
1442 return SDValue();
1443
1444 AnyExt = false;
1445 continue;
1446 }
1447
1448 // Each of the base elements needs to be consecutive indices into the
1449 // same input vector.
1450 SDValue V = M < NumElements ? V1 : V2;
1451 M = M % NumElements;
1452 if (!InputV) {
1453 InputV = V;
1454 Offset = M - (i / Scale);
1455
1456 // These offsets can't be handled
1457 if (Offset % (NumElements / Scale))
1458 return SDValue();
1459 } else if (InputV != V)
1460 return SDValue();
1461
1462 if (M != (Offset + (i / Scale)))
1463 return SDValue(); // Non-consecutive strided elements.
1464 }
1465
1466 // If we fail to find an input, we have a zero-shuffle which should always
1467 // have already been handled.
1468 if (!InputV)
1469 return SDValue();
1470
1471 do {
1472 unsigned VilVLoHi = LoongArchISD::VILVL;
1473 if (Offset >= (NumElements / 2)) {
1474 VilVLoHi = LoongArchISD::VILVH;
1475 Offset -= (NumElements / 2);
1476 }
1477
1478 MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
1479 SDValue Ext =
1480 AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
1481 InputV = DAG.getBitcast(InputVT, InputV);
1482 InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
1483 Scale /= 2;
1484 EltBits *= 2;
1485 NumElements /= 2;
1486 } while (Scale > 1);
1487 return DAG.getBitcast(VT, InputV);
1488 };
1489
1490 // Each iteration, try extending the elements half as much, but into twice as
1491 // many elements.
1492 for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
1493 NumExtElements *= 2) {
1494 if (SDValue V = Lower(NumElements / NumExtElements))
1495 return V;
1496 }
1497 return SDValue();
1498}
1499
1500/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
1501///
1502/// VREPLVEI performs vector broadcast based on an element specified by an
1503/// integer immediate, with its mask being similar to:
1504/// <x, x, x, ...>
1505/// where x is any valid index.
1506///
1507/// When undef's appear in the mask they are treated as if they were whatever
1508/// value is necessary in order to fit the above form.
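/// For example:
///   %2 = shufflevector <4 x i32> %0, <4 x i32> undef,
///                      <4 x i32> <i32 1, i32 1, i32 1, i32 1>
/// can be lowered to roughly:
///   (VREPLVEI_W $v0, $v0, 1)
/// (Illustrative; the exact instruction depends on the element type.)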
1509static SDValue
1510 lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1511 SDValue V1, SDValue V2, SelectionDAG &DAG,
1512 const LoongArchSubtarget &Subtarget) {
1513 int SplatIndex = -1;
1514 for (const auto &M : Mask) {
1515 if (M != -1) {
1516 SplatIndex = M;
1517 break;
1518 }
1519 }
1520
1521 if (SplatIndex == -1)
1522 return DAG.getUNDEF(VT);
1523
1524 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1525 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
1526 APInt Imm(64, SplatIndex);
1527 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1528 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()));
1529 }
1530
1531 return SDValue();
1532}
1533
1534/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
1535///
1536/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
1537/// elements according to a <4 x i2> constant (encoded as an integer immediate).
1538///
1539/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
1540/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
1541/// When undef's appear they are treated as if they were whatever value is
1542/// necessary in order to fit the above forms.
1543///
1544/// For example:
1545/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
1546/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
1547/// i32 7, i32 6, i32 5, i32 4>
1548/// is lowered to:
1549/// (VSHUF4I_H $v0, $v1, 27)
1550/// where the 27 comes from:
1551/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
1552static SDValue
1553 lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1554 SDValue V1, SDValue V2, SelectionDAG &DAG,
1555 const LoongArchSubtarget &Subtarget) {
1556
1557 unsigned SubVecSize = 4;
1558 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1559 SubVecSize = 2;
1560
1561 int SubMask[4] = {-1, -1, -1, -1};
1562 for (unsigned i = 0; i < SubVecSize; ++i) {
1563 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
1564 int M = Mask[j];
1565
1566 // Convert from vector index to 4-element subvector index
1567 // If an index refers to an element outside of the subvector then give up
1568 if (M != -1) {
1569 M -= 4 * (j / SubVecSize);
1570 if (M < 0 || M >= 4)
1571 return SDValue();
1572 }
1573
1574 // If the mask has an undef, replace it with the current index.
1575 // Note that it might still be undef if the current index is also undef
1576 if (SubMask[i] == -1)
1577 SubMask[i] = M;
1578 // Check that non-undef values are the same as in the mask. If they
1579 // aren't then give up
1580 else if (M != -1 && M != SubMask[i])
1581 return SDValue();
1582 }
1583 }
1584
1585 // Calculate the immediate. Replace any remaining undefs with zero
1586 APInt Imm(64, 0);
1587 for (int i = SubVecSize - 1; i >= 0; --i) {
1588 int M = SubMask[i];
1589
1590 if (M == -1)
1591 M = 0;
1592
1593 Imm <<= 2;
1594 Imm |= M & 0x3;
1595 }
1596
1597 MVT GRLenVT = Subtarget.getGRLenVT();
1598
1599 // Return vshuf4i.d
1600 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1601 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1, V2,
1602 DAG.getConstant(Imm, DL, GRLenVT));
1603
1604 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
1605 DAG.getConstant(Imm, DL, GRLenVT));
1606}
1607
1608/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
1609///
1610/// VPACKEV interleaves the even elements from each vector.
1611///
1612/// It is possible to lower into VPACKEV when the mask consists of two of the
1613/// following forms interleaved:
1614/// <0, 2, 4, ...>
1615/// <n, n+2, n+4, ...>
1616/// where n is the number of elements in the vector.
1617/// For example:
1618/// <0, 0, 2, 2, 4, 4, ...>
1619/// <0, n, 2, n+2, 4, n+4, ...>
1620///
1621/// When undef's appear in the mask they are treated as if they were whatever
1622/// value is necessary in order to fit the above forms.
1623 static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
1624 MVT VT, SDValue V1, SDValue V2,
1625 SelectionDAG &DAG) {
1626
1627 const auto &Begin = Mask.begin();
1628 const auto &End = Mask.end();
1629 SDValue OriV1 = V1, OriV2 = V2;
1630
1631 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
1632 V1 = OriV1;
1633 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
1634 V1 = OriV2;
1635 else
1636 return SDValue();
1637
1638 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
1639 V2 = OriV1;
1640 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
1641 V2 = OriV2;
1642 else
1643 return SDValue();
1644
1645 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
1646}
1647
1648/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
1649///
1650/// VPACKOD interleaves the odd elements from each vector.
1651///
1652/// It is possible to lower into VPACKOD when the mask consists of two of the
1653/// following forms interleaved:
1654/// <1, 3, 5, ...>
1655/// <n+1, n+3, n+5, ...>
1656/// where n is the number of elements in the vector.
1657/// For example:
1658/// <1, 1, 3, 3, 5, 5, ...>
1659/// <1, n+1, 3, n+3, 5, n+5, ...>
1660///
1661/// When undef's appear in the mask they are treated as if they were whatever
1662/// value is necessary in order to fit the above forms.
1663 static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
1664 MVT VT, SDValue V1, SDValue V2,
1665 SelectionDAG &DAG) {
1666
1667 const auto &Begin = Mask.begin();
1668 const auto &End = Mask.end();
1669 SDValue OriV1 = V1, OriV2 = V2;
1670
1671 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
1672 V1 = OriV1;
1673 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
1674 V1 = OriV2;
1675 else
1676 return SDValue();
1677
1678 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
1679 V2 = OriV1;
1680 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
1681 V2 = OriV2;
1682 else
1683 return SDValue();
1684
1685 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
1686}
1687
1688/// Lower VECTOR_SHUFFLE into VILVH (if possible).
1689///
1690/// VILVH interleaves consecutive elements from the left (highest-indexed) half
1691/// of each vector.
1692///
1693/// It is possible to lower into VILVH when the mask consists of two of the
1694/// following forms interleaved:
1695/// <x, x+1, x+2, ...>
1696/// <n+x, n+x+1, n+x+2, ...>
1697/// where n is the number of elements in the vector and x is half n.
1698/// For example:
1699/// <x, x, x+1, x+1, x+2, x+2, ...>
1700/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
1701///
1702/// When undef's appear in the mask they are treated as if they were whatever
1703/// value is necessary in order to fit the above forms.
1704 static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask,
1705 MVT VT, SDValue V1, SDValue V2,
1706 SelectionDAG &DAG) {
1707
1708 const auto &Begin = Mask.begin();
1709 const auto &End = Mask.end();
1710 unsigned HalfSize = Mask.size() / 2;
1711 SDValue OriV1 = V1, OriV2 = V2;
1712
1713 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
1714 V1 = OriV1;
1715 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
1716 V1 = OriV2;
1717 else
1718 return SDValue();
1719
1720 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
1721 V2 = OriV1;
1722 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
1723 1))
1724 V2 = OriV2;
1725 else
1726 return SDValue();
1727
1728 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
1729}
1730
1731/// Lower VECTOR_SHUFFLE into VILVL (if possible).
1732///
1733/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
1734/// of each vector.
1735///
1736/// It is possible to lower into VILVL when the mask consists of two of the
1737/// following forms interleaved:
1738/// <0, 1, 2, ...>
1739/// <n, n+1, n+2, ...>
1740/// where n is the number of elements in the vector.
1741/// For example:
1742/// <0, 0, 1, 1, 2, 2, ...>
1743/// <0, n, 1, n+1, 2, n+2, ...>
1744///
1745/// When undef's appear in the mask they are treated as if they were whatever
1746/// value is necessary in order to fit the above forms.
1747 static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
1748 MVT VT, SDValue V1, SDValue V2,
1749 SelectionDAG &DAG) {
1750
1751 const auto &Begin = Mask.begin();
1752 const auto &End = Mask.end();
1753 SDValue OriV1 = V1, OriV2 = V2;
1754
1755 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
1756 V1 = OriV1;
1757 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
1758 V1 = OriV2;
1759 else
1760 return SDValue();
1761
1762 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
1763 V2 = OriV1;
1764 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
1765 V2 = OriV2;
1766 else
1767 return SDValue();
1768
1769 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1770}
1771
1772/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
1773///
1774/// VPICKEV copies the even elements of each vector into the result vector.
1775///
1776/// It is possible to lower into VPICKEV when the mask consists of two of the
1777/// following forms concatenated:
1778/// <0, 2, 4, ...>
1779/// <n, n+2, n+4, ...>
1780/// where n is the number of elements in the vector.
1781/// For example:
1782/// <0, 2, 4, ..., 0, 2, 4, ...>
1783/// <0, 2, 4, ..., n, n+2, n+4, ...>
1784///
1785/// When undef's appear in the mask they are treated as if they were whatever
1786/// value is necessary in order to fit the above forms.
1787 static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
1788 MVT VT, SDValue V1, SDValue V2,
1789 SelectionDAG &DAG) {
1790
1791 const auto &Begin = Mask.begin();
1792 const auto &Mid = Mask.begin() + Mask.size() / 2;
1793 const auto &End = Mask.end();
1794 SDValue OriV1 = V1, OriV2 = V2;
1795
1796 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
1797 V1 = OriV1;
1798 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
1799 V1 = OriV2;
1800 else
1801 return SDValue();
1802
1803 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
1804 V2 = OriV1;
1805 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
1806 V2 = OriV2;
1807
1808 else
1809 return SDValue();
1810
1811 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1812}
1813
1814/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
1815///
1816/// VPICKOD copies the odd elements of each vector into the result vector.
1817///
1818/// It is possible to lower into VPICKOD when the mask consists of two of the
1819/// following forms concatenated:
1820/// <1, 3, 5, ...>
1821/// <n+1, n+3, n+5, ...>
1822/// where n is the number of elements in the vector.
1823/// For example:
1824/// <1, 3, 5, ..., 1, 3, 5, ...>
1825/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
1826///
1827/// When undef's appear in the mask they are treated as if they were whatever
1828/// value is necessary in order to fit the above forms.
1829 static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
1830 MVT VT, SDValue V1, SDValue V2,
1831 SelectionDAG &DAG) {
1832
1833 const auto &Begin = Mask.begin();
1834 const auto &Mid = Mask.begin() + Mask.size() / 2;
1835 const auto &End = Mask.end();
1836 SDValue OriV1 = V1, OriV2 = V2;
1837
1838 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
1839 V1 = OriV1;
1840 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
1841 V1 = OriV2;
1842 else
1843 return SDValue();
1844
1845 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
1846 V2 = OriV1;
1847 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
1848 V2 = OriV2;
1849 else
1850 return SDValue();
1851
1852 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
1853}
1854
1855/// Lower VECTOR_SHUFFLE into VSHUF.
1856///
1857/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
1858/// adding it as an operand to the resulting VSHUF.
1859 static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
1860 MVT VT, SDValue V1, SDValue V2,
1861 SelectionDAG &DAG) {
1862
1863 SmallVector<SDValue, 16> Ops;
1864 for (auto M : Mask)
1865 Ops.push_back(DAG.getConstant(M, DL, MVT::i64));
1866
1867 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
1868 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
1869
1870 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
1871 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
1872 // VSHUF concatenates the vectors in a bitwise fashion:
1873 // <0b00, 0b01> + <0b10, 0b11> ->
1874 // 0b0100 + 0b1110 -> 0b01001110
1875 // <0b10, 0b11, 0b00, 0b01>
1876 // We must therefore swap the operands to get the correct result.
1877 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
1878}
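// Illustrative sketch (assumed example, not from the original source): for a
// v4i32 shuffle with mask <0, 5, 2, 7>, the mask is materialized as the
// integer build_vector <0, 5, 2, 7> and the emitted node is
//   VSHUF MaskVec, V2, V1
// i.e. the inputs are passed in swapped order so that, after the bitwise
// concatenation described above, mask indices 0..3 still refer to the first
// shuffle input and indices 4..7 to the second.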
1879
1880/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
1881///
1882/// This routine breaks down the specific type of 128-bit shuffle and
1883/// dispatches to the lowering routines accordingly.
1884 static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1885 SDValue V1, SDValue V2, SelectionDAG &DAG,
1886 const LoongArchSubtarget &Subtarget) {
1887 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
1888 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
1889 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
1890 "Vector type is unsupported for lsx!");
1892 "Two operands have different types!");
1893 assert(VT.getVectorNumElements() == Mask.size() &&
1894 "Unexpected mask size for shuffle!");
1895 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
1896
1897 APInt KnownUndef, KnownZero;
1898 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
1899 APInt Zeroable = KnownUndef | KnownZero;
1900
1901 SDValue Result;
1902 // TODO: Add more comparison patterns.
1903 if (V2.isUndef()) {
1904 if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG,
1905 Subtarget)))
1906 return Result;
1907 if ((Result =
1908 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
1909 return Result;
1910
1911 // TODO: This commented-out code may be enabled in the future to better
1912 // match the pattern for instruction selection.
1913 /* V2 = V1; */
1914 }
1915
1916 // It is recommended not to change the pattern comparison order for better
1917 // performance.
1918 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
1919 return Result;
1920 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
1921 return Result;
1922 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
1923 return Result;
1924 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
1925 return Result;
1926 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
1927 return Result;
1928 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
1929 return Result;
1930 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
1931 (Result =
1932 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
1933 return Result;
1934 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
1935 Zeroable)))
1936 return Result;
1937 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
1938 Zeroable)))
1939 return Result;
1940 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
1941 Subtarget)))
1942 return Result;
1943 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
1944 return NewShuffle;
1945 if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
1946 return Result;
1947 return SDValue();
1948}
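// Illustrative example (assumed, not from the original source) of how this
// dispatcher behaves: the IR shuffle
//   %r = shufflevector <4 x i32> %a, <4 x i32> %b,
//                      <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// interleaves the low halves of both inputs, so it is caught by the
// lowerVECTOR_SHUFFLE_VILVL check above before the generic VSHUF fallback is
// ever tried.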
1949
1950/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
1951///
1952 /// It is an XVREPLVEI when the mask is:
1953 /// <x, x, x, ..., x+n, x+n, x+n, ...>
1954 /// where the number of x's equals n, and n is half the length of the vector.
1955///
1956/// When undef's appear in the mask they are treated as if they were whatever
1957/// value is necessary in order to fit the above form.
1958static SDValue
1959 lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1960 SDValue V1, SDValue V2, SelectionDAG &DAG,
1961 const LoongArchSubtarget &Subtarget) {
1962 int SplatIndex = -1;
1963 for (const auto &M : Mask) {
1964 if (M != -1) {
1965 SplatIndex = M;
1966 break;
1967 }
1968 }
1969
1970 if (SplatIndex == -1)
1971 return DAG.getUNDEF(VT);
1972
1973 const auto &Begin = Mask.begin();
1974 const auto &End = Mask.end();
1975 unsigned HalfSize = Mask.size() / 2;
1976
1977 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1978 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
1979 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
1980 0)) {
1981 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1982 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
1983 }
1984
1985 return SDValue();
1986}
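// Illustrative example (assumed, not in the original source): for a v8i32
// single-input shuffle with mask <2, 2, 2, 2, 6, 6, 6, 6>, SplatIndex is 2 and
// HalfSize is 4; the first half repeats index 2 and the second half repeats
// index 2 + 4, so the shuffle is lowered to a VREPLVEI node with immediate 2.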
1987
1988/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
1989static SDValue
1990 lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1991 SDValue V1, SDValue V2, SelectionDAG &DAG,
1992 const LoongArchSubtarget &Subtarget) {
1993 // When the size is less than or equal to 4, lower cost instructions may be
1994 // used.
1995 if (Mask.size() <= 4)
1996 return SDValue();
1997 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
1998}
1999
2000/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
2001 static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
2002 MVT VT, SDValue V1, SDValue V2,
2003 SelectionDAG &DAG) {
2004 // LoongArch LASX only has XVPERM_W.
2005 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2006 return SDValue();
2007
2008 unsigned NumElts = VT.getVectorNumElements();
2009 unsigned HalfSize = NumElts / 2;
2010 bool FrontLo = true, FrontHi = true;
2011 bool BackLo = true, BackHi = true;
2012
2013 auto inRange = [](int val, int low, int high) {
2014 return (val == -1) || (val >= low && val < high);
2015 };
2016
2017 for (unsigned i = 0; i < HalfSize; ++i) {
2018 int Fronti = Mask[i];
2019 int Backi = Mask[i + HalfSize];
2020
2021 FrontLo &= inRange(Fronti, 0, HalfSize);
2022 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2023 BackLo &= inRange(Backi, 0, HalfSize);
2024 BackHi &= inRange(Backi, HalfSize, NumElts);
2025 }
2026
2027 // If both the lower and upper 128-bit parts access only one half of the
2028 // vector (either lower or upper), avoid using xvperm.w. The latency of
2029 // xvperm.w (3) is higher than that of xvshuf (1) plus xvori (1).
2030 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2031 return SDValue();
2032
2033 SmallVector<SDValue, 8> Masks;
2034 for (unsigned i = 0; i < NumElts; ++i)
2035 Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(MVT::i64)
2036 : DAG.getConstant(Mask[i], DL, MVT::i64));
2037 SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
2038
2039 return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2040}
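// Illustrative example (assumed, not in the original source): for a v8i32
// shuffle with mask <7, 2, 6, 3, 1, 4, 0, 5>, both the front and the back half
// of the mask mix elements from the low and high 128-bit halves, so the
// early-exit above does not trigger and the shuffle is lowered to XVPERM with
// the mask materialized as a v8i32 index vector.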
2041
2042/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
2043 static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
2044 MVT VT, SDValue V1, SDValue V2,
2045 SelectionDAG &DAG) {
2046 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2047}
2048
2049/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
2050 static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
2051 MVT VT, SDValue V1, SDValue V2,
2052 SelectionDAG &DAG) {
2053 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2054}
2055
2056/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
2057 static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
2058 MVT VT, SDValue V1, SDValue V2,
2059 SelectionDAG &DAG) {
2060
2061 const auto &Begin = Mask.begin();
2062 const auto &End = Mask.end();
2063 unsigned HalfSize = Mask.size() / 2;
2064 unsigned LeftSize = HalfSize / 2;
2065 SDValue OriV1 = V1, OriV2 = V2;
2066
2067 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
2068 1) &&
2069 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
2070 V1 = OriV1;
2071 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
2072 Mask.size() + HalfSize - LeftSize, 1) &&
2073 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2074 Mask.size() + HalfSize + LeftSize, 1))
2075 V1 = OriV2;
2076 else
2077 return SDValue();
2078
2079 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
2080 1) &&
2081 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
2082 1))
2083 V2 = OriV1;
2084 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
2085 Mask.size() + HalfSize - LeftSize, 1) &&
2086 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2087 Mask.size() + HalfSize + LeftSize, 1))
2088 V2 = OriV2;
2089 else
2090 return SDValue();
2091
2092 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2093}
2094
2095/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
2096 static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
2097 MVT VT, SDValue V1, SDValue V2,
2098 SelectionDAG &DAG) {
2099
2100 const auto &Begin = Mask.begin();
2101 const auto &End = Mask.end();
2102 unsigned HalfSize = Mask.size() / 2;
2103 SDValue OriV1 = V1, OriV2 = V2;
2104
2105 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
2106 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
2107 V1 = OriV1;
2108 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
2109 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2110 Mask.size() + HalfSize, 1))
2111 V1 = OriV2;
2112 else
2113 return SDValue();
2114
2115 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
2116 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
2117 V2 = OriV1;
2118 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
2119 1) &&
2120 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2121 Mask.size() + HalfSize, 1))
2122 V2 = OriV2;
2123 else
2124 return SDValue();
2125
2126 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2127}
2128
2129/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
2130 static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
2131 MVT VT, SDValue V1, SDValue V2,
2132 SelectionDAG &DAG) {
2133
2134 const auto &Begin = Mask.begin();
2135 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2136 const auto &Mid = Mask.begin() + Mask.size() / 2;
2137 const auto &RightMid = Mask.end() - Mask.size() / 4;
2138 const auto &End = Mask.end();
2139 unsigned HalfSize = Mask.size() / 2;
2140 SDValue OriV1 = V1, OriV2 = V2;
2141
2142 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2143 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2144 V1 = OriV1;
2145 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2146 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2147 V1 = OriV2;
2148 else
2149 return SDValue();
2150
2151 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2152 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2153 V2 = OriV1;
2154 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2155 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2156 V2 = OriV2;
2157
2158 else
2159 return SDValue();
2160
2161 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2162}
2163
2164/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
2165 static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
2166 MVT VT, SDValue V1, SDValue V2,
2167 SelectionDAG &DAG) {
2168
2169 const auto &Begin = Mask.begin();
2170 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2171 const auto &Mid = Mask.begin() + Mask.size() / 2;
2172 const auto &RightMid = Mask.end() - Mask.size() / 4;
2173 const auto &End = Mask.end();
2174 unsigned HalfSize = Mask.size() / 2;
2175 SDValue OriV1 = V1, OriV2 = V2;
2176
2177 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2178 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2179 V1 = OriV1;
2180 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2181 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2182 2))
2183 V1 = OriV2;
2184 else
2185 return SDValue();
2186
2187 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2188 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2189 V2 = OriV1;
2190 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2191 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2192 2))
2193 V2 = OriV2;
2194 else
2195 return SDValue();
2196
2197 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2198}
2199
2200/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
2201 static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
2202 MVT VT, SDValue V1, SDValue V2,
2203 SelectionDAG &DAG) {
2204
2205 int MaskSize = Mask.size();
2206 int HalfSize = Mask.size() / 2;
2207 const auto &Begin = Mask.begin();
2208 const auto &Mid = Mask.begin() + HalfSize;
2209 const auto &End = Mask.end();
2210
2211 // VECTOR_SHUFFLE concatenates the vectors:
2212 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
2213 // shuffling ->
2214 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
2215 //
2216 // XVSHUF concatenates the vectors:
2217 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
2218 // shuffling ->
2219 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
2220 SmallVector<SDValue, 8> MaskAlloc;
2221 for (auto it = Begin; it < Mid; it++) {
2222 if (*it < 0) // UNDEF
2223 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2224 else if ((*it >= 0 && *it < HalfSize) ||
2225 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
2226 int M = *it < HalfSize ? *it : *it - HalfSize;
2227 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2228 } else
2229 return SDValue();
2230 }
2231 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
2232
2233 for (auto it = Mid; it < End; it++) {
2234 if (*it < 0) // UNDEF
2235 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2236 else if ((*it >= HalfSize && *it < MaskSize) ||
2237 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
2238 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
2239 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2240 } else
2241 return SDValue();
2242 }
2243 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
2244
2245 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2246 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
2247 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2248}
2249
2250/// Shuffle vectors by lane to generate more optimized instructions.
2251/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
2252///
2253/// Therefore, except for the following four cases, other cases are regarded
2254/// as cross-lane shuffles, where optimization is relatively limited.
2255///
2256 /// - Shuffle high, low lanes of the two input vectors
2257 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
2258 /// - Shuffle low, high lanes of the two input vectors
2259 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
2260 /// - Shuffle low, low lanes of the two input vectors
2261 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
2262 /// - Shuffle high, high lanes of the two input vectors
2263 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2264///
2265/// The first case is the closest to LoongArch instructions and the other
2266/// cases need to be converted to it for processing.
2267///
2268/// This function may modify V1, V2 and Mask
2269 static void canonicalizeShuffleVectorByLane(
2270 const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
2271 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
2272
2273 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
2274
2275 int MaskSize = Mask.size();
2276 int HalfSize = Mask.size() / 2;
2277 MVT GRLenVT = Subtarget.getGRLenVT();
2278
2279 HalfMaskType preMask = None, postMask = None;
2280
2281 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2282 return M < 0 || (M >= 0 && M < HalfSize) ||
2283 (M >= MaskSize && M < MaskSize + HalfSize);
2284 }))
2285 preMask = HighLaneTy;
2286 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2287 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2288 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2289 }))
2290 preMask = LowLaneTy;
2291
2292 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2293 return M < 0 || (M >= 0 && M < HalfSize) ||
2294 (M >= MaskSize && M < MaskSize + HalfSize);
2295 }))
2296 postMask = HighLaneTy;
2297 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2298 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2299 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2300 }))
2301 postMask = LowLaneTy;
2302
2303 // The pre-half of mask is high lane type, and the post-half of mask
2304 // is low lane type, which is closest to the LoongArch instructions.
2305 //
2306 // Note: In the LoongArch architecture, the high lane of the mask corresponds
2307 // to the lower 128 bits of the vector register, and the low lane of the mask
2308 // corresponds to the higher 128 bits of the vector register.
2309 if (preMask == HighLaneTy && postMask == LowLaneTy) {
2310 return;
2311 }
2312 if (preMask == LowLaneTy && postMask == HighLaneTy) {
2313 V1 = DAG.getBitcast(MVT::v4i64, V1);
2314 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2315 DAG.getConstant(0b01001110, DL, GRLenVT));
2316 V1 = DAG.getBitcast(VT, V1);
2317
2318 if (!V2.isUndef()) {
2319 V2 = DAG.getBitcast(MVT::v4i64, V2);
2320 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2321 DAG.getConstant(0b01001110, DL, GRLenVT));
2322 V2 = DAG.getBitcast(VT, V2);
2323 }
2324
2325 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2326 *it = *it < 0 ? *it : *it - HalfSize;
2327 }
2328 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2329 *it = *it < 0 ? *it : *it + HalfSize;
2330 }
2331 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
2332 V1 = DAG.getBitcast(MVT::v4i64, V1);
2333 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2334 DAG.getConstant(0b11101110, DL, GRLenVT));
2335 V1 = DAG.getBitcast(VT, V1);
2336
2337 if (!V2.isUndef()) {
2338 V2 = DAG.getBitcast(MVT::v4i64, V2);
2339 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2340 DAG.getConstant(0b11101110, DL, GRLenVT));
2341 V2 = DAG.getBitcast(VT, V2);
2342 }
2343
2344 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2345 *it = *it < 0 ? *it : *it - HalfSize;
2346 }
2347 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
2348 V1 = DAG.getBitcast(MVT::v4i64, V1);
2349 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2350 DAG.getConstant(0b01000100, DL, GRLenVT));
2351 V1 = DAG.getBitcast(VT, V1);
2352
2353 if (!V2.isUndef()) {
2354 V2 = DAG.getBitcast(MVT::v4i64, V2);
2355 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2356 DAG.getConstant(0b01000100, DL, GRLenVT));
2357 V2 = DAG.getBitcast(VT, V2);
2358 }
2359
2360 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2361 *it = *it < 0 ? *it : *it + HalfSize;
2362 }
2363 } else { // cross-lane
2364 return;
2365 }
2366}
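// Illustrative example (assumed, not in the original source): for a v8i32
// single-input shuffle with mask <4, 5, 6, 7, 0, 1, 2, 3>, the first half of
// the mask reads only elements 4..7 (preMask == LowLaneTy) and the second half
// reads only elements 0..3 (postMask == HighLaneTy), so V1 has its 128-bit
// halves swapped with XVPERMI (immediate 0b01001110) and the mask is rewritten
// to <0, 1, 2, 3, 4, 5, 6, 7> before the normal lowering runs.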
2367
2368/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
2369/// Only for 256-bit vector.
2370///
2371/// For example:
2372 /// %2 = shufflevector <4 x i64> %0, <4 x i64> poison,
2373 /// <4 x i32> <i32 0, i32 3, i32 2, i32 0>
2374 /// is lowered to:
2375/// (XVPERMI $xr2, $xr0, 78)
2376/// (XVSHUF $xr1, $xr2, $xr0)
2377/// (XVORI $xr0, $xr1, 0)
2378 static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL,
2379 ArrayRef<int> Mask,
2380 MVT VT, SDValue V1,
2381 SDValue V2,
2382 SelectionDAG &DAG) {
2383 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
2384 int Size = Mask.size();
2385 int LaneSize = Size / 2;
2386
2387 bool LaneCrossing[2] = {false, false};
2388 for (int i = 0; i < Size; ++i)
2389 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
2390 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
2391
2392 // Bail out if neither lane is crossed.
2393 if (!LaneCrossing[0] && !LaneCrossing[1])
2394 return SDValue();
2395
2396 SmallVector<int> InLaneMask;
2397 InLaneMask.assign(Mask.begin(), Mask.end());
2398 for (int i = 0; i < Size; ++i) {
2399 int &M = InLaneMask[i];
2400 if (M < 0)
2401 continue;
2402 if (((M % Size) / LaneSize) != (i / LaneSize))
2403 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
2404 }
2405
2406 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
2407 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
2408 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
2409 Flipped = DAG.getBitcast(VT, Flipped);
2410 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
2411}
2412
2413/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
2414///
2415/// This routine breaks down the specific type of 256-bit shuffle and
2416/// dispatches to the lowering routines accordingly.
2417 static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2418 SDValue V1, SDValue V2, SelectionDAG &DAG,
2419 const LoongArchSubtarget &Subtarget) {
2420 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
2421 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
2422 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
2423 "Vector type is unsupported for lasx!");
2425 "Two operands have different types!");
2426 assert(VT.getVectorNumElements() == Mask.size() &&
2427 "Unexpected mask size for shuffle!");
2428 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2429 assert(Mask.size() >= 4 && "Mask size is less than 4.");
2430
2431 // Canonicalize non-cross-lane shuffle vectors.
2432 SmallVector<int> NewMask(Mask);
2433 canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget);
2434
2435 APInt KnownUndef, KnownZero;
2436 computeZeroableShuffleElements(NewMask, V1, V2, KnownUndef, KnownZero);
2437 APInt Zeroable = KnownUndef | KnownZero;
2438
2439 SDValue Result;
2440 // TODO: Add more comparison patterns.
2441 if (V2.isUndef()) {
2442 if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG,
2443 Subtarget)))
2444 return Result;
2445 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG,
2446 Subtarget)))
2447 return Result;
2448 if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, NewMask, VT, V1, V2, DAG)))
2449 return Result;
2450 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
2451 V1, V2, DAG)))
2452 return Result;
2453
2454 // TODO: This commented-out code may be enabled in the future to better
2455 // match the pattern for instruction selection.
2456 /* V2 = V1; */
2457 }
2458
2459 // It is recommended not to change the pattern comparison order for better
2460 // performance.
2461 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
2462 return Result;
2463 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
2464 return Result;
2465 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
2466 return Result;
2467 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
2468 return Result;
2469 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
2470 return Result;
2471 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
2472 return Result;
2473 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, NewMask, VT, V1, V2, DAG,
2474 Subtarget, Zeroable)))
2475 return Result;
2476 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, NewMask, VT, V1, V2, DAG,
2477 Subtarget)))
2478 return Result;
2479 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
2480 return NewShuffle;
2481 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
2482 return Result;
2483
2484 return SDValue();
2485}
2486
2487SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2488 SelectionDAG &DAG) const {
2489 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
2490 ArrayRef<int> OrigMask = SVOp->getMask();
2491 SDValue V1 = Op.getOperand(0);
2492 SDValue V2 = Op.getOperand(1);
2493 MVT VT = Op.getSimpleValueType();
2494 int NumElements = VT.getVectorNumElements();
2495 SDLoc DL(Op);
2496
2497 bool V1IsUndef = V1.isUndef();
2498 bool V2IsUndef = V2.isUndef();
2499 if (V1IsUndef && V2IsUndef)
2500 return DAG.getUNDEF(VT);
2501
2502 // When we create a shuffle node we put the UNDEF node to second operand,
2503 // but in some cases the first operand may be transformed to UNDEF.
2504 // In this case we should just commute the node.
2505 if (V1IsUndef)
2506 return DAG.getCommutedVectorShuffle(*SVOp);
2507
2508 // Check for non-undef masks pointing at an undef vector and make the masks
2509 // undef as well. This makes it easier to match the shuffle based solely on
2510 // the mask.
2511 if (V2IsUndef &&
2512 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
2513 SmallVector<int, 8> NewMask(OrigMask);
2514 for (int &M : NewMask)
2515 if (M >= NumElements)
2516 M = -1;
2517 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
2518 }
2519
2520 // Check for illegal shuffle mask element index values.
2521 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
2522 (void)MaskUpperLimit;
2523 assert(llvm::all_of(OrigMask,
2524 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
2525 "Out of bounds shuffle index");
2526
2527 // For each vector width, delegate to a specialized lowering routine.
2528 if (VT.is128BitVector())
2529 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2530
2531 if (VT.is256BitVector())
2532 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2533
2534 return SDValue();
2535}
2536
2537SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
2538 SelectionDAG &DAG) const {
2539 // Custom lower to ensure the libcall return is passed in an FPR on hard
2540 // float ABIs.
2541 SDLoc DL(Op);
2542 MakeLibCallOptions CallOptions;
2543 SDValue Op0 = Op.getOperand(0);
2544 SDValue Chain = SDValue();
2545 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
2546 SDValue Res;
2547 std::tie(Res, Chain) =
2548 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
2549 if (Subtarget.is64Bit())
2550 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2551 return DAG.getBitcast(MVT::i32, Res);
2552}
2553
2554SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
2555 SelectionDAG &DAG) const {
2556 // Custom lower to ensure the libcall argument is passed in an FPR on hard
2557 // float ABIs.
2558 SDLoc DL(Op);
2559 MakeLibCallOptions CallOptions;
2560 SDValue Op0 = Op.getOperand(0);
2561 SDValue Chain = SDValue();
2562 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2563 DL, MVT::f32, Op0)
2564 : DAG.getBitcast(MVT::f32, Op0);
2565 SDValue Res;
2566 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
2567 CallOptions, DL, Chain);
2568 return Res;
2569}
2570
2571SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
2572 SelectionDAG &DAG) const {
2573 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2574 SDLoc DL(Op);
2575 MakeLibCallOptions CallOptions;
2576 RTLIB::Libcall LC =
2577 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
2578 SDValue Res =
2579 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
2580 if (Subtarget.is64Bit())
2581 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2582 return DAG.getBitcast(MVT::i32, Res);
2583}
2584
2585SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
2586 SelectionDAG &DAG) const {
2587 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2588 MVT VT = Op.getSimpleValueType();
2589 SDLoc DL(Op);
2590 Op = DAG.getNode(
2591 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
2592 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
2593 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2594 DL, MVT::f32, Op)
2595 : DAG.getBitcast(MVT::f32, Op);
2596 if (VT != MVT::f32)
2597 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
2598 return Res;
2599}
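// Illustrative sketch (assumed, not in the original source) of the bf16 -> f32
// conversion above: a bf16 value is just the high 16 bits of an f32, so
// shifting the 16-bit payload left by 16 reproduces the f32 bit pattern. For
// example, bf16 1.0 has the bit pattern 0x3F80, and 0x3F80 << 16 == 0x3F800000,
// which is exactly the IEEE-754 single-precision encoding of 1.0f.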
2600
2601// Lower BUILD_VECTOR as broadcast load (if possible).
2602// For example:
2603// %a = load i8, ptr %ptr
2604// %b = build_vector %a, %a, %a, %a
2605 // is lowered to:
2606// (VLDREPL_B $a0, 0)
2607 static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
2608 const SDLoc &DL,
2609 SelectionDAG &DAG) {
2610 MVT VT = BVOp->getSimpleValueType(0);
2611 int NumOps = BVOp->getNumOperands();
2612
2613 assert((VT.is128BitVector() || VT.is256BitVector()) &&
2614 "Unsupported vector type for broadcast.");
2615
2616 SDValue IdentitySrc;
2617 bool IsIdentity = true;
2618
2619 for (int i = 0; i != NumOps; i++) {
2620 SDValue Op = BVOp->getOperand(i);
2621 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
2622 IsIdentity = false;
2623 break;
2624 }
2625 IdentitySrc = BVOp->getOperand(0);
2626 }
2627
2628 // make sure that this load is valid and only has one user.
2629 if (!IsIdentity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
2630 return SDValue();
2631
2632 auto *LN = cast<LoadSDNode>(IdentitySrc);
2633 auto ExtType = LN->getExtensionType();
2634
2635 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
2636 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
2637 SDVTList Tys =
2638 LN->isIndexed()
2639 ? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)
2640 : DAG.getVTList(VT, MVT::Other);
2641 SDValue Ops[] = {LN->getChain(), LN->getBasePtr(), LN->getOffset()};
2642 SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
2643 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
2644 return BCast;
2645 }
2646 return SDValue();
2647}
2648
2649// Sequentially insert elements from Ops into Vector, from low to high indices.
2650// Note: Ops can have fewer elements than Vector.
2651 static void fillVector(ArrayRef<SDValue> Ops, SelectionDAG &DAG, SDLoc DL,
2652 const LoongArchSubtarget &Subtarget, SDValue &Vector,
2653 EVT ResTy) {
2654 assert(Ops.size() <= ResTy.getVectorNumElements());
2655
2656 SDValue Op0 = Ops[0];
2657 if (!Op0.isUndef())
2658 Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
2659 for (unsigned i = 1; i < Ops.size(); ++i) {
2660 SDValue Opi = Ops[i];
2661 if (Opi.isUndef())
2662 continue;
2663 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
2664 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2665 }
2666}
2667
2668// Build a ResTy subvector from Node, taking NumElts elements starting at index
2669// 'first'.
2670 static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node,
2671 SelectionDAG &DAG, SDLoc DL,
2672 const LoongArchSubtarget &Subtarget,
2673 EVT ResTy, unsigned first) {
2674 unsigned NumElts = ResTy.getVectorNumElements();
2675
2676 assert(first >= 0 &&
2677 first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());
2678
2679 SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
2680 Node->op_begin() + first + NumElts);
2681 SDValue Vector = DAG.getUNDEF(ResTy);
2682 fillVector(Ops, DAG, DL, Subtarget, Vector, ResTy);
2683 return Vector;
2684}
2685
2686SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
2687 SelectionDAG &DAG) const {
2688 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
2689 MVT VT = Node->getSimpleValueType(0);
2690 EVT ResTy = Op->getValueType(0);
2691 unsigned NumElts = ResTy.getVectorNumElements();
2692 SDLoc DL(Op);
2693 APInt SplatValue, SplatUndef;
2694 unsigned SplatBitSize;
2695 bool HasAnyUndefs;
2696 bool IsConstant = false;
2697 bool UseSameConstant = true;
2698 SDValue ConstantValue;
2699 bool Is128Vec = ResTy.is128BitVector();
2700 bool Is256Vec = ResTy.is256BitVector();
2701
2702 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
2703 (!Subtarget.hasExtLASX() || !Is256Vec))
2704 return SDValue();
2705
2706 if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
2707 return Result;
2708
2709 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
2710 /*MinSplatBits=*/8) &&
2711 SplatBitSize <= 64) {
2712 // We can only cope with 8, 16, 32, or 64-bit elements.
2713 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
2714 SplatBitSize != 64)
2715 return SDValue();
2716
2717 if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
2718 // We can only handle 64-bit elements that are within
2719 // the signed 10-bit range on 32-bit targets.
2720 // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
2721 if (!SplatValue.isSignedIntN(10))
2722 return SDValue();
2723 if ((Is128Vec && ResTy == MVT::v4i32) ||
2724 (Is256Vec && ResTy == MVT::v8i32))
2725 return Op;
2726 }
2727
2728 EVT ViaVecTy;
2729
2730 switch (SplatBitSize) {
2731 default:
2732 return SDValue();
2733 case 8:
2734 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
2735 break;
2736 case 16:
2737 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
2738 break;
2739 case 32:
2740 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
2741 break;
2742 case 64:
2743 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
2744 break;
2745 }
2746
2747 // SelectionDAG::getConstant will promote SplatValue appropriately.
2748 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
2749
2750 // Bitcast to the type we originally wanted.
2751 if (ViaVecTy != ResTy)
2752 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
2753
2754 return Result;
2755 }
2756
2757 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
2758 return Op;
2759
2760 for (unsigned i = 0; i < NumElts; ++i) {
2761 SDValue Opi = Node->getOperand(i);
2762 if (isIntOrFPConstant(Opi)) {
2763 IsConstant = true;
2764 if (!ConstantValue.getNode())
2765 ConstantValue = Opi;
2766 else if (ConstantValue != Opi)
2767 UseSameConstant = false;
2768 }
2769 }
2770
2771 // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
2772 if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
2773 SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
2774 for (unsigned i = 0; i < NumElts; ++i) {
2775 SDValue Opi = Node->getOperand(i);
2776 if (!isIntOrFPConstant(Opi))
2777 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
2778 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2779 }
2780 return Result;
2781 }
2782
2783 if (!IsConstant) {
2784 // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
2785 // the sub-sequence of the vector and then broadcast the sub-sequence.
2786 //
2787 // TODO: If the BUILD_VECTOR contains undef elements, consider falling
2788 // back to use INSERT_VECTOR_ELT to materialize the vector, because it
2789 // generates worse code in some cases. This could be further optimized
2790 // with more consideration.
2791 SmallVector<SDValue> Sequence;
2792 BitVector UndefElements;
2793 if (Node->getRepeatedSequence(Sequence, &UndefElements) &&
2794 UndefElements.count() == 0) {
2795 // Use LSX instructions to fill the sub-sequence of a 256-bit vector, because
2796 // the high part can simply be treated as undef.
2797 SDValue Vector = DAG.getUNDEF(ResTy);
2798 EVT FillTy = Is256Vec
2799 ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext())
2800 : ResTy;
2801 SDValue FillVec =
2802 Is256Vec ? DAG.getExtractSubvector(DL, FillTy, Vector, 0) : Vector;
2803
2804 fillVector(Sequence, DAG, DL, Subtarget, FillVec, FillTy);
2805
2806 unsigned SeqLen = Sequence.size();
2807 unsigned SplatLen = NumElts / SeqLen;
2808 MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
2809 MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);
2810
2811 // If the size of the sub-sequence is half of a 256-bit vector, bitcast the
2812 // vector to v4i64 in order to match the XVREPLVE0Q pattern.
2813 if (SplatEltTy == MVT::i128)
2814 SplatTy = MVT::v4i64;
2815
2816 SDValue SplatVec;
2817 SDValue SrcVec = DAG.getBitcast(
2818 SplatTy,
2819 Is256Vec ? DAG.getInsertSubvector(DL, Vector, FillVec, 0) : FillVec);
2820 if (Is256Vec) {
2821 SplatVec =
2822 DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
2823 : LoongArchISD::XVREPLVE0,
2824 DL, SplatTy, SrcVec);
2825 } else {
2826 SplatVec = DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
2827 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
2828 }
2829
2830 return DAG.getBitcast(ResTy, SplatVec);
2831 }
2832
2833 // Use INSERT_VECTOR_ELT operations rather than expanding to stores, because
2834 // going through memory is much slower.
2835 //
2836 // For 256-bit vectors, normally split into two halves and concatenate them.
2837 // Special case: for v8i32/v8f32/v4i64/v4f64, if the upper half has only
2838 // one non-undef element, skip splitting to avoid a worse result.
2839 if (ResTy == MVT::v8i32 || ResTy == MVT::v8f32 || ResTy == MVT::v4i64 ||
2840 ResTy == MVT::v4f64) {
2841 unsigned NonUndefCount = 0;
2842 for (unsigned i = NumElts / 2; i < NumElts; ++i) {
2843 if (!Node->getOperand(i).isUndef()) {
2844 ++NonUndefCount;
2845 if (NonUndefCount > 1)
2846 break;
2847 }
2848 }
2849 if (NonUndefCount == 1)
2850 return fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy, 0);
2851 }
2852
2853 EVT VecTy =
2854 Is256Vec ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext()) : ResTy;
2855 SDValue Vector =
2856 fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, VecTy, 0);
2857
2858 if (Is128Vec)
2859 return Vector;
2860
2861 SDValue VectorHi = fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget,
2862 VecTy, NumElts / 2);
2863
2864 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResTy, Vector, VectorHi);
2865 }
2866
2867 return SDValue();
2868}
2869
2870SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
2871 SelectionDAG &DAG) const {
2872 SDLoc DL(Op);
2873 MVT ResVT = Op.getSimpleValueType();
2874 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
2875
2876 unsigned NumOperands = Op.getNumOperands();
2877 unsigned NumFreezeUndef = 0;
2878 unsigned NumZero = 0;
2879 unsigned NumNonZero = 0;
2880 unsigned NonZeros = 0;
2881 SmallSet<SDValue, 4> Undefs;
2882 for (unsigned i = 0; i != NumOperands; ++i) {
2883 SDValue SubVec = Op.getOperand(i);
2884 if (SubVec.isUndef())
2885 continue;
2886 if (ISD::isFreezeUndef(SubVec.getNode())) {
2887 // If the freeze(undef) has multiple uses then we must fold to zero.
2888 if (SubVec.hasOneUse()) {
2889 ++NumFreezeUndef;
2890 } else {
2891 ++NumZero;
2892 Undefs.insert(SubVec);
2893 }
2894 } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
2895 ++NumZero;
2896 else {
2897 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
2898 NonZeros |= 1 << i;
2899 ++NumNonZero;
2900 }
2901 }
2902
2903 // If we have more than 2 non-zeros, build each half separately.
2904 if (NumNonZero > 2) {
2905 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
2906 ArrayRef<SDUse> Ops = Op->ops();
2907 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
2908 Ops.slice(0, NumOperands / 2));
2909 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
2910 Ops.slice(NumOperands / 2));
2911 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
2912 }
2913
2914 // Otherwise, build it up through insert_subvectors.
2915 SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
2916 : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
2917 : DAG.getUNDEF(ResVT));
2918
2919 // Replace Undef operands with ZeroVector.
2920 for (SDValue U : Undefs)
2921 DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
2922
2923 MVT SubVT = Op.getOperand(0).getSimpleValueType();
2924 unsigned NumSubElems = SubVT.getVectorNumElements();
2925 for (unsigned i = 0; i != NumOperands; ++i) {
2926 if ((NonZeros & (1 << i)) == 0)
2927 continue;
2928
2929 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
2930 DAG.getVectorIdxConstant(i * NumSubElems, DL));
2931 }
2932
2933 return Vec;
2934}
2935
2936SDValue
2937LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
2938 SelectionDAG &DAG) const {
2939 MVT EltVT = Op.getSimpleValueType();
2940 SDValue Vec = Op->getOperand(0);
2941 EVT VecTy = Vec->getValueType(0);
2942 SDValue Idx = Op->getOperand(1);
2943 SDLoc DL(Op);
2944 MVT GRLenVT = Subtarget.getGRLenVT();
2945
2946 assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
2947
2948 if (isa<ConstantSDNode>(Idx))
2949 return Op;
2950
2951 switch (VecTy.getSimpleVT().SimpleTy) {
2952 default:
2953 llvm_unreachable("Unexpected type");
2954 case MVT::v32i8:
2955 case MVT::v16i16:
2956 case MVT::v4i64:
2957 case MVT::v4f64: {
2958 // Extract the high-half subvector and place it in the low half of a new
2959 // vector. It doesn't matter what the high half of the new vector is.
2960 EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
2961 SDValue VecHi =
2962 DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
2963 SDValue TmpVec =
2964 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
2965 VecHi, DAG.getConstant(0, DL, GRLenVT));
2966
2967 // Shuffle the original Vec and TmpVec using MaskVec; the lowest element
2968 // of MaskVec is Idx and the rest do not matter. ResVec[0] will hold the
2969 // desired element.
2970 SDValue IdxCp =
2971 Subtarget.is64Bit()
2972 ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx)
2973 : DAG.getBitcast(MVT::f32, Idx);
2974 SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
2975 SDValue MaskVec =
2976 DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
2977 SDValue ResVec =
2978 DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);
2979
2980 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
2981 DAG.getConstant(0, DL, GRLenVT));
2982 }
2983 case MVT::v8i32:
2984 case MVT::v8f32: {
2985 SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
2986 SDValue SplatValue =
2987 DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);
2988
2989 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
2990 DAG.getConstant(0, DL, GRLenVT));
2991 }
2992 }
2993}
2994
2995SDValue
2996LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
2997 SelectionDAG &DAG) const {
2998 MVT VT = Op.getSimpleValueType();
2999 MVT EltVT = VT.getVectorElementType();
3000 unsigned NumElts = VT.getVectorNumElements();
3001 unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
3002 SDLoc DL(Op);
3003 SDValue Op0 = Op.getOperand(0);
3004 SDValue Op1 = Op.getOperand(1);
3005 SDValue Op2 = Op.getOperand(2);
3006
3007 if (isa<ConstantSDNode>(Op2))
3008 return Op;
3009
3010 MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
3011 MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
3012
3013 if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
3014 return SDValue();
3015
3016 SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
3017 SDValue SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
3018
3019 SmallVector<SDValue, 32> RawIndices;
3020 for (unsigned i = 0; i < NumElts; ++i)
3021 RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3022 SDValue Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
3023
3024 // insert vec, elt, idx
3025 // =>
3026 // select (splatidx == {0,1,2...}) ? splatelt : vec
3027 SDValue SelectCC =
3028 DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
3029 return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
3030}
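// Illustrative example (assumed, not in the original source): inserting %e
// into a v4i32 %vec at a non-constant index %i follows the select pattern
// above:
//   SplatElt = <%e, %e, %e, %e>
//   SplatIdx = <%i, %i, %i, %i>
//   Indices  = <0, 1, 2, 3>
//   result   = vselect (SplatIdx == Indices), SplatElt, %vec
// so only the lane whose constant index equals %i receives the new element.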
3031
3032SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3033 SelectionDAG &DAG) const {
3034 SDLoc DL(Op);
3035 SyncScope::ID FenceSSID =
3036 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
3037
3038 // singlethread fences only synchronize with signal handlers on the same
3039 // thread and thus only need to preserve instruction order, not actually
3040 // enforce memory ordering.
3041 if (FenceSSID == SyncScope::SingleThread)
3042 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3043 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3044
3045 return Op;
3046}
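// Illustrative example (assumed, not in the original source): an IR fence such
// as
//   fence syncscope("singlethread") seq_cst
// takes the MEMBARRIER path above and emits no instruction, acting purely as a
// compiler-level barrier, while a cross-thread fence falls through to the
// default lowering.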
3047
3048SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
3049 SelectionDAG &DAG) const {
3050
3051 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
3052 DAG.getContext()->emitError(
3053 "On LA64, only 64-bit registers can be written.");
3054 return Op.getOperand(0);
3055 }
3056
3057 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
3058 DAG.getContext()->emitError(
3059 "On LA32, only 32-bit registers can be written.");
3060 return Op.getOperand(0);
3061 }
3062
3063 return Op;
3064}
3065
3066SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
3067 SelectionDAG &DAG) const {
3068 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
3069 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
3070 "be a constant integer");
3071 return SDValue();
3072 }
3073
3074 MachineFunction &MF = DAG.getMachineFunction();
3075 MF.getFrameInfo().setFrameAddressIsTaken(true);
3076 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
3077 EVT VT = Op.getValueType();
3078 SDLoc DL(Op);
3079 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
3080 unsigned Depth = Op.getConstantOperandVal(0);
3081 int GRLenInBytes = Subtarget.getGRLen() / 8;
3082
3083 while (Depth--) {
3084 int Offset = -(GRLenInBytes * 2);
3085 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
3086 DAG.getSignedConstant(Offset, DL, VT));
3087 FrameAddr =
3088 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
3089 }
3090 return FrameAddr;
3091}
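// Illustrative sketch (assumed, not in the original source): for
//   void *p = __builtin_frame_address(2);
// Depth is 2, so after copying the frame register the loop above performs two
// chained loads, each reading the saved frame pointer at offset
// -2 * GRLenInBytes from the current frame address.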
3092
3093SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
3094 SelectionDAG &DAG) const {
3095 // Currently we only support lowering the return address for the current frame.
3096 if (Op.getConstantOperandVal(0) != 0) {
3097 DAG.getContext()->emitError(
3098 "return address can only be determined for the current frame");
3099 return SDValue();
3100 }
3101
3102 MachineFunction &MF = DAG.getMachineFunction();
3103 MF.getFrameInfo().setReturnAddressIsTaken(true);
3104 MVT GRLenVT = Subtarget.getGRLenVT();
3105
3106 // Return the value of the return address register, marking it an implicit
3107 // live-in.
3108 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
3109 getRegClassFor(GRLenVT));
3110 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
3111}
3112
3113SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
3114 SelectionDAG &DAG) const {
3115 MachineFunction &MF = DAG.getMachineFunction();
3116 auto Size = Subtarget.getGRLen() / 8;
3117 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
3118 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3119}
3120
3121SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
3122 SelectionDAG &DAG) const {
3123 MachineFunction &MF = DAG.getMachineFunction();
3124 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
3125
3126 SDLoc DL(Op);
3127 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3128 getPointerTy(MF.getDataLayout()));
3129
3130 // vastart just stores the address of the VarArgsFrameIndex slot into the
3131 // memory location argument.
3132 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3133 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
3134 MachinePointerInfo(SV));
3135}
3136
3137SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
3138 SelectionDAG &DAG) const {
3139 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3140 !Subtarget.hasBasicD() && "unexpected target features");
3141
3142 SDLoc DL(Op);
3143 SDValue Op0 = Op.getOperand(0);
3144 if (Op0->getOpcode() == ISD::AND) {
3145 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
3146 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
3147 return Op;
3148 }
3149
3150 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
3151 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
3152 Op0.getConstantOperandVal(2) == UINT64_C(0))
3153 return Op;
3154
3155 if (Op0.getOpcode() == ISD::AssertZext &&
3156 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
3157 return Op;
3158
3159 EVT OpVT = Op0.getValueType();
3160 EVT RetVT = Op.getValueType();
3161 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
3162 MakeLibCallOptions CallOptions;
3163 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3164 SDValue Chain = SDValue();
3165 SDValue Result;
3166 std::tie(Result, Chain) =
3167 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3168 return Result;
3169}
3170
3171SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
3172 SelectionDAG &DAG) const {
3173 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3174 !Subtarget.hasBasicD() && "unexpected target features");
3175
3176 SDLoc DL(Op);
3177 SDValue Op0 = Op.getOperand(0);
3178
3179 if ((Op0.getOpcode() == ISD::AssertSext ||
3180 Op0.getOpcode() == ISD::AssertZext) &&
3181 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
3182 return Op;
3183
3184 EVT OpVT = Op0.getValueType();
3185 EVT RetVT = Op.getValueType();
3186 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
3187 MakeLibCallOptions CallOptions;
3188 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3189 SDValue Chain = SDValue();
3190 SDValue Result;
3191 std::tie(Result, Chain) =
3192 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3193 return Result;
3194}
3195
3196SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
3197 SelectionDAG &DAG) const {
3198
3199 SDLoc DL(Op);
3200 EVT VT = Op.getValueType();
3201 SDValue Op0 = Op.getOperand(0);
3202 EVT Op0VT = Op0.getValueType();
3203
3204 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
3205 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
3206 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3207 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
3208 }
3209 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
3210 SDValue Lo, Hi;
3211 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
3212 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
3213 }
3214 return Op;
3215}
3216
3217SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
3218 SelectionDAG &DAG) const {
3219
3220 SDLoc DL(Op);
3221 SDValue Op0 = Op.getOperand(0);
3222
3223 if (Op0.getValueType() == MVT::f16)
3224 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
3225
3226 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
3227 !Subtarget.hasBasicD()) {
3228 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
3229 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
3230 }
3231
3232 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
3233 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
3234 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
3235}
3236
3237 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
3238 SelectionDAG &DAG, unsigned Flags) {
3239 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
3240}
3241
3242 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
3243 SelectionDAG &DAG, unsigned Flags) {
3244 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
3245 Flags);
3246}
3247
3248 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
3249 SelectionDAG &DAG, unsigned Flags) {
3250 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
3251 N->getOffset(), Flags);
3252}
3253
3254 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
3255 SelectionDAG &DAG, unsigned Flags) {
3256 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
3257}
3258
3259template <class NodeTy>
3260SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3261 CodeModel::Model M,
3262 bool IsLocal) const {
3263 SDLoc DL(N);
3264 EVT Ty = getPointerTy(DAG.getDataLayout());
3265 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3266 SDValue Load;
3267
3268 switch (M) {
3269 default:
3270 report_fatal_error("Unsupported code model");
3271
3272 case CodeModel::Large: {
3273 assert(Subtarget.is64Bit() && "Large code model requires LA64");
3274
3275 // This is not actually used, but is necessary for successfully matching
3276 // the PseudoLA_*_LARGE nodes.
3277 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3278 if (IsLocal) {
3279 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
3280 // eventually becomes the desired 5-insn code sequence.
3281 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
3282 Tmp, Addr),
3283 0);
3284 } else {
3285 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
3286 // eventually becomes the desired 5-insn code sequence.
3287 Load = SDValue(
3288 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
3289 0);
3290 }
3291 break;
3292 }
3293
3294 case CodeModel::Small:
3295 case CodeModel::Medium:
3296 if (IsLocal) {
3297 // This generates the pattern (PseudoLA_PCREL sym), which expands to
3298 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
3299 Load = SDValue(
3300 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
3301 } else {
3302 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
3303 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
3304 Load =
3305 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
3306 }
3307 }
3308
3309 if (!IsLocal) {
3310 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3311 MachineFunction &MF = DAG.getMachineFunction();
3312 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3313 MachinePointerInfo::getGOT(MF),
3314 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3315 MachineMemOperand::MOInvariant,
3316 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3317 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
3318 }
3319
3320 return Load;
3321}
3322
3323SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
3324 SelectionDAG &DAG) const {
3325 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
3326 DAG.getTarget().getCodeModel());
3327}
3328
3329SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
3330 SelectionDAG &DAG) const {
3331 return getAddr(cast<JumpTableSDNode>(Op), DAG,
3332 DAG.getTarget().getCodeModel());
3333}
3334
3335SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
3336 SelectionDAG &DAG) const {
3337 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
3338 DAG.getTarget().getCodeModel());
3339}
3340
3341SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
3342 SelectionDAG &DAG) const {
3343 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3344 assert(N->getOffset() == 0 && "unexpected offset in global node");
3345 auto CM = DAG.getTarget().getCodeModel();
3346 const GlobalValue *GV = N->getGlobal();
3347
3348 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
3349 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
3350 CM = *GCM;
3351 }
3352
3353 return getAddr(N, DAG, CM, GV->isDSOLocal());
3354}
3355
3356SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
3357 SelectionDAG &DAG,
3358 unsigned Opc, bool UseGOT,
3359 bool Large) const {
3360 SDLoc DL(N);
3361 EVT Ty = getPointerTy(DAG.getDataLayout());
3362 MVT GRLenVT = Subtarget.getGRLenVT();
3363
3364 // This is not actually used, but is necessary for successfully matching the
3365 // PseudoLA_*_LARGE nodes.
3366 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3367 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3368
3369 // Only IE needs an extra argument for large code model.
3370 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
3371 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3372 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3373
3374 // If it is LE for normal/medium code model, the add tp operation will occur
3375 // during the pseudo-instruction expansion.
3376 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
3377 return Offset;
3378
3379 if (UseGOT) {
3380 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3381 MachineFunction &MF = DAG.getMachineFunction();
3382 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3383 MachinePointerInfo::getGOT(MF),
3384 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3385 MachineMemOperand::MOInvariant,
3386 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3387 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
3388 }
3389
3390 // Add the thread pointer.
3391 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
3392 DAG.getRegister(LoongArch::R2, GRLenVT));
3393}
3394
3395SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
3396 SelectionDAG &DAG,
3397 unsigned Opc,
3398 bool Large) const {
3399 SDLoc DL(N);
3400 EVT Ty = getPointerTy(DAG.getDataLayout());
3401 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
3402
3403 // This is not actually used, but is necessary for successfully matching the
3404 // PseudoLA_*_LARGE nodes.
3405 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3406
3407 // Use a PC-relative addressing mode to access the dynamic GOT address.
3408 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3409 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3410 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3411
3412 // Prepare argument list to generate call.
3413 TargetLowering::ArgListTy Args;
3414 Args.emplace_back(Load, CallTy);
3415
3416 // Setup call to __tls_get_addr.
3417 TargetLowering::CallLoweringInfo CLI(DAG);
3418 CLI.setDebugLoc(DL)
3419 .setChain(DAG.getEntryNode())
3420 .setLibCallee(CallingConv::C, CallTy,
3421 DAG.getExternalSymbol("__tls_get_addr", Ty),
3422 std::move(Args));
3423
3424 return LowerCallTo(CLI).first;
3425}
3426
3427SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
3428 SelectionDAG &DAG, unsigned Opc,
3429 bool Large) const {
3430 SDLoc DL(N);
3431 EVT Ty = getPointerTy(DAG.getDataLayout());
3432 const GlobalValue *GV = N->getGlobal();
3433
3434 // This is not actually used, but is necessary for successfully matching the
3435 // PseudoLA_*_LARGE nodes.
3436 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3437
3438 // Use a PC-relative addressing mode to access the global dynamic GOT address.
3439 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
3440 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3441 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3442 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3443}
3444
3445SDValue
3446LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3447 SelectionDAG &DAG) const {
3448 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3449 CallingConv::GHC)
3450 report_fatal_error("In GHC calling convention TLS is not supported");
3451
3452 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
3453 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
3454
3455 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3456 assert(N->getOffset() == 0 && "unexpected offset in global node");
3457
3458 if (DAG.getTarget().useEmulatedTLS())
3459 reportFatalUsageError("the emulated TLS is prohibited");
3460
3461 bool IsDesc = DAG.getTarget().useTLSDESC();
3462
3463 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
3464 case TLSModel::GeneralDynamic:
3465 // In this model, application code calls the dynamic linker function
3466 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
3467 // runtime.
3468 if (!IsDesc)
3469 return getDynamicTLSAddr(N, DAG,
3470 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
3471 : LoongArch::PseudoLA_TLS_GD,
3472 Large);
3473 break;
3474 case TLSModel::LocalDynamic:
3475 // Same as GeneralDynamic, except for assembly modifiers and relocation
3476 // records.
3477 if (!IsDesc)
3478 return getDynamicTLSAddr(N, DAG,
3479 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
3480 : LoongArch::PseudoLA_TLS_LD,
3481 Large);
3482 break;
3483 case TLSModel::InitialExec:
3484 // This model uses the GOT to resolve TLS offsets.
3485 return getStaticTLSAddr(N, DAG,
3486 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
3487 : LoongArch::PseudoLA_TLS_IE,
3488 /*UseGOT=*/true, Large);
3489 case TLSModel::LocalExec:
3490 // This model is used when statically linking, as the TLS offsets are
3491 // resolved during program linking.
3492 //
3493 // This node doesn't need an extra argument for the large code model.
3494 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
3495 /*UseGOT=*/false, Large);
3496 }
3497
3498 return getTLSDescAddr(N, DAG,
3499 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
3500 : LoongArch::PseudoLA_TLS_DESC,
3501 Large);
3502}
3503
3504template <unsigned N>
3505 static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
3506 SelectionDAG &DAG, bool IsSigned = false) {
3507 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
3508 // Check the ImmArg.
3509 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3510 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3511 DAG.getContext()->emitError(Op->getOperationName(0) +
3512 ": argument out of range.");
3513 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
3514 }
3515 return SDValue();
3516}
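// For example, checkIntrinsicImmArg<1>(Op, 2, DAG) below verifies that operand 2
// of the vreplvei.d/xvrepl128vei.d-style intrinsics fits in a 1-bit unsigned
// immediate and emits the "argument out of range" diagnostic otherwise.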
3517
3518SDValue
3519LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3520 SelectionDAG &DAG) const {
3521 switch (Op.getConstantOperandVal(0)) {
3522 default:
3523 return SDValue(); // Don't custom lower most intrinsics.
3524 case Intrinsic::thread_pointer: {
3525 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3526 return DAG.getRegister(LoongArch::R2, PtrVT);
3527 }
3528 case Intrinsic::loongarch_lsx_vpickve2gr_d:
3529 case Intrinsic::loongarch_lsx_vpickve2gr_du:
3530 case Intrinsic::loongarch_lsx_vreplvei_d:
3531 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
3532 return checkIntrinsicImmArg<1>(Op, 2, DAG);
3533 case Intrinsic::loongarch_lsx_vreplvei_w:
3534 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
3535 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
3536 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
3537 case Intrinsic::loongarch_lasx_xvpickve_d:
3538 case Intrinsic::loongarch_lasx_xvpickve_d_f:
3539 return checkIntrinsicImmArg<2>(Op, 2, DAG);
3540 case Intrinsic::loongarch_lasx_xvinsve0_d:
3541 return checkIntrinsicImmArg<2>(Op, 3, DAG);
3542 case Intrinsic::loongarch_lsx_vsat_b:
3543 case Intrinsic::loongarch_lsx_vsat_bu:
3544 case Intrinsic::loongarch_lsx_vrotri_b:
3545 case Intrinsic::loongarch_lsx_vsllwil_h_b:
3546 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
3547 case Intrinsic::loongarch_lsx_vsrlri_b:
3548 case Intrinsic::loongarch_lsx_vsrari_b:
3549 case Intrinsic::loongarch_lsx_vreplvei_h:
3550 case Intrinsic::loongarch_lasx_xvsat_b:
3551 case Intrinsic::loongarch_lasx_xvsat_bu:
3552 case Intrinsic::loongarch_lasx_xvrotri_b:
3553 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
3554 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
3555 case Intrinsic::loongarch_lasx_xvsrlri_b:
3556 case Intrinsic::loongarch_lasx_xvsrari_b:
3557 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
3558 case Intrinsic::loongarch_lasx_xvpickve_w:
3559 case Intrinsic::loongarch_lasx_xvpickve_w_f:
3560 return checkIntrinsicImmArg<3>(Op, 2, DAG);
3561 case Intrinsic::loongarch_lasx_xvinsve0_w:
3562 return checkIntrinsicImmArg<3>(Op, 3, DAG);
3563 case Intrinsic::loongarch_lsx_vsat_h:
3564 case Intrinsic::loongarch_lsx_vsat_hu:
3565 case Intrinsic::loongarch_lsx_vrotri_h:
3566 case Intrinsic::loongarch_lsx_vsllwil_w_h:
3567 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
3568 case Intrinsic::loongarch_lsx_vsrlri_h:
3569 case Intrinsic::loongarch_lsx_vsrari_h:
3570 case Intrinsic::loongarch_lsx_vreplvei_b:
3571 case Intrinsic::loongarch_lasx_xvsat_h:
3572 case Intrinsic::loongarch_lasx_xvsat_hu:
3573 case Intrinsic::loongarch_lasx_xvrotri_h:
3574 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
3575 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
3576 case Intrinsic::loongarch_lasx_xvsrlri_h:
3577 case Intrinsic::loongarch_lasx_xvsrari_h:
3578 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
3579 return checkIntrinsicImmArg<4>(Op, 2, DAG);
3580 case Intrinsic::loongarch_lsx_vsrlni_b_h:
3581 case Intrinsic::loongarch_lsx_vsrani_b_h:
3582 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
3583 case Intrinsic::loongarch_lsx_vsrarni_b_h:
3584 case Intrinsic::loongarch_lsx_vssrlni_b_h:
3585 case Intrinsic::loongarch_lsx_vssrani_b_h:
3586 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
3587 case Intrinsic::loongarch_lsx_vssrani_bu_h:
3588 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
3589 case Intrinsic::loongarch_lsx_vssrarni_b_h:
3590 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
3591 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
3592 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
3593 case Intrinsic::loongarch_lasx_xvsrani_b_h:
3594 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
3595 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
3596 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
3597 case Intrinsic::loongarch_lasx_xvssrani_b_h:
3598 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
3599 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
3600 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
3601 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
3602 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
3603 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
3604 return checkIntrinsicImmArg<4>(Op, 3, DAG);
3605 case Intrinsic::loongarch_lsx_vsat_w:
3606 case Intrinsic::loongarch_lsx_vsat_wu:
3607 case Intrinsic::loongarch_lsx_vrotri_w:
3608 case Intrinsic::loongarch_lsx_vsllwil_d_w:
3609 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
3610 case Intrinsic::loongarch_lsx_vsrlri_w:
3611 case Intrinsic::loongarch_lsx_vsrari_w:
3612 case Intrinsic::loongarch_lsx_vslei_bu:
3613 case Intrinsic::loongarch_lsx_vslei_hu:
3614 case Intrinsic::loongarch_lsx_vslei_wu:
3615 case Intrinsic::loongarch_lsx_vslei_du:
3616 case Intrinsic::loongarch_lsx_vslti_bu:
3617 case Intrinsic::loongarch_lsx_vslti_hu:
3618 case Intrinsic::loongarch_lsx_vslti_wu:
3619 case Intrinsic::loongarch_lsx_vslti_du:
3620 case Intrinsic::loongarch_lsx_vbsll_v:
3621 case Intrinsic::loongarch_lsx_vbsrl_v:
3622 case Intrinsic::loongarch_lasx_xvsat_w:
3623 case Intrinsic::loongarch_lasx_xvsat_wu:
3624 case Intrinsic::loongarch_lasx_xvrotri_w:
3625 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
3626 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
3627 case Intrinsic::loongarch_lasx_xvsrlri_w:
3628 case Intrinsic::loongarch_lasx_xvsrari_w:
3629 case Intrinsic::loongarch_lasx_xvslei_bu:
3630 case Intrinsic::loongarch_lasx_xvslei_hu:
3631 case Intrinsic::loongarch_lasx_xvslei_wu:
3632 case Intrinsic::loongarch_lasx_xvslei_du:
3633 case Intrinsic::loongarch_lasx_xvslti_bu:
3634 case Intrinsic::loongarch_lasx_xvslti_hu:
3635 case Intrinsic::loongarch_lasx_xvslti_wu:
3636 case Intrinsic::loongarch_lasx_xvslti_du:
3637 case Intrinsic::loongarch_lasx_xvbsll_v:
3638 case Intrinsic::loongarch_lasx_xvbsrl_v:
3639 return checkIntrinsicImmArg<5>(Op, 2, DAG);
3640 case Intrinsic::loongarch_lsx_vseqi_b:
3641 case Intrinsic::loongarch_lsx_vseqi_h:
3642 case Intrinsic::loongarch_lsx_vseqi_w:
3643 case Intrinsic::loongarch_lsx_vseqi_d:
3644 case Intrinsic::loongarch_lsx_vslei_b:
3645 case Intrinsic::loongarch_lsx_vslei_h:
3646 case Intrinsic::loongarch_lsx_vslei_w:
3647 case Intrinsic::loongarch_lsx_vslei_d:
3648 case Intrinsic::loongarch_lsx_vslti_b:
3649 case Intrinsic::loongarch_lsx_vslti_h:
3650 case Intrinsic::loongarch_lsx_vslti_w:
3651 case Intrinsic::loongarch_lsx_vslti_d:
3652 case Intrinsic::loongarch_lasx_xvseqi_b:
3653 case Intrinsic::loongarch_lasx_xvseqi_h:
3654 case Intrinsic::loongarch_lasx_xvseqi_w:
3655 case Intrinsic::loongarch_lasx_xvseqi_d:
3656 case Intrinsic::loongarch_lasx_xvslei_b:
3657 case Intrinsic::loongarch_lasx_xvslei_h:
3658 case Intrinsic::loongarch_lasx_xvslei_w:
3659 case Intrinsic::loongarch_lasx_xvslei_d:
3660 case Intrinsic::loongarch_lasx_xvslti_b:
3661 case Intrinsic::loongarch_lasx_xvslti_h:
3662 case Intrinsic::loongarch_lasx_xvslti_w:
3663 case Intrinsic::loongarch_lasx_xvslti_d:
3664 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
3665 case Intrinsic::loongarch_lsx_vsrlni_h_w:
3666 case Intrinsic::loongarch_lsx_vsrani_h_w:
3667 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
3668 case Intrinsic::loongarch_lsx_vsrarni_h_w:
3669 case Intrinsic::loongarch_lsx_vssrlni_h_w:
3670 case Intrinsic::loongarch_lsx_vssrani_h_w:
3671 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
3672 case Intrinsic::loongarch_lsx_vssrani_hu_w:
3673 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
3674 case Intrinsic::loongarch_lsx_vssrarni_h_w:
3675 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
3676 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
3677 case Intrinsic::loongarch_lsx_vfrstpi_b:
3678 case Intrinsic::loongarch_lsx_vfrstpi_h:
3679 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
3680 case Intrinsic::loongarch_lasx_xvsrani_h_w:
3681 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
3682 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
3683 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
3684 case Intrinsic::loongarch_lasx_xvssrani_h_w:
3685 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
3686 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
3687 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
3688 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
3689 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
3690 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
3691 case Intrinsic::loongarch_lasx_xvfrstpi_b:
3692 case Intrinsic::loongarch_lasx_xvfrstpi_h:
3693 return checkIntrinsicImmArg<5>(Op, 3, DAG);
3694 case Intrinsic::loongarch_lsx_vsat_d:
3695 case Intrinsic::loongarch_lsx_vsat_du:
3696 case Intrinsic::loongarch_lsx_vrotri_d:
3697 case Intrinsic::loongarch_lsx_vsrlri_d:
3698 case Intrinsic::loongarch_lsx_vsrari_d:
3699 case Intrinsic::loongarch_lasx_xvsat_d:
3700 case Intrinsic::loongarch_lasx_xvsat_du:
3701 case Intrinsic::loongarch_lasx_xvrotri_d:
3702 case Intrinsic::loongarch_lasx_xvsrlri_d:
3703 case Intrinsic::loongarch_lasx_xvsrari_d:
3704 return checkIntrinsicImmArg<6>(Op, 2, DAG);
3705 case Intrinsic::loongarch_lsx_vsrlni_w_d:
3706 case Intrinsic::loongarch_lsx_vsrani_w_d:
3707 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
3708 case Intrinsic::loongarch_lsx_vsrarni_w_d:
3709 case Intrinsic::loongarch_lsx_vssrlni_w_d:
3710 case Intrinsic::loongarch_lsx_vssrani_w_d:
3711 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
3712 case Intrinsic::loongarch_lsx_vssrani_wu_d:
3713 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
3714 case Intrinsic::loongarch_lsx_vssrarni_w_d:
3715 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
3716 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
3717 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
3718 case Intrinsic::loongarch_lasx_xvsrani_w_d:
3719 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
3720 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
3721 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
3722 case Intrinsic::loongarch_lasx_xvssrani_w_d:
3723 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
3724 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
3725 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
3726 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
3727 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
3728 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
3729 return checkIntrinsicImmArg<6>(Op, 3, DAG);
3730 case Intrinsic::loongarch_lsx_vsrlni_d_q:
3731 case Intrinsic::loongarch_lsx_vsrani_d_q:
3732 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
3733 case Intrinsic::loongarch_lsx_vsrarni_d_q:
3734 case Intrinsic::loongarch_lsx_vssrlni_d_q:
3735 case Intrinsic::loongarch_lsx_vssrani_d_q:
3736 case Intrinsic::loongarch_lsx_vssrlni_du_q:
3737 case Intrinsic::loongarch_lsx_vssrani_du_q:
3738 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
3739 case Intrinsic::loongarch_lsx_vssrarni_d_q:
3740 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
3741 case Intrinsic::loongarch_lsx_vssrarni_du_q:
3742 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
3743 case Intrinsic::loongarch_lasx_xvsrani_d_q:
3744 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
3745 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
3746 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
3747 case Intrinsic::loongarch_lasx_xvssrani_d_q:
3748 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
3749 case Intrinsic::loongarch_lasx_xvssrani_du_q:
3750 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
3751 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
3752 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
3753 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
3754 return checkIntrinsicImmArg<7>(Op, 3, DAG);
3755 case Intrinsic::loongarch_lsx_vnori_b:
3756 case Intrinsic::loongarch_lsx_vshuf4i_b:
3757 case Intrinsic::loongarch_lsx_vshuf4i_h:
3758 case Intrinsic::loongarch_lsx_vshuf4i_w:
3759 case Intrinsic::loongarch_lasx_xvnori_b:
3760 case Intrinsic::loongarch_lasx_xvshuf4i_b:
3761 case Intrinsic::loongarch_lasx_xvshuf4i_h:
3762 case Intrinsic::loongarch_lasx_xvshuf4i_w:
3763 case Intrinsic::loongarch_lasx_xvpermi_d:
3764 return checkIntrinsicImmArg<8>(Op, 2, DAG);
3765 case Intrinsic::loongarch_lsx_vshuf4i_d:
3766 case Intrinsic::loongarch_lsx_vpermi_w:
3767 case Intrinsic::loongarch_lsx_vbitseli_b:
3768 case Intrinsic::loongarch_lsx_vextrins_b:
3769 case Intrinsic::loongarch_lsx_vextrins_h:
3770 case Intrinsic::loongarch_lsx_vextrins_w:
3771 case Intrinsic::loongarch_lsx_vextrins_d:
3772 case Intrinsic::loongarch_lasx_xvshuf4i_d:
3773 case Intrinsic::loongarch_lasx_xvpermi_w:
3774 case Intrinsic::loongarch_lasx_xvpermi_q:
3775 case Intrinsic::loongarch_lasx_xvbitseli_b:
3776 case Intrinsic::loongarch_lasx_xvextrins_b:
3777 case Intrinsic::loongarch_lasx_xvextrins_h:
3778 case Intrinsic::loongarch_lasx_xvextrins_w:
3779 case Intrinsic::loongarch_lasx_xvextrins_d:
3780 return checkIntrinsicImmArg<8>(Op, 3, DAG);
3781 case Intrinsic::loongarch_lsx_vrepli_b:
3782 case Intrinsic::loongarch_lsx_vrepli_h:
3783 case Intrinsic::loongarch_lsx_vrepli_w:
3784 case Intrinsic::loongarch_lsx_vrepli_d:
3785 case Intrinsic::loongarch_lasx_xvrepli_b:
3786 case Intrinsic::loongarch_lasx_xvrepli_h:
3787 case Intrinsic::loongarch_lasx_xvrepli_w:
3788 case Intrinsic::loongarch_lasx_xvrepli_d:
3789 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
3790 case Intrinsic::loongarch_lsx_vldi:
3791 case Intrinsic::loongarch_lasx_xvldi:
3792 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
3793 }
3794}
3795
3796 // Helper function that emits an error message for intrinsics with a chain and
3797 // returns the merged values of an UNDEF and the chain.
3798 static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
3799 StringRef ErrorMsg,
3800 SelectionDAG &DAG) {
3801 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
3802 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
3803 SDLoc(Op));
3804}
3805
3806SDValue
3807LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
3808 SelectionDAG &DAG) const {
3809 SDLoc DL(Op);
3810 MVT GRLenVT = Subtarget.getGRLenVT();
3811 EVT VT = Op.getValueType();
3812 SDValue Chain = Op.getOperand(0);
3813 const StringRef ErrorMsgOOR = "argument out of range";
3814 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3815 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3816
3817 switch (Op.getConstantOperandVal(1)) {
3818 default:
3819 return Op;
3820 case Intrinsic::loongarch_crc_w_b_w:
3821 case Intrinsic::loongarch_crc_w_h_w:
3822 case Intrinsic::loongarch_crc_w_w_w:
3823 case Intrinsic::loongarch_crc_w_d_w:
3824 case Intrinsic::loongarch_crcc_w_b_w:
3825 case Intrinsic::loongarch_crcc_w_h_w:
3826 case Intrinsic::loongarch_crcc_w_w_w:
3827 case Intrinsic::loongarch_crcc_w_d_w:
3828 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
3829 case Intrinsic::loongarch_csrrd_w:
3830 case Intrinsic::loongarch_csrrd_d: {
3831 unsigned Imm = Op.getConstantOperandVal(2);
3832 return !isUInt<14>(Imm)
3833 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3834 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
3835 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3836 }
3837 case Intrinsic::loongarch_csrwr_w:
3838 case Intrinsic::loongarch_csrwr_d: {
3839 unsigned Imm = Op.getConstantOperandVal(3);
3840 return !isUInt<14>(Imm)
3841 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3842 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
3843 {Chain, Op.getOperand(2),
3844 DAG.getConstant(Imm, DL, GRLenVT)});
3845 }
3846 case Intrinsic::loongarch_csrxchg_w:
3847 case Intrinsic::loongarch_csrxchg_d: {
3848 unsigned Imm = Op.getConstantOperandVal(4);
3849 return !isUInt<14>(Imm)
3850 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3851 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
3852 {Chain, Op.getOperand(2), Op.getOperand(3),
3853 DAG.getConstant(Imm, DL, GRLenVT)});
3854 }
3855 case Intrinsic::loongarch_iocsrrd_d: {
3856 return DAG.getNode(
3857 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
3858 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
3859 }
3860#define IOCSRRD_CASE(NAME, NODE) \
3861 case Intrinsic::loongarch_##NAME: { \
3862 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
3863 {Chain, Op.getOperand(2)}); \
3864 }
3865 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
3866 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
3867 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
3868#undef IOCSRRD_CASE
3869 case Intrinsic::loongarch_cpucfg: {
3870 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
3871 {Chain, Op.getOperand(2)});
3872 }
3873 case Intrinsic::loongarch_lddir_d: {
3874 unsigned Imm = Op.getConstantOperandVal(3);
3875 return !isUInt<8>(Imm)
3876 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3877 : Op;
3878 }
3879 case Intrinsic::loongarch_movfcsr2gr: {
3880 if (!Subtarget.hasBasicF())
3881 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
3882 unsigned Imm = Op.getConstantOperandVal(2);
3883 return !isUInt<2>(Imm)
3884 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3885 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
3886 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3887 }
3888 case Intrinsic::loongarch_lsx_vld:
3889 case Intrinsic::loongarch_lsx_vldrepl_b:
3890 case Intrinsic::loongarch_lasx_xvld:
3891 case Intrinsic::loongarch_lasx_xvldrepl_b:
3892 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3893 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3894 : SDValue();
3895 case Intrinsic::loongarch_lsx_vldrepl_h:
3896 case Intrinsic::loongarch_lasx_xvldrepl_h:
3897 return !isShiftedInt<11, 1>(
3898 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3899 ? emitIntrinsicWithChainErrorMessage(
3900 Op, "argument out of range or not a multiple of 2", DAG)
3901 : SDValue();
3902 case Intrinsic::loongarch_lsx_vldrepl_w:
3903 case Intrinsic::loongarch_lasx_xvldrepl_w:
3904 return !isShiftedInt<10, 2>(
3905 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3906 ? emitIntrinsicWithChainErrorMessage(
3907 Op, "argument out of range or not a multiple of 4", DAG)
3908 : SDValue();
3909 case Intrinsic::loongarch_lsx_vldrepl_d:
3910 case Intrinsic::loongarch_lasx_xvldrepl_d:
3911 return !isShiftedInt<9, 3>(
3912 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3913 ? emitIntrinsicWithChainErrorMessage(
3914 Op, "argument out of range or not a multiple of 8", DAG)
3915 : SDValue();
3916 }
3917}
3918
3919 // Helper function that emits an error message for intrinsics with a void
3920 // return value and returns the chain.
3921 static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
3922 SelectionDAG &DAG) {
3923
3924 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
3925 return Op.getOperand(0);
3926}
3927
3928SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
3929 SelectionDAG &DAG) const {
3930 SDLoc DL(Op);
3931 MVT GRLenVT = Subtarget.getGRLenVT();
3932 SDValue Chain = Op.getOperand(0);
3933 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
3934 SDValue Op2 = Op.getOperand(2);
3935 const StringRef ErrorMsgOOR = "argument out of range";
3936 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3937 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
3938 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3939
3940 switch (IntrinsicEnum) {
3941 default:
3942 // TODO: Add more Intrinsics.
3943 return SDValue();
3944 case Intrinsic::loongarch_cacop_d:
3945 case Intrinsic::loongarch_cacop_w: {
3946 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
3947 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
3948 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
3949 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
3950 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
3951 unsigned Imm1 = Op2->getAsZExtVal();
3952 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
3953 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
3954 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
3955 return Op;
3956 }
3957 case Intrinsic::loongarch_dbar: {
3958 unsigned Imm = Op2->getAsZExtVal();
3959 return !isUInt<15>(Imm)
3960 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3961 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
3962 DAG.getConstant(Imm, DL, GRLenVT));
3963 }
3964 case Intrinsic::loongarch_ibar: {
3965 unsigned Imm = Op2->getAsZExtVal();
3966 return !isUInt<15>(Imm)
3967 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3968 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
3969 DAG.getConstant(Imm, DL, GRLenVT));
3970 }
3971 case Intrinsic::loongarch_break: {
3972 unsigned Imm = Op2->getAsZExtVal();
3973 return !isUInt<15>(Imm)
3974 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3975 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
3976 DAG.getConstant(Imm, DL, GRLenVT));
3977 }
3978 case Intrinsic::loongarch_movgr2fcsr: {
3979 if (!Subtarget.hasBasicF())
3980 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
3981 unsigned Imm = Op2->getAsZExtVal();
3982 return !isUInt<2>(Imm)
3983 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3984 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
3985 DAG.getConstant(Imm, DL, GRLenVT),
3986 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
3987 Op.getOperand(3)));
3988 }
3989 case Intrinsic::loongarch_syscall: {
3990 unsigned Imm = Op2->getAsZExtVal();
3991 return !isUInt<15>(Imm)
3992 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3993 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
3994 DAG.getConstant(Imm, DL, GRLenVT));
3995 }
3996#define IOCSRWR_CASE(NAME, NODE) \
3997 case Intrinsic::loongarch_##NAME: { \
3998 SDValue Op3 = Op.getOperand(3); \
3999 return Subtarget.is64Bit() \
4000 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
4001 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4002 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
4003 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
4004 Op3); \
4005 }
4006 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
4007 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
4008 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
4009#undef IOCSRWR_CASE
4010 case Intrinsic::loongarch_iocsrwr_d: {
4011 return !Subtarget.is64Bit()
4012 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4013 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
4014 Op2,
4015 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
4016 Op.getOperand(3)));
4017 }
4018#define ASRT_LE_GT_CASE(NAME) \
4019 case Intrinsic::loongarch_##NAME: { \
4020 return !Subtarget.is64Bit() \
4021 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
4022 : Op; \
4023 }
4024 ASRT_LE_GT_CASE(asrtle_d)
4025 ASRT_LE_GT_CASE(asrtgt_d)
4026#undef ASRT_LE_GT_CASE
4027 case Intrinsic::loongarch_ldpte_d: {
4028 unsigned Imm = Op.getConstantOperandVal(3);
4029 return !Subtarget.is64Bit()
4030 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4031 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4032 : Op;
4033 }
4034 case Intrinsic::loongarch_lsx_vst:
4035 case Intrinsic::loongarch_lasx_xvst:
4036 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
4037 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4038 : SDValue();
4039 case Intrinsic::loongarch_lasx_xvstelm_b:
4040 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4041 !isUInt<5>(Op.getConstantOperandVal(5)))
4042 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4043 : SDValue();
4044 case Intrinsic::loongarch_lsx_vstelm_b:
4045 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4046 !isUInt<4>(Op.getConstantOperandVal(5)))
4047 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4048 : SDValue();
4049 case Intrinsic::loongarch_lasx_xvstelm_h:
4050 return (!isShiftedInt<8, 1>(
4051 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4052 !isUInt<4>(Op.getConstantOperandVal(5)))
4053 ? emitIntrinsicErrorMessage(
4054 Op, "argument out of range or not a multiple of 2", DAG)
4055 : SDValue();
4056 case Intrinsic::loongarch_lsx_vstelm_h:
4057 return (!isShiftedInt<8, 1>(
4058 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4059 !isUInt<3>(Op.getConstantOperandVal(5)))
4060 ? emitIntrinsicErrorMessage(
4061 Op, "argument out of range or not a multiple of 2", DAG)
4062 : SDValue();
4063 case Intrinsic::loongarch_lasx_xvstelm_w:
4064 return (!isShiftedInt<8, 2>(
4065 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4066 !isUInt<3>(Op.getConstantOperandVal(5)))
4067 ? emitIntrinsicErrorMessage(
4068 Op, "argument out of range or not a multiple of 4", DAG)
4069 : SDValue();
4070 case Intrinsic::loongarch_lsx_vstelm_w:
4071 return (!isShiftedInt<8, 2>(
4072 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4073 !isUInt<2>(Op.getConstantOperandVal(5)))
4074 ? emitIntrinsicErrorMessage(
4075 Op, "argument out of range or not a multiple of 4", DAG)
4076 : SDValue();
4077 case Intrinsic::loongarch_lasx_xvstelm_d:
4078 return (!isShiftedInt<8, 3>(
4079 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4080 !isUInt<2>(Op.getConstantOperandVal(5)))
4081 ? emitIntrinsicErrorMessage(
4082 Op, "argument out of range or not a multiple of 8", DAG)
4083 : SDValue();
4084 case Intrinsic::loongarch_lsx_vstelm_d:
4085 return (!isShiftedInt<8, 3>(
4086 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4087 !isUInt<1>(Op.getConstantOperandVal(5)))
4088 ? emitIntrinsicErrorMessage(
4089 Op, "argument out of range or not a multiple of 8", DAG)
4090 : SDValue();
4091 }
4092}
4093
4094SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
4095 SelectionDAG &DAG) const {
4096 SDLoc DL(Op);
4097 SDValue Lo = Op.getOperand(0);
4098 SDValue Hi = Op.getOperand(1);
4099 SDValue Shamt = Op.getOperand(2);
4100 EVT VT = Lo.getValueType();
4101
4102 // if Shamt-GRLen < 0: // Shamt < GRLen
4103 // Lo = Lo << Shamt
4104 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
4105 // else:
4106 // Lo = 0
4107 // Hi = Lo << (Shamt-GRLen)
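// For example, with GRLen == 32 and Shamt == 40 the else-branch applies: the new
// Lo is 0 and the new Hi is the original Lo shifted left by 8.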
4108
4109 SDValue Zero = DAG.getConstant(0, DL, VT);
4110 SDValue One = DAG.getConstant(1, DL, VT);
4111 SDValue MinusGRLen =
4112 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4113 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4114 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4115 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4116
4117 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
4118 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
4119 SDValue ShiftRightLo =
4120 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
4121 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
4122 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
4123 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
4124
4125 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4126
4127 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
4128 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4129
4130 SDValue Parts[2] = {Lo, Hi};
4131 return DAG.getMergeValues(Parts, DL);
4132}
4133
4134SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
4135 SelectionDAG &DAG,
4136 bool IsSRA) const {
4137 SDLoc DL(Op);
4138 SDValue Lo = Op.getOperand(0);
4139 SDValue Hi = Op.getOperand(1);
4140 SDValue Shamt = Op.getOperand(2);
4141 EVT VT = Lo.getValueType();
4142
4143 // SRA expansion:
4144 // if Shamt-GRLen < 0: // Shamt < GRLen
4145 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4146 // Hi = Hi >>s Shamt
4147 // else:
4148 // Lo = Hi >>s (Shamt-GRLen);
4149 // Hi = Hi >>s (GRLen-1)
4150 //
4151 // SRL expansion:
4152 // if Shamt-GRLen < 0: // Shamt < GRLen
4153 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4154 // Hi = Hi >>u Shamt
4155 // else:
4156 // Lo = Hi >>u (Shamt-GRLen);
4157 // Hi = 0;
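// For example, with GRLen == 32 and Shamt == 40 the else-branch applies: for SRL,
// Lo = Hi >>u 8 and Hi = 0; for SRA, Lo = Hi >>s 8 and Hi = Hi >>s 31 (sign fill).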
4158
4159 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
4160
4161 SDValue Zero = DAG.getConstant(0, DL, VT);
4162 SDValue One = DAG.getConstant(1, DL, VT);
4163 SDValue MinusGRLen =
4164 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4165 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4166 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4167 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4168
4169 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
4170 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
4171 SDValue ShiftLeftHi =
4172 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
4173 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
4174 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
4175 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
4176 SDValue HiFalse =
4177 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
4178
4179 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4180
4181 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
4182 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4183
4184 SDValue Parts[2] = {Lo, Hi};
4185 return DAG.getMergeValues(Parts, DL);
4186}
4187
4188// Returns the opcode of the target-specific SDNode that implements the 32-bit
4189// form of the given Opcode.
4190 static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
4191 switch (Opcode) {
4192 default:
4193 llvm_unreachable("Unexpected opcode");
4194 case ISD::SDIV:
4195 return LoongArchISD::DIV_W;
4196 case ISD::UDIV:
4197 return LoongArchISD::DIV_WU;
4198 case ISD::SREM:
4199 return LoongArchISD::MOD_W;
4200 case ISD::UREM:
4201 return LoongArchISD::MOD_WU;
4202 case ISD::SHL:
4203 return LoongArchISD::SLL_W;
4204 case ISD::SRA:
4205 return LoongArchISD::SRA_W;
4206 case ISD::SRL:
4207 return LoongArchISD::SRL_W;
4208 case ISD::ROTL:
4209 case ISD::ROTR:
4210 return LoongArchISD::ROTR_W;
4211 case ISD::CTTZ:
4212 return LoongArchISD::CTZ_W;
4213 case ISD::CTLZ:
4214 return LoongArchISD::CLZ_W;
4215 }
4216}
4217
4218// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
4219// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
4220// otherwise be promoted to i64, making it difficult to select the
4221 // SLL_W/.../*W later, because the fact that the operation was originally of
4222// type i8/i16/i32 is lost.
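// For example, an i32 "srl" on LA64 becomes
// (trunc (LoongArchISD::SRL_W (any_ext x), (any_ext shamt)))
// so that a 32-bit shift (srl.w) can still be selected.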
4223 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
4224 unsigned ExtOpc = ISD::ANY_EXTEND) {
4225 SDLoc DL(N);
4226 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
4227 SDValue NewOp0, NewRes;
4228
4229 switch (NumOp) {
4230 default:
4231 llvm_unreachable("Unexpected NumOp");
4232 case 1: {
4233 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4234 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
4235 break;
4236 }
4237 case 2: {
4238 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4239 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
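// getLoongArchWOpcode maps both ROTL and ROTR to ROTR_W (there is no
// rotate-left form), so a rotate-left amount is rewritten as 32 minus the
// amount here.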
4240 if (N->getOpcode() == ISD::ROTL) {
4241 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
4242 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
4243 }
4244 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
4245 break;
4246 }
4247 // TODO: Handle more NumOp.
4248 }
4249
4250 // ReplaceNodeResults requires we maintain the same type for the return
4251 // value.
4252 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
4253}
4254
4255 // Converts the given 32-bit operation to an i64 operation with sign-extension
4256 // semantics, reducing the number of sign-extension instructions needed.
4257 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
4258 SDLoc DL(N);
4259 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4260 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4261 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
4262 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
4263 DAG.getValueType(MVT::i32));
4264 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
4265}
4266
4267 // Helper function that emits an error message for intrinsics with or without a
4268 // chain and returns an UNDEF and (optionally) the chain as the results.
4269 static void emitErrorAndReplaceIntrinsicResults(
4270 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
4271 StringRef ErrorMsg, bool WithChain = true) {
4272 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
4273 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
4274 if (!WithChain)
4275 return;
4276 Results.push_back(N->getOperand(0));
4277}
4278
4279template <unsigned N>
4280static void
4281 replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
4282 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
4283 unsigned ResOp) {
4284 const StringRef ErrorMsgOOR = "argument out of range";
4285 unsigned Imm = Node->getConstantOperandVal(2);
4286 if (!isUInt<N>(Imm)) {
4287 emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
4288 /*WithChain=*/false);
4289 return;
4290 }
4291 SDLoc DL(Node);
4292 SDValue Vec = Node->getOperand(1);
4293
4294 SDValue PickElt =
4295 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
4296 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
4297 DAG.getValueType(Vec.getValueType().getVectorElementType()));
4298 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
4299 PickElt.getValue(0)));
4300}
4301
4304 SelectionDAG &DAG,
4305 const LoongArchSubtarget &Subtarget,
4306 unsigned ResOp) {
4307 SDLoc DL(N);
4308 SDValue Vec = N->getOperand(1);
4309
4310 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
4311 Results.push_back(
4312 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
4313}
4314
4315static void
4316 replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
4317 SelectionDAG &DAG,
4318 const LoongArchSubtarget &Subtarget) {
4319 switch (N->getConstantOperandVal(0)) {
4320 default:
4321 llvm_unreachable("Unexpected Intrinsic.");
4322 case Intrinsic::loongarch_lsx_vpickve2gr_b:
4323 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4324 LoongArchISD::VPICK_SEXT_ELT);
4325 break;
4326 case Intrinsic::loongarch_lsx_vpickve2gr_h:
4327 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
4328 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4329 LoongArchISD::VPICK_SEXT_ELT);
4330 break;
4331 case Intrinsic::loongarch_lsx_vpickve2gr_w:
4332 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4333 LoongArchISD::VPICK_SEXT_ELT);
4334 break;
4335 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
4336 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4337 LoongArchISD::VPICK_ZEXT_ELT);
4338 break;
4339 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
4340 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
4341 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4342 LoongArchISD::VPICK_ZEXT_ELT);
4343 break;
4344 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
4345 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4346 LoongArchISD::VPICK_ZEXT_ELT);
4347 break;
4348 case Intrinsic::loongarch_lsx_bz_b:
4349 case Intrinsic::loongarch_lsx_bz_h:
4350 case Intrinsic::loongarch_lsx_bz_w:
4351 case Intrinsic::loongarch_lsx_bz_d:
4352 case Intrinsic::loongarch_lasx_xbz_b:
4353 case Intrinsic::loongarch_lasx_xbz_h:
4354 case Intrinsic::loongarch_lasx_xbz_w:
4355 case Intrinsic::loongarch_lasx_xbz_d:
4356 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4358 break;
4359 case Intrinsic::loongarch_lsx_bz_v:
4360 case Intrinsic::loongarch_lasx_xbz_v:
4361 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4363 break;
4364 case Intrinsic::loongarch_lsx_bnz_b:
4365 case Intrinsic::loongarch_lsx_bnz_h:
4366 case Intrinsic::loongarch_lsx_bnz_w:
4367 case Intrinsic::loongarch_lsx_bnz_d:
4368 case Intrinsic::loongarch_lasx_xbnz_b:
4369 case Intrinsic::loongarch_lasx_xbnz_h:
4370 case Intrinsic::loongarch_lasx_xbnz_w:
4371 case Intrinsic::loongarch_lasx_xbnz_d:
4372 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4374 break;
4375 case Intrinsic::loongarch_lsx_bnz_v:
4376 case Intrinsic::loongarch_lasx_xbnz_v:
4377 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4379 break;
4380 }
4381}
4382
4383 static void replaceCMP_XCHG_128Results(SDNode *N,
4384 SmallVectorImpl<SDValue> &Results,
4385 SelectionDAG &DAG) {
4386 assert(N->getValueType(0) == MVT::i128 &&
4387 "AtomicCmpSwap on types less than 128 should be legal");
4388 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4389
4390 unsigned Opcode;
4391 switch (MemOp->getMergedOrdering()) {
4392 case AtomicOrdering::Acquire:
4393 case AtomicOrdering::AcquireRelease:
4394 case AtomicOrdering::SequentiallyConsistent:
4395 Opcode = LoongArch::PseudoCmpXchg128Acquire;
4396 break;
4397 case AtomicOrdering::Monotonic:
4398 case AtomicOrdering::Release:
4399 Opcode = LoongArch::PseudoCmpXchg128;
4400 break;
4400 break;
4401 default:
4402 llvm_unreachable("Unexpected ordering!");
4403 }
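// Split the i128 compare and new values into i64 halves for the pseudo, which
// yields the original memory value as two i64 results (0 and 1) plus the chain
// (result 3); they are recombined into an i128 below.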
4404
4405 SDLoc DL(N);
4406 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
4407 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
4408 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
4409 NewVal.first, NewVal.second, N->getOperand(0)};
4410
4411 SDNode *CmpSwap = DAG.getMachineNode(
4412 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
4413 Ops);
4414 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4415 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
4416 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
4417 Results.push_back(SDValue(CmpSwap, 3));
4418}
4419 void LoongArchTargetLowering::ReplaceNodeResults(
4420 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
4421 SDLoc DL(N);
4423 EVT VT = N->getValueType(0);
4424 switch (N->getOpcode()) {
4425 default:
4426 llvm_unreachable("Don't know how to legalize this operation");
4427 case ISD::ADD:
4428 case ISD::SUB:
4429 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4430 "Unexpected custom legalisation");
4431 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
4432 break;
4433 case ISD::SDIV:
4434 case ISD::UDIV:
4435 case ISD::SREM:
4436 case ISD::UREM:
4437 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4438 "Unexpected custom legalisation");
4439 Results.push_back(customLegalizeToWOp(N, DAG, 2,
4440 Subtarget.hasDiv32() && VT == MVT::i32
4441 ? ISD::ANY_EXTEND
4442 : ISD::SIGN_EXTEND));
4443 break;
4444 case ISD::SHL:
4445 case ISD::SRA:
4446 case ISD::SRL:
4447 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4448 "Unexpected custom legalisation");
4449 if (N->getOperand(1).getOpcode() != ISD::Constant) {
4450 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4451 break;
4452 }
4453 break;
4454 case ISD::ROTL:
4455 case ISD::ROTR:
4456 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4457 "Unexpected custom legalisation");
4458 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4459 break;
4460 case ISD::FP_TO_SINT: {
4461 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4462 "Unexpected custom legalisation");
4463 SDValue Src = N->getOperand(0);
4464 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
4465 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
4466 TargetLowering::TypeSoftenFloat) {
4467 if (!isTypeLegal(Src.getValueType()))
4468 return;
4469 if (Src.getValueType() == MVT::f16)
4470 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
4471 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
4472 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
4473 return;
4474 }
4475 // If the FP type needs to be softened, emit a library call using the 'si'
4476 // version. If we left it to default legalization we'd end up with 'di'.
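// For example, an f32 -> i32 conversion lowered here uses __fixsfsi rather than
// the __fixsfdi call that default legalization would pick.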
4477 RTLIB::Libcall LC;
4478 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
4479 MakeLibCallOptions CallOptions;
4480 EVT OpVT = Src.getValueType();
4481 CallOptions.setTypeListBeforeSoften(OpVT, VT);
4482 SDValue Chain = SDValue();
4483 SDValue Result;
4484 std::tie(Result, Chain) =
4485 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
4486 Results.push_back(Result);
4487 break;
4488 }
4489 case ISD::BITCAST: {
4490 SDValue Src = N->getOperand(0);
4491 EVT SrcVT = Src.getValueType();
4492 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
4493 Subtarget.hasBasicF()) {
4494 SDValue Dst =
4495 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
4496 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
4497 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
4499 DAG.getVTList(MVT::i32, MVT::i32), Src);
4500 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
4501 NewReg.getValue(0), NewReg.getValue(1));
4502 Results.push_back(RetReg);
4503 }
4504 break;
4505 }
4506 case ISD::FP_TO_UINT: {
4507 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4508 "Unexpected custom legalisation");
4509 auto &TLI = DAG.getTargetLoweringInfo();
4510 SDValue Tmp1, Tmp2;
4511 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
4512 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
4513 break;
4514 }
4515 case ISD::BSWAP: {
4516 SDValue Src = N->getOperand(0);
4517 assert((VT == MVT::i16 || VT == MVT::i32) &&
4518 "Unexpected custom legalization");
4519 MVT GRLenVT = Subtarget.getGRLenVT();
4520 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4521 SDValue Tmp;
4522 switch (VT.getSizeInBits()) {
4523 default:
4524 llvm_unreachable("Unexpected operand width");
4525 case 16:
4526 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
4527 break;
4528 case 32:
4529 // Only LA64 will get here, due to the size mismatch between VT and
4530 // GRLenVT; the LA32 lowering is defined directly in LoongArchInstrInfo.
4531 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
4532 break;
4533 }
4534 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4535 break;
4536 }
4537 case ISD::BITREVERSE: {
4538 SDValue Src = N->getOperand(0);
4539 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
4540 "Unexpected custom legalization");
4541 MVT GRLenVT = Subtarget.getGRLenVT();
4542 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4543 SDValue Tmp;
4544 switch (VT.getSizeInBits()) {
4545 default:
4546 llvm_unreachable("Unexpected operand width");
4547 case 8:
4548 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
4549 break;
4550 case 32:
4551 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
4552 break;
4553 }
4554 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4555 break;
4556 }
4557 case ISD::CTLZ:
4558 case ISD::CTTZ: {
4559 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4560 "Unexpected custom legalisation");
4561 Results.push_back(customLegalizeToWOp(N, DAG, 1));
4562 break;
4563 }
4564 case ISD::INTRINSIC_W_CHAIN: {
4565 SDValue Chain = N->getOperand(0);
4566 SDValue Op2 = N->getOperand(2);
4567 MVT GRLenVT = Subtarget.getGRLenVT();
4568 const StringRef ErrorMsgOOR = "argument out of range";
4569 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4570 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4571
4572 switch (N->getConstantOperandVal(1)) {
4573 default:
4574 llvm_unreachable("Unexpected Intrinsic.");
4575 case Intrinsic::loongarch_movfcsr2gr: {
4576 if (!Subtarget.hasBasicF()) {
4577 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
4578 return;
4579 }
4580 unsigned Imm = Op2->getAsZExtVal();
4581 if (!isUInt<2>(Imm)) {
4582 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4583 return;
4584 }
4585 SDValue MOVFCSR2GRResults = DAG.getNode(
4586 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
4587 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4588 Results.push_back(
4589 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
4590 Results.push_back(MOVFCSR2GRResults.getValue(1));
4591 break;
4592 }
4593#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
4594 case Intrinsic::loongarch_##NAME: { \
4595 SDValue NODE = DAG.getNode( \
4596 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4597 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4598 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4599 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4600 Results.push_back(NODE.getValue(1)); \
4601 break; \
4602 }
4603 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
4604 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
4605 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
4606 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
4607 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
4608 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
4609#undef CRC_CASE_EXT_BINARYOP
4610
4611#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
4612 case Intrinsic::loongarch_##NAME: { \
4613 SDValue NODE = DAG.getNode( \
4614 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4615 {Chain, Op2, \
4616 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4617 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4618 Results.push_back(NODE.getValue(1)); \
4619 break; \
4620 }
4621 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
4622 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
4623#undef CRC_CASE_EXT_UNARYOP
4624#define CSR_CASE(ID) \
4625 case Intrinsic::loongarch_##ID: { \
4626 if (!Subtarget.is64Bit()) \
4627 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
4628 break; \
4629 }
4630 CSR_CASE(csrrd_d);
4631 CSR_CASE(csrwr_d);
4632 CSR_CASE(csrxchg_d);
4633 CSR_CASE(iocsrrd_d);
4634#undef CSR_CASE
4635 case Intrinsic::loongarch_csrrd_w: {
4636 unsigned Imm = Op2->getAsZExtVal();
4637 if (!isUInt<14>(Imm)) {
4638 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4639 return;
4640 }
4641 SDValue CSRRDResults =
4642 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4643 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4644 Results.push_back(
4645 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
4646 Results.push_back(CSRRDResults.getValue(1));
4647 break;
4648 }
4649 case Intrinsic::loongarch_csrwr_w: {
4650 unsigned Imm = N->getConstantOperandVal(3);
4651 if (!isUInt<14>(Imm)) {
4652 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4653 return;
4654 }
4655 SDValue CSRWRResults =
4656 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4657 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4658 DAG.getConstant(Imm, DL, GRLenVT)});
4659 Results.push_back(
4660 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
4661 Results.push_back(CSRWRResults.getValue(1));
4662 break;
4663 }
4664 case Intrinsic::loongarch_csrxchg_w: {
4665 unsigned Imm = N->getConstantOperandVal(4);
4666 if (!isUInt<14>(Imm)) {
4667 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4668 return;
4669 }
4670 SDValue CSRXCHGResults = DAG.getNode(
4671 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4672 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4673 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
4674 DAG.getConstant(Imm, DL, GRLenVT)});
4675 Results.push_back(
4676 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
4677 Results.push_back(CSRXCHGResults.getValue(1));
4678 break;
4679 }
4680#define IOCSRRD_CASE(NAME, NODE) \
4681 case Intrinsic::loongarch_##NAME: { \
4682 SDValue IOCSRRDResults = \
4683 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4684 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
4685 Results.push_back( \
4686 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
4687 Results.push_back(IOCSRRDResults.getValue(1)); \
4688 break; \
4689 }
4690 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4691 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4692 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4693#undef IOCSRRD_CASE
4694 case Intrinsic::loongarch_cpucfg: {
4695 SDValue CPUCFGResults =
4696 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4697 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
4698 Results.push_back(
4699 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
4700 Results.push_back(CPUCFGResults.getValue(1));
4701 break;
4702 }
4703 case Intrinsic::loongarch_lddir_d: {
4704 if (!Subtarget.is64Bit()) {
4705 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
4706 return;
4707 }
4708 break;
4709 }
4710 }
4711 break;
4712 }
4713 case ISD::READ_REGISTER: {
4714 if (Subtarget.is64Bit())
4715 DAG.getContext()->emitError(
4716 "On LA64, only 64-bit registers can be read.");
4717 else
4718 DAG.getContext()->emitError(
4719 "On LA32, only 32-bit registers can be read.");
4720 Results.push_back(DAG.getUNDEF(VT));
4721 Results.push_back(N->getOperand(0));
4722 break;
4723 }
4724 case ISD::INTRINSIC_WO_CHAIN: {
4725 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
4726 break;
4727 }
4728 case ISD::LROUND: {
4729 SDValue Op0 = N->getOperand(0);
4730 EVT OpVT = Op0.getValueType();
4731 RTLIB::Libcall LC =
4732 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
4733 MakeLibCallOptions CallOptions;
4734 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
4735 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
4736 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
4737 Results.push_back(Result);
4738 break;
4739 }
4740 case ISD::ATOMIC_CMP_SWAP: {
4741 replaceCMP_XCHG_128Results(N, Results, DAG);
4742 break;
4743 }
4744 case ISD::TRUNCATE: {
4745 MVT VT = N->getSimpleValueType(0);
4746 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
4747 return;
4748
4749 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
4750 SDValue In = N->getOperand(0);
4751 EVT InVT = In.getValueType();
4752 EVT InEltVT = InVT.getVectorElementType();
4753 EVT EltVT = VT.getVectorElementType();
4754 unsigned MinElts = VT.getVectorNumElements();
4755 unsigned WidenNumElts = WidenVT.getVectorNumElements();
4756 unsigned InBits = InVT.getSizeInBits();
4757
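// For example, truncating v4i32 to v4i8 (widened to v16i8) gives Scale = 4, so
// the shuffle mask below selects bytes 0, 4, 8 and 12 of the bitcast source,
// i.e. the low byte of each i32 lane.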
4758 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
4759 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
4760 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
4761 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
4762 for (unsigned I = 0; I < MinElts; ++I)
4763 TruncMask[I] = Scale * I;
4764
4765 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
4766 MVT SVT = In.getSimpleValueType().getScalarType();
4767 MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
4768 SDValue WidenIn =
4769 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
4770 DAG.getVectorIdxConstant(0, DL));
4771 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
4772 "Illegal vector type in truncation");
4773 WidenIn = DAG.getBitcast(WidenVT, WidenIn);
4774 Results.push_back(
4775 DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
4776 return;
4777 }
4778 }
4779
4780 break;
4781 }
4782 }
4783}
4784
4785 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
4786 TargetLowering::DAGCombinerInfo &DCI,
4787 const LoongArchSubtarget &Subtarget) {
4788 if (DCI.isBeforeLegalizeOps())
4789 return SDValue();
4790
4791 SDValue FirstOperand = N->getOperand(0);
4792 SDValue SecondOperand = N->getOperand(1);
4793 unsigned FirstOperandOpc = FirstOperand.getOpcode();
4794 EVT ValTy = N->getValueType(0);
4795 SDLoc DL(N);
4796 uint64_t lsb, msb;
4797 unsigned SMIdx, SMLen;
4798 ConstantSDNode *CN;
4799 SDValue NewOperand;
4800 MVT GRLenVT = Subtarget.getGRLenVT();
4801
4802 // BSTRPICK requires the 32S feature.
4803 if (!Subtarget.has32S())
4804 return SDValue();
4805
4806 // Op's second operand must be a shifted mask.
4807 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
4808 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
4809 return SDValue();
4810
4811 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
4812 // Pattern match BSTRPICK.
4813 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
4814 // => BSTRPICK $dst, $src, msb, lsb
4815 // where msb = lsb + len - 1
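// For example, (and (srl $src, 8), 255) becomes (BSTRPICK $src, 15, 8):
// lsb = 8, len = 8, so msb = 15.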
4816
4817 // The second operand of the shift must be an immediate.
4818 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
4819 return SDValue();
4820
4821 lsb = CN->getZExtValue();
4822
4823 // Return if the shifted mask does not start at bit 0 or the sum of its
4824 // length and lsb exceeds the word's size.
4825 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
4826 return SDValue();
4827
4828 NewOperand = FirstOperand.getOperand(0);
4829 } else {
4830 // Pattern match BSTRPICK.
4831 // $dst = and $src, (2**len - 1), if len > 12
4832 // => BSTRPICK $dst, $src, msb, lsb
4833 // where lsb = 0 and msb = len - 1
4834
4835 // If the mask is <= 0xfff, andi can be used instead.
4836 if (CN->getZExtValue() <= 0xfff)
4837 return SDValue();
4838
4839 // Return if the MSB exceeds the word size.
4840 if (SMIdx + SMLen > ValTy.getSizeInBits())
4841 return SDValue();
4842
4843 if (SMIdx > 0) {
4844 // Omit if the constant has more than 2 uses. This is a conservative
4845 // decision. Whether it is a win depends on the HW microarchitecture.
4846 // However, it should always be better for 1 and 2 uses.
4847 if (CN->use_size() > 2)
4848 return SDValue();
4849 // Return if the constant can be composed by a single LU12I.W.
4850 if ((CN->getZExtValue() & 0xfff) == 0)
4851 return SDValue();
4852 // Return if the constant can be composed by a single ADDI with
4853 // the zero register.
4854 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
4855 return SDValue();
4856 }
4857
4858 lsb = SMIdx;
4859 NewOperand = FirstOperand;
4860 }
4861
4862 msb = lsb + SMLen - 1;
4863 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
4864 DAG.getConstant(msb, DL, GRLenVT),
4865 DAG.getConstant(lsb, DL, GRLenVT));
4866 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
4867 return NR0;
4868 // Try to optimize to
4869 // bstrpick $Rd, $Rs, msb, lsb
4870 // slli $Rd, $Rd, lsb
4871 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
4872 DAG.getConstant(lsb, DL, GRLenVT));
4873}
4874
4875 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
4876 TargetLowering::DAGCombinerInfo &DCI,
4877 const LoongArchSubtarget &Subtarget) {
4878 // BSTRPICK requires the 32S feature.
4879 if (!Subtarget.has32S())
4880 return SDValue();
4881
4882 if (DCI.isBeforeLegalizeOps())
4883 return SDValue();
4884
4885 // $dst = srl (and $src, Mask), Shamt
4886 // =>
4887 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
4888 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
4889 //
4890
4891 SDValue FirstOperand = N->getOperand(0);
4892 ConstantSDNode *CN;
4893 EVT ValTy = N->getValueType(0);
4894 SDLoc DL(N);
4895 MVT GRLenVT = Subtarget.getGRLenVT();
4896 unsigned MaskIdx, MaskLen;
4897 uint64_t Shamt;
4898
4899 // The first operand must be an AND and the second operand of the AND must be
4900 // a shifted mask.
4901 if (FirstOperand.getOpcode() != ISD::AND ||
4902 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
4903 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
4904 return SDValue();
4905
4906 // The second operand (shift amount) must be an immediate.
4907 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
4908 return SDValue();
4909
4910 Shamt = CN->getZExtValue();
4911 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
4912 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
4913 FirstOperand->getOperand(0),
4914 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
4915 DAG.getConstant(Shamt, DL, GRLenVT));
4916
4917 return SDValue();
4918}
4919
4920// Helper to peek through bitops/trunc/setcc to determine size of source vector.
4921// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
4922static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
4923 unsigned Depth) {
4924 // Limit recursion.
4925 if (Depth >= SelectionDAG::MaxRecursionDepth)
4926 return false;
4927 switch (Src.getOpcode()) {
4928 case ISD::SETCC:
4929 case ISD::TRUNCATE:
4930 return Src.getOperand(0).getValueSizeInBits() == Size;
4931 case ISD::FREEZE:
4932 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
4933 case ISD::AND:
4934 case ISD::XOR:
4935 case ISD::OR:
4936 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
4937 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
4938 case ISD::SELECT:
4939 case ISD::VSELECT:
4940 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
4941 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
4942 checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
4943 case ISD::BUILD_VECTOR:
4944 return ISD::isBuildVectorAllZeros(Src.getNode()) ||
4945 ISD::isBuildVectorAllOnes(Src.getNode());
4946 }
4947 return false;
4948}
4949
4950// Helper to push sign extension of vXi1 SETCC result through bitops.
4951 static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
4952 SDValue Src, const SDLoc &DL) {
4953 switch (Src.getOpcode()) {
4954 case ISD::SETCC:
4955 case ISD::FREEZE:
4956 case ISD::TRUNCATE:
4957 case ISD::BUILD_VECTOR:
4958 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
4959 case ISD::AND:
4960 case ISD::XOR:
4961 case ISD::OR:
4962 return DAG.getNode(
4963 Src.getOpcode(), DL, SExtVT,
4964 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
4965 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
4966 case ISD::SELECT:
4967 case ISD::VSELECT:
4968 return DAG.getSelect(
4969 DL, SExtVT, Src.getOperand(0),
4970 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
4971 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
4972 }
4973 llvm_unreachable("Unexpected node type for vXi1 sign extension");
4974}
4975
4976static SDValue
4977 performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG,
4978 TargetLowering::DAGCombinerInfo &DCI,
4979 const LoongArchSubtarget &Subtarget) {
4980 SDLoc DL(N);
4981 EVT VT = N->getValueType(0);
4982 SDValue Src = N->getOperand(0);
4983 EVT SrcVT = Src.getValueType();
4984
4985 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
4986 return SDValue();
4987
4988 bool UseLASX;
4989 unsigned Opc = ISD::DELETED_NODE;
4990 EVT CmpVT = Src.getOperand(0).getValueType();
4991 EVT EltVT = CmpVT.getVectorElementType();
4992
4993 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
4994 UseLASX = false;
4995 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
4996 CmpVT.getSizeInBits() == 256)
4997 UseLASX = true;
4998 else
4999 return SDValue();
5000
5001 SDValue SrcN1 = Src.getOperand(1);
5002 switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
5003 default:
5004 break;
5005 case ISD::SETEQ:
5006 // x == 0 => not (vmsknez.b x)
5007 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5008 Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
5009 break;
5010 case ISD::SETGT:
5011 // x > -1 => vmskgez.b x
5012 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
5013 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
5014 break;
5015 case ISD::SETGE:
5016 // x >= 0 => vmskgez.b x
5017 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5018 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
5019 break;
5020 case ISD::SETLT:
5021 // x < 0 => vmskltz.{b,h,w,d} x
5022 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
5023 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5024 EltVT == MVT::i64))
5025 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5026 break;
5027 case ISD::SETLE:
5028 // x <= -1 => vmskltz.{b,h,w,d} x
5029 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
5030 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5031 EltVT == MVT::i64))
5032 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5033 break;
5034 case ISD::SETNE:
5035 // x != 0 => vmsknez.b x
5036 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5037 Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
5038 break;
5039 }
5040
5041 if (Opc == ISD::DELETED_NODE)
5042 return SDValue();
5043
5044 SDValue V = DAG.getNode(Opc, DL, MVT::i64, Src.getOperand(0));
5045 EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
5046 V = DAG.getZExtOrTrunc(V, DL, T);
5047 return DAG.getBitcast(VT, V);
5048}
5049
5050 static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
5051 TargetLowering::DAGCombinerInfo &DCI,
5052 const LoongArchSubtarget &Subtarget) {
5053 SDLoc DL(N);
5054 EVT VT = N->getValueType(0);
5055 SDValue Src = N->getOperand(0);
5056 EVT SrcVT = Src.getValueType();
5057
5058 if (!DCI.isBeforeLegalizeOps())
5059 return SDValue();
5060
5061 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
5062 return SDValue();
5063
5064 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
5065 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
5066 if (Res)
5067 return Res;
5068
5069 // Generate vXi1 using [X]VMSKLTZ
5070 MVT SExtVT;
5071 unsigned Opc;
5072 bool UseLASX = false;
5073 bool PropagateSExt = false;
5074
5075 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
5076 EVT CmpVT = Src.getOperand(0).getValueType();
5077 if (CmpVT.getSizeInBits() > 256)
5078 return SDValue();
5079 }
5080
5081 switch (SrcVT.getSimpleVT().SimpleTy) {
5082 default:
5083 return SDValue();
5084 case MVT::v2i1:
5085 SExtVT = MVT::v2i64;
5086 break;
5087 case MVT::v4i1:
5088 SExtVT = MVT::v4i32;
5089 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5090 SExtVT = MVT::v4i64;
5091 UseLASX = true;
5092 PropagateSExt = true;
5093 }
5094 break;
5095 case MVT::v8i1:
5096 SExtVT = MVT::v8i16;
5097 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5098 SExtVT = MVT::v8i32;
5099 UseLASX = true;
5100 PropagateSExt = true;
5101 }
5102 break;
5103 case MVT::v16i1:
5104 SExtVT = MVT::v16i8;
5105 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5106 SExtVT = MVT::v16i16;
5107 UseLASX = true;
5108 PropagateSExt = true;
5109 }
5110 break;
5111 case MVT::v32i1:
5112 SExtVT = MVT::v32i8;
5113 UseLASX = true;
5114 break;
5115 };
5116 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
5117 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5118
5119 SDValue V;
5120 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
5121 if (Src.getSimpleValueType() == MVT::v32i8) {
5122 SDValue Lo, Hi;
5123 std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
5124 Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, MVT::i64, Lo);
5125 Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, MVT::i64, Hi);
5126 Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi,
5127 DAG.getConstant(16, DL, MVT::i8));
5128 V = DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi);
5129 } else if (UseLASX) {
5130 return SDValue();
5131 }
5132 }
5133
5134 if (!V) {
5135 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5136 V = DAG.getNode(Opc, DL, MVT::i64, Src);
5137 }
5138
5139 EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
5140 V = DAG.getZExtOrTrunc(V, DL, T);
5141 return DAG.getBitcast(VT, V);
5142}
5143
5144 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
5145 TargetLowering::DAGCombinerInfo &DCI,
5146 const LoongArchSubtarget &Subtarget) {
5147 MVT GRLenVT = Subtarget.getGRLenVT();
5148 EVT ValTy = N->getValueType(0);
5149 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5150 ConstantSDNode *CN0, *CN1;
5151 SDLoc DL(N);
5152 unsigned ValBits = ValTy.getSizeInBits();
5153 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
5154 unsigned Shamt;
5155 bool SwapAndRetried = false;
5156
5157 // BSTRPICK requires the 32S feature.
5158 if (!Subtarget.has32S())
5159 return SDValue();
5160
5161 if (DCI.isBeforeLegalizeOps())
5162 return SDValue();
5163
5164 if (ValBits != 32 && ValBits != 64)
5165 return SDValue();
5166
5167Retry:
5168 // 1st pattern to match BSTRINS:
5169 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
5170 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
5171 // =>
5172 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
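// For example, with size = 8 and lsb = 8 (so mask1 = 0xff00 and
// mask0 = ~0xff00), R takes bits [15:8] from the low 8 bits of Y and all
// other bits from X, which is BSTRINS X, Y, 15, 8.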
5173 if (N0.getOpcode() == ISD::AND &&
5174 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5175 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5176 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
5177 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5178 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5179 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
5180 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5181 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5182 (MaskIdx0 + MaskLen0 <= ValBits)) {
5183 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
5184 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5185 N1.getOperand(0).getOperand(0),
5186 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5187 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5188 }
5189
5190 // 2nd pattern to match BSTRINS:
5191 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
5192 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
5193 // =>
5194 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
5195 if (N0.getOpcode() == ISD::AND &&
5196 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5197 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5198 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5199 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5200 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5201 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5202 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5203 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
5204 (MaskIdx0 + MaskLen0 <= ValBits)) {
5205 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
5206 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5207 N1.getOperand(0).getOperand(0),
5208 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5209 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5210 }
5211
5212 // 3rd pattern to match BSTRINS:
5213 // R = or (and X, mask0), (and Y, mask1)
5214 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
5215 // =>
5216 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
5217 // where msb = lsb + size - 1
5218 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5219 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5220 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5221 (MaskIdx0 + MaskLen0 <= 64) &&
5222 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
5223 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5224 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
5225 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5226 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
5227 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
5228 DAG.getConstant(ValBits == 32
5229 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5230 : (MaskIdx0 + MaskLen0 - 1),
5231 DL, GRLenVT),
5232 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5233 }
5234
5235 // 4th pattern to match BSTRINS:
5236 // R = or (and X, mask), (shl Y, shamt)
5237 // where mask = (2**shamt - 1)
5238 // =>
5239 // R = BSTRINS X, Y, ValBits - 1, shamt
5240 // where ValBits = 32 or 64
5241 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
5242 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5243 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
5244 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5245 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
5246 (MaskIdx0 + MaskLen0 <= ValBits)) {
5247 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
5248 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5249 N1.getOperand(0),
5250 DAG.getConstant((ValBits - 1), DL, GRLenVT),
5251 DAG.getConstant(Shamt, DL, GRLenVT));
5252 }
5253
5254 // 5th pattern to match BSTRINS:
5255 // R = or (and X, mask), const
5256 // where ~mask = (2**size - 1) << lsb, mask & const = 0
5257 // =>
5258 // R = BSTRINS X, (const >> lsb), msb, lsb
5259 // where msb = lsb + size - 1
5260 if (N0.getOpcode() == ISD::AND &&
5261 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5262 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5263 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
5264 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5265 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
5266 return DAG.getNode(
5267 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5268 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
5269 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5270 : (MaskIdx0 + MaskLen0 - 1),
5271 DL, GRLenVT),
5272 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5273 }
5274
5275 // 6th pattern.
5276 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
5277 // by the incoming bits are known to be zero.
5278 // =>
5279 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
5280 //
5281 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
5282 // pattern is more common than the 1st. So we put the 1st before the 6th in
5283 // order to match as many nodes as possible.
5284 ConstantSDNode *CNMask, *CNShamt;
5285 unsigned MaskIdx, MaskLen;
5286 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5287 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5288 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5289 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5290 CNShamt->getZExtValue() + MaskLen <= ValBits) {
5291 Shamt = CNShamt->getZExtValue();
5292 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
5293 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5294 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
5295 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5296 N1.getOperand(0).getOperand(0),
5297 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
5298 DAG.getConstant(Shamt, DL, GRLenVT));
5299 }
5300 }
5301
5302 // 7th pattern.
5303 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
5304 // overwritten by the incoming bits are known to be zero.
5305 // =>
5306 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
5307 //
5308 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
5309 // before the 7th in order to match as many nodes as possible.
5310 if (N1.getOpcode() == ISD::AND &&
5311 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5312 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5313 N1.getOperand(0).getOpcode() == ISD::SHL &&
5314 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5315 CNShamt->getZExtValue() == MaskIdx) {
5316 APInt ShMask(ValBits, CNMask->getZExtValue());
5317 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5318 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
5319 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5320 N1.getOperand(0).getOperand(0),
5321 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5322 DAG.getConstant(MaskIdx, DL, GRLenVT));
5323 }
5324 }
5325
5326 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
5327 if (!SwapAndRetried) {
5328 std::swap(N0, N1);
5329 SwapAndRetried = true;
5330 goto Retry;
5331 }
5332
5333 SwapAndRetried = false;
5334Retry2:
5335 // 8th pattern.
5336 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
5337 // the incoming bits are known to be zero.
5338 // =>
5339 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
5340 //
5341 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
5342 // we put it here in order to match as many nodes as possible or generate
5343 // fewer instructions.
5344 if (N1.getOpcode() == ISD::AND &&
5345 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5346 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
5347 APInt ShMask(ValBits, CNMask->getZExtValue());
5348 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5349 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
5350 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5351 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
5352 N1->getOperand(0),
5353 DAG.getConstant(MaskIdx, DL, GRLenVT)),
5354 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5355 DAG.getConstant(MaskIdx, DL, GRLenVT));
5356 }
5357 }
5358 // Swap N0/N1 and retry.
5359 if (!SwapAndRetried) {
5360 std::swap(N0, N1);
5361 SwapAndRetried = true;
5362 goto Retry2;
5363 }
5364
5365 return SDValue();
5366}
5367
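// Return true if V is an i8/i16-sized value, i.e. a load with i8/i16 memory
// type or a value wrapped in AssertSext/AssertZext of i8/i16. ExtType reports
// how the narrow value is extended to the wider register width.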
5368static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
5369 ExtType = ISD::NON_EXTLOAD;
5370
5371 switch (V.getNode()->getOpcode()) {
5372 case ISD::LOAD: {
5373 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
5374 if ((LoadNode->getMemoryVT() == MVT::i8) ||
5375 (LoadNode->getMemoryVT() == MVT::i16)) {
5376 ExtType = LoadNode->getExtensionType();
5377 return true;
5378 }
5379 return false;
5380 }
5381 case ISD::AssertSext: {
5382 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5383 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5384 ExtType = ISD::SEXTLOAD;
5385 return true;
5386 }
5387 return false;
5388 }
5389 case ISD::AssertZext: {
5390 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5391 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5392 ExtType = ISD::ZEXTLOAD;
5393 return true;
5394 }
5395 return false;
5396 }
5397 default:
5398 return false;
5399 }
5400
5401 return false;
5402}
5403
5404// Eliminate redundant truncation and zero-extension nodes.
5405// * Case 1:
5406// +------------+ +------------+ +------------+
5407// | Input1 | | Input2 | | CC |
5408// +------------+ +------------+ +------------+
5409// | | |
5410// V V +----+
5411// +------------+ +------------+ |
5412// | TRUNCATE | | TRUNCATE | |
5413// +------------+ +------------+ |
5414// | | |
5415// V V |
5416// +------------+ +------------+ |
5417// | ZERO_EXT | | ZERO_EXT | |
5418// +------------+ +------------+ |
5419// | | |
5420// | +-------------+ |
5421// V V | |
5422// +----------------+ | |
5423// | AND | | |
5424// +----------------+ | |
5425// | | |
5426// +---------------+ | |
5427// | | |
5428// V V V
5429// +-------------+
5430// | CMP |
5431// +-------------+
5432// * Case 2:
5433// +------------+ +------------+ +-------------+ +------------+ +------------+
5434// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
5435// +------------+ +------------+ +-------------+ +------------+ +------------+
5436// | | | | |
5437// V | | | |
5438// +------------+ | | | |
5439// | XOR |<---------------------+ | |
5440// +------------+ | | |
5441// | | | |
5442// V V +---------------+ |
5443// +------------+ +------------+ | |
5444// | TRUNCATE | | TRUNCATE | | +-------------------------+
5445// +------------+ +------------+ | |
5446// | | | |
5447// V V | |
5448// +------------+ +------------+ | |
5449// | ZERO_EXT | | ZERO_EXT | | |
5450// +------------+ +------------+ | |
5451// | | | |
5452// V V | |
5453// +----------------+ | |
5454// | AND | | |
5455// +----------------+ | |
5456// | | |
5457// +---------------+ | |
5458// | | |
5459// V V V
5460// +-------------+
5461// | CMP |
5462// +-------------+
5463 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
5464 TargetLowering::DAGCombinerInfo &DCI,
5465 const LoongArchSubtarget &Subtarget) {
5466 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
5467
5468 SDNode *AndNode = N->getOperand(0).getNode();
5469 if (AndNode->getOpcode() != ISD::AND)
5470 return SDValue();
5471
5472 SDValue AndInputValue2 = AndNode->getOperand(1);
5473 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
5474 return SDValue();
5475
5476 SDValue CmpInputValue = N->getOperand(1);
5477 SDValue AndInputValue1 = AndNode->getOperand(0);
5478 if (AndInputValue1.getOpcode() == ISD::XOR) {
5479 if (CC != ISD::SETEQ && CC != ISD::SETNE)
5480 return SDValue();
5481 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
5482 if (!CN || CN->getSExtValue() != -1)
5483 return SDValue();
5484 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
5485 if (!CN || CN->getSExtValue() != 0)
5486 return SDValue();
5487 AndInputValue1 = AndInputValue1.getOperand(0);
5488 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
5489 return SDValue();
5490 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
5491 if (AndInputValue2 != CmpInputValue)
5492 return SDValue();
5493 } else {
5494 return SDValue();
5495 }
5496
5497 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
5498 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
5499 return SDValue();
5500
5501 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
5502 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
5503 return SDValue();
5504
5505 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
5506 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
5507 ISD::LoadExtType ExtType1;
5508 ISD::LoadExtType ExtType2;
5509
5510 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
5511 !checkValueWidth(TruncInputValue2, ExtType2))
5512 return SDValue();
5513
5514 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
5515 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
5516 return SDValue();
5517
5518 if ((ExtType2 != ISD::ZEXTLOAD) &&
5519 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
5520 return SDValue();
5521
5522 // These truncation and zero-extension nodes are unnecessary; remove them.
5523 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
5524 TruncInputValue1, TruncInputValue2);
5525 SDValue NewSetCC =
5526 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
5527 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
5528 return SDValue(N, 0);
5529}
5530
5531// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
5532 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
5533 TargetLowering::DAGCombinerInfo &DCI,
5534 const LoongArchSubtarget &Subtarget) {
5535 if (DCI.isBeforeLegalizeOps())
5536 return SDValue();
5537
5538 SDValue Src = N->getOperand(0);
5539 if (Src.getOpcode() != LoongArchISD::REVB_2W)
5540 return SDValue();
5541
5542 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
5543 Src.getOperand(0));
5544}
5545
5546// Perform common combines for BR_CC and SELECT_CC conditions.
5547static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
5548 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
5549 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5550
5551 // Since an arithmetic right shift always preserves the sign bit,
5552 // the shift can be omitted.
5553 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
5554 // setge (sra X, N), 0 -> setge X, 0
5555 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
5556 LHS.getOpcode() == ISD::SRA) {
5557 LHS = LHS.getOperand(0);
5558 return true;
5559 }
5560
5561 if (!ISD::isIntEqualitySetCC(CCVal))
5562 return false;
5563
5564 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
5565 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
5566 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
5567 LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
5568 // If we're looking for eq 0 instead of ne 0, we need to invert the
5569 // condition.
5570 bool Invert = CCVal == ISD::SETEQ;
5571 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
5572 if (Invert)
5573 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5574
5575 RHS = LHS.getOperand(1);
5576 LHS = LHS.getOperand(0);
5577 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
5578
5579 CC = DAG.getCondCode(CCVal);
5580 return true;
5581 }
5582
5583 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
5584 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
5585 LHS.getOperand(1).getOpcode() == ISD::Constant) {
5586 SDValue LHS0 = LHS.getOperand(0);
5587 if (LHS0.getOpcode() == ISD::AND &&
5588 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
5589 uint64_t Mask = LHS0.getConstantOperandVal(1);
5590 uint64_t ShAmt = LHS.getConstantOperandVal(1);
5591 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
5592 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
5593 CC = DAG.getCondCode(CCVal);
5594
5595 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
5596 LHS = LHS0.getOperand(0);
5597 if (ShAmt != 0)
5598 LHS =
5599 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
5600 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
5601 return true;
5602 }
5603 }
5604 }
5605
5606 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
5607 // This can occur when legalizing some floating point comparisons.
5608 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
5609 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
5610 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5611 CC = DAG.getCondCode(CCVal);
5612 RHS = DAG.getConstant(0, DL, LHS.getValueType());
5613 return true;
5614 }
5615
5616 return false;
5617}
5618
5619 static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG,
5620 TargetLowering::DAGCombinerInfo &DCI,
5621 const LoongArchSubtarget &Subtarget) {
5622 SDValue LHS = N->getOperand(1);
5623 SDValue RHS = N->getOperand(2);
5624 SDValue CC = N->getOperand(3);
5625 SDLoc DL(N);
5626
5627 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
5628 return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
5629 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
5630
5631 return SDValue();
5632}
5633
5634 static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
5635 TargetLowering::DAGCombinerInfo &DCI,
5636 const LoongArchSubtarget &Subtarget) {
5637 // Transform
5638 SDValue LHS = N->getOperand(0);
5639 SDValue RHS = N->getOperand(1);
5640 SDValue CC = N->getOperand(2);
5641 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5642 SDValue TrueV = N->getOperand(3);
5643 SDValue FalseV = N->getOperand(4);
5644 SDLoc DL(N);
5645 EVT VT = N->getValueType(0);
5646
5647 // If the True and False values are the same, we don't need a select_cc.
5648 if (TrueV == FalseV)
5649 return TrueV;
5650
5651 // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
5652 // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
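// The arithmetic shift x >> (GRLEN - 1) yields all-ones when x < 0 and zero
// otherwise, so the AND keeps either (y - z) or 0 and the final ADD produces
// y or z respectively.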
5653 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
5654 isNullConstant(RHS) &&
5655 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
5656 if (CCVal == ISD::CondCode::SETGE)
5657 std::swap(TrueV, FalseV);
5658
5659 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
5660 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
5661 // Only handle simm12; if the value is not in this range, it can be
5662 // considered as a register.
5663 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
5664 isInt<12>(TrueSImm - FalseSImm)) {
5665 SDValue SRA =
5666 DAG.getNode(ISD::SRA, DL, VT, LHS,
5667 DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT));
5668 SDValue AND =
5669 DAG.getNode(ISD::AND, DL, VT, SRA,
5670 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
5671 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
5672 }
5673
5674 if (CCVal == ISD::CondCode::SETGE)
5675 std::swap(TrueV, FalseV);
5676 }
5677
5678 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
5679 return DAG.getNode(LoongArchISD::SELECT_CC, DL, N->getValueType(0),
5680 {LHS, RHS, CC, TrueV, FalseV});
5681
5682 return SDValue();
5683}
5684
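// Check that the immediate operand ImmOp of an intrinsic node fits in N bits
// (signed or unsigned). If it does not, emit a diagnostic and return UNDEF;
// otherwise return the immediate as a GRLen-wide constant.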
5685template <unsigned N>
5686 static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
5687 SelectionDAG &DAG,
5688 const LoongArchSubtarget &Subtarget,
5689 bool IsSigned = false) {
5690 SDLoc DL(Node);
5691 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5692 // Check the ImmArg.
5693 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5694 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5695 DAG.getContext()->emitError(Node->getOperationName(0) +
5696 ": argument out of range.");
5697 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
5698 }
5699 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
5700}
5701
5702template <unsigned N>
5703static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
5704 SelectionDAG &DAG, bool IsSigned = false) {
5705 SDLoc DL(Node);
5706 EVT ResTy = Node->getValueType(0);
5707 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5708
5709 // Check the ImmArg.
5710 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5711 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5712 DAG.getContext()->emitError(Node->getOperationName(0) +
5713 ": argument out of range.");
5714 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5715 }
5716 return DAG.getConstant(
5717 APInt(ResTy.getScalarType().getSizeInBits(),
5718 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
5719 DL, ResTy);
5720}
5721
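// Mask each element of the vector operand 2 with EltBits - 1 so that only the
// bits selecting a position inside an element remain.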
5722 static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
5723 SDLoc DL(Node);
5724 EVT ResTy = Node->getValueType(0);
5725 SDValue Vec = Node->getOperand(2);
5726 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
5727 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
5728}
5729
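// Lower [x]vbitclr.{b/h/w/d}: clear, in each element of operand 1, the bit
// selected by the corresponding element of operand 2, by ANDing with the
// complement of (1 << elt).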
5730 static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
5731 SDLoc DL(Node);
5732 EVT ResTy = Node->getValueType(0);
5733 SDValue One = DAG.getConstant(1, DL, ResTy);
5734 SDValue Bit =
5735 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
5736
5737 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
5738 DAG.getNOT(DL, Bit, ResTy));
5739}
5740
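// Lower [x]vbitclri.{b/h/w/d}: clear bit CImm in every element of operand 1
// by ANDing with the splatted constant ~(1 << CImm), after range-checking the
// unsigned immediate.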
5741template <unsigned N>
5742 static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
5743 SDLoc DL(Node);
5744 EVT ResTy = Node->getValueType(0);
5745 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5746 // Check the unsigned ImmArg.
5747 if (!isUInt<N>(CImm->getZExtValue())) {
5748 DAG.getContext()->emitError(Node->getOperationName(0) +
5749 ": argument out of range.");
5750 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5751 }
5752
5753 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5754 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
5755
5756 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
5757}
5758
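// Lower [x]vbitseti.{b/h/w/d}: set bit CImm in every element of operand 1 by
// ORing with the splatted constant (1 << CImm), after range-checking the
// unsigned immediate.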
5759template <unsigned N>
5760 static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
5761 SDLoc DL(Node);
5762 EVT ResTy = Node->getValueType(0);
5763 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5764 // Check the unsigned ImmArg.
5765 if (!isUInt<N>(CImm->getZExtValue())) {
5766 DAG.getContext()->emitError(Node->getOperationName(0) +
5767 ": argument out of range.");
5768 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5769 }
5770
5771 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5772 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
5773 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
5774}
5775
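// Lower [x]vbitrevi.{b/h/w/d}: flip bit CImm in every element of operand 1 by
// XORing with the splatted constant (1 << CImm), after range-checking the
// unsigned immediate.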
5776template <unsigned N>
5777 static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
5778 SDLoc DL(Node);
5779 EVT ResTy = Node->getValueType(0);
5780 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5781 // Check the unsigned ImmArg.
5782 if (!isUInt<N>(CImm->getZExtValue())) {
5783 DAG.getContext()->emitError(Node->getOperationName(0) +
5784 ": argument out of range.");
5785 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5786 }
5787
5788 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5789 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
5790 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
5791}
5792
5793static SDValue
5794 performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
5795 TargetLowering::DAGCombinerInfo &DCI,
5796 const LoongArchSubtarget &Subtarget) {
5797 SDLoc DL(N);
5798 switch (N->getConstantOperandVal(0)) {
5799 default:
5800 break;
5801 case Intrinsic::loongarch_lsx_vadd_b:
5802 case Intrinsic::loongarch_lsx_vadd_h:
5803 case Intrinsic::loongarch_lsx_vadd_w:
5804 case Intrinsic::loongarch_lsx_vadd_d:
5805 case Intrinsic::loongarch_lasx_xvadd_b:
5806 case Intrinsic::loongarch_lasx_xvadd_h:
5807 case Intrinsic::loongarch_lasx_xvadd_w:
5808 case Intrinsic::loongarch_lasx_xvadd_d:
5809 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
5810 N->getOperand(2));
5811 case Intrinsic::loongarch_lsx_vaddi_bu:
5812 case Intrinsic::loongarch_lsx_vaddi_hu:
5813 case Intrinsic::loongarch_lsx_vaddi_wu:
5814 case Intrinsic::loongarch_lsx_vaddi_du:
5815 case Intrinsic::loongarch_lasx_xvaddi_bu:
5816 case Intrinsic::loongarch_lasx_xvaddi_hu:
5817 case Intrinsic::loongarch_lasx_xvaddi_wu:
5818 case Intrinsic::loongarch_lasx_xvaddi_du:
5819 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
5820 lowerVectorSplatImm<5>(N, 2, DAG));
5821 case Intrinsic::loongarch_lsx_vsub_b:
5822 case Intrinsic::loongarch_lsx_vsub_h:
5823 case Intrinsic::loongarch_lsx_vsub_w:
5824 case Intrinsic::loongarch_lsx_vsub_d:
5825 case Intrinsic::loongarch_lasx_xvsub_b:
5826 case Intrinsic::loongarch_lasx_xvsub_h:
5827 case Intrinsic::loongarch_lasx_xvsub_w:
5828 case Intrinsic::loongarch_lasx_xvsub_d:
5829 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
5830 N->getOperand(2));
5831 case Intrinsic::loongarch_lsx_vsubi_bu:
5832 case Intrinsic::loongarch_lsx_vsubi_hu:
5833 case Intrinsic::loongarch_lsx_vsubi_wu:
5834 case Intrinsic::loongarch_lsx_vsubi_du:
5835 case Intrinsic::loongarch_lasx_xvsubi_bu:
5836 case Intrinsic::loongarch_lasx_xvsubi_hu:
5837 case Intrinsic::loongarch_lasx_xvsubi_wu:
5838 case Intrinsic::loongarch_lasx_xvsubi_du:
5839 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
5840 lowerVectorSplatImm<5>(N, 2, DAG));
5841 case Intrinsic::loongarch_lsx_vneg_b:
5842 case Intrinsic::loongarch_lsx_vneg_h:
5843 case Intrinsic::loongarch_lsx_vneg_w:
5844 case Intrinsic::loongarch_lsx_vneg_d:
5845 case Intrinsic::loongarch_lasx_xvneg_b:
5846 case Intrinsic::loongarch_lasx_xvneg_h:
5847 case Intrinsic::loongarch_lasx_xvneg_w:
5848 case Intrinsic::loongarch_lasx_xvneg_d:
5849 return DAG.getNode(
5850 ISD::SUB, DL, N->getValueType(0),
5851 DAG.getConstant(
5852 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
5853 /*isSigned=*/true),
5854 SDLoc(N), N->getValueType(0)),
5855 N->getOperand(1));
5856 case Intrinsic::loongarch_lsx_vmax_b:
5857 case Intrinsic::loongarch_lsx_vmax_h:
5858 case Intrinsic::loongarch_lsx_vmax_w:
5859 case Intrinsic::loongarch_lsx_vmax_d:
5860 case Intrinsic::loongarch_lasx_xvmax_b:
5861 case Intrinsic::loongarch_lasx_xvmax_h:
5862 case Intrinsic::loongarch_lasx_xvmax_w:
5863 case Intrinsic::loongarch_lasx_xvmax_d:
5864 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
5865 N->getOperand(2));
5866 case Intrinsic::loongarch_lsx_vmax_bu:
5867 case Intrinsic::loongarch_lsx_vmax_hu:
5868 case Intrinsic::loongarch_lsx_vmax_wu:
5869 case Intrinsic::loongarch_lsx_vmax_du:
5870 case Intrinsic::loongarch_lasx_xvmax_bu:
5871 case Intrinsic::loongarch_lasx_xvmax_hu:
5872 case Intrinsic::loongarch_lasx_xvmax_wu:
5873 case Intrinsic::loongarch_lasx_xvmax_du:
5874 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
5875 N->getOperand(2));
5876 case Intrinsic::loongarch_lsx_vmaxi_b:
5877 case Intrinsic::loongarch_lsx_vmaxi_h:
5878 case Intrinsic::loongarch_lsx_vmaxi_w:
5879 case Intrinsic::loongarch_lsx_vmaxi_d:
5880 case Intrinsic::loongarch_lasx_xvmaxi_b:
5881 case Intrinsic::loongarch_lasx_xvmaxi_h:
5882 case Intrinsic::loongarch_lasx_xvmaxi_w:
5883 case Intrinsic::loongarch_lasx_xvmaxi_d:
5884 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
5885 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
5886 case Intrinsic::loongarch_lsx_vmaxi_bu:
5887 case Intrinsic::loongarch_lsx_vmaxi_hu:
5888 case Intrinsic::loongarch_lsx_vmaxi_wu:
5889 case Intrinsic::loongarch_lsx_vmaxi_du:
5890 case Intrinsic::loongarch_lasx_xvmaxi_bu:
5891 case Intrinsic::loongarch_lasx_xvmaxi_hu:
5892 case Intrinsic::loongarch_lasx_xvmaxi_wu:
5893 case Intrinsic::loongarch_lasx_xvmaxi_du:
5894 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
5895 lowerVectorSplatImm<5>(N, 2, DAG));
5896 case Intrinsic::loongarch_lsx_vmin_b:
5897 case Intrinsic::loongarch_lsx_vmin_h:
5898 case Intrinsic::loongarch_lsx_vmin_w:
5899 case Intrinsic::loongarch_lsx_vmin_d:
5900 case Intrinsic::loongarch_lasx_xvmin_b:
5901 case Intrinsic::loongarch_lasx_xvmin_h:
5902 case Intrinsic::loongarch_lasx_xvmin_w:
5903 case Intrinsic::loongarch_lasx_xvmin_d:
5904 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
5905 N->getOperand(2));
5906 case Intrinsic::loongarch_lsx_vmin_bu:
5907 case Intrinsic::loongarch_lsx_vmin_hu:
5908 case Intrinsic::loongarch_lsx_vmin_wu:
5909 case Intrinsic::loongarch_lsx_vmin_du:
5910 case Intrinsic::loongarch_lasx_xvmin_bu:
5911 case Intrinsic::loongarch_lasx_xvmin_hu:
5912 case Intrinsic::loongarch_lasx_xvmin_wu:
5913 case Intrinsic::loongarch_lasx_xvmin_du:
5914 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
5915 N->getOperand(2));
5916 case Intrinsic::loongarch_lsx_vmini_b:
5917 case Intrinsic::loongarch_lsx_vmini_h:
5918 case Intrinsic::loongarch_lsx_vmini_w:
5919 case Intrinsic::loongarch_lsx_vmini_d:
5920 case Intrinsic::loongarch_lasx_xvmini_b:
5921 case Intrinsic::loongarch_lasx_xvmini_h:
5922 case Intrinsic::loongarch_lasx_xvmini_w:
5923 case Intrinsic::loongarch_lasx_xvmini_d:
5924 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
5925 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
5926 case Intrinsic::loongarch_lsx_vmini_bu:
5927 case Intrinsic::loongarch_lsx_vmini_hu:
5928 case Intrinsic::loongarch_lsx_vmini_wu:
5929 case Intrinsic::loongarch_lsx_vmini_du:
5930 case Intrinsic::loongarch_lasx_xvmini_bu:
5931 case Intrinsic::loongarch_lasx_xvmini_hu:
5932 case Intrinsic::loongarch_lasx_xvmini_wu:
5933 case Intrinsic::loongarch_lasx_xvmini_du:
5934 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
5935 lowerVectorSplatImm<5>(N, 2, DAG));
5936 case Intrinsic::loongarch_lsx_vmul_b:
5937 case Intrinsic::loongarch_lsx_vmul_h:
5938 case Intrinsic::loongarch_lsx_vmul_w:
5939 case Intrinsic::loongarch_lsx_vmul_d:
5940 case Intrinsic::loongarch_lasx_xvmul_b:
5941 case Intrinsic::loongarch_lasx_xvmul_h:
5942 case Intrinsic::loongarch_lasx_xvmul_w:
5943 case Intrinsic::loongarch_lasx_xvmul_d:
5944 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
5945 N->getOperand(2));
5946 case Intrinsic::loongarch_lsx_vmadd_b:
5947 case Intrinsic::loongarch_lsx_vmadd_h:
5948 case Intrinsic::loongarch_lsx_vmadd_w:
5949 case Intrinsic::loongarch_lsx_vmadd_d:
5950 case Intrinsic::loongarch_lasx_xvmadd_b:
5951 case Intrinsic::loongarch_lasx_xvmadd_h:
5952 case Intrinsic::loongarch_lasx_xvmadd_w:
5953 case Intrinsic::loongarch_lasx_xvmadd_d: {
5954 EVT ResTy = N->getValueType(0);
5955 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
5956 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
5957 N->getOperand(3)));
5958 }
5959 case Intrinsic::loongarch_lsx_vmsub_b:
5960 case Intrinsic::loongarch_lsx_vmsub_h:
5961 case Intrinsic::loongarch_lsx_vmsub_w:
5962 case Intrinsic::loongarch_lsx_vmsub_d:
5963 case Intrinsic::loongarch_lasx_xvmsub_b:
5964 case Intrinsic::loongarch_lasx_xvmsub_h:
5965 case Intrinsic::loongarch_lasx_xvmsub_w:
5966 case Intrinsic::loongarch_lasx_xvmsub_d: {
5967 EVT ResTy = N->getValueType(0);
5968 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
5969 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
5970 N->getOperand(3)));
5971 }
5972 case Intrinsic::loongarch_lsx_vdiv_b:
5973 case Intrinsic::loongarch_lsx_vdiv_h:
5974 case Intrinsic::loongarch_lsx_vdiv_w:
5975 case Intrinsic::loongarch_lsx_vdiv_d:
5976 case Intrinsic::loongarch_lasx_xvdiv_b:
5977 case Intrinsic::loongarch_lasx_xvdiv_h:
5978 case Intrinsic::loongarch_lasx_xvdiv_w:
5979 case Intrinsic::loongarch_lasx_xvdiv_d:
5980 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
5981 N->getOperand(2));
5982 case Intrinsic::loongarch_lsx_vdiv_bu:
5983 case Intrinsic::loongarch_lsx_vdiv_hu:
5984 case Intrinsic::loongarch_lsx_vdiv_wu:
5985 case Intrinsic::loongarch_lsx_vdiv_du:
5986 case Intrinsic::loongarch_lasx_xvdiv_bu:
5987 case Intrinsic::loongarch_lasx_xvdiv_hu:
5988 case Intrinsic::loongarch_lasx_xvdiv_wu:
5989 case Intrinsic::loongarch_lasx_xvdiv_du:
5990 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
5991 N->getOperand(2));
5992 case Intrinsic::loongarch_lsx_vmod_b:
5993 case Intrinsic::loongarch_lsx_vmod_h:
5994 case Intrinsic::loongarch_lsx_vmod_w:
5995 case Intrinsic::loongarch_lsx_vmod_d:
5996 case Intrinsic::loongarch_lasx_xvmod_b:
5997 case Intrinsic::loongarch_lasx_xvmod_h:
5998 case Intrinsic::loongarch_lasx_xvmod_w:
5999 case Intrinsic::loongarch_lasx_xvmod_d:
6000 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
6001 N->getOperand(2));
6002 case Intrinsic::loongarch_lsx_vmod_bu:
6003 case Intrinsic::loongarch_lsx_vmod_hu:
6004 case Intrinsic::loongarch_lsx_vmod_wu:
6005 case Intrinsic::loongarch_lsx_vmod_du:
6006 case Intrinsic::loongarch_lasx_xvmod_bu:
6007 case Intrinsic::loongarch_lasx_xvmod_hu:
6008 case Intrinsic::loongarch_lasx_xvmod_wu:
6009 case Intrinsic::loongarch_lasx_xvmod_du:
6010 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
6011 N->getOperand(2));
6012 case Intrinsic::loongarch_lsx_vand_v:
6013 case Intrinsic::loongarch_lasx_xvand_v:
6014 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6015 N->getOperand(2));
6016 case Intrinsic::loongarch_lsx_vor_v:
6017 case Intrinsic::loongarch_lasx_xvor_v:
6018 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6019 N->getOperand(2));
6020 case Intrinsic::loongarch_lsx_vxor_v:
6021 case Intrinsic::loongarch_lasx_xvxor_v:
6022 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6023 N->getOperand(2));
6024 case Intrinsic::loongarch_lsx_vnor_v:
6025 case Intrinsic::loongarch_lasx_xvnor_v: {
6026 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6027 N->getOperand(2));
6028 return DAG.getNOT(DL, Res, Res->getValueType(0));
6029 }
6030 case Intrinsic::loongarch_lsx_vandi_b:
6031 case Intrinsic::loongarch_lasx_xvandi_b:
6032 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6033 lowerVectorSplatImm<8>(N, 2, DAG));
6034 case Intrinsic::loongarch_lsx_vori_b:
6035 case Intrinsic::loongarch_lasx_xvori_b:
6036 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6037 lowerVectorSplatImm<8>(N, 2, DAG));
6038 case Intrinsic::loongarch_lsx_vxori_b:
6039 case Intrinsic::loongarch_lasx_xvxori_b:
6040 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6041 lowerVectorSplatImm<8>(N, 2, DAG));
6042 case Intrinsic::loongarch_lsx_vsll_b:
6043 case Intrinsic::loongarch_lsx_vsll_h:
6044 case Intrinsic::loongarch_lsx_vsll_w:
6045 case Intrinsic::loongarch_lsx_vsll_d:
6046 case Intrinsic::loongarch_lasx_xvsll_b:
6047 case Intrinsic::loongarch_lasx_xvsll_h:
6048 case Intrinsic::loongarch_lasx_xvsll_w:
6049 case Intrinsic::loongarch_lasx_xvsll_d:
6050 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6051 truncateVecElts(N, DAG));
6052 case Intrinsic::loongarch_lsx_vslli_b:
6053 case Intrinsic::loongarch_lasx_xvslli_b:
6054 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6055 lowerVectorSplatImm<3>(N, 2, DAG));
6056 case Intrinsic::loongarch_lsx_vslli_h:
6057 case Intrinsic::loongarch_lasx_xvslli_h:
6058 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6059 lowerVectorSplatImm<4>(N, 2, DAG));
6060 case Intrinsic::loongarch_lsx_vslli_w:
6061 case Intrinsic::loongarch_lasx_xvslli_w:
6062 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6063 lowerVectorSplatImm<5>(N, 2, DAG));
6064 case Intrinsic::loongarch_lsx_vslli_d:
6065 case Intrinsic::loongarch_lasx_xvslli_d:
6066 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6067 lowerVectorSplatImm<6>(N, 2, DAG));
6068 case Intrinsic::loongarch_lsx_vsrl_b:
6069 case Intrinsic::loongarch_lsx_vsrl_h:
6070 case Intrinsic::loongarch_lsx_vsrl_w:
6071 case Intrinsic::loongarch_lsx_vsrl_d:
6072 case Intrinsic::loongarch_lasx_xvsrl_b:
6073 case Intrinsic::loongarch_lasx_xvsrl_h:
6074 case Intrinsic::loongarch_lasx_xvsrl_w:
6075 case Intrinsic::loongarch_lasx_xvsrl_d:
6076 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6077 truncateVecElts(N, DAG));
6078 case Intrinsic::loongarch_lsx_vsrli_b:
6079 case Intrinsic::loongarch_lasx_xvsrli_b:
6080 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6081 lowerVectorSplatImm<3>(N, 2, DAG));
6082 case Intrinsic::loongarch_lsx_vsrli_h:
6083 case Intrinsic::loongarch_lasx_xvsrli_h:
6084 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6085 lowerVectorSplatImm<4>(N, 2, DAG));
6086 case Intrinsic::loongarch_lsx_vsrli_w:
6087 case Intrinsic::loongarch_lasx_xvsrli_w:
6088 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6089 lowerVectorSplatImm<5>(N, 2, DAG));
6090 case Intrinsic::loongarch_lsx_vsrli_d:
6091 case Intrinsic::loongarch_lasx_xvsrli_d:
6092 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6093 lowerVectorSplatImm<6>(N, 2, DAG));
6094 case Intrinsic::loongarch_lsx_vsra_b:
6095 case Intrinsic::loongarch_lsx_vsra_h:
6096 case Intrinsic::loongarch_lsx_vsra_w:
6097 case Intrinsic::loongarch_lsx_vsra_d:
6098 case Intrinsic::loongarch_lasx_xvsra_b:
6099 case Intrinsic::loongarch_lasx_xvsra_h:
6100 case Intrinsic::loongarch_lasx_xvsra_w:
6101 case Intrinsic::loongarch_lasx_xvsra_d:
6102 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6103 truncateVecElts(N, DAG));
6104 case Intrinsic::loongarch_lsx_vsrai_b:
6105 case Intrinsic::loongarch_lasx_xvsrai_b:
6106 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6107 lowerVectorSplatImm<3>(N, 2, DAG));
6108 case Intrinsic::loongarch_lsx_vsrai_h:
6109 case Intrinsic::loongarch_lasx_xvsrai_h:
6110 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6111 lowerVectorSplatImm<4>(N, 2, DAG));
6112 case Intrinsic::loongarch_lsx_vsrai_w:
6113 case Intrinsic::loongarch_lasx_xvsrai_w:
6114 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6115 lowerVectorSplatImm<5>(N, 2, DAG));
6116 case Intrinsic::loongarch_lsx_vsrai_d:
6117 case Intrinsic::loongarch_lasx_xvsrai_d:
6118 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6119 lowerVectorSplatImm<6>(N, 2, DAG));
6120 case Intrinsic::loongarch_lsx_vclz_b:
6121 case Intrinsic::loongarch_lsx_vclz_h:
6122 case Intrinsic::loongarch_lsx_vclz_w:
6123 case Intrinsic::loongarch_lsx_vclz_d:
6124 case Intrinsic::loongarch_lasx_xvclz_b:
6125 case Intrinsic::loongarch_lasx_xvclz_h:
6126 case Intrinsic::loongarch_lasx_xvclz_w:
6127 case Intrinsic::loongarch_lasx_xvclz_d:
6128 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
6129 case Intrinsic::loongarch_lsx_vpcnt_b:
6130 case Intrinsic::loongarch_lsx_vpcnt_h:
6131 case Intrinsic::loongarch_lsx_vpcnt_w:
6132 case Intrinsic::loongarch_lsx_vpcnt_d:
6133 case Intrinsic::loongarch_lasx_xvpcnt_b:
6134 case Intrinsic::loongarch_lasx_xvpcnt_h:
6135 case Intrinsic::loongarch_lasx_xvpcnt_w:
6136 case Intrinsic::loongarch_lasx_xvpcnt_d:
6137 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
6138 case Intrinsic::loongarch_lsx_vbitclr_b:
6139 case Intrinsic::loongarch_lsx_vbitclr_h:
6140 case Intrinsic::loongarch_lsx_vbitclr_w:
6141 case Intrinsic::loongarch_lsx_vbitclr_d:
6142 case Intrinsic::loongarch_lasx_xvbitclr_b:
6143 case Intrinsic::loongarch_lasx_xvbitclr_h:
6144 case Intrinsic::loongarch_lasx_xvbitclr_w:
6145 case Intrinsic::loongarch_lasx_xvbitclr_d:
6146 return lowerVectorBitClear(N, DAG);
6147 case Intrinsic::loongarch_lsx_vbitclri_b:
6148 case Intrinsic::loongarch_lasx_xvbitclri_b:
6149 return lowerVectorBitClearImm<3>(N, DAG);
6150 case Intrinsic::loongarch_lsx_vbitclri_h:
6151 case Intrinsic::loongarch_lasx_xvbitclri_h:
6152 return lowerVectorBitClearImm<4>(N, DAG);
6153 case Intrinsic::loongarch_lsx_vbitclri_w:
6154 case Intrinsic::loongarch_lasx_xvbitclri_w:
6155 return lowerVectorBitClearImm<5>(N, DAG);
6156 case Intrinsic::loongarch_lsx_vbitclri_d:
6157 case Intrinsic::loongarch_lasx_xvbitclri_d:
6158 return lowerVectorBitClearImm<6>(N, DAG);
6159 case Intrinsic::loongarch_lsx_vbitset_b:
6160 case Intrinsic::loongarch_lsx_vbitset_h:
6161 case Intrinsic::loongarch_lsx_vbitset_w:
6162 case Intrinsic::loongarch_lsx_vbitset_d:
6163 case Intrinsic::loongarch_lasx_xvbitset_b:
6164 case Intrinsic::loongarch_lasx_xvbitset_h:
6165 case Intrinsic::loongarch_lasx_xvbitset_w:
6166 case Intrinsic::loongarch_lasx_xvbitset_d: {
6167 EVT VecTy = N->getValueType(0);
6168 SDValue One = DAG.getConstant(1, DL, VecTy);
6169 return DAG.getNode(
6170 ISD::OR, DL, VecTy, N->getOperand(1),
6171 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6172 }
6173 case Intrinsic::loongarch_lsx_vbitseti_b:
6174 case Intrinsic::loongarch_lasx_xvbitseti_b:
6175 return lowerVectorBitSetImm<3>(N, DAG);
6176 case Intrinsic::loongarch_lsx_vbitseti_h:
6177 case Intrinsic::loongarch_lasx_xvbitseti_h:
6178 return lowerVectorBitSetImm<4>(N, DAG);
6179 case Intrinsic::loongarch_lsx_vbitseti_w:
6180 case Intrinsic::loongarch_lasx_xvbitseti_w:
6181 return lowerVectorBitSetImm<5>(N, DAG);
6182 case Intrinsic::loongarch_lsx_vbitseti_d:
6183 case Intrinsic::loongarch_lasx_xvbitseti_d:
6184 return lowerVectorBitSetImm<6>(N, DAG);
6185 case Intrinsic::loongarch_lsx_vbitrev_b:
6186 case Intrinsic::loongarch_lsx_vbitrev_h:
6187 case Intrinsic::loongarch_lsx_vbitrev_w:
6188 case Intrinsic::loongarch_lsx_vbitrev_d:
6189 case Intrinsic::loongarch_lasx_xvbitrev_b:
6190 case Intrinsic::loongarch_lasx_xvbitrev_h:
6191 case Intrinsic::loongarch_lasx_xvbitrev_w:
6192 case Intrinsic::loongarch_lasx_xvbitrev_d: {
6193 EVT VecTy = N->getValueType(0);
6194 SDValue One = DAG.getConstant(1, DL, VecTy);
6195 return DAG.getNode(
6196 ISD::XOR, DL, VecTy, N->getOperand(1),
6197 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6198 }
6199 case Intrinsic::loongarch_lsx_vbitrevi_b:
6200 case Intrinsic::loongarch_lasx_xvbitrevi_b:
6201 return lowerVectorBitRevImm<3>(N, DAG);
6202 case Intrinsic::loongarch_lsx_vbitrevi_h:
6203 case Intrinsic::loongarch_lasx_xvbitrevi_h:
6204 return lowerVectorBitRevImm<4>(N, DAG);
6205 case Intrinsic::loongarch_lsx_vbitrevi_w:
6206 case Intrinsic::loongarch_lasx_xvbitrevi_w:
6207 return lowerVectorBitRevImm<5>(N, DAG);
6208 case Intrinsic::loongarch_lsx_vbitrevi_d:
6209 case Intrinsic::loongarch_lasx_xvbitrevi_d:
6210 return lowerVectorBitRevImm<6>(N, DAG);
6211 case Intrinsic::loongarch_lsx_vfadd_s:
6212 case Intrinsic::loongarch_lsx_vfadd_d:
6213 case Intrinsic::loongarch_lasx_xvfadd_s:
6214 case Intrinsic::loongarch_lasx_xvfadd_d:
6215 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
6216 N->getOperand(2));
6217 case Intrinsic::loongarch_lsx_vfsub_s:
6218 case Intrinsic::loongarch_lsx_vfsub_d:
6219 case Intrinsic::loongarch_lasx_xvfsub_s:
6220 case Intrinsic::loongarch_lasx_xvfsub_d:
6221 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
6222 N->getOperand(2));
6223 case Intrinsic::loongarch_lsx_vfmul_s:
6224 case Intrinsic::loongarch_lsx_vfmul_d:
6225 case Intrinsic::loongarch_lasx_xvfmul_s:
6226 case Intrinsic::loongarch_lasx_xvfmul_d:
6227 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
6228 N->getOperand(2));
6229 case Intrinsic::loongarch_lsx_vfdiv_s:
6230 case Intrinsic::loongarch_lsx_vfdiv_d:
6231 case Intrinsic::loongarch_lasx_xvfdiv_s:
6232 case Intrinsic::loongarch_lasx_xvfdiv_d:
6233 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
6234 N->getOperand(2));
6235 case Intrinsic::loongarch_lsx_vfmadd_s:
6236 case Intrinsic::loongarch_lsx_vfmadd_d:
6237 case Intrinsic::loongarch_lasx_xvfmadd_s:
6238 case Intrinsic::loongarch_lasx_xvfmadd_d:
6239 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
6240 N->getOperand(2), N->getOperand(3));
6241 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
6242 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6243 N->getOperand(1), N->getOperand(2),
6244 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
6245 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
6246 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
6247 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6248 N->getOperand(1), N->getOperand(2),
6249 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
6250 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
6251 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
6252 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6253 N->getOperand(1), N->getOperand(2),
6254 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
6255 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
6256 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6257 N->getOperand(1), N->getOperand(2),
6258 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
6259 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
6260 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
6261 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
6262 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
6263 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
6264 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
6265 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
6266 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
6267 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
6268 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6269 N->getOperand(1)));
6270 case Intrinsic::loongarch_lsx_vreplve_b:
6271 case Intrinsic::loongarch_lsx_vreplve_h:
6272 case Intrinsic::loongarch_lsx_vreplve_w:
6273 case Intrinsic::loongarch_lsx_vreplve_d:
6274 case Intrinsic::loongarch_lasx_xvreplve_b:
6275 case Intrinsic::loongarch_lasx_xvreplve_h:
6276 case Intrinsic::loongarch_lasx_xvreplve_w:
6277 case Intrinsic::loongarch_lasx_xvreplve_d:
6278 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
6279 N->getOperand(1),
6280 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6281 N->getOperand(2)));
6282 }
6283 return SDValue();
6284}
6285
6286 static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG,
6287 TargetLowering::DAGCombinerInfo &DCI,
6288 const LoongArchSubtarget &Subtarget) {
6289 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
6290 // conversion is unnecessary and can be replaced with the
6291 // MOVFR2GR_S_LA64 operand.
6292 SDValue Op0 = N->getOperand(0);
6293 if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
6294 return Op0.getOperand(0);
6295 return SDValue();
6296}
6297
6298 static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG,
6299 TargetLowering::DAGCombinerInfo &DCI,
6300 const LoongArchSubtarget &Subtarget) {
6301 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
6302 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
6303 // operand.
6304 SDValue Op0 = N->getOperand(0);
6305 if (Op0.getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
6306 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
6307 "Unexpected value type!");
6308 return Op0.getOperand(0);
6309 }
6310 return SDValue();
6311}
6312
6313 static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG,
6314 TargetLowering::DAGCombinerInfo &DCI,
6315 const LoongArchSubtarget &Subtarget) {
6316 MVT VT = N->getSimpleValueType(0);
6317 unsigned NumBits = VT.getScalarSizeInBits();
6318
6319 // Simplify the inputs.
6320 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6321 APInt DemandedMask(APInt::getAllOnes(NumBits));
6322 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
6323 return SDValue(N, 0);
6324
6325 return SDValue();
6326}
6327
6328static SDValue
6329 performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG,
6330 TargetLowering::DAGCombinerInfo &DCI,
6331 const LoongArchSubtarget &Subtarget) {
6332 SDValue Op0 = N->getOperand(0);
6333 SDLoc DL(N);
6334
6335 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
6336 // redundant. Instead, use BuildPairF64's operands directly.
6337 if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64)
6338 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
6339
6340 if (Op0->isUndef()) {
6341 SDValue Lo = DAG.getUNDEF(MVT::i32);
6342 SDValue Hi = DAG.getUNDEF(MVT::i32);
6343 return DCI.CombineTo(N, Lo, Hi);
6344 }
6345
6346 // It's cheaper to materialise two 32-bit integers than to load a double
6347 // from the constant pool and transfer it to integer registers through the
6348 // stack.
6349 if (auto *C = dyn_cast<ConstantFPSDNode>(Op0)) {
6350 APInt V = C->getValueAPF().bitcastToAPInt();
6351 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
6352 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
6353 return DCI.CombineTo(N, Lo, Hi);
6354 }
6355
6356 return SDValue();
6357}
6358
6359static SDValue
6360 performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
6361 TargetLowering::DAGCombinerInfo &DCI,
6362 const LoongArchSubtarget &Subtarget) {
6363 if (!DCI.isBeforeLegalize())
6364 return SDValue();
6365
6366 MVT EltVT = N->getSimpleValueType(0);
6367 SDValue Vec = N->getOperand(0);
6368 EVT VecTy = Vec->getValueType(0);
6369 SDValue Idx = N->getOperand(1);
6370 unsigned IdxOp = Idx.getOpcode();
6371 SDLoc DL(N);
6372
6373 if (!VecTy.is256BitVector() || isa<ConstantSDNode>(Idx))
6374 return SDValue();
6375
6376 // Combine:
6377 // t2 = truncate t1
6378 // t3 = {zero/sign/any}_extend t2
6379 // t4 = extract_vector_elt t0, t3
6380 // to:
6381 // t4 = extract_vector_elt t0, t1
6382 if (IdxOp == ISD::ZERO_EXTEND || IdxOp == ISD::SIGN_EXTEND ||
6383 IdxOp == ISD::ANY_EXTEND) {
6384 SDValue IdxOrig = Idx.getOperand(0);
6385 if (!(IdxOrig.getOpcode() == ISD::TRUNCATE))
6386 return SDValue();
6387
6388 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
6389 IdxOrig.getOperand(0));
6390 }
6391
6392 return SDValue();
6393}
6394
6395 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
6396 DAGCombinerInfo &DCI) const {
6397 SelectionDAG &DAG = DCI.DAG;
6398 switch (N->getOpcode()) {
6399 default:
6400 break;
6401 case ISD::AND:
6402 return performANDCombine(N, DAG, DCI, Subtarget);
6403 case ISD::OR:
6404 return performORCombine(N, DAG, DCI, Subtarget);
6405 case ISD::SETCC:
6406 return performSETCCCombine(N, DAG, DCI, Subtarget);
6407 case ISD::SRL:
6408 return performSRLCombine(N, DAG, DCI, Subtarget);
6409 case ISD::BITCAST:
6410 return performBITCASTCombine(N, DAG, DCI, Subtarget);
6411 case LoongArchISD::BITREV_W:
6412 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
6413 case LoongArchISD::BR_CC:
6414 return performBR_CCCombine(N, DAG, DCI, Subtarget);
6415 case LoongArchISD::SELECT_CC:
6416 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
6417 case ISD::INTRINSIC_WO_CHAIN:
6418 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
6419 case LoongArchISD::MOVGR2FR_W_LA64:
6420 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
6421 case LoongArchISD::MOVFR2GR_S_LA64:
6422 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
6423 case LoongArchISD::VMSKLTZ:
6424 case LoongArchISD::XVMSKLTZ:
6425 return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
6426 case LoongArchISD::SPLIT_PAIR_F64:
6427 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
6428 case ISD::EXTRACT_VECTOR_ELT:
6429 return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
6430 }
6431 return SDValue();
6432}
6433
 6434static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
 6435                                              MachineBasicBlock *MBB) {
 6436  if (!ZeroDivCheck)
6437 return MBB;
6438
6439 // Build instructions:
6440 // MBB:
6441 // div(or mod) $dst, $dividend, $divisor
6442 // bne $divisor, $zero, SinkMBB
6443 // BreakMBB:
6444 // break 7 // BRK_DIVZERO
6445 // SinkMBB:
6446 // fallthrough
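  // Note that the BNE below branches to SinkMBB when the divisor is non-zero,
  // so BreakMBB (and the BREAK trap) is reached only on division by zero.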
6447 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
6448 MachineFunction::iterator It = ++MBB->getIterator();
6449 MachineFunction *MF = MBB->getParent();
6450 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6451 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6452 MF->insert(It, BreakMBB);
6453 MF->insert(It, SinkMBB);
6454
6455 // Transfer the remainder of MBB and its successor edges to SinkMBB.
6456 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
6457 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
6458
6459 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
6460 DebugLoc DL = MI.getDebugLoc();
6461 MachineOperand &Divisor = MI.getOperand(2);
6462 Register DivisorReg = Divisor.getReg();
6463
6464 // MBB:
6465 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
6466 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
6467 .addReg(LoongArch::R0)
6468 .addMBB(SinkMBB);
6469 MBB->addSuccessor(BreakMBB);
6470 MBB->addSuccessor(SinkMBB);
6471
6472 // BreakMBB:
6473 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
6474 // definition of BRK_DIVZERO.
6475 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
6476 BreakMBB->addSuccessor(SinkMBB);
6477
6478 // Clear Divisor's kill flag.
6479 Divisor.setIsKill(false);
6480
6481 return SinkMBB;
6482}
6483
6484static MachineBasicBlock *
 6485emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
 6486                        const LoongArchSubtarget &Subtarget) {
6487 unsigned CondOpc;
6488 switch (MI.getOpcode()) {
6489 default:
6490 llvm_unreachable("Unexpected opcode");
6491 case LoongArch::PseudoVBZ:
6492 CondOpc = LoongArch::VSETEQZ_V;
6493 break;
6494 case LoongArch::PseudoVBZ_B:
6495 CondOpc = LoongArch::VSETANYEQZ_B;
6496 break;
6497 case LoongArch::PseudoVBZ_H:
6498 CondOpc = LoongArch::VSETANYEQZ_H;
6499 break;
6500 case LoongArch::PseudoVBZ_W:
6501 CondOpc = LoongArch::VSETANYEQZ_W;
6502 break;
6503 case LoongArch::PseudoVBZ_D:
6504 CondOpc = LoongArch::VSETANYEQZ_D;
6505 break;
6506 case LoongArch::PseudoVBNZ:
6507 CondOpc = LoongArch::VSETNEZ_V;
6508 break;
6509 case LoongArch::PseudoVBNZ_B:
6510 CondOpc = LoongArch::VSETALLNEZ_B;
6511 break;
6512 case LoongArch::PseudoVBNZ_H:
6513 CondOpc = LoongArch::VSETALLNEZ_H;
6514 break;
6515 case LoongArch::PseudoVBNZ_W:
6516 CondOpc = LoongArch::VSETALLNEZ_W;
6517 break;
6518 case LoongArch::PseudoVBNZ_D:
6519 CondOpc = LoongArch::VSETALLNEZ_D;
6520 break;
6521 case LoongArch::PseudoXVBZ:
6522 CondOpc = LoongArch::XVSETEQZ_V;
6523 break;
6524 case LoongArch::PseudoXVBZ_B:
6525 CondOpc = LoongArch::XVSETANYEQZ_B;
6526 break;
6527 case LoongArch::PseudoXVBZ_H:
6528 CondOpc = LoongArch::XVSETANYEQZ_H;
6529 break;
6530 case LoongArch::PseudoXVBZ_W:
6531 CondOpc = LoongArch::XVSETANYEQZ_W;
6532 break;
6533 case LoongArch::PseudoXVBZ_D:
6534 CondOpc = LoongArch::XVSETANYEQZ_D;
6535 break;
6536 case LoongArch::PseudoXVBNZ:
6537 CondOpc = LoongArch::XVSETNEZ_V;
6538 break;
6539 case LoongArch::PseudoXVBNZ_B:
6540 CondOpc = LoongArch::XVSETALLNEZ_B;
6541 break;
6542 case LoongArch::PseudoXVBNZ_H:
6543 CondOpc = LoongArch::XVSETALLNEZ_H;
6544 break;
6545 case LoongArch::PseudoXVBNZ_W:
6546 CondOpc = LoongArch::XVSETALLNEZ_W;
6547 break;
6548 case LoongArch::PseudoXVBNZ_D:
6549 CondOpc = LoongArch::XVSETALLNEZ_D;
6550 break;
6551 }
6552
6553 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6554 const BasicBlock *LLVM_BB = BB->getBasicBlock();
6555 DebugLoc DL = MI.getDebugLoc();
 6556  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
 6557  MachineFunction::iterator It = ++BB->getIterator();
 6558
6559 MachineFunction *F = BB->getParent();
6560 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
6561 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
6562 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
6563
6564 F->insert(It, FalseBB);
6565 F->insert(It, TrueBB);
6566 F->insert(It, SinkBB);
6567
6568 // Transfer the remainder of MBB and its successor edges to Sink.
6569 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
 6570  SinkBB->transferSuccessorsAndUpdatePHIs(BB);
 6571
6572 // Insert the real instruction to BB.
6573 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
6574 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
6575
6576 // Insert branch.
6577 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
6578 BB->addSuccessor(FalseBB);
6579 BB->addSuccessor(TrueBB);
6580
6581 // FalseBB.
6582 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6583 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
6584 .addReg(LoongArch::R0)
6585 .addImm(0);
6586 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
6587 FalseBB->addSuccessor(SinkBB);
6588
6589 // TrueBB.
6590 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6591 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
6592 .addReg(LoongArch::R0)
6593 .addImm(1);
6594 TrueBB->addSuccessor(SinkBB);
6595
6596 // SinkBB: merge the results.
6597 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
6598 MI.getOperand(0).getReg())
6599 .addReg(RD1)
6600 .addMBB(FalseBB)
6601 .addReg(RD2)
6602 .addMBB(TrueBB);
6603
6604 // The pseudo instruction is gone now.
6605 MI.eraseFromParent();
6606 return SinkBB;
6607}
6608
6609static MachineBasicBlock *
 6610emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
 6611                     const LoongArchSubtarget &Subtarget) {
6612 unsigned InsOp;
6613 unsigned BroadcastOp;
6614 unsigned HalfSize;
6615 switch (MI.getOpcode()) {
6616 default:
6617 llvm_unreachable("Unexpected opcode");
6618 case LoongArch::PseudoXVINSGR2VR_B:
6619 HalfSize = 16;
6620 BroadcastOp = LoongArch::XVREPLGR2VR_B;
6621 InsOp = LoongArch::XVEXTRINS_B;
6622 break;
6623 case LoongArch::PseudoXVINSGR2VR_H:
6624 HalfSize = 8;
6625 BroadcastOp = LoongArch::XVREPLGR2VR_H;
6626 InsOp = LoongArch::XVEXTRINS_H;
6627 break;
6628 }
6629 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6630 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
6631 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
6632 DebugLoc DL = MI.getDebugLoc();
 6633  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
 6634  // XDst = vector_insert XSrc, Elt, Idx
6635 Register XDst = MI.getOperand(0).getReg();
6636 Register XSrc = MI.getOperand(1).getReg();
6637 Register Elt = MI.getOperand(2).getReg();
6638 unsigned Idx = MI.getOperand(3).getImm();
6639
6640 if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
6641 Idx < HalfSize) {
6642 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
6643 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
6644
6645 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
6646 .addReg(XSrc, 0, LoongArch::sub_128);
6647 BuildMI(*BB, MI, DL,
6648 TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
6649 : LoongArch::VINSGR2VR_B),
6650 ScratchSubReg2)
6651 .addReg(ScratchSubReg1)
6652 .addReg(Elt)
6653 .addImm(Idx);
6654
6655 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
6656 .addImm(0)
6657 .addReg(ScratchSubReg2)
6658 .addImm(LoongArch::sub_128);
6659 } else {
6660 Register ScratchReg1 = MRI.createVirtualRegister(RC);
6661 Register ScratchReg2 = MRI.createVirtualRegister(RC);
6662
6663 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
6664
6665 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
6666 .addReg(ScratchReg1)
6667 .addReg(XSrc)
6668 .addImm(Idx >= HalfSize ? 48 : 18);
6669
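    // XVEXTRINS encodes two element indices in its immediate: the destination
    // index in the high nibble and the source index in the low nibble, so
    // multiplying the in-half index by 17 (0x11) puts the same index in both.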
6670 BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
6671 .addReg(XSrc)
6672 .addReg(ScratchReg2)
6673 .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
6674 }
6675
6676 MI.eraseFromParent();
6677 return BB;
6678}
6679
 6680static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI,
 6681                                           MachineBasicBlock *BB,
 6682                                           const LoongArchSubtarget &Subtarget) {
6683 assert(Subtarget.hasExtLSX());
6684 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6685 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
6686 DebugLoc DL = MI.getDebugLoc();
 6687  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
 6688  Register Dst = MI.getOperand(0).getReg();
6689 Register Src = MI.getOperand(1).getReg();
6690 Register ScratchReg1 = MRI.createVirtualRegister(RC);
6691 Register ScratchReg2 = MRI.createVirtualRegister(RC);
6692 Register ScratchReg3 = MRI.createVirtualRegister(RC);
6693
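  // Strategy: zero an LSX register, insert the GPR operand into element 0,
  // run the vector popcount on it, and copy element 0 of the result back to
  // the destination GPR.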
6694 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
6695 BuildMI(*BB, MI, DL,
6696 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
6697 : LoongArch::VINSGR2VR_W),
6698 ScratchReg2)
6699 .addReg(ScratchReg1)
6700 .addReg(Src)
6701 .addImm(0);
6702 BuildMI(
6703 *BB, MI, DL,
6704 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
6705 ScratchReg3)
6706 .addReg(ScratchReg2);
6707 BuildMI(*BB, MI, DL,
6708 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
6709 : LoongArch::VPICKVE2GR_W),
6710 Dst)
6711 .addReg(ScratchReg3)
6712 .addImm(0);
6713
6714 MI.eraseFromParent();
6715 return BB;
6716}
6717
6718static MachineBasicBlock *
 6719emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB,
 6720                   const LoongArchSubtarget &Subtarget) {
6721 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6722 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
6723 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
 6724  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
 6725  Register Dst = MI.getOperand(0).getReg();
6726 Register Src = MI.getOperand(1).getReg();
6727 DebugLoc DL = MI.getDebugLoc();
6728 unsigned EleBits = 8;
6729 unsigned NotOpc = 0;
6730 unsigned MskOpc;
6731
6732 switch (MI.getOpcode()) {
6733 default:
6734 llvm_unreachable("Unexpected opcode");
6735 case LoongArch::PseudoVMSKLTZ_B:
6736 MskOpc = LoongArch::VMSKLTZ_B;
6737 break;
6738 case LoongArch::PseudoVMSKLTZ_H:
6739 MskOpc = LoongArch::VMSKLTZ_H;
6740 EleBits = 16;
6741 break;
6742 case LoongArch::PseudoVMSKLTZ_W:
6743 MskOpc = LoongArch::VMSKLTZ_W;
6744 EleBits = 32;
6745 break;
6746 case LoongArch::PseudoVMSKLTZ_D:
6747 MskOpc = LoongArch::VMSKLTZ_D;
6748 EleBits = 64;
6749 break;
6750 case LoongArch::PseudoVMSKGEZ_B:
6751 MskOpc = LoongArch::VMSKGEZ_B;
6752 break;
6753 case LoongArch::PseudoVMSKEQZ_B:
6754 MskOpc = LoongArch::VMSKNZ_B;
6755 NotOpc = LoongArch::VNOR_V;
6756 break;
6757 case LoongArch::PseudoVMSKNEZ_B:
6758 MskOpc = LoongArch::VMSKNZ_B;
6759 break;
6760 case LoongArch::PseudoXVMSKLTZ_B:
6761 MskOpc = LoongArch::XVMSKLTZ_B;
6762 RC = &LoongArch::LASX256RegClass;
6763 break;
6764 case LoongArch::PseudoXVMSKLTZ_H:
6765 MskOpc = LoongArch::XVMSKLTZ_H;
6766 RC = &LoongArch::LASX256RegClass;
6767 EleBits = 16;
6768 break;
6769 case LoongArch::PseudoXVMSKLTZ_W:
6770 MskOpc = LoongArch::XVMSKLTZ_W;
6771 RC = &LoongArch::LASX256RegClass;
6772 EleBits = 32;
6773 break;
6774 case LoongArch::PseudoXVMSKLTZ_D:
6775 MskOpc = LoongArch::XVMSKLTZ_D;
6776 RC = &LoongArch::LASX256RegClass;
6777 EleBits = 64;
6778 break;
6779 case LoongArch::PseudoXVMSKGEZ_B:
6780 MskOpc = LoongArch::XVMSKGEZ_B;
6781 RC = &LoongArch::LASX256RegClass;
6782 break;
6783 case LoongArch::PseudoXVMSKEQZ_B:
6784 MskOpc = LoongArch::XVMSKNZ_B;
6785 NotOpc = LoongArch::XVNOR_V;
6786 RC = &LoongArch::LASX256RegClass;
6787 break;
6788 case LoongArch::PseudoXVMSKNEZ_B:
6789 MskOpc = LoongArch::XVMSKNZ_B;
6790 RC = &LoongArch::LASX256RegClass;
6791 break;
6792 }
6793
6794 Register Msk = MRI.createVirtualRegister(RC);
6795 if (NotOpc) {
6796 Register Tmp = MRI.createVirtualRegister(RC);
6797 BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
6798 BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
6799 .addReg(Tmp, RegState::Kill)
6800 .addReg(Tmp, RegState::Kill);
6801 } else {
6802 BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
6803 }
6804
6805 if (TRI->getRegSizeInBits(*RC) > 128) {
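    // A 256-bit mask instruction produces an independent result per 128-bit
    // lane (in 32-bit elements 0 and 4), so extract both words and use BSTRINS
    // to place the high-lane bits just above the low-lane bits.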
6806 Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6807 Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6808 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
6809 .addReg(Msk)
6810 .addImm(0);
6811 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
6812 .addReg(Msk, RegState::Kill)
6813 .addImm(4);
6814 BuildMI(*BB, MI, DL,
6815 TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
6816 : LoongArch::BSTRINS_W),
6817 Dst)
 6818        .addReg(Lo, RegState::Kill)
 6819        .addReg(Hi, RegState::Kill)
 6820        .addImm(256 / EleBits - 1)
6821 .addImm(128 / EleBits);
6822 } else {
6823 BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
6824 .addReg(Msk, RegState::Kill)
6825 .addImm(0);
6826 }
6827
6828 MI.eraseFromParent();
6829 return BB;
6830}
6831
6832static MachineBasicBlock *
 6833emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
 6834                       const LoongArchSubtarget &Subtarget) {
6835 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
6836 "Unexpected instruction");
6837
6838 MachineFunction &MF = *BB->getParent();
6839 DebugLoc DL = MI.getDebugLoc();
 6840  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
 6841  Register LoReg = MI.getOperand(0).getReg();
6842 Register HiReg = MI.getOperand(1).getReg();
6843 Register SrcReg = MI.getOperand(2).getReg();
6844
6845 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
6846 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
6847 .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
6848 MI.eraseFromParent(); // The pseudo instruction is gone now.
6849 return BB;
6850}
6851
6852static MachineBasicBlock *
 6853emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
 6854                       const LoongArchSubtarget &Subtarget) {
6855 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
6856 "Unexpected instruction");
6857
6858 MachineFunction &MF = *BB->getParent();
6859 DebugLoc DL = MI.getDebugLoc();
 6860  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
 6861  MachineRegisterInfo &MRI = MF.getRegInfo();
 6862  Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
6863 Register DstReg = MI.getOperand(0).getReg();
6864 Register LoReg = MI.getOperand(1).getReg();
6865 Register HiReg = MI.getOperand(2).getReg();
6866
6867 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
6868 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
6869 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
6870 .addReg(TmpReg, RegState::Kill)
6871 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
6872 MI.eraseFromParent(); // The pseudo instruction is gone now.
6873 return BB;
6874}
6875
 6876static bool isSelectPseudo(MachineInstr &MI) {
 6877  switch (MI.getOpcode()) {
6878 default:
6879 return false;
6880 case LoongArch::Select_GPR_Using_CC_GPR:
6881 return true;
6882 }
6883}
6884
6885static MachineBasicBlock *
 6886emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB,
 6887                 const LoongArchSubtarget &Subtarget) {
6888 // To "insert" Select_* instructions, we actually have to insert the triangle
6889 // control-flow pattern. The incoming instructions know the destination vreg
6890 // to set, the condition code register to branch on, the true/false values to
6891 // select between, and the condcode to use to select the appropriate branch.
6892 //
6893 // We produce the following control flow:
6894 // HeadMBB
6895 // | \
6896 // | IfFalseMBB
6897 // | /
6898 // TailMBB
6899 //
6900 // When we find a sequence of selects we attempt to optimize their emission
6901 // by sharing the control flow. Currently we only handle cases where we have
6902 // multiple selects with the exact same condition (same LHS, RHS and CC).
6903 // The selects may be interleaved with other instructions if the other
6904 // instructions meet some requirements we deem safe:
6905 // - They are not pseudo instructions.
6906 // - They are debug instructions. Otherwise,
6907 // - They do not have side-effects, do not access memory and their inputs do
6908 // not depend on the results of the select pseudo-instructions.
6909 // The TrueV/FalseV operands of the selects cannot depend on the result of
6910 // previous selects in the sequence.
6911 // These conditions could be further relaxed. See the X86 target for a
6912 // related approach and more information.
6913
6914 Register LHS = MI.getOperand(1).getReg();
6915 Register RHS;
6916 if (MI.getOperand(2).isReg())
6917 RHS = MI.getOperand(2).getReg();
6918 auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
6919
6920 SmallVector<MachineInstr *, 4> SelectDebugValues;
6921 SmallSet<Register, 4> SelectDests;
6922 SelectDests.insert(MI.getOperand(0).getReg());
6923
6924 MachineInstr *LastSelectPseudo = &MI;
6925 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
6926 SequenceMBBI != E; ++SequenceMBBI) {
6927 if (SequenceMBBI->isDebugInstr())
6928 continue;
6929 if (isSelectPseudo(*SequenceMBBI)) {
6930 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
6931 !SequenceMBBI->getOperand(2).isReg() ||
6932 SequenceMBBI->getOperand(2).getReg() != RHS ||
6933 SequenceMBBI->getOperand(3).getImm() != CC ||
6934 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
6935 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
6936 break;
6937 LastSelectPseudo = &*SequenceMBBI;
6938 SequenceMBBI->collectDebugValues(SelectDebugValues);
6939 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
6940 continue;
6941 }
6942 if (SequenceMBBI->hasUnmodeledSideEffects() ||
6943 SequenceMBBI->mayLoadOrStore() ||
6944 SequenceMBBI->usesCustomInsertionHook())
6945 break;
6946 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
6947 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
6948 }))
6949 break;
6950 }
6951
6952 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
6953 const BasicBlock *LLVM_BB = BB->getBasicBlock();
6954 DebugLoc DL = MI.getDebugLoc();
 6955  MachineFunction::iterator I = ++BB->getIterator();
 6956
6957 MachineBasicBlock *HeadMBB = BB;
6958 MachineFunction *F = BB->getParent();
6959 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
6960 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
6961
6962 F->insert(I, IfFalseMBB);
6963 F->insert(I, TailMBB);
6964
6965 // Set the call frame size on entry to the new basic blocks.
6966 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
6967 IfFalseMBB->setCallFrameSize(CallFrameSize);
6968 TailMBB->setCallFrameSize(CallFrameSize);
6969
6970 // Transfer debug instructions associated with the selects to TailMBB.
6971 for (MachineInstr *DebugInstr : SelectDebugValues) {
6972 TailMBB->push_back(DebugInstr->removeFromParent());
6973 }
6974
6975 // Move all instructions after the sequence to TailMBB.
6976 TailMBB->splice(TailMBB->end(), HeadMBB,
6977 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
6978 // Update machine-CFG edges by transferring all successors of the current
6979 // block to the new block which will contain the Phi nodes for the selects.
6980 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
6981 // Set the successors for HeadMBB.
6982 HeadMBB->addSuccessor(IfFalseMBB);
6983 HeadMBB->addSuccessor(TailMBB);
6984
6985 // Insert appropriate branch.
6986 if (MI.getOperand(2).isImm())
6987 BuildMI(HeadMBB, DL, TII.get(CC))
6988 .addReg(LHS)
6989 .addImm(MI.getOperand(2).getImm())
6990 .addMBB(TailMBB);
6991 else
6992 BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
6993
6994 // IfFalseMBB just falls through to TailMBB.
6995 IfFalseMBB->addSuccessor(TailMBB);
6996
6997 // Create PHIs for all of the select pseudo-instructions.
6998 auto SelectMBBI = MI.getIterator();
6999 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
7000 auto InsertionPoint = TailMBB->begin();
7001 while (SelectMBBI != SelectEnd) {
7002 auto Next = std::next(SelectMBBI);
7003 if (isSelectPseudo(*SelectMBBI)) {
7004 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
7005 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
7006 TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
7007 .addReg(SelectMBBI->getOperand(4).getReg())
7008 .addMBB(HeadMBB)
7009 .addReg(SelectMBBI->getOperand(5).getReg())
7010 .addMBB(IfFalseMBB);
7011 SelectMBBI->eraseFromParent();
7012 }
7013 SelectMBBI = Next;
7014 }
7015
7016 F->getProperties().resetNoPHIs();
7017 return TailMBB;
7018}
7019
7020MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
7021 MachineInstr &MI, MachineBasicBlock *BB) const {
7022 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7023 DebugLoc DL = MI.getDebugLoc();
7024
7025 switch (MI.getOpcode()) {
7026 default:
7027 llvm_unreachable("Unexpected instr type to insert");
7028 case LoongArch::DIV_W:
7029 case LoongArch::DIV_WU:
7030 case LoongArch::MOD_W:
7031 case LoongArch::MOD_WU:
7032 case LoongArch::DIV_D:
7033 case LoongArch::DIV_DU:
7034 case LoongArch::MOD_D:
7035 case LoongArch::MOD_DU:
7036 return insertDivByZeroTrap(MI, BB);
7037 break;
7038 case LoongArch::WRFCSR: {
7039 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
7040 LoongArch::FCSR0 + MI.getOperand(0).getImm())
7041 .addReg(MI.getOperand(1).getReg());
7042 MI.eraseFromParent();
7043 return BB;
7044 }
7045 case LoongArch::RDFCSR: {
7046 MachineInstr *ReadFCSR =
7047 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
7048 MI.getOperand(0).getReg())
7049 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
7050 ReadFCSR->getOperand(1).setIsUndef();
7051 MI.eraseFromParent();
7052 return BB;
7053 }
7054 case LoongArch::Select_GPR_Using_CC_GPR:
7055 return emitSelectPseudo(MI, BB, Subtarget);
7056 case LoongArch::BuildPairF64Pseudo:
7057 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
7058 case LoongArch::SplitPairF64Pseudo:
7059 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
7060 case LoongArch::PseudoVBZ:
7061 case LoongArch::PseudoVBZ_B:
7062 case LoongArch::PseudoVBZ_H:
7063 case LoongArch::PseudoVBZ_W:
7064 case LoongArch::PseudoVBZ_D:
7065 case LoongArch::PseudoVBNZ:
7066 case LoongArch::PseudoVBNZ_B:
7067 case LoongArch::PseudoVBNZ_H:
7068 case LoongArch::PseudoVBNZ_W:
7069 case LoongArch::PseudoVBNZ_D:
7070 case LoongArch::PseudoXVBZ:
7071 case LoongArch::PseudoXVBZ_B:
7072 case LoongArch::PseudoXVBZ_H:
7073 case LoongArch::PseudoXVBZ_W:
7074 case LoongArch::PseudoXVBZ_D:
7075 case LoongArch::PseudoXVBNZ:
7076 case LoongArch::PseudoXVBNZ_B:
7077 case LoongArch::PseudoXVBNZ_H:
7078 case LoongArch::PseudoXVBNZ_W:
7079 case LoongArch::PseudoXVBNZ_D:
7080 return emitVecCondBranchPseudo(MI, BB, Subtarget);
7081 case LoongArch::PseudoXVINSGR2VR_B:
7082 case LoongArch::PseudoXVINSGR2VR_H:
7083 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
7084 case LoongArch::PseudoCTPOP:
7085 return emitPseudoCTPOP(MI, BB, Subtarget);
7086 case LoongArch::PseudoVMSKLTZ_B:
7087 case LoongArch::PseudoVMSKLTZ_H:
7088 case LoongArch::PseudoVMSKLTZ_W:
7089 case LoongArch::PseudoVMSKLTZ_D:
7090 case LoongArch::PseudoVMSKGEZ_B:
7091 case LoongArch::PseudoVMSKEQZ_B:
7092 case LoongArch::PseudoVMSKNEZ_B:
7093 case LoongArch::PseudoXVMSKLTZ_B:
7094 case LoongArch::PseudoXVMSKLTZ_H:
7095 case LoongArch::PseudoXVMSKLTZ_W:
7096 case LoongArch::PseudoXVMSKLTZ_D:
7097 case LoongArch::PseudoXVMSKGEZ_B:
7098 case LoongArch::PseudoXVMSKEQZ_B:
7099 case LoongArch::PseudoXVMSKNEZ_B:
7100 return emitPseudoVMSKCOND(MI, BB, Subtarget);
7101 case TargetOpcode::STATEPOINT:
7102 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
7103 // while bl call instruction (where statepoint will be lowered at the
7104 // end) has implicit def. This def is early-clobber as it will be set at
7105 // the moment of the call and earlier than any use is read.
7106 // Add this implicit dead def here as a workaround.
7107 MI.addOperand(*MI.getMF(),
 7108                  MachineOperand::CreateReg(
 7109                      LoongArch::R1, /*isDef*/ true,
7110 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
7111 /*isUndef*/ false, /*isEarlyClobber*/ true));
7112 if (!Subtarget.is64Bit())
7113 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
7114 return emitPatchPoint(MI, BB);
7115 }
7116}
7117
 7118bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
 7119    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
7120 unsigned *Fast) const {
7121 if (!Subtarget.hasUAL())
7122 return false;
7123
7124 // TODO: set reasonable speed number.
7125 if (Fast)
7126 *Fast = 1;
7127 return true;
7128}
7129
7130const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
7131 switch ((LoongArchISD::NodeType)Opcode) {
 7132  case LoongArchISD::FIRST_NUMBER:
 7133    break;
7134
7135#define NODE_NAME_CASE(node) \
7136 case LoongArchISD::node: \
7137 return "LoongArchISD::" #node;
7138
7139 // TODO: Add more target-dependent nodes later.
7140 NODE_NAME_CASE(CALL)
7141 NODE_NAME_CASE(CALL_MEDIUM)
7142 NODE_NAME_CASE(CALL_LARGE)
7143 NODE_NAME_CASE(RET)
7144 NODE_NAME_CASE(TAIL)
7145 NODE_NAME_CASE(TAIL_MEDIUM)
7146 NODE_NAME_CASE(TAIL_LARGE)
7147 NODE_NAME_CASE(SELECT_CC)
7148 NODE_NAME_CASE(BR_CC)
7149 NODE_NAME_CASE(BRCOND)
7150 NODE_NAME_CASE(SLL_W)
7151 NODE_NAME_CASE(SRA_W)
7152 NODE_NAME_CASE(SRL_W)
7153 NODE_NAME_CASE(BSTRINS)
7154 NODE_NAME_CASE(BSTRPICK)
7155 NODE_NAME_CASE(MOVGR2FR_W_LA64)
7156 NODE_NAME_CASE(MOVFR2GR_S_LA64)
7157 NODE_NAME_CASE(FTINT)
7158 NODE_NAME_CASE(BUILD_PAIR_F64)
7159 NODE_NAME_CASE(SPLIT_PAIR_F64)
7160 NODE_NAME_CASE(REVB_2H)
7161 NODE_NAME_CASE(REVB_2W)
7162 NODE_NAME_CASE(BITREV_4B)
7163 NODE_NAME_CASE(BITREV_8B)
7164 NODE_NAME_CASE(BITREV_W)
7165 NODE_NAME_CASE(ROTR_W)
7166 NODE_NAME_CASE(ROTL_W)
7167 NODE_NAME_CASE(DIV_W)
7168 NODE_NAME_CASE(DIV_WU)
7169 NODE_NAME_CASE(MOD_W)
7170 NODE_NAME_CASE(MOD_WU)
7171 NODE_NAME_CASE(CLZ_W)
7172 NODE_NAME_CASE(CTZ_W)
7173 NODE_NAME_CASE(DBAR)
7174 NODE_NAME_CASE(IBAR)
7175 NODE_NAME_CASE(BREAK)
7176 NODE_NAME_CASE(SYSCALL)
7177 NODE_NAME_CASE(CRC_W_B_W)
7178 NODE_NAME_CASE(CRC_W_H_W)
7179 NODE_NAME_CASE(CRC_W_W_W)
7180 NODE_NAME_CASE(CRC_W_D_W)
7181 NODE_NAME_CASE(CRCC_W_B_W)
7182 NODE_NAME_CASE(CRCC_W_H_W)
7183 NODE_NAME_CASE(CRCC_W_W_W)
7184 NODE_NAME_CASE(CRCC_W_D_W)
7185 NODE_NAME_CASE(CSRRD)
7186 NODE_NAME_CASE(CSRWR)
7187 NODE_NAME_CASE(CSRXCHG)
7188 NODE_NAME_CASE(IOCSRRD_B)
7189 NODE_NAME_CASE(IOCSRRD_H)
7190 NODE_NAME_CASE(IOCSRRD_W)
7191 NODE_NAME_CASE(IOCSRRD_D)
7192 NODE_NAME_CASE(IOCSRWR_B)
7193 NODE_NAME_CASE(IOCSRWR_H)
7194 NODE_NAME_CASE(IOCSRWR_W)
7195 NODE_NAME_CASE(IOCSRWR_D)
7196 NODE_NAME_CASE(CPUCFG)
7197 NODE_NAME_CASE(MOVGR2FCSR)
7198 NODE_NAME_CASE(MOVFCSR2GR)
7199 NODE_NAME_CASE(CACOP_D)
7200 NODE_NAME_CASE(CACOP_W)
7201 NODE_NAME_CASE(VSHUF)
7202 NODE_NAME_CASE(VPICKEV)
7203 NODE_NAME_CASE(VPICKOD)
7204 NODE_NAME_CASE(VPACKEV)
7205 NODE_NAME_CASE(VPACKOD)
7206 NODE_NAME_CASE(VILVL)
7207 NODE_NAME_CASE(VILVH)
7208 NODE_NAME_CASE(VSHUF4I)
7209 NODE_NAME_CASE(VREPLVEI)
7210 NODE_NAME_CASE(VREPLGR2VR)
7211 NODE_NAME_CASE(XVPERMI)
7212 NODE_NAME_CASE(XVPERM)
7213 NODE_NAME_CASE(XVREPLVE0)
7214 NODE_NAME_CASE(XVREPLVE0Q)
7215 NODE_NAME_CASE(VPICK_SEXT_ELT)
7216 NODE_NAME_CASE(VPICK_ZEXT_ELT)
7217 NODE_NAME_CASE(VREPLVE)
7218 NODE_NAME_CASE(VALL_ZERO)
7219 NODE_NAME_CASE(VANY_ZERO)
7220 NODE_NAME_CASE(VALL_NONZERO)
7221 NODE_NAME_CASE(VANY_NONZERO)
7222 NODE_NAME_CASE(FRECIPE)
7223 NODE_NAME_CASE(FRSQRTE)
7224 NODE_NAME_CASE(VSLLI)
7225 NODE_NAME_CASE(VSRLI)
7226 NODE_NAME_CASE(VBSLL)
7227 NODE_NAME_CASE(VBSRL)
7228 NODE_NAME_CASE(VLDREPL)
7229 NODE_NAME_CASE(VMSKLTZ)
7230 NODE_NAME_CASE(VMSKGEZ)
7231 NODE_NAME_CASE(VMSKEQZ)
7232 NODE_NAME_CASE(VMSKNEZ)
7233 NODE_NAME_CASE(XVMSKLTZ)
7234 NODE_NAME_CASE(XVMSKGEZ)
7235 NODE_NAME_CASE(XVMSKEQZ)
7236 NODE_NAME_CASE(XVMSKNEZ)
7237 NODE_NAME_CASE(VHADDW)
7238 }
7239#undef NODE_NAME_CASE
7240 return nullptr;
7241}
7242
7243//===----------------------------------------------------------------------===//
7244// Calling Convention Implementation
7245//===----------------------------------------------------------------------===//
7246
 7247// Eight general-purpose registers a0-a7 are used for passing integer arguments,
7248// with a0-a1 reused to return values. Generally, the GPRs are used to pass
7249// fixed-point arguments, and floating-point arguments when no FPR is available
7250// or with soft float ABI.
7251const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
7252 LoongArch::R7, LoongArch::R8, LoongArch::R9,
7253 LoongArch::R10, LoongArch::R11};
 7254// Eight floating-point registers fa0-fa7 are used for passing floating-point
7255// arguments, and fa0-fa1 are also used to return values.
7256const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
7257 LoongArch::F3, LoongArch::F4, LoongArch::F5,
7258 LoongArch::F6, LoongArch::F7};
7259// FPR32 and FPR64 alias each other.
 7260const MCPhysReg ArgFPR64s[] = {
 7261    LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
7262 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
7263
7264const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
7265 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
7266 LoongArch::VR6, LoongArch::VR7};
7267
7268const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
7269 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
7270 LoongArch::XR6, LoongArch::XR7};
7271
7272// Pass a 2*GRLen argument that has been split into two GRLen values through
7273// registers or the stack as necessary.
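// The first half is assigned a GPR if one is still free; otherwise both halves
// go on the stack, with the first slot aligned to the argument's original
// alignment. Returning false means the argument has been fully assigned.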
7274static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
7275 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
7276 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
7277 ISD::ArgFlagsTy ArgFlags2) {
7278 unsigned GRLenInBytes = GRLen / 8;
7279 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7280 // At least one half can be passed via register.
7281 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
7282 VA1.getLocVT(), CCValAssign::Full));
7283 } else {
7284 // Both halves must be passed on the stack, with proper alignment.
7285 Align StackAlign =
7286 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
7287 State.addLoc(
 7288        CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
 7289                            State.AllocateStack(GRLenInBytes, StackAlign),
7290 VA1.getLocVT(), CCValAssign::Full));
7291 State.addLoc(CCValAssign::getMem(
7292 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7293 LocVT2, CCValAssign::Full));
7294 return false;
7295 }
7296 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7297 // The second half can also be passed via register.
7298 State.addLoc(
7299 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
7300 } else {
7301 // The second half is passed via the stack, without additional alignment.
7302 State.addLoc(CCValAssign::getMem(
7303 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7304 LocVT2, CCValAssign::Full));
7305 }
7306 return false;
7307}
7308
7309// Implements the LoongArch calling convention. Returns true upon failure.
 7310static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
 7311                         unsigned ValNo, MVT ValVT,
7312 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
7313 CCState &State, bool IsRet, Type *OrigTy) {
7314 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
 7315  assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
7316 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
7317 MVT LocVT = ValVT;
7318
7319 // Any return value split into more than two values can't be returned
7320 // directly.
7321 if (IsRet && ValNo > 1)
7322 return true;
7323
7324 // If passing a variadic argument, or if no FPR is available.
7325 bool UseGPRForFloat = true;
7326
7327 switch (ABI) {
7328 default:
7329 llvm_unreachable("Unexpected ABI");
7330 break;
 7331  case LoongArchABI::ABI_ILP32F:
 7332  case LoongArchABI::ABI_LP64F:
 7333  case LoongArchABI::ABI_ILP32D:
 7334  case LoongArchABI::ABI_LP64D:
 7335    UseGPRForFloat = ArgFlags.isVarArg();
 7336    break;
 7337  case LoongArchABI::ABI_ILP32S:
 7338  case LoongArchABI::ABI_LP64S:
 7339    break;
7340 }
7341
7342 // If this is a variadic argument, the LoongArch calling convention requires
7343 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
7344 // byte alignment. An aligned register should be used regardless of whether
7345 // the original argument was split during legalisation or not. The argument
7346 // will not be passed by registers if the original type is larger than
7347 // 2*GRLen, so the register alignment rule does not apply.
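  // For example, on LA64 a 16-byte-aligned variadic argument must start in an
  // even argument register (a0/a2/a4/a6), so an odd register may be skipped.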
7348 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
7349 if (ArgFlags.isVarArg() &&
7350 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
7351 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
7352 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
7353 // Skip 'odd' register if necessary.
7354 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
7355 State.AllocateReg(ArgGPRs);
7356 }
7357
7358 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
7359 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
7360 State.getPendingArgFlags();
7361
7362 assert(PendingLocs.size() == PendingArgFlags.size() &&
7363 "PendingLocs and PendingArgFlags out of sync");
7364
7365 // FPR32 and FPR64 alias each other.
7366 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
7367 UseGPRForFloat = true;
7368
7369 if (UseGPRForFloat && ValVT == MVT::f32) {
7370 LocVT = GRLenVT;
7371 LocInfo = CCValAssign::BCvt;
7372 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
7373 LocVT = MVT::i64;
7374 LocInfo = CCValAssign::BCvt;
7375 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
7376 // Handle passing f64 on LA32D with a soft float ABI or when floating point
7377 // registers are exhausted.
7378 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
7379 // Depending on available argument GPRS, f64 may be passed in a pair of
7380 // GPRs, split between a GPR and the stack, or passed completely on the
7381 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
7382 // cases.
7383 MCRegister Reg = State.AllocateReg(ArgGPRs);
7384 if (!Reg) {
7385 int64_t StackOffset = State.AllocateStack(8, Align(8));
7386 State.addLoc(
7387 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7388 return false;
7389 }
7390 LocVT = MVT::i32;
7391 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7392 MCRegister HiReg = State.AllocateReg(ArgGPRs);
7393 if (HiReg) {
7394 State.addLoc(
7395 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
7396 } else {
7397 int64_t StackOffset = State.AllocateStack(4, Align(4));
7398 State.addLoc(
7399 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7400 }
7401 return false;
7402 }
7403
7404 // Split arguments might be passed indirectly, so keep track of the pending
7405 // values.
7406 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
7407 LocVT = GRLenVT;
7408 LocInfo = CCValAssign::Indirect;
7409 PendingLocs.push_back(
7410 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
7411 PendingArgFlags.push_back(ArgFlags);
7412 if (!ArgFlags.isSplitEnd()) {
7413 return false;
7414 }
7415 }
7416
7417 // If the split argument only had two elements, it should be passed directly
7418 // in registers or on the stack.
7419 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
7420 PendingLocs.size() <= 2) {
7421 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
7422 // Apply the normal calling convention rules to the first half of the
7423 // split argument.
7424 CCValAssign VA = PendingLocs[0];
7425 ISD::ArgFlagsTy AF = PendingArgFlags[0];
7426 PendingLocs.clear();
7427 PendingArgFlags.clear();
7428 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
7429 ArgFlags);
7430 }
7431
7432 // Allocate to a register if possible, or else a stack slot.
7433 Register Reg;
7434 unsigned StoreSizeBytes = GRLen / 8;
7435 Align StackAlign = Align(GRLen / 8);
7436
7437 if (ValVT == MVT::f32 && !UseGPRForFloat) {
7438 Reg = State.AllocateReg(ArgFPR32s);
7439 } else if (ValVT == MVT::f64 && !UseGPRForFloat) {
7440 Reg = State.AllocateReg(ArgFPR64s);
7441 } else if (ValVT.is128BitVector()) {
7442 Reg = State.AllocateReg(ArgVRs);
7443 UseGPRForFloat = false;
7444 StoreSizeBytes = 16;
7445 StackAlign = Align(16);
7446 } else if (ValVT.is256BitVector()) {
7447 Reg = State.AllocateReg(ArgXRs);
7448 UseGPRForFloat = false;
7449 StoreSizeBytes = 32;
7450 StackAlign = Align(32);
7451 } else {
7452 Reg = State.AllocateReg(ArgGPRs);
7453 }
7454
7455 unsigned StackOffset =
7456 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
7457
7458 // If we reach this point and PendingLocs is non-empty, we must be at the
7459 // end of a split argument that must be passed indirectly.
7460 if (!PendingLocs.empty()) {
7461 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
7462 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
7463 for (auto &It : PendingLocs) {
7464 if (Reg)
7465 It.convertToReg(Reg);
7466 else
7467 It.convertToMem(StackOffset);
7468 State.addLoc(It);
7469 }
7470 PendingLocs.clear();
7471 PendingArgFlags.clear();
7472 return false;
7473 }
7474 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
 7475         "Expected a GRLenVT at this stage");
7476
7477 if (Reg) {
7478 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7479 return false;
7480 }
7481
7482 // When a floating-point value is passed on the stack, no bit-cast is needed.
7483 if (ValVT.isFloatingPoint()) {
7484 LocVT = ValVT;
7485 LocInfo = CCValAssign::Full;
7486 }
7487
7488 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7489 return false;
7490}
7491
7492void LoongArchTargetLowering::analyzeInputArgs(
7493 MachineFunction &MF, CCState &CCInfo,
7494 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
7495 LoongArchCCAssignFn Fn) const {
7496 FunctionType *FType = MF.getFunction().getFunctionType();
7497 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
7498 MVT ArgVT = Ins[i].VT;
7499 Type *ArgTy = nullptr;
7500 if (IsRet)
7501 ArgTy = FType->getReturnType();
7502 else if (Ins[i].isOrigArg())
7503 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
 7504    LoongArchABI::ABI ABI =
 7505        MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7506 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
7507 CCInfo, IsRet, ArgTy)) {
7508 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
7509 << '\n');
7510 llvm_unreachable("");
7511 }
7512 }
7513}
7514
7515void LoongArchTargetLowering::analyzeOutputArgs(
7516 MachineFunction &MF, CCState &CCInfo,
7517 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
7518 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
7519 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7520 MVT ArgVT = Outs[i].VT;
7521 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
 7522    LoongArchABI::ABI ABI =
 7523        MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7524 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
7525 CCInfo, IsRet, OrigTy)) {
7526 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
7527 << "\n");
7528 llvm_unreachable("");
7529 }
7530 }
7531}
7532
7533// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
7534// values.
 7535static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
 7536                                   const CCValAssign &VA, const SDLoc &DL) {
7537 switch (VA.getLocInfo()) {
7538 default:
7539 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7540 case CCValAssign::Full:
7542 break;
7543 case CCValAssign::BCvt:
7544 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7545 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
7546 else
7547 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
7548 break;
7549 }
7550 return Val;
7551}
7552
 7553static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
 7554                                const CCValAssign &VA, const SDLoc &DL,
7555 const ISD::InputArg &In,
7556 const LoongArchTargetLowering &TLI) {
 7557  MachineFunction &MF = DAG.getMachineFunction();
 7558  MachineRegisterInfo &RegInfo = MF.getRegInfo();
 7559  EVT LocVT = VA.getLocVT();
7560 SDValue Val;
7561 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
7562 Register VReg = RegInfo.createVirtualRegister(RC);
7563 RegInfo.addLiveIn(VA.getLocReg(), VReg);
7564 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
7565
7566 // If input is sign extended from 32 bits, note it for the OptW pass.
7567 if (In.isOrigArg()) {
7568 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
7569 if (OrigArg->getType()->isIntegerTy()) {
7570 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
7571 // An input zero extended from i31 can also be considered sign extended.
7572 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
7573 (BitWidth < 32 && In.Flags.isZExt())) {
 7574        LoongArchMachineFunctionInfo *LAFI =
 7575            MF.getInfo<LoongArchMachineFunctionInfo>();
 7576        LAFI->addSExt32Register(VReg);
7577 }
7578 }
7579 }
7580
7581 return convertLocVTToValVT(DAG, Val, VA, DL);
7582}
7583
7584// The caller is responsible for loading the full value if the argument is
7585// passed with CCValAssign::Indirect.
 7586static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
 7587                                const CCValAssign &VA, const SDLoc &DL) {
 7588  MachineFunction &MF = DAG.getMachineFunction();
 7589  MachineFrameInfo &MFI = MF.getFrameInfo();
7590 EVT ValVT = VA.getValVT();
7591 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
7592 /*IsImmutable=*/true);
7593 SDValue FIN = DAG.getFrameIndex(
7595
7596 ISD::LoadExtType ExtType;
7597 switch (VA.getLocInfo()) {
7598 default:
7599 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7600 case CCValAssign::Full:
7602 case CCValAssign::BCvt:
7603 ExtType = ISD::NON_EXTLOAD;
7604 break;
7605 }
7606 return DAG.getExtLoad(
7607 ExtType, DL, VA.getLocVT(), Chain, FIN,
7609}
7610
 7611static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain,
 7612                                       const CCValAssign &VA,
7613 const CCValAssign &HiVA,
7614 const SDLoc &DL) {
7615 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
7616 "Unexpected VA");
 7617  MachineFunction &MF = DAG.getMachineFunction();
 7618  MachineFrameInfo &MFI = MF.getFrameInfo();
 7619  MachineRegisterInfo &RegInfo = MF.getRegInfo();
 7620
7621 assert(VA.isRegLoc() && "Expected register VA assignment");
7622
7623 Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7624 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
7625 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
7626 SDValue Hi;
7627 if (HiVA.isMemLoc()) {
7628 // Second half of f64 is passed on the stack.
7629 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
7630 /*IsImmutable=*/true);
7631 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
7632 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
7634 } else {
7635 // Second half of f64 is passed in another GPR.
7636 Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7637 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
7638 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
7639 }
7640 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
7641}
7642
 7643static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
 7644                                   const CCValAssign &VA, const SDLoc &DL) {
7645 EVT LocVT = VA.getLocVT();
7646
7647 switch (VA.getLocInfo()) {
7648 default:
7649 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7650 case CCValAssign::Full:
7651 break;
7652 case CCValAssign::BCvt:
7653 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7654 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
7655 else
7656 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
7657 break;
7658 }
7659 return Val;
7660}
7661
7662static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
7663 CCValAssign::LocInfo LocInfo,
7664 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
7665 CCState &State) {
7666 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
7667 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
7668 // s0 s1 s2 s3 s4 s5 s6 s7 s8
7669 static const MCPhysReg GPRList[] = {
7670 LoongArch::R23, LoongArch::R24, LoongArch::R25,
7671 LoongArch::R26, LoongArch::R27, LoongArch::R28,
7672 LoongArch::R29, LoongArch::R30, LoongArch::R31};
7673 if (MCRegister Reg = State.AllocateReg(GPRList)) {
7674 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7675 return false;
7676 }
7677 }
7678
7679 if (LocVT == MVT::f32) {
7680 // Pass in STG registers: F1, F2, F3, F4
7681 // fs0,fs1,fs2,fs3
7682 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
7683 LoongArch::F26, LoongArch::F27};
7684 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
7685 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7686 return false;
7687 }
7688 }
7689
7690 if (LocVT == MVT::f64) {
7691 // Pass in STG registers: D1, D2, D3, D4
7692 // fs4,fs5,fs6,fs7
7693 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
7694 LoongArch::F30_64, LoongArch::F31_64};
7695 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
7696 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7697 return false;
7698 }
7699 }
7700
7701 report_fatal_error("No registers left in GHC calling convention");
7702 return true;
7703}
7704
7705// Transform physical registers into virtual registers.
 7706SDValue LoongArchTargetLowering::LowerFormalArguments(
 7707    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
7708 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
7709 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7710
 7711  MachineFunction &MF = DAG.getMachineFunction();
 7712
7713 switch (CallConv) {
7714 default:
7715 llvm_unreachable("Unsupported calling convention");
7716 case CallingConv::C:
7717 case CallingConv::Fast:
7719 break;
7720 case CallingConv::GHC:
7721 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
7722 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
 7723      report_fatal_error(
 7724          "GHC calling convention requires the F and D extensions");
7725 }
7726
7727 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7728 MVT GRLenVT = Subtarget.getGRLenVT();
7729 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
 7730  // Used with varargs to accumulate store chains.
7731 std::vector<SDValue> OutChains;
7732
7733 // Assign locations to all of the incoming arguments.
 7734  SmallVector<CCValAssign> ArgLocs;
 7735  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
7736
7737 if (CallConv == CallingConv::GHC)
 7738    CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
 7739  else
7740 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
7741
7742 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
7743 CCValAssign &VA = ArgLocs[i];
7744 SDValue ArgValue;
7745 // Passing f64 on LA32D with a soft float ABI must be handled as a special
7746 // case.
7747 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7748 assert(VA.needsCustom());
7749 ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
7750 } else if (VA.isRegLoc())
7751 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
7752 else
7753 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
7754 if (VA.getLocInfo() == CCValAssign::Indirect) {
7755 // If the original argument was split and passed by reference, we need to
7756 // load all parts of it here (using the same address).
7757 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
 7758                                   MachinePointerInfo()));
 7759      unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
7760 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
7761 assert(ArgPartOffset == 0);
7762 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
7763 CCValAssign &PartVA = ArgLocs[i + 1];
7764 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
7765 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
7766 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
7767 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
 7768                                     MachinePointerInfo()));
 7769        ++i;
7770 ++InsIdx;
7771 }
7772 continue;
7773 }
7774 InVals.push_back(ArgValue);
7775 }
7776
7777 if (IsVarArg) {
 7778    ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
 7779    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
7780 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
7781 MachineFrameInfo &MFI = MF.getFrameInfo();
7782 MachineRegisterInfo &RegInfo = MF.getRegInfo();
7783 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
7784
7785 // Offset of the first variable argument from stack pointer, and size of
7786 // the vararg save area. For now, the varargs save area is either zero or
7787 // large enough to hold a0-a7.
7788 int VaArgOffset, VarArgsSaveSize;
7789
7790 // If all registers are allocated, then all varargs must be passed on the
7791 // stack and we don't need to save any argregs.
7792 if (ArgRegs.size() == Idx) {
7793 VaArgOffset = CCInfo.getStackSize();
7794 VarArgsSaveSize = 0;
7795 } else {
7796 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
7797 VaArgOffset = -VarArgsSaveSize;
7798 }
7799
7800 // Record the frame index of the first variable argument
7801 // which is a value necessary to VASTART.
7802 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
7803 LoongArchFI->setVarArgsFrameIndex(FI);
7804
7805 // If saving an odd number of registers then create an extra stack slot to
7806 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
 7807    // offsets to even-numbered registers remain 2*GRLen-aligned.
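    // For example, on LA64 with a0-a2 used by fixed arguments, a3-a7 (40
    // bytes) are saved and one extra 8-byte slot keeps the save area a
    // multiple of 2*GRLen.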
7808 if (Idx % 2) {
7809 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
7810 true);
7811 VarArgsSaveSize += GRLenInBytes;
7812 }
7813
7814 // Copy the integer registers that may have been used for passing varargs
7815 // to the vararg save area.
7816 for (unsigned I = Idx; I < ArgRegs.size();
7817 ++I, VaArgOffset += GRLenInBytes) {
7818 const Register Reg = RegInfo.createVirtualRegister(RC);
7819 RegInfo.addLiveIn(ArgRegs[I], Reg);
7820 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
7821 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
7822 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
7823 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
7825 cast<StoreSDNode>(Store.getNode())
7826 ->getMemOperand()
7827 ->setValue((Value *)nullptr);
7828 OutChains.push_back(Store);
7829 }
7830 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
7831 }
7832
7833 // All stores are grouped in one node to allow the matching between
7834 // the size of Ins and InVals. This only happens for vararg functions.
7835 if (!OutChains.empty()) {
7836 OutChains.push_back(Chain);
7837 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
7838 }
7839
7840 return Chain;
7841}
7842
 7843bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
 7844  return CI->isTailCall();
7845}
7846
7847// Check if the return value is used as only a return value, as otherwise
7848// we can't perform a tail-call.
 7849bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
 7850                                                 SDValue &Chain) const {
7851 if (N->getNumValues() != 1)
7852 return false;
7853 if (!N->hasNUsesOfValue(1, 0))
7854 return false;
7855
7856 SDNode *Copy = *N->user_begin();
7857 if (Copy->getOpcode() != ISD::CopyToReg)
7858 return false;
7859
7860 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
7861 // isn't safe to perform a tail call.
7862 if (Copy->getGluedNode())
7863 return false;
7864
7865 // The copy must be used by a LoongArchISD::RET, and nothing else.
7866 bool HasRet = false;
7867 for (SDNode *Node : Copy->users()) {
7868 if (Node->getOpcode() != LoongArchISD::RET)
7869 return false;
7870 HasRet = true;
7871 }
7872
7873 if (!HasRet)
7874 return false;
7875
7876 Chain = Copy->getOperand(0);
7877 return true;
7878}
7879
7880// Check whether the call is eligible for tail call optimization.
7881bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
7882 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
7883 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
7884
7885 auto CalleeCC = CLI.CallConv;
7886 auto &Outs = CLI.Outs;
7887 auto &Caller = MF.getFunction();
7888 auto CallerCC = Caller.getCallingConv();
7889
7890 // Do not tail call opt if the stack is used to pass parameters.
7891 if (CCInfo.getStackSize() != 0)
7892 return false;
7893
7894 // Do not tail call opt if any parameters need to be passed indirectly.
7895 for (auto &VA : ArgLocs)
7896 if (VA.getLocInfo() == CCValAssign::Indirect)
7897 return false;
7898
7899 // Do not tail call opt if either caller or callee uses struct return
7900 // semantics.
7901 auto IsCallerStructRet = Caller.hasStructRetAttr();
7902 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
7903 if (IsCallerStructRet || IsCalleeStructRet)
7904 return false;
7905
7906 // Do not tail call opt if either the callee or caller has a byval argument.
7907 for (auto &Arg : Outs)
7908 if (Arg.Flags.isByVal())
7909 return false;
7910
7911 // The callee has to preserve all registers the caller needs to preserve.
7912 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
7913 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
7914 if (CalleeCC != CallerCC) {
7915 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
7916 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
7917 return false;
7918 }
7919 return true;
7920}
7921
 7922static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
 7923  return DAG.getDataLayout().getPrefTypeAlign(
7924 VT.getTypeForEVT(*DAG.getContext()));
7925}
7926
7927// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
7928// and output parameter nodes.
7929SDValue
 7930LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
 7931                                   SmallVectorImpl<SDValue> &InVals) const {
7932 SelectionDAG &DAG = CLI.DAG;
7933 SDLoc &DL = CLI.DL;
 7934  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
 7935  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
 7936  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
7937 SDValue Chain = CLI.Chain;
7938 SDValue Callee = CLI.Callee;
7939 CallingConv::ID CallConv = CLI.CallConv;
7940 bool IsVarArg = CLI.IsVarArg;
7941 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7942 MVT GRLenVT = Subtarget.getGRLenVT();
7943 bool &IsTailCall = CLI.IsTailCall;
7944
 7945  MachineFunction &MF = DAG.getMachineFunction();
 7946
7947 // Analyze the operands of the call, assigning locations to each operand.
 7948  SmallVector<CCValAssign> ArgLocs;
 7949  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
7950
7951 if (CallConv == CallingConv::GHC)
7952 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
7953 else
7954 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
7955
7956 // Check if it's really possible to do a tail call.
7957 if (IsTailCall)
7958 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
7959
7960 if (IsTailCall)
7961 ++NumTailCalls;
7962 else if (CLI.CB && CLI.CB->isMustTailCall())
7963 report_fatal_error("failed to perform tail call elimination on a call "
7964 "site marked musttail");
7965
7966 // Get a count of how many bytes are to be pushed on the stack.
7967 unsigned NumBytes = ArgCCInfo.getStackSize();
7968
7969 // Create local copies for byval args.
7970 SmallVector<SDValue> ByValArgs;
7971 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7972 ISD::ArgFlagsTy Flags = Outs[i].Flags;
7973 if (!Flags.isByVal())
7974 continue;
7975
7976 SDValue Arg = OutVals[i];
7977 unsigned Size = Flags.getByValSize();
7978 Align Alignment = Flags.getNonZeroByValAlign();
7979
7980 int FI =
7981 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
7982 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
7983 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
7984
7985 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
7986 /*IsVolatile=*/false,
7987 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
7989 ByValArgs.push_back(FIPtr);
7990 }
7991
7992 if (!IsTailCall)
7993 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
7994
7995 // Copy argument values to their designated locations.
 7996  SmallVector<std::pair<Register, SDValue>> RegsToPass;
 7997  SmallVector<SDValue> MemOpChains;
7998 SDValue StackPtr;
7999 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
8000 ++i, ++OutIdx) {
8001 CCValAssign &VA = ArgLocs[i];
8002 SDValue ArgValue = OutVals[OutIdx];
8003 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
8004
8005 // Handle passing f64 on LA32D with a soft float ABI as a special case.
8006 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8007 assert(VA.isRegLoc() && "Expected register VA assignment");
8008 assert(VA.needsCustom());
8009 SDValue SplitF64 =
 8010          DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
 8011                      DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
8012 SDValue Lo = SplitF64.getValue(0);
8013 SDValue Hi = SplitF64.getValue(1);
8014
8015 Register RegLo = VA.getLocReg();
8016 RegsToPass.push_back(std::make_pair(RegLo, Lo));
8017
8018 // Get the CCValAssign for the Hi part.
8019 CCValAssign &HiVA = ArgLocs[++i];
8020
8021 if (HiVA.isMemLoc()) {
8022 // Second half of f64 is passed on the stack.
8023 if (!StackPtr.getNode())
8024 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
 8025        SDValue Address =
 8026            DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
8027 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
8028 // Emit the store.
8029 MemOpChains.push_back(DAG.getStore(
8030 Chain, DL, Hi, Address,
8032 } else {
8033 // Second half of f64 is passed in another GPR.
8034 Register RegHigh = HiVA.getLocReg();
8035 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
8036 }
8037 continue;
8038 }
8039
8040 // Promote the value if needed.
8041 // For now, only handle fully promoted and indirect arguments.
8042 if (VA.getLocInfo() == CCValAssign::Indirect) {
8043 // Store the argument in a stack slot and pass its address.
8044 Align StackAlign =
8045 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
8046 getPrefTypeAlign(ArgValue.getValueType(), DAG));
8047 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
8048 // If the original argument was split and passed by reference, we need to
8049 // store the required parts of it here (and pass just one address).
8050 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
8051 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
8052 assert(ArgPartOffset == 0);
8053 // Calculate the total size to store. We don't have access to what we're
8054 // actually storing other than performing the loop and collecting the
8055 // info.
 8056      SmallVector<std::pair<SDValue, SDValue>> Parts;
 8057      while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
8058 SDValue PartValue = OutVals[OutIdx + 1];
8059 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
8060 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
8061 EVT PartVT = PartValue.getValueType();
8062
8063 StoredSize += PartVT.getStoreSize();
8064 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
8065 Parts.push_back(std::make_pair(PartValue, Offset));
8066 ++i;
8067 ++OutIdx;
8068 }
8069 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
8070 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
8071 MemOpChains.push_back(
8072 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
8073                        MachinePointerInfo::getFixedStack(MF, FI)));
8074       for (const auto &Part : Parts) {
8075 SDValue PartValue = Part.first;
8076 SDValue PartOffset = Part.second;
8077         SDValue Address =
8078             DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
8079 MemOpChains.push_back(
8080 DAG.getStore(Chain, DL, PartValue, Address,
8081                          MachinePointerInfo::getFixedStack(MF, FI)));
8082       }
8083 ArgValue = SpillSlot;
8084 } else {
8085 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
8086 }
8087
8088 // Use local copy if it is a byval arg.
8089 if (Flags.isByVal())
8090 ArgValue = ByValArgs[j++];
8091
8092 if (VA.isRegLoc()) {
8093 // Queue up the argument copies and emit them at the end.
8094 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
8095 } else {
8096 assert(VA.isMemLoc() && "Argument not register or memory");
8097 assert(!IsTailCall && "Tail call not allowed if stack is used "
8098 "for passing parameters");
8099
8100 // Work out the address of the stack slot.
8101 if (!StackPtr.getNode())
8102 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
8103       SDValue Address =
8104           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
8105                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
8106
8107 // Emit the store.
8108 MemOpChains.push_back(
8109 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
8110 }
8111 }
8112
8113 // Join the stores, which are independent of one another.
8114 if (!MemOpChains.empty())
8115 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
8116
8117 SDValue Glue;
8118
8119 // Build a sequence of copy-to-reg nodes, chained and glued together.
8120 for (auto &Reg : RegsToPass) {
8121 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
8122 Glue = Chain.getValue(1);
8123 }
8124
8125 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
8126 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
8127 // split it and then direct call can be matched by PseudoCALL.
8128   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
8129     const GlobalValue *GV = S->getGlobal();
8130     unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
8131                            ? LoongArchII::MO_CALL
8132                            : LoongArchII::MO_CALL_PLT;
8133     Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
8134   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
8135     unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
8136                            ? LoongArchII::MO_CALL
8137                            : LoongArchII::MO_CALL_PLT;
8138     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
8139 }
8140
8141 // The first call operand is the chain and the second is the target address.
8142   SmallVector<SDValue> Ops;
8143   Ops.push_back(Chain);
8144 Ops.push_back(Callee);
8145
8146 // Add argument registers to the end of the list so that they are
8147 // known live into the call.
8148 for (auto &Reg : RegsToPass)
8149 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
8150
8151 if (!IsTailCall) {
8152 // Add a register mask operand representing the call-preserved registers.
8153 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
8154 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
8155 assert(Mask && "Missing call preserved mask for calling convention");
8156 Ops.push_back(DAG.getRegisterMask(Mask));
8157 }
8158
8159 // Glue the call to the argument copies, if any.
8160 if (Glue.getNode())
8161 Ops.push_back(Glue);
8162
8163 // Emit the call.
8164 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
8165 unsigned Op;
8166 switch (DAG.getTarget().getCodeModel()) {
8167 default:
8168 report_fatal_error("Unsupported code model");
8169 case CodeModel::Small:
8170 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
8171 break;
8172 case CodeModel::Medium:
8173 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
8174     Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
8175     break;
8176 case CodeModel::Large:
8177 assert(Subtarget.is64Bit() && "Large code model requires LA64");
8178     Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
8179     break;
8180 }
8181
8182 if (IsTailCall) {
8183     MF.getFrameInfo().setHasTailCall();
8184     SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
8185 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
8186 return Ret;
8187 }
8188
8189 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
8190 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
8191 Glue = Chain.getValue(1);
8192
8193 // Mark the end of the call, which is glued to the call itself.
8194 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
8195 Glue = Chain.getValue(1);
8196
8197 // Assign locations to each value returned by this call.
8198   SmallVector<CCValAssign> RVLocs;
8199   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
8200 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
8201
8202 // Copy all of the result registers out of their specified physreg.
8203 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
8204 auto &VA = RVLocs[i];
8205 // Copy the value out.
8206 SDValue RetValue =
8207 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
8208 // Glue the RetValue to the end of the call sequence.
8209 Chain = RetValue.getValue(1);
8210 Glue = RetValue.getValue(2);
8211
8212 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8213 assert(VA.needsCustom());
8214 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
8215 MVT::i32, Glue);
8216 Chain = RetValue2.getValue(1);
8217 Glue = RetValue2.getValue(2);
8218 RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
8219 RetValue, RetValue2);
8220 } else
8221 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
8222
8223 InVals.push_back(RetValue);
8224 }
8225
8226 return Chain;
8227}
8228
8229 bool LoongArchTargetLowering::CanLowerReturn(
8230     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
8231 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
8232 const Type *RetTy) const {
8233   SmallVector<CCValAssign, 16> RVLocs;
8234   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
8235
8236 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8237 LoongArchABI::ABI ABI =
8238 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8239 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
8240 Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
8241 return false;
8242 }
8243 return true;
8244}
8245
8246 SDValue LoongArchTargetLowering::LowerReturn(
8247     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8248     const SmallVectorImpl<ISD::OutputArg> &Outs,
8249     const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
8250 SelectionDAG &DAG) const {
8251 // Stores the assignment of the return value to a location.
8252   SmallVector<CCValAssign, 16> RVLocs;
8253
8254 // Info about the registers and stack slot.
8255 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
8256 *DAG.getContext());
8257
8258 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
8259 nullptr, CC_LoongArch);
8260 if (CallConv == CallingConv::GHC && !RVLocs.empty())
8261 report_fatal_error("GHC functions return void only");
8262 SDValue Glue;
8263 SmallVector<SDValue, 4> RetOps(1, Chain);
8264
8265 // Copy the result values into the output registers.
8266 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
8267 SDValue Val = OutVals[OutIdx];
8268 CCValAssign &VA = RVLocs[i];
8269 assert(VA.isRegLoc() && "Can only return in registers!");
8270
8271 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8272 // Handle returning f64 on LA32D with a soft float ABI.
8273 assert(VA.isRegLoc() && "Expected return via registers");
8274 assert(VA.needsCustom());
8275       SDValue SplitF64 = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
8276                                      DAG.getVTList(MVT::i32, MVT::i32), Val);
8277 SDValue Lo = SplitF64.getValue(0);
8278 SDValue Hi = SplitF64.getValue(1);
8279 Register RegLo = VA.getLocReg();
8280 Register RegHi = RVLocs[++i].getLocReg();
8281
8282 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
8283 Glue = Chain.getValue(1);
8284 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
8285 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
8286 Glue = Chain.getValue(1);
8287 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
8288 } else {
8289 // Handle a 'normal' return.
8290 Val = convertValVTToLocVT(DAG, Val, VA, DL);
8291 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
8292
8293 // Guarantee that all emitted copies are stuck together.
8294 Glue = Chain.getValue(1);
8295 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
8296 }
8297 }
8298
8299 RetOps[0] = Chain; // Update chain.
8300
8301 // Add the glue node if we have it.
8302 if (Glue.getNode())
8303 RetOps.push_back(Glue);
8304
8305 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
8306}
8307
8308 bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,
8309                                                EVT VT) const {
8310 if (!Subtarget.hasExtLSX())
8311 return false;
8312
8313 if (VT == MVT::f32) {
8314 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
8315 return (masked == 0x3e000000 || masked == 0x40000000);
8316 }
8317
8318 if (VT == MVT::f64) {
8319 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
8320 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
8321 }
8322
8323 return false;
8324}
8325
8326bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
8327 bool ForCodeSize) const {
8328 // TODO: Maybe need more checks here after vector extension is supported.
8329 if (VT == MVT::f32 && !Subtarget.hasBasicF())
8330 return false;
8331 if (VT == MVT::f64 && !Subtarget.hasBasicD())
8332 return false;
8333 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
8334}
8335
8336 bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
8337   return true;
8338}
8339
8340 bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
8341   return true;
8342}
8343
8344bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
8345 const Instruction *I) const {
8346 if (!Subtarget.is64Bit())
8347 return isa<LoadInst>(I) || isa<StoreInst>(I);
8348
8349 if (isa<LoadInst>(I))
8350 return true;
8351
8352 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
8353   // require fences because we can use amswap_db.[w/d].
8354 Type *Ty = I->getOperand(0)->getType();
8355 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
8356 unsigned Size = Ty->getIntegerBitWidth();
8357 return (Size == 8 || Size == 16);
8358 }
8359
8360 return false;
8361}
8362
8363 EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
8364                                                 LLVMContext &Context,
8365 EVT VT) const {
8366 if (!VT.isVector())
8367 return getPointerTy(DL);
8369}
8370
8371 bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
8372   // TODO: Support vectors.
8373 return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
8374}
8375
8376 bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
8377                                                  const CallInst &I,
8378 MachineFunction &MF,
8379 unsigned Intrinsic) const {
8380 switch (Intrinsic) {
8381 default:
8382 return false;
8383 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
8384 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
8385 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
8386 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
8387 Info.opc = ISD::INTRINSIC_W_CHAIN;
8388 Info.memVT = MVT::i32;
8389 Info.ptrVal = I.getArgOperand(0);
8390 Info.offset = 0;
8391 Info.align = Align(4);
8392     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
8393                  MachineMemOperand::MOVolatile;
8394     return true;
8395 // TODO: Add more Intrinsics later.
8396 }
8397}
8398
8399// When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8,
8400// atomicrmw and/or/xor operations with operands less than 32 bits cannot be
8401// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent
8402// regression, we need to implement it manually.
8403 void LoongArchTargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
8404   AtomicRMWInst::BinOp Op = AI->getOperation();
8405
8406   assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
8407           Op == AtomicRMWInst::And) &&
8408 "Unable to expand");
8409 unsigned MinWordSize = 4;
8410
8411 IRBuilder<> Builder(AI);
8412 LLVMContext &Ctx = Builder.getContext();
8413 const DataLayout &DL = AI->getDataLayout();
8414 Type *ValueType = AI->getType();
8415 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
8416
8417 Value *Addr = AI->getPointerOperand();
8418 PointerType *PtrTy = cast<PointerType>(Addr->getType());
8419 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
8420
8421 Value *AlignedAddr = Builder.CreateIntrinsic(
8422 Intrinsic::ptrmask, {PtrTy, IntTy},
8423 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
8424 "AlignedAddr");
8425
8426 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
8427 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
8428 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
8429 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
8430 Value *Mask = Builder.CreateShl(
8431 ConstantInt::get(WordType,
8432 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
8433 ShiftAmt, "Mask");
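  // For example, an i8 operand at byte offset 1 of its aligned word yields
  // ShiftAmt == 8 and Mask == 0xFF00, so the widened atomic op only touches
  // that byte.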
8434 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
8435 Value *ValOperand_Shifted =
8436 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
8437 ShiftAmt, "ValOperand_Shifted");
8438 Value *NewOperand;
8439 if (Op == AtomicRMWInst::And)
8440 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
8441 else
8442 NewOperand = ValOperand_Shifted;
8443
8444 AtomicRMWInst *NewAI =
8445 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
8446 AI->getOrdering(), AI->getSyncScopeID());
8447
8448 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
8449 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
8450 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
8451 AI->replaceAllUsesWith(FinalOldResult);
8452 AI->eraseFromParent();
8453}
8454
8455 TargetLowering::AtomicExpansionKind
8456 LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
8457   // TODO: Add more AtomicRMWInst that need to be extended.
8458
8459   // Since a floating-point operation requires a non-trivial set of data
8460   // operations, use CmpXChg to expand.
8461   if (AI->isFloatingPointOperation() ||
8462       AI->getOperation() == AtomicRMWInst::UIncWrap ||
8463       AI->getOperation() == AtomicRMWInst::UDecWrap ||
8464       AI->getOperation() == AtomicRMWInst::USubCond ||
8465       AI->getOperation() == AtomicRMWInst::USubSat)
8466     return AtomicExpansionKind::CmpXChg;
8467
8468 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
8469       (AI->getOperation() == AtomicRMWInst::Xchg ||
8470        AI->getOperation() == AtomicRMWInst::Add ||
8471        AI->getOperation() == AtomicRMWInst::Sub)) {
8472     return AtomicExpansionKind::None;
8473   }
8474
8475 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
8476 if (Subtarget.hasLAMCAS()) {
8477     if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
8478                       AI->getOperation() == AtomicRMWInst::Or ||
8479                       AI->getOperation() == AtomicRMWInst::Xor))
8480       return AtomicExpansionKind::Expand;
8481     if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
8482       return AtomicExpansionKind::CmpXChg;
8483   }
8484
8485   if (Size == 8 || Size == 16)
8486     return AtomicExpansionKind::MaskedIntrinsic;
8487   return AtomicExpansionKind::None;
8488 }
8489
8490static Intrinsic::ID
8491 getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
8492                                     AtomicRMWInst::BinOp BinOp) {
8493 if (GRLen == 64) {
8494 switch (BinOp) {
8495 default:
8496 llvm_unreachable("Unexpected AtomicRMW BinOp");
8497     case AtomicRMWInst::Xchg:
8498       return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
8499 case AtomicRMWInst::Add:
8500 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
8501 case AtomicRMWInst::Sub:
8502 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
8503     case AtomicRMWInst::Nand:
8504       return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
8505     case AtomicRMWInst::UMax:
8506       return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
8507     case AtomicRMWInst::UMin:
8508       return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
8509 case AtomicRMWInst::Max:
8510 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
8511 case AtomicRMWInst::Min:
8512 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
8513 // TODO: support other AtomicRMWInst.
8514 }
8515 }
8516
8517 if (GRLen == 32) {
8518 switch (BinOp) {
8519 default:
8520 llvm_unreachable("Unexpected AtomicRMW BinOp");
8521     case AtomicRMWInst::Xchg:
8522       return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
8523 case AtomicRMWInst::Add:
8524 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
8525 case AtomicRMWInst::Sub:
8526 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
8527     case AtomicRMWInst::Nand:
8528       return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
8529     case AtomicRMWInst::UMax:
8530       return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
8531     case AtomicRMWInst::UMin:
8532       return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
8533 case AtomicRMWInst::Max:
8534 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
8535 case AtomicRMWInst::Min:
8536 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
8537 // TODO: support other AtomicRMWInst.
8538 }
8539 }
8540
8541 llvm_unreachable("Unexpected GRLen\n");
8542}
8543
8544 TargetLowering::AtomicExpansionKind
8545 LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
8546     AtomicCmpXchgInst *CI) const {
8547
8548   if (Subtarget.hasLAMCAS())
8549     return AtomicExpansionKind::None;
8550
8551   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
8552   if (Size == 8 || Size == 16)
8553     return AtomicExpansionKind::MaskedIntrinsic;
8554   return AtomicExpansionKind::None;
8555 }
8556
8557 Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
8558     IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
8559 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
8560 unsigned GRLen = Subtarget.getGRLen();
8561 AtomicOrdering FailOrd = CI->getFailureOrdering();
8562 Value *FailureOrdering =
8563 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
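  // The failure ordering travels to the masked-cmpxchg intrinsic as a plain
  // integer operand; the later pseudo expansion presumably uses it to choose
  // the barriers it emits.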
8564 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
8565 if (GRLen == 64) {
8566 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
8567 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
8568 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
8569 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
8570 }
8571 Type *Tys[] = {AlignedAddr->getType()};
8572 Value *Result = Builder.CreateIntrinsic(
8573 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
8574 if (GRLen == 64)
8575 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
8576 return Result;
8577}
8578
8579 Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
8580     IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
8581 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
8582 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
8583 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
8584 // mask, as this produces better code than the LL/SC loop emitted by
8585 // int_loongarch_masked_atomicrmw_xchg.
8586 if (AI->getOperation() == AtomicRMWInst::Xchg &&
8587       isa<ConstantInt>(AI->getValOperand())) {
8588     ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
8589     if (CVal->isZero())
8590 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
8591 Builder.CreateNot(Mask, "Inv_Mask"),
8592 AI->getAlign(), Ord);
8593 if (CVal->isMinusOne())
8594 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
8595 AI->getAlign(), Ord);
8596 }
8597
8598 unsigned GRLen = Subtarget.getGRLen();
8599 Value *Ordering =
8600 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
8601 Type *Tys[] = {AlignedAddr->getType()};
8602   Function *LlwOpScwLoop = Intrinsic::getOrInsertDeclaration(
8603       AI->getModule(),
8604       getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);
8605
8606 if (GRLen == 64) {
8607 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
8608 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
8609 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
8610 }
8611
8612 Value *Result;
8613
8614 // Must pass the shift amount needed to sign extend the loaded value prior
8615 // to performing a signed comparison for min/max. ShiftAmt is the number of
8616 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
8617 // is the number of bits to left+right shift the value in order to
8618 // sign-extend.
8619 if (AI->getOperation() == AtomicRMWInst::Min ||
8620       AI->getOperation() == AtomicRMWInst::Max) {
8621     const DataLayout &DL = AI->getDataLayout();
8622 unsigned ValWidth =
8623 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
8624 Value *SextShamt =
8625 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
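    // For example, on LA64 an i8 value at ShiftAmt == 8 gives
    // SextShamt == 64 - 8 - 8 == 48, the shift-left/shift-right amount that
    // sign-extends the loaded byte inside the LL/SC loop.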
8626 Result = Builder.CreateCall(LlwOpScwLoop,
8627 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
8628 } else {
8629 Result =
8630 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
8631 }
8632
8633 if (GRLen == 64)
8634 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
8635 return Result;
8636}
8637
8638 bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
8639     const MachineFunction &MF, EVT VT) const {
8640 VT = VT.getScalarType();
8641
8642 if (!VT.isSimple())
8643 return false;
8644
8645 switch (VT.getSimpleVT().SimpleTy) {
8646 case MVT::f32:
8647 case MVT::f64:
8648 return true;
8649 default:
8650 break;
8651 }
8652
8653 return false;
8654}
8655
8656 Register LoongArchTargetLowering::getExceptionPointerRegister(
8657     const Constant *PersonalityFn) const {
8658 return LoongArch::R4;
8659}
8660
8661 Register LoongArchTargetLowering::getExceptionSelectorRegister(
8662     const Constant *PersonalityFn) const {
8663 return LoongArch::R5;
8664}
8665
8666//===----------------------------------------------------------------------===//
8667// Target Optimization Hooks
8668//===----------------------------------------------------------------------===//
8669
8670 static int getEstimateRefinementSteps(EVT VT,
8671                                       const LoongArchSubtarget &Subtarget) {
8672   // The FRECIPE feature's instructions have a relative accuracy of 2^-14.
8673   // IEEE float has 23 significand bits and double has 52 significand bits.
8674 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
8675 return RefinementSteps;
8676}
8677
8678 SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand,
8679                                                  SelectionDAG &DAG, int Enabled,
8680 int &RefinementSteps,
8681 bool &UseOneConstNR,
8682 bool Reciprocal) const {
8683 if (Subtarget.hasFrecipe()) {
8684 SDLoc DL(Operand);
8685 EVT VT = Operand.getValueType();
8686
8687 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
8688 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
8689 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
8690 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
8691 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
8692
8693 if (RefinementSteps == ReciprocalEstimate::Unspecified)
8694 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
8695
8696 SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
8697 if (Reciprocal)
8698 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
8699
8700 return Estimate;
8701 }
8702 }
8703
8704 return SDValue();
8705}
8706
8707 SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand,
8708                                                   SelectionDAG &DAG,
8709 int Enabled,
8710 int &RefinementSteps) const {
8711 if (Subtarget.hasFrecipe()) {
8712 SDLoc DL(Operand);
8713 EVT VT = Operand.getValueType();
8714
8715 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
8716 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
8717 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
8718 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
8719 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
8720
8721 if (RefinementSteps == ReciprocalEstimate::Unspecified)
8722 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
8723
8724 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
8725 }
8726 }
8727
8728 return SDValue();
8729}
8730
8731//===----------------------------------------------------------------------===//
8732// LoongArch Inline Assembly Support
8733//===----------------------------------------------------------------------===//
8734
8735 LoongArchTargetLowering::ConstraintType
8736 LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
8737 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
8738 //
8739 // 'f': A floating-point register (if available).
8740 // 'k': A memory operand whose address is formed by a base register and
8741 // (optionally scaled) index register.
8742 // 'l': A signed 16-bit constant.
8743 // 'm': A memory operand whose address is formed by a base register and
8744 // offset that is suitable for use in instructions with the same
8745 // addressing mode as st.w and ld.w.
8746 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
8747 // instruction)
8748 // 'I': A signed 12-bit constant (for arithmetic instructions).
8749 // 'J': Integer zero.
8750 // 'K': An unsigned 12-bit constant (for logic instructions).
8751 // "ZB": An address that is held in a general-purpose register. The offset is
8752 // zero.
8753 // "ZC": A memory operand whose address is formed by a base register and
8754 // offset that is suitable for use in instructions with the same
8755 // addressing mode as ll.w and sc.w.
8756 if (Constraint.size() == 1) {
8757 switch (Constraint[0]) {
8758 default:
8759 break;
8760 case 'f':
8761 case 'q':
8762 return C_RegisterClass;
8763 case 'l':
8764 case 'I':
8765 case 'J':
8766 case 'K':
8767 return C_Immediate;
8768 case 'k':
8769 return C_Memory;
8770 }
8771 }
8772
8773 if (Constraint == "ZC" || Constraint == "ZB")
8774 return C_Memory;
8775
8776 // 'm' is handled here.
8777 return TargetLowering::getConstraintType(Constraint);
8778}
8779
8780InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
8781 StringRef ConstraintCode) const {
8782 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
8783       .Case("k", InlineAsm::ConstraintCode::k)
8784       .Case("ZB", InlineAsm::ConstraintCode::ZB)
8785       .Case("ZC", InlineAsm::ConstraintCode::ZC)
8786       .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
8787}
8788
8789std::pair<unsigned, const TargetRegisterClass *>
8790LoongArchTargetLowering::getRegForInlineAsmConstraint(
8791 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
8792 // First, see if this is a constraint that directly corresponds to a LoongArch
8793 // register class.
8794 if (Constraint.size() == 1) {
8795 switch (Constraint[0]) {
8796 case 'r':
8797 // TODO: Support fixed vectors up to GRLen?
8798 if (VT.isVector())
8799 break;
8800 return std::make_pair(0U, &LoongArch::GPRRegClass);
8801 case 'q':
8802 return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
8803 case 'f':
8804 if (Subtarget.hasBasicF() && VT == MVT::f32)
8805 return std::make_pair(0U, &LoongArch::FPR32RegClass);
8806 if (Subtarget.hasBasicD() && VT == MVT::f64)
8807 return std::make_pair(0U, &LoongArch::FPR64RegClass);
8808 if (Subtarget.hasExtLSX() &&
8809 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
8810 return std::make_pair(0U, &LoongArch::LSX128RegClass);
8811 if (Subtarget.hasExtLASX() &&
8812 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
8813 return std::make_pair(0U, &LoongArch::LASX256RegClass);
8814 break;
8815 default:
8816 break;
8817 }
8818 }
8819
8820 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
8821 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
8822 // constraints while the official register name is prefixed with a '$'. So we
8823 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
8824 // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is
8825 // case insensitive, so no need to convert the constraint to upper case here.
8826 //
8827 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
8828 // decode the usage of register name aliases into their official names. And
8829 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
8830 // official register names.
8831 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
8832 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
8833 bool IsFP = Constraint[2] == 'f';
8834 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
8835 std::pair<unsigned, const TargetRegisterClass *> R;
8836     R = TargetLowering::getRegForInlineAsmConstraint(
8837         TRI, join_items("", Temp.first, Temp.second), VT);
8838 // Match those names to the widest floating point register type available.
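    // For example, "{$f3}" used with an f64 operand resolves below to F3_64 in
    // FPR64RegClass, i.e. the 64-bit view of the same physical register.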
8839 if (IsFP) {
8840 unsigned RegNo = R.first;
8841 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
8842 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
8843 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
8844 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
8845 }
8846 }
8847 }
8848 return R;
8849 }
8850
8851 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
8852}
8853
8854void LoongArchTargetLowering::LowerAsmOperandForConstraint(
8855 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
8856 SelectionDAG &DAG) const {
8857 // Currently only support length 1 constraints.
8858 if (Constraint.size() == 1) {
8859 switch (Constraint[0]) {
8860 case 'l':
8861 // Validate & create a 16-bit signed immediate operand.
8862 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8863 uint64_t CVal = C->getSExtValue();
8864 if (isInt<16>(CVal))
8865 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
8866 Subtarget.getGRLenVT()));
8867 }
8868 return;
8869 case 'I':
8870 // Validate & create a 12-bit signed immediate operand.
8871 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8872 uint64_t CVal = C->getSExtValue();
8873 if (isInt<12>(CVal))
8874 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
8875 Subtarget.getGRLenVT()));
8876 }
8877 return;
8878 case 'J':
8879 // Validate & create an integer zero operand.
8880 if (auto *C = dyn_cast<ConstantSDNode>(Op))
8881 if (C->getZExtValue() == 0)
8882 Ops.push_back(
8883 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
8884 return;
8885 case 'K':
8886 // Validate & create a 12-bit unsigned immediate operand.
8887 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8888 uint64_t CVal = C->getZExtValue();
8889 if (isUInt<12>(CVal))
8890 Ops.push_back(
8891 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
8892 }
8893 return;
8894 default:
8895 break;
8896 }
8897 }
8899}
8900
8901#define GET_REGISTER_MATCHER
8902#include "LoongArchGenAsmMatcher.inc"
8903
8904 Register
8905 LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
8906                                            const MachineFunction &MF) const {
8907 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
8908 std::string NewRegName = Name.second.str();
8909 Register Reg = MatchRegisterAltName(NewRegName);
8910 if (!Reg)
8911 Reg = MatchRegisterName(NewRegName);
8912 if (!Reg)
8913 return Reg;
8914 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
8915 if (!ReservedRegs.test(Reg))
8916 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
8917 StringRef(RegName) + "\"."));
8918 return Reg;
8919}
8920
8921 bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
8922                                                      EVT VT, SDValue C) const {
8923 // TODO: Support vectors.
8924 if (!VT.isScalarInteger())
8925 return false;
8926
8927 // Omit the optimization if the data size exceeds GRLen.
8928 if (VT.getSizeInBits() > Subtarget.getGRLen())
8929 return false;
8930
8931 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
8932 const APInt &Imm = ConstNode->getAPIntValue();
8933 // Break MUL into (SLLI + ADD/SUB) or ALSL.
8934 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
8935 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
8936 return true;
8937 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
8938 if (ConstNode->hasOneUse() &&
8939 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
8940 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
8941 return true;
8942     // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
8943     // in which the immediate has two set bits. Or break (MUL x, imm)
8944     // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
8945     // equals (1 << s0) - (1 << s1).
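    // For example, with a single use, Imm == 0x2100 (= (1 << 13) + (1 << 8))
    // is accepted further down and can be lowered as an ADD of two shifted
    // copies of x.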
8946 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
8947 unsigned Shifts = Imm.countr_zero();
8948 // Reject immediates which can be composed via a single LUI.
8949 if (Shifts >= 12)
8950 return false;
8951 // Reject multiplications can be optimized to
8952 // (SLLI (ALSL x, x, 1/2/3/4), s).
8953 APInt ImmPop = Imm.ashr(Shifts);
8954 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
8955 return false;
8956 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
8957 // since it needs one more instruction than other 3 cases.
8958 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
8959 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
8960 (ImmSmall - Imm).isPowerOf2())
8961 return true;
8962 }
8963 }
8964
8965 return false;
8966}
8967
8968 bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
8969                                                     const AddrMode &AM,
8970 Type *Ty, unsigned AS,
8971 Instruction *I) const {
8972 // LoongArch has four basic addressing modes:
8973 // 1. reg
8974 // 2. reg + 12-bit signed offset
8975 // 3. reg + 14-bit signed offset left-shifted by 2
8976 // 4. reg1 + reg2
8977 // TODO: Add more checks after support vector extension.
8978
8979 // No global is ever allowed as a base.
8980 if (AM.BaseGV)
8981 return false;
8982
8983 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
8984 // with `UAL` feature.
8985 if (!isInt<12>(AM.BaseOffs) &&
8986 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
8987 return false;
8988
8989 switch (AM.Scale) {
8990 case 0:
8991 // "r+i" or just "i", depending on HasBaseReg.
8992 break;
8993 case 1:
8994 // "r+r+i" is not allowed.
8995 if (AM.HasBaseReg && AM.BaseOffs)
8996 return false;
8997 // Otherwise we have "r+r" or "r+i".
8998 break;
8999 case 2:
9000 // "2*r+r" or "2*r+i" is not allowed.
9001 if (AM.HasBaseReg || AM.BaseOffs)
9002 return false;
9003 // Allow "2*r" as "r+r".
9004 break;
9005 default:
9006 return false;
9007 }
9008
9009 return true;
9010}
9011
9012 bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
9013   return isInt<12>(Imm);
9014}
9015
9016 bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
9017   return isInt<12>(Imm);
9018}
9019
9020 bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
9021   // Zexts are free if they can be combined with a load.
9022 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
9023 // poorly with type legalization of compares preferring sext.
9024 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
9025 EVT MemVT = LD->getMemoryVT();
9026 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
9027 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
9028 LD->getExtensionType() == ISD::ZEXTLOAD))
9029 return true;
9030 }
9031
9032 return TargetLowering::isZExtFree(Val, VT2);
9033}
9034
9035 bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
9036                                                     EVT DstVT) const {
9037 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
9038}
9039
9040 bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const {
9041   return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
9042}
9043
9044 bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
9045   // TODO: Support vectors.
9046 if (Y.getValueType().isVector())
9047 return false;
9048
9049 return !isa<ConstantSDNode>(Y);
9050}
9051
9052 ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
9053   // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
9054 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
9055}
9056
9057 bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
9058     Type *Ty, bool IsSigned) const {
9059 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
9060 return true;
9061
9062 return IsSigned;
9063}
9064
9065 bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
9066   // Return false to suppress the unnecessary extensions if the LibCall
9067 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
9068 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
9069 Type.getSizeInBits() < Subtarget.getGRLen()))
9070 return false;
9071 return true;
9072}
9073
9074 // memcpy, and other memory intrinsics, typically try to use a wider load/store
9075 // if the source/dest is aligned and the copy size is large enough. We therefore
9076 // want to align such objects passed to memory intrinsics.
9077 bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI,
9078                                                      unsigned &MinSize,
9079                                                      Align &PrefAlign) const {
9080 if (!isa<MemIntrinsic>(CI))
9081 return false;
9082
9083 if (Subtarget.is64Bit()) {
9084 MinSize = 8;
9085 PrefAlign = Align(8);
9086 } else {
9087 MinSize = 4;
9088 PrefAlign = Align(4);
9089 }
9090
9091 return true;
9092}
9093
9102
9103bool LoongArchTargetLowering::splitValueIntoRegisterParts(
9104 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
9105 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
9106 bool IsABIRegCopy = CC.has_value();
9107 EVT ValueVT = Val.getValueType();
9108
9109 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9110 PartVT == MVT::f32) {
9111 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
9112 // nan, and cast to f32.
9113 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
9114 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
9115 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
9116 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
9117 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
9118 Parts[0] = Val;
9119 return true;
9120 }
9121
9122 return false;
9123}
9124
9125SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
9126 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
9127 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
9128 bool IsABIRegCopy = CC.has_value();
9129
9130 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9131 PartVT == MVT::f32) {
9132 SDValue Val = Parts[0];
9133
9134 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
9135 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
9136 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
9137 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
9138 return Val;
9139 }
9140
9141 return SDValue();
9142}
9143
9144MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
9145 CallingConv::ID CC,
9146 EVT VT) const {
9147 // Use f32 to pass f16.
9148 if (VT == MVT::f16 && Subtarget.hasBasicF())
9149 return MVT::f32;
9150
9152}
9153
9154unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
9155 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
9156 // Use f32 to pass f16.
9157 if (VT == MVT::f16 && Subtarget.hasBasicF())
9158 return 1;
9159
9161}
9162
9163 bool LoongArchTargetLowering::SimplifyDemandedBitsForTargetNode(
9164     SDValue Op, const APInt &OriginalDemandedBits,
9165 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
9166 unsigned Depth) const {
9167 EVT VT = Op.getValueType();
9168 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
9169 unsigned Opc = Op.getOpcode();
9170 switch (Opc) {
9171 default:
9172 break;
9173   case LoongArchISD::VMSKLTZ:
9174   case LoongArchISD::XVMSKLTZ: {
9175     SDValue Src = Op.getOperand(0);
9176 MVT SrcVT = Src.getSimpleValueType();
9177 unsigned SrcBits = SrcVT.getScalarSizeInBits();
9178 unsigned NumElts = SrcVT.getVectorNumElements();
9179
9180 // If we don't need the sign bits at all just return zero.
9181 if (OriginalDemandedBits.countr_zero() >= NumElts)
9182 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
9183
9184 // Only demand the vector elements of the sign bits we need.
9185 APInt KnownUndef, KnownZero;
9186 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
9187 if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
9188 TLO, Depth + 1))
9189 return true;
9190
9191 Known.Zero = KnownZero.zext(BitWidth);
9192 Known.Zero.setHighBits(BitWidth - NumElts);
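    // [X]VMSKLTZ yields one mask bit per source element, so every result bit
    // at position NumElts and above is already known to be zero here.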
9193
9194 // [X]VMSKLTZ only uses the MSB from each vector element.
9195 KnownBits KnownSrc;
9196 APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
9197 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
9198 Depth + 1))
9199 return true;
9200
9201 if (KnownSrc.One[SrcBits - 1])
9202 Known.One.setLowBits(NumElts);
9203 else if (KnownSrc.Zero[SrcBits - 1])
9204 Known.Zero.setLowBits(NumElts);
9205
9206 // Attempt to avoid multi-use ops if we don't need anything from it.
9208 Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
9209 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
9210 return false;
9211 }
9212 }
9213
9214   return TargetLowering::SimplifyDemandedBitsForTargetNode(
9215       Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
9216}
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
#define NODE_NAME_CASE(node)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static uint64_t getConstant(const Value *IndexValue)
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static SDValue performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
const MCPhysReg ArgFPR32s[]
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
const MCPhysReg ArgVRs[]
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKEV (if possible).
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
static cl::opt< bool > ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, cl::desc("Trap on integer division by zero."), cl::init(false))
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static void emitErrorAndReplaceIntrinsicResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, StringRef ErrorMsg, bool WithChain=true)
static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE as byte rotate (if possible).
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVH (if possible).
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG)
static SDValue performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, const SDLoc &DL, SelectionDAG &DAG)
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG)
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, unsigned Depth)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as shift (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPERM (if possible).
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static MachineBasicBlock * emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static void fillVector(ArrayRef< SDValue > Ops, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, SDValue &Vector, EVT ResTy)
static SDValue performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, EVT ResTy, unsigned first)
static bool isSelectPseudo(MachineInstr &MI)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Shuffle vectors by lane to generate more optimized instructions.
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match vector shuffle as byte rotation.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instruction.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VSHUF.
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode)
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue, bool AllowSymbol=false)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1391
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1330
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:435
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1388
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:286
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
bool isFloatingPointOperation() const
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool test(unsigned Idx) const
Definition BitVector.h:461
size_type count() const
count - Returns the number of bits which are set.
Definition BitVector.h:162
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
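CCState and CCValAssign drive the standard argument-lowering loop. The fragment below is a hedged sketch of that loop as it might appear inside LowerFormalArguments; CC_SomeTarget is a hypothetical CCAssignFn, and RC, PtrVT, Chain, DL, Ins, InVals, MF, DAG, CallConv and IsVarArg are assumed to come from the enclosing implementation:

SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_SomeTarget);

for (CCValAssign &VA : ArgLocs) {
  if (VA.isRegLoc()) {
    // The argument arrives in a register: make it live-in and copy it out.
    Register VReg = MF.addLiveIn(VA.getLocReg(), RC);
    InVals.push_back(DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT()));
  } else {
    // The argument was passed on the stack: create a fixed object and load it.
    int FI = MF.getFrameInfo().CreateFixedObject(VA.getLocVT().getStoreSize(),
                                                 VA.getLocMemOffset(),
                                                 /*IsImmutable=*/true);
    SDValue Addr = DAG.getFrameIndex(FI, PtrVT);
    InVals.push_back(DAG.getLoad(VA.getLocVT(), DL, Chain, Addr,
                                 MachinePointerInfo::getFixedStack(MF, FI)));
  }
}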
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:226
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string and methods for querying it.
Definition DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:388
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:124
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition Function.h:270
Argument * getArg(unsigned i) const
Definition Function.h:884
bool isDSOLocal() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2780
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform a atomicrmw expansion using a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
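Most of the hooks above follow the same pattern: operations registered as Custom are routed to LowerOperation, which dispatches on the opcode. A minimal sketch of that dispatch shape, where MyTargetLowering, lowerVASTART and lowerBRCOND are hypothetical names chosen for illustration:

// Minimal dispatch sketch; helper names are placeholders, not this file's.
SDValue MyTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unexpected custom-lowered operation");
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::BRCOND:
    return lowerBRCOND(Op, DAG);
  }
}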
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
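A small sketch of how the MVT helpers above are typically used when a 256-bit operation has to be split into 128-bit halves; the helper name and the header path are assumptions for illustration:

#include "llvm/CodeGenTypes/MachineValueType.h"
using namespace llvm;

// Hypothetical helper: pick the 128-bit half of a 256-bit vector type.
static MVT getHalfVectorType(MVT VT) {
  assert(VT.is256BitVector() && "expected a 256-bit vector type");
  return VT.getHalfNumVectorElementsVT();   // e.g. v8i32 -> v4i32
}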
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
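BuildMI together with the addReg/addImm/addMBB builders is how pseudo instructions get expanded in EmitInstrWithCustomInserter-style code. A hedged fragment, assuming MBB and MI come from the enclosing hook and MyTarget::ADDI is a placeholder opcode:

// Hedged fragment; MBB (MachineBasicBlock*) and MI (MachineInstr&) are
// assumed context, MyTarget::ADDI is a placeholder opcode.
const TargetInstrInfo *TII = MBB->getParent()->getSubtarget().getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
BuildMI(*MBB, MI, DL, TII->get(MyTarget::ADDI), Dst)
    .addReg(Src)
    .addImm(0);
MI.eraseFromParent();   // the pseudo is fully replaced by the new instruction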
Representation of each machine instruction.
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
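These flags are what getTgtMemIntrinsic fills in so the generic code can attach a MachineMemOperand to a target intrinsic. A sketch, assuming a hypothetical intrinsic ID and a hypothetical MyTargetLowering class; the chosen types and alignment are illustrative:

// Hedged sketch: Intrinsic::my_target_vector_load and MyTargetLowering are
// hypothetical; the point is how the flags combine with the other fields.
bool MyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                          const CallInst &I,
                                          MachineFunction &MF,
                                          unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::my_target_vector_load:
    Info.opc = ISD::INTRINSIC_W_CHAIN;       // chained: it reads memory
    Info.memVT = MVT::v4i32;                 // in-memory type touched
    Info.ptrVal = I.getArgOperand(0);        // pointer operand of the call
    Info.align = Align(16);
    Info.flags = MachineMemOperand::MOLoad;  // a plain, non-volatile load
    return true;
  }
}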
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
ArrayRef< int > getMask() const
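The SelectionDAG factory methods above are the vocabulary of every custom lowering routine. A short illustrative fragment, assuming Op and DAG come from the surrounding LowerOperation and that the constants are placeholders:

// Illustrative fragment: build (or (shl X, 16), 0xFF) with the factory
// methods above; the shift amount and mask are placeholders.
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue X = Op.getOperand(0);
SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
SDValue Shifted = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
SDValue Result = DAG.getNode(ISD::OR, DL, VT, Shifted,
                             DAG.getConstant(0xFF, DL, VT));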
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:181
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
typename SuperClass::const_iterator const_iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:31
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:710
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:269
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:154
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
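The TargetLoweringBase setters above are normally called once from the target's TargetLowering constructor. A hedged sketch of that configuration pattern, assuming a hypothetical MyTarget register class, stack pointer register and subtarget accessor (the specific actions are illustrative, not a restatement of this file):

// Hedged sketch of constructor-time configuration inside a hypothetical
// MyTargetLowering constructor; MyTarget::GPRRegClass, MyTarget::SP and STI
// are placeholders.
addRegisterClass(MVT::i32, &MyTarget::GPRRegClass);
setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i1, Promote);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setStackPointerRegisterToSaveRestore(MyTarget::SP);
computeRegisterProperties(STI.getRegisterInfo());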
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
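makeLibCall is the usual escape hatch when an operation has no native lowering. A hedged fragment, assuming it runs inside a TargetLowering member function with Src, RetVT, DL and DAG available from the caller; the libcall selection is only an example:

// Hedged fragment: fall back to a libcall for an FP-to-signed-int conversion.
TargetLowering::MakeLibCallOptions CallOptions;
RTLIB::Libcall LC = RTLIB::getFPTOSINT(Src.getValueType(), RetVT);
SDValue Result, Chain;
std::tie(Result, Chain) =
    makeLibCall(DAG, LC, RetVT, Src, CallOptions, DL, SDValue());
// Result holds the converted value; Chain threads the call's side effects.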
Primary interface to the complete machine description for the target machine.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
self_iterator getIterator()
Definition ilist_node.h:130
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:801
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:765
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:656
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:528
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:535
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:778
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:134
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:563
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:793
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:718
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:787
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:145
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:908
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:815
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:713
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:543
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
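getSetCCSwappedOperands and getSetCCInverse are the helpers a SETCC combine reaches for when it rearranges operands. An illustrative fragment, assuming N is the SETCC node and DAG is available from the surrounding PerformDAGCombine:

// Illustrative fragment from a hypothetical SETCC combine.
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
if (isa<ConstantSDNode>(N->getOperand(0)) &&
    !isa<ConstantSDNode>(N->getOperand(1))) {
  // Move the constant to the RHS and swap the predicate to match.
  ISD::CondCode Swapped = ISD::getSetCCSwappedOperands(CC);
  return DAG.getSetCC(SDLoc(N), N->getValueType(0), N->getOperand(1),
                      N->getOperand(0), Swapped);
}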
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
ABI getTargetABI(StringRef ABIName)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
initializer< Ty > init(const Ty &Val)
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition PtrState.h:41
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:262
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1705
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:289
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:348
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:282
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1712
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:270
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
Definition ModRef.h:68
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:191
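These MathExtras predicates typically back immediate-legality hooks such as isLegalAddImmediate and isLegalICmpImmediate. A small sketch; the 12-bit range and the helper names are assumptions for illustration only:

#include "llvm/Support/MathExtras.h"
using namespace llvm;

// Hedged sketch: the ranges are illustrative, not a claim about any
// particular instruction encoding.
static bool isLegalAddImmediateSketch(int64_t Imm) {
  return isInt<12>(Imm);                           // fits a signed 12-bit field
}
static bool isLegalLogicalImmediateSketch(uint64_t Imm) {
  return isMask_64(Imm) || isShiftedMask_64(Imm);  // contiguous run of ones
}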
constexpr unsigned BitWidth
std::string join_items(Sep Separator, Args &&... Items)
Joins the strings in the parameter pack Items, adding Separator between the elements....
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:212
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...