1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
18#include "LoongArchSubtarget.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
28#include "llvm/IR/IRBuilder.h"
30#include "llvm/IR/IntrinsicsLoongArch.h"
32#include "llvm/Support/Debug.h"
37
38using namespace llvm;
39
40#define DEBUG_TYPE "loongarch-isel-lowering"
41
42STATISTIC(NumTailCalls, "Number of tail calls");
43
44static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
45 cl::desc("Trap on integer division by zero."),
46 cl::init(false));
47
49 const LoongArchSubtarget &STI)
50 : TargetLowering(TM), Subtarget(STI) {
51
52 MVT GRLenVT = Subtarget.getGRLenVT();
53
54 // Set up the register classes.
55
56 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
57 if (Subtarget.hasBasicF())
58 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
59 if (Subtarget.hasBasicD())
60 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
61
62 static const MVT::SimpleValueType LSXVTs[] = {
63 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
64 static const MVT::SimpleValueType LASXVTs[] = {
65 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
66
67 if (Subtarget.hasExtLSX())
68 for (MVT VT : LSXVTs)
69 addRegisterClass(VT, &LoongArch::LSX128RegClass);
70
71 if (Subtarget.hasExtLASX())
72 for (MVT VT : LASXVTs)
73 addRegisterClass(VT, &LoongArch::LASX256RegClass);
74
75 // Set operations for LA32 and LA64.
76
78 MVT::i1, Promote);
79
86
89 GRLenVT, Custom);
90
92
93 setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
94 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
95 setOperationAction(ISD::VASTART, MVT::Other, Custom);
96 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
97
98 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
99 setOperationAction(ISD::TRAP, MVT::Other, Legal);
100
104
105 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
106
107 // BITREV/REVB requires the 32S feature.
108 if (STI.has32S()) {
109 // Expand bitreverse.i16 with native-width bitrev and shift for now, before
110 // we get to know which of sll and revb.2h is faster.
113
114 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
115 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
116 // and i32 could still be byte-swapped relatively cheaply.
118 } else {
126 }
127
128 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
129 setOperationAction(ISD::BR_CC, GRLenVT, Expand);
130 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
134
137
138 // Set operations for LA64 only.
139
140 if (Subtarget.is64Bit()) {
147 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
158
162 Custom);
163 setOperationAction(ISD::LROUND, MVT::i32, Custom);
164 }
165
166 // Set operations for LA32 only.
167
168 if (!Subtarget.is64Bit()) {
174 if (Subtarget.hasBasicD())
175 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
176 }
177
178 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
179
180 static const ISD::CondCode FPCCToExpand[] = {
183
184 // Set operations for 'F' feature.
185
186 if (Subtarget.hasBasicF()) {
187 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
188 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
189 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
190 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
191 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
192
194 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
196 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
197 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
198 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
199 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
204 setOperationAction(ISD::FSIN, MVT::f32, Expand);
205 setOperationAction(ISD::FCOS, MVT::f32, Expand);
206 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
207 setOperationAction(ISD::FPOW, MVT::f32, Expand);
209 setOperationAction(ISD::FP16_TO_FP, MVT::f32,
210 Subtarget.isSoftFPABI() ? LibCall : Custom);
211 setOperationAction(ISD::FP_TO_FP16, MVT::f32,
212 Subtarget.isSoftFPABI() ? LibCall : Custom);
213 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
214 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
215 Subtarget.isSoftFPABI() ? LibCall : Custom);
216
217 if (Subtarget.is64Bit())
218 setOperationAction(ISD::FRINT, MVT::f32, Legal);
219
220 if (!Subtarget.hasBasicD()) {
222 if (Subtarget.is64Bit()) {
225 }
226 }
227 }
228
229 // Set operations for 'D' feature.
230
231 if (Subtarget.hasBasicD()) {
232 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
233 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
234 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
235 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
236 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
237 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
238 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
239
241 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
245 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
246 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
247 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
249 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
251 setOperationAction(ISD::FSIN, MVT::f64, Expand);
252 setOperationAction(ISD::FCOS, MVT::f64, Expand);
253 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
254 setOperationAction(ISD::FPOW, MVT::f64, Expand);
256 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
257 setOperationAction(ISD::FP_TO_FP16, MVT::f64,
258 Subtarget.isSoftFPABI() ? LibCall : Custom);
259 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
260 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
261 Subtarget.isSoftFPABI() ? LibCall : Custom);
262
263 if (Subtarget.is64Bit())
264 setOperationAction(ISD::FRINT, MVT::f64, Legal);
265 }
266
267 // Set operations for 'LSX' feature.
268
269 if (Subtarget.hasExtLSX()) {
271 // Expand all truncating stores and extending loads.
272 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
273 setTruncStoreAction(VT, InnerVT, Expand);
276 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
277 }
278 // By default everything must be expanded. Then we will selectively turn
279 // on ones that can be effectively codegen'd.
280 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
282 }
283
284 for (MVT VT : LSXVTs) {
285 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
286 setOperationAction(ISD::BITCAST, VT, Legal);
288
292
297 }
298 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
301 Legal);
303 VT, Legal);
310 Expand);
318 }
319 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
321 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
323 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
326 }
327 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
331 setOperationAction(ISD::FSQRT, VT, Legal);
332 setOperationAction(ISD::FNEG, VT, Legal);
335 VT, Expand);
337 }
339 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
340 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
341 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
342 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
343
344 for (MVT VT :
345 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
346 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
348 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
349 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
350 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
351 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
352 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
353 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
354 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
355 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
356 }
357 }
358
359 // Set operations for 'LASX' feature.
360
361 if (Subtarget.hasExtLASX()) {
362 for (MVT VT : LASXVTs) {
363 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
364 setOperationAction(ISD::BITCAST, VT, Legal);
366
372
376 }
377 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
380 Legal);
382 VT, Legal);
389 Expand);
397 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
398 }
399 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
401 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
403 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
406 }
407 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
411 setOperationAction(ISD::FSQRT, VT, Legal);
412 setOperationAction(ISD::FNEG, VT, Legal);
415 VT, Expand);
417 }
418 }
419
420 // Set DAG combine for LA32 and LA64.
421
426
427 // Set DAG combine for 'LSX' feature.
428
429 if (Subtarget.hasExtLSX()) {
431 setTargetDAGCombine(ISD::BITCAST);
432 }
433
434 // Set DAG combine for 'LASX' feature.
435
436 if (Subtarget.hasExtLASX())
438
439 // Compute derived properties from the register classes.
440 computeRegisterProperties(Subtarget.getRegisterInfo());
441
443
446
447 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
448
450
451 // Function alignments.
453 // Set preferred alignments.
454 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
455 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
456 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
457
458 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
459 if (Subtarget.hasLAMCAS())
461
462 if (Subtarget.hasSCQ()) {
464 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
465 }
466}
467
469 const GlobalAddressSDNode *GA) const {
470 // In order to maximise the opportunity for common subexpression elimination,
471 // keep a separate ADD node for the global address offset instead of folding
472 // it in the global address node. Later peephole optimisations may choose to
473 // fold it back in when profitable.
474 return false;
475}
476
478 SelectionDAG &DAG) const {
479 switch (Op.getOpcode()) {
480 case ISD::ATOMIC_FENCE:
481 return lowerATOMIC_FENCE(Op, DAG);
483 return lowerEH_DWARF_CFA(Op, DAG);
485 return lowerGlobalAddress(Op, DAG);
487 return lowerGlobalTLSAddress(Op, DAG);
489 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
491 return lowerINTRINSIC_W_CHAIN(Op, DAG);
493 return lowerINTRINSIC_VOID(Op, DAG);
495 return lowerBlockAddress(Op, DAG);
496 case ISD::JumpTable:
497 return lowerJumpTable(Op, DAG);
498 case ISD::SHL_PARTS:
499 return lowerShiftLeftParts(Op, DAG);
500 case ISD::SRA_PARTS:
501 return lowerShiftRightParts(Op, DAG, true);
502 case ISD::SRL_PARTS:
503 return lowerShiftRightParts(Op, DAG, false);
505 return lowerConstantPool(Op, DAG);
506 case ISD::FP_TO_SINT:
507 return lowerFP_TO_SINT(Op, DAG);
508 case ISD::BITCAST:
509 return lowerBITCAST(Op, DAG);
510 case ISD::UINT_TO_FP:
511 return lowerUINT_TO_FP(Op, DAG);
512 case ISD::SINT_TO_FP:
513 return lowerSINT_TO_FP(Op, DAG);
514 case ISD::VASTART:
515 return lowerVASTART(Op, DAG);
516 case ISD::FRAMEADDR:
517 return lowerFRAMEADDR(Op, DAG);
518 case ISD::RETURNADDR:
519 return lowerRETURNADDR(Op, DAG);
521 return lowerWRITE_REGISTER(Op, DAG);
523 return lowerINSERT_VECTOR_ELT(Op, DAG);
525 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
527 return lowerBUILD_VECTOR(Op, DAG);
529 return lowerCONCAT_VECTORS(Op, DAG);
531 return lowerVECTOR_SHUFFLE(Op, DAG);
532 case ISD::BITREVERSE:
533 return lowerBITREVERSE(Op, DAG);
535 return lowerSCALAR_TO_VECTOR(Op, DAG);
536 case ISD::PREFETCH:
537 return lowerPREFETCH(Op, DAG);
538 case ISD::SELECT:
539 return lowerSELECT(Op, DAG);
540 case ISD::BRCOND:
541 return lowerBRCOND(Op, DAG);
542 case ISD::FP_TO_FP16:
543 return lowerFP_TO_FP16(Op, DAG);
544 case ISD::FP16_TO_FP:
545 return lowerFP16_TO_FP(Op, DAG);
546 case ISD::FP_TO_BF16:
547 return lowerFP_TO_BF16(Op, DAG);
548 case ISD::BF16_TO_FP:
549 return lowerBF16_TO_FP(Op, DAG);
550 case ISD::VECREDUCE_ADD:
551 return lowerVECREDUCE_ADD(Op, DAG);
552 case ISD::VECREDUCE_AND:
553 case ISD::VECREDUCE_OR:
554 case ISD::VECREDUCE_XOR:
555 case ISD::VECREDUCE_SMAX:
556 case ISD::VECREDUCE_SMIN:
557 case ISD::VECREDUCE_UMAX:
558 case ISD::VECREDUCE_UMIN:
559 return lowerVECREDUCE(Op, DAG);
560 }
561 return SDValue();
562}
563
564// Lower vecreduce_add using vhaddw instructions.
565// For Example:
566// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
567// can be lowered to:
568// VHADDW_D_W vr0, vr0, vr0
569// VHADDW_Q_D vr0, vr0, vr0
570// VPICKVE2GR_D a0, vr0, 0
571// ADDI_W a0, a0, 0
572SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
573 SelectionDAG &DAG) const {
574
575 SDLoc DL(Op);
576 MVT OpVT = Op.getSimpleValueType();
577 SDValue Val = Op.getOperand(0);
578
579 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
580 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
581
582 unsigned LegalVecSize = 128;
583 bool isLASX256Vector =
584 Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;
585
586 // Widen the vector operand until its type is legal.
587 while (!isTypeLegal(Val.getSimpleValueType())) {
588 Val = DAG.WidenVector(Val, DL);
589 }
590
591 // NumEles is the iteration count; v4i32 for LSX and v8i32 for LASX
592 // should have the same count.
593 if (isLASX256Vector) {
594 NumEles /= 2;
595 LegalVecSize = 256;
596 }
597
598 for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
599 MVT IntTy = MVT::getIntegerVT(EleBits);
600 MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
601 Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
602 }
603
604 if (isLASX256Vector) {
605 SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
606 DAG.getConstant(2, DL, MVT::i64));
607 Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
608 }
609
610 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
611 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
612}
613
614// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
615// For Example:
616// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
617// can be lowered to:
618// VBSRL_V vr1, vr0, 8
619// VMAX_W vr0, vr1, vr0
620// VBSRL_V vr1, vr0, 4
621// VMAX_W vr0, vr1, vr0
622// VPICKVE2GR_W a0, vr0, 0
623// A 256-bit vector is illegal here and is split into two 128-bit vectors
624// by default, which are then processed by this function.
625SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
626 SelectionDAG &DAG) const {
627 SDLoc DL(Op);
628
629 MVT OpVT = Op.getSimpleValueType();
630 SDValue Val = Op.getOperand(0);
631
632 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
633 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
634
635 // Widen the vector operand until its type is legal.
636 while (!isTypeLegal(Val.getSimpleValueType())) {
637 Val = DAG.WidenVector(Val, DL);
638 }
639
640 unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
641 MVT VecTy = Val.getSimpleValueType();
642
643 for (int i = NumEles; i > 1; i /= 2) {
644 SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, MVT::i64);
645 SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
646 Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
647 }
648
649 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
650 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
651}
652
653SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
654 SelectionDAG &DAG) const {
655 unsigned IsData = Op.getConstantOperandVal(4);
656
657 // We don't support non-data prefetch.
658 // Just preserve the chain.
659 if (!IsData)
660 return Op.getOperand(0);
661
662 return Op;
663}
664
665// Return true if Val is equal to (setcc LHS, RHS, CC).
666// Return false if Val is the inverse of (setcc LHS, RHS, CC).
667// Otherwise, return std::nullopt.
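// For example, with Val = (setcc a, b, setlt), matchSetCC(a, b, setlt, Val)
// returns true, matchSetCC(a, b, setge, Val) returns false, and
// matchSetCC(a, c, setlt, Val) returns std::nullopt.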
668static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
669 ISD::CondCode CC, SDValue Val) {
670 assert(Val->getOpcode() == ISD::SETCC);
671 SDValue LHS2 = Val.getOperand(0);
672 SDValue RHS2 = Val.getOperand(1);
673 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
674
675 if (LHS == LHS2 && RHS == RHS2) {
676 if (CC == CC2)
677 return true;
678 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
679 return false;
680 } else if (LHS == RHS2 && RHS == LHS2) {
682 if (CC == CC2)
683 return true;
684 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
685 return false;
686 }
687
688 return std::nullopt;
689}
690
692 const LoongArchSubtarget &Subtarget) {
693 SDValue CondV = N->getOperand(0);
694 SDValue TrueV = N->getOperand(1);
695 SDValue FalseV = N->getOperand(2);
696 MVT VT = N->getSimpleValueType(0);
697 SDLoc DL(N);
698
699 // (select c, -1, y) -> -c | y
700 if (isAllOnesConstant(TrueV)) {
701 SDValue Neg = DAG.getNegative(CondV, DL, VT);
702 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
703 }
704 // (select c, y, -1) -> (c-1) | y
705 if (isAllOnesConstant(FalseV)) {
706 SDValue Neg =
707 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
708 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
709 }
710
711 // (select c, 0, y) -> (c-1) & y
712 if (isNullConstant(TrueV)) {
713 SDValue Neg =
714 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
715 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
716 }
717 // (select c, y, 0) -> -c & y
718 if (isNullConstant(FalseV)) {
719 SDValue Neg = DAG.getNegative(CondV, DL, VT);
720 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
721 }
722
723 // select c, ~x, x --> xor -c, x
724 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
725 const APInt &TrueVal = TrueV->getAsAPIntVal();
726 const APInt &FalseVal = FalseV->getAsAPIntVal();
727 if (~TrueVal == FalseVal) {
728 SDValue Neg = DAG.getNegative(CondV, DL, VT);
729 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
730 }
731 }
732
733 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
734 // when both truev and falsev are also setcc.
735 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
736 FalseV.getOpcode() == ISD::SETCC) {
737 SDValue LHS = CondV.getOperand(0);
738 SDValue RHS = CondV.getOperand(1);
739 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
740
741 // (select x, x, y) -> x | y
742 // (select !x, x, y) -> x & y
743 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
744 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
745 DAG.getFreeze(FalseV));
746 }
747 // (select x, y, x) -> x & y
748 // (select !x, y, x) -> x | y
749 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
750 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
751 DAG.getFreeze(TrueV), FalseV);
752 }
753 }
754
755 return SDValue();
756}
757
758// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
759// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
760// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
761// For now we only consider the transformation profitable if `binOp(c0, c1)` ends up
762// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
763// than `c0`?
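// For example:
//   (add (select cond, x, 1), -1)
// is transformed into:
//   (select cond, (add x, -1), 0)
// and the resulting select can then be replaced with an and.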
764static SDValue
766 const LoongArchSubtarget &Subtarget) {
767 unsigned SelOpNo = 0;
768 SDValue Sel = BO->getOperand(0);
769 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
770 SelOpNo = 1;
771 Sel = BO->getOperand(1);
772 }
773
774 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
775 return SDValue();
776
777 unsigned ConstSelOpNo = 1;
778 unsigned OtherSelOpNo = 2;
779 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
780 ConstSelOpNo = 2;
781 OtherSelOpNo = 1;
782 }
783 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
784 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
785 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
786 return SDValue();
787
788 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
789 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
790 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
791 return SDValue();
792
793 SDLoc DL(Sel);
794 EVT VT = BO->getValueType(0);
795
796 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
797 if (SelOpNo == 1)
798 std::swap(NewConstOps[0], NewConstOps[1]);
799
800 SDValue NewConstOp =
801 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
802 if (!NewConstOp)
803 return SDValue();
804
805 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
806 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
807 return SDValue();
808
809 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
810 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
811 if (SelOpNo == 1)
812 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
813 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
814
815 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
816 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
817 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
818}
819
820// Changes the condition code and swaps operands if necessary, so the SetCC
821// operation matches one of the comparisons supported directly by branches
822// in the LoongArch ISA. May adjust compares to favor compare with 0 over
823// compare with 1/-1.
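// For example, (setcc X, 1, setlt) is rewritten as (setcc 0, X, setge), and a
// single-bit AND test that ANDI cannot encode is shifted up to the sign bit
// and compared signed against zero.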
825 ISD::CondCode &CC, SelectionDAG &DAG) {
826 // If this is a single bit test that can't be handled by ANDI, shift the
827 // bit to be tested to the MSB and perform a signed compare with 0.
828 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
829 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
830 isa<ConstantSDNode>(LHS.getOperand(1))) {
831 uint64_t Mask = LHS.getConstantOperandVal(1);
832 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
833 unsigned ShAmt = 0;
834 if (isPowerOf2_64(Mask)) {
835 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
836 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
837 } else {
838 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
839 }
840
841 LHS = LHS.getOperand(0);
842 if (ShAmt != 0)
843 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
844 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
845 return;
846 }
847 }
848
849 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
850 int64_t C = RHSC->getSExtValue();
851 switch (CC) {
852 default:
853 break;
854 case ISD::SETGT:
855 // Convert X > -1 to X >= 0.
856 if (C == -1) {
857 RHS = DAG.getConstant(0, DL, RHS.getValueType());
858 CC = ISD::SETGE;
859 return;
860 }
861 break;
862 case ISD::SETLT:
863 // Convert X < 1 to 0 >= X.
864 if (C == 1) {
865 RHS = LHS;
866 LHS = DAG.getConstant(0, DL, RHS.getValueType());
867 CC = ISD::SETGE;
868 return;
869 }
870 break;
871 }
872 }
873
874 switch (CC) {
875 default:
876 break;
877 case ISD::SETGT:
878 case ISD::SETLE:
879 case ISD::SETUGT:
880 case ISD::SETULE:
882 std::swap(LHS, RHS);
883 break;
884 }
885}
886
887SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
888 SelectionDAG &DAG) const {
889 SDValue CondV = Op.getOperand(0);
890 SDValue TrueV = Op.getOperand(1);
891 SDValue FalseV = Op.getOperand(2);
892 SDLoc DL(Op);
893 MVT VT = Op.getSimpleValueType();
894 MVT GRLenVT = Subtarget.getGRLenVT();
895
896 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
897 return V;
898
899 if (Op.hasOneUse()) {
900 unsigned UseOpc = Op->user_begin()->getOpcode();
901 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
902 SDNode *BinOp = *Op->user_begin();
903 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
904 DAG, Subtarget)) {
905 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
906 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
907 // may return a constant node and cause a crash in lowerSELECT.
908 if (NewSel.getOpcode() == ISD::SELECT)
909 return lowerSELECT(NewSel, DAG);
910 return NewSel;
911 }
912 }
913 }
914
915 // If the condition is not an integer SETCC which operates on GRLenVT, we need
916 // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
917 // (select condv, truev, falsev)
918 // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
919 if (CondV.getOpcode() != ISD::SETCC ||
920 CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
921 SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
922 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
923
924 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
925
926 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
927 }
928
929 // If the CondV is the output of a SETCC node which operates on GRLenVT
930 // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
931 // to take advantage of the integer compare+branch instructions. i.e.: (select
932 // (setcc lhs, rhs, cc), truev, falsev)
933 // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
934 SDValue LHS = CondV.getOperand(0);
935 SDValue RHS = CondV.getOperand(1);
936 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
937
938 // Special case for a select of 2 constants that have a difference of 1.
939 // Normally this is done by DAGCombine, but if the select is introduced by
940 // type legalization or op legalization, we miss it. Restricting to SETLT
941 // case for now because that is what signed saturating add/sub need.
942 // FIXME: We don't need the condition to be SETLT or even a SETCC,
943 // but we would probably want to swap the true/false values if the condition
944 // is SETGE/SETLE to avoid an XORI.
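  // For example, (select (setlt x, y), -1, -2) becomes (add (setlt x, y), -2).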
945 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
946 CCVal == ISD::SETLT) {
947 const APInt &TrueVal = TrueV->getAsAPIntVal();
948 const APInt &FalseVal = FalseV->getAsAPIntVal();
949 if (TrueVal - 1 == FalseVal)
950 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
951 if (TrueVal + 1 == FalseVal)
952 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
953 }
954
955 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
956 // 1 < x ? x : 1 -> 0 < x ? x : 1
957 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
958 RHS == TrueV && LHS == FalseV) {
959 LHS = DAG.getConstant(0, DL, VT);
960 // 0 <u x is the same as x != 0.
961 if (CCVal == ISD::SETULT) {
962 std::swap(LHS, RHS);
963 CCVal = ISD::SETNE;
964 }
965 }
966
967 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
968 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
969 RHS == FalseV) {
970 RHS = DAG.getConstant(0, DL, VT);
971 }
972
973 SDValue TargetCC = DAG.getCondCode(CCVal);
974
975 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
976 // (select (setcc lhs, rhs, CC), constant, falsev)
977 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
978 std::swap(TrueV, FalseV);
979 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
980 }
981
982 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
983 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
984}
985
986SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
987 SelectionDAG &DAG) const {
988 SDValue CondV = Op.getOperand(1);
989 SDLoc DL(Op);
990 MVT GRLenVT = Subtarget.getGRLenVT();
991
992 if (CondV.getOpcode() == ISD::SETCC) {
993 if (CondV.getOperand(0).getValueType() == GRLenVT) {
994 SDValue LHS = CondV.getOperand(0);
995 SDValue RHS = CondV.getOperand(1);
996 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
997
998 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
999
1000 SDValue TargetCC = DAG.getCondCode(CCVal);
1001 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1002 Op.getOperand(0), LHS, RHS, TargetCC,
1003 Op.getOperand(2));
1004 } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
1005 return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
1006 Op.getOperand(0), CondV, Op.getOperand(2));
1007 }
1008 }
1009
1010 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1011 Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
1012 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
1013}
1014
1015SDValue
1016LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1017 SelectionDAG &DAG) const {
1018 SDLoc DL(Op);
1019 MVT OpVT = Op.getSimpleValueType();
1020
1021 SDValue Vector = DAG.getUNDEF(OpVT);
1022 SDValue Val = Op.getOperand(0);
1023 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
1024
1025 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
1026}
1027
1028SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
1029 SelectionDAG &DAG) const {
1030 EVT ResTy = Op->getValueType(0);
1031 SDValue Src = Op->getOperand(0);
1032 SDLoc DL(Op);
1033
1034 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
1035 unsigned int OrigEltNum = ResTy.getVectorNumElements();
1036 unsigned int NewEltNum = NewVT.getVectorNumElements();
1037
1038 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
1039
1041 for (unsigned int i = 0; i < NewEltNum; i++) {
1042 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
1043 DAG.getConstant(i, DL, MVT::i64));
1044 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
1045 ? (unsigned)LoongArchISD::BITREV_8B
1046 : (unsigned)ISD::BITREVERSE;
1047 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
1048 }
1049 SDValue Res =
1050 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
1051
1052 switch (ResTy.getSimpleVT().SimpleTy) {
1053 default:
1054 return SDValue();
1055 case MVT::v16i8:
1056 case MVT::v32i8:
1057 return Res;
1058 case MVT::v8i16:
1059 case MVT::v16i16:
1060 case MVT::v4i32:
1061 case MVT::v8i32: {
1063 for (unsigned int i = 0; i < NewEltNum; i++)
1064 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
1065 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
1066 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
1067 }
1068 }
1069}
1070
1071// Widen element type to get a new mask value (if possible).
1072// For example:
1073// shufflevector <4 x i32> %a, <4 x i32> %b,
1074// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1075// is equivalent to:
1076// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1077// can be lowered to:
1078// VPACKOD_D vr0, vr0, vr1
1080 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1081 unsigned EltBits = VT.getScalarSizeInBits();
1082
1083 if (EltBits > 32 || EltBits == 1)
1084 return SDValue();
1085
1086 SmallVector<int, 8> NewMask;
1087 if (widenShuffleMaskElts(Mask, NewMask)) {
1088 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
1089 : MVT::getIntegerVT(EltBits * 2);
1090 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
1091 if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
1092 SDValue NewV1 = DAG.getBitcast(NewVT, V1);
1093 SDValue NewV2 = DAG.getBitcast(NewVT, V2);
1094 return DAG.getBitcast(
1095 VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
1096 }
1097 }
1098
1099 return SDValue();
1100}
1101
1102/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
1103/// instruction.
1104// The function matches elements from one of the input vectors shuffled to the
1105// left or right with zeroable elements 'shifted in'. It handles both the
1106// strictly bit-wise element shifts and the byte shift across an entire 128-bit
1107// lane.
1108// Mostly copied from X86.
1109static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
1110 unsigned ScalarSizeInBits, ArrayRef<int> Mask,
1111 int MaskOffset, const APInt &Zeroable) {
1112 int Size = Mask.size();
1113 unsigned SizeInBits = Size * ScalarSizeInBits;
1114
1115 auto CheckZeros = [&](int Shift, int Scale, bool Left) {
1116 for (int i = 0; i < Size; i += Scale)
1117 for (int j = 0; j < Shift; ++j)
1118 if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
1119 return false;
1120
1121 return true;
1122 };
1123
1124 auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
1125 int Step = 1) {
1126 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
1127 if (!(Mask[i] == -1 || Mask[i] == Low))
1128 return false;
1129 return true;
1130 };
1131
1132 auto MatchShift = [&](int Shift, int Scale, bool Left) {
1133 for (int i = 0; i != Size; i += Scale) {
1134 unsigned Pos = Left ? i + Shift : i;
1135 unsigned Low = Left ? i : i + Shift;
1136 unsigned Len = Scale - Shift;
1137 if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
1138 return -1;
1139 }
1140
1141 int ShiftEltBits = ScalarSizeInBits * Scale;
1142 bool ByteShift = ShiftEltBits > 64;
1143 Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
1144 : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
1145 int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
1146
1147 // Normalize the scale for byte shifts to still produce an i64 element
1148 // type.
1149 Scale = ByteShift ? Scale / 2 : Scale;
1150
1151 // We need to round trip through the appropriate type for the shift.
1152 MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
1153 ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
1154 : MVT::getVectorVT(ShiftSVT, Size / Scale);
1155 return (int)ShiftAmt;
1156 };
1157
1158 unsigned MaxWidth = 128;
1159 for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
1160 for (int Shift = 1; Shift != Scale; ++Shift)
1161 for (bool Left : {true, false})
1162 if (CheckZeros(Shift, Scale, Left)) {
1163 int ShiftAmt = MatchShift(Shift, Scale, Left);
1164 if (0 < ShiftAmt)
1165 return ShiftAmt;
1166 }
1167
1168 // no match
1169 return -1;
1170}
1171
1172/// Lower VECTOR_SHUFFLE as shift (if possible).
1173///
1174/// For example:
1175/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1176/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1177/// is lowered to:
1178/// (VBSLL_V $v0, $v0, 4)
1179///
1180/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1181/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1182/// is lowered to:
1183/// (VSLLI_D $v0, $v0, 32)
1185 MVT VT, SDValue V1, SDValue V2,
1186 SelectionDAG &DAG,
1187 const LoongArchSubtarget &Subtarget,
1188 const APInt &Zeroable) {
1189 int Size = Mask.size();
1190 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1191
1192 MVT ShiftVT;
1193 SDValue V = V1;
1194 unsigned Opcode;
1195
1196 // Try to match shuffle against V1 shift.
1197 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1198 Mask, 0, Zeroable);
1199
1200 // If V1 failed, try to match shuffle against V2 shift.
1201 if (ShiftAmt < 0) {
1202 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1203 Mask, Size, Zeroable);
1204 V = V2;
1205 }
1206
1207 if (ShiftAmt < 0)
1208 return SDValue();
1209
1210 assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1211 "Illegal integer vector type");
1212 V = DAG.getBitcast(ShiftVT, V);
1213 V = DAG.getNode(Opcode, DL, ShiftVT, V,
1214 DAG.getConstant(ShiftAmt, DL, Subtarget.getGRLenVT()));
1215 return DAG.getBitcast(VT, V);
1216}
1217
1218/// Determine whether a range fits a regular pattern of values.
1219/// This function accounts for the possibility of jumping over the End iterator.
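/// For example, the even-indexed elements of the mask <0, 7, 2, 7, 4, 7> fit
/// ExpectedIndex = 0 with ExpectedIndexStride = 2 when CheckStride = 2 and
/// Begin = Mask.begin().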
1220template <typename ValType>
1221static bool
1223 unsigned CheckStride,
1225 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1226 auto &I = Begin;
1227
1228 while (I != End) {
1229 if (*I != -1 && *I != ExpectedIndex)
1230 return false;
1231 ExpectedIndex += ExpectedIndexStride;
1232
1233 // Incrementing past End is undefined behaviour so we must increment one
1234 // step at a time and check for End at each step.
1235 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1236 ; // Empty loop body.
1237 }
1238 return true;
1239}
1240
1241/// Compute whether each element of a shuffle is zeroable.
1242///
1243/// A "zeroable" vector shuffle element is one which can be lowered to zero.
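/// For example, in
///   shufflevector <4 x i32> %a, <4 x i32> zeroinitializer,
///                 <4 x i32> <i32 0, i32 4, i32 undef, i32 6>
/// elements 1 and 3 are zeroable because they read from the all-zeros vector,
/// and element 2 is undef.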
1245 SDValue V2, APInt &KnownUndef,
1246 APInt &KnownZero) {
1247 int Size = Mask.size();
1248 KnownUndef = KnownZero = APInt::getZero(Size);
1249
1250 V1 = peekThroughBitcasts(V1);
1251 V2 = peekThroughBitcasts(V2);
1252
1253 bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
1254 bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
1255
1256 int VectorSizeInBits = V1.getValueSizeInBits();
1257 int ScalarSizeInBits = VectorSizeInBits / Size;
1258 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1259 (void)ScalarSizeInBits;
1260
1261 for (int i = 0; i < Size; ++i) {
1262 int M = Mask[i];
1263 if (M < 0) {
1264 KnownUndef.setBit(i);
1265 continue;
1266 }
1267 if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1268 KnownZero.setBit(i);
1269 continue;
1270 }
1271 }
1272}
1273
1274/// Test whether a shuffle mask is equivalent within each sub-lane.
1275///
1276/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1277/// non-trivial to compute in the face of undef lanes. The representation is
1278/// suitable for use with existing 128-bit shuffles as entries from the second
1279/// vector have been remapped to [LaneSize, 2*LaneSize).
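/// For example, with 128-bit lanes the v8i32 mask <0, 3, 2, 1, 4, 7, 6, 5>
/// repeats within each lane and produces the repeated mask <0, 3, 2, 1>.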
1280static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1281 ArrayRef<int> Mask,
1282 SmallVectorImpl<int> &RepeatedMask) {
1283 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1284 RepeatedMask.assign(LaneSize, -1);
1285 int Size = Mask.size();
1286 for (int i = 0; i < Size; ++i) {
1287 assert(Mask[i] == -1 || Mask[i] >= 0);
1288 if (Mask[i] < 0)
1289 continue;
1290 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1291 // This entry crosses lanes, so there is no way to model this shuffle.
1292 return false;
1293
1294 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1295 // Adjust second vector indices to start at LaneSize instead of Size.
1296 int LocalM =
1297 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1298 if (RepeatedMask[i % LaneSize] < 0)
1299 // This is the first non-undef entry in this slot of a 128-bit lane.
1300 RepeatedMask[i % LaneSize] = LocalM;
1301 else if (RepeatedMask[i % LaneSize] != LocalM)
1302 // Found a mismatch with the repeated mask.
1303 return false;
1304 }
1305 return true;
1306}
1307
1308/// Attempts to match vector shuffle as byte rotation.
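/// For example, the v2i64 mask <3, 0> is matched as a rotation by 8 bytes,
/// which the byte-rotate lowering below turns into a VBSRL/VBSLL/VOR sequence.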
1310 ArrayRef<int> Mask) {
1311
1312 SDValue Lo, Hi;
1313 SmallVector<int, 16> RepeatedMask;
1314
1315 if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
1316 return -1;
1317
1318 int NumElts = RepeatedMask.size();
1319 int Rotation = 0;
1320 int Scale = 16 / NumElts;
1321
1322 for (int i = 0; i < NumElts; ++i) {
1323 int M = RepeatedMask[i];
1324 assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1325 "Unexpected mask index.");
1326 if (M < 0)
1327 continue;
1328
1329 // Determine where a rotated vector would have started.
1330 int StartIdx = i - (M % NumElts);
1331 if (StartIdx == 0)
1332 return -1;
1333
1334 // If we found the tail of a vector the rotation must be the missing
1335 // front. If we found the head of a vector, it must be how much of the
1336 // head.
1337 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1338
1339 if (Rotation == 0)
1340 Rotation = CandidateRotation;
1341 else if (Rotation != CandidateRotation)
1342 return -1;
1343
1344 // Compute which value this mask is pointing at.
1345 SDValue MaskV = M < NumElts ? V1 : V2;
1346
1347 // Compute which of the two target values this index should be assigned
1348 // to. This reflects whether the high elements are remaining or the low
1349 // elements are remaining.
1350 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1351
1352 // Either set up this value if we've not encountered it before, or check
1353 // that it remains consistent.
1354 if (!TargetV)
1355 TargetV = MaskV;
1356 else if (TargetV != MaskV)
1357 return -1;
1358 }
1359
1360 // Check that we successfully analyzed the mask, and normalize the results.
1361 assert(Rotation != 0 && "Failed to locate a viable rotation!");
1362 assert((Lo || Hi) && "Failed to find a rotated input vector!");
1363 if (!Lo)
1364 Lo = Hi;
1365 else if (!Hi)
1366 Hi = Lo;
1367
1368 V1 = Lo;
1369 V2 = Hi;
1370
1371 return Rotation * Scale;
1372}
1373
1374/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1375///
1376/// For example:
1377/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1378/// <2 x i32> <i32 3, i32 0>
1379/// is lowered to:
1380/// (VBSRL_V $v1, $v1, 8)
1381/// (VBSLL_V $v0, $v0, 8)
1382/// (VOR_V $v0, $V0, $v1)
1383static SDValue
1385 SDValue V1, SDValue V2, SelectionDAG &DAG,
1386 const LoongArchSubtarget &Subtarget) {
1387
1388 SDValue Lo = V1, Hi = V2;
1389 int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
1390 if (ByteRotation <= 0)
1391 return SDValue();
1392
1393 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
1394 Lo = DAG.getBitcast(ByteVT, Lo);
1395 Hi = DAG.getBitcast(ByteVT, Hi);
1396
1397 int LoByteShift = 16 - ByteRotation;
1398 int HiByteShift = ByteRotation;
1399 MVT GRLenVT = Subtarget.getGRLenVT();
1400
1401 SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
1402 DAG.getConstant(LoByteShift, DL, GRLenVT));
1403 SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
1404 DAG.getConstant(HiByteShift, DL, GRLenVT));
1405 return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
1406}
1407
1408/// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
1409///
1410/// For example:
1411/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1412/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1413/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1414/// is lowered to:
1415/// (VREPLI $v1, 0)
1416/// (VILVL $v0, $v1, $v0)
1418 ArrayRef<int> Mask, MVT VT,
1419 SDValue V1, SDValue V2,
1420 SelectionDAG &DAG,
1421 const APInt &Zeroable) {
1422 int Bits = VT.getSizeInBits();
1423 int EltBits = VT.getScalarSizeInBits();
1424 int NumElements = VT.getVectorNumElements();
1425
1426 if (Zeroable.isAllOnes())
1427 return DAG.getConstant(0, DL, VT);
1428
1429 // Define a helper function to check a particular ext-scale and lower to it if
1430 // valid.
1431 auto Lower = [&](int Scale) -> SDValue {
1432 SDValue InputV;
1433 bool AnyExt = true;
1434 int Offset = 0;
1435 for (int i = 0; i < NumElements; i++) {
1436 int M = Mask[i];
1437 if (M < 0)
1438 continue;
1439 if (i % Scale != 0) {
1440 // Each of the extended elements needs to be zeroable.
1441 if (!Zeroable[i])
1442 return SDValue();
1443
1444 AnyExt = false;
1445 continue;
1446 }
1447
1448 // Each of the base elements needs to be consecutive indices into the
1449 // same input vector.
1450 SDValue V = M < NumElements ? V1 : V2;
1451 M = M % NumElements;
1452 if (!InputV) {
1453 InputV = V;
1454 Offset = M - (i / Scale);
1455
1456 // These offsets can't be handled.
1457 if (Offset % (NumElements / Scale))
1458 return SDValue();
1459 } else if (InputV != V)
1460 return SDValue();
1461
1462 if (M != (Offset + (i / Scale)))
1463 return SDValue(); // Non-consecutive strided elements.
1464 }
1465
1466 // If we fail to find an input, we have a zero-shuffle which should always
1467 // have already been handled.
1468 if (!InputV)
1469 return SDValue();
1470
1471 do {
1472 unsigned VilVLoHi = LoongArchISD::VILVL;
1473 if (Offset >= (NumElements / 2)) {
1474 VilVLoHi = LoongArchISD::VILVH;
1475 Offset -= (NumElements / 2);
1476 }
1477
1478 MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
1479 SDValue Ext =
1480 AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
1481 InputV = DAG.getBitcast(InputVT, InputV);
1482 InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
1483 Scale /= 2;
1484 EltBits *= 2;
1485 NumElements /= 2;
1486 } while (Scale > 1);
1487 return DAG.getBitcast(VT, InputV);
1488 };
1489
1490 // Each iteration, try extending the elements half as much, but into twice as
1491 // many elements.
1492 for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
1493 NumExtElements *= 2) {
1494 if (SDValue V = Lower(NumElements / NumExtElements))
1495 return V;
1496 }
1497 return SDValue();
1498}
1499
1500/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
1501///
1502/// VREPLVEI performs vector broadcast based on an element specified by an
1503/// integer immediate, with its mask being similar to:
1504/// <x, x, x, ...>
1505/// where x is any valid index.
1506///
1507/// When undef's appear in the mask they are treated as if they were whatever
1508/// value is necessary in order to fit the above form.
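/// For example:
///   %2 = shufflevector <4 x i32> %0, <4 x i32> undef,
///                      <4 x i32> <i32 1, i32 1, i32 1, i32 1>
/// is lowered to:
///   (VREPLVEI_W $v0, $v0, 1)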
1509static SDValue
1511 SDValue V1, SDValue V2, SelectionDAG &DAG,
1512 const LoongArchSubtarget &Subtarget) {
1513 int SplatIndex = -1;
1514 for (const auto &M : Mask) {
1515 if (M != -1) {
1516 SplatIndex = M;
1517 break;
1518 }
1519 }
1520
1521 if (SplatIndex == -1)
1522 return DAG.getUNDEF(VT);
1523
1524 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1525 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
1526 APInt Imm(64, SplatIndex);
1527 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1528 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()));
1529 }
1530
1531 return SDValue();
1532}
1533
1534/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
1535///
1536/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
1537/// elements according to a <4 x i2> constant (encoded as an integer immediate).
1538///
1539/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
1540/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
1541/// When undef's appear they are treated as if they were whatever value is
1542/// necessary in order to fit the above forms.
1543///
1544/// For example:
1545/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
1546/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
1547/// i32 7, i32 6, i32 5, i32 4>
1548/// is lowered to:
1549/// (VSHUF4I_H $v0, $v1, 27)
1550/// where the 27 comes from:
1551/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
1552static SDValue
1554 SDValue V1, SDValue V2, SelectionDAG &DAG,
1555 const LoongArchSubtarget &Subtarget) {
1556
1557 unsigned SubVecSize = 4;
1558 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1559 SubVecSize = 2;
1560
1561 int SubMask[4] = {-1, -1, -1, -1};
1562 for (unsigned i = 0; i < SubVecSize; ++i) {
1563 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
1564 int M = Mask[j];
1565
1566 // Convert from vector index to 4-element subvector index
1567 // If an index refers to an element outside of the subvector then give up
1568 if (M != -1) {
1569 M -= 4 * (j / SubVecSize);
1570 if (M < 0 || M >= 4)
1571 return SDValue();
1572 }
1573
1574 // If the mask has an undef, replace it with the current index.
1575 // Note that it might still be undef if the current index is also undef
1576 if (SubMask[i] == -1)
1577 SubMask[i] = M;
1578 // Check that non-undef values are the same as in the mask. If they
1579 // aren't then give up
1580 else if (M != -1 && M != SubMask[i])
1581 return SDValue();
1582 }
1583 }
1584
1585 // Calculate the immediate. Replace any remaining undefs with zero
1586 APInt Imm(64, 0);
1587 for (int i = SubVecSize - 1; i >= 0; --i) {
1588 int M = SubMask[i];
1589
1590 if (M == -1)
1591 M = 0;
1592
1593 Imm <<= 2;
1594 Imm |= M & 0x3;
1595 }
1596
1597 MVT GRLenVT = Subtarget.getGRLenVT();
1598
1599 // Return vshuf4i.d
1600 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1601 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1, V2,
1602 DAG.getConstant(Imm, DL, GRLenVT));
1603
1604 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
1605 DAG.getConstant(Imm, DL, GRLenVT));
1606}
1607
1608/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
1609///
1610/// VPACKEV interleaves the even elements from each vector.
1611///
1612/// It is possible to lower into VPACKEV when the mask consists of two of the
1613/// following forms interleaved:
1614/// <0, 2, 4, ...>
1615/// <n, n+2, n+4, ...>
1616/// where n is the number of elements in the vector.
1617/// For example:
1618/// <0, 0, 2, 2, 4, 4, ...>
1619/// <0, n, 2, n+2, 4, n+4, ...>
1620///
1621/// When undef's appear in the mask they are treated as if they were whatever
1622/// value is necessary in order to fit the above forms.
1624 MVT VT, SDValue V1, SDValue V2,
1625 SelectionDAG &DAG) {
1626
1627 const auto &Begin = Mask.begin();
1628 const auto &End = Mask.end();
1629 SDValue OriV1 = V1, OriV2 = V2;
1630
1631 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
1632 V1 = OriV1;
1633 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
1634 V1 = OriV2;
1635 else
1636 return SDValue();
1637
1638 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
1639 V2 = OriV1;
1640 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
1641 V2 = OriV2;
1642 else
1643 return SDValue();
1644
1645 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
1646}
1647
1648/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
1649///
1650/// VPACKOD interleaves the odd elements from each vector.
1651///
1652/// It is possible to lower into VPACKOD when the mask consists of two of the
1653/// following forms interleaved:
1654/// <1, 3, 5, ...>
1655/// <n+1, n+3, n+5, ...>
1656/// where n is the number of elements in the vector.
1657/// For example:
1658/// <1, 1, 3, 3, 5, 5, ...>
1659/// <1, n+1, 3, n+3, 5, n+5, ...>
1660///
1661/// When undef's appear in the mask they are treated as if they were whatever
1662/// value is necessary in order to fit the above forms.
1664 MVT VT, SDValue V1, SDValue V2,
1665 SelectionDAG &DAG) {
1666
1667 const auto &Begin = Mask.begin();
1668 const auto &End = Mask.end();
1669 SDValue OriV1 = V1, OriV2 = V2;
1670
1671 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
1672 V1 = OriV1;
1673 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
1674 V1 = OriV2;
1675 else
1676 return SDValue();
1677
1678 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
1679 V2 = OriV1;
1680 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
1681 V2 = OriV2;
1682 else
1683 return SDValue();
1684
1685 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
1686}
1687
1688/// Lower VECTOR_SHUFFLE into VILVH (if possible).
1689///
1690/// VILVH interleaves consecutive elements from the left (highest-indexed) half
1691/// of each vector.
1692///
1693/// It is possible to lower into VILVH when the mask consists of two of the
1694/// following forms interleaved:
1695/// <x, x+1, x+2, ...>
1696/// <n+x, n+x+1, n+x+2, ...>
1697/// where n is the number of elements in the vector and x is half n.
1698/// For example:
1699/// <x, x, x+1, x+1, x+2, x+2, ...>
1700/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
1701///
1702/// When undef's appear in the mask they are treated as if they were whatever
1703/// value is necessary in order to fit the above forms.
1705 MVT VT, SDValue V1, SDValue V2,
1706 SelectionDAG &DAG) {
1707
1708 const auto &Begin = Mask.begin();
1709 const auto &End = Mask.end();
1710 unsigned HalfSize = Mask.size() / 2;
1711 SDValue OriV1 = V1, OriV2 = V2;
1712
1713 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
1714 V1 = OriV1;
1715 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
1716 V1 = OriV2;
1717 else
1718 return SDValue();
1719
1720 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
1721 V2 = OriV1;
1722 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
1723 1))
1724 V2 = OriV2;
1725 else
1726 return SDValue();
1727
1728 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
1729}
1730
1731/// Lower VECTOR_SHUFFLE into VILVL (if possible).
1732///
1733/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
1734/// of each vector.
1735///
1736/// It is possible to lower into VILVL when the mask consists of two of the
1737/// following forms interleaved:
1738/// <0, 1, 2, ...>
1739/// <n, n+1, n+2, ...>
1740/// where n is the number of elements in the vector.
1741/// For example:
1742/// <0, 0, 1, 1, 2, 2, ...>
1743/// <0, n, 1, n+1, 2, n+2, ...>
1744///
1745/// When undef's appear in the mask they are treated as if they were whatever
1746/// value is necessary in order to fit the above forms.
1748 MVT VT, SDValue V1, SDValue V2,
1749 SelectionDAG &DAG) {
1750
1751 const auto &Begin = Mask.begin();
1752 const auto &End = Mask.end();
1753 SDValue OriV1 = V1, OriV2 = V2;
1754
1755 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
1756 V1 = OriV1;
1757 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
1758 V1 = OriV2;
1759 else
1760 return SDValue();
1761
1762 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
1763 V2 = OriV1;
1764 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
1765 V2 = OriV2;
1766 else
1767 return SDValue();
1768
1769 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1770}
1771
1772/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
1773///
1774/// VPICKEV copies the even elements of each vector into the result vector.
1775///
1776/// It is possible to lower into VPICKEV when the mask consists of two of the
1777/// following forms concatenated:
1778/// <0, 2, 4, ...>
1779/// <n, n+2, n+4, ...>
1780/// where n is the number of elements in the vector.
1781/// For example:
1782/// <0, 2, 4, ..., 0, 2, 4, ...>
1783/// <0, 2, 4, ..., n, n+2, n+4, ...>
1784///
1785/// When undef's appear in the mask they are treated as if they were whatever
1786/// value is necessary in order to fit the above forms.
1788 MVT VT, SDValue V1, SDValue V2,
1789 SelectionDAG &DAG) {
1790
1791 const auto &Begin = Mask.begin();
1792 const auto &Mid = Mask.begin() + Mask.size() / 2;
1793 const auto &End = Mask.end();
1794 SDValue OriV1 = V1, OriV2 = V2;
1795
1796 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
1797 V1 = OriV1;
1798 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
1799 V1 = OriV2;
1800 else
1801 return SDValue();
1802
1803 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
1804 V2 = OriV1;
1805 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
1806 V2 = OriV2;
1807
1808 else
1809 return SDValue();
1810
1811 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1812}
1813
1814/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
1815///
1816/// VPICKOD copies the odd elements of each vector into the result vector.
1817///
1818/// It is possible to lower into VPICKOD when the mask consists of two of the
1819/// following forms concatenated:
1820/// <1, 3, 5, ...>
1821/// <n+1, n+3, n+5, ...>
1822/// where n is the number of elements in the vector.
1823/// For example:
1824/// <1, 3, 5, ..., 1, 3, 5, ...>
1825/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
1826///
1827/// When undef's appear in the mask they are treated as if they were whatever
1828/// value is necessary in order to fit the above forms.
1830 MVT VT, SDValue V1, SDValue V2,
1831 SelectionDAG &DAG) {
1832
1833 const auto &Begin = Mask.begin();
1834 const auto &Mid = Mask.begin() + Mask.size() / 2;
1835 const auto &End = Mask.end();
1836 SDValue OriV1 = V1, OriV2 = V2;
1837
1838 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
1839 V1 = OriV1;
1840 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
1841 V1 = OriV2;
1842 else
1843 return SDValue();
1844
1845 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
1846 V2 = OriV1;
1847 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
1848 V2 = OriV2;
1849 else
1850 return SDValue();
1851
1852 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
1853}
1854
1855/// Lower VECTOR_SHUFFLE into VSHUF.
1856///
1857/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
1858/// adding it as an operand to the resulting VSHUF.
1860 MVT VT, SDValue V1, SDValue V2,
1861 SelectionDAG &DAG) {
1862
1864 for (auto M : Mask)
1865 Ops.push_back(DAG.getConstant(M, DL, MVT::i64));
1866
1867 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
1868 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
1869
1870 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
1871 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
1872 // VSHF concatenates the vectors in a bitwise fashion:
1873 // <0b00, 0b01> + <0b10, 0b11> ->
1874 // 0b0100 + 0b1110 -> 0b01001110
1875 // <0b10, 0b11, 0b00, 0b01>
1876 // We must therefore swap the operands to get the correct result.
1877 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
1878}
1879
1880/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
1881///
1882/// This routine breaks down the specific type of 128-bit shuffle and
1883/// dispatches to the lowering routines accordingly.
1885 SDValue V1, SDValue V2, SelectionDAG &DAG,
1886 const LoongArchSubtarget &Subtarget) {
1887 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
1888 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
1889 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
1890 "Vector type is unsupported for lsx!");
1892 "Two operands have different types!");
1893 assert(VT.getVectorNumElements() == Mask.size() &&
1894 "Unexpected mask size for shuffle!");
1895 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
1896
1897 APInt KnownUndef, KnownZero;
1898 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
1899 APInt Zeroable = KnownUndef | KnownZero;
1900
1901 SDValue Result;
1902 // TODO: Add more comparison patterns.
1903 if (V2.isUndef()) {
1904 if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG,
1905 Subtarget)))
1906 return Result;
1907 if ((Result =
1908 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
1909 return Result;
1910
1911 // TODO: This comment may be enabled in the future to better match the
1912 // pattern for instruction selection.
1913 /* V2 = V1; */
1914 }
1915
1916 // It is recommended not to change the pattern comparison order for better
1917 // performance.
1918 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
1919 return Result;
1920 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
1921 return Result;
1922 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
1923 return Result;
1924 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
1925 return Result;
1926 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
1927 return Result;
1928 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
1929 return Result;
1930 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
1931 (Result =
1932 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
1933 return Result;
1934 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
1935 Zeroable)))
1936 return Result;
1937 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
1938 Zeroable)))
1939 return Result;
1940 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
1941 Subtarget)))
1942 return Result;
1943 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
1944 return NewShuffle;
1945 if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
1946 return Result;
1947 return SDValue();
1948}
1949
1950/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
1951///
1952 /// It is an XVREPLVEI when the mask is:
1953 /// <x, x, x, ..., x+n, x+n, x+n, ...>
1954 /// where the number of x's is equal to n and n is half the vector length.
1955///
1956/// When undef's appear in the mask they are treated as if they were whatever
1957/// value is necessary in order to fit the above form.
1958static SDValue
1960 SDValue V1, SDValue V2, SelectionDAG &DAG,
1961 const LoongArchSubtarget &Subtarget) {
1962 int SplatIndex = -1;
1963 for (const auto &M : Mask) {
1964 if (M != -1) {
1965 SplatIndex = M;
1966 break;
1967 }
1968 }
1969
1970 if (SplatIndex == -1)
1971 return DAG.getUNDEF(VT);
1972
1973 const auto &Begin = Mask.begin();
1974 const auto &End = Mask.end();
1975 unsigned HalfSize = Mask.size() / 2;
1976
1977 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1978 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
1979 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
1980 0)) {
1981 APInt Imm(64, SplatIndex);
1982 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1983 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()));
1984 }
1985
1986 return SDValue();
1987}
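
// Worked example (a sketch under the same convention as above, not part of
// the original file): with Mask.size() == 8 and SplatIndex == 2, the mask
// <2, 2, 2, 2, 6, 6, 6, 6> replicates element 2 within each 128-bit half, so
// it can be lowered to (VREPLVEI V1, 2). The hypothetical helper below
// mirrors the two fitsRegularPattern calls with stride 0.
[[maybe_unused]] static bool isXvreplveiMaskExample(ArrayRef<int> Mask,
                                                    int SplatIndex) {
  unsigned Half = Mask.size() / 2;
  for (unsigned I = 0; I != Mask.size(); ++I) {
    int Expected = I < Half ? SplatIndex : SplatIndex + int(Half);
    if (Mask[I] != -1 && Mask[I] != Expected)
      return false;
  }
  return true;
}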
1988
1989/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
1990static SDValue
1992 SDValue V1, SDValue V2, SelectionDAG &DAG,
1993 const LoongArchSubtarget &Subtarget) {
1994 // When the size is less than or equal to 4, lower-cost instructions may be
1995 // used.
1996 if (Mask.size() <= 4)
1997 return SDValue();
1998 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
1999}
2000
2001/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
2003 MVT VT, SDValue V1, SDValue V2,
2004 SelectionDAG &DAG) {
2006 // LoongArch LASX only has XVPERM_W.
2006 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2007 return SDValue();
2008
2009 unsigned NumElts = VT.getVectorNumElements();
2010 unsigned HalfSize = NumElts / 2;
2011 bool FrontLo = true, FrontHi = true;
2012 bool BackLo = true, BackHi = true;
2013
2014 auto inRange = [](int val, int low, int high) {
2015 return (val == -1) || (val >= low && val < high);
2016 };
2017
2018 for (unsigned i = 0; i < HalfSize; ++i) {
2019 int Fronti = Mask[i];
2020 int Backi = Mask[i + HalfSize];
2021
2022 FrontLo &= inRange(Fronti, 0, HalfSize);
2023 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2024 BackLo &= inRange(Backi, 0, HalfSize);
2025 BackHi &= inRange(Backi, HalfSize, NumElts);
2026 }
2027
2028 // If both the lower and upper 128-bit parts access only one half of the
2029 // vector (either lower or upper), avoid using xvperm.w. The latency of
2030 // xvperm.w(3) is higher than using xvshuf(1) and xvori(1).
2031 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2032 return SDValue();
2033
2035 for (unsigned i = 0; i < NumElts; ++i)
2036 Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(MVT::i64)
2037 : DAG.getConstant(Mask[i], DL, MVT::i64));
2038 SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
2039
2040 return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2041}
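
// Illustrative sketch (not part of the original file): the check above skips
// XVPERM when each 128-bit destination half reads from only one 128-bit half
// of the source, e.g. <7, 6, 5, 4, 3, 2, 1, 0> for v8i32, because that case
// is cheaper as xvshuf plus xvori. The hypothetical helper below recomputes
// the same skip condition for an 8-element mask.
[[maybe_unused]] static bool xvpermWouldBeSkippedExample(ArrayRef<int> Mask) {
  assert(Mask.size() == 8 && "XVPERM_W only handles 8-element masks");
  int Half = 4;
  bool FrontLo = true, FrontHi = true, BackLo = true, BackHi = true;
  auto InRange = [](int V, int Lo, int Hi) {
    return V == -1 || (V >= Lo && V < Hi);
  };
  for (int I = 0; I < Half; ++I) {
    FrontLo &= InRange(Mask[I], 0, Half);
    FrontHi &= InRange(Mask[I], Half, 8);
    BackLo &= InRange(Mask[I + Half], 0, Half);
    BackHi &= InRange(Mask[I + Half], Half, 8);
  }
  return (FrontLo || FrontHi) && (BackLo || BackHi);
}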
2042
2043/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
2045 MVT VT, SDValue V1, SDValue V2,
2046 SelectionDAG &DAG) {
2047 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2048}
2049
2050/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
2052 MVT VT, SDValue V1, SDValue V2,
2053 SelectionDAG &DAG) {
2054 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2055}
2056
2057/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
2059 MVT VT, SDValue V1, SDValue V2,
2060 SelectionDAG &DAG) {
2061
2062 const auto &Begin = Mask.begin();
2063 const auto &End = Mask.end();
2064 unsigned HalfSize = Mask.size() / 2;
2065 unsigned LeftSize = HalfSize / 2;
2066 SDValue OriV1 = V1, OriV2 = V2;
2067
2068 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
2069 1) &&
2070 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
2071 V1 = OriV1;
2072 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
2073 Mask.size() + HalfSize - LeftSize, 1) &&
2074 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2075 Mask.size() + HalfSize + LeftSize, 1))
2076 V1 = OriV2;
2077 else
2078 return SDValue();
2079
2080 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
2081 1) &&
2082 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
2083 1))
2084 V2 = OriV1;
2085 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
2086 Mask.size() + HalfSize - LeftSize, 1) &&
2087 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2088 Mask.size() + HalfSize + LeftSize, 1))
2089 V2 = OriV2;
2090 else
2091 return SDValue();
2092
2093 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2094}
2095
2096/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
2098 MVT VT, SDValue V1, SDValue V2,
2099 SelectionDAG &DAG) {
2100
2101 const auto &Begin = Mask.begin();
2102 const auto &End = Mask.end();
2103 unsigned HalfSize = Mask.size() / 2;
2104 SDValue OriV1 = V1, OriV2 = V2;
2105
2106 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
2107 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
2108 V1 = OriV1;
2109 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
2110 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2111 Mask.size() + HalfSize, 1))
2112 V1 = OriV2;
2113 else
2114 return SDValue();
2115
2116 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
2117 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
2118 V2 = OriV1;
2119 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
2120 1) &&
2121 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2122 Mask.size() + HalfSize, 1))
2123 V2 = OriV2;
2124 else
2125 return SDValue();
2126
2127 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2128}
2129
2130/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
2132 MVT VT, SDValue V1, SDValue V2,
2133 SelectionDAG &DAG) {
2134
2135 const auto &Begin = Mask.begin();
2136 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2137 const auto &Mid = Mask.begin() + Mask.size() / 2;
2138 const auto &RightMid = Mask.end() - Mask.size() / 4;
2139 const auto &End = Mask.end();
2140 unsigned HalfSize = Mask.size() / 2;
2141 SDValue OriV1 = V1, OriV2 = V2;
2142
2143 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2144 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2145 V1 = OriV1;
2146 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2147 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2148 V1 = OriV2;
2149 else
2150 return SDValue();
2151
2152 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2153 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2154 V2 = OriV1;
2155 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2156 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2157 V2 = OriV2;
2158
2159 else
2160 return SDValue();
2161
2162 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2163}
2164
2165/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
2167 MVT VT, SDValue V1, SDValue V2,
2168 SelectionDAG &DAG) {
2169
2170 const auto &Begin = Mask.begin();
2171 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2172 const auto &Mid = Mask.begin() + Mask.size() / 2;
2173 const auto &RightMid = Mask.end() - Mask.size() / 4;
2174 const auto &End = Mask.end();
2175 unsigned HalfSize = Mask.size() / 2;
2176 SDValue OriV1 = V1, OriV2 = V2;
2177
2178 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2179 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2180 V1 = OriV1;
2181 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2182 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2183 2))
2184 V1 = OriV2;
2185 else
2186 return SDValue();
2187
2188 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2189 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2190 V2 = OriV1;
2191 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2192 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2193 2))
2194 V2 = OriV2;
2195 else
2196 return SDValue();
2197
2198 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2199}
2200
2201/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
2203 MVT VT, SDValue V1, SDValue V2,
2204 SelectionDAG &DAG) {
2205
2206 int MaskSize = Mask.size();
2207 int HalfSize = Mask.size() / 2;
2208 const auto &Begin = Mask.begin();
2209 const auto &Mid = Mask.begin() + HalfSize;
2210 const auto &End = Mask.end();
2211
2212 // VECTOR_SHUFFLE concatenates the vectors:
2213 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
2214 // shuffling ->
2215 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
2216 //
2217 // XVSHUF concatenates the vectors:
2218 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
2219 // shuffling ->
2220 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
2221 SmallVector<SDValue, 8> MaskAlloc;
2222 for (auto it = Begin; it < Mid; it++) {
2223 if (*it < 0) // UNDEF
2224 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2225 else if ((*it >= 0 && *it < HalfSize) ||
2226 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
2227 int M = *it < HalfSize ? *it : *it - HalfSize;
2228 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2229 } else
2230 return SDValue();
2231 }
2232 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
2233
2234 for (auto it = Mid; it < End; it++) {
2235 if (*it < 0) // UNDEF
2236 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2237 else if ((*it >= HalfSize && *it < MaskSize) ||
2238 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
2239 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
2240 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2241 } else
2242 return SDValue();
2243 }
2244 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
2245
2246 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2247 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
2248 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2249}
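
// Worked example (a sketch, not part of the original file): for v8i32
// (MaskSize = 8, HalfSize = 4) the first half of the shuffle mask may only
// reference the low half of either source (indices 0-3 or 8-11) and the
// second half only the high half (indices 4-7 or 12-15); anything else falls
// back to the generic path. The hypothetical helper below mirrors the
// remapping performed by the two loops above, returning -1 for an index that
// XVSHUF cannot express at that position.
[[maybe_unused]] static int xvshufRemapIndexExample(int M, int Pos,
                                                    int MaskSize) {
  int Half = MaskSize / 2;
  if (M < 0)
    return 0; // Undef elements are simply encoded as 0.
  if (Pos < Half) {
    if (M < Half)
      return M;
    if (M >= MaskSize && M < MaskSize + Half)
      return M - Half;
    return -1;
  }
  if (M >= Half && M < MaskSize)
    return M - Half;
  if (M >= MaskSize + Half && M < 2 * MaskSize)
    return M - MaskSize;
  return -1;
}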
2250
2251/// Shuffle vectors by lane to generate more optimized instructions.
2252/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
2253///
2254 /// Therefore, except for the following four cases, all other cases are
2255 /// regarded as cross-lane shuffles, where optimization is relatively limited.
2256 ///
2257 /// - Shuffle high, low lanes of two input vectors
2258 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
2259 /// - Shuffle low, high lanes of two input vectors
2260 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
2261 /// - Shuffle low, low lanes of two input vectors
2262 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
2263 /// - Shuffle high, high lanes of two input vectors
2264 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2265///
2266/// The first case is the closest to LoongArch instructions and the other
2267/// cases need to be converted to it for processing.
2268///
2269/// This function may modify V1, V2 and Mask
2271 const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
2272 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
2273
2274 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
2275
2276 int MaskSize = Mask.size();
2277 int HalfSize = Mask.size() / 2;
2278 MVT GRLenVT = Subtarget.getGRLenVT();
2279
2280 HalfMaskType preMask = None, postMask = None;
2281
2282 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2283 return M < 0 || (M >= 0 && M < HalfSize) ||
2284 (M >= MaskSize && M < MaskSize + HalfSize);
2285 }))
2286 preMask = HighLaneTy;
2287 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2288 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2289 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2290 }))
2291 preMask = LowLaneTy;
2292
2293 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2294 return M < 0 || (M >= 0 && M < HalfSize) ||
2295 (M >= MaskSize && M < MaskSize + HalfSize);
2296 }))
2297 postMask = HighLaneTy;
2298 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2299 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2300 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2301 }))
2302 postMask = LowLaneTy;
2303
2304 // The pre-half of the mask is high-lane type and the post-half of the mask
2305 // is low-lane type, which is closest to the LoongArch instructions.
2306 //
2307 // Note: In the LoongArch architecture, the high lane of the mask corresponds
2308 // to the lower 128 bits of the vector register, and the low lane of the mask
2309 // corresponds to the higher 128 bits of the vector register.
2310 if (preMask == HighLaneTy && postMask == LowLaneTy) {
2311 return;
2312 }
2313 if (preMask == LowLaneTy && postMask == HighLaneTy) {
2314 V1 = DAG.getBitcast(MVT::v4i64, V1);
2315 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2316 DAG.getConstant(0b01001110, DL, GRLenVT));
2317 V1 = DAG.getBitcast(VT, V1);
2318
2319 if (!V2.isUndef()) {
2320 V2 = DAG.getBitcast(MVT::v4i64, V2);
2321 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2322 DAG.getConstant(0b01001110, DL, GRLenVT));
2323 V2 = DAG.getBitcast(VT, V2);
2324 }
2325
2326 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2327 *it = *it < 0 ? *it : *it - HalfSize;
2328 }
2329 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2330 *it = *it < 0 ? *it : *it + HalfSize;
2331 }
2332 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
2333 V1 = DAG.getBitcast(MVT::v4i64, V1);
2334 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2335 DAG.getConstant(0b11101110, DL, GRLenVT));
2336 V1 = DAG.getBitcast(VT, V1);
2337
2338 if (!V2.isUndef()) {
2339 V2 = DAG.getBitcast(MVT::v4i64, V2);
2340 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2341 DAG.getConstant(0b11101110, DL, GRLenVT));
2342 V2 = DAG.getBitcast(VT, V2);
2343 }
2344
2345 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2346 *it = *it < 0 ? *it : *it - HalfSize;
2347 }
2348 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
2349 V1 = DAG.getBitcast(MVT::v4i64, V1);
2350 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2351 DAG.getConstant(0b01000100, DL, GRLenVT));
2352 V1 = DAG.getBitcast(VT, V1);
2353
2354 if (!V2.isUndef()) {
2355 V2 = DAG.getBitcast(MVT::v4i64, V2);
2356 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2357 DAG.getConstant(0b01000100, DL, GRLenVT));
2358 V2 = DAG.getBitcast(VT, V2);
2359 }
2360
2361 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2362 *it = *it < 0 ? *it : *it + HalfSize;
2363 }
2364 } else { // cross-lane
2365 return;
2366 }
2367}
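
// Illustrative sketch (not part of the original file, assuming the usual
// xvpermi.d encoding where each 2-bit field of the immediate selects one
// 64-bit element): 0b01001110 yields <2, 3, 0, 1> (swap the 128-bit lanes),
// 0b11101110 yields <2, 3, 2, 3> (duplicate the high lane) and 0b01000100
// yields <0, 1, 0, 1> (duplicate the low lane), matching the three
// non-canonical cases handled above.
[[maybe_unused]] static void
decodeXvpermiImmExample(unsigned Imm, SmallVectorImpl<int> &Elts) {
  for (int I = 0; I < 4; ++I)
    Elts.push_back((Imm >> (2 * I)) & 0x3);
}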
2368
2369/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
2370 /// Only for 256-bit vectors.
2371///
2372/// For example:
2373 /// %2 = shufflevector <4 x i64> %0, <4 x i64> poison,
2374 /// <4 x i32> <i32 0, i32 3, i32 2, i32 0>
2375 /// is lowered to:
2376/// (XVPERMI $xr2, $xr0, 78)
2377/// (XVSHUF $xr1, $xr2, $xr0)
2378/// (XVORI $xr0, $xr1, 0)
2380 ArrayRef<int> Mask,
2381 MVT VT, SDValue V1,
2382 SDValue V2,
2383 SelectionDAG &DAG) {
2384 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
2385 int Size = Mask.size();
2386 int LaneSize = Size / 2;
2387
2388 bool LaneCrossing[2] = {false, false};
2389 for (int i = 0; i < Size; ++i)
2390 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
2391 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
2392
2393 // Ensure that at least one element crosses a lane boundary.
2394 if (!LaneCrossing[0] && !LaneCrossing[1])
2395 return SDValue();
2396
2397 SmallVector<int> InLaneMask;
2398 InLaneMask.assign(Mask.begin(), Mask.end());
2399 for (int i = 0; i < Size; ++i) {
2400 int &M = InLaneMask[i];
2401 if (M < 0)
2402 continue;
2403 if (((M % Size) / LaneSize) != (i / LaneSize))
2404 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
2405 }
2406
2407 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
2408 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
2409 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
2410 Flipped = DAG.getBitcast(VT, Flipped);
2411 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
2412}
2413
2414/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
2415///
2416/// This routine breaks down the specific type of 256-bit shuffle and
2417/// dispatches to the lowering routines accordingly.
2419 SDValue V1, SDValue V2, SelectionDAG &DAG,
2420 const LoongArchSubtarget &Subtarget) {
2421 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
2422 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
2423 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
2424 "Vector type is unsupported for lasx!");
2426 "Two operands have different types!");
2427 assert(VT.getVectorNumElements() == Mask.size() &&
2428 "Unexpected mask size for shuffle!");
2429 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2430 assert(Mask.size() >= 4 && "Mask size is less than 4.");
2431
2432 // Canonicalize non-cross-lane shuffle vectors.
2433 SmallVector<int> NewMask(Mask);
2434 canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget);
2435
2436 APInt KnownUndef, KnownZero;
2437 computeZeroableShuffleElements(NewMask, V1, V2, KnownUndef, KnownZero);
2438 APInt Zeroable = KnownUndef | KnownZero;
2439
2440 SDValue Result;
2441 // TODO: Add more comparison patterns.
2442 if (V2.isUndef()) {
2443 if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG,
2444 Subtarget)))
2445 return Result;
2446 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG,
2447 Subtarget)))
2448 return Result;
2449 if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, NewMask, VT, V1, V2, DAG)))
2450 return Result;
2451 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
2452 V1, V2, DAG)))
2453 return Result;
2454
2455 // TODO: This comment may be enabled in the future to better match the
2456 // pattern for instruction selection.
2457 /* V2 = V1; */
2458 }
2459
2460 // It is recommended not to change the pattern comparison order for better
2461 // performance.
2462 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
2463 return Result;
2464 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
2465 return Result;
2466 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
2467 return Result;
2468 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
2469 return Result;
2470 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
2471 return Result;
2472 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
2473 return Result;
2474 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, NewMask, VT, V1, V2, DAG,
2475 Subtarget, Zeroable)))
2476 return Result;
2477 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, NewMask, VT, V1, V2, DAG,
2478 Subtarget)))
2479 return Result;
2480 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
2481 return NewShuffle;
2482 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
2483 return Result;
2484
2485 return SDValue();
2486}
2487
2488SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2489 SelectionDAG &DAG) const {
2490 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
2491 ArrayRef<int> OrigMask = SVOp->getMask();
2492 SDValue V1 = Op.getOperand(0);
2493 SDValue V2 = Op.getOperand(1);
2494 MVT VT = Op.getSimpleValueType();
2495 int NumElements = VT.getVectorNumElements();
2496 SDLoc DL(Op);
2497
2498 bool V1IsUndef = V1.isUndef();
2499 bool V2IsUndef = V2.isUndef();
2500 if (V1IsUndef && V2IsUndef)
2501 return DAG.getUNDEF(VT);
2502
2503 // When we create a shuffle node we put the UNDEF node as the second operand,
2504 // but in some cases the first operand may be transformed to UNDEF.
2505 // In this case we should just commute the node.
2506 if (V1IsUndef)
2507 return DAG.getCommutedVectorShuffle(*SVOp);
2508
2509 // Check for non-undef masks pointing at an undef vector and make the masks
2510 // undef as well. This makes it easier to match the shuffle based solely on
2511 // the mask.
2512 if (V2IsUndef &&
2513 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
2514 SmallVector<int, 8> NewMask(OrigMask);
2515 for (int &M : NewMask)
2516 if (M >= NumElements)
2517 M = -1;
2518 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
2519 }
2520
2521 // Check for illegal shuffle mask element index values.
2522 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
2523 (void)MaskUpperLimit;
2524 assert(llvm::all_of(OrigMask,
2525 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
2526 "Out of bounds shuffle index");
2527
2528 // For each vector width, delegate to a specialized lowering routine.
2529 if (VT.is128BitVector())
2530 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2531
2532 if (VT.is256BitVector())
2533 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2534
2535 return SDValue();
2536}
2537
2538SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
2539 SelectionDAG &DAG) const {
2540 // Custom lower to ensure the libcall return is passed in an FPR on hard
2541 // float ABIs.
2542 SDLoc DL(Op);
2543 MakeLibCallOptions CallOptions;
2544 SDValue Op0 = Op.getOperand(0);
2545 SDValue Chain = SDValue();
2546 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
2547 SDValue Res;
2548 std::tie(Res, Chain) =
2549 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
2550 if (Subtarget.is64Bit())
2551 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2552 return DAG.getBitcast(MVT::i32, Res);
2553}
2554
2555SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
2556 SelectionDAG &DAG) const {
2557 // Custom lower to ensure the libcall argument is passed in an FPR on hard
2558 // float ABIs.
2559 SDLoc DL(Op);
2560 MakeLibCallOptions CallOptions;
2561 SDValue Op0 = Op.getOperand(0);
2562 SDValue Chain = SDValue();
2563 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2564 DL, MVT::f32, Op0)
2565 : DAG.getBitcast(MVT::f32, Op0);
2566 SDValue Res;
2567 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
2568 CallOptions, DL, Chain);
2569 return Res;
2570}
2571
2572SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
2573 SelectionDAG &DAG) const {
2574 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2575 SDLoc DL(Op);
2576 MakeLibCallOptions CallOptions;
2577 RTLIB::Libcall LC =
2578 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
2579 SDValue Res =
2580 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
2581 if (Subtarget.is64Bit())
2582 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2583 return DAG.getBitcast(MVT::i32, Res);
2584}
2585
2586SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
2587 SelectionDAG &DAG) const {
2588 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2589 MVT VT = Op.getSimpleValueType();
2590 SDLoc DL(Op);
2591 Op = DAG.getNode(
2592 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
2593 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
2594 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2595 DL, MVT::f32, Op)
2596 : DAG.getBitcast(MVT::f32, Op);
2597 if (VT != MVT::f32)
2598 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
2599 return Res;
2600}
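
// A minimal host-side sketch of the same trick (an assumption-labeled
// illustration, not part of the original file, shown standalone and relying
// on <cstdint>/<cstring>): bf16 is the upper 16 bits of an IEEE binary32
// value, which is exactly what the SHL-by-16 plus bitcast above exploits.
[[maybe_unused]] static float bf16BitsToFloatExample(uint16_t Bits) {
  uint32_t Widened = static_cast<uint32_t>(Bits) << 16;
  float Result;
  std::memcpy(&Result, &Widened, sizeof(Result)); // Reinterpret, not convert.
  return Result;
}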
2601
2602// Lower BUILD_VECTOR as broadcast load (if possible).
2603// For example:
2604// %a = load i8, ptr %ptr
2605// %b = build_vector %a, %a, %a, %a
2606 // is lowered to:
2607// (VLDREPL_B $a0, 0)
2609 const SDLoc &DL,
2610 SelectionDAG &DAG) {
2611 MVT VT = BVOp->getSimpleValueType(0);
2612 int NumOps = BVOp->getNumOperands();
2613
2614 assert((VT.is128BitVector() || VT.is256BitVector()) &&
2615 "Unsupported vector type for broadcast.");
2616
2617 SDValue IdentitySrc;
2618 bool IsIdentity = true;
2619
2620 for (int i = 0; i != NumOps; i++) {
2621 SDValue Op = BVOp->getOperand(i);
2622 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
2623 IsIdentity = false;
2624 break;
2625 }
2626 IdentitySrc = BVOp->getOperand(0);
2627 }
2628
2629 // Make sure that this load is valid and only has one user.
2630 if (!IsIdentity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
2631 return SDValue();
2632
2633 auto *LN = cast<LoadSDNode>(IdentitySrc);
2634 auto ExtType = LN->getExtensionType();
2635
2636 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
2637 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
2638 SDVTList Tys =
2639 LN->isIndexed()
2640 ? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)
2641 : DAG.getVTList(VT, MVT::Other);
2642 SDValue Ops[] = {LN->getChain(), LN->getBasePtr(), LN->getOffset()};
2643 SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
2644 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
2645 return BCast;
2646 }
2647 return SDValue();
2648}
2649
2650SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
2651 SelectionDAG &DAG) const {
2652 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
2653 MVT VT = Node->getSimpleValueType(0);
2654 EVT ResTy = Op->getValueType(0);
2655 unsigned NumElts = ResTy.getVectorNumElements();
2656 SDLoc DL(Op);
2657 APInt SplatValue, SplatUndef;
2658 unsigned SplatBitSize;
2659 bool HasAnyUndefs;
2660 bool IsConstant = false;
2661 bool UseSameConstant = true;
2662 SDValue ConstantValue;
2663 bool Is128Vec = ResTy.is128BitVector();
2664 bool Is256Vec = ResTy.is256BitVector();
2665
2666 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
2667 (!Subtarget.hasExtLASX() || !Is256Vec))
2668 return SDValue();
2669
2670 if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
2671 return Result;
2672
2673 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
2674 /*MinSplatBits=*/8) &&
2675 SplatBitSize <= 64) {
2676 // We can only cope with 8, 16, 32, or 64-bit elements.
2677 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
2678 SplatBitSize != 64)
2679 return SDValue();
2680
2681 if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
2682 // We can only handle 64-bit elements that are within
2683 // the signed 10-bit range on 32-bit targets.
2684 // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
2685 if (!SplatValue.isSignedIntN(10))
2686 return SDValue();
2687 if ((Is128Vec && ResTy == MVT::v4i32) ||
2688 (Is256Vec && ResTy == MVT::v8i32))
2689 return Op;
2690 }
2691
2692 EVT ViaVecTy;
2693
2694 switch (SplatBitSize) {
2695 default:
2696 return SDValue();
2697 case 8:
2698 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
2699 break;
2700 case 16:
2701 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
2702 break;
2703 case 32:
2704 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
2705 break;
2706 case 64:
2707 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
2708 break;
2709 }
2710
2711 // SelectionDAG::getConstant will promote SplatValue appropriately.
2712 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
2713
2714 // Bitcast to the type we originally wanted.
2715 if (ViaVecTy != ResTy)
2716 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
2717
2718 return Result;
2719 }
2720
2721 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
2722 return Op;
2723
2724 for (unsigned i = 0; i < NumElts; ++i) {
2725 SDValue Opi = Node->getOperand(i);
2726 if (isIntOrFPConstant(Opi)) {
2727 IsConstant = true;
2728 if (!ConstantValue.getNode())
2729 ConstantValue = Opi;
2730 else if (ConstantValue != Opi)
2731 UseSameConstant = false;
2732 }
2733 }
2734
2735 // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
2736 if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
2737 SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
2738 for (unsigned i = 0; i < NumElts; ++i) {
2739 SDValue Opi = Node->getOperand(i);
2740 if (!isIntOrFPConstant(Opi))
2741 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
2742 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2743 }
2744 return Result;
2745 }
2746
2747 if (!IsConstant) {
2748 // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
2749 // the sub-sequence of the vector and then broadcast the sub-sequence.
2750 //
2751 // TODO: If the BUILD_VECTOR contains undef elements, consider falling
2752 // back to use INSERT_VECTOR_ELT to materialize the vector, because it
2753 // generates worse code in some cases. This could be further optimized
2754 // with more consideration.
2756 BitVector UndefElements;
2757 if (Node->getRepeatedSequence(Sequence, &UndefElements) &&
2758 UndefElements.count() == 0) {
2759 SDValue Vector = DAG.getUNDEF(ResTy);
2760 SDValue FillVec = Vector;
2761 EVT FillTy = ResTy;
2762
2763 // Use LSX instructions to fill the sub-sequence of a 256-bit vector,
2764 // because the high part can simply be treated as undef.
2765 if (Is256Vec) {
2766 FillTy = ResTy.getHalfNumVectorElementsVT(*DAG.getContext());
2767 FillVec = DAG.getExtractSubvector(DL, FillTy, Vector, 0);
2768 }
2769
2770 SDValue Op0 = Sequence[0];
2771 unsigned SeqLen = Sequence.size();
2772 if (!Op0.isUndef())
2773 FillVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, FillTy, Op0);
2774 for (unsigned i = 1; i < SeqLen; ++i) {
2775 SDValue Opi = Sequence[i];
2776 if (Opi.isUndef())
2777 continue;
2778 FillVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, FillTy, FillVec, Opi,
2779 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2780 }
2781
2782 unsigned SplatLen = NumElts / SeqLen;
2783 MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
2784 MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);
2785
2786 // If the size of the sub-sequence is half of a 256-bit vector, bitcast the
2787 // vector to the v4i64 type in order to match the pattern of XVREPLVE0Q.
2788 if (SplatEltTy == MVT::i128)
2789 SplatTy = MVT::v4i64;
2790
2791 SDValue SplatVec;
2792 SDValue SrcVec = DAG.getBitcast(
2793 SplatTy,
2794 Is256Vec ? DAG.getInsertSubvector(DL, Vector, FillVec, 0) : FillVec);
2795 if (Is256Vec) {
2796 SplatVec =
2797 DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
2799 DL, SplatTy, SrcVec);
2800 } else {
2801 SplatVec = DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
2802 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
2803 }
2804
2805 return DAG.getBitcast(ResTy, SplatVec);
2806 }
2807
2808 // Use INSERT_VECTOR_ELT operations rather than expand to stores.
2809 // The resulting code is the same length as the expansion, but it doesn't
2810 // use memory operations.
2811 assert(ResTy.isVector());
2812
2813 SDValue Op0 = Node->getOperand(0);
2814 SDValue Vector = DAG.getUNDEF(ResTy);
2815
2816 if (!Op0.isUndef())
2817 Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
2818 for (unsigned i = 1; i < NumElts; ++i) {
2819 SDValue Opi = Node->getOperand(i);
2820 if (Opi.isUndef())
2821 continue;
2822 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
2823 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2824 }
2825 return Vector;
2826 }
2827
2828 return SDValue();
2829}
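
// Worked example (a sketch, not part of the original file): a v8i32
// BUILD_VECTOR of the repeated sequence {a, b, a, b, a, b, a, b} has
// SeqLen == 2, so SplatLen == 4, SplatEltTy == i64 and SplatTy == v4i64;
// only {a, b} are actually inserted and the rest is produced by replicating
// element 0 of the v4i64 view. The hypothetical helper below just recomputes
// those numbers from the element count, element width and sequence length.
[[maybe_unused]] static std::pair<unsigned, unsigned>
splatTypeForRepeatedSequenceExample(unsigned NumElts, unsigned EltBits,
                                    unsigned SeqLen) {
  unsigned SplatLen = NumElts / SeqLen;     // Number of replicated lanes.
  unsigned SplatEltBits = EltBits * SeqLen; // Width of one replicated lane.
  return {SplatLen, SplatEltBits};          // e.g. {4, 64} for the example.
}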
2830
2831SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
2832 SelectionDAG &DAG) const {
2833 SDLoc DL(Op);
2834 MVT ResVT = Op.getSimpleValueType();
2835 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
2836
2837 unsigned NumOperands = Op.getNumOperands();
2838 unsigned NumFreezeUndef = 0;
2839 unsigned NumZero = 0;
2840 unsigned NumNonZero = 0;
2841 unsigned NonZeros = 0;
2842 SmallSet<SDValue, 4> Undefs;
2843 for (unsigned i = 0; i != NumOperands; ++i) {
2844 SDValue SubVec = Op.getOperand(i);
2845 if (SubVec.isUndef())
2846 continue;
2847 if (ISD::isFreezeUndef(SubVec.getNode())) {
2848 // If the freeze(undef) has multiple uses then we must fold to zero.
2849 if (SubVec.hasOneUse()) {
2850 ++NumFreezeUndef;
2851 } else {
2852 ++NumZero;
2853 Undefs.insert(SubVec);
2854 }
2855 } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
2856 ++NumZero;
2857 else {
2858 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
2859 NonZeros |= 1 << i;
2860 ++NumNonZero;
2861 }
2862 }
2863
2864 // If we have more than 2 non-zeros, build each half separately.
2865 if (NumNonZero > 2) {
2866 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
2867 ArrayRef<SDUse> Ops = Op->ops();
2868 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
2869 Ops.slice(0, NumOperands / 2));
2870 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
2871 Ops.slice(NumOperands / 2));
2872 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
2873 }
2874
2875 // Otherwise, build it up through insert_subvectors.
2876 SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
2877 : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
2878 : DAG.getUNDEF(ResVT));
2879
2880 // Replace Undef operands with ZeroVector.
2881 for (SDValue U : Undefs)
2882 DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
2883
2884 MVT SubVT = Op.getOperand(0).getSimpleValueType();
2885 unsigned NumSubElems = SubVT.getVectorNumElements();
2886 for (unsigned i = 0; i != NumOperands; ++i) {
2887 if ((NonZeros & (1 << i)) == 0)
2888 continue;
2889
2890 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
2891 DAG.getVectorIdxConstant(i * NumSubElems, DL));
2892 }
2893
2894 return Vec;
2895}
2896
2897SDValue
2898LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
2899 SelectionDAG &DAG) const {
2900 MVT EltVT = Op.getSimpleValueType();
2901 SDValue Vec = Op->getOperand(0);
2902 EVT VecTy = Vec->getValueType(0);
2903 SDValue Idx = Op->getOperand(1);
2904 SDLoc DL(Op);
2905 MVT GRLenVT = Subtarget.getGRLenVT();
2906
2907 assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
2908
2909 if (isa<ConstantSDNode>(Idx))
2910 return Op;
2911
2912 switch (VecTy.getSimpleVT().SimpleTy) {
2913 default:
2914 llvm_unreachable("Unexpected type");
2915 case MVT::v32i8:
2916 case MVT::v16i16:
2917 case MVT::v4i64:
2918 case MVT::v4f64: {
2919 // Extract the high-half subvector and place it in the low half of a new
2920 // vector. It doesn't matter what the high half of the new vector is.
2921 EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
2922 SDValue VecHi =
2923 DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
2924 SDValue TmpVec =
2925 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
2926 VecHi, DAG.getConstant(0, DL, GRLenVT));
2927
2928 // Shuffle the original Vec and the TmpVec using MaskVec; the lowest element
2929 // of MaskVec is Idx, the rest do not matter. ResVec[0] will hold the
2930 // desired element.
2931 SDValue IdxCp =
2932 Subtarget.is64Bit()
2933 ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx)
2934 : DAG.getBitcast(MVT::f32, Idx);
2935 SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
2936 SDValue MaskVec =
2937 DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
2938 SDValue ResVec =
2939 DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);
2940
2941 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
2942 DAG.getConstant(0, DL, GRLenVT));
2943 }
2944 case MVT::v8i32:
2945 case MVT::v8f32: {
2946 SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
2947 SDValue SplatValue =
2948 DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);
2949
2950 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
2951 DAG.getConstant(0, DL, GRLenVT));
2952 }
2953 }
2954}
2955
2956SDValue
2957LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
2958 SelectionDAG &DAG) const {
2959 MVT VT = Op.getSimpleValueType();
2960 MVT EltVT = VT.getVectorElementType();
2961 unsigned NumElts = VT.getVectorNumElements();
2962 unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
2963 SDLoc DL(Op);
2964 SDValue Op0 = Op.getOperand(0);
2965 SDValue Op1 = Op.getOperand(1);
2966 SDValue Op2 = Op.getOperand(2);
2967
2968 if (isa<ConstantSDNode>(Op2))
2969 return Op;
2970
2971 MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
2972 MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
2973
2974 if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
2975 return SDValue();
2976
2977 SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
2978 SDValue SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
2979
2980 SmallVector<SDValue, 32> RawIndices;
2981 for (unsigned i = 0; i < NumElts; ++i)
2982 RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2983 SDValue Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
2984
2985 // insert vec, elt, idx
2986 // =>
2987 // select (splatidx == {0,1,2...}) ? splatelt : vec
2988 SDValue SelectCC =
2989 DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
2990 return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
2991}
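
// A scalar model of the VSELECT-based insert above (illustrative only, not
// part of the original file): splatting the index and comparing it against
// the constant vector {0, 1, 2, ...} selects the new element in exactly the
// requested lane and keeps the original vector everywhere else.
[[maybe_unused]] static void
insertEltScalarModelExample(MutableArrayRef<int> Vec, int Elt, unsigned Idx) {
  for (unsigned I = 0; I != Vec.size(); ++I)
    Vec[I] = (I == Idx) ? Elt : Vec[I]; // select (splatidx == iota) ? elt : vec
}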
2992
2993SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
2994 SelectionDAG &DAG) const {
2995 SDLoc DL(Op);
2996 SyncScope::ID FenceSSID =
2997 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
2998
2999 // singlethread fences only synchronize with signal handlers on the same
3000 // thread and thus only need to preserve instruction order, not actually
3001 // enforce memory ordering.
3002 if (FenceSSID == SyncScope::SingleThread)
3003 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3004 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3005
3006 return Op;
3007}
3008
3009SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
3010 SelectionDAG &DAG) const {
3011
3012 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
3013 DAG.getContext()->emitError(
3014 "On LA64, only 64-bit registers can be written.");
3015 return Op.getOperand(0);
3016 }
3017
3018 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
3019 DAG.getContext()->emitError(
3020 "On LA32, only 32-bit registers can be written.");
3021 return Op.getOperand(0);
3022 }
3023
3024 return Op;
3025}
3026
3027SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
3028 SelectionDAG &DAG) const {
3029 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
3030 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
3031 "be a constant integer");
3032 return SDValue();
3033 }
3034
3035 MachineFunction &MF = DAG.getMachineFunction();
3037 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
3038 EVT VT = Op.getValueType();
3039 SDLoc DL(Op);
3040 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
3041 unsigned Depth = Op.getConstantOperandVal(0);
3042 int GRLenInBytes = Subtarget.getGRLen() / 8;
3043
3044 while (Depth--) {
3045 int Offset = -(GRLenInBytes * 2);
3046 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
3047 DAG.getSignedConstant(Offset, DL, VT));
3048 FrameAddr =
3049 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
3050 }
3051 return FrameAddr;
3052}
3053
3054SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
3055 SelectionDAG &DAG) const {
3056 // Currently we only support lowering the return address for the current frame.
3057 if (Op.getConstantOperandVal(0) != 0) {
3058 DAG.getContext()->emitError(
3059 "return address can only be determined for the current frame");
3060 return SDValue();
3061 }
3062
3063 MachineFunction &MF = DAG.getMachineFunction();
3065 MVT GRLenVT = Subtarget.getGRLenVT();
3066
3067 // Return the value of the return address register, marking it an implicit
3068 // live-in.
3069 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
3070 getRegClassFor(GRLenVT));
3071 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
3072}
3073
3074SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
3075 SelectionDAG &DAG) const {
3076 MachineFunction &MF = DAG.getMachineFunction();
3077 auto Size = Subtarget.getGRLen() / 8;
3078 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
3079 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3080}
3081
3082SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
3083 SelectionDAG &DAG) const {
3084 MachineFunction &MF = DAG.getMachineFunction();
3085 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
3086
3087 SDLoc DL(Op);
3088 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3090
3091 // vastart just stores the address of the VarArgsFrameIndex slot into the
3092 // memory location argument.
3093 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3094 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
3095 MachinePointerInfo(SV));
3096}
3097
3098SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
3099 SelectionDAG &DAG) const {
3100 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3101 !Subtarget.hasBasicD() && "unexpected target features");
3102
3103 SDLoc DL(Op);
3104 SDValue Op0 = Op.getOperand(0);
3105 if (Op0->getOpcode() == ISD::AND) {
3106 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
3107 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
3108 return Op;
3109 }
3110
3111 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
3112 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
3113 Op0.getConstantOperandVal(2) == UINT64_C(0))
3114 return Op;
3115
3116 if (Op0.getOpcode() == ISD::AssertZext &&
3117 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
3118 return Op;
3119
3120 EVT OpVT = Op0.getValueType();
3121 EVT RetVT = Op.getValueType();
3122 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
3123 MakeLibCallOptions CallOptions;
3124 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3125 SDValue Chain = SDValue();
3127 std::tie(Result, Chain) =
3128 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3129 return Result;
3130}
3131
3132SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
3133 SelectionDAG &DAG) const {
3134 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3135 !Subtarget.hasBasicD() && "unexpected target features");
3136
3137 SDLoc DL(Op);
3138 SDValue Op0 = Op.getOperand(0);
3139
3140 if ((Op0.getOpcode() == ISD::AssertSext ||
3142 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
3143 return Op;
3144
3145 EVT OpVT = Op0.getValueType();
3146 EVT RetVT = Op.getValueType();
3147 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
3148 MakeLibCallOptions CallOptions;
3149 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3150 SDValue Chain = SDValue();
3152 std::tie(Result, Chain) =
3153 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3154 return Result;
3155}
3156
3157SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
3158 SelectionDAG &DAG) const {
3159
3160 SDLoc DL(Op);
3161 EVT VT = Op.getValueType();
3162 SDValue Op0 = Op.getOperand(0);
3163 EVT Op0VT = Op0.getValueType();
3164
3165 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
3166 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
3167 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3168 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
3169 }
3170 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
3171 SDValue Lo, Hi;
3172 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
3173 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
3174 }
3175 return Op;
3176}
3177
3178SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
3179 SelectionDAG &DAG) const {
3180
3181 SDLoc DL(Op);
3182 SDValue Op0 = Op.getOperand(0);
3183
3184 if (Op0.getValueType() == MVT::f16)
3185 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
3186
3187 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
3188 !Subtarget.hasBasicD()) {
3189 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
3190 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
3191 }
3192
3193 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
3194 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
3195 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
3196}
3197
3199 SelectionDAG &DAG, unsigned Flags) {
3200 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
3201}
3202
3204 SelectionDAG &DAG, unsigned Flags) {
3205 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
3206 Flags);
3207}
3208
3210 SelectionDAG &DAG, unsigned Flags) {
3211 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
3212 N->getOffset(), Flags);
3213}
3214
3216 SelectionDAG &DAG, unsigned Flags) {
3217 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
3218}
3219
3220template <class NodeTy>
3221SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3223 bool IsLocal) const {
3224 SDLoc DL(N);
3225 EVT Ty = getPointerTy(DAG.getDataLayout());
3226 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3227 SDValue Load;
3228
3229 switch (M) {
3230 default:
3231 report_fatal_error("Unsupported code model");
3232
3233 case CodeModel::Large: {
3234 assert(Subtarget.is64Bit() && "Large code model requires LA64");
3235
3236 // This is not actually used, but is necessary for successfully matching
3237 // the PseudoLA_*_LARGE nodes.
3238 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3239 if (IsLocal) {
3240 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
3241 // eventually becomes the desired 5-insn code sequence.
3242 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
3243 Tmp, Addr),
3244 0);
3245 } else {
3246 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
3247 // eventually becomes the desired 5-insn code sequence.
3248 Load = SDValue(
3249 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
3250 0);
3251 }
3252 break;
3253 }
3254
3255 case CodeModel::Small:
3256 case CodeModel::Medium:
3257 if (IsLocal) {
3258 // This generates the pattern (PseudoLA_PCREL sym), which expands to
3259 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
3260 Load = SDValue(
3261 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
3262 } else {
3263 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
3264 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
3265 Load =
3266 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
3267 }
3268 }
3269
3270 if (!IsLocal) {
3271 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3272 MachineFunction &MF = DAG.getMachineFunction();
3273 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3277 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3278 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
3279 }
3280
3281 return Load;
3282}
3283
3284SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
3285 SelectionDAG &DAG) const {
3286 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
3287 DAG.getTarget().getCodeModel());
3288}
3289
3290SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
3291 SelectionDAG &DAG) const {
3292 return getAddr(cast<JumpTableSDNode>(Op), DAG,
3293 DAG.getTarget().getCodeModel());
3294}
3295
3296SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
3297 SelectionDAG &DAG) const {
3298 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
3299 DAG.getTarget().getCodeModel());
3300}
3301
3302SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
3303 SelectionDAG &DAG) const {
3304 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3305 assert(N->getOffset() == 0 && "unexpected offset in global node");
3306 auto CM = DAG.getTarget().getCodeModel();
3307 const GlobalValue *GV = N->getGlobal();
3308
3309 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
3310 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
3311 CM = *GCM;
3312 }
3313
3314 return getAddr(N, DAG, CM, GV->isDSOLocal());
3315}
3316
3317SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
3318 SelectionDAG &DAG,
3319 unsigned Opc, bool UseGOT,
3320 bool Large) const {
3321 SDLoc DL(N);
3322 EVT Ty = getPointerTy(DAG.getDataLayout());
3323 MVT GRLenVT = Subtarget.getGRLenVT();
3324
3325 // This is not actually used, but is necessary for successfully matching the
3326 // PseudoLA_*_LARGE nodes.
3327 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3328 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3329
3330 // Only IE needs an extra argument for large code model.
3331 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
3332 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3333 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3334
3335 // If it is LE for normal/medium code model, the add tp operation will occur
3336 // during the pseudo-instruction expansion.
3337 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
3338 return Offset;
3339
3340 if (UseGOT) {
3341 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3342 MachineFunction &MF = DAG.getMachineFunction();
3343 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3347 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3348 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
3349 }
3350
3351 // Add the thread pointer.
3352 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
3353 DAG.getRegister(LoongArch::R2, GRLenVT));
3354}
3355
3356SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
3357 SelectionDAG &DAG,
3358 unsigned Opc,
3359 bool Large) const {
3360 SDLoc DL(N);
3361 EVT Ty = getPointerTy(DAG.getDataLayout());
3362 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
3363
3364 // This is not actually used, but is necessary for successfully matching the
3365 // PseudoLA_*_LARGE nodes.
3366 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3367
3368 // Use a PC-relative addressing mode to access the dynamic GOT address.
3369 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3370 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3371 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3372
3373 // Prepare argument list to generate call.
3375 Args.emplace_back(Load, CallTy);
3376
3377 // Setup call to __tls_get_addr.
3378 TargetLowering::CallLoweringInfo CLI(DAG);
3379 CLI.setDebugLoc(DL)
3380 .setChain(DAG.getEntryNode())
3381 .setLibCallee(CallingConv::C, CallTy,
3382 DAG.getExternalSymbol("__tls_get_addr", Ty),
3383 std::move(Args));
3384
3385 return LowerCallTo(CLI).first;
3386}
3387
3388SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
3389 SelectionDAG &DAG, unsigned Opc,
3390 bool Large) const {
3391 SDLoc DL(N);
3392 EVT Ty = getPointerTy(DAG.getDataLayout());
3393 const GlobalValue *GV = N->getGlobal();
3394
3395 // This is not actually used, but is necessary for successfully matching the
3396 // PseudoLA_*_LARGE nodes.
3397 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3398
3399 // Use a PC-relative addressing mode to access the global dynamic GOT address.
3400 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
3401 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3402 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3403 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3404}
3405
3406SDValue
3407LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3408 SelectionDAG &DAG) const {
3411 report_fatal_error("In GHC calling convention TLS is not supported");
3412
3413 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
3414 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
3415
3416 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3417 assert(N->getOffset() == 0 && "unexpected offset in global node");
3418
3419 if (DAG.getTarget().useEmulatedTLS())
3420 reportFatalUsageError("the emulated TLS is prohibited");
3421
3422 bool IsDesc = DAG.getTarget().useTLSDESC();
3423
3424 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
3426 // In this model, application code calls the dynamic linker function
3427 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
3428 // runtime.
3429 if (!IsDesc)
3430 return getDynamicTLSAddr(N, DAG,
3431 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
3432 : LoongArch::PseudoLA_TLS_GD,
3433 Large);
3434 break;
3436 // Same as GeneralDynamic, except for assembly modifiers and relocation
3437 // records.
3438 if (!IsDesc)
3439 return getDynamicTLSAddr(N, DAG,
3440 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
3441 : LoongArch::PseudoLA_TLS_LD,
3442 Large);
3443 break;
3445 // This model uses the GOT to resolve TLS offsets.
3446 return getStaticTLSAddr(N, DAG,
3447 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
3448 : LoongArch::PseudoLA_TLS_IE,
3449 /*UseGOT=*/true, Large);
3450  case TLSModel::LocalExec:
3451    // This model is used when static linking as the TLS offsets are resolved
3452 // during program linking.
3453 //
3454 // This node doesn't need an extra argument for the large code model.
3455 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
3456 /*UseGOT=*/false, Large);
3457 }
3458
3459 return getTLSDescAddr(N, DAG,
3460 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
3461 : LoongArch::PseudoLA_TLS_DESC,
3462 Large);
3463}
3464
3465template <unsigned N>
3466static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
3467                                    SelectionDAG &DAG, bool IsSigned = false) {
3468 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
3469 // Check the ImmArg.
3470 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3471 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3472 DAG.getContext()->emitError(Op->getOperationName(0) +
3473 ": argument out of range.");
3474 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
3475 }
3476 return SDValue();
3477}
3478
3479SDValue
3480LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3481 SelectionDAG &DAG) const {
3482 switch (Op.getConstantOperandVal(0)) {
3483 default:
3484 return SDValue(); // Don't custom lower most intrinsics.
3485 case Intrinsic::thread_pointer: {
3486 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3487 return DAG.getRegister(LoongArch::R2, PtrVT);
3488 }
3489 case Intrinsic::loongarch_lsx_vpickve2gr_d:
3490 case Intrinsic::loongarch_lsx_vpickve2gr_du:
3491 case Intrinsic::loongarch_lsx_vreplvei_d:
3492 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
3493 return checkIntrinsicImmArg<1>(Op, 2, DAG);
3494 case Intrinsic::loongarch_lsx_vreplvei_w:
3495 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
3496 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
3497 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
3498 case Intrinsic::loongarch_lasx_xvpickve_d:
3499 case Intrinsic::loongarch_lasx_xvpickve_d_f:
3500 return checkIntrinsicImmArg<2>(Op, 2, DAG);
3501 case Intrinsic::loongarch_lasx_xvinsve0_d:
3502 return checkIntrinsicImmArg<2>(Op, 3, DAG);
3503 case Intrinsic::loongarch_lsx_vsat_b:
3504 case Intrinsic::loongarch_lsx_vsat_bu:
3505 case Intrinsic::loongarch_lsx_vrotri_b:
3506 case Intrinsic::loongarch_lsx_vsllwil_h_b:
3507 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
3508 case Intrinsic::loongarch_lsx_vsrlri_b:
3509 case Intrinsic::loongarch_lsx_vsrari_b:
3510 case Intrinsic::loongarch_lsx_vreplvei_h:
3511 case Intrinsic::loongarch_lasx_xvsat_b:
3512 case Intrinsic::loongarch_lasx_xvsat_bu:
3513 case Intrinsic::loongarch_lasx_xvrotri_b:
3514 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
3515 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
3516 case Intrinsic::loongarch_lasx_xvsrlri_b:
3517 case Intrinsic::loongarch_lasx_xvsrari_b:
3518 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
3519 case Intrinsic::loongarch_lasx_xvpickve_w:
3520 case Intrinsic::loongarch_lasx_xvpickve_w_f:
3521 return checkIntrinsicImmArg<3>(Op, 2, DAG);
3522 case Intrinsic::loongarch_lasx_xvinsve0_w:
3523 return checkIntrinsicImmArg<3>(Op, 3, DAG);
3524 case Intrinsic::loongarch_lsx_vsat_h:
3525 case Intrinsic::loongarch_lsx_vsat_hu:
3526 case Intrinsic::loongarch_lsx_vrotri_h:
3527 case Intrinsic::loongarch_lsx_vsllwil_w_h:
3528 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
3529 case Intrinsic::loongarch_lsx_vsrlri_h:
3530 case Intrinsic::loongarch_lsx_vsrari_h:
3531 case Intrinsic::loongarch_lsx_vreplvei_b:
3532 case Intrinsic::loongarch_lasx_xvsat_h:
3533 case Intrinsic::loongarch_lasx_xvsat_hu:
3534 case Intrinsic::loongarch_lasx_xvrotri_h:
3535 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
3536 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
3537 case Intrinsic::loongarch_lasx_xvsrlri_h:
3538 case Intrinsic::loongarch_lasx_xvsrari_h:
3539 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
3540 return checkIntrinsicImmArg<4>(Op, 2, DAG);
3541 case Intrinsic::loongarch_lsx_vsrlni_b_h:
3542 case Intrinsic::loongarch_lsx_vsrani_b_h:
3543 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
3544 case Intrinsic::loongarch_lsx_vsrarni_b_h:
3545 case Intrinsic::loongarch_lsx_vssrlni_b_h:
3546 case Intrinsic::loongarch_lsx_vssrani_b_h:
3547 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
3548 case Intrinsic::loongarch_lsx_vssrani_bu_h:
3549 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
3550 case Intrinsic::loongarch_lsx_vssrarni_b_h:
3551 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
3552 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
3553 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
3554 case Intrinsic::loongarch_lasx_xvsrani_b_h:
3555 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
3556 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
3557 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
3558 case Intrinsic::loongarch_lasx_xvssrani_b_h:
3559 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
3560 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
3561 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
3562 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
3563 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
3564 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
3565 return checkIntrinsicImmArg<4>(Op, 3, DAG);
3566 case Intrinsic::loongarch_lsx_vsat_w:
3567 case Intrinsic::loongarch_lsx_vsat_wu:
3568 case Intrinsic::loongarch_lsx_vrotri_w:
3569 case Intrinsic::loongarch_lsx_vsllwil_d_w:
3570 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
3571 case Intrinsic::loongarch_lsx_vsrlri_w:
3572 case Intrinsic::loongarch_lsx_vsrari_w:
3573 case Intrinsic::loongarch_lsx_vslei_bu:
3574 case Intrinsic::loongarch_lsx_vslei_hu:
3575 case Intrinsic::loongarch_lsx_vslei_wu:
3576 case Intrinsic::loongarch_lsx_vslei_du:
3577 case Intrinsic::loongarch_lsx_vslti_bu:
3578 case Intrinsic::loongarch_lsx_vslti_hu:
3579 case Intrinsic::loongarch_lsx_vslti_wu:
3580 case Intrinsic::loongarch_lsx_vslti_du:
3581 case Intrinsic::loongarch_lsx_vbsll_v:
3582 case Intrinsic::loongarch_lsx_vbsrl_v:
3583 case Intrinsic::loongarch_lasx_xvsat_w:
3584 case Intrinsic::loongarch_lasx_xvsat_wu:
3585 case Intrinsic::loongarch_lasx_xvrotri_w:
3586 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
3587 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
3588 case Intrinsic::loongarch_lasx_xvsrlri_w:
3589 case Intrinsic::loongarch_lasx_xvsrari_w:
3590 case Intrinsic::loongarch_lasx_xvslei_bu:
3591 case Intrinsic::loongarch_lasx_xvslei_hu:
3592 case Intrinsic::loongarch_lasx_xvslei_wu:
3593 case Intrinsic::loongarch_lasx_xvslei_du:
3594 case Intrinsic::loongarch_lasx_xvslti_bu:
3595 case Intrinsic::loongarch_lasx_xvslti_hu:
3596 case Intrinsic::loongarch_lasx_xvslti_wu:
3597 case Intrinsic::loongarch_lasx_xvslti_du:
3598 case Intrinsic::loongarch_lasx_xvbsll_v:
3599 case Intrinsic::loongarch_lasx_xvbsrl_v:
3600 return checkIntrinsicImmArg<5>(Op, 2, DAG);
3601 case Intrinsic::loongarch_lsx_vseqi_b:
3602 case Intrinsic::loongarch_lsx_vseqi_h:
3603 case Intrinsic::loongarch_lsx_vseqi_w:
3604 case Intrinsic::loongarch_lsx_vseqi_d:
3605 case Intrinsic::loongarch_lsx_vslei_b:
3606 case Intrinsic::loongarch_lsx_vslei_h:
3607 case Intrinsic::loongarch_lsx_vslei_w:
3608 case Intrinsic::loongarch_lsx_vslei_d:
3609 case Intrinsic::loongarch_lsx_vslti_b:
3610 case Intrinsic::loongarch_lsx_vslti_h:
3611 case Intrinsic::loongarch_lsx_vslti_w:
3612 case Intrinsic::loongarch_lsx_vslti_d:
3613 case Intrinsic::loongarch_lasx_xvseqi_b:
3614 case Intrinsic::loongarch_lasx_xvseqi_h:
3615 case Intrinsic::loongarch_lasx_xvseqi_w:
3616 case Intrinsic::loongarch_lasx_xvseqi_d:
3617 case Intrinsic::loongarch_lasx_xvslei_b:
3618 case Intrinsic::loongarch_lasx_xvslei_h:
3619 case Intrinsic::loongarch_lasx_xvslei_w:
3620 case Intrinsic::loongarch_lasx_xvslei_d:
3621 case Intrinsic::loongarch_lasx_xvslti_b:
3622 case Intrinsic::loongarch_lasx_xvslti_h:
3623 case Intrinsic::loongarch_lasx_xvslti_w:
3624 case Intrinsic::loongarch_lasx_xvslti_d:
3625 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
3626 case Intrinsic::loongarch_lsx_vsrlni_h_w:
3627 case Intrinsic::loongarch_lsx_vsrani_h_w:
3628 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
3629 case Intrinsic::loongarch_lsx_vsrarni_h_w:
3630 case Intrinsic::loongarch_lsx_vssrlni_h_w:
3631 case Intrinsic::loongarch_lsx_vssrani_h_w:
3632 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
3633 case Intrinsic::loongarch_lsx_vssrani_hu_w:
3634 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
3635 case Intrinsic::loongarch_lsx_vssrarni_h_w:
3636 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
3637 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
3638 case Intrinsic::loongarch_lsx_vfrstpi_b:
3639 case Intrinsic::loongarch_lsx_vfrstpi_h:
3640 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
3641 case Intrinsic::loongarch_lasx_xvsrani_h_w:
3642 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
3643 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
3644 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
3645 case Intrinsic::loongarch_lasx_xvssrani_h_w:
3646 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
3647 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
3648 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
3649 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
3650 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
3651 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
3652 case Intrinsic::loongarch_lasx_xvfrstpi_b:
3653 case Intrinsic::loongarch_lasx_xvfrstpi_h:
3654 return checkIntrinsicImmArg<5>(Op, 3, DAG);
3655 case Intrinsic::loongarch_lsx_vsat_d:
3656 case Intrinsic::loongarch_lsx_vsat_du:
3657 case Intrinsic::loongarch_lsx_vrotri_d:
3658 case Intrinsic::loongarch_lsx_vsrlri_d:
3659 case Intrinsic::loongarch_lsx_vsrari_d:
3660 case Intrinsic::loongarch_lasx_xvsat_d:
3661 case Intrinsic::loongarch_lasx_xvsat_du:
3662 case Intrinsic::loongarch_lasx_xvrotri_d:
3663 case Intrinsic::loongarch_lasx_xvsrlri_d:
3664 case Intrinsic::loongarch_lasx_xvsrari_d:
3665 return checkIntrinsicImmArg<6>(Op, 2, DAG);
3666 case Intrinsic::loongarch_lsx_vsrlni_w_d:
3667 case Intrinsic::loongarch_lsx_vsrani_w_d:
3668 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
3669 case Intrinsic::loongarch_lsx_vsrarni_w_d:
3670 case Intrinsic::loongarch_lsx_vssrlni_w_d:
3671 case Intrinsic::loongarch_lsx_vssrani_w_d:
3672 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
3673 case Intrinsic::loongarch_lsx_vssrani_wu_d:
3674 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
3675 case Intrinsic::loongarch_lsx_vssrarni_w_d:
3676 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
3677 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
3678 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
3679 case Intrinsic::loongarch_lasx_xvsrani_w_d:
3680 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
3681 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
3682 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
3683 case Intrinsic::loongarch_lasx_xvssrani_w_d:
3684 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
3685 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
3686 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
3687 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
3688 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
3689 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
3690 return checkIntrinsicImmArg<6>(Op, 3, DAG);
3691 case Intrinsic::loongarch_lsx_vsrlni_d_q:
3692 case Intrinsic::loongarch_lsx_vsrani_d_q:
3693 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
3694 case Intrinsic::loongarch_lsx_vsrarni_d_q:
3695 case Intrinsic::loongarch_lsx_vssrlni_d_q:
3696 case Intrinsic::loongarch_lsx_vssrani_d_q:
3697 case Intrinsic::loongarch_lsx_vssrlni_du_q:
3698 case Intrinsic::loongarch_lsx_vssrani_du_q:
3699 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
3700 case Intrinsic::loongarch_lsx_vssrarni_d_q:
3701 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
3702 case Intrinsic::loongarch_lsx_vssrarni_du_q:
3703 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
3704 case Intrinsic::loongarch_lasx_xvsrani_d_q:
3705 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
3706 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
3707 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
3708 case Intrinsic::loongarch_lasx_xvssrani_d_q:
3709 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
3710 case Intrinsic::loongarch_lasx_xvssrani_du_q:
3711 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
3712 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
3713 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
3714 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
3715 return checkIntrinsicImmArg<7>(Op, 3, DAG);
3716 case Intrinsic::loongarch_lsx_vnori_b:
3717 case Intrinsic::loongarch_lsx_vshuf4i_b:
3718 case Intrinsic::loongarch_lsx_vshuf4i_h:
3719 case Intrinsic::loongarch_lsx_vshuf4i_w:
3720 case Intrinsic::loongarch_lasx_xvnori_b:
3721 case Intrinsic::loongarch_lasx_xvshuf4i_b:
3722 case Intrinsic::loongarch_lasx_xvshuf4i_h:
3723 case Intrinsic::loongarch_lasx_xvshuf4i_w:
3724 case Intrinsic::loongarch_lasx_xvpermi_d:
3725 return checkIntrinsicImmArg<8>(Op, 2, DAG);
3726 case Intrinsic::loongarch_lsx_vshuf4i_d:
3727 case Intrinsic::loongarch_lsx_vpermi_w:
3728 case Intrinsic::loongarch_lsx_vbitseli_b:
3729 case Intrinsic::loongarch_lsx_vextrins_b:
3730 case Intrinsic::loongarch_lsx_vextrins_h:
3731 case Intrinsic::loongarch_lsx_vextrins_w:
3732 case Intrinsic::loongarch_lsx_vextrins_d:
3733 case Intrinsic::loongarch_lasx_xvshuf4i_d:
3734 case Intrinsic::loongarch_lasx_xvpermi_w:
3735 case Intrinsic::loongarch_lasx_xvpermi_q:
3736 case Intrinsic::loongarch_lasx_xvbitseli_b:
3737 case Intrinsic::loongarch_lasx_xvextrins_b:
3738 case Intrinsic::loongarch_lasx_xvextrins_h:
3739 case Intrinsic::loongarch_lasx_xvextrins_w:
3740 case Intrinsic::loongarch_lasx_xvextrins_d:
3741 return checkIntrinsicImmArg<8>(Op, 3, DAG);
3742 case Intrinsic::loongarch_lsx_vrepli_b:
3743 case Intrinsic::loongarch_lsx_vrepli_h:
3744 case Intrinsic::loongarch_lsx_vrepli_w:
3745 case Intrinsic::loongarch_lsx_vrepli_d:
3746 case Intrinsic::loongarch_lasx_xvrepli_b:
3747 case Intrinsic::loongarch_lasx_xvrepli_h:
3748 case Intrinsic::loongarch_lasx_xvrepli_w:
3749 case Intrinsic::loongarch_lasx_xvrepli_d:
3750 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
3751 case Intrinsic::loongarch_lsx_vldi:
3752 case Intrinsic::loongarch_lasx_xvldi:
3753 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
3754 }
3755}
3756
3757// Helper function that emits an error message for intrinsics with a chain and
3758// returns the merged values of a UNDEF and the chain.
3759static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
3760                                                  StringRef ErrorMsg,
3761 SelectionDAG &DAG) {
3762 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
3763 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
3764 SDLoc(Op));
3765}
3766
3767SDValue
3768LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
3769 SelectionDAG &DAG) const {
3770 SDLoc DL(Op);
3771 MVT GRLenVT = Subtarget.getGRLenVT();
3772 EVT VT = Op.getValueType();
3773 SDValue Chain = Op.getOperand(0);
3774 const StringRef ErrorMsgOOR = "argument out of range";
3775 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3776 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3777
3778 switch (Op.getConstantOperandVal(1)) {
3779 default:
3780 return Op;
3781 case Intrinsic::loongarch_crc_w_b_w:
3782 case Intrinsic::loongarch_crc_w_h_w:
3783 case Intrinsic::loongarch_crc_w_w_w:
3784 case Intrinsic::loongarch_crc_w_d_w:
3785 case Intrinsic::loongarch_crcc_w_b_w:
3786 case Intrinsic::loongarch_crcc_w_h_w:
3787 case Intrinsic::loongarch_crcc_w_w_w:
3788 case Intrinsic::loongarch_crcc_w_d_w:
3789 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
3790 case Intrinsic::loongarch_csrrd_w:
3791 case Intrinsic::loongarch_csrrd_d: {
3792 unsigned Imm = Op.getConstantOperandVal(2);
3793 return !isUInt<14>(Imm)
3794 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3795 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
3796 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3797 }
3798 case Intrinsic::loongarch_csrwr_w:
3799 case Intrinsic::loongarch_csrwr_d: {
3800 unsigned Imm = Op.getConstantOperandVal(3);
3801 return !isUInt<14>(Imm)
3802 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3803 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
3804 {Chain, Op.getOperand(2),
3805 DAG.getConstant(Imm, DL, GRLenVT)});
3806 }
3807 case Intrinsic::loongarch_csrxchg_w:
3808 case Intrinsic::loongarch_csrxchg_d: {
3809 unsigned Imm = Op.getConstantOperandVal(4);
3810 return !isUInt<14>(Imm)
3811 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3812 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
3813 {Chain, Op.getOperand(2), Op.getOperand(3),
3814 DAG.getConstant(Imm, DL, GRLenVT)});
3815 }
3816 case Intrinsic::loongarch_iocsrrd_d: {
3817 return DAG.getNode(
3818 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
3819 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
3820 }
3821#define IOCSRRD_CASE(NAME, NODE) \
3822 case Intrinsic::loongarch_##NAME: { \
3823 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
3824 {Chain, Op.getOperand(2)}); \
3825 }
3826 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
3827 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
3828 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
3829#undef IOCSRRD_CASE
3830 case Intrinsic::loongarch_cpucfg: {
3831 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
3832 {Chain, Op.getOperand(2)});
3833 }
3834 case Intrinsic::loongarch_lddir_d: {
3835 unsigned Imm = Op.getConstantOperandVal(3);
3836 return !isUInt<8>(Imm)
3837 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3838 : Op;
3839 }
3840 case Intrinsic::loongarch_movfcsr2gr: {
3841 if (!Subtarget.hasBasicF())
3842 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
3843 unsigned Imm = Op.getConstantOperandVal(2);
3844 return !isUInt<2>(Imm)
3845 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3846 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
3847 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3848 }
3849 case Intrinsic::loongarch_lsx_vld:
3850 case Intrinsic::loongarch_lsx_vldrepl_b:
3851 case Intrinsic::loongarch_lasx_xvld:
3852 case Intrinsic::loongarch_lasx_xvldrepl_b:
3853 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3854 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3855 : SDValue();
3856 case Intrinsic::loongarch_lsx_vldrepl_h:
3857 case Intrinsic::loongarch_lasx_xvldrepl_h:
3858 return !isShiftedInt<11, 1>(
3859 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3860               ? emitIntrinsicWithChainErrorMessage(
3861                     Op, "argument out of range or not a multiple of 2", DAG)
3862 : SDValue();
3863 case Intrinsic::loongarch_lsx_vldrepl_w:
3864 case Intrinsic::loongarch_lasx_xvldrepl_w:
3865 return !isShiftedInt<10, 2>(
3866 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3867               ? emitIntrinsicWithChainErrorMessage(
3868                     Op, "argument out of range or not a multiple of 4", DAG)
3869 : SDValue();
3870 case Intrinsic::loongarch_lsx_vldrepl_d:
3871 case Intrinsic::loongarch_lasx_xvldrepl_d:
3872 return !isShiftedInt<9, 3>(
3873 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3874               ? emitIntrinsicWithChainErrorMessage(
3875                     Op, "argument out of range or not a multiple of 8", DAG)
3876 : SDValue();
3877 }
3878}
3879
3880// Helper function that emits an error message for intrinsics with a void
3881// return value and returns the chain.
3882static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
3883                                         SelectionDAG &DAG) {
3884
3885 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
3886 return Op.getOperand(0);
3887}
3888
3889SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
3890 SelectionDAG &DAG) const {
3891 SDLoc DL(Op);
3892 MVT GRLenVT = Subtarget.getGRLenVT();
3893 SDValue Chain = Op.getOperand(0);
3894 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
3895 SDValue Op2 = Op.getOperand(2);
3896 const StringRef ErrorMsgOOR = "argument out of range";
3897 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3898 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
3899 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3900
3901 switch (IntrinsicEnum) {
3902 default:
3903 // TODO: Add more Intrinsics.
3904 return SDValue();
3905 case Intrinsic::loongarch_cacop_d:
3906 case Intrinsic::loongarch_cacop_w: {
3907 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
3908 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
3909 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
3910 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
3911 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
3912 unsigned Imm1 = Op2->getAsZExtVal();
3913 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
3914 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
3915 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
3916 return Op;
3917 }
3918 case Intrinsic::loongarch_dbar: {
3919 unsigned Imm = Op2->getAsZExtVal();
3920 return !isUInt<15>(Imm)
3921 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3922 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
3923 DAG.getConstant(Imm, DL, GRLenVT));
3924 }
3925 case Intrinsic::loongarch_ibar: {
3926 unsigned Imm = Op2->getAsZExtVal();
3927 return !isUInt<15>(Imm)
3928 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3929 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
3930 DAG.getConstant(Imm, DL, GRLenVT));
3931 }
3932 case Intrinsic::loongarch_break: {
3933 unsigned Imm = Op2->getAsZExtVal();
3934 return !isUInt<15>(Imm)
3935 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3936 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
3937 DAG.getConstant(Imm, DL, GRLenVT));
3938 }
3939 case Intrinsic::loongarch_movgr2fcsr: {
3940 if (!Subtarget.hasBasicF())
3941 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
3942 unsigned Imm = Op2->getAsZExtVal();
3943 return !isUInt<2>(Imm)
3944 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3945 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
3946 DAG.getConstant(Imm, DL, GRLenVT),
3947 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
3948 Op.getOperand(3)));
3949 }
3950 case Intrinsic::loongarch_syscall: {
3951 unsigned Imm = Op2->getAsZExtVal();
3952 return !isUInt<15>(Imm)
3953 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3954 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
3955 DAG.getConstant(Imm, DL, GRLenVT));
3956 }
3957#define IOCSRWR_CASE(NAME, NODE) \
3958 case Intrinsic::loongarch_##NAME: { \
3959 SDValue Op3 = Op.getOperand(3); \
3960 return Subtarget.is64Bit() \
3961 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
3962 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
3963 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
3964 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
3965 Op3); \
3966 }
3967 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
3968 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
3969 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
3970#undef IOCSRWR_CASE
3971 case Intrinsic::loongarch_iocsrwr_d: {
3972 return !Subtarget.is64Bit()
3973 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
3974 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
3975 Op2,
3976 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
3977 Op.getOperand(3)));
3978 }
3979#define ASRT_LE_GT_CASE(NAME) \
3980 case Intrinsic::loongarch_##NAME: { \
3981 return !Subtarget.is64Bit() \
3982 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
3983 : Op; \
3984 }
3985 ASRT_LE_GT_CASE(asrtle_d)
3986 ASRT_LE_GT_CASE(asrtgt_d)
3987#undef ASRT_LE_GT_CASE
3988 case Intrinsic::loongarch_ldpte_d: {
3989 unsigned Imm = Op.getConstantOperandVal(3);
3990 return !Subtarget.is64Bit()
3991 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
3992 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3993 : Op;
3994 }
3995 case Intrinsic::loongarch_lsx_vst:
3996 case Intrinsic::loongarch_lasx_xvst:
3997 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
3998 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3999 : SDValue();
4000 case Intrinsic::loongarch_lasx_xvstelm_b:
4001 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4002 !isUInt<5>(Op.getConstantOperandVal(5)))
4003 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4004 : SDValue();
4005 case Intrinsic::loongarch_lsx_vstelm_b:
4006 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4007 !isUInt<4>(Op.getConstantOperandVal(5)))
4008 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4009 : SDValue();
4010 case Intrinsic::loongarch_lasx_xvstelm_h:
4011 return (!isShiftedInt<8, 1>(
4012 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4013 !isUInt<4>(Op.getConstantOperandVal(5)))
4014               ? emitIntrinsicErrorMessage(
4015                     Op, "argument out of range or not a multiple of 2", DAG)
4016 : SDValue();
4017 case Intrinsic::loongarch_lsx_vstelm_h:
4018 return (!isShiftedInt<8, 1>(
4019 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4020 !isUInt<3>(Op.getConstantOperandVal(5)))
4021               ? emitIntrinsicErrorMessage(
4022                     Op, "argument out of range or not a multiple of 2", DAG)
4023 : SDValue();
4024 case Intrinsic::loongarch_lasx_xvstelm_w:
4025 return (!isShiftedInt<8, 2>(
4026 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4027 !isUInt<3>(Op.getConstantOperandVal(5)))
4028               ? emitIntrinsicErrorMessage(
4029                     Op, "argument out of range or not a multiple of 4", DAG)
4030 : SDValue();
4031 case Intrinsic::loongarch_lsx_vstelm_w:
4032 return (!isShiftedInt<8, 2>(
4033 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4034 !isUInt<2>(Op.getConstantOperandVal(5)))
4035               ? emitIntrinsicErrorMessage(
4036                     Op, "argument out of range or not a multiple of 4", DAG)
4037 : SDValue();
4038 case Intrinsic::loongarch_lasx_xvstelm_d:
4039 return (!isShiftedInt<8, 3>(
4040 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4041 !isUInt<2>(Op.getConstantOperandVal(5)))
4042               ? emitIntrinsicErrorMessage(
4043                     Op, "argument out of range or not a multiple of 8", DAG)
4044 : SDValue();
4045 case Intrinsic::loongarch_lsx_vstelm_d:
4046 return (!isShiftedInt<8, 3>(
4047 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4048 !isUInt<1>(Op.getConstantOperandVal(5)))
4049               ? emitIntrinsicErrorMessage(
4050                     Op, "argument out of range or not a multiple of 8", DAG)
4051 : SDValue();
4052 }
4053}
4054
4055SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
4056 SelectionDAG &DAG) const {
4057 SDLoc DL(Op);
4058 SDValue Lo = Op.getOperand(0);
4059 SDValue Hi = Op.getOperand(1);
4060 SDValue Shamt = Op.getOperand(2);
4061 EVT VT = Lo.getValueType();
4062
4063 // if Shamt-GRLen < 0: // Shamt < GRLen
4064 // Lo = Lo << Shamt
4065 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
4066 // else:
4067 // Lo = 0
4068 // Hi = Lo << (Shamt-GRLen)
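  // For example, on LA32 (GRLen = 32) an i64 shift left by 40 takes the else
  // branch: Shamt - GRLen = 8, so Lo becomes 0 and Hi becomes the original Lo
  // shifted left by 8.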
4069
4070 SDValue Zero = DAG.getConstant(0, DL, VT);
4071 SDValue One = DAG.getConstant(1, DL, VT);
4072 SDValue MinusGRLen =
4073 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4074 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4075 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4076 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4077
4078 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
4079 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
4080 SDValue ShiftRightLo =
4081 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
4082 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
4083 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
4084 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
4085
4086 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4087
4088 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
4089 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4090
4091 SDValue Parts[2] = {Lo, Hi};
4092 return DAG.getMergeValues(Parts, DL);
4093}
4094
4095SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
4096 SelectionDAG &DAG,
4097 bool IsSRA) const {
4098 SDLoc DL(Op);
4099 SDValue Lo = Op.getOperand(0);
4100 SDValue Hi = Op.getOperand(1);
4101 SDValue Shamt = Op.getOperand(2);
4102 EVT VT = Lo.getValueType();
4103
4104 // SRA expansion:
4105 // if Shamt-GRLen < 0: // Shamt < GRLen
4106 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4107 // Hi = Hi >>s Shamt
4108 // else:
4109 // Lo = Hi >>s (Shamt-GRLen);
4110 // Hi = Hi >>s (GRLen-1)
4111 //
4112 // SRL expansion:
4113 // if Shamt-GRLen < 0: // Shamt < GRLen
4114 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4115 // Hi = Hi >>u Shamt
4116 // else:
4117 // Lo = Hi >>u (Shamt-GRLen);
4118 // Hi = 0;
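  // For example, an i64 arithmetic shift right by 36 on LA32 (GRLen = 32)
  // takes the else branch: Lo = Hi >>s 4 and Hi = Hi >>s 31, i.e. Hi is
  // filled with the sign bit.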
4119
4120 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
4121
4122 SDValue Zero = DAG.getConstant(0, DL, VT);
4123 SDValue One = DAG.getConstant(1, DL, VT);
4124 SDValue MinusGRLen =
4125 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4126 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4127 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4128 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4129
4130 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
4131 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
4132 SDValue ShiftLeftHi =
4133 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
4134 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
4135 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
4136 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
4137 SDValue HiFalse =
4138 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
4139
4140 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4141
4142 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
4143 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4144
4145 SDValue Parts[2] = {Lo, Hi};
4146 return DAG.getMergeValues(Parts, DL);
4147}
4148
4149// Returns the opcode of the target-specific SDNode that implements the 32-bit
4150// form of the given Opcode.
4151static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
4152  switch (Opcode) {
4153 default:
4154 llvm_unreachable("Unexpected opcode");
4155 case ISD::SDIV:
4156 return LoongArchISD::DIV_W;
4157 case ISD::UDIV:
4158 return LoongArchISD::DIV_WU;
4159 case ISD::SREM:
4160 return LoongArchISD::MOD_W;
4161 case ISD::UREM:
4162 return LoongArchISD::MOD_WU;
4163 case ISD::SHL:
4164 return LoongArchISD::SLL_W;
4165 case ISD::SRA:
4166 return LoongArchISD::SRA_W;
4167 case ISD::SRL:
4168 return LoongArchISD::SRL_W;
4169 case ISD::ROTL:
4170 case ISD::ROTR:
4171 return LoongArchISD::ROTR_W;
4172 case ISD::CTTZ:
4173 return LoongArchISD::CTZ_W;
4174 case ISD::CTLZ:
4175 return LoongArchISD::CLZ_W;
4176 }
4177}
4178
4179// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
4180// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
4181// otherwise be promoted to i64, making it difficult to select the
4182// SLL_W/.../*W node later on, because the fact that the operation was
4183// originally of type i8/i16/i32 is lost.
4184static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
4185                                   unsigned ExtOpc = ISD::ANY_EXTEND) {
4186 SDLoc DL(N);
4187 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
4188 SDValue NewOp0, NewRes;
4189
4190 switch (NumOp) {
4191 default:
4192 llvm_unreachable("Unexpected NumOp");
4193 case 1: {
4194 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4195 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
4196 break;
4197 }
4198 case 2: {
4199 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4200 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
4201 if (N->getOpcode() == ISD::ROTL) {
4202 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
4203 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
4204 }
4205 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
4206 break;
4207 }
4208 // TODO:Handle more NumOp.
4209 }
4210
4211 // ReplaceNodeResults requires we maintain the same type for the return
4212 // value.
4213 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
4214}
4215
4216// Converts the given 32-bit operation to an i64 operation with sign-extension
4217// semantics, so that redundant sign-extension instructions can be avoided.
4218static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
4219  SDLoc DL(N);
4220 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4221 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4222 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
4223 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
4224 DAG.getValueType(MVT::i32));
4225 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
4226}
4227
4228// Helper function that emits an error message for intrinsics with or without a
4229// chain and pushes a UNDEF and (optionally) the chain as the results.
4230static void emitErrorAndReplaceIntrinsicResults(
4231    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
4232    StringRef ErrorMsg, bool WithChain = true) {
4233 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
4234 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
4235 if (!WithChain)
4236 return;
4237 Results.push_back(N->getOperand(0));
4238}
4239
4240template <unsigned N>
4241static void
4242replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
4243                         SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
4244 unsigned ResOp) {
4245 const StringRef ErrorMsgOOR = "argument out of range";
4246 unsigned Imm = Node->getConstantOperandVal(2);
4247 if (!isUInt<N>(Imm)) {
4248    emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
4249                                        /*WithChain=*/false);
4250 return;
4251 }
4252 SDLoc DL(Node);
4253 SDValue Vec = Node->getOperand(1);
4254
4255 SDValue PickElt =
4256 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
4257 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
4258                  DAG.getValueType(Vec.getValueType().getVectorElementType()));
4259  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
4260 PickElt.getValue(0)));
4261}
4262
4264replaceVecCondBranchResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
4265                            SelectionDAG &DAG,
4266 const LoongArchSubtarget &Subtarget,
4267 unsigned ResOp) {
4268 SDLoc DL(N);
4269 SDValue Vec = N->getOperand(1);
4270
4271 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
4272 Results.push_back(
4273 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
4274}
4275
4276static void
4277replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
4278                                 SelectionDAG &DAG,
4279 const LoongArchSubtarget &Subtarget) {
4280 switch (N->getConstantOperandVal(0)) {
4281 default:
4282 llvm_unreachable("Unexpected Intrinsic.");
4283 case Intrinsic::loongarch_lsx_vpickve2gr_b:
4284 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4285                                LoongArchISD::VPICK_SEXT_ELT);
4286    break;
4287 case Intrinsic::loongarch_lsx_vpickve2gr_h:
4288 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
4289 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4290                                LoongArchISD::VPICK_SEXT_ELT);
4291    break;
4292 case Intrinsic::loongarch_lsx_vpickve2gr_w:
4293 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4294                                LoongArchISD::VPICK_SEXT_ELT);
4295    break;
4296 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
4297 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4298                                LoongArchISD::VPICK_ZEXT_ELT);
4299    break;
4300 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
4301 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
4302 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4303                                LoongArchISD::VPICK_ZEXT_ELT);
4304    break;
4305 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
4306 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4307                                LoongArchISD::VPICK_ZEXT_ELT);
4308    break;
4309 case Intrinsic::loongarch_lsx_bz_b:
4310 case Intrinsic::loongarch_lsx_bz_h:
4311 case Intrinsic::loongarch_lsx_bz_w:
4312 case Intrinsic::loongarch_lsx_bz_d:
4313 case Intrinsic::loongarch_lasx_xbz_b:
4314 case Intrinsic::loongarch_lasx_xbz_h:
4315 case Intrinsic::loongarch_lasx_xbz_w:
4316 case Intrinsic::loongarch_lasx_xbz_d:
4317 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4318                                LoongArchISD::VALL_ZERO);
4319    break;
4320 case Intrinsic::loongarch_lsx_bz_v:
4321 case Intrinsic::loongarch_lasx_xbz_v:
4322 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4323                                LoongArchISD::VANY_ZERO);
4324    break;
4325 case Intrinsic::loongarch_lsx_bnz_b:
4326 case Intrinsic::loongarch_lsx_bnz_h:
4327 case Intrinsic::loongarch_lsx_bnz_w:
4328 case Intrinsic::loongarch_lsx_bnz_d:
4329 case Intrinsic::loongarch_lasx_xbnz_b:
4330 case Intrinsic::loongarch_lasx_xbnz_h:
4331 case Intrinsic::loongarch_lasx_xbnz_w:
4332 case Intrinsic::loongarch_lasx_xbnz_d:
4333 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4334                                LoongArchISD::VALL_NONZERO);
4335    break;
4336 case Intrinsic::loongarch_lsx_bnz_v:
4337 case Intrinsic::loongarch_lasx_xbnz_v:
4338 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4339                                LoongArchISD::VANY_NONZERO);
4340    break;
4341 }
4342}
4343
4344static void replaceCMP_XCHG_128Results(SDNode *N,
4345                                       SmallVectorImpl<SDValue> &Results,
4346                                       SelectionDAG &DAG) {
4347 assert(N->getValueType(0) == MVT::i128 &&
4348 "AtomicCmpSwap on types less than 128 should be legal");
4349 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4350
4351 unsigned Opcode;
4352 switch (MemOp->getMergedOrdering()) {
4353  case AtomicOrdering::Acquire:
4354  case AtomicOrdering::AcquireRelease:
4355  case AtomicOrdering::SequentiallyConsistent:
4356    Opcode = LoongArch::PseudoCmpXchg128Acquire;
4357 break;
4358  case AtomicOrdering::Monotonic:
4359  case AtomicOrdering::Release:
4360    Opcode = LoongArch::PseudoCmpXchg128;
4361 break;
4362 default:
4363 llvm_unreachable("Unexpected ordering!");
4364 }
4365
4366 SDLoc DL(N);
4367 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
4368 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
4369 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
4370 NewVal.first, NewVal.second, N->getOperand(0)};
4371
4372 SDNode *CmpSwap = DAG.getMachineNode(
4373 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
4374 Ops);
4375 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4376 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
4377 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
4378 Results.push_back(SDValue(CmpSwap, 3));
4379}
4380
4381void LoongArchTargetLowering::ReplaceNodeResults(
4382    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
4383  SDLoc DL(N);
4384 EVT VT = N->getValueType(0);
4385 switch (N->getOpcode()) {
4386 default:
4387 llvm_unreachable("Don't know how to legalize this operation");
4388 case ISD::ADD:
4389 case ISD::SUB:
4390 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4391 "Unexpected custom legalisation");
4392 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
4393 break;
4394 case ISD::SDIV:
4395 case ISD::UDIV:
4396 case ISD::SREM:
4397 case ISD::UREM:
4398 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4399 "Unexpected custom legalisation");
4400 Results.push_back(customLegalizeToWOp(N, DAG, 2,
4401 Subtarget.hasDiv32() && VT == MVT::i32
4402                                              ? ISD::ANY_EXTEND
4403                                              : ISD::SIGN_EXTEND));
4404 break;
4405 case ISD::SHL:
4406 case ISD::SRA:
4407 case ISD::SRL:
4408 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4409 "Unexpected custom legalisation");
4410 if (N->getOperand(1).getOpcode() != ISD::Constant) {
4411 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4412 break;
4413 }
4414 break;
4415 case ISD::ROTL:
4416 case ISD::ROTR:
4417 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4418 "Unexpected custom legalisation");
4419 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4420 break;
4421 case ISD::FP_TO_SINT: {
4422 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4423 "Unexpected custom legalisation");
4424 SDValue Src = N->getOperand(0);
4425 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
4426 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
4427        TargetLowering::TypeSoftenFloat) {
4428      if (!isTypeLegal(Src.getValueType()))
4429 return;
4430 if (Src.getValueType() == MVT::f16)
4431 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
4432 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
4433 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
4434 return;
4435 }
4436 // If the FP type needs to be softened, emit a library call using the 'si'
4437 // version. If we left it to default legalization we'd end up with 'di'.
4438 RTLIB::Libcall LC;
4439 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
4440 MakeLibCallOptions CallOptions;
4441 EVT OpVT = Src.getValueType();
4442 CallOptions.setTypeListBeforeSoften(OpVT, VT);
4443 SDValue Chain = SDValue();
4444 SDValue Result;
4445 std::tie(Result, Chain) =
4446 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
4447 Results.push_back(Result);
4448 break;
4449 }
4450 case ISD::BITCAST: {
4451 SDValue Src = N->getOperand(0);
4452 EVT SrcVT = Src.getValueType();
4453 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
4454 Subtarget.hasBasicF()) {
4455 SDValue Dst =
4456 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
4457 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
4458 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
4460 DAG.getVTList(MVT::i32, MVT::i32), Src);
4461 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
4462 NewReg.getValue(0), NewReg.getValue(1));
4463 Results.push_back(RetReg);
4464 }
4465 break;
4466 }
4467 case ISD::FP_TO_UINT: {
4468 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4469 "Unexpected custom legalisation");
4470 auto &TLI = DAG.getTargetLoweringInfo();
4471 SDValue Tmp1, Tmp2;
4472 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
4473 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
4474 break;
4475 }
4476 case ISD::BSWAP: {
4477 SDValue Src = N->getOperand(0);
4478 assert((VT == MVT::i16 || VT == MVT::i32) &&
4479 "Unexpected custom legalization");
4480 MVT GRLenVT = Subtarget.getGRLenVT();
4481 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4482 SDValue Tmp;
4483 switch (VT.getSizeInBits()) {
4484 default:
4485 llvm_unreachable("Unexpected operand width");
4486 case 16:
4487 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
4488 break;
4489 case 32:
4490 // Only LA64 will get to here due to the size mismatch between VT and
4491 // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.
4492 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
4493 break;
4494 }
4495 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4496 break;
4497 }
4498 case ISD::BITREVERSE: {
4499 SDValue Src = N->getOperand(0);
4500 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
4501 "Unexpected custom legalization");
4502 MVT GRLenVT = Subtarget.getGRLenVT();
4503 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4504 SDValue Tmp;
4505 switch (VT.getSizeInBits()) {
4506 default:
4507 llvm_unreachable("Unexpected operand width");
4508 case 8:
4509 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
4510 break;
4511 case 32:
4512 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
4513 break;
4514 }
4515 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4516 break;
4517 }
4518 case ISD::CTLZ:
4519 case ISD::CTTZ: {
4520 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4521 "Unexpected custom legalisation");
4522 Results.push_back(customLegalizeToWOp(N, DAG, 1));
4523 break;
4524 }
4525  case ISD::INTRINSIC_W_CHAIN: {
4526    SDValue Chain = N->getOperand(0);
4527 SDValue Op2 = N->getOperand(2);
4528 MVT GRLenVT = Subtarget.getGRLenVT();
4529 const StringRef ErrorMsgOOR = "argument out of range";
4530 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4531 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4532
4533 switch (N->getConstantOperandVal(1)) {
4534 default:
4535 llvm_unreachable("Unexpected Intrinsic.");
4536 case Intrinsic::loongarch_movfcsr2gr: {
4537 if (!Subtarget.hasBasicF()) {
4538 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
4539 return;
4540 }
4541 unsigned Imm = Op2->getAsZExtVal();
4542 if (!isUInt<2>(Imm)) {
4543 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4544 return;
4545 }
4546 SDValue MOVFCSR2GRResults = DAG.getNode(
4547 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
4548 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4549 Results.push_back(
4550 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
4551 Results.push_back(MOVFCSR2GRResults.getValue(1));
4552 break;
4553 }
4554#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
4555 case Intrinsic::loongarch_##NAME: { \
4556 SDValue NODE = DAG.getNode( \
4557 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4558 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4559 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4560 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4561 Results.push_back(NODE.getValue(1)); \
4562 break; \
4563 }
4564 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
4565 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
4566 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
4567 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
4568 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
4569 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
4570#undef CRC_CASE_EXT_BINARYOP
4571
4572#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
4573 case Intrinsic::loongarch_##NAME: { \
4574 SDValue NODE = DAG.getNode( \
4575 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4576 {Chain, Op2, \
4577 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4578 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4579 Results.push_back(NODE.getValue(1)); \
4580 break; \
4581 }
4582 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
4583 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
4584#undef CRC_CASE_EXT_UNARYOP
4585#define CSR_CASE(ID) \
4586 case Intrinsic::loongarch_##ID: { \
4587 if (!Subtarget.is64Bit()) \
4588 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
4589 break; \
4590 }
4591 CSR_CASE(csrrd_d);
4592 CSR_CASE(csrwr_d);
4593 CSR_CASE(csrxchg_d);
4594 CSR_CASE(iocsrrd_d);
4595#undef CSR_CASE
4596 case Intrinsic::loongarch_csrrd_w: {
4597 unsigned Imm = Op2->getAsZExtVal();
4598 if (!isUInt<14>(Imm)) {
4599 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4600 return;
4601 }
4602 SDValue CSRRDResults =
4603 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4604 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4605 Results.push_back(
4606 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
4607 Results.push_back(CSRRDResults.getValue(1));
4608 break;
4609 }
4610 case Intrinsic::loongarch_csrwr_w: {
4611 unsigned Imm = N->getConstantOperandVal(3);
4612 if (!isUInt<14>(Imm)) {
4613 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4614 return;
4615 }
4616 SDValue CSRWRResults =
4617 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4618 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4619 DAG.getConstant(Imm, DL, GRLenVT)});
4620 Results.push_back(
4621 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
4622 Results.push_back(CSRWRResults.getValue(1));
4623 break;
4624 }
4625 case Intrinsic::loongarch_csrxchg_w: {
4626 unsigned Imm = N->getConstantOperandVal(4);
4627 if (!isUInt<14>(Imm)) {
4628 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4629 return;
4630 }
4631 SDValue CSRXCHGResults = DAG.getNode(
4632 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4633 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4634 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
4635 DAG.getConstant(Imm, DL, GRLenVT)});
4636 Results.push_back(
4637 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
4638 Results.push_back(CSRXCHGResults.getValue(1));
4639 break;
4640 }
4641#define IOCSRRD_CASE(NAME, NODE) \
4642 case Intrinsic::loongarch_##NAME: { \
4643 SDValue IOCSRRDResults = \
4644 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4645 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
4646 Results.push_back( \
4647 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
4648 Results.push_back(IOCSRRDResults.getValue(1)); \
4649 break; \
4650 }
4651 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4652 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4653 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4654#undef IOCSRRD_CASE
4655 case Intrinsic::loongarch_cpucfg: {
4656 SDValue CPUCFGResults =
4657 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4658 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
4659 Results.push_back(
4660 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
4661 Results.push_back(CPUCFGResults.getValue(1));
4662 break;
4663 }
4664 case Intrinsic::loongarch_lddir_d: {
4665 if (!Subtarget.is64Bit()) {
4666 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
4667 return;
4668 }
4669 break;
4670 }
4671 }
4672 break;
4673 }
4674 case ISD::READ_REGISTER: {
4675 if (Subtarget.is64Bit())
4676 DAG.getContext()->emitError(
4677 "On LA64, only 64-bit registers can be read.");
4678 else
4679 DAG.getContext()->emitError(
4680 "On LA32, only 32-bit registers can be read.");
4681 Results.push_back(DAG.getUNDEF(VT));
4682 Results.push_back(N->getOperand(0));
4683 break;
4684 }
4685  case ISD::INTRINSIC_WO_CHAIN: {
4686    replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
4687 break;
4688 }
4689 case ISD::LROUND: {
4690 SDValue Op0 = N->getOperand(0);
4691 EVT OpVT = Op0.getValueType();
4692 RTLIB::Libcall LC =
4693 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
4694 MakeLibCallOptions CallOptions;
4695 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
4696 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
4697 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
4698 Results.push_back(Result);
4699 break;
4700 }
4701 case ISD::ATOMIC_CMP_SWAP: {
4702    replaceCMP_XCHG_128Results(N, Results, DAG);
4703    break;
4704 }
4705 case ISD::TRUNCATE: {
4706 MVT VT = N->getSimpleValueType(0);
4707 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
4708 return;
4709
4710 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
4711 SDValue In = N->getOperand(0);
4712 EVT InVT = In.getValueType();
4713 EVT InEltVT = InVT.getVectorElementType();
4714 EVT EltVT = VT.getVectorElementType();
4715 unsigned MinElts = VT.getVectorNumElements();
4716 unsigned WidenNumElts = WidenVT.getVectorNumElements();
4717 unsigned InBits = InVT.getSizeInBits();
4718
4719 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
4720 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
4721 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
4722 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
4723 for (unsigned I = 0; I < MinElts; ++I)
4724 TruncMask[I] = Scale * I;
4725
4726 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
4727 MVT SVT = In.getSimpleValueType().getScalarType();
4728 MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
4729 SDValue WidenIn =
4730 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
4731 DAG.getVectorIdxConstant(0, DL));
4732 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
4733 "Illegal vector type in truncation");
4734 WidenIn = DAG.getBitcast(WidenVT, WidenIn);
4735 Results.push_back(
4736 DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
4737 return;
4738 }
4739 }
4740
4741 break;
4742 }
4743 }
4744}
4745
4746static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
4747                                 TargetLowering::DAGCombinerInfo &DCI,
4748                                 const LoongArchSubtarget &Subtarget) {
4749 if (DCI.isBeforeLegalizeOps())
4750 return SDValue();
4751
4752 SDValue FirstOperand = N->getOperand(0);
4753 SDValue SecondOperand = N->getOperand(1);
4754 unsigned FirstOperandOpc = FirstOperand.getOpcode();
4755 EVT ValTy = N->getValueType(0);
4756 SDLoc DL(N);
4757 uint64_t lsb, msb;
4758 unsigned SMIdx, SMLen;
4759 ConstantSDNode *CN;
4760 SDValue NewOperand;
4761 MVT GRLenVT = Subtarget.getGRLenVT();
4762
4763 // BSTRPICK requires the 32S feature.
4764 if (!Subtarget.has32S())
4765 return SDValue();
4766
4767 // Op's second operand must be a shifted mask.
4768 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
4769 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
4770 return SDValue();
4771
4772 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
4773 // Pattern match BSTRPICK.
4774 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
4775 // => BSTRPICK $dst, $src, msb, lsb
4776 // where msb = lsb + len - 1
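    // For example, (and (srl $src, 8), 0xff) becomes BSTRPICK $dst, $src, 15, 8.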
4777
4778 // The second operand of the shift must be an immediate.
4779 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
4780 return SDValue();
4781
4782 lsb = CN->getZExtValue();
4783
4784 // Return if the shifted mask does not start at bit 0 or the sum of its
4785 // length and lsb exceeds the word's size.
4786 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
4787 return SDValue();
4788
4789 NewOperand = FirstOperand.getOperand(0);
4790 } else {
4791 // Pattern match BSTRPICK.
4792 // $dst = and $src, (2**len- 1) , if len > 12
4793 // => BSTRPICK $dst, $src, msb, lsb
4794 // where lsb = 0 and msb = len - 1
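    // For example, (and $src, 0xffff) has len = 16 > 12 and becomes
    // BSTRPICK $dst, $src, 15, 0.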
4795
4796 // If the mask is <= 0xfff, andi can be used instead.
4797 if (CN->getZExtValue() <= 0xfff)
4798 return SDValue();
4799
4800 // Return if the MSB exceeds.
4801 if (SMIdx + SMLen > ValTy.getSizeInBits())
4802 return SDValue();
4803
4804 if (SMIdx > 0) {
4805      // Omit if the constant has more than 2 uses. This is a conservative
4806 // decision. Whether it is a win depends on the HW microarchitecture.
4807 // However it should always be better for 1 and 2 uses.
4808 if (CN->use_size() > 2)
4809 return SDValue();
4810 // Return if the constant can be composed by a single LU12I.W.
4811 if ((CN->getZExtValue() & 0xfff) == 0)
4812 return SDValue();
4813      // Return if the constant can be composed by a single ADDI with
4814 // the zero register.
4815 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
4816 return SDValue();
4817 }
4818
4819 lsb = SMIdx;
4820 NewOperand = FirstOperand;
4821 }
4822
4823 msb = lsb + SMLen - 1;
4824 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
4825 DAG.getConstant(msb, DL, GRLenVT),
4826 DAG.getConstant(lsb, DL, GRLenVT));
4827 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
4828 return NR0;
4829 // Try to optimize to
4830 // bstrpick $Rd, $Rs, msb, lsb
4831 // slli $Rd, $Rd, lsb
4832 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
4833 DAG.getConstant(lsb, DL, GRLenVT));
4834}
4835
4836static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
4837                                 TargetLowering::DAGCombinerInfo &DCI,
4838                                 const LoongArchSubtarget &Subtarget) {
4839 // BSTRPICK requires the 32S feature.
4840 if (!Subtarget.has32S())
4841 return SDValue();
4842
4843 if (DCI.isBeforeLegalizeOps())
4844 return SDValue();
4845
4846 // $dst = srl (and $src, Mask), Shamt
4847 // =>
4848 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
4849 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
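  // For example, (srl (and $src, 0xff00), 8) has MaskIdx = 8, MaskLen = 8 and
  // Shamt = 8, so it becomes BSTRPICK $dst, $src, 15, 8.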
4850 //
4851
4852 SDValue FirstOperand = N->getOperand(0);
4853 ConstantSDNode *CN;
4854 EVT ValTy = N->getValueType(0);
4855 SDLoc DL(N);
4856 MVT GRLenVT = Subtarget.getGRLenVT();
4857 unsigned MaskIdx, MaskLen;
4858 uint64_t Shamt;
4859
4860 // The first operand must be an AND and the second operand of the AND must be
4861 // a shifted mask.
4862 if (FirstOperand.getOpcode() != ISD::AND ||
4863 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
4864 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
4865 return SDValue();
4866
4867 // The second operand (shift amount) must be an immediate.
4868 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
4869 return SDValue();
4870
4871 Shamt = CN->getZExtValue();
4872 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
4873 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
4874 FirstOperand->getOperand(0),
4875 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
4876 DAG.getConstant(Shamt, DL, GRLenVT));
4877
4878 return SDValue();
4879}
4880
4881// Helper to peek through bitops/trunc/setcc to determine size of source vector.
4882// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
4883static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
4884 unsigned Depth) {
4885 // Limit recursion.
4886  if (Depth >= SelectionDAG::MaxRecursionDepth)
4887    return false;
4888 switch (Src.getOpcode()) {
4889 case ISD::SETCC:
4890 case ISD::TRUNCATE:
4891 return Src.getOperand(0).getValueSizeInBits() == Size;
4892 case ISD::FREEZE:
4893 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
4894 case ISD::AND:
4895 case ISD::XOR:
4896 case ISD::OR:
4897 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
4898 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
4899 case ISD::SELECT:
4900 case ISD::VSELECT:
4901 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
4902 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
4903 checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
4904 case ISD::BUILD_VECTOR:
4905 return ISD::isBuildVectorAllZeros(Src.getNode()) ||
4906 ISD::isBuildVectorAllOnes(Src.getNode());
4907 }
4908 return false;
4909}
4910
4911// Helper to push sign extension of vXi1 SETCC result through bitops.
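// For example, (xor A, B) is rebuilt as (xor (sext A), (sext B)) in SExtVT,
// recursing through nested bitops and selects.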
4912static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
4913                                          SDValue Src, const SDLoc &DL) {
4914 switch (Src.getOpcode()) {
4915 case ISD::SETCC:
4916 case ISD::FREEZE:
4917 case ISD::TRUNCATE:
4918 case ISD::BUILD_VECTOR:
4919 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
4920 case ISD::AND:
4921 case ISD::XOR:
4922 case ISD::OR:
4923 return DAG.getNode(
4924 Src.getOpcode(), DL, SExtVT,
4925 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
4926 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
4927 case ISD::SELECT:
4928 case ISD::VSELECT:
4929 return DAG.getSelect(
4930 DL, SExtVT, Src.getOperand(0),
4931 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
4932 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
4933 }
4934 llvm_unreachable("Unexpected node type for vXi1 sign extension");
4935}
4936
4937static SDValue
4938 performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG,
4939 TargetLowering::DAGCombinerInfo &DCI,
4940 const LoongArchSubtarget &Subtarget) {
4941 SDLoc DL(N);
4942 EVT VT = N->getValueType(0);
4943 SDValue Src = N->getOperand(0);
4944 EVT SrcVT = Src.getValueType();
4945
4946 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
4947 return SDValue();
4948
4949 bool UseLASX;
4950 unsigned Opc = ISD::DELETED_NODE;
4951 EVT CmpVT = Src.getOperand(0).getValueType();
4952 EVT EltVT = CmpVT.getVectorElementType();
4953
4954 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
4955 UseLASX = false;
4956 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
4957 CmpVT.getSizeInBits() == 256)
4958 UseLASX = true;
4959 else
4960 return SDValue();
4961
4962 SDValue SrcN1 = Src.getOperand(1);
4963 switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
4964 default:
4965 break;
4966 case ISD::SETEQ:
4967 // x == 0 => not (vmsknez.b x)
4968 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
4969 Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
4970 break;
4971 case ISD::SETGT:
4972 // x > -1 => vmskgez.b x
4973 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
4974 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
4975 break;
4976 case ISD::SETGE:
4977 // x >= 0 => vmskgez.b x
4978 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
4979 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
4980 break;
4981 case ISD::SETLT:
4982 // x < 0 => vmskltz.{b,h,w,d} x
4983 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
4984 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
4985 EltVT == MVT::i64))
4986 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
4987 break;
4988 case ISD::SETLE:
4989 // x <= -1 => vmskltz.{b,h,w,d} x
4990 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
4991 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
4992 EltVT == MVT::i64))
4993 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
4994 break;
4995 case ISD::SETNE:
4996 // x != 0 => vmsknez.b x
4997 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
4998 Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
4999 break;
5000 }
5001
5002 if (Opc == ISD::DELETED_NODE)
5003 return SDValue();
5004
5005 SDValue V = DAG.getNode(Opc, DL, MVT::i64, Src.getOperand(0));
5006 EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
5007 V = DAG.getZExtOrTrunc(V, DL, T);
5008 return DAG.getBitcast(VT, V);
5009}
5010
5011 static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
5012 TargetLowering::DAGCombinerInfo &DCI,
5013 const LoongArchSubtarget &Subtarget) {
5014 SDLoc DL(N);
5015 EVT VT = N->getValueType(0);
5016 SDValue Src = N->getOperand(0);
5017 EVT SrcVT = Src.getValueType();
5018
5019 if (!DCI.isBeforeLegalizeOps())
5020 return SDValue();
5021
5022 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
5023 return SDValue();
5024
5025 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
5026 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
5027 if (Res)
5028 return Res;
5029
5030 // Generate vXi1 using [X]VMSKLTZ
5031 MVT SExtVT;
5032 unsigned Opc;
5033 bool UseLASX = false;
5034 bool PropagateSExt = false;
5035
5036 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
5037 EVT CmpVT = Src.getOperand(0).getValueType();
5038 if (CmpVT.getSizeInBits() > 256)
5039 return SDValue();
5040 }
5041
5042 switch (SrcVT.getSimpleVT().SimpleTy) {
5043 default:
5044 return SDValue();
5045 case MVT::v2i1:
5046 SExtVT = MVT::v2i64;
5047 break;
5048 case MVT::v4i1:
5049 SExtVT = MVT::v4i32;
5050 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5051 SExtVT = MVT::v4i64;
5052 UseLASX = true;
5053 PropagateSExt = true;
5054 }
5055 break;
5056 case MVT::v8i1:
5057 SExtVT = MVT::v8i16;
5058 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5059 SExtVT = MVT::v8i32;
5060 UseLASX = true;
5061 PropagateSExt = true;
5062 }
5063 break;
5064 case MVT::v16i1:
5065 SExtVT = MVT::v16i8;
5066 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5067 SExtVT = MVT::v16i16;
5068 UseLASX = true;
5069 PropagateSExt = true;
5070 }
5071 break;
5072 case MVT::v32i1:
5073 SExtVT = MVT::v32i8;
5074 UseLASX = true;
5075 break;
5076 };
5077 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
5078 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5079
5080 SDValue V;
5081 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
5082 if (Src.getSimpleValueType() == MVT::v32i8) {
5083 SDValue Lo, Hi;
5084 std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
5085 Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, MVT::i64, Lo);
5086 Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, MVT::i64, Hi);
5087 Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi,
5088 DAG.getConstant(16, DL, MVT::i8));
5089 V = DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi);
5090 } else if (UseLASX) {
5091 return SDValue();
5092 }
5093 }
5094
5095 if (!V) {
5096 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5097 V = DAG.getNode(Opc, DL, MVT::i64, Src);
5098 }
5099
5100 EVT T = EVT::getIntegerVT(*DAG.getContext(), SExtVT.getVectorNumElements());
5101 V = DAG.getZExtOrTrunc(V, DL, T);
5102 return DAG.getBitcast(VT, V);
5103}
5104
5105 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
5106 TargetLowering::DAGCombinerInfo &DCI,
5107 const LoongArchSubtarget &Subtarget) {
5108 MVT GRLenVT = Subtarget.getGRLenVT();
5109 EVT ValTy = N->getValueType(0);
5110 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5111 ConstantSDNode *CN0, *CN1;
5112 SDLoc DL(N);
5113 unsigned ValBits = ValTy.getSizeInBits();
5114 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
5115 unsigned Shamt;
5116 bool SwapAndRetried = false;
5117
5118 // BSTRPICK requires the 32S feature.
5119 if (!Subtarget.has32S())
5120 return SDValue();
5121
5122 if (DCI.isBeforeLegalizeOps())
5123 return SDValue();
5124
5125 if (ValBits != 32 && ValBits != 64)
5126 return SDValue();
5127
5128Retry:
5129 // 1st pattern to match BSTRINS:
5130 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
5131 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
5132 // =>
5133 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
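 // For example, with size = 8 and lsb = 4: mask1 = 0xff0, mask0 = ~0xff0, and
 // the combine emits BSTRINS X, Y, 11, 4.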
5134 if (N0.getOpcode() == ISD::AND &&
5135 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5136 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5137 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
5138 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5139 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5140 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
5141 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5142 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5143 (MaskIdx0 + MaskLen0 <= ValBits)) {
5144 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
5145 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5146 N1.getOperand(0).getOperand(0),
5147 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5148 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5149 }
5150
5151 // 2nd pattern to match BSTRINS:
5152 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
5153 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
5154 // =>
5155 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
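 // For example, with size = 8 and lsb = 16: mask1 = 0xff, mask0 = ~0xff0000,
 // and the combine emits BSTRINS X, Y, 23, 16.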
5156 if (N0.getOpcode() == ISD::AND &&
5157 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5158 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5159 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5160 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5161 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5162 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5163 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5164 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
5165 (MaskIdx0 + MaskLen0 <= ValBits)) {
5166 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
5167 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5168 N1.getOperand(0).getOperand(0),
5169 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5170 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5171 }
5172
5173 // 3rd pattern to match BSTRINS:
5174 // R = or (and X, mask0), (and Y, mask1)
5175 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
5176 // =>
5177 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
5178 // where msb = lsb + size - 1
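 // For example, with ~mask0 = 0xff00 (size = 8, lsb = 8) and mask1 = 0xff00,
 // the combine emits BSTRINS X, ((Y & 0xff00) >> 8), 15, 8.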
5179 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5180 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5181 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5182 (MaskIdx0 + MaskLen0 <= 64) &&
5183 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
5184 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5185 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
5186 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5187 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
5188 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
5189 DAG.getConstant(ValBits == 32
5190 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5191 : (MaskIdx0 + MaskLen0 - 1),
5192 DL, GRLenVT),
5193 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5194 }
5195
5196 // 4th pattern to match BSTRINS:
5197 // R = or (and X, mask), (shl Y, shamt)
5198 // where mask = (2**shamt - 1)
5199 // =>
5200 // R = BSTRINS X, Y, ValBits - 1, shamt
5201 // where ValBits = 32 or 64
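 // For example, with shamt = 12 (mask = 0xfff) on LA64, the combine emits
 // BSTRINS X, Y, 63, 12.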
5202 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
5203 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5204 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
5205 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5206 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
5207 (MaskIdx0 + MaskLen0 <= ValBits)) {
5208 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
5209 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5210 N1.getOperand(0),
5211 DAG.getConstant((ValBits - 1), DL, GRLenVT),
5212 DAG.getConstant(Shamt, DL, GRLenVT));
5213 }
5214
5215 // 5th pattern to match BSTRINS:
5216 // R = or (and X, mask), const
5217 // where ~mask = (2**size - 1) << lsb, mask & const = 0
5218 // =>
5219 // R = BSTRINS X, (const >> lsb), msb, lsb
5220 // where msb = lsb + size - 1
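 // For example, with ~mask = 0xf0 (size = 4, lsb = 4) and const = 0x50, the
 // combine emits BSTRINS X, 0x5, 7, 4.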
5221 if (N0.getOpcode() == ISD::AND &&
5222 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5223 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5224 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
5225 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5226 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
5227 return DAG.getNode(
5228 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5229 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
5230 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5231 : (MaskIdx0 + MaskLen0 - 1),
5232 DL, GRLenVT),
5233 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5234 }
5235
5236 // 6th pattern.
5237 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
5238 // by the incoming bits are known to be zero.
5239 // =>
5240 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
5241 //
5242 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
5243 // pattern is more common than the 1st. So we put the 1st before the 6th in
5244 // order to match as many nodes as possible.
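 // For example, with mask = 0xff (MaskLen = 8) and shamt = 16, and bits
 // [23:16] of b known to be zero, the combine emits BSTRINS b, c, 23, 16.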
5245 ConstantSDNode *CNMask, *CNShamt;
5246 unsigned MaskIdx, MaskLen;
5247 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5248 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5249 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5250 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5251 CNShamt->getZExtValue() + MaskLen <= ValBits) {
5252 Shamt = CNShamt->getZExtValue();
5253 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
5254 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5255 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
5256 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5257 N1.getOperand(0).getOperand(0),
5258 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
5259 DAG.getConstant(Shamt, DL, GRLenVT));
5260 }
5261 }
5262
5263 // 7th pattern.
5264 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
5265 // overwritten by the incoming bits are known to be zero.
5266 // =>
5267 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
5268 //
5269 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
5270 // before the 7th in order to match as many nodes as possible.
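 // For example, with shifted_mask = 0xff00 (MaskIdx = 8, MaskLen = 8) and
 // shamt = 8, and bits [15:8] of b known to be zero, the combine emits
 // BSTRINS b, c, 15, 8.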
5271 if (N1.getOpcode() == ISD::AND &&
5272 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5273 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5274 N1.getOperand(0).getOpcode() == ISD::SHL &&
5275 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5276 CNShamt->getZExtValue() == MaskIdx) {
5277 APInt ShMask(ValBits, CNMask->getZExtValue());
5278 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5279 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
5280 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5281 N1.getOperand(0).getOperand(0),
5282 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5283 DAG.getConstant(MaskIdx, DL, GRLenVT));
5284 }
5285 }
5286
5287 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
5288 if (!SwapAndRetried) {
5289 std::swap(N0, N1);
5290 SwapAndRetried = true;
5291 goto Retry;
5292 }
5293
5294 SwapAndRetried = false;
5295Retry2:
5296 // 8th pattern.
5297 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
5298 // the incoming bits are known to be zero.
5299 // =>
5300 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
5301 //
5302 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
5303 // we put it here in order to match as many nodes as possible or generate
5304 // fewer instructions.
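 // For example, with shifted_mask = 0x0ff0 (MaskIdx = 4, MaskLen = 8) and bits
 // [11:4] of b known to be zero, the combine emits
 // BSTRINS b, (c >> 4), 11, 4.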
5305 if (N1.getOpcode() == ISD::AND &&
5306 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5307 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
5308 APInt ShMask(ValBits, CNMask->getZExtValue());
5309 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5310 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
5311 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5312 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
5313 N1->getOperand(0),
5314 DAG.getConstant(MaskIdx, DL, GRLenVT)),
5315 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5316 DAG.getConstant(MaskIdx, DL, GRLenVT));
5317 }
5318 }
5319 // Swap N0/N1 and retry.
5320 if (!SwapAndRetried) {
5321 std::swap(N0, N1);
5322 SwapAndRetried = true;
5323 goto Retry2;
5324 }
5325
5326 return SDValue();
5327}
5328
5329static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
5330 ExtType = ISD::NON_EXTLOAD;
5331
5332 switch (V.getNode()->getOpcode()) {
5333 case ISD::LOAD: {
5334 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
5335 if ((LoadNode->getMemoryVT() == MVT::i8) ||
5336 (LoadNode->getMemoryVT() == MVT::i16)) {
5337 ExtType = LoadNode->getExtensionType();
5338 return true;
5339 }
5340 return false;
5341 }
5342 case ISD::AssertSext: {
5343 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5344 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5345 ExtType = ISD::SEXTLOAD;
5346 return true;
5347 }
5348 return false;
5349 }
5350 case ISD::AssertZext: {
5351 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5352 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5353 ExtType = ISD::ZEXTLOAD;
5354 return true;
5355 }
5356 return false;
5357 }
5358 default:
5359 return false;
5360 }
5361
5362 return false;
5363}
5364
5365// Eliminate redundant truncation and zero-extension nodes.
5366// * Case 1:
5367// +------------+ +------------+ +------------+
5368// | Input1 | | Input2 | | CC |
5369// +------------+ +------------+ +------------+
5370// | | |
5371// V V +----+
5372// +------------+ +------------+ |
5373// | TRUNCATE | | TRUNCATE | |
5374// +------------+ +------------+ |
5375// | | |
5376// V V |
5377// +------------+ +------------+ |
5378// | ZERO_EXT | | ZERO_EXT | |
5379// +------------+ +------------+ |
5380// | | |
5381// | +-------------+ |
5382// V V | |
5383// +----------------+ | |
5384// | AND | | |
5385// +----------------+ | |
5386// | | |
5387// +---------------+ | |
5388// | | |
5389// V V V
5390// +-------------+
5391// | CMP |
5392// +-------------+
5393// * Case 2:
5394// +------------+ +------------+ +-------------+ +------------+ +------------+
5395// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
5396// +------------+ +------------+ +-------------+ +------------+ +------------+
5397// | | | | |
5398// V | | | |
5399// +------------+ | | | |
5400// | XOR |<---------------------+ | |
5401// +------------+ | | |
5402// | | | |
5403// V V +---------------+ |
5404// +------------+ +------------+ | |
5405// | TRUNCATE | | TRUNCATE | | +-------------------------+
5406// +------------+ +------------+ | |
5407// | | | |
5408// V V | |
5409// +------------+ +------------+ | |
5410// | ZERO_EXT | | ZERO_EXT | | |
5411// +------------+ +------------+ | |
5412// | | | |
5413// V V | |
5414// +----------------+ | |
5415// | AND | | |
5416// +----------------+ | |
5417// | | |
5418// +---------------+ | |
5419// | | |
5420// V V V
5421// +-------------+
5422// | CMP |
5423// +-------------+
5424 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
5425 TargetLowering::DAGCombinerInfo &DCI,
5426 const LoongArchSubtarget &Subtarget) {
5427 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
5428
5429 SDNode *AndNode = N->getOperand(0).getNode();
5430 if (AndNode->getOpcode() != ISD::AND)
5431 return SDValue();
5432
5433 SDValue AndInputValue2 = AndNode->getOperand(1);
5434 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
5435 return SDValue();
5436
5437 SDValue CmpInputValue = N->getOperand(1);
5438 SDValue AndInputValue1 = AndNode->getOperand(0);
5439 if (AndInputValue1.getOpcode() == ISD::XOR) {
5440 if (CC != ISD::SETEQ && CC != ISD::SETNE)
5441 return SDValue();
5442 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
5443 if (!CN || CN->getSExtValue() != -1)
5444 return SDValue();
5445 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
5446 if (!CN || CN->getSExtValue() != 0)
5447 return SDValue();
5448 AndInputValue1 = AndInputValue1.getOperand(0);
5449 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
5450 return SDValue();
5451 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
5452 if (AndInputValue2 != CmpInputValue)
5453 return SDValue();
5454 } else {
5455 return SDValue();
5456 }
5457
5458 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
5459 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
5460 return SDValue();
5461
5462 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
5463 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
5464 return SDValue();
5465
5466 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
5467 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
5468 ISD::LoadExtType ExtType1;
5469 ISD::LoadExtType ExtType2;
5470
5471 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
5472 !checkValueWidth(TruncInputValue2, ExtType2))
5473 return SDValue();
5474
5475 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
5476 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
5477 return SDValue();
5478
5479 if ((ExtType2 != ISD::ZEXTLOAD) &&
5480 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
5481 return SDValue();
5482
5483 // These truncation and zero-extension nodes are unnecessary; remove them.
5484 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
5485 TruncInputValue1, TruncInputValue2);
5486 SDValue NewSetCC =
5487 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
5488 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
5489 return SDValue(N, 0);
5490}
5491
5492// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
5493 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
5494 TargetLowering::DAGCombinerInfo &DCI,
5495 const LoongArchSubtarget &Subtarget) {
5496 if (DCI.isBeforeLegalizeOps())
5497 return SDValue();
5498
5499 SDValue Src = N->getOperand(0);
5500 if (Src.getOpcode() != LoongArchISD::REVB_2W)
5501 return SDValue();
5502
5503 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
5504 Src.getOperand(0));
5505}
5506
5507// Perform common combines for BR_CC and SELECT_CC conditions.
5508static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
5509 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
5510 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5511
5512 // Since an arithmetic right shift always preserves the sign bit, the
5513 // shift can be omitted here.
5514 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
5515 // setge (sra X, N), 0 -> setge X, 0
5516 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
5517 LHS.getOpcode() == ISD::SRA) {
5518 LHS = LHS.getOperand(0);
5519 return true;
5520 }
5521
5522 if (!ISD::isIntEqualitySetCC(CCVal))
5523 return false;
5524
5525 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
5526 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
5527 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
5528 LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
5529 // If we're looking for eq 0 instead of ne 0, we need to invert the
5530 // condition.
5531 bool Invert = CCVal == ISD::SETEQ;
5532 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
5533 if (Invert)
5534 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5535
5536 RHS = LHS.getOperand(1);
5537 LHS = LHS.getOperand(0);
5538 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
5539
5540 CC = DAG.getCondCode(CCVal);
5541 return true;
5542 }
5543
5544 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
5545 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
5546 LHS.getOperand(1).getOpcode() == ISD::Constant) {
5547 SDValue LHS0 = LHS.getOperand(0);
5548 if (LHS0.getOpcode() == ISD::AND &&
5549 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
5550 uint64_t Mask = LHS0.getConstantOperandVal(1);
5551 uint64_t ShAmt = LHS.getConstantOperandVal(1);
5552 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
5553 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
5554 CC = DAG.getCondCode(CCVal);
5555
5556 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
5557 LHS = LHS0.getOperand(0);
5558 if (ShAmt != 0)
5559 LHS =
5560 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
5561 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
5562 return true;
5563 }
5564 }
5565 }
5566
5567 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
5568 // This can occur when legalizing some floating point comparisons.
5569 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
5570 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
5571 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5572 CC = DAG.getCondCode(CCVal);
5573 RHS = DAG.getConstant(0, DL, LHS.getValueType());
5574 return true;
5575 }
5576
5577 return false;
5578}
5579
5580 static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG,
5581 TargetLowering::DAGCombinerInfo &DCI,
5582 const LoongArchSubtarget &Subtarget) {
5583 SDValue LHS = N->getOperand(1);
5584 SDValue RHS = N->getOperand(2);
5585 SDValue CC = N->getOperand(3);
5586 SDLoc DL(N);
5587
5588 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
5589 return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
5590 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
5591
5592 return SDValue();
5593}
5594
5595 static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
5596 TargetLowering::DAGCombinerInfo &DCI,
5597 const LoongArchSubtarget &Subtarget) {
5598 // Transform
5599 SDValue LHS = N->getOperand(0);
5600 SDValue RHS = N->getOperand(1);
5601 SDValue CC = N->getOperand(2);
5602 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5603 SDValue TrueV = N->getOperand(3);
5604 SDValue FalseV = N->getOperand(4);
5605 SDLoc DL(N);
5606 EVT VT = N->getValueType(0);
5607
5608 // If the True and False values are the same, we don't need a select_cc.
5609 if (TrueV == FalseV)
5610 return TrueV;
5611
5612 // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
5613 // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
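 // For example, on LA64 with y = 5 and z = 2: (x >> 63) is all ones when
 // x < 0 and zero otherwise, so ((x >> 63) & (5 - 2)) + 2 yields 5 or 2
 // respectively, with no branch.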
5614 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
5615 isNullConstant(RHS) &&
5616 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
5617 if (CCVal == ISD::CondCode::SETGE)
5618 std::swap(TrueV, FalseV);
5619
5620 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
5621 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
5622 // Only handle simm12; constants outside this range have to be materialized
5623 // in a register anyway.
5624 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
5625 isInt<12>(TrueSImm - FalseSImm)) {
5626 SDValue SRA =
5627 DAG.getNode(ISD::SRA, DL, VT, LHS,
5628 DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT));
5629 SDValue AND =
5630 DAG.getNode(ISD::AND, DL, VT, SRA,
5631 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
5632 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
5633 }
5634
5635 if (CCVal == ISD::CondCode::SETGE)
5636 std::swap(TrueV, FalseV);
5637 }
5638
5639 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
5640 return DAG.getNode(LoongArchISD::SELECT_CC, DL, N->getValueType(0),
5641 {LHS, RHS, CC, TrueV, FalseV});
5642
5643 return SDValue();
5644}
5645
5646template <unsigned N>
5647 static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
5648 SelectionDAG &DAG,
5649 const LoongArchSubtarget &Subtarget,
5650 bool IsSigned = false) {
5651 SDLoc DL(Node);
5652 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5653 // Check the ImmArg.
5654 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5655 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5656 DAG.getContext()->emitError(Node->getOperationName(0) +
5657 ": argument out of range.");
5658 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
5659 }
5660 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
5661}
5662
5663template <unsigned N>
5664static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
5665 SelectionDAG &DAG, bool IsSigned = false) {
5666 SDLoc DL(Node);
5667 EVT ResTy = Node->getValueType(0);
5668 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5669
5670 // Check the ImmArg.
5671 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5672 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5673 DAG.getContext()->emitError(Node->getOperationName(0) +
5674 ": argument out of range.");
5675 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5676 }
5677 return DAG.getConstant(
5678 APInt(ResTy.getScalarType().getSizeInBits(),
5679 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
5680 DL, ResTy);
5681}
5682
5683 static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
5684 SDLoc DL(Node);
5685 EVT ResTy = Node->getValueType(0);
5686 SDValue Vec = Node->getOperand(2);
5687 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
5688 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
5689}
5690
5691 static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
5692 SDLoc DL(Node);
5693 EVT ResTy = Node->getValueType(0);
5694 SDValue One = DAG.getConstant(1, DL, ResTy);
5695 SDValue Bit =
5696 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
5697
5698 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
5699 DAG.getNOT(DL, Bit, ResTy));
5700}
5701
5702template <unsigned N>
5703 static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
5704 SDLoc DL(Node);
5705 EVT ResTy = Node->getValueType(0);
5706 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5707 // Check the unsigned ImmArg.
5708 if (!isUInt<N>(CImm->getZExtValue())) {
5709 DAG.getContext()->emitError(Node->getOperationName(0) +
5710 ": argument out of range.");
5711 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5712 }
5713
5714 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5715 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
5716
5717 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
5718}
5719
5720template <unsigned N>
5721 static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
5722 SDLoc DL(Node);
5723 EVT ResTy = Node->getValueType(0);
5724 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5725 // Check the unsigned ImmArg.
5726 if (!isUInt<N>(CImm->getZExtValue())) {
5727 DAG.getContext()->emitError(Node->getOperationName(0) +
5728 ": argument out of range.");
5729 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5730 }
5731
5732 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5733 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
5734 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
5735}
5736
5737template <unsigned N>
5738 static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
5739 SDLoc DL(Node);
5740 EVT ResTy = Node->getValueType(0);
5741 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5742 // Check the unsigned ImmArg.
5743 if (!isUInt<N>(CImm->getZExtValue())) {
5744 DAG.getContext()->emitError(Node->getOperationName(0) +
5745 ": argument out of range.");
5746 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5747 }
5748
5749 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5750 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
5751 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
5752}
5753
5754static SDValue
5755 performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
5756 TargetLowering::DAGCombinerInfo &DCI,
5757 const LoongArchSubtarget &Subtarget) {
5758 SDLoc DL(N);
5759 switch (N->getConstantOperandVal(0)) {
5760 default:
5761 break;
5762 case Intrinsic::loongarch_lsx_vadd_b:
5763 case Intrinsic::loongarch_lsx_vadd_h:
5764 case Intrinsic::loongarch_lsx_vadd_w:
5765 case Intrinsic::loongarch_lsx_vadd_d:
5766 case Intrinsic::loongarch_lasx_xvadd_b:
5767 case Intrinsic::loongarch_lasx_xvadd_h:
5768 case Intrinsic::loongarch_lasx_xvadd_w:
5769 case Intrinsic::loongarch_lasx_xvadd_d:
5770 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
5771 N->getOperand(2));
5772 case Intrinsic::loongarch_lsx_vaddi_bu:
5773 case Intrinsic::loongarch_lsx_vaddi_hu:
5774 case Intrinsic::loongarch_lsx_vaddi_wu:
5775 case Intrinsic::loongarch_lsx_vaddi_du:
5776 case Intrinsic::loongarch_lasx_xvaddi_bu:
5777 case Intrinsic::loongarch_lasx_xvaddi_hu:
5778 case Intrinsic::loongarch_lasx_xvaddi_wu:
5779 case Intrinsic::loongarch_lasx_xvaddi_du:
5780 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
5781 lowerVectorSplatImm<5>(N, 2, DAG));
5782 case Intrinsic::loongarch_lsx_vsub_b:
5783 case Intrinsic::loongarch_lsx_vsub_h:
5784 case Intrinsic::loongarch_lsx_vsub_w:
5785 case Intrinsic::loongarch_lsx_vsub_d:
5786 case Intrinsic::loongarch_lasx_xvsub_b:
5787 case Intrinsic::loongarch_lasx_xvsub_h:
5788 case Intrinsic::loongarch_lasx_xvsub_w:
5789 case Intrinsic::loongarch_lasx_xvsub_d:
5790 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
5791 N->getOperand(2));
5792 case Intrinsic::loongarch_lsx_vsubi_bu:
5793 case Intrinsic::loongarch_lsx_vsubi_hu:
5794 case Intrinsic::loongarch_lsx_vsubi_wu:
5795 case Intrinsic::loongarch_lsx_vsubi_du:
5796 case Intrinsic::loongarch_lasx_xvsubi_bu:
5797 case Intrinsic::loongarch_lasx_xvsubi_hu:
5798 case Intrinsic::loongarch_lasx_xvsubi_wu:
5799 case Intrinsic::loongarch_lasx_xvsubi_du:
5800 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
5801 lowerVectorSplatImm<5>(N, 2, DAG));
5802 case Intrinsic::loongarch_lsx_vneg_b:
5803 case Intrinsic::loongarch_lsx_vneg_h:
5804 case Intrinsic::loongarch_lsx_vneg_w:
5805 case Intrinsic::loongarch_lsx_vneg_d:
5806 case Intrinsic::loongarch_lasx_xvneg_b:
5807 case Intrinsic::loongarch_lasx_xvneg_h:
5808 case Intrinsic::loongarch_lasx_xvneg_w:
5809 case Intrinsic::loongarch_lasx_xvneg_d:
5810 return DAG.getNode(
5811 ISD::SUB, DL, N->getValueType(0),
5812 DAG.getConstant(
5813 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
5814 /*isSigned=*/true),
5815 SDLoc(N), N->getValueType(0)),
5816 N->getOperand(1));
5817 case Intrinsic::loongarch_lsx_vmax_b:
5818 case Intrinsic::loongarch_lsx_vmax_h:
5819 case Intrinsic::loongarch_lsx_vmax_w:
5820 case Intrinsic::loongarch_lsx_vmax_d:
5821 case Intrinsic::loongarch_lasx_xvmax_b:
5822 case Intrinsic::loongarch_lasx_xvmax_h:
5823 case Intrinsic::loongarch_lasx_xvmax_w:
5824 case Intrinsic::loongarch_lasx_xvmax_d:
5825 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
5826 N->getOperand(2));
5827 case Intrinsic::loongarch_lsx_vmax_bu:
5828 case Intrinsic::loongarch_lsx_vmax_hu:
5829 case Intrinsic::loongarch_lsx_vmax_wu:
5830 case Intrinsic::loongarch_lsx_vmax_du:
5831 case Intrinsic::loongarch_lasx_xvmax_bu:
5832 case Intrinsic::loongarch_lasx_xvmax_hu:
5833 case Intrinsic::loongarch_lasx_xvmax_wu:
5834 case Intrinsic::loongarch_lasx_xvmax_du:
5835 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
5836 N->getOperand(2));
5837 case Intrinsic::loongarch_lsx_vmaxi_b:
5838 case Intrinsic::loongarch_lsx_vmaxi_h:
5839 case Intrinsic::loongarch_lsx_vmaxi_w:
5840 case Intrinsic::loongarch_lsx_vmaxi_d:
5841 case Intrinsic::loongarch_lasx_xvmaxi_b:
5842 case Intrinsic::loongarch_lasx_xvmaxi_h:
5843 case Intrinsic::loongarch_lasx_xvmaxi_w:
5844 case Intrinsic::loongarch_lasx_xvmaxi_d:
5845 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
5846 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
5847 case Intrinsic::loongarch_lsx_vmaxi_bu:
5848 case Intrinsic::loongarch_lsx_vmaxi_hu:
5849 case Intrinsic::loongarch_lsx_vmaxi_wu:
5850 case Intrinsic::loongarch_lsx_vmaxi_du:
5851 case Intrinsic::loongarch_lasx_xvmaxi_bu:
5852 case Intrinsic::loongarch_lasx_xvmaxi_hu:
5853 case Intrinsic::loongarch_lasx_xvmaxi_wu:
5854 case Intrinsic::loongarch_lasx_xvmaxi_du:
5855 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
5856 lowerVectorSplatImm<5>(N, 2, DAG));
5857 case Intrinsic::loongarch_lsx_vmin_b:
5858 case Intrinsic::loongarch_lsx_vmin_h:
5859 case Intrinsic::loongarch_lsx_vmin_w:
5860 case Intrinsic::loongarch_lsx_vmin_d:
5861 case Intrinsic::loongarch_lasx_xvmin_b:
5862 case Intrinsic::loongarch_lasx_xvmin_h:
5863 case Intrinsic::loongarch_lasx_xvmin_w:
5864 case Intrinsic::loongarch_lasx_xvmin_d:
5865 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
5866 N->getOperand(2));
5867 case Intrinsic::loongarch_lsx_vmin_bu:
5868 case Intrinsic::loongarch_lsx_vmin_hu:
5869 case Intrinsic::loongarch_lsx_vmin_wu:
5870 case Intrinsic::loongarch_lsx_vmin_du:
5871 case Intrinsic::loongarch_lasx_xvmin_bu:
5872 case Intrinsic::loongarch_lasx_xvmin_hu:
5873 case Intrinsic::loongarch_lasx_xvmin_wu:
5874 case Intrinsic::loongarch_lasx_xvmin_du:
5875 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
5876 N->getOperand(2));
5877 case Intrinsic::loongarch_lsx_vmini_b:
5878 case Intrinsic::loongarch_lsx_vmini_h:
5879 case Intrinsic::loongarch_lsx_vmini_w:
5880 case Intrinsic::loongarch_lsx_vmini_d:
5881 case Intrinsic::loongarch_lasx_xvmini_b:
5882 case Intrinsic::loongarch_lasx_xvmini_h:
5883 case Intrinsic::loongarch_lasx_xvmini_w:
5884 case Intrinsic::loongarch_lasx_xvmini_d:
5885 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
5886 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
5887 case Intrinsic::loongarch_lsx_vmini_bu:
5888 case Intrinsic::loongarch_lsx_vmini_hu:
5889 case Intrinsic::loongarch_lsx_vmini_wu:
5890 case Intrinsic::loongarch_lsx_vmini_du:
5891 case Intrinsic::loongarch_lasx_xvmini_bu:
5892 case Intrinsic::loongarch_lasx_xvmini_hu:
5893 case Intrinsic::loongarch_lasx_xvmini_wu:
5894 case Intrinsic::loongarch_lasx_xvmini_du:
5895 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
5896 lowerVectorSplatImm<5>(N, 2, DAG));
5897 case Intrinsic::loongarch_lsx_vmul_b:
5898 case Intrinsic::loongarch_lsx_vmul_h:
5899 case Intrinsic::loongarch_lsx_vmul_w:
5900 case Intrinsic::loongarch_lsx_vmul_d:
5901 case Intrinsic::loongarch_lasx_xvmul_b:
5902 case Intrinsic::loongarch_lasx_xvmul_h:
5903 case Intrinsic::loongarch_lasx_xvmul_w:
5904 case Intrinsic::loongarch_lasx_xvmul_d:
5905 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
5906 N->getOperand(2));
5907 case Intrinsic::loongarch_lsx_vmadd_b:
5908 case Intrinsic::loongarch_lsx_vmadd_h:
5909 case Intrinsic::loongarch_lsx_vmadd_w:
5910 case Intrinsic::loongarch_lsx_vmadd_d:
5911 case Intrinsic::loongarch_lasx_xvmadd_b:
5912 case Intrinsic::loongarch_lasx_xvmadd_h:
5913 case Intrinsic::loongarch_lasx_xvmadd_w:
5914 case Intrinsic::loongarch_lasx_xvmadd_d: {
5915 EVT ResTy = N->getValueType(0);
5916 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
5917 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
5918 N->getOperand(3)));
5919 }
5920 case Intrinsic::loongarch_lsx_vmsub_b:
5921 case Intrinsic::loongarch_lsx_vmsub_h:
5922 case Intrinsic::loongarch_lsx_vmsub_w:
5923 case Intrinsic::loongarch_lsx_vmsub_d:
5924 case Intrinsic::loongarch_lasx_xvmsub_b:
5925 case Intrinsic::loongarch_lasx_xvmsub_h:
5926 case Intrinsic::loongarch_lasx_xvmsub_w:
5927 case Intrinsic::loongarch_lasx_xvmsub_d: {
5928 EVT ResTy = N->getValueType(0);
5929 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
5930 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
5931 N->getOperand(3)));
5932 }
5933 case Intrinsic::loongarch_lsx_vdiv_b:
5934 case Intrinsic::loongarch_lsx_vdiv_h:
5935 case Intrinsic::loongarch_lsx_vdiv_w:
5936 case Intrinsic::loongarch_lsx_vdiv_d:
5937 case Intrinsic::loongarch_lasx_xvdiv_b:
5938 case Intrinsic::loongarch_lasx_xvdiv_h:
5939 case Intrinsic::loongarch_lasx_xvdiv_w:
5940 case Intrinsic::loongarch_lasx_xvdiv_d:
5941 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
5942 N->getOperand(2));
5943 case Intrinsic::loongarch_lsx_vdiv_bu:
5944 case Intrinsic::loongarch_lsx_vdiv_hu:
5945 case Intrinsic::loongarch_lsx_vdiv_wu:
5946 case Intrinsic::loongarch_lsx_vdiv_du:
5947 case Intrinsic::loongarch_lasx_xvdiv_bu:
5948 case Intrinsic::loongarch_lasx_xvdiv_hu:
5949 case Intrinsic::loongarch_lasx_xvdiv_wu:
5950 case Intrinsic::loongarch_lasx_xvdiv_du:
5951 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
5952 N->getOperand(2));
5953 case Intrinsic::loongarch_lsx_vmod_b:
5954 case Intrinsic::loongarch_lsx_vmod_h:
5955 case Intrinsic::loongarch_lsx_vmod_w:
5956 case Intrinsic::loongarch_lsx_vmod_d:
5957 case Intrinsic::loongarch_lasx_xvmod_b:
5958 case Intrinsic::loongarch_lasx_xvmod_h:
5959 case Intrinsic::loongarch_lasx_xvmod_w:
5960 case Intrinsic::loongarch_lasx_xvmod_d:
5961 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
5962 N->getOperand(2));
5963 case Intrinsic::loongarch_lsx_vmod_bu:
5964 case Intrinsic::loongarch_lsx_vmod_hu:
5965 case Intrinsic::loongarch_lsx_vmod_wu:
5966 case Intrinsic::loongarch_lsx_vmod_du:
5967 case Intrinsic::loongarch_lasx_xvmod_bu:
5968 case Intrinsic::loongarch_lasx_xvmod_hu:
5969 case Intrinsic::loongarch_lasx_xvmod_wu:
5970 case Intrinsic::loongarch_lasx_xvmod_du:
5971 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
5972 N->getOperand(2));
5973 case Intrinsic::loongarch_lsx_vand_v:
5974 case Intrinsic::loongarch_lasx_xvand_v:
5975 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
5976 N->getOperand(2));
5977 case Intrinsic::loongarch_lsx_vor_v:
5978 case Intrinsic::loongarch_lasx_xvor_v:
5979 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
5980 N->getOperand(2));
5981 case Intrinsic::loongarch_lsx_vxor_v:
5982 case Intrinsic::loongarch_lasx_xvxor_v:
5983 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
5984 N->getOperand(2));
5985 case Intrinsic::loongarch_lsx_vnor_v:
5986 case Intrinsic::loongarch_lasx_xvnor_v: {
5987 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
5988 N->getOperand(2));
5989 return DAG.getNOT(DL, Res, Res->getValueType(0));
5990 }
5991 case Intrinsic::loongarch_lsx_vandi_b:
5992 case Intrinsic::loongarch_lasx_xvandi_b:
5993 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
5994 lowerVectorSplatImm<8>(N, 2, DAG));
5995 case Intrinsic::loongarch_lsx_vori_b:
5996 case Intrinsic::loongarch_lasx_xvori_b:
5997 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
5998 lowerVectorSplatImm<8>(N, 2, DAG));
5999 case Intrinsic::loongarch_lsx_vxori_b:
6000 case Intrinsic::loongarch_lasx_xvxori_b:
6001 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6002 lowerVectorSplatImm<8>(N, 2, DAG));
6003 case Intrinsic::loongarch_lsx_vsll_b:
6004 case Intrinsic::loongarch_lsx_vsll_h:
6005 case Intrinsic::loongarch_lsx_vsll_w:
6006 case Intrinsic::loongarch_lsx_vsll_d:
6007 case Intrinsic::loongarch_lasx_xvsll_b:
6008 case Intrinsic::loongarch_lasx_xvsll_h:
6009 case Intrinsic::loongarch_lasx_xvsll_w:
6010 case Intrinsic::loongarch_lasx_xvsll_d:
6011 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6012 truncateVecElts(N, DAG));
6013 case Intrinsic::loongarch_lsx_vslli_b:
6014 case Intrinsic::loongarch_lasx_xvslli_b:
6015 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6016 lowerVectorSplatImm<3>(N, 2, DAG));
6017 case Intrinsic::loongarch_lsx_vslli_h:
6018 case Intrinsic::loongarch_lasx_xvslli_h:
6019 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6020 lowerVectorSplatImm<4>(N, 2, DAG));
6021 case Intrinsic::loongarch_lsx_vslli_w:
6022 case Intrinsic::loongarch_lasx_xvslli_w:
6023 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6024 lowerVectorSplatImm<5>(N, 2, DAG));
6025 case Intrinsic::loongarch_lsx_vslli_d:
6026 case Intrinsic::loongarch_lasx_xvslli_d:
6027 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6028 lowerVectorSplatImm<6>(N, 2, DAG));
6029 case Intrinsic::loongarch_lsx_vsrl_b:
6030 case Intrinsic::loongarch_lsx_vsrl_h:
6031 case Intrinsic::loongarch_lsx_vsrl_w:
6032 case Intrinsic::loongarch_lsx_vsrl_d:
6033 case Intrinsic::loongarch_lasx_xvsrl_b:
6034 case Intrinsic::loongarch_lasx_xvsrl_h:
6035 case Intrinsic::loongarch_lasx_xvsrl_w:
6036 case Intrinsic::loongarch_lasx_xvsrl_d:
6037 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6038 truncateVecElts(N, DAG));
6039 case Intrinsic::loongarch_lsx_vsrli_b:
6040 case Intrinsic::loongarch_lasx_xvsrli_b:
6041 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6042 lowerVectorSplatImm<3>(N, 2, DAG));
6043 case Intrinsic::loongarch_lsx_vsrli_h:
6044 case Intrinsic::loongarch_lasx_xvsrli_h:
6045 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6046 lowerVectorSplatImm<4>(N, 2, DAG));
6047 case Intrinsic::loongarch_lsx_vsrli_w:
6048 case Intrinsic::loongarch_lasx_xvsrli_w:
6049 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6050 lowerVectorSplatImm<5>(N, 2, DAG));
6051 case Intrinsic::loongarch_lsx_vsrli_d:
6052 case Intrinsic::loongarch_lasx_xvsrli_d:
6053 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6054 lowerVectorSplatImm<6>(N, 2, DAG));
6055 case Intrinsic::loongarch_lsx_vsra_b:
6056 case Intrinsic::loongarch_lsx_vsra_h:
6057 case Intrinsic::loongarch_lsx_vsra_w:
6058 case Intrinsic::loongarch_lsx_vsra_d:
6059 case Intrinsic::loongarch_lasx_xvsra_b:
6060 case Intrinsic::loongarch_lasx_xvsra_h:
6061 case Intrinsic::loongarch_lasx_xvsra_w:
6062 case Intrinsic::loongarch_lasx_xvsra_d:
6063 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6064 truncateVecElts(N, DAG));
6065 case Intrinsic::loongarch_lsx_vsrai_b:
6066 case Intrinsic::loongarch_lasx_xvsrai_b:
6067 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6068 lowerVectorSplatImm<3>(N, 2, DAG));
6069 case Intrinsic::loongarch_lsx_vsrai_h:
6070 case Intrinsic::loongarch_lasx_xvsrai_h:
6071 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6072 lowerVectorSplatImm<4>(N, 2, DAG));
6073 case Intrinsic::loongarch_lsx_vsrai_w:
6074 case Intrinsic::loongarch_lasx_xvsrai_w:
6075 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6076 lowerVectorSplatImm<5>(N, 2, DAG));
6077 case Intrinsic::loongarch_lsx_vsrai_d:
6078 case Intrinsic::loongarch_lasx_xvsrai_d:
6079 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6080 lowerVectorSplatImm<6>(N, 2, DAG));
6081 case Intrinsic::loongarch_lsx_vclz_b:
6082 case Intrinsic::loongarch_lsx_vclz_h:
6083 case Intrinsic::loongarch_lsx_vclz_w:
6084 case Intrinsic::loongarch_lsx_vclz_d:
6085 case Intrinsic::loongarch_lasx_xvclz_b:
6086 case Intrinsic::loongarch_lasx_xvclz_h:
6087 case Intrinsic::loongarch_lasx_xvclz_w:
6088 case Intrinsic::loongarch_lasx_xvclz_d:
6089 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
6090 case Intrinsic::loongarch_lsx_vpcnt_b:
6091 case Intrinsic::loongarch_lsx_vpcnt_h:
6092 case Intrinsic::loongarch_lsx_vpcnt_w:
6093 case Intrinsic::loongarch_lsx_vpcnt_d:
6094 case Intrinsic::loongarch_lasx_xvpcnt_b:
6095 case Intrinsic::loongarch_lasx_xvpcnt_h:
6096 case Intrinsic::loongarch_lasx_xvpcnt_w:
6097 case Intrinsic::loongarch_lasx_xvpcnt_d:
6098 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
6099 case Intrinsic::loongarch_lsx_vbitclr_b:
6100 case Intrinsic::loongarch_lsx_vbitclr_h:
6101 case Intrinsic::loongarch_lsx_vbitclr_w:
6102 case Intrinsic::loongarch_lsx_vbitclr_d:
6103 case Intrinsic::loongarch_lasx_xvbitclr_b:
6104 case Intrinsic::loongarch_lasx_xvbitclr_h:
6105 case Intrinsic::loongarch_lasx_xvbitclr_w:
6106 case Intrinsic::loongarch_lasx_xvbitclr_d:
6107 return lowerVectorBitClear(N, DAG);
6108 case Intrinsic::loongarch_lsx_vbitclri_b:
6109 case Intrinsic::loongarch_lasx_xvbitclri_b:
6110 return lowerVectorBitClearImm<3>(N, DAG);
6111 case Intrinsic::loongarch_lsx_vbitclri_h:
6112 case Intrinsic::loongarch_lasx_xvbitclri_h:
6113 return lowerVectorBitClearImm<4>(N, DAG);
6114 case Intrinsic::loongarch_lsx_vbitclri_w:
6115 case Intrinsic::loongarch_lasx_xvbitclri_w:
6116 return lowerVectorBitClearImm<5>(N, DAG);
6117 case Intrinsic::loongarch_lsx_vbitclri_d:
6118 case Intrinsic::loongarch_lasx_xvbitclri_d:
6119 return lowerVectorBitClearImm<6>(N, DAG);
6120 case Intrinsic::loongarch_lsx_vbitset_b:
6121 case Intrinsic::loongarch_lsx_vbitset_h:
6122 case Intrinsic::loongarch_lsx_vbitset_w:
6123 case Intrinsic::loongarch_lsx_vbitset_d:
6124 case Intrinsic::loongarch_lasx_xvbitset_b:
6125 case Intrinsic::loongarch_lasx_xvbitset_h:
6126 case Intrinsic::loongarch_lasx_xvbitset_w:
6127 case Intrinsic::loongarch_lasx_xvbitset_d: {
6128 EVT VecTy = N->getValueType(0);
6129 SDValue One = DAG.getConstant(1, DL, VecTy);
6130 return DAG.getNode(
6131 ISD::OR, DL, VecTy, N->getOperand(1),
6132 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6133 }
6134 case Intrinsic::loongarch_lsx_vbitseti_b:
6135 case Intrinsic::loongarch_lasx_xvbitseti_b:
6136 return lowerVectorBitSetImm<3>(N, DAG);
6137 case Intrinsic::loongarch_lsx_vbitseti_h:
6138 case Intrinsic::loongarch_lasx_xvbitseti_h:
6139 return lowerVectorBitSetImm<4>(N, DAG);
6140 case Intrinsic::loongarch_lsx_vbitseti_w:
6141 case Intrinsic::loongarch_lasx_xvbitseti_w:
6142 return lowerVectorBitSetImm<5>(N, DAG);
6143 case Intrinsic::loongarch_lsx_vbitseti_d:
6144 case Intrinsic::loongarch_lasx_xvbitseti_d:
6145 return lowerVectorBitSetImm<6>(N, DAG);
6146 case Intrinsic::loongarch_lsx_vbitrev_b:
6147 case Intrinsic::loongarch_lsx_vbitrev_h:
6148 case Intrinsic::loongarch_lsx_vbitrev_w:
6149 case Intrinsic::loongarch_lsx_vbitrev_d:
6150 case Intrinsic::loongarch_lasx_xvbitrev_b:
6151 case Intrinsic::loongarch_lasx_xvbitrev_h:
6152 case Intrinsic::loongarch_lasx_xvbitrev_w:
6153 case Intrinsic::loongarch_lasx_xvbitrev_d: {
6154 EVT VecTy = N->getValueType(0);
6155 SDValue One = DAG.getConstant(1, DL, VecTy);
6156 return DAG.getNode(
6157 ISD::XOR, DL, VecTy, N->getOperand(1),
6158 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6159 }
6160 case Intrinsic::loongarch_lsx_vbitrevi_b:
6161 case Intrinsic::loongarch_lasx_xvbitrevi_b:
6162 return lowerVectorBitRevImm<3>(N, DAG);
6163 case Intrinsic::loongarch_lsx_vbitrevi_h:
6164 case Intrinsic::loongarch_lasx_xvbitrevi_h:
6165 return lowerVectorBitRevImm<4>(N, DAG);
6166 case Intrinsic::loongarch_lsx_vbitrevi_w:
6167 case Intrinsic::loongarch_lasx_xvbitrevi_w:
6168 return lowerVectorBitRevImm<5>(N, DAG);
6169 case Intrinsic::loongarch_lsx_vbitrevi_d:
6170 case Intrinsic::loongarch_lasx_xvbitrevi_d:
6171 return lowerVectorBitRevImm<6>(N, DAG);
6172 case Intrinsic::loongarch_lsx_vfadd_s:
6173 case Intrinsic::loongarch_lsx_vfadd_d:
6174 case Intrinsic::loongarch_lasx_xvfadd_s:
6175 case Intrinsic::loongarch_lasx_xvfadd_d:
6176 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
6177 N->getOperand(2));
6178 case Intrinsic::loongarch_lsx_vfsub_s:
6179 case Intrinsic::loongarch_lsx_vfsub_d:
6180 case Intrinsic::loongarch_lasx_xvfsub_s:
6181 case Intrinsic::loongarch_lasx_xvfsub_d:
6182 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
6183 N->getOperand(2));
6184 case Intrinsic::loongarch_lsx_vfmul_s:
6185 case Intrinsic::loongarch_lsx_vfmul_d:
6186 case Intrinsic::loongarch_lasx_xvfmul_s:
6187 case Intrinsic::loongarch_lasx_xvfmul_d:
6188 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
6189 N->getOperand(2));
6190 case Intrinsic::loongarch_lsx_vfdiv_s:
6191 case Intrinsic::loongarch_lsx_vfdiv_d:
6192 case Intrinsic::loongarch_lasx_xvfdiv_s:
6193 case Intrinsic::loongarch_lasx_xvfdiv_d:
6194 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
6195 N->getOperand(2));
6196 case Intrinsic::loongarch_lsx_vfmadd_s:
6197 case Intrinsic::loongarch_lsx_vfmadd_d:
6198 case Intrinsic::loongarch_lasx_xvfmadd_s:
6199 case Intrinsic::loongarch_lasx_xvfmadd_d:
6200 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
6201 N->getOperand(2), N->getOperand(3));
6202 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
6203 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6204 N->getOperand(1), N->getOperand(2),
6205 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
6206 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
6207 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
6208 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6209 N->getOperand(1), N->getOperand(2),
6210 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
6211 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
6212 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
6213 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6214 N->getOperand(1), N->getOperand(2),
6215 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
6216 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
6217 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6218 N->getOperand(1), N->getOperand(2),
6219 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
6220 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
6221 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
6222 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
6223 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
6224 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
6225 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
6226 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
6227 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
6228 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
6229 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6230 N->getOperand(1)));
6231 case Intrinsic::loongarch_lsx_vreplve_b:
6232 case Intrinsic::loongarch_lsx_vreplve_h:
6233 case Intrinsic::loongarch_lsx_vreplve_w:
6234 case Intrinsic::loongarch_lsx_vreplve_d:
6235 case Intrinsic::loongarch_lasx_xvreplve_b:
6236 case Intrinsic::loongarch_lasx_xvreplve_h:
6237 case Intrinsic::loongarch_lasx_xvreplve_w:
6238 case Intrinsic::loongarch_lasx_xvreplve_d:
6239 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
6240 N->getOperand(1),
6241 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6242 N->getOperand(2)));
6243 }
6244 return SDValue();
6245}
6246
6247 static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG,
6248 TargetLowering::DAGCombinerInfo &DCI,
6249 const LoongArchSubtarget &Subtarget) {
6250 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
6251 // conversion is unnecessary and can be replaced with the
6252 // MOVFR2GR_S_LA64 operand.
6253 SDValue Op0 = N->getOperand(0);
6254 if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
6255 return Op0.getOperand(0);
6256 return SDValue();
6257}
6258
6259 static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG,
6260 TargetLowering::DAGCombinerInfo &DCI,
6261 const LoongArchSubtarget &Subtarget) {
6262 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
6263 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
6264 // operand.
6265 SDValue Op0 = N->getOperand(0);
6266 if (Op0.getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
6267 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
6268 "Unexpected value type!");
6269 return Op0.getOperand(0);
6270 }
6271 return SDValue();
6272}
6273
6274 static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG,
6275 TargetLowering::DAGCombinerInfo &DCI,
6276 const LoongArchSubtarget &Subtarget) {
6277 MVT VT = N->getSimpleValueType(0);
6278 unsigned NumBits = VT.getScalarSizeInBits();
6279
6280 // Simplify the inputs.
6281 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6282 APInt DemandedMask(APInt::getAllOnes(NumBits));
6283 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
6284 return SDValue(N, 0);
6285
6286 return SDValue();
6287}
6288
6289static SDValue
6290 performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG,
6291 TargetLowering::DAGCombinerInfo &DCI,
6292 const LoongArchSubtarget &Subtarget) {
6293 SDValue Op0 = N->getOperand(0);
6294 SDLoc DL(N);
6295
6296 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
6297 // redundant. Instead, use BuildPairF64's operands directly.
6298 if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64)
6299 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
6300
6301 if (Op0->isUndef()) {
6302 SDValue Lo = DAG.getUNDEF(MVT::i32);
6303 SDValue Hi = DAG.getUNDEF(MVT::i32);
6304 return DCI.CombineTo(N, Lo, Hi);
6305 }
6306
6307 // It's cheaper to materialise two 32-bit integers than to load a double
6308 // from the constant pool and transfer it to integer registers through the
6309 // stack.
6310 if (auto *C = dyn_cast<ConstantFPSDNode>(Op0)) {
6311 APInt V = C->getValueAPF().bitcastToAPInt();
6312 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
6313 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
6314 return DCI.CombineTo(N, Lo, Hi);
6315 }
6316
6317 return SDValue();
6318}
6319
6320static SDValue
6321 performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
6322 TargetLowering::DAGCombinerInfo &DCI,
6323 const LoongArchSubtarget &Subtarget) {
6324 if (!DCI.isBeforeLegalize())
6325 return SDValue();
6326
6327 MVT EltVT = N->getSimpleValueType(0);
6328 SDValue Vec = N->getOperand(0);
6329 EVT VecTy = Vec->getValueType(0);
6330 SDValue Idx = N->getOperand(1);
6331 unsigned IdxOp = Idx.getOpcode();
6332 SDLoc DL(N);
6333
6334 if (!VecTy.is256BitVector() || isa<ConstantSDNode>(Idx))
6335 return SDValue();
6336
6337 // Combine:
6338 // t2 = truncate t1
6339 // t3 = {zero/sign/any}_extend t2
6340 // t4 = extract_vector_elt t0, t3
6341 // to:
6342 // t4 = extract_vector_elt t0, t1
6343 if (IdxOp == ISD::ZERO_EXTEND || IdxOp == ISD::SIGN_EXTEND ||
6344 IdxOp == ISD::ANY_EXTEND) {
6345 SDValue IdxOrig = Idx.getOperand(0);
6346 if (!(IdxOrig.getOpcode() == ISD::TRUNCATE))
6347 return SDValue();
6348
6349 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
6350 IdxOrig.getOperand(0));
6351 }
6352
6353 return SDValue();
6354}
6355
6356 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
6357 DAGCombinerInfo &DCI) const {
6358 SelectionDAG &DAG = DCI.DAG;
6359 switch (N->getOpcode()) {
6360 default:
6361 break;
6362 case ISD::AND:
6363 return performANDCombine(N, DAG, DCI, Subtarget);
6364 case ISD::OR:
6365 return performORCombine(N, DAG, DCI, Subtarget);
6366 case ISD::SETCC:
6367 return performSETCCCombine(N, DAG, DCI, Subtarget);
6368 case ISD::SRL:
6369 return performSRLCombine(N, DAG, DCI, Subtarget);
6370 case ISD::BITCAST:
6371 return performBITCASTCombine(N, DAG, DCI, Subtarget);
6372 case LoongArchISD::BITREV_W:
6373 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
6374 case LoongArchISD::BR_CC:
6375 return performBR_CCCombine(N, DAG, DCI, Subtarget);
6376 case LoongArchISD::SELECT_CC:
6377 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
6378 case ISD::INTRINSIC_WO_CHAIN:
6379 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
6380 case LoongArchISD::MOVGR2FR_W_LA64:
6381 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
6382 case LoongArchISD::MOVFR2GR_S_LA64:
6383 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
6384 case LoongArchISD::VMSKLTZ:
6385 case LoongArchISD::XVMSKLTZ:
6386 return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
6387 case LoongArchISD::SPLIT_PAIR_F64:
6388 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
6389 case ISD::EXTRACT_VECTOR_ELT:
6390 return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
6391 }
6392 return SDValue();
6393}
6394
6395 static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
6396 MachineBasicBlock *MBB) {
6397 if (!ZeroDivCheck)
6398 return MBB;
6399
6400 // Build instructions:
6401 // MBB:
6402 // div(or mod) $dst, $dividend, $divisor
6403 // bne $divisor, $zero, SinkMBB
6404 // BreakMBB:
6405 // break 7 // BRK_DIVZERO
6406 // SinkMBB:
6407 // fallthrough
6408 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
6409 MachineFunction::iterator It = ++MBB->getIterator();
6410 MachineFunction *MF = MBB->getParent();
6411 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6412 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6413 MF->insert(It, BreakMBB);
6414 MF->insert(It, SinkMBB);
6415
6416 // Transfer the remainder of MBB and its successor edges to SinkMBB.
6417 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
6418 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
6419
6420 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
6421 DebugLoc DL = MI.getDebugLoc();
6422 MachineOperand &Divisor = MI.getOperand(2);
6423 Register DivisorReg = Divisor.getReg();
6424
6425 // MBB:
6426 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
6427 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
6428 .addReg(LoongArch::R0)
6429 .addMBB(SinkMBB);
6430 MBB->addSuccessor(BreakMBB);
6431 MBB->addSuccessor(SinkMBB);
6432
6433 // BreakMBB:
6434 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
6435 // definition of BRK_DIVZERO.
6436 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
6437 BreakMBB->addSuccessor(SinkMBB);
6438
6439 // Clear Divisor's kill flag.
6440 Divisor.setIsKill(false);
6441
6442 return SinkMBB;
6443}
6444
6445static MachineBasicBlock *
6447 const LoongArchSubtarget &Subtarget) {
6448 unsigned CondOpc;
6449 switch (MI.getOpcode()) {
6450 default:
6451 llvm_unreachable("Unexpected opcode");
6452 case LoongArch::PseudoVBZ:
6453 CondOpc = LoongArch::VSETEQZ_V;
6454 break;
6455 case LoongArch::PseudoVBZ_B:
6456 CondOpc = LoongArch::VSETANYEQZ_B;
6457 break;
6458 case LoongArch::PseudoVBZ_H:
6459 CondOpc = LoongArch::VSETANYEQZ_H;
6460 break;
6461 case LoongArch::PseudoVBZ_W:
6462 CondOpc = LoongArch::VSETANYEQZ_W;
6463 break;
6464 case LoongArch::PseudoVBZ_D:
6465 CondOpc = LoongArch::VSETANYEQZ_D;
6466 break;
6467 case LoongArch::PseudoVBNZ:
6468 CondOpc = LoongArch::VSETNEZ_V;
6469 break;
6470 case LoongArch::PseudoVBNZ_B:
6471 CondOpc = LoongArch::VSETALLNEZ_B;
6472 break;
6473 case LoongArch::PseudoVBNZ_H:
6474 CondOpc = LoongArch::VSETALLNEZ_H;
6475 break;
6476 case LoongArch::PseudoVBNZ_W:
6477 CondOpc = LoongArch::VSETALLNEZ_W;
6478 break;
6479 case LoongArch::PseudoVBNZ_D:
6480 CondOpc = LoongArch::VSETALLNEZ_D;
6481 break;
6482 case LoongArch::PseudoXVBZ:
6483 CondOpc = LoongArch::XVSETEQZ_V;
6484 break;
6485 case LoongArch::PseudoXVBZ_B:
6486 CondOpc = LoongArch::XVSETANYEQZ_B;
6487 break;
6488 case LoongArch::PseudoXVBZ_H:
6489 CondOpc = LoongArch::XVSETANYEQZ_H;
6490 break;
6491 case LoongArch::PseudoXVBZ_W:
6492 CondOpc = LoongArch::XVSETANYEQZ_W;
6493 break;
6494 case LoongArch::PseudoXVBZ_D:
6495 CondOpc = LoongArch::XVSETANYEQZ_D;
6496 break;
6497 case LoongArch::PseudoXVBNZ:
6498 CondOpc = LoongArch::XVSETNEZ_V;
6499 break;
6500 case LoongArch::PseudoXVBNZ_B:
6501 CondOpc = LoongArch::XVSETALLNEZ_B;
6502 break;
6503 case LoongArch::PseudoXVBNZ_H:
6504 CondOpc = LoongArch::XVSETALLNEZ_H;
6505 break;
6506 case LoongArch::PseudoXVBNZ_W:
6507 CondOpc = LoongArch::XVSETALLNEZ_W;
6508 break;
6509 case LoongArch::PseudoXVBNZ_D:
6510 CondOpc = LoongArch::XVSETALLNEZ_D;
6511 break;
6512 }
6513
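  // Lower the vector branch pseudos to explicit control flow: evaluate the
  // vector set-condition into an FCC register, branch on it, materialize 0 in
  // FalseBB and 1 in TrueBB, and merge the two values with a PHI in SinkBB.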
6514 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6515 const BasicBlock *LLVM_BB = BB->getBasicBlock();
6516 DebugLoc DL = MI.getDebugLoc();
6517 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6518 MachineFunction::iterator It = ++BB->getIterator();
6519
6520 MachineFunction *F = BB->getParent();
6521 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
6522 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
6523 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
6524
6525 F->insert(It, FalseBB);
6526 F->insert(It, TrueBB);
6527 F->insert(It, SinkBB);
6528
6529 // Transfer the remainder of MBB and its successor edges to Sink.
6530 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
6531 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
6532
6533 // Insert the real instruction to BB.
6534 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
6535 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
6536
6537 // Insert branch.
6538 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
6539 BB->addSuccessor(FalseBB);
6540 BB->addSuccessor(TrueBB);
6541
6542 // FalseBB.
6543 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6544 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
6545 .addReg(LoongArch::R0)
6546 .addImm(0);
6547 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
6548 FalseBB->addSuccessor(SinkBB);
6549
6550 // TrueBB.
6551 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6552 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
6553 .addReg(LoongArch::R0)
6554 .addImm(1);
6555 TrueBB->addSuccessor(SinkBB);
6556
6557 // SinkBB: merge the results.
6558 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
6559 MI.getOperand(0).getReg())
6560 .addReg(RD1)
6561 .addMBB(FalseBB)
6562 .addReg(RD2)
6563 .addMBB(TrueBB);
6564
6565 // The pseudo instruction is gone now.
6566 MI.eraseFromParent();
6567 return SinkBB;
6568}
6569
6570static MachineBasicBlock *
6571emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
6572                     const LoongArchSubtarget &Subtarget) {
6573 unsigned InsOp;
6574 unsigned BroadcastOp;
6575 unsigned HalfSize;
6576 switch (MI.getOpcode()) {
6577 default:
6578 llvm_unreachable("Unexpected opcode");
6579 case LoongArch::PseudoXVINSGR2VR_B:
6580 HalfSize = 16;
6581 BroadcastOp = LoongArch::XVREPLGR2VR_B;
6582 InsOp = LoongArch::XVEXTRINS_B;
6583 break;
6584 case LoongArch::PseudoXVINSGR2VR_H:
6585 HalfSize = 8;
6586 BroadcastOp = LoongArch::XVREPLGR2VR_H;
6587 InsOp = LoongArch::XVEXTRINS_H;
6588 break;
6589 }
6590 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6591 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
6592 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
6593 DebugLoc DL = MI.getDebugLoc();
6594 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6595 // XDst = vector_insert XSrc, Elt, Idx
6596 Register XDst = MI.getOperand(0).getReg();
6597 Register XSrc = MI.getOperand(1).getReg();
6598 Register Elt = MI.getOperand(2).getReg();
6599 unsigned Idx = MI.getOperand(3).getImm();
6600
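  // If the source vector is undefined (its def is an IMPLICIT_DEF) and the
  // element lands in the lower 128 bits, inserting into the LSX subregister
  // and widening the result with SUBREG_TO_REG is enough.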
6601 if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
6602 Idx < HalfSize) {
6603 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
6604 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
6605
6606 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
6607 .addReg(XSrc, 0, LoongArch::sub_128);
6608 BuildMI(*BB, MI, DL,
6609 TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
6610 : LoongArch::VINSGR2VR_B),
6611 ScratchSubReg2)
6612 .addReg(ScratchSubReg1)
6613 .addReg(Elt)
6614 .addImm(Idx);
6615
6616 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
6617 .addImm(0)
6618 .addReg(ScratchSubReg2)
6619 .addImm(LoongArch::sub_128);
6620 } else {
6621 Register ScratchReg1 = MRI.createVirtualRegister(RC);
6622 Register ScratchReg2 = MRI.createVirtualRegister(RC);
6623
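    // Otherwise broadcast the element into a scratch vector, move the half
    // that contains lane Idx into place with XVPERMI_Q, and merge it into
    // XSrc with XVEXTRINS, whose immediate Idx * 17 equals (Idx << 4) | Idx.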
6624 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
6625
6626 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
6627 .addReg(ScratchReg1)
6628 .addReg(XSrc)
6629 .addImm(Idx >= HalfSize ? 48 : 18);
6630
6631 BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
6632 .addReg(XSrc)
6633 .addReg(ScratchReg2)
6634 .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
6635 }
6636
6637 MI.eraseFromParent();
6638 return BB;
6639}
6640
6641static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI,
6642                                          MachineBasicBlock *BB,
6643                                          const LoongArchSubtarget &Subtarget) {
6644 assert(Subtarget.hasExtLSX());
6645 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6646 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
6647 DebugLoc DL = MI.getDebugLoc();
6648 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6649 Register Dst = MI.getOperand(0).getReg();
6650 Register Src = MI.getOperand(1).getReg();
6651 Register ScratchReg1 = MRI.createVirtualRegister(RC);
6652 Register ScratchReg2 = MRI.createVirtualRegister(RC);
6653 Register ScratchReg3 = MRI.createVirtualRegister(RC);
6654
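  // Scalar CTPOP is lowered through the LSX population-count instruction:
  // zero a vector register, insert the GPR into lane 0, run VPCNT on it and
  // read the per-element count back out of lane 0.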
6655 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
6656 BuildMI(*BB, MI, DL,
6657 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
6658 : LoongArch::VINSGR2VR_W),
6659 ScratchReg2)
6660 .addReg(ScratchReg1)
6661 .addReg(Src)
6662 .addImm(0);
6663 BuildMI(
6664 *BB, MI, DL,
6665 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
6666 ScratchReg3)
6667 .addReg(ScratchReg2);
6668 BuildMI(*BB, MI, DL,
6669 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
6670 : LoongArch::VPICKVE2GR_W),
6671 Dst)
6672 .addReg(ScratchReg3)
6673 .addImm(0);
6674
6675 MI.eraseFromParent();
6676 return BB;
6677}
6678
6679static MachineBasicBlock *
6680emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB,
6681                   const LoongArchSubtarget &Subtarget) {
6682 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6683 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
6684 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
6685 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6686 Register Dst = MI.getOperand(0).getReg();
6687 Register Src = MI.getOperand(1).getReg();
6688 DebugLoc DL = MI.getDebugLoc();
6689 unsigned EleBits = 8;
6690 unsigned NotOpc = 0;
6691 unsigned MskOpc;
6692
6693 switch (MI.getOpcode()) {
6694 default:
6695 llvm_unreachable("Unexpected opcode");
6696 case LoongArch::PseudoVMSKLTZ_B:
6697 MskOpc = LoongArch::VMSKLTZ_B;
6698 break;
6699 case LoongArch::PseudoVMSKLTZ_H:
6700 MskOpc = LoongArch::VMSKLTZ_H;
6701 EleBits = 16;
6702 break;
6703 case LoongArch::PseudoVMSKLTZ_W:
6704 MskOpc = LoongArch::VMSKLTZ_W;
6705 EleBits = 32;
6706 break;
6707 case LoongArch::PseudoVMSKLTZ_D:
6708 MskOpc = LoongArch::VMSKLTZ_D;
6709 EleBits = 64;
6710 break;
6711 case LoongArch::PseudoVMSKGEZ_B:
6712 MskOpc = LoongArch::VMSKGEZ_B;
6713 break;
6714 case LoongArch::PseudoVMSKEQZ_B:
6715 MskOpc = LoongArch::VMSKNZ_B;
6716 NotOpc = LoongArch::VNOR_V;
6717 break;
6718 case LoongArch::PseudoVMSKNEZ_B:
6719 MskOpc = LoongArch::VMSKNZ_B;
6720 break;
6721 case LoongArch::PseudoXVMSKLTZ_B:
6722 MskOpc = LoongArch::XVMSKLTZ_B;
6723 RC = &LoongArch::LASX256RegClass;
6724 break;
6725 case LoongArch::PseudoXVMSKLTZ_H:
6726 MskOpc = LoongArch::XVMSKLTZ_H;
6727 RC = &LoongArch::LASX256RegClass;
6728 EleBits = 16;
6729 break;
6730 case LoongArch::PseudoXVMSKLTZ_W:
6731 MskOpc = LoongArch::XVMSKLTZ_W;
6732 RC = &LoongArch::LASX256RegClass;
6733 EleBits = 32;
6734 break;
6735 case LoongArch::PseudoXVMSKLTZ_D:
6736 MskOpc = LoongArch::XVMSKLTZ_D;
6737 RC = &LoongArch::LASX256RegClass;
6738 EleBits = 64;
6739 break;
6740 case LoongArch::PseudoXVMSKGEZ_B:
6741 MskOpc = LoongArch::XVMSKGEZ_B;
6742 RC = &LoongArch::LASX256RegClass;
6743 break;
6744 case LoongArch::PseudoXVMSKEQZ_B:
6745 MskOpc = LoongArch::XVMSKNZ_B;
6746 NotOpc = LoongArch::XVNOR_V;
6747 RC = &LoongArch::LASX256RegClass;
6748 break;
6749 case LoongArch::PseudoXVMSKNEZ_B:
6750 MskOpc = LoongArch::XVMSKNZ_B;
6751 RC = &LoongArch::LASX256RegClass;
6752 break;
6753 }
6754
6755 Register Msk = MRI.createVirtualRegister(RC);
6756 if (NotOpc) {
6757 Register Tmp = MRI.createVirtualRegister(RC);
6758 BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
6759 BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
6760 .addReg(Tmp, RegState::Kill)
6761 .addReg(Tmp, RegState::Kill);
6762 } else {
6763 BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
6764 }
6765
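  // A 256-bit (LASX) mask does not fit in one 32-bit extract: read the low
  // and high 32-bit lanes separately and splice the high bits above the low
  // bits with BSTRINS.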
6766 if (TRI->getRegSizeInBits(*RC) > 128) {
6767 Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6768 Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6769 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
6770 .addReg(Msk)
6771 .addImm(0);
6772 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
6773 .addReg(Msk, RegState::Kill)
6774 .addImm(4);
6775 BuildMI(*BB, MI, DL,
6776 TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
6777 : LoongArch::BSTRINS_W),
6778 Dst)
6779 .addReg(Lo, RegState::Kill)
6780 .addReg(Hi, RegState::Kill)
6781 .addImm(256 / EleBits - 1)
6782 .addImm(128 / EleBits);
6783 } else {
6784 BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
6785 .addReg(Msk, RegState::Kill)
6786 .addImm(0);
6787 }
6788
6789 MI.eraseFromParent();
6790 return BB;
6791}
6792
6793static MachineBasicBlock *
6794emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
6795                       const LoongArchSubtarget &Subtarget) {
6796 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
6797 "Unexpected instruction");
6798
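  // Split the f64 into its two 32-bit halves by copying the low word with
  // MOVFR2GR_S_64 and the high word with MOVFRH2GR_S.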
6799 MachineFunction &MF = *BB->getParent();
6800 DebugLoc DL = MI.getDebugLoc();
6801 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
6802 Register LoReg = MI.getOperand(0).getReg();
6803 Register HiReg = MI.getOperand(1).getReg();
6804 Register SrcReg = MI.getOperand(2).getReg();
6805
6806 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
6807 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
6808 .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
6809 MI.eraseFromParent(); // The pseudo instruction is gone now.
6810 return BB;
6811}
6812
6813static MachineBasicBlock *
6814emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
6815                       const LoongArchSubtarget &Subtarget) {
6816 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
6817 "Unexpected instruction");
6818
6819 MachineFunction &MF = *BB->getParent();
6820 DebugLoc DL = MI.getDebugLoc();
6821 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
6822 MachineRegisterInfo &MRI = MF.getRegInfo();
6823 Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
6824 Register DstReg = MI.getOperand(0).getReg();
6825 Register LoReg = MI.getOperand(1).getReg();
6826 Register HiReg = MI.getOperand(2).getReg();
6827
6828 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
6829 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
6830 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
6831 .addReg(TmpReg, RegState::Kill)
6832 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
6833 MI.eraseFromParent(); // The pseudo instruction is gone now.
6834 return BB;
6835}
6836
6837static bool isSelectPseudo(MachineInstr &MI) {
6838 switch (MI.getOpcode()) {
6839 default:
6840 return false;
6841 case LoongArch::Select_GPR_Using_CC_GPR:
6842 return true;
6843 }
6844}
6845
6846static MachineBasicBlock *
6847emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB,
6848                 const LoongArchSubtarget &Subtarget) {
6849 // To "insert" Select_* instructions, we actually have to insert the triangle
6850 // control-flow pattern. The incoming instructions know the destination vreg
6851 // to set, the condition code register to branch on, the true/false values to
6852 // select between, and the condcode to use to select the appropriate branch.
6853 //
6854 // We produce the following control flow:
6855 // HeadMBB
6856 // | \
6857 // | IfFalseMBB
6858 // | /
6859 // TailMBB
6860 //
6861 // When we find a sequence of selects we attempt to optimize their emission
6862 // by sharing the control flow. Currently we only handle cases where we have
6863 // multiple selects with the exact same condition (same LHS, RHS and CC).
6864 // The selects may be interleaved with other instructions if the other
6865 // instructions meet some requirements we deem safe:
6866 // - They are not pseudo instructions.
6867 // - They are debug instructions, or otherwise
6868 // - They do not have side-effects, do not access memory and their inputs do
6869 // not depend on the results of the select pseudo-instructions.
6870 // The TrueV/FalseV operands of the selects cannot depend on the result of
6871 // previous selects in the sequence.
6872 // These conditions could be further relaxed. See the X86 target for a
6873 // related approach and more information.
6874
6875 Register LHS = MI.getOperand(1).getReg();
6876 Register RHS;
6877 if (MI.getOperand(2).isReg())
6878 RHS = MI.getOperand(2).getReg();
6879 auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
6880
6881 SmallVector<MachineInstr *, 4> SelectDebugValues;
6882 SmallSet<Register, 4> SelectDests;
6883 SelectDests.insert(MI.getOperand(0).getReg());
6884
6885 MachineInstr *LastSelectPseudo = &MI;
6886 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
6887 SequenceMBBI != E; ++SequenceMBBI) {
6888 if (SequenceMBBI->isDebugInstr())
6889 continue;
6890 if (isSelectPseudo(*SequenceMBBI)) {
6891 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
6892 !SequenceMBBI->getOperand(2).isReg() ||
6893 SequenceMBBI->getOperand(2).getReg() != RHS ||
6894 SequenceMBBI->getOperand(3).getImm() != CC ||
6895 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
6896 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
6897 break;
6898 LastSelectPseudo = &*SequenceMBBI;
6899 SequenceMBBI->collectDebugValues(SelectDebugValues);
6900 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
6901 continue;
6902 }
6903 if (SequenceMBBI->hasUnmodeledSideEffects() ||
6904 SequenceMBBI->mayLoadOrStore() ||
6905 SequenceMBBI->usesCustomInsertionHook())
6906 break;
6907 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
6908 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
6909 }))
6910 break;
6911 }
6912
6913 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
6914 const BasicBlock *LLVM_BB = BB->getBasicBlock();
6915 DebugLoc DL = MI.getDebugLoc();
6916 MachineFunction::iterator I = ++BB->getIterator();
6917
6918 MachineBasicBlock *HeadMBB = BB;
6919 MachineFunction *F = BB->getParent();
6920 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
6921 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
6922
6923 F->insert(I, IfFalseMBB);
6924 F->insert(I, TailMBB);
6925
6926 // Set the call frame size on entry to the new basic blocks.
6927 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
6928 IfFalseMBB->setCallFrameSize(CallFrameSize);
6929 TailMBB->setCallFrameSize(CallFrameSize);
6930
6931 // Transfer debug instructions associated with the selects to TailMBB.
6932 for (MachineInstr *DebugInstr : SelectDebugValues) {
6933 TailMBB->push_back(DebugInstr->removeFromParent());
6934 }
6935
6936 // Move all instructions after the sequence to TailMBB.
6937 TailMBB->splice(TailMBB->end(), HeadMBB,
6938 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
6939 // Update machine-CFG edges by transferring all successors of the current
6940 // block to the new block which will contain the Phi nodes for the selects.
6941 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
6942 // Set the successors for HeadMBB.
6943 HeadMBB->addSuccessor(IfFalseMBB);
6944 HeadMBB->addSuccessor(TailMBB);
6945
6946 // Insert appropriate branch.
6947 if (MI.getOperand(2).isImm())
6948 BuildMI(HeadMBB, DL, TII.get(CC))
6949 .addReg(LHS)
6950 .addImm(MI.getOperand(2).getImm())
6951 .addMBB(TailMBB);
6952 else
6953 BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
6954
6955 // IfFalseMBB just falls through to TailMBB.
6956 IfFalseMBB->addSuccessor(TailMBB);
6957
6958 // Create PHIs for all of the select pseudo-instructions.
6959 auto SelectMBBI = MI.getIterator();
6960 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
6961 auto InsertionPoint = TailMBB->begin();
6962 while (SelectMBBI != SelectEnd) {
6963 auto Next = std::next(SelectMBBI);
6964 if (isSelectPseudo(*SelectMBBI)) {
6965 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
6966 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
6967 TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
6968 .addReg(SelectMBBI->getOperand(4).getReg())
6969 .addMBB(HeadMBB)
6970 .addReg(SelectMBBI->getOperand(5).getReg())
6971 .addMBB(IfFalseMBB);
6972 SelectMBBI->eraseFromParent();
6973 }
6974 SelectMBBI = Next;
6975 }
6976
6977 F->getProperties().resetNoPHIs();
6978 return TailMBB;
6979}
6980
6981MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
6982 MachineInstr &MI, MachineBasicBlock *BB) const {
6983 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6984 DebugLoc DL = MI.getDebugLoc();
6985
6986 switch (MI.getOpcode()) {
6987 default:
6988 llvm_unreachable("Unexpected instr type to insert");
6989 case LoongArch::DIV_W:
6990 case LoongArch::DIV_WU:
6991 case LoongArch::MOD_W:
6992 case LoongArch::MOD_WU:
6993 case LoongArch::DIV_D:
6994 case LoongArch::DIV_DU:
6995 case LoongArch::MOD_D:
6996 case LoongArch::MOD_DU:
6997 return insertDivByZeroTrap(MI, BB);
6998 break;
6999 case LoongArch::WRFCSR: {
7000 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
7001 LoongArch::FCSR0 + MI.getOperand(0).getImm())
7002 .addReg(MI.getOperand(1).getReg());
7003 MI.eraseFromParent();
7004 return BB;
7005 }
7006 case LoongArch::RDFCSR: {
7007 MachineInstr *ReadFCSR =
7008 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
7009 MI.getOperand(0).getReg())
7010 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
7011 ReadFCSR->getOperand(1).setIsUndef();
7012 MI.eraseFromParent();
7013 return BB;
7014 }
7015 case LoongArch::Select_GPR_Using_CC_GPR:
7016 return emitSelectPseudo(MI, BB, Subtarget);
7017 case LoongArch::BuildPairF64Pseudo:
7018 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
7019 case LoongArch::SplitPairF64Pseudo:
7020 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
7021 case LoongArch::PseudoVBZ:
7022 case LoongArch::PseudoVBZ_B:
7023 case LoongArch::PseudoVBZ_H:
7024 case LoongArch::PseudoVBZ_W:
7025 case LoongArch::PseudoVBZ_D:
7026 case LoongArch::PseudoVBNZ:
7027 case LoongArch::PseudoVBNZ_B:
7028 case LoongArch::PseudoVBNZ_H:
7029 case LoongArch::PseudoVBNZ_W:
7030 case LoongArch::PseudoVBNZ_D:
7031 case LoongArch::PseudoXVBZ:
7032 case LoongArch::PseudoXVBZ_B:
7033 case LoongArch::PseudoXVBZ_H:
7034 case LoongArch::PseudoXVBZ_W:
7035 case LoongArch::PseudoXVBZ_D:
7036 case LoongArch::PseudoXVBNZ:
7037 case LoongArch::PseudoXVBNZ_B:
7038 case LoongArch::PseudoXVBNZ_H:
7039 case LoongArch::PseudoXVBNZ_W:
7040 case LoongArch::PseudoXVBNZ_D:
7041 return emitVecCondBranchPseudo(MI, BB, Subtarget);
7042 case LoongArch::PseudoXVINSGR2VR_B:
7043 case LoongArch::PseudoXVINSGR2VR_H:
7044 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
7045 case LoongArch::PseudoCTPOP:
7046 return emitPseudoCTPOP(MI, BB, Subtarget);
7047 case LoongArch::PseudoVMSKLTZ_B:
7048 case LoongArch::PseudoVMSKLTZ_H:
7049 case LoongArch::PseudoVMSKLTZ_W:
7050 case LoongArch::PseudoVMSKLTZ_D:
7051 case LoongArch::PseudoVMSKGEZ_B:
7052 case LoongArch::PseudoVMSKEQZ_B:
7053 case LoongArch::PseudoVMSKNEZ_B:
7054 case LoongArch::PseudoXVMSKLTZ_B:
7055 case LoongArch::PseudoXVMSKLTZ_H:
7056 case LoongArch::PseudoXVMSKLTZ_W:
7057 case LoongArch::PseudoXVMSKLTZ_D:
7058 case LoongArch::PseudoXVMSKGEZ_B:
7059 case LoongArch::PseudoXVMSKEQZ_B:
7060 case LoongArch::PseudoXVMSKNEZ_B:
7061 return emitPseudoVMSKCOND(MI, BB, Subtarget);
7062 case TargetOpcode::STATEPOINT:
7063 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
7064 // while bl call instruction (where statepoint will be lowered at the
7065 // end) has implicit def. This def is early-clobber as it will be set at
7066 // the moment of the call and earlier than any use is read.
7067 // Add this implicit dead def here as a workaround.
7068 MI.addOperand(*MI.getMF(),
7069               MachineOperand::CreateReg(
7070                   LoongArch::R1, /*isDef*/ true,
7071 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
7072 /*isUndef*/ false, /*isEarlyClobber*/ true));
7073 if (!Subtarget.is64Bit())
7074 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
7075 return emitPatchPoint(MI, BB);
7076 }
7077}
7078
7079bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
7080 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
7081 unsigned *Fast) const {
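  // Misaligned accesses are only allowed when the subtarget implements the
  // unaligned-access (UAL) feature.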
7082 if (!Subtarget.hasUAL())
7083 return false;
7084
7085 // TODO: set reasonable speed number.
7086 if (Fast)
7087 *Fast = 1;
7088 return true;
7089}
7090
7091const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
7092 switch ((LoongArchISD::NodeType)Opcode) {
7093 case LoongArchISD::FIRST_NUMBER:
7094 break;
7095
7096#define NODE_NAME_CASE(node) \
7097 case LoongArchISD::node: \
7098 return "LoongArchISD::" #node;
7099
7100 // TODO: Add more target-dependent nodes later.
7101 NODE_NAME_CASE(CALL)
7102 NODE_NAME_CASE(CALL_MEDIUM)
7103 NODE_NAME_CASE(CALL_LARGE)
7104 NODE_NAME_CASE(RET)
7105 NODE_NAME_CASE(TAIL)
7106 NODE_NAME_CASE(TAIL_MEDIUM)
7107 NODE_NAME_CASE(TAIL_LARGE)
7108 NODE_NAME_CASE(SELECT_CC)
7109 NODE_NAME_CASE(BR_CC)
7110 NODE_NAME_CASE(BRCOND)
7111 NODE_NAME_CASE(SLL_W)
7112 NODE_NAME_CASE(SRA_W)
7113 NODE_NAME_CASE(SRL_W)
7114 NODE_NAME_CASE(BSTRINS)
7115 NODE_NAME_CASE(BSTRPICK)
7116 NODE_NAME_CASE(MOVGR2FR_W_LA64)
7117 NODE_NAME_CASE(MOVFR2GR_S_LA64)
7118 NODE_NAME_CASE(FTINT)
7119 NODE_NAME_CASE(BUILD_PAIR_F64)
7120 NODE_NAME_CASE(SPLIT_PAIR_F64)
7121 NODE_NAME_CASE(REVB_2H)
7122 NODE_NAME_CASE(REVB_2W)
7123 NODE_NAME_CASE(BITREV_4B)
7124 NODE_NAME_CASE(BITREV_8B)
7125 NODE_NAME_CASE(BITREV_W)
7126 NODE_NAME_CASE(ROTR_W)
7127 NODE_NAME_CASE(ROTL_W)
7128 NODE_NAME_CASE(DIV_W)
7129 NODE_NAME_CASE(DIV_WU)
7130 NODE_NAME_CASE(MOD_W)
7131 NODE_NAME_CASE(MOD_WU)
7132 NODE_NAME_CASE(CLZ_W)
7133 NODE_NAME_CASE(CTZ_W)
7134 NODE_NAME_CASE(DBAR)
7135 NODE_NAME_CASE(IBAR)
7136 NODE_NAME_CASE(BREAK)
7137 NODE_NAME_CASE(SYSCALL)
7138 NODE_NAME_CASE(CRC_W_B_W)
7139 NODE_NAME_CASE(CRC_W_H_W)
7140 NODE_NAME_CASE(CRC_W_W_W)
7141 NODE_NAME_CASE(CRC_W_D_W)
7142 NODE_NAME_CASE(CRCC_W_B_W)
7143 NODE_NAME_CASE(CRCC_W_H_W)
7144 NODE_NAME_CASE(CRCC_W_W_W)
7145 NODE_NAME_CASE(CRCC_W_D_W)
7146 NODE_NAME_CASE(CSRRD)
7147 NODE_NAME_CASE(CSRWR)
7148 NODE_NAME_CASE(CSRXCHG)
7149 NODE_NAME_CASE(IOCSRRD_B)
7150 NODE_NAME_CASE(IOCSRRD_H)
7151 NODE_NAME_CASE(IOCSRRD_W)
7152 NODE_NAME_CASE(IOCSRRD_D)
7153 NODE_NAME_CASE(IOCSRWR_B)
7154 NODE_NAME_CASE(IOCSRWR_H)
7155 NODE_NAME_CASE(IOCSRWR_W)
7156 NODE_NAME_CASE(IOCSRWR_D)
7157 NODE_NAME_CASE(CPUCFG)
7158 NODE_NAME_CASE(MOVGR2FCSR)
7159 NODE_NAME_CASE(MOVFCSR2GR)
7160 NODE_NAME_CASE(CACOP_D)
7161 NODE_NAME_CASE(CACOP_W)
7162 NODE_NAME_CASE(VSHUF)
7163 NODE_NAME_CASE(VPICKEV)
7164 NODE_NAME_CASE(VPICKOD)
7165 NODE_NAME_CASE(VPACKEV)
7166 NODE_NAME_CASE(VPACKOD)
7167 NODE_NAME_CASE(VILVL)
7168 NODE_NAME_CASE(VILVH)
7169 NODE_NAME_CASE(VSHUF4I)
7170 NODE_NAME_CASE(VREPLVEI)
7171 NODE_NAME_CASE(VREPLGR2VR)
7172 NODE_NAME_CASE(XVPERMI)
7173 NODE_NAME_CASE(XVPERM)
7174 NODE_NAME_CASE(XVREPLVE0)
7175 NODE_NAME_CASE(XVREPLVE0Q)
7176 NODE_NAME_CASE(VPICK_SEXT_ELT)
7177 NODE_NAME_CASE(VPICK_ZEXT_ELT)
7178 NODE_NAME_CASE(VREPLVE)
7179 NODE_NAME_CASE(VALL_ZERO)
7180 NODE_NAME_CASE(VANY_ZERO)
7181 NODE_NAME_CASE(VALL_NONZERO)
7182 NODE_NAME_CASE(VANY_NONZERO)
7183 NODE_NAME_CASE(FRECIPE)
7184 NODE_NAME_CASE(FRSQRTE)
7185 NODE_NAME_CASE(VSLLI)
7186 NODE_NAME_CASE(VSRLI)
7187 NODE_NAME_CASE(VBSLL)
7188 NODE_NAME_CASE(VBSRL)
7189 NODE_NAME_CASE(VLDREPL)
7190 NODE_NAME_CASE(VMSKLTZ)
7191 NODE_NAME_CASE(VMSKGEZ)
7192 NODE_NAME_CASE(VMSKEQZ)
7193 NODE_NAME_CASE(VMSKNEZ)
7194 NODE_NAME_CASE(XVMSKLTZ)
7195 NODE_NAME_CASE(XVMSKGEZ)
7196 NODE_NAME_CASE(XVMSKEQZ)
7197 NODE_NAME_CASE(XVMSKNEZ)
7198 NODE_NAME_CASE(VHADDW)
7199 }
7200#undef NODE_NAME_CASE
7201 return nullptr;
7202}
7203
7204//===----------------------------------------------------------------------===//
7205// Calling Convention Implementation
7206//===----------------------------------------------------------------------===//
7207
7208// Eight general-purpose registers a0-a7 are used for passing integer arguments,
7209// with a0-a1 reused to return values. Generally, the GPRs are used to pass
7210// fixed-point arguments, and floating-point arguments when no FPR is available
7211// or with soft float ABI.
7212const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
7213 LoongArch::R7, LoongArch::R8, LoongArch::R9,
7214 LoongArch::R10, LoongArch::R11};
7215// Eight floating-point registers fa0-fa7 are used for passing floating-point
7216// arguments, and fa0-fa1 are also used to return values.
7217const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
7218 LoongArch::F3, LoongArch::F4, LoongArch::F5,
7219 LoongArch::F6, LoongArch::F7};
7220// FPR32 and FPR64 alias each other.
7221const MCPhysReg ArgFPR64s[] = {
7222 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
7223 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
7224
7225const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
7226 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
7227 LoongArch::VR6, LoongArch::VR7};
7228
7229const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
7230 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
7231 LoongArch::XR6, LoongArch::XR7};
7232
7233// Pass a 2*GRLen argument that has been split into two GRLen values through
7234// registers or the stack as necessary.
7235static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
7236 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
7237 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
7238 ISD::ArgFlagsTy ArgFlags2) {
7239 unsigned GRLenInBytes = GRLen / 8;
7240 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7241 // At least one half can be passed via register.
7242 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
7243 VA1.getLocVT(), CCValAssign::Full));
7244 } else {
7245 // Both halves must be passed on the stack, with proper alignment.
7246 Align StackAlign =
7247 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
7248 State.addLoc(
7249        CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
7250                            State.AllocateStack(GRLenInBytes, StackAlign),
7251 VA1.getLocVT(), CCValAssign::Full));
7252 State.addLoc(CCValAssign::getMem(
7253 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7254 LocVT2, CCValAssign::Full));
7255 return false;
7256 }
7257 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7258 // The second half can also be passed via register.
7259 State.addLoc(
7260 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
7261 } else {
7262 // The second half is passed via the stack, without additional alignment.
7263 State.addLoc(CCValAssign::getMem(
7264 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7265 LocVT2, CCValAssign::Full));
7266 }
7267 return false;
7268}
7269
7270// Implements the LoongArch calling convention. Returns true upon failure.
7271static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
7272                         unsigned ValNo, MVT ValVT,
7273 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
7274 CCState &State, bool IsRet, Type *OrigTy) {
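  // GRLen (32 or 64) is derived from the largest legal integer type recorded
  // in the data layout.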
7275 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
7276 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
7277 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
7278 MVT LocVT = ValVT;
7279
7280 // Any return value split into more than two values can't be returned
7281 // directly.
7282 if (IsRet && ValNo > 1)
7283 return true;
7284
7285 // If passing a variadic argument, or if no FPR is available.
7286 bool UseGPRForFloat = true;
7287
7288 switch (ABI) {
7289 default:
7290 llvm_unreachable("Unexpected ABI");
7291 break;
7292 case LoongArchABI::ABI_ILP32F:
7293 case LoongArchABI::ABI_LP64F:
7294 case LoongArchABI::ABI_ILP32D:
7295 case LoongArchABI::ABI_LP64D:
7296 UseGPRForFloat = ArgFlags.isVarArg();
7297 break;
7298 case LoongArchABI::ABI_ILP32S:
7299 case LoongArchABI::ABI_LP64S:
7300 break;
7301 }
7302
7303 // If this is a variadic argument, the LoongArch calling convention requires
7304 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
7305 // byte alignment. An aligned register should be used regardless of whether
7306 // the original argument was split during legalisation or not. The argument
7307 // will not be passed by registers if the original type is larger than
7308 // 2*GRLen, so the register alignment rule does not apply.
7309 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
7310 if (ArgFlags.isVarArg() &&
7311 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
7312 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
7313 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
7314 // Skip 'odd' register if necessary.
7315 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
7316 State.AllocateReg(ArgGPRs);
7317 }
7318
7319 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
7320 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
7321 State.getPendingArgFlags();
7322
7323 assert(PendingLocs.size() == PendingArgFlags.size() &&
7324 "PendingLocs and PendingArgFlags out of sync");
7325
7326 // FPR32 and FPR64 alias each other.
7327 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
7328 UseGPRForFloat = true;
7329
7330 if (UseGPRForFloat && ValVT == MVT::f32) {
7331 LocVT = GRLenVT;
7332 LocInfo = CCValAssign::BCvt;
7333 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
7334 LocVT = MVT::i64;
7335 LocInfo = CCValAssign::BCvt;
7336 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
7337 // Handle passing f64 on LA32D with a soft float ABI or when floating point
7338 // registers are exhausted.
7339 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
7340 // Depending on available argument GPRs, f64 may be passed in a pair of
7341 // GPRs, split between a GPR and the stack, or passed completely on the
7342 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
7343 // cases.
7344 MCRegister Reg = State.AllocateReg(ArgGPRs);
7345 if (!Reg) {
7346 int64_t StackOffset = State.AllocateStack(8, Align(8));
7347 State.addLoc(
7348 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7349 return false;
7350 }
7351 LocVT = MVT::i32;
7352 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7353 MCRegister HiReg = State.AllocateReg(ArgGPRs);
7354 if (HiReg) {
7355 State.addLoc(
7356 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
7357 } else {
7358 int64_t StackOffset = State.AllocateStack(4, Align(4));
7359 State.addLoc(
7360 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7361 }
7362 return false;
7363 }
7364
7365 // Split arguments might be passed indirectly, so keep track of the pending
7366 // values.
7367 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
7368 LocVT = GRLenVT;
7369 LocInfo = CCValAssign::Indirect;
7370 PendingLocs.push_back(
7371 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
7372 PendingArgFlags.push_back(ArgFlags);
7373 if (!ArgFlags.isSplitEnd()) {
7374 return false;
7375 }
7376 }
7377
7378 // If the split argument only had two elements, it should be passed directly
7379 // in registers or on the stack.
7380 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
7381 PendingLocs.size() <= 2) {
7382 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
7383 // Apply the normal calling convention rules to the first half of the
7384 // split argument.
7385 CCValAssign VA = PendingLocs[0];
7386 ISD::ArgFlagsTy AF = PendingArgFlags[0];
7387 PendingLocs.clear();
7388 PendingArgFlags.clear();
7389 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
7390 ArgFlags);
7391 }
7392
7393 // Allocate to a register if possible, or else a stack slot.
7394 Register Reg;
7395 unsigned StoreSizeBytes = GRLen / 8;
7396 Align StackAlign = Align(GRLen / 8);
7397
7398 if (ValVT == MVT::f32 && !UseGPRForFloat) {
7399 Reg = State.AllocateReg(ArgFPR32s);
7400 } else if (ValVT == MVT::f64 && !UseGPRForFloat) {
7401 Reg = State.AllocateReg(ArgFPR64s);
7402 } else if (ValVT.is128BitVector()) {
7403 Reg = State.AllocateReg(ArgVRs);
7404 UseGPRForFloat = false;
7405 StoreSizeBytes = 16;
7406 StackAlign = Align(16);
7407 } else if (ValVT.is256BitVector()) {
7408 Reg = State.AllocateReg(ArgXRs);
7409 UseGPRForFloat = false;
7410 StoreSizeBytes = 32;
7411 StackAlign = Align(32);
7412 } else {
7413 Reg = State.AllocateReg(ArgGPRs);
7414 }
7415
7416 unsigned StackOffset =
7417 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
7418
7419 // If we reach this point and PendingLocs is non-empty, we must be at the
7420 // end of a split argument that must be passed indirectly.
7421 if (!PendingLocs.empty()) {
7422 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
7423 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
7424 for (auto &It : PendingLocs) {
7425 if (Reg)
7426 It.convertToReg(Reg);
7427 else
7428 It.convertToMem(StackOffset);
7429 State.addLoc(It);
7430 }
7431 PendingLocs.clear();
7432 PendingArgFlags.clear();
7433 return false;
7434 }
7435 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
7436 "Expected a GRLenVT at this stage");
7437
7438 if (Reg) {
7439 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7440 return false;
7441 }
7442
7443 // When a floating-point value is passed on the stack, no bit-cast is needed.
7444 if (ValVT.isFloatingPoint()) {
7445 LocVT = ValVT;
7446 LocInfo = CCValAssign::Full;
7447 }
7448
7449 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7450 return false;
7451}
7452
7453void LoongArchTargetLowering::analyzeInputArgs(
7454 MachineFunction &MF, CCState &CCInfo,
7455 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
7456 LoongArchCCAssignFn Fn) const {
7457 FunctionType *FType = MF.getFunction().getFunctionType();
7458 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
7459 MVT ArgVT = Ins[i].VT;
7460 Type *ArgTy = nullptr;
7461 if (IsRet)
7462 ArgTy = FType->getReturnType();
7463 else if (Ins[i].isOrigArg())
7464 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
7465    LoongArchABI::ABI ABI =
7466        MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7467 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
7468 CCInfo, IsRet, ArgTy)) {
7469 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
7470 << '\n');
7471 llvm_unreachable("");
7472 }
7473 }
7474}
7475
7476void LoongArchTargetLowering::analyzeOutputArgs(
7477 MachineFunction &MF, CCState &CCInfo,
7478 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
7479 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
7480 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7481 MVT ArgVT = Outs[i].VT;
7482 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
7483    LoongArchABI::ABI ABI =
7484        MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7485 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
7486 CCInfo, IsRet, OrigTy)) {
7487 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
7488 << "\n");
7489 llvm_unreachable("");
7490 }
7491 }
7492}
7493
7494// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
7495// values.
7497 const CCValAssign &VA, const SDLoc &DL) {
7498 switch (VA.getLocInfo()) {
7499 default:
7500 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7501 case CCValAssign::Full:
7503 break;
7504 case CCValAssign::BCvt:
7505 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7506 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
7507 else
7508 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
7509 break;
7510 }
7511 return Val;
7512}
7513
7514static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
7515                                const CCValAssign &VA, const SDLoc &DL,
7516 const ISD::InputArg &In,
7517 const LoongArchTargetLowering &TLI) {
7518 MachineFunction &MF = DAG.getMachineFunction();
7519 MachineRegisterInfo &RegInfo = MF.getRegInfo();
7520 EVT LocVT = VA.getLocVT();
7521 SDValue Val;
7522 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
7523 Register VReg = RegInfo.createVirtualRegister(RC);
7524 RegInfo.addLiveIn(VA.getLocReg(), VReg);
7525 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
7526
7527 // If input is sign extended from 32 bits, note it for the OptW pass.
7528 if (In.isOrigArg()) {
7529 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
7530 if (OrigArg->getType()->isIntegerTy()) {
7531 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
7532 // An input zero extended from i31 can also be considered sign extended.
7533 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
7534 (BitWidth < 32 && In.Flags.isZExt())) {
7535        LoongArchMachineFunctionInfo *LAFI =
7536            MF.getInfo<LoongArchMachineFunctionInfo>();
7537        LAFI->addSExt32Register(VReg);
7538 }
7539 }
7540 }
7541
7542 return convertLocVTToValVT(DAG, Val, VA, DL);
7543}
7544
7545// The caller is responsible for loading the full value if the argument is
7546// passed with CCValAssign::Indirect.
7547static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
7548                                const CCValAssign &VA, const SDLoc &DL) {
7549 MachineFunction &MF = DAG.getMachineFunction();
7550 MachineFrameInfo &MFI = MF.getFrameInfo();
7551 EVT ValVT = VA.getValVT();
7552 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
7553 /*IsImmutable=*/true);
7554 SDValue FIN = DAG.getFrameIndex(
7555      FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)));
7556
7557 ISD::LoadExtType ExtType;
7558 switch (VA.getLocInfo()) {
7559 default:
7560 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7561 case CCValAssign::Full:
7562 case CCValAssign::Indirect:
7563 case CCValAssign::BCvt:
7564 ExtType = ISD::NON_EXTLOAD;
7565 break;
7566 }
7567 return DAG.getExtLoad(
7568 ExtType, DL, VA.getLocVT(), Chain, FIN,
7569      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
7570}
7571
7572static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain,
7573                                       const CCValAssign &VA,
7574 const CCValAssign &HiVA,
7575 const SDLoc &DL) {
7576 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
7577 "Unexpected VA");
7578 MachineFunction &MF = DAG.getMachineFunction();
7579 MachineFrameInfo &MFI = MF.getFrameInfo();
7580 MachineRegisterInfo &RegInfo = MF.getRegInfo();
7581
7582 assert(VA.isRegLoc() && "Expected register VA assignment");
7583
7584 Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7585 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
7586 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
7587 SDValue Hi;
7588 if (HiVA.isMemLoc()) {
7589 // Second half of f64 is passed on the stack.
7590 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
7591 /*IsImmutable=*/true);
7592 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
7593 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
7594                     MachinePointerInfo::getFixedStack(MF, FI));
7595 } else {
7596 // Second half of f64 is passed in another GPR.
7597 Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7598 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
7599 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
7600 }
7601 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
7602}
7603
7604static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
7605                                   const CCValAssign &VA, const SDLoc &DL) {
7606 EVT LocVT = VA.getLocVT();
7607
7608 switch (VA.getLocInfo()) {
7609 default:
7610 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7611 case CCValAssign::Full:
7612 break;
7613 case CCValAssign::BCvt:
7614 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7615 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
7616 else
7617 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
7618 break;
7619 }
7620 return Val;
7621}
7622
7623static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
7624 CCValAssign::LocInfo LocInfo,
7625 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
7626 CCState &State) {
7627 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
7628 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
7629 // s0 s1 s2 s3 s4 s5 s6 s7 s8
7630 static const MCPhysReg GPRList[] = {
7631 LoongArch::R23, LoongArch::R24, LoongArch::R25,
7632 LoongArch::R26, LoongArch::R27, LoongArch::R28,
7633 LoongArch::R29, LoongArch::R30, LoongArch::R31};
7634 if (MCRegister Reg = State.AllocateReg(GPRList)) {
7635 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7636 return false;
7637 }
7638 }
7639
7640 if (LocVT == MVT::f32) {
7641 // Pass in STG registers: F1, F2, F3, F4
7642 // fs0,fs1,fs2,fs3
7643 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
7644 LoongArch::F26, LoongArch::F27};
7645 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
7646 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7647 return false;
7648 }
7649 }
7650
7651 if (LocVT == MVT::f64) {
7652 // Pass in STG registers: D1, D2, D3, D4
7653 // fs4,fs5,fs6,fs7
7654 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
7655 LoongArch::F30_64, LoongArch::F31_64};
7656 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
7657 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7658 return false;
7659 }
7660 }
7661
7662 report_fatal_error("No registers left in GHC calling convention");
7663 return true;
7664}
7665
7666// Transform physical registers into virtual registers.
7667SDValue LoongArchTargetLowering::LowerFormalArguments(
7668    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
7669 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
7670 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7671
7672 MachineFunction &MF = DAG.getMachineFunction();
7673
7674 switch (CallConv) {
7675 default:
7676 llvm_unreachable("Unsupported calling convention");
7677 case CallingConv::C:
7678 case CallingConv::Fast:
7680 break;
7681 case CallingConv::GHC:
7682 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
7683 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
7684      report_fatal_error(
7685          "GHC calling convention requires the F and D extensions");
7686 }
7687
7688 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7689 MVT GRLenVT = Subtarget.getGRLenVT();
7690 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
7691 // Used with varargs to accumulate store chains.
7692 std::vector<SDValue> OutChains;
7693
7694 // Assign locations to all of the incoming arguments.
7695 SmallVector<CCValAssign> ArgLocs;
7696 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
7697
7698 if (CallConv == CallingConv::GHC)
7699    CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
7700 else
7701 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
7702
7703 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
7704 CCValAssign &VA = ArgLocs[i];
7705 SDValue ArgValue;
7706 // Passing f64 on LA32D with a soft float ABI must be handled as a special
7707 // case.
7708 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7709 assert(VA.needsCustom());
7710 ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
7711 } else if (VA.isRegLoc())
7712 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
7713 else
7714 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
7715 if (VA.getLocInfo() == CCValAssign::Indirect) {
7716 // If the original argument was split and passed by reference, we need to
7717 // load all parts of it here (using the same address).
7718 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
7719                                   MachinePointerInfo()));
7720      unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
7721 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
7722 assert(ArgPartOffset == 0);
7723 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
7724 CCValAssign &PartVA = ArgLocs[i + 1];
7725 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
7726 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
7727 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
7728 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
7729                                     MachinePointerInfo()));
7730        ++i;
7731 ++InsIdx;
7732 }
7733 continue;
7734 }
7735 InVals.push_back(ArgValue);
7736 }
7737
7738 if (IsVarArg) {
7739    ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
7740    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
7741 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
7742 MachineFrameInfo &MFI = MF.getFrameInfo();
7743 MachineRegisterInfo &RegInfo = MF.getRegInfo();
7744 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
7745
7746 // Offset of the first variable argument from stack pointer, and size of
7747 // the vararg save area. For now, the varargs save area is either zero or
7748 // large enough to hold a0-a7.
7749 int VaArgOffset, VarArgsSaveSize;
7750
7751 // If all registers are allocated, then all varargs must be passed on the
7752 // stack and we don't need to save any argregs.
7753 if (ArgRegs.size() == Idx) {
7754 VaArgOffset = CCInfo.getStackSize();
7755 VarArgsSaveSize = 0;
7756 } else {
7757 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
7758 VaArgOffset = -VarArgsSaveSize;
7759 }
7760
7761 // Record the frame index of the first variable argument
7762 // which is a value necessary to VASTART.
7763 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
7764 LoongArchFI->setVarArgsFrameIndex(FI);
7765
7766 // If saving an odd number of registers then create an extra stack slot to
7767 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
7768 // offsets to even-numbered registers remain 2*GRLen-aligned.
7769 if (Idx % 2) {
7770 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
7771 true);
7772 VarArgsSaveSize += GRLenInBytes;
7773 }
7774
7775 // Copy the integer registers that may have been used for passing varargs
7776 // to the vararg save area.
7777 for (unsigned I = Idx; I < ArgRegs.size();
7778 ++I, VaArgOffset += GRLenInBytes) {
7779 const Register Reg = RegInfo.createVirtualRegister(RC);
7780 RegInfo.addLiveIn(ArgRegs[I], Reg);
7781 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
7782 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
7783 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
7784 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
7785                                   MachinePointerInfo::getFixedStack(MF, FI));
7786      cast<StoreSDNode>(Store.getNode())
7787 ->getMemOperand()
7788 ->setValue((Value *)nullptr);
7789 OutChains.push_back(Store);
7790 }
7791 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
7792 }
7793
7794 // All stores are grouped in one node to allow the matching between
7795 // the size of Ins and InVals. This only happens for vararg functions.
7796 if (!OutChains.empty()) {
7797 OutChains.push_back(Chain);
7798 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
7799 }
7800
7801 return Chain;
7802}
7803
7804bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
7805 return CI->isTailCall();
7806}
7807
7808// Check if the return value is used as only a return value, as otherwise
7809// we can't perform a tail-call.
7810bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
7811                                                 SDValue &Chain) const {
7812 if (N->getNumValues() != 1)
7813 return false;
7814 if (!N->hasNUsesOfValue(1, 0))
7815 return false;
7816
7817 SDNode *Copy = *N->user_begin();
7818 if (Copy->getOpcode() != ISD::CopyToReg)
7819 return false;
7820
7821 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
7822 // isn't safe to perform a tail call.
7823 if (Copy->getGluedNode())
7824 return false;
7825
7826 // The copy must be used by a LoongArchISD::RET, and nothing else.
7827 bool HasRet = false;
7828 for (SDNode *Node : Copy->users()) {
7829 if (Node->getOpcode() != LoongArchISD::RET)
7830 return false;
7831 HasRet = true;
7832 }
7833
7834 if (!HasRet)
7835 return false;
7836
7837 Chain = Copy->getOperand(0);
7838 return true;
7839}
7840
7841// Check whether the call is eligible for tail call optimization.
7842bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
7843 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
7844 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
7845
7846 auto CalleeCC = CLI.CallConv;
7847 auto &Outs = CLI.Outs;
7848 auto &Caller = MF.getFunction();
7849 auto CallerCC = Caller.getCallingConv();
7850
7851 // Do not tail call opt if the stack is used to pass parameters.
7852 if (CCInfo.getStackSize() != 0)
7853 return false;
7854
7855 // Do not tail call opt if any parameters need to be passed indirectly.
7856 for (auto &VA : ArgLocs)
7857 if (VA.getLocInfo() == CCValAssign::Indirect)
7858 return false;
7859
7860 // Do not tail call opt if either caller or callee uses struct return
7861 // semantics.
7862 auto IsCallerStructRet = Caller.hasStructRetAttr();
7863 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
7864 if (IsCallerStructRet || IsCalleeStructRet)
7865 return false;
7866
7867 // Do not tail call opt if either the callee or caller has a byval argument.
7868 for (auto &Arg : Outs)
7869 if (Arg.Flags.isByVal())
7870 return false;
7871
7872 // The callee has to preserve all registers the caller needs to preserve.
7873 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
7874 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
7875 if (CalleeCC != CallerCC) {
7876 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
7877 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
7878 return false;
7879 }
7880 return true;
7881}
7882
7883static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
7884 return DAG.getDataLayout().getPrefTypeAlign(
7885 VT.getTypeForEVT(*DAG.getContext()));
7886}
7887
7888// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
7889// and output parameter nodes.
7890SDValue
7891LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
7892                                   SmallVectorImpl<SDValue> &InVals) const {
7893 SelectionDAG &DAG = CLI.DAG;
7894 SDLoc &DL = CLI.DL;
7895 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
7896 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
7897 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
7898 SDValue Chain = CLI.Chain;
7899 SDValue Callee = CLI.Callee;
7900 CallingConv::ID CallConv = CLI.CallConv;
7901 bool IsVarArg = CLI.IsVarArg;
7902 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7903 MVT GRLenVT = Subtarget.getGRLenVT();
7904 bool &IsTailCall = CLI.IsTailCall;
7905
7906 MachineFunction &MF = DAG.getMachineFunction();
7907
7908 // Analyze the operands of the call, assigning locations to each operand.
7909 SmallVector<CCValAssign> ArgLocs;
7910 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
7911
7912 if (CallConv == CallingConv::GHC)
7913 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
7914 else
7915 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
7916
7917 // Check if it's really possible to do a tail call.
7918 if (IsTailCall)
7919 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
7920
7921 if (IsTailCall)
7922 ++NumTailCalls;
7923 else if (CLI.CB && CLI.CB->isMustTailCall())
7924 report_fatal_error("failed to perform tail call elimination on a call "
7925 "site marked musttail");
7926
7927 // Get a count of how many bytes are to be pushed on the stack.
7928 unsigned NumBytes = ArgCCInfo.getStackSize();
7929
7930 // Create local copies for byval args.
7931 SmallVector<SDValue> ByValArgs;
7932 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7933 ISD::ArgFlagsTy Flags = Outs[i].Flags;
7934 if (!Flags.isByVal())
7935 continue;
7936
7937 SDValue Arg = OutVals[i];
7938 unsigned Size = Flags.getByValSize();
7939 Align Alignment = Flags.getNonZeroByValAlign();
7940
7941 int FI =
7942 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
7943 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
7944 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
7945
7946 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
7947 /*IsVolatile=*/false,
7948 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
7949                          MachinePointerInfo(), MachinePointerInfo());
7950    ByValArgs.push_back(FIPtr);
7951 }
7952
7953 if (!IsTailCall)
7954 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
7955
7956 // Copy argument values to their designated locations.
7957 SmallVector<std::pair<Register, SDValue>> RegsToPass;
7958 SmallVector<SDValue> MemOpChains;
7959 SDValue StackPtr;
7960 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
7961 ++i, ++OutIdx) {
7962 CCValAssign &VA = ArgLocs[i];
7963 SDValue ArgValue = OutVals[OutIdx];
7964 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
7965
7966 // Handle passing f64 on LA32D with a soft float ABI as a special case.
7967 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7968 assert(VA.isRegLoc() && "Expected register VA assignment");
7969 assert(VA.needsCustom());
7970 SDValue SplitF64 =
7971          DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
7972                      DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
7973 SDValue Lo = SplitF64.getValue(0);
7974 SDValue Hi = SplitF64.getValue(1);
7975
7976 Register RegLo = VA.getLocReg();
7977 RegsToPass.push_back(std::make_pair(RegLo, Lo));
7978
7979 // Get the CCValAssign for the Hi part.
7980 CCValAssign &HiVA = ArgLocs[++i];
7981
7982 if (HiVA.isMemLoc()) {
7983 // Second half of f64 is passed on the stack.
7984 if (!StackPtr.getNode())
7985 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
7986        SDValue Address =
7987            DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
7988 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
7989 // Emit the store.
7990 MemOpChains.push_back(DAG.getStore(
7991 Chain, DL, Hi, Address,
7992            MachinePointerInfo::getStack(MF, HiVA.getLocMemOffset())));
7993      } else {
7994 // Second half of f64 is passed in another GPR.
7995 Register RegHigh = HiVA.getLocReg();
7996 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
7997 }
7998 continue;
7999 }
8000
8001 // Promote the value if needed.
8002 // For now, only handle fully promoted and indirect arguments.
8003 if (VA.getLocInfo() == CCValAssign::Indirect) {
8004 // Store the argument in a stack slot and pass its address.
8005 Align StackAlign =
8006 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
8007 getPrefTypeAlign(ArgValue.getValueType(), DAG));
8008 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
8009 // If the original argument was split and passed by reference, we need to
8010 // store the required parts of it here (and pass just one address).
8011 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
8012 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
8013 assert(ArgPartOffset == 0);
8014 // Calculate the total size to store. We don't have access to what we're
8015 // actually storing other than performing the loop and collecting the
8016 // info.
8017      SmallVector<std::pair<SDValue, SDValue>> Parts;
8018      while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
8019 SDValue PartValue = OutVals[OutIdx + 1];
8020 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
8021 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
8022 EVT PartVT = PartValue.getValueType();
8023
8024 StoredSize += PartVT.getStoreSize();
8025 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
8026 Parts.push_back(std::make_pair(PartValue, Offset));
8027 ++i;
8028 ++OutIdx;
8029 }
8030 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
8031 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
8032 MemOpChains.push_back(
8033 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
8034                       MachinePointerInfo::getFixedStack(MF, FI)));
8035      for (const auto &Part : Parts) {
8036 SDValue PartValue = Part.first;
8037 SDValue PartOffset = Part.second;
8038        SDValue Address =
8039            DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
8040 MemOpChains.push_back(
8041 DAG.getStore(Chain, DL, PartValue, Address,
8042                         MachinePointerInfo()));
8043      }
8044 ArgValue = SpillSlot;
8045 } else {
8046 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
8047 }
8048
8049 // Use local copy if it is a byval arg.
8050 if (Flags.isByVal())
8051 ArgValue = ByValArgs[j++];
8052
8053 if (VA.isRegLoc()) {
8054 // Queue up the argument copies and emit them at the end.
8055 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
8056 } else {
8057 assert(VA.isMemLoc() && "Argument not register or memory");
8058 assert(!IsTailCall && "Tail call not allowed if stack is used "
8059 "for passing parameters");
8060
8061 // Work out the address of the stack slot.
8062 if (!StackPtr.getNode())
8063 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
8064      SDValue Address =
8065          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
8066                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
8067
8068 // Emit the store.
8069 MemOpChains.push_back(
8070 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
8071 }
8072 }
8073
8074 // Join the stores, which are independent of one another.
8075 if (!MemOpChains.empty())
8076 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
8077
8078 SDValue Glue;
8079
8080 // Build a sequence of copy-to-reg nodes, chained and glued together.
8081 for (auto &Reg : RegsToPass) {
8082 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
8083 Glue = Chain.getValue(1);
8084 }
8085
8086 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
8087 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
8088 // split it and then the direct call can be matched by PseudoCALL.
8090 const GlobalValue *GV = S->getGlobal();
8091 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
8094 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
8095 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
8096 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
8099 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
8100 }
8101
8102 // The first call operand is the chain and the second is the target address.
8104 Ops.push_back(Chain);
8105 Ops.push_back(Callee);
8106
8107 // Add argument registers to the end of the list so that they are
8108 // known live into the call.
8109 for (auto &Reg : RegsToPass)
8110 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
8111
8112 if (!IsTailCall) {
8113 // Add a register mask operand representing the call-preserved registers.
8114 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
8115 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
8116 assert(Mask && "Missing call preserved mask for calling convention");
8117 Ops.push_back(DAG.getRegisterMask(Mask));
8118 }
8119
8120 // Glue the call to the argument copies, if any.
8121 if (Glue.getNode())
8122 Ops.push_back(Glue);
8123
8124 // Emit the call.
8125 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
8126 unsigned Op;
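  // Pick the call opcode based on the code model; the Medium and Large models
  // use dedicated call pseudos that materialize the callee address with extra
  // instructions (both are only supported on LA64, as the asserts below check).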
8127 switch (DAG.getTarget().getCodeModel()) {
8128 default:
8129 report_fatal_error("Unsupported code model");
8130 case CodeModel::Small:
8131 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
8132 break;
8133 case CodeModel::Medium:
8134 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
8136 break;
8137 case CodeModel::Large:
8138 assert(Subtarget.is64Bit() && "Large code model requires LA64");
8140 break;
8141 }
8142
8143 if (IsTailCall) {
8145 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
8146 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
8147 return Ret;
8148 }
8149
8150 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
8151 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
8152 Glue = Chain.getValue(1);
8153
8154 // Mark the end of the call, which is glued to the call itself.
8155 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
8156 Glue = Chain.getValue(1);
8157
8158 // Assign locations to each value returned by this call.
8160 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
8161 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
8162
8163 // Copy all of the result registers out of their specified physreg.
8164 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
8165 auto &VA = RVLocs[i];
8166 // Copy the value out.
8167 SDValue RetValue =
8168 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
8169 // Glue the RetValue to the end of the call sequence.
8170 Chain = RetValue.getValue(1);
8171 Glue = RetValue.getValue(2);
8172
8173 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8174 assert(VA.needsCustom());
8175 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
8176 MVT::i32, Glue);
8177 Chain = RetValue2.getValue(1);
8178 Glue = RetValue2.getValue(2);
8179 RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
8180 RetValue, RetValue2);
8181 } else
8182 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
8183
8184 InVals.push_back(RetValue);
8185 }
8186
8187 return Chain;
8188}
8189
8191 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
8192 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
8193 const Type *RetTy) const {
8195 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
8196
8197 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8198 LoongArchABI::ABI ABI =
8199 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8200 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
8201 Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
8202 return false;
8203 }
8204 return true;
8205}
8206
8208 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8210 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
8211 SelectionDAG &DAG) const {
8212 // Stores the assignment of the return value to a location.
8214
8215 // Info about the registers and stack slot.
8216 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
8217 *DAG.getContext());
8218
8219 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
8220 nullptr, CC_LoongArch);
8221 if (CallConv == CallingConv::GHC && !RVLocs.empty())
8222 report_fatal_error("GHC functions return void only");
8223 SDValue Glue;
8224 SmallVector<SDValue, 4> RetOps(1, Chain);
8225
8226 // Copy the result values into the output registers.
8227 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
8228 SDValue Val = OutVals[OutIdx];
8229 CCValAssign &VA = RVLocs[i];
8230 assert(VA.isRegLoc() && "Can only return in registers!");
8231
8232 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8233 // Handle returning f64 on LA32D with a soft float ABI.
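      // The double is split into its two 32-bit halves via SPLIT_PAIR_F64 and
      // returned in a pair of GPRs (typically $a0/$a1).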
8234 assert(VA.isRegLoc() && "Expected return via registers");
8235 assert(VA.needsCustom());
8237 DAG.getVTList(MVT::i32, MVT::i32), Val);
8238 SDValue Lo = SplitF64.getValue(0);
8239 SDValue Hi = SplitF64.getValue(1);
8240 Register RegLo = VA.getLocReg();
8241 Register RegHi = RVLocs[++i].getLocReg();
8242
8243 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
8244 Glue = Chain.getValue(1);
8245 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
8246 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
8247 Glue = Chain.getValue(1);
8248 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
8249 } else {
8250 // Handle a 'normal' return.
8251 Val = convertValVTToLocVT(DAG, Val, VA, DL);
8252 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
8253
8254 // Guarantee that all emitted copies are stuck together.
8255 Glue = Chain.getValue(1);
8256 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
8257 }
8258 }
8259
8260 RetOps[0] = Chain; // Update chain.
8261
8262 // Add the glue node if we have it.
8263 if (Glue.getNode())
8264 RetOps.push_back(Glue);
8265
8266 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
8267}
8268
8270 EVT VT) const {
8271 if (!Subtarget.hasExtLSX())
8272 return false;
8273
8274 if (VT == MVT::f32) {
8275 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
8276 return (masked == 0x3e000000 || masked == 0x40000000);
8277 }
8278
8279 if (VT == MVT::f64) {
8280 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
8281 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
8282 }
8283
8284 return false;
8285}
8286
8287bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
8288 bool ForCodeSize) const {
8289 // TODO: Maybe need more checks here after vector extension is supported.
8290 if (VT == MVT::f32 && !Subtarget.hasBasicF())
8291 return false;
8292 if (VT == MVT::f64 && !Subtarget.hasBasicD())
8293 return false;
8294 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
8295}
8296
8298 return true;
8299}
8300
8302 return true;
8303}
8304
8305bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
8306 const Instruction *I) const {
8307 if (!Subtarget.is64Bit())
8308 return isa<LoadInst>(I) || isa<StoreInst>(I);
8309
8310 if (isa<LoadInst>(I))
8311 return true;
8312
8313 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
8314 // require fences because we can use amswap_db.[w/d].
8315 Type *Ty = I->getOperand(0)->getType();
8316 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
8317 unsigned Size = Ty->getIntegerBitWidth();
8318 return (Size == 8 || Size == 16);
8319 }
8320
8321 return false;
8322}
8323
8325 LLVMContext &Context,
8326 EVT VT) const {
8327 if (!VT.isVector())
8328 return getPointerTy(DL);
8330}
8331
8333 // TODO: Support vectors.
8334 return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
8335}
8336
8338 const CallInst &I,
8339 MachineFunction &MF,
8340 unsigned Intrinsic) const {
8341 switch (Intrinsic) {
8342 default:
8343 return false;
8344 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
8345 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
8346 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
8347 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
8348 Info.opc = ISD::INTRINSIC_W_CHAIN;
8349 Info.memVT = MVT::i32;
8350 Info.ptrVal = I.getArgOperand(0);
8351 Info.offset = 0;
8352 Info.align = Align(4);
8355 return true;
8356 // TODO: Add more Intrinsics later.
8357 }
8358}
8359
8360// When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8, so
8361// atomicrmw and/or/xor operations with operands narrower than 32 bits can no
8362// longer be expanded to am{and/or/xor}[_db].w by AtomicExpandPass. To prevent
8363// a regression, we implement the expansion manually here.
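// (Illustrative summary: the expansion below operates on the naturally aligned
//  32-bit word containing the value, much as AtomicExpandPass would, and pads
//  the operand so the neighbouring bytes are unaffected -- all-ones outside the
//  field for AND, zeros outside the field for OR and XOR.)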
8366
8368 Op == AtomicRMWInst::And) &&
8369 "Unable to expand");
8370 unsigned MinWordSize = 4;
8371
8372 IRBuilder<> Builder(AI);
8373 LLVMContext &Ctx = Builder.getContext();
8374 const DataLayout &DL = AI->getDataLayout();
8375 Type *ValueType = AI->getType();
8376 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
8377
8378 Value *Addr = AI->getPointerOperand();
8379 PointerType *PtrTy = cast<PointerType>(Addr->getType());
8380 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
8381
8382 Value *AlignedAddr = Builder.CreateIntrinsic(
8383 Intrinsic::ptrmask, {PtrTy, IntTy},
8384 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
8385 "AlignedAddr");
8386
8387 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
8388 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
8389 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
8390 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
8391 Value *Mask = Builder.CreateShl(
8392 ConstantInt::get(WordType,
8393 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
8394 ShiftAmt, "Mask");
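  // For example (illustrative): an i8 access whose address has low bits 0b10
  // gets PtrLSB = 2, ShiftAmt = 16 and Mask = 0xFF << 16 = 0xFF0000.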
8395 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
8396 Value *ValOperand_Shifted =
8397 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
8398 ShiftAmt, "ValOperand_Shifted");
8399 Value *NewOperand;
8400 if (Op == AtomicRMWInst::And)
8401 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
8402 else
8403 NewOperand = ValOperand_Shifted;
8404
8405 AtomicRMWInst *NewAI =
8406 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
8407 AI->getOrdering(), AI->getSyncScopeID());
8408
8409 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
8410 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
8411 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
8412 AI->replaceAllUsesWith(FinalOldResult);
8413 AI->eraseFromParent();
8414}
8415
8418 // TODO: Add more AtomicRMWInst kinds that need to be expanded.
8419
8420 // Since floating-point operations require a non-trivial set of data
8421 // operations, use CmpXChg to expand.
8422 if (AI->isFloatingPointOperation() ||
8428
8429 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
8432 AI->getOperation() == AtomicRMWInst::Sub)) {
8434 }
8435
8436 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
8437 if (Subtarget.hasLAMCAS()) {
8438 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
8442 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
8444 }
8445
8446 if (Size == 8 || Size == 16)
8449}
8450
8451static Intrinsic::ID
8453 AtomicRMWInst::BinOp BinOp) {
8454 if (GRLen == 64) {
8455 switch (BinOp) {
8456 default:
8457 llvm_unreachable("Unexpected AtomicRMW BinOp");
8459 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
8460 case AtomicRMWInst::Add:
8461 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
8462 case AtomicRMWInst::Sub:
8463 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
8465 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
8467 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
8469 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
8470 case AtomicRMWInst::Max:
8471 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
8472 case AtomicRMWInst::Min:
8473 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
8474 // TODO: support other AtomicRMWInst.
8475 }
8476 }
8477
8478 if (GRLen == 32) {
8479 switch (BinOp) {
8480 default:
8481 llvm_unreachable("Unexpected AtomicRMW BinOp");
8483 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
8484 case AtomicRMWInst::Add:
8485 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
8486 case AtomicRMWInst::Sub:
8487 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
8489 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
8491 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
8493 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
8494 case AtomicRMWInst::Max:
8495 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
8496 case AtomicRMWInst::Min:
8497 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
8498 // TODO: support other AtomicRMWInst.
8499 }
8500 }
8501
8502 llvm_unreachable("Unexpected GRLen\n");
8503}
8504
8507 AtomicCmpXchgInst *CI) const {
8508
8509 if (Subtarget.hasLAMCAS())
8511
8513 if (Size == 8 || Size == 16)
8516}
8517
8519 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
8520 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
8521 unsigned GRLen = Subtarget.getGRLen();
8522 AtomicOrdering FailOrd = CI->getFailureOrdering();
8523 Value *FailureOrdering =
8524 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
8525 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
8526 if (GRLen == 64) {
8527 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
8528 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
8529 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
8530 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
8531 }
8532 Type *Tys[] = {AlignedAddr->getType()};
8533 Value *Result = Builder.CreateIntrinsic(
8534 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
8535 if (GRLen == 64)
8536 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
8537 return Result;
8538}
8539
8541 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
8542 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
8543 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
8544 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
8545 // mask, as this produces better code than the LL/SC loop emitted by
8546 // int_loongarch_masked_atomicrmw_xchg.
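  // For example (illustrative): `atomicrmw xchg ptr %p, i8 0` becomes an AND of
  // the containing word with the inverted mask, which clears just that byte.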
8547 if (AI->getOperation() == AtomicRMWInst::Xchg &&
8550 if (CVal->isZero())
8551 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
8552 Builder.CreateNot(Mask, "Inv_Mask"),
8553 AI->getAlign(), Ord);
8554 if (CVal->isMinusOne())
8555 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
8556 AI->getAlign(), Ord);
8557 }
8558
8559 unsigned GRLen = Subtarget.getGRLen();
8560 Value *Ordering =
8561 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
8562 Type *Tys[] = {AlignedAddr->getType()};
8564 AI->getModule(),
8566
8567 if (GRLen == 64) {
8568 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
8569 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
8570 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
8571 }
8572
8573 Value *Result;
8574
8575 // Must pass the shift amount needed to sign extend the loaded value prior
8576 // to performing a signed comparison for min/max. ShiftAmt is the number of
8577 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
8578 // is the number of bits to left+right shift the value in order to
8579 // sign-extend.
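  // For example (illustrative): on LA64, an i8 field at ShiftAmt == 16 gets
  // SextShamt == 64 - 8 - 16 == 40; shifting left and then arithmetically right
  // by 40 sign-extends exactly that byte within the loaded word.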
8580 if (AI->getOperation() == AtomicRMWInst::Min ||
8582 const DataLayout &DL = AI->getDataLayout();
8583 unsigned ValWidth =
8584 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
8585 Value *SextShamt =
8586 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
8587 Result = Builder.CreateCall(LlwOpScwLoop,
8588 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
8589 } else {
8590 Result =
8591 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
8592 }
8593
8594 if (GRLen == 64)
8595 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
8596 return Result;
8597}
8598
8600 const MachineFunction &MF, EVT VT) const {
8601 VT = VT.getScalarType();
8602
8603 if (!VT.isSimple())
8604 return false;
8605
8606 switch (VT.getSimpleVT().SimpleTy) {
8607 case MVT::f32:
8608 case MVT::f64:
8609 return true;
8610 default:
8611 break;
8612 }
8613
8614 return false;
8615}
8616
8618 const Constant *PersonalityFn) const {
8619 return LoongArch::R4;
8620}
8621
8623 const Constant *PersonalityFn) const {
8624 return LoongArch::R5;
8625}
8626
8627//===----------------------------------------------------------------------===//
8628// Target Optimization Hooks
8629//===----------------------------------------------------------------------===//
8630
8632 const LoongArchSubtarget &Subtarget) {
8633 // The FRECIPE instructions have a relative accuracy of 2^-14, and each refinement step roughly doubles the accurate bits (14 -> 28 -> 56).
8634 // IEEE single precision has 23 mantissa bits and double precision has 52, so one step suffices for f32 while f64 needs two.
8635 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
8636 return RefinementSteps;
8637}
8638
8640 SelectionDAG &DAG, int Enabled,
8641 int &RefinementSteps,
8642 bool &UseOneConstNR,
8643 bool Reciprocal) const {
8644 if (Subtarget.hasFrecipe()) {
8645 SDLoc DL(Operand);
8646 EVT VT = Operand.getValueType();
8647
8648 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
8649 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
8650 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
8651 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
8652 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
8653
8654 if (RefinementSteps == ReciprocalEstimate::Unspecified)
8655 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
8656
8657 SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
8658 if (Reciprocal)
8659 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
8660
8661 return Estimate;
8662 }
8663 }
8664
8665 return SDValue();
8666}
8667
8669 SelectionDAG &DAG,
8670 int Enabled,
8671 int &RefinementSteps) const {
8672 if (Subtarget.hasFrecipe()) {
8673 SDLoc DL(Operand);
8674 EVT VT = Operand.getValueType();
8675
8676 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
8677 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
8678 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
8679 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
8680 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
8681
8682 if (RefinementSteps == ReciprocalEstimate::Unspecified)
8683 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
8684
8685 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
8686 }
8687 }
8688
8689 return SDValue();
8690}
8691
8692//===----------------------------------------------------------------------===//
8693// LoongArch Inline Assembly Support
8694//===----------------------------------------------------------------------===//
8695
8697LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
8698 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
8699 //
8700 // 'f': A floating-point register (if available).
8701 // 'k': A memory operand whose address is formed by a base register and
8702 // (optionally scaled) index register.
8703 // 'l': A signed 16-bit constant.
8704 // 'm': A memory operand whose address is formed by a base register and
8705 // offset that is suitable for use in instructions with the same
8706 // addressing mode as st.w and ld.w.
8707 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
8708 // instruction)
8709 // 'I': A signed 12-bit constant (for arithmetic instructions).
8710 // 'J': Integer zero.
8711 // 'K': An unsigned 12-bit constant (for logic instructions).
8712 // "ZB": An address that is held in a general-purpose register. The offset is
8713 // zero.
8714 // "ZC": A memory operand whose address is formed by a base register and
8715 // offset that is suitable for use in instructions with the same
8716 // addressing mode as ll.w and sc.w.
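  // An illustrative use of some of these constraints in inline assembly:
  //   asm volatile("addi.w %0, %1, %2" : "=r"(res) : "r"(a), "I"(100));
  // where 'r' selects GPRs and 'I' accepts the signed 12-bit immediate 100.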
8717 if (Constraint.size() == 1) {
8718 switch (Constraint[0]) {
8719 default:
8720 break;
8721 case 'f':
8722 case 'q':
8723 return C_RegisterClass;
8724 case 'l':
8725 case 'I':
8726 case 'J':
8727 case 'K':
8728 return C_Immediate;
8729 case 'k':
8730 return C_Memory;
8731 }
8732 }
8733
8734 if (Constraint == "ZC" || Constraint == "ZB")
8735 return C_Memory;
8736
8737 // 'm' is handled here.
8738 return TargetLowering::getConstraintType(Constraint);
8739}
8740
8741InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
8742 StringRef ConstraintCode) const {
8743 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
8747 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
8748}
8749
8750std::pair<unsigned, const TargetRegisterClass *>
8751LoongArchTargetLowering::getRegForInlineAsmConstraint(
8752 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
8753 // First, see if this is a constraint that directly corresponds to a LoongArch
8754 // register class.
8755 if (Constraint.size() == 1) {
8756 switch (Constraint[0]) {
8757 case 'r':
8758 // TODO: Support fixed vectors up to GRLen?
8759 if (VT.isVector())
8760 break;
8761 return std::make_pair(0U, &LoongArch::GPRRegClass);
8762 case 'q':
8763 return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
8764 case 'f':
8765 if (Subtarget.hasBasicF() && VT == MVT::f32)
8766 return std::make_pair(0U, &LoongArch::FPR32RegClass);
8767 if (Subtarget.hasBasicD() && VT == MVT::f64)
8768 return std::make_pair(0U, &LoongArch::FPR64RegClass);
8769 if (Subtarget.hasExtLSX() &&
8770 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
8771 return std::make_pair(0U, &LoongArch::LSX128RegClass);
8772 if (Subtarget.hasExtLASX() &&
8773 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
8774 return std::make_pair(0U, &LoongArch::LASX256RegClass);
8775 break;
8776 default:
8777 break;
8778 }
8779 }
8780
8781 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
8782 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
8783 // constraints while the official register name is prefixed with a '$'. So we
8784 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
8785 // before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
8786 // case insensitive, so no need to convert the constraint to upper case here.
8787 //
8788 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
8789 // decode the usage of register name aliases into their official names. And
8790 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
8791 // official register names.
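  // For example, the constraint "{$r4}" is rewritten to "{r4}" below before the
  // generic handler resolves it.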
8792 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
8793 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
8794 bool IsFP = Constraint[2] == 'f';
8795 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
8796 std::pair<unsigned, const TargetRegisterClass *> R;
8798 TRI, join_items("", Temp.first, Temp.second), VT);
8799 // Match those names to the widest floating point register type available.
8800 if (IsFP) {
8801 unsigned RegNo = R.first;
8802 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
8803 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
8804 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
8805 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
8806 }
8807 }
8808 }
8809 return R;
8810 }
8811
8812 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
8813}
8814
8815void LoongArchTargetLowering::LowerAsmOperandForConstraint(
8816 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
8817 SelectionDAG &DAG) const {
8818 // Currently only support length 1 constraints.
8819 if (Constraint.size() == 1) {
8820 switch (Constraint[0]) {
8821 case 'l':
8822 // Validate & create a 16-bit signed immediate operand.
8823 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8824 uint64_t CVal = C->getSExtValue();
8825 if (isInt<16>(CVal))
8826 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
8827 Subtarget.getGRLenVT()));
8828 }
8829 return;
8830 case 'I':
8831 // Validate & create a 12-bit signed immediate operand.
8832 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8833 uint64_t CVal = C->getSExtValue();
8834 if (isInt<12>(CVal))
8835 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
8836 Subtarget.getGRLenVT()));
8837 }
8838 return;
8839 case 'J':
8840 // Validate & create an integer zero operand.
8841 if (auto *C = dyn_cast<ConstantSDNode>(Op))
8842 if (C->getZExtValue() == 0)
8843 Ops.push_back(
8844 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
8845 return;
8846 case 'K':
8847 // Validate & create a 12-bit unsigned immediate operand.
8848 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8849 uint64_t CVal = C->getZExtValue();
8850 if (isUInt<12>(CVal))
8851 Ops.push_back(
8852 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
8853 }
8854 return;
8855 default:
8856 break;
8857 }
8858 }
8860}
8861
8862#define GET_REGISTER_MATCHER
8863#include "LoongArchGenAsmMatcher.inc"
8864
8867 const MachineFunction &MF) const {
8868 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
8869 std::string NewRegName = Name.second.str();
8870 Register Reg = MatchRegisterAltName(NewRegName);
8871 if (!Reg)
8872 Reg = MatchRegisterName(NewRegName);
8873 if (!Reg)
8874 return Reg;
8875 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
8876 if (!ReservedRegs.test(Reg))
8877 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
8878 StringRef(RegName) + "\"."));
8879 return Reg;
8880}
8881
8883 EVT VT, SDValue C) const {
8884 // TODO: Support vectors.
8885 if (!VT.isScalarInteger())
8886 return false;
8887
8888 // Omit the optimization if the data size exceeds GRLen.
8889 if (VT.getSizeInBits() > Subtarget.getGRLen())
8890 return false;
8891
8892 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
8893 const APInt &Imm = ConstNode->getAPIntValue();
8894 // Break MUL into (SLLI + ADD/SUB) or ALSL.
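    // For example, x * 5 becomes (x << 2) + x (a single alsl.{w/d}) and x * 7
    // becomes (x << 3) - x.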
8895 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
8896 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
8897 return true;
8898 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
8899 if (ConstNode->hasOneUse() &&
8900 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
8901 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
8902 return true;
8903 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
8904 // in which the immediate has two set bits. Or break (MUL x, imm)
8905 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
8906 // equals (1 << s0) - (1 << s1).
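    // For example, x * 4160 => (x << 12) + (x << 6), since 4160 == 4096 + 64,
    // and x * 8128 => (x << 13) - (x << 6), since 8128 == 8192 - 64.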
8907 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
8908 unsigned Shifts = Imm.countr_zero();
8909 // Reject immediates which can be composed via a single LUI.
8910 if (Shifts >= 12)
8911 return false;
8912 // Reject multiplications that can be optimized to
8913 // (SLLI (ALSL x, x, 1/2/3/4), s).
8914 APInt ImmPop = Imm.ashr(Shifts);
8915 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
8916 return false;
8917 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
8918 // since it needs one more instruction than the other three cases.
8919 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
8920 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
8921 (ImmSmall - Imm).isPowerOf2())
8922 return true;
8923 }
8924 }
8925
8926 return false;
8927}
8928
8930 const AddrMode &AM,
8931 Type *Ty, unsigned AS,
8932 Instruction *I) const {
8933 // LoongArch has four basic addressing modes:
8934 // 1. reg
8935 // 2. reg + 12-bit signed offset
8936 // 3. reg + 14-bit signed offset left-shifted by 2
8937 // 4. reg1 + reg2
8938 // TODO: Add more checks after supporting the vector extensions.
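  // For example (illustrative): a `base + 8` access (mode 2) and a
  // `base1 + base2` access (mode 4) are legal, while `base1 + base2 + 8` is
  // rejected below.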
8939
8940 // No global is ever allowed as a base.
8941 if (AM.BaseGV)
8942 return false;
8943
8944 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
8945 // with `UAL` feature.
8946 if (!isInt<12>(AM.BaseOffs) &&
8947 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
8948 return false;
8949
8950 switch (AM.Scale) {
8951 case 0:
8952 // "r+i" or just "i", depending on HasBaseReg.
8953 break;
8954 case 1:
8955 // "r+r+i" is not allowed.
8956 if (AM.HasBaseReg && AM.BaseOffs)
8957 return false;
8958 // Otherwise we have "r+r" or "r+i".
8959 break;
8960 case 2:
8961 // "2*r+r" or "2*r+i" is not allowed.
8962 if (AM.HasBaseReg || AM.BaseOffs)
8963 return false;
8964 // Allow "2*r" as "r+r".
8965 break;
8966 default:
8967 return false;
8968 }
8969
8970 return true;
8971}
8972
8974 return isInt<12>(Imm);
8975}
8976
8978 return isInt<12>(Imm);
8979}
8980
8982 // Zexts are free if they can be combined with a load.
8983 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
8984 // poorly with type legalization of compares preferring sext.
8985 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
8986 EVT MemVT = LD->getMemoryVT();
8987 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
8988 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
8989 LD->getExtensionType() == ISD::ZEXTLOAD))
8990 return true;
8991 }
8992
8993 return TargetLowering::isZExtFree(Val, VT2);
8994}
8995
8997 EVT DstVT) const {
8998 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
8999}
9000
9002 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
9003}
9004
9006 // TODO: Support vectors.
9007 if (Y.getValueType().isVector())
9008 return false;
9009
9010 return !isa<ConstantSDNode>(Y);
9011}
9012
9014 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
9015 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
9016}
9017
9019 Type *Ty, bool IsSigned) const {
9020 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
9021 return true;
9022
9023 return IsSigned;
9024}
9025
9027 // Return false to suppress the unnecessary extensions if the LibCall
9028 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
9029 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
9030 Type.getSizeInBits() < Subtarget.getGRLen()))
9031 return false;
9032 return true;
9033}
9034
9035// memcpy and other memory intrinsics typically try to use wider loads/stores
9036// if the source/dest is aligned and the copy size is large enough. We therefore
9037// want to align such objects passed to memory intrinsics.
9039 unsigned &MinSize,
9040 Align &PrefAlign) const {
9041 if (!isa<MemIntrinsic>(CI))
9042 return false;
9043
9044 if (Subtarget.is64Bit()) {
9045 MinSize = 8;
9046 PrefAlign = Align(8);
9047 } else {
9048 MinSize = 4;
9049 PrefAlign = Align(4);
9050 }
9051
9052 return true;
9053}
9054
9063
9064bool LoongArchTargetLowering::splitValueIntoRegisterParts(
9065 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
9066 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
9067 bool IsABIRegCopy = CC.has_value();
9068 EVT ValueVT = Val.getValueType();
9069
9070 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9071 PartVT == MVT::f32) {
9072 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
9073 // nan, and cast to f32.
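    // For example (illustrative): the f16 value 1.0 (bits 0x3C00) travels as the
    // f32 bit pattern 0xFFFF3C00, a quiet NaN that the receiving side truncates
    // back down to 0x3C00.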
9074 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
9075 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
9076 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
9077 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
9078 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
9079 Parts[0] = Val;
9080 return true;
9081 }
9082
9083 return false;
9084}
9085
9086SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
9087 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
9088 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
9089 bool IsABIRegCopy = CC.has_value();
9090
9091 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9092 PartVT == MVT::f32) {
9093 SDValue Val = Parts[0];
9094
9095 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
9096 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
9097 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
9098 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
9099 return Val;
9100 }
9101
9102 return SDValue();
9103}
9104
9105MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
9106 CallingConv::ID CC,
9107 EVT VT) const {
9108 // Use f32 to pass f16.
9109 if (VT == MVT::f16 && Subtarget.hasBasicF())
9110 return MVT::f32;
9111
9113}
9114
9115unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
9116 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
9117 // Use f32 to pass f16.
9118 if (VT == MVT::f16 && Subtarget.hasBasicF())
9119 return 1;
9120
9122}
9123
9125 SDValue Op, const APInt &OriginalDemandedBits,
9126 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
9127 unsigned Depth) const {
9128 EVT VT = Op.getValueType();
9129 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
9130 unsigned Opc = Op.getOpcode();
9131 switch (Opc) {
9132 default:
9133 break;
9136 SDValue Src = Op.getOperand(0);
9137 MVT SrcVT = Src.getSimpleValueType();
9138 unsigned SrcBits = SrcVT.getScalarSizeInBits();
9139 unsigned NumElts = SrcVT.getVectorNumElements();
9140
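    // [X]VMSKLTZ packs one bit per source element (that element's sign bit) into
    // the low NumElts bits of the result; all higher result bits are zero.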
9141 // If we don't need the sign bits at all just return zero.
9142 if (OriginalDemandedBits.countr_zero() >= NumElts)
9143 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
9144
9145 // Only demand the vector elements of the sign bits we need.
9146 APInt KnownUndef, KnownZero;
9147 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
9148 if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
9149 TLO, Depth + 1))
9150 return true;
9151
9152 Known.Zero = KnownZero.zext(BitWidth);
9153 Known.Zero.setHighBits(BitWidth - NumElts);
9154
9155 // [X]VMSKLTZ only uses the MSB from each vector element.
9156 KnownBits KnownSrc;
9157 APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
9158 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
9159 Depth + 1))
9160 return true;
9161
9162 if (KnownSrc.One[SrcBits - 1])
9163 Known.One.setLowBits(NumElts);
9164 else if (KnownSrc.Zero[SrcBits - 1])
9165 Known.Zero.setLowBits(NumElts);
9166
9167 // Attempt to avoid multi-use ops if we don't need anything from it.
9169 Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
9170 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
9171 return false;
9172 }
9173 }
9174
9176 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
9177}
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
#define NODE_NAME_CASE(node)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static uint64_t getConstant(const Value *IndexValue)
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static SDValue performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
const MCPhysReg ArgFPR32s[]
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
const MCPhysReg ArgVRs[]
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKEV (if possible).
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
static cl::opt< bool > ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, cl::desc("Trap on integer division by zero."), cl::init(false))
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static void emitErrorAndReplaceIntrinsicResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, StringRef ErrorMsg, bool WithChain=true)
static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE as byte rotate (if possible).
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVH (if possible).
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG)
static SDValue performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, const SDLoc &DL, SelectionDAG &DAG)
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG)
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, unsigned Depth)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as shift (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPERM (if possible).
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static MachineBasicBlock * emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static bool isSelectPseudo(MachineInstr &MI)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Shuffle vectors by lane to generate more optimized instructions.
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match vector shuffle as byte rotation.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instruction.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VSHUF.
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode)
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue, bool AllowSymbol=false)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1391
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1330
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:435
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1388
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:286
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
bool isFloatingPointOperation() const
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool test(unsigned Idx) const
Definition BitVector.h:461
size_type count() const
count - Returns the number of bits which are set.
Definition BitVector.h:162
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:226
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:214
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string and methods for querying it.
Definition DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:388
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:124
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition Function.h:270
Argument * getArg(unsigned i) const
Definition Function.h:884
bool isDSOLocal() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2780
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is a legal icmp immediate, that is, the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is a legal add immediate, that is, the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B. This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform an atomicrmw expansion in a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
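The MVT helpers above make it easy to derive related machine value types. As a rough sketch (not code from this file), the integer vector type with elements half as wide could be built like this:
#include "llvm/CodeGenTypes/MachineValueType.h"
using namespace llvm;

// Sketch: v4i32 -> v4i16, v2i64 -> v2i32, and so on.
static MVT getHalfWidthIntVectorVT(MVT VT) {
  MVT EltVT = MVT::getIntegerVT(unsigned(VT.getScalarSizeInBits() / 2));
  return MVT::getVectorVT(EltVT, VT.getVectorNumElements());
}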
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
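BuildMI together with the addReg/addImm/addMBB operand builders is the usual way custom-inserter code stitches machine instructions together. A generic sketch with the opcode left as a parameter (the helper is illustrative, not part of this file):
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
using namespace llvm;

// Sketch: emit "Dst = Opc Src, Imm" before the instruction at InsertPt.
static void emitRegImmOp(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator InsertPt,
                         const DebugLoc &DL, const TargetInstrInfo &TII,
                         unsigned Opc, Register Dst, Register Src,
                         int64_t Imm) {
  BuildMI(MBB, InsertPt, DL, TII.get(Opc), Dst) // defines Dst
      .addReg(Src)                              // register operand
      .addImm(Imm);                             // immediate operand
}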
Representation of each machine instruction.
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, which starts a new call frame in which InSize bytes are set up inside ...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
ArrayRef< int > getMask() const
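Most of the SelectionDAG entries above are the node-construction helpers that a lowering hook leans on. A trivial, illustrative sketch of composing nodes (not code from this file):
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Sketch: lower some operation as (A + B) & 0xFFFF in the DAG.
static SDValue buildMaskedAdd(SelectionDAG &DAG, const SDLoc &DL, SDValue A,
                              SDValue B) {
  EVT VT = A.getValueType();
  SDValue Sum = DAG.getNode(ISD::ADD, DL, VT, A, B);
  return DAG.getNode(ISD::AND, DL, VT, Sum,
                     DAG.getConstant(0xFFFF, DL, VT));
}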
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:181
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
typename SuperClass::const_iterator const_iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:31
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:710
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:269
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:154
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
self_iterator getIterator()
Definition ilist_node.h:130
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:801
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:765
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:656
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:528
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:535
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:778
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:134
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:563
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:793
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:718
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:787
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:145
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:908
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:815
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:713
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:543
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
ABI getTargetABI(StringRef ABIName)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
initializer< Ty > init(const Ty &Val)
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition PtrState.h:41
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:262
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1705
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:289
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:348
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:282
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1712
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:270
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
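The MathExtras predicates listed here (isInt, isUInt, isMask_64, isShiftedMask_64, isPowerOf2_64, Log2_64) are the usual tools for classifying immediates during DAG combines. An illustrative check, not taken from this file:
#include "llvm/Support/MathExtras.h"
#include <cstdint>
using namespace llvm;

// Sketch: treat a multiply by C as "cheap" if C is a power of two (a single
// shift by Log2_64(C)) or a shifted run of ones (shift-and-mask style sequence).
static bool isCheapMulImmediate(uint64_t C) {
  return isPowerOf2_64(C) || isShiftedMask_64(C);
}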
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
Definition ModRef.h:68
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:191
constexpr unsigned BitWidth
std::string join_items(Sep Separator, Args &&... Items)
Joins the strings in the parameter pack Items, adding Separator between the elements....
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:212
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...