//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "arm-isel"
#define PASS_NAME "ARM Instruction Selection"

static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
                 cl::desc("Disable isel of shifter-op"),
                 cl::init(false));
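
// For illustration: since this is a hidden llc option, shifter-operand
// selection can be switched off when debugging isel, e.g. a (hypothetical)
// invocation "llc -mtriple=armv7 -disable-shifter-op foo.ll".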

//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  ARMDAGToDAGISel() = delete;

  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;

  /// Return true, as some complex patterns (like those that call
  /// canExtractShiftFromMul) can modify the DAG in place.
  bool ComplexPatternFuncMutatesDAG() const override { return true; }

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,
                                    SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }
  bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {
    if (!N.hasOneUse())
      return false;
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                       SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
                                  unsigned Shift);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  template <int Min, int Max>
  bool SelectImmediateInRange(SDValue N, SDValue &OffImm);

  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }
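
  // For illustration: an ARM-mode "so_imm" is an 8-bit value rotated right
  // by an even amount, so is_so_imm(0x3FC) is true (0xFF ror 30) while
  // is_so_imm(0x1FE) is false (it would need an odd rotation of 31).
  // Thumb-2 is more permissive: getT2SOImmVal also accepts replicated-byte
  // patterns, so is_t2_so_imm(0x00FF00FF) is true as well.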

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);
  bool tryMVEIndexedLoad(SDNode *N);
  bool tryFMULFixed(SDNode *N, SDLoc dl);
  bool tryFP_TO_INT(SDNode *N, SDLoc dl);
  bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul,
                                             bool IsUnsigned,
                                             bool FixedToFloat);

  /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
  /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics.  NumVecs should
  /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
  /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// Helper functions for setting up clusters of MVE predication operands.
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask);
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask, SDValue Inactive);

  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);

  /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
  void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);

  /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
  void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
                           bool HasSaturationOperand);

  /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
  void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                         uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);

  /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
  /// vector lanes.
  void SelectMVE_VSHLC(SDNode *N, bool Predicated);

  /// Select long MVE vector reductions with two vector operands.
  /// Stride is the number of vector element widths the instruction can operate
  /// on:
  ///   2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
  ///   1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
  /// Stride is used when addressing the OpcodesS array which contains multiple
  /// opcodes for each element width.
  /// TySize is the index into the list of element types listed above.
  void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                             const uint16_t *OpcodesS, const uint16_t *OpcodesU,
                             size_t Stride, size_t TySize);

  /// Select a 64-bit MVE vector reduction with two vector operands
  /// arm_mve_vmlldava_[predicated]
  void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                         const uint16_t *OpcodesU);
  /// Select a 72-bit MVE vector rounding reduction with two vector operands
  /// int_arm_mve_vrmlldavha[_predicated]
  void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
                           const uint16_t *OpcodesS, const uint16_t *OpcodesU);

  /// SelectMVE_VLD - Select MVE interleaving load intrinsics.  NumVecs
  /// should be 2 or 4.  The opcode array specifies the instructions
  /// used for 8, 16 and 32-bit lane sizes respectively, and each
  /// pointer points to a set of NumVecs sub-opcodes used for the
  /// different stages (e.g. VLD20 versus VLD21) of each load family.
  void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                     const uint16_t *const *Opcodes, bool HasWriteback);

  /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions.  Opcodes is
  /// an array of 3 elements for the 8, 16 and 32-bit lane sizes.
  void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                       bool Wrapping, bool Predicated);

  /// SelectCDE_CXxD - Select a CDE dual-GPR instruction (one of CX1D,
  /// CX1DA, CX2D, CX2DA, CX3, CX3DA).
  /// \arg \c NumExtraOps number of extra operands besides the coprocessor,
  ///      the accumulator and the immediate operand, i.e. 0
  ///      for CX1*, 1 for CX2*, 2 for CX3*
  /// \arg \c HasAccum whether the instruction has an accumulator operand
  void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
                      bool HasAccum);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
  /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  bool tryInsertVectorElt(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    InlineAsm::ConstraintCode ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2,
                              SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2,
                              SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2,
                              SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};

class ARMDAGToDAGISelLegacy : public SelectionDAGISelLegacy {
public:
  static char ID;
  ARMDAGToDAGISelLegacy(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
      : SelectionDAGISelLegacy(
            ID, std::make_unique<ARMDAGToDAGISel>(tm, OptLevel)) {}
};
} // end anonymous namespace

char ARMDAGToDAGISelLegacy::ID = 0;

INITIALIZE_PASS(ARMDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)

/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so, Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = N->getAsZExtVal();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the value is a 32-bit constant
// operand. If so, Imm will receive the 32-bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so, Imm will receive the 32-bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}

/// Check whether a particular node is a constant value representable as
/// (N * Scale) where N is in [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
  if (!C)
    return false;

  ScaledConstant = (int) C->getZExtValue();
  if ((ScaledConstant % Scale) != 0)
    return false;

  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}
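
// For illustration: isScaledConstantInRange(c, /*Scale=*/4, 0, 256, Out)
// succeeds for a constant node holding 40 and sets Out = 10 (40 = 10 * 4 and
// 10 is in [0, 256)), but fails for 42, which is not a multiple of 4.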

void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  bool isThumb2 = Subtarget->isThumb();
  // We use make_early_inc_range to avoid invalidation issues.
  for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
    if (N.getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as a shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).

    SDValue N0 = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    unsigned And_imm = 0;
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = llvm::countr_zero(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. on
      // Swift, left shifter operands of 1 / 2 are free but others are not.
      // e.g.
      //   ubfx   r3, r1, #16, #8
      //   ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //   mov.w  r9, #1020
      //   and.w  r2, r9, r1, lsr #14
      //   ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(&N, N0, N1);
  }
}
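
// Worked example of the transform above (hypothetical values): with c1 = 14
// and c2 = 1020 (0b1111111100, tz = 2), the expression
//   (add X1, (and (srl X2, 14), 1020))
// becomes
//   (add X1, (shl (and (srl X2, 16), 255), 2))
// which selects to "ubfx" plus an add with a free "lsl #2" shifter operand
// instead of materializing the constant #1020.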

/// hasNoVMLxHazardUse - Return true if it's desirable to select an FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  if (OptLevel == CodeGenOptLevel::None)
    return true;

  if (!Subtarget->hasVMLxHazards())
    return true;

  if (!N->hasOneUse())
    return false;

  SDNode *User = *N->user_begin();
  if (User->getOpcode() == ISD::CopyToReg)
    return true;
  if (User->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    //   vmla
    //   vmla (stall 8 cycles)
    //
    //   vmul (5 cycles)
    //   vadd (5 cycles)
    //   vmla
    // This adds up to about 18 - 19 cycles.
    //
    //   vmla
    //   vmul (stall 4 cycles)
    //   vadd adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}

bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
                                            ARM_AM::ShiftOpc ShOpcVal,
                                            unsigned ShAmt) {
  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
    return true;
  if (Shift.hasOneUse())
    return true;
  // R << 2 is free.
  return ShOpcVal == ARM_AM::lsl &&
         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}

bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
                                             unsigned MaxShift,
                                             unsigned &PowerOfTwo,
                                             SDValue &NewMulConst) const {
  assert(N.getOpcode() == ISD::MUL);
  assert(MaxShift > 0);

  // If the multiply is used in more than one place then changing the constant
  // will make other uses incorrect, so don't.
  if (!N.hasOneUse()) return false;
  // Check if the multiply is by a constant
  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!MulConst) return false;
  // If the constant is used in more than one place then modifying it will mean
  // we need to materialize two constants instead of one, which is a bad idea.
  if (!MulConst->hasOneUse()) return false;
  unsigned MulConstVal = MulConst->getZExtValue();
  if (MulConstVal == 0) return false;

  // Find the largest power of 2 that MulConstVal is a multiple of
  PowerOfTwo = MaxShift;
  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    --PowerOfTwo;
    if (PowerOfTwo == 0) return false;
  }

  // Only optimise if the new cost is better
  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
  unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
  return NewCost < OldCost;
}
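
// For illustration (hypothetical values): for (mul x, 510), 510 = 255 << 1,
// so PowerOfTwo = 1 and NewMulConst = 255. On a subtarget without MOVW,
// 255 is a single-instruction modified immediate while 510 is not, so the
// shift is extracted and later folded into a free "lsl #1" shifter operand,
// i.e. x*510 becomes (x*255) << 1.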

void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}

bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  // Constant shift amounts are handled by SelectImmShifterOperand above.
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
}
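
// For illustration: (or x, 7) where x is known to be a multiple of 8 has no
// common bits set between the operands, so it is equivalent to (add x, 7)
// and can reuse add's more compact encodings and addressing modes.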


bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
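
// For illustration: (add r1, #-42) selects Base = r1, OffImm = -42, which the
// LDRi12 patterns emit as "ldr r0, [r1, #-42]"; an out-of-range offset such
// as #4096 falls through to the base-only case and is computed separately.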


bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub : ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
              dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(Offset);
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
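
// For illustration: (add r1, (shl r2, #2)) selects Base = r1, Offset = r2
// with an "lsl #2" opcode word, emitted as "ldr r0, [r1, r2, lsl #2]".
// The multiply special case turns (mul r2, 5) used as an address into
// Base = Offset = r2 with "lsl #2", i.e. "ldr r0, [r2, r2, lsl #2]".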

bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                                  SDValue &Offset,
                                                  SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    if (AddSub == ARM_AM::sub) Val *= -1;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getSignedTargetConstant(Val, SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}


bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}
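
// For illustration: addressing mode 3 (ldrh/ldrsb/ldrd and friends) only has
// reg +/- reg and reg +/- imm8 forms, so (add r1, #60) selects to
// "ldrh r0, [r1, #60]", while an offset of 300 exceeds the 8-bit range and
// is kept in a register instead.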

bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base,
                                        SDValue &Offset, bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}

bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}

bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    llvm::Align MMOAlign = MemN->getAlign();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign.value() >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics.  For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlign().value();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(0, MVT::i32);
  }
  return true;
}

bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                       SDValue &Offset, SDValue &Label) {
  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
    Offset = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    Label = CurDAG->getTargetConstant(N1->getAsZExtVal(), SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}


//===----------------------------------------------------------------------===//
// Thumb Addressing Modes
//===----------------------------------------------------------------------===//

static bool shouldUseZeroOffsetLdSt(SDValue N) {
  // Negative numbers are difficult to materialise in thumb1. If we are
  // selecting the add of a negative, instead try to select ri with a zero
  // offset, so create the add node directly which will become a sub.
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Look for an imm which is not legal for ld/st, but is legal for sub.
  if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
    return C->getSExtValue() < 0 && C->getSExtValue() >= -255;

  return false;
}

bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
                                                SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    if (!isNullConstant(N))
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}

bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
                                            SDValue &Offset) {
  if (shouldUseZeroOffsetLdSt(N))
    return false; // Select ri instead
  return SelectThumbAddrModeRRSext(N, Base, Offset);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}

bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlign(FI) < Align(4))
      MFI.setObjectAlignment(FI, Align(4));
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4))
          MFI.setObjectAlignment(FI, Align(4));
        if (MFI.getObjectAlign(FI) >= Align(4)) {
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
          OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  return false;
}
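
// For illustration: Thumb1 SP-relative loads encode an unsigned offset in
// words, so a frame offset of #64 is folded as OffImm = 16 (16 words) and
// printed back as "ldr r0, [sp, #64]"; offsets that are not multiples of 4
// or exceed 1020 bytes fall back to other addressing forms.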

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
                                          SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm = CurDAG->getSignedTargetConstant(RHSC * (1 << Shift), SDLoc(N),
                                               MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}


//===----------------------------------------------------------------------===//
// Thumb 2 Addressing Modes
//===----------------------------------------------------------------------===//


bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false; // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm = CurDAG->getSignedTargetConstant(RHSC * (1 << Shift), SDLoc(N),
                                               MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
                 ? CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32)
                 : CurDAG->getSignedTargetConstant(-RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm = CurDAG->getSignedTargetConstant(RHSC * (1 << Shift), SDLoc(N),
                                               MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm,
                                                 unsigned Shift) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM;
  switch (Opcode) {
  case ISD::LOAD:
    AM = cast<LoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::STORE:
    AM = cast<StoreSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MLOAD:
    AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MSTORE:
    AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
    break;
  default:
    llvm_unreachable("Unexpected Opcode for Imm7Offset");
  }

  int RHSC;
  // 7 bit constant, shifted by Shift.
  if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
                 ? CurDAG->getSignedTargetConstant(RHSC * (1 << Shift),
                                                   SDLoc(N), MVT::i32)
                 : CurDAG->getSignedTargetConstant(-RHSC * (1 << Shift),
                                                   SDLoc(N), MVT::i32);
    return true;
  }
  return false;
}
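
// For illustration: for an MVE post-incrementing halfword load (Shift = 1),
// an increment of 8 bytes is in range (8 = 4 << 1 with 4 in [0, 0x80)), so
// this selects the writeback form "vldrh.u16 q0, [r0], #8"; the decrementing
// addressing modes take the negated value, OffImm = -8.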

template <int Min, int Max>
bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
  int Val;
  if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
    OffImm = CurDAG->getSignedTargetConstant(Val, SDLoc(N), MVT::i32);
    return true;
  }
  return false;
}

bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}

bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  OffImm = CurDAG->getTargetConstant(RHSC / 4, SDLoc(N), MVT::i32);
  return true;
}
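
// For illustration: the exclusive-access instructions encode their offset in
// words, so (add r1, #16) selects Base = r1, OffImm = 4, printed back as
// "ldrex r0, [r1, #16]"; an offset over 1020 or not a multiple of 4 simply
// falls back to a zero offset on the whole address expression.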

//===--------------------------------------------------------------------===//

/// getAL - Returns a ARMCC::AL immediate node.
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}

void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}
1582
1583bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1584 LoadSDNode *LD = cast<LoadSDNode>(N);
1585 ISD::MemIndexedMode AM = LD->getAddressingMode();
1586 if (AM == ISD::UNINDEXED)
1587 return false;
1588
1589 EVT LoadedVT = LD->getMemoryVT();
1590 SDValue Offset, AMOpc;
1591 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1592 unsigned Opcode = 0;
1593 bool Match = false;
1594 if (LoadedVT == MVT::i32 && isPre &&
1595 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1596 Opcode = ARM::LDR_PRE_IMM;
1597 Match = true;
1598 } else if (LoadedVT == MVT::i32 && !isPre &&
1599 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1600 Opcode = ARM::LDR_POST_IMM;
1601 Match = true;
1602 } else if (LoadedVT == MVT::i32 &&
1603 SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1604 Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1605 Match = true;
1606
1607 } else if (LoadedVT == MVT::i16 &&
1608 SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1609 Match = true;
1610 Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1611 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1612 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1613 } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1614 if (LD->getExtensionType() == ISD::SEXTLOAD) {
1615 if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1616 Match = true;
1617 Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1618 }
1619 } else {
1620 if (isPre &&
1621 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1622 Match = true;
1623 Opcode = ARM::LDRB_PRE_IMM;
1624 } else if (!isPre &&
1625 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1626 Match = true;
1627 Opcode = ARM::LDRB_POST_IMM;
1628 } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1629 Match = true;
1630 Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1631 }
1632 }
1633 }
1634
1635 if (Match) {
1636 if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1637 SDValue Chain = LD->getChain();
1638 SDValue Base = LD->getBasePtr();
1639 SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1640 CurDAG->getRegister(0, MVT::i32), Chain };
1641 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1642 MVT::Other, Ops);
1643 transferMemOperands(N, New);
1644 ReplaceNode(N, New);
1645 return true;
1646 } else {
1647 SDValue Chain = LD->getChain();
1648 SDValue Base = LD->getBasePtr();
1649 SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1650 CurDAG->getRegister(0, MVT::i32), Chain };
1651 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1652 MVT::Other, Ops);
1653 transferMemOperands(N, New);
1654 ReplaceNode(N, New);
1655 return true;
1656 }
1657 }
1658
1659 return false;
1660}
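// Worked example (illustrative): a pre-indexed i32 load, roughly
// "ldr r0, [r1, #4]!", matches SelectAddrMode2OffsetImmPre and selects
// LDR_PRE_IMM, while the post-indexed "ldr r0, [r1], #4" selects
// LDR_POST_IMM; both machine nodes produce the loaded value, the updated
// base register, and a chain.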
1661
1662bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1663 LoadSDNode *LD = cast<LoadSDNode>(N);
1664 EVT LoadedVT = LD->getMemoryVT();
1665 ISD::MemIndexedMode AM = LD->getAddressingMode();
1666 if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1667 LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1668 return false;
1669
1670 auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1671 if (!COffs || COffs->getZExtValue() != 4)
1672 return false;
1673
1674 // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1675 // However, the encoding of LDM is not how the rest of ISel expects a
1676 // post-inc load to look, so we use a pseudo here and switch it for a
1677 // tLDMIA_UPD after ISel.
1678 SDValue Chain = LD->getChain();
1679 SDValue Base = LD->getBasePtr();
1680 SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1681 CurDAG->getRegister(0, MVT::i32), Chain };
1682 SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1683 MVT::i32, MVT::Other, Ops);
1684 transferMemOperands(N, New);
1685 ReplaceNode(N, New);
1686 return true;
1687}
1688
1689bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1690 LoadSDNode *LD = cast<LoadSDNode>(N);
1691 ISD::MemIndexedMode AM = LD->getAddressingMode();
1692 if (AM == ISD::UNINDEXED)
1693 return false;
1694
1695 EVT LoadedVT = LD->getMemoryVT();
1696 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1697 SDValue Offset;
1698 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1699 unsigned Opcode = 0;
1700 bool Match = false;
1701 if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1702 switch (LoadedVT.getSimpleVT().SimpleTy) {
1703 case MVT::i32:
1704 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1705 break;
1706 case MVT::i16:
1707 if (isSExtLd)
1708 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1709 else
1710 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1711 break;
1712 case MVT::i8:
1713 case MVT::i1:
1714 if (isSExtLd)
1715 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1716 else
1717 Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1718 break;
1719 default:
1720 return false;
1721 }
1722 Match = true;
1723 }
1724
1725 if (Match) {
1726 SDValue Chain = LD->getChain();
1727 SDValue Base = LD->getBasePtr();
1728 SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1729 CurDAG->getRegister(0, MVT::i32), Chain };
1730 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1731 MVT::Other, Ops);
1732 transferMemOperands(N, New);
1733 ReplaceNode(N, New);
1734 return true;
1735 }
1736
1737 return false;
1738}
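// Illustrative sketch: the Thumb-2 equivalents use the +/-255 imm8 offset
// form, so a post-incremented sign-extending halfword load, roughly
// "ldrsh r0, [r1], #2", would be selected as t2LDRSH_POST.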
1739
1740bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
1741 EVT LoadedVT;
1742 unsigned Opcode = 0;
1743 bool isSExtLd, isPre;
1744 Align Alignment;
1745 ARMVCC::VPTCodes Pred;
1746 SDValue PredReg;
1747 SDValue Chain, Base, Offset;
1748
1749 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
1750 ISD::MemIndexedMode AM = LD->getAddressingMode();
1751 if (AM == ISD::UNINDEXED)
1752 return false;
1753 LoadedVT = LD->getMemoryVT();
1754 if (!LoadedVT.isVector())
1755 return false;
1756
1757 Chain = LD->getChain();
1758 Base = LD->getBasePtr();
1759 Offset = LD->getOffset();
1760 Alignment = LD->getAlign();
1761 isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1762 isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1763 Pred = ARMVCC::None;
1764 PredReg = CurDAG->getRegister(0, MVT::i32);
1765 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
1766 ISD::MemIndexedMode AM = LD->getAddressingMode();
1767 if (AM == ISD::UNINDEXED)
1768 return false;
1769 LoadedVT = LD->getMemoryVT();
1770 if (!LoadedVT.isVector())
1771 return false;
1772
1773 Chain = LD->getChain();
1774 Base = LD->getBasePtr();
1775 Offset = LD->getOffset();
1776 Alignment = LD->getAlign();
1777 isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1778 isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1779 Pred = ARMVCC::Then;
1780 PredReg = LD->getMask();
1781 } else
1782 llvm_unreachable("Expected a Load or a Masked Load!");
1783
1784 // We allow LE non-masked loads to change the type (for example use a vldrb.8
1785 // as opposed to a vldrw.32). This can allow extra addressing modes or
1786 // alignments for what is otherwise an equivalent instruction.
1787 bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);
1788
1789 SDValue NewOffset;
1790 if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
1791 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
1792 if (isSExtLd)
1793 Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
1794 else
1795 Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
1796 } else if (LoadedVT == MVT::v8i8 &&
1797 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
1798 if (isSExtLd)
1799 Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
1800 else
1801 Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
1802 } else if (LoadedVT == MVT::v4i8 &&
1803 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
1804 if (isSExtLd)
1805 Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
1806 else
1807 Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
1808 } else if (Alignment >= Align(4) &&
1809 (CanChangeType || LoadedVT == MVT::v4i32 ||
1810 LoadedVT == MVT::v4f32) &&
1811 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
1812 Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
1813 else if (Alignment >= Align(2) &&
1814 (CanChangeType || LoadedVT == MVT::v8i16 ||
1815 LoadedVT == MVT::v8f16) &&
1816 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
1817 Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
1818 else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
1819 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
1820 Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
1821 else
1822 return false;
1823
1824 SDValue Ops[] = {Base,
1825 NewOffset,
1826 CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32),
1827 PredReg,
1828 CurDAG->getRegister(0, MVT::i32), // tp_reg
1829 Chain};
1830 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
1831 N->getValueType(0), MVT::Other, Ops);
1832 transferMemOperands(N, New);
1833 ReplaceUses(SDValue(N, 0), SDValue(New, 1));
1834 ReplaceUses(SDValue(N, 1), SDValue(New, 0));
1835 ReplaceUses(SDValue(N, 2), SDValue(New, 2));
1836 CurDAG->RemoveDeadNode(N);
1837 return true;
1838}
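// Illustrative example: a post-incremented sign-extending load from v4i16 to
// v4i32 with an in-range offset becomes MVE_VLDRHS32_post, roughly
// "vldrh.s32 q0, [r0], #8"; a plain load gets ARMVCC::None predicate
// operands, while a masked load gets ARMVCC::Then plus its mask register.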
1839
1840/// Form a GPRPair pseudo register from a pair of GPR regs.
1841SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1842 SDLoc dl(V0.getNode());
1843 SDValue RegClass =
1844 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1845 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1846 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1847 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1848 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1849}
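// REG_SEQUENCE is untyped glue for the register allocator: the node built
// above is morally "GPRPair = REG_SEQUENCE V0:gsub_0, V1:gsub_1", which is
// later allocated to a single even/odd register pair such as r0/r1, as
// required by instructions like ldrexd and strexd.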
1850
1851/// Form a D register from a pair of S registers.
1852SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1853 SDLoc dl(V0.getNode());
1854 SDValue RegClass =
1855 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1856 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1857 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1858 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1859 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1860}
1861
1862/// Form a quad register from a pair of D registers.
1863SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1864 SDLoc dl(V0.getNode());
1865 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1866 MVT::i32);
1867 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1868 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1869 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1870 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1871}
1872
1873/// Form 4 consecutive D registers from a pair of Q registers.
1874SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1875 SDLoc dl(V0.getNode());
1876 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1877 MVT::i32);
1878 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1879 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1880 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1881 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1882}
1883
1884/// Form 4 consecutive S registers.
1885SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1886 SDValue V2, SDValue V3) {
1887 SDLoc dl(V0.getNode());
1888 SDValue RegClass =
1889 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1890 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1891 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1892 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1893 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1894 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1895 V2, SubReg2, V3, SubReg3 };
1896 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1897}
1898
1899/// Form 4 consecutive D registers.
1900SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1901 SDValue V2, SDValue V3) {
1902 SDLoc dl(V0.getNode());
1903 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1904 MVT::i32);
1905 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1906 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1907 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1908 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1909 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1910 V2, SubReg2, V3, SubReg3 };
1911 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1912}
1913
1914/// Form 4 consecutive Q registers.
1915SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1916 SDValue V2, SDValue V3) {
1917 SDLoc dl(V0.getNode());
1918 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1919 MVT::i32);
1920 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1921 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1922 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1923 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1924 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1925 V2, SubReg2, V3, SubReg3 };
1926 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1927}
1928
1929/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1930/// of a NEON VLD or VST instruction. The supported values depend on the
1931/// number of registers being loaded.
1932SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1933 unsigned NumVecs, bool is64BitVector) {
1934 unsigned NumRegs = NumVecs;
1935 if (!is64BitVector && NumVecs < 3)
1936 NumRegs *= 2;
1937
1938 unsigned Alignment = Align->getAsZExtVal();
1939 if (Alignment >= 32 && NumRegs == 4)
1940 Alignment = 32;
1941 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1942 Alignment = 16;
1943 else if (Alignment >= 8)
1944 Alignment = 8;
1945 else
1946 Alignment = 0;
1947
1948 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1949}
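// For example, a vld2 of two q registers transfers NumRegs = 4 d registers,
// so a 64-byte-aligned access is still encoded with the maximum supported
// alignment operand of 32 bytes, while a 4-byte-aligned access maps to 0
// (no alignment specified).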
1950
1951static bool isVLDfixed(unsigned Opc)
1952{
1953 switch (Opc) {
1954 default: return false;
1955 case ARM::VLD1d8wb_fixed : return true;
1956 case ARM::VLD1d16wb_fixed : return true;
1957 case ARM::VLD1d64Qwb_fixed : return true;
1958 case ARM::VLD1d32wb_fixed : return true;
1959 case ARM::VLD1d64wb_fixed : return true;
1960 case ARM::VLD1d8TPseudoWB_fixed : return true;
1961 case ARM::VLD1d16TPseudoWB_fixed : return true;
1962 case ARM::VLD1d32TPseudoWB_fixed : return true;
1963 case ARM::VLD1d64TPseudoWB_fixed : return true;
1964 case ARM::VLD1d8QPseudoWB_fixed : return true;
1965 case ARM::VLD1d16QPseudoWB_fixed : return true;
1966 case ARM::VLD1d32QPseudoWB_fixed : return true;
1967 case ARM::VLD1d64QPseudoWB_fixed : return true;
1968 case ARM::VLD1q8wb_fixed : return true;
1969 case ARM::VLD1q16wb_fixed : return true;
1970 case ARM::VLD1q32wb_fixed : return true;
1971 case ARM::VLD1q64wb_fixed : return true;
1972 case ARM::VLD1DUPd8wb_fixed : return true;
1973 case ARM::VLD1DUPd16wb_fixed : return true;
1974 case ARM::VLD1DUPd32wb_fixed : return true;
1975 case ARM::VLD1DUPq8wb_fixed : return true;
1976 case ARM::VLD1DUPq16wb_fixed : return true;
1977 case ARM::VLD1DUPq32wb_fixed : return true;
1978 case ARM::VLD2d8wb_fixed : return true;
1979 case ARM::VLD2d16wb_fixed : return true;
1980 case ARM::VLD2d32wb_fixed : return true;
1981 case ARM::VLD2q8PseudoWB_fixed : return true;
1982 case ARM::VLD2q16PseudoWB_fixed : return true;
1983 case ARM::VLD2q32PseudoWB_fixed : return true;
1984 case ARM::VLD2DUPd8wb_fixed : return true;
1985 case ARM::VLD2DUPd16wb_fixed : return true;
1986 case ARM::VLD2DUPd32wb_fixed : return true;
1987 case ARM::VLD2DUPq8OddPseudoWB_fixed: return true;
1988 case ARM::VLD2DUPq16OddPseudoWB_fixed: return true;
1989 case ARM::VLD2DUPq32OddPseudoWB_fixed: return true;
1990 }
1991}
1992
1993static bool isVSTfixed(unsigned Opc)
1994{
1995 switch (Opc) {
1996 default: return false;
1997 case ARM::VST1d8wb_fixed : return true;
1998 case ARM::VST1d16wb_fixed : return true;
1999 case ARM::VST1d32wb_fixed : return true;
2000 case ARM::VST1d64wb_fixed : return true;
2001 case ARM::VST1q8wb_fixed : return true;
2002 case ARM::VST1q16wb_fixed : return true;
2003 case ARM::VST1q32wb_fixed : return true;
2004 case ARM::VST1q64wb_fixed : return true;
2005 case ARM::VST1d8TPseudoWB_fixed : return true;
2006 case ARM::VST1d16TPseudoWB_fixed : return true;
2007 case ARM::VST1d32TPseudoWB_fixed : return true;
2008 case ARM::VST1d64TPseudoWB_fixed : return true;
2009 case ARM::VST1d8QPseudoWB_fixed : return true;
2010 case ARM::VST1d16QPseudoWB_fixed : return true;
2011 case ARM::VST1d32QPseudoWB_fixed : return true;
2012 case ARM::VST1d64QPseudoWB_fixed : return true;
2013 case ARM::VST2d8wb_fixed : return true;
2014 case ARM::VST2d16wb_fixed : return true;
2015 case ARM::VST2d32wb_fixed : return true;
2016 case ARM::VST2q8PseudoWB_fixed : return true;
2017 case ARM::VST2q16PseudoWB_fixed : return true;
2018 case ARM::VST2q32PseudoWB_fixed : return true;
2019 }
2020}
2021
2022// Get the register stride update opcode of a VLD/VST instruction that
2023// is otherwise equivalent to the given fixed stride updating instruction.
2024static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
2025 assert((isVLDfixed(Opc) || isVSTfixed(Opc))
2026 && "Incorrect fixed stride updating instruction.");
2027 switch (Opc) {
2028 default: break;
2029 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
2030 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
2031 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
2032 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
2033 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
2034 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
2035 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
2036 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
2037 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
2038 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
2039 case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
2040 case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
2041 case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
2042 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
2043 case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
2044 case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
2045 case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
2046 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
2047 case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
2048 case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
2049 case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
2050 case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
2051 case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
2052 case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
2053 case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register;
2054 case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register;
2055 case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register;
2056
2057 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
2058 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
2059 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
2060 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
2061 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
2062 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
2063 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
2064 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
2065 case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register;
2066 case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register;
2067 case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register;
2068 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
2069 case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register;
2070 case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register;
2071 case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register;
2072 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
2073
2074 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
2075 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
2076 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
2077 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
2078 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
2079 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
2080
2081 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
2082 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
2083 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
2084 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
2085 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
2086 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
2087
2088 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
2089 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
2090 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
2091 }
2092 return Opc; // If not one we handle, return it unchanged.
2093}
2094
2095/// Returns true if the given increment is a Constant known to be equal to the
2096/// access size performed by a NEON load/store. This means the "[rN]!" form can
2097/// be used.
2098static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
2099 auto C = dyn_cast<ConstantSDNode>(Inc);
2100 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
2101}
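// E.g. for a vld2 of two v4i32 vectors the access size is 2 * 16 = 32 bytes,
// so only a constant post-increment of exactly 32 can use the write-back
// "[rN]!" form; any other increment must use the register write-back variant
// obtained via getVLDSTRegisterUpdateOpcode above.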
2102
2103void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
2104 const uint16_t *DOpcodes,
2105 const uint16_t *QOpcodes0,
2106 const uint16_t *QOpcodes1) {
2107 assert(Subtarget->hasNEON());
2108 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
2109 SDLoc dl(N);
2110
2111 SDValue MemAddr, Align;
2112 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2113 // nodes are not intrinsics.
2114 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2115 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2116 return;
2117
2118 SDValue Chain = N->getOperand(0);
2119 EVT VT = N->getValueType(0);
2120 bool is64BitVector = VT.is64BitVector();
2121 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2122
2123 unsigned OpcodeIndex;
2124 switch (VT.getSimpleVT().SimpleTy) {
2125 default: llvm_unreachable("unhandled vld type");
2126 // Double-register operations:
2127 case MVT::v8i8: OpcodeIndex = 0; break;
2128 case MVT::v4f16:
2129 case MVT::v4bf16:
2130 case MVT::v4i16: OpcodeIndex = 1; break;
2131 case MVT::v2f32:
2132 case MVT::v2i32: OpcodeIndex = 2; break;
2133 case MVT::v1i64: OpcodeIndex = 3; break;
2134 // Quad-register operations:
2135 case MVT::v16i8: OpcodeIndex = 0; break;
2136 case MVT::v8f16:
2137 case MVT::v8bf16:
2138 case MVT::v8i16: OpcodeIndex = 1; break;
2139 case MVT::v4f32:
2140 case MVT::v4i32: OpcodeIndex = 2; break;
2141 case MVT::v2f64:
2142 case MVT::v2i64: OpcodeIndex = 3; break;
2143 }
2144
2145 EVT ResTy;
2146 if (NumVecs == 1)
2147 ResTy = VT;
2148 else {
2149 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2150 if (!is64BitVector)
2151 ResTyElts *= 2;
2152 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
2153 }
2154 std::vector<EVT> ResTys;
2155 ResTys.push_back(ResTy);
2156 if (isUpdating)
2157 ResTys.push_back(MVT::i32);
2158 ResTys.push_back(MVT::Other);
2159
2160 SDValue Pred = getAL(CurDAG, dl);
2161 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2162 SDNode *VLd;
2163 SmallVector<SDValue, 7> Ops;
2164
2165 // Double registers and VLD1/VLD2 quad registers are directly supported.
2166 if (is64BitVector || NumVecs <= 2) {
2167 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2168 QOpcodes0[OpcodeIndex]);
2169 Ops.push_back(MemAddr);
2170 Ops.push_back(Align);
2171 if (isUpdating) {
2172 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2173 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2174 if (!IsImmUpdate) {
2175 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
2176 // check for the opcode rather than the number of vector elements.
2177 if (isVLDfixed(Opc))
2178 Opc = getVLDSTRegisterUpdateOpcode(Opc);
2179 Ops.push_back(Inc);
2180 // A VLD1/VLD2 fixed increment does not need Reg0, so only include it in
2181 // the operands when the opcode is not one of those.
2182 } else if (!isVLDfixed(Opc))
2183 Ops.push_back(Reg0);
2184 }
2185 Ops.push_back(Pred);
2186 Ops.push_back(Reg0);
2187 Ops.push_back(Chain);
2188 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2189
2190 } else {
2191 // Otherwise, quad registers are loaded with two separate instructions,
2192 // where one loads the even registers and the other loads the odd registers.
2193 EVT AddrTy = MemAddr.getValueType();
2194
2195 // Load the even subregs. This is always an updating load, so that it
2196 // provides the address to the second load for the odd subregs.
2197 SDValue ImplDef =
2198 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
2199 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
2200 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2201 ResTy, AddrTy, MVT::Other, OpsA);
2202 Chain = SDValue(VLdA, 2);
2203
2204 // Load the odd subregs.
2205 Ops.push_back(SDValue(VLdA, 1));
2206 Ops.push_back(Align);
2207 if (isUpdating) {
2208 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2210 "only constant post-increment update allowed for VLD3/4");
2211 (void)Inc;
2212 Ops.push_back(Reg0);
2213 }
2214 Ops.push_back(SDValue(VLdA, 0));
2215 Ops.push_back(Pred);
2216 Ops.push_back(Reg0);
2217 Ops.push_back(Chain);
2218 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
2219 }
2220
2221 // Transfer memoperands.
2222 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2223 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});
2224
2225 if (NumVecs == 1) {
2226 ReplaceNode(N, VLd);
2227 return;
2228 }
2229
2230 // Extract out the subregisters.
2231 SDValue SuperReg = SDValue(VLd, 0);
2232 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2233 ARM::qsub_3 == ARM::qsub_0 + 3,
2234 "Unexpected subreg numbering");
2235 unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
2236 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2237 ReplaceUses(SDValue(N, Vec),
2238 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2239 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
2240 if (isUpdating)
2241 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
2242 CurDAG->RemoveDeadNode(N);
2243}
2244
2245void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
2246 const uint16_t *DOpcodes,
2247 const uint16_t *QOpcodes0,
2248 const uint16_t *QOpcodes1) {
2249 assert(Subtarget->hasNEON());
2250 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
2251 SDLoc dl(N);
2252
2253 SDValue MemAddr, Align;
2254 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2255 // nodes are not intrinsics.
2256 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2257 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2258 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2259 return;
2260
2261 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2262
2263 SDValue Chain = N->getOperand(0);
2264 EVT VT = N->getOperand(Vec0Idx).getValueType();
2265 bool is64BitVector = VT.is64BitVector();
2266 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2267
2268 unsigned OpcodeIndex;
2269 switch (VT.getSimpleVT().SimpleTy) {
2270 default: llvm_unreachable("unhandled vst type");
2271 // Double-register operations:
2272 case MVT::v8i8: OpcodeIndex = 0; break;
2273 case MVT::v4f16:
2274 case MVT::v4bf16:
2275 case MVT::v4i16: OpcodeIndex = 1; break;
2276 case MVT::v2f32:
2277 case MVT::v2i32: OpcodeIndex = 2; break;
2278 case MVT::v1i64: OpcodeIndex = 3; break;
2279 // Quad-register operations:
2280 case MVT::v16i8: OpcodeIndex = 0; break;
2281 case MVT::v8f16:
2282 case MVT::v8bf16:
2283 case MVT::v8i16: OpcodeIndex = 1; break;
2284 case MVT::v4f32:
2285 case MVT::v4i32: OpcodeIndex = 2; break;
2286 case MVT::v2f64:
2287 case MVT::v2i64: OpcodeIndex = 3; break;
2288 }
2289
2290 std::vector<EVT> ResTys;
2291 if (isUpdating)
2292 ResTys.push_back(MVT::i32);
2293 ResTys.push_back(MVT::Other);
2294
2295 SDValue Pred = getAL(CurDAG, dl);
2296 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2297 SmallVector<SDValue, 7> Ops;
2298
2299 // Double registers and VST1/VST2 quad registers are directly supported.
2300 if (is64BitVector || NumVecs <= 2) {
2301 SDValue SrcReg;
2302 if (NumVecs == 1) {
2303 SrcReg = N->getOperand(Vec0Idx);
2304 } else if (is64BitVector) {
2305 // Form a REG_SEQUENCE to force register allocation.
2306 SDValue V0 = N->getOperand(Vec0Idx + 0);
2307 SDValue V1 = N->getOperand(Vec0Idx + 1);
2308 if (NumVecs == 2)
2309 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2310 else {
2311 SDValue V2 = N->getOperand(Vec0Idx + 2);
2312 // If it's a vst3, form a quad D-register and leave the last part as
2313 // an undef.
2314 SDValue V3 = (NumVecs == 3)
2315 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2316 : N->getOperand(Vec0Idx + 3);
2317 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2318 }
2319 } else {
2320 // Form a QQ register.
2321 SDValue Q0 = N->getOperand(Vec0Idx);
2322 SDValue Q1 = N->getOperand(Vec0Idx + 1);
2323 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2324 }
2325
2326 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2327 QOpcodes0[OpcodeIndex]);
2328 Ops.push_back(MemAddr);
2329 Ops.push_back(Align);
2330 if (isUpdating) {
2331 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2332 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2333 if (!IsImmUpdate) {
2334 // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
2335 // check for the opcode rather than the number of vector elements.
2336 if (isVSTfixed(Opc))
2337 Opc = getVLDSTRegisterUpdateOpcode(Opc);
2338 Ops.push_back(Inc);
2339 }
2340 // A VST1/VST2 fixed increment does not need Reg0, so only include it in
2341 // the operands when the opcode is not one of those.
2342 else if (!isVSTfixed(Opc))
2343 Ops.push_back(Reg0);
2344 }
2345 Ops.push_back(SrcReg);
2346 Ops.push_back(Pred);
2347 Ops.push_back(Reg0);
2348 Ops.push_back(Chain);
2349 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2350
2351 // Transfer memoperands.
2352 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});
2353
2354 ReplaceNode(N, VSt);
2355 return;
2356 }
2357
2358 // Otherwise, quad registers are stored with two separate instructions,
2359 // where one stores the even registers and the other stores the odd registers.
2360
2361 // Form the QQQQ REG_SEQUENCE.
2362 SDValue V0 = N->getOperand(Vec0Idx + 0);
2363 SDValue V1 = N->getOperand(Vec0Idx + 1);
2364 SDValue V2 = N->getOperand(Vec0Idx + 2);
2365 SDValue V3 = (NumVecs == 3)
2366 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2367 : N->getOperand(Vec0Idx + 3);
2368 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2369
2370 // Store the even D registers. This is always an updating store, so that it
2371 // provides the address to the second store for the odd subregs.
2372 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2373 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2374 MemAddr.getValueType(),
2375 MVT::Other, OpsA);
2376 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
2377 Chain = SDValue(VStA, 1);
2378
2379 // Store the odd D registers.
2380 Ops.push_back(SDValue(VStA, 0));
2381 Ops.push_back(Align);
2382 if (isUpdating) {
2383 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2385 "only constant post-increment update allowed for VST3/4");
2386 (void)Inc;
2387 Ops.push_back(Reg0);
2388 }
2389 Ops.push_back(RegSeq);
2390 Ops.push_back(Pred);
2391 Ops.push_back(Reg0);
2392 Ops.push_back(Chain);
2393 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2394 Ops);
2395 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
2396 ReplaceNode(N, VStB);
2397}
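// Illustrative summary of the quad-register path: e.g. a vst4.32 of four
// q registers is emitted as two machine instructions, the first storing the
// even d subregisters with address write-back and the second storing the odd
// d subregisters at the updated address.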
2398
2399void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2400 unsigned NumVecs,
2401 const uint16_t *DOpcodes,
2402 const uint16_t *QOpcodes) {
2403 assert(Subtarget->hasNEON());
2404 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2405 SDLoc dl(N);
2406
2407 SDValue MemAddr, Align;
2408 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2409 // nodes are not intrinsics.
2410 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2411 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2412 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2413 return;
2414
2415 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2416
2417 SDValue Chain = N->getOperand(0);
2418 unsigned Lane = N->getConstantOperandVal(Vec0Idx + NumVecs);
2419 EVT VT = N->getOperand(Vec0Idx).getValueType();
2420 bool is64BitVector = VT.is64BitVector();
2421
2422 unsigned Alignment = 0;
2423 if (NumVecs != 3) {
2424 Alignment = Align->getAsZExtVal();
2425 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2426 if (Alignment > NumBytes)
2427 Alignment = NumBytes;
2428 if (Alignment < 8 && Alignment < NumBytes)
2429 Alignment = 0;
2430 // Alignment must be a power of two; make sure of that.
2431 Alignment = (Alignment & -Alignment);
2432 if (Alignment == 1)
2433 Alignment = 0;
2434 }
2435 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2436
2437 unsigned OpcodeIndex;
2438 switch (VT.getSimpleVT().SimpleTy) {
2439 default: llvm_unreachable("unhandled vld/vst lane type");
2440 // Double-register operations:
2441 case MVT::v8i8: OpcodeIndex = 0; break;
2442 case MVT::v4f16:
2443 case MVT::v4bf16:
2444 case MVT::v4i16: OpcodeIndex = 1; break;
2445 case MVT::v2f32:
2446 case MVT::v2i32: OpcodeIndex = 2; break;
2447 // Quad-register operations:
2448 case MVT::v8f16:
2449 case MVT::v8bf16:
2450 case MVT::v8i16: OpcodeIndex = 0; break;
2451 case MVT::v4f32:
2452 case MVT::v4i32: OpcodeIndex = 1; break;
2453 }
2454
2455 std::vector<EVT> ResTys;
2456 if (IsLoad) {
2457 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2458 if (!is64BitVector)
2459 ResTyElts *= 2;
2460 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2461 MVT::i64, ResTyElts));
2462 }
2463 if (isUpdating)
2464 ResTys.push_back(MVT::i32);
2465 ResTys.push_back(MVT::Other);
2466
2467 SDValue Pred = getAL(CurDAG, dl);
2468 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2469
2470 SmallVector<SDValue, 8> Ops;
2471 Ops.push_back(MemAddr);
2472 Ops.push_back(Align);
2473 if (isUpdating) {
2474 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2475 bool IsImmUpdate =
2476 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2477 Ops.push_back(IsImmUpdate ? Reg0 : Inc);
2478 }
2479
2480 SDValue SuperReg;
2481 SDValue V0 = N->getOperand(Vec0Idx + 0);
2482 SDValue V1 = N->getOperand(Vec0Idx + 1);
2483 if (NumVecs == 2) {
2484 if (is64BitVector)
2485 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2486 else
2487 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2488 } else {
2489 SDValue V2 = N->getOperand(Vec0Idx + 2);
2490 SDValue V3 = (NumVecs == 3)
2491 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2492 : N->getOperand(Vec0Idx + 3);
2493 if (is64BitVector)
2494 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2495 else
2496 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2497 }
2498 Ops.push_back(SuperReg);
2499 Ops.push_back(getI32Imm(Lane, dl));
2500 Ops.push_back(Pred);
2501 Ops.push_back(Reg0);
2502 Ops.push_back(Chain);
2503
2504 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2505 QOpcodes[OpcodeIndex]);
2506 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2507 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
2508 if (!IsLoad) {
2509 ReplaceNode(N, VLdLn);
2510 return;
2511 }
2512
2513 // Extract the subregisters.
2514 SuperReg = SDValue(VLdLn, 0);
2515 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2516 ARM::qsub_3 == ARM::qsub_0 + 3,
2517 "Unexpected subreg numbering");
2518 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2519 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2520 ReplaceUses(SDValue(N, Vec),
2521 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2522 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2523 if (isUpdating)
2524 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2525 CurDAG->RemoveDeadNode(N);
2526}
2527
2528template <typename SDValueVector>
2529void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2530 SDValue PredicateMask) {
2531 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2532 Ops.push_back(PredicateMask);
2533 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2534}
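// MVE predicated instructions carry three trailing predicate operands: the
// vector condition code (ARMVCC::Then above), the predicate mask register,
// and a tail-predication register (tp_reg), which is left as the zero
// register here.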
2535
2536template <typename SDValueVector>
2537void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2538 SDValue PredicateMask,
2539 SDValue Inactive) {
2540 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2541 Ops.push_back(PredicateMask);
2542 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2543 Ops.push_back(Inactive);
2544}
2545
2546template <typename SDValueVector>
2547void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
2548 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2549 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2550 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2551}
2552
2553template <typename SDValueVector>
2554void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2555 EVT InactiveTy) {
2556 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2557 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2558 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2559 Ops.push_back(SDValue(
2560 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
2561}
2562
2563void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
2564 bool Predicated) {
2565 SDLoc Loc(N);
2566 SmallVector<SDValue, 8> Ops;
2567
2568 uint16_t Opcode;
2569 switch (N->getValueType(1).getVectorElementType().getSizeInBits()) {
2570 case 32:
2571 Opcode = Opcodes[0];
2572 break;
2573 case 64:
2574 Opcode = Opcodes[1];
2575 break;
2576 default:
2577 llvm_unreachable("bad vector element size in SelectMVE_WB");
2578 }
2579
2580 Ops.push_back(N->getOperand(2)); // vector of base addresses
2581
2582 int32_t ImmValue = N->getConstantOperandVal(3);
2583 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset
2584
2585 if (Predicated)
2586 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
2587 else
2588 AddEmptyMVEPredicateToOps(Ops, Loc);
2589
2590 Ops.push_back(N->getOperand(0)); // chain
2591
2592 SmallVector<EVT, 8> VTs;
2593 VTs.push_back(N->getValueType(1));
2594 VTs.push_back(N->getValueType(0));
2595 VTs.push_back(N->getValueType(2));
2596
2597 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops);
2598 ReplaceUses(SDValue(N, 0), SDValue(New, 1));
2599 ReplaceUses(SDValue(N, 1), SDValue(New, 0));
2600 ReplaceUses(SDValue(N, 2), SDValue(New, 2));
2601 transferMemOperands(N, New);
2602 CurDAG->RemoveDeadNode(N);
2603}
2604
2605void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
2606 bool Immediate,
2607 bool HasSaturationOperand) {
2608 SDLoc Loc(N);
2609 SmallVector<SDValue, 4> Ops;
2610
2611 // Two 32-bit halves of the value to be shifted
2612 Ops.push_back(N->getOperand(1));
2613 Ops.push_back(N->getOperand(2));
2614
2615 // The shift count
2616 if (Immediate) {
2617 int32_t ImmValue = N->getConstantOperandVal(3);
2618 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
2619 } else {
2620 Ops.push_back(N->getOperand(3));
2621 }
2622
2623 // The immediate saturation operand, if any
2624 if (HasSaturationOperand) {
2625 int32_t SatOp = N->getConstantOperandVal(4);
2626 int SatBit = (SatOp == 64 ? 0 : 1);
2627 Ops.push_back(getI32Imm(SatBit, Loc));
2628 }
2629
2630 // MVE scalar shifts are IT-predicable, so include the standard
2631 // predicate arguments.
2632 Ops.push_back(getAL(CurDAG, Loc));
2633 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2634
2635 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2636}
2637
2638void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
2639 uint16_t OpcodeWithNoCarry,
2640 bool Add, bool Predicated) {
2641 SDLoc Loc(N);
2642 SmallVector<SDValue, 8> Ops;
2643 uint16_t Opcode;
2644
2645 unsigned FirstInputOp = Predicated ? 2 : 1;
2646
2647 // Two input vectors and the input carry flag
2648 Ops.push_back(N->getOperand(FirstInputOp));
2649 Ops.push_back(N->getOperand(FirstInputOp + 1));
2650 SDValue CarryIn = N->getOperand(FirstInputOp + 2);
2651 ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn);
2652 uint32_t CarryMask = 1 << 29;
2653 uint32_t CarryExpected = Add ? 0 : CarryMask;
2654 if (CarryInConstant &&
2655 (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
2656 Opcode = OpcodeWithNoCarry;
2657 } else {
2658 Ops.push_back(CarryIn);
2659 Opcode = OpcodeWithCarry;
2660 }
2661
2662 if (Predicated)
2663 AddMVEPredicateToOps(Ops, Loc,
2664 N->getOperand(FirstInputOp + 3), // predicate
2665 N->getOperand(FirstInputOp - 1)); // inactive
2666 else
2667 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
2668
2669 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2670}
2671
2672void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
2673 SDLoc Loc(N);
2674 SmallVector<SDValue, 4> Ops;
2675
2676 // One vector input, followed by a 32-bit word of bits to shift in
2677 // and then an immediate shift count
2678 Ops.push_back(N->getOperand(1));
2679 Ops.push_back(N->getOperand(2));
2680 int32_t ImmValue = N->getConstantOperandVal(3);
2681 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
2682
2683 if (Predicated)
2684 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
2685 else
2686 AddEmptyMVEPredicateToOps(Ops, Loc);
2687
2688 CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), ArrayRef(Ops));
2689}
2690
2691static bool SDValueToConstBool(SDValue SDVal) {
2692 assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
2693 ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
2694 uint64_t Value = SDValConstant->getZExtValue();
2695 assert((Value == 0 || Value == 1) && "expected value 0 or 1");
2696 return Value;
2697}
2698
2699void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
2700 const uint16_t *OpcodesS,
2701 const uint16_t *OpcodesU,
2702 size_t Stride, size_t TySize) {
2703 assert(TySize < Stride && "Invalid TySize");
2704 bool IsUnsigned = SDValueToConstBool(N->getOperand(1));
2705 bool IsSub = SDValueToConstBool(N->getOperand(2));
2706 bool IsExchange = SDValueToConstBool(N->getOperand(3));
2707 if (IsUnsigned) {
2708 assert(!IsSub &&
2709 "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
2710 assert(!IsExchange &&
2711 "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
2712 }
2713
2714 auto OpIsZero = [N](size_t OpNo) {
2715 return isNullConstant(N->getOperand(OpNo));
2716 };
2717
2718 // If the input accumulator value is not zero, select an instruction with
2719 // an accumulator; otherwise select an instruction without one.
2720 bool IsAccum = !(OpIsZero(4) && OpIsZero(5));
2721
2722 const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
2723 if (IsSub)
2724 Opcodes += 4 * Stride;
2725 if (IsExchange)
2726 Opcodes += 2 * Stride;
2727 if (IsAccum)
2728 Opcodes += Stride;
2729 uint16_t Opcode = Opcodes[TySize];
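// In effect, each opcode table is laid out as [IsSub][IsExchange][IsAccum]
// [TySize], so the lookup above computes
// Opcodes[(4 * IsSub + 2 * IsExchange + IsAccum) * Stride + TySize].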
2730
2731 SDLoc Loc(N);
2732 SmallVector<SDValue, 8> Ops;
2733 // Push the accumulator operands, if they are used
2734 if (IsAccum) {
2735 Ops.push_back(N->getOperand(4));
2736 Ops.push_back(N->getOperand(5));
2737 }
2738 // Push the two vector operands
2739 Ops.push_back(N->getOperand(6));
2740 Ops.push_back(N->getOperand(7));
2741
2742 if (Predicated)
2743 AddMVEPredicateToOps(Ops, Loc, N->getOperand(8));
2744 else
2745 AddEmptyMVEPredicateToOps(Ops, Loc);
2746
2747 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2748}
2749
2750void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
2751 const uint16_t *OpcodesS,
2752 const uint16_t *OpcodesU) {
2753 EVT VecTy = N->getOperand(6).getValueType();
2754 size_t SizeIndex;
2755 switch (VecTy.getVectorElementType().getSizeInBits()) {
2756 case 16:
2757 SizeIndex = 0;
2758 break;
2759 case 32:
2760 SizeIndex = 1;
2761 break;
2762 default:
2763 llvm_unreachable("bad vector element size");
2764 }
2765
2766 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex);
2767}
2768
2769void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
2770 const uint16_t *OpcodesS,
2771 const uint16_t *OpcodesU) {
2772 assert(
2773 N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
2774 32 &&
2775 "bad vector element size");
2776 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0);
2777}
2778
2779void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
2780 const uint16_t *const *Opcodes,
2781 bool HasWriteback) {
2782 EVT VT = N->getValueType(0);
2783 SDLoc Loc(N);
2784
2785 const uint16_t *OurOpcodes;
2786 switch (VT.getVectorElementType().getSizeInBits()) {
2787 case 8:
2788 OurOpcodes = Opcodes[0];
2789 break;
2790 case 16:
2791 OurOpcodes = Opcodes[1];
2792 break;
2793 case 32:
2794 OurOpcodes = Opcodes[2];
2795 break;
2796 default:
2797 llvm_unreachable("bad vector element size in SelectMVE_VLD");
2798 }
2799
2800 EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
2801 SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};
2802 unsigned PtrOperand = HasWriteback ? 1 : 2;
2803
2804 auto Data = SDValue(
2805 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
2806 SDValue Chain = N->getOperand(0);
2807 // Add an MVE_VLDn instruction for each Vec, except the last.
2808 for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
2809 SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
2810 auto LoadInst =
2811 CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
2812 Data = SDValue(LoadInst, 0);
2813 Chain = SDValue(LoadInst, 1);
2814 transferMemOperands(N, LoadInst);
2815 }
2816 // The last may need a writeback on it
2817 if (HasWriteback)
2818 ResultTys = {DataTy, MVT::i32, MVT::Other};
2819 SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
2820 auto LoadInst =
2821 CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops);
2822 transferMemOperands(N, LoadInst);
2823
2824 unsigned i;
2825 for (i = 0; i < NumVecs; i++)
2826 ReplaceUses(SDValue(N, i),
2827 CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT,
2828 SDValue(LoadInst, 0)));
2829 if (HasWriteback)
2830 ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1));
2831 ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 2 : 1));
2832 CurDAG->RemoveDeadNode(N);
2833}
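// Illustrative example: a vld2 of v16i8 expands to the two-stage pair
// MVE_VLD20_8 then MVE_VLD21_8 (roughly "vld20.8 {q0, q1}, [r0]" followed by
// "vld21.8 {q0, q1}, [r0]"), with each stage consuming the previous stage's
// data and chain results.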
2834
2835void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
2836 bool Wrapping, bool Predicated) {
2837 EVT VT = N->getValueType(0);
2838 SDLoc Loc(N);
2839
2840 uint16_t Opcode;
2841 switch (VT.getScalarSizeInBits()) {
2842 case 8:
2843 Opcode = Opcodes[0];
2844 break;
2845 case 16:
2846 Opcode = Opcodes[1];
2847 break;
2848 case 32:
2849 Opcode = Opcodes[2];
2850 break;
2851 default:
2852 llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
2853 }
2854
2855 SmallVector<SDValue, 8> Ops;
2856 unsigned OpIdx = 1;
2857
2858 SDValue Inactive;
2859 if (Predicated)
2860 Inactive = N->getOperand(OpIdx++);
2861
2862 Ops.push_back(N->getOperand(OpIdx++)); // base
2863 if (Wrapping)
2864 Ops.push_back(N->getOperand(OpIdx++)); // limit
2865
2866 SDValue ImmOp = N->getOperand(OpIdx++); // step
2867 int ImmValue = ImmOp->getAsZExtVal();
2868 Ops.push_back(getI32Imm(ImmValue, Loc));
2869
2870 if (Predicated)
2871 AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive);
2872 else
2873 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
2874
2875 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2876}
2877
2878void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
2879 size_t NumExtraOps, bool HasAccum) {
2880 bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
2881 SDLoc Loc(N);
2882 SmallVector<SDValue, 8> Ops;
2883
2884 unsigned OpIdx = 1;
2885
2886 // Convert and append the immediate operand designating the coprocessor.
2887 SDValue ImmCorpoc = N->getOperand(OpIdx++);
2888 uint32_t ImmCoprocVal = ImmCorpoc->getAsZExtVal();
2889 Ops.push_back(getI32Imm(ImmCoprocVal, Loc));
2890
2891 // For accumulating variants copy the low and high order parts of the
2892 // accumulator into a register pair and add it to the operand vector.
2893 if (HasAccum) {
2894 SDValue AccLo = N->getOperand(OpIdx++);
2895 SDValue AccHi = N->getOperand(OpIdx++);
2896 if (IsBigEndian)
2897 std::swap(AccLo, AccHi);
2898 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0));
2899 }
2900
2901 // Copy extra operands as-is.
2902 for (size_t I = 0; I < NumExtraOps; I++)
2903 Ops.push_back(N->getOperand(OpIdx++));
2904
2905 // Convert and append the immediate operand
2906 SDValue Imm = N->getOperand(OpIdx);
2907 uint32_t ImmVal = Imm->getAsZExtVal();
2908 Ops.push_back(getI32Imm(ImmVal, Loc));
2909
2910 // Accumulating variants are IT-predicable, add predicate operands.
2911 if (HasAccum) {
2912 SDValue Pred = getAL(CurDAG, Loc);
2913 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2914 Ops.push_back(Pred);
2915 Ops.push_back(PredReg);
2916 }
2917
2918 // Create the CDE instruction.
2919 SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops);
2920 SDValue ResultPair = SDValue(InstrNode, 0);
2921
2922 // The original intrinsic had two outputs, and the output of the dual-register
2923 // CDE instruction is a register pair. We need to extract the two subregisters
2924 // and replace all uses of the original outputs with the extracted
2925 // subregisters.
2926 uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
2927 if (IsBigEndian)
2928 std::swap(SubRegs[0], SubRegs[1]);
2929
2930 for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
2931 if (SDValue(N, ResIdx).use_empty())
2932 continue;
2933 SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc,
2934 MVT::i32, ResultPair);
2935 ReplaceUses(SDValue(N, ResIdx), SubReg);
2936 }
2937
2938 CurDAG->RemoveDeadNode(N);
2939}
2940
2941void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
2942 bool isUpdating, unsigned NumVecs,
2943 const uint16_t *DOpcodes,
2944 const uint16_t *QOpcodes0,
2945 const uint16_t *QOpcodes1) {
2946 assert(Subtarget->hasNEON());
2947 assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2948 SDLoc dl(N);
2949
2950 SDValue MemAddr, Align;
2951 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2952 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2953 return;
2954
2955 SDValue Chain = N->getOperand(0);
2956 EVT VT = N->getValueType(0);
2957 bool is64BitVector = VT.is64BitVector();
2958
2959 unsigned Alignment = 0;
2960 if (NumVecs != 3) {
2961 Alignment = Align->getAsZExtVal();
2962 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2963 if (Alignment > NumBytes)
2964 Alignment = NumBytes;
2965 if (Alignment < 8 && Alignment < NumBytes)
2966 Alignment = 0;
2967 // Alignment must be a power of two; make sure of that.
2968 Alignment = (Alignment & -Alignment);
2969 if (Alignment == 1)
2970 Alignment = 0;
2971 }
2972 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2973
2974 unsigned OpcodeIndex;
2975 switch (VT.getSimpleVT().SimpleTy) {
2976 default: llvm_unreachable("unhandled vld-dup type");
2977 case MVT::v8i8:
2978 case MVT::v16i8: OpcodeIndex = 0; break;
2979 case MVT::v4i16:
2980 case MVT::v8i16:
2981 case MVT::v4f16:
2982 case MVT::v8f16:
2983 case MVT::v4bf16:
2984 case MVT::v8bf16:
2985 OpcodeIndex = 1; break;
2986 case MVT::v2f32:
2987 case MVT::v2i32:
2988 case MVT::v4f32:
2989 case MVT::v4i32: OpcodeIndex = 2; break;
2990 case MVT::v1f64:
2991 case MVT::v1i64: OpcodeIndex = 3; break;
2992 }
2993
2994 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2995 if (!is64BitVector)
2996 ResTyElts *= 2;
2997 EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
2998
2999 std::vector<EVT> ResTys;
3000 ResTys.push_back(ResTy);
3001 if (isUpdating)
3002 ResTys.push_back(MVT::i32);
3003 ResTys.push_back(MVT::Other);
3004
3005 SDValue Pred = getAL(CurDAG, dl);
3006 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3007
3008 SmallVector<SDValue, 6> Ops;
3009 Ops.push_back(MemAddr);
3010 Ops.push_back(Align);
3011 unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex]
3012 : (NumVecs == 1) ? QOpcodes0[OpcodeIndex]
3013 : QOpcodes1[OpcodeIndex];
3014 if (isUpdating) {
3015 SDValue Inc = N->getOperand(2);
3016 bool IsImmUpdate =
3017 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
3018 if (IsImmUpdate) {
3019 if (!isVLDfixed(Opc))
3020 Ops.push_back(Reg0);
3021 } else {
3022 if (isVLDfixed(Opc))
3023 Opc = getVLDSTRegisterUpdateOpcode(Opc);
3024 Ops.push_back(Inc);
3025 }
3026 }
3027 if (is64BitVector || NumVecs == 1) {
3028 // Double registers and VLD1 quad registers are directly supported.
3029 } else {
3030 SDValue ImplDef = SDValue(
3031 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
3032 const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain};
3033 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
3034 MVT::Other, OpsA);
3035 Ops.push_back(SDValue(VLdA, 0));
3036 Chain = SDValue(VLdA, 1);
3037 }
3038
3039 Ops.push_back(Pred);
3040 Ops.push_back(Reg0);
3041 Ops.push_back(Chain);
3042
3043 SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
3044
3045 // Transfer memoperands.
3046 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3047 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});
3048
3049 // Extract the subregisters.
3050 if (NumVecs == 1) {
3051 ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
3052 } else {
3053 SDValue SuperReg = SDValue(VLdDup, 0);
3054 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
3055 unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
3056 for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
3057 ReplaceUses(SDValue(N, Vec),
3058 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
3059 }
3060 }
3061 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
3062 if (isUpdating)
3063 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
3064 CurDAG->RemoveDeadNode(N);
3065}
3066
3067bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
3068 if (!Subtarget->hasMVEIntegerOps())
3069 return false;
3070
3071 SDLoc dl(N);
3072
3073 // We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and
3074 // extracts of v8f16 and v8i16 vectors. Check that we have two adjacent
3075 // inserts of the correct type:
3076 SDValue Ins1 = SDValue(N, 0);
3077 SDValue Ins2 = N->getOperand(0);
3078 EVT VT = Ins1.getValueType();
3079 if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() ||
3080 !isa<ConstantSDNode>(Ins1.getOperand(2)) ||
3081 !isa<ConstantSDNode>(Ins2.getOperand(2)) ||
3082 (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT))
3083 return false;
3084
3085 unsigned Lane1 = Ins1.getConstantOperandVal(2);
3086 unsigned Lane2 = Ins2.getConstantOperandVal(2);
3087 if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1)
3088 return false;
3089
3090 // If the inserted values will be able to use T/B already, leave it to the
3091 // existing tablegen patterns. For example VCVTT/VCVTB.
3092 SDValue Val1 = Ins1.getOperand(1);
3093 SDValue Val2 = Ins2.getOperand(1);
3094 if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND)
3095 return false;
3096
3097 // Check if the inserted values are both extracts.
3098 if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
3099 Val1.getOpcode() == ARMISD::VGETLANEu) &&
3100 (Val2.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
3101 Val2.getOpcode() == ARMISD::VGETLANEu) &&
3102 isa<ConstantSDNode>(Val1.getOperand(1)) &&
3103 isa<ConstantSDNode>(Val2.getOperand(1)) &&
3104 (Val1.getOperand(0).getValueType() == MVT::v8f16 ||
3105 Val1.getOperand(0).getValueType() == MVT::v8i16) &&
3106 (Val2.getOperand(0).getValueType() == MVT::v8f16 ||
3107 Val2.getOperand(0).getValueType() == MVT::v8i16)) {
3108 unsigned ExtractLane1 = Val1.getConstantOperandVal(1);
3109 unsigned ExtractLane2 = Val2.getConstantOperandVal(1);
3110
3111 // If the two extracted lanes are from the same place and adjacent, this
3112 // simplifies into a f32 lane move.
3113 if (Val1.getOperand(0) == Val2.getOperand(0) && ExtractLane2 % 2 == 0 &&
3114 ExtractLane1 == ExtractLane2 + 1) {
3115 SDValue NewExt = CurDAG->getTargetExtractSubreg(
3116 ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0));
3117 SDValue NewIns = CurDAG->getTargetInsertSubreg(
3118 ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0),
3119 NewExt);
3120 ReplaceUses(Ins1, NewIns);
3121 return true;
3122 }
3123
3124 // Otherwise, this is a v8i16 pattern of an extract and an insert, with an
3125 // optional vmovx for extracting odd lanes.
3126 if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) {
3127 SDValue Inp1 = CurDAG->getTargetExtractSubreg(
3128 ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0));
3129 SDValue Inp2 = CurDAG->getTargetExtractSubreg(
3130 ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0));
3131 if (ExtractLane1 % 2 != 0)
3132 Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1), 0);
3133 if (ExtractLane2 % 2 != 0)
3134 Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2), 0);
3135 SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2, Inp1);
3136 SDValue NewIns =
3137 CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
3138 Ins2.getOperand(0), SDValue(VINS, 0));
3139 ReplaceUses(Ins1, NewIns);
3140 return true;
3141 }
3142 }
3143
3144 // The inserted values are not extracted - if they are f16 then insert them
3145 // directly using a VINS.
3146 if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) {
3147 SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1);
3148 SDValue NewIns =
3149 CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
3150 Ins2.getOperand(0), SDValue(VINS, 0));
3151 ReplaceUses(Ins1, NewIns);
3152 return true;
3153 }
3154
3155 return false;
3156}
3157
3158bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N,
3159 SDNode *FMul,
3160 bool IsUnsigned,
3161 bool FixedToFloat) {
3162 auto Type = N->getValueType(0);
3163 unsigned ScalarBits = Type.getScalarSizeInBits();
3164 if (ScalarBits > 32)
3165 return false;
3166
3167 SDNodeFlags FMulFlags = FMul->getFlags();
3168 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3169 // allowed in 16-bit unsigned floats.
3170 if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned)
3171 return false;
3172
3173 SDValue ImmNode = FMul->getOperand(1);
3174 SDValue VecVal = FMul->getOperand(0);
3175 if (VecVal->getOpcode() == ISD::UINT_TO_FP ||
3176 VecVal->getOpcode() == ISD::SINT_TO_FP)
3177 VecVal = VecVal->getOperand(0);
3178
3179 if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits)
3180 return false;
3181
3182 if (ImmNode.getOpcode() == ISD::BITCAST) {
3183 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
3184 return false;
3185 ImmNode = ImmNode.getOperand(0);
3186 }
3187
3188 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
3189 return false;
3190
3191 APFloat ImmAPF(0.0f);
3192 switch (ImmNode.getOpcode()) {
3193 case ARMISD::VMOVIMM:
3194 case ARMISD::VDUP: {
3195 if (!isa<ConstantSDNode>(ImmNode.getOperand(0)))
3196 return false;
3197 unsigned Imm = ImmNode.getConstantOperandVal(0);
3198 if (ImmNode.getOpcode() == ARMISD::VMOVIMM)
3199 Imm = ARM_AM::decodeVMOVModImm(Imm, ScalarBits);
3200 ImmAPF =
3201 APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(),
3202 APInt(ScalarBits, Imm));
3203 break;
3204 }
3205 case ARMISD::VMOVFPIMM: {
3206 ImmAPF = APFloat(ARM_AM::getFPImmFloat(ImmNode.getConstantOperandVal(0)));
3207 break;
3208 }
3209 default:
3210 return false;
3211 }
3212
3213 // Where n is the number of fractional bits, multiplying by 2^n will convert
3214 // from float to fixed and multiplying by 2^-n will convert from fixed to
3215 // float. Taking log2 of the factor (after taking the inverse in the case of
3216 // float to fixed) will give n.
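// For example, a fixed-to-float multiply by 1/256.0 has the exact inverse
// 256.0, which converts to the integer 256 == 2^8, giving n = 8 fractional bits.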
3217 APFloat ToConvert = ImmAPF;
3218 if (FixedToFloat) {
3219 if (!ImmAPF.getExactInverse(&ToConvert))
3220 return false;
3221 }
3222 APSInt Converted(64, false);
3223 bool IsExact;
3224 ToConvert.convertToInteger(Converted, llvm::RoundingMode::NearestTiesToEven,
3225 &IsExact);
3226 if (!IsExact || !Converted.isPowerOf2())
3227 return false;
3228
3229 unsigned FracBits = Converted.logBase2();
3230 if (FracBits > ScalarBits)
3231 return false;
3232
3233 SmallVector<SDValue, 3> Ops{
3234 VecVal, CurDAG->getConstant(FracBits, SDLoc(N), MVT::i32)};
3235 AddEmptyMVEPredicateToOps(Ops, SDLoc(N), Type);
3236
3237 unsigned int Opcode;
3238 switch (ScalarBits) {
3239 case 16:
3240 if (FixedToFloat)
3241 Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix;
3242 else
3243 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3244 break;
3245 case 32:
3246 if (FixedToFloat)
3247 Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix;
3248 else
3249 Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3250 break;
3251 default:
3252 llvm_unreachable("unexpected number of scalar bits");
3253 break;
3254 }
3255
3256 ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), Type, Ops));
3257 return true;
3258}
3259
3260bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) {
3261 // Transform a floating-point to fixed-point conversion to a VCVT
3262 if (!Subtarget->hasMVEFloatOps())
3263 return false;
3264 EVT Type = N->getValueType(0);
3265 if (!Type.isVector())
3266 return false;
3267 unsigned int ScalarBits = Type.getScalarSizeInBits();
3268
3269 bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT ||
3270 N->getOpcode() == ISD::FP_TO_UINT_SAT;
3271 SDNode *Node = N->getOperand(0).getNode();
3272
3273 // floating-point to fixed-point with one fractional bit gets turned into an
3274 // FP_TO_[U|S]INT(FADD (x, x)) rather than an FP_TO_[U|S]INT(FMUL (x, y))
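// (x + x is x * 2.0, i.e. x * 2^1, so this is the one-fractional-bit case.)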
3275 if (Node->getOpcode() == ISD::FADD) {
3276 if (Node->getOperand(0) != Node->getOperand(1))
3277 return false;
3278 SDNodeFlags Flags = Node->getFlags();
3279 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3280 // allowed in 16-bit unsigned floats.
3281 if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned)
3282 return false;
3283
3284 unsigned Opcode;
3285 switch (ScalarBits) {
3286 case 16:
3287 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3288 break;
3289 case 32:
3290 Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3291 break;
3292 }
3293 SmallVector<SDValue, 3> Ops{Node->getOperand(0),
3294 CurDAG->getConstant(1, dl, MVT::i32)};
3295 AddEmptyMVEPredicateToOps(Ops, dl, Type);
3296
3297 ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops));
3298 return true;
3299 }
3300
3301 if (Node->getOpcode() != ISD::FMUL)
3302 return false;
3303
3304 return transformFixedFloatingPointConversion(N, Node, IsUnsigned, false);
3305}
3306
3307bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) {
3308 // Transform a fixed-point to floating-point conversion to a VCVT
3309 if (!Subtarget->hasMVEFloatOps())
3310 return false;
3311 auto Type = N->getValueType(0);
3312 if (!Type.isVector())
3313 return false;
3314
3315 auto LHS = N->getOperand(0);
3316 if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP)
3317 return false;
3318
3319 return transformFixedFloatingPointConversion(
3320 N, N, LHS.getOpcode() == ISD::UINT_TO_FP, true);
3321}
3322
3323bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
3324 if (!Subtarget->hasV6T2Ops())
3325 return false;
3326
3327 unsigned Opc = isSigned
3328 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
3329 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
3330 SDLoc dl(N);
3331
3332 // For unsigned extracts, check for a shift right and mask
3333 unsigned And_imm = 0;
3334 if (N->getOpcode() == ISD::AND) {
3335 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
3336
3337 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
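// (e.g. 0x00ff & 0x0100 == 0, whereas 0x0ff0 & 0x0ff1 != 0).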
3338 if (And_imm & (And_imm + 1))
3339 return false;
3340
3341 unsigned Srl_imm = 0;
3342 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
3343 Srl_imm)) {
3344 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3345
3346 // Mask off the unnecessary bits of the AND immediate; normally
3347 // DAGCombine will do this, but that might not happen if
3348 // targetShrinkDemandedConstant chooses a different immediate.
3349 And_imm &= -1U >> Srl_imm;
3350
3351 // Note: The width operand is encoded as width-1.
3352 unsigned Width = llvm::countr_one(And_imm) - 1;
3353 unsigned LSB = Srl_imm;
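// e.g. (x srl 8) & 0xff gives LSB = 8 and Width = 7 (the encoded width - 1),
// i.e. a UBFX extracting bits 15:8 of x.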
3354
3355 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3356
3357 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
3358 // It's cheaper to use a right shift to extract the top bits.
3359 if (Subtarget->isThumb()) {
3360 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
3361 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3362 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3363 getAL(CurDAG, dl), Reg0, Reg0 };
3364 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3365 return true;
3366 }
3367
3368 // ARM models shift instructions as MOVsi with shifter operand.
3369 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
3370 SDValue ShOpc =
3371 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
3372 MVT::i32);
3373 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
3374 getAL(CurDAG, dl), Reg0, Reg0 };
3375 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
3376 return true;
3377 }
3378
3379 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3380 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3381 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3382 CurDAG->getTargetConstant(Width, dl, MVT::i32),
3383 getAL(CurDAG, dl), Reg0 };
3384 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3385 return true;
3386 }
3387 }
3388 return false;
3389 }
3390
3391 // Otherwise, we're looking for a shift of a shift
3392 unsigned Shl_imm = 0;
3393 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
3394 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
3395 unsigned Srl_imm = 0;
3396 if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
3397 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3398 // Note: The width operand is encoded as width-1.
3399 unsigned Width = 32 - Srl_imm - 1;
3400 int LSB = Srl_imm - Shl_imm;
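// e.g. (x shl 4) srl 8 extracts the 24 bits 27:4 of x, giving LSB = 4 and
// Width = 23 (the encoded width - 1).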
3401 if (LSB < 0)
3402 return false;
3403 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3404 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3405 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3406 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3407 CurDAG->getTargetConstant(Width, dl, MVT::i32),
3408 getAL(CurDAG, dl), Reg0 };
3409 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3410 return true;
3411 }
3412 }
3413
3414 // Or we are looking for a shift of an AND with a mask operand.
3415 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
3416 isShiftedMask_32(And_imm)) {
3417 unsigned Srl_imm = 0;
3418 unsigned LSB = llvm::countr_zero(And_imm);
3419 // The shift amount must be the same as the AND's LSB.
3420 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
3421 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3422 unsigned MSB = llvm::Log2_32(And_imm);
3423 // Note: The width operand is encoded as width-1.
3424 unsigned Width = MSB - LSB;
3425 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3426 assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3427 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3428 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
3429 CurDAG->getTargetConstant(Width, dl, MVT::i32),
3430 getAL(CurDAG, dl), Reg0 };
3431 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3432 return true;
3433 }
3434 }
3435
3436 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
3437 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
3438 unsigned LSB = 0;
3439 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
3440 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
3441 return false;
3442
3443 if (LSB + Width > 32)
3444 return false;
3445
3446 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3447 assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
3448 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3449 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3450 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
3451 getAL(CurDAG, dl), Reg0 };
3452 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3453 return true;
3454 }
3455
3456 return false;
3457}
3458
3459/// We've got special pseudo-instructions for these
3460void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3461 unsigned Opcode;
3462 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
3463 if (MemTy == MVT::i8)
3464 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;
3465 else if (MemTy == MVT::i16)
3466 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
3467 else if (MemTy == MVT::i32)
3468 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32;
3469 else
3470 llvm_unreachable("Unknown AtomicCmpSwap type");
3471
3472 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
3473 N->getOperand(0)};
3474 SDNode *CmpSwap = CurDAG->getMachineNode(
3475 Opcode, SDLoc(N),
3476 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
3477
3478 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
3479 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
3480
3481 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
3482 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
3483 CurDAG->RemoveDeadNode(N);
3484}
3485
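// Returns (FirstOne, LastOne) if the set bits of A form one contiguous run,
// e.g. A = 0x0ff0 gives (11, 4), while A = 0x0f0f has two runs and fails.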
3486static std::optional<std::pair<unsigned, unsigned>>
3487 getContiguousRangeOfSetBits(const APInt &A) {
3488 unsigned FirstOne = A.getBitWidth() - A.countl_zero() - 1;
3489 unsigned LastOne = A.countr_zero();
3490 if (A.popcount() != (FirstOne - LastOne + 1))
3491 return std::nullopt;
3492 return std::make_pair(FirstOne, LastOne);
3493}
3494
3495void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
3496 assert(N->getOpcode() == ARMISD::CMPZ);
3497 SwitchEQNEToPLMI = false;
3498
3499 if (!Subtarget->isThumb())
3500 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
3501 // LSR don't exist as standalone instructions - they need the barrel shifter.
3502 return;
3503
3504 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
3505 SDValue And = N->getOperand(0);
3506 if (!And->hasOneUse())
3507 return;
3508
3509 SDValue Zero = N->getOperand(1);
3510 if (!isNullConstant(Zero) || And->getOpcode() != ISD::AND)
3511 return;
3512 SDValue X = And.getOperand(0);
3513 auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
3514
3515 if (!C)
3516 return;
3517 auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
3518 if (!Range)
3519 return;
3520
3521 // There are several ways to lower this:
3522 SDNode *NewN;
3523 SDLoc dl(N);
3524
3525 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
3526 if (Subtarget->isThumb2()) {
3527 Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
3528 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
3529 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3530 CurDAG->getRegister(0, MVT::i32) };
3531 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3532 } else {
3533 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
3534 CurDAG->getTargetConstant(Imm, dl, MVT::i32),
3535 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3536 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3537 }
3538 };
3539
3540 if (Range->second == 0) {
3541 // 1. Mask includes the LSB -> Simply shift the top N bits off
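// (e.g. C == 0x7 gives Range == (2, 0), so emit LSLS X, #29; Z is then set
// iff X & 0x7 == 0).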
3542 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3543 ReplaceNode(And.getNode(), NewN);
3544 } else if (Range->first == 31) {
3545 // 2. Mask includes the MSB -> Simply shift the bottom N bits off
3546 NewN = EmitShift(ARM::tLSRri, X, Range->second);
3547 ReplaceNode(And.getNode(), NewN);
3548 } else if (Range->first == Range->second) {
3549 // 3. Only one bit is set. We can shift this into the sign bit and use a
3550 // PL/MI comparison. This is not safe if CMPZ has multiple uses because
3551 // only one of them (the one currently being selected) will be switched
3552 // to use the new condition code.
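// e.g. C == 0x10: LSLS X, #27 moves bit 4 into the sign bit, so EQ becomes
// PL and NE becomes MI.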
3553 if (!N->hasOneUse())
3554 return;
3555 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3556 ReplaceNode(And.getNode(), NewN);
3557
3558 SwitchEQNEToPLMI = true;
3559 } else if (!Subtarget->hasV6T2Ops()) {
3560 // 4. Do a double shift to clear bottom and top bits, but only in
3561 // thumb-1 mode as in thumb-2 we can use UBFX.
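// e.g. C == 0x0ff0: LSLS X, #20 then LSRS #24 leaves exactly bits 11:4,
// so Z is set iff X & 0x0ff0 == 0.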
3562 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3563 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
3564 Range->second + (31 - Range->first));
3565 ReplaceNode(And.getNode(), NewN);
3566 }
3567}
3568
3569static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3],
3570 unsigned Opc128[3]) {
3571 assert((VT.is64BitVector() || VT.is128BitVector()) &&
3572 "Unexpected vector shuffle length");
3573 switch (VT.getScalarSizeInBits()) {
3574 default:
3575 llvm_unreachable("Unexpected vector shuffle element size");
3576 case 8:
3577 return VT.is64BitVector() ? Opc64[0] : Opc128[0];
3578 case 16:
3579 return VT.is64BitVector() ? Opc64[1] : Opc128[1];
3580 case 32:
3581 return VT.is64BitVector() ? Opc64[2] : Opc128[2];
3582 }
3583}
3584
3585void ARMDAGToDAGISel::Select(SDNode *N) {
3586 SDLoc dl(N);
3587
3588 if (N->isMachineOpcode()) {
3589 N->setNodeId(-1);
3590 return; // Already selected.
3591 }
3592
3593 switch (N->getOpcode()) {
3594 default: break;
3595 case ISD::STORE: {
3596 // For Thumb1, match an sp-relative store in C++. This is a little
3597 // unfortunate, but I don't think I can make the chain check work
3598 // otherwise. (The chain of the store has to be the same as the chain
3599 // of the CopyFromReg, or else we can't replace the CopyFromReg with
3600 // a direct reference to "SP".)
3601 //
3602 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
3603 // a different addressing mode from other four-byte stores.
3604 //
3605 // This pattern usually comes up with call arguments.
3606 StoreSDNode *ST = cast<StoreSDNode>(N);
3607 SDValue Ptr = ST->getBasePtr();
3608 if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
3609 int RHSC = 0;
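// tSTRspi addresses SP + imm8 * 4, i.e. word-scaled offsets in [0, 1020].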
3610 if (Ptr.getOpcode() == ISD::ADD &&
3611 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
3612 Ptr = Ptr.getOperand(0);
3613
3614 if (Ptr.getOpcode() == ISD::CopyFromReg &&
3615 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
3616 Ptr.getOperand(0) == ST->getChain()) {
3617 SDValue Ops[] = {ST->getValue(),
3618 CurDAG->getRegister(ARM::SP, MVT::i32),
3619 CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
3620 getAL(CurDAG, dl),
3621 CurDAG->getRegister(0, MVT::i32),
3622 ST->getChain()};
3623 MachineSDNode *ResNode =
3624 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
3625 MachineMemOperand *MemOp = ST->getMemOperand();
3626 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3627 ReplaceNode(N, ResNode);
3628 return;
3629 }
3630 }
3631 break;
3632 }
3633 case ISD::WRITE_REGISTER:
3634 if (tryWriteRegister(N))
3635 return;
3636 break;
3637 case ISD::READ_REGISTER:
3638 if (tryReadRegister(N))
3639 return;
3640 break;
3641 case ISD::INLINEASM:
3642 case ISD::INLINEASM_BR:
3643 if (tryInlineAsm(N))
3644 return;
3645 break;
3646 case ISD::Constant: {
3647 unsigned Val = N->getAsZExtVal();
3648 // If we can't materialize the constant we need to use a literal pool
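// (ConstantMaterializationCost is roughly the number of instructions needed;
// > 2 means even a two-instruction sequence such as MOVW+MOVT can't build it.)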
3649 if (ConstantMaterializationCost(Val, Subtarget) > 2 &&
3650 !Subtarget->genExecuteOnly()) {
3651 SDValue CPIdx = CurDAG->getTargetConstantPool(
3652 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
3653 TLI->getPointerTy(CurDAG->getDataLayout()));
3654
3655 SDNode *ResNode;
3656 if (Subtarget->isThumb()) {
3657 SDValue Ops[] = {
3658 CPIdx,
3659 getAL(CurDAG, dl),
3660 CurDAG->getRegister(0, MVT::i32),
3661 CurDAG->getEntryNode()
3662 };
3663 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
3664 Ops);
3665 } else {
3666 SDValue Ops[] = {
3667 CPIdx,
3668 CurDAG->getTargetConstant(0, dl, MVT::i32),
3669 getAL(CurDAG, dl),
3670 CurDAG->getRegister(0, MVT::i32),
3671 CurDAG->getEntryNode()
3672 };
3673 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
3674 Ops);
3675 }
3676 // Annotate the Node with memory operand information so that MachineInstr
3677 // queries work properly. This e.g. gives the register allocation the
3678 // required information for rematerialization.
3679 MachineFunction& MF = CurDAG->getMachineFunction();
3680 MachineMemOperand *MemOp =
3681 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3682 MachineMemOperand::MOLoad, 4, Align(4));
3683
3684 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3685
3686 ReplaceNode(N, ResNode);
3687 return;
3688 }
3689
3690 // Other cases are autogenerated.
3691 break;
3692 }
3693 case ISD::FrameIndex: {
3694 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
3695 int FI = cast<FrameIndexSDNode>(N)->getIndex();
3696 SDValue TFI = CurDAG->getTargetFrameIndex(
3697 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
3698 if (Subtarget->isThumb1Only()) {
3699 // Set the alignment of the frame object to 4, to avoid having to generate
3700 // more than one ADD
3701 MachineFrameInfo &MFI = MF->getFrameInfo();
3702 if (MFI.getObjectAlign(FI) < Align(4))
3703 MFI.setObjectAlignment(FI, Align(4));
3704 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
3705 CurDAG->getTargetConstant(0, dl, MVT::i32));
3706 return;
3707 } else {
3708 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
3709 ARM::t2ADDri : ARM::ADDri);
3710 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
3711 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3712 CurDAG->getRegister(0, MVT::i32) };
3713 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3714 return;
3715 }
3716 }
3717 case ISD::INSERT_VECTOR_ELT: {
3718 if (tryInsertVectorElt(N))
3719 return;
3720 break;
3721 }
3722 case ISD::SRL:
3723 if (tryV6T2BitfieldExtractOp(N, false))
3724 return;
3725 break;
3726 case ISD::SIGN_EXTEND_INREG:
3727 case ISD::SRA:
3728 if (tryV6T2BitfieldExtractOp(N, true))
3729 return;
3730 break;
3731 case ISD::FP_TO_UINT:
3732 case ISD::FP_TO_SINT:
3733 case ISD::FP_TO_UINT_SAT:
3734 case ISD::FP_TO_SINT_SAT:
3735 if (tryFP_TO_INT(N, dl))
3736 return;
3737 break;
3738 case ISD::FMUL:
3739 if (tryFMULFixed(N, dl))
3740 return;
3741 break;
3742 case ISD::MUL:
3743 if (Subtarget->isThumb1Only())
3744 break;
3745 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
3746 unsigned RHSV = C->getZExtValue();
3747 if (!RHSV) break;
3748 if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
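// e.g. x * 9 becomes add(x, x, lsl #3), i.e. x + (x << 3).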
3749 unsigned ShImm = Log2_32(RHSV-1);
3750 if (ShImm >= 32)
3751 break;
3752 SDValue V = N->getOperand(0);
3753 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3754 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3755 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3756 if (Subtarget->isThumb()) {
3757 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3758 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
3759 return;
3760 } else {
3761 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3762 Reg0 };
3763 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
3764 return;
3765 }
3766 }
3767 if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
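// e.g. x * 7 becomes rsb(x, x, lsl #3), i.e. (x << 3) - x.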
3768 unsigned ShImm = Log2_32(RHSV+1);
3769 if (ShImm >= 32)
3770 break;
3771 SDValue V = N->getOperand(0);
3772 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3773 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3774 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3775 if (Subtarget->isThumb()) {
3776 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3777 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
3778 return;
3779 } else {
3780 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3781 Reg0 };
3782 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
3783 return;
3784 }
3785 }
3786 }
3787 break;
3788 case ISD::AND: {
3789 // Check for unsigned bitfield extract
3790 if (tryV6T2BitfieldExtractOp(N, false))
3791 return;
3792
3793 // If an immediate is used in an AND node, it is possible that the immediate
3794 // can be more optimally materialized when negated. If this is the case we
3795 // can negate the immediate and use a BIC instead.
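// e.g. in Thumb1, building 0xffffff00 takes MOVS+MVNS, while ~0xffffff00 ==
// 0xff is a single MOVS, so the AND becomes a cheaper BIC with 0xff.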
3796 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
3797 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
3798 uint32_t Imm = (uint32_t) N1C->getZExtValue();
3799
3800 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
3801 // immediate can be negated and fit in the immediate operand of
3802 // a t2BIC, don't do any manual transform here as this can be
3803 // handled by the generic ISel machinery.
3804 bool PreferImmediateEncoding =
3805 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
3806 if (!PreferImmediateEncoding &&
3807 ConstantMaterializationCost(Imm, Subtarget) >
3808 ConstantMaterializationCost(~Imm, Subtarget)) {
3809 // The current immediate costs more to materialize than a negated
3810 // immediate, so negate the immediate and use a BIC.
3811 SDValue NewImm = CurDAG->getConstant(~Imm, dl, MVT::i32);
3812 // If the new constant didn't exist before, reposition it in the topological
3813 // ordering so it is just before N. Otherwise, don't touch its location.
3814 if (NewImm->getNodeId() == -1)
3815 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
3816
3817 if (!Subtarget->hasThumb2()) {
3818 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
3819 N->getOperand(0), NewImm, getAL(CurDAG, dl),
3820 CurDAG->getRegister(0, MVT::i32)};
3821 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
3822 return;
3823 } else {
3824 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
3825 CurDAG->getRegister(0, MVT::i32),
3826 CurDAG->getRegister(0, MVT::i32)};
3827 ReplaceNode(N,
3828 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
3829 return;
3830 }
3831 }
3832 }
3833
3834 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
3835 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
3836 // are entirely contributed by c2 and lower 16-bits are entirely contributed
3837 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
3838 // Select it to: "movt x, ((c2 & 0xffff0000) >> 16)".
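// e.g. c1 == 0x1234ffff and c2 == 0x12340000 select to "movt x, #0x1234".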
3839 EVT VT = N->getValueType(0);
3840 if (VT != MVT::i32)
3841 break;
3842 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
3843 ? ARM::t2MOVTi16
3844 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
3845 if (!Opc)
3846 break;
3847 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3848 N1C = dyn_cast<ConstantSDNode>(N1);
3849 if (!N1C)
3850 break;
3851 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
3852 SDValue N2 = N0.getOperand(1);
3853 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
3854 if (!N2C)
3855 break;
3856 unsigned N1CVal = N1C->getZExtValue();
3857 unsigned N2CVal = N2C->getZExtValue();
3858 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
3859 (N1CVal & 0xffffU) == 0xffffU &&
3860 (N2CVal & 0xffffU) == 0x0U) {
3861 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
3862 dl, MVT::i32);
3863 SDValue Ops[] = { N0.getOperand(0), Imm16,
3864 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
3865 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
3866 return;
3867 }
3868 }
3869
3870 break;
3871 }
3872 case ARMISD::UMAAL: {
3873 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3874 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3875 N->getOperand(2), N->getOperand(3),
3876 getAL(CurDAG, dl),
3877 CurDAG->getRegister(0, MVT::i32) };
3878 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
3879 return;
3880 }
3881 case ARMISD::UMLAL:{
3882 if (Subtarget->isThumb()) {
3883 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3884 N->getOperand(3), getAL(CurDAG, dl),
3885 CurDAG->getRegister(0, MVT::i32)};
3886 ReplaceNode(
3887 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
3888 return;
3889 } else {
3890 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3891 N->getOperand(3), getAL(CurDAG, dl),
3892 CurDAG->getRegister(0, MVT::i32),
3893 CurDAG->getRegister(0, MVT::i32) };
3894 ReplaceNode(N, CurDAG->getMachineNode(
3895 Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3896 MVT::i32, MVT::i32, Ops));
3897 return;
3898 }
3899 }
3900 case ARMISD::SMLAL:{
3901 if (Subtarget->isThumb()) {
3902 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3903 N->getOperand(3), getAL(CurDAG, dl),
3904 CurDAG->getRegister(0, MVT::i32)};
3905 ReplaceNode(
3906 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3907 return;
3908 } else {
3909 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3910 N->getOperand(3), getAL(CurDAG, dl),
3911 CurDAG->getRegister(0, MVT::i32),
3912 CurDAG->getRegister(0, MVT::i32) };
3913 ReplaceNode(N, CurDAG->getMachineNode(
3914 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3915 MVT::i32, MVT::i32, Ops));
3916 return;
3917 }
3918 }
3919 case ARMISD::SUBE: {
3920 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
3921 break;
3922 // Look for a pattern to match SMMLS
3923 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
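// This is the high word of the 64-bit subtraction (a << 32) - (a * b): the
// SUBC produces the borrow from 0 - lo(a*b), and the SUBE folds that borrow
// into a - hi(a*b), which is exactly what SMMLS computes.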
3924 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
3925 N->getOperand(2).getOpcode() != ARMISD::SUBC ||
3926 !SDValue(N, 1).use_empty())
3927 break;
3928
3929 if (Subtarget->isThumb())
3930 assert(Subtarget->hasThumb2() &&
3931 "This pattern should not be generated for Thumb");
3932
3933 SDValue SmulLoHi = N->getOperand(1);
3934 SDValue Subc = N->getOperand(2);
3935 SDValue Zero = Subc.getOperand(0);
3936
3937 if (!isNullConstant(Zero) || Subc.getOperand(1) != SmulLoHi.getValue(0) ||
3938 N->getOperand(1) != SmulLoHi.getValue(1) ||
3939 N->getOperand(2) != Subc.getValue(1))
3940 break;
3941
3942 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3943 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
3944 N->getOperand(0), getAL(CurDAG, dl),
3945 CurDAG->getRegister(0, MVT::i32) };
3946 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
3947 return;
3948 }
3949 case ISD::LOAD: {
3950 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3951 return;
3952 if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
3953 if (tryT2IndexedLoad(N))
3954 return;
3955 } else if (Subtarget->isThumb()) {
3956 if (tryT1IndexedLoad(N))
3957 return;
3958 } else if (tryARMIndexedLoad(N))
3959 return;
3960 // Other cases are autogenerated.
3961 break;
3962 }
3963 case ISD::MLOAD:
3964 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3965 return;
3966 // Other cases are autogenerated.
3967 break;
3968 case ARMISD::WLSSETUP: {
3969 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32,
3970 N->getOperand(0));
3971 ReplaceUses(N, New);
3972 CurDAG->RemoveDeadNode(N);
3973 return;
3974 }
3975 case ARMISD::WLS: {
3976 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other,
3977 N->getOperand(1), N->getOperand(2),
3978 N->getOperand(0));
3979 ReplaceUses(N, New);
3980 CurDAG->RemoveDeadNode(N);
3981 return;
3982 }
3983 case ARMISD::LE: {
3984 SDValue Ops[] = { N->getOperand(1),
3985 N->getOperand(2),
3986 N->getOperand(0) };
3987 unsigned Opc = ARM::t2LoopEnd;
3988 SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
3989 ReplaceUses(N, New);
3990 CurDAG->RemoveDeadNode(N);
3991 return;
3992 }
3993 case ARMISD::LDRD: {
3994 if (Subtarget->isThumb2())
3995 break; // TableGen handles isel in this case.
3996 SDValue Base, RegOffset, ImmOffset;
3997 const SDValue &Chain = N->getOperand(0);
3998 const SDValue &Addr = N->getOperand(1);
3999 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
4000 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
4001 // The register-offset variant of LDRD mandates that the register
4002 // allocated to RegOffset is not reused in any of the remaining operands.
4003 // This restriction is currently not enforced. Therefore emitting this
4004 // variant is explicitly avoided.
4005 Base = Addr;
4006 RegOffset = CurDAG->getRegister(0, MVT::i32);
4007 }
4008 SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
4009 SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
4010 {MVT::Untyped, MVT::Other}, Ops);
4011 SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4012 SDValue(New, 0));
4013 SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4014 SDValue(New, 0));
4015 transferMemOperands(N, New);
4016 ReplaceUses(SDValue(N, 0), Lo);
4017 ReplaceUses(SDValue(N, 1), Hi);
4018 ReplaceUses(SDValue(N, 2), SDValue(New, 1));
4019 CurDAG->RemoveDeadNode(N);
4020 return;
4021 }
4022 case ARMISD::STRD: {
4023 if (Subtarget->isThumb2())
4024 break; // TableGen handles isel in this case.
4025 SDValue Base, RegOffset, ImmOffset;
4026 const SDValue &Chain = N->getOperand(0);
4027 const SDValue &Addr = N->getOperand(3);
4028 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
4029 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
4030 // The register-offset variant of STRD mandates that the register
4031 // allocated to RegOffset is not reused in any of the remaining operands.
4032 // This restriction is currently not enforced. Therefore emitting this
4033 // variant is explicitly avoided.
4034 Base = Addr;
4035 RegOffset = CurDAG->getRegister(0, MVT::i32);
4036 }
4037 SDNode *RegPair =
4038 createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2));
4039 SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};
4040 SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops);
4041 transferMemOperands(N, New);
4042 ReplaceUses(SDValue(N, 0), SDValue(New, 0));
4043 CurDAG->RemoveDeadNode(N);
4044 return;
4045 }
4046 case ARMISD::LOOP_DEC: {
4047 SDValue Ops[] = { N->getOperand(1),
4048 N->getOperand(2),
4049 N->getOperand(0) };
4050 SDNode *Dec =
4051 CurDAG->getMachineNode(ARM::t2LoopDec, dl,
4052 CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
4053 ReplaceUses(N, Dec);
4054 CurDAG->RemoveDeadNode(N);
4055 return;
4056 }
4057 case ARMISD::BRCOND: {
4058 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4059 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4060 // Pattern complexity = 6 cost = 1 size = 0
4061
4062 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4063 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
4064 // Pattern complexity = 6 cost = 1 size = 0
4065
4066 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4067 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4068 // Pattern complexity = 6 cost = 1 size = 0
4069
4070 unsigned Opc = Subtarget->isThumb() ?
4071 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
4072 SDValue Chain = N->getOperand(0);
4073 SDValue N1 = N->getOperand(1);
4074 SDValue N2 = N->getOperand(2);
4075 SDValue Flags = N->getOperand(3);
4076 assert(N1.getOpcode() == ISD::BasicBlock);
4077 assert(N2.getOpcode() == ISD::Constant);
4078
4079 unsigned CC = (unsigned)N2->getAsZExtVal();
4080
4081 if (Flags.getOpcode() == ARMISD::CMPZ) {
4082 if (Flags.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
4083 SDValue Int = Flags.getOperand(0);
4084 uint64_t ID = Int->getConstantOperandVal(1);
4085
4086 // Handle low-overhead loops.
4087 if (ID == Intrinsic::loop_decrement_reg) {
4088 SDValue Elements = Int.getOperand(2);
4089 SDValue Size = CurDAG->getTargetConstant(Int.getConstantOperandVal(3),
4090 dl, MVT::i32);
4091
4092 SDValue Args[] = { Elements, Size, Int.getOperand(0) };
4093 SDNode *LoopDec =
4094 CurDAG->getMachineNode(ARM::t2LoopDec, dl,
4095 CurDAG->getVTList(MVT::i32, MVT::Other),
4096 Args);
4097 ReplaceUses(Int.getNode(), LoopDec);
4098
4099 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
4100 SDNode *LoopEnd =
4101 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
4102
4103 ReplaceUses(N, LoopEnd);
4104 CurDAG->RemoveDeadNode(N);
4105 CurDAG->RemoveDeadNode(Flags.getNode());
4106 CurDAG->RemoveDeadNode(Int.getNode());
4107 return;
4108 }
4109 }
4110
4111 bool SwitchEQNEToPLMI;
4112 SelectCMPZ(Flags.getNode(), SwitchEQNEToPLMI);
4113 Flags = N->getOperand(3);
4114
4115 if (SwitchEQNEToPLMI) {
4116 switch ((ARMCC::CondCodes)CC) {
4117 default: llvm_unreachable("CMPZ must be either NE or EQ!");
4118 case ARMCC::NE:
4119 CC = (unsigned)ARMCC::MI;
4120 break;
4121 case ARMCC::EQ:
4122 CC = (unsigned)ARMCC::PL;
4123 break;
4124 }
4125 }
4126 }
4127
4128 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
4129 Chain = CurDAG->getCopyToReg(Chain, dl, ARM::CPSR, Flags, SDValue());
4130 SDValue Ops[] = {N1, Tmp2, CurDAG->getRegister(ARM::CPSR, MVT::i32), Chain,
4131 Chain.getValue(1)};
4132 CurDAG->SelectNodeTo(N, Opc, MVT::Other, Ops);
4133 return;
4134 }
4135
4136 case ARMISD::CMPZ: {
4137 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
4138 // This allows us to avoid materializing the expensive negative constant.
4139 // The CMPZ #0 is useless and will be peepholed away but we need to keep
4140 // it for its flags output.
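// e.g. (CMPZ X, #-42) becomes (CMPZ (ADDS X, #42), #0); X - (-42) and
// X + 42 produce identical flags.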
4141 SDValue X = N->getOperand(0);
4142 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
4143 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
4144 int64_t Addend = -C->getSExtValue();
4145
4146 SDNode *Add = nullptr;
4147 // ADDS can be better than CMN if the immediate fits in a
4148 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
4149 // Outside that range we can just use a CMN which is 32-bit but has a
4150 // 12-bit immediate range.
4151 if (Addend < 1<<8) {
4152 if (Subtarget->isThumb2()) {
4153 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
4154 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
4155 CurDAG->getRegister(0, MVT::i32) };
4156 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
4157 } else {
4158 unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
4159 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
4160 CurDAG->getTargetConstant(Addend, dl, MVT::i32),
4161 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
4162 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
4163 }
4164 }
4165 if (Add) {
4166 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
4167 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, N->getVTList(), Ops2);
4168 }
4169 }
4170 // Other cases are autogenerated.
4171 break;
4172 }
4173
4174 case ARMISD::CMOV: {
4175 SDValue Flags = N->getOperand(3);
4176
4177 if (Flags.getOpcode() == ARMISD::CMPZ) {
4178 bool SwitchEQNEToPLMI;
4179 SelectCMPZ(Flags.getNode(), SwitchEQNEToPLMI);
4180
4181 if (SwitchEQNEToPLMI) {
4182 SDValue ARMcc = N->getOperand(2);
4183 ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();
4184
4185 switch (CC) {
4186 default: llvm_unreachable("CMPZ must be either NE or EQ!");
4187 case ARMCC::NE:
4188 CC = ARMCC::MI;
4189 break;
4190 case ARMCC::EQ:
4191 CC = ARMCC::PL;
4192 break;
4193 }
4194 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
4195 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
4196 N->getOperand(3)};
4197 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
4198 }
4199 }
4200 // Other cases are autogenerated.
4201 break;
4202 }
4203 case ARMISD::VZIP: {
4204 EVT VT = N->getValueType(0);
4205 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
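// (With only two 32-bit elements per d-register, the zip and transpose
// permutations coincide, so no separate d-register vzip.32 is needed.)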
4206 unsigned Opc64[] = {ARM::VZIPd8, ARM::VZIPd16, ARM::VTRNd32};
4207 unsigned Opc128[] = {ARM::VZIPq8, ARM::VZIPq16, ARM::VZIPq32};
4208 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4209 SDValue Pred = getAL(CurDAG, dl);
4210 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4211 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4212 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4213 return;
4214 }
4215 case ARMISD::VUZP: {
4216 EVT VT = N->getValueType(0);
4217 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4218 unsigned Opc64[] = {ARM::VUZPd8, ARM::VUZPd16, ARM::VTRNd32};
4219 unsigned Opc128[] = {ARM::VUZPq8, ARM::VUZPq16, ARM::VUZPq32};
4220 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4221 SDValue Pred = getAL(CurDAG, dl);
4222 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4223 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4224 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4225 return;
4226 }
4227 case ARMISD::VTRN: {
4228 EVT VT = N->getValueType(0);
4229 unsigned Opc64[] = {ARM::VTRNd8, ARM::VTRNd16, ARM::VTRNd32};
4230 unsigned Opc128[] = {ARM::VTRNq8, ARM::VTRNq16, ARM::VTRNq32};
4231 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4232 SDValue Pred = getAL(CurDAG, dl);
4233 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4234 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4235 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4236 return;
4237 }
4238 case ARMISD::BUILD_VECTOR: {
4239 EVT VecVT = N->getValueType(0);
4240 EVT EltVT = VecVT.getVectorElementType();
4241 unsigned NumElts = VecVT.getVectorNumElements();
4242 if (EltVT == MVT::f64) {
4243 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
4244 ReplaceNode(
4245 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
4246 return;
4247 }
4248 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
4249 if (NumElts == 2) {
4250 ReplaceNode(
4251 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
4252 return;
4253 }
4254 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
4255 ReplaceNode(N,
4256 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
4257 N->getOperand(2), N->getOperand(3)));
4258 return;
4259 }
4260
4261 case ARMISD::VLD1DUP: {
4262 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
4263 ARM::VLD1DUPd32 };
4264 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
4265 ARM::VLD1DUPq32 };
4266 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
4267 return;
4268 }
4269
4270 case ARMISD::VLD2DUP: {
4271 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4272 ARM::VLD2DUPd32 };
4273 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
4274 return;
4275 }
4276
4277 case ARMISD::VLD3DUP: {
4278 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
4279 ARM::VLD3DUPd16Pseudo,
4280 ARM::VLD3DUPd32Pseudo };
4281 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
4282 return;
4283 }
4284
4285 case ARMISD::VLD4DUP: {
4286 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
4287 ARM::VLD4DUPd16Pseudo,
4288 ARM::VLD4DUPd32Pseudo };
4289 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
4290 return;
4291 }
4292
4293 case ARMISD::VLD1DUP_UPD: {
4294 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
4295 ARM::VLD1DUPd16wb_fixed,
4296 ARM::VLD1DUPd32wb_fixed };
4297 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
4298 ARM::VLD1DUPq16wb_fixed,
4299 ARM::VLD1DUPq32wb_fixed };
4300 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
4301 return;
4302 }
4303
4304 case ARMISD::VLD2DUP_UPD: {
4305 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed,
4306 ARM::VLD2DUPd16wb_fixed,
4307 ARM::VLD2DUPd32wb_fixed,
4308 ARM::VLD1q64wb_fixed };
4309 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4310 ARM::VLD2DUPq16EvenPseudo,
4311 ARM::VLD2DUPq32EvenPseudo };
4312 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed,
4313 ARM::VLD2DUPq16OddPseudoWB_fixed,
4314 ARM::VLD2DUPq32OddPseudoWB_fixed };
4315 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, DOpcodes, QOpcodes0, QOpcodes1);
4316 return;
4317 }
4318
4319 case ARMISD::VLD3DUP_UPD: {
4320 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
4321 ARM::VLD3DUPd16Pseudo_UPD,
4322 ARM::VLD3DUPd32Pseudo_UPD,
4323 ARM::VLD1d64TPseudoWB_fixed };
4324 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4325 ARM::VLD3DUPq16EvenPseudo,
4326 ARM::VLD3DUPq32EvenPseudo };
4327 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD,
4328 ARM::VLD3DUPq16OddPseudo_UPD,
4329 ARM::VLD3DUPq32OddPseudo_UPD };
4330 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4331 return;
4332 }
4333
4334 case ARMISD::VLD4DUP_UPD: {
4335 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
4336 ARM::VLD4DUPd16Pseudo_UPD,
4337 ARM::VLD4DUPd32Pseudo_UPD,
4338 ARM::VLD1d64QPseudoWB_fixed };
4339 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4340 ARM::VLD4DUPq16EvenPseudo,
4341 ARM::VLD4DUPq32EvenPseudo };
4342 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD,
4343 ARM::VLD4DUPq16OddPseudo_UPD,
4344 ARM::VLD4DUPq32OddPseudo_UPD };
4345 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4346 return;
4347 }
4348
4349 case ARMISD::VLD1_UPD: {
4350 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
4351 ARM::VLD1d16wb_fixed,
4352 ARM::VLD1d32wb_fixed,
4353 ARM::VLD1d64wb_fixed };
4354 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
4355 ARM::VLD1q16wb_fixed,
4356 ARM::VLD1q32wb_fixed,
4357 ARM::VLD1q64wb_fixed };
4358 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
4359 return;
4360 }
4361
4362 case ARMISD::VLD2_UPD: {
4363 if (Subtarget->hasNEON()) {
4364 static const uint16_t DOpcodes[] = {
4365 ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,
4366 ARM::VLD1q64wb_fixed};
4367 static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,
4368 ARM::VLD2q16PseudoWB_fixed,
4369 ARM::VLD2q32PseudoWB_fixed};
4370 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
4371 } else {
4372 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,
4373 ARM::MVE_VLD21_8_wb};
4374 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
4375 ARM::MVE_VLD21_16_wb};
4376 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
4377 ARM::MVE_VLD21_32_wb};
4378 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4379 SelectMVE_VLD(N, 2, Opcodes, true);
4380 }
4381 return;
4382 }
4383
4384 case ARMISD::VLD3_UPD: {
4385 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
4386 ARM::VLD3d16Pseudo_UPD,
4387 ARM::VLD3d32Pseudo_UPD,
4388 ARM::VLD1d64TPseudoWB_fixed};
4389 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4390 ARM::VLD3q16Pseudo_UPD,
4391 ARM::VLD3q32Pseudo_UPD };
4392 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
4393 ARM::VLD3q16oddPseudo_UPD,
4394 ARM::VLD3q32oddPseudo_UPD };
4395 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4396 return;
4397 }
4398
4399 case ARMISD::VLD4_UPD: {
4400 if (Subtarget->hasNEON()) {
4401 static const uint16_t DOpcodes[] = {
4402 ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD,
4403 ARM::VLD1d64QPseudoWB_fixed};
4404 static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,
4405 ARM::VLD4q16Pseudo_UPD,
4406 ARM::VLD4q32Pseudo_UPD};
4407 static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,
4408 ARM::VLD4q16oddPseudo_UPD,
4409 ARM::VLD4q32oddPseudo_UPD};
4410 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4411 } else {
4412 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
4413 ARM::MVE_VLD42_8,
4414 ARM::MVE_VLD43_8_wb};
4415 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
4416 ARM::MVE_VLD42_16,
4417 ARM::MVE_VLD43_16_wb};
4418 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
4419 ARM::MVE_VLD42_32,
4420 ARM::MVE_VLD43_32_wb};
4421 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4422 SelectMVE_VLD(N, 4, Opcodes, true);
4423 }
4424 return;
4425 }
4426
4427 case ARMISD::VLD1x2_UPD: {
4428 if (Subtarget->hasNEON()) {
4429 static const uint16_t DOpcodes[] = {
4430 ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed,
4431 ARM::VLD1q64wb_fixed};
4432 static const uint16_t QOpcodes[] = {
4433 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4434 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4435 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
4436 return;
4437 }
4438 break;
4439 }
4440
4441 case ARMISD::VLD1x3_UPD: {
4442 if (Subtarget->hasNEON()) {
4443 static const uint16_t DOpcodes[] = {
4444 ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed,
4445 ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed};
4446 static const uint16_t QOpcodes0[] = {
4447 ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD,
4448 ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD};
4449 static const uint16_t QOpcodes1[] = {
4450 ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD,
4451 ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD};
4452 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4453 return;
4454 }
4455 break;
4456 }
4457
4458 case ARMISD::VLD1x4_UPD: {
4459 if (Subtarget->hasNEON()) {
4460 static const uint16_t DOpcodes[] = {
4461 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4462 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4463 static const uint16_t QOpcodes0[] = {
4464 ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD,
4465 ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD};
4466 static const uint16_t QOpcodes1[] = {
4467 ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD,
4468 ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD};
4469 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4470 return;
4471 }
4472 break;
4473 }
4474
4475 case ARMISD::VLD2LN_UPD: {
4476 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
4477 ARM::VLD2LNd16Pseudo_UPD,
4478 ARM::VLD2LNd32Pseudo_UPD };
4479 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
4480 ARM::VLD2LNq32Pseudo_UPD };
4481 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
4482 return;
4483 }
4484
4485 case ARMISD::VLD3LN_UPD: {
4486 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
4487 ARM::VLD3LNd16Pseudo_UPD,
4488 ARM::VLD3LNd32Pseudo_UPD };
4489 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
4490 ARM::VLD3LNq32Pseudo_UPD };
4491 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
4492 return;
4493 }
4494
4495 case ARMISD::VLD4LN_UPD: {
4496 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
4497 ARM::VLD4LNd16Pseudo_UPD,
4498 ARM::VLD4LNd32Pseudo_UPD };
4499 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
4500 ARM::VLD4LNq32Pseudo_UPD };
4501 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
4502 return;
4503 }
4504
4505 case ARMISD::VST1_UPD: {
4506 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
4507 ARM::VST1d16wb_fixed,
4508 ARM::VST1d32wb_fixed,
4509 ARM::VST1d64wb_fixed };
4510 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
4511 ARM::VST1q16wb_fixed,
4512 ARM::VST1q32wb_fixed,
4513 ARM::VST1q64wb_fixed };
4514 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
4515 return;
4516 }
4517
4518 case ARMISD::VST2_UPD: {
4519 if (Subtarget->hasNEON()) {
4520 static const uint16_t DOpcodes[] = {
4521 ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
4522 ARM::VST1q64wb_fixed};
4523 static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
4524 ARM::VST2q16PseudoWB_fixed,
4525 ARM::VST2q32PseudoWB_fixed};
4526 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
4527 return;
4528 }
4529 break;
4530 }
4531
4532 case ARMISD::VST3_UPD: {
4533 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
4534 ARM::VST3d16Pseudo_UPD,
4535 ARM::VST3d32Pseudo_UPD,
4536 ARM::VST1d64TPseudoWB_fixed};
4537 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
4538 ARM::VST3q16Pseudo_UPD,
4539 ARM::VST3q32Pseudo_UPD };
4540 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
4541 ARM::VST3q16oddPseudo_UPD,
4542 ARM::VST3q32oddPseudo_UPD };
4543 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4544 return;
4545 }
4546
4547 case ARMISD::VST4_UPD: {
4548 if (Subtarget->hasNEON()) {
4549 static const uint16_t DOpcodes[] = {
4550 ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD,
4551 ARM::VST1d64QPseudoWB_fixed};
4552 static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
4553 ARM::VST4q16Pseudo_UPD,
4554 ARM::VST4q32Pseudo_UPD};
4555 static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
4556 ARM::VST4q16oddPseudo_UPD,
4557 ARM::VST4q32oddPseudo_UPD};
4558 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4559 return;
4560 }
4561 break;
4562 }
4563
4564 case ARMISD::VST1x2_UPD: {
4565 if (Subtarget->hasNEON()) {
4566 static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed,
4567 ARM::VST1q16wb_fixed,
4568 ARM::VST1q32wb_fixed,
4569 ARM::VST1q64wb_fixed};
4570 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4571 ARM::VST1d16QPseudoWB_fixed,
4572 ARM::VST1d32QPseudoWB_fixed,
4573 ARM::VST1d64QPseudoWB_fixed };
4574 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
4575 return;
4576 }
4577 break;
4578 }
4579
4580 case ARMISD::VST1x3_UPD: {
4581 if (Subtarget->hasNEON()) {
4582 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed,
4583 ARM::VST1d16TPseudoWB_fixed,
4584 ARM::VST1d32TPseudoWB_fixed,
4585 ARM::VST1d64TPseudoWB_fixed };
4586 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
4587 ARM::VST1q16LowTPseudo_UPD,
4588 ARM::VST1q32LowTPseudo_UPD,
4589 ARM::VST1q64LowTPseudo_UPD };
4590 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD,
4591 ARM::VST1q16HighTPseudo_UPD,
4592 ARM::VST1q32HighTPseudo_UPD,
4593 ARM::VST1q64HighTPseudo_UPD };
4594 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4595 return;
4596 }
4597 break;
4598 }
4599
4600 case ARMISD::VST1x4_UPD: {
4601 if (Subtarget->hasNEON()) {
4602 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4603 ARM::VST1d16QPseudoWB_fixed,
4604 ARM::VST1d32QPseudoWB_fixed,
4605 ARM::VST1d64QPseudoWB_fixed };
4606 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
4607 ARM::VST1q16LowQPseudo_UPD,
4608 ARM::VST1q32LowQPseudo_UPD,
4609 ARM::VST1q64LowQPseudo_UPD };
4610 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD,
4611 ARM::VST1q16HighQPseudo_UPD,
4612 ARM::VST1q32HighQPseudo_UPD,
4613 ARM::VST1q64HighQPseudo_UPD };
4614 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4615 return;
4616 }
4617 break;
4618 }
4619 case ARMISD::VST2LN_UPD: {
4620 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
4621 ARM::VST2LNd16Pseudo_UPD,
4622 ARM::VST2LNd32Pseudo_UPD };
4623 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
4624 ARM::VST2LNq32Pseudo_UPD };
4625 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
4626 return;
4627 }
4628
4629 case ARMISD::VST3LN_UPD: {
4630 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
4631 ARM::VST3LNd16Pseudo_UPD,
4632 ARM::VST3LNd32Pseudo_UPD };
4633 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
4634 ARM::VST3LNq32Pseudo_UPD };
4635 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
4636 return;
4637 }
4638
4639 case ARMISD::VST4LN_UPD: {
4640 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
4641 ARM::VST4LNd16Pseudo_UPD,
4642 ARM::VST4LNd32Pseudo_UPD };
4643 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
4644 ARM::VST4LNq32Pseudo_UPD };
4645 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
4646 return;
4647 }
4648
4649 case ISD::INTRINSIC_VOID:
4650 case ISD::INTRINSIC_W_CHAIN: {
4651 unsigned IntNo = N->getConstantOperandVal(1);
4652 switch (IntNo) {
4653 default:
4654 break;
4655
4656 case Intrinsic::arm_mrrc:
4657 case Intrinsic::arm_mrrc2: {
4658 SDLoc dl(N);
4659 SDValue Chain = N->getOperand(0);
4660 unsigned Opc;
4661
4662 if (Subtarget->isThumb())
4663 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
4664 else
4665 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
4666
4667 SmallVector<SDValue, 5> Ops;
4668 Ops.push_back(getI32Imm(N->getConstantOperandVal(2), dl)); /* coproc */
4669 Ops.push_back(getI32Imm(N->getConstantOperandVal(3), dl)); /* opc */
4670 Ops.push_back(getI32Imm(N->getConstantOperandVal(4), dl)); /* CRm */
4671
4672 // The ARM mrrc2 instruction doesn't allow predicates: the top 4 bits of the
4673 // encoded instruction are always '1111'. Assembly language does allow AL as a
4674 // predicate on mrrc2, but it makes no difference to the encoded instruction.
4675 if (Opc != ARM::MRRC2) {
4676 Ops.push_back(getAL(CurDAG, dl));
4677 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4678 }
4679
4680 Ops.push_back(Chain);
4681
4682 // Writes to two registers.
4683 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
4684
4685 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
4686 return;
4687 }
4688 case Intrinsic::arm_ldaexd:
4689 case Intrinsic::arm_ldrexd: {
4690 SDLoc dl(N);
4691 SDValue Chain = N->getOperand(0);
4692 SDValue MemAddr = N->getOperand(2);
4693 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
4694
4695 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
4696 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
4697 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
4698
4699 // arm_ldrexd returns an i64 value in {i32, i32}
4700 std::vector<EVT> ResTys;
4701 if (isThumb) {
4702 ResTys.push_back(MVT::i32);
4703 ResTys.push_back(MVT::i32);
4704 } else
4705 ResTys.push_back(MVT::Untyped);
4706 ResTys.push_back(MVT::Other);
4707
4708 // Place arguments in the right order.
4709 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
4710 CurDAG->getRegister(0, MVT::i32), Chain};
4711 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4712 // Transfer memoperands.
4713 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
4714 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4715
4716 // Remap uses.
4717 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
4718 if (!SDValue(N, 0).use_empty()) {
4719 SDValue Result;
4720 if (isThumb)
4721 Result = SDValue(Ld, 0);
4722 else {
4723 SDValue SubRegIdx =
4724 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
4725 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4726 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4727 Result = SDValue(ResNode,0);
4728 }
4729 ReplaceUses(SDValue(N, 0), Result);
4730 }
4731 if (!SDValue(N, 1).use_empty()) {
4733 if (isThumb)
4734 Result = SDValue(Ld, 1);
4735 else {
4736 SDValue SubRegIdx =
4737 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
4738 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4739 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4740 Result = SDValue(ResNode, 0);
4741 }
4742 ReplaceUses(SDValue(N, 1), Result);
4743 }
4744 ReplaceUses(SDValue(N, 2), OutChain);
4745 CurDAG->RemoveDeadNode(N);
4746 return;
4747 }
4748 case Intrinsic::arm_stlexd:
4749 case Intrinsic::arm_strexd: {
4750 SDLoc dl(N);
4751 SDValue Chain = N->getOperand(0);
4752 SDValue Val0 = N->getOperand(2);
4753 SDValue Val1 = N->getOperand(3);
4754 SDValue MemAddr = N->getOperand(4);
4755
4756 // Store exclusive double returns an i32 value which is the return status
4757 // of the issued store.
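// (Status is 0 if the exclusive store succeeded, 1 if it did not take place.)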
4758 const EVT ResTys[] = {MVT::i32, MVT::Other};
4759
4760 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
4761 // Place arguments in the right order.
4762 SmallVector<SDValue, 7> Ops;
4763 if (isThumb) {
4764 Ops.push_back(Val0);
4765 Ops.push_back(Val1);
4766 } else
4767 // arm_strexd uses GPRPair.
4768 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
4769 Ops.push_back(MemAddr);
4770 Ops.push_back(getAL(CurDAG, dl));
4771 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4772 Ops.push_back(Chain);
4773
4774 bool IsRelease = IntNo == Intrinsic::arm_stlexd;
4775 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
4776 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
4777
4778 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4779 // Transfer memoperands.
4780 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
4781 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4782
4783 ReplaceNode(N, St);
4784 return;
4785 }
4786
4787 case Intrinsic::arm_neon_vld1: {
4788 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
4789 ARM::VLD1d32, ARM::VLD1d64 };
4790 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4791 ARM::VLD1q32, ARM::VLD1q64};
4792 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
4793 return;
4794 }
4795
4796 case Intrinsic::arm_neon_vld1x2: {
4797 static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4798 ARM::VLD1q32, ARM::VLD1q64 };
4799 static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
4800 ARM::VLD1d16QPseudo,
4801 ARM::VLD1d32QPseudo,
4802 ARM::VLD1d64QPseudo };
4803 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
4804 return;
4805 }
4806
4807 case Intrinsic::arm_neon_vld1x3: {
4808 static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
4809 ARM::VLD1d16TPseudo,
4810 ARM::VLD1d32TPseudo,
4811 ARM::VLD1d64TPseudo };
4812 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
4813 ARM::VLD1q16LowTPseudo_UPD,
4814 ARM::VLD1q32LowTPseudo_UPD,
4815 ARM::VLD1q64LowTPseudo_UPD };
4816 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
4817 ARM::VLD1q16HighTPseudo,
4818 ARM::VLD1q32HighTPseudo,
4819 ARM::VLD1q64HighTPseudo };
4820 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4821 return;
4822 }
4823
4824 case Intrinsic::arm_neon_vld1x4: {
4825 static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
4826 ARM::VLD1d16QPseudo,
4827 ARM::VLD1d32QPseudo,
4828 ARM::VLD1d64QPseudo };
4829 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
4830 ARM::VLD1q16LowQPseudo_UPD,
4831 ARM::VLD1q32LowQPseudo_UPD,
4832 ARM::VLD1q64LowQPseudo_UPD };
4833 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
4834 ARM::VLD1q16HighQPseudo,
4835 ARM::VLD1q32HighQPseudo,
4836 ARM::VLD1q64HighQPseudo };
4837 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4838 return;
4839 }
4840
4841 case Intrinsic::arm_neon_vld2: {
4842 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
4843 ARM::VLD2d32, ARM::VLD1q64 };
4844 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
4845 ARM::VLD2q32Pseudo };
4846 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
4847 return;
4848 }
4849
4850 case Intrinsic::arm_neon_vld3: {
4851 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
4852 ARM::VLD3d16Pseudo,
4853 ARM::VLD3d32Pseudo,
4854 ARM::VLD1d64TPseudo };
4855 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4856 ARM::VLD3q16Pseudo_UPD,
4857 ARM::VLD3q32Pseudo_UPD };
4858 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
4859 ARM::VLD3q16oddPseudo,
4860 ARM::VLD3q32oddPseudo };
4861 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4862 return;
4863 }
4864
4865 case Intrinsic::arm_neon_vld4: {
4866 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
4867 ARM::VLD4d16Pseudo,
4868 ARM::VLD4d32Pseudo,
4869 ARM::VLD1d64QPseudo };
4870 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
4871 ARM::VLD4q16Pseudo_UPD,
4872 ARM::VLD4q32Pseudo_UPD };
4873 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
4874 ARM::VLD4q16oddPseudo,
4875 ARM::VLD4q32oddPseudo };
4876 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4877 return;
4878 }
4879
4880 case Intrinsic::arm_neon_vld2dup: {
4881 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4882 ARM::VLD2DUPd32, ARM::VLD1q64 };
4883 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4884 ARM::VLD2DUPq16EvenPseudo,
4885 ARM::VLD2DUPq32EvenPseudo };
4886 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
4887 ARM::VLD2DUPq16OddPseudo,
4888 ARM::VLD2DUPq32OddPseudo };
4889 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
4890 DOpcodes, QOpcodes0, QOpcodes1);
4891 return;
4892 }
4893
4894 case Intrinsic::arm_neon_vld3dup: {
4895 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
4896 ARM::VLD3DUPd16Pseudo,
4897 ARM::VLD3DUPd32Pseudo,
4898 ARM::VLD1d64TPseudo };
4899 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4900 ARM::VLD3DUPq16EvenPseudo,
4901 ARM::VLD3DUPq32EvenPseudo };
4902 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
4903 ARM::VLD3DUPq16OddPseudo,
4904 ARM::VLD3DUPq32OddPseudo };
4905 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
4906 DOpcodes, QOpcodes0, QOpcodes1);
4907 return;
4908 }
4909
4910 case Intrinsic::arm_neon_vld4dup: {
4911 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
4912 ARM::VLD4DUPd16Pseudo,
4913 ARM::VLD4DUPd32Pseudo,
4914 ARM::VLD1d64QPseudo };
4915 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4916 ARM::VLD4DUPq16EvenPseudo,
4917 ARM::VLD4DUPq32EvenPseudo };
4918 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
4919 ARM::VLD4DUPq16OddPseudo,
4920 ARM::VLD4DUPq32OddPseudo };
4921 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
4922 DOpcodes, QOpcodes0, QOpcodes1);
4923 return;
4924 }
4925
4926 case Intrinsic::arm_neon_vld2lane: {
4927 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
4928 ARM::VLD2LNd16Pseudo,
4929 ARM::VLD2LNd32Pseudo };
4930 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
4931 ARM::VLD2LNq32Pseudo };
4932 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
4933 return;
4934 }
4935
4936 case Intrinsic::arm_neon_vld3lane: {
4937 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
4938 ARM::VLD3LNd16Pseudo,
4939 ARM::VLD3LNd32Pseudo };
4940 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
4941 ARM::VLD3LNq32Pseudo };
4942 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
4943 return;
4944 }
4945
4946 case Intrinsic::arm_neon_vld4lane: {
4947 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
4948 ARM::VLD4LNd16Pseudo,
4949 ARM::VLD4LNd32Pseudo };
4950 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
4951 ARM::VLD4LNq32Pseudo };
4952 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
4953 return;
4954 }
4955
4956 case Intrinsic::arm_neon_vst1: {
4957 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
4958 ARM::VST1d32, ARM::VST1d64 };
4959 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
4960 ARM::VST1q32, ARM::VST1q64 };
4961 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
4962 return;
4963 }
4964
4965 case Intrinsic::arm_neon_vst1x2: {
4966 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
4967 ARM::VST1q32, ARM::VST1q64 };
4968 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
4969 ARM::VST1d16QPseudo,
4970 ARM::VST1d32QPseudo,
4971 ARM::VST1d64QPseudo };
4972 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
4973 return;
4974 }
4975
4976 case Intrinsic::arm_neon_vst1x3: {
4977 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
4978 ARM::VST1d16TPseudo,
4979 ARM::VST1d32TPseudo,
4980 ARM::VST1d64TPseudo };
4981 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
4982 ARM::VST1q16LowTPseudo_UPD,
4983 ARM::VST1q32LowTPseudo_UPD,
4984 ARM::VST1q64LowTPseudo_UPD };
4985 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
4986 ARM::VST1q16HighTPseudo,
4987 ARM::VST1q32HighTPseudo,
4988 ARM::VST1q64HighTPseudo };
4989 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4990 return;
4991 }
4992
4993 case Intrinsic::arm_neon_vst1x4: {
4994 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
4995 ARM::VST1d16QPseudo,
4996 ARM::VST1d32QPseudo,
4997 ARM::VST1d64QPseudo };
4998 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
4999 ARM::VST1q16LowQPseudo_UPD,
5000 ARM::VST1q32LowQPseudo_UPD,
5001 ARM::VST1q64LowQPseudo_UPD };
5002 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
5003 ARM::VST1q16HighQPseudo,
5004 ARM::VST1q32HighQPseudo,
5005 ARM::VST1q64HighQPseudo };
5006 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
5007 return;
5008 }
5009
5010 case Intrinsic::arm_neon_vst2: {
5011 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
5012 ARM::VST2d32, ARM::VST1q64 };
5013 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
5014 ARM::VST2q32Pseudo };
5015 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
5016 return;
5017 }
5018
5019 case Intrinsic::arm_neon_vst3: {
5020 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
5021 ARM::VST3d16Pseudo,
5022 ARM::VST3d32Pseudo,
5023 ARM::VST1d64TPseudo };
5024 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
5025 ARM::VST3q16Pseudo_UPD,
5026 ARM::VST3q32Pseudo_UPD };
5027 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
5028 ARM::VST3q16oddPseudo,
5029 ARM::VST3q32oddPseudo };
5030 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
5031 return;
5032 }
5033
5034 case Intrinsic::arm_neon_vst4: {
5035 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
5036 ARM::VST4d16Pseudo,
5037 ARM::VST4d32Pseudo,
5038 ARM::VST1d64QPseudo };
5039 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
5040 ARM::VST4q16Pseudo_UPD,
5041 ARM::VST4q32Pseudo_UPD };
5042 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
5043 ARM::VST4q16oddPseudo,
5044 ARM::VST4q32oddPseudo };
5045 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
5046 return;
5047 }
5048
5049 case Intrinsic::arm_neon_vst2lane: {
5050 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
5051 ARM::VST2LNd16Pseudo,
5052 ARM::VST2LNd32Pseudo };
5053 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
5054 ARM::VST2LNq32Pseudo };
5055 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
5056 return;
5057 }
5058
5059 case Intrinsic::arm_neon_vst3lane: {
5060 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
5061 ARM::VST3LNd16Pseudo,
5062 ARM::VST3LNd32Pseudo };
5063 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
5064 ARM::VST3LNq32Pseudo };
5065 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
5066 return;
5067 }
5068
5069 case Intrinsic::arm_neon_vst4lane: {
5070 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
5071 ARM::VST4LNd16Pseudo,
5072 ARM::VST4LNd32Pseudo };
5073 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
5074 ARM::VST4LNq32Pseudo };
5075 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
5076 return;
5077 }
5078
5079 case Intrinsic::arm_mve_vldr_gather_base_wb:
5080 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
5081 static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
5082 ARM::MVE_VLDRDU64_qi_pre};
5083 SelectMVE_WB(N, Opcodes,
5084 IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
5085 return;
5086 }
5087
5088 case Intrinsic::arm_mve_vld2q: {
5089 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
5090 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
5091 ARM::MVE_VLD21_16};
5092 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
5093 ARM::MVE_VLD21_32};
5094 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
5095 SelectMVE_VLD(N, 2, Opcodes, false);
5096 return;
5097 }
5098
5099 case Intrinsic::arm_mve_vld4q: {
5100 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
5101 ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
5102 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
5103 ARM::MVE_VLD42_16,
5104 ARM::MVE_VLD43_16};
5105 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
5106 ARM::MVE_VLD42_32,
5107 ARM::MVE_VLD43_32};
5108 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
5109 SelectMVE_VLD(N, 4, Opcodes, false);
5110 return;
5111 }
5112 }
5113 break;
5114 }
5115
5116 case ISD::INTRINSIC_WO_CHAIN: {
5117 unsigned IntNo = N->getConstantOperandVal(0);
5118 switch (IntNo) {
5119 default:
5120 break;
5121
5122 // Scalar f32 -> bf16
5123 case Intrinsic::arm_neon_vcvtbfp2bf: {
5124 SDLoc dl(N);
5125 const SDValue &Src = N->getOperand(1);
5126 llvm::EVT DestTy = N->getValueType(0);
5127 SDValue Pred = getAL(CurDAG, dl);
5128 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
5129 SDValue Ops[] = { Src, Src, Pred, Reg0 };
5130 CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops);
5131 return;
5132 }
5133
5134 // Vector v4f32 -> v4bf16
5135 case Intrinsic::arm_neon_vcvtfp2bf: {
5136 SDLoc dl(N);
5137 const SDValue &Src = N->getOperand(1);
5138 SDValue Pred = getAL(CurDAG, dl);
5139 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
5140 SDValue Ops[] = { Src, Pred, Reg0 };
5141 CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops);
5142 return;
5143 }
5144
5145 case Intrinsic::arm_mve_urshrl:
5146 SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);
5147 return;
5148 case Intrinsic::arm_mve_uqshll:
5149 SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false);
5150 return;
5151 case Intrinsic::arm_mve_srshrl:
5152 SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false);
5153 return;
5154 case Intrinsic::arm_mve_sqshll:
5155 SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false);
5156 return;
5157 case Intrinsic::arm_mve_uqrshll:
5158 SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true);
5159 return;
5160 case Intrinsic::arm_mve_sqrshrl:
5161 SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
5162 return;
5163
5164 case Intrinsic::arm_mve_vadc:
5165 case Intrinsic::arm_mve_vadc_predicated:
5166 SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
5167 IntNo == Intrinsic::arm_mve_vadc_predicated);
5168 return;
5169 case Intrinsic::arm_mve_vsbc:
5170 case Intrinsic::arm_mve_vsbc_predicated:
5171 SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, false,
5172 IntNo == Intrinsic::arm_mve_vsbc_predicated);
5173 return;
5174 case Intrinsic::arm_mve_vshlc:
5175 case Intrinsic::arm_mve_vshlc_predicated:
5176 SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated);
5177 return;
5178
5179 case Intrinsic::arm_mve_vmlldava:
5180 case Intrinsic::arm_mve_vmlldava_predicated: {
5181 static const uint16_t OpcodesU[] = {
5182 ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32,
5183 ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32,
5184 };
5185 static const uint16_t OpcodesS[] = {
5186 ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32,
5187 ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32,
5188 ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32,
5189 ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
5190 ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,
5191 ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,
5192 ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,
5193 ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
5194 };
5195 SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
5196 OpcodesS, OpcodesU);
5197 return;
5198 }
5199
5200 case Intrinsic::arm_mve_vrmlldavha:
5201 case Intrinsic::arm_mve_vrmlldavha_predicated: {
5202 static const uint16_t OpcodesU[] = {
5203 ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,
5204 };
5205 static const uint16_t OpcodesS[] = {
5206 ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,
5207 ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
5208 ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,
5209 ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
5210 };
5211 SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
5212 OpcodesS, OpcodesU);
5213 return;
5214 }
5215
5216 case Intrinsic::arm_mve_vidup:
5217 case Intrinsic::arm_mve_vidup_predicated: {
5218 static const uint16_t Opcodes[] = {
5219 ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
5220 };
5221 SelectMVE_VxDUP(N, Opcodes, false,
5222 IntNo == Intrinsic::arm_mve_vidup_predicated);
5223 return;
5224 }
5225
5226 case Intrinsic::arm_mve_vddup:
5227 case Intrinsic::arm_mve_vddup_predicated: {
5228 static const uint16_t Opcodes[] = {
5229 ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
5230 };
5231 SelectMVE_VxDUP(N, Opcodes, false,
5232 IntNo == Intrinsic::arm_mve_vddup_predicated);
5233 return;
5234 }
5235
5236 case Intrinsic::arm_mve_viwdup:
5237 case Intrinsic::arm_mve_viwdup_predicated: {
5238 static const uint16_t Opcodes[] = {
5239 ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
5240 };
5241 SelectMVE_VxDUP(N, Opcodes, true,
5242 IntNo == Intrinsic::arm_mve_viwdup_predicated);
5243 return;
5244 }
5245
5246 case Intrinsic::arm_mve_vdwdup:
5247 case Intrinsic::arm_mve_vdwdup_predicated: {
5248 static const uint16_t Opcodes[] = {
5249 ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
5250 };
5251 SelectMVE_VxDUP(N, Opcodes, true,
5252 IntNo == Intrinsic::arm_mve_vdwdup_predicated);
5253 return;
5254 }
5255
5256 case Intrinsic::arm_cde_cx1d:
5257 case Intrinsic::arm_cde_cx1da:
5258 case Intrinsic::arm_cde_cx2d:
5259 case Intrinsic::arm_cde_cx2da:
5260 case Intrinsic::arm_cde_cx3d:
5261 case Intrinsic::arm_cde_cx3da: {
5262 bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da ||
5263 IntNo == Intrinsic::arm_cde_cx2da ||
5264 IntNo == Intrinsic::arm_cde_cx3da;
5265 size_t NumExtraOps;
5266 uint16_t Opcode;
5267 switch (IntNo) {
5268 case Intrinsic::arm_cde_cx1d:
5269 case Intrinsic::arm_cde_cx1da:
5270 NumExtraOps = 0;
5271 Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
5272 break;
5273 case Intrinsic::arm_cde_cx2d:
5274 case Intrinsic::arm_cde_cx2da:
5275 NumExtraOps = 1;
5276 Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
5277 break;
5278 case Intrinsic::arm_cde_cx3d:
5279 case Intrinsic::arm_cde_cx3da:
5280 NumExtraOps = 2;
5281 Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
5282 break;
5283 default:
5284 llvm_unreachable("Unexpected opcode");
5285 }
5286 SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
5287 return;
5288 }
5289 }
5290 break;
5291 }
5292
5293 case ISD::ATOMIC_CMP_SWAP:
5294 SelectCMP_SWAP(N);
5295 return;
5296 }
5297
5298 SelectCode(N);
5299}
5300
5301// Inspect a register string of the form
5302// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
5303 // cp<coprocessor>:<opc1>:c<CRm> (64bit); inspect the fields of the string
5304// and obtain the integer operands from them, adding these operands to the
5305// provided vector.
5306 static void getIntOperandsFromRegisterString(StringRef RegString,
5307 SelectionDAG *CurDAG,
5308 const SDLoc &DL,
5309 std::vector<SDValue> &Ops) {
5310 SmallVector<StringRef, 5> Fields;
5311 RegString.split(Fields, ':');
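// e.g. "cp15:0:c13:c0:3" splits into {"cp15", "0", "c13", "c0", "3"}; the
// loop below strips the "cp"/"c" prefixes and parses each field as a
// base-10 integer.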
5312
5313 if (Fields.size() > 1) {
5314 bool AllIntFields = true;
5315
5316 for (StringRef Field : Fields) {
5317 // Need to trim out leading 'cp' characters and get the integer field.
5318 unsigned IntField;
5319 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
5320 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
5321 }
5322
5323 assert(AllIntFields &&
5324 "Unexpected non-integer value in special register string.");
5325 (void)AllIntFields;
5326 }
5327}
5328
5329// Maps a Banked Register string to its mask value. The mask value returned is
5330// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
5331// mask operand, which expresses which register is to be used, e.g. r8, and in
5332// which mode it is to be used, e.g. usr. Returns -1 to signify that the string
5333// was invalid.
5334static inline int getBankedRegisterMask(StringRef RegString) {
5335 auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
5336 if (!TheReg)
5337 return -1;
5338 return TheReg->Encoding;
5339}
5340
5341// The flags here are common to those allowed for apsr in the A class cores and
5342// those allowed for the special registers in the M class cores. Returns a
5343 // value representing which flags were present, or -1 if invalid.
5344static inline int getMClassFlagsMask(StringRef Flags) {
5345 return StringSwitch<int>(Flags)
5346 .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
5347 // correct when flags are not permitted
5348 .Case("g", 0x1)
5349 .Case("nzcvq", 0x2)
5350 .Case("nzcvqg", 0x3)
5351 .Default(-1);
5352}
5353
5354 // Maps an M Class special register string to its value for use in the
5355// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
5356// Returns -1 to signify that the string was invalid.
5357static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
5358 auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
5359 const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
5360 if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
5361 return -1;
5362 return (int)(TheReg->Encoding & 0xFFF); // SYSm value
5363}
5364
5365 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
5366 // The mask operand contains the special register (R Bit) in bit 4, whether
5367 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
5368 // bits 3-0 contain the fields to be accessed in the special register, set by
5369 // the flags provided with the register.
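// For example, "spsr_fc" yields 0x10 | 0x8 | 0x1 = 0x19, while plain "cpsr"
// (treated as "cpsr_fc") yields 0x9.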
5370 int Mask = 0;
5371 if (Reg == "apsr") {
5372 // The flags permitted for apsr are the same flags that are allowed in
5373 // M class registers. We get the flag value and then shift the flags into
5374 // the correct place to combine with the mask.
5375 Mask = getMClassFlagsMask(Flags);
5376 if (Mask == -1)
5377 return -1;
5378 return Mask << 2;
5379 }
5380
5381 if (Reg != "cpsr" && Reg != "spsr") {
5382 return -1;
5383 }
5384
5385 // This is the same as if the flags were "fc"
5386 if (Flags.empty() || Flags == "all")
5387 return Mask | 0x9;
5388
5389 // Inspect the supplied flags string and set the bits in the mask for
5390 // the relevant and valid flags allowed for cpsr and spsr.
5391 for (char Flag : Flags) {
5392 int FlagVal;
5393 switch (Flag) {
5394 case 'c':
5395 FlagVal = 0x1;
5396 break;
5397 case 'x':
5398 FlagVal = 0x2;
5399 break;
5400 case 's':
5401 FlagVal = 0x4;
5402 break;
5403 case 'f':
5404 FlagVal = 0x8;
5405 break;
5406 default:
5407 FlagVal = 0;
5408 }
5409
5410 // This avoids allowing strings where the same flag bit appears twice.
5411 if (!FlagVal || (Mask & FlagVal))
5412 return -1;
5413 Mask |= FlagVal;
5414 }
5415
5416 // If the register is spsr then we need to set the R bit.
5417 if (Reg == "spsr")
5418 Mask |= 0x10;
5419
5420 return Mask;
5421}
5422
5423// Lower the read_register intrinsic to ARM specific DAG nodes
5424// using the supplied metadata string to select the instruction node to use
5425// and the registers/masks to construct as operands for the node.
5426bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
5427 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
5428 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
5429 bool IsThumb2 = Subtarget->isThumb2();
5430 SDLoc DL(N);
5431
5432 std::vector<SDValue> Ops;
5433 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
5434
5435 if (!Ops.empty()) {
5436 // If the special register string was constructed of fields (as defined
5437 // in the ACLE) then we need to lower to an MRC node (32 bit) or an
5438 // MRRC node (64 bit); we can make the distinction based on the number of
5439 // operands we have.
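// (A 32-bit string "cp<n>:<opc1>:c<CRn>:c<CRm>:<opc2>" produces five operands,
// a 64-bit string "cp<n>:<opc1>:c<CRm>" produces three.)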
5440 unsigned Opcode;
5441 SmallVector<EVT, 3> ResTypes;
5442 if (Ops.size() == 5){
5443 Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
5444 ResTypes.append({ MVT::i32, MVT::Other });
5445 } else {
5446 assert(Ops.size() == 3 &&
5447 "Invalid number of fields in special register string.");
5448 Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
5449 ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
5450 }
5451
5452 Ops.push_back(getAL(CurDAG, DL));
5453 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
5454 Ops.push_back(N->getOperand(0));
5455 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
5456 return true;
5457 }
5458
5459 std::string SpecialReg = RegString->getString().lower();
5460
5461 int BankedReg = getBankedRegisterMask(SpecialReg);
5462 if (BankedReg != -1) {
5463 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
5464 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5465 N->getOperand(0) };
5466 ReplaceNode(
5467 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
5468 DL, MVT::i32, MVT::Other, Ops));
5469 return true;
5470 }
5471
5472 // The VFP registers are read by creating SelectionDAG nodes with opcodes
5473 // corresponding to the register that is being read from. So we switch on the
5474 // string to find which opcode we need to use.
5475 unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
5476 .Case("fpscr", ARM::VMRS)
5477 .Case("fpexc", ARM::VMRS_FPEXC)
5478 .Case("fpsid", ARM::VMRS_FPSID)
5479 .Case("mvfr0", ARM::VMRS_MVFR0)
5480 .Case("mvfr1", ARM::VMRS_MVFR1)
5481 .Case("mvfr2", ARM::VMRS_MVFR2)
5482 .Case("fpinst", ARM::VMRS_FPINST)
5483 .Case("fpinst2", ARM::VMRS_FPINST2)
5484 .Default(0);
5485
5486 // If an opcode was found then we can lower the read to a VFP instruction.
5487 if (Opcode) {
5488 if (!Subtarget->hasVFP2Base())
5489 return false;
5490 if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
5491 return false;
5492
5493 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5494 N->getOperand(0) };
5495 ReplaceNode(N,
5496 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
5497 return true;
5498 }
5499
5500 // If the target is M Class then we need to validate that the register string
5501 // is an acceptable value, so check that a mask can be constructed from the
5502 // string.
5503 if (Subtarget->isMClass()) {
5504 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
5505 if (SYSmValue == -1)
5506 return false;
5507
5508 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
5509 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5510 N->getOperand(0) };
5511 ReplaceNode(
5512 N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
5513 return true;
5514 }
5515
5516 // Here we know the target is not M Class so we need to check if it is one
5517 // of the remaining possible values which are apsr, cpsr or spsr.
5518 if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
5519 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5520 N->getOperand(0) };
5521 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
5522 DL, MVT::i32, MVT::Other, Ops));
5523 return true;
5524 }
5525
5526 if (SpecialReg == "spsr") {
5527 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5528 N->getOperand(0) };
5529 ReplaceNode(
5530 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
5531 MVT::i32, MVT::Other, Ops));
5532 return true;
5533 }
5534
5535 return false;
5536}
5537
5538// Lower the write_register intrinsic to ARM specific DAG nodes
5539// using the supplied metadata string to select the instruction node to use
5540 // and the registers/masks to use in the nodes.
5541bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
5542 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
5543 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
5544 bool IsThumb2 = Subtarget->isThumb2();
5545 SDLoc DL(N);
5546
5547 std::vector<SDValue> Ops;
5548 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
5549
5550 if (!Ops.empty()) {
5551 // If the special register string was constructed of fields (as defined
5552 // in the ACLE) then we need to lower to an MCR node (32 bit) or an
5553 // MCRR node (64 bit); we can make the distinction based on the number of
5554 // operands we have.
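// (Five fields lower to MCR with a single write value; three fields lower to
// MCRR with a pair of write values, inserted after the first two operands.)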
5555 unsigned Opcode;
5556 if (Ops.size() == 5) {
5557 Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
5558 Ops.insert(Ops.begin()+2, N->getOperand(2));
5559 } else {
5560 assert(Ops.size() == 3 &&
5561 "Invalid number of fields in special register string.");
5562 Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
5563 SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
5564 Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
5565 }
5566
5567 Ops.push_back(getAL(CurDAG, DL));
5568 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
5569 Ops.push_back(N->getOperand(0));
5570
5571 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
5572 return true;
5573 }
5574
5575 std::string SpecialReg = RegString->getString().lower();
5576 int BankedReg = getBankedRegisterMask(SpecialReg);
5577 if (BankedReg != -1) {
5578 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
5579 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5580 N->getOperand(0) };
5581 ReplaceNode(
5582 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
5583 DL, MVT::Other, Ops));
5584 return true;
5585 }
5586
5587 // The VFP registers are written to by creating SelectionDAG nodes with
5588 // opcodes corresponding to the register that is being written. So we switch
5589 // on the string to find which opcode we need to use.
5590 unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
5591 .Case("fpscr", ARM::VMSR)
5592 .Case("fpexc", ARM::VMSR_FPEXC)
5593 .Case("fpsid", ARM::VMSR_FPSID)
5594 .Case("fpinst", ARM::VMSR_FPINST)
5595 .Case("fpinst2", ARM::VMSR_FPINST2)
5596 .Default(0);
5597
5598 if (Opcode) {
5599 if (!Subtarget->hasVFP2Base())
5600 return false;
5601 Ops = { N->getOperand(2), getAL(CurDAG, DL),
5602 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
5603 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
5604 return true;
5605 }
5606
5607 std::pair<StringRef, StringRef> Fields;
5608 Fields = StringRef(SpecialReg).rsplit('_');
5609 std::string Reg = Fields.first.str();
5610 StringRef Flags = Fields.second;
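// e.g. "spsr_fc" splits into Reg "spsr" and Flags "fc"; a string with no '_'
// leaves Flags empty.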
5611
5612 // If the target is M Class then we need to validate the special register value
5613 // and retrieve the mask for use in the instruction node.
5614 if (Subtarget->isMClass()) {
5615 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
5616 if (SYSmValue == -1)
5617 return false;
5618
5619 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
5620 N->getOperand(2), getAL(CurDAG, DL),
5621 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
5622 ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
5623 return true;
5624 }
5625
5626 // We then check to see if a valid mask can be constructed for one of the
5627 // register string values permitted for the A and R class cores. These values
5628 // are apsr, spsr and cpsr; these are also valid on older cores.
5629 int Mask = getARClassRegisterMask(Reg, Flags);
5630 if (Mask != -1) {
5631 Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
5632 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5633 N->getOperand(0) };
5634 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
5635 DL, MVT::Other, Ops));
5636 return true;
5637 }
5638
5639 return false;
5640}
5641
5642bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
5643 std::vector<SDValue> AsmNodeOperands;
5644 InlineAsm::Flag Flag;
5645 bool Changed = false;
5646 unsigned NumOps = N->getNumOperands();
5647
5648 // Normally, i64 data is bound to two arbitrary GPRs for the "%r" constraint.
5649 // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
5650 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
5651 // respectively. Since there is no constraint to explicitly specify a
5652 // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
5653 // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
5654 // them into a GPRPair.
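// A typical trigger is inline asm such as "ldrexd %0, %H0, [%1]" with a
// 64-bit "=&r" output, which needs the two halves in an even/odd register
// pair.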
5655
5656 SDLoc dl(N);
5657 SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps - 1) : SDValue();
5658
5659 SmallVector<bool, 8> OpChanged;
5660 // Glue node will be appended late.
5661 for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
5662 SDValue op = N->getOperand(i);
5663 AsmNodeOperands.push_back(op);
5664
5665 if (i < InlineAsm::Op_FirstOperand)
5666 continue;
5667
5668 if (const auto *C = dyn_cast<ConstantSDNode>(N->getOperand(i)))
5669 Flag = InlineAsm::Flag(C->getZExtValue());
5670 else
5671 continue;
5672
5673 // Immediate operands to inline asm in the SelectionDAG are modeled with
5674 // two operands. The first is a constant of value InlineAsm::Kind::Imm, and
5675 // the second is a constant with the value of the immediate. If we get here
5676 // and we have a Kind::Imm, skip the next operand, and continue.
5677 if (Flag.isImmKind()) {
5678 SDValue op = N->getOperand(++i);
5679 AsmNodeOperands.push_back(op);
5680 continue;
5681 }
5682
5683 const unsigned NumRegs = Flag.getNumOperandRegisters();
5684 if (NumRegs)
5685 OpChanged.push_back(false);
5686
5687 unsigned DefIdx = 0;
5688 bool IsTiedToChangedOp = false;
5689 // If it's a use that is tied with a previous def, it has no
5690 // reg class constraint.
5691 if (Changed && Flag.isUseOperandTiedToDef(DefIdx))
5692 IsTiedToChangedOp = OpChanged[DefIdx];
5693
5694 // Memory operands to inline asm in the SelectionDAG are modeled with two
5695 // operands: a constant of value InlineAsm::Kind::Mem followed by the input
5696 // operand. If we get here and we have a Kind::Mem, skip the next operand
5697 // (so it doesn't get misinterpreted), and continue. We do this here because
5698 // it's important to update the OpChanged array correctly before moving on.
5699 if (Flag.isMemKind()) {
5700 SDValue op = N->getOperand(++i);
5701 AsmNodeOperands.push_back(op);
5702 continue;
5703 }
5704
5705 if (!Flag.isRegUseKind() && !Flag.isRegDefKind() &&
5706 !Flag.isRegDefEarlyClobberKind())
5707 continue;
5708
5709 unsigned RC;
5710 const bool HasRC = Flag.hasRegClassConstraint(RC);
5711 if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
5712 || NumRegs != 2)
5713 continue;
5714
5715 assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
5716 SDValue V0 = N->getOperand(i+1);
5717 SDValue V1 = N->getOperand(i+2);
5718 Register Reg0 = cast<RegisterSDNode>(V0)->getReg();
5719 Register Reg1 = cast<RegisterSDNode>(V1)->getReg();
5720 SDValue PairedReg;
5721 MachineRegisterInfo &MRI = MF->getRegInfo();
5722
5723 if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) {
5724 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
5725 // the original GPRs.
5726
5727 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
5728 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
5729 SDValue Chain = SDValue(N,0);
5730
5731 SDNode *GU = N->getGluedUser();
5732 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
5733 Chain.getValue(1));
5734
5735 // Extract values from a GPRPair reg and copy to the original GPR reg.
5736 SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
5737 RegCopy);
5738 SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
5739 RegCopy);
5740 SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
5741 RegCopy.getValue(1));
5742 SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
5743
5744 // Update the original glue user.
5745 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
5746 Ops.push_back(T1.getValue(1));
5747 CurDAG->UpdateNodeOperands(GU, Ops);
5748 } else {
5749 // For Kind == InlineAsm::Kind::RegUse, we first copy two GPRs into a
5750 // GPRPair and then pass the GPRPair to the inline asm.
5751 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
5752
5753 // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
5754 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
5755 Chain.getValue(1));
5756 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
5757 T0.getValue(1));
5758 SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
5759
5760 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
5761 // i32 VRs of inline asm with it.
5762 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
5763 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
5764 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
5765
5766 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
5767 Glue = Chain.getValue(1);
5768 }
5769
5770 Changed = true;
5771
5772 if (PairedReg.getNode()) {
5773 OpChanged[OpChanged.size() - 1] = true;
5774 Flag = InlineAsm::Flag(Flag.getKind(), 1 /* RegNum */);
5775 if (IsTiedToChangedOp)
5776 Flag.setMatchingOp(DefIdx);
5777 else
5778 Flag.setRegClass(ARM::GPRPairRegClassID);
5779 // Replace the current flag.
5780 AsmNodeOperands[AsmNodeOperands.size() - 1] = CurDAG->getTargetConstant(
5781 Flag, dl, MVT::i32);
5782 // Add the new register node and skip the original two GPRs.
5783 AsmNodeOperands.push_back(PairedReg);
5784 // Skip the next two GPRs.
5785 i += 2;
5786 }
5787 }
5788
5789 if (Glue.getNode())
5790 AsmNodeOperands.push_back(Glue);
5791 if (!Changed)
5792 return false;
5793
5794 SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
5795 CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
5796 New->setNodeId(-1);
5797 ReplaceNode(N, New.getNode());
5798 return true;
5799}
5800
5801bool ARMDAGToDAGISel::SelectInlineAsmMemoryOperand(
5802 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
5803 std::vector<SDValue> &OutOps) {
5804 switch(ConstraintID) {
5805 default:
5806 llvm_unreachable("Unexpected asm memory constraint");
5807 case InlineAsm::ConstraintCode::m:
5808 case InlineAsm::ConstraintCode::o:
5809 case InlineAsm::ConstraintCode::Q:
5810 case InlineAsm::ConstraintCode::Um:
5811 case InlineAsm::ConstraintCode::Un:
5812 case InlineAsm::ConstraintCode::Uq:
5813 case InlineAsm::ConstraintCode::Us:
5814 case InlineAsm::ConstraintCode::Ut:
5815 case InlineAsm::ConstraintCode::Uv:
5816 case InlineAsm::ConstraintCode::Uy:
5817 // Require the address to be in a register. That is safe for all ARM
5818 // variants and it is hard to do anything much smarter without knowing
5819 // how the operand is used.
5820 OutOps.push_back(Op);
5821 return false;
5822 }
5823 return true;
5824}
5825
5826 /// createARMISelDag - This pass converts a legalized DAG into an
5827/// ARM-specific DAG, ready for instruction scheduling.
5828///
5830 CodeGenOptLevel OptLevel) {
5831 return new ARMDAGToDAGISelLegacy(TM, OptLevel);
5832}