1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
16#include "SIDefines.h"
17#include "SIInstrInfo.h"
22#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/StringSet.h"
25#include "llvm/ADT/Twine.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInst.h"
32#include "llvm/MC/MCInstrDesc.h"
38#include "llvm/MC/MCSymbol.h"
46#include <optional>
47
48using namespace llvm;
49using namespace llvm::AMDGPU;
50using namespace llvm::amdhsa;
51
52namespace {
53
54class AMDGPUAsmParser;
55
56enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
57
58enum class LitModifier { None, Lit, Lit64 };
59
60//===----------------------------------------------------------------------===//
61// Operand
62//===----------------------------------------------------------------------===//
63
64class AMDGPUOperand : public MCParsedAsmOperand {
65 enum KindTy {
66 Token,
67 Immediate,
68 Register,
69 Expression
70 } Kind;
71
72 SMLoc StartLoc, EndLoc;
73 const AMDGPUAsmParser *AsmParser;
74
75public:
76 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
77 : Kind(Kind_), AsmParser(AsmParser_) {}
78
79 using Ptr = std::unique_ptr<AMDGPUOperand>;
80
81 struct Modifiers {
82 bool Abs = false;
83 bool Neg = false;
84 bool Sext = false;
85 LitModifier Lit = LitModifier::None;
86
87 bool hasFPModifiers() const { return Abs || Neg; }
88 bool hasIntModifiers() const { return Sext; }
89 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
90
91 int64_t getFPModifiersOperand() const {
92 int64_t Operand = 0;
93 Operand |= Abs ? SISrcMods::ABS : 0u;
94 Operand |= Neg ? SISrcMods::NEG : 0u;
95 return Operand;
96 }
97
98 int64_t getIntModifiersOperand() const {
99 int64_t Operand = 0;
100 Operand |= Sext ? SISrcMods::SEXT : 0u;
101 return Operand;
102 }
103
104 int64_t getModifiersOperand() const {
105 assert(!(hasFPModifiers() && hasIntModifiers())
106 && "fp and int modifiers should not be used simultaneously");
107 if (hasFPModifiers())
108 return getFPModifiersOperand();
109 if (hasIntModifiers())
110 return getIntModifiersOperand();
111 return 0;
112 }
113
114 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
115 };
116
117 enum ImmTy {
118 ImmTyNone,
119 ImmTyGDS,
120 ImmTyLDS,
121 ImmTyOffen,
122 ImmTyIdxen,
123 ImmTyAddr64,
124 ImmTyOffset,
125 ImmTyInstOffset,
126 ImmTyOffset0,
127 ImmTyOffset1,
128 ImmTySMEMOffsetMod,
129 ImmTyCPol,
130 ImmTyTFE,
131 ImmTyD16,
132 ImmTyClamp,
133 ImmTyOModSI,
134 ImmTySDWADstSel,
135 ImmTySDWASrc0Sel,
136 ImmTySDWASrc1Sel,
137 ImmTySDWADstUnused,
138 ImmTyDMask,
139 ImmTyDim,
140 ImmTyUNorm,
141 ImmTyDA,
142 ImmTyR128A16,
143 ImmTyA16,
144 ImmTyLWE,
145 ImmTyExpTgt,
146 ImmTyExpCompr,
147 ImmTyExpVM,
148 ImmTyFORMAT,
149 ImmTyHwreg,
150 ImmTyOff,
151 ImmTySendMsg,
152 ImmTyInterpSlot,
153 ImmTyInterpAttr,
154 ImmTyInterpAttrChan,
155 ImmTyOpSel,
156 ImmTyOpSelHi,
157 ImmTyNegLo,
158 ImmTyNegHi,
159 ImmTyIndexKey8bit,
160 ImmTyIndexKey16bit,
161 ImmTyIndexKey32bit,
162 ImmTyDPP8,
163 ImmTyDppCtrl,
164 ImmTyDppRowMask,
165 ImmTyDppBankMask,
166 ImmTyDppBoundCtrl,
167 ImmTyDppFI,
168 ImmTySwizzle,
169 ImmTyGprIdxMode,
170 ImmTyHigh,
171 ImmTyBLGP,
172 ImmTyCBSZ,
173 ImmTyABID,
174 ImmTyEndpgm,
175 ImmTyWaitVDST,
176 ImmTyWaitEXP,
177 ImmTyWaitVAVDst,
178 ImmTyWaitVMVSrc,
179 ImmTyBitOp3,
180 ImmTyMatrixAFMT,
181 ImmTyMatrixBFMT,
182 ImmTyMatrixAScale,
183 ImmTyMatrixBScale,
184 ImmTyMatrixAScaleFmt,
185 ImmTyMatrixBScaleFmt,
186 ImmTyMatrixAReuse,
187 ImmTyMatrixBReuse,
188 ImmTyScaleSel,
189 ImmTyByteSel,
190 };
191
192private:
193 struct TokOp {
194 const char *Data;
195 unsigned Length;
196 };
197
198 struct ImmOp {
199 int64_t Val;
200 ImmTy Type;
201 bool IsFPImm;
202 Modifiers Mods;
203 };
204
205 struct RegOp {
206 MCRegister RegNo;
207 Modifiers Mods;
208 };
209
210 union {
211 TokOp Tok;
212 ImmOp Imm;
213 RegOp Reg;
214 const MCExpr *Expr;
215 };
216
217 // The index of the associated MCInst operand.
218 mutable int MCOpIdx = -1;
219
220public:
221 bool isToken() const override { return Kind == Token; }
222
223 bool isSymbolRefExpr() const {
224 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
225 }
226
227 bool isImm() const override {
228 return Kind == Immediate;
229 }
230
231 bool isInlinableImm(MVT type) const;
232 bool isLiteralImm(MVT type) const;
233
234 bool isRegKind() const {
235 return Kind == Register;
236 }
237
238 bool isReg() const override {
239 return isRegKind() && !hasModifiers();
240 }
241
242 bool isRegOrInline(unsigned RCID, MVT type) const {
243 return isRegClass(RCID) || isInlinableImm(type);
244 }
245
246 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
247 return isRegOrInline(RCID, type) || isLiteralImm(type);
248 }
249
250 bool isRegOrImmWithInt16InputMods() const {
251 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
252 }
253
254 template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
255 return isRegOrImmWithInputMods(
256 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
257 }
258
259 bool isRegOrImmWithInt32InputMods() const {
260 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
261 }
262
263 bool isRegOrInlineImmWithInt16InputMods() const {
264 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
265 }
266
267 template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
268 return isRegOrInline(
269 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
270 }
271
272 bool isRegOrInlineImmWithInt32InputMods() const {
273 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
274 }
275
276 bool isRegOrImmWithInt64InputMods() const {
277 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
278 }
279
280 bool isRegOrImmWithFP16InputMods() const {
281 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
282 }
283
284 template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
285 return isRegOrImmWithInputMods(
286 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
287 }
288
289 bool isRegOrImmWithFP32InputMods() const {
290 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
291 }
292
293 bool isRegOrImmWithFP64InputMods() const {
294 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
295 }
296
297 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
298 return isRegOrInline(
299 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
300 }
301
302 bool isRegOrInlineImmWithFP32InputMods() const {
303 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
304 }
305
306 bool isRegOrInlineImmWithFP64InputMods() const {
307 return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
308 }
309
310 bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }
311
312 bool isVRegWithFP32InputMods() const {
313 return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
314 }
315
316 bool isVRegWithFP64InputMods() const {
317 return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
318 }
319
320 bool isPackedFP16InputMods() const {
321 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
322 }
323
324 bool isPackedVGPRFP32InputMods() const {
325 return isRegOrImmWithInputMods(AMDGPU::VReg_64RegClassID, MVT::v2f32);
326 }
327
328 bool isVReg() const {
329 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
330 isRegClass(AMDGPU::VReg_64RegClassID) ||
331 isRegClass(AMDGPU::VReg_96RegClassID) ||
332 isRegClass(AMDGPU::VReg_128RegClassID) ||
333 isRegClass(AMDGPU::VReg_160RegClassID) ||
334 isRegClass(AMDGPU::VReg_192RegClassID) ||
335 isRegClass(AMDGPU::VReg_256RegClassID) ||
336 isRegClass(AMDGPU::VReg_512RegClassID) ||
337 isRegClass(AMDGPU::VReg_1024RegClassID);
338 }
339
340 bool isVReg32() const {
341 return isRegClass(AMDGPU::VGPR_32RegClassID);
342 }
343
344 bool isVReg32OrOff() const {
345 return isOff() || isVReg32();
346 }
347
348 bool isNull() const {
349 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
350 }
351
352 bool isVRegWithInputMods() const;
353 template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
354 template <bool IsFake16> bool isT16VRegWithInputMods() const;
355
356 bool isSDWAOperand(MVT type) const;
357 bool isSDWAFP16Operand() const;
358 bool isSDWAFP32Operand() const;
359 bool isSDWAInt16Operand() const;
360 bool isSDWAInt32Operand() const;
361
362 bool isImmTy(ImmTy ImmT) const {
363 return isImm() && Imm.Type == ImmT;
364 }
365
366 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
367
368 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
369
370 bool isImmModifier() const {
371 return isImm() && Imm.Type != ImmTyNone;
372 }
373
374 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
375 bool isDim() const { return isImmTy(ImmTyDim); }
376 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
377 bool isOff() const { return isImmTy(ImmTyOff); }
378 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
379 bool isOffen() const { return isImmTy(ImmTyOffen); }
380 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
381 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
382 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
383 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
384 bool isGDS() const { return isImmTy(ImmTyGDS); }
385 bool isLDS() const { return isImmTy(ImmTyLDS); }
386 bool isCPol() const { return isImmTy(ImmTyCPol); }
387 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
388 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
389 bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
390 bool isMatrixAFMT() const { return isImmTy(ImmTyMatrixAFMT); }
391 bool isMatrixBFMT() const { return isImmTy(ImmTyMatrixBFMT); }
392 bool isMatrixAScale() const { return isImmTy(ImmTyMatrixAScale); }
393 bool isMatrixBScale() const { return isImmTy(ImmTyMatrixBScale); }
394 bool isMatrixAScaleFmt() const { return isImmTy(ImmTyMatrixAScaleFmt); }
395 bool isMatrixBScaleFmt() const { return isImmTy(ImmTyMatrixBScaleFmt); }
396 bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
397 bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
398 bool isTFE() const { return isImmTy(ImmTyTFE); }
399 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
400 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
401 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
402 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
403 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
404 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
405 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
406 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
407 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
408 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
409 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
410 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
411 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
412 bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
413
414 bool isRegOrImm() const {
415 return isReg() || isImm();
416 }
417
418 bool isRegClass(unsigned RCID) const;
419
420 bool isInlineValue() const;
421
422 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
423 return isRegOrInline(RCID, type) && !hasModifiers();
424 }
425
426 bool isSCSrcB16() const {
427 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
428 }
429
430 bool isSCSrcV2B16() const {
431 return isSCSrcB16();
432 }
433
434 bool isSCSrc_b32() const {
435 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
436 }
437
438 bool isSCSrc_b64() const {
439 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
440 }
441
442 bool isBoolReg() const;
443
444 bool isSCSrcF16() const {
445 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
446 }
447
448 bool isSCSrcV2F16() const {
449 return isSCSrcF16();
450 }
451
452 bool isSCSrcF32() const {
453 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
454 }
455
456 bool isSCSrcF64() const {
457 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
458 }
459
460 bool isSSrc_b32() const {
461 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
462 }
463
464 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
465
466 bool isSSrcV2B16() const {
467 llvm_unreachable("cannot happen");
468 return isSSrc_b16();
469 }
470
471 bool isSSrc_b64() const {
472 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
473 // See isVSrc64().
474 return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
475 (((const MCTargetAsmParser *)AsmParser)
476 ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
477 isExpr());
478 }
479
480 bool isSSrc_f32() const {
481 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
482 }
483
484 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
485
486 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
487
488 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
489
490 bool isSSrcV2F16() const {
491 llvm_unreachable("cannot happen");
492 return isSSrc_f16();
493 }
494
495 bool isSSrcV2FP32() const {
496 llvm_unreachable("cannot happen");
497 return isSSrc_f32();
498 }
499
500 bool isSCSrcV2FP32() const {
501 llvm_unreachable("cannot happen");
502 return isSCSrcF32();
503 }
504
505 bool isSSrcV2INT32() const {
506 llvm_unreachable("cannot happen");
507 return isSSrc_b32();
508 }
509
510 bool isSCSrcV2INT32() const {
511 llvm_unreachable("cannot happen");
512 return isSCSrc_b32();
513 }
514
515 bool isSSrcOrLds_b32() const {
516 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
517 isLiteralImm(MVT::i32) || isExpr();
518 }
519
520 bool isVCSrc_b32() const {
521 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
522 }
523
524 bool isVCSrc_b32_Lo256() const {
525 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
526 }
527
528 bool isVCSrc_b64_Lo256() const {
529 return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
530 }
531
532 bool isVCSrc_b64() const {
533 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
534 }
535
536 bool isVCSrcT_b16() const {
537 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
538 }
539
540 bool isVCSrcTB16_Lo128() const {
541 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
542 }
543
544 bool isVCSrcFake16B16_Lo128() const {
545 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
546 }
547
548 bool isVCSrc_b16() const {
549 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
550 }
551
552 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
553
554 bool isVCSrc_f32() const {
555 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
556 }
557
558 bool isVCSrc_f64() const {
559 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
560 }
561
562 bool isVCSrcTBF16() const {
563 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
564 }
565
566 bool isVCSrcT_f16() const {
567 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
568 }
569
570 bool isVCSrcT_bf16() const {
571 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
572 }
573
574 bool isVCSrcTBF16_Lo128() const {
575 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
576 }
577
578 bool isVCSrcTF16_Lo128() const {
579 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
580 }
581
582 bool isVCSrcFake16BF16_Lo128() const {
583 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
584 }
585
586 bool isVCSrcFake16F16_Lo128() const {
587 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
588 }
589
590 bool isVCSrc_bf16() const {
591 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
592 }
593
594 bool isVCSrc_f16() const {
595 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
596 }
597
598 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
599
600 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
601
602 bool isVSrc_b32() const {
603 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
604 }
605
606 bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }
607
608 bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
609
610 bool isVSrcT_b16_Lo128() const {
611 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
612 }
613
614 bool isVSrcFake16_b16_Lo128() const {
615 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
616 }
617
618 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
619
620 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
621
622 bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
623
624 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
625
626 bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
627
628 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
629
630 bool isVSrc_f32() const {
631 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
632 }
633
634 bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }
635
636 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
637
638 bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
639
640 bool isVSrcT_bf16_Lo128() const {
641 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
642 }
643
644 bool isVSrcT_f16_Lo128() const {
645 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
646 }
647
648 bool isVSrcFake16_bf16_Lo128() const {
649 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
650 }
651
652 bool isVSrcFake16_f16_Lo128() const {
653 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
654 }
655
656 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
657
658 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
659
660 bool isVSrc_v2bf16() const {
661 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
662 }
663
664 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
665
666 bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
667
668 bool isVISrcB32() const {
669 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
670 }
671
672 bool isVISrcB16() const {
673 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
674 }
675
676 bool isVISrcV2B16() const {
677 return isVISrcB16();
678 }
679
680 bool isVISrcF32() const {
681 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
682 }
683
684 bool isVISrcF16() const {
685 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
686 }
687
688 bool isVISrcV2F16() const {
689 return isVISrcF16() || isVISrcB32();
690 }
691
692 bool isVISrc_64_bf16() const {
693 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
694 }
695
696 bool isVISrc_64_f16() const {
697 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
698 }
699
700 bool isVISrc_64_b32() const {
701 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
702 }
703
704 bool isVISrc_64B64() const {
705 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
706 }
707
708 bool isVISrc_64_f64() const {
709 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
710 }
711
712 bool isVISrc_64V2FP32() const {
713 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
714 }
715
716 bool isVISrc_64V2INT32() const {
717 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
718 }
719
720 bool isVISrc_256_b32() const {
721 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
722 }
723
724 bool isVISrc_256_f32() const {
725 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
726 }
727
728 bool isVISrc_256B64() const {
729 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
730 }
731
732 bool isVISrc_256_f64() const {
733 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
734 }
735
736 bool isVISrc_512_f64() const {
737 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
738 }
739
740 bool isVISrc_128B16() const {
741 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
742 }
743
744 bool isVISrc_128V2B16() const {
745 return isVISrc_128B16();
746 }
747
748 bool isVISrc_128_b32() const {
749 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
750 }
751
752 bool isVISrc_128_f32() const {
753 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
754 }
755
756 bool isVISrc_256V2FP32() const {
757 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
758 }
759
760 bool isVISrc_256V2INT32() const {
761 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
762 }
763
764 bool isVISrc_512_b32() const {
765 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
766 }
767
768 bool isVISrc_512B16() const {
769 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
770 }
771
772 bool isVISrc_512V2B16() const {
773 return isVISrc_512B16();
774 }
775
776 bool isVISrc_512_f32() const {
777 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
778 }
779
780 bool isVISrc_512F16() const {
781 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
782 }
783
784 bool isVISrc_512V2F16() const {
785 return isVISrc_512F16() || isVISrc_512_b32();
786 }
787
788 bool isVISrc_1024_b32() const {
789 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
790 }
791
792 bool isVISrc_1024B16() const {
793 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
794 }
795
796 bool isVISrc_1024V2B16() const {
797 return isVISrc_1024B16();
798 }
799
800 bool isVISrc_1024_f32() const {
801 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
802 }
803
804 bool isVISrc_1024F16() const {
805 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
806 }
807
808 bool isVISrc_1024V2F16() const {
809 return isVISrc_1024F16() || isVISrc_1024_b32();
810 }
811
812 bool isAISrcB32() const {
813 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
814 }
815
816 bool isAISrcB16() const {
817 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
818 }
819
820 bool isAISrcV2B16() const {
821 return isAISrcB16();
822 }
823
824 bool isAISrcF32() const {
825 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
826 }
827
828 bool isAISrcF16() const {
829 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
830 }
831
832 bool isAISrcV2F16() const {
833 return isAISrcF16() || isAISrcB32();
834 }
835
836 bool isAISrc_64B64() const {
837 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
838 }
839
840 bool isAISrc_64_f64() const {
841 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
842 }
843
844 bool isAISrc_128_b32() const {
845 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
846 }
847
848 bool isAISrc_128B16() const {
849 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
850 }
851
852 bool isAISrc_128V2B16() const {
853 return isAISrc_128B16();
854 }
855
856 bool isAISrc_128_f32() const {
857 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
858 }
859
860 bool isAISrc_128F16() const {
861 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
862 }
863
864 bool isAISrc_128V2F16() const {
865 return isAISrc_128F16() || isAISrc_128_b32();
866 }
867
868 bool isVISrc_128_bf16() const {
869 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
870 }
871
872 bool isVISrc_128_f16() const {
873 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
874 }
875
876 bool isVISrc_128V2F16() const {
877 return isVISrc_128_f16() || isVISrc_128_b32();
878 }
879
880 bool isAISrc_256B64() const {
881 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
882 }
883
884 bool isAISrc_256_f64() const {
885 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
886 }
887
888 bool isAISrc_512_b32() const {
889 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
890 }
891
892 bool isAISrc_512B16() const {
893 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
894 }
895
896 bool isAISrc_512V2B16() const {
897 return isAISrc_512B16();
898 }
899
900 bool isAISrc_512_f32() const {
901 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
902 }
903
904 bool isAISrc_512F16() const {
905 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
906 }
907
908 bool isAISrc_512V2F16() const {
909 return isAISrc_512F16() || isAISrc_512_b32();
910 }
911
912 bool isAISrc_1024_b32() const {
913 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
914 }
915
916 bool isAISrc_1024B16() const {
917 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
918 }
919
920 bool isAISrc_1024V2B16() const {
921 return isAISrc_1024B16();
922 }
923
924 bool isAISrc_1024_f32() const {
925 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
926 }
927
928 bool isAISrc_1024F16() const {
929 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
930 }
931
932 bool isAISrc_1024V2F16() const {
933 return isAISrc_1024F16() || isAISrc_1024_b32();
934 }
935
936 bool isKImmFP32() const {
937 return isLiteralImm(MVT::f32);
938 }
939
940 bool isKImmFP16() const {
941 return isLiteralImm(MVT::f16);
942 }
943
944 bool isKImmFP64() const { return isLiteralImm(MVT::f64); }
945
946 bool isMem() const override {
947 return false;
948 }
949
950 bool isExpr() const {
951 return Kind == Expression;
952 }
953
954 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
955
956 bool isSWaitCnt() const;
957 bool isDepCtr() const;
958 bool isSDelayALU() const;
959 bool isHwreg() const;
960 bool isSendMsg() const;
961 bool isSplitBarrier() const;
962 bool isSwizzle() const;
963 bool isSMRDOffset8() const;
964 bool isSMEMOffset() const;
965 bool isSMRDLiteralOffset() const;
966 bool isDPP8() const;
967 bool isDPPCtrl() const;
968 bool isBLGP() const;
969 bool isGPRIdxMode() const;
970 bool isS16Imm() const;
971 bool isU16Imm() const;
972 bool isEndpgm() const;
973
974 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
975 return [this, P]() { return P(*this); };
976 }
977
978 StringRef getToken() const {
979 assert(isToken());
980 return StringRef(Tok.Data, Tok.Length);
981 }
982
983 int64_t getImm() const {
984 assert(isImm());
985 return Imm.Val;
986 }
987
988 void setImm(int64_t Val) {
989 assert(isImm());
990 Imm.Val = Val;
991 }
992
993 ImmTy getImmTy() const {
994 assert(isImm());
995 return Imm.Type;
996 }
997
998 MCRegister getReg() const override {
999 assert(isRegKind());
1000 return Reg.RegNo;
1001 }
1002
1003 SMLoc getStartLoc() const override {
1004 return StartLoc;
1005 }
1006
1007 SMLoc getEndLoc() const override {
1008 return EndLoc;
1009 }
1010
1011 SMRange getLocRange() const {
1012 return SMRange(StartLoc, EndLoc);
1013 }
1014
1015 int getMCOpIdx() const { return MCOpIdx; }
1016
1017 Modifiers getModifiers() const {
1018 assert(isRegKind() || isImmTy(ImmTyNone));
1019 return isRegKind() ? Reg.Mods : Imm.Mods;
1020 }
1021
1022 void setModifiers(Modifiers Mods) {
1023 assert(isRegKind() || isImmTy(ImmTyNone));
1024 if (isRegKind())
1025 Reg.Mods = Mods;
1026 else
1027 Imm.Mods = Mods;
1028 }
1029
1030 bool hasModifiers() const {
1031 return getModifiers().hasModifiers();
1032 }
1033
1034 bool hasFPModifiers() const {
1035 return getModifiers().hasFPModifiers();
1036 }
1037
1038 bool hasIntModifiers() const {
1039 return getModifiers().hasIntModifiers();
1040 }
1041
1042 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1043
1044 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1045
1046 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1047
1048 void addRegOperands(MCInst &Inst, unsigned N) const;
1049
1050 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1051 if (isRegKind())
1052 addRegOperands(Inst, N);
1053 else
1054 addImmOperands(Inst, N);
1055 }
1056
1057 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1058 Modifiers Mods = getModifiers();
1059 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1060 if (isRegKind()) {
1061 addRegOperands(Inst, N);
1062 } else {
1063 addImmOperands(Inst, N, false);
1064 }
1065 }
1066
1067 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1068 assert(!hasIntModifiers());
1069 addRegOrImmWithInputModsOperands(Inst, N);
1070 }
1071
1072 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1073 assert(!hasFPModifiers());
1074 addRegOrImmWithInputModsOperands(Inst, N);
1075 }
1076
1077 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1078 Modifiers Mods = getModifiers();
1079 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1080 assert(isRegKind());
1081 addRegOperands(Inst, N);
1082 }
1083
1084 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1085 assert(!hasIntModifiers());
1086 addRegWithInputModsOperands(Inst, N);
1087 }
1088
1089 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1090 assert(!hasFPModifiers());
1091 addRegWithInputModsOperands(Inst, N);
1092 }
1093
1094 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1095 // clang-format off
1096 switch (Type) {
1097 case ImmTyNone: OS << "None"; break;
1098 case ImmTyGDS: OS << "GDS"; break;
1099 case ImmTyLDS: OS << "LDS"; break;
1100 case ImmTyOffen: OS << "Offen"; break;
1101 case ImmTyIdxen: OS << "Idxen"; break;
1102 case ImmTyAddr64: OS << "Addr64"; break;
1103 case ImmTyOffset: OS << "Offset"; break;
1104 case ImmTyInstOffset: OS << "InstOffset"; break;
1105 case ImmTyOffset0: OS << "Offset0"; break;
1106 case ImmTyOffset1: OS << "Offset1"; break;
1107 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1108 case ImmTyCPol: OS << "CPol"; break;
1109 case ImmTyIndexKey8bit: OS << "index_key"; break;
1110 case ImmTyIndexKey16bit: OS << "index_key"; break;
1111 case ImmTyIndexKey32bit: OS << "index_key"; break;
1112 case ImmTyTFE: OS << "TFE"; break;
1113 case ImmTyD16: OS << "D16"; break;
1114 case ImmTyFORMAT: OS << "FORMAT"; break;
1115 case ImmTyClamp: OS << "Clamp"; break;
1116 case ImmTyOModSI: OS << "OModSI"; break;
1117 case ImmTyDPP8: OS << "DPP8"; break;
1118 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1119 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1120 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1121 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1122 case ImmTyDppFI: OS << "DppFI"; break;
1123 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1124 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1125 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1126 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1127 case ImmTyDMask: OS << "DMask"; break;
1128 case ImmTyDim: OS << "Dim"; break;
1129 case ImmTyUNorm: OS << "UNorm"; break;
1130 case ImmTyDA: OS << "DA"; break;
1131 case ImmTyR128A16: OS << "R128A16"; break;
1132 case ImmTyA16: OS << "A16"; break;
1133 case ImmTyLWE: OS << "LWE"; break;
1134 case ImmTyOff: OS << "Off"; break;
1135 case ImmTyExpTgt: OS << "ExpTgt"; break;
1136 case ImmTyExpCompr: OS << "ExpCompr"; break;
1137 case ImmTyExpVM: OS << "ExpVM"; break;
1138 case ImmTyHwreg: OS << "Hwreg"; break;
1139 case ImmTySendMsg: OS << "SendMsg"; break;
1140 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1141 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1142 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1143 case ImmTyOpSel: OS << "OpSel"; break;
1144 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1145 case ImmTyNegLo: OS << "NegLo"; break;
1146 case ImmTyNegHi: OS << "NegHi"; break;
1147 case ImmTySwizzle: OS << "Swizzle"; break;
1148 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1149 case ImmTyHigh: OS << "High"; break;
1150 case ImmTyBLGP: OS << "BLGP"; break;
1151 case ImmTyCBSZ: OS << "CBSZ"; break;
1152 case ImmTyABID: OS << "ABID"; break;
1153 case ImmTyEndpgm: OS << "Endpgm"; break;
1154 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1155 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1156 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1157 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1158 case ImmTyBitOp3: OS << "BitOp3"; break;
1159 case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
1160 case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
1161 case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
1162 case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
1163 case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
1164 case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
1165 case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
1166 case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
1167 case ImmTyScaleSel: OS << "ScaleSel" ; break;
1168 case ImmTyByteSel: OS << "ByteSel" ; break;
1169 }
1170 // clang-format on
1171 }
1172
1173 void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
1174 switch (Kind) {
1175 case Register:
1176 OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
1177 << " mods: " << Reg.Mods << '>';
1178 break;
1179 case Immediate:
1180 OS << '<' << getImm();
1181 if (getImmTy() != ImmTyNone) {
1182 OS << " type: "; printImmTy(OS, getImmTy());
1183 }
1184 OS << " mods: " << Imm.Mods << '>';
1185 break;
1186 case Token:
1187 OS << '\'' << getToken() << '\'';
1188 break;
1189 case Expression:
1190 OS << "<expr ";
1191 MAI.printExpr(OS, *Expr);
1192 OS << '>';
1193 break;
1194 }
1195 }
1196
1197 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1198 int64_t Val, SMLoc Loc,
1199 ImmTy Type = ImmTyNone,
1200 bool IsFPImm = false) {
1201 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1202 Op->Imm.Val = Val;
1203 Op->Imm.IsFPImm = IsFPImm;
1204 Op->Imm.Type = Type;
1205 Op->Imm.Mods = Modifiers();
1206 Op->StartLoc = Loc;
1207 Op->EndLoc = Loc;
1208 return Op;
1209 }
1210
1211 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1212 StringRef Str, SMLoc Loc,
1213 bool HasExplicitEncodingSize = true) {
1214 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1215 Res->Tok.Data = Str.data();
1216 Res->Tok.Length = Str.size();
1217 Res->StartLoc = Loc;
1218 Res->EndLoc = Loc;
1219 return Res;
1220 }
1221
1222 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1223 MCRegister Reg, SMLoc S, SMLoc E) {
1224 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1225 Op->Reg.RegNo = Reg;
1226 Op->Reg.Mods = Modifiers();
1227 Op->StartLoc = S;
1228 Op->EndLoc = E;
1229 return Op;
1230 }
1231
1232 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1233 const class MCExpr *Expr, SMLoc S) {
1234 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1235 Op->Expr = Expr;
1236 Op->StartLoc = S;
1237 Op->EndLoc = S;
1238 return Op;
1239 }
1240};
1241
1242raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1243 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1244 return OS;
1245}
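// Illustrative usage sketch (not taken from this file; the SISrcMods bit
// names are assumed to match SIDefines.h): getModifiersOperand() produces
// the immediate that addRegOrImmWithInputModsOperands() emits in front of a
// VOP3-style source operand.
//
//   AMDGPUOperand::Modifiers M;
//   M.Abs = true;                           // |src|
//   M.Neg = true;                           // -|src|
//   int64_t Enc = M.getModifiersOperand();  // SISrcMods::ABS | SISrcMods::NEG
//   Inst.addOperand(MCOperand::createImm(Enc));
//   // ...followed by the register or literal operand itself.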
1246
1247//===----------------------------------------------------------------------===//
1248// AsmParser
1249//===----------------------------------------------------------------------===//
1250
1251// Holds info related to the current kernel, e.g. count of SGPRs used.
1252// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1253// .amdgpu_hsa_kernel or at EOF.
1254class KernelScopeInfo {
1255 int SgprIndexUnusedMin = -1;
1256 int VgprIndexUnusedMin = -1;
1257 int AgprIndexUnusedMin = -1;
1258 MCContext *Ctx = nullptr;
1259 MCSubtargetInfo const *MSTI = nullptr;
1260
1261 void usesSgprAt(int i) {
1262 if (i >= SgprIndexUnusedMin) {
1263 SgprIndexUnusedMin = ++i;
1264 if (Ctx) {
1265 MCSymbol* const Sym =
1266 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1267 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1268 }
1269 }
1270 }
1271
1272 void usesVgprAt(int i) {
1273 if (i >= VgprIndexUnusedMin) {
1274 VgprIndexUnusedMin = ++i;
1275 if (Ctx) {
1276 MCSymbol* const Sym =
1277 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1278 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1279 VgprIndexUnusedMin);
1280 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1281 }
1282 }
1283 }
1284
1285 void usesAgprAt(int i) {
1286 // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
1287 if (!hasMAIInsts(*MSTI))
1288 return;
1289
1290 if (i >= AgprIndexUnusedMin) {
1291 AgprIndexUnusedMin = ++i;
1292 if (Ctx) {
1293 MCSymbol* const Sym =
1294 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1295 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1296
1297 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1298 MCSymbol* const vSym =
1299 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1300 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1301 VgprIndexUnusedMin);
1302 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1303 }
1304 }
1305 }
1306
1307public:
1308 KernelScopeInfo() = default;
1309
1310 void initialize(MCContext &Context) {
1311 Ctx = &Context;
1312 MSTI = Ctx->getSubtargetInfo();
1313
1314 usesSgprAt(SgprIndexUnusedMin = -1);
1315 usesVgprAt(VgprIndexUnusedMin = -1);
1316 if (hasMAIInsts(*MSTI)) {
1317 usesAgprAt(AgprIndexUnusedMin = -1);
1318 }
1319 }
1320
1321 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1322 unsigned RegWidth) {
1323 switch (RegKind) {
1324 case IS_SGPR:
1325 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1326 break;
1327 case IS_AGPR:
1328 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1329 break;
1330 case IS_VGPR:
1331 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1332 break;
1333 default:
1334 break;
1335 }
1336 }
1337};
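// A minimal sketch of how the parser feeds KernelScopeInfo (illustrative
// values, not from this file): a reference to s[4:7], i.e. dword index 4
// with a 128-bit width, is reported as
//
//   KernelScope.usesRegister(IS_SGPR, /*DwordRegIndex=*/4, /*RegWidth=*/128);
//
// which calls usesSgprAt(4 + divideCeil(128, 32) - 1) = usesSgprAt(7) and
// raises .kernel.sgpr_count to 8, one past the highest SGPR dword touched.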
1338
1339class AMDGPUAsmParser : public MCTargetAsmParser {
1340 MCAsmParser &Parser;
1341
1342 unsigned ForcedEncodingSize = 0;
1343 bool ForcedDPP = false;
1344 bool ForcedSDWA = false;
1345 KernelScopeInfo KernelScope;
1346
1347 /// @name Auto-generated Match Functions
1348 /// {
1349
1350#define GET_ASSEMBLER_HEADER
1351#include "AMDGPUGenAsmMatcher.inc"
1352
1353 /// }
1354
1355private:
1356 void createConstantSymbol(StringRef Id, int64_t Val);
1357
1358 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1359 bool OutOfRangeError(SMRange Range);
1360 /// Calculate VGPR/SGPR blocks required for given target, reserved
1361 /// registers, and user-specified NextFreeXGPR values.
1362 ///
1363 /// \param Features [in] Target features, used for bug corrections.
1364 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1365 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1366 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1367 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1368 /// descriptor field, if valid.
1369 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1370 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1371 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1372 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1373 /// \param VGPRBlocks [out] Result VGPR block count.
1374 /// \param SGPRBlocks [out] Result SGPR block count.
1375 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1376 const MCExpr *FlatScrUsed, bool XNACKUsed,
1377 std::optional<bool> EnableWavefrontSize32,
1378 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1379 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1380 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
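 /// A back-of-the-envelope version of the block computation (simplified
 /// sketch; the real calculateGPRBlocks also handles target bug workarounds,
 /// reserved registers, and MCExpr-deferred values):
 ///
 ///   unsigned Granule = EnableWavefrontSize32 ? 8 : 4; // VGPR alloc granule
 ///   unsigned VGPRBlocks =
 ///       alignTo(std::max(1u, NextFreeVGPR), Granule) / Granule - 1;
 ///
 /// SGPR blocks are derived the same way from NextFreeSGPR, with VCC,
 /// FLAT_SCRATCH, and XNACK_MASK added when those special SGPRs are in use.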
1381 bool ParseDirectiveAMDGCNTarget();
1382 bool ParseDirectiveAMDHSACodeObjectVersion();
1383 bool ParseDirectiveAMDHSAKernel();
1384 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1385 bool ParseDirectiveAMDKernelCodeT();
1386 // TODO: Possibly make subtargetHasRegister const.
1387 bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
1388 bool ParseDirectiveAMDGPUHsaKernel();
1389
1390 bool ParseDirectiveISAVersion();
1391 bool ParseDirectiveHSAMetadata();
1392 bool ParseDirectivePALMetadataBegin();
1393 bool ParseDirectivePALMetadata();
1394 bool ParseDirectiveAMDGPULDS();
1395
1396 /// Common code to parse out a block of text (typically YAML) between start and
1397 /// end directives.
1398 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1399 const char *AssemblerDirectiveEnd,
1400 std::string &CollectString);
1401
1402 bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
1403 RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
1404 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1405 unsigned &RegNum, unsigned &RegWidth,
1406 bool RestoreOnFailure = false);
1407 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1408 unsigned &RegNum, unsigned &RegWidth,
1409 SmallVectorImpl<AsmToken> &Tokens);
1410 MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1411 unsigned &RegWidth,
1412 SmallVectorImpl<AsmToken> &Tokens);
1413 MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1414 unsigned &RegWidth,
1415 SmallVectorImpl<AsmToken> &Tokens);
1416 MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1417 unsigned &RegWidth,
1418 SmallVectorImpl<AsmToken> &Tokens);
1419 bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
1420 MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
1421 unsigned SubReg, unsigned RegWidth, SMLoc Loc);
1422
1423 bool isRegister();
1424 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1425 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1426 void initializeGprCountSymbol(RegisterKind RegKind);
1427 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1428 unsigned RegWidth);
1429 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1430 bool IsAtomic);
1431
1432public:
1433 enum OperandMode {
1434 OperandMode_Default,
1435 OperandMode_NSA,
1436 };
1437
1438 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1439
1440 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1441 const MCInstrInfo &MII,
1442 const MCTargetOptions &Options)
1443 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1445
1446 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1447
1448 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1449 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1450 createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
1451 createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
1452 createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
1453 } else {
1454 createConstantSymbol(".option.machine_version_major", ISA.Major);
1455 createConstantSymbol(".option.machine_version_minor", ISA.Minor);
1456 createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
1457 }
1458 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1459 initializeGprCountSymbol(IS_VGPR);
1460 initializeGprCountSymbol(IS_SGPR);
1461 } else
1462 KernelScope.initialize(getContext());
1463
1464 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1465 createConstantSymbol(Symbol, Code);
1466
1467 createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
1468 createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
1469 createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
1470 }
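 // Note (assumption, not part of this file): the constant symbols created in
 // the constructor are ordinary absolute symbols, so assembly input can test
 // them in expressions, e.g.
 //
 //   .if .amdgcn.gfx_generation_number >= 10
 //     s_nop 0
 //   .endif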
1471
1472 bool hasMIMG_R128() const {
1473 return AMDGPU::hasMIMG_R128(getSTI());
1474 }
1475
1476 bool hasPackedD16() const {
1477 return AMDGPU::hasPackedD16(getSTI());
1478 }
1479
1480 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1481
1482 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1483
1484 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1485
1486 bool isSI() const {
1487 return AMDGPU::isSI(getSTI());
1488 }
1489
1490 bool isCI() const {
1491 return AMDGPU::isCI(getSTI());
1492 }
1493
1494 bool isVI() const {
1495 return AMDGPU::isVI(getSTI());
1496 }
1497
1498 bool isGFX9() const {
1499 return AMDGPU::isGFX9(getSTI());
1500 }
1501
1502 // TODO: isGFX90A is also true for GFX940. We need to clean it.
1503 bool isGFX90A() const {
1504 return AMDGPU::isGFX90A(getSTI());
1505 }
1506
1507 bool isGFX940() const {
1508 return AMDGPU::isGFX940(getSTI());
1509 }
1510
1511 bool isGFX9Plus() const {
1512 return AMDGPU::isGFX9Plus(getSTI());
1513 }
1514
1515 bool isGFX10() const {
1516 return AMDGPU::isGFX10(getSTI());
1517 }
1518
1519 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1520
1521 bool isGFX11() const {
1522 return AMDGPU::isGFX11(getSTI());
1523 }
1524
1525 bool isGFX11Plus() const {
1526 return AMDGPU::isGFX11Plus(getSTI());
1527 }
1528
1529 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1530
1531 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1532
1533 bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }
1534
1535 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1536
1537 bool isGFX10_BEncoding() const {
1538 return AMDGPU::isGFX10_BEncoding(getSTI());
1539 }
1540
1541 bool hasInv2PiInlineImm() const {
1542 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1543 }
1544
1545 bool has64BitLiterals() const {
1546 return getFeatureBits()[AMDGPU::Feature64BitLiterals];
1547 }
1548
1549 bool hasFlatOffsets() const {
1550 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1551 }
1552
1553 bool hasTrue16Insts() const {
1554 return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
1555 }
1556
1557 bool hasArchitectedFlatScratch() const {
1558 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1559 }
1560
1561 bool hasSGPR102_SGPR103() const {
1562 return !isVI() && !isGFX9();
1563 }
1564
1565 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1566
1567 bool hasIntClamp() const {
1568 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1569 }
1570
1571 bool hasPartialNSAEncoding() const {
1572 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1573 }
1574
1575 bool hasGloballyAddressableScratch() const {
1576 return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
1577 }
1578
1579 unsigned getNSAMaxSize(bool HasSampler = false) const {
1580 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1581 }
1582
1583 unsigned getMaxNumUserSGPRs() const {
1584 return AMDGPU::getMaxNumUserSGPRs(getSTI());
1585 }
1586
1587 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1588
1589 AMDGPUTargetStreamer &getTargetStreamer() {
1590 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1591 return static_cast<AMDGPUTargetStreamer &>(TS);
1592 }
1593
1594 const MCRegisterInfo *getMRI() const {
1595 // We need this const_cast because for some reason getContext() is not const
1596 // in MCAsmParser.
1597 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1598 }
1599
1600 const MCInstrInfo *getMII() const {
1601 return &MII;
1602 }
1603
1604 const FeatureBitset &getFeatureBits() const {
1605 return getSTI().getFeatureBits();
1606 }
1607
1608 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1609 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1610 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1611
1612 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1613 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1614 bool isForcedDPP() const { return ForcedDPP; }
1615 bool isForcedSDWA() const { return ForcedSDWA; }
1616 ArrayRef<unsigned> getMatchedVariants() const;
1617 StringRef getMatchedVariantName() const;
1618
1619 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1620 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1621 bool RestoreOnFailure);
1622 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1623 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1624 SMLoc &EndLoc) override;
1625 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1626 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1627 unsigned Kind) override;
1628 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1629 OperandVector &Operands, MCStreamer &Out,
1630 uint64_t &ErrorInfo,
1631 bool MatchingInlineAsm) override;
1632 bool ParseDirective(AsmToken DirectiveID) override;
1633 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1634 OperandMode Mode = OperandMode_Default);
1635 StringRef parseMnemonicSuffix(StringRef Name);
1636 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1637 SMLoc NameLoc, OperandVector &Operands) override;
1638 //bool ProcessInstruction(MCInst &Inst);
1639
1640 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1641
1642 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1643
1644 ParseStatus
1645 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1646 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1647 std::function<bool(int64_t &)> ConvertResult = nullptr);
1648
1649 ParseStatus parseOperandArrayWithPrefix(
1650 const char *Prefix, OperandVector &Operands,
1651 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1652 bool (*ConvertResult)(int64_t &) = nullptr);
1653
1654 ParseStatus
1655 parseNamedBit(StringRef Name, OperandVector &Operands,
1656 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1657 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1658 ParseStatus parseCPol(OperandVector &Operands);
1659 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1660 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1661 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1662 SMLoc &StringLoc);
1663 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1664 StringRef Name,
1665 ArrayRef<const char *> Ids,
1666 int64_t &IntVal);
1667 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1668 StringRef Name,
1669 ArrayRef<const char *> Ids,
1670 AMDGPUOperand::ImmTy Type);
1671
1672 bool isModifier();
1673 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1674 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1675 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1676 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1677 bool parseSP3NegModifier();
1678 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1679 LitModifier Lit = LitModifier::None);
1680 ParseStatus parseReg(OperandVector &Operands);
1681 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1682 LitModifier Lit = LitModifier::None);
1683 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1684 bool AllowImm = true);
1685 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1686 bool AllowImm = true);
1687 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1688 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1689 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1690 ParseStatus tryParseIndexKey(OperandVector &Operands,
1691 AMDGPUOperand::ImmTy ImmTy);
1692 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1693 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1694 ParseStatus parseIndexKey32bit(OperandVector &Operands);
1695 ParseStatus tryParseMatrixFMT(OperandVector &Operands, StringRef Name,
1696 AMDGPUOperand::ImmTy Type);
1697 ParseStatus parseMatrixAFMT(OperandVector &Operands);
1698 ParseStatus parseMatrixBFMT(OperandVector &Operands);
1699 ParseStatus tryParseMatrixScale(OperandVector &Operands, StringRef Name,
1700 AMDGPUOperand::ImmTy Type);
1701 ParseStatus parseMatrixAScale(OperandVector &Operands);
1702 ParseStatus parseMatrixBScale(OperandVector &Operands);
1703 ParseStatus tryParseMatrixScaleFmt(OperandVector &Operands, StringRef Name,
1704 AMDGPUOperand::ImmTy Type);
1705 ParseStatus parseMatrixAScaleFmt(OperandVector &Operands);
1706 ParseStatus parseMatrixBScaleFmt(OperandVector &Operands);
1707
1708 ParseStatus parseDfmtNfmt(int64_t &Format);
1709 ParseStatus parseUfmt(int64_t &Format);
1710 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1711 int64_t &Format);
1712 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1713 int64_t &Format);
1714 ParseStatus parseFORMAT(OperandVector &Operands);
1715 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1716 ParseStatus parseNumericFormat(int64_t &Format);
1717 ParseStatus parseFlatOffset(OperandVector &Operands);
1718 ParseStatus parseR128A16(OperandVector &Operands);
1719 ParseStatus parseBLGP(OperandVector &Operands);
1720 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1721 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1722
1723 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1724
1725 bool parseCnt(int64_t &IntVal);
1726 ParseStatus parseSWaitCnt(OperandVector &Operands);
1727
1728 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1729 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1730 ParseStatus parseDepCtr(OperandVector &Operands);
1731
1732 bool parseDelay(int64_t &Delay);
1733 ParseStatus parseSDelayALU(OperandVector &Operands);
1734
1735 ParseStatus parseHwreg(OperandVector &Operands);
1736
1737private:
1738 struct OperandInfoTy {
1739 SMLoc Loc;
1740 int64_t Val;
1741 bool IsSymbolic = false;
1742 bool IsDefined = false;
1743
1744 OperandInfoTy(int64_t Val) : Val(Val) {}
1745 };
1746
1747 struct StructuredOpField : OperandInfoTy {
1748 StringLiteral Id;
1749 StringLiteral Desc;
1750 unsigned Width;
1751 bool IsDefined = false;
1752
1753 StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1754 int64_t Default)
1755 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1756 virtual ~StructuredOpField() = default;
1757
1758 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1759 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1760 return false;
1761 }
1762
1763 virtual bool validate(AMDGPUAsmParser &Parser) const {
1764 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1765 return Error(Parser, "not supported on this GPU");
1766 if (!isUIntN(Width, Val))
1767 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1768 return true;
1769 }
1770 };
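 // Illustrative sketch (field names and widths are hypothetical, not from
 // this file): structured operands are parsed into StructuredOpField entries
 // and then range-checked via validate(), which rejects values that do not
 // fit in the declared Width:
 //
 //   StructuredOpField Offset("offset", "bit offset", /*Width=*/5, /*Default=*/0);
 //   StructuredOpField Size("size", "bitfield size", /*Width=*/6, /*Default=*/32);
 //   if (parseStructuredOpFields({&Offset, &Size}).isSuccess() &&
 //       validateStructuredOpFields({&Offset, &Size})) {
 //     // Offset.Val and Size.Val now hold the parsed, validated values.
 //   }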
1771
1772 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1773 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1774
1775 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1776 bool validateSendMsg(const OperandInfoTy &Msg,
1777 const OperandInfoTy &Op,
1778 const OperandInfoTy &Stream);
1779
1780 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1781 OperandInfoTy &Width);
1782
1783 static SMLoc getLaterLoc(SMLoc a, SMLoc b);
1784
1785 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1786 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1787 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1788
1789 SMLoc getOperandLoc(const OperandVector &Operands, int MCOpIdx) const;
1790 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1791 const OperandVector &Operands) const;
1792 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type,
1793 const OperandVector &Operands) const;
1794 SMLoc getInstLoc(const OperandVector &Operands) const;
1795
1796 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1797 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1798 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1799 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1800 bool validateSOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1801 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1802 std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
1803 bool AsVOPD3);
1804 bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
1805 bool tryVOPD(const MCInst &Inst);
1806 bool tryVOPD3(const MCInst &Inst);
1807 bool tryAnotherVOPDEncoding(const MCInst &Inst);
1808
1809 bool validateIntClampSupported(const MCInst &Inst);
1810 bool validateMIMGAtomicDMask(const MCInst &Inst);
1811 bool validateMIMGGatherDMask(const MCInst &Inst);
1812 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1813 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1814 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1815 bool validateMIMGD16(const MCInst &Inst);
1816 bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1817 bool validateTensorR128(const MCInst &Inst);
1818 bool validateMIMGMSAA(const MCInst &Inst);
1819 bool validateOpSel(const MCInst &Inst);
1820 bool validateTrue16OpSel(const MCInst &Inst);
1821 bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
1822 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1823 bool validateVccOperand(MCRegister Reg) const;
1824 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1825 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1826 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1827 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1828 bool validateAGPRLdSt(const MCInst &Inst) const;
1829 bool validateVGPRAlign(const MCInst &Inst) const;
1830 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1831 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1832 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1833 bool validateDivScale(const MCInst &Inst);
1834 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1835 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1836 const SMLoc &IDLoc);
1837 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1838 const unsigned CPol);
1839 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1840 bool validateSetVgprMSB(const MCInst &Inst, const OperandVector &Operands);
1841 bool validateLdsDirect(const MCInst &Inst, const OperandVector &Operands);
1842 bool validateWMMA(const MCInst &Inst, const OperandVector &Operands);
1843 unsigned getConstantBusLimit(unsigned Opcode) const;
1844 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1845 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1846 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1847
1848 bool isSupportedMnemo(StringRef Mnemo,
1849 const FeatureBitset &FBS);
1850 bool isSupportedMnemo(StringRef Mnemo,
1851 const FeatureBitset &FBS,
1852 ArrayRef<unsigned> Variants);
1853 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1854
1855 bool isId(const StringRef Id) const;
1856 bool isId(const AsmToken &Token, const StringRef Id) const;
1857 bool isToken(const AsmToken::TokenKind Kind) const;
1858 StringRef getId() const;
1859 bool trySkipId(const StringRef Id);
1860 bool trySkipId(const StringRef Pref, const StringRef Id);
1861 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1862 bool trySkipToken(const AsmToken::TokenKind Kind);
1863 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1864 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1865 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1866
1867 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1868 AsmToken::TokenKind getTokenKind() const;
1869 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1871 StringRef getTokenStr() const;
1872 AsmToken peekToken(bool ShouldSkipSpace = true);
1873 AsmToken getToken() const;
1874 SMLoc getLoc() const;
1875 void lex();
1876
1877public:
1878 void onBeginOfFile() override;
1879 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1880
1881 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1882
1883 ParseStatus parseExpTgt(OperandVector &Operands);
1884 ParseStatus parseSendMsg(OperandVector &Operands);
1885 ParseStatus parseInterpSlot(OperandVector &Operands);
1886 ParseStatus parseInterpAttr(OperandVector &Operands);
1887 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1888 ParseStatus parseBoolReg(OperandVector &Operands);
1889
1890 bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
1891 const unsigned MaxVal, const Twine &ErrMsg,
1892 SMLoc &Loc);
1893 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1894 const unsigned MinVal,
1895 const unsigned MaxVal,
1896 const StringRef ErrMsg);
1897 ParseStatus parseSwizzle(OperandVector &Operands);
1898 bool parseSwizzleOffset(int64_t &Imm);
1899 bool parseSwizzleMacro(int64_t &Imm);
1900 bool parseSwizzleQuadPerm(int64_t &Imm);
1901 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1902 bool parseSwizzleBroadcast(int64_t &Imm);
1903 bool parseSwizzleSwap(int64_t &Imm);
1904 bool parseSwizzleReverse(int64_t &Imm);
1905 bool parseSwizzleFFT(int64_t &Imm);
1906 bool parseSwizzleRotate(int64_t &Imm);
1907
1908 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1909 int64_t parseGPRIdxMacro();
1910
1911 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1912 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1913
1914 ParseStatus parseOModSI(OperandVector &Operands);
1915
1916 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1917 OptionalImmIndexMap &OptionalIdx);
1918 void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
1919 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1920 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1921 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1922 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1923
1924 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1925 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1926 OptionalImmIndexMap &OptionalIdx);
1927 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1928 OptionalImmIndexMap &OptionalIdx);
1929
1930 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1931 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1932
1933 bool parseDimId(unsigned &Encoding);
1934 ParseStatus parseDim(OperandVector &Operands);
1935 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1936 ParseStatus parseDPP8(OperandVector &Operands);
1937 ParseStatus parseDPPCtrl(OperandVector &Operands);
1938 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1939 int64_t parseDPPCtrlSel(StringRef Ctrl);
1940 int64_t parseDPPCtrlPerm();
1941 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1942 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1943 cvtDPP(Inst, Operands, true);
1944 }
1945 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1946 bool IsDPP8 = false);
1947 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1948 cvtVOP3DPP(Inst, Operands, true);
1949 }
1950
1951 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1952 AMDGPUOperand::ImmTy Type);
1953 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1954 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1955 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1956 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1957 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1958 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1959 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1960 uint64_t BasicInstType,
1961 bool SkipDstVcc = false,
1962 bool SkipSrcVcc = false);
1963
1964 ParseStatus parseEndpgm(OperandVector &Operands);
1965
1966 ParseStatus parseVOPD(OperandVector &Operands);
1967};
1968
1969} // end anonymous namespace
1970
1971 // May be called with an integer type of equivalent bitwidth.
1972static const fltSemantics *getFltSemantics(unsigned Size) {
1973 switch (Size) {
1974 case 4:
1975 return &APFloat::IEEEsingle();
1976 case 8:
1977 return &APFloat::IEEEdouble();
1978 case 2:
1979 return &APFloat::IEEEhalf();
1980 default:
1981 llvm_unreachable("unsupported fp type");
1982 }
1983}
1984
1985 static const fltSemantics *getFltSemantics(MVT VT) {
1986 return getFltSemantics(VT.getSizeInBits() / 8);
1987}
1988
1989 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1990 switch (OperandType) {
1991 // When a floating-point immediate is used as an operand of type i16, the 32-bit
1992 // representation of the constant truncated to the 16 LSBs should be used.
2007 return &APFloat::IEEEsingle();
2014 return &APFloat::IEEEdouble();
2021 return &APFloat::IEEEhalf();
2026 return &APFloat::BFloat();
2027 default:
2028 llvm_unreachable("unsupported fp type");
2029 }
2030}
2031
2032//===----------------------------------------------------------------------===//
2033// Operand
2034//===----------------------------------------------------------------------===//
2035
2036static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
2037 bool Lost;
2038
2039 // Convert literal to single precision
2040 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
2041 APFloat::rmNearestTiesToEven,
2042 &Lost);
2043 // We allow precision loss but not overflow or underflow
2044 if (Status != APFloat::opOK &&
2045 Lost &&
2046 ((Status & APFloat::opOverflow) != 0 ||
2047 (Status & APFloat::opUnderflow) != 0)) {
2048 return false;
2049 }
2050
2051 return true;
2052}
2053
2054static bool isSafeTruncation(int64_t Val, unsigned Size) {
2055 return isUIntN(Size, Val) || isIntN(Size, Val);
2056}
2057
2058static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2059 if (VT.getScalarType() == MVT::i16)
2060 return isInlinableLiteral32(Val, HasInv2Pi);
2061
2062 if (VT.getScalarType() == MVT::f16)
2063 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2064
2065 assert(VT.getScalarType() == MVT::bf16);
2066
2067 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2068}
2069
2070bool AMDGPUOperand::isInlinableImm(MVT type) const {
2071
2072 // This is a hack to enable named inline values like
2073 // shared_base with both 32-bit and 64-bit operands.
2074 // Note that these values are defined as
2075 // 32-bit operands only.
2076 if (isInlineValue()) {
2077 return true;
2078 }
2079
2080 if (!isImmTy(ImmTyNone)) {
2081 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2082 return false;
2083 }
2084 // TODO: We should avoid using host float here. It would be better to
2085 // check the float bit values which is what a few other places do.
2086 // We've had bot failures before due to weird NaN support on mips hosts.
2087
2088 APInt Literal(64, Imm.Val);
2089
2090 if (Imm.IsFPImm) { // We got fp literal token
2091 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2092 return AMDGPU::isInlinableLiteral64(Imm.Val,
2093 AsmParser->hasInv2PiInlineImm());
2094 }
2095
2096 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2097 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2098 return false;
2099
2100 if (type.getScalarSizeInBits() == 16) {
2101 bool Lost = false;
2102 switch (type.getScalarType().SimpleTy) {
2103 default:
2104 llvm_unreachable("unknown 16-bit type");
2105 case MVT::bf16:
2106 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2107 &Lost);
2108 break;
2109 case MVT::f16:
2110 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2111 &Lost);
2112 break;
2113 case MVT::i16:
2114 FPLiteral.convert(APFloatBase::IEEEsingle(),
2115 APFloat::rmNearestTiesToEven, &Lost);
2116 break;
2117 }
2118 // We need to use 32-bit representation here because when a floating-point
2119 // inline constant is used as an i16 operand, its 32-bit representation
2120 // will be used. We will need the 32-bit value to check if
2121 // it is an FP inline constant.
2122 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2123 return isInlineableLiteralOp16(ImmVal, type,
2124 AsmParser->hasInv2PiInlineImm());
2125 }
2126
2127 // Check if single precision literal is inlinable
2128 return AMDGPU::isInlinableLiteral32(
2129 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2130 AsmParser->hasInv2PiInlineImm());
2131 }
2132
2133 // We got int literal token.
2134 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2135 return AMDGPU::isInlinableLiteral64(Imm.Val,
2136 AsmParser->hasInv2PiInlineImm());
2137 }
2138
2139 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2140 return false;
2141 }
2142
2143 if (type.getScalarSizeInBits() == 16) {
2144 return isInlineableLiteralOp16(
2145 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2146 type, AsmParser->hasInv2PiInlineImm());
2147 }
2148
2149 return AMDGPU::isInlinableLiteral32(
2150 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2151 AsmParser->hasInv2PiInlineImm());
2152}
2153
2154bool AMDGPUOperand::isLiteralImm(MVT type) const {
2155 // Check that this immediate can be added as literal
2156 if (!isImmTy(ImmTyNone)) {
2157 return false;
2158 }
2159
2160 bool Allow64Bit =
2161 (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
2162
2163 if (!Imm.IsFPImm) {
2164 // We got int literal token.
2165
2166 if (type == MVT::f64 && hasFPModifiers()) {
2167 // Cannot apply fp modifiers to int literals while preserving the same semantics
2168 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
2169 // disable these cases.
2170 return false;
2171 }
2172
2173 unsigned Size = type.getSizeInBits();
2174 if (Size == 64) {
2175 if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Imm.Val, false))
2176 return true;
2177 Size = 32;
2178 }
2179
2180 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2181 // types.
2182 return isSafeTruncation(Imm.Val, Size);
2183 }
2184
2185 // We got fp literal token
2186 if (type == MVT::f64) { // Expected 64-bit fp operand
2187 // The low 32 bits of the literal would be set to zeroes, but we accept such literals
2188 return true;
2189 }
2190
2191 if (type == MVT::i64) { // Expected 64-bit int operand
2192 // We don't allow fp literals in 64-bit integer instructions. It is
2193 // unclear how we should encode them.
2194 return false;
2195 }
2196
2197 // We allow fp literals with f16x2 operands assuming that the specified
2198 // literal goes into the lower half and the upper half is zero. We also
2199 // require that the literal may be losslessly converted to f16.
2200 //
2201 // For i16x2 operands, we assume that the specified literal is encoded as a
2202 // single-precision float. This is pretty odd, but it matches SP3 and what
2203 // happens in hardware.
2204 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2205 : (type == MVT::v2i16) ? MVT::f32
2206 : (type == MVT::v2f32) ? MVT::f32
2207 : type;
2208
2209 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2210 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2211}
2212
2213bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2214 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2215}
2216
2217bool AMDGPUOperand::isVRegWithInputMods() const {
2218 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2219 // GFX90A allows DPP on 64-bit operands.
2220 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2221 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2222}
2223
2224template <bool IsFake16>
2225bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2226 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2227 : AMDGPU::VGPR_16_Lo128RegClassID);
2228}
2229
2230template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2231 return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2232 : AMDGPU::VGPR_16RegClassID);
2233}
2234
2235bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2236 if (AsmParser->isVI())
2237 return isVReg32();
2238 if (AsmParser->isGFX9Plus())
2239 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2240 return false;
2241}
2242
2243bool AMDGPUOperand::isSDWAFP16Operand() const {
2244 return isSDWAOperand(MVT::f16);
2245}
2246
2247bool AMDGPUOperand::isSDWAFP32Operand() const {
2248 return isSDWAOperand(MVT::f32);
2249}
2250
2251bool AMDGPUOperand::isSDWAInt16Operand() const {
2252 return isSDWAOperand(MVT::i16);
2253}
2254
2255bool AMDGPUOperand::isSDWAInt32Operand() const {
2256 return isSDWAOperand(MVT::i32);
2257}
2258
2259bool AMDGPUOperand::isBoolReg() const {
2260 auto FB = AsmParser->getFeatureBits();
2261 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2262 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2263}
2264
2265uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2266{
2267 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2268 assert(Size == 2 || Size == 4 || Size == 8);
2269
2270 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2271
2272 if (Imm.Mods.Abs) {
2273 Val &= ~FpSignMask;
2274 }
2275 if (Imm.Mods.Neg) {
2276 Val ^= FpSignMask;
2277 }
2278
2279 return Val;
2280}
2281
2282void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2283 MCOpIdx = Inst.getNumOperands();
2284
2285 if (isExpr()) {
2286 Inst.addOperand(MCOperand::createExpr(Expr));
2287 return;
2288 }
2289
2290 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2291 Inst.getNumOperands())) {
2292 addLiteralImmOperand(Inst, Imm.Val,
2293 ApplyModifiers &
2294 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2295 } else {
2296 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2297 Inst.addOperand(MCOperand::createImm(Imm.Val));
2298 }
2299}
2300
2301void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2302 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2303 auto OpNum = Inst.getNumOperands();
2304 // Check that this operand accepts literals
2305 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2306
2307 if (ApplyModifiers) {
2308 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2309 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2310 Val = applyInputFPModifiers(Val, Size);
2311 }
2312
2313 APInt Literal(64, Val);
2314 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2315
2316 if (Imm.IsFPImm) { // We got fp literal token
2317 switch (OpTy) {
2323 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2324 AsmParser->hasInv2PiInlineImm())) {
2325 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2326 return;
2327 }
2328
2329 // Non-inlineable
2330 if (AMDGPU::isSISrcFPOperand(InstDesc,
2331 OpNum)) { // Expected 64-bit fp operand
2332 bool HasMandatoryLiteral =
2333 AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::imm);
2334 // For fp operands we check if low 32 bits are zeros
2335 if (Literal.getLoBits(32) != 0 &&
2336 (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
2337 !HasMandatoryLiteral) {
2338 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
2339 Inst.getLoc(),
2340 "Can't encode literal as exact 64-bit floating-point operand. "
2341 "Low 32-bits will be set to zero");
2342 Val &= 0xffffffff00000000u;
2343 }
2344
2346 return;
2347 }
2348
2349 // We don't allow fp literals in 64-bit integer instructions. It is
2350 // unclear how we should encode them. This case should be checked earlier
2351 // in predicate methods (isLiteralImm())
2352 llvm_unreachable("fp literal in 64-bit integer instruction.");
2353
2356 return;
2357
2362 if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
2363 // This is 1/(2*pi), which would be truncated to bf16 with a
2364 // loss of precision. The constant represents the idiomatic fp32 value of
2365 // 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16 bits
2366 // cleared. Prevent the rounding below.
2367 Inst.addOperand(MCOperand::createImm(0x3e22));
2368 return;
2369 }
2370 [[fallthrough]];
2371
2392 bool lost;
2393 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2394 // Convert literal to single precision
2395 FPLiteral.convert(*getOpFltSemantics(OpTy),
2396 APFloat::rmNearestTiesToEven, &lost);
2397 // We allow precision loss but not overflow or underflow. This should be
2398 // checked earlier in isLiteralImm()
2399
2400 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2401 Inst.addOperand(MCOperand::createImm(ImmVal));
2402 return;
2403 }
2404 default:
2405 llvm_unreachable("invalid operand size");
2406 }
2407
2408 return;
2409 }
2410
2411 // We got int literal token.
2412 // Only sign extend inline immediates.
2413 switch (OpTy) {
2428 return;
2429
2432 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2434 return;
2435 }
2436
2437 // When the 32 MSBs are not zero (which effectively means the value cannot be
2438 // safely truncated to uint32_t), if the target doesn't support 64-bit literals,
2439 // or the lit modifier is explicitly used, we need to truncate the value to the
2440 // 32 LSBs.
2441 if (!AsmParser->has64BitLiterals() ||
2442 getModifiers().Lit == LitModifier::Lit)
2443 Val = Lo_32(Val);
2444
2446 return;
2447
2451 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2453 return;
2454 }
2455
2456 // If the target doesn't support 64-bit literals, we need to use the
2457 // constant as the high 32 bits of a double-precision floating-point value.
2458 if (!AsmParser->has64BitLiterals()) {
2459 Val = static_cast<uint64_t>(Val) << 32;
2460 } else {
2461 // When the target does support 64-bit literals, there are two cases
2462 // where we still want to use src_literal encoding:
2463 // 1) explicitly forced by using the lit modifier;
2464 // 2) the value is a valid 32-bit representation (signed or unsigned)
2465 // and is not forced by the lit64 modifier.
2466 if (getModifiers().Lit == LitModifier::Lit ||
2467 (getModifiers().Lit != LitModifier::Lit64 &&
2468 (isInt<32>(Val) || isUInt<32>(Val))))
2469 Val = static_cast<uint64_t>(Val) << 32;
2470 }
2471
2473 return;
2474
2487 return;
2488
2490 if ((isInt<32>(Val) || isUInt<32>(Val)) &&
2491 getModifiers().Lit != LitModifier::Lit64)
2492 Val <<= 32;
2493
2495 return;
2496
2497 default:
2498 llvm_unreachable("invalid operand type");
2499 }
2500}
2501
2502void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2503 MCOpIdx = Inst.getNumOperands();
2504 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2505}
2506
2507bool AMDGPUOperand::isInlineValue() const {
2508 return isRegKind() && ::isInlineValue(getReg());
2509}
2510
2511//===----------------------------------------------------------------------===//
2512// AsmParser
2513//===----------------------------------------------------------------------===//
2514
2515void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2516 // TODO: make these pre-defined variables read-only.
2517 // Currently there is no suitable machinery in core llvm-mc for this.
2518 // MCSymbol::isRedefinable is intended for another purpose, and
2519 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
2520 MCContext &Ctx = getContext();
2521 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2523}
2524
2525static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2526 if (Is == IS_VGPR) {
2527 switch (RegWidth) {
2528 default: return -1;
2529 case 32:
2530 return AMDGPU::VGPR_32RegClassID;
2531 case 64:
2532 return AMDGPU::VReg_64RegClassID;
2533 case 96:
2534 return AMDGPU::VReg_96RegClassID;
2535 case 128:
2536 return AMDGPU::VReg_128RegClassID;
2537 case 160:
2538 return AMDGPU::VReg_160RegClassID;
2539 case 192:
2540 return AMDGPU::VReg_192RegClassID;
2541 case 224:
2542 return AMDGPU::VReg_224RegClassID;
2543 case 256:
2544 return AMDGPU::VReg_256RegClassID;
2545 case 288:
2546 return AMDGPU::VReg_288RegClassID;
2547 case 320:
2548 return AMDGPU::VReg_320RegClassID;
2549 case 352:
2550 return AMDGPU::VReg_352RegClassID;
2551 case 384:
2552 return AMDGPU::VReg_384RegClassID;
2553 case 512:
2554 return AMDGPU::VReg_512RegClassID;
2555 case 1024:
2556 return AMDGPU::VReg_1024RegClassID;
2557 }
2558 } else if (Is == IS_TTMP) {
2559 switch (RegWidth) {
2560 default: return -1;
2561 case 32:
2562 return AMDGPU::TTMP_32RegClassID;
2563 case 64:
2564 return AMDGPU::TTMP_64RegClassID;
2565 case 128:
2566 return AMDGPU::TTMP_128RegClassID;
2567 case 256:
2568 return AMDGPU::TTMP_256RegClassID;
2569 case 512:
2570 return AMDGPU::TTMP_512RegClassID;
2571 }
2572 } else if (Is == IS_SGPR) {
2573 switch (RegWidth) {
2574 default: return -1;
2575 case 32:
2576 return AMDGPU::SGPR_32RegClassID;
2577 case 64:
2578 return AMDGPU::SGPR_64RegClassID;
2579 case 96:
2580 return AMDGPU::SGPR_96RegClassID;
2581 case 128:
2582 return AMDGPU::SGPR_128RegClassID;
2583 case 160:
2584 return AMDGPU::SGPR_160RegClassID;
2585 case 192:
2586 return AMDGPU::SGPR_192RegClassID;
2587 case 224:
2588 return AMDGPU::SGPR_224RegClassID;
2589 case 256:
2590 return AMDGPU::SGPR_256RegClassID;
2591 case 288:
2592 return AMDGPU::SGPR_288RegClassID;
2593 case 320:
2594 return AMDGPU::SGPR_320RegClassID;
2595 case 352:
2596 return AMDGPU::SGPR_352RegClassID;
2597 case 384:
2598 return AMDGPU::SGPR_384RegClassID;
2599 case 512:
2600 return AMDGPU::SGPR_512RegClassID;
2601 }
2602 } else if (Is == IS_AGPR) {
2603 switch (RegWidth) {
2604 default: return -1;
2605 case 32:
2606 return AMDGPU::AGPR_32RegClassID;
2607 case 64:
2608 return AMDGPU::AReg_64RegClassID;
2609 case 96:
2610 return AMDGPU::AReg_96RegClassID;
2611 case 128:
2612 return AMDGPU::AReg_128RegClassID;
2613 case 160:
2614 return AMDGPU::AReg_160RegClassID;
2615 case 192:
2616 return AMDGPU::AReg_192RegClassID;
2617 case 224:
2618 return AMDGPU::AReg_224RegClassID;
2619 case 256:
2620 return AMDGPU::AReg_256RegClassID;
2621 case 288:
2622 return AMDGPU::AReg_288RegClassID;
2623 case 320:
2624 return AMDGPU::AReg_320RegClassID;
2625 case 352:
2626 return AMDGPU::AReg_352RegClassID;
2627 case 384:
2628 return AMDGPU::AReg_384RegClassID;
2629 case 512:
2630 return AMDGPU::AReg_512RegClassID;
2631 case 1024:
2632 return AMDGPU::AReg_1024RegClassID;
2633 }
2634 }
2635 return -1;
2636}
2637
2638 static MCRegister getSpecialRegForName(StringRef RegName) {
2639 return StringSwitch<MCRegister>(RegName)
2640 .Case("exec", AMDGPU::EXEC)
2641 .Case("vcc", AMDGPU::VCC)
2642 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2643 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2644 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2645 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2646 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2647 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2648 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2649 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2650 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2651 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2652 .Case("src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
2653 .Case("src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
2654 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2655 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2656 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2657 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2658 .Case("m0", AMDGPU::M0)
2659 .Case("vccz", AMDGPU::SRC_VCCZ)
2660 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2661 .Case("execz", AMDGPU::SRC_EXECZ)
2662 .Case("src_execz", AMDGPU::SRC_EXECZ)
2663 .Case("scc", AMDGPU::SRC_SCC)
2664 .Case("src_scc", AMDGPU::SRC_SCC)
2665 .Case("tba", AMDGPU::TBA)
2666 .Case("tma", AMDGPU::TMA)
2667 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2668 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2669 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2670 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2671 .Case("vcc_lo", AMDGPU::VCC_LO)
2672 .Case("vcc_hi", AMDGPU::VCC_HI)
2673 .Case("exec_lo", AMDGPU::EXEC_LO)
2674 .Case("exec_hi", AMDGPU::EXEC_HI)
2675 .Case("tma_lo", AMDGPU::TMA_LO)
2676 .Case("tma_hi", AMDGPU::TMA_HI)
2677 .Case("tba_lo", AMDGPU::TBA_LO)
2678 .Case("tba_hi", AMDGPU::TBA_HI)
2679 .Case("pc", AMDGPU::PC_REG)
2680 .Case("null", AMDGPU::SGPR_NULL)
2681 .Default(AMDGPU::NoRegister);
2682}
2683
2684bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2685 SMLoc &EndLoc, bool RestoreOnFailure) {
2686 auto R = parseRegister();
2687 if (!R) return true;
2688 assert(R->isReg());
2689 RegNo = R->getReg();
2690 StartLoc = R->getStartLoc();
2691 EndLoc = R->getEndLoc();
2692 return false;
2693}
2694
2695bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2696 SMLoc &EndLoc) {
2697 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2698}
2699
2700ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2701 SMLoc &EndLoc) {
2702 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2703 bool PendingErrors = getParser().hasPendingError();
2704 getParser().clearPendingErrors();
2705 if (PendingErrors)
2706 return ParseStatus::Failure;
2707 if (Result)
2708 return ParseStatus::NoMatch;
2709 return ParseStatus::Success;
2710}
2711
2712bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2713 RegisterKind RegKind,
2714 MCRegister Reg1, SMLoc Loc) {
2715 switch (RegKind) {
2716 case IS_SPECIAL:
2717 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2718 Reg = AMDGPU::EXEC;
2719 RegWidth = 64;
2720 return true;
2721 }
2722 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2723 Reg = AMDGPU::FLAT_SCR;
2724 RegWidth = 64;
2725 return true;
2726 }
2727 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2728 Reg = AMDGPU::XNACK_MASK;
2729 RegWidth = 64;
2730 return true;
2731 }
2732 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2733 Reg = AMDGPU::VCC;
2734 RegWidth = 64;
2735 return true;
2736 }
2737 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2738 Reg = AMDGPU::TBA;
2739 RegWidth = 64;
2740 return true;
2741 }
2742 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2743 Reg = AMDGPU::TMA;
2744 RegWidth = 64;
2745 return true;
2746 }
2747 Error(Loc, "register does not fit in the list");
2748 return false;
2749 case IS_VGPR:
2750 case IS_SGPR:
2751 case IS_AGPR:
2752 case IS_TTMP:
2753 if (Reg1 != Reg + RegWidth / 32) {
2754 Error(Loc, "registers in a list must have consecutive indices");
2755 return false;
2756 }
2757 RegWidth += 32;
2758 return true;
2759 default:
2760 llvm_unreachable("unexpected register kind");
2761 }
2762}
2763
2764 struct RegInfo {
2765 StringLiteral Name;
2766 RegisterKind Kind;
2767};
2768
2769static constexpr RegInfo RegularRegisters[] = {
2770 {{"v"}, IS_VGPR},
2771 {{"s"}, IS_SGPR},
2772 {{"ttmp"}, IS_TTMP},
2773 {{"acc"}, IS_AGPR},
2774 {{"a"}, IS_AGPR},
2775};
2776
2777static bool isRegularReg(RegisterKind Kind) {
2778 return Kind == IS_VGPR ||
2779 Kind == IS_SGPR ||
2780 Kind == IS_TTMP ||
2781 Kind == IS_AGPR;
2782}
2783
2784 static const RegInfo *getRegularRegInfo(StringRef Str) {
2785 for (const RegInfo &Reg : RegularRegisters)
2786 if (Str.starts_with(Reg.Name))
2787 return &Reg;
2788 return nullptr;
2789}
2790
2791static bool getRegNum(StringRef Str, unsigned& Num) {
2792 return !Str.getAsInteger(10, Num);
2793}
2794
2795bool
2796AMDGPUAsmParser::isRegister(const AsmToken &Token,
2797 const AsmToken &NextToken) const {
2798
2799 // A list of consecutive registers: [s0,s1,s2,s3]
2800 if (Token.is(AsmToken::LBrac))
2801 return true;
2802
2803 if (!Token.is(AsmToken::Identifier))
2804 return false;
2805
2806 // A single register like s0 or a range of registers like s[0:1]
2807
2808 StringRef Str = Token.getString();
2809 const RegInfo *Reg = getRegularRegInfo(Str);
2810 if (Reg) {
2811 StringRef RegName = Reg->Name;
2812 StringRef RegSuffix = Str.substr(RegName.size());
2813 if (!RegSuffix.empty()) {
2814 RegSuffix.consume_back(".l");
2815 RegSuffix.consume_back(".h");
2816 unsigned Num;
2817 // A single register with an index: rXX
2818 if (getRegNum(RegSuffix, Num))
2819 return true;
2820 } else {
2821 // A range of registers: r[XX:YY].
2822 if (NextToken.is(AsmToken::LBrac))
2823 return true;
2824 }
2825 }
2826
2827 return getSpecialRegForName(Str).isValid();
2828}
2829
2830bool
2831AMDGPUAsmParser::isRegister()
2832{
2833 return isRegister(getToken(), peekToken());
2834}
2835
2836MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2837 unsigned SubReg, unsigned RegWidth,
2838 SMLoc Loc) {
2839 assert(isRegularReg(RegKind));
2840
2841 unsigned AlignSize = 1;
2842 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2843 // SGPR and TTMP registers must be aligned.
2844 // Max required alignment is 4 dwords.
2845 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2846 }
2847
2848 if (RegNum % AlignSize != 0) {
2849 Error(Loc, "invalid register alignment");
2850 return MCRegister();
2851 }
2852
2853 unsigned RegIdx = RegNum / AlignSize;
2854 int RCID = getRegClass(RegKind, RegWidth);
2855 if (RCID == -1) {
2856 Error(Loc, "invalid or unsupported register size");
2857 return MCRegister();
2858 }
2859
2860 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2861 const MCRegisterClass RC = TRI->getRegClass(RCID);
2862 if (RegIdx >= RC.getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
2863 Error(Loc, "register index is out of range");
2864 return AMDGPU::NoRegister;
2865 }
2866
2867 if (RegKind == IS_VGPR && !isGFX1250() && RegIdx + RegWidth / 32 > 256) {
2868 Error(Loc, "register index is out of range");
2869 return MCRegister();
2870 }
2871
2872 MCRegister Reg = RC.getRegister(RegIdx);
2873
2874 if (SubReg) {
2875 Reg = TRI->getSubReg(Reg, SubReg);
2876
2877 // Currently all regular registers have their .l and .h subregisters, so
2878 // we should never need to generate an error here.
2879 assert(Reg && "Invalid subregister!");
2880 }
2881
2882 return Reg;
2883}
2884
2885bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
2886 unsigned &SubReg) {
2887 int64_t RegLo, RegHi;
2888 if (!skipToken(AsmToken::LBrac, "missing register index"))
2889 return false;
2890
2891 SMLoc FirstIdxLoc = getLoc();
2892 SMLoc SecondIdxLoc;
2893
2894 if (!parseExpr(RegLo))
2895 return false;
2896
2897 if (trySkipToken(AsmToken::Colon)) {
2898 SecondIdxLoc = getLoc();
2899 if (!parseExpr(RegHi))
2900 return false;
2901 } else {
2902 RegHi = RegLo;
2903 }
2904
2905 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2906 return false;
2907
2908 if (!isUInt<32>(RegLo)) {
2909 Error(FirstIdxLoc, "invalid register index");
2910 return false;
2911 }
2912
2913 if (!isUInt<32>(RegHi)) {
2914 Error(SecondIdxLoc, "invalid register index");
2915 return false;
2916 }
2917
2918 if (RegLo > RegHi) {
2919 Error(FirstIdxLoc, "first register index should not exceed second index");
2920 return false;
2921 }
2922
2923 if (RegHi == RegLo) {
2924 StringRef RegSuffix = getTokenStr();
2925 if (RegSuffix == ".l") {
2926 SubReg = AMDGPU::lo16;
2927 lex();
2928 } else if (RegSuffix == ".h") {
2929 SubReg = AMDGPU::hi16;
2930 lex();
2931 }
2932 }
2933
2934 Num = static_cast<unsigned>(RegLo);
2935 RegWidth = 32 * ((RegHi - RegLo) + 1);
2936
2937 return true;
2938}
2939
2940MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2941 unsigned &RegNum,
2942 unsigned &RegWidth,
2943 SmallVectorImpl<AsmToken> &Tokens) {
2944 assert(isToken(AsmToken::Identifier));
2945 MCRegister Reg = getSpecialRegForName(getTokenStr());
2946 if (Reg) {
2947 RegNum = 0;
2948 RegWidth = 32;
2949 RegKind = IS_SPECIAL;
2950 Tokens.push_back(getToken());
2951 lex(); // skip register name
2952 }
2953 return Reg;
2954}
2955
2956MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2957 unsigned &RegNum,
2958 unsigned &RegWidth,
2959 SmallVectorImpl<AsmToken> &Tokens) {
2960 assert(isToken(AsmToken::Identifier));
2961 StringRef RegName = getTokenStr();
2962 auto Loc = getLoc();
2963
2964 const RegInfo *RI = getRegularRegInfo(RegName);
2965 if (!RI) {
2966 Error(Loc, "invalid register name");
2967 return MCRegister();
2968 }
2969
2970 Tokens.push_back(getToken());
2971 lex(); // skip register name
2972
2973 RegKind = RI->Kind;
2974 StringRef RegSuffix = RegName.substr(RI->Name.size());
2975 unsigned SubReg = NoSubRegister;
2976 if (!RegSuffix.empty()) {
2977 if (RegSuffix.consume_back(".l"))
2978 SubReg = AMDGPU::lo16;
2979 else if (RegSuffix.consume_back(".h"))
2980 SubReg = AMDGPU::hi16;
2981
2982 // Single 32-bit register: vXX.
2983 if (!getRegNum(RegSuffix, RegNum)) {
2984 Error(Loc, "invalid register index");
2985 return MCRegister();
2986 }
2987 RegWidth = 32;
2988 } else {
2989 // Range of registers: v[XX:YY]. ":YY" is optional.
2990 if (!ParseRegRange(RegNum, RegWidth, SubReg))
2991 return MCRegister();
2992 }
2993
2994 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
2995}
2996
2997MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
2998 unsigned &RegNum, unsigned &RegWidth,
2999 SmallVectorImpl<AsmToken> &Tokens) {
3000 MCRegister Reg;
3001 auto ListLoc = getLoc();
3002
3003 if (!skipToken(AsmToken::LBrac,
3004 "expected a register or a list of registers")) {
3005 return MCRegister();
3006 }
3007
3008 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
3009
3010 auto Loc = getLoc();
3011 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
3012 return MCRegister();
3013 if (RegWidth != 32) {
3014 Error(Loc, "expected a single 32-bit register");
3015 return MCRegister();
3016 }
3017
3018 for (; trySkipToken(AsmToken::Comma); ) {
3019 RegisterKind NextRegKind;
3020 MCRegister NextReg;
3021 unsigned NextRegNum, NextRegWidth;
3022 Loc = getLoc();
3023
3024 if (!ParseAMDGPURegister(NextRegKind, NextReg,
3025 NextRegNum, NextRegWidth,
3026 Tokens)) {
3027 return MCRegister();
3028 }
3029 if (NextRegWidth != 32) {
3030 Error(Loc, "expected a single 32-bit register");
3031 return MCRegister();
3032 }
3033 if (NextRegKind != RegKind) {
3034 Error(Loc, "registers in a list must be of the same kind");
3035 return MCRegister();
3036 }
3037 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
3038 return MCRegister();
3039 }
3040
3041 if (!skipToken(AsmToken::RBrac,
3042 "expected a comma or a closing square bracket")) {
3043 return MCRegister();
3044 }
3045
3046 if (isRegularReg(RegKind))
3047 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3048
3049 return Reg;
3050}
3051
3052bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3053 MCRegister &Reg, unsigned &RegNum,
3054 unsigned &RegWidth,
3055 SmallVectorImpl<AsmToken> &Tokens) {
3056 auto Loc = getLoc();
3057 Reg = MCRegister();
3058
3059 if (isToken(AsmToken::Identifier)) {
3060 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3061 if (!Reg)
3062 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3063 } else {
3064 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3065 }
3066
3067 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3068 if (!Reg) {
3069 assert(Parser.hasPendingError());
3070 return false;
3071 }
3072
3073 if (!subtargetHasRegister(*TRI, Reg)) {
3074 if (Reg == AMDGPU::SGPR_NULL) {
3075 Error(Loc, "'null' operand is not supported on this GPU");
3076 } else {
3078 " register not available on this GPU");
3079 }
3080 return false;
3081 }
3082
3083 return true;
3084}
3085
3086bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3087 MCRegister &Reg, unsigned &RegNum,
3088 unsigned &RegWidth,
3089 bool RestoreOnFailure /*=false*/) {
3090 Reg = MCRegister();
3091
3092 SmallVector<AsmToken, 1> Tokens;
3093 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3094 if (RestoreOnFailure) {
3095 while (!Tokens.empty()) {
3096 getLexer().UnLex(Tokens.pop_back_val());
3097 }
3098 }
3099 return true;
3100 }
3101 return false;
3102}
3103
3104std::optional<StringRef>
3105AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3106 switch (RegKind) {
3107 case IS_VGPR:
3108 return StringRef(".amdgcn.next_free_vgpr");
3109 case IS_SGPR:
3110 return StringRef(".amdgcn.next_free_sgpr");
3111 default:
3112 return std::nullopt;
3113 }
3114}
3115
3116void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3117 auto SymbolName = getGprCountSymbolName(RegKind);
3118 assert(SymbolName && "initializing invalid register kind");
3119 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3121 Sym->setRedefinable(true);
3122}
3123
3124bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3125 unsigned DwordRegIndex,
3126 unsigned RegWidth) {
3127 // Symbols are only defined for GCN targets
3128 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3129 return true;
3130
3131 auto SymbolName = getGprCountSymbolName(RegKind);
3132 if (!SymbolName)
3133 return true;
3134 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3135
3136 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3137 int64_t OldCount;
3138
3139 if (!Sym->isVariable())
3140 return !Error(getLoc(),
3141 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3142 if (!Sym->getVariableValue()->evaluateAsAbsolute(OldCount))
3143 return !Error(
3144 getLoc(),
3145 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3146
3147 if (OldCount <= NewMax)
3149
3150 return true;
3151}
3152
3153std::unique_ptr<AMDGPUOperand>
3154AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3155 const auto &Tok = getToken();
3156 SMLoc StartLoc = Tok.getLoc();
3157 SMLoc EndLoc = Tok.getEndLoc();
3158 RegisterKind RegKind;
3159 MCRegister Reg;
3160 unsigned RegNum, RegWidth;
3161
3162 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3163 return nullptr;
3164 }
3165 if (isHsaAbi(getSTI())) {
3166 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3167 return nullptr;
3168 } else
3169 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3170 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3171}
3172
3173ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3174 bool HasSP3AbsModifier, LitModifier Lit) {
3175 // TODO: add syntactic sugar for 1/(2*PI)
3176
3177 if (isRegister() || isModifier())
3178 return ParseStatus::NoMatch;
3179
3180 if (Lit == LitModifier::None) {
3181 if (trySkipId("lit"))
3182 Lit = LitModifier::Lit;
3183 else if (trySkipId("lit64"))
3184 Lit = LitModifier::Lit64;
3185
3186 if (Lit != LitModifier::None) {
3187 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3188 return ParseStatus::Failure;
3189 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, Lit);
3190 if (S.isSuccess() &&
3191 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3192 return ParseStatus::Failure;
3193 return S;
3194 }
3195 }
3196
3197 const auto& Tok = getToken();
3198 const auto& NextTok = peekToken();
3199 bool IsReal = Tok.is(AsmToken::Real);
3200 SMLoc S = getLoc();
3201 bool Negate = false;
3202
3203 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3204 lex();
3205 IsReal = true;
3206 Negate = true;
3207 }
3208
3209 AMDGPUOperand::Modifiers Mods;
3210 Mods.Lit = Lit;
3211
3212 if (IsReal) {
3213 // Floating-point expressions are not supported.
3214 // Only floating-point literals with an
3215 // optional sign are allowed.
3216
3217 StringRef Num = getTokenStr();
3218 lex();
3219
3220 APFloat RealVal(APFloat::IEEEdouble());
3221 auto roundMode = APFloat::rmNearestTiesToEven;
3222 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3223 return ParseStatus::Failure;
3224 if (Negate)
3225 RealVal.changeSign();
3226
3227 Operands.push_back(
3228 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3229 AMDGPUOperand::ImmTyNone, true));
3230 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3231 Op.setModifiers(Mods);
3232
3233 return ParseStatus::Success;
3234
3235 } else {
3236 int64_t IntVal;
3237 const MCExpr *Expr;
3238 SMLoc S = getLoc();
3239
3240 if (HasSP3AbsModifier) {
3241 // This is a workaround for handling expressions
3242 // as arguments of SP3 'abs' modifier, for example:
3243 // |1.0|
3244 // |-1|
3245 // |1+x|
3246 // This syntax is not compatible with the syntax of standard
3247 // MC expressions (due to the trailing '|').
3248 SMLoc EndLoc;
3249 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3250 return ParseStatus::Failure;
3251 } else {
3252 if (Parser.parseExpression(Expr))
3253 return ParseStatus::Failure;
3254 }
3255
3256 if (Expr->evaluateAsAbsolute(IntVal)) {
3257 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3258 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3259 Op.setModifiers(Mods);
3260 } else {
3261 if (Lit != LitModifier::None)
3262 return ParseStatus::NoMatch;
3263 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3264 }
3265
3266 return ParseStatus::Success;
3267 }
3268
3269 return ParseStatus::NoMatch;
3270}
3271
3272ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3273 if (!isRegister())
3274 return ParseStatus::NoMatch;
3275
3276 if (auto R = parseRegister()) {
3277 assert(R->isReg());
3278 Operands.push_back(std::move(R));
3279 return ParseStatus::Success;
3280 }
3281 return ParseStatus::Failure;
3282}
3283
3284ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3285 bool HasSP3AbsMod, LitModifier Lit) {
3286 ParseStatus Res = parseReg(Operands);
3287 if (!Res.isNoMatch())
3288 return Res;
3289 if (isModifier())
3290 return ParseStatus::NoMatch;
3291 return parseImm(Operands, HasSP3AbsMod, Lit);
3292}
3293
3294bool
3295AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3296 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3297 const auto &str = Token.getString();
3298 return str == "abs" || str == "neg" || str == "sext";
3299 }
3300 return false;
3301}
3302
3303bool
3304AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3305 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3306}
3307
3308bool
3309AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3310 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3311}
3312
3313bool
3314AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3315 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3316}
3317
3318// Check if this is an operand modifier or an opcode modifier
3319 // which may look like an expression but is not. We should
3320// avoid parsing these modifiers as expressions. Currently
3321// recognized sequences are:
3322// |...|
3323// abs(...)
3324// neg(...)
3325// sext(...)
3326// -reg
3327// -|...|
3328// -abs(...)
3329// name:...
3330//
3331bool
3332AMDGPUAsmParser::isModifier() {
3333
3334 AsmToken Tok = getToken();
3335 AsmToken NextToken[2];
3336 peekTokens(NextToken);
3337
3338 return isOperandModifier(Tok, NextToken[0]) ||
3339 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3340 isOpcodeModifierWithVal(Tok, NextToken[0]);
3341}
3342
3343// Check if the current token is an SP3 'neg' modifier.
3344// Currently this modifier is allowed in the following context:
3345//
3346// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3347// 2. Before an 'abs' modifier: -abs(...)
3348// 3. Before an SP3 'abs' modifier: -|...|
3349//
3350// In all other cases "-" is handled as a part
3351// of an expression that follows the sign.
3352//
3353// Note: When "-" is followed by an integer literal,
3354// this is interpreted as integer negation rather
3355 // than a floating-point NEG modifier applied to the literal.
3356 // Besides being counter-intuitive, such use of the floating-point
3357 // NEG modifier would result in a different meaning
3358 // of integer literals used with VOP1/2/C and VOP3,
3359 // for example:
3360 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3361 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3362 // Negative fp literals with a preceding "-" are
3363 // handled likewise, for uniformity.
3364//
3365bool
3366AMDGPUAsmParser::parseSP3NegModifier() {
3367
3368 AsmToken NextToken[2];
3369 peekTokens(NextToken);
3370
3371 if (isToken(AsmToken::Minus) &&
3372 (isRegister(NextToken[0], NextToken[1]) ||
3373 NextToken[0].is(AsmToken::Pipe) ||
3374 isId(NextToken[0], "abs"))) {
3375 lex();
3376 return true;
3377 }
3378
3379 return false;
3380}
3381
3382ParseStatus
3383AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3384 bool AllowImm) {
3385 bool Neg, SP3Neg;
3386 bool Abs, SP3Abs;
3387 SMLoc Loc;
3388
3389 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3390 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3391 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3392
3393 SP3Neg = parseSP3NegModifier();
3394
3395 Loc = getLoc();
3396 Neg = trySkipId("neg");
3397 if (Neg && SP3Neg)
3398 return Error(Loc, "expected register or immediate");
3399 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3400 return ParseStatus::Failure;
3401
3402 Abs = trySkipId("abs");
3403 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3404 return ParseStatus::Failure;
3405
3406 LitModifier Lit = LitModifier::None;
3407 if (trySkipId("lit")) {
3408 Lit = LitModifier::Lit;
3409 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3410 return ParseStatus::Failure;
3411 } else if (trySkipId("lit64")) {
3412 Lit = LitModifier::Lit64;
3413 if (!skipToken(AsmToken::LParen, "expected left paren after lit64"))
3414 return ParseStatus::Failure;
3415 if (!has64BitLiterals())
3416 return Error(Loc, "lit64 is not supported on this GPU");
3417 }
3418
3419 Loc = getLoc();
3420 SP3Abs = trySkipToken(AsmToken::Pipe);
3421 if (Abs && SP3Abs)
3422 return Error(Loc, "expected register or immediate");
3423
3424 ParseStatus Res;
3425 if (AllowImm) {
3426 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3427 } else {
3428 Res = parseReg(Operands);
3429 }
3430 if (!Res.isSuccess())
3431 return (SP3Neg || Neg || SP3Abs || Abs || Lit != LitModifier::None)
3433 : Res;
3434
3435 if (Lit != LitModifier::None && !Operands.back()->isImm())
3436 Error(Loc, "expected immediate with lit modifier");
3437
3438 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3439 return ParseStatus::Failure;
3440 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3441 return ParseStatus::Failure;
3442 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3443 return ParseStatus::Failure;
3444 if (Lit != LitModifier::None &&
3445 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3446 return ParseStatus::Failure;
3447
3448 AMDGPUOperand::Modifiers Mods;
3449 Mods.Abs = Abs || SP3Abs;
3450 Mods.Neg = Neg || SP3Neg;
3451 Mods.Lit = Lit;
3452
3453 if (Mods.hasFPModifiers() || Lit != LitModifier::None) {
3454 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3455 if (Op.isExpr())
3456 return Error(Op.getStartLoc(), "expected an absolute expression");
3457 Op.setModifiers(Mods);
3458 }
3459 return ParseStatus::Success;
3460}
3461
3462ParseStatus
3463AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3464 bool AllowImm) {
3465 bool Sext = trySkipId("sext");
3466 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3467 return ParseStatus::Failure;
3468
3469 ParseStatus Res;
3470 if (AllowImm) {
3471 Res = parseRegOrImm(Operands);
3472 } else {
3473 Res = parseReg(Operands);
3474 }
3475 if (!Res.isSuccess())
3476 return Sext ? ParseStatus::Failure : Res;
3477
3478 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3479 return ParseStatus::Failure;
3480
3481 AMDGPUOperand::Modifiers Mods;
3482 Mods.Sext = Sext;
3483
3484 if (Mods.hasIntModifiers()) {
3485 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3486 if (Op.isExpr())
3487 return Error(Op.getStartLoc(), "expected an absolute expression");
3488 Op.setModifiers(Mods);
3489 }
3490
3491 return ParseStatus::Success;
3492}
3493
3494ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3495 return parseRegOrImmWithFPInputMods(Operands, false);
3496}
3497
3498ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3499 return parseRegOrImmWithIntInputMods(Operands, false);
3500}
3501
3502ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3503 auto Loc = getLoc();
3504 if (trySkipId("off")) {
3505 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3506 AMDGPUOperand::ImmTyOff, false));
3507 return ParseStatus::Success;
3508 }
3509
3510 if (!isRegister())
3511 return ParseStatus::NoMatch;
3512
3513 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3514 if (Reg) {
3515 Operands.push_back(std::move(Reg));
3516 return ParseStatus::Success;
3517 }
3518
3519 return ParseStatus::Failure;
3520}
3521
3522unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3523 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3524
3525 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3526 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3527 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3528 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3529 return Match_InvalidOperand;
3530
3531 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3532 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3533 // v_mac_f32/16 allow only dst_sel == DWORD;
3534 auto OpNum =
3535 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3536 const auto &Op = Inst.getOperand(OpNum);
3537 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3538 return Match_InvalidOperand;
3539 }
3540 }
3541
3542 // Asm can first try to match VOPD or VOPD3. By failing early here with
3543 // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3544 // Checking later during validateInstruction does not give a chance to retry
3545 // parsing as a different encoding.
3546 if (tryAnotherVOPDEncoding(Inst))
3547 return Match_InvalidOperand;
3548
3549 return Match_Success;
3550}
3551
3561
3562// What asm variants we should check
3563ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3564 if (isForcedDPP() && isForcedVOP3()) {
3565 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3566 return ArrayRef(Variants);
3567 }
3568 if (getForcedEncodingSize() == 32) {
3569 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3570 return ArrayRef(Variants);
3571 }
3572
3573 if (isForcedVOP3()) {
3574 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3575 return ArrayRef(Variants);
3576 }
3577
3578 if (isForcedSDWA()) {
3579 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3580 AMDGPUAsmVariants::SDWA9};
3581 return ArrayRef(Variants);
3582 }
3583
3584 if (isForcedDPP()) {
3585 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3586 return ArrayRef(Variants);
3587 }
3588
3589 return getAllVariants();
3590}
3591
3592StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3593 if (isForcedDPP() && isForcedVOP3())
3594 return "e64_dpp";
3595
3596 if (getForcedEncodingSize() == 32)
3597 return "e32";
3598
3599 if (isForcedVOP3())
3600 return "e64";
3601
3602 if (isForcedSDWA())
3603 return "sdwa";
3604
3605 if (isForcedDPP())
3606 return "dpp";
3607
3608 return "";
3609}
3610
3611unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3612 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3613 for (MCPhysReg Reg : Desc.implicit_uses()) {
3614 switch (Reg) {
3615 case AMDGPU::FLAT_SCR:
3616 case AMDGPU::VCC:
3617 case AMDGPU::VCC_LO:
3618 case AMDGPU::VCC_HI:
3619 case AMDGPU::M0:
3620 return Reg;
3621 default:
3622 break;
3623 }
3624 }
3625 return AMDGPU::NoRegister;
3626}
3627
3628// NB: This code is correct only when used to check constant
3629 // bus limitations because GFX7 supports no f16 inline constants.
3630// Note that there are no cases when a GFX7 opcode violates
3631// constant bus limitations due to the use of an f16 constant.
3632bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3633 unsigned OpIdx) const {
3634 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3635
3638 return false;
3639 }
3640
3641 const MCOperand &MO = Inst.getOperand(OpIdx);
3642
3643 int64_t Val = MO.getImm();
3644 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3645
3646 switch (OpSize) { // expected operand size
3647 case 8:
3648 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3649 case 4:
3650 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3651 case 2: {
3652 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3655 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3656
3660
3664
3668
3671 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3672
3675 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3676
3678 return false;
3679
3680 llvm_unreachable("invalid operand type");
3681 }
3682 default:
3683 llvm_unreachable("invalid operand size");
3684 }
3685}
3686
3687unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3688 if (!isGFX10Plus())
3689 return 1;
3690
3691 switch (Opcode) {
3692 // 64-bit shift instructions can use only one scalar value input
3693 case AMDGPU::V_LSHLREV_B64_e64:
3694 case AMDGPU::V_LSHLREV_B64_gfx10:
3695 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3696 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3697 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3698 case AMDGPU::V_LSHRREV_B64_e64:
3699 case AMDGPU::V_LSHRREV_B64_gfx10:
3700 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3701 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3702 case AMDGPU::V_ASHRREV_I64_e64:
3703 case AMDGPU::V_ASHRREV_I64_gfx10:
3704 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3705 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3706 case AMDGPU::V_LSHL_B64_e64:
3707 case AMDGPU::V_LSHR_B64_e64:
3708 case AMDGPU::V_ASHR_I64_e64:
3709 return 1;
3710 default:
3711 return 2;
3712 }
3713}
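// Example of the limits above (illustrative operands): on GFX10+ a VOP3 op
// such as "v_add_f32_e64 v0, s1, s2" may read two scalar values, but a
// 64-bit shift such as "v_lshlrev_b64 v[0:1], s4, s[2:3]" is still limited
// to a single scalar value on the constant bus.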
3714
3715 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3716 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3717
3718// Get regular operand indices in the same order as specified
3719// in the instruction (but append mandatory literals to the end).
3720 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3721                                            bool AddMandatoryLiterals = false) {
3722
3723 int16_t ImmIdx =
3724 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3725
3726 if (isVOPD(Opcode)) {
3727 int16_t ImmXIdx =
3728 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
3729
3730 return {getNamedOperandIdx(Opcode, OpName::src0X),
3731 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3732 getNamedOperandIdx(Opcode, OpName::vsrc2X),
3733 getNamedOperandIdx(Opcode, OpName::src0Y),
3734 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3735 getNamedOperandIdx(Opcode, OpName::vsrc2Y),
3736 ImmXIdx,
3737 ImmIdx};
3738 }
3739
3740 return {getNamedOperandIdx(Opcode, OpName::src0),
3741 getNamedOperandIdx(Opcode, OpName::src1),
3742 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3743}
3744
3745bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3746 const MCOperand &MO = Inst.getOperand(OpIdx);
3747 if (MO.isImm())
3748 return !isInlineConstant(Inst, OpIdx);
3749 if (MO.isReg()) {
3750 auto Reg = MO.getReg();
3751 if (!Reg)
3752 return false;
3753 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3754 auto PReg = mc2PseudoReg(Reg);
3755 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3756 }
3757 return true;
3758}
3759
3760// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3761// Writelane is special in that it can use SGPR and M0 (which would normally
3762// count as using the constant bus twice - but in this case it is allowed since
3763// the lane selector doesn't count as a use of the constant bus). However, it is
3764// still required to abide by the 1 SGPR rule.
3765static bool checkWriteLane(const MCInst &Inst) {
3766 const unsigned Opcode = Inst.getOpcode();
3767 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3768 return false;
3769 const MCOperand &LaneSelOp = Inst.getOperand(2);
3770 if (!LaneSelOp.isReg())
3771 return false;
3772 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3773 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3774}
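// For illustration (hypothetical operands): "v_writelane_b32 v1, s2, m0"
// reads both s2 and m0, but only s2 counts against the constant bus here
// because m0 is the lane selector; a second non-lane-select SGPR would
// still violate the 1 SGPR rule.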
3775
3776bool AMDGPUAsmParser::validateConstantBusLimitations(
3777 const MCInst &Inst, const OperandVector &Operands) {
3778 const unsigned Opcode = Inst.getOpcode();
3779 const MCInstrDesc &Desc = MII.get(Opcode);
3780 MCRegister LastSGPR;
3781 unsigned ConstantBusUseCount = 0;
3782 unsigned NumLiterals = 0;
3783 unsigned LiteralSize;
3784
3785 if (!(Desc.TSFlags &
3788 !isVOPD(Opcode))
3789 return true;
3790
3791 if (checkWriteLane(Inst))
3792 return true;
3793
3794 // Check special imm operands (used by madmk, etc)
3795 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3796 ++NumLiterals;
3797 LiteralSize = 4;
3798 }
3799
3800 SmallDenseSet<unsigned> SGPRsUsed;
3801 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3802 if (SGPRUsed != AMDGPU::NoRegister) {
3803 SGPRsUsed.insert(SGPRUsed);
3804 ++ConstantBusUseCount;
3805 }
3806
3807 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3808
3809 unsigned ConstantBusLimit = getConstantBusLimit(Opcode);
3810
3811 for (int OpIdx : OpIndices) {
3812 if (OpIdx == -1)
3813 continue;
3814
3815 const MCOperand &MO = Inst.getOperand(OpIdx);
3816 if (usesConstantBus(Inst, OpIdx)) {
3817 if (MO.isReg()) {
3818 LastSGPR = mc2PseudoReg(MO.getReg());
3819 // Pairs of registers with a partial intersection like these
3820 // s0, s[0:1]
3821 // flat_scratch_lo, flat_scratch
3822 // flat_scratch_lo, flat_scratch_hi
3823 // are theoretically valid but they are disabled anyway.
3824 // Note that this code mimics SIInstrInfo::verifyInstruction
3825 if (SGPRsUsed.insert(LastSGPR).second) {
3826 ++ConstantBusUseCount;
3827 }
3828 } else { // Expression or a literal
3829
3830 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3831 continue; // special operand like VINTERP attr_chan
3832
3833 // An instruction may use only one literal.
3834 // This has been validated on the previous step.
3835 // See validateVOPLiteral.
3836 // This literal may be used as more than one operand.
3837 // If all these operands are of the same size,
3838 // this literal counts as one scalar value.
3839 // Otherwise it counts as 2 scalar values.
3840 // See "GFX10 Shader Programming", section 3.6.2.3.
3841
3842 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3843 if (Size < 4)
3844 Size = 4;
3845
3846 if (NumLiterals == 0) {
3847 NumLiterals = 1;
3848 LiteralSize = Size;
3849 } else if (LiteralSize != Size) {
3850 NumLiterals = 2;
3851 }
3852 }
3853 }
3854
3855 if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
3856 Error(getOperandLoc(Operands, OpIdx),
3857 "invalid operand (violates constant bus restrictions)");
3858 return false;
3859 }
3860 }
3861 return true;
3862}
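// Example of the literal-size rule above (illustrative): if the same 32-bit
// literal, e.g. 0x3e800000, is used for two 32-bit source operands, it counts
// as one scalar value; reusing one literal for operands of different sizes
// counts as two.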
3863
3864std::optional<unsigned>
3865AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
3866
3867 const unsigned Opcode = Inst.getOpcode();
3868 if (!isVOPD(Opcode))
3869 return {};
3870
3871 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3872
3873 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3874 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3875 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3876 ? Opr.getReg()
3877 : MCRegister();
3878 };
3879
3880 // On GFX12+, if both OpX and OpY are V_MOV_B32 then OpY uses the SRC2
3881 // source-cache.
3882 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
3883 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
3884 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
3885 bool AllowSameVGPR = isGFX1250();
3886
3887 if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
3888 for (auto OpName : {OpName::src0X, OpName::src0Y}) {
3889 int I = getNamedOperandIdx(Opcode, OpName);
3890 const MCOperand &Op = Inst.getOperand(I);
3891 if (!Op.isImm())
3892 continue;
3893 int64_t Imm = Op.getImm();
3894 if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
3895 !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
3896 return (unsigned)I;
3897 }
3898
3899 for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
3900 OpName::vsrc2Y, OpName::imm}) {
3901 int I = getNamedOperandIdx(Opcode, OpName);
3902 if (I == -1)
3903 continue;
3904 const MCOperand &Op = Inst.getOperand(I);
3905 if (Op.isImm())
3906 return (unsigned)I;
3907 }
3908 }
3909
3910 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3911 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
3912 getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
3913
3914 return InvalidCompOprIdx;
3915}
3916
3917bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
3918 const OperandVector &Operands) {
3919
3920 unsigned Opcode = Inst.getOpcode();
3921 bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
3922
3923 if (AsVOPD3) {
3924 for (const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
3925 AMDGPUOperand &Op = (AMDGPUOperand &)*Operand;
3926 if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
3927 (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
3928 Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
3929 }
3930 }
3931
3932 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
3933 if (!InvalidCompOprIdx.has_value())
3934 return true;
3935
3936 auto CompOprIdx = *InvalidCompOprIdx;
3937 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3938 auto ParsedIdx =
3939 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3940 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3941 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3942
3943 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3944 if (CompOprIdx == VOPD::Component::DST) {
3945 if (AsVOPD3)
3946 Error(Loc, "dst registers must be distinct");
3947 else
3948 Error(Loc, "one dst register must be even and the other odd");
3949 } else {
3950 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3951 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3952 " operands must use different VGPR banks");
3953 }
3954
3955 return false;
3956}
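// Illustrative example (hypothetical registers): for the base VOPD encoding
//   v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
// passes the checks above (one even and one odd dst, sources in different
// VGPR banks), whereas two even dst registers would be rejected.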
3957
3958// \returns true if \p Inst does not satisfy VOPD constraints, but can be
3959// potentially used as VOPD3 with the same operands.
3960bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
3961 // First check if it fits VOPD
3962 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
3963 if (!InvalidCompOprIdx.has_value())
3964 return false;
3965
3966 // Then if it fits VOPD3
3967 InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
3968 if (InvalidCompOprIdx.has_value()) {
3969 // If failed operand is dst it is better to show error about VOPD3
3970 // instruction as it has more capabilities and error message will be
3971 // more informative. If the dst is not legal for VOPD3, then it is not
3972 // legal for VOPD either.
3973 if (*InvalidCompOprIdx == VOPD::Component::DST)
3974 return true;
3975
3976 // Otherwise prefer VOPD as we may find ourselves in an awkward situation
3977 // with a conflict in tied implicit src2 of fmac and no asm operand to
3978 // point to.
3979 return false;
3980 }
3981 return true;
3982}
3983
3984 // \returns true if a VOPD3 instruction can also be represented as a shorter
3985// VOPD encoding.
3986bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
3987 const unsigned Opcode = Inst.getOpcode();
3988 const auto &II = getVOPDInstInfo(Opcode, &MII);
3989 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
3990 if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
3991 !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
3992 return false;
3993
3994 // This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has
3995 // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
3996 // be parsed as VOPD which does not accept src2.
3997 if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
3998 II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
3999 return false;
4000
4001 // If any modifiers are set this cannot be VOPD.
4002 for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4003 OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4004 OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4005 int I = getNamedOperandIdx(Opcode, OpName);
4006 if (I == -1)
4007 continue;
4008 if (Inst.getOperand(I).getImm())
4009 return false;
4010 }
4011
4012 return !tryVOPD3(Inst);
4013}
4014
4015 // VOPD3 has more relaxed register constraints than VOPD. We prefer the shorter
4016 // VOPD form but switch to VOPD3 otherwise.
4017bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
4018 const unsigned Opcode = Inst.getOpcode();
4019 if (!isGFX1250() || !isVOPD(Opcode))
4020 return false;
4021
4022 if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
4023 return tryVOPD(Inst);
4024 return tryVOPD3(Inst);
4025}
4026
4027bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
4028
4029 const unsigned Opc = Inst.getOpcode();
4030 const MCInstrDesc &Desc = MII.get(Opc);
4031
4032 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
4033 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
4034 assert(ClampIdx != -1);
4035 return Inst.getOperand(ClampIdx).getImm() == 0;
4036 }
4037
4038 return true;
4039}
4040
4041 constexpr uint64_t MIMGFlags =
4042     SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
4043
4044bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
4045 const SMLoc &IDLoc) {
4046
4047 const unsigned Opc = Inst.getOpcode();
4048 const MCInstrDesc &Desc = MII.get(Opc);
4049
4050 if ((Desc.TSFlags & MIMGFlags) == 0)
4051 return true;
4052
4053 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
4054 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4055 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
4056
4057 if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
4058 return true;
4059
4060 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
4061 return true;
4062
4063 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
4064 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
4065 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4066 if (DMask == 0)
4067 DMask = 1;
4068
4069 bool IsPackedD16 = false;
4070 unsigned DataSize =
4071 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
4072 if (hasPackedD16()) {
4073 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4074 IsPackedD16 = D16Idx >= 0;
4075 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
4076 DataSize = (DataSize + 1) / 2;
4077 }
4078
4079 if ((VDataSize / 4) == DataSize + TFESize)
4080 return true;
4081
4082 StringRef Modifiers;
4083 if (isGFX90A())
4084 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
4085 else
4086 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
4087
4088 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
4089 return false;
4090}
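// Worked example of the size check above (illustrative): dmask=0b0111 loads
// 3 dwords; tfe adds one more, so vdata must be a 4-register tuple. With
// packed d16 the 3 components occupy (3 + 1) / 2 = 2 registers (plus tfe).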
4091
4092bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
4093 const SMLoc &IDLoc) {
4094 const unsigned Opc = Inst.getOpcode();
4095 const MCInstrDesc &Desc = MII.get(Opc);
4096
4097 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
4098 return true;
4099
4100 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4101
4102 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4103     AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4104 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
4105 AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG)
4106 ? AMDGPU::OpName::srsrc
4107 : AMDGPU::OpName::rsrc;
4108 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
4109 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4110 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
4111
4112 assert(VAddr0Idx != -1);
4113 assert(SrsrcIdx != -1);
4114 assert(SrsrcIdx > VAddr0Idx);
4115
4116 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
4117 if (BaseOpcode->BVH) {
4118 if (IsA16 == BaseOpcode->A16)
4119 return true;
4120 Error(IDLoc, "image address size does not match a16");
4121 return false;
4122 }
4123
4124 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4125 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4126 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
4127 unsigned ActualAddrSize =
4128 IsNSA ? SrsrcIdx - VAddr0Idx
4129 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
4130
4131 unsigned ExpectedAddrSize =
4132 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
4133
4134 if (IsNSA) {
4135 if (hasPartialNSAEncoding() &&
4136 ExpectedAddrSize >
4138 int VAddrLastIdx = SrsrcIdx - 1;
4139 unsigned VAddrLastSize =
4140 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
4141
4142 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
4143 }
4144 } else {
4145 if (ExpectedAddrSize > 12)
4146 ExpectedAddrSize = 16;
4147
4148 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
4149 // This provides backward compatibility for assembly created
4150 // before 160b/192b/224b types were directly supported.
4151 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4152 return true;
4153 }
4154
4155 if (ActualAddrSize == ExpectedAddrSize)
4156 return true;
4157
4158 Error(IDLoc, "image address size does not match dim and a16");
4159 return false;
4160}
4161
4162bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
4163
4164 const unsigned Opc = Inst.getOpcode();
4165 const MCInstrDesc &Desc = MII.get(Opc);
4166
4167 if ((Desc.TSFlags & MIMGFlags) == 0)
4168 return true;
4169 if (!Desc.mayLoad() || !Desc.mayStore())
4170 return true; // Not atomic
4171
4172 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4173 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4174
4175 // This is an incomplete check because image_atomic_cmpswap
4176 // may only use 0x3 and 0xf while other atomic operations
4177 // may use 0x1 and 0x3. However these limitations are
4178 // verified when we check that dmask matches dst size.
4179 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4180}
4181
4182bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4183
4184 const unsigned Opc = Inst.getOpcode();
4185 const MCInstrDesc &Desc = MII.get(Opc);
4186
4187 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4188 return true;
4189
4190 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4191 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4192
4193 // GATHER4 instructions use dmask in a different fashion compared to
4194 // other MIMG instructions. The only useful DMASK values are
4195 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4196 // (red,red,red,red) etc.) The ISA document doesn't mention
4197 // this.
4198 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4199}
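// E.g. (illustrative): a gather4 with dmask:0x2 returns the green component
// of four texels as (green, green, green, green); a dmask selecting two
// components, such as 0x3, is rejected here.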
4200
4201bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4202 const OperandVector &Operands) {
4203 if (!isGFX10Plus())
4204 return true;
4205
4206 const unsigned Opc = Inst.getOpcode();
4207 const MCInstrDesc &Desc = MII.get(Opc);
4208
4209 if ((Desc.TSFlags & MIMGFlags) == 0)
4210 return true;
4211
4212 // image_bvh_intersect_ray instructions do not have dim
4214 return true;
4215
4216 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4217 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4218 if (Op.isDim())
4219 return true;
4220 }
4221 return false;
4222}
4223
4224bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4225 const unsigned Opc = Inst.getOpcode();
4226 const MCInstrDesc &Desc = MII.get(Opc);
4227
4228 if ((Desc.TSFlags & MIMGFlags) == 0)
4229 return true;
4230
4231 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4232 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4233     AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4234
4235 if (!BaseOpcode->MSAA)
4236 return true;
4237
4238 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4239 assert(DimIdx != -1);
4240
4241 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4242 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4243
4244 return DimInfo->MSAA;
4245}
4246
4247static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4248{
4249 switch (Opcode) {
4250 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4251 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4252 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4253 return true;
4254 default:
4255 return false;
4256 }
4257}
4258
4259 // movrels* opcodes should only allow VGPRs as src0.
4260// This is specified in .td description for vop1/vop3,
4261// but sdwa is handled differently. See isSDWAOperand.
4262bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4263 const OperandVector &Operands) {
4264
4265 const unsigned Opc = Inst.getOpcode();
4266 const MCInstrDesc &Desc = MII.get(Opc);
4267
4268 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4269 return true;
4270
4271 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4272 assert(Src0Idx != -1);
4273
4274 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4275 if (Src0.isReg()) {
4276 auto Reg = mc2PseudoReg(Src0.getReg());
4277 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4278 if (!isSGPR(Reg, TRI))
4279 return true;
4280 }
4281
4282 Error(getOperandLoc(Operands, Src0Idx), "source operand must be a VGPR");
4283 return false;
4284}
4285
4286bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4287 const OperandVector &Operands) {
4288
4289 const unsigned Opc = Inst.getOpcode();
4290
4291 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4292 return true;
4293
4294 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4295 assert(Src0Idx != -1);
4296
4297 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4298 if (!Src0.isReg())
4299 return true;
4300
4301 auto Reg = mc2PseudoReg(Src0.getReg());
4302 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4303 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4304 Error(getOperandLoc(Operands, Src0Idx),
4305 "source operand must be either a VGPR or an inline constant");
4306 return false;
4307 }
4308
4309 return true;
4310}
4311
4312bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4313 const OperandVector &Operands) {
4314 unsigned Opcode = Inst.getOpcode();
4315 const MCInstrDesc &Desc = MII.get(Opcode);
4316
4317 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4318 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4319 return true;
4320
4321 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4322 if (Src2Idx == -1)
4323 return true;
4324
4325 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4326 Error(getOperandLoc(Operands, Src2Idx),
4327 "inline constants are not allowed for this operand");
4328 return false;
4329 }
4330
4331 return true;
4332}
4333
4334bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4335 const OperandVector &Operands) {
4336 const unsigned Opc = Inst.getOpcode();
4337 const MCInstrDesc &Desc = MII.get(Opc);
4338
4339 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4340 return true;
4341
4342 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4343 if (BlgpIdx != -1) {
4344 if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) {
4345 int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
4346
4347 unsigned CBSZ = Inst.getOperand(CbszIdx).getImm();
4348 unsigned BLGP = Inst.getOperand(BlgpIdx).getImm();
4349
4350 // Validate that the correct register size was used for the floating point
4351 // format operands.
4352
4353 bool Success = true;
4354 if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(CBSZ)) {
4355 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4356 Error(getOperandLoc(Operands, Src0Idx),
4357 "wrong register tuple size for cbsz value " + Twine(CBSZ));
4358 Success = false;
4359 }
4360
4361 if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(BLGP)) {
4362 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4363 Error(getOperandLoc(Operands, Src1Idx),
4364 "wrong register tuple size for blgp value " + Twine(BLGP));
4365 Success = false;
4366 }
4367
4368 return Success;
4369 }
4370 }
4371
4372 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4373 if (Src2Idx == -1)
4374 return true;
4375
4376 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4377 if (!Src2.isReg())
4378 return true;
4379
4380 MCRegister Src2Reg = Src2.getReg();
4381 MCRegister DstReg = Inst.getOperand(0).getReg();
4382 if (Src2Reg == DstReg)
4383 return true;
4384
4385 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4386 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4387 return true;
4388
4389 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4390 Error(getOperandLoc(Operands, Src2Idx),
4391 "source 2 operand must not partially overlap with dst");
4392 return false;
4393 }
4394
4395 return true;
4396}
4397
4398bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4399 switch (Inst.getOpcode()) {
4400 default:
4401 return true;
4402 case V_DIV_SCALE_F32_gfx6_gfx7:
4403 case V_DIV_SCALE_F32_vi:
4404 case V_DIV_SCALE_F32_gfx10:
4405 case V_DIV_SCALE_F64_gfx6_gfx7:
4406 case V_DIV_SCALE_F64_vi:
4407 case V_DIV_SCALE_F64_gfx10:
4408 break;
4409 }
4410
4411 // TODO: Check that src0 = src1 or src2.
4412
4413 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4414 AMDGPU::OpName::src1_modifiers,
4415 AMDGPU::OpName::src2_modifiers}) {
4416 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4417 .getImm() &
4419 return false;
4420 }
4421 }
4422
4423 return true;
4424}
4425
4426bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4427
4428 const unsigned Opc = Inst.getOpcode();
4429 const MCInstrDesc &Desc = MII.get(Opc);
4430
4431 if ((Desc.TSFlags & MIMGFlags) == 0)
4432 return true;
4433
4434 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4435 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4436 if (isCI() || isSI())
4437 return false;
4438 }
4439
4440 return true;
4441}
4442
4443bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
4444 const unsigned Opc = Inst.getOpcode();
4445 const MCInstrDesc &Desc = MII.get(Opc);
4446
4447 if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0)
4448 return true;
4449
4450 int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
4451
4452 return R128Idx < 0 || !Inst.getOperand(R128Idx).getImm();
4453}
4454
4455static bool IsRevOpcode(const unsigned Opcode)
4456{
4457 switch (Opcode) {
4458 case AMDGPU::V_SUBREV_F32_e32:
4459 case AMDGPU::V_SUBREV_F32_e64:
4460 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4461 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4462 case AMDGPU::V_SUBREV_F32_e32_vi:
4463 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4464 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4465 case AMDGPU::V_SUBREV_F32_e64_vi:
4466
4467 case AMDGPU::V_SUBREV_CO_U32_e32:
4468 case AMDGPU::V_SUBREV_CO_U32_e64:
4469 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4470 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4471
4472 case AMDGPU::V_SUBBREV_U32_e32:
4473 case AMDGPU::V_SUBBREV_U32_e64:
4474 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4475 case AMDGPU::V_SUBBREV_U32_e32_vi:
4476 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4477 case AMDGPU::V_SUBBREV_U32_e64_vi:
4478
4479 case AMDGPU::V_SUBREV_U32_e32:
4480 case AMDGPU::V_SUBREV_U32_e64:
4481 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4482 case AMDGPU::V_SUBREV_U32_e32_vi:
4483 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4484 case AMDGPU::V_SUBREV_U32_e64_vi:
4485
4486 case AMDGPU::V_SUBREV_F16_e32:
4487 case AMDGPU::V_SUBREV_F16_e64:
4488 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4489 case AMDGPU::V_SUBREV_F16_e32_vi:
4490 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4491 case AMDGPU::V_SUBREV_F16_e64_vi:
4492
4493 case AMDGPU::V_SUBREV_U16_e32:
4494 case AMDGPU::V_SUBREV_U16_e64:
4495 case AMDGPU::V_SUBREV_U16_e32_vi:
4496 case AMDGPU::V_SUBREV_U16_e64_vi:
4497
4498 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4499 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4500 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4501
4502 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4503 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4504
4505 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4506 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4507
4508 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4509 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4510
4511 case AMDGPU::V_LSHRREV_B32_e32:
4512 case AMDGPU::V_LSHRREV_B32_e64:
4513 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4514 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4515 case AMDGPU::V_LSHRREV_B32_e32_vi:
4516 case AMDGPU::V_LSHRREV_B32_e64_vi:
4517 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4518 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4519
4520 case AMDGPU::V_ASHRREV_I32_e32:
4521 case AMDGPU::V_ASHRREV_I32_e64:
4522 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4523 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4524 case AMDGPU::V_ASHRREV_I32_e32_vi:
4525 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4526 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4527 case AMDGPU::V_ASHRREV_I32_e64_vi:
4528
4529 case AMDGPU::V_LSHLREV_B32_e32:
4530 case AMDGPU::V_LSHLREV_B32_e64:
4531 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4532 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4533 case AMDGPU::V_LSHLREV_B32_e32_vi:
4534 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4535 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4536 case AMDGPU::V_LSHLREV_B32_e64_vi:
4537
4538 case AMDGPU::V_LSHLREV_B16_e32:
4539 case AMDGPU::V_LSHLREV_B16_e64:
4540 case AMDGPU::V_LSHLREV_B16_e32_vi:
4541 case AMDGPU::V_LSHLREV_B16_e64_vi:
4542 case AMDGPU::V_LSHLREV_B16_gfx10:
4543
4544 case AMDGPU::V_LSHRREV_B16_e32:
4545 case AMDGPU::V_LSHRREV_B16_e64:
4546 case AMDGPU::V_LSHRREV_B16_e32_vi:
4547 case AMDGPU::V_LSHRREV_B16_e64_vi:
4548 case AMDGPU::V_LSHRREV_B16_gfx10:
4549
4550 case AMDGPU::V_ASHRREV_I16_e32:
4551 case AMDGPU::V_ASHRREV_I16_e64:
4552 case AMDGPU::V_ASHRREV_I16_e32_vi:
4553 case AMDGPU::V_ASHRREV_I16_e64_vi:
4554 case AMDGPU::V_ASHRREV_I16_gfx10:
4555
4556 case AMDGPU::V_LSHLREV_B64_e64:
4557 case AMDGPU::V_LSHLREV_B64_gfx10:
4558 case AMDGPU::V_LSHLREV_B64_vi:
4559
4560 case AMDGPU::V_LSHRREV_B64_e64:
4561 case AMDGPU::V_LSHRREV_B64_gfx10:
4562 case AMDGPU::V_LSHRREV_B64_vi:
4563
4564 case AMDGPU::V_ASHRREV_I64_e64:
4565 case AMDGPU::V_ASHRREV_I64_gfx10:
4566 case AMDGPU::V_ASHRREV_I64_vi:
4567
4568 case AMDGPU::V_PK_LSHLREV_B16:
4569 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4570 case AMDGPU::V_PK_LSHLREV_B16_vi:
4571
4572 case AMDGPU::V_PK_LSHRREV_B16:
4573 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4574 case AMDGPU::V_PK_LSHRREV_B16_vi:
4575 case AMDGPU::V_PK_ASHRREV_I16:
4576 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4577 case AMDGPU::V_PK_ASHRREV_I16_vi:
4578 return true;
4579 default:
4580 return false;
4581 }
4582}
4583
4584bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst,
4585 const OperandVector &Operands) {
4586 using namespace SIInstrFlags;
4587 const unsigned Opcode = Inst.getOpcode();
4588 const MCInstrDesc &Desc = MII.get(Opcode);
4589
4590 // lds_direct register is defined so that it can be used
4591 // with 9-bit operands only. Ignore encodings which do not accept these.
4592 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4593 if ((Desc.TSFlags & Enc) == 0)
4594 return true;
4595
4596 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4597 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4598 if (SrcIdx == -1)
4599 break;
4600 const auto &Src = Inst.getOperand(SrcIdx);
4601 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4602
4603 if (isGFX90A() || isGFX11Plus()) {
4604 Error(getOperandLoc(Operands, SrcIdx),
4605 "lds_direct is not supported on this GPU");
4606 return false;
4607 }
4608
4609 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) {
4610 Error(getOperandLoc(Operands, SrcIdx),
4611 "lds_direct cannot be used with this instruction");
4612 return false;
4613 }
4614
4615 if (SrcName != OpName::src0) {
4616 Error(getOperandLoc(Operands, SrcIdx),
4617 "lds_direct may be used as src0 only");
4618 return false;
4619 }
4620 }
4621 }
4622
4623 return true;
4624}
4625
4626SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4627 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4628 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4629 if (Op.isFlatOffset())
4630 return Op.getStartLoc();
4631 }
4632 return getLoc();
4633}
4634
4635bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4636 const OperandVector &Operands) {
4637 auto Opcode = Inst.getOpcode();
4638 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4639 if (OpNum == -1)
4640 return true;
4641
4642 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4643 if ((TSFlags & SIInstrFlags::FLAT))
4644 return validateFlatOffset(Inst, Operands);
4645
4646 if ((TSFlags & SIInstrFlags::SMRD))
4647 return validateSMEMOffset(Inst, Operands);
4648
4649 const auto &Op = Inst.getOperand(OpNum);
4650 // GFX12+ buffer ops: InstOffset is a signed 24-bit field, but must not be negative.
4651 if (isGFX12Plus() &&
4652 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4653 const unsigned OffsetSize = 24;
4654 if (!isUIntN(OffsetSize - 1, Op.getImm())) {
4655 Error(getFlatOffsetLoc(Operands),
4656 Twine("expected a ") + Twine(OffsetSize - 1) +
4657 "-bit unsigned offset for buffer ops");
4658 return false;
4659 }
4660 } else {
4661 const unsigned OffsetSize = 16;
4662 if (!isUIntN(OffsetSize, Op.getImm())) {
4663 Error(getFlatOffsetLoc(Operands),
4664 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4665 return false;
4666 }
4667 }
4668 return true;
4669}
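// Example of the ranges enforced above (illustrative): on GFX12+ a buffer op
// may use offset:0 .. offset:8388607 (23-bit unsigned); any other offset
// reaching this check is validated against a 16-bit unsigned range.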
4670
4671bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4672 const OperandVector &Operands) {
4673 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4674 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4675 return true;
4676
4677 auto Opcode = Inst.getOpcode();
4678 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4679 assert(OpNum != -1);
4680
4681 const auto &Op = Inst.getOperand(OpNum);
4682 if (!hasFlatOffsets() && Op.getImm() != 0) {
4683 Error(getFlatOffsetLoc(Operands),
4684 "flat offset modifier is not supported on this GPU");
4685 return false;
4686 }
4687
4688 // For pre-GFX12 FLAT instructions the offset must be positive;
4689 // MSB is ignored and forced to zero.
4690 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4691 bool AllowNegative =
4693 isGFX12Plus();
4694 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4695 Error(getFlatOffsetLoc(Operands),
4696 Twine("expected a ") +
4697 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4698 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4699 return false;
4700 }
4701
4702 return true;
4703}
4704
4705SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4706 // Start with second operand because SMEM Offset cannot be dst or src0.
4707 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4708 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4709 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4710 return Op.getStartLoc();
4711 }
4712 return getLoc();
4713}
4714
4715bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4716 const OperandVector &Operands) {
4717 if (isCI() || isSI())
4718 return true;
4719
4720 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4721 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4722 return true;
4723
4724 auto Opcode = Inst.getOpcode();
4725 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4726 if (OpNum == -1)
4727 return true;
4728
4729 const auto &Op = Inst.getOperand(OpNum);
4730 if (!Op.isImm())
4731 return true;
4732
4733 uint64_t Offset = Op.getImm();
4734 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4737 return true;
4738
4739 Error(getSMEMOffsetLoc(Operands),
4740 isGFX12Plus() && IsBuffer
4741 ? "expected a 23-bit unsigned offset for buffer ops"
4742 : isGFX12Plus() ? "expected a 24-bit signed offset"
4743 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4744 : "expected a 21-bit signed offset");
4745
4746 return false;
4747}
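// Summary of the SMEM offset ranges reported above (for reference):
//   GFX12+ buffer ops: 23-bit unsigned; GFX12+ others: 24-bit signed;
//   VI or buffer ops:  20-bit unsigned; otherwise: 21-bit signed.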
4748
4749bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
4750 const OperandVector &Operands) {
4751 unsigned Opcode = Inst.getOpcode();
4752 const MCInstrDesc &Desc = MII.get(Opcode);
4753 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4754 return true;
4755
4756 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4757 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4758
4759 const int OpIndices[] = { Src0Idx, Src1Idx };
4760
4761 unsigned NumExprs = 0;
4762 unsigned NumLiterals = 0;
4763 int64_t LiteralValue;
4764
4765 for (int OpIdx : OpIndices) {
4766 if (OpIdx == -1) break;
4767
4768 const MCOperand &MO = Inst.getOperand(OpIdx);
4769 // Exclude special imm operands (like those used by s_set_gpr_idx_on)
4770 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4771 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4772 auto OpType = static_cast<AMDGPU::OperandType>(
4773 Desc.operands()[OpIdx].OperandType);
4774 int64_t Value = encode32BitLiteral(MO.getImm(), OpType);
4775 if (NumLiterals == 0 || LiteralValue != Value) {
4776 LiteralValue = Value;
4777 ++NumLiterals;
4778 }
4779 } else if (MO.isExpr()) {
4780 ++NumExprs;
4781 }
4782 }
4783 }
4784
4785 if (NumLiterals + NumExprs <= 1)
4786 return true;
4787
4788 Error(getOperandLoc(Operands, Src1Idx),
4789 "only one unique literal operand is allowed");
4790 return false;
4791}
4792
4793bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4794 const unsigned Opc = Inst.getOpcode();
4795 if (isPermlane16(Opc)) {
4796 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4797 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4798
4799 if (OpSel & ~3)
4800 return false;
4801 }
4802
4803 uint64_t TSFlags = MII.get(Opc).TSFlags;
4804
4805 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4806 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4807 if (OpSelIdx != -1) {
4808 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4809 return false;
4810 }
4811 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4812 if (OpSelHiIdx != -1) {
4813 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4814 return false;
4815 }
4816 }
4817
4818 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4819 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4820 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4821 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4822 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4823 if (OpSel & 3)
4824 return false;
4825 }
4826
4827 // Packed math FP32 instructions typically accept SGPRs or VGPRs as source
4828 // operands. On gfx12+, if a source operand uses SGPRs, the HW can only read
4829 // the first SGPR and use it for both the low and high operations.
4830 if (isPackedFP32Inst(Opc) && isGFX12Plus()) {
4831 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4832 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4833 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4834 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4835
4836 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4837 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4838 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4839 unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
4840
4841 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4842
4843 auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
4844 unsigned Mask = 1U << Index;
4845 return ((OpSel & Mask) == 0) && ((OpSelHi & Mask) == 0);
4846 };
4847
4848 if (Src0.isReg() && isSGPR(Src0.getReg(), TRI) &&
4849 !VerifyOneSGPR(/*Index=*/0))
4850 return false;
4851 if (Src1.isReg() && isSGPR(Src1.getReg(), TRI) &&
4852 !VerifyOneSGPR(/*Index=*/1))
4853 return false;
4854
4855 int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4856 if (Src2Idx != -1) {
4857 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4858 if (Src2.isReg() && isSGPR(Src2.getReg(), TRI) &&
4859 !VerifyOneSGPR(/*Index=*/2))
4860 return false;
4861 }
4862 }
4863
4864 return true;
4865}
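// Illustrative example for the packed-math rule above (hypothetical operands):
// on gfx12+, "v_pk_add_f32 v[0:1], s[2:3], v[4:5]" passes this check only if
// the op_sel and op_sel_hi bits for the SGPR source are both 0, i.e. the one
// readable SGPR feeds both the low and the high halves.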
4866
4867bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
4868 if (!hasTrue16Insts())
4869 return true;
4870 const MCRegisterInfo *MRI = getMRI();
4871 const unsigned Opc = Inst.getOpcode();
4872 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4873 if (OpSelIdx == -1)
4874 return true;
4875 unsigned OpSelOpValue = Inst.getOperand(OpSelIdx).getImm();
4876 // If the value is 0 we could have a default OpSel Operand, so conservatively
4877 // allow it.
4878 if (OpSelOpValue == 0)
4879 return true;
4880 unsigned OpCount = 0;
4881 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
4882 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
4883 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), OpName);
4884 if (OpIdx == -1)
4885 continue;
4886 const MCOperand &Op = Inst.getOperand(OpIdx);
4887 if (Op.isReg() &&
4888 MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(Op.getReg())) {
4889 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Op.getReg(), *MRI);
4890 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
4891 if (OpSelOpIsHi != VGPRSuffixIsHi)
4892 return false;
4893 }
4894 ++OpCount;
4895 }
4896
4897 return true;
4898}
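// E.g. (illustrative): for a true16 instruction, a 16-bit operand written as
// v0.h must have its corresponding op_sel bit set to 1, and v0.l must have it
// set to 0; any mismatch is rejected above.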
4899
4900bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
4901 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4902
4903 const unsigned Opc = Inst.getOpcode();
4904 uint64_t TSFlags = MII.get(Opc).TSFlags;
4905
4906 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4907 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4908 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4909 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
4910 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4911 !(TSFlags & SIInstrFlags::IsSWMMAC))
4912 return true;
4913
4914 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4915 if (NegIdx == -1)
4916 return true;
4917
4918 unsigned Neg = Inst.getOperand(NegIdx).getImm();
4919
4920 // Some instructions have a neg_lo or neg_hi operand, but the neg modifier is
4921 // allowed on some of their src operands and not on others.
4922 // Conveniently, such instructions don't have a src_modifiers operand for the
4923 // src operands that don't allow neg, because those operands also don't allow opsel.
4924
4925 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4926 AMDGPU::OpName::src1_modifiers,
4927 AMDGPU::OpName::src2_modifiers};
4928
4929 for (unsigned i = 0; i < 3; ++i) {
4930 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
4931 if (Neg & (1 << i))
4932 return false;
4933 }
4934 }
4935
4936 return true;
4937}
4938
4939bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4940 const OperandVector &Operands) {
4941 const unsigned Opc = Inst.getOpcode();
4942 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4943 if (DppCtrlIdx >= 0) {
4944 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4945
4946 if (!AMDGPU::isLegalDPALU_DPPControl(getSTI(), DppCtrl) &&
4947 AMDGPU::isDPALU_DPP(MII.get(Opc), getSTI())) {
4948 // DP ALU DPP is supported for row_newbcast only on GFX9* and row_share
4949 // only on GFX12.
4950 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4951 Error(S, isGFX12() ? "DP ALU dpp only supports row_share"
4952 : "DP ALU dpp only supports row_newbcast");
4953 return false;
4954 }
4955 }
4956
4957 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
4958 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4959
4960 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
4961 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4962 if (Src1Idx >= 0) {
4963 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4964 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4965 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
4966 Error(getOperandLoc(Operands, Src1Idx),
4967 "invalid operand for instruction");
4968 return false;
4969 }
4970 if (Src1.isImm()) {
4971 Error(getInstLoc(Operands),
4972 "src1 immediate operand invalid for instruction");
4973 return false;
4974 }
4975 }
4976 }
4977
4978 return true;
4979}
4980
4981// Check if VCC register matches wavefront size
4982bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
4983 auto FB = getFeatureBits();
4984 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4985 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4986}
4987
4988 // Only one unique literal can be used. A VOP3 literal is only allowed on GFX10+.
4989bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4990 const OperandVector &Operands) {
4991 unsigned Opcode = Inst.getOpcode();
4992 const MCInstrDesc &Desc = MII.get(Opcode);
4993 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4994 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4995 !HasMandatoryLiteral && !isVOPD(Opcode))
4996 return true;
4997
4998 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4999
5000 std::optional<unsigned> LiteralOpIdx;
5001 std::optional<uint64_t> LiteralValue;
5002
5003 for (int OpIdx : OpIndices) {
5004 if (OpIdx == -1)
5005 continue;
5006
5007 const MCOperand &MO = Inst.getOperand(OpIdx);
5008 if (!MO.isImm() && !MO.isExpr())
5009 continue;
5010 if (!isSISrcOperand(Desc, OpIdx))
5011 continue;
5012
5013 bool IsAnotherLiteral = false;
5014 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
5015 uint64_t Value = static_cast<uint64_t>(MO.getImm());
5016 bool IsForcedFP64 =
5017 Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 ||
5019 HasMandatoryLiteral);
5020 bool IsFP64 = (IsForcedFP64 || AMDGPU::isSISrcFPOperand(Desc, OpIdx)) &&
5021 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
5022 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
5023
5024 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value) &&
5025 !IsForcedFP64 && (!has64BitLiterals() || Desc.getSize() != 4)) {
5026 Error(getOperandLoc(Operands, OpIdx),
5027 "invalid operand for instruction");
5028 return false;
5029 }
5030
5031 if (IsFP64 && IsValid32Op && !IsForcedFP64)
5032 Value = Hi_32(Value);
5033
5034 IsAnotherLiteral = !LiteralValue || *LiteralValue != Value;
5035 LiteralValue = Value;
5036 } else if (MO.isExpr()) {
5037 // Literal value not known, so we conservatively assume it's different.
5038 IsAnotherLiteral = true;
5039 }
5040
5041 if (IsAnotherLiteral && !HasMandatoryLiteral &&
5042 !getFeatureBits()[FeatureVOP3Literal]) {
5043 Error(getOperandLoc(Operands, OpIdx),
5044 "literal operands are not supported");
5045 return false;
5046 }
5047
5048 if (LiteralOpIdx && IsAnotherLiteral) {
5049 Error(getLaterLoc(getOperandLoc(Operands, OpIdx),
5050 getOperandLoc(Operands, *LiteralOpIdx)),
5051 "only one unique literal operand is allowed");
5052 return false;
5053 }
5054
5055 if (IsAnotherLiteral)
5056 LiteralOpIdx = OpIdx;
5057 }
5058
5059 return true;
5060}
5061
5062// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
5063static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name,
5064 const MCRegisterInfo *MRI) {
5065 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name);
5066 if (OpIdx < 0)
5067 return -1;
5068
5069 const MCOperand &Op = Inst.getOperand(OpIdx);
5070 if (!Op.isReg())
5071 return -1;
5072
5073 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5074 auto Reg = Sub ? Sub : Op.getReg();
5075 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5076 return AGPR32.contains(Reg) ? 1 : 0;
5077}
5078
5079bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
5080 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5081 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
5083 SIInstrFlags::DS)) == 0)
5084 return true;
5085
5086 AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
5087 ? AMDGPU::OpName::data0
5088 : AMDGPU::OpName::vdata;
5089
5090 const MCRegisterInfo *MRI = getMRI();
5091 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
5092 int DataAreg = IsAGPROperand(Inst, DataName, MRI);
5093
5094 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
5095 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
5096 if (Data2Areg >= 0 && Data2Areg != DataAreg)
5097 return false;
5098 }
5099
5100 auto FB = getFeatureBits();
5101 if (FB[AMDGPU::FeatureGFX90AInsts]) {
5102 if (DataAreg < 0 || DstAreg < 0)
5103 return true;
5104 return DstAreg == DataAreg;
5105 }
5106
5107 return DstAreg < 1 && DataAreg < 1;
5108}
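// For illustration: the check above requires both DS data operands to come
// from the same register file (both AGPRs or both VGPRs); on gfx90a the
// destination must also match the data operands, and on targets without the
// gfx90a feature set AGPR data/dst operands are rejected here entirely.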
5109
5110bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
5111 auto FB = getFeatureBits();
5112 if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
5113 return true;
5114
5115 unsigned Opc = Inst.getOpcode();
5116 const MCRegisterInfo *MRI = getMRI();
5117 // DS_READ_B96_TR_B6 is the only DS instruction in GFX950 that allows an
5118 // unaligned VGPR. All others only allow even-aligned VGPRs.
5119 if (FB[AMDGPU::FeatureGFX90AInsts] && Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5120 return true;
5121
5122 if (FB[AMDGPU::FeatureGFX1250Insts]) {
5123 switch (Opc) {
5124 default:
5125 break;
5126 case AMDGPU::DS_LOAD_TR6_B96:
5127 case AMDGPU::DS_LOAD_TR6_B96_gfx12:
5128 // DS_LOAD_TR6_B96 is the only DS instruction in GFX1250 that
5129 // allows an unaligned VGPR. All others only allow even-aligned VGPRs.
5130 return true;
5131 case AMDGPU::GLOBAL_LOAD_TR6_B96:
5132 case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
5133 // GLOBAL_LOAD_TR6_B96 is the only GLOBAL instruction in GFX1250 that
5134 // allows an unaligned VGPR for vdst, but other operands still only allow
5135 // even-aligned VGPRs.
5136 int VAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
5137 if (VAddrIdx != -1) {
5138 const MCOperand &Op = Inst.getOperand(VAddrIdx);
5139 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5140 if ((Sub - AMDGPU::VGPR0) & 1)
5141 return false;
5142 }
5143 return true;
5144 }
5145 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
5146 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
5147 return true;
5148 }
5149 }
5150
5151 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5152 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5153 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
5154 const MCOperand &Op = Inst.getOperand(I);
5155 if (!Op.isReg())
5156 continue;
5157
5158 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5159 if (!Sub)
5160 continue;
5161
5162 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
5163 return false;
5164 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
5165 return false;
5166 }
5167
5168 return true;
5169}
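// E.g. (illustrative): when aligned VGPRs are required, a 64-bit operand such
// as v[2:3] is accepted while v[3:4] is rejected because its first register
// is odd; the TR6_B96 loads handled above are the exceptions.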
5170
5171SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
5172 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5173 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5174 if (Op.isBLGP())
5175 return Op.getStartLoc();
5176 }
5177 return SMLoc();
5178}
5179
5180bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
5181 const OperandVector &Operands) {
5182 unsigned Opc = Inst.getOpcode();
5183 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
5184 if (BlgpIdx == -1)
5185 return true;
5186 SMLoc BLGPLoc = getBLGPLoc(Operands);
5187 if (!BLGPLoc.isValid())
5188 return true;
5189 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
5190 auto FB = getFeatureBits();
5191 bool UsesNeg = false;
5192 if (FB[AMDGPU::FeatureGFX940Insts]) {
5193 switch (Opc) {
5194 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5195 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5196 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5197 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
5198 UsesNeg = true;
5199 }
5200 }
5201
5202 if (IsNeg == UsesNeg)
5203 return true;
5204
5205 Error(BLGPLoc,
5206 UsesNeg ? "invalid modifier: blgp is not supported"
5207 : "invalid modifier: neg is not supported");
5208
5209 return false;
5210}
5211
5212bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
5213 const OperandVector &Operands) {
5214 if (!isGFX11Plus())
5215 return true;
5216
5217 unsigned Opc = Inst.getOpcode();
5218 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5219 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5220 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5221 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5222 return true;
5223
5224 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
5225 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
5226 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
5227 if (Reg == AMDGPU::SGPR_NULL)
5228 return true;
5229
5230 Error(getOperandLoc(Operands, Src0Idx), "src0 must be null");
5231 return false;
5232}
5233
5234bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
5235 const OperandVector &Operands) {
5236 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5237 if ((TSFlags & SIInstrFlags::DS) == 0)
5238 return true;
5239 if (TSFlags & SIInstrFlags::GWS)
5240 return validateGWS(Inst, Operands);
5241 // Only validate GDS for non-GWS instructions.
5242 if (hasGDS())
5243 return true;
5244 int GDSIdx =
5245 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
5246 if (GDSIdx < 0)
5247 return true;
5248 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
5249 if (GDS) {
5250 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
5251 Error(S, "gds modifier is not supported on this GPU");
5252 return false;
5253 }
5254 return true;
5255}
5256
5257// gfx90a has an undocumented limitation:
5258// DS_GWS opcodes must use even aligned registers.
5259bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
5260 const OperandVector &Operands) {
5261 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5262 return true;
5263
5264 int Opc = Inst.getOpcode();
5265 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5266 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5267 return true;
5268
5269 const MCRegisterInfo *MRI = getMRI();
5270 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5271 int Data0Pos =
5272 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
5273 assert(Data0Pos != -1);
5274 auto Reg = Inst.getOperand(Data0Pos).getReg();
5275 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5276 if (RegIdx & 1) {
5277 Error(getOperandLoc(Operands, Data0Pos), "vgpr must be even aligned");
5278 return false;
5279 }
5280
5281 return true;
5282}
5283
5284bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
5285 const OperandVector &Operands,
5286 const SMLoc &IDLoc) {
5287 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
5288 AMDGPU::OpName::cpol);
5289 if (CPolPos == -1)
5290 return true;
5291
5292 unsigned CPol = Inst.getOperand(CPolPos).getImm();
5293
5294 if (!isGFX1250()) {
5295 if (CPol & CPol::SCAL) {
5296 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5297 StringRef CStr(S.getPointer());
5298 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5299 Error(S, "scale_offset is not supported on this GPU");
5300 }
5301 if (CPol & CPol::NV) {
5302 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5303 StringRef CStr(S.getPointer());
5304 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
5305 Error(S, "nv is not supported on this GPU");
5306 }
5307 }
5308
5309 if ((CPol & CPol::SCAL) && !supportsScaleOffset(MII, Inst.getOpcode())) {
5310 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5311 StringRef CStr(S.getPointer());
5312 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5313 Error(S, "scale_offset is not supported for this instruction");
5314 }
5315
5316 if (isGFX12Plus())
5317 return validateTHAndScopeBits(Inst, Operands, CPol);
5318
5319 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5320 if (TSFlags & SIInstrFlags::SMRD) {
5321 if (CPol && (isSI() || isCI())) {
5322 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5323 Error(S, "cache policy is not supported for SMRD instructions");
5324 return false;
5325 }
5326 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
5327 Error(IDLoc, "invalid cache policy for SMEM instruction");
5328 return false;
5329 }
5330 }
5331
5332 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
5333 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
5334 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
5335 SIInstrFlags::FLAT;
5336 if (!(TSFlags & AllowSCCModifier)) {
5337 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5338 StringRef CStr(S.getPointer());
5339 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
5340 Error(S,
5341 "scc modifier is not supported for this instruction on this GPU");
5342 return false;
5343 }
5344 }
5345
5346 if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
5347 return true;
5348
5349 if (TSFlags & SIInstrFlags::IsAtomicRet) {
5350 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
5351 Error(IDLoc, isGFX940() ? "instruction must use sc0"
5352 : "instruction must use glc");
5353 return false;
5354 }
5355 } else {
5356 if (CPol & CPol::GLC) {
5357 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5358 StringRef CStr(S.getPointer());
5359 S = SMLoc::getFromPointer(
5360 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
5361 Error(S, isGFX940() ? "instruction must not use sc0"
5362 : "instruction must not use glc");
5363 return false;
5364 }
5365 }
5366
5367 return true;
5368}
5369
5370bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
5371 const OperandVector &Operands,
5372 const unsigned CPol) {
5373 const unsigned TH = CPol & AMDGPU::CPol::TH;
5374 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
5375
5376 const unsigned Opcode = Inst.getOpcode();
5377 const MCInstrDesc &TID = MII.get(Opcode);
5378
5379 auto PrintError = [&](StringRef Msg) {
5380 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5381 Error(S, Msg);
5382 return false;
5383 };
5384
5385 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
5386 (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) &&
5387 !(CPol & AMDGPU::CPol::TH_ATOMIC_RETURN))
5388 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
5389
5390 if (TH == 0)
5391 return true;
5392
5393 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
5394 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
5395 (TH == AMDGPU::CPol::TH_NT_HT)))
5396 return PrintError("invalid th value for SMEM instruction");
5397
5398 if (TH == AMDGPU::CPol::TH_BYPASS) {
5399 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5400 CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
5401 (Scope == AMDGPU::CPol::SCOPE_SYS &&
5402 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
5403 return PrintError("scope and th combination is not valid");
5404 }
5405
5406 unsigned THType = AMDGPU::getTemporalHintType(TID);
5407 if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) {
5408 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5409 return PrintError("invalid th value for atomic instructions");
5410 } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) {
5411 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5412 return PrintError("invalid th value for store instructions");
5413 } else {
5414 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5415 return PrintError("invalid th value for load instructions");
5416 }
5417
5418 return true;
5419}
5420
5421bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5422 const OperandVector &Operands) {
5423 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5424 if (Desc.mayStore() &&
5425 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5426 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5427 if (Loc != getInstLoc(Operands)) {
5428 Error(Loc, "TFE modifier has no meaning for store instructions");
5429 return false;
5430 }
5431 }
5432
5433 return true;
5434}
5435
5436bool AMDGPUAsmParser::validateSetVgprMSB(const MCInst &Inst,
5437 const OperandVector &Operands) {
5438 if (Inst.getOpcode() != AMDGPU::S_SET_VGPR_MSB_gfx12)
5439 return true;
5440
5441 int Simm16Pos =
5442 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::simm16);
5443 if ((unsigned)Inst.getOperand(Simm16Pos).getImm() > 255) {
5444 SMLoc Loc = Operands[1]->getStartLoc();
5445 Error(Loc, "s_set_vgpr_msb accepts values in range [0..255]");
5446 return false;
5447 }
5448
5449 return true;
5450}
5451
5452bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
5453 const OperandVector &Operands) {
5454 unsigned Opc = Inst.getOpcode();
5455 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5456 const MCInstrDesc &Desc = MII.get(Opc);
5457
5458 auto validateFmt = [&](AMDGPU::OpName FmtOp, AMDGPU::OpName SrcOp) -> bool {
5459 int FmtIdx = AMDGPU::getNamedOperandIdx(Opc, FmtOp);
5460 if (FmtIdx == -1)
5461 return true;
5462 unsigned Fmt = Inst.getOperand(FmtIdx).getImm();
5463 int SrcIdx = AMDGPU::getNamedOperandIdx(Opc, SrcOp);
5464 unsigned RegSize =
5465 TRI->getRegClass(Desc.operands()[SrcIdx].RegClass).getSizeInBits();
5466
5467 if (RegSize == AMDGPU::wmmaScaleF8F6F4FormatToNumRegs(Fmt) * 32)
5468 return true;
5469
5470 static const char *FmtNames[] = {"MATRIX_FMT_FP8", "MATRIX_FMT_BF8",
5471 "MATRIX_FMT_FP6", "MATRIX_FMT_BF6",
5472 "MATRIX_FMT_FP4"};
5473
5474 Error(getOperandLoc(Operands, SrcIdx),
5475 "wrong register tuple size for " + Twine(FmtNames[Fmt]));
5476 return false;
5477 };
5478
5479 return validateFmt(AMDGPU::OpName::matrix_a_fmt, AMDGPU::OpName::src0) &&
5480 validateFmt(AMDGPU::OpName::matrix_b_fmt, AMDGPU::OpName::src1);
5481}
5482
5483bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
5484 const SMLoc &IDLoc,
5485 const OperandVector &Operands) {
5486 if (!validateLdsDirect(Inst, Operands))
5487 return false;
5488 if (!validateTrue16OpSel(Inst)) {
5489 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5490 "op_sel operand conflicts with 16-bit operand suffix");
5491 return false;
5492 }
5493 if (!validateSOPLiteral(Inst, Operands))
5494 return false;
5495 if (!validateVOPLiteral(Inst, Operands)) {
5496 return false;
5497 }
5498 if (!validateConstantBusLimitations(Inst, Operands)) {
5499 return false;
5500 }
5501 if (!validateVOPD(Inst, Operands)) {
5502 return false;
5503 }
5504 if (!validateIntClampSupported(Inst)) {
5505 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5506 "integer clamping is not supported on this GPU");
5507 return false;
5508 }
5509 if (!validateOpSel(Inst)) {
5510 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5511 "invalid op_sel operand");
5512 return false;
5513 }
5514 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5515 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5516 "invalid neg_lo operand");
5517 return false;
5518 }
5519 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5520 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5521 "invalid neg_hi operand");
5522 return false;
5523 }
5524 if (!validateDPP(Inst, Operands)) {
5525 return false;
5526 }
5527 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5528 if (!validateMIMGD16(Inst)) {
5529 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5530 "d16 modifier is not supported on this GPU");
5531 return false;
5532 }
5533 if (!validateMIMGDim(Inst, Operands)) {
5534 Error(IDLoc, "missing dim operand");
5535 return false;
5536 }
5537 if (!validateTensorR128(Inst)) {
5538 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5539 "instruction must set modifier r128=0");
5540 return false;
5541 }
5542 if (!validateMIMGMSAA(Inst)) {
5543 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5544 "invalid dim; must be MSAA type");
5545 return false;
5546 }
5547 if (!validateMIMGDataSize(Inst, IDLoc)) {
5548 return false;
5549 }
5550 if (!validateMIMGAddrSize(Inst, IDLoc))
5551 return false;
5552 if (!validateMIMGAtomicDMask(Inst)) {
5553 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5554 "invalid atomic image dmask");
5555 return false;
5556 }
5557 if (!validateMIMGGatherDMask(Inst)) {
5558 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5559 "invalid image_gather dmask: only one bit must be set");
5560 return false;
5561 }
5562 if (!validateMovrels(Inst, Operands)) {
5563 return false;
5564 }
5565 if (!validateOffset(Inst, Operands)) {
5566 return false;
5567 }
5568 if (!validateMAIAccWrite(Inst, Operands)) {
5569 return false;
5570 }
5571 if (!validateMAISrc2(Inst, Operands)) {
5572 return false;
5573 }
5574 if (!validateMFMA(Inst, Operands)) {
5575 return false;
5576 }
5577 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5578 return false;
5579 }
5580
5581 if (!validateAGPRLdSt(Inst)) {
5582 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5583 ? "invalid register class: data and dst should be all VGPR or AGPR"
5584 : "invalid register class: agpr loads and stores not supported on this GPU"
5585 );
5586 return false;
5587 }
5588 if (!validateVGPRAlign(Inst)) {
5589 Error(IDLoc,
5590 "invalid register class: vgpr tuples must be 64 bit aligned");
5591 return false;
5592 }
5593 if (!validateDS(Inst, Operands)) {
5594 return false;
5595 }
5596
5597 if (!validateBLGP(Inst, Operands)) {
5598 return false;
5599 }
5600
5601 if (!validateDivScale(Inst)) {
5602 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5603 return false;
5604 }
5605 if (!validateWaitCnt(Inst, Operands)) {
5606 return false;
5607 }
5608 if (!validateTFE(Inst, Operands)) {
5609 return false;
5610 }
5611 if (!validateSetVgprMSB(Inst, Operands)) {
5612 return false;
5613 }
5614 if (!validateWMMA(Inst, Operands)) {
5615 return false;
5616 }
5617
5618 return true;
5619}
5620
5621static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5622 const FeatureBitset &FBS,
5623 unsigned VariantID = 0);
5624
5625static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5626 const FeatureBitset &AvailableFeatures,
5627 unsigned VariantID);
5628
5629bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5630 const FeatureBitset &FBS) {
5631 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5632}
5633
5634bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5635 const FeatureBitset &FBS,
5636 ArrayRef<unsigned> Variants) {
5637 for (auto Variant : Variants) {
5638 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5639 return true;
5640 }
5641
5642 return false;
5643}
5644
5645bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5646 const SMLoc &IDLoc) {
5647 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5648
5649 // Check if requested instruction variant is supported.
5650 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5651 return false;
5652
5653 // This instruction is not supported.
5654 // Clear any other pending errors because they are no longer relevant.
5655 getParser().clearPendingErrors();
5656
5657 // Requested instruction variant is not supported.
5658 // Check if any other variants are supported.
5659 StringRef VariantName = getMatchedVariantName();
5660 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5661 return Error(IDLoc,
5662 Twine(VariantName,
5663 " variant of this instruction is not supported"));
5664 }
5665
5666 // Check if this instruction may be used with a different wavesize.
5667 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5668 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5669
5670 FeatureBitset FeaturesWS32 = getFeatureBits();
5671 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5672 .flip(AMDGPU::FeatureWavefrontSize32);
5673 FeatureBitset AvailableFeaturesWS32 =
5674 ComputeAvailableFeatures(FeaturesWS32);
5675
5676 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5677 return Error(IDLoc, "instruction requires wavesize=32");
5678 }
5679
5680 // Finally check if this instruction is supported on any other GPU.
5681 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5682 return Error(IDLoc, "instruction not supported on this GPU");
5683 }
5684
5685 // Instruction not supported on any GPU. Probably a typo.
5686 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5687 return Error(IDLoc, "invalid instruction" + Suggestion);
5688}
5689
5690static bool isInvalidVOPDY(const OperandVector &Operands,
5691 uint64_t InvalidOprIdx) {
5692 assert(InvalidOprIdx < Operands.size());
5693 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5694 if (Op.isToken() && InvalidOprIdx > 1) {
5695 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5696 return PrevOp.isToken() && PrevOp.getToken() == "::";
5697 }
5698 return false;
5699}
5700
5701 bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5702 OperandVector &Operands,
5703 MCStreamer &Out,
5704 uint64_t &ErrorInfo,
5705 bool MatchingInlineAsm) {
5706 MCInst Inst;
5707 Inst.setLoc(IDLoc);
5708 unsigned Result = Match_Success;
5709 for (auto Variant : getMatchedVariants()) {
5710 uint64_t EI;
5711 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5712 Variant);
5713 // We order match statuses from least to most specific and use the most
5714 // specific status as the result:
5715 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5716 if (R == Match_Success || R == Match_MissingFeature ||
5717 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5718 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5719 Result != Match_MissingFeature)) {
5720 Result = R;
5721 ErrorInfo = EI;
5722 }
5723 if (R == Match_Success)
5724 break;
5725 }
5726
5727 if (Result == Match_Success) {
5728 if (!validateInstruction(Inst, IDLoc, Operands)) {
5729 return true;
5730 }
5731 Out.emitInstruction(Inst, getSTI());
5732 return false;
5733 }
5734
5735 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5736 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5737 return true;
5738 }
5739
5740 switch (Result) {
5741 default: break;
5742 case Match_MissingFeature:
5743 // It has been verified that the specified instruction
5744 // mnemonic is valid. A match was found but it requires
5745 // features which are not supported on this GPU.
5746 return Error(IDLoc, "operands are not valid for this GPU or mode");
5747
5748 case Match_InvalidOperand: {
5749 SMLoc ErrorLoc = IDLoc;
5750 if (ErrorInfo != ~0ULL) {
5751 if (ErrorInfo >= Operands.size()) {
5752 return Error(IDLoc, "too few operands for instruction");
5753 }
5754 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5755 if (ErrorLoc == SMLoc())
5756 ErrorLoc = IDLoc;
5757
5758 if (isInvalidVOPDY(Operands, ErrorInfo))
5759 return Error(ErrorLoc, "invalid VOPDY instruction");
5760 }
5761 return Error(ErrorLoc, "invalid operand for instruction");
5762 }
5763
5764 case Match_MnemonicFail:
5765 llvm_unreachable("Invalid instructions should have been handled already");
5766 }
5767 llvm_unreachable("Implement any new match types added!");
5768}
5769
5770bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5771 int64_t Tmp = -1;
5772 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5773 return true;
5774 }
5775 if (getParser().parseAbsoluteExpression(Tmp)) {
5776 return true;
5777 }
5778 Ret = static_cast<uint32_t>(Tmp);
5779 return false;
5780}
5781
5782bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5783 if (!getSTI().getTargetTriple().isAMDGCN())
5784 return TokError("directive only supported for amdgcn architecture");
5785
5786 std::string TargetIDDirective;
5787 SMLoc TargetStart = getTok().getLoc();
5788 if (getParser().parseEscapedString(TargetIDDirective))
5789 return true;
5790
5791 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5792 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5793 return getParser().Error(TargetRange.Start,
5794 (Twine(".amdgcn_target directive's target id ") +
5795 Twine(TargetIDDirective) +
5796 Twine(" does not match the specified target id ") +
5797 Twine(getTargetStreamer().getTargetID()->toString())).str());
5798
5799 return false;
5800}
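// Illustrative use of the directive handled above (the target id string is an
// assumption and must match the target id the assembler was configured with):
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx90a"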
5801
5802bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5803 return Error(Range.Start, "value out of range", Range);
5804}
5805
5806bool AMDGPUAsmParser::calculateGPRBlocks(
5807 const FeatureBitset &Features, const MCExpr *VCCUsed,
5808 const MCExpr *FlatScrUsed, bool XNACKUsed,
5809 std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
5810 SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
5811 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
5812 // TODO(scott.linder): These calculations are duplicated from
5813 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5814 IsaVersion Version = getIsaVersion(getSTI().getCPU());
5815 MCContext &Ctx = getContext();
5816
5817 const MCExpr *NumSGPRs = NextFreeSGPR;
5818 int64_t EvaluatedSGPRs;
5819
5820 if (Version.Major >= 10)
5821 NumSGPRs = MCConstantExpr::create(0, Ctx);
5822 else {
5823 unsigned MaxAddressableNumSGPRs =
5824 IsaInfo::getAddressableNumSGPRs(&getSTI());
5825
5826 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
5827 !Features.test(FeatureSGPRInitBug) &&
5828 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5829 return OutOfRangeError(SGPRRange);
5830
5831 const MCExpr *ExtraSGPRs =
5832 AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
5833 NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);
5834
5835 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
5836 (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5837 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5838 return OutOfRangeError(SGPRRange);
5839
5840 if (Features.test(FeatureSGPRInitBug))
5841 NumSGPRs =
5842 MCConstantExpr::create(IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx);
5843 }
5844
5845 // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
5846 // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
5847 auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
5848 unsigned Granule) -> const MCExpr * {
5849 const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
5850 const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
5851 const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
5852 const MCExpr *AlignToGPR =
5853 AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
5854 const MCExpr *DivGPR =
5855 MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
5856 const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
5857 return SubGPR;
5858 };
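// Worked example of the formula above: with NumGPR = 37 and an encoding
// granule of 4 (granule value chosen purely for illustration),
// alignTo(max(1, 37), 4) = 40, 40 / 4 = 10, and 10 - 1 gives a block count
// of 9.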
5859
5860 VGPRBlocks = GetNumGPRBlocks(
5861 NextFreeVGPR,
5862 IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32));
5863 SGPRBlocks =
5864 GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI()));
5865
5866 return false;
5867}
5868
5869bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5870 if (!getSTI().getTargetTriple().isAMDGCN())
5871 return TokError("directive only supported for amdgcn architecture");
5872
5873 if (!isHsaAbi(getSTI()))
5874 return TokError("directive only supported for amdhsa OS");
5875
5876 StringRef KernelName;
5877 if (getParser().parseIdentifier(KernelName))
5878 return true;
5879
5880 AMDGPU::MCKernelDescriptor KD =
5881 AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
5882 &getSTI(), getContext());
5883
5884 StringSet<> Seen;
5885
5886 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5887
5888 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
5889 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
5890
5891 SMRange VGPRRange;
5892 const MCExpr *NextFreeVGPR = ZeroExpr;
5893 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
5894 const MCExpr *NamedBarCnt = ZeroExpr;
5895 uint64_t SharedVGPRCount = 0;
5896 uint64_t PreloadLength = 0;
5897 uint64_t PreloadOffset = 0;
5898 SMRange SGPRRange;
5899 const MCExpr *NextFreeSGPR = ZeroExpr;
5900
5901 // Count the number of user SGPRs implied from the enabled feature bits.
5902 unsigned ImpliedUserSGPRCount = 0;
5903
5904 // Track if the asm explicitly contains the directive for the user SGPR
5905 // count.
5906 std::optional<unsigned> ExplicitUserSGPRCount;
5907 const MCExpr *ReserveVCC = OneExpr;
5908 const MCExpr *ReserveFlatScr = OneExpr;
5909 std::optional<bool> EnableWavefrontSize32;
5910
5911 while (true) {
5912 while (trySkipToken(AsmToken::EndOfStatement));
5913
5914 StringRef ID;
5915 SMRange IDRange = getTok().getLocRange();
5916 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5917 return true;
5918
5919 if (ID == ".end_amdhsa_kernel")
5920 break;
5921
5922 if (!Seen.insert(ID).second)
5923 return TokError(".amdhsa_ directives cannot be repeated");
5924
5925 SMLoc ValStart = getLoc();
5926 const MCExpr *ExprVal;
5927 if (getParser().parseExpression(ExprVal))
5928 return true;
5929 SMLoc ValEnd = getLoc();
5930 SMRange ValRange = SMRange(ValStart, ValEnd);
5931
5932 int64_t IVal = 0;
5933 uint64_t Val = IVal;
5934 bool EvaluatableExpr;
5935 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
5936 if (IVal < 0)
5937 return OutOfRangeError(ValRange);
5938 Val = IVal;
5939 }
5940
5941#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5942 if (!isUInt<ENTRY##_WIDTH>(Val)) \
5943 return OutOfRangeError(RANGE); \
5944 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
5945 getContext());
5946
5947// Some fields use the parsed value immediately, which requires the
5948// expression to be resolvable.
5949#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
5950 if (!(RESOLVED)) \
5951 return Error(IDRange.Start, "directive should have resolvable expression", \
5952 IDRange);
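// For example, a use such as
//   PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
//                    COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
//                    ValRange);
// range-checks Val against the field width and then sets the corresponding
// bit-field of the kernel descriptor expression.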
5953
5954 if (ID == ".amdhsa_group_segment_fixed_size") {
5955 if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
5956 CHAR_BIT>(Val))
5957 return OutOfRangeError(ValRange);
5958 KD.group_segment_fixed_size = ExprVal;
5959 } else if (ID == ".amdhsa_private_segment_fixed_size") {
5960 if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
5961 CHAR_BIT>(Val))
5962 return OutOfRangeError(ValRange);
5963 KD.private_segment_fixed_size = ExprVal;
5964 } else if (ID == ".amdhsa_kernarg_size") {
5965 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
5966 return OutOfRangeError(ValRange);
5967 KD.kernarg_size = ExprVal;
5968 } else if (ID == ".amdhsa_user_sgpr_count") {
5969 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5970 ExplicitUserSGPRCount = Val;
5971 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5972 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5973 if (hasArchitectedFlatScratch())
5974 return Error(IDRange.Start,
5975 "directive is not supported with architected flat scratch",
5976 IDRange);
5977 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5978 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5979 ExprVal, ValRange);
5980 if (Val)
5981 ImpliedUserSGPRCount += 4;
5982 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
5983 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5984 if (!hasKernargPreload())
5985 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5986
5987 if (Val > getMaxNumUserSGPRs())
5988 return OutOfRangeError(ValRange);
5989 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
5990 ValRange);
5991 if (Val) {
5992 ImpliedUserSGPRCount += Val;
5993 PreloadLength = Val;
5994 }
5995 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
5996 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5997 if (!hasKernargPreload())
5998 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5999
6000 if (Val >= 1024)
6001 return OutOfRangeError(ValRange);
6002 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
6003 ValRange);
6004 if (Val)
6005 PreloadOffset = Val;
6006 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
6007 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6008 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6009 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6010 ValRange);
6011 if (Val)
6012 ImpliedUserSGPRCount += 2;
6013 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
6014 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6015 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6016 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6017 ValRange);
6018 if (Val)
6019 ImpliedUserSGPRCount += 2;
6020 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
6021 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6022 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6023 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6024 ExprVal, ValRange);
6025 if (Val)
6026 ImpliedUserSGPRCount += 2;
6027 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
6028 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6029 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6030 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6031 ValRange);
6032 if (Val)
6033 ImpliedUserSGPRCount += 2;
6034 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
6035 if (hasArchitectedFlatScratch())
6036 return Error(IDRange.Start,
6037 "directive is not supported with architected flat scratch",
6038 IDRange);
6039 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6040 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6041 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6042 ExprVal, ValRange);
6043 if (Val)
6044 ImpliedUserSGPRCount += 2;
6045 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
6046 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6047 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6048 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6049 ExprVal, ValRange);
6050 if (Val)
6051 ImpliedUserSGPRCount += 1;
6052 } else if (ID == ".amdhsa_wavefront_size32") {
6053 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6054 if (IVersion.Major < 10)
6055 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6056 EnableWavefrontSize32 = Val;
6057 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6058 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6059 ValRange);
6060 } else if (ID == ".amdhsa_uses_dynamic_stack") {
6061 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6062 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6063 ValRange);
6064 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6065 if (hasArchitectedFlatScratch())
6066 return Error(IDRange.Start,
6067 "directive is not supported with architected flat scratch",
6068 IDRange);
6069 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6070 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6071 ValRange);
6072 } else if (ID == ".amdhsa_enable_private_segment") {
6073 if (!hasArchitectedFlatScratch())
6074 return Error(
6075 IDRange.Start,
6076 "directive is not supported without architected flat scratch",
6077 IDRange);
6078 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6079 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6080 ValRange);
6081 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
6082 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6083 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6084 ValRange);
6085 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
6086 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6087 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6088 ValRange);
6089 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
6090 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6091 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6092 ValRange);
6093 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
6094 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6095 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6096 ValRange);
6097 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
6098 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6099 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6100 ValRange);
6101 } else if (ID == ".amdhsa_next_free_vgpr") {
6102 VGPRRange = ValRange;
6103 NextFreeVGPR = ExprVal;
6104 } else if (ID == ".amdhsa_next_free_sgpr") {
6105 SGPRRange = ValRange;
6106 NextFreeSGPR = ExprVal;
6107 } else if (ID == ".amdhsa_accum_offset") {
6108 if (!isGFX90A())
6109 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6110 AccumOffset = ExprVal;
6111 } else if (ID == ".amdhsa_named_barrier_count") {
6112 if (!isGFX1250())
6113 return Error(IDRange.Start, "directive requires gfx1250+", IDRange);
6114 NamedBarCnt = ExprVal;
6115 } else if (ID == ".amdhsa_reserve_vcc") {
6116 if (EvaluatableExpr && !isUInt<1>(Val))
6117 return OutOfRangeError(ValRange);
6118 ReserveVCC = ExprVal;
6119 } else if (ID == ".amdhsa_reserve_flat_scratch") {
6120 if (IVersion.Major < 7)
6121 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
6122 if (hasArchitectedFlatScratch())
6123 return Error(IDRange.Start,
6124 "directive is not supported with architected flat scratch",
6125 IDRange);
6126 if (EvaluatableExpr && !isUInt<1>(Val))
6127 return OutOfRangeError(ValRange);
6128 ReserveFlatScr = ExprVal;
6129 } else if (ID == ".amdhsa_reserve_xnack_mask") {
6130 if (IVersion.Major < 8)
6131 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
6132 if (!isUInt<1>(Val))
6133 return OutOfRangeError(ValRange);
6134 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6135 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
6136 IDRange);
6137 } else if (ID == ".amdhsa_float_round_mode_32") {
6138 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6139 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6140 ValRange);
6141 } else if (ID == ".amdhsa_float_round_mode_16_64") {
6142 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6143 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6144 ValRange);
6145 } else if (ID == ".amdhsa_float_denorm_mode_32") {
6146 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6147 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6148 ValRange);
6149 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
6150 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6151 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6152 ValRange);
6153 } else if (ID == ".amdhsa_dx10_clamp") {
6154 if (IVersion.Major >= 12)
6155 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
6156 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6157 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6158 ValRange);
6159 } else if (ID == ".amdhsa_ieee_mode") {
6160 if (IVersion.Major >= 12)
6161 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
6162 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6163 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6164 ValRange);
6165 } else if (ID == ".amdhsa_fp16_overflow") {
6166 if (IVersion.Major < 9)
6167 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
6168 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6169 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6170 ValRange);
6171 } else if (ID == ".amdhsa_tg_split") {
6172 if (!isGFX90A())
6173 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6174 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
6175 ExprVal, ValRange);
6176 } else if (ID == ".amdhsa_workgroup_processor_mode") {
6177 if (!supportsWGP(getSTI()))
6178 return Error(IDRange.Start,
6179 "directive unsupported on " + getSTI().getCPU(), IDRange);
6181 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6182 ValRange);
6183 } else if (ID == ".amdhsa_memory_ordered") {
6184 if (IVersion.Major < 10)
6185 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6186 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6187 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6188 ValRange);
6189 } else if (ID == ".amdhsa_forward_progress") {
6190 if (IVersion.Major < 10)
6191 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6192 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6193 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6194 ValRange);
6195 } else if (ID == ".amdhsa_shared_vgpr_count") {
6196 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6197 if (IVersion.Major < 10 || IVersion.Major >= 12)
6198 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
6199 IDRange);
6200 SharedVGPRCount = Val;
6201 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6202 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6203 ValRange);
6204 } else if (ID == ".amdhsa_inst_pref_size") {
6205 if (IVersion.Major < 11)
6206 return Error(IDRange.Start, "directive requires gfx11+", IDRange);
6207 if (IVersion.Major == 11) {
6208 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6209 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6210 ValRange);
6211 } else {
6212 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6213 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6214 ValRange);
6215 }
6216 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
6217 PARSE_BITS_ENTRY(
6218 KD.compute_pgm_rsrc2,
6219 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6220 ExprVal, ValRange);
6221 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
6222 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6223 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6224 ExprVal, ValRange);
6225 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
6226 PARSE_BITS_ENTRY(
6227 KD.compute_pgm_rsrc2,
6228 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6229 ExprVal, ValRange);
6230 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
6231 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6232 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6233 ExprVal, ValRange);
6234 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
6235 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6236 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6237 ExprVal, ValRange);
6238 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
6239 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6240 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6241 ExprVal, ValRange);
6242 } else if (ID == ".amdhsa_exception_int_div_zero") {
6243 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6244 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6245 ExprVal, ValRange);
6246 } else if (ID == ".amdhsa_round_robin_scheduling") {
6247 if (IVersion.Major < 12)
6248 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
6249 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6250 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6251 ValRange);
6252 } else {
6253 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
6254 }
6255
6256#undef PARSE_BITS_ENTRY
6257 }
6258
6259 if (!Seen.contains(".amdhsa_next_free_vgpr"))
6260 return TokError(".amdhsa_next_free_vgpr directive is required");
6261
6262 if (!Seen.contains(".amdhsa_next_free_sgpr"))
6263 return TokError(".amdhsa_next_free_sgpr directive is required");
6264
6265 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
6266
6267 // Consider the case where the total number of UserSGPRs, including trailing
6268 // allocated preload SGPRs, is greater than the number of explicitly
6269 // referenced SGPRs.
6270 if (PreloadLength) {
6271 MCContext &Ctx = getContext();
6272 NextFreeSGPR = AMDGPUMCExpr::createMax(
6273 {NextFreeSGPR, MCConstantExpr::create(UserSGPRCount, Ctx)}, Ctx);
6274 }
6275
6276 const MCExpr *VGPRBlocks;
6277 const MCExpr *SGPRBlocks;
6278 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
6279 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6280 EnableWavefrontSize32, NextFreeVGPR,
6281 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
6282 SGPRBlocks))
6283 return true;
6284
6285 int64_t EvaluatedVGPRBlocks;
6286 bool VGPRBlocksEvaluatable =
6287 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
6288 if (VGPRBlocksEvaluatable &&
6289 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
6290 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
6291 return OutOfRangeError(VGPRRange);
6292 }
6293 MCKernelDescriptor::bits_set(
6294 KD.compute_pgm_rsrc1, VGPRBlocks,
6295 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6296 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
6297
6298 int64_t EvaluatedSGPRBlocks;
6299 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
6300 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
6301 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
6302 return OutOfRangeError(SGPRRange);
6303 MCKernelDescriptor::bits_set(
6304 KD.compute_pgm_rsrc1, SGPRBlocks,
6305 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6306 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
6307
6308 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6309 return TokError("amdgpu_user_sgpr_count smaller than than implied by "
6310 "enabled user SGPRs");
6311
6312 if (isGFX1250()) {
6313 if (!isUInt<COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
6314 return TokError("too many user SGPRs enabled");
6315 MCKernelDescriptor::bits_set(
6316 KD.compute_pgm_rsrc2,
6317 MCConstantExpr::create(UserSGPRCount, getContext()),
6318 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
6319 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, getContext());
6320 } else {
6321 if (!isUInt<COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_WIDTH>(
6322 UserSGPRCount))
6323 return TokError("too many user SGPRs enabled");
6324 MCKernelDescriptor::bits_set(
6325 KD.compute_pgm_rsrc2,
6326 MCConstantExpr::create(UserSGPRCount, getContext()),
6327 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
6328 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, getContext());
6329 }
6330
6331 int64_t IVal = 0;
6332 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
6333 return TokError("Kernarg size should be resolvable");
6334 uint64_t kernarg_size = IVal;
6335 if (PreloadLength && kernarg_size &&
6336 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6337 return TokError("Kernarg preload length + offset is larger than the "
6338 "kernarg segment size");
6339
6340 if (isGFX90A()) {
6341 if (!Seen.contains(".amdhsa_accum_offset"))
6342 return TokError(".amdhsa_accum_offset directive is required");
6343 int64_t EvaluatedAccum;
6344 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
6345 uint64_t UEvaluatedAccum = EvaluatedAccum;
6346 if (AccumEvaluatable &&
6347 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6348 return TokError("accum_offset should be in range [4..256] in "
6349 "increments of 4");
6350
6351 int64_t EvaluatedNumVGPR;
6352 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
6353 AccumEvaluatable &&
6354 UEvaluatedAccum >
6355 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
6356 return TokError("accum_offset exceeds total VGPR allocation");
6357 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
6358 MCBinaryExpr::createDiv(
6359 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
6360 MCConstantExpr::create(1, getContext()), getContext());
6361 MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, AdjustedAccum,
6362 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6363 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6364 getContext());
6365 }
6366
6367 if (isGFX1250())
6368 MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, NamedBarCnt,
6369 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
6370 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
6371 getContext());
6372
6373 if (IVersion.Major >= 10 && IVersion.Major < 12) {
6374 // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
6375 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6376 return TokError("shared_vgpr_count directive not valid on "
6377 "wavefront size 32");
6378 }
6379
6380 if (VGPRBlocksEvaluatable &&
6381 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
6382 63)) {
6383 return TokError("shared_vgpr_count*2 + "
6384 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
6385 "exceed 63\n");
6386 }
6387 }
6388
6389 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
6390 NextFreeVGPR, NextFreeSGPR,
6391 ReserveVCC, ReserveFlatScr);
6392 return false;
6393}
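// A minimal sketch of the assembly this parser accepts (directive values are
// illustrative; only the register-count directives are mandatory):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 32
//     .amdhsa_next_free_sgpr 16
//     .amdhsa_user_sgpr_kernarg_segment_ptr 1
//   .end_amdhsa_kernel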
6394
6395bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6396 uint32_t Version;
6397 if (ParseAsAbsoluteExpression(Version))
6398 return true;
6399
6400 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
6401 return false;
6402}
6403
6404bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
6405 AMDGPUMCKernelCodeT &C) {
6406 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
6407 // assembly for backwards compatibility.
6408 if (ID == "max_scratch_backing_memory_byte_size") {
6409 Parser.eatToEndOfStatement();
6410 return false;
6411 }
6412
6413 SmallString<40> ErrStr;
6414 raw_svector_ostream Err(ErrStr);
6415 if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
6416 return TokError(Err.str());
6417 }
6418 Lex();
6419
6420 if (ID == "enable_wavefront_size32") {
6421 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
6422 if (!isGFX10Plus())
6423 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
6424 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
6425 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
6426 } else {
6427 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
6428 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
6429 }
6430 }
6431
6432 if (ID == "wavefront_size") {
6433 if (C.wavefront_size == 5) {
6434 if (!isGFX10Plus())
6435 return TokError("wavefront_size=5 is only allowed on GFX10+");
6436 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
6437 return TokError("wavefront_size=5 requires +WavefrontSize32");
6438 } else if (C.wavefront_size == 6) {
6439 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
6440 return TokError("wavefront_size=6 requires +WavefrontSize64");
6441 }
6442 }
6443
6444 return false;
6445}
6446
6447bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6448 AMDGPUMCKernelCodeT KernelCode;
6449 KernelCode.initDefault(&getSTI(), getContext());
6450
6451 while (true) {
6452 // Lex EndOfStatement. This is in a while loop, because lexing a comment
6453 // will set the current token to EndOfStatement.
6454 while(trySkipToken(AsmToken::EndOfStatement));
6455
6456 StringRef ID;
6457 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
6458 return true;
6459
6460 if (ID == ".end_amd_kernel_code_t")
6461 break;
6462
6463 if (ParseAMDKernelCodeTValue(ID, KernelCode))
6464 return true;
6465 }
6466
6467 KernelCode.validate(&getSTI(), getContext());
6468 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
6469
6470 return false;
6471}
6472
6473bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6474 StringRef KernelName;
6475 if (!parseId(KernelName, "expected symbol name"))
6476 return true;
6477
6478 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6479 ELF::STT_AMDGPU_HSA_KERNEL);
6480
6481 KernelScope.initialize(getContext());
6482 return false;
6483}
6484
6485bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6486 if (!getSTI().getTargetTriple().isAMDGCN()) {
6487 return Error(getLoc(),
6488 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6489 "architectures");
6490 }
6491
6492 auto TargetIDDirective = getLexer().getTok().getStringContents();
6493 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6494 return Error(getParser().getTok().getLoc(), "target id must match options");
6495
6496 getTargetStreamer().EmitISAVersion();
6497 Lex();
6498
6499 return false;
6500}
6501
6502bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6503 assert(isHsaAbi(getSTI()));
6504
6505 std::string HSAMetadataString;
6506 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6507 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6508 return true;
6509
6510 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6511 return Error(getLoc(), "invalid HSA metadata");
6512
6513 return false;
6514}
6515
6516/// Common code to parse out a block of text (typically YAML) between start and
6517/// end directives.
6518bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6519 const char *AssemblerDirectiveEnd,
6520 std::string &CollectString) {
6521
6522 raw_string_ostream CollectStream(CollectString);
6523
6524 getLexer().setSkipSpace(false);
6525
6526 bool FoundEnd = false;
6527 while (!isToken(AsmToken::Eof)) {
6528 while (isToken(AsmToken::Space)) {
6529 CollectStream << getTokenStr();
6530 Lex();
6531 }
6532
6533 if (trySkipId(AssemblerDirectiveEnd)) {
6534 FoundEnd = true;
6535 break;
6536 }
6537
6538 CollectStream << Parser.parseStringToEndOfStatement()
6539 << getContext().getAsmInfo()->getSeparatorString();
6540
6541 Parser.eatToEndOfStatement();
6542 }
6543
6544 getLexer().setSkipSpace(true);
6545
6546 if (isToken(AsmToken::Eof) && !FoundEnd) {
6547 return TokError(Twine("expected directive ") +
6548 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6549 }
6550
6551 return false;
6552}
6553
6554/// Parse the assembler directive for new MsgPack-format PAL metadata.
6555bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6556 std::string String;
6557 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6558 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
6559 return true;
6560
6561 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6562 if (!PALMetadata->setFromString(String))
6563 return Error(getLoc(), "invalid PAL metadata");
6564 return false;
6565}
6566
6567/// Parse the assembler directive for old linear-format PAL metadata.
6568bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6569 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6570 return Error(getLoc(),
6571 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6572 "not available on non-amdpal OSes")).str());
6573 }
6574
6575 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6576 PALMetadata->setLegacy();
6577 for (;;) {
6578 uint32_t Key, Value;
6579 if (ParseAsAbsoluteExpression(Key)) {
6580 return TokError(Twine("invalid value in ") +
6581 Twine(PALMD::AssemblerDirective));
6582 }
6583 if (!trySkipToken(AsmToken::Comma)) {
6584 return TokError(Twine("expected an even number of values in ") +
6585 Twine(PALMD::AssemblerDirective));
6586 }
6587 if (ParseAsAbsoluteExpression(Value)) {
6588 return TokError(Twine("invalid value in ") +
6589 Twine(PALMD::AssemblerDirective));
6590 }
6591 PALMetadata->setRegister(Key, Value);
6592 if (!trySkipToken(AsmToken::Comma))
6593 break;
6594 }
6595 return false;
6596}
6597
6598/// ParseDirectiveAMDGPULDS
6599/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
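/// e.g. (illustrative) .amdgpu_lds lds_buffer, 4096, 16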
6600bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6601 if (getParser().checkForValidSection())
6602 return true;
6603
6604 StringRef Name;
6605 SMLoc NameLoc = getLoc();
6606 if (getParser().parseIdentifier(Name))
6607 return TokError("expected identifier in directive");
6608
6609 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6610 if (getParser().parseComma())
6611 return true;
6612
6613 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6614
6615 int64_t Size;
6616 SMLoc SizeLoc = getLoc();
6617 if (getParser().parseAbsoluteExpression(Size))
6618 return true;
6619 if (Size < 0)
6620 return Error(SizeLoc, "size must be non-negative");
6621 if (Size > LocalMemorySize)
6622 return Error(SizeLoc, "size is too large");
6623
6624 int64_t Alignment = 4;
6625 if (trySkipToken(AsmToken::Comma)) {
6626 SMLoc AlignLoc = getLoc();
6627 if (getParser().parseAbsoluteExpression(Alignment))
6628 return true;
6629 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6630 return Error(AlignLoc, "alignment must be a power of two");
6631
6632 // Alignment larger than the size of LDS is possible in theory, as long
6633 // as the linker manages to place the symbol at address 0, but we do want
6634 // to make sure the alignment fits nicely into a 32-bit integer.
6635 if (Alignment >= 1u << 31)
6636 return Error(AlignLoc, "alignment is too large");
6637 }
6638
6639 if (parseEOL())
6640 return true;
6641
6642 Symbol->redefineIfPossible();
6643 if (!Symbol->isUndefined())
6644 return Error(NameLoc, "invalid symbol redefinition");
6645
6646 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6647 return false;
6648}
6649
6650bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6651 StringRef IDVal = DirectiveID.getString();
6652
6653 if (isHsaAbi(getSTI())) {
6654 if (IDVal == ".amdhsa_kernel")
6655 return ParseDirectiveAMDHSAKernel();
6656
6657 if (IDVal == ".amdhsa_code_object_version")
6658 return ParseDirectiveAMDHSACodeObjectVersion();
6659
6660 // TODO: Restructure/combine with PAL metadata directive.
6661 if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
6662 return ParseDirectiveHSAMetadata();
6663 } else {
6664 if (IDVal == ".amd_kernel_code_t")
6665 return ParseDirectiveAMDKernelCodeT();
6666
6667 if (IDVal == ".amdgpu_hsa_kernel")
6668 return ParseDirectiveAMDGPUHsaKernel();
6669
6670 if (IDVal == ".amd_amdgpu_isa")
6671 return ParseDirectiveISAVersion();
6672
6673 if (IDVal == HSAMD::AssemblerDirectiveBegin) {
6674 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6675 Twine(" directive is "
6676 "not available on non-amdhsa OSes"))
6677 .str());
6678 }
6679 }
6680
6681 if (IDVal == ".amdgcn_target")
6682 return ParseDirectiveAMDGCNTarget();
6683
6684 if (IDVal == ".amdgpu_lds")
6685 return ParseDirectiveAMDGPULDS();
6686
6687 if (IDVal == PALMD::AssemblerDirectiveBegin)
6688 return ParseDirectivePALMetadataBegin();
6689
6690 if (IDVal == PALMD::AssemblerDirective)
6691 return ParseDirectivePALMetadata();
6692
6693 return true;
6694}
6695
6696bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6697 MCRegister Reg) {
6698 if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
6699 return isGFX9Plus();
6700
6701 // GFX10+ has 2 more SGPRs 104 and 105.
6702 if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
6703 return hasSGPR104_SGPR105();
6704
6705 switch (Reg.id()) {
6706 case SRC_SHARED_BASE_LO:
6707 case SRC_SHARED_BASE:
6708 case SRC_SHARED_LIMIT_LO:
6709 case SRC_SHARED_LIMIT:
6710 case SRC_PRIVATE_BASE_LO:
6711 case SRC_PRIVATE_BASE:
6712 case SRC_PRIVATE_LIMIT_LO:
6713 case SRC_PRIVATE_LIMIT:
6714 return isGFX9Plus();
6715 case SRC_FLAT_SCRATCH_BASE_LO:
6716 case SRC_FLAT_SCRATCH_BASE_HI:
6717 return hasGloballyAddressableScratch();
6718 case SRC_POPS_EXITING_WAVE_ID:
6719 return isGFX9Plus() && !isGFX11Plus();
6720 case TBA:
6721 case TBA_LO:
6722 case TBA_HI:
6723 case TMA:
6724 case TMA_LO:
6725 case TMA_HI:
6726 return !isGFX9Plus();
6727 case XNACK_MASK:
6728 case XNACK_MASK_LO:
6729 case XNACK_MASK_HI:
6730 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6731 case SGPR_NULL:
6732 return isGFX10Plus();
6733 case SRC_EXECZ:
6734 case SRC_VCCZ:
6735 return !isGFX11Plus();
6736 default:
6737 break;
6738 }
6739
6740 if (isCI())
6741 return true;
6742
6743 if (isSI() || isGFX10Plus()) {
6744 // No flat_scr on SI.
6745 // On GFX10Plus flat scratch is not a valid register operand and can only be
6746 // accessed with s_setreg/s_getreg.
6747 switch (Reg.id()) {
6748 case FLAT_SCR:
6749 case FLAT_SCR_LO:
6750 case FLAT_SCR_HI:
6751 return false;
6752 default:
6753 return true;
6754 }
6755 }
6756
6757 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6758 // SI/CI have.
6759 if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
6760 return hasSGPR102_SGPR103();
6761
6762 return true;
6763}
6764
6765ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6766 StringRef Mnemonic,
6767 OperandMode Mode) {
6768 ParseStatus Res = parseVOPD(Operands);
6769 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6770 return Res;
6771
6772 // Try to parse with a custom parser
6773 Res = MatchOperandParserImpl(Operands, Mnemonic);
6774
6775 // If we successfully parsed the operand or if there was an error parsing,
6776 // we are done.
6777 //
6778 // If we are parsing after we reach EndOfStatement then this means we
6779 // are appending default values to the Operands list. This is only done
6780 // by custom parser, so we shouldn't continue on to the generic parsing.
6781 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6782 return Res;
6783
6784 SMLoc RBraceLoc;
6785 SMLoc LBraceLoc = getLoc();
6786 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6787 unsigned Prefix = Operands.size();
6788
6789 for (;;) {
6790 auto Loc = getLoc();
6791 Res = parseReg(Operands);
6792 if (Res.isNoMatch())
6793 Error(Loc, "expected a register");
6794 if (!Res.isSuccess())
6795 return ParseStatus::Failure;
6796
6797 RBraceLoc = getLoc();
6798 if (trySkipToken(AsmToken::RBrac))
6799 break;
6800
6801 if (!skipToken(AsmToken::Comma,
6802 "expected a comma or a closing square bracket"))
6803 return ParseStatus::Failure;
6804 }
6805
6806 if (Operands.size() - Prefix > 1) {
6807 Operands.insert(Operands.begin() + Prefix,
6808 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6809 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6810 }
6811
6812 return ParseStatus::Success;
6813 }
6814
6815 return parseRegOrImm(Operands);
6816}
6817
6818StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6819 // Clear any forced encodings from the previous instruction.
6820 setForcedEncodingSize(0);
6821 setForcedDPP(false);
6822 setForcedSDWA(false);
6823
6824 if (Name.consume_back("_e64_dpp")) {
6825 setForcedDPP(true);
6826 setForcedEncodingSize(64);
6827 return Name;
6828 }
6829 if (Name.consume_back("_e64")) {
6830 setForcedEncodingSize(64);
6831 return Name;
6832 }
6833 if (Name.consume_back("_e32")) {
6834 setForcedEncodingSize(32);
6835 return Name;
6836 }
6837 if (Name.consume_back("_dpp")) {
6838 setForcedDPP(true);
6839 return Name;
6840 }
6841 if (Name.consume_back("_sdwa")) {
6842 setForcedSDWA(true);
6843 return Name;
6844 }
6845 return Name;
6846}
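// For example, "v_add_f32_e64" strips the "_e64" suffix, returns "v_add_f32",
// and forces a 64-bit encoding; "_dpp" and "_sdwa" set the corresponding
// forced-encoding flags instead (mnemonic chosen purely for illustration).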
6847
6848static void applyMnemonicAliases(StringRef &Mnemonic,
6849 const FeatureBitset &Features,
6850 unsigned VariantID);
6851
6852bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
6853 StringRef Name, SMLoc NameLoc,
6854 OperandVector &Operands) {
6855 // Add the instruction mnemonic
6856 Name = parseMnemonicSuffix(Name);
6857
6858 // If the target architecture uses MnemonicAlias, call it here to parse
6859 // operands correctly.
6860 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6861
6862 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6863
6864 bool IsMIMG = Name.starts_with("image_");
6865
6866 while (!trySkipToken(AsmToken::EndOfStatement)) {
6867 OperandMode Mode = OperandMode_Default;
6868 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6869 Mode = OperandMode_NSA;
6870 ParseStatus Res = parseOperand(Operands, Name, Mode);
6871
6872 if (!Res.isSuccess()) {
6873 checkUnsupportedInstruction(Name, NameLoc);
6874 if (!Parser.hasPendingError()) {
6875 // FIXME: use real operand location rather than the current location.
6876 StringRef Msg = Res.isFailure() ? "failed parsing operand."
6877 : "not a valid operand.";
6878 Error(getLoc(), Msg);
6879 }
6880 while (!trySkipToken(AsmToken::EndOfStatement)) {
6881 lex();
6882 }
6883 return true;
6884 }
6885
6886 // Eat the comma or space if there is one.
6887 trySkipToken(AsmToken::Comma);
6888 }
6889
6890 return false;
6891}
6892
6893//===----------------------------------------------------------------------===//
6894// Utility functions
6895//===----------------------------------------------------------------------===//
6896
6897 ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6898 OperandVector &Operands) {
6899 SMLoc S = getLoc();
6900 if (!trySkipId(Name))
6901 return ParseStatus::NoMatch;
6902
6903 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6904 return ParseStatus::Success;
6905}
6906
6907ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6908 int64_t &IntVal) {
6909
6910 if (!trySkipId(Prefix, AsmToken::Colon))
6911 return ParseStatus::NoMatch;
6912
6913 return parseExpr(IntVal);
6914}
6915
6916ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6917 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6918 std::function<bool(int64_t &)> ConvertResult) {
6919 SMLoc S = getLoc();
6920 int64_t Value = 0;
6921
6922 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6923 if (!Res.isSuccess())
6924 return Res;
6925
6926 if (ConvertResult && !ConvertResult(Value)) {
6927 Error(S, "invalid " + StringRef(Prefix) + " value.");
6928 }
6929
6930 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
6931 return ParseStatus::Success;
6932}
6933
6934ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6935 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6936 bool (*ConvertResult)(int64_t &)) {
6937 SMLoc S = getLoc();
6938 if (!trySkipId(Prefix, AsmToken::Colon))
6939 return ParseStatus::NoMatch;
6940
6941 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
6942 return ParseStatus::Failure;
6943
6944 unsigned Val = 0;
6945 const unsigned MaxSize = 4;
6946
6947 // FIXME: How to verify the number of elements matches the number of src
6948 // operands?
6949 for (int I = 0; ; ++I) {
6950 int64_t Op;
6951 SMLoc Loc = getLoc();
6952 if (!parseExpr(Op))
6953 return ParseStatus::Failure;
6954
6955 if (Op != 0 && Op != 1)
6956 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
6957
6958 Val |= (Op << I);
6959
6960 if (trySkipToken(AsmToken::RBrac))
6961 break;
6962
6963 if (I + 1 == MaxSize)
6964 return Error(getLoc(), "expected a closing square bracket");
6965
6966 if (!skipToken(AsmToken::Comma, "expected a comma"))
6967 return ParseStatus::Failure;
6968 }
6969
6970 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
6971 return ParseStatus::Success;
6972}
6973
6974 ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
6975 OperandVector &Operands,
6976 AMDGPUOperand::ImmTy ImmTy) {
6977 int64_t Bit;
6978 SMLoc S = getLoc();
6979
6980 if (trySkipId(Name)) {
6981 Bit = 1;
6982 } else if (trySkipId("no", Name)) {
6983 Bit = 0;
6984 } else {
6985 return ParseStatus::NoMatch;
6986 }
6987
6988 if (Name == "r128" && !hasMIMG_R128())
6989 return Error(S, "r128 modifier is not supported on this GPU");
6990 if (Name == "a16" && !hasA16())
6991 return Error(S, "a16 modifier is not supported on this GPU");
6992
6993 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6994 ImmTy = AMDGPUOperand::ImmTyR128A16;
6995
6996 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
6997 return ParseStatus::Success;
6998}
6999
7000unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
7001 bool &Disabling) const {
7002 Disabling = Id.consume_front("no");
7003
7004 if (isGFX940() && !Mnemo.starts_with("s_")) {
7005 return StringSwitch<unsigned>(Id)
7006 .Case("nt", AMDGPU::CPol::NT)
7007 .Case("sc0", AMDGPU::CPol::SC0)
7008 .Case("sc1", AMDGPU::CPol::SC1)
7009 .Default(0);
7010 }
7011
7012 return StringSwitch<unsigned>(Id)
7013 .Case("dlc", AMDGPU::CPol::DLC)
7014 .Case("glc", AMDGPU::CPol::GLC)
7015 .Case("scc", AMDGPU::CPol::SCC)
7016 .Case("slc", AMDGPU::CPol::SLC)
7017 .Default(0);
7018}
7019
7020ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
7021 if (isGFX12Plus()) {
7022 SMLoc StringLoc = getLoc();
7023
7024 int64_t CPolVal = 0;
7025 ParseStatus ResTH = ParseStatus::NoMatch;
7026 ParseStatus ResScope = ParseStatus::NoMatch;
7027 ParseStatus ResNV = ParseStatus::NoMatch;
7028 ParseStatus ResScal = ParseStatus::NoMatch;
7029
7030 for (;;) {
7031 if (ResTH.isNoMatch()) {
7032 int64_t TH;
7033 ResTH = parseTH(Operands, TH);
7034 if (ResTH.isFailure())
7035 return ResTH;
7036 if (ResTH.isSuccess()) {
7037 CPolVal |= TH;
7038 continue;
7039 }
7040 }
7041
7042 if (ResScope.isNoMatch()) {
7043 int64_t Scope;
7044 ResScope = parseScope(Operands, Scope);
7045 if (ResScope.isFailure())
7046 return ResScope;
7047 if (ResScope.isSuccess()) {
7048 CPolVal |= Scope;
7049 continue;
7050 }
7051 }
7052
7053 // NV bit exists on GFX12+, but does something starting from GFX1250.
7054 // Allow parsing on all GFX12 and fail on validation for better
7055 // diagnostics.
7056 if (ResNV.isNoMatch()) {
7057 if (trySkipId("nv")) {
7058 ResNV = ParseStatus::Success;
7059 CPolVal |= CPol::NV;
7060 continue;
7061 } else if (trySkipId("no", "nv")) {
7062 ResNV = ParseStatus::Success;
7063 continue;
7064 }
7065 }
7066
7067 if (ResScal.isNoMatch()) {
7068 if (trySkipId("scale_offset")) {
7069 ResScal = ParseStatus::Success;
7070 CPolVal |= CPol::SCAL;
7071 continue;
7072 } else if (trySkipId("no", "scale_offset")) {
7073 ResScal = ParseStatus::Success;
7074 continue;
7075 }
7076 }
7077
7078 break;
7079 }
7080
7081 if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch() &&
7082 ResScal.isNoMatch())
7083 return ParseStatus::NoMatch;
7084
7085 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
7086 AMDGPUOperand::ImmTyCPol));
7087 return ParseStatus::Success;
7088 }
7089
7090 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7091 SMLoc OpLoc = getLoc();
7092 unsigned Enabled = 0, Seen = 0;
7093 for (;;) {
7094 SMLoc S = getLoc();
7095 bool Disabling;
7096 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
7097 if (!CPol)
7098 break;
7099
7100 lex();
7101
7102 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
7103 return Error(S, "dlc modifier is not supported on this GPU");
7104
7105 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
7106 return Error(S, "scc modifier is not supported on this GPU");
7107
7108 if (Seen & CPol)
7109 return Error(S, "duplicate cache policy modifier");
7110
7111 if (!Disabling)
7112 Enabled |= CPol;
7113
7114 Seen |= CPol;
7115 }
7116
7117 if (!Seen)
7118 return ParseStatus::NoMatch;
7119
7120 Operands.push_back(
7121 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
7122 return ParseStatus::Success;
7123}
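// Editorial note (illustrative, not from the original source): on GFX12+ the
// cache policy is written with named fields, e.g.
//   global_load_b32 v0, v[0:1], off th:TH_LOAD_NT scope:SCOPE_SYS
// while older targets use standalone modifiers such as "glc slc dlc", with a
// "no" prefix (e.g. "noglc") recognized by getCPolKind() to disable a bit.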
7124
7125ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
7126 int64_t &Scope) {
7127 static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
7128                                     CPol::SCOPE_DEV, CPol::SCOPE_SYS};
7129
7130 ParseStatus Res = parseStringOrIntWithPrefix(
7131 Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
7132 Scope);
7133
7134 if (Res.isSuccess())
7135 Scope = Scopes[Scope];
7136
7137 return Res;
7138}
7139
7140ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
7141 TH = AMDGPU::CPol::TH_RT; // default
7142
7143 StringRef Value;
7144 SMLoc StringLoc;
7145 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
7146 if (!Res.isSuccess())
7147 return Res;
7148
7149  if (Value == "TH_DEFAULT")
7150    TH = AMDGPU::CPol::TH_RT;
7151  else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
7152           Value == "TH_LOAD_NT_WB") {
7153    return Error(StringLoc, "invalid th value");
7154  } else if (Value.consume_front("TH_ATOMIC_")) {
7155    TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
7156  } else if (Value.consume_front("TH_LOAD_")) {
7157    TH = AMDGPU::CPol::TH_TYPE_LOAD;
7158  } else if (Value.consume_front("TH_STORE_")) {
7159    TH = AMDGPU::CPol::TH_TYPE_STORE;
7160  } else {
7161    return Error(StringLoc, "invalid th value");
7162  }
7163
7164  if (Value == "BYPASS")
7165    TH |= AMDGPU::CPol::TH_REAL_BYPASS;
7166
7167  if (TH != 0) {
7168    if (TH == AMDGPU::CPol::TH_TYPE_ATOMIC)
7169      TH |= StringSwitch<int64_t>(Value)
7170                .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7171                .Case("RT", AMDGPU::CPol::TH_RT)
7172                .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7173                .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
7174                .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
7175                                       AMDGPU::CPol::TH_ATOMIC_RETURN)
7176                .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
7177                .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
7178                                        AMDGPU::CPol::TH_ATOMIC_NT)
7179 .Default(0xffffffff);
7180 else
7181 TH |= StringSwitch<int64_t>(Value)
7182 .Case("RT", AMDGPU::CPol::TH_RT)
7183 .Case("NT", AMDGPU::CPol::TH_NT)
7184 .Case("HT", AMDGPU::CPol::TH_HT)
7185 .Case("LU", AMDGPU::CPol::TH_LU)
7186 .Case("WB", AMDGPU::CPol::TH_WB)
7187 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
7188 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
7189 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
7190 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
7191 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
7192 .Default(0xffffffff);
7193 }
7194
7195 if (TH == 0xffffffff)
7196 return Error(StringLoc, "invalid th value");
7197
7198 return ParseStatus::Success;
7199}
7200
7201static void
7202addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands,
7203                      AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7204 AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
7205 std::optional<unsigned> InsertAt = std::nullopt) {
7206 auto i = OptionalIdx.find(ImmT);
7207 if (i != OptionalIdx.end()) {
7208 unsigned Idx = i->second;
7209 const AMDGPUOperand &Op =
7210 static_cast<const AMDGPUOperand &>(*Operands[Idx]);
7211 if (InsertAt)
7212 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
7213 else
7214 Op.addImmOperands(Inst, 1);
7215 } else {
7216 if (InsertAt.has_value())
7217 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
7218 else
7219      Inst.addOperand(MCOperand::createImm(Default));
7220  }
7221}
7222
7223ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7224 StringRef &Value,
7225 SMLoc &StringLoc) {
7226 if (!trySkipId(Prefix, AsmToken::Colon))
7227 return ParseStatus::NoMatch;
7228
7229 StringLoc = getLoc();
7230 return parseId(Value, "expected an identifier") ? ParseStatus::Success
7231                                                : ParseStatus::Failure;
7232}
7233
7234ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7235 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7236 int64_t &IntVal) {
7237 if (!trySkipId(Name, AsmToken::Colon))
7238 return ParseStatus::NoMatch;
7239
7240 SMLoc StringLoc = getLoc();
7241
7242 StringRef Value;
7243 if (isToken(AsmToken::Identifier)) {
7244 Value = getTokenStr();
7245 lex();
7246
7247 for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
7248 if (Value == Ids[IntVal])
7249 break;
7250 } else if (!parseExpr(IntVal))
7251 return ParseStatus::Failure;
7252
7253 if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
7254 return Error(StringLoc, "invalid " + Twine(Name) + " value");
7255
7256 return ParseStatus::Success;
7257}
7258
7259ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7260 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7261 AMDGPUOperand::ImmTy Type) {
7262 SMLoc S = getLoc();
7263 int64_t IntVal;
7264
7265 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7266 if (Res.isSuccess())
7267 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
7268
7269 return Res;
7270}
7271
7272//===----------------------------------------------------------------------===//
7273// MTBUF format
7274//===----------------------------------------------------------------------===//
7275
7276bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
7277 int64_t MaxVal,
7278 int64_t &Fmt) {
7279 int64_t Val;
7280 SMLoc Loc = getLoc();
7281
7282 auto Res = parseIntWithPrefix(Pref, Val);
7283 if (Res.isFailure())
7284 return false;
7285 if (Res.isNoMatch())
7286 return true;
7287
7288 if (Val < 0 || Val > MaxVal) {
7289 Error(Loc, Twine("out of range ", StringRef(Pref)));
7290 return false;
7291 }
7292
7293 Fmt = Val;
7294 return true;
7295}
7296
7297ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
7298 AMDGPUOperand::ImmTy ImmTy) {
7299 const char *Pref = "index_key";
7300 int64_t ImmVal = 0;
7301 SMLoc Loc = getLoc();
7302 auto Res = parseIntWithPrefix(Pref, ImmVal);
7303 if (!Res.isSuccess())
7304 return Res;
7305
7306 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7307 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7308 (ImmVal < 0 || ImmVal > 1))
7309 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7310
7311 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7312 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7313
7314 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
7315 return ParseStatus::Success;
7316}
7317
7318ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
7319 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
7320}
7321
7322ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
7323 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
7324}
7325
7326ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
7327 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
7328}
7329
7330ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(OperandVector &Operands,
7331 StringRef Name,
7332 AMDGPUOperand::ImmTy Type) {
7333 return parseStringOrIntWithPrefix(Operands, Name,
7334 {"MATRIX_FMT_FP8", "MATRIX_FMT_BF8",
7335 "MATRIX_FMT_FP6", "MATRIX_FMT_BF6",
7336 "MATRIX_FMT_FP4"},
7337 Type);
7338}
7339
7340ParseStatus AMDGPUAsmParser::parseMatrixAFMT(OperandVector &Operands) {
7341 return tryParseMatrixFMT(Operands, "matrix_a_fmt",
7342 AMDGPUOperand::ImmTyMatrixAFMT);
7343}
7344
7345ParseStatus AMDGPUAsmParser::parseMatrixBFMT(OperandVector &Operands) {
7346 return tryParseMatrixFMT(Operands, "matrix_b_fmt",
7347 AMDGPUOperand::ImmTyMatrixBFMT);
7348}
7349
7350ParseStatus AMDGPUAsmParser::tryParseMatrixScale(OperandVector &Operands,
7351 StringRef Name,
7352 AMDGPUOperand::ImmTy Type) {
7353 return parseStringOrIntWithPrefix(
7354 Operands, Name, {"MATRIX_SCALE_ROW0", "MATRIX_SCALE_ROW1"}, Type);
7355}
7356
7357ParseStatus AMDGPUAsmParser::parseMatrixAScale(OperandVector &Operands) {
7358 return tryParseMatrixScale(Operands, "matrix_a_scale",
7359 AMDGPUOperand::ImmTyMatrixAScale);
7360}
7361
7362ParseStatus AMDGPUAsmParser::parseMatrixBScale(OperandVector &Operands) {
7363 return tryParseMatrixScale(Operands, "matrix_b_scale",
7364 AMDGPUOperand::ImmTyMatrixBScale);
7365}
7366
7367ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(OperandVector &Operands,
7368 StringRef Name,
7369 AMDGPUOperand::ImmTy Type) {
7370 return parseStringOrIntWithPrefix(
7371 Operands, Name,
7372 {"MATRIX_SCALE_FMT_E8", "MATRIX_SCALE_FMT_E5M3", "MATRIX_SCALE_FMT_E4M3"},
7373 Type);
7374}
7375
7376ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(OperandVector &Operands) {
7377 return tryParseMatrixScaleFmt(Operands, "matrix_a_scale_fmt",
7378 AMDGPUOperand::ImmTyMatrixAScaleFmt);
7379}
7380
7381ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(OperandVector &Operands) {
7382 return tryParseMatrixScaleFmt(Operands, "matrix_b_scale_fmt",
7383 AMDGPUOperand::ImmTyMatrixBScaleFmt);
7384}
7385
7386// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
7387// values to live in a joint format operand in the MCInst encoding.
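// Illustrative examples (not from the original source): pre-GFX10 syntax uses
// the separate fields parsed here, e.g.
//   tbuffer_load_format_x v0, off, s[0:3], dfmt:15, nfmt:2, 0
// while GFX10+ uses a unified format such as format:[BUF_FMT_32_FLOAT] or a
// numeric format:22, handled by parseUfmt()/parseSymbolicOrNumericFormat().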
7388ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
7389 using namespace llvm::AMDGPU::MTBUFFormat;
7390
7391 int64_t Dfmt = DFMT_UNDEF;
7392 int64_t Nfmt = NFMT_UNDEF;
7393
7394 // dfmt and nfmt can appear in either order, and each is optional.
7395 for (int I = 0; I < 2; ++I) {
7396 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
7397 return ParseStatus::Failure;
7398
7399 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
7400 return ParseStatus::Failure;
7401
7402 // Skip optional comma between dfmt/nfmt
7403 // but guard against 2 commas following each other.
7404 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
7405 !peekToken().is(AsmToken::Comma)) {
7406 trySkipToken(AsmToken::Comma);
7407 }
7408 }
7409
7410 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
7411 return ParseStatus::NoMatch;
7412
7413 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7414 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7415
7416 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7417 return ParseStatus::Success;
7418}
7419
7420ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
7421 using namespace llvm::AMDGPU::MTBUFFormat;
7422
7423 int64_t Fmt = UFMT_UNDEF;
7424
7425 if (!tryParseFmt("format", UFMT_MAX, Fmt))
7426 return ParseStatus::Failure;
7427
7428 if (Fmt == UFMT_UNDEF)
7429 return ParseStatus::NoMatch;
7430
7431 Format = Fmt;
7432 return ParseStatus::Success;
7433}
7434
7435bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7436 int64_t &Nfmt,
7437 StringRef FormatStr,
7438 SMLoc Loc) {
7439 using namespace llvm::AMDGPU::MTBUFFormat;
7440 int64_t Format;
7441
7442 Format = getDfmt(FormatStr);
7443 if (Format != DFMT_UNDEF) {
7444 Dfmt = Format;
7445 return true;
7446 }
7447
7448 Format = getNfmt(FormatStr, getSTI());
7449 if (Format != NFMT_UNDEF) {
7450 Nfmt = Format;
7451 return true;
7452 }
7453
7454 Error(Loc, "unsupported format");
7455 return false;
7456}
7457
7458ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
7459 SMLoc FormatLoc,
7460 int64_t &Format) {
7461 using namespace llvm::AMDGPU::MTBUFFormat;
7462
7463 int64_t Dfmt = DFMT_UNDEF;
7464 int64_t Nfmt = NFMT_UNDEF;
7465 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
7466 return ParseStatus::Failure;
7467
7468 if (trySkipToken(AsmToken::Comma)) {
7469 StringRef Str;
7470 SMLoc Loc = getLoc();
7471 if (!parseId(Str, "expected a format string") ||
7472 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
7473 return ParseStatus::Failure;
7474 if (Dfmt == DFMT_UNDEF)
7475 return Error(Loc, "duplicate numeric format");
7476 if (Nfmt == NFMT_UNDEF)
7477 return Error(Loc, "duplicate data format");
7478 }
7479
7480 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7481 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7482
7483 if (isGFX10Plus()) {
7484 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
7485 if (Ufmt == UFMT_UNDEF)
7486 return Error(FormatLoc, "unsupported format");
7487 Format = Ufmt;
7488 } else {
7489 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7490 }
7491
7492 return ParseStatus::Success;
7493}
7494
7495ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7496 SMLoc Loc,
7497 int64_t &Format) {
7498 using namespace llvm::AMDGPU::MTBUFFormat;
7499
7500 auto Id = getUnifiedFormat(FormatStr, getSTI());
7501 if (Id == UFMT_UNDEF)
7502 return ParseStatus::NoMatch;
7503
7504 if (!isGFX10Plus())
7505 return Error(Loc, "unified format is not supported on this GPU");
7506
7507 Format = Id;
7508 return ParseStatus::Success;
7509}
7510
7511ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
7512 using namespace llvm::AMDGPU::MTBUFFormat;
7513 SMLoc Loc = getLoc();
7514
7515 if (!parseExpr(Format))
7516 return ParseStatus::Failure;
7517 if (!isValidFormatEncoding(Format, getSTI()))
7518 return Error(Loc, "out of range format");
7519
7520 return ParseStatus::Success;
7521}
7522
7523ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
7524 using namespace llvm::AMDGPU::MTBUFFormat;
7525
7526 if (!trySkipId("format", AsmToken::Colon))
7527 return ParseStatus::NoMatch;
7528
7529 if (trySkipToken(AsmToken::LBrac)) {
7530 StringRef FormatStr;
7531 SMLoc Loc = getLoc();
7532 if (!parseId(FormatStr, "expected a format string"))
7533 return ParseStatus::Failure;
7534
7535 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
7536 if (Res.isNoMatch())
7537 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
7538 if (!Res.isSuccess())
7539 return Res;
7540
7541 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7542 return ParseStatus::Failure;
7543
7544 return ParseStatus::Success;
7545 }
7546
7547 return parseNumericFormat(Format);
7548}
7549
7550ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
7551 using namespace llvm::AMDGPU::MTBUFFormat;
7552
7553 int64_t Format = getDefaultFormatEncoding(getSTI());
7554 ParseStatus Res;
7555 SMLoc Loc = getLoc();
7556
7557 // Parse legacy format syntax.
7558 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
7559 if (Res.isFailure())
7560 return Res;
7561
7562 bool FormatFound = Res.isSuccess();
7563
7564 Operands.push_back(
7565 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
7566
7567 if (FormatFound)
7568 trySkipToken(AsmToken::Comma);
7569
7570 if (isToken(AsmToken::EndOfStatement)) {
7571 // We are expecting an soffset operand,
7572    // but let the matcher handle the error.
7573 return ParseStatus::Success;
7574 }
7575
7576 // Parse soffset.
7577 Res = parseRegOrImm(Operands);
7578 if (!Res.isSuccess())
7579 return Res;
7580
7581 trySkipToken(AsmToken::Comma);
7582
7583 if (!FormatFound) {
7584 Res = parseSymbolicOrNumericFormat(Format);
7585 if (Res.isFailure())
7586 return Res;
7587 if (Res.isSuccess()) {
7588 auto Size = Operands.size();
7589 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
7590 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7591 Op.setImm(Format);
7592 }
7593 return ParseStatus::Success;
7594 }
7595
7596 if (isId("format") && peekToken().is(AsmToken::Colon))
7597 return Error(getLoc(), "duplicate format");
7598 return ParseStatus::Success;
7599}
7600
7601ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7602 ParseStatus Res =
7603 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
7604 if (Res.isNoMatch()) {
7605 Res = parseIntWithPrefix("inst_offset", Operands,
7606 AMDGPUOperand::ImmTyInstOffset);
7607 }
7608 return Res;
7609}
7610
7611ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7612 ParseStatus Res =
7613 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
7614 if (Res.isNoMatch())
7615 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
7616 return Res;
7617}
7618
7619ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7620 ParseStatus Res =
7621 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7622 if (Res.isNoMatch()) {
7623 Res =
7624 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7625 }
7626 return Res;
7627}
7628
7629//===----------------------------------------------------------------------===//
7630// Exp
7631//===----------------------------------------------------------------------===//
7632
7633void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7634 OptionalImmIndexMap OptionalIdx;
7635
7636 unsigned OperandIdx[4];
7637 unsigned EnMask = 0;
7638 int SrcIdx = 0;
7639
7640 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7641 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7642
7643 // Add the register arguments
7644 if (Op.isReg()) {
7645 assert(SrcIdx < 4);
7646 OperandIdx[SrcIdx] = Inst.size();
7647 Op.addRegOperands(Inst, 1);
7648 ++SrcIdx;
7649 continue;
7650 }
7651
7652 if (Op.isOff()) {
7653 assert(SrcIdx < 4);
7654 OperandIdx[SrcIdx] = Inst.size();
7655 Inst.addOperand(MCOperand::createReg(MCRegister()));
7656 ++SrcIdx;
7657 continue;
7658 }
7659
7660 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7661 Op.addImmOperands(Inst, 1);
7662 continue;
7663 }
7664
7665 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7666 continue;
7667
7668 // Handle optional arguments
7669 OptionalIdx[Op.getImmTy()] = i;
7670 }
7671
7672 assert(SrcIdx == 4);
7673
7674 bool Compr = false;
7675 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7676 Compr = true;
7677 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7678 Inst.getOperand(OperandIdx[2]).setReg(MCRegister());
7679 Inst.getOperand(OperandIdx[3]).setReg(MCRegister());
7680 }
7681
7682 for (auto i = 0; i < SrcIdx; ++i) {
7683 if (Inst.getOperand(OperandIdx[i]).getReg()) {
7684 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7685 }
7686 }
7687
7688 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7689 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7690
7691 Inst.addOperand(MCOperand::createImm(EnMask));
7692}
7693
7694//===----------------------------------------------------------------------===//
7695// s_waitcnt
7696//===----------------------------------------------------------------------===//
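// Illustrative example (not from the original source): this section accepts
// either a raw immediate or named counters, e.g.
//   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
// The *_sat forms (e.g. vmcnt_sat(100)) clamp an out-of-range value to the
// maximum encodable count instead of reporting an error.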
7697
7698static bool
7699encodeCnt(
7700    const AMDGPU::IsaVersion ISA,
7701 int64_t &IntVal,
7702 int64_t CntVal,
7703 bool Saturate,
7704 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7705 unsigned (*decode)(const IsaVersion &Version, unsigned))
7706{
7707 bool Failed = false;
7708
7709 IntVal = encode(ISA, IntVal, CntVal);
7710 if (CntVal != decode(ISA, IntVal)) {
7711 if (Saturate) {
7712 IntVal = encode(ISA, IntVal, -1);
7713 } else {
7714 Failed = true;
7715 }
7716 }
7717 return Failed;
7718}
7719
7720bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7721
7722 SMLoc CntLoc = getLoc();
7723 StringRef CntName = getTokenStr();
7724
7725 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7726 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7727 return false;
7728
7729 int64_t CntVal;
7730 SMLoc ValLoc = getLoc();
7731 if (!parseExpr(CntVal))
7732 return false;
7733
7734 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7735
7736 bool Failed = true;
7737 bool Sat = CntName.ends_with("_sat");
7738
7739 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7740 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7741 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7742 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7743 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7744 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7745 } else {
7746 Error(CntLoc, "invalid counter name " + CntName);
7747 return false;
7748 }
7749
7750 if (Failed) {
7751 Error(ValLoc, "too large value for " + CntName);
7752 return false;
7753 }
7754
7755 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7756 return false;
7757
7758 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7759 if (isToken(AsmToken::EndOfStatement)) {
7760 Error(getLoc(), "expected a counter name");
7761 return false;
7762 }
7763 }
7764
7765 return true;
7766}
7767
7768ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7769 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7770 int64_t Waitcnt = getWaitcntBitMask(ISA);
7771 SMLoc S = getLoc();
7772
7773 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7774 while (!isToken(AsmToken::EndOfStatement)) {
7775 if (!parseCnt(Waitcnt))
7776 return ParseStatus::Failure;
7777 }
7778 } else {
7779 if (!parseExpr(Waitcnt))
7780 return ParseStatus::Failure;
7781 }
7782
7783 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7784 return ParseStatus::Success;
7785}
7786
7787bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7788 SMLoc FieldLoc = getLoc();
7789 StringRef FieldName = getTokenStr();
7790 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
7791 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7792 return false;
7793
7794 SMLoc ValueLoc = getLoc();
7795 StringRef ValueName = getTokenStr();
7796 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
7797 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
7798 return false;
7799
7800 unsigned Shift;
7801 if (FieldName == "instid0") {
7802 Shift = 0;
7803 } else if (FieldName == "instskip") {
7804 Shift = 4;
7805 } else if (FieldName == "instid1") {
7806 Shift = 7;
7807 } else {
7808 Error(FieldLoc, "invalid field name " + FieldName);
7809 return false;
7810 }
7811
7812 int Value;
7813 if (Shift == 4) {
7814 // Parse values for instskip.
7815 Value = StringSwitch<int>(ValueName)
7816 .Case("SAME", 0)
7817 .Case("NEXT", 1)
7818 .Case("SKIP_1", 2)
7819 .Case("SKIP_2", 3)
7820 .Case("SKIP_3", 4)
7821 .Case("SKIP_4", 5)
7822 .Default(-1);
7823 } else {
7824 // Parse values for instid0 and instid1.
7825 Value = StringSwitch<int>(ValueName)
7826 .Case("NO_DEP", 0)
7827 .Case("VALU_DEP_1", 1)
7828 .Case("VALU_DEP_2", 2)
7829 .Case("VALU_DEP_3", 3)
7830 .Case("VALU_DEP_4", 4)
7831 .Case("TRANS32_DEP_1", 5)
7832 .Case("TRANS32_DEP_2", 6)
7833 .Case("TRANS32_DEP_3", 7)
7834 .Case("FMA_ACCUM_CYCLE_1", 8)
7835 .Case("SALU_CYCLE_1", 9)
7836 .Case("SALU_CYCLE_2", 10)
7837 .Case("SALU_CYCLE_3", 11)
7838 .Default(-1);
7839 }
7840 if (Value < 0) {
7841 Error(ValueLoc, "invalid value name " + ValueName);
7842 return false;
7843 }
7844
7845 Delay |= Value << Shift;
7846 return true;
7847}
7848
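// Illustrative example (not from the original source) of the syntax handled by
// parseDelay()/parseSDelayALU():
//   s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
// Fields are separated by '|', and each value is placed at the shift selected
// in parseDelay() above.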
7849ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7850 int64_t Delay = 0;
7851 SMLoc S = getLoc();
7852
7853 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7854 do {
7855 if (!parseDelay(Delay))
7856 return ParseStatus::Failure;
7857 } while (trySkipToken(AsmToken::Pipe));
7858 } else {
7859 if (!parseExpr(Delay))
7860 return ParseStatus::Failure;
7861 }
7862
7863 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7864 return ParseStatus::Success;
7865}
7866
7867bool
7868AMDGPUOperand::isSWaitCnt() const {
7869 return isImm();
7870}
7871
7872bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7873
7874//===----------------------------------------------------------------------===//
7875// DepCtr
7876//===----------------------------------------------------------------------===//
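// Illustrative example (not from the original source): s_waitcnt_depctr takes
// either a raw immediate or named counters, e.g.
//   s_waitcnt_depctr depctr_va_vdst(0) depctr_vm_vsrc(1)
// Counter names and their encodings are resolved by encodeDepCtr().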
7877
7878void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7879 StringRef DepCtrName) {
7880 switch (ErrorId) {
7881 case OPR_ID_UNKNOWN:
7882 Error(Loc, Twine("invalid counter name ", DepCtrName));
7883 return;
7884 case OPR_ID_UNSUPPORTED:
7885 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7886 return;
7887 case OPR_ID_DUPLICATE:
7888 Error(Loc, Twine("duplicate counter name ", DepCtrName));
7889 return;
7890 case OPR_VAL_INVALID:
7891 Error(Loc, Twine("invalid value for ", DepCtrName));
7892 return;
7893 default:
7894 assert(false);
7895 }
7896}
7897
7898bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
7899
7900 using namespace llvm::AMDGPU::DepCtr;
7901
7902 SMLoc DepCtrLoc = getLoc();
7903 StringRef DepCtrName = getTokenStr();
7904
7905 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7906 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7907 return false;
7908
7909 int64_t ExprVal;
7910 if (!parseExpr(ExprVal))
7911 return false;
7912
7913 unsigned PrevOprMask = UsedOprMask;
7914 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7915
7916 if (CntVal < 0) {
7917 depCtrError(DepCtrLoc, CntVal, DepCtrName);
7918 return false;
7919 }
7920
7921 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7922 return false;
7923
7924 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7925 if (isToken(AsmToken::EndOfStatement)) {
7926 Error(getLoc(), "expected a counter name");
7927 return false;
7928 }
7929 }
7930
7931 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7932 DepCtr = (DepCtr & ~CntValMask) | CntVal;
7933 return true;
7934}
7935
7936ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
7937 using namespace llvm::AMDGPU::DepCtr;
7938
7939 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
7940 SMLoc Loc = getLoc();
7941
7942 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7943 unsigned UsedOprMask = 0;
7944 while (!isToken(AsmToken::EndOfStatement)) {
7945 if (!parseDepCtr(DepCtr, UsedOprMask))
7946 return ParseStatus::Failure;
7947 }
7948 } else {
7949 if (!parseExpr(DepCtr))
7950 return ParseStatus::Failure;
7951 }
7952
7953 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
7954 return ParseStatus::Success;
7955}
7956
7957bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
7958
7959//===----------------------------------------------------------------------===//
7960// hwreg
7961//===----------------------------------------------------------------------===//
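// Illustrative example (not from the original source) of the function-style
// syntax handled below:
//   s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 32)
// A structured form with named fields (id/offset/size) and a plain 16-bit
// immediate are accepted as well.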
7962
7963ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
7964 OperandInfoTy &Offset,
7965 OperandInfoTy &Width) {
7966 using namespace llvm::AMDGPU::Hwreg;
7967
7968 if (!trySkipId("hwreg", AsmToken::LParen))
7969 return ParseStatus::NoMatch;
7970
7971 // The register may be specified by name or using a numeric code
7972 HwReg.Loc = getLoc();
7973 if (isToken(AsmToken::Identifier) &&
7974 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7975 HwReg.IsSymbolic = true;
7976 lex(); // skip register name
7977 } else if (!parseExpr(HwReg.Val, "a register name")) {
7978 return ParseStatus::Failure;
7979 }
7980
7981 if (trySkipToken(AsmToken::RParen))
7982 return ParseStatus::Success;
7983
7984 // parse optional params
7985 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
7986 return ParseStatus::Failure;
7987
7988 Offset.Loc = getLoc();
7989 if (!parseExpr(Offset.Val))
7990 return ParseStatus::Failure;
7991
7992 if (!skipToken(AsmToken::Comma, "expected a comma"))
7993 return ParseStatus::Failure;
7994
7995 Width.Loc = getLoc();
7996 if (!parseExpr(Width.Val) ||
7997 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7998 return ParseStatus::Failure;
7999
8000 return ParseStatus::Success;
8001}
8002
8003ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
8004 using namespace llvm::AMDGPU::Hwreg;
8005
8006 int64_t ImmVal = 0;
8007 SMLoc Loc = getLoc();
8008
8009 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
8010 HwregId::Default);
8011 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
8012 HwregOffset::Default);
8013 struct : StructuredOpField {
8014 using StructuredOpField::StructuredOpField;
8015 bool validate(AMDGPUAsmParser &Parser) const override {
8016 if (!isUIntN(Width, Val - 1))
8017 return Error(Parser, "only values from 1 to 32 are legal");
8018 return true;
8019 }
8020 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
8021 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
8022
8023 if (Res.isNoMatch())
8024 Res = parseHwregFunc(HwReg, Offset, Width);
8025
8026 if (Res.isSuccess()) {
8027 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
8028 return ParseStatus::Failure;
8029 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
8030 }
8031
8032 if (Res.isNoMatch() &&
8033 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
8034    Res = ParseStatus::Success;
8035
8036 if (!Res.isSuccess())
8037 return ParseStatus::Failure;
8038
8039 if (!isUInt<16>(ImmVal))
8040 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8041 Operands.push_back(
8042 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
8043 return ParseStatus::Success;
8044}
8045
8046bool AMDGPUOperand::isHwreg() const {
8047 return isImmTy(ImmTyHwreg);
8048}
8049
8050//===----------------------------------------------------------------------===//
8051// sendmsg
8052//===----------------------------------------------------------------------===//
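// Illustrative example (not from the original source):
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
// The message, operation and stream id may also be given as integer
// expressions, or the whole operand as a plain 16-bit immediate.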
8053
8054bool
8055AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8056 OperandInfoTy &Op,
8057 OperandInfoTy &Stream) {
8058 using namespace llvm::AMDGPU::SendMsg;
8059
8060 Msg.Loc = getLoc();
8061 if (isToken(AsmToken::Identifier) &&
8062 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8063 Msg.IsSymbolic = true;
8064 lex(); // skip message name
8065 } else if (!parseExpr(Msg.Val, "a message name")) {
8066 return false;
8067 }
8068
8069 if (trySkipToken(AsmToken::Comma)) {
8070 Op.IsDefined = true;
8071 Op.Loc = getLoc();
8072 if (isToken(AsmToken::Identifier) &&
8073 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
8074            OPR_ID_UNKNOWN) {
8075      lex(); // skip operation name
8076 } else if (!parseExpr(Op.Val, "an operation name")) {
8077 return false;
8078 }
8079
8080 if (trySkipToken(AsmToken::Comma)) {
8081 Stream.IsDefined = true;
8082 Stream.Loc = getLoc();
8083 if (!parseExpr(Stream.Val))
8084 return false;
8085 }
8086 }
8087
8088 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
8089}
8090
8091bool
8092AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
8093 const OperandInfoTy &Op,
8094 const OperandInfoTy &Stream) {
8095 using namespace llvm::AMDGPU::SendMsg;
8096
8097  // Validation strictness depends on whether the message is specified
8098  // in a symbolic or in a numeric form. In the latter case only the
8099  // possibility of encoding the value is checked.
8100 bool Strict = Msg.IsSymbolic;
8101
8102 if (Strict) {
8103 if (Msg.Val == OPR_ID_UNSUPPORTED) {
8104 Error(Msg.Loc, "specified message id is not supported on this GPU");
8105 return false;
8106 }
8107 } else {
8108 if (!isValidMsgId(Msg.Val, getSTI())) {
8109 Error(Msg.Loc, "invalid message id");
8110 return false;
8111 }
8112 }
8113 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
8114 if (Op.IsDefined) {
8115 Error(Op.Loc, "message does not support operations");
8116 } else {
8117 Error(Msg.Loc, "missing message operation");
8118 }
8119 return false;
8120 }
8121 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
8122 if (Op.Val == OPR_ID_UNSUPPORTED)
8123 Error(Op.Loc, "specified operation id is not supported on this GPU");
8124 else
8125 Error(Op.Loc, "invalid operation id");
8126 return false;
8127 }
8128 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
8129 Stream.IsDefined) {
8130 Error(Stream.Loc, "message operation does not support streams");
8131 return false;
8132 }
8133 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
8134 Error(Stream.Loc, "invalid message stream id");
8135 return false;
8136 }
8137 return true;
8138}
8139
8140ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
8141 using namespace llvm::AMDGPU::SendMsg;
8142
8143 int64_t ImmVal = 0;
8144 SMLoc Loc = getLoc();
8145
8146 if (trySkipId("sendmsg", AsmToken::LParen)) {
8147 OperandInfoTy Msg(OPR_ID_UNKNOWN);
8148 OperandInfoTy Op(OP_NONE_);
8149 OperandInfoTy Stream(STREAM_ID_NONE_);
8150 if (parseSendMsgBody(Msg, Op, Stream) &&
8151 validateSendMsg(Msg, Op, Stream)) {
8152 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
8153 } else {
8154 return ParseStatus::Failure;
8155 }
8156 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
8157 if (ImmVal < 0 || !isUInt<16>(ImmVal))
8158 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8159 } else {
8160 return ParseStatus::Failure;
8161 }
8162
8163 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
8164 return ParseStatus::Success;
8165}
8166
8167bool AMDGPUOperand::isSendMsg() const {
8168 return isImmTy(ImmTySendMsg);
8169}
8170
8171//===----------------------------------------------------------------------===//
8172// v_interp
8173//===----------------------------------------------------------------------===//
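// Illustrative example (not from the original source):
//   v_interp_p1_f32 v0, v1, attr2.x
// Slot names accepted by parseInterpSlot() are p10, p20 and p0; attributes are
// written as attr<N>.<chan> with <chan> one of x, y, z, w.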
8174
8175ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
8176 StringRef Str;
8177 SMLoc S = getLoc();
8178
8179 if (!parseId(Str))
8180 return ParseStatus::NoMatch;
8181
8182 int Slot = StringSwitch<int>(Str)
8183 .Case("p10", 0)
8184 .Case("p20", 1)
8185 .Case("p0", 2)
8186 .Default(-1);
8187
8188 if (Slot == -1)
8189 return Error(S, "invalid interpolation slot");
8190
8191 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
8192 AMDGPUOperand::ImmTyInterpSlot));
8193 return ParseStatus::Success;
8194}
8195
8196ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
8197 StringRef Str;
8198 SMLoc S = getLoc();
8199
8200 if (!parseId(Str))
8201 return ParseStatus::NoMatch;
8202
8203 if (!Str.starts_with("attr"))
8204 return Error(S, "invalid interpolation attribute");
8205
8206 StringRef Chan = Str.take_back(2);
8207 int AttrChan = StringSwitch<int>(Chan)
8208 .Case(".x", 0)
8209 .Case(".y", 1)
8210 .Case(".z", 2)
8211 .Case(".w", 3)
8212 .Default(-1);
8213 if (AttrChan == -1)
8214 return Error(S, "invalid or missing interpolation attribute channel");
8215
8216 Str = Str.drop_back(2).drop_front(4);
8217
8218 uint8_t Attr;
8219 if (Str.getAsInteger(10, Attr))
8220 return Error(S, "invalid or missing interpolation attribute number");
8221
8222 if (Attr > 32)
8223 return Error(S, "out of bounds interpolation attribute number");
8224
8225 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
8226
8227 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
8228 AMDGPUOperand::ImmTyInterpAttr));
8229 Operands.push_back(AMDGPUOperand::CreateImm(
8230 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
8231 return ParseStatus::Success;
8232}
8233
8234//===----------------------------------------------------------------------===//
8235// exp
8236//===----------------------------------------------------------------------===//
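// Illustrative example (not from the original source):
//   exp mrt0 v0, v1, v2, v3 done vm
// The target name (mrt0 here) is converted to its id by getTgtId() below.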
8237
8238ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
8239 using namespace llvm::AMDGPU::Exp;
8240
8241 StringRef Str;
8242 SMLoc S = getLoc();
8243
8244 if (!parseId(Str))
8245 return ParseStatus::NoMatch;
8246
8247 unsigned Id = getTgtId(Str);
8248 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
8249 return Error(S, (Id == ET_INVALID)
8250 ? "invalid exp target"
8251 : "exp target is not supported on this GPU");
8252
8253 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
8254 AMDGPUOperand::ImmTyExpTgt));
8255 return ParseStatus::Success;
8256}
8257
8258//===----------------------------------------------------------------------===//
8259// parser helpers
8260//===----------------------------------------------------------------------===//
8261
8262bool
8263AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
8264 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
8265}
8266
8267bool
8268AMDGPUAsmParser::isId(const StringRef Id) const {
8269 return isId(getToken(), Id);
8270}
8271
8272bool
8273AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
8274 return getTokenKind() == Kind;
8275}
8276
8277StringRef AMDGPUAsmParser::getId() const {
8278 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
8279}
8280
8281bool
8282AMDGPUAsmParser::trySkipId(const StringRef Id) {
8283 if (isId(Id)) {
8284 lex();
8285 return true;
8286 }
8287 return false;
8288}
8289
8290bool
8291AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
8292 if (isToken(AsmToken::Identifier)) {
8293 StringRef Tok = getTokenStr();
8294 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
8295 lex();
8296 return true;
8297 }
8298 }
8299 return false;
8300}
8301
8302bool
8303AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
8304 if (isId(Id) && peekToken().is(Kind)) {
8305 lex();
8306 lex();
8307 return true;
8308 }
8309 return false;
8310}
8311
8312bool
8313AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
8314 if (isToken(Kind)) {
8315 lex();
8316 return true;
8317 }
8318 return false;
8319}
8320
8321bool
8322AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
8323 const StringRef ErrMsg) {
8324 if (!trySkipToken(Kind)) {
8325 Error(getLoc(), ErrMsg);
8326 return false;
8327 }
8328 return true;
8329}
8330
8331bool
8332AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
8333 SMLoc S = getLoc();
8334
8335 const MCExpr *Expr;
8336 if (Parser.parseExpression(Expr))
8337 return false;
8338
8339 if (Expr->evaluateAsAbsolute(Imm))
8340 return true;
8341
8342 if (Expected.empty()) {
8343 Error(S, "expected absolute expression");
8344 } else {
8345 Error(S, Twine("expected ", Expected) +
8346 Twine(" or an absolute expression"));
8347 }
8348 return false;
8349}
8350
8351bool
8352AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
8353 SMLoc S = getLoc();
8354
8355 const MCExpr *Expr;
8356 if (Parser.parseExpression(Expr))
8357 return false;
8358
8359 int64_t IntVal;
8360 if (Expr->evaluateAsAbsolute(IntVal)) {
8361 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
8362 } else {
8363 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
8364 }
8365 return true;
8366}
8367
8368bool
8369AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
8370 if (isToken(AsmToken::String)) {
8371 Val = getToken().getStringContents();
8372 lex();
8373 return true;
8374 }
8375 Error(getLoc(), ErrMsg);
8376 return false;
8377}
8378
8379bool
8380AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
8381 if (isToken(AsmToken::Identifier)) {
8382 Val = getTokenStr();
8383 lex();
8384 return true;
8385 }
8386 if (!ErrMsg.empty())
8387 Error(getLoc(), ErrMsg);
8388 return false;
8389}
8390
8391AsmToken
8392AMDGPUAsmParser::getToken() const {
8393 return Parser.getTok();
8394}
8395
8396AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
8397 return isToken(AsmToken::EndOfStatement)
8398 ? getToken()
8399 : getLexer().peekTok(ShouldSkipSpace);
8400}
8401
8402void
8403AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
8404 auto TokCount = getLexer().peekTokens(Tokens);
8405
8406 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
8407 Tokens[Idx] = AsmToken(AsmToken::Error, "");
8408}
8409
8410AsmToken::TokenKind
8411AMDGPUAsmParser::getTokenKind() const {
8412 return getLexer().getKind();
8413}
8414
8415SMLoc
8416AMDGPUAsmParser::getLoc() const {
8417 return getToken().getLoc();
8418}
8419
8420StringRef
8421AMDGPUAsmParser::getTokenStr() const {
8422 return getToken().getString();
8423}
8424
8425void
8426AMDGPUAsmParser::lex() {
8427 Parser.Lex();
8428}
8429
8430SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
8431 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
8432}
8433
8434// Returns whichever of the two given locations comes later in the source.
8435SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {
8436 return a.getPointer() < b.getPointer() ? b : a;
8437}
8438
8439SMLoc AMDGPUAsmParser::getOperandLoc(const OperandVector &Operands,
8440 int MCOpIdx) const {
8441 for (const auto &Op : Operands) {
8442 const auto TargetOp = static_cast<AMDGPUOperand &>(*Op);
8443 if (TargetOp.getMCOpIdx() == MCOpIdx)
8444 return TargetOp.getStartLoc();
8445 }
8446 llvm_unreachable("No such MC operand!");
8447}
8448
8449SMLoc
8450AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
8451 const OperandVector &Operands) const {
8452 for (unsigned i = Operands.size() - 1; i > 0; --i) {
8453 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8454 if (Test(Op))
8455 return Op.getStartLoc();
8456 }
8457 return getInstLoc(Operands);
8458}
8459
8460SMLoc
8461AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
8462 const OperandVector &Operands) const {
8463 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
8464 return getOperandLoc(Test, Operands);
8465}
8466
8467ParseStatus
8468AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
8469 if (!trySkipToken(AsmToken::LCurly))
8470 return ParseStatus::NoMatch;
8471
8472 bool First = true;
8473 while (!trySkipToken(AsmToken::RCurly)) {
8474 if (!First &&
8475 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
8476 return ParseStatus::Failure;
8477
8478 StringRef Id = getTokenStr();
8479 SMLoc IdLoc = getLoc();
8480 if (!skipToken(AsmToken::Identifier, "field name expected") ||
8481 !skipToken(AsmToken::Colon, "colon expected"))
8482 return ParseStatus::Failure;
8483
8484 const auto *I =
8485 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
8486 if (I == Fields.end())
8487 return Error(IdLoc, "unknown field");
8488 if ((*I)->IsDefined)
8489 return Error(IdLoc, "duplicate field");
8490
8491 // TODO: Support symbolic values.
8492 (*I)->Loc = getLoc();
8493 if (!parseExpr((*I)->Val))
8494 return ParseStatus::Failure;
8495 (*I)->IsDefined = true;
8496
8497 First = false;
8498 }
8499 return ParseStatus::Success;
8500}
8501
8502bool AMDGPUAsmParser::validateStructuredOpFields(
8503    ArrayRef<const StructuredOpField *> Fields) {
8504  return all_of(Fields, [this](const StructuredOpField *F) {
8505 return F->validate(*this);
8506 });
8507}
8508
8509//===----------------------------------------------------------------------===//
8510// swizzle
8511//===----------------------------------------------------------------------===//
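// Illustrative examples (not from the original source) of the ds_swizzle_b32
// offset macros parsed below:
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pi0")
// A plain 16-bit offset (offset:0xffff) is accepted as well.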
8512
8513LLVM_READNONE
8514static unsigned
8515encodeBitmaskPerm(const unsigned AndMask,
8516 const unsigned OrMask,
8517 const unsigned XorMask) {
8518 using namespace llvm::AMDGPU::Swizzle;
8519
8520 return BITMASK_PERM_ENC |
8521 (AndMask << BITMASK_AND_SHIFT) |
8522 (OrMask << BITMASK_OR_SHIFT) |
8523 (XorMask << BITMASK_XOR_SHIFT);
8524}
8525
8526bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
8527 const unsigned MaxVal,
8528 const Twine &ErrMsg, SMLoc &Loc) {
8529 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8530 return false;
8531 }
8532 Loc = getLoc();
8533 if (!parseExpr(Op)) {
8534 return false;
8535 }
8536 if (Op < MinVal || Op > MaxVal) {
8537 Error(Loc, ErrMsg);
8538 return false;
8539 }
8540
8541 return true;
8542}
8543
8544bool
8545AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
8546 const unsigned MinVal,
8547 const unsigned MaxVal,
8548 const StringRef ErrMsg) {
8549 SMLoc Loc;
8550 for (unsigned i = 0; i < OpNum; ++i) {
8551 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
8552 return false;
8553 }
8554
8555 return true;
8556}
8557
8558bool
8559AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
8560 using namespace llvm::AMDGPU::Swizzle;
8561
8562 int64_t Lane[LANE_NUM];
8563 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8564 "expected a 2-bit lane id")) {
8565    Imm = QUAD_PERM_ENC;
8566    for (unsigned I = 0; I < LANE_NUM; ++I) {
8567 Imm |= Lane[I] << (LANE_SHIFT * I);
8568 }
8569 return true;
8570 }
8571 return false;
8572}
8573
8574bool
8575AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
8576 using namespace llvm::AMDGPU::Swizzle;
8577
8578 SMLoc Loc;
8579 int64_t GroupSize;
8580 int64_t LaneIdx;
8581
8582 if (!parseSwizzleOperand(GroupSize,
8583 2, 32,
8584 "group size must be in the interval [2,32]",
8585 Loc)) {
8586 return false;
8587 }
8588 if (!isPowerOf2_64(GroupSize)) {
8589 Error(Loc, "group size must be a power of two");
8590 return false;
8591 }
8592 if (parseSwizzleOperand(LaneIdx,
8593 0, GroupSize - 1,
8594 "lane id must be in the interval [0,group size - 1]",
8595 Loc)) {
8596 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8597 return true;
8598 }
8599 return false;
8600}
8601
8602bool
8603AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8604 using namespace llvm::AMDGPU::Swizzle;
8605
8606 SMLoc Loc;
8607 int64_t GroupSize;
8608
8609 if (!parseSwizzleOperand(GroupSize,
8610 2, 32,
8611 "group size must be in the interval [2,32]",
8612 Loc)) {
8613 return false;
8614 }
8615 if (!isPowerOf2_64(GroupSize)) {
8616 Error(Loc, "group size must be a power of two");
8617 return false;
8618 }
8619
8620 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8621 return true;
8622}
8623
8624bool
8625AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8626 using namespace llvm::AMDGPU::Swizzle;
8627
8628 SMLoc Loc;
8629 int64_t GroupSize;
8630
8631 if (!parseSwizzleOperand(GroupSize,
8632 1, 16,
8633 "group size must be in the interval [1,16]",
8634 Loc)) {
8635 return false;
8636 }
8637 if (!isPowerOf2_64(GroupSize)) {
8638 Error(Loc, "group size must be a power of two");
8639 return false;
8640 }
8641
8642 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8643 return true;
8644}
8645
8646bool
8647AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8648 using namespace llvm::AMDGPU::Swizzle;
8649
8650 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8651 return false;
8652 }
8653
8654 StringRef Ctl;
8655 SMLoc StrLoc = getLoc();
8656 if (!parseString(Ctl)) {
8657 return false;
8658 }
8659 if (Ctl.size() != BITMASK_WIDTH) {
8660 Error(StrLoc, "expected a 5-character mask");
8661 return false;
8662 }
8663
8664 unsigned AndMask = 0;
8665 unsigned OrMask = 0;
8666 unsigned XorMask = 0;
8667
8668 for (size_t i = 0; i < Ctl.size(); ++i) {
8669 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8670 switch(Ctl[i]) {
8671 default:
8672 Error(StrLoc, "invalid mask");
8673 return false;
8674 case '0':
8675 break;
8676 case '1':
8677 OrMask |= Mask;
8678 break;
8679 case 'p':
8680 AndMask |= Mask;
8681 break;
8682 case 'i':
8683 AndMask |= Mask;
8684 XorMask |= Mask;
8685 break;
8686 }
8687 }
8688
8689 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8690 return true;
8691}
8692
8693bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
8694 using namespace llvm::AMDGPU::Swizzle;
8695
8696 if (!AMDGPU::isGFX9Plus(getSTI())) {
8697 Error(getLoc(), "FFT mode swizzle not supported on this GPU");
8698 return false;
8699 }
8700
8701 int64_t Swizzle;
8702 SMLoc Loc;
8703 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
8704 "FFT swizzle must be in the interval [0," +
8705 Twine(FFT_SWIZZLE_MAX) + Twine(']'),
8706 Loc))
8707 return false;
8708
8709 Imm = FFT_MODE_ENC | Swizzle;
8710 return true;
8711}
8712
8713bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
8714 using namespace llvm::AMDGPU::Swizzle;
8715
8716 if (!AMDGPU::isGFX9Plus(getSTI())) {
8717 Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
8718 return false;
8719 }
8720
8721 SMLoc Loc;
8722 int64_t Direction;
8723
8724 if (!parseSwizzleOperand(Direction, 0, 1,
8725 "direction must be 0 (left) or 1 (right)", Loc))
8726 return false;
8727
8728 int64_t RotateSize;
8729 if (!parseSwizzleOperand(
8730 RotateSize, 0, ROTATE_MAX_SIZE,
8731 "number of threads to rotate must be in the interval [0," +
8732 Twine(ROTATE_MAX_SIZE) + Twine(']'),
8733 Loc))
8734 return false;
8735
8736  Imm = ROTATE_MODE_ENC | (Direction << ROTATE_DIR_SHIFT) |
8737        (RotateSize << ROTATE_SIZE_SHIFT);
8738 return true;
8739}
8740
8741bool
8742AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8743
8744 SMLoc OffsetLoc = getLoc();
8745
8746 if (!parseExpr(Imm, "a swizzle macro")) {
8747 return false;
8748 }
8749 if (!isUInt<16>(Imm)) {
8750 Error(OffsetLoc, "expected a 16-bit offset");
8751 return false;
8752 }
8753 return true;
8754}
8755
8756bool
8757AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8758 using namespace llvm::AMDGPU::Swizzle;
8759
8760  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
8761
8762 SMLoc ModeLoc = getLoc();
8763 bool Ok = false;
8764
8765 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8766 Ok = parseSwizzleQuadPerm(Imm);
8767 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8768 Ok = parseSwizzleBitmaskPerm(Imm);
8769 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8770 Ok = parseSwizzleBroadcast(Imm);
8771 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8772 Ok = parseSwizzleSwap(Imm);
8773 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8774 Ok = parseSwizzleReverse(Imm);
8775 } else if (trySkipId(IdSymbolic[ID_FFT])) {
8776 Ok = parseSwizzleFFT(Imm);
8777 } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
8778 Ok = parseSwizzleRotate(Imm);
8779 } else {
8780 Error(ModeLoc, "expected a swizzle mode");
8781 }
8782
8783    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
8784 }
8785
8786 return false;
8787}
8788
8789ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8790 SMLoc S = getLoc();
8791 int64_t Imm = 0;
8792
8793 if (trySkipId("offset")) {
8794
8795 bool Ok = false;
8796 if (skipToken(AsmToken::Colon, "expected a colon")) {
8797 if (trySkipId("swizzle")) {
8798 Ok = parseSwizzleMacro(Imm);
8799 } else {
8800 Ok = parseSwizzleOffset(Imm);
8801 }
8802 }
8803
8804 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8805
8806    return Ok ? ParseStatus::Success : ParseStatus::Failure;
8807  }
8808 return ParseStatus::NoMatch;
8809}
8810
8811bool
8812AMDGPUOperand::isSwizzle() const {
8813 return isImmTy(ImmTySwizzle);
8814}
8815
8816//===----------------------------------------------------------------------===//
8817// VGPR Index Mode
8818//===----------------------------------------------------------------------===//
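// Illustrative example (not from the original source):
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)
// Each mode name maps to a single bit; a raw 4-bit immediate is also accepted.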
8819
8820int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8821
8822 using namespace llvm::AMDGPU::VGPRIndexMode;
8823
8824 if (trySkipToken(AsmToken::RParen)) {
8825 return OFF;
8826 }
8827
8828 int64_t Imm = 0;
8829
8830 while (true) {
8831 unsigned Mode = 0;
8832 SMLoc S = getLoc();
8833
8834 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8835 if (trySkipId(IdSymbolic[ModeId])) {
8836 Mode = 1 << ModeId;
8837 break;
8838 }
8839 }
8840
8841 if (Mode == 0) {
8842 Error(S, (Imm == 0)?
8843 "expected a VGPR index mode or a closing parenthesis" :
8844 "expected a VGPR index mode");
8845 return UNDEF;
8846 }
8847
8848 if (Imm & Mode) {
8849 Error(S, "duplicate VGPR index mode");
8850 return UNDEF;
8851 }
8852 Imm |= Mode;
8853
8854 if (trySkipToken(AsmToken::RParen))
8855 break;
8856 if (!skipToken(AsmToken::Comma,
8857 "expected a comma or a closing parenthesis"))
8858 return UNDEF;
8859 }
8860
8861 return Imm;
8862}
8863
8864ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
8865
8866 using namespace llvm::AMDGPU::VGPRIndexMode;
8867
8868 int64_t Imm = 0;
8869 SMLoc S = getLoc();
8870
8871 if (trySkipId("gpr_idx", AsmToken::LParen)) {
8872 Imm = parseGPRIdxMacro();
8873 if (Imm == UNDEF)
8874 return ParseStatus::Failure;
8875 } else {
8876 if (getParser().parseAbsoluteExpression(Imm))
8877 return ParseStatus::Failure;
8878 if (Imm < 0 || !isUInt<4>(Imm))
8879 return Error(S, "invalid immediate: only 4-bit values are legal");
8880 }
8881
8882 Operands.push_back(
8883 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8884 return ParseStatus::Success;
8885}
8886
8887bool AMDGPUOperand::isGPRIdxMode() const {
8888 return isImmTy(ImmTyGprIdxMode);
8889}
8890
8891//===----------------------------------------------------------------------===//
8892// sopp branch targets
8893//===----------------------------------------------------------------------===//
8894
8895ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
8896
8897 // Make sure we are not parsing something
8898 // that looks like a label or an expression but is not.
8899 // This will improve error messages.
8900 if (isRegister() || isModifier())
8901 return ParseStatus::NoMatch;
8902
8903 if (!parseExpr(Operands))
8904 return ParseStatus::Failure;
8905
8906 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
8907 assert(Opr.isImm() || Opr.isExpr());
8908 SMLoc Loc = Opr.getStartLoc();
8909
8910 // Currently we do not support arbitrary expressions as branch targets.
8911 // Only labels and absolute expressions are accepted.
8912 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
8913 Error(Loc, "expected an absolute expression or a label");
8914 } else if (Opr.isImm() && !Opr.isS16Imm()) {
8915 Error(Loc, "expected a 16-bit signed jump offset");
8916 }
8917
8918 return ParseStatus::Success;
8919}
8920
8921//===----------------------------------------------------------------------===//
8922// Boolean holding registers
8923//===----------------------------------------------------------------------===//
8924
8925ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
8926 return parseReg(Operands);
8927}
8928
8929//===----------------------------------------------------------------------===//
8930// mubuf
8931//===----------------------------------------------------------------------===//
8932
8933void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
8934 const OperandVector &Operands,
8935 bool IsAtomic) {
8936 OptionalImmIndexMap OptionalIdx;
8937 unsigned FirstOperandIdx = 1;
8938 bool IsAtomicReturn = false;
8939
8940 if (IsAtomic) {
8941 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
8942                     SIInstrFlags::IsAtomicRet;
8943  }
8944
8945 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
8946 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8947
8948 // Add the register arguments
8949 if (Op.isReg()) {
8950 Op.addRegOperands(Inst, 1);
8951 // Insert a tied src for atomic return dst.
8952 // This cannot be postponed as subsequent calls to
8953      // addImmOperands rely on the correct number of MC operands.
8954 if (IsAtomicReturn && i == FirstOperandIdx)
8955 Op.addRegOperands(Inst, 1);
8956 continue;
8957 }
8958
8959 // Handle the case where soffset is an immediate
8960 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
8961 Op.addImmOperands(Inst, 1);
8962 continue;
8963 }
8964
8965 // Handle tokens like 'offen' which are sometimes hard-coded into the
8966 // asm string. There are no MCInst operands for these.
8967 if (Op.isToken()) {
8968 continue;
8969 }
8970 assert(Op.isImm());
8971
8972 // Handle optional arguments
8973 OptionalIdx[Op.getImmTy()] = i;
8974 }
8975
8976 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
8977 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
8978}
8979
8980//===----------------------------------------------------------------------===//
8981// smrd
8982//===----------------------------------------------------------------------===//
8983
8984bool AMDGPUOperand::isSMRDOffset8() const {
8985 return isImmLiteral() && isUInt<8>(getImm());
8986}
8987
8988bool AMDGPUOperand::isSMEMOffset() const {
8989 // Offset range is checked later by validator.
8990 return isImmLiteral();
8991}
8992
8993bool AMDGPUOperand::isSMRDLiteralOffset() const {
8994 // 32-bit literals are only supported on CI and we only want to use them
8995 // when the offset is > 8-bits.
8996 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
8997}
8998
8999//===----------------------------------------------------------------------===//
9000// vop3
9001//===----------------------------------------------------------------------===//
9002
9003static bool ConvertOmodMul(int64_t &Mul) {
9004 if (Mul != 1 && Mul != 2 && Mul != 4)
9005 return false;
9006
9007 Mul >>= 1;
9008 return true;
9009}
9010
9011static bool ConvertOmodDiv(int64_t &Div) {
9012 if (Div == 1) {
9013 Div = 0;
9014 return true;
9015 }
9016
9017 if (Div == 2) {
9018 Div = 3;
9019 return true;
9020 }
9021
9022 return false;
9023}
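// Editorial note (derived from the conversions above): the assembly modifiers
// map onto the omod field as mul:1 -> 0, mul:2 -> 1, mul:4 -> 2 and
// div:2 -> 3; e.g. v_add_f32 v0, v1, v2 mul:2 encodes omod = 1.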
9024
9025// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
9026// This is intentional and ensures compatibility with sp3.
9027// See bug 35397 for details.
9028bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9029 if (BoundCtrl == 0 || BoundCtrl == 1) {
9030 if (!isGFX11Plus())
9031 BoundCtrl = 1;
9032 return true;
9033 }
9034 return false;
9035}
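// For illustration (assuming standard DPP assembly syntax): with the
// sp3-compatible behaviour above, on a pre-gfx11 target both of the following
// assemble to the same bound_ctrl encoding:
//   v_mov_b32_dpp v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:0
//   v_mov_b32_dpp v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1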
9036
9037void AMDGPUAsmParser::onBeginOfFile() {
9038 if (!getParser().getStreamer().getTargetStreamer() ||
9039 getSTI().getTargetTriple().getArch() == Triple::r600)
9040 return;
9041
9042 if (!getTargetStreamer().getTargetID())
9043 getTargetStreamer().initializeTargetID(getSTI(),
9044 getSTI().getFeatureString());
9045
9046 if (isHsaAbi(getSTI()))
9047 getTargetStreamer().EmitDirectiveAMDGCNTarget();
9048}
9049
9050/// Parse AMDGPU specific expressions.
9051///
9052/// expr ::= or(expr, ...) |
9053/// max(expr, ...)
9054///
9055bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
9056 using AGVK = AMDGPUMCExpr::VariantKind;
9057
9058 if (isToken(AsmToken::Identifier)) {
9059 StringRef TokenId = getTokenStr();
9060 AGVK VK = StringSwitch<AGVK>(TokenId)
9061 .Case("max", AGVK::AGVK_Max)
9062 .Case("or", AGVK::AGVK_Or)
9063 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
9064 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
9065 .Case("alignto", AGVK::AGVK_AlignTo)
9066 .Case("occupancy", AGVK::AGVK_Occupancy)
9067 .Default(AGVK::AGVK_None);
9068
9069 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
9070       SmallVector<const MCExpr *, 4> Exprs;
9071 uint64_t CommaCount = 0;
9072 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
9073 lex(); // Eat '('
9074 while (true) {
9075 if (trySkipToken(AsmToken::RParen)) {
9076 if (Exprs.empty()) {
9077 Error(getToken().getLoc(),
9078 "empty " + Twine(TokenId) + " expression");
9079 return true;
9080 }
9081 if (CommaCount + 1 != Exprs.size()) {
9082 Error(getToken().getLoc(),
9083 "mismatch of commas in " + Twine(TokenId) + " expression");
9084 return true;
9085 }
9086 Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
9087 return false;
9088 }
9089 const MCExpr *Expr;
9090 if (getParser().parseExpression(Expr, EndLoc))
9091 return true;
9092 Exprs.push_back(Expr);
9093 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
9094 if (LastTokenWasComma)
9095 CommaCount++;
9096 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
9097 Error(getToken().getLoc(),
9098 "unexpected token in " + Twine(TokenId) + " expression");
9099 return true;
9100 }
9101 }
9102 }
9103 }
9104 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
9105}
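// For illustration, the custom expression forms parsed above can appear in
// assembler directives; the symbol names below are hypothetical:
//   .set total_vgprs, max(func_a.num_vgpr, func_b.num_vgpr)
//   .set uses_vcc,    or(func_a.uses_vcc, func_b.uses_vcc)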
9106
9107ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
9108 StringRef Name = getTokenStr();
9109 if (Name == "mul") {
9110 return parseIntWithPrefix("mul", Operands,
9111 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
9112 }
9113
9114 if (Name == "div") {
9115 return parseIntWithPrefix("div", Operands,
9116 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
9117 }
9118
9119 return ParseStatus::NoMatch;
9120}
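// For illustration, the output-modifier syntax handled above is "mul:2",
// "mul:4" or "div:2" appended to a VOP3-encoded instruction, e.g.:
//   v_add_f32_e64 v0, v1, v2 mul:2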
9121
9122// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
9123// the number of src operands present, then copies that bit into src0_modifiers.
9124static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
9125 int Opc = Inst.getOpcode();
9126 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9127 if (OpSelIdx == -1)
9128 return;
9129
9130 int SrcNum;
9131 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9132 AMDGPU::OpName::src2};
9133 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
9134 ++SrcNum)
9135 ;
9136 assert(SrcNum > 0);
9137
9138 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9139
9140 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
9141 if (DstIdx == -1)
9142 return;
9143
9144 const MCOperand &DstOp = Inst.getOperand(DstIdx);
9145 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
9146 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9147 if (DstOp.isReg() &&
9148 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
9149     if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI))
9150 ModVal |= SISrcMods::DST_OP_SEL;
9151 } else {
9152 if ((OpSel & (1 << SrcNum)) != 0)
9153 ModVal |= SISrcMods::DST_OP_SEL;
9154 }
9155 Inst.getOperand(ModIdx).setImm(ModVal);
9156}
9157
9158void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
9159 const OperandVector &Operands) {
9160 cvtVOP3P(Inst, Operands);
9161 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9162}
9163
9164void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
9165 OptionalImmIndexMap &OptionalIdx) {
9166 cvtVOP3P(Inst, Operands, OptionalIdx);
9167 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9168}
9169
9170static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
9171 return
9172 // 1. This operand is input modifiers
9173 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
9174 // 2. This is not last operand
9175 && Desc.NumOperands > (OpNum + 1)
9176 // 3. Next operand is register class
9177 && Desc.operands()[OpNum + 1].RegClass != -1
9178 // 4. Next register is not tied to any other operand
9179 && Desc.getOperandConstraint(OpNum + 1,
9180                                   MCOI::OperandConstraint::TIED_TO) == -1;
9181}
9182
9183void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
9184{
9185 OptionalImmIndexMap OptionalIdx;
9186 unsigned Opc = Inst.getOpcode();
9187
9188 unsigned I = 1;
9189 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9190 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9191 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9192 }
9193
9194 for (unsigned E = Operands.size(); I != E; ++I) {
9195 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9196     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9197 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9198 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
9199 Op.isInterpAttrChan()) {
9200 Inst.addOperand(MCOperand::createImm(Op.getImm()));
9201 } else if (Op.isImmModifier()) {
9202 OptionalIdx[Op.getImmTy()] = I;
9203 } else {
9204 llvm_unreachable("unhandled operand type");
9205 }
9206 }
9207
9208 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
9209 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9210 AMDGPUOperand::ImmTyHigh);
9211
9212 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9213 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9214 AMDGPUOperand::ImmTyClamp);
9215
9216 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9217 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9218 AMDGPUOperand::ImmTyOModSI);
9219}
9220
9221void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
9222{
9223 OptionalImmIndexMap OptionalIdx;
9224 unsigned Opc = Inst.getOpcode();
9225
9226 unsigned I = 1;
9227 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9228 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9229 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9230 }
9231
9232 for (unsigned E = Operands.size(); I != E; ++I) {
9233 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9234     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9235 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9236 } else if (Op.isImmModifier()) {
9237 OptionalIdx[Op.getImmTy()] = I;
9238 } else {
9239 llvm_unreachable("unhandled operand type");
9240 }
9241 }
9242
9243 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
9244
9245 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9246 if (OpSelIdx != -1)
9247 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9248
9249 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
9250
9251 if (OpSelIdx == -1)
9252 return;
9253
9254 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9255 AMDGPU::OpName::src2};
9256 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9257 AMDGPU::OpName::src1_modifiers,
9258 AMDGPU::OpName::src2_modifiers};
9259
9260 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9261
9262 for (int J = 0; J < 3; ++J) {
9263 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9264 if (OpIdx == -1)
9265 break;
9266
9267 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9268 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9269
9270 if ((OpSel & (1 << J)) != 0)
9271 ModVal |= SISrcMods::OP_SEL_0;
9272 if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
9273 (OpSel & (1 << 3)) != 0)
9274 ModVal |= SISrcMods::DST_OP_SEL;
9275
9276 Inst.getOperand(ModIdx).setImm(ModVal);
9277 }
9278}
9279void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
9280 const OperandVector &Operands) {
9281 OptionalImmIndexMap OptionalIdx;
9282 unsigned Opc = Inst.getOpcode();
9283 unsigned I = 1;
9284 int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
9285
9286 const MCInstrDesc &Desc = MII.get(Opc);
9287
9288 for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
9289 static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);
9290
9291 for (unsigned E = Operands.size(); I != E; ++I) {
9292 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
9293 int NumOperands = Inst.getNumOperands();
9294     // The order of operands in the MCInst differs from the order of the parsed
9295     // operands. Add dummy cbsz and blgp operands at the corresponding MCInst
9296     // operand indices so that the scale values are parsed correctly.
9297 if (NumOperands == CbszOpIdx) {
9298       Inst.addOperand(MCOperand::createImm(0)); // cbsz
9299       Inst.addOperand(MCOperand::createImm(0)); // blgp
9300 }
9301 if (isRegOrImmWithInputMods(Desc, NumOperands)) {
9302 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9303 } else if (Op.isImmModifier()) {
9304 OptionalIdx[Op.getImmTy()] = I;
9305 } else {
9306 Op.addRegOrImmOperands(Inst, 1);
9307 }
9308 }
9309
9310 // Insert CBSZ and BLGP operands for F8F6F4 variants
9311 auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
9312 if (CbszIdx != OptionalIdx.end()) {
9313 int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
9314 Inst.getOperand(CbszOpIdx).setImm(CbszVal);
9315 }
9316
9317 int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
9318 auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
9319 if (BlgpIdx != OptionalIdx.end()) {
9320 int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
9321 Inst.getOperand(BlgpOpIdx).setImm(BlgpVal);
9322 }
9323
9324 // Add dummy src_modifiers
9325   Inst.addOperand(MCOperand::createImm(0));
9326   Inst.addOperand(MCOperand::createImm(0));
9327
9328 // Handle op_sel fields
9329
9330 unsigned OpSel = 0;
9331 auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
9332 if (OpselIdx != OptionalIdx.end()) {
9333 OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
9334 .getImm();
9335 }
9336
9337 unsigned OpSelHi = 0;
9338 auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
9339 if (OpselHiIdx != OptionalIdx.end()) {
9340 OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
9341 .getImm();
9342 }
9343 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9344 AMDGPU::OpName::src1_modifiers};
9345
9346 for (unsigned J = 0; J < 2; ++J) {
9347 unsigned ModVal = 0;
9348 if (OpSel & (1 << J))
9349 ModVal |= SISrcMods::OP_SEL_0;
9350 if (OpSelHi & (1 << J))
9351 ModVal |= SISrcMods::OP_SEL_1;
9352
9353 const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9354 Inst.getOperand(ModIdx).setImm(ModVal);
9355 }
9356}
9357
9358void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
9359 OptionalImmIndexMap &OptionalIdx) {
9360 unsigned Opc = Inst.getOpcode();
9361
9362 unsigned I = 1;
9363 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9364 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9365 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9366 }
9367
9368 for (unsigned E = Operands.size(); I != E; ++I) {
9369 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9370     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9371 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9372 } else if (Op.isImmModifier()) {
9373 OptionalIdx[Op.getImmTy()] = I;
9374 } else {
9375 Op.addRegOrImmOperands(Inst, 1);
9376 }
9377 }
9378
9379 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::scale_sel))
9380 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9381 AMDGPUOperand::ImmTyScaleSel);
9382
9383 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9384 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9385 AMDGPUOperand::ImmTyClamp);
9386
9387 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
9388 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
9389 Inst.addOperand(Inst.getOperand(0));
9390 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9391 AMDGPUOperand::ImmTyByteSel);
9392 }
9393
9394 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9395 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9396 AMDGPUOperand::ImmTyOModSI);
9397
9398 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
9399 // they have a src2 register operand that is tied to the dst operand;
9400 // we don't allow modifiers for this operand in the assembler, so src2_modifiers
9401 // should be 0.
9402 if (isMAC(Opc)) {
9403 auto *it = Inst.begin();
9404 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
9405 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
9406 ++it;
9407 // Copy the operand to ensure it's not invalidated when Inst grows.
9408 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
9409 }
9410}
9411
9412void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
9413 OptionalImmIndexMap OptionalIdx;
9414 cvtVOP3(Inst, Operands, OptionalIdx);
9415}
9416
9417void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
9418 OptionalImmIndexMap &OptIdx) {
9419 const int Opc = Inst.getOpcode();
9420 const MCInstrDesc &Desc = MII.get(Opc);
9421
9422 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
9423
9424 if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
9425 Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
9426 Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
9427 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
9428 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
9429 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
9430 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
9431 Inst.addOperand(Inst.getOperand(0));
9432 }
9433
9434 // Adding vdst_in operand is already covered for these DPP instructions in
9435 // cvtVOP3DPP.
9436 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
9437 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 ||
9438 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 ||
9439 Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 ||
9440 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 ||
9441 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx12 ||
9442 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx12 ||
9443 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx12 ||
9444 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx12 ||
9445 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
9446 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9447 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp_gfx1250 ||
9448 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp8_gfx1250 ||
9449 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9450 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9451 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp_gfx1250 ||
9452 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp_gfx1250 ||
9453 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp8_gfx1250 ||
9454 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp8_gfx1250 ||
9455 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_gfx1250 ||
9456 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_gfx1250 ||
9457 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp_gfx1250 ||
9458 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp_gfx1250 ||
9459 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp8_gfx1250 ||
9460 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp8_gfx1250 ||
9461 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_gfx1250 ||
9462 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_gfx1250)) {
9463 Inst.addOperand(Inst.getOperand(0));
9464 }
9465
9466 int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
9467 if (BitOp3Idx != -1) {
9468 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9469 }
9470
9471 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
9472 // instruction, and then figure out where to actually put the modifiers
9473
9474 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9475 if (OpSelIdx != -1) {
9476 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
9477 }
9478
9479 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
9480 if (OpSelHiIdx != -1) {
9481 int DefaultVal = IsPacked ? -1 : 0;
9482 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
9483 DefaultVal);
9484 }
9485
9486 int MatrixAFMTIdx =
9487 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_fmt);
9488 if (MatrixAFMTIdx != -1) {
9489 addOptionalImmOperand(Inst, Operands, OptIdx,
9490 AMDGPUOperand::ImmTyMatrixAFMT, 0);
9491 }
9492
9493 int MatrixBFMTIdx =
9494 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_fmt);
9495 if (MatrixBFMTIdx != -1) {
9496 addOptionalImmOperand(Inst, Operands, OptIdx,
9497 AMDGPUOperand::ImmTyMatrixBFMT, 0);
9498 }
9499
9500 int MatrixAScaleIdx =
9501 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale);
9502 if (MatrixAScaleIdx != -1) {
9503 addOptionalImmOperand(Inst, Operands, OptIdx,
9504 AMDGPUOperand::ImmTyMatrixAScale, 0);
9505 }
9506
9507 int MatrixBScaleIdx =
9508 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale);
9509 if (MatrixBScaleIdx != -1) {
9510 addOptionalImmOperand(Inst, Operands, OptIdx,
9511 AMDGPUOperand::ImmTyMatrixBScale, 0);
9512 }
9513
9514 int MatrixAScaleFmtIdx =
9515 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale_fmt);
9516 if (MatrixAScaleFmtIdx != -1) {
9517 addOptionalImmOperand(Inst, Operands, OptIdx,
9518 AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
9519 }
9520
9521 int MatrixBScaleFmtIdx =
9522 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale_fmt);
9523 if (MatrixBScaleFmtIdx != -1) {
9524 addOptionalImmOperand(Inst, Operands, OptIdx,
9525 AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
9526 }
9527
9528 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_a_reuse))
9529 addOptionalImmOperand(Inst, Operands, OptIdx,
9530 AMDGPUOperand::ImmTyMatrixAReuse, 0);
9531
9532 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_b_reuse))
9533 addOptionalImmOperand(Inst, Operands, OptIdx,
9534 AMDGPUOperand::ImmTyMatrixBReuse, 0);
9535
9536 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
9537 if (NegLoIdx != -1)
9538 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
9539
9540 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
9541 if (NegHiIdx != -1)
9542 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
9543
9544 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9545 AMDGPU::OpName::src2};
9546 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9547 AMDGPU::OpName::src1_modifiers,
9548 AMDGPU::OpName::src2_modifiers};
9549
9550 unsigned OpSel = 0;
9551 unsigned OpSelHi = 0;
9552 unsigned NegLo = 0;
9553 unsigned NegHi = 0;
9554
9555 if (OpSelIdx != -1)
9556 OpSel = Inst.getOperand(OpSelIdx).getImm();
9557
9558 if (OpSelHiIdx != -1)
9559 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
9560
9561 if (NegLoIdx != -1)
9562 NegLo = Inst.getOperand(NegLoIdx).getImm();
9563
9564 if (NegHiIdx != -1)
9565 NegHi = Inst.getOperand(NegHiIdx).getImm();
9566
9567 for (int J = 0; J < 3; ++J) {
9568 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9569 if (OpIdx == -1)
9570 break;
9571
9572 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9573
9574 if (ModIdx == -1)
9575 continue;
9576
9577 uint32_t ModVal = 0;
9578
9579 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
9580 if (SrcOp.isReg() && getMRI()
9581 ->getRegClass(AMDGPU::VGPR_16RegClassID)
9582 .contains(SrcOp.getReg())) {
9583 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI());
9584 if (VGPRSuffixIsHi)
9585 ModVal |= SISrcMods::OP_SEL_0;
9586 } else {
9587 if ((OpSel & (1 << J)) != 0)
9588 ModVal |= SISrcMods::OP_SEL_0;
9589 }
9590
9591 if ((OpSelHi & (1 << J)) != 0)
9592 ModVal |= SISrcMods::OP_SEL_1;
9593
9594 if ((NegLo & (1 << J)) != 0)
9595 ModVal |= SISrcMods::NEG;
9596
9597 if ((NegHi & (1 << J)) != 0)
9598 ModVal |= SISrcMods::NEG_HI;
9599
9600 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
9601 }
9602}
9603
9604void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
9605 OptionalImmIndexMap OptIdx;
9606 cvtVOP3(Inst, Operands, OptIdx);
9607 cvtVOP3P(Inst, Operands, OptIdx);
9608}
9609
9610 static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
9611 unsigned i, unsigned Opc,
9612 AMDGPU::OpName OpName) {
9613 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
9614 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
9615 else
9616 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
9617}
9618
9619void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
9620 unsigned Opc = Inst.getOpcode();
9621
9622 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
9623 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
9624 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
9625 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
9626 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
9627
9628 OptionalImmIndexMap OptIdx;
9629 for (unsigned i = 5; i < Operands.size(); ++i) {
9630 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9631 OptIdx[Op.getImmTy()] = i;
9632 }
9633
9634 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
9635 addOptionalImmOperand(Inst, Operands, OptIdx,
9636 AMDGPUOperand::ImmTyIndexKey8bit);
9637
9638 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
9639 addOptionalImmOperand(Inst, Operands, OptIdx,
9640 AMDGPUOperand::ImmTyIndexKey16bit);
9641
9642 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_32bit))
9643 addOptionalImmOperand(Inst, Operands, OptIdx,
9644 AMDGPUOperand::ImmTyIndexKey32bit);
9645
9646 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9647 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
9648
9649 cvtVOP3P(Inst, Operands, OptIdx);
9650}
9651
9652//===----------------------------------------------------------------------===//
9653// VOPD
9654//===----------------------------------------------------------------------===//
9655
9656ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
9657 if (!hasVOPD(getSTI()))
9658 return ParseStatus::NoMatch;
9659
9660 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
9661 SMLoc S = getLoc();
9662 lex();
9663 lex();
9664 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
9665 SMLoc OpYLoc = getLoc();
9666 StringRef OpYName;
9667 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
9668 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
9669 return ParseStatus::Success;
9670 }
9671 return Error(OpYLoc, "expected a VOPDY instruction after ::");
9672 }
9673 return ParseStatus::NoMatch;
9674}
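// For illustration, the "::" separator parsed above joins the X and Y halves
// of a dual-issue instruction (gfx11+); register choices must still obey the
// usual VOPD bank constraints, e.g.:
//   v_dual_mov_b32 v0, v1 :: v_dual_add_f32 v3, v4, v5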
9675
9676// Create VOPD MCInst operands using parsed assembler operands.
9677void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
9678 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9679
9680 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
9681 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
9682     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9683 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9684 return;
9685 }
9686 if (Op.isReg()) {
9687 Op.addRegOperands(Inst, 1);
9688 return;
9689 }
9690 if (Op.isImm()) {
9691 Op.addImmOperands(Inst, 1);
9692 return;
9693 }
9694 llvm_unreachable("Unhandled operand type in cvtVOPD");
9695 };
9696
9697 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
9698
9699 // MCInst operands are ordered as follows:
9700 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
9701
9702 for (auto CompIdx : VOPD::COMPONENTS) {
9703 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
9704 }
9705
9706 for (auto CompIdx : VOPD::COMPONENTS) {
9707 const auto &CInfo = InstInfo[CompIdx];
9708 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
9709 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
9710 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
9711 if (CInfo.hasSrc2Acc())
9712 addOp(CInfo.getIndexOfDstInParsedOperands());
9713 }
9714
9715 int BitOp3Idx =
9716 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
9717 if (BitOp3Idx != -1) {
9718 OptionalImmIndexMap OptIdx;
9719 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
9720 if (Op.isImm())
9721 OptIdx[Op.getImmTy()] = Operands.size() - 1;
9722
9723 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9724 }
9725}
9726
9727//===----------------------------------------------------------------------===//
9728// dpp
9729//===----------------------------------------------------------------------===//
9730
9731bool AMDGPUOperand::isDPP8() const {
9732 return isImmTy(ImmTyDPP8);
9733}
9734
9735bool AMDGPUOperand::isDPPCtrl() const {
9736 using namespace AMDGPU::DPP;
9737
9738 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
9739 if (result) {
9740 int64_t Imm = getImm();
9741 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
9742 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
9743 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
9744 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
9745 (Imm == DppCtrl::WAVE_SHL1) ||
9746 (Imm == DppCtrl::WAVE_ROL1) ||
9747 (Imm == DppCtrl::WAVE_SHR1) ||
9748 (Imm == DppCtrl::WAVE_ROR1) ||
9749 (Imm == DppCtrl::ROW_MIRROR) ||
9750 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
9751 (Imm == DppCtrl::BCAST15) ||
9752 (Imm == DppCtrl::BCAST31) ||
9753 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
9754 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
9755 }
9756 return false;
9757}
9758
9759//===----------------------------------------------------------------------===//
9760// mAI
9761//===----------------------------------------------------------------------===//
9762
9763bool AMDGPUOperand::isBLGP() const {
9764 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
9765}
9766
9767bool AMDGPUOperand::isS16Imm() const {
9768 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
9769}
9770
9771bool AMDGPUOperand::isU16Imm() const {
9772 return isImmLiteral() && isUInt<16>(getImm());
9773}
9774
9775//===----------------------------------------------------------------------===//
9776// dim
9777//===----------------------------------------------------------------------===//
9778
9779bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
9780 // We want to allow "dim:1D" etc.,
9781 // but the initial 1 is tokenized as an integer.
9782 std::string Token;
9783 if (isToken(AsmToken::Integer)) {
9784 SMLoc Loc = getToken().getEndLoc();
9785 Token = std::string(getTokenStr());
9786 lex();
9787 if (getLoc() != Loc)
9788 return false;
9789 }
9790
9791 StringRef Suffix;
9792 if (!parseId(Suffix))
9793 return false;
9794 Token += Suffix;
9795
9796 StringRef DimId = Token;
9797 DimId.consume_front("SQ_RSRC_IMG_");
9798
9799 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
9800 if (!DimInfo)
9801 return false;
9802
9803 Encoding = DimInfo->Encoding;
9804 return true;
9805}
9806
9807ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
9808 if (!isGFX10Plus())
9809 return ParseStatus::NoMatch;
9810
9811 SMLoc S = getLoc();
9812
9813 if (!trySkipId("dim", AsmToken::Colon))
9814 return ParseStatus::NoMatch;
9815
9816 unsigned Encoding;
9817 SMLoc Loc = getLoc();
9818 if (!parseDimId(Encoding))
9819 return Error(Loc, "invalid dim value");
9820
9821 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
9822 AMDGPUOperand::ImmTyDim));
9823 return ParseStatus::Success;
9824}
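// For illustration, the dim: syntax accepted above looks like:
//   image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D
// Since the "SQ_RSRC_IMG_" prefix is stripped, "dim:1D" is accepted as well.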
9825
9826//===----------------------------------------------------------------------===//
9827// dpp
9828//===----------------------------------------------------------------------===//
9829
9830ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
9831 SMLoc S = getLoc();
9832
9833 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
9834 return ParseStatus::NoMatch;
9835
9836 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
9837
9838 int64_t Sels[8];
9839
9840 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9841 return ParseStatus::Failure;
9842
9843 for (size_t i = 0; i < 8; ++i) {
9844 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9845 return ParseStatus::Failure;
9846
9847 SMLoc Loc = getLoc();
9848 if (getParser().parseAbsoluteExpression(Sels[i]))
9849 return ParseStatus::Failure;
9850 if (0 > Sels[i] || 7 < Sels[i])
9851 return Error(Loc, "expected a 3-bit value");
9852 }
9853
9854 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9855 return ParseStatus::Failure;
9856
9857 unsigned DPP8 = 0;
9858 for (size_t i = 0; i < 8; ++i)
9859 DPP8 |= (Sels[i] << (i * 3));
9860
9861 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9862 return ParseStatus::Success;
9863}
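// For illustration, the dpp8 selector parsed above is written as eight 3-bit
// lane selectors, e.g.:
//   v_mov_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]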
9864
9865bool
9866AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
9867 const OperandVector &Operands) {
9868 if (Ctrl == "row_newbcast")
9869 return isGFX90A();
9870
9871 if (Ctrl == "row_share" ||
9872 Ctrl == "row_xmask")
9873 return isGFX10Plus();
9874
9875 if (Ctrl == "wave_shl" ||
9876 Ctrl == "wave_shr" ||
9877 Ctrl == "wave_rol" ||
9878 Ctrl == "wave_ror" ||
9879 Ctrl == "row_bcast")
9880 return isVI() || isGFX9();
9881
9882 return Ctrl == "row_mirror" ||
9883 Ctrl == "row_half_mirror" ||
9884 Ctrl == "quad_perm" ||
9885 Ctrl == "row_shl" ||
9886 Ctrl == "row_shr" ||
9887 Ctrl == "row_ror";
9888}
9889
9890int64_t
9891AMDGPUAsmParser::parseDPPCtrlPerm() {
9892 // quad_perm:[%d,%d,%d,%d]
9893
9894 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9895 return -1;
9896
9897 int64_t Val = 0;
9898 for (int i = 0; i < 4; ++i) {
9899 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9900 return -1;
9901
9902 int64_t Temp;
9903 SMLoc Loc = getLoc();
9904 if (getParser().parseAbsoluteExpression(Temp))
9905 return -1;
9906 if (Temp < 0 || Temp > 3) {
9907 Error(Loc, "expected a 2-bit value");
9908 return -1;
9909 }
9910
9911 Val += (Temp << i * 2);
9912 }
9913
9914 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9915 return -1;
9916
9917 return Val;
9918}
9919
9920int64_t
9921AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
9922 using namespace AMDGPU::DPP;
9923
9924 // sel:%d
9925
9926 int64_t Val;
9927 SMLoc Loc = getLoc();
9928
9929 if (getParser().parseAbsoluteExpression(Val))
9930 return -1;
9931
9932 struct DppCtrlCheck {
9933 int64_t Ctrl;
9934 int Lo;
9935 int Hi;
9936 };
9937
9938 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
9939 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
9940 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
9941 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
9942 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
9943 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
9944 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
9945 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
9946 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
9947 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
9948 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
9949 .Default({-1, 0, 0});
9950
9951 bool Valid;
9952 if (Check.Ctrl == -1) {
9953 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
9954 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
9955 } else {
9956 Valid = Check.Lo <= Val && Val <= Check.Hi;
9957 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
9958 }
9959
9960 if (!Valid) {
9961 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
9962 return -1;
9963 }
9964
9965 return Val;
9966}
9967
9968ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
9969 using namespace AMDGPU::DPP;
9970
9971 if (!isToken(AsmToken::Identifier) ||
9972 !isSupportedDPPCtrl(getTokenStr(), Operands))
9973 return ParseStatus::NoMatch;
9974
9975 SMLoc S = getLoc();
9976 int64_t Val = -1;
9977 StringRef Ctrl;
9978
9979 parseId(Ctrl);
9980
9981 if (Ctrl == "row_mirror") {
9982 Val = DppCtrl::ROW_MIRROR;
9983 } else if (Ctrl == "row_half_mirror") {
9984 Val = DppCtrl::ROW_HALF_MIRROR;
9985 } else {
9986 if (skipToken(AsmToken::Colon, "expected a colon")) {
9987 if (Ctrl == "quad_perm") {
9988 Val = parseDPPCtrlPerm();
9989 } else {
9990 Val = parseDPPCtrlSel(Ctrl);
9991 }
9992 }
9993 }
9994
9995 if (Val == -1)
9996 return ParseStatus::Failure;
9997
9998 Operands.push_back(
9999 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
10000 return ParseStatus::Success;
10001}
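// For illustration, dpp_ctrl values handled above include, e.g.:
//   quad_perm:[0,1,2,3]   row_shl:1   row_mirror   row_share:5 (gfx10+)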
10002
10003void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
10004 bool IsDPP8) {
10005 OptionalImmIndexMap OptionalIdx;
10006 unsigned Opc = Inst.getOpcode();
10007 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10008
10009   // MAC instructions are special because they have an 'old'
10010   // operand which is not tied to dst (but is assumed to be).
10011   // They also have a dummy, unused src2_modifiers operand.
10012 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
10013 int Src2ModIdx =
10014 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
10015 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
10016 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
10017
10018 unsigned I = 1;
10019 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10020 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10021 }
10022
10023 int Fi = 0;
10024 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
10025 bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
10026 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
10027 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
10028 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
10029
10030 for (unsigned E = Operands.size(); I != E; ++I) {
10031
10032 if (IsMAC) {
10033 int NumOperands = Inst.getNumOperands();
10034 if (OldIdx == NumOperands) {
10035 // Handle old operand
10036 constexpr int DST_IDX = 0;
10037 Inst.addOperand(Inst.getOperand(DST_IDX));
10038 } else if (Src2ModIdx == NumOperands) {
10039 // Add unused dummy src2_modifiers
10040         Inst.addOperand(MCOperand::createImm(0));
10041 }
10042 }
10043
10044 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
10045 Inst.addOperand(Inst.getOperand(0));
10046 }
10047
10048 if (IsVOP3CvtSrDpp) {
10049 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
10050         Inst.addOperand(MCOperand::createImm(0));
10051 Inst.addOperand(MCOperand::createReg(MCRegister()));
10052 }
10053 }
10054
10055 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10056                                             MCOI::TIED_TO);
10057 if (TiedTo != -1) {
10058 assert((unsigned)TiedTo < Inst.getNumOperands());
10059 // handle tied old or src2 for MAC instructions
10060 Inst.addOperand(Inst.getOperand(TiedTo));
10061 }
10062 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10063 // Add the register arguments
10064 if (IsDPP8 && Op.isDppFI()) {
10065 Fi = Op.getImm();
10066 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10067 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10068 } else if (Op.isReg()) {
10069 Op.addRegOperands(Inst, 1);
10070 } else if (Op.isImm() &&
10071 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
10072 Op.addImmOperands(Inst, 1);
10073 } else if (Op.isImm()) {
10074 OptionalIdx[Op.getImmTy()] = I;
10075 } else {
10076 llvm_unreachable("unhandled operand type");
10077 }
10078 }
10079
10080 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp) && !IsVOP3CvtSrDpp)
10081 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10082 AMDGPUOperand::ImmTyClamp);
10083
10084 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
10085 if (VdstInIdx == static_cast<int>(Inst.getNumOperands()))
10086 Inst.addOperand(Inst.getOperand(0));
10087 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10088 AMDGPUOperand::ImmTyByteSel);
10089 }
10090
10091 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10092 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
10093
10094 if (Desc.TSFlags & SIInstrFlags::VOP3P)
10095 cvtVOP3P(Inst, Operands, OptionalIdx);
10096 else if (Desc.TSFlags & SIInstrFlags::VOP3)
10097 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
10098 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
10099 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
10100 }
10101
10102 if (IsDPP8) {
10103 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
10104 using namespace llvm::AMDGPU::DPP;
10105 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10106 } else {
10107 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
10108 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10109 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10110 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10111
10112 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
10113 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10114 AMDGPUOperand::ImmTyDppFI);
10115 }
10116}
10117
10118void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
10119 OptionalImmIndexMap OptionalIdx;
10120
10121 unsigned I = 1;
10122 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10123 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10124 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10125 }
10126
10127 int Fi = 0;
10128 for (unsigned E = Operands.size(); I != E; ++I) {
10129 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10130                                             MCOI::TIED_TO);
10131 if (TiedTo != -1) {
10132 assert((unsigned)TiedTo < Inst.getNumOperands());
10133 // handle tied old or src2 for MAC instructions
10134 Inst.addOperand(Inst.getOperand(TiedTo));
10135 }
10136 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10137 // Add the register arguments
10138 if (Op.isReg() && validateVccOperand(Op.getReg())) {
10139 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
10140 // Skip it.
10141 continue;
10142 }
10143
10144 if (IsDPP8) {
10145 if (Op.isDPP8()) {
10146 Op.addImmOperands(Inst, 1);
10147 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10148 Op.addRegWithFPInputModsOperands(Inst, 2);
10149 } else if (Op.isDppFI()) {
10150 Fi = Op.getImm();
10151 } else if (Op.isReg()) {
10152 Op.addRegOperands(Inst, 1);
10153 } else {
10154 llvm_unreachable("Invalid operand type");
10155 }
10156 } else {
10157       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10158 Op.addRegWithFPInputModsOperands(Inst, 2);
10159 } else if (Op.isReg()) {
10160 Op.addRegOperands(Inst, 1);
10161 } else if (Op.isDPPCtrl()) {
10162 Op.addImmOperands(Inst, 1);
10163 } else if (Op.isImm()) {
10164 // Handle optional arguments
10165 OptionalIdx[Op.getImmTy()] = I;
10166 } else {
10167 llvm_unreachable("Invalid operand type");
10168 }
10169 }
10170 }
10171
10172 if (IsDPP8) {
10173 using namespace llvm::AMDGPU::DPP;
10174 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10175 } else {
10176 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10177 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10178 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10179 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
10180 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10181 AMDGPUOperand::ImmTyDppFI);
10182 }
10183 }
10184}
10185
10186//===----------------------------------------------------------------------===//
10187// sdwa
10188//===----------------------------------------------------------------------===//
10189
10190ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
10191 StringRef Prefix,
10192 AMDGPUOperand::ImmTy Type) {
10193 return parseStringOrIntWithPrefix(
10194 Operands, Prefix,
10195 {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
10196 Type);
10197}
10198
10199ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
10200 return parseStringOrIntWithPrefix(
10201 Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
10202 AMDGPUOperand::ImmTySDWADstUnused);
10203}
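// For illustration, the SDWA selectors parsed above appear as, e.g. (VI/gfx9):
//   v_add_f32_sdwa v0, v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD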
10204
10205void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
10206 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
10207}
10208
10209void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
10210 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
10211}
10212
10213void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
10214 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
10215}
10216
10217void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
10218 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
10219}
10220
10221void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
10222 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
10223}
10224
10225void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
10226 uint64_t BasicInstType,
10227 bool SkipDstVcc,
10228 bool SkipSrcVcc) {
10229 using namespace llvm::AMDGPU::SDWA;
10230
10231 OptionalImmIndexMap OptionalIdx;
10232 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
10233 bool SkippedVcc = false;
10234
10235 unsigned I = 1;
10236 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10237 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10238 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10239 }
10240
10241 for (unsigned E = Operands.size(); I != E; ++I) {
10242 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10243 if (SkipVcc && !SkippedVcc && Op.isReg() &&
10244 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
10245 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
10246 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
10247 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
10248 // Skip VCC only if we didn't skip it on previous iteration.
10249 // Note that src0 and src1 occupy 2 slots each because of modifiers.
10250 if (BasicInstType == SIInstrFlags::VOP2 &&
10251 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
10252 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
10253 SkippedVcc = true;
10254 continue;
10255 }
10256 if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
10257 SkippedVcc = true;
10258 continue;
10259 }
10260 }
10261     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10262 Op.addRegOrImmWithInputModsOperands(Inst, 2);
10263 } else if (Op.isImm()) {
10264 // Handle optional arguments
10265 OptionalIdx[Op.getImmTy()] = I;
10266 } else {
10267 llvm_unreachable("Invalid operand type");
10268 }
10269 SkippedVcc = false;
10270 }
10271
10272 const unsigned Opc = Inst.getOpcode();
10273 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
10274 Opc != AMDGPU::V_NOP_sdwa_vi) {
10275     // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
10276 switch (BasicInstType) {
10277 case SIInstrFlags::VOP1:
10278 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
10279 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10280 AMDGPUOperand::ImmTyClamp, 0);
10281
10282 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10283 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10284 AMDGPUOperand::ImmTyOModSI, 0);
10285
10286 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
10287 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10288 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10289
10290 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
10291 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10292 AMDGPUOperand::ImmTySDWADstUnused,
10293 DstUnused::UNUSED_PRESERVE);
10294
10295 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10296 break;
10297
10298 case SIInstrFlags::VOP2:
10299 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10300 AMDGPUOperand::ImmTyClamp, 0);
10301
10302 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
10303 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
10304
10305 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10306 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
10307 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10308 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10309 break;
10310
10311 case SIInstrFlags::VOPC:
10312 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
10313 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10314 AMDGPUOperand::ImmTyClamp, 0);
10315 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10316 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10317 break;
10318
10319 default:
10320 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
10321 }
10322 }
10323
10324   // Special case v_mac_{f16, f32}:
10325   // they have a src2 register operand that is tied to the dst operand.
10326 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
10327 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
10328 auto *it = Inst.begin();
10329 std::advance(
10330 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
10331 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
10332 }
10333}
10334
10335/// Force static initialization.
10336extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
10337 LLVMInitializeAMDGPUAsmParser() {
10338   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
10339   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
10340 }
10341
10341
10342#define GET_REGISTER_MATCHER
10343#define GET_MATCHER_IMPLEMENTATION
10344#define GET_MNEMONIC_SPELL_CHECKER
10345#define GET_MNEMONIC_CHECKER
10346#include "AMDGPUGenAsmMatcher.inc"
10347
10348ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
10349 unsigned MCK) {
10350 switch (MCK) {
10351 case MCK_addr64:
10352 return parseTokenOp("addr64", Operands);
10353 case MCK_done:
10354 return parseTokenOp("done", Operands);
10355 case MCK_idxen:
10356 return parseTokenOp("idxen", Operands);
10357 case MCK_lds:
10358 return parseTokenOp("lds", Operands);
10359 case MCK_offen:
10360 return parseTokenOp("offen", Operands);
10361 case MCK_off:
10362 return parseTokenOp("off", Operands);
10363 case MCK_row_95_en:
10364 return parseTokenOp("row_en", Operands);
10365 case MCK_gds:
10366 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
10367 case MCK_tfe:
10368 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
10369 }
10370 return tryCustomParseOperand(Operands, MCK);
10371}
10372
10373 // This function should be defined after the auto-generated include so that the
10374 // MatchClassKind enum is defined.
10375unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
10376 unsigned Kind) {
10377   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
10378   // But MatchInstructionImpl() expects a token and fails to validate the
10379   // operand. This method checks whether we were given an immediate operand but
10380   // are expected to produce the corresponding token.
10381 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
10382 switch (Kind) {
10383 case MCK_addr64:
10384 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10385 case MCK_gds:
10386 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10387 case MCK_lds:
10388 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10389 case MCK_idxen:
10390 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10391 case MCK_offen:
10392 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10393 case MCK_tfe:
10394 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10395 case MCK_SSrc_b32:
10396 // When operands have expression values, they will return true for isToken,
10397 // because it is not possible to distinguish between a token and an
10398 // expression at parse time. MatchInstructionImpl() will always try to
10399 // match an operand as a token, when isToken returns true, and when the
10400 // name of the expression is not a valid token, the match will fail,
10401 // so we need to handle it here.
10402 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10403 case MCK_SSrc_f32:
10404 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10405 case MCK_SOPPBrTarget:
10406 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10407 case MCK_VReg32OrOff:
10408 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10409 case MCK_InterpSlot:
10410 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10411 case MCK_InterpAttr:
10412 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10413 case MCK_InterpAttrChan:
10414 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10415 case MCK_SReg_64:
10416 case MCK_SReg_64_XEXEC:
10417 // Null is defined as a 32-bit register but
10418 // it should also be enabled with 64-bit operands or larger.
10419 // The following code enables it for SReg_64 and larger operands
10420 // used as source and destination. Remaining source
10421 // operands are handled in isInlinableImm.
10422 case MCK_SReg_96:
10423 case MCK_SReg_128:
10424 case MCK_SReg_256:
10425 case MCK_SReg_512:
10426 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10427 default:
10428 return Match_InvalidOperand;
10429 }
10430}
10431
10432//===----------------------------------------------------------------------===//
10433// endpgm
10434//===----------------------------------------------------------------------===//
10435
10436ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
10437 SMLoc S = getLoc();
10438 int64_t Imm = 0;
10439
10440 if (!parseExpr(Imm)) {
10441 // The operand is optional, if not present default to 0
10442 Imm = 0;
10443 }
10444
10445 if (!isUInt<16>(Imm))
10446 return Error(S, "expected a 16-bit value");
10447
10448 Operands.push_back(
10449 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
10450 return ParseStatus::Success;
10451}
10452
10453bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
10454
10455//===----------------------------------------------------------------------===//
10456// Split Barrier
10457//===----------------------------------------------------------------------===//
10458
10459bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
#define Success
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
SmallVector< int16_t, MAX_SRC_OPERANDS_NUM > OperandIndices
static bool checkWriteLane(const MCInst &Inst)
static bool getRegNum(StringRef Str, unsigned &Num)
static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, unsigned i, unsigned Opc, AMDGPU::OpName OpName)
static constexpr RegInfo RegularRegisters[]
static const RegInfo * getRegularRegInfo(StringRef Str)
static ArrayRef< unsigned > getAllVariants()
static OperandIndices getSrcOperandIndices(unsigned Opcode, bool AddMandatoryLiterals=false)
static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name, const MCRegisterInfo *MRI)
static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
static const fltSemantics * getFltSemantics(unsigned Size)
static bool isRegularReg(RegisterKind Kind)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser()
Force static initialization.
static bool ConvertOmodMul(int64_t &Mul)
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)
static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi)
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT)
constexpr uint64_t MIMGFlags
static bool AMDGPUCheckMnemonic(StringRef Mnemonic, const FeatureBitset &AvailableFeatures, unsigned VariantID)
static void applyMnemonicAliases(StringRef &Mnemonic, const FeatureBitset &Features, unsigned VariantID)
constexpr unsigned MAX_SRC_OPERANDS_NUM
#define EXPR_RESOLVE_OR_ERROR(RESOLVED)
static bool ConvertOmodDiv(int64_t &Div)
static bool IsRevOpcode(const unsigned Opcode)
static bool encodeCnt(const AMDGPU::IsaVersion ISA, int64_t &IntVal, int64_t CntVal, bool Saturate, unsigned(*encode)(const IsaVersion &Version, unsigned, unsigned), unsigned(*decode)(const IsaVersion &Version, unsigned))
static MCRegister getSpecialRegForName(StringRef RegName)
static void addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands, AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx, AMDGPUOperand::ImmTy ImmT, int64_t Default=0, std::optional< unsigned > InsertAt=std::nullopt)
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI)
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum)
static const fltSemantics * getOpFltSemantics(uint8_t OperandType)
static bool isInvalidVOPDY(const OperandVector &Operands, uint64_t InvalidOprIdx)
static std::string AMDGPUMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS, unsigned VariantID=0)
static LLVM_READNONE unsigned encodeBitmaskPerm(const unsigned AndMask, const unsigned OrMask, const unsigned XorMask)
static bool isSafeTruncation(int64_t Val, unsigned Size)
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
Provides AMDGPU specific target descriptions.
AMDGPU metadata definitions and in-memory representations.
AMDHSA kernel descriptor definitions.
static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value, raw_ostream &Err)
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
#define LLVM_READNONE
Definition Compiler.h:315
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
@ Default
#define Check(C,...)
static llvm::Expected< InlineInfo > decode(DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr)
Decode an InlineInfo in Data at the specified offset.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
Loop::LoopBounds::Direction Direction
Definition LoopInfo.cpp:231
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
mir Rename Register Operands
Register Reg
Register const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(PassOpts->AAPipeline)
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Interface definition for SIInstrInfo.
unsigned unsigned DefaultVal
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
This file implements the SmallBitVector class.
static bool Enabled
Definition Statistic.cpp:46
StringSet - A set-like wrapper for the StringMap.
static const char * getRegisterName(MCRegister Reg)
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6057
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
iterator end() const
Definition ArrayRef.h:136
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
Target independent representation for an assembler token.
Definition MCAsmMacro.h:22
StringRef getString() const
Get the string for the current token; this includes all characters (for example, the quotes on string...
Definition MCAsmMacro.h:103
bool is(TokenKind K) const
Definition MCAsmMacro.h:75
Register getReg() const
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
void printExpr(raw_ostream &, const MCExpr &) const
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:353
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:428
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Look up (or create) the symbol with the specified Name.
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getNumOperands() const
Definition MCInst.h:212
SMLoc getLoc() const
Definition MCInst.h:208
void setLoc(SMLoc loc)
Definition MCInst.h:207
unsigned getOpcode() const
Definition MCInst.h:202
iterator insert(iterator I, const MCOperand &Op)
Definition MCInst.h:232
void addOperand(const MCOperand Op)
Definition MCInst.h:215
iterator begin()
Definition MCInst.h:227
size_t size() const
Definition MCInst.h:226
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
Describe properties that are true of each instruction in the target description file.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
void setImm(int64_t Val)
Definition MCInst.h:89
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
int64_t getImm() const
Definition MCInst.h:84
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
bool isImm() const
Definition MCInst.h:66
void setReg(MCRegister Reg)
Set the register number.
Definition MCInst.h:79
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
bool isExpr() const
Definition MCInst.h:69
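The MCInst and MCOperand entries above are the building blocks every converter routine in this parser ultimately produces. A minimal, self-contained illustration (the opcode, register, and immediate values are placeholders, not taken from the AMDGPU instruction tables):

#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRegister.h"

// Build an instruction by appending operands created through the static
// MCOperand factories.
static llvm::MCInst makeExampleInst(unsigned Opcode, llvm::MCRegister Dst,
                                    int64_t Imm) {
  llvm::MCInst Inst;
  Inst.setOpcode(Opcode);
  Inst.addOperand(llvm::MCOperand::createReg(Dst));  // destination register
  Inst.addOperand(llvm::MCOperand::createImm(Imm));  // immediate source
  return Inst;
}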
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
constexpr bool isValid() const
Definition MCRegister.h:76
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition MCSymbol.cpp:50
void setRedefinable(bool Value)
Mark this symbol as redefinable.
Definition MCSymbol.h:210
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
MCTargetAsmParser - Generic interface to target specific assembly parsers.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
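ParseStatus, summarised above, is the ternary result type returned by the parse* methods on this page. A hedged sketch of the usual convention (the flag being parsed is invented for illustration, and the header location is an assumption):

#include "llvm/MC/MCParser/MCTargetAsmParser.h"  // declares ParseStatus (assumed header)

// NoMatch lets other parsers try the token, Failure reports a hard error
// after the operand was recognised, Success means it was consumed.
static llvm::ParseStatus parseFlagSketch(bool SawKeyword, bool Malformed) {
  if (!SawKeyword)
    return llvm::ParseStatus::NoMatch;   // not our operand; keep matching
  if (Malformed)
    return llvm::ParseStatus::Failure;   // our operand, but invalid
  return llvm::ParseStatus::Success;     // parsed and consumed
}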
constexpr unsigned id() const
Definition Register.h:95
Represents a location in source code.
Definition SMLoc.h:23
static SMLoc getFromPointer(const char *Ptr)
Definition SMLoc.h:36
constexpr const char * getPointer() const
Definition SMLoc.h:34
constexpr bool isValid() const
Definition SMLoc.h:29
Represents a range in source code.
Definition SMLoc.h:48
SMLoc Start
Definition SMLoc.h:50
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:854
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
Definition StringRef.h:657
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:573
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:143
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:611
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
bool consume_front(StringRef Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:637
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition StringRef.h:273
bool contains(StringRef key) const
Check if the set contains the given key.
Definition StringSet.h:60
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition StringSet.h:39
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:194
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
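The buffer-format helpers above are normally used as a pipeline: look up the symbolic dfmt and nfmt names, then pack them into the single format immediate. A sketch under the assumption that these helpers live in the MTBUFFormat namespace of AMDGPUBaseInfo.h and return a negative sentinel for unknown names (the format names in the comments are illustrative):

#include "Utils/AMDGPUBaseInfo.h"        // in-tree relative path (assumption)
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include <cstdint>

// Combine a data format and a numeric format into one encoded immediate.
static int64_t packFormatSketch(llvm::StringRef DfmtName,
                                llvm::StringRef NfmtName,
                                const llvm::MCSubtargetInfo &STI) {
  using namespace llvm::AMDGPU::MTBUFFormat;   // assumed namespace
  int64_t Dfmt = getDfmt(DfmtName);            // e.g. "BUF_DATA_FORMAT_32"
  int64_t Nfmt = getNfmt(NfmtName, STI);       // e.g. "BUF_NUM_FORMAT_FLOAT"
  if (Dfmt < 0 || Nfmt < 0)
    return -1;                                 // unknown symbolic name (assumed sentinel)
  return encodeDfmtNfmt(Dfmt, Nfmt);           // packed dfmt/nfmt immediate
}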
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
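Taken together, the sendmsg helpers listed above implement the symbolic form of the s_sendmsg operand. A hedged sketch of how the pieces compose (the SendMsg namespace, the header path, and the negative-sentinel convention are assumptions; message and op names are illustrative):

#include "Utils/AMDGPUBaseInfo.h"        // in-tree relative path (assumption)
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include <cstdint>
#include <optional>

// Resolve symbolic msg/op names, validate them for the subtarget, then pack
// the immediate with encodeMsg().
static std::optional<uint64_t>
encodeSendMsgSketch(llvm::StringRef Msg, llvm::StringRef Op, uint64_t StreamId,
                    const llvm::MCSubtargetInfo &STI) {
  using namespace llvm::AMDGPU::SendMsg;       // assumed namespace
  int64_t MsgId = getMsgId(Msg, STI);          // e.g. "MSG_GS" (illustrative)
  if (MsgId < 0 || !isValidMsgId(MsgId, STI))
    return std::nullopt;
  int64_t OpId = getMsgOpId(MsgId, Op, STI);   // operation within the message
  if (OpId < 0)
    OpId = 0;                                  // message takes no operation (sketch simplification)
  else if (!isValidMsgOp(MsgId, OpId, STI, /*Strict=*/true))
    return std::nullopt;
  return encodeMsg(MsgId, OpId, StreamId);
}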
ArrayRef< GFXVersion > getGFXVersions()
constexpr unsigned COMPONENTS[]
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, unsigned OpNo)
Get size of register operand.
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg a scalar register?
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool hasMAIInsts(const MCSubtargetInfo &STI)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCSubtargetInfo &ST)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isInlineValue(unsigned Reg)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:231
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:202
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:209
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:222
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:219
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:224
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:210
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:206
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:201
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:208
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:207
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:218
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:216
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:211
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:205
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:225
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:236
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:237
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:212
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:204
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:221
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:217
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:223
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:213
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:238
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:220
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:203
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition SIDefines.h:228
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
int64_t encode32BitLiteral(int64_t Imm, OperandType Type)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
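The vmcnt/expcnt/lgkmcnt helpers on this page all follow the same shape: start from the all-counters-at-maximum mask for the ISA version (the "wait for nothing" encoding) and fold each requested counter into it. A hedged sketch (the header path and the choice of starting value are assumptions):

#include "Utils/AMDGPUBaseInfo.h"        // in-tree relative path (assumption)

// Pack explicit vmcnt/expcnt/lgkmcnt values into a single s_waitcnt immediate.
static unsigned encodeWaitcntSketch(const llvm::AMDGPU::IsaVersion &Version,
                                    unsigned Vmcnt, unsigned Expcnt,
                                    unsigned Lgkmcnt) {
  unsigned Waitcnt = llvm::AMDGPU::getWaitcntBitMask(Version);
  Waitcnt = llvm::AMDGPU::encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = llvm::AMDGPU::encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = llvm::AMDGPU::encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}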
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable?
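The isInlinableLiteral* predicates decide whether a constant can be encoded directly as one of the hardware inline constants instead of consuming a literal slot. A small illustration (0x3FF0000000000000 is the IEEE-754 bit pattern of double 1.0, which is an inline constant; 12345 lies outside the inline integer range):

#include "Utils/AMDGPUBaseInfo.h"        // in-tree relative path (assumption)
#include <cstdint>

static bool inlineLiteralSketch() {
  int64_t One = 0x3FF0000000000000;      // bit pattern of double 1.0
  bool A = llvm::AMDGPU::isInlinableLiteral64(One, /*HasInv2Pi=*/true);    // true
  bool B = llvm::AMDGPU::isInlinableLiteral64(12345, /*HasInv2Pi=*/true);  // false
  return A && !B;
}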
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1425
@ OPERAND_IMMEDIATE
Definition MCInstrDesc.h:62
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
Definition Error.h:1113
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1705
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:252
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:293
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:314
Target & getTheR600Target()
The target for R600 GPUs.
SmallVectorImpl< std::unique_ptr< MCParsedAsmOperand > > OperandVector
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:159
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:164
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:405
Target & getTheGCNTarget()
The target for GCN GPUs.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:155
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:257
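These fixed-width predicates are what offset and immediate range checks in an assembler typically reduce to. A hedged example (the 13-bit width is purely illustrative, not a statement about any particular AMDGPU encoding):

#include "llvm/Support/MathExtras.h"
#include <cstdint>

// True exactly for offsets in the signed 13-bit range -4096 .. 4095.
static bool fitsSigned13(int64_t Offset) {
  return llvm::isIntN(13, Offset);
}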
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
#define N
RegisterKind Kind
StringLiteral Name
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
void initDefault(const MCSubtargetInfo *STI, MCContext &Ctx, bool InitMCExpr=true)
Instruction set architecture version.
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
static LLVM_ABI const fltSemantics & IEEEsingle() LLVM_READNONE
Definition APFloat.cpp:266
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static LLVM_ABI const fltSemantics & IEEEdouble() LLVM_READNONE
Definition APFloat.cpp:267
static LLVM_ABI const fltSemantics & IEEEhalf() LLVM_READNONE
Definition APFloat.cpp:264
static LLVM_ABI const fltSemantics & BFloat() LLVM_READNONE
Definition APFloat.cpp:265
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:320
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...