enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;
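  // Operand modifier helpers. Abs and Neg are the floating-point input
  // modifiers and Sext is the integer (sign-extension) modifier; an operand
  // may carry one family or the other, never both.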
  bool hasFPModifiers() const { return Abs || Neg; }
  bool hasIntModifiers() const { return Sext; }
  bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

  int64_t getFPModifiersOperand() const {

  int64_t getIntModifiersOperand() const {

  int64_t getModifiersOperand() const {
    assert(!(hasFPModifiers() && hasIntModifiers()) &&
           "fp and int modifiers should not be used simultaneously");
    if (hasFPModifiers())
      return getFPModifiersOperand();
    if (hasIntModifiers())
      return getIntModifiersOperand();

  friend raw_ostream &operator<<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
    ImmTyMatrixAScaleFmt,
    ImmTyMatrixBScaleFmt,

  mutable int MCOpIdx = -1;

  bool isToken() const override { return Kind == Token; }

  bool isSymbolRefExpr() const {

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

    return isRegOrInline(RCID, type) || isLiteralImm(type);

  bool isRegOrImmWithInt16InputMods() const {

  template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);

  bool isRegOrImmWithInt32InputMods() const {

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {

  bool isRegOrImmWithFP16InputMods() const {

  template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);

  bool isRegOrImmWithFP32InputMods() const {

  bool isRegOrImmWithFP64InputMods() const {

  template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrInlineImmWithFP64InputMods() const {
    return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }

  bool isVRegWithFP32InputMods() const {
    return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
  }

  bool isVRegWithFP64InputMods() const {
    return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
  }

  bool isPackedFP16InputMods() const {

  bool isPackedVGPRFP32InputMods() const {

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;

  bool isVRegWithInputMods() const;
  template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
  template <bool IsFake16> bool isT16VRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }

  bool isImmLiteral() const { return isImmTy(ImmTyNone); }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }
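  // One-line predicates over the parsed immediate's ImmTy. The generated
  // matcher (AMDGPUGenAsmMatcher.inc) uses these to match named instruction
  // modifiers such as offen, idxen, gds, and op_sel against operand classes.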
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
  bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
  bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
  bool isMatrixAFMT() const { return isImmTy(ImmTyMatrixAFMT); }
  bool isMatrixBFMT() const { return isImmTy(ImmTyMatrixBFMT); }
  bool isMatrixAScale() const { return isImmTy(ImmTyMatrixAScale); }
  bool isMatrixBScale() const { return isImmTy(ImmTyMatrixBScale); }
  bool isMatrixAScaleFmt() const { return isImmTy(ImmTyMatrixAScaleFmt); }
  bool isMatrixBScaleFmt() const { return isImmTy(ImmTyMatrixBScaleFmt); }
  bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
  bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isDppFI() const { return isImmTy(ImmTyDppFI); }
  bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }
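  // Source-operand predicates. The prefix names the register file that may
  // feed the operand: SCSrc = SGPR or inline constant, VCSrc = VGPR/SGPR or
  // inline constant, VSrc = VCSrc plus a literal, VISrc = VGPR or inline
  // constant, AISrc = AGPR or inline constant. The suffix gives the expected
  // value type and width.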
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {

  bool isSCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrc_b32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }

  bool isSSrcV2B16() const {

  bool isSSrc_b64() const {
    return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
           (((const MCTargetAsmParser *)AsmParser)
                ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&

  bool isSSrc_f32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }

  bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }

  bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }

  bool isSSrcV2F16() const {

  bool isSSrcV2FP32() const {

  bool isSCSrcV2FP32() const {

  bool isSSrcV2INT32() const {

  bool isSCSrcV2INT32() const {
    return isSCSrc_b32();
  }

  bool isSSrcOrLds_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrc_b32_Lo256() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
  }

  bool isVCSrc_b64_Lo256() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
  }

  bool isVCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcT_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isVCSrcTB16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrcFake16B16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrc_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrc_v2b16() const { return isVCSrc_b16(); }

  bool isVCSrc_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrc_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcTBF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
  }

  bool isVCSrcT_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isVCSrcT_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isVCSrcTBF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcTF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrcFake16BF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcFake16F16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrc_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
  }

  bool isVCSrc_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }

  bool isVCSrc_v2f16() const { return isVCSrc_f16(); }

  bool isVSrc_b32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }

  bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }

  bool isVSrcT_b16_Lo128() const {
    return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrcFake16_b16_Lo128() const {
    return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }

  bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }

  bool isVCSrcV2FP32() const { return isVCSrc_f64(); }

  bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }

  bool isVCSrc_v2b32() const { return isVCSrc_b64(); }

  bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }

  bool isVSrc_f32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }

  bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }

  bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }

  bool isVSrcT_bf16_Lo128() const {
    return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcT_f16_Lo128() const {
    return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrcFake16_bf16_Lo128() const {
    return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcFake16_f16_Lo128() const {
    return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }

  bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }

  bool isVSrc_v2bf16() const {
    return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
  }

  bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }

  bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
  }

  bool isVISrc_64_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
  }

  bool isVISrc_64_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_256_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_512_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512_b32();
  }

  bool isVISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024_b32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128_b32();
  }

  bool isVISrc_128_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
  }

  bool isVISrc_128_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128_f16() || isVISrc_128_b32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512_b32();
  }

  bool isAISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024_b32();
  }
  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isKImmFP64() const { return isLiteralImm(MVT::f64); }

  bool isMem() const override {

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSOPPBrTarget() const { return isExpr() || isImm(); }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayALU() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSplitBarrier() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
    return [this, P]() { return P(*this); };
  }

    return StringRef(Tok.Data, Tok.Length);

  void setImm(int64_t Val) {

  ImmTy getImmTy() const {

  MCRegister getReg() const override {

  SMLoc getStartLoc() const override {

  SMLoc getEndLoc() const override {

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  int getMCOpIdx() const { return MCOpIdx; }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
      addRegOperands(Inst, N);
      addImmOperands(Inst, N);

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
      addRegOperands(Inst, N);
      addImmOperands(Inst, N, false);

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
      addRegOperands(Inst, N);

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }
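  // Debug pretty-printer for ImmTy; used by print() below when dumping
  // operands.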
  static void printImmTy(raw_ostream &OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTyIndexKey8bit: OS << "index_key"; break;
    case ImmTyIndexKey16bit: OS << "index_key"; break;
    case ImmTyIndexKey32bit: OS << "index_key"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClamp: OS << "Clamp"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFI: OS << "DppFI"; break;
    case ImmTySDWADstSel: OS << "SDWADstSel"; break;
    case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
    case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
    case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
    case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
    case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
    case ImmTyBitOp3: OS << "BitOp3"; break;
    case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
    case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
    case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
    case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
    case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
    case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
    case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
    case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
    case ImmTyScaleSel: OS << "ScaleSel"; break;
    case ImmTyByteSel: OS << "ByteSel"; break;

  void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
         << " mods: " << Reg.Mods << '>';
    if (getImmTy() != ImmTyNone) {
      OS << " type: "; printImmTy(OS, getImmTy());
    OS << " mods: " << Imm.Mods << '>';
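  // Factory helpers. Parser code builds operands through these rather than
  // constructing AMDGPUOperand directly.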
  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Mods = Modifiers();

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      MCRegister Reg, SMLoc S, SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = Reg;
    Op->Reg.Mods = Modifiers();

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);

  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;

#define GET_REGISTER_MATCHER
#include "AMDGPUGenAsmMatcher.inc"
#undef GET_REGISTER_MATCHER
#undef GET_SUBTARGET_FEATURE_NAME
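// KernelScopeInfo tracks the highest SGPR/VGPR/AGPR index referenced in the
// current kernel and mirrors the running maxima into the
// .kernel.{sgpr,vgpr,agpr}_count symbols.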
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
          Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
                         VgprIndexUnusedMin);

  void usesAgprAt(int i) {
    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
          Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));

          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
                           VgprIndexUnusedMin);

  KernelScopeInfo() = default;

    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
      usesAgprAt(AgprIndexUnusedMin = -1);

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
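// AMDGPUAsmParser state. ForcedEncodingSize, ForcedDPP, and ForcedSDWA record
// an encoding explicitly requested through the mnemonic (see the isForced*
// accessors below); KernelScope feeds the .kernel.*_count symbols above.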
  MCAsmParser &Parser;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  const unsigned HwMode;

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  unsigned getRegOperandSize(const MCInstrDesc &Desc, unsigned OpNo) const {
    int16_t RCID = MII.getOpRegClassID(Desc.operands()[OpNo], HwMode);

  void createConstantSymbol(StringRef Id, int64_t Val);

  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);

  bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
                          const MCExpr *FlatScrUsed, bool XNACKUsed,
                          std::optional<bool> EnableWavefrontSize32,
                          const MCExpr *NextFreeVGPR, SMRange VGPRRange,
                          const MCExpr *NextFreeSGPR, SMRange SGPRRange,
                          const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSACodeObjectVersion();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
                             RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                             unsigned &RegWidth,
                             SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                             unsigned &RegWidth,
                             SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                          unsigned &RegWidth,
                          SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
  MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
                           unsigned SubReg, unsigned RegWidth, SMLoc Loc);

  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);

    OperandMode_Default,

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
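  // The constructor publishes ISA-version constants
  // (.amdgcn.gfx_generation_* and .option.machine_version_*) as absolute
  // symbols so assembly sources can query the target being assembled.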
  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII, const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser),
        HwMode(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)) {

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

      createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
      createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
      createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);

      createConstantSymbol(".option.machine_version_major", ISA.Major);
      createConstantSymbol(".option.machine_version_minor", ISA.Minor);
      createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);

      initializeGprCountSymbol(IS_VGPR);
      initializeGprCountSymbol(IS_SGPR);

      createConstantSymbol(Symbol, Code);

    createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
    createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
    createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
  bool isWave32() const { return getAvailableFeatures()[Feature_isWave32Bit]; }

  bool isWave64() const { return getAvailableFeatures()[Feature_isWave64Bit]; }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool has64BitLiterals() const {
    return getFeatureBits()[AMDGPU::Feature64BitLiterals];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasTrue16Insts() const {
    return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
  }

    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];

  bool hasSGPR102_SGPR103() const {

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  bool hasPartialNSAEncoding() const {
    return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
  }

  bool hasGloballyAddressableScratch() const {
    return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

    return const_cast<AMDGPUAsmParser *>(this)->MCTargetAsmParser::getContext();

  const MCRegisterInfo *getMRI() const {

  const MCInstrInfo *getMII() const {

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
  ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
                               SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
                            OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
  ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);

      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
      std::function<bool(int64_t &)> ConvertResult = nullptr);

  ParseStatus parseOperandArrayWithPrefix(
      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
      bool (*ConvertResult)(int64_t &) = nullptr);

      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;

  ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,

                                ArrayRef<const char *> Ids,

                                ArrayRef<const char *> Ids,
                                AMDGPUOperand::ImmTy Type);

  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();

                       bool AllowImm = true);
                    bool AllowImm = true);
                             AMDGPUOperand::ImmTy ImmTy);
                        AMDGPUOperand::ImmTy Type);
                        AMDGPUOperand::ImmTy Type);
                        AMDGPUOperand::ImmTy Type);

  ParseStatus parseDfmtNfmt(int64_t &Format);
  ParseStatus parseUfmt(int64_t &Format);
  ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
  ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
  ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
  ParseStatus parseNumericFormat(int64_t &Format);

  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  bool parseCnt(int64_t &IntVal);

  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);

  bool parseDelay(int64_t &Delay);

  struct OperandInfoTy {
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Val) : Val(Val) {}

  struct StructuredOpField : OperandInfoTy {
    bool IsDefined = false;

    StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
        : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
    virtual ~StructuredOpField() = default;

    bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
      Parser.Error(Loc, "invalid " + Desc + ": " + Err);

    virtual bool validate(AMDGPUAsmParser &Parser) const {
        return Error(Parser, "not supported on this GPU");
        return Error(Parser, "only " + Twine(Width) + "-bit values are legal");

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
                             OperandInfoTy &Width);

  static SMLoc getLaterLoc(SMLoc a, SMLoc b);

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand &)> Test,
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type,

  bool validateInstruction(const MCInst &Inst, SMLoc IDLoc,
  std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
  bool tryVOPD(const MCInst &Inst);
  bool tryVOPD3(const MCInst &Inst);
  bool tryAnotherVOPDEncoding(const MCInst &Inst);

  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc);
  bool validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateTensorR128(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateTrue16OpSel(const MCInst &Inst);
  bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
  bool validateVccOperand(MCRegister Reg) const;
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateDivScale(const MCInst &Inst);
                          const unsigned CPol);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, SMLoc IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  StringRef getId() const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);

  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  StringRef getTokenStr() const;
  AsmToken peekToken(bool ShouldSkipSpace = true);
  SMLoc getLoc() const;

  void onBeginOfFile() override;
  bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;

  bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
                           const unsigned MaxVal, const Twine &ErrMsg,
  bool parseSwizzleOperands(const unsigned OpNum, int64_t *Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);

  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);
  bool parseSwizzleFFT(int64_t &Imm);
  bool parseSwizzleRotate(int64_t &Imm);

  int64_t parseGPRIdxMacro();

                    OptionalImmIndexMap &OptionalIdx);
                       OptionalImmIndexMap &OptionalIdx);
                    OptionalImmIndexMap &OptionalIdx);

  void cvtOpSelHelper(MCInst &Inst, unsigned OpSel);

  bool parseDimId(unsigned &Encoding);
  bool convertDppBoundCtrl(int64_t &BoundCtrl);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
                    bool IsDPP8 = false);
                             AMDGPUOperand::ImmTy Type);
                uint64_t BasicInstType,
                bool SkipDstVcc = false,
                bool SkipSrcVcc = false);
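// Returns true if this parsed immediate can be encoded as an inline constant
// of the requested type (small integers and selected FP values, plus 1/(2*pi)
// on targets that support it), rather than requiring a separate literal.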
bool AMDGPUOperand::isInlinableImm(MVT type) const {
  if (!isImmTy(ImmTyNone)) {
  if (getModifiers().Lit != LitModifier::None)

  if (type == MVT::f64 || type == MVT::i64) {
                                    AsmParser->hasInv2PiInlineImm());

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
                      APFloat::rmNearestTiesToEven, &Lost);
    uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
                                     AsmParser->hasInv2PiInlineImm());
        static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());

  if (type == MVT::f64 || type == MVT::i64) {
                                    AsmParser->hasInv2PiInlineImm());
        static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
      static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
      AsmParser->hasInv2PiInlineImm());

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  if (!isImmTy(ImmTyNone)) {
      (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
  if (type == MVT::f64 && hasFPModifiers()) {
  if (type == MVT::f64) {
  if (type == MVT::i64) {
  MVT ExpectedType = (type == MVT::v2f16)   ? MVT::f16
                     : (type == MVT::v2i16) ? MVT::f32
                     : (type == MVT::v2f32) ? MVT::f32
  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isVRegWithInputMods() const {
  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
         (isRegClass(AMDGPU::VReg_64RegClassID) &&
          AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
}

template <bool IsFake16>
bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
  return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
                             : AMDGPU::VGPR_16_Lo128RegClassID);
}

template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
  return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
                             : AMDGPU::VGPR_16RegClassID);
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
  if (AsmParser->isGFX9Plus())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

bool AMDGPUOperand::isBoolReg() const {
  return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
                     (AsmParser->isWave32() && isSCSrc_b32()));
}

uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const {
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N,
                                   bool ApplyModifiers) const {
    addLiteralImmOperand(Inst, Imm.Val,
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
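// Encodes a parsed literal into the MCInst: applies FP input modifiers when
// requested, decides between inline-constant and literal encodings based on
// the operand type, and honors explicit lit()/lit64() modifiers on targets
// with 64-bit literal support.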
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val,
                                         bool ApplyModifiers) const {
  const auto &InstDesc = AsmParser->getMII()->get(Inst.getOpcode());

  if (ApplyModifiers) {
    Val = applyInputFPModifiers(Val, Size);

  uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;

  bool CanUse64BitLiterals =
      AsmParser->has64BitLiterals() &&

  MCContext &Ctx = AsmParser->getContext();

    if (Lit == LitModifier::None &&
                                     AsmParser->hasInv2PiInlineImm())) {

    bool HasMandatoryLiteral =

      if (Literal.getLoBits(32) != 0 &&
          (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
          !HasMandatoryLiteral) {
        const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
            "Can't encode literal as exact 64-bit floating-point operand. "
            "Low 32-bits will be set to zero");
        Val &= 0xffffffff00000000u;

      if (CanUse64BitLiterals && Lit == LitModifier::None &&
        Lit = LitModifier::Lit64;
      } else if (Lit == LitModifier::Lit) {

      if (CanUse64BitLiterals && Lit == LitModifier::None &&
        Lit = LitModifier::Lit64;

    if (Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() &&
        Literal == 0x3fc45f306725feed) {

                          APFloat::rmNearestTiesToEven, &lost);
      Val = FPLiteral.bitcastToAPInt().getZExtValue();

      if (Lit != LitModifier::None) {

    if (Lit == LitModifier::None &&
      if (!AsmParser->has64BitLiterals() || Lit == LitModifier::Lit)

    if (Lit == LitModifier::None &&
      if (!AsmParser->has64BitLiterals()) {
        Val = static_cast<uint64_t>(Val) << 32;

      if (Lit == LitModifier::Lit ||
        Val = static_cast<uint64_t>(Val) << 32;

    if (Lit == LitModifier::Lit)

    if (Lit != LitModifier::None) {

void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {

bool AMDGPUOperand::isInlineValue() const {

void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
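// Maps a register kind (VGPR/SGPR/AGPR/TTMP) and a register width to the
// corresponding register class ID.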
  if (Is == IS_VGPR) {
      return AMDGPU::VGPR_32RegClassID;
      return AMDGPU::VReg_64RegClassID;
      return AMDGPU::VReg_96RegClassID;
      return AMDGPU::VReg_128RegClassID;
      return AMDGPU::VReg_160RegClassID;
      return AMDGPU::VReg_192RegClassID;
      return AMDGPU::VReg_224RegClassID;
      return AMDGPU::VReg_256RegClassID;
      return AMDGPU::VReg_288RegClassID;
      return AMDGPU::VReg_320RegClassID;
      return AMDGPU::VReg_352RegClassID;
      return AMDGPU::VReg_384RegClassID;
      return AMDGPU::VReg_512RegClassID;
      return AMDGPU::VReg_1024RegClassID;
  } else if (Is == IS_TTMP) {
      return AMDGPU::TTMP_32RegClassID;
      return AMDGPU::TTMP_64RegClassID;
      return AMDGPU::TTMP_128RegClassID;
      return AMDGPU::TTMP_256RegClassID;
      return AMDGPU::TTMP_512RegClassID;
  } else if (Is == IS_SGPR) {
      return AMDGPU::SGPR_32RegClassID;
      return AMDGPU::SGPR_64RegClassID;
      return AMDGPU::SGPR_96RegClassID;
      return AMDGPU::SGPR_128RegClassID;
      return AMDGPU::SGPR_160RegClassID;
      return AMDGPU::SGPR_192RegClassID;
      return AMDGPU::SGPR_224RegClassID;
      return AMDGPU::SGPR_256RegClassID;
      return AMDGPU::SGPR_288RegClassID;
      return AMDGPU::SGPR_320RegClassID;
      return AMDGPU::SGPR_352RegClassID;
      return AMDGPU::SGPR_384RegClassID;
      return AMDGPU::SGPR_512RegClassID;
  } else if (Is == IS_AGPR) {
      return AMDGPU::AGPR_32RegClassID;
      return AMDGPU::AReg_64RegClassID;
      return AMDGPU::AReg_96RegClassID;
      return AMDGPU::AReg_128RegClassID;
      return AMDGPU::AReg_160RegClassID;
      return AMDGPU::AReg_192RegClassID;
      return AMDGPU::AReg_224RegClassID;
      return AMDGPU::AReg_256RegClassID;
      return AMDGPU::AReg_288RegClassID;
      return AMDGPU::AReg_320RegClassID;
      return AMDGPU::AReg_352RegClassID;
      return AMDGPU::AReg_384RegClassID;
      return AMDGPU::AReg_512RegClassID;
      return AMDGPU::AReg_1024RegClassID;
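// Named special registers recognized by the parser (for example "vcc",
// "exec_lo", "m0", "null"), mapped to their MCRegister values.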
      .Case("exec", AMDGPU::EXEC)
      .Case("vcc", AMDGPU::VCC)
      .Case("flat_scratch", AMDGPU::FLAT_SCR)
      .Case("xnack_mask", AMDGPU::XNACK_MASK)
      .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
      .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
      .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
      .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
      .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
      .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
      .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
      .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
      .Case("src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
      .Case("src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
      .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
      .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
      .Case("lds_direct", AMDGPU::LDS_DIRECT)
      .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
      .Case("m0", AMDGPU::M0)
      .Case("vccz", AMDGPU::SRC_VCCZ)
      .Case("src_vccz", AMDGPU::SRC_VCCZ)
      .Case("execz", AMDGPU::SRC_EXECZ)
      .Case("src_execz", AMDGPU::SRC_EXECZ)
      .Case("scc", AMDGPU::SRC_SCC)
      .Case("src_scc", AMDGPU::SRC_SCC)
      .Case("tba", AMDGPU::TBA)
      .Case("tma", AMDGPU::TMA)
      .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
      .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
      .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
      .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
      .Case("vcc_lo", AMDGPU::VCC_LO)
      .Case("vcc_hi", AMDGPU::VCC_HI)
      .Case("exec_lo", AMDGPU::EXEC_LO)
      .Case("exec_hi", AMDGPU::EXEC_HI)
      .Case("tma_lo", AMDGPU::TMA_LO)
      .Case("tma_hi", AMDGPU::TMA_HI)
      .Case("tba_lo", AMDGPU::TBA_LO)
      .Case("tba_hi", AMDGPU::TBA_HI)
      .Case("pc", AMDGPU::PC_REG)
      .Case("null", AMDGPU::SGPR_NULL)

bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc, bool RestoreOnFailure) {
  auto R = parseRegister();
  if (!R)
    return true;
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();

bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
  return ParseRegister(Reg, StartLoc, EndLoc, false);

ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
  bool Result = ParseRegister(Reg, StartLoc, EndLoc, true);
  bool PendingErrors = getParser().hasPendingError();
  getParser().clearPendingErrors();
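// Grows a bracketed register list (e.g. "[s0, s1, s2]") by one register:
// lo/hi halves of special registers are merged into their 64-bit aliases, and
// plain registers must have consecutive indices.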
bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind,
                                            MCRegister Reg1, SMLoc Loc) {
    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
      Reg = AMDGPU::FLAT_SCR;
    if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
      Reg = AMDGPU::XNACK_MASK;
    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
    Error(Loc, "register does not fit in the list");
    if (Reg1 != Reg + RegWidth / 32) {
      Error(Loc, "registers in a list must have consecutive indices");

    {{"ttmp"}, IS_TTMP},

  return Kind == IS_VGPR ||

    if (Str.starts_with(Reg.Name))

  return !Str.getAsInteger(10, Num);

AMDGPUAsmParser::isRegister(const AsmToken &Token,
                            const AsmToken &NextToken) const {

  StringRef RegSuffix = Str.substr(RegName.size());
  if (!RegSuffix.empty()) {

AMDGPUAsmParser::isRegister()
  return isRegister(getToken(), peekToken());

MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
                                          unsigned SubReg, unsigned RegWidth,
  unsigned AlignSize = 1;
  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {

  if (RegNum % AlignSize != 0) {
    Error(Loc, "invalid register alignment");
    return MCRegister();

  unsigned RegIdx = RegNum / AlignSize;
    Error(Loc, "invalid or unsupported register size");
    return MCRegister();

  const MCRegisterClass RC = TRI->getRegClass(RCID);
  if (RegIdx >= RC.getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
    Error(Loc, "register index is out of range");
    return AMDGPU::NoRegister;

  if (RegKind == IS_VGPR && !isGFX1250() && RegIdx + RegWidth / 32 > 256) {
    Error(Loc, "register index is out of range");
    return MCRegister();
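// Parses the bracketed index range of a register operand such as v[4:7] or
// s[10:11]; on success Num holds the first index and Width the total width in
// bits (32 bits per register, so v[4:7] gives Num = 4 and Width = 128).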
2943bool AMDGPUAsmParser::ParseRegRange(
unsigned &Num,
unsigned &RegWidth,
2945 int64_t RegLo, RegHi;
2949 SMLoc FirstIdxLoc = getLoc();
2956 SecondIdxLoc = getLoc();
2967 Error(FirstIdxLoc,
"invalid register index");
2972 Error(SecondIdxLoc,
"invalid register index");
2976 if (RegLo > RegHi) {
2977 Error(FirstIdxLoc,
"first register index should not exceed second index");
2981 if (RegHi == RegLo) {
2982 StringRef RegSuffix = getTokenStr();
2983 if (RegSuffix ==
".l") {
2986 }
else if (RegSuffix ==
".h") {
2992 Num =
static_cast<unsigned>(RegLo);
2993 RegWidth = 32 * ((RegHi - RegLo) + 1);
2998MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
3001 SmallVectorImpl<AsmToken> &Tokens) {
3007 RegKind = IS_SPECIAL;
3014MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
3017 SmallVectorImpl<AsmToken> &Tokens) {
3019 StringRef
RegName = getTokenStr();
3020 auto Loc = getLoc();
3024 Error(Loc,
"invalid register name");
3025 return MCRegister();
3033 unsigned SubReg = NoSubRegister;
3034 if (!RegSuffix.
empty()) {
3042 Error(Loc,
"invalid register index");
3043 return MCRegister();
3048 if (!ParseRegRange(RegNum, RegWidth,
SubReg))
3049 return MCRegister();
3052 return getRegularReg(RegKind, RegNum,
SubReg, RegWidth, Loc);
MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                                         unsigned &RegWidth,
                                         SmallVectorImpl<AsmToken> &Tokens) {
  auto ListLoc = getLoc();
  // ...
                 "expected a register or a list of registers")) {
    return MCRegister();
  }
  // ...
  MCRegister Reg;
  auto Loc = getLoc();
  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
    return MCRegister();
  if (RegWidth != 32) {
    Error(Loc, "expected a single 32-bit register");
    return MCRegister();
  }

  // ...
    RegisterKind NextRegKind;
    MCRegister NextReg;
    unsigned NextRegNum, NextRegWidth;
    // ...
    if (!ParseAMDGPURegister(NextRegKind, NextReg,
                             NextRegNum, NextRegWidth, Tokens))
      return MCRegister();
    if (NextRegWidth != 32) {
      Error(Loc, "expected a single 32-bit register");
      return MCRegister();
    }
    if (NextRegKind != RegKind) {
      Error(Loc, "registers in a list must be of the same kind");
      return MCRegister();
    }
    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
      return MCRegister();
  // ...
                 "expected a comma or a closing square bracket")) {
    return MCRegister();
  }
  // ...
  Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
  // ...
}
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  auto Loc = getLoc();
  // ...
    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
  // ...
    Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
  // ...
    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
  // ...
    assert(Parser.hasPendingError());
  // ...
  if (!subtargetHasRegister(*TRI, Reg)) {
    if (Reg == AMDGPU::SGPR_NULL) {
      Error(Loc, "'null' operand is not supported on this GPU");
    // ...
              " register not available on this GPU");
    }
    // ...
  }
  // ...
}

bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          bool RestoreOnFailure) {
  // ...
  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
    if (RestoreOnFailure) {
      while (!Tokens.empty()) {
        // ...
      }
    }
  }
  // ...
}
std::optional<StringRef>
AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
  // ...
    return StringRef(".amdgcn.next_free_vgpr");
  // ...
    return StringRef(".amdgcn.next_free_sgpr");
  // ...
  return std::nullopt;
}

void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  // ...
}

bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // ...
  auto SymbolName = getGprCountSymbolName(RegKind);
  // ...
  int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
  // ...
    return !Error(getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  // ...
                  ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
  // ...
  if (OldCount <= NewMax)
    // ...
}
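// Illustrative standalone sketch (not part of the parser): the
// .amdgcn.next_free_{v,s}gpr symbols hold a running maximum register count.
// updateGprCountSymbols computes the last dword touched by a register use as
// DwordRegIndex + ceil(RegWidth / 32) - 1 and raises the symbol to one past
// that index. The helper below models only that arithmetic; names are
// hypothetical.
#include <algorithm>
#include <cassert>
#include <cstdint>

static int64_t NextFreeVGPR = 0; // stand-in for the .amdgcn.next_free_vgpr value

static void noteRegisterUse(unsigned DwordRegIndex, unsigned RegWidthInBits) {
  int64_t NewMax = DwordRegIndex + (RegWidthInBits + 31) / 32 - 1;
  NextFreeVGPR = std::max(NextFreeVGPR, NewMax + 1); // a count, not an index
}

int main() {
  noteRegisterUse(0, 32);  // v0     -> 1 VGPR needed
  noteRegisterUse(4, 128); // v[4:7] -> 8 VGPRs needed
  assert(NextFreeVGPR == 8);
  return 0;
}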
std::unique_ptr<AMDGPUOperand>
AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
  const auto &Tok = getToken();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  MCRegister Reg;
  unsigned RegNum, RegWidth;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
    return nullptr;
  }
  // ...
  if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
    return nullptr;
  // ...
  KernelScope.usesRegister(RegKind, RegNum, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
}
  // ...
  if (isRegister() || isModifier())
    return ParseStatus::NoMatch;
  // ...
  if (Lit == LitModifier::None) {
    if (trySkipId("lit"))
      Lit = LitModifier::Lit;
    else if (trySkipId("lit64"))
      Lit = LitModifier::Lit64;
    // ...
  }
  if (Lit != LitModifier::None) {
    // ...
    ParseStatus S = parseImm(Operands, HasSP3AbsModifier, Lit);
    // ...
  }
  // ...
  const auto &NextTok = peekToken();
  // ...
  bool Negate = false;
  // ...
  AMDGPUOperand::Modifiers Mods;
  // ...
    StringRef Num = getTokenStr();
    // ...
    APFloat RealVal(APFloat::IEEEdouble());
    auto roundMode = APFloat::rmNearestTiesToEven;
    if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
      // ...
    if (Negate)
      RealVal.changeSign();
    // ...
    Operands.push_back(
        AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
                                 AMDGPUOperand::ImmTyNone, true));
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);
    // ...
  if (HasSP3AbsModifier) {
    // ...
    if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
      // ...
  } else {
    if (Parser.parseExpression(Expr))
      // ...
  }
  if (Expr->evaluateAsAbsolute(IntVal)) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);
  } else {
    if (Lit != LitModifier::None)
      // ...
    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
  }
  // ...

  if (auto R = parseRegister()) {
    // ...
  }
  // ...
  ParseStatus Res = parseReg(Operands);
  // ...
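// Illustrative standalone sketch (not part of the parser): the floating-point
// branch above converts the token text to an IEEE double and stores the raw
// bit pattern of the value in the immediate operand. A plain-C++ equivalent of
// that conversion (without APFloat, so explicit rounding-mode control is lost;
// names are hypothetical):
#include <bit>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <string>

static uint64_t fpTokenToBits(const std::string &Tok, bool Negate) {
  double Val = std::strtod(Tok.c_str(), nullptr); // mirrors convertFromString()
  if (Negate)
    Val = -Val;                                   // mirrors RealVal.changeSign()
  return std::bit_cast<uint64_t>(Val);            // mirrors bitcastToAPInt()
}

int main() {
  assert(fpTokenToBits("1.0", false) == 0x3FF0000000000000ULL);
  assert(fpTokenToBits("1.0", true) == 0xBFF0000000000000ULL);
  return 0;
}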
bool
AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token,
                                        const AsmToken &NextToken) const {
  // ...
  return str == "abs" || str == "neg" || str == "sext";
}

bool
AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token,
                                         const AsmToken &NextToken) const {
  // ...
}

bool
AMDGPUAsmParser::isOperandModifier(const AsmToken &Token,
                                   const AsmToken &NextToken) const {
  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
}

bool
AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token,
                                        const AsmToken &NextToken) const {
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
}

bool
AMDGPUAsmParser::isModifier() {
  AsmToken Tok = getToken();
  AsmToken NextToken[2];
  peekTokens(NextToken);
  return isOperandModifier(Tok, NextToken[0]) ||
         (Tok.is(AsmToken::Minus) &&
          isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
         isOpcodeModifierWithVal(Tok, NextToken[0]);
}
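// For context, an illustrative (not exhaustive) list of the operand spellings
// the modifier predicates above are probing for; this table and program are an
// example added for clarity, not part of the parser.
#include <cstdio>

static const char *ModifierExamples[][2] = {
    {"abs(v1)", "floating-point absolute value, named form"},
    {"|v1|", "floating-point absolute value, SP3 form"},
    {"neg(v1)", "floating-point negation, named form"},
    {"-v1", "floating-point negation, SP3 form"},
    {"sext(v1)", "integer sign extension"},
    {"mul:2", "opcode modifier taking a value"},
};

int main() {
  for (const auto &Row : ModifierExamples)
    std::printf("%-10s %s\n", Row[0], Row[1]);
  return 0;
}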
bool AMDGPUAsmParser::parseSP3NegModifier() {
  AsmToken NextToken[2];
  peekTokens(NextToken);
  // ...
      (isRegister(NextToken[0], NextToken[1]) ||
       // ...
       isId(NextToken[0], "abs"))) {
    // ...
  }
  // ...
}

// ...
    return Error(getLoc(), "invalid syntax, expected 'neg' modifier");

  SP3Neg = parseSP3NegModifier();
  // ...
  Neg = trySkipId("neg");
  // ...
    return Error(Loc, "expected register or immediate");
  // ...
  Abs = trySkipId("abs");
  // ...
  if (trySkipId("lit")) {
    Lit = LitModifier::Lit;
    // ...
  } else if (trySkipId("lit64")) {
    Lit = LitModifier::Lit64;
    // ...
    if (!has64BitLiterals())
      return Error(Loc, "lit64 is not supported on this GPU");
  }
  // ...
    return Error(Loc, "expected register or immediate");
  // ...
  return (SP3Neg || Neg || SP3Abs || Abs || Lit != LitModifier::None)
  // ...
  if (Lit != LitModifier::None && !Operands.back()->isImm())
    Error(Loc, "expected immediate with lit modifier");

  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
    // ...
  if (Lit != LitModifier::None &&
      // ...

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;
  // ...
  if (Mods.hasFPModifiers() || Lit != LitModifier::None) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    // ...
      return Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);
  }
  // ...
  bool Sext = trySkipId("sext");
  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
    // ...

  AMDGPUOperand::Modifiers Mods;
  // ...
  if (Mods.hasIntModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    // ...
      return Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);
  }
  // ...
  return parseRegOrImmWithFPInputMods(Operands, false);
  // ...
  return parseRegOrImmWithIntInputMods(Operands, false);
}

// ...
  auto Loc = getLoc();
  if (trySkipId("off")) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
                                                AMDGPUOperand::ImmTyOff, false));
    // ...
  }
  // ...
  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
  // ...

unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  // ...
    return Match_InvalidOperand;
  // ...
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // ...
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    // ...
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }
  // ...
  if (tryAnotherVOPDEncoding(Inst))
    return Match_InvalidOperand;

  return Match_Success;
}
  static const unsigned Variants[] = {
  // ...

ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (isForcedDPP() && isForcedVOP3()) {
    // ...
  if (getForcedEncodingSize() == 32) {
    // ...
  if (isForcedVOP3()) {
    // ...
  if (isForcedSDWA()) {
    // ...
  if (isForcedDPP()) {
    // ...
}

StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (isForcedDPP() && isForcedVOP3())
    // ...
  if (getForcedEncodingSize() == 32)
    // ...
}

unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  // ...
  case AMDGPU::FLAT_SCR:
  case AMDGPU::VCC_LO:
  case AMDGPU::VCC_HI:
    // ...
  return AMDGPU::NoRegister;
}
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  // ...
}

unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  // ...
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_e64_gfx11:
  case AMDGPU::V_LSHLREV_B64_e32_gfx12:
  case AMDGPU::V_LSHLREV_B64_e64_gfx12:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64_gfx11:
  case AMDGPU::V_LSHRREV_B64_e64_gfx12:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64_gfx11:
  case AMDGPU::V_ASHRREV_I64_e64_gfx12:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHR_I64_e64:
    // ...
}

// ...
                                   bool AddMandatoryLiterals = false) {
  // ...
      AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
  // ...
      AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
  // ...
    return {getNamedOperandIdx(Opcode, OpName::src0X),
            getNamedOperandIdx(Opcode, OpName::vsrc1X),
            getNamedOperandIdx(Opcode, OpName::vsrc2X),
            getNamedOperandIdx(Opcode, OpName::src0Y),
            getNamedOperandIdx(Opcode, OpName::vsrc1Y),
            getNamedOperandIdx(Opcode, OpName::vsrc2Y),
            // ...
  return {getNamedOperandIdx(Opcode, OpName::src0),
          getNamedOperandIdx(Opcode, OpName::src1),
          getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
}
bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
  // ...
  return !isInlineConstant(Inst, OpIdx);
  // ...
  return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
}

// ...
  const unsigned Opcode = Inst.getOpcode();
  if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
    // ...
  if (!LaneSelOp.isReg())
    // ...
  return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
// ...

bool AMDGPUAsmParser::validateConstantBusLimitations(
    // ...
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  MCRegister LastSGPR;
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize;

  if (!(Desc.TSFlags &
        // ...

  SmallDenseSet<unsigned> SGPRsUsed;
  unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
  if (SGPRUsed != AMDGPU::NoRegister) {
    SGPRsUsed.insert(SGPRUsed);
    ++ConstantBusUseCount;
  }
  // ...
  unsigned ConstantBusLimit = getConstantBusLimit(Opcode);
  // ...
  for (int OpIdx : OpIndices) {
    // ...
    if (usesConstantBus(Inst, OpIdx)) {
      // ...
      if (SGPRsUsed.insert(LastSGPR).second) {
        ++ConstantBusUseCount;
      }
      // ...
      if (NumLiterals == 0) {
        // ...
      } else if (LiteralSize != Size) {
        // ...
      }
    }
  }
  // ...
  if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
    // ...
          "invalid operand (violates constant bus restrictions)");
  }
  // ...
}
3922std::optional<unsigned>
3923AMDGPUAsmParser::checkVOPDRegBankConstraints(
const MCInst &Inst,
bool AsVOPD3) {
3925 const unsigned Opcode = Inst.
getOpcode();
3931 auto getVRegIdx = [&](unsigned,
unsigned OperandIdx) {
3932 const MCOperand &Opr = Inst.
getOperand(OperandIdx);
3940 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
3941 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
3942 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
3946 for (
auto OpName : {OpName::src0X, OpName::src0Y}) {
3947 int I = getNamedOperandIdx(Opcode, OpName);
3951 int64_t
Imm =
Op.getImm();
3957 for (
auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
3958 OpName::vsrc2Y, OpName::imm}) {
3959 int I = getNamedOperandIdx(Opcode, OpName);
3969 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
3970 getVRegIdx, *
TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
3972 return InvalidCompOprIdx;
3975bool AMDGPUAsmParser::validateVOPD(
const MCInst &Inst,
3982 for (
const std::unique_ptr<MCParsedAsmOperand> &Operand :
Operands) {
3983 AMDGPUOperand &
Op = (AMDGPUOperand &)*Operand;
3984 if ((
Op.isRegKind() ||
Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
3986 Error(
Op.getStartLoc(),
"ABS not allowed in VOPD3 instructions");
3990 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
3991 if (!InvalidCompOprIdx.has_value())
3994 auto CompOprIdx = *InvalidCompOprIdx;
3997 std::max(InstInfo[
VOPD::X].getIndexInParsedOperands(CompOprIdx),
3998 InstInfo[
VOPD::Y].getIndexInParsedOperands(CompOprIdx));
4001 auto Loc = ((AMDGPUOperand &)*
Operands[ParsedIdx]).getStartLoc();
4002 if (CompOprIdx == VOPD::Component::DST) {
4004 Error(Loc,
"dst registers must be distinct");
4006 Error(Loc,
"one dst register must be even and the other odd");
4008 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
4009 Error(Loc, Twine(
"src") + Twine(CompSrcIdx) +
4010 " operands must use different VGPR banks");
4018bool AMDGPUAsmParser::tryVOPD3(
const MCInst &Inst) {
4020 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst,
false);
4021 if (!InvalidCompOprIdx.has_value())
4025 InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst,
true);
4026 if (InvalidCompOprIdx.has_value()) {
4031 if (*InvalidCompOprIdx == VOPD::Component::DST)
4044bool AMDGPUAsmParser::tryVOPD(
const MCInst &Inst) {
4045 const unsigned Opcode = Inst.
getOpcode();
4060 for (
auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4061 OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4062 OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4063 int I = getNamedOperandIdx(Opcode, OpName);
4070 return !tryVOPD3(Inst);
4075bool AMDGPUAsmParser::tryAnotherVOPDEncoding(
const MCInst &Inst) {
4076 const unsigned Opcode = Inst.
getOpcode();
4081 return tryVOPD(Inst);
4082 return tryVOPD3(Inst);
4085bool AMDGPUAsmParser::validateIntClampSupported(
const MCInst &Inst) {
4091 int ClampIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::clamp);
4102bool AMDGPUAsmParser::validateMIMGDataSize(
const MCInst &Inst,
SMLoc IDLoc) {
4110 int VDataIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
4111 int DMaskIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dmask);
4112 int TFEIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::tfe);
4120 unsigned VDataSize = getRegOperandSize(
Desc, VDataIdx);
4121 unsigned TFESize = (TFEIdx != -1 && Inst.
getOperand(TFEIdx).
getImm()) ? 1 : 0;
4126 bool IsPackedD16 =
false;
4130 int D16Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::d16);
4131 IsPackedD16 = D16Idx >= 0;
4136 if ((VDataSize / 4) ==
DataSize + TFESize)
4141 Modifiers = IsPackedD16 ?
"dmask and d16" :
"dmask";
4143 Modifiers = IsPackedD16 ?
"dmask, d16 and tfe" :
"dmask and tfe";
4145 Error(IDLoc,
Twine(
"image data size does not match ") + Modifiers);
4149bool AMDGPUAsmParser::validateMIMGAddrSize(
const MCInst &Inst, SMLoc IDLoc) {
4158 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4160 int VAddr0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr0);
4162 ? AMDGPU::OpName::srsrc
4163 : AMDGPU::OpName::rsrc;
4164 int SrsrcIdx = AMDGPU::getNamedOperandIdx(
Opc, RSrcOpName);
4165 int DimIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dim);
4166 int A16Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::a16);
4170 assert(SrsrcIdx > VAddr0Idx);
4173 if (BaseOpcode->
BVH) {
4174 if (IsA16 == BaseOpcode->
A16)
4176 Error(IDLoc,
"image address size does not match a16");
4182 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
4183 unsigned ActualAddrSize =
4184 IsNSA ? SrsrcIdx - VAddr0Idx : getRegOperandSize(
Desc, VAddr0Idx) / 4;
4186 unsigned ExpectedAddrSize =
4190 if (hasPartialNSAEncoding() &&
4193 int VAddrLastIdx = SrsrcIdx - 1;
4194 unsigned VAddrLastSize = getRegOperandSize(
Desc, VAddrLastIdx) / 4;
4196 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
4199 if (ExpectedAddrSize > 12)
4200 ExpectedAddrSize = 16;
4205 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4209 if (ActualAddrSize == ExpectedAddrSize)
4212 Error(IDLoc,
"image address size does not match dim and a16");
4216bool AMDGPUAsmParser::validateMIMGAtomicDMask(
const MCInst &Inst) {
4223 if (!
Desc.mayLoad() || !
Desc.mayStore())
4226 int DMaskIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dmask);
4233 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4236bool AMDGPUAsmParser::validateMIMGGatherDMask(
const MCInst &Inst) {
4244 int DMaskIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dmask);
4252 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4255bool AMDGPUAsmParser::validateMIMGDim(
const MCInst &Inst,
4270 for (
unsigned i = 1, e =
Operands.size(); i != e; ++i) {
4271 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
4278bool AMDGPUAsmParser::validateMIMGMSAA(
const MCInst &Inst) {
4286 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4289 if (!BaseOpcode->
MSAA)
4292 int DimIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dim);
4298 return DimInfo->
MSAA;
4304 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4305 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4306 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4316bool AMDGPUAsmParser::validateMovrels(
const MCInst &Inst,
4325 const int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
4328 const MCOperand &Src0 = Inst.
getOperand(Src0Idx);
4336 Error(getOperandLoc(
Operands, Src0Idx),
"source operand must be a VGPR");
4340bool AMDGPUAsmParser::validateMAIAccWrite(
const MCInst &Inst,
4345 if (
Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4348 const int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
4351 const MCOperand &Src0 = Inst.
getOperand(Src0Idx);
4359 "source operand must be either a VGPR or an inline constant");
4366bool AMDGPUAsmParser::validateMAISrc2(
const MCInst &Inst,
4369 const MCInstrDesc &
Desc = MII.
get(Opcode);
4372 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4375 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4379 if (Inst.
getOperand(Src2Idx).
isImm() && isInlineConstant(Inst, Src2Idx)) {
4381 "inline constants are not allowed for this operand");
4388bool AMDGPUAsmParser::validateMFMA(
const MCInst &Inst,
4396 int BlgpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::blgp);
4397 if (BlgpIdx != -1) {
4398 if (
const MFMA_F8F6F4_Info *
Info = AMDGPU::isMFMA_F8F6F4(
Opc)) {
4399 int CbszIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::cbsz);
4409 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
4411 "wrong register tuple size for cbsz value " + Twine(CBSZ));
4416 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
4418 "wrong register tuple size for blgp value " + Twine(BLGP));
4426 const int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
4430 const MCOperand &Src2 = Inst.
getOperand(Src2Idx);
4434 MCRegister Src2Reg = Src2.
getReg();
4436 if (Src2Reg == DstReg)
4441 .getSizeInBits() <= 128)
4444 if (
TRI->regsOverlap(Src2Reg, DstReg)) {
4446 "source 2 operand must not partially overlap with dst");
4453bool AMDGPUAsmParser::validateDivScale(
const MCInst &Inst) {
4457 case V_DIV_SCALE_F32_gfx6_gfx7:
4458 case V_DIV_SCALE_F32_vi:
4459 case V_DIV_SCALE_F32_gfx10:
4460 case V_DIV_SCALE_F64_gfx6_gfx7:
4461 case V_DIV_SCALE_F64_vi:
4462 case V_DIV_SCALE_F64_gfx10:
4468 for (
auto Name : {AMDGPU::OpName::src0_modifiers,
4469 AMDGPU::OpName::src2_modifiers,
4470 AMDGPU::OpName::src2_modifiers}) {
4481bool AMDGPUAsmParser::validateMIMGD16(
const MCInst &Inst) {
4489 int D16Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::d16);
4498bool AMDGPUAsmParser::validateTensorR128(
const MCInst &Inst) {
4505 int R128Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::r128);
4513 case AMDGPU::V_SUBREV_F32_e32:
4514 case AMDGPU::V_SUBREV_F32_e64:
4515 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4516 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4517 case AMDGPU::V_SUBREV_F32_e32_vi:
4518 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4519 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4520 case AMDGPU::V_SUBREV_F32_e64_vi:
4522 case AMDGPU::V_SUBREV_CO_U32_e32:
4523 case AMDGPU::V_SUBREV_CO_U32_e64:
4524 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4525 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4527 case AMDGPU::V_SUBBREV_U32_e32:
4528 case AMDGPU::V_SUBBREV_U32_e64:
4529 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4530 case AMDGPU::V_SUBBREV_U32_e32_vi:
4531 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4532 case AMDGPU::V_SUBBREV_U32_e64_vi:
4534 case AMDGPU::V_SUBREV_U32_e32:
4535 case AMDGPU::V_SUBREV_U32_e64:
4536 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4537 case AMDGPU::V_SUBREV_U32_e32_vi:
4538 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4539 case AMDGPU::V_SUBREV_U32_e64_vi:
4541 case AMDGPU::V_SUBREV_F16_e32:
4542 case AMDGPU::V_SUBREV_F16_e64:
4543 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4544 case AMDGPU::V_SUBREV_F16_e32_vi:
4545 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4546 case AMDGPU::V_SUBREV_F16_e64_vi:
4548 case AMDGPU::V_SUBREV_U16_e32:
4549 case AMDGPU::V_SUBREV_U16_e64:
4550 case AMDGPU::V_SUBREV_U16_e32_vi:
4551 case AMDGPU::V_SUBREV_U16_e64_vi:
4553 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4554 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4555 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4557 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4558 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4560 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4561 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4563 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4564 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4566 case AMDGPU::V_LSHRREV_B32_e32:
4567 case AMDGPU::V_LSHRREV_B32_e64:
4568 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4569 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4570 case AMDGPU::V_LSHRREV_B32_e32_vi:
4571 case AMDGPU::V_LSHRREV_B32_e64_vi:
4572 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4573 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4575 case AMDGPU::V_ASHRREV_I32_e32:
4576 case AMDGPU::V_ASHRREV_I32_e64:
4577 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4578 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4579 case AMDGPU::V_ASHRREV_I32_e32_vi:
4580 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4581 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4582 case AMDGPU::V_ASHRREV_I32_e64_vi:
4584 case AMDGPU::V_LSHLREV_B32_e32:
4585 case AMDGPU::V_LSHLREV_B32_e64:
4586 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4587 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4588 case AMDGPU::V_LSHLREV_B32_e32_vi:
4589 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4590 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4591 case AMDGPU::V_LSHLREV_B32_e64_vi:
4593 case AMDGPU::V_LSHLREV_B16_e32:
4594 case AMDGPU::V_LSHLREV_B16_e64:
4595 case AMDGPU::V_LSHLREV_B16_e32_vi:
4596 case AMDGPU::V_LSHLREV_B16_e64_vi:
4597 case AMDGPU::V_LSHLREV_B16_gfx10:
4599 case AMDGPU::V_LSHRREV_B16_e32:
4600 case AMDGPU::V_LSHRREV_B16_e64:
4601 case AMDGPU::V_LSHRREV_B16_e32_vi:
4602 case AMDGPU::V_LSHRREV_B16_e64_vi:
4603 case AMDGPU::V_LSHRREV_B16_gfx10:
4605 case AMDGPU::V_ASHRREV_I16_e32:
4606 case AMDGPU::V_ASHRREV_I16_e64:
4607 case AMDGPU::V_ASHRREV_I16_e32_vi:
4608 case AMDGPU::V_ASHRREV_I16_e64_vi:
4609 case AMDGPU::V_ASHRREV_I16_gfx10:
4611 case AMDGPU::V_LSHLREV_B64_e64:
4612 case AMDGPU::V_LSHLREV_B64_gfx10:
4613 case AMDGPU::V_LSHLREV_B64_vi:
4615 case AMDGPU::V_LSHRREV_B64_e64:
4616 case AMDGPU::V_LSHRREV_B64_gfx10:
4617 case AMDGPU::V_LSHRREV_B64_vi:
4619 case AMDGPU::V_ASHRREV_I64_e64:
4620 case AMDGPU::V_ASHRREV_I64_gfx10:
4621 case AMDGPU::V_ASHRREV_I64_vi:
4623 case AMDGPU::V_PK_LSHLREV_B16:
4624 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4625 case AMDGPU::V_PK_LSHLREV_B16_vi:
4627 case AMDGPU::V_PK_LSHRREV_B16:
4628 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4629 case AMDGPU::V_PK_LSHRREV_B16_vi:
4630 case AMDGPU::V_PK_ASHRREV_I16:
4631 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4632 case AMDGPU::V_PK_ASHRREV_I16_vi:
4639bool AMDGPUAsmParser::validateLdsDirect(
const MCInst &Inst,
4641 using namespace SIInstrFlags;
4642 const unsigned Opcode = Inst.
getOpcode();
4643 const MCInstrDesc &
Desc = MII.
get(Opcode);
4648 if ((
Desc.TSFlags & Enc) == 0)
4651 for (
auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4652 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4656 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4660 "lds_direct is not supported on this GPU");
4666 "lds_direct cannot be used with this instruction");
4670 if (SrcName != OpName::src0) {
4672 "lds_direct may be used as src0 only");
4682 for (
unsigned i = 1, e =
Operands.size(); i != e; ++i) {
4683 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
4684 if (
Op.isFlatOffset())
4685 return Op.getStartLoc();
4690bool AMDGPUAsmParser::validateOffset(
const MCInst &Inst,
4693 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4699 return validateFlatOffset(Inst,
Operands);
4702 return validateSMEMOffset(Inst,
Operands);
4708 const unsigned OffsetSize = 24;
4709 if (!
isUIntN(OffsetSize - 1,
Op.getImm())) {
4711 Twine(
"expected a ") + Twine(OffsetSize - 1) +
4712 "-bit unsigned offset for buffer ops");
4716 const unsigned OffsetSize = 16;
4717 if (!
isUIntN(OffsetSize,
Op.getImm())) {
4719 Twine(
"expected a ") + Twine(OffsetSize) +
"-bit unsigned offset");
4726bool AMDGPUAsmParser::validateFlatOffset(
const MCInst &Inst,
4733 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4737 if (!hasFlatOffsets() &&
Op.getImm() != 0) {
4739 "flat offset modifier is not supported on this GPU");
4746 bool AllowNegative =
4749 if (!
isIntN(OffsetSize,
Op.getImm()) || (!AllowNegative &&
Op.getImm() < 0)) {
4751 Twine(
"expected a ") +
4752 (AllowNegative ? Twine(OffsetSize) +
"-bit signed offset"
4753 : Twine(OffsetSize - 1) +
"-bit unsigned offset"));
4762 for (
unsigned i = 2, e =
Operands.size(); i != e; ++i) {
4763 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
4764 if (
Op.isSMEMOffset() ||
Op.isSMEMOffsetMod())
4765 return Op.getStartLoc();
4770bool AMDGPUAsmParser::validateSMEMOffset(
const MCInst &Inst,
4780 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4796 ?
"expected a 23-bit unsigned offset for buffer ops"
4797 :
isGFX12Plus() ?
"expected a 24-bit signed offset"
4798 : (
isVI() || IsBuffer) ?
"expected a 20-bit unsigned offset"
4799 :
"expected a 21-bit signed offset");
4804bool AMDGPUAsmParser::validateSOPLiteral(
const MCInst &Inst,
4807 const MCInstrDesc &
Desc = MII.
get(Opcode);
4811 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4812 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4814 const int OpIndices[] = { Src0Idx, Src1Idx };
4816 unsigned NumExprs = 0;
4817 unsigned NumLiterals = 0;
4820 for (
int OpIdx : OpIndices) {
4821 if (
OpIdx == -1)
break;
4827 std::optional<int64_t>
Imm;
4830 }
else if (MO.
isExpr()) {
4839 if (!
Imm.has_value()) {
4841 }
else if (!isInlineConstant(Inst,
OpIdx)) {
4845 if (NumLiterals == 0 || LiteralValue !=
Value) {
4853 if (NumLiterals + NumExprs <= 1)
4857 "only one unique literal operand is allowed");
4861bool AMDGPUAsmParser::validateOpSel(
const MCInst &Inst) {
4864 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
4874 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
4875 if (OpSelIdx != -1) {
4879 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel_hi);
4880 if (OpSelHiIdx != -1) {
4889 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
4899 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
4900 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
4901 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
4902 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel_hi);
4904 const MCOperand &Src0 = Inst.
getOperand(Src0Idx);
4905 const MCOperand &Src1 = Inst.
getOperand(Src1Idx);
4911 auto VerifyOneSGPR = [
OpSel, OpSelHi](
unsigned Index) ->
bool {
4913 return ((OpSel & Mask) == 0) && ((OpSelHi &
Mask) == 0);
4923 int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
4924 if (Src2Idx != -1) {
4925 const MCOperand &Src2 = Inst.
getOperand(Src2Idx);
4935bool AMDGPUAsmParser::validateTrue16OpSel(
const MCInst &Inst) {
4936 if (!hasTrue16Insts())
4938 const MCRegisterInfo *
MRI = getMRI();
4940 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
4946 if (OpSelOpValue == 0)
4948 unsigned OpCount = 0;
4949 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
4950 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
4951 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), OpName);
4956 MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(
Op.getReg())) {
4958 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
4959 if (OpSelOpIsHi != VGPRSuffixIsHi)
4968bool AMDGPUAsmParser::validateNeg(
const MCInst &Inst, AMDGPU::OpName OpName) {
4969 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4982 int NegIdx = AMDGPU::getNamedOperandIdx(
Opc, OpName);
4993 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4994 AMDGPU::OpName::src1_modifiers,
4995 AMDGPU::OpName::src2_modifiers};
4997 for (
unsigned i = 0; i < 3; ++i) {
5007bool AMDGPUAsmParser::validateDPP(
const MCInst &Inst,
5010 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dpp_ctrl);
5011 if (DppCtrlIdx >= 0) {
5018 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl,
Operands);
5019 Error(S,
isGFX12() ?
"DP ALU dpp only supports row_share"
5020 :
"DP ALU dpp only supports row_newbcast");
5025 int Dpp8Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dpp8);
5026 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
5029 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
5031 const MCOperand &Src1 = Inst.
getOperand(Src1Idx);
5035 "invalid operand for instruction");
5040 "src1 immediate operand invalid for instruction");
5050bool AMDGPUAsmParser::validateVccOperand(MCRegister
Reg)
const {
5051 return (
Reg == AMDGPU::VCC && isWave64()) ||
5052 (
Reg == AMDGPU::VCC_LO && isWave32());
5056bool AMDGPUAsmParser::validateVOPLiteral(
const MCInst &Inst,
5059 const MCInstrDesc &
Desc = MII.
get(Opcode);
5060 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
5062 !HasMandatoryLiteral && !
isVOPD(Opcode))
5067 std::optional<unsigned> LiteralOpIdx;
5070 for (
int OpIdx : OpIndices) {
5080 std::optional<int64_t>
Imm;
5086 bool IsAnotherLiteral =
false;
5087 if (!
Imm.has_value()) {
5089 IsAnotherLiteral =
true;
5090 }
else if (!isInlineConstant(Inst,
OpIdx)) {
5095 HasMandatoryLiteral);
5101 !IsForcedFP64 && (!has64BitLiterals() ||
Desc.getSize() != 4)) {
5103 "invalid operand for instruction");
5107 if (IsFP64 && IsValid32Op && !IsForcedFP64)
5114 if (IsAnotherLiteral && !HasMandatoryLiteral &&
5115 !getFeatureBits()[FeatureVOP3Literal]) {
5117 "literal operands are not supported");
5121 if (LiteralOpIdx && IsAnotherLiteral) {
5123 getOperandLoc(
Operands, *LiteralOpIdx)),
5124 "only one unique literal operand is allowed");
5128 if (IsAnotherLiteral)
5129 LiteralOpIdx =
OpIdx;
5152bool AMDGPUAsmParser::validateAGPRLdSt(
const MCInst &Inst)
const {
5160 ? AMDGPU::OpName::data0
5161 : AMDGPU::OpName::vdata;
5163 const MCRegisterInfo *
MRI = getMRI();
5169 if (Data2Areg >= 0 && Data2Areg != DataAreg)
5173 auto FB = getFeatureBits();
5174 if (FB[AMDGPU::FeatureGFX90AInsts]) {
5175 if (DataAreg < 0 || DstAreg < 0)
5177 return DstAreg == DataAreg;
5180 return DstAreg < 1 && DataAreg < 1;
5183bool AMDGPUAsmParser::validateVGPRAlign(
const MCInst &Inst)
const {
5184 auto FB = getFeatureBits();
5185 if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
5189 const MCRegisterInfo *
MRI = getMRI();
5192 if (FB[AMDGPU::FeatureGFX90AInsts] &&
Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5195 if (FB[AMDGPU::FeatureGFX1250Insts]) {
5199 case AMDGPU::DS_LOAD_TR6_B96:
5200 case AMDGPU::DS_LOAD_TR6_B96_gfx12:
5204 case AMDGPU::GLOBAL_LOAD_TR6_B96:
5205 case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
5209 int VAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr);
5210 if (VAddrIdx != -1) {
5212 MCRegister
Sub =
MRI->getSubReg(
Op.getReg(), AMDGPU::sub0);
5213 if ((
Sub - AMDGPU::VGPR0) & 1)
5218 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
5219 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
5224 const MCRegisterClass &VGPR32 =
MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5225 const MCRegisterClass &AGPR32 =
MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5231 MCRegister
Sub =
MRI->getSubReg(
Op.getReg(), AMDGPU::sub0);
5245 for (
unsigned i = 1, e =
Operands.size(); i != e; ++i) {
5246 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
5248 return Op.getStartLoc();
5253bool AMDGPUAsmParser::validateBLGP(
const MCInst &Inst,
5256 int BlgpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::blgp);
5259 SMLoc BLGPLoc = getBLGPLoc(
Operands);
5262 bool IsNeg = StringRef(BLGPLoc.
getPointer()).starts_with(
"neg:");
5263 auto FB = getFeatureBits();
5264 bool UsesNeg =
false;
5265 if (FB[AMDGPU::FeatureGFX940Insts]) {
5267 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5268 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5269 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5270 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
5275 if (IsNeg == UsesNeg)
5279 UsesNeg ?
"invalid modifier: blgp is not supported"
5280 :
"invalid modifier: neg is not supported");
5285bool AMDGPUAsmParser::validateWaitCnt(
const MCInst &Inst,
5291 if (
Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5292 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5293 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5294 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5297 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::sdst);
5300 if (
Reg == AMDGPU::SGPR_NULL)
5303 Error(getOperandLoc(
Operands, Src0Idx),
"src0 must be null");
5307bool AMDGPUAsmParser::validateDS(
const MCInst &Inst,
5313 return validateGWS(Inst,
Operands);
5318 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::gds);
5323 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS,
Operands);
5324 Error(S,
"gds modifier is not supported on this GPU");
5332bool AMDGPUAsmParser::validateGWS(
const MCInst &Inst,
5334 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5338 if (
Opc != AMDGPU::DS_GWS_INIT_vi &&
Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5339 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5342 const MCRegisterInfo *
MRI = getMRI();
5343 const MCRegisterClass &VGPR32 =
MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5345 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::data0);
5348 auto RegIdx =
Reg - (VGPR32.
contains(
Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5350 Error(getOperandLoc(
Operands, Data0Pos),
"vgpr must be even aligned");
5357bool AMDGPUAsmParser::validateCoherencyBits(
const MCInst &Inst,
5360 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.
getOpcode(),
5361 AMDGPU::OpName::cpol);
5369 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol,
Operands);
5372 Error(S,
"scale_offset is not supported on this GPU");
5375 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol,
Operands);
5378 Error(S,
"nv is not supported on this GPU");
5383 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol,
Operands);
5386 Error(S,
"scale_offset is not supported for this instruction");
5390 return validateTHAndScopeBits(Inst,
Operands, CPol);
5395 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol,
Operands);
5396 Error(S,
"cache policy is not supported for SMRD instructions");
5400 Error(IDLoc,
"invalid cache policy for SMEM instruction");
5409 if (!(TSFlags & AllowSCCModifier)) {
5410 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol,
Operands);
5414 "scc modifier is not supported for this instruction on this GPU");
5425 :
"instruction must use glc");
5430 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol,
Operands);
5433 &CStr.data()[CStr.find(
isGFX940() ?
"sc0" :
"glc")]);
5435 :
"instruction must not use glc");
5443bool AMDGPUAsmParser::validateTHAndScopeBits(
const MCInst &Inst,
5445 const unsigned CPol) {
5449 const unsigned Opcode = Inst.
getOpcode();
5450 const MCInstrDesc &TID = MII.
get(Opcode);
5453 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol,
Operands);
5461 return PrintError(
"instruction must use th:TH_ATOMIC_RETURN");
5469 return PrintError(
"invalid th value for SMEM instruction");
5476 return PrintError(
"scope and th combination is not valid");
5482 return PrintError(
"invalid th value for atomic instructions");
5485 return PrintError(
"invalid th value for store instructions");
5488 return PrintError(
"invalid th value for load instructions");
5494bool AMDGPUAsmParser::validateTFE(
const MCInst &Inst,
5497 if (
Desc.mayStore() &&
5499 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE,
Operands);
5501 Error(Loc,
"TFE modifier has no meaning for store instructions");
5509bool AMDGPUAsmParser::validateSetVgprMSB(
const MCInst &Inst,
5511 if (Inst.
getOpcode() != AMDGPU::S_SET_VGPR_MSB_gfx12)
5515 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::simm16);
5517 SMLoc Loc =
Operands[1]->getStartLoc();
5518 Error(Loc,
"s_set_vgpr_msb accepts values in range [0..255]");
5525bool AMDGPUAsmParser::validateWMMA(
const MCInst &Inst,
5531 auto validateFmt = [&](AMDGPU::OpName FmtOp, AMDGPU::OpName SrcOp) ->
bool {
5532 int FmtIdx = AMDGPU::getNamedOperandIdx(
Opc, FmtOp);
5536 int SrcIdx = AMDGPU::getNamedOperandIdx(
Opc, SrcOp);
5544 static const char *FmtNames[] = {
"MATRIX_FMT_FP8",
"MATRIX_FMT_BF8",
5545 "MATRIX_FMT_FP6",
"MATRIX_FMT_BF6",
5549 "wrong register tuple size for " + Twine(FmtNames[Fmt]));
5553 return validateFmt(AMDGPU::OpName::matrix_a_fmt, AMDGPU::OpName::src0) &&
5554 validateFmt(AMDGPU::OpName::matrix_b_fmt, AMDGPU::OpName::src1);
5557bool AMDGPUAsmParser::validateInstruction(
const MCInst &Inst, SMLoc IDLoc,
5559 if (!validateLdsDirect(Inst,
Operands))
5561 if (!validateTrue16OpSel(Inst)) {
5563 "op_sel operand conflicts with 16-bit operand suffix");
5566 if (!validateSOPLiteral(Inst,
Operands))
5568 if (!validateVOPLiteral(Inst,
Operands)) {
5571 if (!validateConstantBusLimitations(Inst,
Operands)) {
5574 if (!validateVOPD(Inst,
Operands)) {
5577 if (!validateIntClampSupported(Inst)) {
5579 "integer clamping is not supported on this GPU");
5582 if (!validateOpSel(Inst)) {
5584 "invalid op_sel operand");
5587 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5589 "invalid neg_lo operand");
5592 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5594 "invalid neg_hi operand");
5597 if (!validateDPP(Inst,
Operands)) {
5601 if (!validateMIMGD16(Inst)) {
5603 "d16 modifier is not supported on this GPU");
5606 if (!validateMIMGDim(Inst,
Operands)) {
5607 Error(IDLoc,
"missing dim operand");
5610 if (!validateTensorR128(Inst)) {
5612 "instruction must set modifier r128=0");
5615 if (!validateMIMGMSAA(Inst)) {
5617 "invalid dim; must be MSAA type");
5620 if (!validateMIMGDataSize(Inst, IDLoc)) {
5623 if (!validateMIMGAddrSize(Inst, IDLoc))
5625 if (!validateMIMGAtomicDMask(Inst)) {
5627 "invalid atomic image dmask");
5630 if (!validateMIMGGatherDMask(Inst)) {
5632 "invalid image_gather dmask: only one bit must be set");
5635 if (!validateMovrels(Inst,
Operands)) {
5638 if (!validateOffset(Inst,
Operands)) {
5641 if (!validateMAIAccWrite(Inst,
Operands)) {
5644 if (!validateMAISrc2(Inst,
Operands)) {
5647 if (!validateMFMA(Inst,
Operands)) {
5650 if (!validateCoherencyBits(Inst,
Operands, IDLoc)) {
5654 if (!validateAGPRLdSt(Inst)) {
5655 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5656 ?
"invalid register class: data and dst should be all VGPR or AGPR"
5657 :
"invalid register class: agpr loads and stores not supported on this GPU"
5661 if (!validateVGPRAlign(Inst)) {
5663 "invalid register class: vgpr tuples must be 64 bit aligned");
5670 if (!validateBLGP(Inst,
Operands)) {
5674 if (!validateDivScale(Inst)) {
5675 Error(IDLoc,
"ABS not allowed in VOP3B instructions");
5678 if (!validateWaitCnt(Inst,
Operands)) {
5681 if (!validateTFE(Inst,
Operands)) {
5684 if (!validateSetVgprMSB(Inst,
Operands)) {
5687 if (!validateWMMA(Inst,
Operands)) {
5696 unsigned VariantID = 0);
5700 unsigned VariantID);
5702bool AMDGPUAsmParser::isSupportedMnemo(
StringRef Mnemo,
5707bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5708 const FeatureBitset &FBS,
5709 ArrayRef<unsigned> Variants) {
5710 for (
auto Variant : Variants) {
5718bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5720 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5723 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5728 getParser().clearPendingErrors();
5732 StringRef VariantName = getMatchedVariantName();
5733 if (!VariantName.
empty() && isSupportedMnemo(Mnemo, FBS)) {
5736 " variant of this instruction is not supported"));
5740 if (
isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5741 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5743 FeatureBitset FeaturesWS32 = getFeatureBits();
5744 FeaturesWS32.
flip(AMDGPU::FeatureWavefrontSize64)
5745 .
flip(AMDGPU::FeatureWavefrontSize32);
5746 FeatureBitset AvailableFeaturesWS32 =
5747 ComputeAvailableFeatures(FeaturesWS32);
5749 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5750 return Error(IDLoc,
"instruction requires wavesize=32");
5754 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5755 return Error(IDLoc,
"instruction not supported on this GPU");
5760 return Error(IDLoc,
"invalid instruction" + Suggestion);
5766 const auto &
Op = ((AMDGPUOperand &)*
Operands[InvalidOprIdx]);
5767 if (
Op.isToken() && InvalidOprIdx > 1) {
5768 const auto &PrevOp = ((AMDGPUOperand &)*
Operands[InvalidOprIdx - 1]);
5769 return PrevOp.isToken() && PrevOp.getToken() ==
"::";
5774bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc,
unsigned &Opcode,
5777 uint64_t &ErrorInfo,
5778 bool MatchingInlineAsm) {
5781 unsigned Result = Match_Success;
5782 for (
auto Variant : getMatchedVariants()) {
5784 auto R = MatchInstructionImpl(
Operands, Inst, EI, MatchingInlineAsm,
5789 if (R == Match_Success || R == Match_MissingFeature ||
5790 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5791 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5792 Result != Match_MissingFeature)) {
5796 if (R == Match_Success)
5800 if (Result == Match_Success) {
5801 if (!validateInstruction(Inst, IDLoc,
Operands)) {
5809 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5815 case Match_MissingFeature:
5819 return Error(IDLoc,
"operands are not valid for this GPU or mode");
5821 case Match_InvalidOperand: {
5822 SMLoc ErrorLoc = IDLoc;
5823 if (ErrorInfo != ~0ULL) {
5824 if (ErrorInfo >=
Operands.size()) {
5825 return Error(IDLoc,
"too few operands for instruction");
5827 ErrorLoc = ((AMDGPUOperand &)*
Operands[ErrorInfo]).getStartLoc();
5828 if (ErrorLoc == SMLoc())
5832 return Error(ErrorLoc,
"invalid VOPDY instruction");
5834 return Error(ErrorLoc,
"invalid operand for instruction");
5837 case Match_MnemonicFail:
5843bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5848 if (getParser().parseAbsoluteExpression(Tmp)) {
5851 Ret =
static_cast<uint32_t
>(Tmp);
bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
  if (!getSTI().getTargetTriple().isAMDGCN())
    return TokError("directive only supported for amdgcn architecture");
5859 std::string TargetIDDirective;
5860 SMLoc TargetStart = getTok().getLoc();
5861 if (getParser().parseEscapedString(TargetIDDirective))
5864 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5865 if (getTargetStreamer().getTargetID()->
toString() != TargetIDDirective)
5866 return getParser().Error(TargetRange.
Start,
5867 (Twine(
".amdgcn_target directive's target id ") +
5868 Twine(TargetIDDirective) +
5869 Twine(
" does not match the specified target id ") +
5870 Twine(getTargetStreamer().getTargetID()->
toString())).str());
5875bool AMDGPUAsmParser::OutOfRangeError(SMRange
Range) {
5879bool AMDGPUAsmParser::calculateGPRBlocks(
5880 const FeatureBitset &Features,
const MCExpr *VCCUsed,
5881 const MCExpr *FlatScrUsed,
bool XNACKUsed,
5882 std::optional<bool> EnableWavefrontSize32,
const MCExpr *NextFreeVGPR,
5883 SMRange VGPRRange,
const MCExpr *NextFreeSGPR, SMRange SGPRRange,
5884 const MCExpr *&VGPRBlocks,
const MCExpr *&SGPRBlocks) {
5890 const MCExpr *
NumSGPRs = NextFreeSGPR;
5891 int64_t EvaluatedSGPRs;
5896 unsigned MaxAddressableNumSGPRs =
5899 if (
NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
Version.Major >= 8 &&
5900 !Features.
test(FeatureSGPRInitBug) &&
5901 static_cast<uint64_t
>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5902 return OutOfRangeError(SGPRRange);
5904 const MCExpr *ExtraSGPRs =
5908 if (
NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
5909 (
Version.Major <= 7 || Features.
test(FeatureSGPRInitBug)) &&
5910 static_cast<uint64_t
>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5911 return OutOfRangeError(SGPRRange);
5913 if (Features.
test(FeatureSGPRInitBug))
5920 auto GetNumGPRBlocks = [&Ctx](
const MCExpr *NumGPR,
5921 unsigned Granule) ->
const MCExpr * {
5925 const MCExpr *AlignToGPR =
5927 const MCExpr *DivGPR =
5933 VGPRBlocks = GetNumGPRBlocks(
5942bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5943 if (!getSTI().getTargetTriple().isAMDGCN())
5944 return TokError(
"directive only supported for amdgcn architecture");
5947 return TokError(
"directive only supported for amdhsa OS");
5949 StringRef KernelName;
5950 if (getParser().parseIdentifier(KernelName))
5953 AMDGPU::MCKernelDescriptor KD =
5965 const MCExpr *NextFreeVGPR = ZeroExpr;
5967 const MCExpr *NamedBarCnt = ZeroExpr;
5968 uint64_t SharedVGPRCount = 0;
5969 uint64_t PreloadLength = 0;
5970 uint64_t PreloadOffset = 0;
5972 const MCExpr *NextFreeSGPR = ZeroExpr;
5975 unsigned ImpliedUserSGPRCount = 0;
5979 std::optional<unsigned> ExplicitUserSGPRCount;
5980 const MCExpr *ReserveVCC = OneExpr;
5981 const MCExpr *ReserveFlatScr = OneExpr;
5982 std::optional<bool> EnableWavefrontSize32;
5988 SMRange IDRange = getTok().getLocRange();
5989 if (!parseId(
ID,
"expected .amdhsa_ directive or .end_amdhsa_kernel"))
5992 if (
ID ==
".end_amdhsa_kernel")
5996 return TokError(
".amdhsa_ directives cannot be repeated");
5998 SMLoc ValStart = getLoc();
5999 const MCExpr *ExprVal;
6000 if (getParser().parseExpression(ExprVal))
6002 SMLoc ValEnd = getLoc();
6003 SMRange ValRange = SMRange(ValStart, ValEnd);
6006 uint64_t Val = IVal;
6007 bool EvaluatableExpr;
6008 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
6010 return OutOfRangeError(ValRange);
6014#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
6015 if (!isUInt<ENTRY##_WIDTH>(Val)) \
6016 return OutOfRangeError(RANGE); \
6017 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
6022#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
6024 return Error(IDRange.Start, "directive should have resolvable expression", \
6027 if (
ID ==
".amdhsa_group_segment_fixed_size") {
6030 return OutOfRangeError(ValRange);
6032 }
else if (
ID ==
".amdhsa_private_segment_fixed_size") {
6035 return OutOfRangeError(ValRange);
6037 }
else if (
ID ==
".amdhsa_kernarg_size") {
6039 return OutOfRangeError(ValRange);
6041 }
else if (
ID ==
".amdhsa_user_sgpr_count") {
6043 ExplicitUserSGPRCount = Val;
6044 }
else if (
ID ==
".amdhsa_user_sgpr_private_segment_buffer") {
6048 "directive is not supported with architected flat scratch",
6051 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
6054 ImpliedUserSGPRCount += 4;
6055 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_preload_length") {
6058 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6061 return OutOfRangeError(ValRange);
6065 ImpliedUserSGPRCount += Val;
6066 PreloadLength = Val;
6068 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_preload_offset") {
6071 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6074 return OutOfRangeError(ValRange);
6078 PreloadOffset = Val;
6079 }
else if (
ID ==
".amdhsa_user_sgpr_dispatch_ptr") {
6082 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6085 ImpliedUserSGPRCount += 2;
6086 }
else if (
ID ==
".amdhsa_user_sgpr_queue_ptr") {
6089 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6092 ImpliedUserSGPRCount += 2;
6093 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_segment_ptr") {
6096 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6099 ImpliedUserSGPRCount += 2;
6100 }
else if (
ID ==
".amdhsa_user_sgpr_dispatch_id") {
6103 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6106 ImpliedUserSGPRCount += 2;
6107 }
else if (
ID ==
".amdhsa_user_sgpr_flat_scratch_init") {
6110 "directive is not supported with architected flat scratch",
6114 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6117 ImpliedUserSGPRCount += 2;
6118 }
else if (
ID ==
".amdhsa_user_sgpr_private_segment_size") {
6121 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6124 ImpliedUserSGPRCount += 1;
6125 }
else if (
ID ==
".amdhsa_wavefront_size32") {
6127 if (IVersion.
Major < 10)
6128 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
6129 EnableWavefrontSize32 = Val;
6131 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6133 }
else if (
ID ==
".amdhsa_uses_dynamic_stack") {
6135 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6137 }
else if (
ID ==
".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6140 "directive is not supported with architected flat scratch",
6143 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6145 }
else if (
ID ==
".amdhsa_enable_private_segment") {
6149 "directive is not supported without architected flat scratch",
6152 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6154 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_x") {
6156 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6158 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_y") {
6160 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6162 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_z") {
6164 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6166 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_info") {
6168 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6170 }
else if (
ID ==
".amdhsa_system_vgpr_workitem_id") {
6172 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6174 }
else if (
ID ==
".amdhsa_next_free_vgpr") {
6175 VGPRRange = ValRange;
6176 NextFreeVGPR = ExprVal;
6177 }
else if (
ID ==
".amdhsa_next_free_sgpr") {
6178 SGPRRange = ValRange;
6179 NextFreeSGPR = ExprVal;
6180 }
else if (
ID ==
".amdhsa_accum_offset") {
6182 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6183 AccumOffset = ExprVal;
6184 }
else if (
ID ==
".amdhsa_named_barrier_count") {
6186 return Error(IDRange.
Start,
"directive requires gfx1250+", IDRange);
6187 NamedBarCnt = ExprVal;
6188 }
else if (
ID ==
".amdhsa_reserve_vcc") {
6190 return OutOfRangeError(ValRange);
6191 ReserveVCC = ExprVal;
6192 }
else if (
ID ==
".amdhsa_reserve_flat_scratch") {
6193 if (IVersion.
Major < 7)
6194 return Error(IDRange.
Start,
"directive requires gfx7+", IDRange);
6197 "directive is not supported with architected flat scratch",
6200 return OutOfRangeError(ValRange);
6201 ReserveFlatScr = ExprVal;
6202 }
else if (
ID ==
".amdhsa_reserve_xnack_mask") {
6203 if (IVersion.
Major < 8)
6204 return Error(IDRange.
Start,
"directive requires gfx8+", IDRange);
6206 return OutOfRangeError(ValRange);
6207 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6208 return getParser().Error(IDRange.
Start,
".amdhsa_reserve_xnack_mask does not match target id",
6210 }
else if (
ID ==
".amdhsa_float_round_mode_32") {
6212 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6214 }
else if (
ID ==
".amdhsa_float_round_mode_16_64") {
6216 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6218 }
else if (
ID ==
".amdhsa_float_denorm_mode_32") {
6220 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6222 }
else if (
ID ==
".amdhsa_float_denorm_mode_16_64") {
6224 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6226 }
else if (
ID ==
".amdhsa_dx10_clamp") {
6227 if (IVersion.
Major >= 12)
6228 return Error(IDRange.
Start,
"directive unsupported on gfx12+", IDRange);
6230 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6232 }
else if (
ID ==
".amdhsa_ieee_mode") {
6233 if (IVersion.
Major >= 12)
6234 return Error(IDRange.
Start,
"directive unsupported on gfx12+", IDRange);
6236 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6238 }
else if (
ID ==
".amdhsa_fp16_overflow") {
6239 if (IVersion.
Major < 9)
6240 return Error(IDRange.
Start,
"directive requires gfx9+", IDRange);
6242 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6244 }
else if (
ID ==
".amdhsa_tg_split") {
6246 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6249 }
else if (
ID ==
".amdhsa_workgroup_processor_mode") {
6252 "directive unsupported on " + getSTI().
getCPU(), IDRange);
6254 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6256 }
else if (
ID ==
".amdhsa_memory_ordered") {
6257 if (IVersion.
Major < 10)
6258 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
6260 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6262 }
else if (
ID ==
".amdhsa_forward_progress") {
6263 if (IVersion.
Major < 10)
6264 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
6266 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6268 }
else if (
ID ==
".amdhsa_shared_vgpr_count") {
6270 if (IVersion.
Major < 10 || IVersion.
Major >= 12)
6271 return Error(IDRange.
Start,
"directive requires gfx10 or gfx11",
6273 SharedVGPRCount = Val;
6275 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6277 }
else if (
ID ==
".amdhsa_inst_pref_size") {
6278 if (IVersion.
Major < 11)
6279 return Error(IDRange.
Start,
"directive requires gfx11+", IDRange);
6280 if (IVersion.
Major == 11) {
6282 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6286 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6289 }
else if (
ID ==
".amdhsa_exception_fp_ieee_invalid_op") {
6292 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6294 }
else if (
ID ==
".amdhsa_exception_fp_denorm_src") {
6296 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6298 }
else if (
ID ==
".amdhsa_exception_fp_ieee_div_zero") {
6301 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6303 }
else if (
ID ==
".amdhsa_exception_fp_ieee_overflow") {
6305 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6307 }
else if (
ID ==
".amdhsa_exception_fp_ieee_underflow") {
6309 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6311 }
else if (
ID ==
".amdhsa_exception_fp_ieee_inexact") {
6313 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6315 }
else if (
ID ==
".amdhsa_exception_int_div_zero") {
6317 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6319 }
else if (
ID ==
".amdhsa_round_robin_scheduling") {
6320 if (IVersion.
Major < 12)
6321 return Error(IDRange.
Start,
"directive requires gfx12+", IDRange);
6323 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6326 return Error(IDRange.
Start,
"unknown .amdhsa_kernel directive", IDRange);
6329#undef PARSE_BITS_ENTRY
6332 if (!Seen.
contains(
".amdhsa_next_free_vgpr"))
6333 return TokError(
".amdhsa_next_free_vgpr directive is required");
6335 if (!Seen.
contains(
".amdhsa_next_free_sgpr"))
6336 return TokError(
".amdhsa_next_free_sgpr directive is required");
6338 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
6343 if (PreloadLength) {
6349 const MCExpr *VGPRBlocks;
6350 const MCExpr *SGPRBlocks;
6351 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
6352 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6353 EnableWavefrontSize32, NextFreeVGPR,
6354 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
6358 int64_t EvaluatedVGPRBlocks;
6359 bool VGPRBlocksEvaluatable =
6360 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
6361 if (VGPRBlocksEvaluatable &&
6363 static_cast<uint64_t
>(EvaluatedVGPRBlocks))) {
6364 return OutOfRangeError(VGPRRange);
6368 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6369 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT,
getContext());
6371 int64_t EvaluatedSGPRBlocks;
6372 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
6374 static_cast<uint64_t
>(EvaluatedSGPRBlocks)))
6375 return OutOfRangeError(SGPRRange);
6378 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6379 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
getContext());
  if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
    return TokError("amdgpu_user_sgpr_count smaller than implied by "
                    "enabled user SGPRs");
6387 return TokError(
"too many user SGPRs enabled");
6391 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
6392 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT,
getContext());
6396 return TokError(
"too many user SGPRs enabled");
6400 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
6401 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT,
getContext());
6406 return TokError(
"Kernarg size should be resolvable");
6407 uint64_t kernarg_size = IVal;
6408 if (PreloadLength && kernarg_size &&
6409 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6410 return TokError(
"Kernarg preload length + offset is larger than the "
6411 "kernarg segment size");
  if (!Seen.contains(".amdhsa_accum_offset"))
    return TokError(".amdhsa_accum_offset directive is required");

  int64_t EvaluatedAccum;
  bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
  uint64_t UEvaluatedAccum = EvaluatedAccum;
  if (AccumEvaluatable &&
      (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
    return TokError("accum_offset should be in range [4..256] in "
                    "increments of 4");

  int64_t EvaluatedNumVGPR;
  if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
      AccumEvaluatable &&
      UEvaluatedAccum >
          alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
    return TokError("accum_offset exceeds total VGPR allocation");

      COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
      COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,

      COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
      COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
  if (IVersion.Major >= 10 && IVersion.Major < 12) {
    if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
      return TokError("shared_vgpr_count directive not valid on "
                      "wavefront size 32");
    }

    if (VGPRBlocksEvaluatable &&
        (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
         63)) {
      return TokError("shared_vgpr_count*2 + "
                      "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
                      "exceed 63");
    }
  }

  getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
                                                 NextFreeVGPR, NextFreeSGPR,
                                                 ReserveVCC, ReserveFlatScr);
  return false;
}
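// Editor's note (hedged sketch): the VGPRBlocks/SGPRBlocks expressions checked
// and emitted above hold register counts in "granulated" form, i.e. rounded up
// to the hardware allocation granule and divided by it, roughly
//
//   VGPRBlocks = alignTo(max(1, NextFreeVGPR), Granule) / Granule - 1
//
// where the granule (e.g. 4 or 8 VGPRs) depends on wavefront size and GPU
// generation. The exact computation lives in calculateGPRBlocks(); this
// comment only summarizes the shape of the encoding.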
bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
  if (ParseAsAbsoluteExpression(Version))
    return true;

  getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
  return false;
}
bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
                                               AMDGPUMCKernelCodeT &C) {
  // max_scratch_backing_memory_byte_size is deprecated; consume and ignore it.
  if (ID == "max_scratch_backing_memory_byte_size") {
    Parser.eatToEndOfStatement();
    return false;
  }

  SmallString<40> ErrStr;
  raw_svector_ostream Err(ErrStr);
  if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
    return TokError(Err.str());
  }

  if (ID == "enable_wavefront_size32") {
      if (!isGFX10Plus())
        return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
        return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
        return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
  }

  if (ID == "wavefront_size") {
    if (C.wavefront_size == 5) {
      if (!isGFX10Plus())
        return TokError("wavefront_size=5 is only allowed on GFX10+");
        return TokError("wavefront_size=5 requires +WavefrontSize32");
    } else if (C.wavefront_size == 6) {
        return TokError("wavefront_size=6 requires +WavefrontSize64");
    }
  }

  return false;
}
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
  AMDGPUMCKernelCodeT KernelCode;

  while (true) {
    StringRef ID;
    if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
      return true;

    if (ID == ".end_amd_kernel_code_t")
      break;

    if (ParseAMDKernelCodeTValue(ID, KernelCode))
      return true;
  }

  getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
  return false;
}
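// Editor's note (illustrative sketch): ParseDirectiveAMDKernelCodeT() consumes
// a legacy block of "key = value" pairs, for example:
//
//   .amd_kernel_code_t
//     enable_wavefront_size32 = 1
//     wavefront_size = 5
//   .end_amd_kernel_code_t
//
// Each key is forwarded to ParseAMDKernelCodeTValue() above, which also
// validates wavefront-size settings against the selected subtarget features.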
bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
  StringRef KernelName;
  if (!parseId(KernelName, "expected symbol name"))
    return true;

  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
                                           ELF::STT_AMDGPU_HSA_KERNEL);
  return false;
}
bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
  if (!getSTI().getTargetTriple().isAMDGCN()) {
    return Error(getLoc(),
                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
                 "architectures");
  }

  auto TargetIDDirective = getLexer().getTok().getStringContents();
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return Error(getParser().getTok().getLoc(), "target id must match options");

  getTargetStreamer().EmitISAVersion();
  return false;
}

bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
  std::string HSAMetadataString;

  if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
    return Error(getLoc(), "invalid HSA metadata");
  return false;
}
bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
                                          const char *AssemblerDirectiveEnd,
                                          std::string &CollectString) {

  raw_string_ostream CollectStream(CollectString);

  getLexer().setSkipSpace(false);

  bool FoundEnd = false;
  while (!isToken(AsmToken::Eof)) {
    while (isToken(AsmToken::Space)) {
      CollectStream << getTokenStr();
      Lex();
    }

    if (trySkipId(AssemblerDirectiveEnd)) {
      FoundEnd = true;
      break;
    }

    CollectStream << Parser.parseStringToEndOfStatement()
                  << getContext().getAsmInfo()->getSeparatorString();

    Parser.eatToEndOfStatement();
  }

  getLexer().setSkipSpace(true);

  if (!FoundEnd)
    return TokError(Twine("expected directive ") +
                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));

  return false;
}
bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
  auto *PALMetadata = getTargetStreamer().getPALMetadata();
  if (!PALMetadata->setFromString(String))
    return Error(getLoc(), "invalid PAL metadata");
  return false;
}

bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
    return Error(getLoc(),
                 "not available on non-amdpal OSes")).str());

  auto *PALMetadata = getTargetStreamer().getPALMetadata();
  PALMetadata->setLegacy();

  if (ParseAsAbsoluteExpression(Key)) {
    return TokError(Twine("invalid value in ") +
  }
    return TokError(Twine("expected an even number of values in ") +
  if (ParseAsAbsoluteExpression(Value)) {
    return TokError(Twine("invalid value in ") +
  }
  PALMetadata->setRegister(Key, Value);
bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
  if (getParser().checkForValidSection())
    return true;

  StringRef Name;
  SMLoc NameLoc = getLoc();
  if (getParser().parseIdentifier(Name))
    return TokError("expected identifier in directive");

  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
  if (getParser().parseComma())
    return true;

  int64_t Size;
  SMLoc SizeLoc = getLoc();
  if (getParser().parseAbsoluteExpression(Size))
    return true;
  if (Size < 0)
    return Error(SizeLoc, "size must be non-negative");
  if (Size > LocalMemorySize)
    return Error(SizeLoc, "size is too large");

  int64_t Alignment = 4;
  if (trySkipToken(AsmToken::Comma)) {
    SMLoc AlignLoc = getLoc();
    if (getParser().parseAbsoluteExpression(Alignment))
      return true;
    if (Alignment < 0 || !isPowerOf2_64(Alignment))
      return Error(AlignLoc, "alignment must be a power of two");

    if (Alignment >= 1u << 31)
      return Error(AlignLoc, "alignment is too large");
  }

  Symbol->redefineIfPossible();
  if (!Symbol->isUndefined())
    return Error(NameLoc, "invalid symbol redefinition");

  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
  return false;
}
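// Editor's note (illustrative sketch): the directive parsed above takes a
// symbol, a byte size, and an optional alignment, e.g.
//
//   .amdgpu_lds my_shared_buffer, 4096, 16
//
// The size must be non-negative and fit in local memory; the alignment, when
// present, must be a power of two smaller than 2^31.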
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getString();

  if (IDVal == ".amdhsa_kernel")
    return ParseDirectiveAMDHSAKernel();

  if (IDVal == ".amdhsa_code_object_version")
    return ParseDirectiveAMDHSACodeObjectVersion();

    return ParseDirectiveHSAMetadata();

  if (IDVal == ".amd_kernel_code_t")
    return ParseDirectiveAMDKernelCodeT();

  if (IDVal == ".amdgpu_hsa_kernel")
    return ParseDirectiveAMDGPUHsaKernel();

  if (IDVal == ".amd_amdgpu_isa")
    return ParseDirectiveISAVersion();

                 Twine(" directive is "
                       "not available on non-amdhsa OSes"))

  if (IDVal == ".amdgcn_target")
    return ParseDirectiveAMDGCNTarget();

  if (IDVal == ".amdgpu_lds")
    return ParseDirectiveAMDGPULDS();

    return ParseDirectivePALMetadataBegin();

    return ParseDirectivePALMetadata();
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           MCRegister Reg) {
  if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))

  if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
    return hasSGPR104_SGPR105();

  case SRC_SHARED_BASE_LO:
  case SRC_SHARED_BASE:
  case SRC_SHARED_LIMIT_LO:
  case SRC_SHARED_LIMIT:
  case SRC_PRIVATE_BASE_LO:
  case SRC_PRIVATE_BASE:
  case SRC_PRIVATE_LIMIT_LO:
  case SRC_PRIVATE_LIMIT:

  case SRC_FLAT_SCRATCH_BASE_LO:
  case SRC_FLAT_SCRATCH_BASE_HI:
    return hasGloballyAddressableScratch();
  case SRC_POPS_EXITING_WAVE_ID:

    return (isVI() || isGFX9()) &&
           getTargetStreamer().getTargetID()->isXnackSupported();

  if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
    return hasSGPR102_SGPR103();
6841 ParseStatus Res = parseVOPD(
Operands);
6846 Res = MatchOperandParserImpl(
Operands, Mnemonic);
6858 SMLoc LBraceLoc = getLoc();
6863 auto Loc = getLoc();
6866 Error(Loc,
"expected a register");
6870 RBraceLoc = getLoc();
6875 "expected a comma or a closing square bracket"))
6879 if (
Operands.size() - Prefix > 1) {
6881 AMDGPUOperand::CreateToken(
this,
"[", LBraceLoc));
6882 Operands.push_back(AMDGPUOperand::CreateToken(
this,
"]", RBraceLoc));
StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
  setForcedEncodingSize(0);
  setForcedDPP(false);
  setForcedSDWA(false);

  if (Name.consume_back("_e64_dpp")) {
    setForcedDPP(true);
    setForcedEncodingSize(64);
    return Name;
  }
  if (Name.consume_back("_e64")) {
    setForcedEncodingSize(64);
    return Name;
  }
  if (Name.consume_back("_e32")) {
    setForcedEncodingSize(32);
    return Name;
  }
  if (Name.consume_back("_dpp")) {
    setForcedDPP(true);
    return Name;
  }
  if (Name.consume_back("_sdwa")) {
    setForcedSDWA(true);
    return Name;
  }
  return Name;
}
6923 unsigned VariantID);
6929 Name = parseMnemonicSuffix(Name);
6935 Operands.push_back(AMDGPUOperand::CreateToken(
this, Name, NameLoc));
6937 bool IsMIMG = Name.starts_with(
"image_");
6940 OperandMode
Mode = OperandMode_Default;
6942 Mode = OperandMode_NSA;
6946 checkUnsupportedInstruction(Name, NameLoc);
6947 if (!Parser.hasPendingError()) {
6950 :
"not a valid operand.";
6951 Error(getLoc(), Msg);
6970ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6973 if (!trySkipId(Name))
6976 Operands.push_back(AMDGPUOperand::CreateToken(
this, Name, S));
6980ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
const char *Prefix,
6989ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6991 std::function<
bool(int64_t &)> ConvertResult) {
6995 ParseStatus Res = parseIntWithPrefix(Prefix,
Value);
6999 if (ConvertResult && !ConvertResult(
Value)) {
7000 Error(S,
"invalid " + StringRef(Prefix) +
" value.");
7003 Operands.push_back(AMDGPUOperand::CreateImm(
this,
Value, S, ImmTy));
7007ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
7009 bool (*ConvertResult)(int64_t &)) {
7018 const unsigned MaxSize = 4;
7022 for (
int I = 0; ; ++
I) {
7024 SMLoc Loc = getLoc();
7028 if (
Op != 0 &&
Op != 1)
7029 return Error(Loc,
"invalid " + StringRef(Prefix) +
" value.");
7036 if (
I + 1 == MaxSize)
7037 return Error(getLoc(),
"expected a closing square bracket");
7043 Operands.push_back(AMDGPUOperand::CreateImm(
this, Val, S, ImmTy));
ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
                                           OperandVector &Operands,
                                           AMDGPUOperand::ImmTy ImmTy) {
  int64_t Bit;
  SMLoc S = getLoc();

  if (trySkipId(Name)) {
    Bit = 1;
  } else if (trySkipId("no", Name)) {
    Bit = 0;
  }

    return Error(S, "r128 modifier is not supported on this GPU");
  if (Name == "a16" && !hasA16())
    return Error(S, "a16 modifier is not supported on this GPU");

  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
    ImmTy = AMDGPUOperand::ImmTyR128A16;

  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
7073unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
7074 bool &Disabling)
const {
7075 Disabling =
Id.consume_front(
"no");
7078 return StringSwitch<unsigned>(Id)
7085 return StringSwitch<unsigned>(Id)
7095 SMLoc StringLoc = getLoc();
7097 int64_t CPolVal = 0;
7117 ResScope = parseScope(
Operands, Scope);
7130 if (trySkipId(
"nv")) {
7134 }
else if (trySkipId(
"no",
"nv")) {
7141 if (trySkipId(
"scale_offset")) {
7145 }
else if (trySkipId(
"no",
"scale_offset")) {
7158 Operands.push_back(AMDGPUOperand::CreateImm(
this, CPolVal, StringLoc,
7159 AMDGPUOperand::ImmTyCPol));
7164 SMLoc OpLoc = getLoc();
7165 unsigned Enabled = 0, Seen = 0;
7169 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
7176 return Error(S,
"dlc modifier is not supported on this GPU");
7179 return Error(S,
"scc modifier is not supported on this GPU");
7182 return Error(S,
"duplicate cache policy modifier");
7194 AMDGPUOperand::CreateImm(
this,
Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
7203 ParseStatus Res = parseStringOrIntWithPrefix(
7204 Operands,
"scope", {
"SCOPE_CU",
"SCOPE_SE",
"SCOPE_DEV",
"SCOPE_SYS"},
7218 ParseStatus Res = parseStringWithPrefix(
"th",
Value, StringLoc);
7222 if (
Value ==
"TH_DEFAULT")
7224 else if (
Value ==
"TH_STORE_LU" ||
Value ==
"TH_LOAD_WB" ||
7225 Value ==
"TH_LOAD_NT_WB") {
7226 return Error(StringLoc,
"invalid th value");
7227 }
else if (
Value.consume_front(
"TH_ATOMIC_")) {
7229 }
else if (
Value.consume_front(
"TH_LOAD_")) {
7231 }
else if (
Value.consume_front(
"TH_STORE_")) {
7234 return Error(StringLoc,
"invalid th value");
7237 if (
Value ==
"BYPASS")
7242 TH |= StringSwitch<int64_t>(
Value)
7252 .Default(0xffffffff);
7254 TH |= StringSwitch<int64_t>(
Value)
7265 .Default(0xffffffff);
7268 if (TH == 0xffffffff)
7269 return Error(StringLoc,
"invalid th value");
7276 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7277 AMDGPUOperand::ImmTy ImmT, int64_t
Default = 0,
7278 std::optional<unsigned> InsertAt = std::nullopt) {
7279 auto i = OptionalIdx.find(ImmT);
7280 if (i != OptionalIdx.end()) {
7281 unsigned Idx = i->second;
7282 const AMDGPUOperand &
Op =
7283 static_cast<const AMDGPUOperand &
>(*
Operands[Idx]);
7287 Op.addImmOperands(Inst, 1);
7289 if (InsertAt.has_value())
7296ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7302 StringLoc = getLoc();
7307ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7313 SMLoc StringLoc = getLoc();
7317 Value = getTokenStr();
7321 if (
Value == Ids[IntVal])
7326 if (IntVal < 0 || IntVal >= (int64_t)Ids.
size())
7327 return Error(StringLoc,
"invalid " + Twine(Name) +
" value");
7332ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7334 AMDGPUOperand::ImmTy
Type) {
7338 ParseStatus Res = parseStringOrIntWithPrefix(
Operands, Name, Ids, IntVal);
7340 Operands.push_back(AMDGPUOperand::CreateImm(
this, IntVal, S,
Type));
7349bool AMDGPUAsmParser::tryParseFmt(
const char *Pref,
7353 SMLoc Loc = getLoc();
7355 auto Res = parseIntWithPrefix(Pref, Val);
7361 if (Val < 0 || Val > MaxVal) {
7362 Error(Loc, Twine(
"out of range ", StringRef(Pref)));
7371 AMDGPUOperand::ImmTy ImmTy) {
7372 const char *Pref =
"index_key";
7374 SMLoc Loc = getLoc();
7375 auto Res = parseIntWithPrefix(Pref, ImmVal);
7379 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7380 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7381 (ImmVal < 0 || ImmVal > 1))
7382 return Error(Loc, Twine(
"out of range ", StringRef(Pref)));
7384 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7385 return Error(Loc, Twine(
"out of range ", StringRef(Pref)));
7387 Operands.push_back(AMDGPUOperand::CreateImm(
this, ImmVal, Loc, ImmTy));
7392 return tryParseIndexKey(
Operands, AMDGPUOperand::ImmTyIndexKey8bit);
7396 return tryParseIndexKey(
Operands, AMDGPUOperand::ImmTyIndexKey16bit);
7400 return tryParseIndexKey(
Operands, AMDGPUOperand::ImmTyIndexKey32bit);
7405 AMDGPUOperand::ImmTy
Type) {
7406 return parseStringOrIntWithPrefix(
Operands, Name,
7407 {
"MATRIX_FMT_FP8",
"MATRIX_FMT_BF8",
7408 "MATRIX_FMT_FP6",
"MATRIX_FMT_BF6",
7414 return tryParseMatrixFMT(
Operands,
"matrix_a_fmt",
7415 AMDGPUOperand::ImmTyMatrixAFMT);
7419 return tryParseMatrixFMT(
Operands,
"matrix_b_fmt",
7420 AMDGPUOperand::ImmTyMatrixBFMT);
7425 AMDGPUOperand::ImmTy
Type) {
7426 return parseStringOrIntWithPrefix(
7427 Operands, Name, {
"MATRIX_SCALE_ROW0",
"MATRIX_SCALE_ROW1"},
Type);
7431 return tryParseMatrixScale(
Operands,
"matrix_a_scale",
7432 AMDGPUOperand::ImmTyMatrixAScale);
7436 return tryParseMatrixScale(
Operands,
"matrix_b_scale",
7437 AMDGPUOperand::ImmTyMatrixBScale);
7442 AMDGPUOperand::ImmTy
Type) {
7443 return parseStringOrIntWithPrefix(
7445 {
"MATRIX_SCALE_FMT_E8",
"MATRIX_SCALE_FMT_E5M3",
"MATRIX_SCALE_FMT_E4M3"},
7450 return tryParseMatrixScaleFmt(
Operands,
"matrix_a_scale_fmt",
7451 AMDGPUOperand::ImmTyMatrixAScaleFmt);
7455 return tryParseMatrixScaleFmt(
Operands,
"matrix_b_scale_fmt",
7456 AMDGPUOperand::ImmTyMatrixBScaleFmt);
7461ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &
Format) {
7462 using namespace llvm::AMDGPU::MTBUFFormat;
7468 for (
int I = 0;
I < 2; ++
I) {
7469 if (Dfmt == DFMT_UNDEF && !tryParseFmt(
"dfmt", DFMT_MAX, Dfmt))
7472 if (Nfmt == NFMT_UNDEF && !tryParseFmt(
"nfmt", NFMT_MAX, Nfmt))
7477 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
7483 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
7486 Dfmt = (Dfmt ==
DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7487 Nfmt = (Nfmt ==
NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7493ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &
Format) {
7494 using namespace llvm::AMDGPU::MTBUFFormat;
7498 if (!tryParseFmt(
"format", UFMT_MAX, Fmt))
7501 if (Fmt == UFMT_UNDEF)
7508bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7510 StringRef FormatStr,
7512 using namespace llvm::AMDGPU::MTBUFFormat;
7516 if (
Format != DFMT_UNDEF) {
7522 if (
Format != NFMT_UNDEF) {
7527 Error(Loc,
"unsupported format");
7531ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
7534 using namespace llvm::AMDGPU::MTBUFFormat;
7538 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
7543 SMLoc Loc = getLoc();
7544 if (!parseId(Str,
"expected a format string") ||
7545 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
7547 if (Dfmt == DFMT_UNDEF)
7548 return Error(Loc,
"duplicate numeric format");
7549 if (Nfmt == NFMT_UNDEF)
7550 return Error(Loc,
"duplicate data format");
7553 Dfmt = (Dfmt ==
DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7554 Nfmt = (Nfmt ==
NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7558 if (Ufmt == UFMT_UNDEF)
7559 return Error(FormatLoc,
"unsupported format");
7568ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7571 using namespace llvm::AMDGPU::MTBUFFormat;
7574 if (Id == UFMT_UNDEF)
7578 return Error(Loc,
"unified format is not supported on this GPU");
7584ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &
Format) {
7585 using namespace llvm::AMDGPU::MTBUFFormat;
7586 SMLoc Loc = getLoc();
7591 return Error(Loc,
"out of range format");
7596ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &
Format) {
7597 using namespace llvm::AMDGPU::MTBUFFormat;
7603 StringRef FormatStr;
7604 SMLoc Loc = getLoc();
7605 if (!parseId(FormatStr,
"expected a format string"))
7608 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc,
Format);
7610 Res = parseSymbolicSplitFormat(FormatStr, Loc,
Format);
7620 return parseNumericFormat(
Format);
7624 using namespace llvm::AMDGPU::MTBUFFormat;
7628 SMLoc Loc = getLoc();
7638 AMDGPUOperand::CreateImm(
this,
Format, Loc, AMDGPUOperand::ImmTyFORMAT));
7657 Res = parseSymbolicOrNumericFormat(
Format);
7662 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*
Operands[
Size - 2]);
7663 assert(
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7670 return Error(getLoc(),
"duplicate format");
7676 parseIntWithPrefix(
"offset",
Operands, AMDGPUOperand::ImmTyOffset);
7678 Res = parseIntWithPrefix(
"inst_offset",
Operands,
7679 AMDGPUOperand::ImmTyInstOffset);
7686 parseNamedBit(
"r128",
Operands, AMDGPUOperand::ImmTyR128A16);
7688 Res = parseNamedBit(
"a16",
Operands, AMDGPUOperand::ImmTyA16);
7694 parseIntWithPrefix(
"blgp",
Operands, AMDGPUOperand::ImmTyBLGP);
7697 parseOperandArrayWithPrefix(
"neg",
Operands, AMDGPUOperand::ImmTyBLGP);
7707 OptionalImmIndexMap OptionalIdx;
7709 unsigned OperandIdx[4];
7710 unsigned EnMask = 0;
7713 for (
unsigned i = 1, e =
Operands.size(); i != e; ++i) {
7714 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
7719 OperandIdx[SrcIdx] = Inst.
size();
7720 Op.addRegOperands(Inst, 1);
7727 OperandIdx[SrcIdx] = Inst.
size();
7733 if (
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7734 Op.addImmOperands(Inst, 1);
7738 if (
Op.isToken() && (
Op.getToken() ==
"done" ||
Op.getToken() ==
"row_en"))
7742 OptionalIdx[
Op.getImmTy()] = i;
7748 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7755 for (
auto i = 0; i < SrcIdx; ++i) {
7757 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7782 IntVal =
encode(ISA, IntVal, CntVal);
7783 if (CntVal !=
decode(ISA, IntVal)) {
7785 IntVal =
encode(ISA, IntVal, -1);
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
  SMLoc CntLoc = getLoc();
  StringRef CntName = getTokenStr();

  SMLoc ValLoc = getLoc();

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Error(CntLoc, "invalid counter name " + CntName);

    Error(ValLoc, "too large value for " + CntName);

    Error(getLoc(), "expected a counter name");

  if (!parseCnt(Waitcnt))

  Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7860bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7861 SMLoc FieldLoc = getLoc();
7862 StringRef FieldName = getTokenStr();
7867 SMLoc ValueLoc = getLoc();
7874 if (FieldName ==
"instid0") {
7876 }
else if (FieldName ==
"instskip") {
7878 }
else if (FieldName ==
"instid1") {
7881 Error(FieldLoc,
"invalid field name " + FieldName);
7900 .Case(
"VALU_DEP_1", 1)
7901 .Case(
"VALU_DEP_2", 2)
7902 .Case(
"VALU_DEP_3", 3)
7903 .Case(
"VALU_DEP_4", 4)
7904 .Case(
"TRANS32_DEP_1", 5)
7905 .Case(
"TRANS32_DEP_2", 6)
7906 .Case(
"TRANS32_DEP_3", 7)
7907 .Case(
"FMA_ACCUM_CYCLE_1", 8)
7908 .Case(
"SALU_CYCLE_1", 9)
7909 .Case(
"SALU_CYCLE_2", 10)
7910 .Case(
"SALU_CYCLE_3", 11)
7918 Delay |=
Value << Shift;
7928 if (!parseDelay(Delay))
7936 Operands.push_back(AMDGPUOperand::CreateImm(
this, Delay, S));
7941AMDGPUOperand::isSWaitCnt()
const {
7945bool AMDGPUOperand::isSDelayALU()
const {
return isImm(); }
7951void AMDGPUAsmParser::depCtrError(SMLoc Loc,
int ErrorId,
7952 StringRef DepCtrName) {
7955 Error(Loc, Twine(
"invalid counter name ", DepCtrName));
7958 Error(Loc, Twine(DepCtrName,
" is not supported on this GPU"));
7961 Error(Loc, Twine(
"duplicate counter name ", DepCtrName));
7964 Error(Loc, Twine(
"invalid value for ", DepCtrName));
7971bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr,
unsigned &UsedOprMask) {
7973 using namespace llvm::AMDGPU::DepCtr;
7975 SMLoc DepCtrLoc = getLoc();
7976 StringRef DepCtrName = getTokenStr();
7986 unsigned PrevOprMask = UsedOprMask;
7987 int CntVal =
encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7990 depCtrError(DepCtrLoc, CntVal, DepCtrName);
7999 Error(getLoc(),
"expected a counter name");
8004 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
8005 DepCtr = (DepCtr & ~CntValMask) | CntVal;
8010 using namespace llvm::AMDGPU::DepCtr;
8013 SMLoc Loc = getLoc();
8016 unsigned UsedOprMask = 0;
8018 if (!parseDepCtr(DepCtr, UsedOprMask))
8026 Operands.push_back(AMDGPUOperand::CreateImm(
this, DepCtr, Loc));
8030bool AMDGPUOperand::isDepCtr()
const {
return isS16Imm(); }
8036ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
8038 OperandInfoTy &Width) {
8039 using namespace llvm::AMDGPU::Hwreg;
8045 HwReg.Loc = getLoc();
8048 HwReg.IsSymbolic =
true;
8050 }
else if (!
parseExpr(HwReg.Val,
"a register name")) {
8058 if (!skipToken(
AsmToken::Comma,
"expected a comma or a closing parenthesis"))
8068 Width.Loc = getLoc();
8077 using namespace llvm::AMDGPU::Hwreg;
8080 SMLoc Loc = getLoc();
8082 StructuredOpField HwReg(
"id",
"hardware register", HwregId::Width,
8084 StructuredOpField
Offset(
"offset",
"bit offset", HwregOffset::Width,
8085 HwregOffset::Default);
8086 struct : StructuredOpField {
8087 using StructuredOpField::StructuredOpField;
8088 bool validate(AMDGPUAsmParser &Parser)
const override {
8090 return Error(Parser,
"only values from 1 to 32 are legal");
8093 } Width(
"size",
"bitfield width", HwregSize::Width, HwregSize::Default);
8094 ParseStatus Res = parseStructuredOpFields({&HwReg, &
Offset, &Width});
8097 Res = parseHwregFunc(HwReg,
Offset, Width);
8100 if (!validateStructuredOpFields({&HwReg, &
Offset, &Width}))
8102 ImmVal = HwregEncoding::encode(HwReg.Val,
Offset.Val, Width.Val);
8106 parseExpr(ImmVal,
"a hwreg macro, structured immediate"))
8113 return Error(Loc,
"invalid immediate: only 16-bit values are legal");
8115 AMDGPUOperand::CreateImm(
this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
8119bool AMDGPUOperand::isHwreg()
const {
8120 return isImmTy(ImmTyHwreg);
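// Editor's note (illustrative sketch): the hwreg operand parsed above accepts
// either the structured macro form or a plain 16-bit immediate, e.g.
//
//   s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 32)
//   s_getreg_b32 s0, hwreg(1, 0, 32)
//
// The three fields (register id, bit offset, bitfield width) are packed by
// HwregEncoding::encode(); the width validator above only accepts values in
// the range [1, 32].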
8128AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8130 OperandInfoTy &Stream) {
8131 using namespace llvm::AMDGPU::SendMsg;
8136 Msg.IsSymbolic =
true;
8138 }
else if (!
parseExpr(Msg.Val,
"a message name")) {
8143 Op.IsDefined =
true;
8146 (
Op.Val =
getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
8149 }
else if (!
parseExpr(
Op.Val,
"an operation name")) {
8154 Stream.IsDefined =
true;
8155 Stream.Loc = getLoc();
8165AMDGPUAsmParser::validateSendMsg(
const OperandInfoTy &Msg,
8166 const OperandInfoTy &
Op,
8167 const OperandInfoTy &Stream) {
8168 using namespace llvm::AMDGPU::SendMsg;
8173 bool Strict = Msg.IsSymbolic;
8177 Error(Msg.Loc,
"specified message id is not supported on this GPU");
8182 Error(Msg.Loc,
"invalid message id");
8188 Error(
Op.Loc,
"message does not support operations");
8190 Error(Msg.Loc,
"missing message operation");
8196 Error(
Op.Loc,
"specified operation id is not supported on this GPU");
8198 Error(
Op.Loc,
"invalid operation id");
8203 Error(Stream.Loc,
"message operation does not support streams");
8207 Error(Stream.Loc,
"invalid message stream id");
8214 using namespace llvm::AMDGPU::SendMsg;
8217 SMLoc Loc = getLoc();
8221 OperandInfoTy
Op(OP_NONE_);
8222 OperandInfoTy Stream(STREAM_ID_NONE_);
8223 if (parseSendMsgBody(Msg,
Op, Stream) &&
8224 validateSendMsg(Msg,
Op, Stream)) {
8229 }
else if (
parseExpr(ImmVal,
"a sendmsg macro")) {
8231 return Error(Loc,
"invalid immediate: only 16-bit values are legal");
8236 Operands.push_back(AMDGPUOperand::CreateImm(
this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
8240bool AMDGPUOperand::isSendMsg()
const {
8241 return isImmTy(ImmTySendMsg);
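// Editor's note (illustrative sketch): the sendmsg operand parsed above can be
// a symbolic macro or a raw 16-bit immediate, e.g.
//
//   s_sendmsg sendmsg(MSG_INTERRUPT)
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
//
// parseSendMsgBody() collects the message, operation, and stream ids, and
// validateSendMsg() rejects combinations not supported on the subtarget.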
8255 int Slot = StringSwitch<int>(Str)
8262 return Error(S,
"invalid interpolation slot");
8264 Operands.push_back(AMDGPUOperand::CreateImm(
this, Slot, S,
8265 AMDGPUOperand::ImmTyInterpSlot));
8276 if (!Str.starts_with(
"attr"))
8277 return Error(S,
"invalid interpolation attribute");
8279 StringRef Chan = Str.take_back(2);
8280 int AttrChan = StringSwitch<int>(Chan)
8287 return Error(S,
"invalid or missing interpolation attribute channel");
8289 Str = Str.drop_back(2).drop_front(4);
8292 if (Str.getAsInteger(10, Attr))
8293 return Error(S,
"invalid or missing interpolation attribute number");
8296 return Error(S,
"out of bounds interpolation attribute number");
8300 Operands.push_back(AMDGPUOperand::CreateImm(
this, Attr, S,
8301 AMDGPUOperand::ImmTyInterpAttr));
8302 Operands.push_back(AMDGPUOperand::CreateImm(
8303 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
8312 using namespace llvm::AMDGPU::Exp;
8322 return Error(S, (Id == ET_INVALID)
8323 ?
"invalid exp target"
8324 :
"exp target is not supported on this GPU");
8326 Operands.push_back(AMDGPUOperand::CreateImm(
this, Id, S,
8327 AMDGPUOperand::ImmTyExpTgt));
8336AMDGPUAsmParser::isId(
const AsmToken &Token,
const StringRef Id)
const {
8341AMDGPUAsmParser::isId(
const StringRef Id)
const {
8347 return getTokenKind() ==
Kind;
8350StringRef AMDGPUAsmParser::getId()
const {
8355AMDGPUAsmParser::trySkipId(
const StringRef Id) {
8364AMDGPUAsmParser::trySkipId(
const StringRef Pref,
const StringRef Id) {
8366 StringRef Tok = getTokenStr();
8377 if (isId(Id) && peekToken().is(Kind)) {
8387 if (isToken(Kind)) {
8396 const StringRef ErrMsg) {
8397 if (!trySkipToken(Kind)) {
8398 Error(getLoc(), ErrMsg);
8405AMDGPUAsmParser::parseExpr(int64_t &
Imm, StringRef Expected) {
8409 if (Parser.parseExpression(Expr))
8412 if (Expr->evaluateAsAbsolute(
Imm))
8415 if (Expected.empty()) {
8416 Error(S,
"expected absolute expression");
8418 Error(S, Twine(
"expected ", Expected) +
8419 Twine(
" or an absolute expression"));
8429 if (Parser.parseExpression(Expr))
8433 if (Expr->evaluateAsAbsolute(IntVal)) {
8434 Operands.push_back(AMDGPUOperand::CreateImm(
this, IntVal, S));
8436 Operands.push_back(AMDGPUOperand::CreateExpr(
this, Expr, S));
8442AMDGPUAsmParser::parseString(StringRef &Val,
const StringRef ErrMsg) {
8444 Val =
getToken().getStringContents();
8448 Error(getLoc(), ErrMsg);
8453AMDGPUAsmParser::parseId(StringRef &Val,
const StringRef ErrMsg) {
8455 Val = getTokenStr();
8459 if (!ErrMsg.
empty())
8460 Error(getLoc(), ErrMsg);
8465AMDGPUAsmParser::getToken()
const {
8466 return Parser.getTok();
8469AsmToken AMDGPUAsmParser::peekToken(
bool ShouldSkipSpace) {
8472 : getLexer().peekTok(ShouldSkipSpace);
8477 auto TokCount = getLexer().peekTokens(Tokens);
8479 for (
auto Idx = TokCount; Idx < Tokens.
size(); ++Idx)
8484AMDGPUAsmParser::getTokenKind()
const {
8485 return getLexer().getKind();
8489AMDGPUAsmParser::getLoc()
const {
8494AMDGPUAsmParser::getTokenStr()
const {
8499AMDGPUAsmParser::lex() {
8504 return ((AMDGPUOperand &)*
Operands[0]).getStartLoc();
8508SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {
8513 int MCOpIdx)
const {
8515 const auto TargetOp =
static_cast<AMDGPUOperand &
>(*Op);
8516 if (TargetOp.getMCOpIdx() == MCOpIdx)
8517 return TargetOp.getStartLoc();
8523AMDGPUAsmParser::getOperandLoc(std::function<
bool(
const AMDGPUOperand&)>
Test,
8525 for (
unsigned i =
Operands.size() - 1; i > 0; --i) {
8526 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
8528 return Op.getStartLoc();
8534AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy
Type,
8536 auto Test = [=](
const AMDGPUOperand&
Op) {
return Op.isImmTy(
Type); };
8551 StringRef
Id = getTokenStr();
8552 SMLoc IdLoc = getLoc();
8558 find_if(Fields, [Id](StructuredOpField *
F) {
return F->Id ==
Id; });
8559 if (
I == Fields.
end())
8560 return Error(IdLoc,
"unknown field");
8561 if ((*I)->IsDefined)
8562 return Error(IdLoc,
"duplicate field");
8565 (*I)->Loc = getLoc();
8568 (*I)->IsDefined =
true;
8575bool AMDGPUAsmParser::validateStructuredOpFields(
8577 return all_of(Fields, [
this](
const StructuredOpField *
F) {
8578 return F->validate(*
this);
8589 const unsigned OrMask,
8590 const unsigned XorMask) {
8599bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &
Op,
const unsigned MinVal,
8600 const unsigned MaxVal,
8601 const Twine &ErrMsg, SMLoc &Loc) {
8618AMDGPUAsmParser::parseSwizzleOperands(
const unsigned OpNum, int64_t*
Op,
8619 const unsigned MinVal,
8620 const unsigned MaxVal,
8621 const StringRef ErrMsg) {
8623 for (
unsigned i = 0; i < OpNum; ++i) {
8624 if (!parseSwizzleOperand(
Op[i], MinVal, MaxVal, ErrMsg, Loc))
8632AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &
Imm) {
8633 using namespace llvm::AMDGPU::Swizzle;
8636 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8637 "expected a 2-bit lane id")) {
8648AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &
Imm) {
8649 using namespace llvm::AMDGPU::Swizzle;
8655 if (!parseSwizzleOperand(GroupSize,
8657 "group size must be in the interval [2,32]",
8662 Error(Loc,
"group size must be a power of two");
8665 if (parseSwizzleOperand(LaneIdx,
8667 "lane id must be in the interval [0,group size - 1]",
8676AMDGPUAsmParser::parseSwizzleReverse(int64_t &
Imm) {
8677 using namespace llvm::AMDGPU::Swizzle;
8682 if (!parseSwizzleOperand(GroupSize,
8684 "group size must be in the interval [2,32]",
8689 Error(Loc,
"group size must be a power of two");
8698AMDGPUAsmParser::parseSwizzleSwap(int64_t &
Imm) {
8699 using namespace llvm::AMDGPU::Swizzle;
8704 if (!parseSwizzleOperand(GroupSize,
8706 "group size must be in the interval [1,16]",
8711 Error(Loc,
"group size must be a power of two");
8720AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &
Imm) {
8721 using namespace llvm::AMDGPU::Swizzle;
8728 SMLoc StrLoc = getLoc();
8729 if (!parseString(Ctl)) {
8732 if (Ctl.
size() != BITMASK_WIDTH) {
8733 Error(StrLoc,
"expected a 5-character mask");
8737 unsigned AndMask = 0;
8738 unsigned OrMask = 0;
8739 unsigned XorMask = 0;
8741 for (
size_t i = 0; i < Ctl.
size(); ++i) {
8745 Error(StrLoc,
"invalid mask");
8766bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &
Imm) {
8767 using namespace llvm::AMDGPU::Swizzle;
8770 Error(getLoc(),
"FFT mode swizzle not supported on this GPU");
8776 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
8777 "FFT swizzle must be in the interval [0," +
8778 Twine(FFT_SWIZZLE_MAX) + Twine(
']'),
8786bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &
Imm) {
8787 using namespace llvm::AMDGPU::Swizzle;
8790 Error(getLoc(),
"Rotate mode swizzle not supported on this GPU");
8797 if (!parseSwizzleOperand(
Direction, 0, 1,
8798 "direction must be 0 (left) or 1 (right)", Loc))
8802 if (!parseSwizzleOperand(
8803 RotateSize, 0, ROTATE_MAX_SIZE,
8804 "number of threads to rotate must be in the interval [0," +
8805 Twine(ROTATE_MAX_SIZE) + Twine(
']'),
8810 (RotateSize << ROTATE_SIZE_SHIFT);
8815AMDGPUAsmParser::parseSwizzleOffset(int64_t &
Imm) {
8817 SMLoc OffsetLoc = getLoc();
8823 Error(OffsetLoc,
"expected a 16-bit offset");
8830AMDGPUAsmParser::parseSwizzleMacro(int64_t &
Imm) {
8831 using namespace llvm::AMDGPU::Swizzle;
8835 SMLoc ModeLoc = getLoc();
8838 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8839 Ok = parseSwizzleQuadPerm(
Imm);
8840 }
else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8841 Ok = parseSwizzleBitmaskPerm(
Imm);
8842 }
else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8843 Ok = parseSwizzleBroadcast(
Imm);
8844 }
else if (trySkipId(IdSymbolic[ID_SWAP])) {
8845 Ok = parseSwizzleSwap(
Imm);
8846 }
else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8847 Ok = parseSwizzleReverse(
Imm);
8848 }
else if (trySkipId(IdSymbolic[ID_FFT])) {
8849 Ok = parseSwizzleFFT(
Imm);
8850 }
else if (trySkipId(IdSymbolic[ID_ROTATE])) {
8851 Ok = parseSwizzleRotate(
Imm);
8853 Error(ModeLoc,
"expected a swizzle mode");
8856 return Ok && skipToken(
AsmToken::RParen,
"expected a closing parentheses");
8866 if (trySkipId(
"offset")) {
8870 if (trySkipId(
"swizzle")) {
8871 Ok = parseSwizzleMacro(
Imm);
8873 Ok = parseSwizzleOffset(
Imm);
8877 Operands.push_back(AMDGPUOperand::CreateImm(
this,
Imm, S, AMDGPUOperand::ImmTySwizzle));
8885AMDGPUOperand::isSwizzle()
const {
8886 return isImmTy(ImmTySwizzle);
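// Editor's note (illustrative sketch): ds_swizzle_b32 offsets can be written
// either as a raw 16-bit offset or with the swizzle() macro parsed above, e.g.
//
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pip")
//
// Each mode name maps to one of the parseSwizzle* helpers above, which
// range-check the fields before packing them into the immediate.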
8893int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8895 using namespace llvm::AMDGPU::VGPRIndexMode;
8907 for (
unsigned ModeId = ID_MIN; ModeId <=
ID_MAX; ++ModeId) {
8908 if (trySkipId(IdSymbolic[ModeId])) {
8916 "expected a VGPR index mode or a closing parenthesis" :
8917 "expected a VGPR index mode");
8922 Error(S,
"duplicate VGPR index mode");
8930 "expected a comma or a closing parenthesis"))
8939 using namespace llvm::AMDGPU::VGPRIndexMode;
8945 Imm = parseGPRIdxMacro();
8949 if (getParser().parseAbsoluteExpression(
Imm))
8952 return Error(S,
"invalid immediate: only 4-bit values are legal");
8956 AMDGPUOperand::CreateImm(
this,
Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8960bool AMDGPUOperand::isGPRIdxMode()
const {
8961 return isImmTy(ImmTyGprIdxMode);
8973 if (isRegister() || isModifier())
8980 assert(Opr.isImm() || Opr.isExpr());
8981 SMLoc Loc = Opr.getStartLoc();
8985 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
8986 Error(Loc,
"expected an absolute expression or a label");
8987 }
else if (Opr.isImm() && !Opr.isS16Imm()) {
8988 Error(Loc,
"expected a 16-bit signed jump offset");
9006void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
9009 OptionalImmIndexMap OptionalIdx;
9010 unsigned FirstOperandIdx = 1;
9011 bool IsAtomicReturn =
false;
9018 for (
unsigned i = FirstOperandIdx, e =
Operands.size(); i != e; ++i) {
9019 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
9023 Op.addRegOperands(Inst, 1);
9027 if (IsAtomicReturn && i == FirstOperandIdx)
9028 Op.addRegOperands(Inst, 1);
9033 if (
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
9034 Op.addImmOperands(Inst, 1);
9046 OptionalIdx[
Op.getImmTy()] = i;
9057bool AMDGPUOperand::isSMRDOffset8()
const {
9061bool AMDGPUOperand::isSMEMOffset()
const {
9063 return isImmLiteral();
9066bool AMDGPUOperand::isSMRDLiteralOffset()
const {
9101bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9102 if (BoundCtrl == 0 || BoundCtrl == 1) {
9110void AMDGPUAsmParser::onBeginOfFile() {
9111 if (!getParser().getStreamer().getTargetStreamer() ||
9115 if (!getTargetStreamer().getTargetID())
9116 getTargetStreamer().initializeTargetID(getSTI(),
9117 getSTI().getFeatureString());
9120 getTargetStreamer().EmitDirectiveAMDGCNTarget();
9128bool AMDGPUAsmParser::parsePrimaryExpr(
const MCExpr *&Res, SMLoc &EndLoc) {
9132 StringRef TokenId = getTokenStr();
9133 AGVK VK = StringSwitch<AGVK>(TokenId)
9134 .Case(
"max", AGVK::AGVK_Max)
9135 .Case(
"or", AGVK::AGVK_Or)
9136 .Case(
"extrasgprs", AGVK::AGVK_ExtraSGPRs)
9137 .Case(
"totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
9138 .Case(
"alignto", AGVK::AGVK_AlignTo)
9139 .Case(
"occupancy", AGVK::AGVK_Occupancy)
9140 .Default(AGVK::AGVK_None);
9144 uint64_t CommaCount = 0;
9149 if (Exprs.
empty()) {
9151 "empty " + Twine(TokenId) +
" expression");
9154 if (CommaCount + 1 != Exprs.
size()) {
9156 "mismatch of commas in " + Twine(TokenId) +
" expression");
9163 if (getParser().parseExpression(Expr, EndLoc))
9167 if (LastTokenWasComma)
9171 "unexpected token in " + Twine(TokenId) +
" expression");
9177 return getParser().parsePrimaryExpr(Res, EndLoc,
nullptr);
9181 StringRef
Name = getTokenStr();
9182 if (Name ==
"mul") {
9183 return parseIntWithPrefix(
"mul",
Operands,
9187 if (Name ==
"div") {
9188 return parseIntWithPrefix(
"div",
Operands,
9199 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
9204 const AMDGPU::OpName
Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9205 AMDGPU::OpName::src2};
9213 int DstIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
9218 int ModIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0_modifiers);
9220 if (
DstOp.isReg() &&
9221 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(
DstOp.
getReg())) {
9225 if ((OpSel & (1 << SrcNum)) != 0)
9231void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
9238 OptionalImmIndexMap &OptionalIdx) {
9239 cvtVOP3P(Inst,
Operands, OptionalIdx);
9248 &&
Desc.NumOperands > (OpNum + 1)
9250 &&
Desc.operands()[OpNum + 1].RegClass != -1
9252 &&
Desc.getOperandConstraint(OpNum + 1,
9256void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst,
unsigned OpSel) {
9258 constexpr AMDGPU::OpName
Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9259 AMDGPU::OpName::src2};
9260 constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9261 AMDGPU::OpName::src1_modifiers,
9262 AMDGPU::OpName::src2_modifiers};
9263 for (
int J = 0; J < 3; ++J) {
9264 int OpIdx = AMDGPU::getNamedOperandIdx(
Opc,
Ops[J]);
9270 int ModIdx = AMDGPU::getNamedOperandIdx(
Opc, ModOps[J]);
9273 if ((OpSel & (1 << J)) != 0)
9276 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (1 << 3)) != 0)
9285 OptionalImmIndexMap OptionalIdx;
9290 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9291 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
9295 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
9297 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9298 }
else if (
Op.isInterpSlot() ||
Op.isInterpAttr() ||
9299 Op.isInterpAttrChan()) {
9301 }
else if (
Op.isImmModifier()) {
9302 OptionalIdx[
Op.getImmTy()] =
I;
9310 AMDGPUOperand::ImmTyHigh);
9314 AMDGPUOperand::ImmTyClamp);
9318 AMDGPUOperand::ImmTyOModSI);
9323 AMDGPUOperand::ImmTyOpSel);
9324 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
9327 cvtOpSelHelper(Inst, OpSel);
9333 OptionalImmIndexMap OptionalIdx;
9338 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9339 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
9343 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
9345 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9346 }
else if (
Op.isImmModifier()) {
9347 OptionalIdx[
Op.getImmTy()] =
I;
9355 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
9365 cvtOpSelHelper(Inst, OpSel);
9368void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
9370 OptionalImmIndexMap OptionalIdx;
9373 int CbszOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::cbsz);
9377 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J)
9378 static_cast<AMDGPUOperand &
>(*
Operands[
I++]).addRegOperands(Inst, 1);
9381 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*
Operands[
I]);
9386 if (NumOperands == CbszOpIdx) {
9391 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9392 }
else if (
Op.isImmModifier()) {
9393 OptionalIdx[
Op.getImmTy()] =
I;
9395 Op.addRegOrImmOperands(Inst, 1);
9400 auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
9401 if (CbszIdx != OptionalIdx.end()) {
9402 int CbszVal = ((AMDGPUOperand &)*
Operands[CbszIdx->second]).
getImm();
9406 int BlgpOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::blgp);
9407 auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
9408 if (BlgpIdx != OptionalIdx.end()) {
9409 int BlgpVal = ((AMDGPUOperand &)*
Operands[BlgpIdx->second]).
getImm();
9420 auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
9421 if (OpselIdx != OptionalIdx.end()) {
9422 OpSel =
static_cast<const AMDGPUOperand &
>(*
Operands[OpselIdx->second])
9426 unsigned OpSelHi = 0;
9427 auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
9428 if (OpselHiIdx != OptionalIdx.end()) {
9429 OpSelHi =
static_cast<const AMDGPUOperand &
>(*
Operands[OpselHiIdx->second])
9432 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9433 AMDGPU::OpName::src1_modifiers};
9435 for (
unsigned J = 0; J < 2; ++J) {
9436 unsigned ModVal = 0;
9437 if (OpSel & (1 << J))
9439 if (OpSelHi & (1 << J))
9442 const int ModIdx = AMDGPU::getNamedOperandIdx(
Opc, ModOps[J]);
9448 OptionalImmIndexMap &OptionalIdx) {
9453 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9454 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
9458 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
9460 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9461 }
else if (
Op.isImmModifier()) {
9462 OptionalIdx[
Op.getImmTy()] =
I;
9464 Op.addRegOrImmOperands(Inst, 1);
9470 AMDGPUOperand::ImmTyScaleSel);
9474 AMDGPUOperand::ImmTyClamp);
9480 AMDGPUOperand::ImmTyByteSel);
9485 AMDGPUOperand::ImmTyOModSI);
9492 auto *it = Inst.
begin();
9493 std::advance(it, AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2_modifiers));
9502 OptionalImmIndexMap OptionalIdx;
9503 cvtVOP3(Inst,
Operands, OptionalIdx);
9507 OptionalImmIndexMap &OptIdx) {
9513 if (
Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
9514 Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
9515 Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
9516 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
9517 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
9518 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
9526 !(
Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 ||
9527 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 ||
9528 Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 ||
9529 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 ||
9530 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx12 ||
9531 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx12 ||
9532 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx12 ||
9533 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx12 ||
9534 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
9535 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9536 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp_gfx1250 ||
9537 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp8_gfx1250 ||
9538 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9539 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9540 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp_gfx1250 ||
9541 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp_gfx1250 ||
9542 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp8_gfx1250 ||
9543 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp8_gfx1250 ||
9544 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_gfx1250 ||
9545 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_gfx1250 ||
9546 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp_gfx1250 ||
9547 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp_gfx1250 ||
9548 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp8_gfx1250 ||
9549 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp8_gfx1250 ||
9550 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_gfx1250 ||
9551 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_gfx1250)) {
9555 int BitOp3Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::bitop3);
9556 if (BitOp3Idx != -1) {
9563 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
9564 if (OpSelIdx != -1) {
9568 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel_hi);
9569 if (OpSelHiIdx != -1) {
9576 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_a_fmt);
9577 if (MatrixAFMTIdx != -1) {
9579 AMDGPUOperand::ImmTyMatrixAFMT, 0);
9583 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_b_fmt);
9584 if (MatrixBFMTIdx != -1) {
9586 AMDGPUOperand::ImmTyMatrixBFMT, 0);
9589 int MatrixAScaleIdx =
9590 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_a_scale);
9591 if (MatrixAScaleIdx != -1) {
9593 AMDGPUOperand::ImmTyMatrixAScale, 0);
9596 int MatrixBScaleIdx =
9597 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_b_scale);
9598 if (MatrixBScaleIdx != -1) {
9600 AMDGPUOperand::ImmTyMatrixBScale, 0);
9603 int MatrixAScaleFmtIdx =
9604 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_a_scale_fmt);
9605 if (MatrixAScaleFmtIdx != -1) {
9607 AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
9610 int MatrixBScaleFmtIdx =
9611 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_b_scale_fmt);
9612 if (MatrixBScaleFmtIdx != -1) {
9614 AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
9619 AMDGPUOperand::ImmTyMatrixAReuse, 0);
9623 AMDGPUOperand::ImmTyMatrixBReuse, 0);
9625 int NegLoIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::neg_lo);
9629 int NegHiIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::neg_hi);
9633 const AMDGPU::OpName
Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9634 AMDGPU::OpName::src2};
9635 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9636 AMDGPU::OpName::src1_modifiers,
9637 AMDGPU::OpName::src2_modifiers};
9640 unsigned OpSelHi = 0;
9647 if (OpSelHiIdx != -1)
9656 for (
int J = 0; J < 3; ++J) {
9657 int OpIdx = AMDGPU::getNamedOperandIdx(
Opc,
Ops[J]);
9661 int ModIdx = AMDGPU::getNamedOperandIdx(
Opc, ModOps[J]);
9666 uint32_t ModVal = 0;
9669 if (SrcOp.
isReg() && getMRI()
9676 if ((OpSel & (1 << J)) != 0)
9680 if ((OpSelHi & (1 << J)) != 0)
9683 if ((NegLo & (1 << J)) != 0)
9686 if ((NegHi & (1 << J)) != 0)
9694 OptionalImmIndexMap OptIdx;
9700 unsigned i,
unsigned Opc,
9702 if (AMDGPU::getNamedOperandIdx(
Opc,
OpName) != -1)
9703 ((AMDGPUOperand &)*
Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
9705 ((AMDGPUOperand &)*
Operands[i]).addRegOperands(Inst, 1);
9711 ((AMDGPUOperand &)*
Operands[1]).addRegOperands(Inst, 1);
9714 ((AMDGPUOperand &)*
Operands[1]).addRegOperands(Inst, 1);
9715 ((AMDGPUOperand &)*
Operands[4]).addRegOperands(Inst, 1);
9717 OptionalImmIndexMap OptIdx;
9718 for (
unsigned i = 5; i <
Operands.size(); ++i) {
9719 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
9720 OptIdx[
Op.getImmTy()] = i;
9725 AMDGPUOperand::ImmTyIndexKey8bit);
9729 AMDGPUOperand::ImmTyIndexKey16bit);
9733 AMDGPUOperand::ImmTyIndexKey32bit);
9753 Operands.push_back(AMDGPUOperand::CreateToken(
this,
"::", S));
9754 SMLoc OpYLoc = getLoc();
9757 Operands.push_back(AMDGPUOperand::CreateToken(
this, OpYName, OpYLoc));
9760 return Error(OpYLoc,
"expected a VOPDY instruction after ::");
9769 auto addOp = [&](uint16_t ParsedOprIdx) {
9770 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[ParsedOprIdx]);
9772 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9776 Op.addRegOperands(Inst, 1);
9780 Op.addImmOperands(Inst, 1);
9792 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
9796 const auto &CInfo = InstInfo[CompIdx];
9797 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
9798 for (
unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
9799 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
9800 if (CInfo.hasSrc2Acc())
9801 addOp(CInfo.getIndexOfDstInParsedOperands());
9805 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::bitop3);
9806 if (BitOp3Idx != -1) {
9807 OptionalImmIndexMap OptIdx;
9808 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands.back());
bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}

bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
         (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
         (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
         (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
         (Imm == DppCtrl::WAVE_SHL1) ||
         (Imm == DppCtrl::WAVE_ROL1) ||
         (Imm == DppCtrl::WAVE_SHR1) ||
         (Imm == DppCtrl::WAVE_ROR1) ||
         (Imm == DppCtrl::ROW_MIRROR) ||
         (Imm == DppCtrl::ROW_HALF_MIRROR) ||
         (Imm == DppCtrl::BCAST15) ||
         (Imm == DppCtrl::BCAST31) ||
         (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
         (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
}

bool AMDGPUOperand::isBLGP() const {

bool AMDGPUOperand::isS16Imm() const {

bool AMDGPUOperand::isU16Imm() const {
9868bool AMDGPUAsmParser::parseDimId(
unsigned &Encoding) {
9873 SMLoc Loc =
getToken().getEndLoc();
9874 Token = std::string(getTokenStr());
9876 if (getLoc() != Loc)
9881 if (!parseId(Suffix))
9885 StringRef DimId = Token;
9906 SMLoc Loc = getLoc();
9907 if (!parseDimId(Encoding))
9908 return Error(Loc,
"invalid dim value");
9910 Operands.push_back(AMDGPUOperand::CreateImm(
this, Encoding, S,
9911 AMDGPUOperand::ImmTyDim));
9929 if (!skipToken(
AsmToken::LBrac,
"expected an opening square bracket"))
9932 for (
size_t i = 0; i < 8; ++i) {
9936 SMLoc Loc = getLoc();
9937 if (getParser().parseAbsoluteExpression(Sels[i]))
9939 if (0 > Sels[i] || 7 < Sels[i])
9940 return Error(Loc,
"expected a 3-bit value");
9947 for (
size_t i = 0; i < 8; ++i)
9948 DPP8 |= (Sels[i] << (i * 3));
9950 Operands.push_back(AMDGPUOperand::CreateImm(
this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9955AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
9957 if (Ctrl ==
"row_newbcast")
9960 if (Ctrl ==
"row_share" ||
9961 Ctrl ==
"row_xmask")
9964 if (Ctrl ==
"wave_shl" ||
9965 Ctrl ==
"wave_shr" ||
9966 Ctrl ==
"wave_rol" ||
9967 Ctrl ==
"wave_ror" ||
9968 Ctrl ==
"row_bcast")
9971 return Ctrl ==
"row_mirror" ||
9972 Ctrl ==
"row_half_mirror" ||
9973 Ctrl ==
"quad_perm" ||
9974 Ctrl ==
"row_shl" ||
9975 Ctrl ==
"row_shr" ||
9980AMDGPUAsmParser::parseDPPCtrlPerm() {
9983 if (!skipToken(
AsmToken::LBrac,
"expected an opening square bracket"))
9987 for (
int i = 0; i < 4; ++i) {
9992 SMLoc Loc = getLoc();
9993 if (getParser().parseAbsoluteExpression(Temp))
9995 if (Temp < 0 || Temp > 3) {
9996 Error(Loc,
"expected a 2-bit value");
10000 Val += (Temp << i * 2);
10003 if (!skipToken(
AsmToken::RBrac,
"expected a closing square bracket"))
10010AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
10011 using namespace AMDGPU::DPP;
10016 SMLoc Loc = getLoc();
10018 if (getParser().parseAbsoluteExpression(Val))
10021 struct DppCtrlCheck {
10027 DppCtrlCheck
Check = StringSwitch<DppCtrlCheck>(Ctrl)
10028 .Case(
"wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
10029 .Case(
"wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
10030 .Case(
"wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
10031 .Case(
"wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
10032 .Case(
"row_shl", {DppCtrl::ROW_SHL0, 1, 15})
10033 .Case(
"row_shr", {DppCtrl::ROW_SHR0, 1, 15})
10034 .Case(
"row_ror", {DppCtrl::ROW_ROR0, 1, 15})
10035 .Case(
"row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
10036 .Case(
"row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
10037 .Case(
"row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
10041 if (
Check.Ctrl == -1) {
10042 Valid = (
Ctrl ==
"row_bcast" && (Val == 15 || Val == 31));
10050 Error(Loc, Twine(
"invalid ", Ctrl) + Twine(
" value"));
10058 using namespace AMDGPU::DPP;
10061 !isSupportedDPPCtrl(getTokenStr(),
Operands))
10064 SMLoc S = getLoc();
10070 if (Ctrl ==
"row_mirror") {
10071 Val = DppCtrl::ROW_MIRROR;
10072 }
else if (Ctrl ==
"row_half_mirror") {
10073 Val = DppCtrl::ROW_HALF_MIRROR;
10076 if (Ctrl ==
"quad_perm") {
10077 Val = parseDPPCtrlPerm();
10079 Val = parseDPPCtrlSel(Ctrl);
10088 AMDGPUOperand::CreateImm(
this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
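// Editor's note (illustrative sketch): the dpp_ctrl operand produced above is
// written with one of the named selectors checked in isSupportedDPPCtrl(),
// e.g.
//
//   v_add_f32_dpp v0, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
//   v_mov_b32_dpp v0, v1 row_shl:1
//
// quad_perm is parsed by parseDPPCtrlPerm(); the row_/wave_ selectors go
// through parseDPPCtrlSel(), which validates the value range per selector.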
10094 OptionalImmIndexMap OptionalIdx;
10101 int OldIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::old);
10103 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2_modifiers);
10104 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
10108 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
10109 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
10113 int VdstInIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
10114 bool IsVOP3CvtSrDpp =
Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
10115 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
10116 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
10117 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
10123 if (OldIdx == NumOperands) {
10125 constexpr int DST_IDX = 0;
10127 }
else if (Src2ModIdx == NumOperands) {
10137 if (IsVOP3CvtSrDpp) {
10146 if (TiedTo != -1) {
10151 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
10153 if (IsDPP8 &&
Op.isDppFI()) {
10156 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10157 }
else if (
Op.isReg()) {
10158 Op.addRegOperands(Inst, 1);
10159 }
else if (
Op.isImm() &&
10161 Op.addImmOperands(Inst, 1);
10162 }
else if (
Op.isImm()) {
10163 OptionalIdx[
Op.getImmTy()] =
I;
10171 AMDGPUOperand::ImmTyClamp);
10177 AMDGPUOperand::ImmTyByteSel);
10184 cvtVOP3P(Inst,
Operands, OptionalIdx);
10186 cvtVOP3OpSel(Inst,
Operands, OptionalIdx);
10193 using namespace llvm::AMDGPU::DPP;
10203 AMDGPUOperand::ImmTyDppFI);
10208 OptionalImmIndexMap OptionalIdx;
10212 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
10213 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
10220 if (TiedTo != -1) {
10225 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
10227 if (
Op.isReg() && validateVccOperand(
Op.getReg())) {
10235 Op.addImmOperands(Inst, 1);
10237 Op.addRegWithFPInputModsOperands(Inst, 2);
10238 }
else if (
Op.isDppFI()) {
10240 }
else if (
Op.isReg()) {
10241 Op.addRegOperands(Inst, 1);
10247 Op.addRegWithFPInputModsOperands(Inst, 2);
10248 }
else if (
Op.isReg()) {
10249 Op.addRegOperands(Inst, 1);
10250 }
else if (
Op.isDPPCtrl()) {
10251 Op.addImmOperands(Inst, 1);
10252 }
else if (
Op.isImm()) {
10254 OptionalIdx[
Op.getImmTy()] =
I;
10262 using namespace llvm::AMDGPU::DPP;
10270 AMDGPUOperand::ImmTyDppFI);
10281 AMDGPUOperand::ImmTy
Type) {
10282 return parseStringOrIntWithPrefix(
10284 {
"BYTE_0",
"BYTE_1",
"BYTE_2",
"BYTE_3",
"WORD_0",
"WORD_1",
"DWORD"},
10289 return parseStringOrIntWithPrefix(
10290 Operands,
"dst_unused", {
"UNUSED_PAD",
"UNUSED_SEXT",
"UNUSED_PRESERVE"},
10291 AMDGPUOperand::ImmTySDWADstUnused);
10315 uint64_t BasicInstType,
10318 using namespace llvm::AMDGPU::SDWA;
10320 OptionalImmIndexMap OptionalIdx;
10321 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
10322 bool SkippedVcc =
false;
10326 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
10327 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
10331 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
10332 if (SkipVcc && !SkippedVcc &&
Op.isReg() &&
10333 (
Op.getReg() == AMDGPU::VCC ||
Op.getReg() == AMDGPU::VCC_LO)) {
10351 Op.addRegOrImmWithInputModsOperands(Inst, 2);
10352 }
else if (
Op.isImm()) {
10354 OptionalIdx[
Op.getImmTy()] =
I;
10358 SkippedVcc =
false;
10362 if (
Opc != AMDGPU::V_NOP_sdwa_gfx10 &&
Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
10363 Opc != AMDGPU::V_NOP_sdwa_vi) {
10365 switch (BasicInstType) {
10369 AMDGPUOperand::ImmTyClamp, 0);
10373 AMDGPUOperand::ImmTyOModSI, 0);
10377 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10381 AMDGPUOperand::ImmTySDWADstUnused,
10382 DstUnused::UNUSED_PRESERVE);
10389 AMDGPUOperand::ImmTyClamp, 0);
10403 AMDGPUOperand::ImmTyClamp, 0);
10409 llvm_unreachable(
"Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
10415 if (Inst.
getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
10416 Inst.
getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
10417 auto *it = Inst.
begin();
10419 it, AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::src2));
10431#define GET_MATCHER_IMPLEMENTATION
10432#define GET_MNEMONIC_SPELL_CHECKER
10433#define GET_MNEMONIC_CHECKER
10434#include "AMDGPUGenAsmMatcher.inc"
// (each return below is the body of a separate case in a switch over the
//  matcher class MCK; most case labels are not shown in this excerpt)
  return parseTokenOp("addr64", Operands);
  return parseTokenOp("done", Operands);
  return parseTokenOp("idxen", Operands);
  return parseTokenOp("lds", Operands);
  return parseTokenOp("offen", Operands);
  return parseTokenOp("off", Operands);
case MCK_row_95_en:
  return parseTokenOp("row_en", Operands);
  return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
  return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
// ...
  return tryCustomParseOperand(Operands, MCK);
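// For reference, the token and named-bit operands parsed above appear as bare
// keywords in buffer, DS and export instructions, e.g. (illustrative operands):
//   buffer_load_dword v0, v1, s[8:11], s0 offen
//   ds_add_u32 v0, v1 gds
//   exp mrt0 v0, v0, v0, v0 done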
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     /* ... */) {
  // ...
  AMDGPUOperand &Operand = (AMDGPUOperand &)Op;
  // ... (a switch over the matcher operand class; most case labels are not
  //      shown in this excerpt)
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
    return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
    return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
    return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
  case MCK_SOPPBrTarget:
    return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpAttr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpAttrChan:
    return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64_XEXEC:
    // ...
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  // ...
  return Match_InvalidOperand;
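// For reference, the interpolation operand classes checked above correspond to
// operands written like this (illustrative operands):
//   v_interp_p1_f32 v0, v1, attr0.x
//   v_interp_mov_f32 v0, p10, attr0.y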
// (endpgm operand parsing)
  SMLoc S = getLoc();
  // ...
    return Error(S, "expected a 16-bit value");
  // ... (the parsed value is recorded as
  //      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)
  //      and appended to the operand list)

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }

bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
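// For reference, ImmTyEndpgm corresponds to the 16-bit immediate that may be
// written on s_endpgm, e.g. (illustrative):
//   s_endpgm
//   s_endpgm 3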