Thanks to visit codestin.com
Credit goes to llvm.org

LLVM 22.0.0git
GCNSubtarget.h
Go to the documentation of this file.
1//=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8//
9/// \file
10/// AMD GCN specific subclass of TargetSubtarget.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
16
17#include "AMDGPUCallLowering.h"
19#include "AMDGPUSubtarget.h"
20#include "SIFrameLowering.h"
21#include "SIISelLowering.h"
22#include "SIInstrInfo.h"
25
26#define GET_SUBTARGETINFO_HEADER
27#include "AMDGPUGenSubtargetInfo.inc"
28
29namespace llvm {
30
31class GCNTargetMachine;
32
34 public AMDGPUSubtarget {
35public:
37
38 // Following 2 enums are documented at:
39 // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
/// Trap handler ABI identifiers. The numeric values are fixed explicitly:
/// NONE is 0 and AMDHSA is 1.
enum class TrapHandlerAbi {
  NONE = 0,
  AMDHSA = 1,
};
44
45 enum class TrapID {
48 };
49
50private:
51 /// SelectionDAGISel related APIs.
52 std::unique_ptr<const SelectionDAGTargetInfo> TSInfo;
53
54 /// GlobalISel related APIs.
55 std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
56 std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
57 std::unique_ptr<InstructionSelector> InstSelector;
58 std::unique_ptr<LegalizerInfo> Legalizer;
59 std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo;
60
61protected:
62 // Basic subtarget description.
65 unsigned Gen = INVALID;
67 int LDSBankCount = 0;
69
70 // Possibly statically set by tablegen, but may want to be overridden.
71 bool FastDenormalF32 = false;
72 bool HalfRate64Ops = false;
73 bool FullRate64Ops = false;
74
75 // Dynamically set bits that enable features.
76 bool FlatForGlobal = false;
78 bool BackOffBarrier = false;
80 bool UnalignedAccessMode = false;
82 bool HasApertureRegs = false;
83 bool SupportsXNACK = false;
84 bool KernargPreload = false;
85
86 // This should not be used directly. 'TargetID' tracks the dynamic settings
87 // for XNACK.
88 bool EnableXNACK = false;
89
90 bool EnableTgSplit = false;
91 bool EnableCuMode = false;
92 bool TrapHandler = false;
93 bool EnablePreciseMemory = false;
94
95 // Used as options.
96 bool EnableLoadStoreOpt = false;
98 bool EnableSIScheduler = false;
99 bool EnableDS128 = false;
101 bool DumpCode = false;
102
103 // Subtarget properties statically set by tablegen.
104 bool FP64 = false;
105 bool FMA = false;
106 bool MIMG_R128 = false;
107 bool CIInsts = false;
108 bool GFX8Insts = false;
109 bool GFX9Insts = false;
110 bool GFX90AInsts = false;
111 bool GFX940Insts = false;
112 bool GFX950Insts = false;
113 bool GFX10Insts = false;
114 bool GFX11Insts = false;
115 bool GFX12Insts = false;
116 bool GFX1250Insts = false;
117 bool GFX10_3Insts = false;
118 bool GFX7GFX8GFX9Insts = false;
119 bool SGPRInitBug = false;
120 bool UserSGPRInit16Bug = false;
123 bool HasSMemRealTime = false;
124 bool HasIntClamp = false;
125 bool HasFmaMixInsts = false;
126 bool HasFmaMixBF16Insts = false;
127 bool HasMovrel = false;
128 bool HasVGPRIndexMode = false;
130 bool HasScalarStores = false;
131 bool HasScalarAtomics = false;
132 bool HasSDWAOmod = false;
133 bool HasSDWAScalar = false;
134 bool HasSDWASdst = false;
135 bool HasSDWAMac = false;
136 bool HasSDWAOutModsVOPC = false;
137 bool HasDPP = false;
138 bool HasDPP8 = false;
139 bool HasDPALU_DPP = false;
140 bool HasDPPSrc1SGPR = false;
141 bool HasPackedFP32Ops = false;
142 bool HasImageInsts = false;
144 bool HasR128A16 = false;
145 bool HasA16 = false;
146 bool HasG16 = false;
147 bool HasNSAEncoding = false;
149 bool GFX10_AEncoding = false;
150 bool GFX10_BEncoding = false;
151 bool HasDLInsts = false;
152 bool HasFmacF64Inst = false;
153 bool HasDot1Insts = false;
154 bool HasDot2Insts = false;
155 bool HasDot3Insts = false;
156 bool HasDot4Insts = false;
157 bool HasDot5Insts = false;
158 bool HasDot6Insts = false;
159 bool HasDot7Insts = false;
160 bool HasDot8Insts = false;
161 bool HasDot9Insts = false;
162 bool HasDot10Insts = false;
163 bool HasDot11Insts = false;
164 bool HasDot12Insts = false;
165 bool HasDot13Insts = false;
166 bool HasMAIInsts = false;
167 bool HasFP8Insts = false;
169 bool HasFP8E5M3Insts = false;
170 bool HasCvtFP8Vop1Bug = false;
171 bool HasPkFmacF16Inst = false;
192 bool HasXF32Insts = false;
193 /// The maximum number of instructions that may be placed within an S_CLAUSE,
194 /// which is one greater than the maximum argument to S_CLAUSE. A value of 0
195 /// indicates a lack of S_CLAUSE support.
197 bool SupportsSRAMECC = false;
198 bool DynamicVGPR = false;
200 bool HasVMemToLDSLoad = false;
201 bool RequiresAlignVGPR = false;
202
203 // This should not be used directly. 'TargetID' tracks the dynamic settings
204 // for SRAMECC.
205 bool EnableSRAMECC = false;
206
207 bool HasNoSdstCMPX = false;
208 bool HasVscnt = false;
209 bool HasWaitXcnt = false;
210 bool HasGetWaveIdInst = false;
211 bool HasSMemTimeInst = false;
214 bool HasVOP3Literal = false;
215 bool HasNoDataDepHazard = false;
216 bool FlatAddressSpace = false;
217 bool FlatInstOffsets = false;
218 bool FlatGlobalInsts = false;
219 bool FlatScratchInsts = false;
220 bool FlatGVSMode = false;
223 bool EnableFlatScratch = false;
225 bool HasGDS = false;
226 bool HasGWS = false;
227 bool AddNoCarryInsts = false;
228 bool HasUnpackedD16VMem = false;
229 bool LDSMisalignedBug = false;
232 bool UnalignedDSAccess = false;
233 bool HasPackedTID = false;
234 bool ScalarizeGlobal = false;
235 bool HasSALUFloatInsts = false;
238 bool Has64BitLiterals = false;
240 bool HasBitOp3Insts = false;
241 bool HasTanhInsts = false;
244 bool HasPrngInst = false;
246 bool HasPermlane16Swap = false;
247 bool HasPermlane32Swap = false;
252 bool HasVmemPrefInsts = false;
254 bool HasSafeCUPrefetch = false;
257 bool HasNSAtoVMEMBug = false;
258 bool HasNSAClauseBug = false;
259 bool HasOffset3fBug = false;
265 bool Has1_5xVGPRs = false;
266 bool HasMADIntraFwdBug = false;
267 bool HasVOPDInsts = false;
271 bool HasAshrPkInsts = false;
275 bool HasMin3Max3PKF16 = false;
277 bool HasLshlAddU64Inst = false;
278 bool HasAddSubU64Insts = false;
279 bool HasMadU32Inst = false;
283
284 bool RequiresCOV6 = false;
287
289
290 // Dummy feature to use for assembler in tablegen.
291 bool FeatureDisable = false;
292
293private:
294 SIInstrInfo InstrInfo;
295 SITargetLowering TLInfo;
296 SIFrameLowering FrameLowering;
297
298public:
299 GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
300 const GCNTargetMachine &TM);
301 ~GCNSubtarget() override;
302
304 StringRef GPU, StringRef FS);
305
306 /// Diagnose inconsistent subtarget features before attempting to codegen
307 /// function \p F.
308 void checkSubtargetFeatures(const Function &F) const;
309
310 const SIInstrInfo *getInstrInfo() const override {
311 return &InstrInfo;
312 }
313
314 const SIFrameLowering *getFrameLowering() const override {
315 return &FrameLowering;
316 }
317
318 const SITargetLowering *getTargetLowering() const override {
319 return &TLInfo;
320 }
321
322 const SIRegisterInfo *getRegisterInfo() const override {
323 return &InstrInfo.getRegisterInfo();
324 }
325
326 const SelectionDAGTargetInfo *getSelectionDAGInfo() const override;
327
328 const CallLowering *getCallLowering() const override {
329 return CallLoweringInfo.get();
330 }
331
332 const InlineAsmLowering *getInlineAsmLowering() const override {
333 return InlineAsmLoweringInfo.get();
334 }
335
337 return InstSelector.get();
338 }
339
340 const LegalizerInfo *getLegalizerInfo() const override {
341 return Legalizer.get();
342 }
343
344 const AMDGPURegisterBankInfo *getRegBankInfo() const override {
345 return RegBankInfo.get();
346 }
347
349 return TargetID;
350 }
351
353 return &InstrItins;
354 }
355
357
359 return (Generation)Gen;
360 }
361
362 unsigned getMaxWaveScratchSize() const {
363 // See COMPUTE_TMPRING_SIZE.WAVESIZE.
364 if (getGeneration() >= GFX12) {
365 // 18-bit field in units of 64-dword.
366 return (64 * 4) * ((1 << 18) - 1);
367 }
368 if (getGeneration() == GFX11) {
369 // 15-bit field in units of 64-dword.
370 return (64 * 4) * ((1 << 15) - 1);
371 }
372 // 13-bit field in units of 256-dword.
373 return (256 * 4) * ((1 << 13) - 1);
374 }
375
376 /// Return the number of high bits known to be zero for a frame index.
380
381 int getLDSBankCount() const {
382 return LDSBankCount;
383 }
384
385 unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
386 return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
387 }
388
389 unsigned getConstantBusLimit(unsigned Opcode) const;
390
391 /// Returns if the result of this instruction with a 16-bit result returned in
392 /// a 32-bit register implicitly zeroes the high 16-bits, rather than preserve
393 /// the original value.
394 bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
395
396 bool supportsWGP() const {
397 if (GFX1250Insts)
398 return false;
399 return getGeneration() >= GFX10;
400 }
401
402 bool hasIntClamp() const {
403 return HasIntClamp;
404 }
405
406 bool hasFP64() const {
407 return FP64;
408 }
409
410 bool hasMIMG_R128() const {
411 return MIMG_R128;
412 }
413
414 bool hasHWFP64() const {
415 return FP64;
416 }
417
418 bool hasHalfRate64Ops() const {
419 return HalfRate64Ops;
420 }
421
422 bool hasFullRate64Ops() const {
423 return FullRate64Ops;
424 }
425
426 bool hasAddr64() const {
428 }
429
430 bool hasFlat() const {
432 }
433
434 // Return true if the target only has the reverse operand versions of VALU
435 // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
436 bool hasOnlyRevVALUShifts() const {
438 }
439
440 bool hasFractBug() const {
442 }
443
444 bool hasBFE() const {
445 return true;
446 }
447
448 bool hasBFI() const {
449 return true;
450 }
451
452 bool hasBFM() const {
453 return hasBFE();
454 }
455
456 bool hasBCNT(unsigned Size) const {
457 return true;
458 }
459
460 bool hasFFBL() const {
461 return true;
462 }
463
464 bool hasFFBH() const {
465 return true;
466 }
467
468 bool hasMed3_16() const {
470 }
471
472 bool hasMin3Max3_16() const {
474 }
475
476 bool hasFmaMixInsts() const {
477 return HasFmaMixInsts;
478 }
479
480 bool hasFmaMixBF16Insts() const { return HasFmaMixBF16Insts; }
481
482 bool hasCARRY() const {
483 return true;
484 }
485
486 bool hasFMA() const {
487 return FMA;
488 }
489
490 bool hasSwap() const {
491 return GFX9Insts;
492 }
493
494 bool hasScalarPackInsts() const {
495 return GFX9Insts;
496 }
497
498 bool hasScalarMulHiInsts() const {
499 return GFX9Insts;
500 }
501
502 bool hasScalarSubwordLoads() const { return getGeneration() >= GFX12; }
503
507
509 // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
510 return getGeneration() >= GFX9;
511 }
512
513 /// True if the offset field of DS instructions works as expected. On SI, the
514 /// offset uses a 16-bit adder and does not always wrap properly.
515 bool hasUsableDSOffset() const {
516 return getGeneration() >= SEA_ISLANDS;
517 }
518
522
523 /// Condition output from div_scale is usable.
527
528 /// Extra wait hazard is needed in some cases before
529 /// s_cbranch_vccnz/s_cbranch_vccz.
530 bool hasReadVCCZBug() const {
531 return getGeneration() <= SEA_ISLANDS;
532 }
533
534 /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
536 return getGeneration() >= GFX10;
537 }
538
539 /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
540 /// was written by a VALU instruction.
543 }
544
545 /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
546 /// SGPR was written by a VALU Instruction.
549 }
550
551 bool hasRFEHazards() const {
553 }
554
555 /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
556 unsigned getSetRegWaitStates() const {
557 return getGeneration() <= SEA_ISLANDS ? 1 : 2;
558 }
559
560 bool dumpCode() const {
561 return DumpCode;
562 }
563
564 /// Return the amount of LDS that can be used that will not restrict the
565 /// occupancy lower than WaveCount.
566 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
567 const Function &) const;
568
571 }
572
573 /// \returns If target supports S_DENORM_MODE.
574 bool hasDenormModeInst() const {
576 }
577
578 bool useFlatForGlobal() const {
579 return FlatForGlobal;
580 }
581
582 /// \returns If target supports ds_read/write_b128 and user enables generation
583 /// of ds_read/write_b128.
584 bool useDS128() const {
585 return CIInsts && EnableDS128;
586 }
587
588 /// \return If target supports ds_read/write_b96/128.
589 bool hasDS96AndDS128() const {
590 return CIInsts;
591 }
592
593 /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
594 bool haveRoundOpsF64() const {
595 return CIInsts;
596 }
597
598 /// \returns If MUBUF instructions always perform range checking, even for
599 /// buffer resources used for private memory access.
603
604 /// \returns If target requires PRT Struct NULL support (zero result registers
605 /// for sparse texture support).
606 bool usePRTStrictNull() const {
607 return EnablePRTStrictNull;
608 }
609
613
614 /// \returns true if the target supports backing off of s_barrier instructions
615 /// when an exception is raised.
617 return BackOffBarrier;
618 }
619
622 }
623
627
628 bool hasUnalignedDSAccess() const {
629 return UnalignedDSAccess;
630 }
631
635
638 }
639
643
645 return UnalignedAccessMode;
646 }
647
649
650 bool hasApertureRegs() const {
651 return HasApertureRegs;
652 }
653
654 bool isTrapHandlerEnabled() const {
655 return TrapHandler;
656 }
657
658 bool isXNACKEnabled() const {
659 return TargetID.isXnackOnOrAny();
660 }
661
662 bool isTgSplitEnabled() const {
663 return EnableTgSplit;
664 }
665
666 bool isCuModeEnabled() const {
667 return EnableCuMode;
668 }
669
671
672 bool hasFlatAddressSpace() const {
673 return FlatAddressSpace;
674 }
675
676 bool hasFlatScrRegister() const {
677 return hasFlatAddressSpace();
678 }
679
680 bool hasFlatInstOffsets() const {
681 return FlatInstOffsets;
682 }
683
684 bool hasFlatGlobalInsts() const {
685 return FlatGlobalInsts;
686 }
687
688 bool hasFlatScratchInsts() const {
689 return FlatScratchInsts;
690 }
691
692 // Check if target supports ST addressing mode with FLAT scratch instructions.
693 // The ST addressing mode means no registers are used, either VGPR or SGPR,
694 // but only immediate offset is swizzled and added to the FLAT scratch base.
695 bool hasFlatScratchSTMode() const {
697 }
698
699 bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; }
700
703 }
704
705 bool enableFlatScratch() const {
706 return flatScratchIsArchitected() ||
708 }
709
710 bool hasGlobalAddTidInsts() const {
711 return GFX10_BEncoding;
712 }
713
714 bool hasAtomicCSub() const {
715 return GFX10_BEncoding;
716 }
717
718 bool hasMTBUFInsts() const { return !hasGFX1250Insts(); }
719
720 bool hasFormattedMUBUFInsts() const { return !hasGFX1250Insts(); }
721
722 bool hasExportInsts() const {
723 return !hasGFX940Insts() && !hasGFX1250Insts();
724 }
725
726 bool hasVINTERPEncoding() const { return GFX11Insts && !hasGFX1250Insts(); }
727
728 // DS_ADD_F64/DS_ADD_RTN_F64
729 bool hasLdsAtomicAddF64() const {
730 return hasGFX90AInsts() || hasGFX1250Insts();
731 }
732
734 return getGeneration() >= GFX9;
735 }
736
739 }
740
742 return getGeneration() > GFX9;
743 }
744
745 bool hasD16LoadStore() const {
746 return getGeneration() >= GFX9;
747 }
748
750 return hasD16LoadStore() && !TargetID.isSramEccOnOrAny();
751 }
752
753 bool hasD16Images() const {
755 }
756
757 /// Return if most LDS instructions have an m0 use that require m0 to be
758 /// initialized.
759 bool ldsRequiresM0Init() const {
760 return getGeneration() < GFX9;
761 }
762
763 // True if the hardware rewinds and replays GWS operations if a wave is
764 // preempted.
765 //
766 // If this is false, a GWS operation requires testing if a nack set the
767 // MEM_VIOL bit, and repeating if so.
768 bool hasGWSAutoReplay() const {
769 return getGeneration() >= GFX9;
770 }
771
772 /// \returns if target has ds_gws_sema_release_all instruction.
773 bool hasGWSSemaReleaseAll() const {
774 return CIInsts;
775 }
776
777 /// \returns true if the target has integer add/sub instructions that do not
778 /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
779 /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
780 /// for saturation.
781 bool hasAddNoCarry() const {
782 return AddNoCarryInsts;
783 }
784
785 bool hasScalarAddSub64() const { return getGeneration() >= GFX12; }
786
787 bool hasScalarSMulU64() const { return getGeneration() >= GFX12; }
788
789 bool hasUnpackedD16VMem() const {
790 return HasUnpackedD16VMem;
791 }
792
793 // Covers VS/PS/CS graphics shaders
794 bool isMesaGfxShader(const Function &F) const {
795 return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
796 }
797
798 bool hasMad64_32() const {
799 return getGeneration() >= SEA_ISLANDS;
800 }
801
802 bool hasSDWAOmod() const {
803 return HasSDWAOmod;
804 }
805
806 bool hasSDWAScalar() const {
807 return HasSDWAScalar;
808 }
809
810 bool hasSDWASdst() const {
811 return HasSDWASdst;
812 }
813
814 bool hasSDWAMac() const {
815 return HasSDWAMac;
816 }
817
818 bool hasSDWAOutModsVOPC() const {
819 return HasSDWAOutModsVOPC;
820 }
821
822 bool hasDLInsts() const {
823 return HasDLInsts;
824 }
825
826 bool hasFmacF64Inst() const { return HasFmacF64Inst; }
827
828 bool hasDot1Insts() const {
829 return HasDot1Insts;
830 }
831
832 bool hasDot2Insts() const {
833 return HasDot2Insts;
834 }
835
836 bool hasDot3Insts() const {
837 return HasDot3Insts;
838 }
839
840 bool hasDot4Insts() const {
841 return HasDot4Insts;
842 }
843
844 bool hasDot5Insts() const {
845 return HasDot5Insts;
846 }
847
848 bool hasDot6Insts() const {
849 return HasDot6Insts;
850 }
851
852 bool hasDot7Insts() const {
853 return HasDot7Insts;
854 }
855
856 bool hasDot8Insts() const {
857 return HasDot8Insts;
858 }
859
860 bool hasDot9Insts() const {
861 return HasDot9Insts;
862 }
863
864 bool hasDot10Insts() const {
865 return HasDot10Insts;
866 }
867
868 bool hasDot11Insts() const {
869 return HasDot11Insts;
870 }
871
872 bool hasDot12Insts() const {
873 return HasDot12Insts;
874 }
875
876 bool hasDot13Insts() const {
877 return HasDot13Insts;
878 }
879
880 bool hasMAIInsts() const {
881 return HasMAIInsts;
882 }
883
884 bool hasFP8Insts() const {
885 return HasFP8Insts;
886 }
887
889
890 bool hasFP8E5M3Insts() const { return HasFP8E5M3Insts; }
891
892 bool hasPkFmacF16Inst() const {
893 return HasPkFmacF16Inst;
894 }
895
899
903
907
911
913
915
919
921
923
927
931
935
939
941
942 /// \return true if the target has flat, global, and buffer atomic fadd for
943 /// double.
947
948 /// \return true if the target's flat, global, and buffer atomic fadd for
949 /// float supports denormal handling.
953
954 /// \return true if atomic operations targeting fine-grained memory work
955 /// correctly at device scope, in allocations in host or peer PCIe device
956 /// memory.
960
961 /// \return true if the HW emulates system-scope atomics that are unsupported
962 /// by PCIe via a CAS loop.
966
968
972
973 bool hasNoSdstCMPX() const {
974 return HasNoSdstCMPX;
975 }
976
977 bool hasVscnt() const {
978 return HasVscnt;
979 }
980
981 bool hasGetWaveIdInst() const {
982 return HasGetWaveIdInst;
983 }
984
985 bool hasSMemTimeInst() const {
986 return HasSMemTimeInst;
987 }
988
991 }
992
996
997 bool hasVOP3Literal() const {
998 return HasVOP3Literal;
999 }
1000
1001 bool hasNoDataDepHazard() const {
1002 return HasNoDataDepHazard;
1003 }
1004
1006 return getGeneration() < SEA_ISLANDS;
1007 }
1008
1009 bool hasInstPrefetch() const {
1010 return getGeneration() == GFX10 || getGeneration() == GFX11;
1011 }
1012
1013 bool hasPrefetch() const { return GFX12Insts; }
1014
1015 bool hasVmemPrefInsts() const { return HasVmemPrefInsts; }
1016
1018
1019 bool hasSafeCUPrefetch() const { return HasSafeCUPrefetch; }
1020
1021 // Has s_cmpk_* instructions.
1022 bool hasSCmpK() const { return getGeneration() < GFX12; }
1023
1024 // Scratch is allocated in 256 dword per wave blocks for the entire
1025 // wavefront. When viewed from the perspective of an arbitrary workitem, this
1026 // is 4-byte aligned.
1027 //
1028 // Only 4-byte alignment is really needed to access anything. Transformations
1029 // on the pointer value itself may rely on the alignment / known low bits of
1030 // the pointer. Set this to something above the minimum to avoid needing
1031 // dynamic realignment in common cases.
1032 Align getStackAlignment() const { return Align(16); }
1033
1034 bool enableMachineScheduler() const override {
1035 return true;
1036 }
1037
1038 bool useAA() const override;
1039
1040 bool enableSubRegLiveness() const override {
1041 return true;
1042 }
1043
1046
1047 // static wrappers
1048 static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
1049
1050 // XXX - Why is this here if it isn't in the default pass set?
1051 bool enableEarlyIfConversion() const override {
1052 return true;
1053 }
1054
1056 const SchedRegion &Region) const override;
1057
1059 const SchedRegion &Region) const override;
1060
1061 void mirFileLoaded(MachineFunction &MF) const override;
1062
1063 unsigned getMaxNumUserSGPRs() const {
1064 return AMDGPU::getMaxNumUserSGPRs(*this);
1065 }
1066
1067 bool hasSMemRealTime() const {
1068 return HasSMemRealTime;
1069 }
1070
1071 bool hasMovrel() const {
1072 return HasMovrel;
1073 }
1074
1075 bool hasVGPRIndexMode() const {
1076 return HasVGPRIndexMode;
1077 }
1078
1079 bool useVGPRIndexMode() const;
1080
1082 return getGeneration() >= VOLCANIC_ISLANDS;
1083 }
1084
1086
1087 bool hasScalarStores() const {
1088 return HasScalarStores;
1089 }
1090
1091 bool hasScalarAtomics() const {
1092 return HasScalarAtomics;
1093 }
1094
1095 bool hasLDSFPAtomicAddF32() const { return GFX8Insts; }
1097
1098 /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
1099 bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
1100
1101 /// \returns true if the subtarget has the v_permlane64_b32 instruction.
1102 bool hasPermLane64() const { return getGeneration() >= GFX11; }
1103
1104 bool hasDPP() const {
1105 return HasDPP;
1106 }
1107
1108 bool hasDPPBroadcasts() const {
1109 return HasDPP && getGeneration() < GFX10;
1110 }
1111
1113 return HasDPP && getGeneration() < GFX10;
1114 }
1115
1116 bool hasDPP8() const {
1117 return HasDPP8;
1118 }
1119
1120 bool hasDPALU_DPP() const {
1121 return HasDPALU_DPP;
1122 }
1123
1124 bool hasDPPSrc1SGPR() const { return HasDPPSrc1SGPR; }
1125
1126 bool hasPackedFP32Ops() const {
1127 return HasPackedFP32Ops;
1128 }
1129
1130 // Has V_PK_MOV_B32 opcode
1131 bool hasPkMovB32() const {
1132 return GFX90AInsts;
1133 }
1134
1136 return getGeneration() >= GFX10 || hasGFX940Insts();
1137 }
1138
1139 bool hasFmaakFmamkF64Insts() const { return hasGFX1250Insts(); }
1140
1141 bool hasImageInsts() const {
1142 return HasImageInsts;
1143 }
1144
1146 return HasExtendedImageInsts;
1147 }
1148
1149 bool hasR128A16() const {
1150 return HasR128A16;
1151 }
1152
1153 bool hasA16() const { return HasA16; }
1154
1155 bool hasG16() const { return HasG16; }
1156
1157 bool hasOffset3fBug() const {
1158 return HasOffset3fBug;
1159 }
1160
1162
1164
1165 bool hasMADIntraFwdBug() const { return HasMADIntraFwdBug; }
1166
1168
1170
1171 bool hasNSAEncoding() const { return HasNSAEncoding; }
1172
1173 bool hasNonNSAEncoding() const { return getGeneration() < GFX12; }
1174
1176
1177 unsigned getNSAMaxSize(bool HasSampler = false) const {
1178 return AMDGPU::getNSAMaxSize(*this, HasSampler);
1179 }
1180
1181 bool hasGFX10_AEncoding() const {
1182 return GFX10_AEncoding;
1183 }
1184
1185 bool hasGFX10_BEncoding() const {
1186 return GFX10_BEncoding;
1187 }
1188
1189 bool hasGFX10_3Insts() const {
1190 return GFX10_3Insts;
1191 }
1192
1193 bool hasMadF16() const;
1194
1195 bool hasMovB64() const { return GFX940Insts || GFX1250Insts; }
1196
1197 bool hasLshlAddU64Inst() const { return HasLshlAddU64Inst; }
1198
1199 // Scalar and global loads support scale_offset bit.
1200 bool hasScaleOffset() const { return GFX1250Insts; }
1201
1202 bool hasFlatGVSMode() const { return FlatGVSMode; }
1203
1204 // FLAT GLOBAL VOffset is signed
1205 bool hasSignedGVSOffset() const { return GFX1250Insts; }
1206
1207 bool enableSIScheduler() const {
1208 return EnableSIScheduler;
1209 }
1210
1211 bool loadStoreOptEnabled() const {
1212 return EnableLoadStoreOpt;
1213 }
1214
1215 bool hasSGPRInitBug() const {
1216 return SGPRInitBug;
1217 }
1218
1220 return UserSGPRInit16Bug && isWave32();
1221 }
1222
1224
1228
1231 }
1232
1236
1237 // \returns true if the subtarget supports DWORDX3 load/store instructions.
1239 return CIInsts;
1240 }
1241
1244 }
1245
1250
1253 }
1254
1257 }
1258
1261 }
1262
1265 }
1266
1269 }
1270
1271 bool hasLDSMisalignedBug() const {
1272 return LDSMisalignedBug && !EnableCuMode;
1273 }
1274
1276 return HasInstFwdPrefetchBug;
1277 }
1278
1280 return HasVcmpxExecWARHazard;
1281 }
1282
1285 }
1286
1287 // Shift amount of a 64 bit shift cannot be a highest allocated register
1288 // if also at the end of the allocation block.
1290 return GFX90AInsts && !GFX940Insts;
1291 }
1292
1293 // Has one cycle hazard on transcendental instruction feeding a
1294 // non transcendental VALU.
1295 bool hasTransForwardingHazard() const { return GFX940Insts; }
1296
1297 // Has one cycle hazard on a VALU instruction partially writing dst with
1298 // a shift of result bits feeding another VALU instruction.
1300
1301 // Cannot use op_sel with v_dot instructions.
1302 bool hasDOTOpSelHazard() const { return GFX940Insts || GFX11Insts; }
1303
1304 // Does not have HW interlocs for VALU writing and then reading SGPRs.
1305 bool hasVDecCoExecHazard() const {
1306 return GFX940Insts;
1307 }
1308
1309 bool hasNSAtoVMEMBug() const {
1310 return HasNSAtoVMEMBug;
1311 }
1312
1313 bool hasNSAClauseBug() const { return HasNSAClauseBug; }
1314
1315 bool hasHardClauses() const { return MaxHardClauseLength > 0; }
1316
1317 bool hasGFX90AInsts() const { return GFX90AInsts; }
1318
1320 return getGeneration() == GFX10;
1321 }
1322
1323 bool hasVOP3DPP() const { return getGeneration() >= GFX11; }
1324
1325 bool hasLdsDirect() const { return getGeneration() >= GFX11; }
1326
1327 bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; }
1328
1330 return getGeneration() == GFX11;
1331 }
1332
1334
1336
1337 bool requiresCodeObjectV6() const { return RequiresCOV6; }
1338
1340
1344
1345 bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; }
1346
1347 bool hasVALUReadSGPRHazard() const { return GFX12Insts && !GFX1250Insts; }
1348
1350 return GFX1250Insts && getGeneration() == GFX12;
1351 }
1352
1353 /// Return if operations acting on VGPR tuples require even alignment.
1354 bool needsAlignedVGPRs() const { return RequiresAlignVGPR; }
1355
1356 /// Return true if the target has the S_PACK_HL_B32_B16 instruction.
1357 bool hasSPackHL() const { return GFX11Insts; }
1358
1359 /// Return true if the target's EXP instruction has the COMPR flag, which
1360 /// affects the meaning of the EN (enable) bits.
1361 bool hasCompressedExport() const { return !GFX11Insts; }
1362
1363 /// Return true if the target's EXP instruction supports the NULL export
1364 /// target.
1365 bool hasNullExportTarget() const { return !GFX11Insts; }
1366
1367 bool has1_5xVGPRs() const { return Has1_5xVGPRs; }
1368
1369 bool hasVOPDInsts() const { return HasVOPDInsts; }
1370
1372
1373 /// Return true if the target has the S_DELAY_ALU instruction.
1374 bool hasDelayAlu() const { return GFX11Insts; }
1375
1376 bool hasPackedTID() const { return HasPackedTID; }
1377
1378 // GFX94* is derived from GFX90A. hasGFX940Insts() being true implies that
1379 // hasGFX90AInsts() is also true.
1380 bool hasGFX940Insts() const { return GFX940Insts; }
1381
1382 // GFX950 is derived from GFX94*. hasGFX950Insts() being true implies that
1383 // hasGFX940Insts() and hasGFX90AInsts() are also true.
1384 bool hasGFX950Insts() const { return GFX950Insts; }
1385
1386 /// Returns true if the target supports
1387 /// global_load_lds_dwordx3/global_load_lds_dwordx4 or
1388 /// buffer_load_dwordx3/buffer_load_dwordx4 with the lds bit.
1389 bool hasLDSLoadB96_B128() const {
1390 return hasGFX950Insts();
1391 }
1392
1393 bool hasVMemToLDSLoad() const { return HasVMemToLDSLoad; }
1394
1395 bool hasSALUFloatInsts() const { return HasSALUFloatInsts; }
1396
1398
1400
1402
1404
1405 /// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt
1406 /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
1407 bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
1408
1409 /// \returns true if inline constants are not supported for F16 pseudo
1410 /// scalar transcendentals.
1412 return getGeneration() == GFX12;
1413 }
1414
1415 /// \returns true if the target has instructions with xf32 format support.
1416 bool hasXF32Insts() const { return HasXF32Insts; }
1417
1418 bool hasBitOp3Insts() const { return HasBitOp3Insts; }
1419
1420 bool hasPermlane16Swap() const { return HasPermlane16Swap; }
1421 bool hasPermlane32Swap() const { return HasPermlane32Swap; }
1422 bool hasAshrPkInsts() const { return HasAshrPkInsts; }
1423
1426 }
1427
1430 }
1431
1432 bool hasMin3Max3PKF16() const { return HasMin3Max3PKF16; }
1433
1434 bool hasTanhInsts() const { return HasTanhInsts; }
1435
1437
1438 bool hasAddPC64Inst() const { return GFX1250Insts; }
1439
1441
1444 }
1445
1447
1448 /// \returns true if the target has s_wait_xcnt insertion. Supported for
1449 /// GFX1250.
1450 bool hasWaitXCnt() const { return HasWaitXcnt; }
1451
1452 // A single-DWORD instruction can use a 64-bit literal.
1453 bool has64BitLiterals() const { return Has64BitLiterals; }
1454
1456
1458
1459 /// \returns The maximum number of instructions that can be enclosed in an
1460 /// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
1461 /// instruction.
1462 unsigned maxHardClauseLength() const { return MaxHardClauseLength; }
1463
1464 bool hasPrngInst() const { return HasPrngInst; }
1465
1467
1468 /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
1469 /// SGPRs
1470 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
1471
1472 /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
1473 /// VGPRs
1474 unsigned getOccupancyWithNumVGPRs(unsigned VGPRs,
1475 unsigned DynamicVGPRBlockSize) const;
1476
1477 /// Subtarget's minimum/maximum occupancy, in number of waves per EU, that can
1478 /// be achieved when the only function running on a CU is \p F, each workgroup
1479 /// uses \p LDSSize bytes of LDS, and each wave uses \p NumSGPRs SGPRs and \p
1480 /// NumVGPRs VGPRs. The flat workgroup sizes associated to the function are a
1481 /// range, so this returns a range as well.
1482 ///
1483 /// Note that occupancy can be affected by the scratch allocation as well, but
1484 /// we do not have enough information to compute it.
1485 std::pair<unsigned, unsigned> computeOccupancy(const Function &F,
1486 unsigned LDSSize = 0,
1487 unsigned NumSGPRs = 0,
1488 unsigned NumVGPRs = 0) const;
1489
1490 /// \returns true if the flat_scratch register should be initialized with the
1491 /// pointer to the wave's scratch memory rather than a size and offset.
1494 }
1495
1496 /// \returns true if the flat_scratch register is initialized by the HW.
1497 /// In this case it is readonly.
1499
1500 /// \returns true if the architected SGPRs are enabled.
1502
1503 /// \returns true if Global Data Share is supported.
1504 bool hasGDS() const { return HasGDS; }
1505
1506 /// \returns true if Global Wave Sync is supported.
1507 bool hasGWS() const { return HasGWS; }
1508
1509 /// \returns true if the machine has merged shaders in which s0-s7 are
1510 /// reserved by the hardware and user SGPRs start at s8
1511 bool hasMergedShaders() const {
1512 return getGeneration() >= GFX9;
1513 }
1514
1515 // \returns true if the target supports the pre-NGG legacy geometry path.
1516 bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
1517
1518 // \returns true if preloading kernel arguments is supported.
1519 bool hasKernargPreload() const { return KernargPreload; }
1520
1521 // \returns true if the target has the split barriers feature.
1522 bool hasSplitBarriers() const { return getGeneration() >= GFX12; }
1523
1524 // \returns true if FP8/BF8 VOP1 form of conversion to F32 is unreliable.
1525 bool hasCvtFP8VOP1Bug() const { return HasCvtFP8Vop1Bug; }
1526
1527 // \returns true if CSUB (a.k.a. SUB_CLAMP on GFX12) atomics support a
1528 // no-return form.
1530
1531 // \returns true if the target has DX10_CLAMP kernel descriptor mode bit
1532 bool hasDX10ClampMode() const { return getGeneration() < GFX12; }
1533
1534 // \returns true if the target has IEEE kernel descriptor mode bit
1535 bool hasIEEEMode() const { return getGeneration() < GFX12; }
1536
1537 // \returns true if the target has IEEE fminimum/fmaximum instructions
1539
1540 // \returns true if the target has WG_RR_MODE kernel descriptor mode bit
1541 bool hasRrWGMode() const { return getGeneration() >= GFX12; }
1542
1543 /// \returns true if VADDR and SADDR fields in VSCRATCH can use negative
1544 /// values.
1545 bool hasSignedScratchOffsets() const { return getGeneration() >= GFX12; }
1546
1547 bool hasGFX1250Insts() const { return GFX1250Insts; }
1548
1549 bool hasVOPD3() const { return GFX1250Insts; }
1550
1551 // \returns true if the target has V_ADD_U64/V_SUB_U64 instructions.
1552 bool hasAddSubU64Insts() const { return HasAddSubU64Insts; }
1553
1554 // \returns true if the target has V_MAD_U32 instruction.
1555 bool hasMadU32Inst() const { return HasMadU32Inst; }
1556
1557 // \returns true if the target has V_MUL_U64/V_MUL_I64 instructions.
1558 bool hasVectorMulU64() const { return GFX1250Insts; }
1559
1560 // \returns true if the target has V_MAD_NC_U64_U32/V_MAD_NC_I64_I32
1561 // instructions.
1562 bool hasMadU64U32NoCarry() const { return GFX1250Insts; }
1563
1564 // \returns true if the target has V_{MIN|MAX}_{I|U}64 instructions.
1565 bool hasIntMinMax64() const { return GFX1250Insts; }
1566
1567 // \returns true if the target has V_ADD_{MIN|MAX}_{I|U}32 instructions.
1568 bool hasAddMinMaxInsts() const { return GFX1250Insts; }
1569
1570 // \returns true if the target has V_PK_ADD_{MIN|MAX}_{I|U}16 instructions.
1571 bool hasPkAddMinMaxInsts() const { return GFX1250Insts; }
1572
1573 // \returns true if the target has V_PK_{MIN|MAX}3_{I|U}16 instructions.
1574 bool hasPkMinMax3Insts() const { return GFX1250Insts; }
1575
1576 // \returns true if the target has the S_GET_SHADER_CYCLES_U64 instruction.
1577 bool hasSGetShaderCyclesInst() const { return GFX1250Insts; }
1578
1579 // \returns true if target has S_SETPRIO_INC_WG instruction.
1581
1582 // \returns true if S_GETPC_B64 zero-extends the result from 48 bits instead
1583 // of sign-extending. Note that GFX1250 has not only fixed the bug but also
1584 // extended VA to 57 bits.
1585 bool hasGetPCZeroExtension() const { return GFX12Insts && !GFX1250Insts; }
1586
1587 // \returns true if the target needs to create a prolog for backward
1588 // compatibility when preloading kernel arguments.
1590 return hasKernargPreload() && !GFX1250Insts;
1591 }
1592
1593 /// \returns SGPR allocation granularity supported by the subtarget.
1594 unsigned getSGPRAllocGranule() const {
1596 }
1597
1598 /// \returns SGPR encoding granularity supported by the subtarget.
1599 unsigned getSGPREncodingGranule() const {
1601 }
1602
1603 /// \returns Total number of SGPRs supported by the subtarget.
1604 unsigned getTotalNumSGPRs() const {
1606 }
1607
1608 /// \returns Addressable number of SGPRs supported by the subtarget.
1609 unsigned getAddressableNumSGPRs() const {
1611 }
1612
1613 /// \returns Minimum number of SGPRs that meets the given number of waves per
1614 /// execution unit requirement supported by the subtarget.
1615 unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1616 return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1617 }
1618
1619 /// \returns Maximum number of SGPRs that meets the given number of waves per
1620 /// execution unit requirement supported by the subtarget.
1621 unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1622 return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1623 }
1624
1625 /// \returns Reserved number of SGPRs. This is common
1626 /// utility function called by MachineFunction and
1627 /// Function variants of getReservedNumSGPRs.
1628 unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const;
1629 /// \returns Reserved number of SGPRs for given machine function \p MF.
1630 unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1631
1632 /// \returns Reserved number of SGPRs for given function \p F.
1633 unsigned getReservedNumSGPRs(const Function &F) const;
1634
1635 /// \returns Maximum number of preloaded SGPRs for the subtarget.
1636 unsigned getMaxNumPreloadedSGPRs() const;
1637
1638 /// \returns max num SGPRs. This is the common utility
1639 /// function called by MachineFunction and Function
1640 /// variants of getMaxNumSGPRs.
1641 unsigned getBaseMaxNumSGPRs(const Function &F,
1642 std::pair<unsigned, unsigned> WavesPerEU,
1643 unsigned PreloadedSGPRs,
1644 unsigned ReservedNumSGPRs) const;
1645
1646 /// \returns Maximum number of SGPRs that meets number of waves per execution
1647 /// unit requirement for function \p MF, or number of SGPRs explicitly
1648 /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1649 ///
1650 /// \returns Value that meets number of waves per execution unit requirement
1651 /// if explicitly requested value cannot be converted to integer, violates
1652 /// subtarget's specifications, or does not meet number of waves per execution
1653 /// unit requirement.
1654 unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1655
1656 /// \returns Maximum number of SGPRs that meets number of waves per execution
1657 /// unit requirement for function \p F, or number of SGPRs explicitly
1658 /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
1659 ///
1660 /// \returns Value that meets number of waves per execution unit requirement
1661 /// if explicitly requested value cannot be converted to integer, violates
1662 /// subtarget's specifications, or does not meet number of waves per execution
1663 /// unit requirement.
1664 unsigned getMaxNumSGPRs(const Function &F) const;
1665
1666 /// \returns VGPR allocation granularity supported by the subtarget.
1667 unsigned getVGPRAllocGranule(unsigned DynamicVGPRBlockSize) const {
1668 return AMDGPU::IsaInfo::getVGPRAllocGranule(this, DynamicVGPRBlockSize);
1669 }
1670
1671 /// \returns VGPR encoding granularity supported by the subtarget.
1672 unsigned getVGPREncodingGranule() const {
1674 }
1675
1676 /// \returns Total number of VGPRs supported by the subtarget.
1677 unsigned getTotalNumVGPRs() const {
1679 }
1680
1681 /// \returns Addressable number of architectural VGPRs supported by the
1682 /// subtarget.
1686
1687 /// \returns Addressable number of VGPRs supported by the subtarget.
1688 unsigned getAddressableNumVGPRs(unsigned DynamicVGPRBlockSize) const {
1689 return AMDGPU::IsaInfo::getAddressableNumVGPRs(this, DynamicVGPRBlockSize);
1690 }
1691
1692 /// \returns the minimum number of VGPRs that will prevent achieving more than
1693 /// the specified number of waves \p WavesPerEU.
1694 unsigned getMinNumVGPRs(unsigned WavesPerEU,
1695 unsigned DynamicVGPRBlockSize) const {
1696 return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU,
1697 DynamicVGPRBlockSize);
1698 }
1699
1700 /// \returns the maximum number of VGPRs that can be used and still achieve
1701 /// at least the specified number of waves \p WavesPerEU.
1702 unsigned getMaxNumVGPRs(unsigned WavesPerEU,
1703 unsigned DynamicVGPRBlockSize) const {
1704 return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU,
1705 DynamicVGPRBlockSize);
1706 }
1707
1708 /// \returns max num VGPRs. This is the common utility function
1709 /// called by MachineFunction and Function variants of getMaxNumVGPRs.
1710 unsigned
1712 std::pair<unsigned, unsigned> NumVGPRBounds) const;
1713
1714 /// \returns Maximum number of VGPRs that meets number of waves per execution
1715 /// unit requirement for function \p F, or number of VGPRs explicitly
1716 /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
1717 ///
1718 /// \returns Value that meets number of waves per execution unit requirement
1719 /// if explicitly requested value cannot be converted to integer, violates
1720 /// subtarget's specifications, or does not meet number of waves per execution
1721 /// unit requirement.
1722 unsigned getMaxNumVGPRs(const Function &F) const;
1723
1724 unsigned getMaxNumAGPRs(const Function &F) const {
1725 return getMaxNumVGPRs(F);
1726 }
1727
1728 /// Return a pair of maximum numbers of VGPRs and AGPRs that meet the number
1729 /// of waves per execution unit required for the function \p F.
1730 std::pair<unsigned, unsigned> getMaxNumVectorRegs(const Function &F) const;
1731
1732 /// \returns Maximum number of VGPRs that meets number of waves per execution
1733 /// unit requirement for function \p MF, or number of VGPRs explicitly
1734 /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1735 ///
1736 /// \returns Value that meets number of waves per execution unit requirement
1737 /// if explicitly requested value cannot be converted to integer, violates
1738 /// subtarget's specifications, or does not meet number of waves per execution
1739 /// unit requirement.
1740 unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1741
1742 bool supportsWave32() const { return getGeneration() >= GFX10; }
1743
1744 bool supportsWave64() const { return !hasGFX1250Insts(); }
1745
1746 bool isWave32() const {
1747 return getWavefrontSize() == 32;
1748 }
1749
1750 bool isWave64() const {
1751 return getWavefrontSize() == 64;
1752 }
1753
1754 /// Returns whether the wavesize of this subtarget is reliably known. This is
1755 /// false only for a default target-cpu that does not have an explicit
1756 /// +wavefrontsize target feature.
1757 bool isWaveSizeKnown() const {
1758 return hasFeature(AMDGPU::FeatureWavefrontSize32) ||
1759 hasFeature(AMDGPU::FeatureWavefrontSize64);
1760 }
1761
1763 return getRegisterInfo()->getBoolRC();
1764 }
1765
1766 /// \returns Maximum number of work groups per compute unit supported by the
1767 /// subtarget and limited by given \p FlatWorkGroupSize.
1768 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1769 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1770 }
1771
1772 /// \returns Minimum flat work group size supported by the subtarget.
1773 unsigned getMinFlatWorkGroupSize() const override {
1775 }
1776
1777 /// \returns Maximum flat work group size supported by the subtarget.
1778 unsigned getMaxFlatWorkGroupSize() const override {
1780 }
1781
1782 /// \returns Number of waves per execution unit required to support the given
1783 /// \p FlatWorkGroupSize.
1784 unsigned
1785 getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
1786 return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
1787 }
1788
1789 /// \returns Minimum number of waves per execution unit supported by the
1790 /// subtarget.
1791 unsigned getMinWavesPerEU() const override {
1793 }
1794
1795 void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
1796 SDep &Dep,
1797 const TargetSchedModel *SchedModel) const override;
1798
1799 // \returns true if it's beneficial on this subtarget for the scheduler to
1800 // cluster stores as well as loads.
1801 bool shouldClusterStores() const { return getGeneration() >= GFX11; }
1802
1803 // \returns the number of address arguments from which to enable MIMG NSA
1804 // on supported architectures.
1805 unsigned getNSAThreshold(const MachineFunction &MF) const;
1806
1807 // \returns true if the subtarget has a hazard requiring an "s_nop 0"
1808 // instruction before "s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)".
1810
1811 // \returns true if the subtarget needs S_WAIT_ALU 0 before S_GETREG_B32 on
1812 // STATUS, STATE_PRIV, EXCP_FLAG_PRIV, or EXCP_FLAG_USER.
1814
1815 bool isDynamicVGPREnabled() const { return DynamicVGPR; }
1816 unsigned getDynamicVGPRBlockSize() const {
1817 return DynamicVGPRBlockSize32 ? 32 : 16;
1818 }
1819
1821 // AMDGPU doesn't care if early-clobber and undef operands are allocated
1822 // to the same register.
1823 return false;
1824 }
1825
1826 // DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 shall not be claused with anything
1827 // and must be surrounded by S_WAIT_ALU(0xFFE3).
1829 return getGeneration() == GFX12;
1830 }
1831
1832 // Requires s_wait_alu(0) after s102/s103 write and src_flat_scratch_base
1833 // read.
1835 return GFX1250Insts && getGeneration() == GFX12;
1836 }
1837
1838 /// \returns true if the subtarget supports clusters of workgroups.
1839 bool hasClusters() const { return GFX1250Insts; }
1840
1841 /// \returns true if the subtarget requires a wait for xcnt before atomic
1842 /// flat/global stores & rmw.
1844
1845 /// \returns the number of significant bits in the immediate field of the
1846 /// S_NOP instruction.
1847 unsigned getSNopBits() const {
1849 return 7;
1851 return 4;
1852 return 3;
1853 }
1854
1855 /// \returns true if the sub-target supports buffer resource (V#) with 45-bit
1856 /// num_records.
1860};
1861
1863public:
1864 bool hasImplicitBufferPtr() const { return ImplicitBufferPtr; }
1865
1866 bool hasPrivateSegmentBuffer() const { return PrivateSegmentBuffer; }
1867
1868 bool hasDispatchPtr() const { return DispatchPtr; }
1869
1870 bool hasQueuePtr() const { return QueuePtr; }
1871
1872 bool hasKernargSegmentPtr() const { return KernargSegmentPtr; }
1873
1874 bool hasDispatchID() const { return DispatchID; }
1875
1876 bool hasFlatScratchInit() const { return FlatScratchInit; }
1877
1878 bool hasPrivateSegmentSize() const { return PrivateSegmentSize; }
1879
1880 unsigned getNumKernargPreloadSGPRs() const { return NumKernargPreloadSGPRs; }
1881
1882 unsigned getNumUsedUserSGPRs() const { return NumUsedUserSGPRs; }
1883
1884 unsigned getNumFreeUserSGPRs();
1885
1886 void allocKernargPreloadSGPRs(unsigned NumSGPRs);
1887
1898
1899 // Returns the size in number of SGPRs for preload user SGPR field.
1901 switch (ID) {
1903 return 2;
1905 return 4;
1906 case DispatchPtrID:
1907 return 2;
1908 case QueuePtrID:
1909 return 2;
1911 return 2;
1912 case DispatchIdID:
1913 return 2;
1914 case FlatScratchInitID:
1915 return 2;
1917 return 1;
1918 }
1919 llvm_unreachable("Unknown UserSGPRID.");
1920 }
1921
1922 GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST);
1923
1924private:
1925 const GCNSubtarget &ST;
1926
1927 // Private memory buffer
1928 // Compute directly in sgpr[0:1]
1929 // Other shaders indirect 64-bits at sgpr[0:1]
1930 bool ImplicitBufferPtr = false;
1931
1932 bool PrivateSegmentBuffer = false;
1933
1934 bool DispatchPtr = false;
1935
1936 bool QueuePtr = false;
1937
1938 bool KernargSegmentPtr = false;
1939
1940 bool DispatchID = false;
1941
1942 bool FlatScratchInit = false;
1943
1944 bool PrivateSegmentSize = false;
1945
1946 unsigned NumKernargPreloadSGPRs = 0;
1947
1948 unsigned NumUsedUserSGPRs = 0;
1949};
1950
1951} // end namespace llvm
1952
1953#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
Base class for AMDGPU specific classes of TargetSubtarget.
#define F(x, y, z)
Definition MD5.cpp:55
SI DAG Lowering interface definition.
Interface definition for SIInstrInfo.
unsigned getWavefrontSizeLog2() const
unsigned getMaxWavesPerEU() const
unsigned getWavefrontSize() const
bool hasPrefetch() const
bool hasMemoryAtomicFaddF32DenormalSupport() const
bool hasFlat() const
bool hasD16Images() const
bool hasMinimum3Maximum3F32() const
InstrItineraryData InstrItins
bool useVGPRIndexMode() const
bool hasAtomicDsPkAdd16Insts() const
bool hasSDWAOmod() const
bool hasFlatGVSMode() const
bool hasPermlane32Swap() const
bool partialVCCWritesUpdateVCCZ() const
Writes to VCC_LO/VCC_HI update the VCCZ flag.
bool hasSwap() const
bool hasPkFmacF16Inst() const
bool HasAtomicFMinFMaxF64FlatInsts
bool hasPkMinMax3Insts() const
bool hasDot2Insts() const
bool hasD16LoadStore() const
bool hasMergedShaders() const
bool hasA16() const
bool hasSDWAScalar() const
bool hasRrWGMode() const
bool supportsBackOffBarrier() const
bool hasScalarCompareEq64() const
bool has1_5xVGPRs() const
int getLDSBankCount() const
bool hasSafeCUPrefetch() const
bool hasOnlyRevVALUShifts() const
bool hasImageStoreD16Bug() const
bool hasNonNSAEncoding() const
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
void mirFileLoaded(MachineFunction &MF) const override
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
bool loadStoreOptEnabled() const
bool enableSubRegLiveness() const override
bool hasDPPWavefrontShifts() const
unsigned getSGPRAllocGranule() const
bool hasAtomicFMinFMaxF64FlatInsts() const
bool hasLdsAtomicAddF64() const
bool hasFlatLgkmVMemCountInOrder() const
bool Has45BitNumRecordsBufferResource
bool flatScratchIsPointer() const
bool hasSDWAMac() const
bool hasFP8ConversionInsts() const
bool hasShift64HighRegBug() const
bool hasDot7Insts() const
bool hasApertureRegs() const
unsigned MaxPrivateElementSize
bool unsafeDSOffsetFoldingEnabled() const
bool hasBitOp3Insts() const
bool hasFPAtomicToDenormModeHazard() const
unsigned getAddressableNumArchVGPRs() const
bool hasFlatInstOffsets() const
bool vmemWriteNeedsExpWaitcnt() const
bool hasAtomicFMinFMaxF32FlatInsts() const
bool shouldClusterStores() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
unsigned getSGPREncodingGranule() const
bool hasIEEEMinimumMaximumInsts() const
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
bool hasLdsBranchVmemWARHazard() const
bool hasDefaultComponentZero() const
bool hasGetWaveIdInst() const
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (...
bool hasGFX90AInsts() const
bool hasDstSelForwardingHazard() const
void setScalarizeGlobalBehavior(bool b)
bool hasRelaxedBufferOOBMode() const
bool hasPkAddMinMaxInsts() const
bool hasDLInsts() const
bool hasExtendedImageInsts() const
bool hasVmemWriteVgprInOrder() const
bool hasBCNT(unsigned Size) const
unsigned getSNopBits() const
bool hasMAIInsts() const
bool hasLDSLoadB96_B128() const
Returns true if the target supports global_load_lds_dwordx3/global_load_lds_dwordx4 or buffer_load_dw...
bool has1024AddressableVGPRs() const
bool supportsAgentScopeFineGrainedRemoteMemoryAtomics() const
bool hasFlatScratchInsts() const
bool hasMultiDwordFlatScratchAddressing() const
bool hasArchitectedSGPRs() const
bool hasFmaakFmamkF64Insts() const
bool hasTanhInsts() const
bool hasHWFP64() const
bool hasScaleOffset() const
bool hasDenormModeInst() const
bool hasPrivEnabledTrap2NopBug() const
bool hasMFMAInlineLiteralBug() const
bool hasCvtScaleForwardingHazard() const
unsigned getTotalNumVGPRs() const
unsigned getMinWavesPerEU() const override
bool hasSMemTimeInst() const
bool hasUnalignedDSAccessEnabled() const
bool hasTensorCvtLutInsts() const
bool hasNegativeScratchOffsetBug() const
const SIInstrInfo * getInstrInfo() const override
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool hasDot1Insts() const
bool hasDot3Insts() const
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasMADIntraFwdBug() const
bool hasVALUMaskWriteHazard() const
const InlineAsmLowering * getInlineAsmLowering() const override
bool hasAutoWaitcntBeforeBarrier() const
bool hasNSAClauseBug() const
bool hasAtomicFaddRtnInsts() const
unsigned getTotalNumSGPRs() const
bool hasGFX1250Insts() const
const InstrItineraryData * getInstrItineraryData() const override
bool hasSafeSmemPrefetch() const
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep, const TargetSchedModel *SchedModel) const override
void overridePostRASchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const override
bool HasShaderCyclesHiLoRegisters
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
bool hasPkMovB32() const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool hasGFX10_3Insts() const
Align getStackAlignment() const
bool privateMemoryResourceIsRangeChecked() const
bool hasScalarSubwordLoads() const
bool hasDot11Insts() const
bool enableFlatScratch() const
bool hasMadF16() const
bool hasDsAtomicAsyncBarrierArriveB64PipeBug() const
bool hasMin3Max3PKF16() const
bool hasUnalignedBufferAccess() const
bool hasR128A16() const
bool hasOffset3fBug() const
bool hasDwordx3LoadStores() const
bool hasPrngInst() const
bool hasSignedScratchOffsets() const
bool hasGlobalAddTidInsts() const
bool hasSGPRInitBug() const
bool hasFlatScrRegister() const
bool hasFmaMixBF16Insts() const
bool hasGetPCZeroExtension() const
bool hasPermLane64() const
bool requiresNopBeforeDeallocVGPRs() const
unsigned getMinNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
bool hasVMemToLDSLoad() const
bool supportsGetDoorbellID() const
bool supportsWave32() const
bool hasVcmpxExecWARHazard() const
bool isTgSplitEnabled() const
bool hasFlatAtomicFaddF32Inst() const
bool hasKernargPreload() const
bool hasFP8Insts() const
unsigned getMaxNumAGPRs(const Function &F) const
bool hasReadM0MovRelInterpHazard() const
bool isDynamicVGPREnabled() const
const SIRegisterInfo * getRegisterInfo() const override
bool hasRequiredExportPriority() const
bool hasDOTOpSelHazard() const
bool hasLdsWaitVMSRC() const
bool hasMSAALoadDstSelBug() const
const TargetRegisterClass * getBoolRC() const
unsigned getBaseMaxNumVGPRs(const Function &F, std::pair< unsigned, unsigned > NumVGPRBounds) const
bool hasFmaakFmamkF32Insts() const
bool hasClusters() const
bool hasVscnt() const
bool hasMad64_32() const
InstructionSelector * getInstructionSelector() const override
unsigned getVGPREncodingGranule() const
bool NegativeUnalignedScratchOffsetBug
bool hasHardClauses() const
bool useDS128() const
bool hasExtendedWaitCounts() const
bool hasBVHDualAndBVH8Insts() const
bool hasMinimum3Maximum3PKF16() const
bool hasLshlAddU64Inst() const
bool hasLDSMisalignedBug() const
bool d16PreservesUnusedBits() const
bool hasFmacF64Inst() const
bool hasXF32Insts() const
bool hasInstPrefetch() const
bool hasAddPC64Inst() const
unsigned maxHardClauseLength() const
bool hasAshrPkInsts() const
bool isMesaGfxShader(const Function &F) const
bool hasVcmpxPermlaneHazard() const
bool hasUserSGPRInit16Bug() const
bool hasExportInsts() const
bool hasDPP() const
bool hasVINTERPEncoding() const
bool hasGloballyAddressableScratch() const
const AMDGPURegisterBankInfo * getRegBankInfo() const override
bool hasAddSubU64Insts() const
bool hasLegacyGeometry() const
bool has64BitLiterals() const
TrapHandlerAbi getTrapHandlerAbi() const
bool isCuModeEnabled() const
bool hasScalarAtomics() const
const SIFrameLowering * getFrameLowering() const override
bool hasUnalignedScratchAccess() const
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns if the result of this instruction with a 16-bit result returned in a 32-bit register implicit...
bool hasMinimum3Maximum3F16() const
bool hasSDWAOutModsVOPC() const
bool hasAtomicFMinFMaxF32GlobalInsts() const
unsigned getBaseMaxNumSGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
bool hasLdsBarrierArriveAtomic() const
bool hasGFX950Insts() const
bool has45BitNumRecordsBufferResource() const
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
unsigned getMaxNumPreloadedSGPRs() const
bool hasAtomicCSubNoRtnInsts() const
bool hasScalarFlatScratchInsts() const
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
bool has12DWordStoreHazard() const
bool hasVALUPartialForwardingHazard() const
bool dumpCode() const
bool hasNoDataDepHazard() const
void overrideSchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const override
bool useVGPRBlockOpsForCSR() const
std::pair< unsigned, unsigned > computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
bool hasUnalignedDSAccess() const
bool hasAddMinMaxInsts() const
bool needsKernArgPreloadProlog() const
bool hasRestrictedSOffset() const
bool hasMin3Max3_16() const
bool hasIntClamp() const
bool hasGFX10_AEncoding() const
bool hasFP8E5M3Insts() const
bool hasFlatSegmentOffsetBug() const
unsigned getMaxNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
unsigned getVGPRAllocGranule(unsigned DynamicVGPRBlockSize) const
bool hasEmulatedSystemScopeAtomics() const
bool hasMadU64U32NoCarry() const
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
const SITargetLowering * getTargetLowering() const override
bool hasPackedFP32Ops() const
bool hasTransForwardingHazard() const
bool hasDot6Insts() const
bool hasGFX940Insts() const
bool hasFullRate64Ops() const
bool hasScalarStores() const
bool isTrapHandlerEnabled() const
bool enableMachineScheduler() const override
bool hasLDSFPAtomicAddF64() const
bool hasFlatGlobalInsts() const
bool HasGloballyAddressableScratch
bool hasDX10ClampMode() const
unsigned getNSAThreshold(const MachineFunction &MF) const
bool HasAtomicFMinFMaxF32GlobalInsts
bool getScalarizeGlobalBehavior() const
bool HasAtomicFMinFMaxF32FlatInsts
bool hasReadM0LdsDmaHazard() const
bool hasScalarSMulU64() const
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
bool hasScratchBaseForwardingHazard() const
bool hasIntMinMax64() const
bool hasShaderCyclesHiLoRegisters() const
bool hasSDWASdst() const
bool HasDefaultComponentBroadcast
bool hasScalarPackInsts() const
bool hasFFBL() const
bool hasNSAEncoding() const
bool requiresDisjointEarlyClobberAndUndef() const override
bool hasVALUReadSGPRHazard() const
bool hasSMemRealTime() const
bool hasFlatAddressSpace() const
bool hasDPPBroadcasts() const
bool usePRTStrictNull() const
bool hasMovB64() const
bool hasVmemPrefInsts() const
unsigned getAddressableNumVGPRs(unsigned DynamicVGPRBlockSize) const
bool hasInstFwdPrefetchBug() const
bool hasAtomicFMinFMaxF64GlobalInsts() const
bool hasMed3_16() const
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
bool hasUnalignedScratchAccessEnabled() const
bool hasMovrel() const
bool hasNullExportTarget() const
Return true if the target's EXP instruction supports the NULL export target.
bool hasAtomicFlatPkAdd16Insts() const
bool hasBFI() const
bool hasDot13Insts() const
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that require m0 to be initialized.
bool hasSMEMtoVectorWriteHazard() const
bool useAA() const override
bool isWave32() const
bool hasVGPRIndexMode() const
bool HasAtomicBufferGlobalPkAddF16Insts
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs, unsigned DynamicVGPRBlockSize) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
bool hasUnalignedBufferAccessEnabled() const
bool isWaveSizeKnown() const
Returns if the wavesize of this subtarget is known reliable.
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
unsigned getMinFlatWorkGroupSize() const override
bool hasImageInsts() const
bool hasImageGather4D16Bug() const
bool hasFMA() const
bool hasDot10Insts() const
bool hasSPackHL() const
Return true if the target has the S_PACK_HL_B32_B16 instruction.
bool hasVMEMtoScalarWriteHazard() const
bool hasCvtFP8VOP1Bug() const
bool supportsMinMaxDenormModes() const
bool supportsWave64() const
bool HasAtomicBufferPkAddBF16Inst
bool hasNegativeUnalignedScratchOffsetBug() const
bool hasFFBH() const
bool hasFormattedMUBUFInsts() const
bool hasFlatScratchSVSMode() const
bool supportsWGP() const
bool hasG16() const
bool hasHalfRate64Ops() const
bool hasAtomicFaddInsts() const
bool HasAtomicBufferGlobalPkAddF16NoRtnInsts
bool hasPermlane16Swap() const
bool hasNSAtoVMEMBug() const
unsigned getNSAMaxSize(bool HasSampler=false) const
bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const
bool hasMIMG_R128() const
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
bool hasVOP3DPP() const
bool hasAtomicBufferPkAddBF16Inst() const
bool HasAgentScopeFineGrainedRemoteMemoryAtomics
unsigned getMaxFlatWorkGroupSize() const override
bool hasDPP8() const
bool hasDot5Insts() const
unsigned getMaxNumUserSGPRs() const
bool hasTransposeLoadF4F6Insts() const
bool hasMadU32Inst() const
bool hasAtomicFaddNoRtnInsts() const
unsigned MaxHardClauseLength
The maximum number of instructions that may be placed within an S_CLAUSE, which is one greater than t...
bool hasPermLaneX16() const
bool hasFlatScratchSVSSwizzleBug() const
bool hasFlatBufferGlobalAtomicFaddF64Inst() const
bool HasEmulatedSystemScopeAtomics
bool hasNoF16PseudoScalarTransInlineConstants() const
bool hasIEEEMode() const
bool hasScalarDwordx3Loads() const
bool hasVDecCoExecHazard() const
bool hasSignedGVSOffset() const
bool requiresWaitXCntBeforeAtomicStores() const
bool hasLDSFPAtomicAddF32() const
unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override
bool hasBFM() const
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
bool hasDelayAlu() const
Return true if the target has the S_DELAY_ALU instruction.
bool hasReadM0SendMsgHazard() const
bool hasDot8Insts() const
bool hasVectorMulU64() const
bool hasScalarMulHiInsts() const
bool hasSCmpK() const
bool hasPseudoScalarTrans() const
const LegalizerInfo * getLegalizerInfo() const override
bool requiresWaitIdleBeforeGetReg() const
bool hasPointSampleAccel() const
bool hasDot12Insts() const
bool hasDS96AndDS128() const
bool hasGWS() const
bool HasAtomicFMinFMaxF64GlobalInsts
bool hasReadM0LdsDirectHazard() const
bool useFlatForGlobal() const
static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI)
bool hasVOPDInsts() const
bool hasGFX10_BEncoding() const
Generation getGeneration() const
GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const GCNTargetMachine &TM)
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasVOP3Literal() const
bool hasAtomicBufferGlobalPkAddF16Insts() const
std::pair< unsigned, unsigned > getMaxNumVectorRegs(const Function &F) const
Return a pair of maximum numbers of VGPRs and AGPRs that meet the number of waves per execution unit ...
bool hasNoSdstCMPX() const
bool isXNACKEnabled() const
bool hasScalarAddSub64() const
bool hasSplitBarriers() const
bool hasUnpackedD16VMem() const
bool enableEarlyIfConversion() const override
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
bool hasSGetShaderCyclesInst() const
bool hasRFEHazards() const
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU instruction.
bool hasFlatScratchSTMode() const
unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const
bool hasGWSSemaReleaseAll() const
bool hasDPALU_DPP() const
bool enableSIScheduler() const
bool hasAtomicGlobalPkAddBF16Inst() const
bool hasAddr64() const
bool HasAtomicGlobalPkAddBF16Inst
bool hasUnalignedAccessMode() const
unsigned getAddressableNumSGPRs() const
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
bool isWave64() const
unsigned getDynamicVGPRBlockSize() const
bool hasFmaMixInsts() const
bool hasCARRY() const
bool hasPackedTID() const
bool setRegModeNeedsVNOPs() const
bool hasFP64() const
bool hasAddNoCarry() const
bool hasVALUTransUseHazard() const
bool hasShaderCyclesRegister() const
bool hasSALUFloatInsts() const
bool EnableUnsafeDSOffsetFolding
bool hasFractBug() const
bool isPreciseMemoryEnabled() const
bool hasDPPSrc1SGPR() const
bool hasGDS() const
unsigned getMaxWaveScratchSize() const
bool HasMemoryAtomicFaddF32DenormalSupport
bool hasMTBUFInsts() const
bool hasDot4Insts() const
bool flatScratchIsArchitected() const
bool hasPartialNSAEncoding() const
bool hasWaitXCnt() const
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
bool hasSetPrioIncWgInst() const
~GCNSubtarget() override
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
bool hasDot9Insts() const
bool hasVOPD3() const
bool hasAtomicCSub() const
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
bool hasDefaultComponentBroadcast() const
bool requiresCodeObjectV6() const
const CallLowering * getCallLowering() const override
bool hasBFE() const
bool hasLdsDirect() const
bool hasGWSAutoReplay() const
bool HasFlatBufferGlobalAtomicFaddF64Inst
static unsigned getNumUserSGPRForField(UserSGPRID ID)
void allocKernargPreloadSGPRs(unsigned NumSGPRs)
bool hasPrivateSegmentBuffer() const
unsigned getNumKernargPreloadSGPRs() const
unsigned getNumUsedUserSGPRs() const
GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST)
Itinerary data supplied by a subtarget to be used by a target.
Scheduling dependency.
Definition ScheduleDAG.h:51
const TargetRegisterClass * getBoolRC() const
Scheduling unit. This is a node in the scheduling DAG.
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:55
Provide an instruction scheduling machine model to CodeGen passes.
TargetSubtargetInfo - Generic base class for all target subtargets.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize)
LLVM_READNONE constexpr bool isShader(CallingConv::ID CC)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
int countl_zero(T Val)
Count the number of 0's from the most significant bit to the least, stopping at the first 1.
Definition bit.h:222
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
A region of an MBB for scheduling.