Thanks for visiting codestin.com
Credit goes to llvm.org

LLVM 22.0.0git
AMDGPUTargetMachine.cpp File Reference

This file contains both the AMDGPU target machine and the CodeGen pass builder. More...

#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"
#include "AMDGPUAliasAnalysis.h"
#include "AMDGPUCtorDtorLowering.h"
#include "AMDGPUExportClustering.h"
#include "AMDGPUExportKernelRuntimeHandles.h"
#include "AMDGPUIGroupLP.h"
#include "AMDGPUISelDAGToDAG.h"
#include "AMDGPULowerVGPREncoding.h"
#include "AMDGPUMacroFusion.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPUPreloadKernArgProlog.h"
#include "AMDGPUPrepareAGPRAlloc.h"
#include "AMDGPURemoveIncompatibleFunctions.h"
#include "AMDGPUReserveWWMRegs.h"
#include "AMDGPUResourceUsageAnalysis.h"
#include "AMDGPUSplitModule.h"
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPUUnifyDivergentExitNodes.h"
#include "AMDGPUWaitSGPRHazards.h"
#include "GCNDPPCombine.h"
#include "GCNIterativeScheduler.h"
#include "GCNNSAReassign.h"
#include "GCNPreRALongBranchReg.h"
#include "GCNPreRAOptimizations.h"
#include "GCNRewritePartialRegUses.h"
#include "GCNSchedStrategy.h"
#include "GCNVOPDUtils.h"
#include "R600.h"
#include "R600TargetMachine.h"
#include "SIFixSGPRCopies.h"
#include "SIFixVGPRCopies.h"
#include "SIFoldOperands.h"
#include "SIFormMemoryClauses.h"
#include "SILoadStoreOptimizer.h"
#include "SILowerControlFlow.h"
#include "SILowerSGPRSpills.h"
#include "SILowerWWMCopies.h"
#include "SIMachineFunctionInfo.h"
#include "SIMachineScheduler.h"
#include "SIOptimizeExecMasking.h"
#include "SIOptimizeExecMaskingPreRA.h"
#include "SIOptimizeVGPRLiveRange.h"
#include "SIPeepholeSDWA.h"
#include "SIPostRABundler.h"
#include "SIPreAllocateWWMRegs.h"
#include "SIShrinkInstructions.h"
#include "SIWholeQuadMode.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/KernelInfo.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/CodeGen/AtomicExpand.h"
#include "llvm/CodeGen/BranchRelaxation.h"
#include "llvm/CodeGen/DeadMachineInstructionElim.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/Localizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/MachineCSE.h"
#include "llvm/CodeGen/MachineLICM.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/PostRAHazardRecognizer.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/CodeGenPassBuilder.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/ExpandVariadics.h"
#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/Transforms/Scalar/FlattenCFG.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
#include "llvm/Transforms/Scalar/LICM.h"
#include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Scalar/NaryReassociate.h"
#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
#include "llvm/Transforms/Scalar/Sink.h"
#include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h"
#include "llvm/Transforms/Scalar/StructurizeCFG.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/FixIrreducible.h"
#include "llvm/Transforms/Utils/LCSSA.h"
#include "llvm/Transforms/Utils/LowerSwitch.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include "llvm/Transforms/Utils/UnifyLoopExits.h"
#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
#include <optional>
#include "llvm/Passes/TargetPassRegistry.inc"

Go to the source code of this file.

Macros

#define GET_PASS_REGISTRY   "AMDGPUPassRegistry.def"
#define ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
#define ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS(NAME, CLASS)
#define MODULE_ANALYSIS(NAME, CREATE_PASS)
#define MODULE_PASS(NAME, CREATE_PASS)
#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS)
#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS)
#define FUNCTION_PASS(NAME, CREATE_PASS)
#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)
#define LOOP_ANALYSIS(NAME, CREATE_PASS)
#define LOOP_PASS(NAME, CREATE_PASS)
#define MACHINE_FUNCTION_ANALYSIS(NAME, CREATE_PASS)
#define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
#define MACHINE_FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)
#define ADD_PASS(NAME, CREATE_PASS)
#define ADD_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)
#define MODULE_PASS(NAME, CREATE_PASS)
#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)
#define FUNCTION_PASS(NAME, CREATE_PASS)
#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)
#define LOOP_PASS(NAME, CREATE_PASS)
#define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
#define MACHINE_FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)
#define MODULE_ANALYSIS(NAME, CREATE_PASS)
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS)
#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS)
#define LOOP_ANALYSIS(NAME, CREATE_PASS)
#define MACHINE_FUNCTION_ANALYSIS(NAME, CREATE_PASS)

Functions

LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget ()
static std::unique_ptr< TargetLoweringObjectFile > createTLOF (const Triple &TT)
static ScheduleDAGInstrs * createSIMachineScheduler (MachineSchedContext *C)
static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler (MachineSchedContext *C)
static ScheduleDAGInstrs * createGCNMaxILPMachineScheduler (MachineSchedContext *C)
static ScheduleDAGInstrs * createGCNMaxMemoryClauseMachineScheduler (MachineSchedContext *C)
static ScheduleDAGInstrs * createIterativeGCNMaxOccupancyMachineScheduler (MachineSchedContext *C)
static ScheduleDAGInstrs * createMinRegScheduler (MachineSchedContext *C)
static ScheduleDAGInstrs * createIterativeILPMachineScheduler (MachineSchedContext *C)
static LLVM_READNONE StringRef getGPUOrDefault (const Triple &TT, StringRef GPU)
static Reloc::Model getEffectiveRelocModel (std::optional< Reloc::Model > RM)
static bool mustPreserveGV (const GlobalValue &GV)
 Predicate for Internalize pass.
static Expected< ScanOptions > parseAMDGPUAtomicOptimizerStrategy (StringRef Params)
Expected< AMDGPUAttributorOptions > parseAMDGPUAttributorPassOptions (StringRef Params)

Variables

static cl::opt< bool > EnableEarlyIfConversion ("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false))
static cl::opt< bool > OptExecMaskPreRA ("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true))
static cl::opt< bool > LowerCtorDtor ("amdgpu-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the device."), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableLoadStoreVectorizer ("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)
static cl::opt< bool > ScalarizeGlobal ("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
static cl::opt< bool > InternalizeSymbols ("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden)
static cl::opt< bool > EarlyInlineAll ("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden)
static cl::opt< bool > RemoveIncompatibleFunctions ("amdgpu-enable-remove-incompatible-functions", cl::Hidden, cl::desc("Enable removal of functions when they" "use features not supported by the target GPU"), cl::init(true))
static cl::opt< bool > EnableSDWAPeephole ("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true))
static cl::opt< bool > EnableDPPCombine ("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true))
static cl::opt< bool > EnableAMDGPUAliasAnalysis ("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true))
static cl::opt< bool > EnableLibCallSimplify ("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableLowerKernelArguments ("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableRegReassign ("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden)
static cl::opt< bool > OptVGPRLiveRange ("amdgpu-opt-vgpr-liverange", cl::desc("Enable VGPR liverange optimizations for if-else structure"), cl::init(true), cl::Hidden)
static cl::opt< ScanOptions > AMDGPUAtomicOptimizerStrategy ("amdgpu-atomic-optimizer-strategy", cl::desc("Select DPP or Iterative strategy for scan"), cl::init(ScanOptions::Iterative), cl::values(clEnumValN(ScanOptions::DPP, "DPP", "Use DPP operations for scan"), clEnumValN(ScanOptions::Iterative, "Iterative", "Use Iterative approach for scan"), clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer")))
static cl::opt< bool > EnableSIModeRegisterPass ("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableInsertDelayAlu ("amdgpu-enable-delay-alu", cl::desc("Enable s_delay_alu insertion"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableVOPD ("amdgpu-enable-vopd", cl::desc("Enable VOPD, dual issue of VALU in wave32"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableDCEInRA ("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc"))
static cl::opt< bool > EnableSetWavePriority ("amdgpu-set-wave-priority", cl::desc("Adjust wave priority"), cl::init(false), cl::Hidden)
static cl::opt< bool > EnableScalarIRPasses ("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableSwLowerLDS ("amdgpu-enable-sw-lower-lds", cl::desc("Enable lowering of lds to global memory pass " "and asan instrument resulting IR."), cl::init(true), cl::Hidden)
static cl::opt< bool, true > EnableLowerModuleLDS ("amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), cl::Hidden)
static cl::opt< bool > EnablePreRAOptimizations ("amdgpu-enable-pre-ra-optimizations", cl::desc("Enable Pre-RA optimizations pass"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnablePromoteKernelArguments ("amdgpu-enable-promote-kernel-arguments", cl::desc("Enable promotion of flat kernel pointer arguments to global"), cl::Hidden, cl::init(true))
static cl::opt< bool > EnableImageIntrinsicOptimizer ("amdgpu-enable-image-intrinsic-optimizer", cl::desc("Enable image intrinsic optimizer pass"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableLoopPrefetch ("amdgpu-loop-prefetch", cl::desc("Enable loop data prefetch on AMDGPU"), cl::Hidden, cl::init(false))
static cl::opt< std::string > AMDGPUSchedStrategy ("amdgpu-sched-strategy", cl::desc("Select custom AMDGPU scheduling strategy."), cl::Hidden, cl::init(""))
static cl::opt< bool > EnableRewritePartialRegUses ("amdgpu-enable-rewrite-partial-reg-uses", cl::desc("Enable rewrite partial reg uses pass"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableHipStdPar ("amdgpu-enable-hipstdpar", cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false), cl::Hidden)
static cl::opt< bool > EnableAMDGPUAttributor ("amdgpu-attributor-enable", cl::desc("Enable AMDGPUAttributorPass"), cl::init(true), cl::Hidden)
static cl::opt< bool > NewRegBankSelect ("new-reg-bank-select", cl::desc("Run amdgpu-regbankselect and amdgpu-regbanklegalize instead of " "regbankselect"), cl::init(false), cl::Hidden)
static cl::opt< bool > HasClosedWorldAssumption ("amdgpu-link-time-closed-world", cl::desc("Whether has closed-world assumption at link time"), cl::init(false), cl::Hidden)
static MachineSchedRegistry SISchedRegistry ("si", "Run SI's custom scheduler", createSIMachineScheduler)
static MachineSchedRegistry GCNMaxOccupancySchedRegistry ("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)
static MachineSchedRegistry GCNMaxILPSchedRegistry ("gcn-max-ilp", "Run GCN scheduler to maximize ilp", createGCNMaxILPMachineScheduler)
static MachineSchedRegistry GCNMaxMemoryClauseSchedRegistry ("gcn-max-memory-clause", "Run GCN scheduler to maximize memory clause", createGCNMaxMemoryClauseMachineScheduler)
static MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry ("gcn-iterative-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler)
static MachineSchedRegistry GCNMinRegSchedRegistry ("gcn-iterative-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler)
static MachineSchedRegistry GCNILPSchedRegistry ("gcn-iterative-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler)
static const char RegAllocOptNotSupportedMessage []

Detailed Description

This file contains both the AMDGPU target machine and the CodeGen pass builder.

The AMDGPU target machine contains all of the hardware-specific information needed to emit code for SI+ GPUs in the legacy pass manager pipeline. The CodeGen pass builder handles the pass pipeline for the new pass manager.

Definition in file AMDGPUTargetMachine.cpp.

Macro Definition Documentation

◆ ADD_CLASS_PASS_TO_PASS_NAME

#define ADD_CLASS_PASS_TO_PASS_NAME ( NAME,
CREATE_PASS )
Value:
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
PassInstrumentationCallbacks PIC
static const char * name

◆ ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS

#define ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS ( NAME,
CLASS )
Value:
PIC->addClassToPassName(CLASS, NAME);

◆ ADD_PASS

#define ADD_PASS ( NAME,
CREATE_PASS )
Value:
if (Name == NAME) { \
PM.addPass(CREATE_PASS); \
return true; \
}

◆ ADD_PASS_WITH_PARAMS

#define ADD_PASS_WITH_PARAMS ( NAME,
CREATE_PASS,
PARSER )
Value:
if (PassBuilder::checkParametrizedPassName(Name, NAME)) { \
auto Params = PassBuilder::parsePassParameters(PARSER, Name, NAME); \
if (!Params) { \
errs() << NAME ": " << toString(Params.takeError()) << '\n'; \
return false; \
} \
PM.addPass(CREATE_PASS(Params.get())); \
return true; \
}
static bool checkParametrizedPassName(StringRef Name, StringRef PassName)
static auto parsePassParameters(ParametersParseCallableT &&Parser, StringRef Name, StringRef PassName) -> decltype(Parser(StringRef{}))
This performs customized parsing of pass name with parameters.
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)

◆ FUNCTION_ALIAS_ANALYSIS [1/2]

#define FUNCTION_ALIAS_ANALYSIS ( NAME,
CREATE_PASS )
Value:
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
#define ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)

◆ FUNCTION_ALIAS_ANALYSIS [2/2]

#define FUNCTION_ALIAS_ANALYSIS ( NAME,
CREATE_PASS )
Value:
if (Name == NAME) { \
AM.registerFunctionAnalysis< \
std::remove_reference_t<decltype(CREATE_PASS)>>(); \
return true; \
}

◆ FUNCTION_ANALYSIS [1/2]

#define FUNCTION_ANALYSIS ( NAME,
CREATE_PASS )
Value:
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)

◆ FUNCTION_ANALYSIS [2/2]

#define FUNCTION_ANALYSIS ( NAME,
CREATE_PASS )
Value:
AM.registerPass([&] { return CREATE_PASS; });

◆ FUNCTION_PASS [1/2]

#define FUNCTION_PASS ( NAME,
CREATE_PASS )
Value:
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)

◆ FUNCTION_PASS [2/2]

#define FUNCTION_PASS ( NAME,
CREATE_PASS )
Value:
ADD_PASS(NAME, CREATE_PASS)
#define ADD_PASS(NAME, CREATE_PASS)

◆ FUNCTION_PASS_WITH_PARAMS [1/2]

#define FUNCTION_PASS_WITH_PARAMS ( NAME,
CLASS,
CREATE_PASS,
PARSER,
PARAMS )
Value:
ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS(NAME, CLASS)
#define ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS(NAME, CLASS)

◆ FUNCTION_PASS_WITH_PARAMS [2/2]

#define FUNCTION_PASS_WITH_PARAMS ( NAME,
CLASS,
CREATE_PASS,
PARSER,
PARAMS )
Value:
ADD_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)
#define ADD_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)

◆ GET_PASS_REGISTRY

#define GET_PASS_REGISTRY   "AMDGPUPassRegistry.def"

◆ LOOP_ANALYSIS [1/2]

#define LOOP_ANALYSIS ( NAME,
CREATE_PASS )
Value:
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)

◆ LOOP_ANALYSIS [2/2]

#define LOOP_ANALYSIS ( NAME,
CREATE_PASS )
Value:
AM.registerPass([&] { return CREATE_PASS; });

◆ LOOP_PASS [1/2]

#define LOOP_PASS ( NAME,
CREATE_PASS )
Value:
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)

◆ LOOP_PASS [2/2]

#define LOOP_PASS ( NAME,
CREATE_PASS )
Value:
ADD_PASS(NAME, CREATE_PASS)

◆ MACHINE_FUNCTION_ANALYSIS [1/2]

#define MACHINE_FUNCTION_ANALYSIS ( NAME,
CREATE_PASS )
Value:
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)

◆ MACHINE_FUNCTION_ANALYSIS [2/2]

#define MACHINE_FUNCTION_ANALYSIS ( NAME,
CREATE_PASS )
Value:
AM.registerPass([&] { return CREATE_PASS; });

◆ MACHINE_FUNCTION_PASS [1/2]

#define MACHINE_FUNCTION_PASS ( NAME,
CREATE_PASS )
Value:
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)

◆ MACHINE_FUNCTION_PASS [2/2]

#define MACHINE_FUNCTION_PASS ( NAME,
CREATE_PASS )
Value:
ADD_PASS(NAME, CREATE_PASS)

◆ MACHINE_FUNCTION_PASS_WITH_PARAMS [1/2]

#define MACHINE_FUNCTION_PASS_WITH_PARAMS ( NAME,
CLASS,
CREATE_PASS,
PARSER,
PARAMS )
Value:
ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS(NAME, CLASS)

◆ MACHINE_FUNCTION_PASS_WITH_PARAMS [2/2]

#define MACHINE_FUNCTION_PASS_WITH_PARAMS ( NAME,
CLASS,
CREATE_PASS,
PARSER,
PARAMS )
Value:
ADD_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)

◆ MODULE_ANALYSIS [1/2]

#define MODULE_ANALYSIS ( NAME,
CREATE_PASS )
Value:
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)

◆ MODULE_ANALYSIS [2/2]

#define MODULE_ANALYSIS ( NAME,
CREATE_PASS )
Value:
AM.registerPass([&] { return CREATE_PASS; });

◆ MODULE_PASS [1/2]

#define MODULE_PASS ( NAME,
CREATE_PASS )
Value:
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)

◆ MODULE_PASS [2/2]

#define MODULE_PASS ( NAME,
CREATE_PASS )
Value:
ADD_PASS(NAME, CREATE_PASS)

◆ MODULE_PASS_WITH_PARAMS [1/2]

#define MODULE_PASS_WITH_PARAMS ( NAME,
CLASS,
CREATE_PASS,
PARSER,
PARAMS )
Value:
ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS(NAME, CLASS)

◆ MODULE_PASS_WITH_PARAMS [2/2]

#define MODULE_PASS_WITH_PARAMS ( NAME,
CLASS,
CREATE_PASS,
PARSER,
PARAMS )
Value:
ADD_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)

Function Documentation

◆ createGCNMaxILPMachineScheduler()

◆ createGCNMaxMemoryClauseMachineScheduler()

◆ createGCNMaxOccupancyMachineScheduler()

◆ createIterativeGCNMaxOccupancyMachineScheduler()

◆ createIterativeILPMachineScheduler()

◆ createMinRegScheduler()

◆ createSIMachineScheduler()

ScheduleDAGInstrs * createSIMachineScheduler ( MachineSchedContext * C)
static

◆ createTLOF()

std::unique_ptr< TargetLoweringObjectFile > createTLOF ( const Triple & TT)
static

Definition at line 618 of file AMDGPUTargetMachine.cpp.

◆ getEffectiveRelocModel()

Reloc::Model getEffectiveRelocModel ( std::optional< Reloc::Model > RM)
static

Definition at line 735 of file AMDGPUTargetMachine.cpp.

References llvm::Reloc::PIC_.

◆ getGPUOrDefault()

LLVM_READNONE StringRef getGPUOrDefault ( const Triple & TT,
StringRef GPU )
static

◆ LLVMInitializeAMDGPUTarget()

LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget ( )

Definition at line 529 of file AMDGPUTargetMachine.cpp.

References llvm::PassRegistry::getPassRegistry(), llvm::getTheGCNTarget(), llvm::getTheR600Target(), llvm::initializeAMDGPUAAWrapperPassPass(), llvm::initializeAMDGPUAlwaysInlinePass(), llvm::initializeAMDGPUAnnotateUniformValuesLegacyPass(), llvm::initializeAMDGPUArgumentUsageInfoPass(), llvm::initializeAMDGPUAsmPrinterPass(), llvm::initializeAMDGPUAtomicOptimizerPass(), llvm::initializeAMDGPUCodeGenPreparePass(), llvm::initializeAMDGPUCtorDtorLoweringLegacyPass(), llvm::initializeAMDGPUDAGToDAGISelLegacyPass(), llvm::initializeAMDGPUExportKernelRuntimeHandlesLegacyPass(), llvm::initializeAMDGPUExternalAAWrapperPass(), llvm::initializeAMDGPUGlobalISelDivergenceLoweringPass(), llvm::initializeAMDGPUImageIntrinsicOptimizerPass(), llvm::initializeAMDGPUInsertDelayAluLegacyPass(), llvm::initializeAMDGPULateCodeGenPrepareLegacyPass(), llvm::initializeAMDGPULowerBufferFatPointersPass(), llvm::initializeAMDGPULowerIntrinsicsLegacyPass(), llvm::initializeAMDGPULowerKernelArgumentsPass(), llvm::initializeAMDGPULowerKernelAttributesPass(), llvm::initializeAMDGPULowerModuleLDSLegacyPass(), llvm::initializeAMDGPULowerVGPREncodingLegacyPass(), llvm::initializeAMDGPUMarkLastScratchLoadLegacyPass(), llvm::initializeAMDGPUPostLegalizerCombinerPass(), llvm::initializeAMDGPUPreLegalizerCombinerPass(), llvm::initializeAMDGPUPreloadKernArgPrologLegacyPass(), llvm::initializeAMDGPUPreloadKernelArgumentsLegacyPass(), llvm::initializeAMDGPUPrepareAGPRAllocLegacyPass(), llvm::initializeAMDGPUPrintfRuntimeBindingPass(), llvm::initializeAMDGPUPromoteAllocaPass(), llvm::initializeAMDGPUPromoteKernelArgumentsPass(), llvm::initializeAMDGPURegBankCombinerPass(), llvm::initializeAMDGPURegBankLegalizePass(), llvm::initializeAMDGPURegBankSelectPass(), llvm::initializeAMDGPURemoveIncompatibleFunctionsLegacyPass(), llvm::initializeAMDGPUReserveWWMRegsLegacyPass(), llvm::initializeAMDGPUResourceUsageAnalysisWrapperPassPass(), llvm::initializeAMDGPURewriteAGPRCopyMFMALegacyPass(), 
llvm::initializeAMDGPURewriteOutArgumentsPass(), llvm::initializeAMDGPURewriteUndefForPHILegacyPass(), llvm::initializeAMDGPUSwLowerLDSLegacyPass(), llvm::initializeAMDGPUUnifyDivergentExitNodesPass(), llvm::initializeAMDGPUWaitSGPRHazardsLegacyPass(), llvm::initializeGCNCreateVOPDLegacyPass(), llvm::initializeGCNDPPCombineLegacyPass(), llvm::initializeGCNNSAReassignLegacyPass(), llvm::initializeGCNPreRALongBranchRegLegacyPass(), llvm::initializeGCNPreRAOptimizationsLegacyPass(), llvm::initializeGCNRegPressurePrinterPass(), llvm::initializeGCNRewritePartialRegUsesLegacyPass(), llvm::initializeGlobalISel(), llvm::initializeR600ClauseMergePassPass(), llvm::initializeR600ControlFlowFinalizerPass(), llvm::initializeR600EmitClauseMarkersPass(), llvm::initializeR600ExpandSpecialInstrsPassPass(), llvm::initializeR600MachineCFGStructurizerPass(), llvm::initializeR600PacketizerPass(), llvm::initializeR600VectorRegMergerPass(), llvm::initializeSIAnnotateControlFlowLegacyPass(), llvm::initializeSIFixSGPRCopiesLegacyPass(), llvm::initializeSIFixVGPRCopiesLegacyPass(), llvm::initializeSIFoldOperandsLegacyPass(), llvm::initializeSIFormMemoryClausesLegacyPass(), llvm::initializeSIInsertHardClausesLegacyPass(), llvm::initializeSIInsertWaitcntsLegacyPass(), llvm::initializeSILateBranchLoweringLegacyPass(), llvm::initializeSILoadStoreOptimizerLegacyPass(), llvm::initializeSILowerControlFlowLegacyPass(), llvm::initializeSILowerI1CopiesLegacyPass(), llvm::initializeSILowerSGPRSpillsLegacyPass(), llvm::initializeSILowerWWMCopiesLegacyPass(), llvm::initializeSIMemoryLegalizerLegacyPass(), llvm::initializeSIModeRegisterLegacyPass(), llvm::initializeSIOptimizeExecMaskingLegacyPass(), llvm::initializeSIOptimizeExecMaskingPreRALegacyPass(), llvm::initializeSIOptimizeVGPRLiveRangeLegacyPass(), llvm::initializeSIPeepholeSDWALegacyPass(), llvm::initializeSIPostRABundlerLegacyPass(), llvm::initializeSIPreAllocateWWMRegsLegacyPass(), llvm::initializeSIPreEmitPeepholeLegacyPass(), 
llvm::initializeSIShrinkInstructionsLegacyPass(), llvm::initializeSIWholeQuadModeLegacyPass(), LLVM_ABI, LLVM_EXTERNAL_VISIBILITY, X, and Y.

◆ mustPreserveGV()

◆ parseAMDGPUAtomicOptimizerStrategy()

◆ parseAMDGPUAttributorPassOptions()

Variable Documentation

◆ AMDGPUAtomicOptimizerStrategy

cl::opt< ScanOptions > AMDGPUAtomicOptimizerStrategy("amdgpu-atomic-optimizer-strategy", cl::desc("Select DPP or Iterative strategy for scan"), cl::init(ScanOptions::Iterative), cl::values( clEnumValN(ScanOptions::DPP, "DPP", "Use DPP operations for scan"), clEnumValN(ScanOptions::Iterative, "Iterative", "Use Iterative approach for scan"), clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer"))) ( "amdgpu-atomic-optimizer-strategy" ,
cl::desc("Select DPP or Iterative strategy for scan") ,
cl::init(ScanOptions::Iterative) ,
cl::values( clEnumValN(ScanOptions::DPP, "DPP", "Use DPP operations for scan"), clEnumValN(ScanOptions::Iterative, "Iterative", "Use Iterative approach for scan"), clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer"))  )
static

◆ AMDGPUSchedStrategy

cl::opt< std::string > AMDGPUSchedStrategy("amdgpu-sched-strategy", cl::desc("Select custom AMDGPU scheduling strategy."), cl::Hidden, cl::init("")) ( "amdgpu-sched-strategy" ,
cl::desc("Select custom AMDGPU scheduling strategy.") ,
cl::Hidden ,
cl::init("")  )
static

◆ EarlyInlineAll

cl::opt< bool > EarlyInlineAll("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden) ( "amdgpu-early-inline-all" ,
cl::desc("Inline all functions early") ,
cl::init(false) ,
cl::Hidden  )
static

◆ EnableAMDGPUAliasAnalysis

cl::opt< bool > EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true)) ( "enable-amdgpu-aa" ,
cl::Hidden ,
cl::desc("Enable AMDGPU Alias Analysis") ,
cl::init(true)  )
static

◆ EnableAMDGPUAttributor

cl::opt< bool > EnableAMDGPUAttributor("amdgpu-attributor-enable", cl::desc("Enable AMDGPUAttributorPass"), cl::init(true), cl::Hidden) ( "amdgpu-attributor-enable" ,
cl::desc("Enable AMDGPUAttributorPass") ,
cl::init(true) ,
cl::Hidden  )
static

◆ EnableDCEInRA

cl::opt< bool > EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc")) ( "amdgpu-dce-in-ra" ,
cl::init(true) ,
cl::Hidden ,
cl::desc("Enable machine DCE inside regalloc")  )
static

◆ EnableDPPCombine

cl::opt< bool > EnableDPPCombine("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true)) ( "amdgpu-dpp-combine" ,
cl::desc("Enable DPP combiner") ,
cl::init(true)  )
static

◆ EnableEarlyIfConversion

cl::opt< bool > EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false)) ( "amdgpu-early-ifcvt" ,
cl::Hidden ,
cl::desc("Run early if-conversion") ,
cl::init(false)  )
static

◆ EnableHipStdPar

cl::opt< bool > EnableHipStdPar("amdgpu-enable-hipstdpar", cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false), cl::Hidden) ( "amdgpu-enable-hipstdpar" ,
cl::desc("Enable HIP Standard Parallelism Offload support") ,
cl::init(false) ,
cl::Hidden  )
static

◆ EnableImageIntrinsicOptimizer

cl::opt< bool > EnableImageIntrinsicOptimizer("amdgpu-enable-image-intrinsic-optimizer", cl::desc("Enable image intrinsic optimizer pass"), cl::init(true), cl::Hidden) ( "amdgpu-enable-image-intrinsic-optimizer" ,
cl::desc("Enable image intrinsic optimizer pass") ,
cl::init(true) ,
cl::Hidden  )
static

◆ EnableInsertDelayAlu

cl::opt< bool > EnableInsertDelayAlu("amdgpu-enable-delay-alu", cl::desc("Enable s_delay_alu insertion"), cl::init(true), cl::Hidden) ( "amdgpu-enable-delay-alu" ,
cl::desc("Enable s_delay_alu insertion") ,
cl::init(true) ,
cl::Hidden  )
static

◆ EnableLibCallSimplify

cl::opt< bool > EnableLibCallSimplify("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden) ( "amdgpu-simplify-libcall" ,
cl::desc("Enable amdgpu library simplifications") ,
cl::init(true) ,
cl::Hidden  )
static

◆ EnableLoadStoreVectorizer

cl::opt< bool > EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden) ( "amdgpu-load-store-vectorizer" ,
cl::desc("Enable load store vectorizer") ,
cl::init(true) ,
cl::Hidden  )
static

◆ EnableLoopPrefetch

cl::opt< bool > EnableLoopPrefetch("amdgpu-loop-prefetch", cl::desc("Enable loop data prefetch on AMDGPU"), cl::Hidden, cl::init(false)) ( "amdgpu-loop-prefetch" ,
cl::desc("Enable loop data prefetch on AMDGPU") ,
cl::Hidden ,
cl::init(false)  )
static

◆ EnableLowerKernelArguments

cl::opt< bool > EnableLowerKernelArguments("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden) ( "amdgpu-ir-lower-kernel-arguments" ,
cl::desc("Lower kernel argument loads in IR pass") ,
cl::init(true) ,
cl::Hidden  )
static

◆ EnableLowerModuleLDS

cl::opt< bool, true > EnableLowerModuleLDS("amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), cl::Hidden) ( "amdgpu-enable-lower-module-lds" ,
cl::desc("Enable lower module lds pass") ,
cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS) ,
cl::init(true) ,
cl::Hidden  )
static

◆ EnablePreRAOptimizations

cl::opt< bool > EnablePreRAOptimizations("amdgpu-enable-pre-ra-optimizations", cl::desc("Enable Pre-RA optimizations pass"), cl::init(true), cl::Hidden) ( "amdgpu-enable-pre-ra-optimizations" ,
cl::desc("Enable Pre-RA optimizations pass") ,
cl::init(true) ,
cl::Hidden  )
static

◆ EnablePromoteKernelArguments

cl::opt< bool > EnablePromoteKernelArguments("amdgpu-enable-promote-kernel-arguments", cl::desc("Enable promotion of flat kernel pointer arguments to global"), cl::Hidden, cl::init(true)) ( "amdgpu-enable-promote-kernel-arguments" ,
cl::desc("Enable promotion of flat kernel pointer arguments to global") ,
cl::Hidden ,
cl::init(true)  )
static

◆ EnableRegReassign

cl::opt< bool > EnableRegReassign("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden) ( "amdgpu-reassign-regs" ,
cl::desc("Enable register reassign optimizations on gfx10+") ,
cl::init(true) ,
cl::Hidden  )
static

◆ EnableRewritePartialRegUses

cl::opt< bool > EnableRewritePartialRegUses("amdgpu-enable-rewrite-partial-reg-uses", cl::desc("Enable rewrite partial reg uses pass"), cl::init(true), cl::Hidden) ( "amdgpu-enable-rewrite-partial-reg-uses" ,
cl::desc("Enable rewrite partial reg uses pass") ,
cl::init(true) ,
cl::Hidden  )
static

◆ EnableScalarIRPasses

cl::opt< bool > EnableScalarIRPasses("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden) ( "amdgpu-scalar-ir-passes" ,
cl::desc("Enable scalar IR passes") ,
cl::init(true) ,
cl::Hidden  )
static

◆ EnableSDWAPeephole

cl::opt< bool > EnableSDWAPeephole("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true)) ( "amdgpu-sdwa-peephole" ,
cl::desc("Enable SDWA peepholer") ,
cl::init(true)  )
static

◆ EnableSetWavePriority

cl::opt< bool > EnableSetWavePriority("amdgpu-set-wave-priority", cl::desc("Adjust wave priority"), cl::init(false), cl::Hidden) ( "amdgpu-set-wave-priority" ,
cl::desc("Adjust wave priority") ,
cl::init(false) ,
cl::Hidden  )
static

◆ EnableSIModeRegisterPass

cl::opt< bool > EnableSIModeRegisterPass("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden) ( "amdgpu-mode-register" ,
cl::desc("Enable mode register pass") ,
cl::init(true) ,
cl::Hidden  )
static

◆ EnableSwLowerLDS

cl::opt< bool > EnableSwLowerLDS("amdgpu-enable-sw-lower-lds", cl::desc("Enable lowering of lds to global memory pass " "and asan instrument resulting IR."), cl::init(true), cl::Hidden) ( "amdgpu-enable-sw-lower-lds" ,
cl::desc("Enable lowering of lds to global memory pass " "and asan instrument resulting IR.") ,
cl::init(true) ,
cl::Hidden  )
static

◆ EnableVOPD

cl::opt< bool > EnableVOPD("amdgpu-enable-vopd", cl::desc("Enable VOPD, dual issue of VALU in wave32"), cl::init(true), cl::Hidden) ( "amdgpu-enable-vopd" ,
cl::desc("Enable VOPD, dual issue of VALU in wave32") ,
cl::init(true) ,
cl::Hidden  )
static

◆ GCNILPSchedRegistry

MachineSchedRegistry GCNILPSchedRegistry("gcn-iterative-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler) ( "gcn-iterative-ilp" ,
"Run GCN iterative scheduler for ILP scheduling (experimental)" ,
createIterativeILPMachineScheduler  )
static

◆ GCNMaxILPSchedRegistry

MachineSchedRegistry GCNMaxILPSchedRegistry("gcn-max-ilp", "Run GCN scheduler to maximize ilp", createGCNMaxILPMachineScheduler) ( "gcn-max-ilp" ,
"Run GCN scheduler to maximize ilp" ,
createGCNMaxILPMachineScheduler  )
static

◆ GCNMaxMemoryClauseSchedRegistry

MachineSchedRegistry GCNMaxMemoryClauseSchedRegistry("gcn-max-memory-clause", "Run GCN scheduler to maximize memory clause", createGCNMaxMemoryClauseMachineScheduler) ( "gcn-max-memory-clause" ,
"Run GCN scheduler to maximize memory clause" ,
createGCNMaxMemoryClauseMachineScheduler  )
static

◆ GCNMaxOccupancySchedRegistry

MachineSchedRegistry GCNMaxOccupancySchedRegistry("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler) ( "gcn-max-occupancy" ,
"Run GCN scheduler to maximize occupancy" ,
createGCNMaxOccupancyMachineScheduler  )
static

◆ GCNMinRegSchedRegistry

MachineSchedRegistry GCNMinRegSchedRegistry("gcn-iterative-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler) ( "gcn-iterative-minreg" ,
"Run GCN iterative scheduler for minimal register usage (experimental)" ,
createMinRegScheduler  )
static

◆ HasClosedWorldAssumption

cl::opt< bool > HasClosedWorldAssumption("amdgpu-link-time-closed-world", cl::desc("Whether has closed-world assumption at link time"), cl::init(false), cl::Hidden) ( "amdgpu-link-time-closed-world" ,
cl::desc("Whether has closed-world assumption at link time") ,
cl::init(false) ,
cl::Hidden  )
static

◆ InternalizeSymbols

cl::opt< bool > InternalizeSymbols("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden) ( "amdgpu-internalize-symbols" ,
cl::desc("Enable elimination of non-kernel functions and unused globals") ,
cl::init(false) ,
cl::Hidden  )
static

◆ IterativeGCNMaxOccupancySchedRegistry

MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry("gcn-iterative-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler) ( "gcn-iterative-max-occupancy-experimental" ,
"Run GCN scheduler to maximize occupancy (experimental)" ,
createIterativeGCNMaxOccupancyMachineScheduler  )
static

◆ LowerCtorDtor

cl::opt< bool > LowerCtorDtor("amdgpu-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the device."), cl::init(true), cl::Hidden) ( "amdgpu-lower-global-ctor-dtor" ,
cl::desc("Lower GPU ctor / dtors to globals on the device.") ,
cl::init(true) ,
cl::Hidden  )
static

◆ NewRegBankSelect

cl::opt< bool > NewRegBankSelect("new-reg-bank-select", cl::desc("Run amdgpu-regbankselect and amdgpu-regbanklegalize instead of " "regbankselect"), cl::init(false), cl::Hidden) ( "new-reg-bank-select" ,
cl::desc("Run amdgpu-regbankselect and amdgpu-regbanklegalize instead of " "regbankselect") ,
cl::init(false) ,
cl::Hidden  )
static

◆ OptExecMaskPreRA

cl::opt< bool > OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true)) ( "amdgpu-opt-exec-mask-pre-ra" ,
cl::Hidden ,
cl::desc("Run pre-RA exec mask optimizations") ,
cl::init(true)  )
static

◆ OptVGPRLiveRange

cl::opt< bool > OptVGPRLiveRange("amdgpu-opt-vgpr-liverange", cl::desc("Enable VGPR liverange optimizations for if-else structure"), cl::init(true), cl::Hidden) ( "amdgpu-opt-vgpr-liverange" ,
cl::desc("Enable VGPR liverange optimizations for if-else structure") ,
cl::init(true) ,
cl::Hidden  )
static

◆ RegAllocOptNotSupportedMessage

const char RegAllocOptNotSupportedMessage[]
static
Initial value:
=
"-regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, "
"and -vgpr-regalloc"

Definition at line 1672 of file AMDGPUTargetMachine.cpp.

◆ RemoveIncompatibleFunctions

cl::opt< bool > RemoveIncompatibleFunctions("amdgpu-enable-remove-incompatible-functions", cl::Hidden, cl::desc("Enable removal of functions when they " "use features not supported by the target GPU"), cl::init(true)) ( "amdgpu-enable-remove-incompatible-functions" ,
cl::Hidden ,
cl::desc("Enable removal of functions when they " "use features not supported by the target GPU") ,
cl::init(true)  )
static

◆ ScalarizeGlobal

cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden) ( "amdgpu-scalarize-global-loads" ,
cl::desc("Enable global load scalarization") ,
cl::init(true) ,
cl::Hidden  )
static

◆ SISchedRegistry

MachineSchedRegistry SISchedRegistry("si", "Run SI's custom scheduler", createSIMachineScheduler) ( "si" ,
"Run SI's custom scheduler" ,
createSIMachineScheduler  )
static