#define DEBUG_TYPE "openmp-ir-builder"
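// Hidden command-line options for testing and debugging the OpenMPIRBuilder
// lowering; they are not meant to be user-facing.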
static cl::opt<bool>
    OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden,
                         cl::desc("Use optimistic attributes describing "
                                  "'as-if' properties of runtime calls."),
                         cl::init(false));
77 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
78 cl::desc(
"Factor for the unroll threshold to account for code "
79 "simplifications still taking place"),
  // Unset insertion points cannot conflict.
  if (!IP1.isSet() || !IP2.isSet())
    return false;
  return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
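// The switch below enumerates every schedule/ordering combination the
// runtime understands; any other combination is rejected as invalid for
// worksharing loops.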
  switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
  case OMPScheduleType::UnorderedStaticChunked:
  case OMPScheduleType::UnorderedStatic:
  case OMPScheduleType::UnorderedDynamicChunked:
  case OMPScheduleType::UnorderedGuidedChunked:
  case OMPScheduleType::UnorderedRuntime:
  case OMPScheduleType::UnorderedAuto:
  case OMPScheduleType::UnorderedTrapezoidal:
  case OMPScheduleType::UnorderedGreedy:
  case OMPScheduleType::UnorderedBalanced:
  case OMPScheduleType::UnorderedGuidedIterativeChunked:
  case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
  case OMPScheduleType::UnorderedSteal:
  case OMPScheduleType::UnorderedStaticBalancedChunked:
  case OMPScheduleType::UnorderedGuidedSimd:
  case OMPScheduleType::UnorderedRuntimeSimd:
  case OMPScheduleType::OrderedStaticChunked:
  case OMPScheduleType::OrderedStatic:
  case OMPScheduleType::OrderedDynamicChunked:
  case OMPScheduleType::OrderedGuidedChunked:
  case OMPScheduleType::OrderedRuntime:
  case OMPScheduleType::OrderedAuto:
  case OMPScheduleType::OrderedTrapezoidal:
  case OMPScheduleType::NomergeUnorderedStaticChunked:
  case OMPScheduleType::NomergeUnorderedStatic:
  case OMPScheduleType::NomergeUnorderedDynamicChunked:
  case OMPScheduleType::NomergeUnorderedGuidedChunked:
  case OMPScheduleType::NomergeUnorderedRuntime:
  case OMPScheduleType::NomergeUnorderedAuto:
  case OMPScheduleType::NomergeUnorderedTrapezoidal:
  case OMPScheduleType::NomergeUnorderedGreedy:
  case OMPScheduleType::NomergeUnorderedBalanced:
  case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
  case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
  case OMPScheduleType::NomergeUnorderedSteal:
  case OMPScheduleType::NomergeOrderedStaticChunked:
  case OMPScheduleType::NomergeOrderedStatic:
  case OMPScheduleType::NomergeOrderedDynamicChunked:
  case OMPScheduleType::NomergeOrderedGuidedChunked:
  case OMPScheduleType::NomergeOrderedRuntime:
  case OMPScheduleType::NomergeOrderedAuto:
  case OMPScheduleType::NomergeOrderedTrapezoidal:
    break;
  default:
    return false;
  }
  // Must not combine both monotonicity modifiers.
  OMPScheduleType MonotonicityFlags =
      SchedType & OMPScheduleType::MonotonicityMask;
  if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
    return false;
  Builder.restoreIP(IP);
    StringRef Features =
        Kernel->getFnAttribute("target-features").getValueAsString();
    if (Features.count("+wavefrontsize64"))
/// Determine which scheduling algorithm to use from the schedule clause
/// arguments.
static OMPScheduleType
getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks,
                          bool HasSimdModifier) {
  switch (ClauseKind) {
  case OMP_SCHEDULE_Default:
  case OMP_SCHEDULE_Static:
    return HasChunks ? OMPScheduleType::BaseStaticChunked
                     : OMPScheduleType::BaseStatic;
  case OMP_SCHEDULE_Dynamic:
    return OMPScheduleType::BaseDynamicChunked;
  case OMP_SCHEDULE_Guided:
    return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
                           : OMPScheduleType::BaseGuidedChunked;
  case OMP_SCHEDULE_Auto:
    return OMPScheduleType::BaseAuto;
  case OMP_SCHEDULE_Runtime:
    return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
                           : OMPScheduleType::BaseRuntime;
  }
  llvm_unreachable("unhandled schedule clause argument");
}
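/// Adds ordering modifier flags to the schedule type; the base schedule must
/// not carry any ordering or monotonicity bits yet.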
static OMPScheduleType
getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType,
                              bool HasOrderedClause) {
  assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
             OMPScheduleType::None &&
         "Must not have ordering nor monotonicity flags already set");

  OMPScheduleType OrderingModifier = HasOrderedClause
                                         ? OMPScheduleType::ModifierOrdered
                                         : OMPScheduleType::ModifierUnordered;
  OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;

  // There are no ordered simd variants; fall back to the corresponding
  // non-simd ordered type.
  if (OrderingScheduleType ==
      (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
    return OMPScheduleType::OrderedGuidedChunked;
  else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
                                    OMPScheduleType::ModifierOrdered))
    return OMPScheduleType::OrderedRuntime;

  return OrderingScheduleType;
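/// Adds monotonicity modifier flags to the schedule type. Per OpenMP 5.x,
/// nonmonotonic is the default for dynamic and guided worksharing loops
/// unless a monotonic modifier, a static schedule, simd, or an ordered
/// clause requires otherwise.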
static OMPScheduleType
getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType,
                                  bool HasSimdModifier, bool HasMonotonic,
                                  bool HasNonmonotonic, bool HasOrderedClause) {
  assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
             OMPScheduleType::None &&
         "Must not have monotonicity flags already set");
  assert((!HasMonotonic || !HasNonmonotonic) &&
         "Monotonic and Nonmonotonic are contradicting each other");

  if (HasMonotonic) {
    return ScheduleType | OMPScheduleType::ModifierMonotonic;
  } else if (HasNonmonotonic) {
    return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
  } else {
    // Static schedules, simd, and the ordered clause keep the runtime's
    // monotonic default; everything else gets the nonmonotonic modifier.
    OMPScheduleType BaseScheduleType =
        ScheduleType & ~OMPScheduleType::ModifierMask;
    if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
        (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
        HasSimdModifier || HasOrderedClause)
      return ScheduleType;
    return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
  }
}
static OMPScheduleType
computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks,
                          bool HasSimdModifier, bool HasMonotonicModifier,
                          bool HasNonmonotonicModifier, bool HasOrderedClause) {
  OMPScheduleType BaseSchedule =
      getOpenMPBaseScheduleType(ClauseKind, HasChunks, HasSimdModifier);
  OMPScheduleType OrderedSchedule =
      getOpenMPOrderingScheduleType(BaseSchedule, HasOrderedClause);
  OMPScheduleType Result = getOpenMPMonotonicityScheduleType(
      OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
      HasNonmonotonicModifier, HasOrderedClause);
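  // For example, `schedule(nonmonotonic: dynamic, 4)` on a loop without an
  // ordered clause composes to
  // BaseDynamicChunked | ModifierUnordered | ModifierNonmonotonic.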
  assert(!Br->isConditional() &&
         "BB's terminator must be an unconditional branch (or degenerate)");
  Br->setSuccessor(0, Target);
  NewBr->setDebugLoc(DL);
307 "Target BB must not have PHI nodes");
  NewBr->setDebugLoc(DL);
  spliceBB(Builder.saveIP(), New, CreateBranch, DebugLoc);
    Builder.SetInsertPoint(Old);
  Builder.SetCurrentDebugLocation(DebugLoc);
  spliceBB(IP, New, CreateBranch, DL);
  New->replaceSuccessorsPhiUsesWith(Old, New);
  if (CreateBranch)
    Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
  else
    Builder.SetInsertPoint(Builder.GetInsertBlock());
  Builder.SetCurrentDebugLocation(DebugLoc);
  if (CreateBranch)
    Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
  else
    Builder.SetInsertPoint(Builder.GetInsertBlock());
  Builder.SetCurrentDebugLocation(DebugLoc);
  return splitBB(Builder, CreateBranch, Old->getName() + Suffix);
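/// Create a fake i32 value and a fake use of it. This keeps an argument slot
/// alive for values (such as the global thread id) that are only wired up
/// after the region has been outlined; the placeholder instructions are
/// deleted once outlining is done.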
static Value *createFakeIntVal(IRBuilderBase &Builder,
                               OpenMPIRBuilder::InsertPointTy OuterAllocaIP,
                               SmallVectorImpl<Instruction *> &ToBeDeleted,
                               OpenMPIRBuilder::InsertPointTy InnerAllocaIP,
                               const Twine &Name = "", bool AsPtr = true) {
  Builder.restoreIP(OuterAllocaIP);
  Instruction *FakeVal;
  Instruction *FakeValAddr =
      Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, Name + ".addr");
  if (AsPtr) {
    FakeVal = FakeValAddr;
  } else {
    FakeVal =
        Builder.CreateLoad(Builder.getInt32Ty(), FakeValAddr, Name + ".val");
  }

  // Generate a fake use of this value.
  Builder.restoreIP(InnerAllocaIP);
  Builder.CreateLoad(Builder.getInt32Ty(), FakeVal, Name + ".use");
enum OpenMPOffloadingRequiresDirFlags {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires directive present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
};
OpenMPIRBuilderConfig::OpenMPIRBuilderConfig()
    : RequiresFlags(OMP_REQ_UNDEFINED) {}
OpenMPIRBuilderConfig::OpenMPIRBuilderConfig(
    bool IsTargetDevice, bool IsGPU, bool OpenMPOffloadMandatory,
    bool HasRequiresReverseOffload, bool HasRequiresUnifiedAddress,
    bool HasRequiresUnifiedSharedMemory, bool HasRequiresDynamicAllocators)
    : IsTargetDevice(IsTargetDevice), IsGPU(IsGPU),
      OpenMPOffloadMandatory(OpenMPOffloadMandatory),
      RequiresFlags(OMP_REQ_UNDEFINED) {
  if (HasRequiresReverseOffload)
    RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
  if (HasRequiresUnifiedAddress)
    RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
  if (HasRequiresUnifiedSharedMemory)
    RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
  if (HasRequiresDynamicAllocators)
    RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
}
bool OpenMPIRBuilderConfig::hasRequiresReverseOffload() const {
  return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
}

bool OpenMPIRBuilderConfig::hasRequiresUnifiedAddress() const {
  return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
}

bool OpenMPIRBuilderConfig::hasRequiresUnifiedSharedMemory() const {
  return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
}

bool OpenMPIRBuilderConfig::hasRequiresDynamicAllocators() const {
  return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
}
int64_t OpenMPIRBuilderConfig::getRequiresFlags() const {
  return hasRequiresFlags() ? RequiresFlags
                            : static_cast<int64_t>(OMP_REQ_NONE);
}
void OpenMPIRBuilderConfig::setHasRequiresReverseOffload(bool Value) {
  if (Value)
    RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
  else
    RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
}

void OpenMPIRBuilderConfig::setHasRequiresUnifiedAddress(bool Value) {
  if (Value)
    RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
  else
    RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
}

void OpenMPIRBuilderConfig::setHasRequiresUnifiedSharedMemory(bool Value) {
  if (Value)
    RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
  else
    RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
}

void OpenMPIRBuilderConfig::setHasRequiresDynamicAllocators(bool Value) {
  if (Value)
    RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
  else
    RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
}
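// Builds the argument vector passed to __tgt_target_kernel: kernel-args
// version, number of map items, the offloading pointer/size/type arrays,
// the trip count, flags, the 3-D teams/threads shapes, and the dynamic
// cgroup memory size.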
void OpenMPIRBuilder::getKernelArgsVector(TargetKernelArgs &KernelArgs,
                                          IRBuilderBase &Builder,
                                          SmallVector<Value *> &ArgsVector) {
  Value *Version = Builder.getInt32(OMP_KERNEL_ARG_VERSION);
  Value *PointerNum = Builder.getInt32(KernelArgs.NumTargetItems);
  auto Int32Ty = Type::getInt32Ty(Builder.getContext());
  constexpr const size_t MaxDim = 3;
  Value *ZeroArray = Constant::getNullValue(ArrayType::get(Int32Ty, MaxDim));
  Value *Flags = Builder.getInt64(KernelArgs.HasNoWait);

  assert(!KernelArgs.NumTeams.empty() && !KernelArgs.NumThreads.empty());

  Value *NumTeams3D =
      Builder.CreateInsertValue(ZeroArray, KernelArgs.NumTeams[0], {0});
  Value *NumThreads3D =
      Builder.CreateInsertValue(ZeroArray, KernelArgs.NumThreads[0], {0});
  for (unsigned I :
       seq<unsigned>(1, std::min(KernelArgs.NumTeams.size(), MaxDim)))
    NumTeams3D =
        Builder.CreateInsertValue(NumTeams3D, KernelArgs.NumTeams[I], {I});
  for (unsigned I :
       seq<unsigned>(1, std::min(KernelArgs.NumThreads.size(), MaxDim)))
    NumThreads3D =
        Builder.CreateInsertValue(NumThreads3D, KernelArgs.NumThreads[I], {I});

  ArgsVector = {Version,
                PointerNum,
                KernelArgs.RTArgs.BasePointersArray,
                KernelArgs.RTArgs.PointersArray,
                KernelArgs.RTArgs.SizesArray,
                KernelArgs.RTArgs.MapTypesArray,
                KernelArgs.RTArgs.MapNamesArray,
                KernelArgs.RTArgs.MappersArray,
                KernelArgs.NumIterations,
                Flags,
                NumTeams3D,
                NumThreads3D,
                KernelArgs.DynCGGroupMem};
}
  auto FnAttrs = Attrs.getFnAttrs();
  auto RetAttrs = Attrs.getRetAttrs();
  for (size_t ArgNo = 0; ArgNo < Fn.arg_size(); ++ArgNo)
                        bool Param = true) -> void {
    bool HasSignExt = AS.hasAttribute(Attribute::SExt);
    bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
    if (HasSignExt || HasZeroExt) {
      assert(AS.getNumAttributes() == 1 &&
             "Currently not handling extension attr combined with others.");
      if (auto AK = TargetLibraryInfo::getExtAttrForI32Param(T, HasSignExt))
      else if (auto AK =
                   TargetLibraryInfo::getExtAttrForI32Return(T, HasSignExt))
#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
#include "llvm/Frontend/OpenMP/OMPKinds.def"

#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets)               \
  case Enum:                                                                  \
    FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet);                          \
    addAttrSet(RetAttrs, RetAttrSet, false);                                  \
    for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo)               \
      addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]);                        \
    Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs));   \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...)                         \
  case Enum:                                                                  \
    FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__},       \
                             IsVarArg);                                       \
    Fn = M.getFunction(Str);                                                  \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"

#define OMP_RTL(Enum, Str, ...)                                               \
  case Enum:                                                                  \
    Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M);        \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
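// __kmpc_fork_call and __kmpc_fork_teams invoke the outlined region as a
// callback and forward their varargs to it, so attach !callback metadata
// that lets interprocedural optimizations reason through the indirect call.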
  if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
      Fn->addMetadata(
          LLVMContext::MD_callback,
          *MDNode::get(Ctx, {MDB.createCallbackEncoding(2, {-1, -1}, true)}));
  addAttributes(FnID, *Fn);
  assert(Fn && "Failed to create OpenMP runtime function");
  assert(Fn && "Failed to create OpenMP runtime function pointer");
void OpenMPIRBuilder::initialize() { initializeTypes(M); }
  for (auto Inst = Block->getReverseIterator()->begin();
       Inst != Block->getReverseIterator()->end();) {
void OpenMPIRBuilder::finalize(Function *Fn) {
  for (OutlineInfo &OI : OutlineInfos) {
    // When finalizing a specific function, defer outline regions that belong
    // to other functions; they are restored into OutlineInfos below.
    if (Fn && OI.getFunction() != Fn) {
      DeferredOutlines.push_back(OI);
      continue;
    }

    ParallelRegionBlockSet.clear();
    OI.collectBlocks(ParallelRegionBlockSet, Blocks);
728 bool ArgsInZeroAddressSpace = Config.isTargetDevice();
737 ".omp_par", ArgsInZeroAddressSpace);
741 <<
" Exit: " << OI.ExitBB->getName() <<
"\n");
742 assert(Extractor.isEligible() &&
743 "Expected OpenMP outlining to be possible!");
745 for (
auto *V : OI.ExcludeArgsFromAggregate)
746 Extractor.excludeArgFromAggregate(V);
748 Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
752 if (TargetCpuAttr.isStringAttribute())
755 auto TargetFeaturesAttr = OuterFn->
getFnAttribute(
"target-features");
756 if (TargetFeaturesAttr.isStringAttribute())
757 OutlinedFn->
addFnAttr(TargetFeaturesAttr);
760 LLVM_DEBUG(
dbgs() <<
" Outlined function: " << *OutlinedFn <<
"\n");
762 "OpenMP outlined functions should not return a value!");
767 M.getFunctionList().insertAfter(OuterFn->
getIterator(), OutlinedFn);
774 assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
781 "Expected instructions to add in the outlined region entry");
783 End = ArtificialEntry.
rend();
788 if (
I.isTerminator()) {
790 if (OI.EntryBB->getTerminator())
791 OI.EntryBB->getTerminator()->adoptDbgRecords(
792 &ArtificialEntry,
I.getIterator(),
false);
796 I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
799 OI.EntryBB->moveBefore(&ArtificialEntry);
806 if (OI.PostOutlineCB)
807 OI.PostOutlineCB(*OutlinedFn);
811 OutlineInfos = std::move(DeferredOutlines);
832 for (
Function *
F : ConstantAllocaRaiseCandidates)
835 EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
836 [](EmitMetadataErrorKind Kind,
837 const TargetRegionEntryInfo &EntryInfo) ->
void {
838 errs() <<
"Error of kind: " << Kind
839 <<
" when emitting offload entries and metadata during "
840 "OMPIRBuilder finalization \n";
843 if (!OffloadInfoManager.empty())
844 createOffloadEntriesAndInfoMetadata(ErrorReportFn);
846 if (Config.EmitLLVMUsedMetaInfo.value_or(
false)) {
847 std::vector<WeakTrackingVH> LLVMCompilerUsed = {
848 M.getGlobalVariable(
"__openmp_nvptx_data_transfer_temporary_storage")};
849 emitUsed(
"llvm.compiler.used", LLVMCompilerUsed);
bool OpenMPIRBuilder::isFinalized() { return IsFinalized; }
OpenMPIRBuilder::~OpenMPIRBuilder() {
  assert(OutlineInfos.empty() && "There must be no outstanding outlinings");
}
866 ConstantInt::get(I32Ty,
Value), Name);
878 UsedArray.
resize(List.size());
879 for (
unsigned I = 0,
E = List.size();
I !=
E; ++
I)
883 if (UsedArray.
empty())
890 GV->setSection(
"llvm.metadata");
894OpenMPIRBuilder::emitKernelExecutionMode(
StringRef KernelName,
896 auto *Int8Ty = Builder.getInt8Ty();
899 ConstantInt::get(Int8Ty,
Mode),
Twine(KernelName,
"_exec_mode"));
907 unsigned Reserve2Flags) {
909 LocFlags |= OMP_IDENT_FLAG_KMPC;
912 IdentMap[{SrcLocStr,
uint64_t(LocFlags) << 31 | Reserve2Flags}];
917 ConstantInt::get(
Int32, Reserve2Flags),
918 ConstantInt::get(
Int32, SrcLocStrSize), SrcLocStr};
920 size_t SrcLocStrArgIdx = 4;
921 if (OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx)
925 SrcLocStr, OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx));
932 if (
GV.getValueType() == OpenMPIRBuilder::Ident &&
GV.hasInitializer())
933 if (
GV.getInitializer() == Initializer)
938 M, OpenMPIRBuilder::Ident,
941 M.getDataLayout().getDefaultGlobalsAddressSpace());
953 SrcLocStrSize = LocStr.
size();
954 Constant *&SrcLocStr = SrcLocStrMap[LocStr];
962 if (
GV.isConstant() &&
GV.hasInitializer() &&
963 GV.getInitializer() == Initializer)
966 SrcLocStr = Builder.CreateGlobalString(
967 LocStr,
"", M.getDataLayout().getDefaultGlobalsAddressSpace(),
975 unsigned Line,
unsigned Column,
981 Buffer.
append(FunctionName);
983 Buffer.
append(std::to_string(Line));
985 Buffer.
append(std::to_string(Column));
988 return getOrCreateSrcLocStr(Buffer.
str(), SrcLocStrSize);
Constant *
OpenMPIRBuilder::getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize) {
  StringRef UnknownLoc = ";unknown;unknown;0;0;;";
  return getOrCreateSrcLocStr(UnknownLoc, SrcLocStrSize);
}
1002 return getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1004 if (
DIFile *DIF = DIL->getFile())
1005 if (std::optional<StringRef> Source = DIF->getSource())
1010 return getOrCreateSrcLocStr(
Function, FileName, DIL->getLine(),
1011 DIL->getColumn(), SrcLocStrSize);
1014Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(
const LocationDescription &
Loc,
1016 return getOrCreateSrcLocStr(
Loc.DL, SrcLocStrSize,
1017 Loc.IP.getBlock()->getParent());
Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) {
  return Builder.CreateCall(
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
      "omp_global_thread_num");
}
1026OpenMPIRBuilder::InsertPointOrErrorTy
1027OpenMPIRBuilder::createBarrier(
const LocationDescription &
Loc,
Directive Kind,
1028 bool ForceSimpleCall,
bool CheckCancelFlag) {
1029 if (!updateToLocation(
Loc))
1038 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
1041 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
1044 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
1047 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
1050 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
1055 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1057 getOrCreateIdent(SrcLocStr, SrcLocStrSize, BarrierLocFlags),
1058 getOrCreateThreadID(getOrCreateIdent(SrcLocStr, SrcLocStrSize))};
1063 bool UseCancelBarrier =
1064 !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);
1067 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
1068 UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
1069 : OMPRTL___kmpc_barrier),
1072 if (UseCancelBarrier && CheckCancelFlag)
1073 if (
Error Err = emitCancelationCheckImpl(Result, OMPD_parallel))
1076 return Builder.saveIP();
1079OpenMPIRBuilder::InsertPointOrErrorTy
1080OpenMPIRBuilder::createCancel(
const LocationDescription &
Loc,
1082 omp::Directive CanceledDirective) {
1083 if (!updateToLocation(
Loc))
1087 auto *UI = Builder.CreateUnreachable();
1092 Builder.SetInsertPoint(ThenTI);
1094 Value *CancelKind =
nullptr;
1095 switch (CanceledDirective) {
1096#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1097 case DirectiveEnum: \
1098 CancelKind = Builder.getInt32(Value); \
1100#include "llvm/Frontend/OpenMP/OMPKinds.def"
1106 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1107 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1108 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
1110 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
1111 auto ExitCB = [
this, CanceledDirective,
Loc](InsertPointTy IP) ->
Error {
1112 if (CanceledDirective == OMPD_parallel) {
1114 Builder.restoreIP(IP);
1115 return createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
1116 omp::Directive::OMPD_unknown,
1125 if (
Error Err = emitCancelationCheckImpl(Result, CanceledDirective, ExitCB))
1129 Builder.SetInsertPoint(UI->getParent());
1130 UI->eraseFromParent();
1132 return Builder.saveIP();
1135OpenMPIRBuilder::InsertPointOrErrorTy
1136OpenMPIRBuilder::createCancellationPoint(
const LocationDescription &
Loc,
1137 omp::Directive CanceledDirective) {
1138 if (!updateToLocation(
Loc))
1142 auto *UI = Builder.CreateUnreachable();
1143 Builder.SetInsertPoint(UI);
1145 Value *CancelKind =
nullptr;
1146 switch (CanceledDirective) {
1147#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1148 case DirectiveEnum: \
1149 CancelKind = Builder.getInt32(Value); \
1151#include "llvm/Frontend/OpenMP/OMPKinds.def"
1157 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1158 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1159 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
1161 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancellationpoint), Args);
1162 auto ExitCB = [
this, CanceledDirective,
Loc](InsertPointTy IP) ->
Error {
1163 if (CanceledDirective == OMPD_parallel) {
1165 Builder.restoreIP(IP);
1166 return createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
1167 omp::Directive::OMPD_unknown,
1176 if (
Error Err = emitCancelationCheckImpl(Result, CanceledDirective, ExitCB))
1180 Builder.SetInsertPoint(UI->getParent());
1181 UI->eraseFromParent();
1183 return Builder.saveIP();
1186OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetKernel(
1187 const LocationDescription &
Loc, InsertPointTy AllocaIP,
Value *&Return,
1190 if (!updateToLocation(
Loc))
1193 Builder.restoreIP(AllocaIP);
1194 auto *KernelArgsPtr =
1195 Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs,
nullptr,
"kernel_args");
1196 updateToLocation(
Loc);
1200 Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr,
I);
1201 Builder.CreateAlignedStore(
1203 M.getDataLayout().getPrefTypeAlign(KernelArgs[
I]->getType()));
1207 NumThreads, HostPtr, KernelArgsPtr};
1209 Return = Builder.CreateCall(
1210 getOrCreateRuntimeFunction(M, OMPRTL___tgt_target_kernel),
1213 return Builder.saveIP();
1216OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitKernelLaunch(
1217 const LocationDescription &
Loc,
Value *OutlinedFnID,
1218 EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args,
1219 Value *DeviceID,
Value *RTLoc, InsertPointTy AllocaIP) {
1221 if (!updateToLocation(
Loc))
1234 assert(OutlinedFnID &&
"Invalid outlined function ID!");
1238 Value *Return =
nullptr;
1242 getKernelArgsVector(Args, Builder, ArgsVector);
1257 Builder.restoreIP(emitTargetKernel(
1258 Builder, AllocaIP, Return, RTLoc, DeviceID,
Args.NumTeams.front(),
1259 Args.NumThreads.front(), OutlinedFnID, ArgsVector));
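  // A non-zero return value from __tgt_target_kernel means the kernel could
  // not be launched on the device; branch to the fallback block, which runs
  // the host version of the target region.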
1266 Builder.CreateCondBr(
Failed, OffloadFailedBlock, OffloadContBlock);
1268 auto CurFn = Builder.GetInsertBlock()->getParent();
1269 emitBlock(OffloadFailedBlock, CurFn);
1270 InsertPointOrErrorTy AfterIP = EmitTargetCallFallbackCB(Builder.saveIP());
1272 return AfterIP.takeError();
1273 Builder.restoreIP(*AfterIP);
1274 emitBranch(OffloadContBlock);
1275 emitBlock(OffloadContBlock, CurFn,
true);
1276 return Builder.saveIP();
1279Error OpenMPIRBuilder::emitCancelationCheckImpl(
1280 Value *CancelFlag, omp::Directive CanceledDirective,
1281 FinalizeCallbackTy ExitCB) {
1282 assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
1283 "Unexpected cancellation!");
1288 if (Builder.GetInsertPoint() == BB->
end()) {
1294 NonCancellationBlock =
SplitBlock(BB, &*Builder.GetInsertPoint());
1296 Builder.SetInsertPoint(BB);
1302 Value *
Cmp = Builder.CreateIsNull(CancelFlag);
1303 Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
1308 Builder.SetInsertPoint(CancellationBlock);
1310 if (
Error Err = ExitCB(Builder.saveIP()))
1312 auto &FI = FinalizationStack.back();
1313 if (
Error Err = FI.FiniCB(Builder.saveIP()))
1317 Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->
begin());
1336 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1339 "Expected at least tid and bounded tid as arguments");
1340 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1343 assert(CI &&
"Expected call instruction to outlined function");
1344 CI->
getParent()->setName(
"omp_parallel");
1346 Builder.SetInsertPoint(CI);
1347 Type *PtrTy = OMPIRBuilder->VoidPtr;
  OpenMPIRBuilder::InsertPointTy CurrentIP = Builder.saveIP();
1355 Value *Args = ArgsAlloca;
1359 Args = Builder.CreatePointerCast(ArgsAlloca, PtrTy);
1360 Builder.restoreIP(CurrentIP);
1363 for (
unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {
1365 Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(
1367 Builder.CreateStore(V, StoreAddress);
1371 IfCondition ? Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32)
1372 : Builder.getInt32(1);
1375 Value *Parallel51CallArgs[] = {
1379 NumThreads ? NumThreads : Builder.getInt32(-1),
1380 Builder.getInt32(-1),
1384 Builder.getInt64(NumCapturedVars)};
1387 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_parallel_51);
1389 Builder.CreateCall(RTLFn, Parallel51CallArgs);
1392 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1395 Builder.SetInsertPoint(PrivTID);
1397 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1404 I->eraseFromParent();
1421 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call_if);
1424 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
1427 if (!
F->hasMetadata(LLVMContext::MD_callback)) {
1435 F->addMetadata(LLVMContext::MD_callback,
1444 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1447 "Expected at least tid and bounded tid as arguments");
1448 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1451 CI->
getParent()->setName(
"omp_parallel");
1452 Builder.SetInsertPoint(CI);
1455 Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
1459 RealArgs.
append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
1461 Value *
Cond = Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32);
1468 auto PtrTy = OMPIRBuilder->VoidPtr;
1469 if (IfCondition && NumCapturedVars == 0) {
1474 Builder.CreateCall(RTLFn, RealArgs);
1477 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1480 Builder.SetInsertPoint(PrivTID);
1482 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1489 I->eraseFromParent();
1493OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel(
1494 const LocationDescription &
Loc, InsertPointTy OuterAllocaIP,
1495 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
1496 FinalizeCallbackTy FiniCB,
Value *IfCondition,
Value *NumThreads,
1497 omp::ProcBindKind ProcBind,
bool IsCancellable) {
1500 if (!updateToLocation(
Loc))
1504 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1505 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1506 Value *ThreadID = getOrCreateThreadID(Ident);
1512 bool ArgsInZeroAddressSpace = Config.isTargetDevice();
1516 if (NumThreads && !Config.isTargetDevice()) {
1519 Builder.CreateIntCast(NumThreads,
Int32,
false)};
1521 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args);
1524 if (ProcBind != OMP_PROC_BIND_default) {
1528 ConstantInt::get(
Int32,
unsigned(ProcBind),
true)};
1530 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args);
1533 BasicBlock *InsertBB = Builder.GetInsertBlock();
1538 BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();
1546 InsertPointTy NewOuter(OuterAllocaBlock, OuterAllocaBlock->
begin());
1547 Builder.restoreIP(NewOuter);
1548 AllocaInst *TIDAddrAlloca = Builder.CreateAlloca(
Int32,
nullptr,
"tid.addr");
1550 Builder.CreateAlloca(
Int32,
nullptr,
"zero.addr");
  if (ArgsInZeroAddressSpace && M.getDataLayout().getAllocaAddrSpace() != 0) {
        TIDAddrAlloca, PointerType::get(M.getContext(), 0), "tid.addr.ascast");
        PointerType::get(M.getContext(), 0), "zero.addr.ascast");
1582 auto FiniCBWrapper = [&](InsertPointTy IP) {
1587 Builder.restoreIP(IP);
1589 IP = InsertPointTy(
I->getParent(),
I->getIterator());
1593 "Unexpected insertion point for finalization call!");
1597 FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});
1602 InsertPointTy InnerAllocaIP = Builder.saveIP();
1605 Builder.CreateAlloca(
Int32,
nullptr,
"tid.addr.local");
1609 ToBeDeleted.
push_back(Builder.CreateLoad(
Int32, TIDAddr,
"tid.addr.use"));
1611 Builder.CreateLoad(
Int32, ZeroAddr,
"zero.addr.use");
1629 LLVM_DEBUG(
dbgs() <<
"Before body codegen: " << *OuterFn <<
"\n");
1632 assert(BodyGenCB &&
"Expected body generation callback!");
1633 InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->
begin());
1634 if (
Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP))
1637 LLVM_DEBUG(
dbgs() <<
"After body codegen: " << *OuterFn <<
"\n");
1640 if (Config.isTargetDevice()) {
1642 OI.PostOutlineCB = [=, ToBeDeletedVec =
1643 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1645 IfCondition, NumThreads, PrivTID, PrivTIDAddr,
1646 ThreadID, ToBeDeletedVec);
1650 OI.PostOutlineCB = [=, ToBeDeletedVec =
1651 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1653 PrivTID, PrivTIDAddr, ToBeDeletedVec);
1657 OI.OuterAllocaBB = OuterAllocaBlock;
1658 OI.EntryBB = PRegEntryBB;
1659 OI.ExitBB = PRegExitBB;
1663 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
1674 ".omp_par", ArgsInZeroAddressSpace);
1679 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1681 Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands,
1686 return GV->getValueType() == OpenMPIRBuilder::Ident;
1691 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1694 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);
1697 if (&V == TIDAddr || &V == ZeroAddr) {
1698 OI.ExcludeArgsFromAggregate.push_back(&V);
1703 for (
Use &U : V.uses())
1705 if (ParallelRegionBlockSet.
count(UserI->getParent()))
1715 if (!V.getType()->isPointerTy()) {
1719 Builder.restoreIP(OuterAllocaIP);
1721 Builder.CreateAlloca(V.getType(),
nullptr, V.getName() +
".reloaded");
1725 Builder.SetInsertPoint(InsertBB,
1727 Builder.CreateStore(&V,
Ptr);
1730 Builder.restoreIP(InnerAllocaIP);
1731 Inner = Builder.CreateLoad(V.getType(),
Ptr);
1734 Value *ReplacementValue =
nullptr;
1737 ReplacementValue = PrivTID;
1739 InsertPointOrErrorTy AfterIP =
1740 PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue);
1742 return AfterIP.takeError();
1743 Builder.restoreIP(*AfterIP);
1745 InnerAllocaIP.getBlock(),
1746 InnerAllocaIP.getBlock()->getTerminator()->getIterator()};
1748 assert(ReplacementValue &&
1749 "Expected copy/create callback to set replacement value!");
1750 if (ReplacementValue == &V)
1755 UPtr->set(ReplacementValue);
1780 for (
Value *Output : Outputs)
1783 assert(Outputs.empty() &&
1784 "OpenMP outlining should not produce live-out values!");
1786 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
1788 for (
auto *BB : Blocks)
1795 auto FiniInfo = FinalizationStack.pop_back_val();
1797 assert(FiniInfo.DK == OMPD_parallel &&
1798 "Unexpected finalization stack state!");
1802 InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->
getIterator());
1803 if (
Error Err = FiniCB(PreFiniIP))
1807 addOutlineInfo(std::move(OI));
1809 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
1810 UI->eraseFromParent();
1815void OpenMPIRBuilder::emitFlush(
const LocationDescription &
Loc) {
1818 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1819 Value *
Args[] = {getOrCreateIdent(SrcLocStr, SrcLocStrSize)};
1821 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args);
1824void OpenMPIRBuilder::createFlush(
const LocationDescription &
Loc) {
1825 if (!updateToLocation(
Loc))
1830void OpenMPIRBuilder::emitTaskwaitImpl(
const LocationDescription &
Loc) {
1834 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1835 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1836 Value *
Args[] = {Ident, getOrCreateThreadID(Ident)};
1839 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait),
1843void OpenMPIRBuilder::createTaskwait(
const LocationDescription &
Loc) {
1844 if (!updateToLocation(
Loc))
1846 emitTaskwaitImpl(
Loc);
1849void OpenMPIRBuilder::emitTaskyieldImpl(
const LocationDescription &
Loc) {
1852 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1853 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1855 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};
1857 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield),
1861void OpenMPIRBuilder::createTaskyield(
const LocationDescription &
Loc) {
1862 if (!updateToLocation(
Loc))
1864 emitTaskyieldImpl(
Loc);
1873 OpenMPIRBuilder &OMPBuilder,
1876 if (Dependencies.
empty())
1896 Type *DependInfo = OMPBuilder.DependInfo;
1897 Module &M = OMPBuilder.M;
1899 Value *DepArray =
nullptr;
1900 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
1901 Builder.SetInsertPoint(
1902 OldIP.getBlock()->getParent()->getEntryBlock().getTerminator());
1905 DepArray = Builder.CreateAlloca(DepArrayTy,
nullptr,
".dep.arr.addr");
1907 Builder.restoreIP(OldIP);
1909 for (
const auto &[DepIdx, Dep] :
enumerate(Dependencies)) {
1911 Builder.CreateConstInBoundsGEP2_64(DepArrayTy, DepArray, 0, DepIdx);
1913 Value *Addr = Builder.CreateStructGEP(
1915 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
1916 Value *DepValPtr = Builder.CreatePtrToInt(Dep.DepVal, Builder.getInt64Ty());
1917 Builder.CreateStore(DepValPtr, Addr);
1920 DependInfo,
Base,
static_cast<unsigned int>(RTLDependInfoFields::Len));
1921 Builder.CreateStore(
1922 Builder.getInt64(M.getDataLayout().getTypeStoreSize(Dep.DepValueType)),
1925 Value *Flags = Builder.CreateStructGEP(
1927 static_cast<unsigned int>(RTLDependInfoFields::Flags));
1928 Builder.CreateStore(
1929 ConstantInt::get(Builder.getInt8Ty(),
1930 static_cast<unsigned int>(Dep.DepKind)),
1936OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
1937 const LocationDescription &
Loc, InsertPointTy AllocaIP,
1938 BodyGenCallbackTy BodyGenCB,
bool Tied,
Value *Final,
Value *IfCondition,
1942 if (!updateToLocation(
Loc))
1943 return InsertPointTy();
1946 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1947 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1964 BasicBlock *TaskExitBB = splitBB(Builder,
true,
"task.exit");
1965 BasicBlock *TaskBodyBB = splitBB(Builder,
true,
"task.body");
1967 splitBB(Builder,
true,
"task.alloca");
1969 InsertPointTy TaskAllocaIP =
1970 InsertPointTy(TaskAllocaBB, TaskAllocaBB->
begin());
1971 InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->
begin());
1972 if (
Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP))
1976 OI.EntryBB = TaskAllocaBB;
1977 OI.OuterAllocaBB = AllocaIP.getBlock();
1978 OI.ExitBB = TaskExitBB;
1983 Builder, AllocaIP, ToBeDeleted, TaskAllocaIP,
"global.tid",
false));
1985 OI.PostOutlineCB = [
this, Ident, Tied, Final, IfCondition, Dependencies,
1986 Mergeable, Priority, EventHandle, TaskAllocaBB,
1987 ToBeDeleted](
Function &OutlinedFn)
mutable {
1990 "there must be a single user for the outlined function");
1995 bool HasShareds = StaleCI->
arg_size() > 1;
1996 Builder.SetInsertPoint(StaleCI);
2001 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc);
2005 Value *ThreadID = getOrCreateThreadID(Ident);
2017 Value *Flags = Builder.getInt32(Tied);
2020 Builder.CreateSelect(Final, Builder.getInt32(2), Builder.getInt32(0));
2021 Flags = Builder.CreateOr(FinalFlag, Flags);
2025 Flags = Builder.CreateOr(Builder.getInt32(4), Flags);
2027 Flags = Builder.CreateOr(Builder.getInt32(32), Flags);
2033 Value *TaskSize = Builder.getInt64(
2034 divideCeil(M.getDataLayout().getTypeSizeInBits(Task), 8));
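    // Task descriptor size in bytes: the bit size of the Task struct type
    // rounded up to whole bytes.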
2039 Value *SharedsSize = Builder.getInt64(0);
2043 assert(ArgStructAlloca &&
2044 "Unable to find the alloca instruction corresponding to arguments "
2045 "for extracted function");
2048 assert(ArgStructType &&
"Unable to find struct type corresponding to "
2049 "arguments for extracted function");
2051 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
2056 CallInst *TaskData = Builder.CreateCall(
2057 TaskAllocFn, {Ident, ThreadID, Flags,
2058 TaskSize, SharedsSize,
2065 Function *TaskDetachFn = getOrCreateRuntimeFunctionPtr(
2066 OMPRTL___kmpc_task_allow_completion_event);
2068 Builder.CreateCall(TaskDetachFn, {Ident, ThreadID, TaskData});
2070 Builder.CreatePointerBitCastOrAddrSpaceCast(EventHandle,
2071 Builder.getPtrTy(0));
2072 EventVal = Builder.CreatePtrToInt(EventVal, Builder.getInt64Ty());
2073 Builder.CreateStore(EventVal, EventHandleAddr);
2079 Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskData);
2080 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2098 Builder.CreateInBoundsGEP(TaskPtr, TaskData, {
Zero,
Zero});
2101 VoidPtr, VoidPtr, Builder.getInt32Ty(), VoidPtr, VoidPtr);
2102 Value *PriorityData = Builder.CreateInBoundsGEP(
2103 TaskStructType, TaskGEP, {
Zero, ConstantInt::get(
Int32Ty, 4)});
2106 Value *CmplrData = Builder.CreateInBoundsGEP(CmplrStructType,
2108 Builder.CreateStore(Priority, CmplrData);
2133 splitBB(Builder,
true,
"if.end");
2135 Builder.GetInsertPoint()->
getParent()->getTerminator();
2136 Instruction *ThenTI = IfTerminator, *ElseTI =
nullptr;
2137 Builder.SetInsertPoint(IfTerminator);
2140 Builder.SetInsertPoint(ElseTI);
2142 if (Dependencies.size()) {
2144 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_wait_deps);
2147 {Ident, ThreadID, Builder.getInt32(Dependencies.size()), DepArray,
2148 ConstantInt::get(Builder.getInt32Ty(), 0),
2152 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0);
2154 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0);
2155 Builder.CreateCall(TaskBeginFn, {Ident, ThreadID, TaskData});
2158 CI = Builder.CreateCall(&OutlinedFn, {ThreadID, TaskData});
2160 CI = Builder.CreateCall(&OutlinedFn, {ThreadID});
2162 Builder.CreateCall(TaskCompleteFn, {Ident, ThreadID, TaskData});
2163 Builder.SetInsertPoint(ThenTI);
2166 if (Dependencies.size()) {
2168 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps);
2171 {Ident, ThreadID, TaskData, Builder.getInt32(Dependencies.size()),
2172 DepArray, ConstantInt::get(Builder.getInt32Ty(), 0),
2177 Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task);
2178 Builder.CreateCall(TaskFn, {Ident, ThreadID, TaskData});
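    // Without dependences the task is enqueued via __kmpc_omp_task; with
    // dependences __kmpc_omp_task_with_deps is used instead. The if-clause
    // "false" path above instead runs the task body immediately, bracketed by
    // __kmpc_omp_task_begin_if0 / __kmpc_omp_task_complete_if0.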
2183 Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->begin());
2185 LoadInst *Shareds = Builder.CreateLoad(VoidPtr, OutlinedFn.
getArg(1));
2187 Shareds, [Shareds](
Use &U) {
return U.getUser() != Shareds; });
2191 I->eraseFromParent();
2194 addOutlineInfo(std::move(OI));
2195 Builder.SetInsertPoint(TaskExitBB, TaskExitBB->
begin());
2197 return Builder.saveIP();
2200OpenMPIRBuilder::InsertPointOrErrorTy
2201OpenMPIRBuilder::createTaskgroup(
const LocationDescription &
Loc,
2202 InsertPointTy AllocaIP,
2203 BodyGenCallbackTy BodyGenCB) {
2204 if (!updateToLocation(
Loc))
2205 return InsertPointTy();
2208 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
2209 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2210 Value *ThreadID = getOrCreateThreadID(Ident);
2214 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup);
2215 Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
2217 BasicBlock *TaskgroupExitBB = splitBB(Builder,
true,
"taskgroup.exit");
2218 if (
Error Err = BodyGenCB(AllocaIP, Builder.saveIP()))
2221 Builder.SetInsertPoint(TaskgroupExitBB);
2224 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup);
2225 Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
2227 return Builder.saveIP();
2230OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSections(
2231 const LocationDescription &
Loc, InsertPointTy AllocaIP,
2233 FinalizeCallbackTy FiniCB,
bool IsCancellable,
bool IsNowait) {
2236 if (!updateToLocation(
Loc))
2242 auto FiniCBWrapper = [&](InsertPointTy IP) {
2251 CancellationBranches.
push_back(DummyBranch);
2255 FinalizationStack.
push_back({FiniCBWrapper, OMPD_sections, IsCancellable});
2273 auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP,
Value *IndVar) ->
Error {
2274 Builder.restoreIP(CodeGenIP);
2276 splitBBWithSuffix(Builder,
false,
".sections.after");
2280 unsigned CaseNumber = 0;
2281 for (
auto SectionCB : SectionCBs) {
2283 M.getContext(),
"omp_section_loop.body.case", CurFn,
Continue);
2284 SwitchStmt->
addCase(Builder.getInt32(CaseNumber), CaseBB);
2285 Builder.SetInsertPoint(CaseBB);
2287 if (
Error Err = SectionCB(InsertPointTy(), {CaseEndBr->
getParent(),
2299 Value *LB = ConstantInt::get(I32Ty, 0);
2300 Value *UB = ConstantInt::get(I32Ty, SectionCBs.size());
2301 Value *
ST = ConstantInt::get(I32Ty, 1);
2303 Loc, LoopBodyGenCB, LB, UB, ST,
true,
false, AllocaIP,
"section_loop");
2307 InsertPointOrErrorTy WsloopIP =
2308 applyStaticWorkshareLoop(
Loc.DL, *
LoopInfo, AllocaIP,
2309 WorksharingLoopType::ForStaticLoop, !IsNowait);
2311 return WsloopIP.takeError();
2312 InsertPointTy AfterIP = *WsloopIP;
2315 assert(LoopFini &&
"Bad structure of static workshare loop finalization");
2318 auto FiniInfo = FinalizationStack.pop_back_val();
2319 assert(FiniInfo.DK == OMPD_sections &&
2320 "Unexpected finalization stack state!");
2321 if (FinalizeCallbackTy &CB = FiniInfo.FiniCB) {
2322 Builder.restoreIP(AfterIP);
2324 splitBBWithSuffix(Builder,
true,
"sections.fini");
2325 if (
Error Err = CB(Builder.saveIP()))
2327 AfterIP = {FiniBB, FiniBB->
begin()};
2331 for (
BranchInst *DummyBranch : CancellationBranches) {
2339OpenMPIRBuilder::InsertPointOrErrorTy
2340OpenMPIRBuilder::createSection(
const LocationDescription &
Loc,
2341 BodyGenCallbackTy BodyGenCB,
2342 FinalizeCallbackTy FiniCB) {
2343 if (!updateToLocation(
Loc))
2346 auto FiniCBWrapper = [&](InsertPointTy IP) {
2357 Builder.restoreIP(IP);
2358 auto *CaseBB =
Loc.IP.getBlock();
2362 IP = InsertPointTy(
I->getParent(),
I->getIterator());
2366 Directive OMPD = Directive::OMPD_sections;
2369 return EmitOMPInlinedRegion(OMPD,
nullptr,
nullptr, BodyGenCB, FiniCBWrapper,
2377 return OpenMPIRBuilder::InsertPointTy(
I->getParent(),
IT);
Value *OpenMPIRBuilder::getGPUThreadID() {
  return Builder.CreateCall(
      getOrCreateRuntimeFunction(M,
                                 OMPRTL___kmpc_get_hardware_thread_id_in_block),
      {});
}

Value *OpenMPIRBuilder::getGPUWarpSize() {
  return Builder.CreateCall(
      getOrCreateRuntimeFunction(M, OMPRTL___kmpc_get_warp_size), {});
}
Value *OpenMPIRBuilder::getNVPTXWarpID() {
  unsigned LaneIDBits = Log2_32(Config.getGridValue().GV_Warp_Size);
  return Builder.CreateAShr(getGPUThreadID(), LaneIDBits, "nvptx_warp_id");
}
Value *OpenMPIRBuilder::getNVPTXLaneID() {
  unsigned LaneIDBits = Log2_32(Config.getGridValue().GV_Warp_Size);
  assert(LaneIDBits < 32 && "Invalid LaneIDBits size in NVPTX device.");
  unsigned LaneIDMask = ~0u >> (32u - LaneIDBits);
  return Builder.CreateAnd(getGPUThreadID(), Builder.getInt32(LaneIDMask),
                           "nvptx_lane_id");
}
Value *OpenMPIRBuilder::castValueToType(InsertPointTy AllocaIP, Value *From,
                                        Type *ToType) {
  Type *FromType = From->getType();
  uint64_t FromSize = M.getDataLayout().getTypeStoreSize(FromType);
  uint64_t ToSize = M.getDataLayout().getTypeStoreSize(ToType);
  assert(FromSize > 0 && "From size must be greater than zero");
  assert(ToSize > 0 && "To size must be greater than zero");
  if (FromType == ToType)
    return From;
  if (FromSize == ToSize)
    return Builder.CreateBitCast(From, ToType);
  if (ToType->isIntegerTy() && FromType->isIntegerTy())
    return Builder.CreateIntCast(From, ToType, /*isSigned*/ true);
  InsertPointTy SaveIP = Builder.saveIP();
  Builder.restoreIP(AllocaIP);
  Value *CastItem = Builder.CreateAlloca(ToType);
  Builder.restoreIP(SaveIP);

  Value *ValCastItem = Builder.CreatePointerBitCastOrAddrSpaceCast(
      CastItem, Builder.getPtrTy(0));
  Builder.CreateStore(From, ValCastItem);
  return Builder.CreateLoad(ToType, CastItem);
}
Value *OpenMPIRBuilder::createRuntimeShuffleFunction(InsertPointTy AllocaIP,
                                                     Value *Element,
                                                     Type *ElementType,
                                                     Value *Offset) {
  uint64_t Size = M.getDataLayout().getTypeStoreSize(ElementType);
  assert(Size <= 8 && "Unsupported bitwidth in shuffle instruction");

  // The runtime shuffle entry points operate on 32- or 64-bit integers.
  Type *CastTy = Builder.getIntNTy(Size <= 4 ? 32 : 64);
  Value *ElemCast = castValueToType(AllocaIP, Element, CastTy);
  Value *WarpSize =
      Builder.CreateIntCast(getGPUWarpSize(), Builder.getInt16Ty(), true);
  Function *ShuffleFunc = getOrCreateRuntimeFunctionPtr(
      Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32
                : RuntimeFunction::OMPRTL___kmpc_shuffle_int64);
  Value *WarpSizeCast =
      Builder.CreateIntCast(WarpSize, Builder.getInt16Ty(), true);
  Value *ShuffleCall =
      Builder.CreateCall(ShuffleFunc, {ElemCast, Offset, WarpSizeCast});
  return castValueToType(AllocaIP, ShuffleCall, CastTy);
}
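// shuffleAndStore moves a reduction element between lanes: the value is
// split into at most 64-bit chunks, each chunk is exchanged through the
// runtime shuffle above, and the result is stored into the destination slot.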
2451void OpenMPIRBuilder::shuffleAndStore(InsertPointTy AllocaIP,
Value *SrcAddr,
2454 uint64_t Size = M.getDataLayout().getTypeStoreSize(ElemType);
2465 Type *IndexTy = Builder.getIndexTy(
2466 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2467 Value *ElemPtr = DstAddr;
2469 for (
unsigned IntSize = 8; IntSize >= 1; IntSize /= 2) {
2473 Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2474 Ptr, Builder.getPtrTy(0),
Ptr->getName() +
".ascast");
2476 Builder.CreateGEP(ElemType, SrcAddr, {ConstantInt::get(IndexTy, 1)});
2477 ElemPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2478 ElemPtr, Builder.getPtrTy(0), ElemPtr->
getName() +
".ascast");
2481 if ((
Size / IntSize) > 1) {
2482 Value *PtrEnd = Builder.CreatePointerBitCastOrAddrSpaceCast(
2483 SrcAddrGEP, Builder.getPtrTy());
2488 BasicBlock *CurrentBB = Builder.GetInsertBlock();
2489 emitBlock(PreCondBB, CurFunc);
2491 Builder.CreatePHI(
Ptr->getType(), 2);
2494 Builder.CreatePHI(ElemPtr->
getType(), 2);
2498 Value *PtrDiff = Builder.CreatePtrDiff(
2499 Builder.getInt8Ty(), PtrEnd,
2500 Builder.CreatePointerBitCastOrAddrSpaceCast(
Ptr, Builder.getPtrTy()));
2501 Builder.CreateCondBr(
2502 Builder.CreateICmpSGT(PtrDiff, Builder.getInt64(IntSize - 1)), ThenBB,
2504 emitBlock(ThenBB, CurFunc);
2505 Value *Res = createRuntimeShuffleFunction(
2507 Builder.CreateAlignedLoad(
2508 IntType,
Ptr, M.getDataLayout().getPrefTypeAlign(ElemType)),
2510 Builder.CreateAlignedStore(Res, ElemPtr,
2511 M.getDataLayout().getPrefTypeAlign(ElemType));
2513 Builder.CreateGEP(IntType,
Ptr, {ConstantInt::get(IndexTy, 1)});
2514 Value *LocalElemPtr =
2515 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
2518 emitBranch(PreCondBB);
2519 emitBlock(ExitBB, CurFunc);
2521 Value *Res = createRuntimeShuffleFunction(
2522 AllocaIP, Builder.CreateLoad(IntType,
Ptr), IntType,
Offset);
2525 Res = Builder.CreateTrunc(Res, ElemType);
2526 Builder.CreateStore(Res, ElemPtr);
2527 Ptr = Builder.CreateGEP(IntType,
Ptr, {ConstantInt::get(IndexTy, 1)});
2529 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
2535void OpenMPIRBuilder::emitReductionListCopy(
2536 InsertPointTy AllocaIP, CopyAction Action,
Type *ReductionArrayTy,
2538 CopyOptionsTy CopyOptions) {
2539 Type *IndexTy = Builder.getIndexTy(
2540 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2541 Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
2545 for (
auto En :
enumerate(ReductionInfos)) {
2546 const ReductionInfo &RI = En.value();
2547 Value *SrcElementAddr =
nullptr;
2548 Value *DestElementAddr =
nullptr;
2549 Value *DestElementPtrAddr =
nullptr;
2551 bool ShuffleInElement =
false;
2554 bool UpdateDestListPtr =
false;
2557 Value *SrcElementPtrAddr = Builder.CreateInBoundsGEP(
2558 ReductionArrayTy, SrcBase,
2559 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2560 SrcElementAddr = Builder.CreateLoad(Builder.getPtrTy(), SrcElementPtrAddr);
2564 DestElementPtrAddr = Builder.CreateInBoundsGEP(
2565 ReductionArrayTy, DestBase,
2566 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2568 case CopyAction::RemoteLaneToThread: {
2569 InsertPointTy CurIP = Builder.saveIP();
2570 Builder.restoreIP(AllocaIP);
2571 AllocaInst *DestAlloca = Builder.CreateAlloca(RI.ElementType,
nullptr,
2572 ".omp.reduction.element");
2574 M.getDataLayout().getPrefTypeAlign(RI.ElementType));
2575 DestElementAddr = DestAlloca;
2577 Builder.CreateAddrSpaceCast(DestElementAddr, Builder.getPtrTy(),
2578 DestElementAddr->
getName() +
".ascast");
2579 Builder.restoreIP(CurIP);
2580 ShuffleInElement =
true;
2581 UpdateDestListPtr =
true;
2584 case CopyAction::ThreadCopy: {
2586 Builder.CreateLoad(Builder.getPtrTy(), DestElementPtrAddr);
2593 if (ShuffleInElement) {
2594 shuffleAndStore(AllocaIP, SrcElementAddr, DestElementAddr, RI.ElementType,
2595 RemoteLaneOffset, ReductionArrayTy);
2597 switch (RI.EvaluationKind) {
2598 case EvalKind::Scalar: {
2599 Value *Elem = Builder.CreateLoad(RI.ElementType, SrcElementAddr);
2601 Builder.CreateStore(Elem, DestElementAddr);
2604 case EvalKind::Complex: {
2605 Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
2606 RI.ElementType, SrcElementAddr, 0, 0,
".realp");
2607 Value *SrcReal = Builder.CreateLoad(
2608 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
2609 Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
2610 RI.ElementType, SrcElementAddr, 0, 1,
".imagp");
2611 Value *SrcImg = Builder.CreateLoad(
2612 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
2614 Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
2615 RI.ElementType, DestElementAddr, 0, 0,
".realp");
2616 Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
2617 RI.ElementType, DestElementAddr, 0, 1,
".imagp");
2618 Builder.CreateStore(SrcReal, DestRealPtr);
2619 Builder.CreateStore(SrcImg, DestImgPtr);
2622 case EvalKind::Aggregate: {
2623 Value *SizeVal = Builder.getInt64(
2624 M.getDataLayout().getTypeStoreSize(RI.ElementType));
2625 Builder.CreateMemCpy(
2626 DestElementAddr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
2627 SrcElementAddr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
2639 if (UpdateDestListPtr) {
2640 Value *CastDestAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2641 DestElementAddr, Builder.getPtrTy(),
2642 DestElementAddr->
getName() +
".ascast");
2643 Builder.CreateStore(CastDestAddr, DestElementPtrAddr);
2650 AttributeList FuncAttrs) {
2651 InsertPointTy SavedIP = Builder.saveIP();
2654 Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},
2658 "_omp_reduction_inter_warp_copy_func", &M);
2663 Builder.SetInsertPoint(EntryBB);
2681 "__openmp_nvptx_data_transfer_temporary_storage";
2682 GlobalVariable *TransferMedium = M.getGlobalVariable(TransferMediumName);
2683 unsigned WarpSize = Config.getGridValue().GV_Warp_Size;
2685 if (!TransferMedium) {
2694 Value *GPUThreadID = getGPUThreadID();
2696 Value *LaneID = getNVPTXLaneID();
2698 Value *WarpID = getNVPTXWarpID();
2700 InsertPointTy AllocaIP =
2701 InsertPointTy(Builder.GetInsertBlock(),
2702 Builder.GetInsertBlock()->getFirstInsertionPt());
2705 Builder.restoreIP(AllocaIP);
2706 AllocaInst *ReduceListAlloca = Builder.CreateAlloca(
2707 Arg0Type,
nullptr, ReduceListArg->
getName() +
".addr");
2709 Builder.CreateAlloca(Arg1Type,
nullptr, NumWarpsArg->
getName() +
".addr");
2710 Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2711 ReduceListAlloca, Arg0Type, ReduceListAlloca->
getName() +
".ascast");
2712 Value *NumWarpsAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2713 NumWarpsAlloca, Builder.getPtrTy(0),
2714 NumWarpsAlloca->
getName() +
".ascast");
2715 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
2716 Builder.CreateStore(NumWarpsArg, NumWarpsAddrCast);
2718 InsertPointTy CodeGenIP =
2720 Builder.restoreIP(CodeGenIP);
2723 Builder.CreateLoad(Builder.getPtrTy(), ReduceListAddrCast);
2725 for (
auto En :
enumerate(ReductionInfos)) {
2730 const ReductionInfo &RI = En.value();
2731 unsigned RealTySize = M.getDataLayout().getTypeAllocSize(RI.ElementType);
2732 for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
2735 unsigned NumIters = RealTySize / TySize;
2738 Value *Cnt =
nullptr;
2739 Value *CntAddr =
nullptr;
2743 CodeGenIP = Builder.saveIP();
2744 Builder.restoreIP(AllocaIP);
2746 Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
".cnt.addr");
2748 CntAddr = Builder.CreateAddrSpaceCast(CntAddr, Builder.getPtrTy(),
2749 CntAddr->
getName() +
".ascast");
2750 Builder.restoreIP(CodeGenIP);
2757 emitBlock(PrecondBB, Builder.GetInsertBlock()->getParent());
2758 Cnt = Builder.CreateLoad(Builder.getInt32Ty(), CntAddr,
2760 Value *
Cmp = Builder.CreateICmpULT(
2761 Cnt, ConstantInt::get(Builder.getInt32Ty(), NumIters));
2762 Builder.CreateCondBr(Cmp, BodyBB, ExitBB);
2763 emitBlock(BodyBB, Builder.GetInsertBlock()->getParent());
2767 InsertPointOrErrorTy BarrierIP1 =
2768 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
2769 omp::Directive::OMPD_unknown,
2773 return BarrierIP1.takeError();
2779 Value *IsWarpMaster = Builder.CreateIsNull(LaneID,
"warp_master");
2780 Builder.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
2781 emitBlock(ThenBB, Builder.GetInsertBlock()->getParent());
2784 auto *RedListArrayTy =
2786 Type *IndexTy = Builder.getIndexTy(
2787 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2789 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
2790 {ConstantInt::get(IndexTy, 0),
2791 ConstantInt::get(IndexTy, En.index())});
2793 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
2795 ElemPtr = Builder.CreateGEP(Builder.getInt32Ty(), ElemPtr, Cnt);
2799 Value *MediumPtr = Builder.CreateInBoundsGEP(
2800 ArrayTy, TransferMedium, {Builder.getInt64(0), WarpID});
2803 Value *Elem = Builder.CreateLoad(CType, ElemPtr);
2805 Builder.CreateStore(Elem, MediumPtr,
2807 Builder.CreateBr(MergeBB);
2810 emitBlock(ElseBB, Builder.GetInsertBlock()->getParent());
2811 Builder.CreateBr(MergeBB);
2814 emitBlock(MergeBB, Builder.GetInsertBlock()->getParent());
2815 InsertPointOrErrorTy BarrierIP2 =
2816 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
2817 omp::Directive::OMPD_unknown,
2821 return BarrierIP2.takeError();
2828 Value *NumWarpsVal =
2829 Builder.CreateLoad(Builder.getInt32Ty(), NumWarpsAddrCast);
2831 Value *IsActiveThread =
2832 Builder.CreateICmpULT(GPUThreadID, NumWarpsVal,
"is_active_thread");
2833 Builder.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
2835 emitBlock(W0ThenBB, Builder.GetInsertBlock()->getParent());
2839 Value *SrcMediumPtrVal = Builder.CreateInBoundsGEP(
2840 ArrayTy, TransferMedium, {Builder.getInt64(0), GPUThreadID});
2842 Value *TargetElemPtrPtr =
2843 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
2844 {ConstantInt::get(IndexTy, 0),
2845 ConstantInt::get(IndexTy, En.index())});
2846 Value *TargetElemPtrVal =
2847 Builder.CreateLoad(Builder.getPtrTy(), TargetElemPtrPtr);
2848 Value *TargetElemPtr = TargetElemPtrVal;
2851 Builder.CreateGEP(Builder.getInt32Ty(), TargetElemPtr, Cnt);
2854 Value *SrcMediumValue =
2855 Builder.CreateLoad(CType, SrcMediumPtrVal,
true);
2856 Builder.CreateStore(SrcMediumValue, TargetElemPtr);
2857 Builder.CreateBr(W0MergeBB);
2859 emitBlock(W0ElseBB, Builder.GetInsertBlock()->getParent());
2860 Builder.CreateBr(W0MergeBB);
2862 emitBlock(W0MergeBB, Builder.GetInsertBlock()->getParent());
2865 Cnt = Builder.CreateNSWAdd(
2866 Cnt, ConstantInt::get(Builder.getInt32Ty(), 1));
2867 Builder.CreateStore(Cnt, CntAddr,
false);
2869 auto *CurFn = Builder.GetInsertBlock()->
getParent();
2870 emitBranch(PrecondBB);
2871 emitBlock(ExitBB, CurFn);
2873 RealTySize %= TySize;
2877 Builder.CreateRetVoid();
2878 Builder.restoreIP(SavedIP);
2883Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
2885 AttributeList FuncAttrs) {
2889 {Builder.getPtrTy(), Builder.getInt16Ty(),
2890 Builder.getInt16Ty(), Builder.getInt16Ty()},
2894 "_omp_reduction_shuffle_and_reduce_func", &M);
2904 Builder.SetInsertPoint(EntryBB);
  Type *ReduceListArgType = ReduceListArg->getType();
  Type *LaneIDArgType = LaneIDArg->getType();
  Type *LaneIDArgPtrType = Builder.getPtrTy(0);
  Value *ReduceListAlloca = Builder.CreateAlloca(
      ReduceListArgType, nullptr, ReduceListArg->getName() + ".addr");
  Value *LaneIdAlloca = Builder.CreateAlloca(LaneIDArgType, nullptr,
                                             LaneIDArg->getName() + ".addr");
  Value *RemoteLaneOffsetAlloca = Builder.CreateAlloca(
      LaneIDArgType, nullptr, RemoteLaneOffsetArg->getName() + ".addr");
  Value *AlgoVerAlloca = Builder.CreateAlloca(LaneIDArgType, nullptr,
                                              AlgoVerArg->getName() + ".addr");
  Instruction *RemoteReductionListAlloca = Builder.CreateAlloca(
      RedListArrayTy, nullptr, ".omp.reduction.remote_reduce_list");

  Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReduceListAlloca, ReduceListArgType,
      ReduceListAlloca->getName() + ".ascast");
  Value *LaneIdAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->getName() + ".ascast");
  Value *RemoteLaneOffsetAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      RemoteLaneOffsetAlloca, LaneIDArgPtrType,
      RemoteLaneOffsetAlloca->getName() + ".ascast");
  Value *AlgoVerAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->getName() + ".ascast");
  Value *RemoteListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      RemoteReductionListAlloca, Builder.getPtrTy(),
      RemoteReductionListAlloca->getName() + ".ascast");

  Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
  Builder.CreateStore(LaneIDArg, LaneIdAddrCast);
  Builder.CreateStore(RemoteLaneOffsetArg, RemoteLaneOffsetAddrCast);
  Builder.CreateStore(AlgoVerArg, AlgoVerAddrCast);

  Value *ReduceList = Builder.CreateLoad(ReduceListArgType, ReduceListAddrCast);
  Value *LaneId = Builder.CreateLoad(LaneIDArgType, LaneIdAddrCast);
  Value *RemoteLaneOffset =
      Builder.CreateLoad(LaneIDArgType, RemoteLaneOffsetAddrCast);
  Value *AlgoVer = Builder.CreateLoad(LaneIDArgType, AlgoVerAddrCast);

  // Copy the remote lane's reduction list into the local scratch list.
  emitReductionListCopy(
      AllocaIP, CopyAction::RemoteLaneToThread, RedListArrayTy, ReductionInfos,
      ReduceList, RemoteListAddrCast, {RemoteLaneOffset, nullptr, nullptr});
2989 Value *CondAlgo0 = Builder.CreateIsNull(AlgoVer);
2990 Value *Algo1 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(1));
2991 Value *LaneComp = Builder.CreateICmpULT(LaneId, RemoteLaneOffset);
2992 Value *CondAlgo1 = Builder.CreateAnd(Algo1, LaneComp);
2993 Value *Algo2 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(2));
2994 Value *LaneIdAnd1 = Builder.CreateAnd(LaneId, Builder.getInt16(1));
2995 Value *LaneIdComp = Builder.CreateIsNull(LaneIdAnd1);
2996 Value *Algo2AndLaneIdComp = Builder.CreateAnd(Algo2, LaneIdComp);
2997 Value *RemoteOffsetComp =
2998 Builder.CreateICmpSGT(RemoteLaneOffset, Builder.getInt16(0));
2999 Value *CondAlgo2 = Builder.CreateAnd(Algo2AndLaneIdComp, RemoteOffsetComp);
3000 Value *CA0OrCA1 = Builder.CreateOr(CondAlgo0, CondAlgo1);
3001 Value *CondReduce = Builder.CreateOr(CA0OrCA1, CondAlgo2);
3007 Builder.CreateCondBr(CondReduce, ThenBB, ElseBB);
3008 emitBlock(ThenBB, Builder.GetInsertBlock()->getParent());
3009 Value *LocalReduceListPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3010 ReduceList, Builder.getPtrTy());
3011 Value *RemoteReduceListPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3012 RemoteListAddrCast, Builder.getPtrTy());
3013 Builder.CreateCall(ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr})
3014 ->addFnAttr(Attribute::NoUnwind);
3015 Builder.CreateBr(MergeBB);
3017 emitBlock(ElseBB, Builder.GetInsertBlock()->getParent());
3018 Builder.CreateBr(MergeBB);
3020 emitBlock(MergeBB, Builder.GetInsertBlock()->getParent());
3024 Algo1 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(1));
3025 Value *LaneIdGtOffset = Builder.CreateICmpUGE(LaneId, RemoteLaneOffset);
3026 Value *CondCopy = Builder.CreateAnd(Algo1, LaneIdGtOffset);
3031 Builder.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
3033 emitBlock(CpyThenBB, Builder.GetInsertBlock()->getParent());
3034 emitReductionListCopy(AllocaIP, CopyAction::ThreadCopy, RedListArrayTy,
3035 ReductionInfos, RemoteListAddrCast, ReduceList);
3036 Builder.CreateBr(CpyMergeBB);
3038 emitBlock(CpyElseBB, Builder.GetInsertBlock()->getParent());
3039 Builder.CreateBr(CpyMergeBB);
3041 emitBlock(CpyMergeBB, Builder.GetInsertBlock()->getParent());
3043 Builder.CreateRetVoid();
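// Emit the "list_to_global_copy" helper: for each ReductionInfo it loads the
// element pointer out of the thread's reduce list and stores the value into
// the corresponding field of the team-reduction buffer element selected by
// the Idx argument, handling scalar, complex and aggregate elements
// separately.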
3048Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
3050 AttributeList FuncAttrs) {
3051 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3054 Builder.getVoidTy(),
3055 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3059 "_omp_reduction_list_to_global_copy_func", &M);
3066 Builder.SetInsertPoint(EntryBlock);
  Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(), nullptr,
                                                BufferArg->getName() + ".addr");
  Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(), nullptr,
                                             IdxArg->getName() + ".addr");
  Value *ReduceListArgAlloca = Builder.CreateAlloca(
      Builder.getPtrTy(), nullptr, ReduceListArg->getName() + ".addr");
  Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      BufferArgAlloca, Builder.getPtrTy(),
      BufferArgAlloca->getName() + ".ascast");
  Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->getName() + ".ascast");
  Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReduceListArgAlloca, Builder.getPtrTy(),
      ReduceListArgAlloca->getName() + ".ascast");
3090 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3091 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3092 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3094 Value *LocalReduceList =
3095 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3096 Value *BufferArgVal =
3097 Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3098 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3099 Type *IndexTy = Builder.getIndexTy(
3100 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
  for (auto En : enumerate(ReductionInfos)) {
    const ReductionInfo &RI = En.value();
    auto *RedListArrayTy =
        ArrayType::get(Builder.getPtrTy(0), ReductionInfos.size());
3106 Value *ElemPtrPtr = Builder.CreateInBoundsGEP(
3107 RedListArrayTy, LocalReduceList,
3108 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3110 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
3114 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArgVal, Idxs);
3115 Value *GlobVal = Builder.CreateConstInBoundsGEP2_32(
3116 ReductionsBufferTy, BufferVD, 0, En.index());
    switch (RI.EvaluationKind) {
    case EvalKind::Scalar: {
      Value *TargetElement = Builder.CreateLoad(RI.ElementType, ElemPtr);
      Builder.CreateStore(TargetElement, GlobVal);
      break;
    }
    case EvalKind::Complex: {
      Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
          RI.ElementType, ElemPtr, 0, 0, ".realp");
      Value *SrcReal = Builder.CreateLoad(
          RI.ElementType->getStructElementType(0), SrcRealPtr, ".real");
      Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
          RI.ElementType, ElemPtr, 0, 1, ".imagp");
      Value *SrcImg = Builder.CreateLoad(
          RI.ElementType->getStructElementType(1), SrcImgPtr, ".imag");
      Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
          RI.ElementType, GlobVal, 0, 0, ".realp");
      Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
          RI.ElementType, GlobVal, 0, 1, ".imagp");
      Builder.CreateStore(SrcReal, DestRealPtr);
      Builder.CreateStore(SrcImg, DestImgPtr);
      break;
    }
    case EvalKind::Aggregate: {
      Value *SizeVal =
          Builder.getInt64(M.getDataLayout().getTypeStoreSize(RI.ElementType));
      Builder.CreateMemCpy(
          GlobVal, M.getDataLayout().getPrefTypeAlign(RI.ElementType), ElemPtr,
          M.getDataLayout().getPrefTypeAlign(RI.ElementType), SizeVal,
          /*isVolatile=*/false);
      break;
    }
    }
3153 Builder.CreateRetVoid();
3154 Builder.restoreIP(OldIP);
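// Emit the "list_to_global_reduce" helper: it materializes a reduction list
// whose entries point directly into the team-reduction buffer element for
// Idx, then calls ReduceFn(buffer_element_list, thread_reduce_list) so the
// buffer accumulates the thread's partial values.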
3158Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
3160 Type *ReductionsBufferTy, AttributeList FuncAttrs) {
3161 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3164 Builder.getVoidTy(),
3165 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3169 "_omp_reduction_list_to_global_reduce_func", &M);
3176 Builder.SetInsertPoint(EntryBlock);
  Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(), nullptr,
                                                BufferArg->getName() + ".addr");
  Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(), nullptr,
                                             IdxArg->getName() + ".addr");
  Value *ReduceListArgAlloca = Builder.CreateAlloca(
      Builder.getPtrTy(), nullptr, ReduceListArg->getName() + ".addr");
  auto *RedListArrayTy =
      ArrayType::get(Builder.getPtrTy(0), ReductionInfos.size());

  // 1. Build a list of reduction variables.
  Value *LocalReduceList =
      Builder.CreateAlloca(RedListArrayTy, nullptr, ".omp.reduction.red_list");
3199 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3200 BufferArgAlloca, Builder.getPtrTy(),
3201 BufferArgAlloca->
getName() +
".ascast");
3202 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3203 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3204 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3205 ReduceListArgAlloca, Builder.getPtrTy(),
3206 ReduceListArgAlloca->
getName() +
".ascast");
3207 Value *LocalReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3208 LocalReduceList, Builder.getPtrTy(),
3209 LocalReduceList->
getName() +
".ascast");
3211 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3212 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3213 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3215 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3216 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3217 Type *IndexTy = Builder.getIndexTy(
3218 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
  for (auto En : enumerate(ReductionInfos)) {
    Value *TargetElementPtrPtr = Builder.CreateInBoundsGEP(
        RedListArrayTy, LocalReduceListAddrCast,
        {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3224 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3226 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3227 ReductionsBufferTy, BufferVD, 0, En.index());
3228 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
3233 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3234 Builder.CreateCall(ReduceFn, {LocalReduceListAddrCast, ReduceList})
3235 ->addFnAttr(Attribute::NoUnwind);
3236 Builder.CreateRetVoid();
3237 Builder.restoreIP(OldIP);
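// Emit the "global_to_list_copy" helper, the inverse of the list-to-global
// copy: it reads each element of the team-reduction buffer slot selected by
// Idx and stores it back into the thread's private reduction list.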
3241Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
3243 AttributeList FuncAttrs) {
3244 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3247 Builder.getVoidTy(),
3248 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3252 "_omp_reduction_global_to_list_copy_func", &M);
3259 Builder.SetInsertPoint(EntryBlock);
  Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(), nullptr,
                                                BufferArg->getName() + ".addr");
  Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(), nullptr,
                                             IdxArg->getName() + ".addr");
  Value *ReduceListArgAlloca = Builder.CreateAlloca(
      Builder.getPtrTy(), nullptr, ReduceListArg->getName() + ".addr");
3274 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3275 BufferArgAlloca, Builder.getPtrTy(),
3276 BufferArgAlloca->
getName() +
".ascast");
3277 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3278 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3279 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3280 ReduceListArgAlloca, Builder.getPtrTy(),
3281 ReduceListArgAlloca->
getName() +
".ascast");
3282 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3283 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3284 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3286 Value *LocalReduceList =
3287 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3288 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3289 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3290 Type *IndexTy = Builder.getIndexTy(
3291 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
  for (auto En : enumerate(ReductionInfos)) {
    const OpenMPIRBuilder::ReductionInfo &RI = En.value();
    auto *RedListArrayTy =
        ArrayType::get(Builder.getPtrTy(0), ReductionInfos.size());
    Value *ElemPtrPtr = Builder.CreateInBoundsGEP(
        RedListArrayTy, LocalReduceList,
        {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3301 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
3304 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3305 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3306 ReductionsBufferTy, BufferVD, 0, En.index());
    switch (RI.EvaluationKind) {
    case EvalKind::Scalar: {
      Value *TargetElement = Builder.CreateLoad(RI.ElementType, GlobValPtr);
      Builder.CreateStore(TargetElement, ElemPtr);
      break;
    }
    case EvalKind::Complex: {
      Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
          RI.ElementType, GlobValPtr, 0, 0, ".realp");
      Value *SrcReal = Builder.CreateLoad(
          RI.ElementType->getStructElementType(0), SrcRealPtr, ".real");
      Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
          RI.ElementType, GlobValPtr, 0, 1, ".imagp");
      Value *SrcImg = Builder.CreateLoad(
          RI.ElementType->getStructElementType(1), SrcImgPtr, ".imag");
      Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
          RI.ElementType, ElemPtr, 0, 0, ".realp");
      Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
          RI.ElementType, ElemPtr, 0, 1, ".imagp");
      Builder.CreateStore(SrcReal, DestRealPtr);
      Builder.CreateStore(SrcImg, DestImgPtr);
      break;
    }
    case EvalKind::Aggregate: {
      Value *SizeVal =
          Builder.getInt64(M.getDataLayout().getTypeStoreSize(RI.ElementType));
      Builder.CreateMemCpy(
          ElemPtr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
          GlobValPtr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
          SizeVal, /*isVolatile=*/false);
      break;
    }
    }
  }

  Builder.CreateRetVoid();
3345 Builder.restoreIP(OldIP);
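// Emit the "global_to_list_reduce" helper: like list_to_global_reduce it
// builds a reduction list whose entries alias the buffer element for Idx, but
// the operands are swapped so ReduceFn folds the buffer values into the
// thread's private reduction list.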
3349Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
3351 Type *ReductionsBufferTy, AttributeList FuncAttrs) {
3352 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3355 Builder.getVoidTy(),
3356 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3360 "_omp_reduction_global_to_list_reduce_func", &M);
3367 Builder.SetInsertPoint(EntryBlock);
  Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(), nullptr,
                                                BufferArg->getName() + ".addr");
  Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(), nullptr,
                                             IdxArg->getName() + ".addr");
  Value *ReduceListArgAlloca = Builder.CreateAlloca(
      Builder.getPtrTy(), nullptr, ReduceListArg->getName() + ".addr");
  auto *RedListArrayTy =
      ArrayType::get(Builder.getPtrTy(0), ReductionInfos.size());

  // 1. Build a list of reduction variables.
  Value *LocalReduceList =
      Builder.CreateAlloca(RedListArrayTy, nullptr, ".omp.reduction.red_list");
3390 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3391 BufferArgAlloca, Builder.getPtrTy(),
3392 BufferArgAlloca->
getName() +
".ascast");
3393 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3394 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3395 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3396 ReduceListArgAlloca, Builder.getPtrTy(),
3397 ReduceListArgAlloca->
getName() +
".ascast");
3398 Value *ReductionList = Builder.CreatePointerBitCastOrAddrSpaceCast(
3399 LocalReduceList, Builder.getPtrTy(),
3400 LocalReduceList->
getName() +
".ascast");
3402 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3403 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3404 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3406 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3407 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3408 Type *IndexTy = Builder.getIndexTy(
3409 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
  for (auto En : enumerate(ReductionInfos)) {
    Value *TargetElementPtrPtr = Builder.CreateInBoundsGEP(
        RedListArrayTy, ReductionList,
        {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3416 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3417 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3418 ReductionsBufferTy, BufferVD, 0, En.index());
3419 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
3424 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3425 Builder.CreateCall(ReduceFn, {ReduceList, ReductionList})
3426 ->addFnAttr(Attribute::NoUnwind);
3427 Builder.CreateRetVoid();
3428 Builder.restoreIP(OldIP);
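// Mangle the name of the per-construct reduction combiner. With a default
// configuration this yields roughly "<Name>.omp.reduction.reduction_func";
// the exact separators come from createPlatformSpecificName, so treat that
// spelled-out form as illustrative only.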
std::string OpenMPIRBuilder::getReductionFuncName(StringRef Name) const {
  std::string Suffix =
      createPlatformSpecificName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

Expected<Function *> OpenMPIRBuilder::createReductionFunction(
    StringRef ReducerName, ArrayRef<ReductionInfo> ReductionInfos,
    ReductionGenCBKind ReductionGenCBKind, AttributeList FuncAttrs) {
  auto *FuncTy = FunctionType::get(Builder.getVoidTy(),
                                   {Builder.getPtrTy(), Builder.getPtrTy()},
                                   /*IsVarArg=*/false);
  std::string Name = getReductionFuncName(ReducerName);
3452 Builder.SetInsertPoint(EntryBB);
  Value *LHSArrayPtr = nullptr;
  Value *RHSArrayPtr = nullptr;
  Value *LHSAlloca =
      Builder.CreateAlloca(Arg0Type, nullptr, Arg0->getName() + ".addr");
  Value *RHSAlloca =
      Builder.CreateAlloca(Arg1Type, nullptr, Arg1->getName() + ".addr");
  Value *LHSAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      LHSAlloca, Arg0Type, LHSAlloca->getName() + ".ascast");
  Value *RHSAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      RHSAlloca, Arg1Type, RHSAlloca->getName() + ".ascast");
  Builder.CreateStore(Arg0, LHSAddrCast);
  Builder.CreateStore(Arg1, RHSAddrCast);
  LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
  RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
  Type *IndexTy = Builder.getIndexTy(
      M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
  for (auto En : enumerate(ReductionInfos)) {
    const ReductionInfo &RI = En.value();
    Value *RHSI8PtrPtr = Builder.CreateInBoundsGEP(
        RedArrayTy, RHSArrayPtr,
        {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
    Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
    Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
        RHSI8Ptr, RI.PrivateVariable->getType(),
        RHSI8Ptr->getName() + ".ascast");
    Value *LHSI8PtrPtr = Builder.CreateInBoundsGEP(
        RedArrayTy, LHSArrayPtr,
        {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
    Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
    Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
        LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->getName() + ".ascast");
    if (ReductionGenCBKind == ReductionGenCBKind::Clang) {
      LHSPtrs.emplace_back(LHSPtr);
      RHSPtrs.emplace_back(RHSPtr);
    } else {
      Value *LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
      Value *RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
      Value *Reduced;
      InsertPointOrErrorTy AfterIP =
          RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced);
      if (!AfterIP)
        return AfterIP.takeError();
      if (!Builder.GetInsertBlock())
        return ReductionFunc;
      Builder.restoreIP(*AfterIP);
      Builder.CreateStore(Reduced, LHSPtr);
    }
  if (ReductionGenCBKind == ReductionGenCBKind::Clang)
    for (auto En : enumerate(ReductionInfos)) {
      unsigned Index = En.index();
      const ReductionInfo &RI = En.value();
      Value *LHSFixupPtr, *RHSFixupPtr;
      Builder.restoreIP(RI.ReductionGenClang(
          Builder.saveIP(), Index, &LHSFixupPtr, &RHSFixupPtr, ReductionFunc));

      // Fix up the callback-generated code to use the argument pointers of
      // this reduction function for the LHS and RHS values.
      LHSFixupPtr->replaceUsesWithIf(
          LHSPtrs[Index], [ReductionFunc](const Use &U) {
            return cast<Instruction>(U.getUser())->getFunction() ==
                   ReductionFunc;
          });
      RHSFixupPtr->replaceUsesWithIf(
          RHSPtrs[Index], [ReductionFunc](const Use &U) {
            return cast<Instruction>(U.getUser())->getFunction() ==
                   ReductionFunc;
          });
    }
3538 Builder.CreateRetVoid();
3539 return ReductionFunc;
  for (const OpenMPIRBuilder::ReductionInfo &RI : ReductionInfos) {
    assert(RI.Variable && "expected non-null variable");
    assert(RI.PrivateVariable && "expected non-null private variable");
    assert((RI.ReductionGen || RI.ReductionGenClang) &&
           "expected non-null reduction generator callback");
    assert(RI.Variable->getType() == RI.PrivateVariable->getType() &&
           "expected variables and their private equivalents to have the same "
           "type");
    assert(RI.Variable->getType()->isPointerTy() &&
           "expected variables to be pointers");
  }
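// Entry point for device-side reductions. The overall flow below is:
//   1. materialize a private reduction list (one pointer slot per variable),
//   2. emit the shuffle-and-reduce and inter-warp-copy helpers,
//   3. call __kmpc_nvptx_parallel_reduce_nowait_v2 (or, for teams reductions,
//      __kmpc_nvptx_teams_reduce_nowait_v2 together with the list<->global
//      copy/reduce helpers over the fixed team-reduction buffer),
//   4. if the runtime returns 1, fold the partial values into the original
//      variables and branch to the continuation block.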
OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU(
    const LocationDescription &Loc, InsertPointTy AllocaIP,
    InsertPointTy CodeGenIP, ArrayRef<ReductionInfo> ReductionInfos,
    bool IsNoWait, bool IsTeamsReduction, ReductionGenCBKind ReductionGenCBKind,
    std::optional<omp::GV> GridValue, unsigned ReductionBufNum,
    Value *SrcLocInfo) {
  if (!updateToLocation(Loc))
    return InsertPointTy();
  Builder.restoreIP(CodeGenIP);

  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);

  if (ReductionInfos.size() == 0)
    return Builder.saveIP();
3585 if (ReductionGenCBKind != ReductionGenCBKind::Clang) {
3591 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
3595 AttributeList FuncAttrs;
3596 AttrBuilder AttrBldr(Ctx);
3598 AttrBldr.addAttribute(Attr);
3599 AttrBldr.removeAttribute(Attribute::OptimizeNone);
3600 FuncAttrs = FuncAttrs.addFnAttributes(Ctx, AttrBldr);
3602 CodeGenIP = Builder.saveIP();
3604 createReductionFunction(Builder.GetInsertBlock()->getParent()->getName(),
3605 ReductionInfos, ReductionGenCBKind, FuncAttrs);
3606 if (!ReductionResult)
3608 Function *ReductionFunc = *ReductionResult;
3609 Builder.restoreIP(CodeGenIP);
3612 if (GridValue.has_value())
3613 Config.setGridValue(GridValue.value());
3628 CodeGenIP = Builder.saveIP();
3629 Builder.restoreIP(AllocaIP);
3630 Value *ReductionListAlloca =
3631 Builder.CreateAlloca(RedArrayTy,
nullptr,
".omp.reduction.red_list");
3632 Value *ReductionList = Builder.CreatePointerBitCastOrAddrSpaceCast(
3633 ReductionListAlloca, PtrTy, ReductionListAlloca->
getName() +
".ascast");
3634 Builder.restoreIP(CodeGenIP);
3635 Type *IndexTy = Builder.getIndexTy(
3636 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3637 for (
auto En :
enumerate(ReductionInfos)) {
3638 const ReductionInfo &RI = En.value();
3639 Value *ElemPtr = Builder.CreateInBoundsGEP(
3640 RedArrayTy, ReductionList,
3641 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3643 Builder.CreatePointerBitCastOrAddrSpaceCast(RI.PrivateVariable, PtrTy);
3644 Builder.CreateStore(CastElem, ElemPtr);
3646 CodeGenIP = Builder.saveIP();
3648 emitShuffleAndReduceFunction(ReductionInfos, ReductionFunc, FuncAttrs);
3650 emitInterWarpCopyFunction(
Loc, ReductionInfos, FuncAttrs);
3654 Builder.restoreIP(CodeGenIP);
3656 Value *RL = Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, PtrTy);
3658 unsigned MaxDataSize = 0;
  SmallVector<Type *> ReductionTypeArgs;
  for (auto En : enumerate(ReductionInfos)) {
    auto Size = M.getDataLayout().getTypeStoreSize(En.value().ElementType);
    if (Size > MaxDataSize)
      MaxDataSize = Size;
    ReductionTypeArgs.emplace_back(En.value().ElementType);
  }
  Value *ReductionDataSize =
      Builder.getInt64(MaxDataSize * ReductionInfos.size());
  if (!IsTeamsReduction) {
    Value *SarFuncCast =
        Builder.CreatePointerBitCastOrAddrSpaceCast(SarFunc, PtrTy);
    Value *WcFuncCast =
        Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, PtrTy);
    Value *Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
                     WcFuncCast};
3675 Function *Pv2Ptr = getOrCreateRuntimeFunctionPtr(
3676 RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2);
3677 Res = Builder.CreateCall(Pv2Ptr, Args);
3679 CodeGenIP = Builder.saveIP();
3681 Ctx, ReductionTypeArgs,
"struct._globalized_locals_ty");
3682 Function *RedFixedBuferFn = getOrCreateRuntimeFunctionPtr(
3683 RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer);
3684 Function *LtGCFunc = emitListToGlobalCopyFunction(
3685 ReductionInfos, ReductionsBufferTy, FuncAttrs);
3686 Function *LtGRFunc = emitListToGlobalReduceFunction(
3687 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
3688 Function *GtLCFunc = emitGlobalToListCopyFunction(
3689 ReductionInfos, ReductionsBufferTy, FuncAttrs);
3690 Function *GtLRFunc = emitGlobalToListReduceFunction(
3691 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
3692 Builder.restoreIP(CodeGenIP);
3694 Value *KernelTeamsReductionPtr = Builder.CreateCall(
3695 RedFixedBuferFn, {},
"_openmp_teams_reductions_buffer_$_$ptr");
3697 Value *Args3[] = {SrcLocInfo,
3698 KernelTeamsReductionPtr,
3699 Builder.getInt32(ReductionBufNum),
3709 Function *TeamsReduceFn = getOrCreateRuntimeFunctionPtr(
3710 RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2);
3711 Res = Builder.CreateCall(TeamsReduceFn, Args3);
  Value *Cond = Builder.CreateICmpEQ(Res, Builder.getInt32(1));
  Builder.CreateCondBr(Cond, ThenBB, ExitBB);
  emitBlock(ThenBB, CurFunc);

  // Combine the team-master results back into the original variables.
  for (auto En : enumerate(ReductionInfos)) {
    const ReductionInfo &RI = En.value();
    Value *LHS = RI.Variable;
    Value *RHS =
        Builder.CreatePointerBitCastOrAddrSpaceCast(RI.PrivateVariable, PtrTy);

    if (ReductionGenCBKind == ReductionGenCBKind::Clang) {
      Value *LHSPtr, *RHSPtr;
      Builder.restoreIP(RI.ReductionGenClang(Builder.saveIP(), En.index(),
                                             &LHSPtr, &RHSPtr, CurFunc));
    } else {
      Value *LHSValue = Builder.CreateLoad(RI.ElementType, LHS, "final.lhs");
      Value *RHSValue = Builder.CreateLoad(RI.ElementType, RHS, "final.rhs");
      Value *Reduced;
      InsertPointOrErrorTy AfterIP =
          RI.ReductionGen(Builder.saveIP(), RHSValue, LHSValue, Reduced);
      if (!AfterIP)
        return AfterIP.takeError();
      Builder.restoreIP(*AfterIP);
      Builder.CreateStore(Reduced, LHS, false);
    }
  }
3760 emitBlock(ExitBB, CurFunc);
3761 if (ContinuationBlock) {
3762 Builder.CreateBr(ContinuationBlock);
3763 Builder.SetInsertPoint(ContinuationBlock);
3765 Config.setEmitLLVMUsed();
3767 return Builder.saveIP();
3776 ".omp.reduction.func", &M);
3786 Builder.SetInsertPoint(ReductionFuncBlock);
3787 Value *LHSArrayPtr =
nullptr;
3788 Value *RHSArrayPtr =
nullptr;
3799 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
3801 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
3802 Value *LHSAddrCast =
3803 Builder.CreatePointerBitCastOrAddrSpaceCast(LHSAlloca, Arg0Type);
3804 Value *RHSAddrCast =
3805 Builder.CreatePointerBitCastOrAddrSpaceCast(RHSAlloca, Arg1Type);
3806 Builder.CreateStore(Arg0, LHSAddrCast);
3807 Builder.CreateStore(Arg1, RHSAddrCast);
3808 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
3809 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
3811 LHSArrayPtr = ReductionFunc->
getArg(0);
3812 RHSArrayPtr = ReductionFunc->
getArg(1);
3815 unsigned NumReductions = ReductionInfos.
size();
3818 for (
auto En :
enumerate(ReductionInfos)) {
3819 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
3820 Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
3821 RedArrayTy, LHSArrayPtr, 0, En.index());
3822 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
3823 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3824 LHSI8Ptr, RI.Variable->
getType());
3825 Value *
LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
3826 Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
3827 RedArrayTy, RHSArrayPtr, 0, En.index());
3828 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
3829 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3830 RHSI8Ptr, RI.PrivateVariable->
getType());
3831 Value *
RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
3833 OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
3834 RI.ReductionGen(Builder.saveIP(),
LHS,
RHS, Reduced);
3836 return AfterIP.takeError();
3838 Builder.restoreIP(*AfterIP);
3840 if (!Builder.GetInsertBlock())
3844 if (!IsByRef[En.index()])
3845 Builder.CreateStore(Reduced, LHSPtr);
3847 Builder.CreateRetVoid();
OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductions(
    const LocationDescription &Loc, InsertPointTy AllocaIP,
    ArrayRef<ReductionInfo> ReductionInfos, ArrayRef<bool> IsByRef,
    bool IsNoWait, bool IsTeamsReduction) {
  if (Config.isGPU())
    return createReductionsGPU(Loc, AllocaIP, Builder.saveIP(), ReductionInfos,
                               IsNoWait, IsTeamsReduction);

  if (!updateToLocation(Loc))
    return InsertPointTy();

  if (ReductionInfos.size() == 0)
    return Builder.saveIP();

  // Create and populate the array holding pointers to the private copies.
  unsigned NumReductions = ReductionInfos.size();
  Type *RedArrayTy = ArrayType::get(Builder.getPtrTy(), NumReductions);
  Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator());
  Value *RedArray = Builder.CreateAlloca(RedArrayTy, nullptr, "red.array");
3880 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
3882 for (
auto En :
enumerate(ReductionInfos)) {
3883 unsigned Index = En.index();
3884 const ReductionInfo &RI = En.value();
3885 Value *RedArrayElemPtr = Builder.CreateConstInBoundsGEP2_64(
3886 RedArrayTy, RedArray, 0, Index,
"red.array.elem." +
Twine(Index));
3887 Builder.CreateStore(RI.PrivateVariable, RedArrayElemPtr);
3892 Type *IndexTy = Builder.getIndexTy(
3893 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3894 Function *
Func = Builder.GetInsertBlock()->getParent();
3897 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
3898 bool CanGenerateAtomic =
all_of(ReductionInfos, [](
const ReductionInfo &RI) {
3899 return RI.AtomicReductionGen;
3901 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize,
3903 ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
3905 Value *ThreadId = getOrCreateThreadID(Ident);
3906 Constant *NumVariables = Builder.getInt32(NumReductions);
3908 unsigned RedArrayByteSize =
DL.getTypeStoreSize(RedArrayTy);
3909 Constant *RedArraySize = ConstantInt::get(IndexTy, RedArrayByteSize);
3911 Value *Lock = getOMPCriticalRegionLock(
".reduction");
3912 Function *ReduceFunc = getOrCreateRuntimeFunctionPtr(
3913 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
3914 : RuntimeFunction::OMPRTL___kmpc_reduce);
3916 Builder.CreateCall(ReduceFunc,
3917 {Ident, ThreadId, NumVariables, RedArraySize, RedArray,
3918 ReductionFunc, Lock},
3929 Builder.CreateSwitch(ReduceCall, ContinuationBlock, 2);
3930 Switch->addCase(Builder.getInt32(1), NonAtomicRedBlock);
3931 Switch->addCase(Builder.getInt32(2), AtomicRedBlock);
3936 Builder.SetInsertPoint(NonAtomicRedBlock);
3937 for (
auto En :
enumerate(ReductionInfos)) {
3938 const ReductionInfo &RI = En.value();
3942 Value *RedValue = RI.Variable;
3943 if (!IsByRef[En.index()]) {
3944 RedValue = Builder.CreateLoad(
ValueType, RI.Variable,
3945 "red.value." +
Twine(En.index()));
3947 Value *PrivateRedValue =
3948 Builder.CreateLoad(
ValueType, RI.PrivateVariable,
3949 "red.private.value." +
Twine(En.index()));
3951 InsertPointOrErrorTy AfterIP =
3952 RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced);
3954 return AfterIP.takeError();
3955 Builder.restoreIP(*AfterIP);
3957 if (!Builder.GetInsertBlock())
3958 return InsertPointTy();
3960 if (!IsByRef[En.index()])
3961 Builder.CreateStore(Reduced, RI.Variable);
3963 Function *EndReduceFunc = getOrCreateRuntimeFunctionPtr(
3964 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
3965 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
3966 Builder.CreateCall(EndReduceFunc, {Ident, ThreadId, Lock});
3967 Builder.CreateBr(ContinuationBlock);
3972 Builder.SetInsertPoint(AtomicRedBlock);
3973 if (CanGenerateAtomic &&
llvm::none_of(IsByRef, [](
bool P) {
return P; })) {
3974 for (
const ReductionInfo &RI : ReductionInfos) {
3975 InsertPointOrErrorTy AfterIP = RI.AtomicReductionGen(
3976 Builder.saveIP(), RI.ElementType, RI.Variable, RI.PrivateVariable);
3978 return AfterIP.takeError();
3979 Builder.restoreIP(*AfterIP);
3980 if (!Builder.GetInsertBlock())
3981 return InsertPointTy();
3983 Builder.CreateBr(ContinuationBlock);
3985 Builder.CreateUnreachable();
3996 if (!Builder.GetInsertBlock())
3997 return InsertPointTy();
3999 Builder.SetInsertPoint(ContinuationBlock);
4000 return Builder.saveIP();
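// createMaster/createMasked below wrap the region produced by BodyGenCB in
// the corresponding runtime calls; the shape of the emitted code is roughly
// (illustrative only):
//   if (__kmpc_master(&loc, tid)) { <body> __kmpc_end_master(&loc, tid); }
// The actual guarding and finalization is performed by EmitOMPInlinedRegion.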
OpenMPIRBuilder::InsertPointOrErrorTy
OpenMPIRBuilder::createMaster(const LocationDescription &Loc,
                              BodyGenCallbackTy BodyGenCB,
                              FinalizeCallbackTy FiniCB) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  Directive OMPD = Directive::OMPD_master;
  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Value *Args[] = {Ident, ThreadId};

  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master);
  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);

  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master);
  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);

  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
                              /*Conditional=*/true, /*hasFinalize=*/true);
}

OpenMPIRBuilder::InsertPointOrErrorTy
OpenMPIRBuilder::createMasked(const LocationDescription &Loc,
                              BodyGenCallbackTy BodyGenCB,
                              FinalizeCallbackTy FiniCB, Value *Filter) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  Directive OMPD = Directive::OMPD_masked;
  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Value *Args[] = {Ident, ThreadId, Filter};
  Value *ArgsEnd[] = {Ident, ThreadId};

  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_masked);
  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);

  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_masked);
  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, ArgsEnd);

  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
                              /*Conditional=*/true, /*hasFinalize=*/true);
}
4058 Call->setDoesNotThrow();
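// createScan lowers the `#pragma omp scan` separator inside a worksharing
// loop that has been split into an input phase and a scan phase.
// Illustrative source shape (names only for exposition):
//   #pragma omp for reduction(inscan, +:x)
//   for (i = 0; i < n; i++) { x += a[i]; #pragma omp scan inclusive(x) ... }
// In the first (input) loop the private values are copied into per-iteration
// buffers; in the second (scan) loop the prefix values are read back.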
OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
    const LocationDescription &Loc, InsertPointTy AllocaIP,
    ArrayRef<llvm::Value *> ScanVars, ArrayRef<llvm::Type *> ScanVarsType,
    bool IsInclusive, ScanInfo *ScanRedInfo) {
  if (ScanRedInfo->OMPFirstScanLoop) {
    llvm::Error Err = emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars,
                                                    ScanVarsType, ScanRedInfo);
    if (Err)
      return Err;
  }
  if (!updateToLocation(Loc))
    return Loc.IP;

  llvm::Value *IV = ScanRedInfo->IV;
  if (ScanRedInfo->OMPFirstScanLoop) {
    // In the input phase, copy each scan variable into its iteration slot.
    for (size_t i = 0; i < ScanVars.size(); i++) {
      Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]];
      Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
      Type *DestTy = ScanVarsType[i];
      Value *Val = Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
      Value *Src = Builder.CreateLoad(DestTy, ScanVars[i]);
      Builder.CreateStore(Src, Val);
    }
  }
  Builder.CreateBr(ScanRedInfo->OMPScanLoopExit);
  emitBlock(ScanRedInfo->OMPScanDispatch,
            Builder.GetInsertBlock()->getParent());
4101 if (!ScanRedInfo->OMPFirstScanLoop) {
4102 IV = ScanRedInfo->IV;
4105 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4106 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]];
4107 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4108 Type *DestTy = ScanVarsType[i];
4110 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4111 Value *Src = Builder.CreateLoad(DestTy, SrcPtr);
4112 Builder.CreateStore(Src, ScanVars[i]);
4118 if (ScanRedInfo->OMPFirstScanLoop == IsInclusive) {
4119 Builder.CreateCondBr(CmpI, ScanRedInfo->OMPBeforeScanBlock,
4120 ScanRedInfo->OMPAfterScanBlock);
4122 Builder.CreateCondBr(CmpI, ScanRedInfo->OMPAfterScanBlock,
4123 ScanRedInfo->OMPBeforeScanBlock);
4125 emitBlock(ScanRedInfo->OMPAfterScanBlock,
4126 Builder.GetInsertBlock()->getParent());
4127 Builder.SetInsertPoint(ScanRedInfo->OMPAfterScanBlock);
4128 return Builder.saveIP();
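// Allocate the per-scan-variable buffer pointers ("vla" slots) at AllocaIP
// and emit a masked region that mallocs one buffer of Span+1 elements per
// scan variable; the matching masked region in emitScanBasedDirectiveFinalsIR
// copies the final values back and frees the buffers.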
Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
    InsertPointTy AllocaIP, ArrayRef<Value *> ScanVars,
    ArrayRef<Type *> ScanVarsType, ScanInfo *ScanRedInfo) {
  Builder.restoreIP(AllocaIP);
  // Create the shared pointer slot for each scan variable's buffer.
  for (size_t i = 0; i < ScanVars.size(); i++) {
    Value *BuffPtr =
        Builder.CreateAlloca(Builder.getPtrTy(), nullptr, "vla");
    (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]] = BuffPtr;
  }

  auto BodyGenCB = [&](InsertPointTy AllocaIP,
                       InsertPointTy CodeGenIP) -> Error {
    Builder.restoreIP(CodeGenIP);
    Value *AllocSpan =
        Builder.CreateAdd(ScanRedInfo->Span, Builder.getInt32(1));
    for (size_t i = 0; i < ScanVars.size(); i++) {
      Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], Allocsize,
                                         AllocSpan, nullptr, "arr");
      Builder.CreateStore(Buff, (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]]);
    }
4163 Builder.SetInsertPoint(ScanRedInfo->OMPScanInit->getTerminator());
4165 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4166 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4169 return AfterIP.takeError();
4170 Builder.restoreIP(*AfterIP);
4171 BasicBlock *InputBB = Builder.GetInsertBlock();
4173 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
4174 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4176 return AfterIP.takeError();
4177 Builder.restoreIP(*AfterIP);
4182Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
4184 auto BodyGenCB = [&](InsertPointTy AllocaIP,
4185 InsertPointTy CodeGenIP) ->
Error {
4186 Builder.restoreIP(CodeGenIP);
4187 for (ReductionInfo RedInfo : ReductionInfos) {
4188 Value *PrivateVar = RedInfo.PrivateVariable;
4189 Value *OrigVar = RedInfo.Variable;
4190 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[PrivateVar];
4191 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4193 Type *SrcTy = RedInfo.ElementType;
4194 Value *Val = Builder.CreateInBoundsGEP(SrcTy, Buff, ScanRedInfo->Span,
4196 Value *Src = Builder.CreateLoad(SrcTy, Val);
4198 Builder.CreateStore(Src, OrigVar);
4199 Builder.CreateFree(Buff);
4207 if (ScanRedInfo->OMPScanFinish->getTerminator())
4208 Builder.SetInsertPoint(ScanRedInfo->OMPScanFinish->getTerminator());
4210 Builder.SetInsertPoint(ScanRedInfo->OMPScanFinish);
4213 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4214 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4217 return AfterIP.takeError();
4218 Builder.restoreIP(*AfterIP);
4219 BasicBlock *InputBB = Builder.GetInsertBlock();
4221 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
4222 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4224 return AfterIP.takeError();
4225 Builder.restoreIP(*AfterIP);
4229OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
4230 const LocationDescription &
Loc,
4232 ScanInfo *ScanRedInfo) {
4234 if (!updateToLocation(
Loc))
4236 auto BodyGenCB = [&](InsertPointTy AllocaIP,
4237 InsertPointTy CodeGenIP) ->
Error {
4238 Builder.restoreIP(CodeGenIP);
4244 splitBB(Builder,
false,
"omp.outer.log.scan.exit");
4246 Builder.GetInsertBlock()->getModule(),
4250 Builder.CreateUIToFP(ScanRedInfo->Span, Builder.getDoubleTy());
4253 Builder.GetInsertBlock()->getModule(),
4256 LogVal = Builder.CreateFPToUI(LogVal, Builder.getInt32Ty());
4259 llvm::ConstantInt::get(ScanRedInfo->Span->getType(), 1));
4260 Builder.SetInsertPoint(InputBB);
4261 Builder.CreateBr(LoopBB);
4262 emitBlock(LoopBB, CurFn);
4263 Builder.SetInsertPoint(LoopBB);
4265 PHINode *Counter = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4267 PHINode *Pow2K = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4268 Counter->
addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 0),
4270 Pow2K->
addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 1),
4278 llvm::Value *CmpI = Builder.CreateICmpUGE(NMin1, Pow2K);
4279 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
4280 emitBlock(InnerLoopBB, CurFn);
4281 Builder.SetInsertPoint(InnerLoopBB);
4282 PHINode *IVal = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4284 for (ReductionInfo RedInfo : ReductionInfos) {
4285 Value *ReductionVal = RedInfo.PrivateVariable;
4286 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ReductionVal];
4287 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4288 Type *DestTy = RedInfo.ElementType;
4289 Value *
IV = Builder.CreateAdd(IVal, Builder.getInt32(1));
4291 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4292 Value *OffsetIval = Builder.CreateNUWSub(
IV, Pow2K);
4294 Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval,
"arrayOffset");
4295 Value *
LHS = Builder.CreateLoad(DestTy, LHSPtr);
4296 Value *
RHS = Builder.CreateLoad(DestTy, RHSPtr);
4298 InsertPointOrErrorTy AfterIP =
4299 RedInfo.ReductionGen(Builder.saveIP(),
LHS,
RHS, Result);
4301 return AfterIP.takeError();
4302 Builder.CreateStore(Result, LHSPtr);
4305 IVal, llvm::ConstantInt::get(Builder.getInt32Ty(), 1));
4306 IVal->
addIncoming(NextIVal, Builder.GetInsertBlock());
4307 CmpI = Builder.CreateICmpUGE(NextIVal, Pow2K);
4308 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
4309 emitBlock(InnerExitBB, CurFn);
4311 Counter, llvm::ConstantInt::get(Counter->
getType(), 1));
4314 llvm::Value *NextPow2K = Builder.CreateShl(Pow2K, 1,
"",
true);
4315 Pow2K->
addIncoming(NextPow2K, Builder.GetInsertBlock());
4317 Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
4327 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4328 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4331 return AfterIP.takeError();
4332 Builder.restoreIP(*AfterIP);
4333 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4336 return AfterIP.takeError();
4337 Builder.restoreIP(*AfterIP);
4338 Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos, ScanRedInfo);
4345Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
4348 ScanInfo *ScanRedInfo) {
4356 ScanRedInfo->OMPFirstScanLoop =
true;
4357 Error Err = InputLoopGen();
4367 ScanRedInfo->OMPFirstScanLoop =
false;
4368 Error Err = ScanLoopGen(Builder.saveIP());
4375void OpenMPIRBuilder::createScanBBs(ScanInfo *ScanRedInfo) {
4376 Function *
Fun = Builder.GetInsertBlock()->getParent();
4377 ScanRedInfo->OMPScanDispatch =
4379 ScanRedInfo->OMPAfterScanBlock =
4381 ScanRedInfo->OMPBeforeScanBlock =
4383 ScanRedInfo->OMPScanLoopExit =
4386CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
4410 Builder.SetCurrentDebugLocation(
DL);
4412 Builder.SetInsertPoint(Preheader);
4413 Builder.CreateBr(Header);
4415 Builder.SetInsertPoint(Header);
4416 PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2,
"omp_" + Name +
".iv");
4417 IndVarPHI->
addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
4418 Builder.CreateBr(
Cond);
4420 Builder.SetInsertPoint(
Cond);
4422 Builder.CreateICmpULT(IndVarPHI, TripCount,
"omp_" + Name +
".cmp");
4423 Builder.CreateCondBr(Cmp, Body, Exit);
4425 Builder.SetInsertPoint(Body);
4426 Builder.CreateBr(Latch);
4428 Builder.SetInsertPoint(Latch);
4429 Value *
Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1),
4430 "omp_" + Name +
".next",
true);
4431 Builder.CreateBr(Header);
4434 Builder.SetInsertPoint(Exit);
4435 Builder.CreateBr(After);
4438 LoopInfos.emplace_front();
4439 CanonicalLoopInfo *CL = &LoopInfos.front();
4441 CL->Header = Header;
4453OpenMPIRBuilder::createCanonicalLoop(
const LocationDescription &
Loc,
4454 LoopBodyGenCallbackTy BodyGenCB,
4459 CanonicalLoopInfo *CL = createLoopSkeleton(
Loc.DL, TripCount, BB->
getParent(),
4460 NextBB, NextBB, Name);
4464 if (updateToLocation(
Loc)) {
4468 spliceBB(Builder, After,
false);
4469 Builder.CreateBr(CL->getPreheader());
4474 if (
Error Err = BodyGenCB(CL->getBodyIP(), CL->getIndVar()))
4484 ScanInfos.emplace_front();
4485 ScanInfo *
Result = &ScanInfos.front();
4490OpenMPIRBuilder::createCanonicalScanLoops(
4491 const LocationDescription &
Loc, LoopBodyGenCallbackTy BodyGenCB,
4492 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
4493 InsertPointTy ComputeIP,
const Twine &Name, ScanInfo *ScanRedInfo) {
4494 LocationDescription ComputeLoc =
4495 ComputeIP.isSet() ? LocationDescription(ComputeIP,
Loc.DL) :
Loc;
4496 updateToLocation(ComputeLoc);
4500 Value *TripCount = calculateCanonicalLoopTripCount(
4501 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
4502 ScanRedInfo->Span = TripCount;
4503 ScanRedInfo->OMPScanInit = splitBB(Builder,
true,
"scan.init");
4504 Builder.SetInsertPoint(ScanRedInfo->OMPScanInit);
4506 auto BodyGen = [=](InsertPointTy CodeGenIP,
Value *
IV) {
4507 Builder.restoreIP(CodeGenIP);
4508 ScanRedInfo->IV =
IV;
4509 createScanBBs(ScanRedInfo);
4510 BasicBlock *InputBlock = Builder.GetInsertBlock();
4514 Terminator->setSuccessor(0, ScanRedInfo->OMPScanDispatch);
4515 emitBlock(ScanRedInfo->OMPBeforeScanBlock,
4516 Builder.GetInsertBlock()->getParent());
4517 Builder.CreateBr(ScanRedInfo->OMPScanLoopExit);
4518 emitBlock(ScanRedInfo->OMPScanLoopExit,
4519 Builder.GetInsertBlock()->getParent());
4520 Builder.CreateBr(ContinueBlock);
4521 Builder.SetInsertPoint(
4522 ScanRedInfo->OMPBeforeScanBlock->getFirstInsertionPt());
4523 return BodyGenCB(Builder.saveIP(),
IV);
4526 const auto &&InputLoopGen = [&]() ->
Error {
4528 Builder.saveIP(), BodyGen, Start, Stop, Step, IsSigned, InclusiveStop,
4529 ComputeIP, Name,
true, ScanRedInfo);
4533 Builder.restoreIP((*LoopInfo)->getAfterIP());
4536 const auto &&ScanLoopGen = [&](LocationDescription
Loc) ->
Error {
4538 createCanonicalLoop(
Loc, BodyGen, Start, Stop, Step, IsSigned,
4539 InclusiveStop, ComputeIP, Name,
true, ScanRedInfo);
4543 Builder.restoreIP((*LoopInfo)->getAfterIP());
4544 ScanRedInfo->OMPScanFinish = Builder.GetInsertBlock();
4547 Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen, ScanRedInfo);
Value *OpenMPIRBuilder::calculateCanonicalLoopTripCount(
    const LocationDescription &Loc, Value *Start, Value *Stop, Value *Step,
    bool IsSigned, bool InclusiveStop, const Twine &Name) {
  // Start, Stop and Step must be of the same integer type.
  auto *IndVarTy = cast<IntegerType>(Start->getType());
  assert(IndVarTy == Stop->getType() && "Stop type mismatch");
  assert(IndVarTy == Step->getType() && "Step type mismatch");

  updateToLocation(Loc);

  ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
  ConstantInt *One = ConstantInt::get(IndVarTy, 1);

  Value *Incr;      // Like Step, but always positive.
  Value *Span;      // Distance between Start and Stop; always non-negative.
  Value *ZeroCmp;   // Whether no iteration is executed at all.

  if (IsSigned) {
    // Ensure the increment is positive; if not, negate it and swap the bounds.
    Value *IsNeg = Builder.CreateICmpSLT(Step, Zero);
    Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step);
    Value *LB = Builder.CreateSelect(IsNeg, Stop, Start);
    Value *UB = Builder.CreateSelect(IsNeg, Start, Stop);
    Span = Builder.CreateSub(UB, LB, "", false, true);
    ZeroCmp = Builder.CreateICmp(
        InclusiveStop ? CmpInst::ICMP_SLT : CmpInst::ICMP_SLE, UB, LB);
  } else {
    Incr = Step;
    Span = Builder.CreateSub(Stop, Start, "", true);
    ZeroCmp = Builder.CreateICmp(
        InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Stop, Start);
  }

  Value *CountIfLooping;
  if (InclusiveStop) {
    CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One);
  } else {
    // Avoid incrementing past Stop since that may overflow.
    Value *CountIfTwo = Builder.CreateAdd(
        Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One);
    Value *OneCmp = Builder.CreateICmp(CmpInst::ICMP_ULE, Span, Incr);
    CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
  }

  return Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
                              "omp_" + Name + ".tripcount");
4614 const LocationDescription &
Loc, LoopBodyGenCallbackTy BodyGenCB,
4615 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
4616 InsertPointTy ComputeIP,
const Twine &Name,
bool InScan,
4617 ScanInfo *ScanRedInfo) {
4618 LocationDescription ComputeLoc =
4619 ComputeIP.isSet() ? LocationDescription(ComputeIP,
Loc.DL) :
Loc;
4621 Value *TripCount = calculateCanonicalLoopTripCount(
4622 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
4624 auto BodyGen = [=](InsertPointTy CodeGenIP,
Value *
IV) {
4625 Builder.restoreIP(CodeGenIP);
4626 Value *Span = Builder.CreateMul(
IV, Step);
4627 Value *IndVar = Builder.CreateAdd(Span, Start);
4629 ScanRedInfo->IV = IndVar;
4630 return BodyGenCB(Builder.saveIP(), IndVar);
4632 LocationDescription LoopLoc =
4635 : LocationDescription(Builder.saveIP(),
4636 Builder.getCurrentDebugLocation());
4637 return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
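// Worked example of the trip count computed by calculateCanonicalLoopTripCount
// for this overload (illustrative values): with Start=0, Stop=10, Step=3,
// IsSigned=true, InclusiveStop=false we get Span = 10, Incr = 3,
// CountIfTwo = (10 - 1)/3 + 1 = 4, and since Stop > Start the final select
// yields a trip count of 4 (induction values 0, 3, 6, 9).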
4646 OpenMPIRBuilder &OMPBuilder) {
4647 unsigned Bitwidth = Ty->getIntegerBitWidth();
4649 return OMPBuilder.getOrCreateRuntimeFunction(
4650 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
4652 return OMPBuilder.getOrCreateRuntimeFunction(
4653 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
4662 OpenMPIRBuilder &OMPBuilder) {
4663 unsigned Bitwidth = Ty->getIntegerBitWidth();
4665 return OMPBuilder.getOrCreateRuntimeFunction(
4666 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
4668 return OMPBuilder.getOrCreateRuntimeFunction(
4669 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
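// Lower a canonical loop as a statically scheduled worksharing loop: allocate
// p.lastiter/p.lowerbound/p.upperbound/p.stride, call __kmpc_for_static_init_*
// in the preheader, rewrite the loop to iterate over the chunk assigned to
// this thread, call __kmpc_for_static_fini in the exit block, and optionally
// emit an implicit barrier.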
OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop(
    DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
    WorksharingLoopType LoopType, bool NeedsBarrier) {
  assert(CLI->isValid() && "Requires a valid canonical loop");
  assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) &&
         "Require dedicated allocate IP");

  // Set up the source location value for the OpenMP runtime.
  Builder.restoreIP(CLI->getPreheaderIP());
  Builder.SetCurrentDebugLocation(DL);

  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize);
  Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);

  Value *IV = CLI->getIndVar();
  Type *IVTy = IV->getType();
4692 LoopType == WorksharingLoopType::DistributeForStaticLoop
4696 getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
  // Allocate space for the computed loop bounds as expected by the "init"
  // runtime function.
  Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
  Type *I32Type = Type::getInt32Ty(M.getContext());
  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
  Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
  Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
  Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
  CLI->setLastIter(PLastIter);
4712 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
4714 Constant *One = ConstantInt::get(IVTy, 1);
4715 Builder.CreateStore(Zero, PLowerBound);
4716 Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
4717 Builder.CreateStore(UpperBound, PUpperBound);
4718 Builder.CreateStore(One, PStride);
4720 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
4723 (LoopType == WorksharingLoopType::DistributeStaticLoop)
4724 ? OMPScheduleType::OrderedDistribute
4727 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
4732 {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound, PUpperBound});
4733 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
4734 Value *PDistUpperBound =
4735 Builder.CreateAlloca(IVTy,
nullptr,
"p.distupperbound");
4736 Args.push_back(PDistUpperBound);
4739 Builder.CreateCall(StaticInit, Args);
4740 Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
4741 Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
4742 Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
4743 Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
4744 CLI->setTripCount(TripCount);
4751 Builder.SetInsertPoint(CLI->getBody(),
4752 CLI->getBody()->getFirstInsertionPt());
4753 Builder.SetCurrentDebugLocation(
DL);
4754 return Builder.CreateAdd(OldIV, LowerBound);
4758 Builder.SetInsertPoint(CLI->getExit(),
4759 CLI->getExit()->getTerminator()->getIterator());
4760 Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
4764 InsertPointOrErrorTy BarrierIP =
4765 createBarrier(LocationDescription(Builder.saveIP(),
DL),
4766 omp::Directive::OMPD_for,
false,
4769 return BarrierIP.takeError();
4772 InsertPointTy AfterIP = CLI->getAfterIP();
4778OpenMPIRBuilder::InsertPointOrErrorTy
4779OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
DebugLoc DL,
4780 CanonicalLoopInfo *CLI,
4781 InsertPointTy AllocaIP,
4784 assert(CLI->isValid() &&
"Requires a valid canonical loop");
4785 assert(ChunkSize &&
"Chunk size is required");
4787 LLVMContext &Ctx = CLI->getFunction()->getContext();
4789 Value *OrigTripCount = CLI->getTripCount();
4790 Type *IVTy =
IV->getType();
4792 "Max supported tripcount bitwidth is 64 bits");
4794 :
Type::getInt64Ty(Ctx);
4797 Constant *One = ConstantInt::get(InternalIVTy, 1);
4803 getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
4806 Builder.restoreIP(AllocaIP);
4807 Builder.SetCurrentDebugLocation(
DL);
4808 Value *PLastIter = Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
4809 Value *PLowerBound =
4810 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.lowerbound");
4811 Value *PUpperBound =
4812 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.upperbound");
4813 Value *PStride = Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.stride");
4814 CLI->setLastIter(PLastIter);
4817 Builder.restoreIP(CLI->getPreheaderIP());
4818 Builder.SetCurrentDebugLocation(
DL);
4821 Value *CastedChunkSize =
4822 Builder.CreateZExtOrTrunc(ChunkSize, InternalIVTy,
"chunksize");
4823 Value *CastedTripCount =
4824 Builder.CreateZExt(OrigTripCount, InternalIVTy,
"tripcount");
4826 Constant *SchedulingType = ConstantInt::get(
4827 I32Type,
static_cast<int>(OMPScheduleType::UnorderedStaticChunked));
4828 Builder.CreateStore(Zero, PLowerBound);
4829 Value *OrigUpperBound = Builder.CreateSub(CastedTripCount, One);
4830 Builder.CreateStore(OrigUpperBound, PUpperBound);
4831 Builder.CreateStore(One, PStride);
4836 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
4837 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4838 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
4839 Builder.CreateCall(StaticInit,
4841 SchedulingType, PLastIter,
4842 PLowerBound, PUpperBound,
4847 Value *FirstChunkStart =
4848 Builder.CreateLoad(InternalIVTy, PLowerBound,
"omp_firstchunk.lb");
4849 Value *FirstChunkStop =
4850 Builder.CreateLoad(InternalIVTy, PUpperBound,
"omp_firstchunk.ub");
4851 Value *FirstChunkEnd = Builder.CreateAdd(FirstChunkStop, One);
4853 Builder.CreateSub(FirstChunkEnd, FirstChunkStart,
"omp_chunk.range");
4854 Value *NextChunkStride =
4855 Builder.CreateLoad(InternalIVTy, PStride,
"omp_dispatch.stride");
4858 BasicBlock *DispatchEnter = splitBB(Builder,
true);
4859 Value *DispatchCounter;
4864 CanonicalLoopInfo *DispatchCLI =
cantFail(createCanonicalLoop(
4865 {Builder.saveIP(),
DL},
4866 [&](InsertPointTy BodyIP,
Value *Counter) {
4867 DispatchCounter = Counter;
4870 FirstChunkStart, CastedTripCount, NextChunkStride,
4876 BasicBlock *DispatchBody = DispatchCLI->getBody();
4877 BasicBlock *DispatchLatch = DispatchCLI->getLatch();
4878 BasicBlock *DispatchExit = DispatchCLI->getExit();
4879 BasicBlock *DispatchAfter = DispatchCLI->getAfter();
4880 DispatchCLI->invalidate();
4888 Builder.restoreIP(CLI->getPreheaderIP());
4889 Builder.SetCurrentDebugLocation(
DL);
4892 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
4893 Value *ChunkEnd = Builder.CreateAdd(DispatchCounter, ChunkRange);
4894 Value *IsLastChunk =
4895 Builder.CreateICmpUGE(ChunkEnd, CastedTripCount,
"omp_chunk.is_last");
4896 Value *CountUntilOrigTripCount =
4897 Builder.CreateSub(CastedTripCount, DispatchCounter);
4898 Value *ChunkTripCount = Builder.CreateSelect(
4899 IsLastChunk, CountUntilOrigTripCount, ChunkRange,
"omp_chunk.tripcount");
4900 Value *BackcastedChunkTC =
4901 Builder.CreateTrunc(ChunkTripCount, IVTy,
"omp_chunk.tripcount.trunc");
4902 CLI->setTripCount(BackcastedChunkTC);
4907 Value *BackcastedDispatchCounter =
4908 Builder.CreateTrunc(DispatchCounter, IVTy,
"omp_dispatch.iv.trunc");
4910 Builder.restoreIP(CLI->getBodyIP());
4911 return Builder.CreateAdd(
IV, BackcastedDispatchCounter);
4916 Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
4920 InsertPointOrErrorTy AfterIP =
4921 createBarrier(LocationDescription(Builder.saveIP(),
DL), OMPD_for,
4924 return AfterIP.takeError();
4942 unsigned Bitwidth = Ty->getIntegerBitWidth();
4943 Module &M = OMPBuilder->M;
4945 case WorksharingLoopType::ForStaticLoop:
4947 return OMPBuilder->getOrCreateRuntimeFunction(
4948 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
4950 return OMPBuilder->getOrCreateRuntimeFunction(
4951 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
4953 case WorksharingLoopType::DistributeStaticLoop:
4955 return OMPBuilder->getOrCreateRuntimeFunction(
4956 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
4958 return OMPBuilder->getOrCreateRuntimeFunction(
4959 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
4961 case WorksharingLoopType::DistributeForStaticLoop:
4963 return OMPBuilder->getOrCreateRuntimeFunction(
4964 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
4966 return OMPBuilder->getOrCreateRuntimeFunction(
4967 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
4970 if (Bitwidth != 32 && Bitwidth != 64) {
4984 Module &M = OMPBuilder->M;
4993 if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
4994 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
4995 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
4996 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
4997 Builder.CreateCall(RTLFn, RealArgs);
5000 FunctionCallee RTLNumThreads = OMPBuilder->getOrCreateRuntimeFunction(
5001 M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
5002 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
5003 Value *NumThreads = Builder.CreateCall(RTLNumThreads, {});
5006 Builder.CreateZExtOrTrunc(NumThreads, TripCountTy,
"num.threads.cast"));
5007 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5008 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
5009 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5011 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
5013 Builder.CreateCall(RTLFn, RealArgs);
5017 OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI,
Value *Ident,
5022 Value *TripCount = CLI->getTripCount();
5028 Preheader->
splice(std::prev(Preheader->
end()), CLI->getBody(),
5029 CLI->getBody()->begin(), std::prev(CLI->getBody()->end()));
5034 Builder.restoreIP({Preheader, Preheader->
end()});
5037 Builder.CreateBr(CLI->getExit());
5040 OpenMPIRBuilder::OutlineInfo CleanUpInfo;
5043 CleanUpInfo.EntryBB = CLI->getHeader();
5044 CleanUpInfo.ExitBB = CLI->getExit();
5045 CleanUpInfo.collectBlocks(RegionBlockSet, BlocksToBeRemoved);
5053 "Expected unique undroppable user of outlined function");
5055 assert(OutlinedFnCallInstruction &&
"Expected outlined function call");
5057 "Expected outlined function call to be located in loop preheader");
5059 if (OutlinedFnCallInstruction->
arg_size() > 1)
5066 LoopBodyArg, TripCount, OutlinedFn);
5068 for (
auto &ToBeDeletedItem : ToBeDeleted)
5069 ToBeDeletedItem->eraseFromParent();
5073OpenMPIRBuilder::InsertPointTy
5074OpenMPIRBuilder::applyWorkshareLoopTarget(
DebugLoc DL, CanonicalLoopInfo *CLI,
5075 InsertPointTy AllocaIP,
5078 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
5079 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
5082 OI.OuterAllocaBB = CLI->getPreheader();
5088 OI.OuterAllocaBB = AllocaIP.getBlock();
5091 OI.EntryBB = CLI->getBody();
5092 OI.ExitBB = CLI->getLatch()->splitBasicBlock(CLI->getLatch()->begin(),
5093 "omp.prelatch",
true);
5096 Builder.restoreIP({CLI->getPreheader(), CLI->getPreheader()->begin()});
5100 AllocaInst *NewLoopCnt = Builder.CreateAlloca(CLI->getIndVarType(), 0,
"");
5102 Builder.CreateLoad(CLI->getIndVarType(), NewLoopCnt);
5113 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
5124 CLI->getPreheader(),
5133 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
5139 CLI->getIndVar()->user_end());
5142 if (ParallelRegionBlockSet.
count(Inst->getParent())) {
5143 Inst->replaceUsesOfWith(CLI->getIndVar(), NewLoopCntLoad);
5149 OI.ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
5156 OI.PostOutlineCB = [=, ToBeDeletedVec =
5157 std::move(ToBeDeleted)](
Function &OutlinedFn) {
5161 addOutlineInfo(std::move(OI));
5162 return CLI->getAfterIP();
5165OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyWorkshareLoop(
5166 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
5167 bool NeedsBarrier, omp::ScheduleKind SchedKind, Value *ChunkSize,
5168 bool HasSimdModifier, bool HasMonotonicModifier,
5169 bool HasNonmonotonicModifier, bool HasOrderedClause,
5171 if (Config.isTargetDevice())
5172 return applyWorkshareLoopTarget(DL, CLI, AllocaIP, LoopType);
5174 SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
5175 HasNonmonotonicModifier, HasOrderedClause);
5177 bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
5178 OMPScheduleType::ModifierOrdered;
5179 switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
5180 case OMPScheduleType::BaseStatic:
5181 assert(!ChunkSize && "No chunk size with static-chunked schedule");
5183 return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType,
5184 NeedsBarrier, ChunkSize);
5186 return applyStaticWorkshareLoop(DL, CLI, AllocaIP, LoopType, NeedsBarrier);
5188 case OMPScheduleType::BaseStaticChunked:
5190 return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType,
5191 NeedsBarrier, ChunkSize);
5193 return applyStaticChunkedWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier,
5196 case OMPScheduleType::BaseRuntime:
5197 case OMPScheduleType::BaseAuto:
5198 case OMPScheduleType::BaseGreedy:
5199 case OMPScheduleType::BaseBalanced:
5200 case OMPScheduleType::BaseSteal:
5201 case OMPScheduleType::BaseGuidedSimd:
5202 case OMPScheduleType::BaseRuntimeSimd:
5204 "schedule type does not support user-defined chunk sizes");
5206 case OMPScheduleType::BaseDynamicChunked:
5207 case OMPScheduleType::BaseGuidedChunked:
5208 case OMPScheduleType::BaseGuidedIterativeChunked:
5209 case OMPScheduleType::BaseGuidedAnalyticalChunked:
5210 case OMPScheduleType::BaseStaticBalancedChunked:
5211 return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType,
5212 NeedsBarrier, ChunkSize);
5225 unsigned Bitwidth = Ty->getIntegerBitWidth();
5227 return OMPBuilder.getOrCreateRuntimeFunction(
5228 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
5230 return OMPBuilder.getOrCreateRuntimeFunction(
5231 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
5241 unsigned Bitwidth = Ty->getIntegerBitWidth();
5243 return OMPBuilder.getOrCreateRuntimeFunction(
5244 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
5246 return OMPBuilder.getOrCreateRuntimeFunction(
5247 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
5256 unsigned Bitwidth = Ty->getIntegerBitWidth();
5258 return OMPBuilder.getOrCreateRuntimeFunction(
5259 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
5261 return OMPBuilder.getOrCreateRuntimeFunction(
5262 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
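// The three helpers above only select the 4u/8u unsigned variant of the
// __kmpc_dispatch_init/next/fini entry points from the induction-variable
// bitwidth; 32- and 64-bit counters are the only supported widths.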
5266OpenMPIRBuilder::InsertPointOrErrorTy
5267OpenMPIRBuilder::applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
5268 InsertPointTy AllocaIP,
5270 bool NeedsBarrier, Value *Chunk) {
5271 assert(CLI->isValid() && "Requires a valid canonical loop");
5273 "Require dedicated allocate IP");
5275 "Require valid schedule type");
5277 bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
5278 OMPScheduleType::ModifierOrdered;
5281 Builder.SetCurrentDebugLocation(DL);
5284 Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize);
5285 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
5289 Type *IVTy = IV->getType();
5294 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
5296 Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
5297 Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
5298 Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
5299 Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
5300 CLI->setLastIter(PLastIter);
5308 Constant *One = ConstantInt::get(IVTy, 1);
5309 Builder.CreateStore(One, PLowerBound);
5310 Value *UpperBound = CLI->getTripCount();
5311 Builder.CreateStore(UpperBound, PUpperBound);
5312 Builder.CreateStore(One, PStride);
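// The dispatch bookkeeping uses a 1-based iteration space: lower bound and
// stride are seeded with 1 and the upper bound with the trip count before the
// __kmpc_dispatch_init_* call below.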
5318 InsertPointTy AfterIP = CLI->getAfterIP();
5326 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
5329 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5332 Builder.CreateCall(DynamicInit,
5333 {SrcLoc, ThreadNum, SchedulingType, One,
5334 UpperBound, One, Chunk});
5343 Builder.CreateCall(DynamicNext, {SrcLoc, ThreadNum, PLastIter,
5344 PLowerBound, PUpperBound, PStride});
5345 Constant *Zero32 = ConstantInt::get(I32Type, 0);
5348 Builder.CreateSub(Builder.CreateLoad(IVTy, PLowerBound), One,
"lb");
5349 Builder.CreateCondBr(MoreWork, Header, Exit);
5355 PI->setIncomingBlock(0, OuterCond);
5356 PI->setIncomingValue(0, LowerBound);
5361 Br->setSuccessor(0, OuterCond);
5366 Builder.SetInsertPoint(Cond, Cond->getFirstInsertionPt());
5367 UpperBound = Builder.CreateLoad(IVTy, PUpperBound, "ub");
5374 assert(BI->getSuccessor(1) == Exit);
5375 BI->setSuccessor(1, OuterCond);
5379 Builder.SetInsertPoint(&Latch->back());
5381 Builder.CreateCall(DynamicFini, {SrcLoc, ThreadNum});
5386 Builder.SetInsertPoint(&Exit->back());
5387 InsertPointOrErrorTy BarrierIP =
5388 createBarrier(LocationDescription(Builder.saveIP(), DL),
5389 omp::Directive::OMPD_for, false,
5392 return BarrierIP.takeError();
5411 auto HasRemainingUses = [&BBsToErase](BasicBlock *BB) {
5416 if (BBsToErase.count(UseInst->getParent()))
5423 while (BBsToErase.remove_if(HasRemainingUses)) {
5433 InsertPointTy ComputeIP) {
5434 assert(Loops.size() >= 1 && "At least one loop required");
5435 size_t NumLoops = Loops.size();
5439 return Loops.front();
5441 CanonicalLoopInfo *Outermost = Loops.front();
5442 CanonicalLoopInfo *Innermost = Loops.back();
5443 BasicBlock *OrigPreheader = Outermost->getPreheader();
5444 BasicBlock *OrigAfter = Outermost->getAfter();
5451 Loop->collectControlBlocks(OldControlBBs);
5454 Builder.SetCurrentDebugLocation(DL);
5455 if (ComputeIP.isSet())
5456 Builder.restoreIP(ComputeIP);
5458 Builder.restoreIP(Outermost->getPreheaderIP());
5462 Value *CollapsedTripCount = nullptr;
5463 for (CanonicalLoopInfo *L : Loops) {
5465 "All loops to collapse must be valid canonical loops");
5466 Value *OrigTripCount = L->getTripCount();
5467 if (!CollapsedTripCount) {
5468 CollapsedTripCount = OrigTripCount;
5473 CollapsedTripCount = Builder.CreateNUWMul(CollapsedTripCount, OrigTripCount);
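// The collapsed trip count is the product of the member loops' trip counts;
// the NUW multiply encodes the assumption that this product fits the
// induction-variable type.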
5477 CanonicalLoopInfo *Result =
5478 createLoopSkeleton(DL, CollapsedTripCount, F,
5479 OrigPreheader->getNextNode(), OrigAfter, "collapsed");
5485 Builder.restoreIP(Result->getBodyIP());
5489 NewIndVars.resize(NumLoops);
5490 for (int i = NumLoops - 1; i >= 1; --i) {
5491 Value *OrigTripCount = Loops[i]->getTripCount();
5493 Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
5494 NewIndVars[i] = NewIndVar;
5496 Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
5499 NewIndVars[0] = Leftover;
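// De-linearize the collapsed induction variable: going from innermost to
// outermost, urem recovers each inner index and udiv strips it off, leaving
// the outermost index in Leftover.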
5510 auto ContinueWith = [&ContinueBlock, &ContinuePred, DL](BasicBlock *Dest,
5517 ContinueBlock = nullptr;
5518 ContinuePred = NextSrc;
5525 for (size_t i = 0; i < NumLoops - 1; ++i)
5526 ContinueWith(Loops[i]->getBody(), Loops[i + 1]->getHeader());
5529 ContinueWith(Innermost->getBody(), Innermost->getLatch());
5532 for (size_t i = NumLoops - 1; i > 0; --i)
5533 ContinueWith(Loops[i]->getAfter(), Loops[i - 1]->getLatch());
5536 ContinueWith(Result->getLatch(), nullptr);
5543 for (size_t i = 0; i < NumLoops; ++i)
5544 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
5549 for (CanonicalLoopInfo *L : Loops)
5558std::vector<CanonicalLoopInfo *>
5562 "Must pass as many tile sizes as there are loops");
5563 int NumLoops = Loops.size();
5564 assert(NumLoops >= 1 && "At least one loop to tile required");
5566 CanonicalLoopInfo *OutermostLoop = Loops.front();
5567 CanonicalLoopInfo *InnermostLoop = Loops.back();
5568 Function *F = OutermostLoop->getBody()->getParent();
5569 BasicBlock *InnerEnter = InnermostLoop->getBody();
5570 BasicBlock *InnerLatch = InnermostLoop->getLatch();
5576 Loop->collectControlBlocks(OldControlBBs);
5583 for (CanonicalLoopInfo *L : Loops) {
5584 assert(L->isValid() && "All input loops must be valid canonical loops");
5596 for (int i = 0; i < NumLoops - 1; ++i) {
5597 CanonicalLoopInfo *Surrounding = Loops[i];
5600 BasicBlock *EnterBB = Surrounding->getBody();
5606 Builder.SetCurrentDebugLocation(DL);
5607 Builder.restoreIP(OutermostLoop->getPreheaderIP());
5609 for (int i = 0; i < NumLoops; ++i) {
5611 Value *OrigTripCount = OrigTripCounts[i];
5614 Value *FloorCompleteTripCount = Builder.CreateUDiv(OrigTripCount, TileSize);
5615 Value *FloorTripRem = Builder.CreateURem(OrigTripCount, TileSize);
5624 Value *FloorTripOverflow =
5625 Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
5627 FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
5628 Value *FloorTripCount =
5629 Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
5630 "omp_floor" + Twine(i) + ".tripcount", true);
5633 FloorCompleteCount.push_back(FloorCompleteTripCount);
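// FloorTripCount is effectively ceildiv(OrigTripCount, TileSize): the udiv
// result plus one more iteration whenever a remainder exists, so a partial
// tile gets its own floor iteration.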
5639 std::vector<CanonicalLoopInfo *> Result;
5640 Result.reserve(NumLoops * 2);
5644 BasicBlock *Enter = OutermostLoop->getPreheader();
5651 BasicBlock *OutroInsertBefore = InnermostLoop->getExit();
5653 auto EmbeddNewLoop =
5654 [this, DL, F, InnerEnter, &Enter, &Continue, &OutroInsertBefore](
5656 CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
5657 DL, TripCount, F, InnerEnter, OutroInsertBefore, Name);
5662 Enter = EmbeddedLoop->getBody();
5663 Continue = EmbeddedLoop->getLatch();
5664 OutroInsertBefore = EmbeddedLoop->getLatch();
5665 return EmbeddedLoop;
5669 const Twine &NameBase) {
5671 CanonicalLoopInfo *EmbeddedLoop =
5672 EmbeddNewLoop(P.value(), NameBase + Twine(P.index()));
5673 Result.push_back(EmbeddedLoop);
5677 EmbeddNewLoops(FloorCount, "floor");
5681 Builder.SetInsertPoint(Enter->getTerminator());
5683 for (int i = 0; i < NumLoops; ++i) {
5684 CanonicalLoopInfo *FloorLoop = Result[i];
5687 Value *FloorIsEpilogue =
5688 Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCompleteCount[i]);
5689 Value *TileTripCount =
5690 Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize);
5696 EmbeddNewLoops(TileCounts, "tile");
5701 for (std::pair<BasicBlock *, BasicBlock *> P : InbetweenCode) {
5710 BodyEnter = nullptr;
5711 BodyEntered = ExitBB;
5723 Builder.restoreIP(Result.back()->getBodyIP());
5724 for (int i = 0; i < NumLoops; ++i) {
5725 CanonicalLoopInfo *FloorLoop = Result[i];
5726 CanonicalLoopInfo *TileLoop = Result[NumLoops + i];
5727 Value *OrigIndVar = OrigIndVars[i];
5731 Builder.CreateMul(Size, FloorLoop->getIndVar(), {}, true);
5733 Builder.CreateAdd(Scale, TileLoop->getIndVar(), {}, true);
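// Each original induction variable is rebuilt as
//   OrigIndVar = FloorIV * TileSize + TileIV,
// with no-unsigned-wrap arithmetic since the result stays inside the original
// iteration range.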
5740 for (CanonicalLoopInfo *L : Loops)
5744 for (CanonicalLoopInfo *GenL : Result)
5755 if (Properties.empty())
5778 assert(Loop->isValid() && "Expecting a valid CanonicalLoopInfo");
5782 assert(Latch && "A valid CanonicalLoopInfo must have a unique latch");
5790 if (I.mayReadOrWriteMemory()) {
5794 I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
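// Memory instructions in the loop are tagged with an access group here; the
// group is referenced from llvm.loop.parallel_accesses metadata below so the
// accesses are treated as free of loop-carried dependences.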
5799void OpenMPIRBuilder::unrollLoopFull(DebugLoc, CanonicalLoopInfo *Loop) {
5806void OpenMPIRBuilder::unrollLoopHeuristic(DebugLoc, CanonicalLoopInfo *Loop) {
5814void OpenMPIRBuilder::createIfVersion(CanonicalLoopInfo *CanonicalLoop,
5817 const Twine &NamePrefix) {
5818 Function *F = CanonicalLoop->getFunction();
5840 auto SplitBeforeIt = CanonicalLoop->getBody()->getFirstNonPHIIt();
5846 C, NamePrefix + ".if.then", Cond->getParent(), Cond->getNextNode());
5848 C, NamePrefix + ".if.else", Cond->getParent(), CanonicalLoop->getExit());
5851 Builder.SetInsertPoint(SplitBeforeIt);
5853 Builder.CreateCondBr(IfCond, ThenBlock, ElseBlock);
5856 spliceBB(IP, ThenBlock, false, Builder.getCurrentDebugLocation());
5859 Builder.SetInsertPoint(ElseBlock);
5865 ExistingBlocks.reserve(L->getNumBlocks() + 1);
5867 ExistingBlocks.append(L->block_begin(), L->block_end());
5873 assert(LoopCond && LoopHeader && "Invalid loop structure");
5875 if (Block == L->getLoopPreheader() || Block == L->getLoopLatch() ||
5882 if (Block == ThenBlock)
5883 NewBB->setName(NamePrefix + ".if.else");
5886 VMap[Block] = NewBB;
5890 Builder.CreateBr(NewBlocks.front());
5894 L->getLoopLatch()->splitBasicBlock(
5895 L->getLoopLatch()->begin(), NamePrefix + ".pre_latch", true);
5899 L->addBasicBlockToLoop(ThenBlock, LI);
5903OpenMPIRBuilder::getOpenMPDefaultSimdAlign(const Triple &TargetTriple,
5905 if (TargetTriple.isX86()) {
5906 if (Features.lookup("avx512f"))
5908 else if (Features.lookup("avx"))
5912 if (TargetTriple.isPPC())
5914 if (TargetTriple.isWasm())
5919void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop,
5921 Value *IfCond, OrderKind Order,
5925 Function *F = CanonicalLoop->getFunction();
5940 if (AlignedVars.size()) {
5941 InsertPointTy IP = Builder.saveIP();
5942 for (auto &AlignedItem : AlignedVars) {
5943 Value *AlignedPtr = AlignedItem.first;
5944 Value *Alignment = AlignedItem.second;
5947 Builder.CreateAlignmentAssumption(F->getDataLayout(), AlignedPtr,
5950 Builder.restoreIP(IP);
5955 createIfVersion(CanonicalLoop, IfCond, VMap, LIA, LI, L, "simd");
5965 if (Block == CanonicalLoop->getCond() ||
5966 Block == CanonicalLoop->getHeader())
5968 Reachable.insert(Block);
5978 if ((Safelen == nullptr) || (Order == OrderKind::OMP_ORDER_concurrent)) {
5986 Ctx, {MDString::get(Ctx, "llvm.loop.parallel_accesses"), AccessGroup}));
6002 Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"), BoolConst}));
6004 if (Simdlen || Safelen) {
6008 ConstantInt *VectorizeWidth = Simdlen == nullptr ? Safelen : Simdlen;
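// When both clauses are given, simdlen takes precedence over safelen for the
// llvm.loop.vectorize.width hint emitted from VectorizeWidth.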
6034static std::unique_ptr<TargetMachine>
6038 StringRef CPU = F->getFnAttribute("target-cpu").getValueAsString();
6039 StringRef Features = F->getFnAttribute("target-features").getValueAsString();
6050 std::nullopt, OptLevel));
6074 [&](const Function &F) { return TM->getTargetTransformInfo(F); });
6075 FAM.registerPass([&]() { return TIRA; });
6089 assert(L && "Expecting CanonicalLoopInfo to be recognized as a loop");
6094 nullptr, ORE, static_cast<int>(OptLevel),
6115 << " Threshold=" << UP.Threshold << "\n"
6118 << " PartialOptSizeThreshold="
6138 Ptr = Load->getPointerOperand();
6140 Ptr = Store->getPointerOperand();
6144 Ptr = Ptr->stripPointerCasts();
6147 if (Alloca->getParent() == &F->getEntryBlock())
6167 int MaxTripCount = 0;
6168 bool MaxOrZero = false;
6169 unsigned TripMultiple = 0;
6171 bool UseUpperBound = false;
6173 MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP,
6175 unsigned Factor = UP.Count;
6176 LLVM_DEBUG(dbgs() << "Suggesting unroll factor of " << Factor << "\n");
6184void OpenMPIRBuilder::unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop,
6186 CanonicalLoopInfo **UnrolledCLI) {
6187 assert(Factor >= 0 && "Unroll factor must not be negative");
6203 Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst}));
6216 *UnrolledCLI = Loop;
6221 "unrolling only makes sense with a factor of 2 or larger");
6223 Type *IndVarTy = Loop->getIndVarType();
6230 std::vector<CanonicalLoopInfo *> LoopNest =
6231 tileLoops(DL, {Loop}, {FactorVal});
6234 CanonicalLoopInfo *InnerLoop = LoopNest[1];
6245 Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst})});
6248 (*UnrolledCLI)->assertOK();
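// Partial unrolling with a requested CLI is modelled as a two-loop tile:
// tileLoops() splits the loop by the factor and the inner tile loop carries
// llvm.loop.unroll.count so the LoopUnroll pass expands it later.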
6252OpenMPIRBuilder::InsertPointTy
6253OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc,
6256 if (!updateToLocation(Loc))
6260 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
6261 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6262 Value *ThreadId = getOrCreateThreadID(Ident);
6264 llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt);
6266 Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
6268 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate);
6269 Builder.CreateCall(Fn, Args);
6271 return Builder.saveIP();
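// __kmpc_copyprivate broadcasts the single-thread copy: DidItLD tells the
// runtime whether this thread executed the region, and CpyFn copies BufSize
// bytes from CpyBuf into the other threads' private copies.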
6274OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSingle(
6275 const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
6279 if (!updateToLocation(Loc))
6285 if (!CPVars.empty()) {
6287 Builder.CreateStore(Builder.getInt32(0), DidIt);
6290 Directive OMPD = Directive::OMPD_single;
6292 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
6293 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6294 Value *ThreadId = getOrCreateThreadID(Ident);
6297 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single);
6298 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
6300 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
6301 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
6303 auto FiniCBWrapper = [&](InsertPointTy IP) -> Error {
6304 if (Error Err = FiniCB(IP))
6311 Builder.CreateStore(Builder.getInt32(1), DidIt);
6324 InsertPointOrErrorTy AfterIP =
6325 EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
6329 return AfterIP.takeError();
6332 for (size_t I = 0, E = CPVars.size(); I < E; ++I)
6334 createCopyPrivate(LocationDescription(Builder.saveIP(), Loc.DL),
6335 ConstantInt::get(Int64, 0), CPVars[I],
6338 } else if (!IsNowait) {
6339 InsertPointOrErrorTy AfterIP =
6340 createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
6341 omp::Directive::OMPD_unknown, false,
6344 return AfterIP.takeError();
6346 return Builder.saveIP();
6349OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createCritical(
6350 const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
6351 FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) {
6353 if (!updateToLocation(Loc))
6356 Directive OMPD = Directive::OMPD_critical;
6358 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
6359 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6360 Value *ThreadId = getOrCreateThreadID(Ident);
6361 Value *LockVar = getOMPCriticalRegionLock(CriticalName);
6362 Value *Args[] = {Ident, ThreadId, LockVar};
6368 EnterArgs.push_back(HintInst);
6369 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint);
6371 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical);
6373 Instruction *EntryCall = Builder.CreateCall(RTFn, EnterArgs);
6376 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical);
6377 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
6379 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
6383OpenMPIRBuilder::InsertPointTy
6384OpenMPIRBuilder::createOrderedDepend(const LocationDescription &Loc,
6385 InsertPointTy AllocaIP, unsigned NumLoops,
6387 const Twine &Name, bool IsDependSource) {
6391 "OpenMP runtime requires depend vec with i64 type");
6393 if (!updateToLocation(Loc))
6398 Builder.restoreIP(AllocaIP);
6399 AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI64Ty, nullptr, Name);
6401 updateToLocation(Loc);
6404 for (unsigned I = 0; I < NumLoops; ++I) {
6405 Value *DependAddrGEPIter = Builder.CreateInBoundsGEP(
6406 ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(I)});
6407 StoreInst *STInst = Builder.CreateStore(StoreValues[I], DependAddrGEPIter);
6411 Value *DependBaseAddrGEP = Builder.CreateInBoundsGEP(
6412 ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(0)});
6415 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
6416 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6417 Value *ThreadId = getOrCreateThreadID(Ident);
6418 Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
6422 RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_post);
6424 RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_wait);
6425 Builder.CreateCall(RTLFn, Args);
6427 return Builder.saveIP();
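// Doacross dependence vectors are staged in an i64 stack array; IsDependSource
// selects between __kmpc_doacross_post (source) and __kmpc_doacross_wait
// (sink).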
6430OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createOrderedThreadsSimd(
6431 const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
6432 FinalizeCallbackTy FiniCB, bool IsThreads) {
6433 if (!updateToLocation(Loc))
6436 Directive OMPD = Directive::OMPD_ordered;
6442 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
6443 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6444 Value *ThreadId = getOrCreateThreadID(Ident);
6447 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_ordered);
6448 EntryCall = Builder.CreateCall(EntryRTLFn, Args);
6451 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_ordered);
6452 ExitCall = Builder.CreateCall(ExitRTLFn, Args);
6455 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
6459OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::EmitOMPInlinedRegion(
6461 BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,
6462 bool HasFinalize, bool IsCancellable) {
6465 FinalizationStack.push_back({FiniCB, OMPD, IsCancellable});
6469 BasicBlock *EntryBB = Builder.GetInsertBlock();
6478 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
6481 if (Error Err = BodyGenCB(InsertPointTy(),
6489 "Unexpected control flow graph state!!");
6490 InsertPointOrErrorTy AfterIP =
6491 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
6493 return AfterIP.takeError();
6495 "Unexpected Control Flow State!");
6501 "Unexpected Insertion point location!");
6504 auto InsertBB = merged ? ExitPredBB : ExitBB;
6507 Builder.SetInsertPoint(InsertBB);
6509 return Builder.saveIP();
6512OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
6515 if (!Conditional || !EntryCall)
6516 return Builder.saveIP();
6518 BasicBlock *EntryBB = Builder.GetInsertBlock();
6519 Value *CallBool = Builder.CreateIsNotNull(EntryCall);
6531 Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
6533 Builder.SetInsertPoint(UI);
6534 Builder.Insert(EntryBBTI);
6535 UI->eraseFromParent();
6542OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitCommonDirectiveExit(
6543 omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall,
6546 Builder.restoreIP(FinIP);
6550 assert(!FinalizationStack.empty() &&
6551 "Unexpected finalization stack state!");
6553 FinalizationInfo Fi = FinalizationStack.pop_back_val();
6554 assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!");
6556 if (Error Err = Fi.FiniCB(FinIP))
6563 Builder.SetInsertPoint(FiniBBTI);
6567 return Builder.saveIP();
6571 Builder.Insert(ExitCall);
6577OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCopyinClauseBlocks(
6578 InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr,
6607 "copyin.not.master.end");
6614 Builder.SetInsertPoint(OMP_Entry);
6615 Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
6616 Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
6617 Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr);
6618 Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
6620 Builder.SetInsertPoint(CopyBegin);
6622 Builder.SetInsertPoint(Builder.CreateBr(CopyEnd));
6624 return Builder.saveIP();
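// Copyin only has work to do when the master copy and the thread-private copy
// live at different addresses, hence the pointer comparison guarding the
// CopyBegin block.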
6627CallInst *OpenMPIRBuilder::createOMPAlloc(const LocationDescription &Loc,
6631 updateToLocation(Loc);
6634 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
6635 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6636 Value *ThreadId = getOrCreateThreadID(Ident);
6639 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc);
6641 return Builder.CreateCall(Fn, Args, Name);
6644CallInst *OpenMPIRBuilder::createOMPFree(const LocationDescription &Loc,
6648 updateToLocation(Loc);
6651 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
6652 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6653 Value *ThreadId = getOrCreateThreadID(Ident);
6655 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
6656 return Builder.CreateCall(Fn, Args, Name);
6659CallInst *OpenMPIRBuilder::createOMPInteropInit(
6660 const LocationDescription &Loc, Value *InteropVar,
6662 Value *DependenceAddress, bool HaveNowaitClause) {
6664 updateToLocation(Loc);
6667 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
6668 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6669 Value *ThreadId = getOrCreateThreadID(Ident);
6670 if (Device == nullptr)
6672 Constant *InteropTypeVal = ConstantInt::get(Int32, (int)InteropType);
6673 if (NumDependences == nullptr) {
6674 NumDependences = ConstantInt::get(Int32, 0);
6678 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
6680 Ident, ThreadId, InteropVar, InteropTypeVal,
6681 Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
6683 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_init);
6685 return Builder.CreateCall(Fn, Args);
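// Absent clauses are replaced with constant defaults before the call (the
// dependence count, for instance, becomes 0), so __tgt_interop_init always
// receives a full argument list.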
6688CallInst *OpenMPIRBuilder::createOMPInteropDestroy(
6689 const LocationDescription &Loc, Value *InteropVar, Value *Device,
6690 Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause) {
6692 updateToLocation(Loc);
6695 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
6696 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6697 Value *ThreadId = getOrCreateThreadID(Ident);
6698 if (Device == nullptr)
6700 if (NumDependences == nullptr) {
6701 NumDependences = ConstantInt::get(Int32, 0);
6705 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
6707 Ident, ThreadId, InteropVar, Device,
6708 NumDependences, DependenceAddress, HaveNowaitClauseVal};
6710 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_destroy);
6712 return Builder.CreateCall(Fn, Args);
6715CallInst *OpenMPIRBuilder::createOMPInteropUse(const LocationDescription &Loc,
6717 Value *NumDependences,
6718 Value *DependenceAddress,
6719 bool HaveNowaitClause) {
6721 updateToLocation(Loc);
6723 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
6724 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6725 Value *ThreadId = getOrCreateThreadID(Ident);
6726 if (Device == nullptr)
6728 if (NumDependences == nullptr) {
6729 NumDependences = ConstantInt::get(Int32, 0);
6733 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
6735 Ident, ThreadId, InteropVar, Device,
6736 NumDependences, DependenceAddress, HaveNowaitClauseVal};
6738 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_use);
6740 return Builder.CreateCall(Fn, Args);
6743CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
6747 updateToLocation(Loc);
6750 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
6751 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6752 Value *ThreadId = getOrCreateThreadID(Ident);
6754 getOrCreateInternalVariable(Int8PtrPtr, Name.str());
6758 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);
6760 return Builder.CreateCall(Fn, Args);
6763OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetInit(
6764 const LocationDescription &Loc,
6765 const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
6767 "expected num_threads and num_teams to be specified");
6769 if (!updateToLocation(Loc))
6773 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
6774 Constant *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6786 const std::string DebugPrefix = "_debug__";
6787 if (KernelName.ends_with(DebugPrefix)) {
6788 KernelName = KernelName.drop_back(DebugPrefix.length());
6789 Kernel = M.getFunction(KernelName);
6795 if (Attrs.MinTeams > 1 || Attrs.MaxTeams.front() > 0)
6800 int32_t MaxThreadsVal = Attrs.MaxThreads.front();
6801 if (MaxThreadsVal < 0)
6802 MaxThreadsVal = std::max(
6805 if (MaxThreadsVal > 0)
6806 writeThreadBoundsForKernel(T, *Kernel, Attrs.MinThreads, MaxThreadsVal);
6817 Function *Fn = getOrCreateRuntimeFunctionPtr(
6818 omp::RuntimeFunction::OMPRTL___kmpc_target_init);
6821 Twine DynamicEnvironmentName = KernelName + "_dynamic_environment";
6822 Constant *DynamicEnvironmentInitializer =
6826 DynamicEnvironmentInitializer, DynamicEnvironmentName,
6828 DL.getDefaultGlobalsAddressSpace());
6832 DynamicEnvironmentGV->getType() == DynamicEnvironmentPtr
6833 ? DynamicEnvironmentGV
6835 DynamicEnvironmentPtr);
6838 ConfigurationEnvironment, {
6839 UseGenericStateMachineVal,
6840 MayUseNestedParallelismVal,
6847 ReductionBufferLength,
6850 KernelEnvironment, {
6851 ConfigurationEnvironmentInitializer,
6855 std::string KernelEnvironmentName =
6856 (KernelName + "_kernel_environment").str();
6859 KernelEnvironmentInitializer, KernelEnvironmentName,
6861 DL.getDefaultGlobalsAddressSpace());
6865 KernelEnvironmentGV->getType() == KernelEnvironmentPtr
6866 ? KernelEnvironmentGV
6868 KernelEnvironmentPtr);
6869 Value *KernelLaunchEnvironment = DebugKernelWrapper->getArg(0);
6871 KernelLaunchEnvironment =
6872 KernelLaunchEnvironment->getType() == KernelLaunchEnvParamTy
6873 ? KernelLaunchEnvironment
6874 : Builder.CreateAddrSpaceCast(KernelLaunchEnvironment,
6875 KernelLaunchEnvParamTy);
6877 Builder.CreateCall(Fn, {KernelEnvironment, KernelLaunchEnvironment});
6879 Value *ExecUserCode = Builder.CreateICmpEQ(
6889 auto *UI = Builder.CreateUnreachable();
6895 Builder.SetInsertPoint(WorkerExitBB);
6896 Builder.CreateRetVoid();
6899 Builder.SetInsertPoint(CheckBBTI);
6900 Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
6903 UI->eraseFromParent();
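// Kernel prologue: __kmpc_target_init receives the kernel/launch environment
// built above and its result decides between executing the user code and
// taking the worker exit path created here.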
6910void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc,
6911 int32_t TeamsReductionDataSize,
6912 int32_t TeamsReductionBufferLength) {
6913 if (!updateToLocation(Loc))
6916 Function *Fn = getOrCreateRuntimeFunctionPtr(
6917 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
6919 Builder.CreateCall(Fn, {});
6921 if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
6927 const std::string DebugPrefix = "_debug__";
6929 KernelName = KernelName.drop_back(DebugPrefix.length());
6930 auto *KernelEnvironmentGV =
6931 M.getNamedGlobal((KernelName + "_kernel_environment").str());
6932 assert(KernelEnvironmentGV && "Expected kernel environment global\n");
6933 auto *KernelEnvironmentInitializer = KernelEnvironmentGV->getInitializer();
6935 KernelEnvironmentInitializer,
6936 ConstantInt::get(Int32, TeamsReductionDataSize), {0, 7});
6938 NewInitializer, ConstantInt::get(Int32, TeamsReductionBufferLength),
6945 if (Kernel.hasFnAttribute(Name)) {
6946 int32_t OldLimit = Kernel.getFnAttributeAsParsedInteger(Name);
6952std::pair<int32_t, int32_t>
6954 int32_t ThreadLimit =
6955 Kernel.getFnAttributeAsParsedInteger("omp_target_thread_limit");
6958 const auto &Attr = Kernel.getFnAttribute("amdgpu-flat-work-group-size");
6959 if (!Attr.isValid() || !Attr.isStringAttribute())
6960 return {0, ThreadLimit};
6961 auto [LBStr, UBStr] = Attr.getValueAsString().split(',');
6964 return {0, ThreadLimit};
6965 UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
6971 if (Kernel.hasFnAttribute("nvvm.maxntid")) {
6972 int32_t UB = Kernel.getFnAttributeAsParsedInteger("nvvm.maxntid");
6973 return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
6975 return {0, ThreadLimit};
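// Thread bounds start from the omp_target_thread_limit attribute and are then
// clamped against target-specific limits (amdgpu-flat-work-group-size or
// nvvm.maxntid) when those attributes are present.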
6978void OpenMPIRBuilder::writeThreadBoundsForKernel(const Triple &T,
6981 Kernel.addFnAttr("omp_target_thread_limit", std::to_string(UB));
6984 Kernel.addFnAttr("amdgpu-flat-work-group-size",
6992std::pair<int32_t, int32_t>
6995 return {0, Kernel.getFnAttributeAsParsedInteger("omp_target_num_teams")};
6999 int32_t LB, int32_t UB) {
7006 Kernel.addFnAttr("omp_target_num_teams", std::to_string(LB));
7009void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
7011 if (Config.isTargetDevice()) {
7018 else if (T.isNVPTX())
7020 else if (T.isSPIRV())
7027 if (Config.isTargetDevice()) {
7028 assert(OutlinedFn && "The outlined function must exist if embedded");
7037Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(Function *OutlinedFn,
7042 assert(!M.getGlobalVariable(EntryFnName, true) &&
7043 "Named kernel already exists?");
7049Error OpenMPIRBuilder::emitTargetRegionFunction(
7050 TargetRegionEntryInfo &EntryInfo,
7051 FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry,
7055 OffloadInfoManager.getTargetRegionEntryFnName(EntryFnName, EntryInfo);
7057 if (Config.isTargetDevice() || !Config.openMPOffloadMandatory()) {
7061 OutlinedFn = *CBResult;
7063 OutlinedFn = nullptr;
7069 if (!IsOffloadEntry)
7072 std::string EntryFnIDName =
7073 Config.isTargetDevice()
7074 ? std::string(EntryFnName)
7075 : createPlatformSpecificName({EntryFnName, "region_id"});
7077 OutlinedFnID = registerTargetRegionFunction(EntryInfo, OutlinedFn,
7078 EntryFnName, EntryFnIDName);
7082Constant *OpenMPIRBuilder::registerTargetRegionFunction(
7083 TargetRegionEntryInfo &EntryInfo,
Function *OutlinedFn,
7086 setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
7087 auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
7088 auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
7089 OffloadInfoManager.registerTargetRegionEntryInfo(
7090 EntryInfo, EntryAddr, OutlinedFnID,
7091 OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion);
7092 return OutlinedFnID;
7095OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData(
7096 const LocationDescription &Loc, InsertPointTy AllocaIP,
7097 InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond,
7098 TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB,
7100 function_ref<InsertPointOrErrorTy(InsertPointTy CodeGenIP,
7101 BodyGenTy BodyGenType)>
7104 if (!updateToLocation(Loc))
7105 return InsertPointTy();
7107 Builder.restoreIP(CodeGenIP);
7109 if (Config.IsTargetDevice.value_or(false)) {
7111 InsertPointOrErrorTy AfterIP =
7112 BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv);
7114 return AfterIP.takeError();
7115 Builder.restoreIP(*AfterIP);
7117 return Builder.saveIP();
7120 bool IsStandAlone = !BodyGenCB;
7121 MapInfosTy *MapInfo;
7125 auto BeginThenGen = [&](InsertPointTy AllocaIP,
7126 InsertPointTy CodeGenIP) -> Error {
7127 MapInfo = &GenMapInfoCB(Builder.saveIP());
7128 if (Error Err = emitOffloadingArrays(
7129 AllocaIP, Builder.saveIP(), *MapInfo, Info, CustomMapperCB,
7130 true, DeviceAddrCB))
7133 TargetDataRTArgs RTArgs;
7134 emitOffloadingArraysArgument(Builder, RTArgs, Info);
7137 Value *PointerNum = Builder.getInt32(Info.NumberOfPtrs);
7142 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
7143 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7147 SrcLocInfo, DeviceID,
7148 PointerNum, RTArgs.BasePointersArray,
7149 RTArgs.PointersArray, RTArgs.SizesArray,
7150 RTArgs.MapTypesArray, RTArgs.MapNamesArray,
7151 RTArgs.MappersArray};
7154 assert(MapperFunc && "MapperFunc missing for standalone target data");
7158 if (Info.HasNoWait) {
7165 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(*MapperFunc),
7168 if (Info.HasNoWait) {
7172 emitBlock(OffloadContBlock, CurFn, true);
7173 Builder.restoreIP(Builder.saveIP());
7178 bool RequiresOuterTargetTask = Info.HasNoWait;
7179 if (!RequiresOuterTargetTask)
7180 cantFail(TaskBodyCB(nullptr, nullptr,
7183 cantFail(emitTargetTask(TaskBodyCB, DeviceID, SrcLocInfo, AllocaIP,
7184 {}, RTArgs, Info.HasNoWait));
7186 Function *BeginMapperFunc = getOrCreateRuntimeFunctionPtr(
7187 omp::OMPRTL___tgt_target_data_begin_mapper);
7189 Builder.CreateCall(BeginMapperFunc, OffloadingArgs);
7191 for (auto DeviceMap : Info.DevicePtrInfoMap) {
7194 Builder.CreateLoad(Builder.getPtrTy(), DeviceMap.second.first);
7195 Builder.CreateStore(LI, DeviceMap.second.second);
7202 InsertPointOrErrorTy AfterIP =
7203 BodyGenCB(Builder.saveIP(), BodyGenTy::Priv);
7205 return AfterIP.takeError();
7206 Builder.restoreIP(*AfterIP);
7214 auto BeginElseGen = [&](InsertPointTy AllocaIP,
7215 InsertPointTy CodeGenIP) -> Error {
7216 InsertPointOrErrorTy AfterIP =
7217 BodyGenCB(Builder.saveIP(), BodyGenTy::DupNoPriv);
7219 return AfterIP.takeError();
7220 Builder.restoreIP(*AfterIP);
7225 auto EndThenGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7226 TargetDataRTArgs RTArgs;
7227 Info.EmitDebug = !MapInfo->Names.empty();
7228 emitOffloadingArraysArgument(Builder, RTArgs, Info, true);
7231 Value *PointerNum = Builder.getInt32(Info.NumberOfPtrs);
7236 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
7237 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7240 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
7241 PointerNum, RTArgs.BasePointersArray,
7242 RTArgs.PointersArray, RTArgs.SizesArray,
7243 RTArgs.MapTypesArray, RTArgs.MapNamesArray,
7244 RTArgs.MappersArray};
7246 getOrCreateRuntimeFunctionPtr(omp::OMPRTL___tgt_target_data_end_mapper);
7248 Builder.CreateCall(EndMapperFunc, OffloadingArgs);
7254 auto EndElseGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7262 return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
7263 return BeginThenGen(AllocaIP, Builder.saveIP());
7271 InsertPointOrErrorTy AfterIP =
7272 BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv);
7274 return AfterIP.takeError();
7278 return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
7279 return EndThenGen(AllocaIP, Builder.saveIP());
7282 return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
7283 return BeginThenGen(AllocaIP, Builder.saveIP());
7289 return Builder.saveIP();
7293OpenMPIRBuilder::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
7294 bool IsGPUDistribute) {
7295 assert((IVSize == 32 || IVSize == 64) &&
7296 "IV size is not compatible with the omp runtime");
7298 if (IsGPUDistribute)
7300 ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
7301 : omp::OMPRTL___kmpc_distribute_static_init_4u)
7302 : (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_8
7303 : omp::OMPRTL___kmpc_distribute_static_init_8u);
7305 Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
7306 : omp::OMPRTL___kmpc_for_static_init_4u)
7307 : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
7308 : omp::OMPRTL___kmpc_for_static_init_8u);
7310 return getOrCreateRuntimeFunction(M, Name);
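// The static-init entry is selected purely from IV width and signedness, with
// the *_distribute_* flavour used for GPU distribute loops; a signed 32-bit IV
// maps to __kmpc_for_static_init_4, an unsigned 64-bit IV to
// __kmpc_for_static_init_8u, and so on.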
7313FunctionCallee OpenMPIRBuilder::createDispatchInitFunction(unsigned IVSize,
7315 assert((IVSize == 32 || IVSize == 64) &&
7316 "IV size is not compatible with the omp runtime");
7318 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
7319 : omp::OMPRTL___kmpc_dispatch_init_4u)
7320 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_8
7321 : omp::OMPRTL___kmpc_dispatch_init_8u);
7323 return getOrCreateRuntimeFunction(M, Name);
7326FunctionCallee OpenMPIRBuilder::createDispatchNextFunction(unsigned IVSize,
7328 assert((IVSize == 32 || IVSize == 64) &&
7329 "IV size is not compatible with the omp runtime");
7331 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
7332 : omp::OMPRTL___kmpc_dispatch_next_4u)
7333 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_8
7334 : omp::OMPRTL___kmpc_dispatch_next_8u);
7336 return getOrCreateRuntimeFunction(M, Name);
7339FunctionCallee OpenMPIRBuilder::createDispatchFiniFunction(unsigned IVSize,
7341 assert((IVSize == 32 || IVSize == 64) &&
7342 "IV size is not compatible with the omp runtime");
7344 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
7345 : omp::OMPRTL___kmpc_dispatch_fini_4u)
7346 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_8
7347 : omp::OMPRTL___kmpc_dispatch_fini_8u);
7349 return getOrCreateRuntimeFunction(M, Name);
7353 return getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_dispatch_deinit);
7358 DenseMap<Value *, std::tuple<Value *, unsigned>> &ValueReplacementMap) {
7366 auto GetUpdatedDIVariable = [&](DILocalVariable *OldVar, unsigned arg) {
7370 if (NewVar && (arg == NewVar->getArg()))
7380 auto UpdateDebugRecord = [&](auto *DR) {
7383 for (auto Loc : DR->location_ops()) {
7384 auto Iter = ValueReplacementMap.find(Loc);
7385 if (Iter != ValueReplacementMap.end()) {
7386 DR->replaceVariableLocationOp(Loc, std::get<0>(Iter->second));
7387 ArgNo = std::get<1>(Iter->second) + 1;
7391 DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo));
7398 "Unexpected debug intrinsic");
7400 UpdateDebugRecord(&DVR);
7403 if (OMPBuilder.Config.isTargetDevice()) {
7405 Module *M = Func->getParent();
7408 DB.createQualifiedType(dwarf::DW_TAG_pointer_type,
nullptr);
7410 NewSP,
"dyn_ptr", 1, NewSP->
getFile(), 0,
7411 VoidPtrTy,
false, DINode::DIFlags::FlagArtificial);
7413 DB.insertDeclare(&(*Func->arg_begin()), Var, DB.createExpression(),
Loc,
7426 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
7428 OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
7429 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) {
7431 if (OMPBuilder.Config.isTargetDevice()) {
7439 for (auto &Arg : Inputs)
7444 for (auto &Arg : Inputs)
7448 auto BB = Builder.GetInsertBlock();
7460 if (TargetCpuAttr.isStringAttribute())
7461 Func->addFnAttr(TargetCpuAttr);
7463 auto TargetFeaturesAttr = ParentFn->getFnAttribute("target-features");
7464 if (TargetFeaturesAttr.isStringAttribute())
7465 Func->addFnAttr(TargetFeaturesAttr);
7467 if (OMPBuilder.Config.isTargetDevice()) {
7469 OMPBuilder.emitKernelExecutionMode(FuncName, DefaultAttrs.ExecFlags);
7470 OMPBuilder.emitUsed("llvm.compiler.used", {ExecMode});
7481 Builder.SetInsertPoint(EntryBB);
7484 if (OMPBuilder.Config.isTargetDevice())
7485 Builder.restoreIP(OMPBuilder.createTargetInit(Builder, DefaultAttrs));
7487 BasicBlock *UserCodeEntryBB = Builder.GetInsertBlock();
7492 if (OMPBuilder.Config.isTargetDevice())
7493 OMPBuilder.ConstantAllocaRaiseCandidates.emplace_back(Func);
7497 splitBB(Builder,
true,
"outlined.body");
7498 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = CBFunc(
7500 OpenMPIRBuilder::InsertPointTy(OutlinedBodyBB, OutlinedBodyBB->
begin()));
7502 return AfterIP.takeError();
7503 Builder.restoreIP(*AfterIP);
7504 if (OMPBuilder.Config.isTargetDevice())
7505 OMPBuilder.createTargetDeinit(Builder);
7508 Builder.CreateRetVoid();
7512 auto AllocaIP = Builder.saveIP();
7517 const auto &ArgRange =
7518 OMPBuilder.Config.isTargetDevice()
7519 ? make_range(Func->arg_begin() + 1, Func->arg_end())
7552 if (Instr->getFunction() == Func)
7553 Instr->replaceUsesOfWith(Input, InputCopy);
7559 for (auto InArg : zip(Inputs, ArgRange)) {
7561 Argument &Arg = std::get<1>(InArg);
7562 Value *InputCopy = nullptr;
7564 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
7565 ArgAccessorFuncCB(Arg, Input, InputCopy, AllocaIP, Builder.saveIP());
7567 return AfterIP.takeError();
7568 Builder.restoreIP(*AfterIP);
7569 ValueReplacementMap[Input] = std::make_tuple(InputCopy, Arg.getArgNo());
7589 DeferredReplacement.push_back(std::make_pair(Input, InputCopy));
7596 ReplaceValue(Input, InputCopy, Func);
7600 for (auto Deferred : DeferredReplacement)
7601 ReplaceValue(std::get<0>(Deferred), std::get<1>(Deferred), Func);
7604 ValueReplacementMap);
7612 Value *TaskWithPrivates,
7613 Type *TaskWithPrivatesTy) {
7615 Type *TaskTy = OMPIRBuilder.Task;
7618 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 0);
7619 Value *Shareds = TaskT;
7629 if (TaskWithPrivatesTy != TaskTy)
7630 Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
7647 const size_t NumOffloadingArrays, const int SharedArgsOperandNo) {
7652 assert((!NumOffloadingArrays || PrivatesTy) &&
7653 "PrivatesTy cannot be nullptr when there are offloadingArrays"
7656 Module &M = OMPBuilder.M;
7680 OpenMPIRBuilder::InsertPointTy IP(StaleCI->getParent(),
7686 Type *TaskPtrTy = OMPBuilder.TaskPtr;
7687 [[maybe_unused]] Type *TaskTy = OMPBuilder.Task;
7693 ".omp_target_task_proxy_func",
7694 Builder.GetInsertBlock()->getModule());
7695 Value *ThreadId = ProxyFn->getArg(0);
7696 Value *TaskWithPrivates = ProxyFn->getArg(1);
7697 ThreadId->setName("thread.id");
7698 TaskWithPrivates->setName("task");
7700 bool HasShareds = SharedArgsOperandNo > 0;
7701 bool HasOffloadingArrays = NumOffloadingArrays > 0;
7704 Builder.SetInsertPoint(EntryBB);
7710 if (HasOffloadingArrays) {
7711 assert(TaskTy != TaskWithPrivatesTy &&
7712 "If there are offloading arrays to pass to the target"
7713 "TaskTy cannot be the same as TaskWithPrivatesTy");
7716 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 1);
7717 for (unsigned int i = 0; i < NumOffloadingArrays; ++i)
7719 Builder.CreateStructGEP(PrivatesTy, Privates, i));
7723 auto *ArgStructAlloca =
7725 assert(ArgStructAlloca &&
7726 "Unable to find the alloca instruction corresponding to arguments "
7727 "for extracted function");
7731 Builder.CreateAlloca(ArgStructType, nullptr, "structArg");
7733 Value *SharedsSize =
7734 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
7737 OMPBuilder, Builder, TaskWithPrivates, TaskWithPrivatesTy);
7739 Builder.CreateMemCpy(
7740 NewArgStructAlloca, NewArgStructAlloca->getAlign(), LoadShared,
7742 KernelLaunchArgs.push_back(NewArgStructAlloca);
7744 Builder.CreateCall(KernelLaunchFunction, KernelLaunchArgs);
7745 Builder.CreateRetVoid();
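// The proxy generated above appears to follow the task entry-point shape
// (global thread id, task descriptor): it unpacks the privatized offloading
// arrays and the shared-argument struct from the task and forwards them to the
// actual kernel-launch function.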
7751 return GEP->getSourceElementType();
7753 return Alloca->getAllocatedType();
7776 if (OffloadingArraysToPrivatize.
empty())
7777 return OMPIRBuilder.Task;
7780 for (
Value *V : OffloadingArraysToPrivatize) {
7781 assert(V->getType()->isPointerTy() &&
7782 "Expected pointer to array to privatize. Got a non-pointer value "
7785 assert(ArrayTy &&
"ArrayType cannot be nullptr");
7791 "struct.task_with_privates");
7794 OpenMPIRBuilder &OMPBuilder,
IRBuilderBase &Builder,
bool IsOffloadEntry,
7795 TargetRegionEntryInfo &EntryInfo,
7796 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
7799 OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
7800 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) {
7802 OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
7805 EntryFnName, Inputs, CBFunc,
7809 return OMPBuilder.emitTargetRegionFunction(
7810 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
7814OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
7815 TargetTaskBodyCallbackTy TaskBodyCB, Value *DeviceID, Value *RTLoc,
7816 OpenMPIRBuilder::InsertPointTy AllocaIP,
7818 const TargetDataRTArgs &RTArgs, bool HasNoWait) {
7942 splitBB(Builder, true, "target.task.body");
7944 splitBB(Builder, true, "target.task.alloca");
7946 InsertPointTy TargetTaskAllocaIP(TargetTaskAllocaBB,
7947 TargetTaskAllocaBB->begin());
7948 InsertPointTy TargetTaskBodyIP(TargetTaskBodyBB, TargetTaskBodyBB->begin());
7951 OI.EntryBB = TargetTaskAllocaBB;
7952 OI.OuterAllocaBB = AllocaIP.getBlock();
7957 Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP, "global.tid", false));
7960 Builder.restoreIP(TargetTaskBodyIP);
7961 if (Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP))
7975 emitBlock(OI.ExitBB, Builder.GetInsertBlock()->getParent(),
7979 bool NeedsTargetTask = HasNoWait && DeviceID;
7980 if (NeedsTargetTask) {
7982 {RTArgs.BasePointersArray, RTArgs.PointersArray, RTArgs.MappersArray,
7983 RTArgs.MapNamesArray, RTArgs.MapTypesArray, RTArgs.MapTypesArrayEnd,
7984 RTArgs.SizesArray}) {
7986 OffloadingArraysToPrivatize.push_back(V);
7987 OI.ExcludeArgsFromAggregate.push_back(V);
7991 OI.PostOutlineCB = [this, ToBeDeleted, Dependencies, NeedsTargetTask,
7992 DeviceID, OffloadingArraysToPrivatize](
7995 "there must be a single user for the outlined function");
8009 const unsigned int NumStaleCIArgs = StaleCI->arg_size();
8010 bool HasShareds = NumStaleCIArgs > OffloadingArraysToPrivatize.size() + 1;
8012 NumStaleCIArgs == (OffloadingArraysToPrivatize.size() + 2)) &&
8013 "Wrong number of arguments for StaleCI when shareds are present");
8014 int SharedArgOperandNo =
8015 HasShareds ? OffloadingArraysToPrivatize.size() + 1 : 0;
8021 if (!OffloadingArraysToPrivatize.empty())
8026 *this, Builder, StaleCI, PrivatesTy, TaskWithPrivatesTy,
8027 OffloadingArraysToPrivatize.size(), SharedArgOperandNo);
8029 LLVM_DEBUG(
dbgs() <<
"Proxy task entry function created: " << *ProxyFn
8032 Builder.SetInsertPoint(StaleCI);
8037 getOrCreateSrcLocStr(LocationDescription(Builder), SrcLocStrSize);
8038 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
8047 ? getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
8048 : getOrCreateRuntimeFunctionPtr(
8049 OMPRTL___kmpc_omp_target_task_alloc);
8053 Value *ThreadID = getOrCreateThreadID(Ident);
8060 Value *TaskSize = Builder.getInt64(
8061 M.getDataLayout().getTypeStoreSize(TaskWithPrivatesTy));
8066 Value *SharedsSize = Builder.getInt64(0);
8068 auto *ArgStructAlloca =
8070 assert(ArgStructAlloca &&
8071 "Unable to find the alloca instruction corresponding to arguments "
8072 "for extracted function");
8073 auto *ArgStructType =
8075 assert(ArgStructType &&
"Unable to find struct type corresponding to "
8076 "arguments for extracted function");
8078 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
8087 Value *Flags = Builder.getInt32(0);
8097 TaskSize, SharedsSize,
8100 if (NeedsTargetTask) {
8101 assert(DeviceID && "Expected non-empty device ID.");
8105 TaskData = Builder.CreateCall(TaskAllocFn, TaskAllocArgs);
8111 *this, Builder, TaskData, TaskWithPrivatesTy);
8112 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
8115 if (!OffloadingArraysToPrivatize.empty()) {
8117 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskData, 1);
8118 for (unsigned int i = 0; i < OffloadingArraysToPrivatize.size(); ++i) {
8119 Value *PtrToPrivatize = OffloadingArraysToPrivatize[i];
8126 "ElementType should match ArrayType");
8129 Value *Dst = Builder.CreateStructGEP(PrivatesTy, Privates, i);
8130 Builder.CreateMemCpy(
8131 Dst, Alignment, PtrToPrivatize, Alignment,
8132 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ElementType)));
8146 if (!NeedsTargetTask) {
8149 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_wait_deps);
8153 Builder.getInt32(Dependencies.size()),
8155 ConstantInt::get(Builder.getInt32Ty(), 0),
8161 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0);
8163 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0);
8164 Builder.CreateCall(TaskBeginFn, {Ident, ThreadID, TaskData});
8165 CallInst *CI = Builder.CreateCall(ProxyFn, {ThreadID, TaskData});
8167 Builder.CreateCall(TaskCompleteFn, {Ident, ThreadID, TaskData});
8168 }
else if (DepArray) {
8173 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps);
8176 {Ident, ThreadID, TaskData, Builder.getInt32(Dependencies.size()),
8177 DepArray, ConstantInt::get(Builder.getInt32Ty(), 0),
8181 Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task);
8182 Builder.CreateCall(TaskFn, {Ident, ThreadID, TaskData});
8187 I->eraseFromParent();
8189 addOutlineInfo(std::move(OI));
8192 << *(Builder.GetInsertBlock()) <<
"\n");
8194 << *(Builder.GetInsertBlock()->getParent()->getParent())
8196 return Builder.saveIP();
8199Error OpenMPIRBuilder::emitOffloadingArraysAndArgs(
8200 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &
Info,
8201 TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo,
8202 CustomMapperCallbackTy CustomMapperCB,
bool IsNonContiguous,
8205 emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo,
Info,
8206 CustomMapperCB, IsNonContiguous, DeviceAddrCB))
8208 emitOffloadingArraysArgument(Builder, RTArgs,
Info, ForEndCall);
8214 OpenMPIRBuilder::InsertPointTy AllocaIP,
8215 OpenMPIRBuilder::TargetDataInfo &
Info,
8216 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
8217 const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs,
8220 OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB,
8221 OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB,
8227 auto &&EmitTargetCallFallbackCB = [&](OpenMPIRBuilder::InsertPointTy IP)
8228 -> OpenMPIRBuilder::InsertPointOrErrorTy {
8229 Builder.restoreIP(IP);
8230 Builder.CreateCall(OutlinedFn, Args);
8231 return Builder.saveIP();
8234 bool HasDependencies = Dependencies.
size() > 0;
8235 bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
8237 OpenMPIRBuilder::TargetKernelArgs KArgs;
8244 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8252 if (OutlinedFnID && DeviceID)
8253 return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID,
8254 EmitTargetCallFallbackCB, KArgs,
8255 DeviceID, RTLoc, TargetTaskAllocaIP);
8263 return EmitTargetCallFallbackCB(OMPBuilder.Builder.saveIP());
8266 OMPBuilder.Builder.restoreIP(AfterIP);
8270 auto &&EmitTargetCallElse =
8271 [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
8272 OpenMPIRBuilder::InsertPointTy CodeGenIP) ->
Error {
8275 OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8276 if (RequiresOuterTargetTask) {
8280 OpenMPIRBuilder::TargetDataRTArgs EmptyRTArgs;
8281 return OMPBuilder.emitTargetTask(TaskBodyCB,
nullptr,
8283 Dependencies, EmptyRTArgs, HasNoWait);
8285 return EmitTargetCallFallbackCB(Builder.saveIP());
8288 Builder.restoreIP(AfterIP);
8292 auto &&EmitTargetCallThen =
8293 [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
8294 OpenMPIRBuilder::InsertPointTy CodeGenIP) ->
Error {
8295 Info.HasNoWait = HasNoWait;
8296 OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP());
8297 OpenMPIRBuilder::TargetDataRTArgs RTArgs;
8298 if (
Error Err = OMPBuilder.emitOffloadingArraysAndArgs(
8299 AllocaIP, Builder.saveIP(),
Info, RTArgs, MapInfo, CustomMapperCB,
8306 zip_equal(DefaultAttrs.MaxTeams, RuntimeAttrs.MaxTeams))
8307 NumTeamsC.push_back(RuntimeVal ? RuntimeVal
8312 auto InitMaxThreadsClause = [&Builder](Value *Clause) {
8314 Clause = Builder.CreateIntCast(Clause, Builder.getInt32Ty(),
8318 auto CombineMaxThreadsClauses = [&Builder](Value *Clause, Value *&Result) {
8321 Result ? Builder.CreateSelect(Builder.CreateICmpULT(Result, Clause),
8329 Value *MaxThreadsClause =
8330 RuntimeAttrs.TeamsThreadLimit.size() == 1
8331 ? InitMaxThreadsClause(RuntimeAttrs.MaxThreads)
8334 for (auto [TeamsVal, TargetVal] : zip_equal(
8335 RuntimeAttrs.TeamsThreadLimit, RuntimeAttrs.TargetThreadLimit)) {
8336 Value *TeamsThreadLimitClause = InitMaxThreadsClause(TeamsVal);
8337 Value *NumThreads = InitMaxThreadsClause(TargetVal);
8339 CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads);
8340 CombineMaxThreadsClauses(MaxThreadsClause, NumThreads);
8342 NumThreadsC.push_back(NumThreads ? NumThreads : Builder.getInt32(0));
8345 unsigned NumTargetItems = Info.NumberOfPtrs;
8349 Constant *SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
8350 Value *RTLoc = OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize,
8353 Value *TripCount = RuntimeAttrs.LoopTripCount
8354 ? Builder.CreateIntCast(RuntimeAttrs.LoopTripCount,
8355 Builder.getInt64Ty(),
8357 : Builder.getInt64(0);
8360 Value *DynCGGroupMem = Builder.getInt32(0);
8362 KArgs = OpenMPIRBuilder::TargetKernelArgs(NumTargetItems, RTArgs, TripCount,
8363 NumTeamsC, NumThreadsC,
8364 DynCGGroupMem, HasNoWait);
8368 OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8371 if (RequiresOuterTargetTask)
8372 return OMPBuilder.emitTargetTask(TaskBodyCB, DeviceID, RTLoc, AllocaIP,
8373 Dependencies, KArgs.RTArgs,
8376 return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID,
8377 EmitTargetCallFallbackCB, KArgs,
8378 DeviceID, RTLoc, AllocaIP);
8381 Builder.restoreIP(AfterIP);
8388 if (!OutlinedFnID) {
8389 cantFail(EmitTargetCallElse(AllocaIP, Builder.saveIP()));
8395 cantFail(EmitTargetCallThen(AllocaIP, Builder.saveIP()));
8399 cantFail(OMPBuilder.emitIfClause(IfCond, EmitTargetCallThen,
8400 EmitTargetCallElse, AllocaIP));
8403OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTarget(
8404 const LocationDescription &
Loc,
bool IsOffloadEntry, InsertPointTy AllocaIP,
8405 InsertPointTy CodeGenIP, TargetDataInfo &
Info,
8406 TargetRegionEntryInfo &EntryInfo,
8407 const TargetKernelDefaultAttrs &DefaultAttrs,
8408 const TargetKernelRuntimeAttrs &RuntimeAttrs,
Value *IfCond,
8410 OpenMPIRBuilder::TargetBodyGenCallbackTy CBFunc,
8411 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB,
8412 CustomMapperCallbackTy CustomMapperCB,
8415 if (!updateToLocation(
Loc))
8416 return InsertPointTy();
8418 Builder.restoreIP(CodeGenIP);
8426 *
this, Builder, IsOffloadEntry, EntryInfo, DefaultAttrs, OutlinedFn,
8427 OutlinedFnID, Inputs, CBFunc, ArgAccessorFuncCB))
8433 if (!Config.isTargetDevice())
8435 IfCond, OutlinedFn, OutlinedFnID, Inputs, GenMapInfoCB,
8436 CustomMapperCB, Dependencies, HasNowait);
8437 return Builder.saveIP();
8450 return OS.
str().str();
8455 return OpenMPIRBuilder::getNameWithSeparators(Parts, Config.firstSeparator(),
8456 Config.separator());
8460OpenMPIRBuilder::getOrCreateInternalVariable(
Type *Ty,
const StringRef &Name,
8462 auto &Elem = *InternalVars.try_emplace(Name,
nullptr).first;
8464 assert(Elem.second->getValueType() == Ty &&
8465 "OMP internal variable has different type than requested");
8481 GV->setAlignment(std::max(TypeAlign, PtrAlign));
8488Value *OpenMPIRBuilder::getOMPCriticalRegionLock(
StringRef CriticalName) {
8489 std::string
Prefix =
Twine(
"gomp_critical_user_", CriticalName).
str();
8490 std::string
Name = getNameWithSeparators({
Prefix,
"var"},
".",
".");
8491 return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
Value *OpenMPIRBuilder::getSizeInBytes(Value *BasePtr) {
  LLVMContext &Ctx = Builder.getContext();
  Value *Null =
      Constant::getNullValue(PointerType::getUnqual(BasePtr->getContext()));
  Value *SizeGep =
      Builder.CreateGEP(BasePtr->getType(), Null, Builder.getInt32(1));
  Value *SizePtrToInt = Builder.CreatePtrToInt(SizeGep, Type::getInt64Ty(Ctx));
  return SizePtrToInt;
}
GlobalVariable *
OpenMPIRBuilder::createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
                                       std::string VarName) {
  auto *MaptypesArrayInit = ConstantDataArray::get(M.getContext(), Mappings);
  auto *MaptypesArrayGlobal = new GlobalVariable(
      M, MaptypesArrayInit->getType(), /*isConstant=*/true,
      GlobalValue::PrivateLinkage, MaptypesArrayInit, VarName);
  return MaptypesArrayGlobal;
}
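// Illustrative sketch (added): how entries of the .offload_maptypes array are
// composed. The bit values below mirror OpenMPOffloadMappingFlags but are
// restated locally for the example; treat them as assumptions, not a quote of
// OMPConstants.h. Assumes <cstdint>.
namespace maptypes_example {
constexpr uint64_t MapTo = 0x01, MapFrom = 0x02, MapAlways = 0x04,
                   MapPtrAndObj = 0x10, MapTargetParam = 0x20;
// A map(tofrom: x) entry that is also a kernel argument:
constexpr uint64_t TofromArg = MapTo | MapFrom | MapTargetParam; // 0x23
} // namespace maptypes_example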
void OpenMPIRBuilder::createMapperAllocas(const LocationDescription &Loc,
                                          InsertPointTy AllocaIP,
                                          unsigned NumOperands,
                                          struct MapperAllocas &MapperAllocas) {
  if (!updateToLocation(Loc))
    return;
  auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands);
  auto *ArrI64Ty = ArrayType::get(Int64, NumOperands);
  Builder.restoreIP(AllocaIP);
  AllocaInst *ArgsBase = Builder.CreateAlloca(
      ArrI8PtrTy, /*ArraySize=*/nullptr, ".offload_baseptrs");
  AllocaInst *Args = Builder.CreateAlloca(ArrI8PtrTy, /*ArraySize=*/nullptr,
                                          ".offload_ptrs");
  AllocaInst *ArgSizes = Builder.CreateAlloca(
      ArrI64Ty, /*ArraySize=*/nullptr, ".offload_sizes");
  updateToLocation(Loc);
  MapperAllocas.ArgsBase = ArgsBase;
  MapperAllocas.Args = Args;
  MapperAllocas.ArgSizes = ArgSizes;
}
void OpenMPIRBuilder::emitMapperCall(const LocationDescription &Loc,
                                     Function *MapperFunc, Value *SrcLocInfo,
                                     Value *MaptypesArg, Value *MapnamesArg,
                                     struct MapperAllocas &MapperAllocas,
                                     int64_t DeviceID, unsigned NumOperands) {
  if (!updateToLocation(Loc))
    return;
  auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands);
  auto *ArrI64Ty = ArrayType::get(Int64, NumOperands);
  Value *ArgsBaseGEP =
      Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.ArgsBase,
                                {Builder.getInt32(0), Builder.getInt32(0)});
  Value *ArgsGEP =
      Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.Args,
                                {Builder.getInt32(0), Builder.getInt32(0)});
  Value *ArgSizesGEP =
      Builder.CreateInBoundsGEP(ArrI64Ty, MapperAllocas.ArgSizes,
                                {Builder.getInt32(0), Builder.getInt32(0)});
  Value *NullPtr =
      Constant::getNullValue(PointerType::getUnqual(Int8Ptr->getContext()));
  Builder.CreateCall(MapperFunc,
                     {SrcLocInfo, Builder.getInt64(DeviceID),
                      Builder.getInt32(NumOperands), ArgsBaseGEP, ArgsGEP,
                      ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr});
}
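// Illustrative sketch (added): the call emitted above passes its operands in
// the order (source-location, device id, operand count, base pointers,
// pointers, sizes, map types, map names, mappers). A hypothetical function
// type with that shape, for orientation only; this is not quoted from the
// offload runtime headers. Assumes <cstdint>.
using ExampleMapperEntryTy = void (*)(void * /*Loc*/, int64_t /*DeviceId*/,
                                      int32_t /*NumOperands*/,
                                      void ** /*ArgsBase*/, void ** /*Args*/,
                                      int64_t * /*ArgSizes*/,
                                      int64_t * /*MapTypes*/,
                                      void ** /*MapNames*/,
                                      void ** /*Mappers*/);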
void OpenMPIRBuilder::emitOffloadingArraysArgument(IRBuilderBase &Builder,
                                                   TargetDataRTArgs &RTArgs,
                                                   TargetDataInfo &Info,
                                                   bool ForEndCall) {
  assert((!ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");

  auto VoidPtrTy = UnqualPtrTy;
  auto VoidPtrPtrTy = UnqualPtrTy;
  auto Int64PtrTy = UnqualPtrTy;
8578 if (!
Info.NumberOfPtrs) {
8588 RTArgs.BasePointersArray = Builder.CreateConstInBoundsGEP2_32(
8590 Info.RTArgs.BasePointersArray,
8592 RTArgs.PointersArray = Builder.CreateConstInBoundsGEP2_32(
8596 RTArgs.SizesArray = Builder.CreateConstInBoundsGEP2_32(
8599 RTArgs.MapTypesArray = Builder.CreateConstInBoundsGEP2_32(
8601 ForEndCall &&
Info.RTArgs.MapTypesArrayEnd ?
Info.RTArgs.MapTypesArrayEnd
8602 :
Info.RTArgs.MapTypesArray,
8608 if (!
Info.EmitDebug)
8611 RTArgs.MapNamesArray = Builder.CreateConstInBoundsGEP2_32(
8617 if (!
Info.HasMapper)
8620 RTArgs.MappersArray =
8621 Builder.CreatePointerCast(
Info.RTArgs.MappersArray, VoidPtrPtrTy);
void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP,
                                                  InsertPointTy CodeGenIP,
                                                  MapInfosTy &CombinedInfo,
                                                  TargetDataInfo &Info) {
  MapInfosTy::StructNonContiguousInfo &NonContigInfo =
      CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  StructType *DimTy = StructType::create(
      M.getContext(),
      ArrayRef<Type *>({Builder.getInt64Ty(), Builder.getInt64Ty(),
                        Builder.getInt64Ty()}),
      "struct.descriptor_dim");

  enum { OffsetFD = 0, CountFD, StrideFD };
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Dimensions with a dim size of 1 do not need a descriptor.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    Builder.restoreIP(AllocaIP);
    ArrayType *ArrayTy = ArrayType::get(DimTy, NonContigInfo.Dims[I]);
    AllocaInst *DimsAddr =
        Builder.CreateAlloca(ArrayTy, /*ArraySize=*/nullptr, "dims");
    Builder.restoreIP(CodeGenIP);
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      unsigned RevIdx = EE - II - 1;
      Value *DimsLVal = Builder.CreateInBoundsGEP(
          DimsAddr->getAllocatedType(), DimsAddr,
          {Builder.getInt64(0), Builder.getInt64(II)});
      // Offset
      Value *OffsetLVal = Builder.CreateStructGEP(DimTy, DimsLVal, OffsetFD);
      Builder.CreateAlignedStore(
          NonContigInfo.Offsets[L][RevIdx], OffsetLVal,
          M.getDataLayout().getPrefTypeAlign(OffsetLVal->getType()));
      // Count
      Value *CountLVal = Builder.CreateStructGEP(DimTy, DimsLVal, CountFD);
      Builder.CreateAlignedStore(
          NonContigInfo.Counts[L][RevIdx], CountLVal,
          M.getDataLayout().getPrefTypeAlign(CountLVal->getType()));
      // Stride
      Value *StrideLVal = Builder.CreateStructGEP(DimTy, DimsLVal, StrideFD);
      Builder.CreateAlignedStore(
          NonContigInfo.Strides[L][RevIdx], StrideLVal,
          M.getDataLayout().getPrefTypeAlign(CountLVal->getType()));
    }
    // args[I] = &dims
    Builder.restoreIP(CodeGenIP);
    Value *DAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, Builder.getPtrTy());
    Value *P = Builder.CreateConstInBoundsGEP2_32(
        ArrayType::get(Builder.getPtrTy(), Info.NumberOfPtrs),
        Info.RTArgs.PointersArray, 0, I);
    Builder.CreateAlignedStore(
        DAddr, P, M.getDataLayout().getPrefTypeAlign(Builder.getPtrTy()));
    ++L;
  }
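// Illustrative sketch (added): the per-dimension descriptor filled in above.
// Field order follows the OffsetFD/CountFD/StrideFD enum used by the struct
// GEPs; the C mirror below and the stride units are assumptions made for the
// example, not a copy of a runtime header. Assumes <cstdint>.
struct ExampleDescriptorDim {
  int64_t Offset; // offset of the first transferred element in this dimension
  int64_t Count;  // number of elements transferred in this dimension
  int64_t Stride; // distance between consecutive elements in this dimension
};
// E.g. a strided section such as A[0:N:2] in a '#pragma omp target update'
// motion clause would produce one descriptor roughly of the form
// {0, N, 2 * sizeof(A[0])}.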
8692void OpenMPIRBuilder::emitUDMapperArrayInitOrDel(
8700 M.getContext(), createPlatformSpecificName({
"omp.array",
Prefix}));
8702 Builder.CreateICmpSGT(
Size, Builder.getInt64(1),
"omp.arrayinit.isarray");
8703 Value *DeleteBit = Builder.CreateAnd(
8706 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8707 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
8712 Value *BaseIsBegin = Builder.CreateICmpNE(
Base, Begin);
8714 Value *PtrAndObjBit = Builder.CreateAnd(
8717 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8718 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
8719 PtrAndObjBit = Builder.CreateIsNotNull(PtrAndObjBit);
8720 BaseIsBegin = Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
8721 Cond = Builder.CreateOr(IsArray, BaseIsBegin);
8722 DeleteCond = Builder.CreateIsNull(
8724 createPlatformSpecificName({
"omp.array",
Prefix,
".delete"}));
8727 DeleteCond = Builder.CreateIsNotNull(
8729 createPlatformSpecificName({
"omp.array",
Prefix,
".delete"}));
8731 Cond = Builder.CreateAnd(
Cond, DeleteCond);
8732 Builder.CreateCondBr(
Cond, BodyBB, ExitBB);
8734 emitBlock(BodyBB, MapperFn);
8737 Value *ArraySize = Builder.CreateNUWMul(
Size, Builder.getInt64(ElementSize));
8740 Value *MapTypeArg = Builder.CreateAnd(
8743 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8744 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8745 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8746 MapTypeArg = Builder.CreateOr(
8749 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8750 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
8754 Value *OffloadingArgs[] = {MapperHandle,
Base, Begin,
8755 ArraySize, MapTypeArg, MapName};
8757 getOrCreateRuntimeFunction(M, OMPRTL___tgt_push_mapper_component),
8765 Type *ElemTy,
StringRef FuncName, CustomMapperCallbackTy CustomMapperCB) {
8781 MapperFn->
addFnAttr(Attribute::NoInline);
8782 MapperFn->
addFnAttr(Attribute::NoUnwind);
8792 auto SavedIP = Builder.saveIP();
8793 Builder.SetInsertPoint(EntryBB);
8805 TypeSize ElementSize = M.getDataLayout().getTypeStoreSize(ElemTy);
8806 Size = Builder.CreateExactUDiv(
Size, Builder.getInt64(ElementSize));
8807 Value *PtrBegin = BeginIn;
8808 Value *PtrEnd = Builder.CreateGEP(ElemTy, PtrBegin,
Size);
8813 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
8814 MapType, MapName, ElementSize, HeadBB,
8820 emitBlock(HeadBB, MapperFn);
8825 Builder.CreateICmpEQ(PtrBegin, PtrEnd,
"omp.arraymap.isempty");
8826 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
8829 emitBlock(BodyBB, MapperFn);
8832 Builder.CreatePHI(PtrBegin->
getType(), 2,
"omp.arraymap.ptrcurrent");
8836 MapInfosOrErrorTy
Info = GenMapInfoCB(Builder.saveIP(), PtrPHI, BeginIn);
8838 return Info.takeError();
8842 Value *OffloadingArgs[] = {MapperHandle};
8843 Value *PreviousSize = Builder.CreateCall(
8844 getOrCreateRuntimeFunction(M, OMPRTL___tgt_mapper_num_components),
8846 Value *ShiftedPreviousSize =
8847 Builder.CreateShl(PreviousSize, Builder.getInt64(getFlagMemberOffset()));
  for (unsigned I = 0; I < Info->BasePointers.size(); ++I) {
8859 Value *OriMapType = Builder.getInt64(
8860 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8862 Value *MemberMapType =
8863 Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
8877 Value *LeftToFrom = Builder.CreateAnd(
8880 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8881 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8882 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8891 Value *IsAlloc = Builder.CreateIsNull(LeftToFrom);
8892 Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
8894 emitBlock(AllocBB, MapperFn);
8895 Value *AllocMapType = Builder.CreateAnd(
8898 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8899 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8900 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8901 Builder.CreateBr(EndBB);
8902 emitBlock(AllocElseBB, MapperFn);
8903 Value *IsTo = Builder.CreateICmpEQ(
8906 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8907 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
8908 Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
8910 emitBlock(ToBB, MapperFn);
8911 Value *ToMapType = Builder.CreateAnd(
8914 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8915 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8916 Builder.CreateBr(EndBB);
8917 emitBlock(ToElseBB, MapperFn);
8918 Value *IsFrom = Builder.CreateICmpEQ(
8921 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8922 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8923 Builder.CreateCondBr(IsFrom, FromBB, EndBB);
8925 emitBlock(FromBB, MapperFn);
8926 Value *FromMapType = Builder.CreateAnd(
8929 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8930 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
8932 emitBlock(EndBB, MapperFn);
8935 Builder.CreatePHI(Builder.getInt64Ty(), 4,
"omp.maptype");
8941 Value *OffloadingArgs[] = {MapperHandle, CurBaseArg, CurBeginArg,
8942 CurSizeArg, CurMapType, CurNameArg};
8944 auto ChildMapperFn = CustomMapperCB(
I);
8946 return ChildMapperFn.takeError();
8947 if (*ChildMapperFn) {
8949 Builder.CreateCall(*ChildMapperFn, OffloadingArgs)->setDoesNotThrow();
8954 getOrCreateRuntimeFunction(M, OMPRTL___tgt_push_mapper_component),
8961 Value *PtrNext = Builder.CreateConstGEP1_32(ElemTy, PtrPHI, 1,
8962 "omp.arraymap.next");
8964 Value *IsDone = Builder.CreateICmpEQ(PtrNext, PtrEnd,
"omp.arraymap.isdone");
8966 Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
8968 emitBlock(ExitBB, MapperFn);
8971 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
8972 MapType, MapName, ElementSize, DoneBB,
8976 emitBlock(DoneBB, MapperFn,
true);
8978 Builder.CreateRetVoid();
8979 Builder.restoreIP(SavedIP);
8983Error OpenMPIRBuilder::emitOffloadingArrays(
8984 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
8985 TargetDataInfo &
Info, CustomMapperCallbackTy CustomMapperCB,
8986 bool IsNonContiguous,
8990 Info.clearArrayInfo();
8991 Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
8993 if (
Info.NumberOfPtrs == 0)
8996 Builder.restoreIP(AllocaIP);
9002 Info.RTArgs.BasePointersArray = Builder.CreateAlloca(
9003 PointerArrayType,
nullptr,
".offload_baseptrs");
9005 Info.RTArgs.PointersArray = Builder.CreateAlloca(
9006 PointerArrayType,
nullptr,
".offload_ptrs");
9007 AllocaInst *MappersArray = Builder.CreateAlloca(
9008 PointerArrayType,
nullptr,
".offload_mappers");
9009 Info.RTArgs.MappersArray = MappersArray;
9016 ConstantInt::get(Int64Ty, 0));
  for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
9021 if (IsNonContiguous &&
9022 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9023 CombinedInfo.Types[
I] &
9024 OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG))
9026 ConstantInt::get(Int64Ty, CombinedInfo.NonContigInfo.Dims[
I]);
9032 RuntimeSizes.set(
I);
9035 if (RuntimeSizes.all()) {
9037 Info.RTArgs.SizesArray = Builder.CreateAlloca(
9038 SizeArrayType,
nullptr,
".offload_sizes");
9043 std::string
Name = createPlatformSpecificName({
"offload_sizes"});
9044 auto *SizesArrayGbl =
9049 if (!RuntimeSizes.any()) {
9050 Info.RTArgs.SizesArray = SizesArrayGbl;
9052 unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
9053 Align OffloadSizeAlign = M.getDataLayout().getABIIntegerTypeAlignment(64);
9056 SizeArrayType,
nullptr,
".offload_sizes");
9059 Builder.CreateMemCpy(
9060 Buffer, M.getDataLayout().getPrefTypeAlign(Buffer->
getType()),
9061 SizesArrayGbl, OffloadSizeAlign,
9066 Info.RTArgs.SizesArray = Buffer;
  for (auto mapFlag : CombinedInfo.Types)
9076 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9078 std::string MaptypesName = createPlatformSpecificName({
"offload_maptypes"});
9079 auto *MapTypesArrayGbl = createOffloadMaptypes(Mapping, MaptypesName);
9080 Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
9083 if (!CombinedInfo.Names.empty()) {
9084 auto *MapNamesArrayGbl = createOffloadMapnames(
9085 CombinedInfo.Names, createPlatformSpecificName({
"offload_mapnames"}));
9086 Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
9087 Info.EmitDebug =
true;
9089 Info.RTArgs.MapNamesArray =
9091 Info.EmitDebug =
false;
9096 if (
Info.separateBeginEndCalls()) {
9097 bool EndMapTypesDiffer =
false;
9099 if (
Type &
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9100 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
9101 Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9102 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
9103 EndMapTypesDiffer =
true;
9106 if (EndMapTypesDiffer) {
9107 MapTypesArrayGbl = createOffloadMaptypes(Mapping, MaptypesName);
9108 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
  for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
9114 Value *BPVal = CombinedInfo.BasePointers[
I];
9115 Value *BP = Builder.CreateConstInBoundsGEP2_32(
9118 Builder.CreateAlignedStore(BPVal, BP,
9119 M.getDataLayout().getPrefTypeAlign(PtrTy));
9121 if (
Info.requiresDevicePointerInfo()) {
9122 if (CombinedInfo.DevicePointers[
I] == DeviceInfoTy::Pointer) {
9123 CodeGenIP = Builder.saveIP();
9124 Builder.restoreIP(AllocaIP);
9125 Info.DevicePtrInfoMap[BPVal] = {BP, Builder.CreateAlloca(PtrTy)};
9126 Builder.restoreIP(CodeGenIP);
9128 DeviceAddrCB(
I,
Info.DevicePtrInfoMap[BPVal].second);
9129 }
else if (CombinedInfo.DevicePointers[
I] == DeviceInfoTy::Address) {
9130 Info.DevicePtrInfoMap[BPVal] = {BP, BP};
9132 DeviceAddrCB(
I, BP);
9136 Value *PVal = CombinedInfo.Pointers[
I];
9137 Value *
P = Builder.CreateConstInBoundsGEP2_32(
9141 Builder.CreateAlignedStore(PVal,
P,
9142 M.getDataLayout().getPrefTypeAlign(PtrTy));
9144 if (RuntimeSizes.test(
I)) {
9145 Value *S = Builder.CreateConstInBoundsGEP2_32(
9149 Builder.CreateAlignedStore(Builder.CreateIntCast(CombinedInfo.Sizes[
I],
9152 S, M.getDataLayout().getPrefTypeAlign(PtrTy));
9155 unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
9158 auto CustomMFunc = CustomMapperCB(
I);
9160 return CustomMFunc.takeError();
9162 MFunc = Builder.CreatePointerCast(*CustomMFunc, PtrTy);
9164 Value *MAddr = Builder.CreateInBoundsGEP(
9166 {Builder.getIntN(IndexSize, 0), Builder.getIntN(IndexSize, I)});
9167 Builder.CreateAlignedStore(
9168 MFunc, MAddr, M.getDataLayout().getPrefTypeAlign(MAddr->
getType()));
9171 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9172 Info.NumberOfPtrs == 0)
9174 emitNonContiguousDescriptor(AllocaIP, CodeGenIP, CombinedInfo,
Info);
9179 BasicBlock *CurBB = Builder.GetInsertBlock();
9186 Builder.CreateBr(
Target);
9189 Builder.ClearInsertionPoint();
9194 BasicBlock *CurBB = Builder.GetInsertBlock();
9210 Builder.SetInsertPoint(BB);
Error OpenMPIRBuilder::emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen,
                                    BodyGenCallbackTy ElseGen,
                                    InsertPointTy AllocaIP) {
  // If the condition constant folds, emit only the live arm.
  if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
    auto CondConstant = CI->getSExtValue();
    if (CondConstant)
      return ThenGen(AllocaIP, Builder.saveIP());
    return ElseGen(AllocaIP, Builder.saveIP());
  }

  Function *CurFn = Builder.GetInsertBlock()->getParent();
  BasicBlock *ThenBlock = BasicBlock::Create(M.getContext(), "omp_if.then");
  BasicBlock *ElseBlock = BasicBlock::Create(M.getContext(), "omp_if.else");
  BasicBlock *ContBlock = BasicBlock::Create(M.getContext(), "omp_if.end");
  Builder.CreateCondBr(Cond, ThenBlock, ElseBlock);
  // Emit the 'then' code.
  emitBlock(ThenBlock, CurFn);
  if (Error Err = ThenGen(AllocaIP, Builder.saveIP()))
    return Err;
  emitBranch(ContBlock);
  // Emit the 'else' code.
  emitBlock(ElseBlock, CurFn);
  if (Error Err = ElseGen(AllocaIP, Builder.saveIP()))
    return Err;
  emitBranch(ContBlock);
  emitBlock(ContBlock, CurFn, /*IsFinished=*/true);
  return Error::success();
}
9251bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
9255 "Unexpected Atomic Ordering.");
9312OpenMPIRBuilder::InsertPointTy
9313OpenMPIRBuilder::createAtomicRead(
const LocationDescription &
Loc,
9314 AtomicOpValue &
X, AtomicOpValue &V,
9316 if (!updateToLocation(
Loc))
9319 assert(
X.Var->getType()->isPointerTy() &&
9320 "OMP Atomic expects a pointer to target memory");
9321 Type *XElemTy =
X.ElemTy;
9324 "OMP atomic read expected a scalar type");
9326 Value *XRead =
nullptr;
9330 Builder.CreateLoad(XElemTy,
X.Var,
X.IsVolatile,
"omp.atomic.read");
9336 LoadInst *OldVal = Builder.CreateLoad(XElemTy,
X.Var,
"omp.atomic.read");
9341 OpenMPIRBuilder::AtomicInfo atomicInfo(
9342 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
9343 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
9344 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
9345 XRead = AtomicLoadRes.first;
9352 Builder.CreateLoad(IntCastTy,
X.Var,
X.IsVolatile,
"omp.atomic.load");
9355 XRead = Builder.CreateBitCast(XLoad, XElemTy,
"atomic.flt.cast");
9357 XRead = Builder.CreateIntToPtr(XLoad, XElemTy,
"atomic.ptr.cast");
9360 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Read);
9361 Builder.CreateStore(XRead, V.Var, V.IsVolatile);
9362 return Builder.saveIP();
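// Illustrative example (added, not upstream code): the user-level construct
// handled by createAtomicRead. Names are made up; for a 4-byte float the
// builder emits an atomic integer load of equal width followed by the
// "atomic.flt.cast" bitcast seen above before storing the result into V.
static void atomicReadExample(float &V, float &X) {
#pragma omp atomic read
  V = X;
}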
9365OpenMPIRBuilder::InsertPointTy
9366OpenMPIRBuilder::createAtomicWrite(
const LocationDescription &
Loc,
9367 AtomicOpValue &
X,
Value *Expr,
9369 if (!updateToLocation(
Loc))
9372 assert(
X.Var->getType()->isPointerTy() &&
9373 "OMP Atomic expects a pointer to target memory");
9374 Type *XElemTy =
X.ElemTy;
9377 "OMP atomic write expected a scalar type");
9380 StoreInst *XSt = Builder.CreateStore(Expr,
X.Var,
X.IsVolatile);
9383 LoadInst *OldVal = Builder.CreateLoad(XElemTy,
X.Var,
"omp.atomic.read");
9387 OpenMPIRBuilder::AtomicInfo atomicInfo(
9388 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
9389 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
9390 atomicInfo.EmitAtomicStoreLibcall(AO, Expr);
9397 Builder.CreateBitCast(Expr, IntCastTy,
"atomic.src.int.cast");
9398 StoreInst *XSt = Builder.CreateStore(ExprCast,
X.Var,
X.IsVolatile);
9402 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Write);
9403 return Builder.saveIP();
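// Illustrative example (added): the construct handled by createAtomicWrite.
// For floating-point X the expression is bitcast to a same-width integer
// ("atomic.src.int.cast" above) and stored atomically.
static void atomicWriteExample(float &X, float Expr) {
#pragma omp atomic write
  X = Expr;
}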
9406OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicUpdate(
9407 const LocationDescription &
Loc, InsertPointTy AllocaIP, AtomicOpValue &
X,
9409 AtomicUpdateCallbackTy &UpdateOp,
bool IsXBinopExpr,
9410 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9412 if (!updateToLocation(
Loc))
9416 Type *XTy =
X.Var->getType();
9418 "OMP Atomic expects a pointer to target memory");
9419 Type *XElemTy =
X.ElemTy;
9422 "OMP atomic update expected a scalar type");
9425 "OpenMP atomic does not support LT or GT operations");
9429 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, RMWOp, UpdateOp,
X.IsVolatile,
9430 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
9432 return AtomicResult.takeError();
9433 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Update);
9434 return Builder.saveIP();
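// Illustrative example (added): the construct handled by createAtomicUpdate.
// An integer 'X += Expr' maps directly onto an atomicrmw add; update forms
// with no matching atomicrmw opcode go through the compare-exchange loop
// emitted by emitAtomicUpdate below.
static void atomicUpdateExample(int &X, int Expr) {
#pragma omp atomic update
  X += Expr;
}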
9438Value *OpenMPIRBuilder::emitRMWOpAsInstruction(
Value *Src1,
Value *Src2,
9442 return Builder.CreateAdd(Src1, Src2);
9444 return Builder.CreateSub(Src1, Src2);
9446 return Builder.CreateAnd(Src1, Src2);
9448 return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
9450 return Builder.CreateOr(Src1, Src2);
9452 return Builder.CreateXor(Src1, Src2);
9477 AtomicUpdateCallbackTy &UpdateOp,
bool VolatileX,
bool IsXBinopExpr,
9478 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9481 bool emitRMWOp =
false;
9489 emitRMWOp = XElemTy;
9492 emitRMWOp = (IsXBinopExpr && XElemTy);
9499 std::pair<Value *, Value *> Res;
9504 if (IsIgnoreDenormalMode)
9505 RMWInst->
setMetadata(
"amdgpu.ignore.denormal.mode",
9507 if (!IsFineGrainedMemory)
9508 RMWInst->
setMetadata(
"amdgpu.no.fine.grained.memory",
9510 if (!IsRemoteMemory)
9514 Res.first = RMWInst;
9519 Res.second = Res.first;
9521 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
9525 Builder.CreateLoad(XElemTy,
X,
X->getName() +
".atomic.load");
9531 OpenMPIRBuilder::AtomicInfo atomicInfo(
9532 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
9533 OldVal->
getAlign(),
true , AllocaIP,
X);
9534 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
9535 BasicBlock *CurBB = Builder.GetInsertBlock();
9537 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9541 X->getName() +
".atomic.cont");
9543 Builder.restoreIP(AllocaIP);
9544 AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
9545 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
9546 Builder.SetInsertPoint(ContBB);
9548 PHI->addIncoming(AtomicLoadRes.first, CurBB);
9553 Value *Upd = *CBResult;
9554 Builder.CreateStore(Upd, NewAtomicAddr);
9557 auto Result = atomicInfo.EmitAtomicCompareExchangeLibcall(
9558 AtomicLoadRes.second, NewAtomicAddr, AO, Failure);
9560 PHI->addIncoming(PHILoad, Builder.GetInsertBlock());
9561 Builder.CreateCondBr(
Result.second, ExitBB, ContBB);
9563 Res.first = OldExprVal;
9569 Builder.SetInsertPoint(ExitBB);
9571 Builder.SetInsertPoint(ExitTI);
9577 Builder.CreateLoad(IntCastTy,
X,
X->getName() +
".atomic.load");
9584 BasicBlock *CurBB = Builder.GetInsertBlock();
9586 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9590 X->getName() +
".atomic.cont");
9592 Builder.restoreIP(AllocaIP);
9593 AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
9594 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
9595 Builder.SetInsertPoint(ContBB);
9597 PHI->addIncoming(OldVal, CurBB);
9602 OldExprVal = Builder.CreateBitCast(
PHI, XElemTy,
9603 X->getName() +
".atomic.fltCast");
9605 OldExprVal = Builder.CreateIntToPtr(
PHI, XElemTy,
9606 X->getName() +
".atomic.ptrCast");
9613 Value *Upd = *CBResult;
9614 Builder.CreateStore(Upd, NewAtomicAddr);
9615 LoadInst *DesiredVal = Builder.CreateLoad(IntCastTy, NewAtomicAddr);
9620 Result->setVolatile(VolatileX);
9621 Value *PreviousVal = Builder.CreateExtractValue(Result, 0);
9622 Value *SuccessFailureVal = Builder.CreateExtractValue(Result, 1);
9623 PHI->addIncoming(PreviousVal, Builder.GetInsertBlock());
9624 Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
9626 Res.first = OldExprVal;
9633 Builder.SetInsertPoint(ExitBB);
9635 Builder.SetInsertPoint(ExitTI);
9642OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
9643 const LocationDescription &
Loc, InsertPointTy AllocaIP, AtomicOpValue &
X,
9646 bool UpdateExpr,
bool IsPostfixUpdate,
bool IsXBinopExpr,
9647 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9648 if (!updateToLocation(
Loc))
9652 Type *XTy =
X.Var->getType();
9654 "OMP Atomic expects a pointer to target memory");
9655 Type *XElemTy =
X.ElemTy;
9658 "OMP atomic capture expected a scalar type");
9660 "OpenMP atomic does not support LT or GT operations");
9667 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
X.IsVolatile,
9668 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
9671 Value *CapturedVal =
9672 (IsPostfixUpdate ? AtomicResult->first : AtomicResult->second);
9673 Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
9675 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Capture);
9676 return Builder.saveIP();
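// Illustrative example (added): the construct handled by createAtomicCapture.
// In this postfix form the value captured into V is the old value of X
// (AtomicResult->first above); in prefix forms it is the updated value.
static void atomicCaptureExample(int &X, int &V, int Expr) {
#pragma omp atomic capture
  {
    V = X;      // capture the old value
    X += Expr;  // then update
  }
}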
9679OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
9680 const LocationDescription &
Loc, AtomicOpValue &
X, AtomicOpValue &V,
9686 return createAtomicCompare(
Loc,
X, V, R,
E,
D, AO,
Op, IsXBinopExpr,
9687 IsPostfixUpdate, IsFailOnly, Failure);
9690OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
9691 const LocationDescription &
Loc, AtomicOpValue &
X, AtomicOpValue &V,
9696 if (!updateToLocation(
Loc))
9699 assert(
X.Var->getType()->isPointerTy() &&
9700 "OMP atomic expects a pointer to target memory");
9703 assert(V.Var->getType()->isPointerTy() &&
"v.var must be of pointer type");
9704 assert(V.ElemTy ==
X.ElemTy &&
"x and v must be of same type");
9707 bool IsInteger =
E->getType()->isIntegerTy();
9709 if (
Op == OMPAtomicCompareOp::EQ) {
9714 Value *EBCast = Builder.CreateBitCast(
E, IntCastTy);
9715 Value *DBCast = Builder.CreateBitCast(
D, IntCastTy);
9720 Builder.CreateAtomicCmpXchg(
X.Var,
E,
D,
MaybeAlign(), AO, Failure);
9724 Value *OldValue = Builder.CreateExtractValue(Result, 0);
9726 OldValue = Builder.CreateBitCast(OldValue,
X.ElemTy);
9728 "OldValue and V must be of same type");
9729 if (IsPostfixUpdate) {
9730 Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
9732 Value *SuccessOrFail = Builder.CreateExtractValue(Result, 1);
9743 BasicBlock *CurBB = Builder.GetInsertBlock();
9745 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9747 CurBBTI,
X.Var->getName() +
".atomic.exit");
9753 Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
9755 Builder.SetInsertPoint(ContBB);
9756 Builder.CreateStore(OldValue, V.Var);
9757 Builder.CreateBr(ExitBB);
9762 Builder.SetInsertPoint(ExitBB);
9764 Builder.SetInsertPoint(ExitTI);
9767 Value *CapturedValue =
9768 Builder.CreateSelect(SuccessOrFail,
E, OldValue);
9769 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
9775 assert(
R.Var->getType()->isPointerTy() &&
9776 "r.var must be of pointer type");
9777 assert(
R.ElemTy->isIntegerTy() &&
"r must be of integral type");
9779 Value *SuccessFailureVal = Builder.CreateExtractValue(Result, 1);
9780 Value *ResultCast =
R.IsSigned
9781 ? Builder.CreateSExt(SuccessFailureVal,
R.ElemTy)
9782 : Builder.CreateZExt(SuccessFailureVal,
R.ElemTy);
9783 Builder.CreateStore(ResultCast,
R.Var,
R.IsVolatile);
9786 assert((
Op == OMPAtomicCompareOp::MAX ||
Op == OMPAtomicCompareOp::MIN) &&
9787 "Op should be either max or min at this point");
9788 assert(!IsFailOnly &&
"IsFailOnly is only valid when the comparison is ==");
9826 Builder.CreateAtomicRMW(NewOp,
X.Var,
E,
MaybeAlign(), AO);
9828 Value *CapturedValue =
nullptr;
9829 if (IsPostfixUpdate) {
9830 CapturedValue = OldValue;
9855 Value *NonAtomicCmp = Builder.CreateCmp(Pred, OldValue,
E);
9856 CapturedValue = Builder.CreateSelect(NonAtomicCmp,
E, OldValue);
9858 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
9862 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Compare);
9864 return Builder.saveIP();
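// Illustrative examples (added): constructs handled by createAtomicCompare.
// The '==' form lowers to a cmpxchg; the ordering forms lower to atomicrmw
// min/max, with the captured value reconstructed by the select emitted above.
static void atomicCompareExamples(int &X, int E, int D) {
  // Equality form: becomes a compare-exchange.
#pragma omp atomic compare
  if (X == E) {
    X = D;
  }
  // Max form: becomes atomicrmw max.
#pragma omp atomic compare
  if (X < E) {
    X = E;
  }
}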
9867OpenMPIRBuilder::InsertPointOrErrorTy
9868OpenMPIRBuilder::createTeams(
const LocationDescription &
Loc,
9869 BodyGenCallbackTy BodyGenCB,
Value *NumTeamsLower,
9872 if (!updateToLocation(
Loc))
9873 return InsertPointTy();
9876 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
9877 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
9882 if (&OuterAllocaBB == Builder.GetInsertBlock()) {
9883 BasicBlock *BodyBB = splitBB(Builder,
true,
"teams.entry");
9884 Builder.SetInsertPoint(BodyBB, BodyBB->
begin());
9904 BasicBlock *ExitBB = splitBB(Builder,
true,
"teams.exit");
9905 BasicBlock *BodyBB = splitBB(Builder,
true,
"teams.body");
9907 splitBB(Builder,
true,
"teams.alloca");
9909 bool SubClausesPresent =
9910 (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
9912 if (!Config.isTargetDevice() && SubClausesPresent) {
9913 assert((NumTeamsLower ==
nullptr || NumTeamsUpper !=
nullptr) &&
9914 "if lowerbound is non-null, then upperbound must also be non-null "
9915 "for bounds on num_teams");
9917 if (NumTeamsUpper ==
nullptr)
9918 NumTeamsUpper = Builder.getInt32(0);
9920 if (NumTeamsLower ==
nullptr)
9921 NumTeamsLower = NumTeamsUpper;
9925 "argument to if clause must be an integer value");
9929 IfExpr = Builder.CreateICmpNE(IfExpr,
9930 ConstantInt::get(IfExpr->
getType(), 0));
9931 NumTeamsUpper = Builder.CreateSelect(
9932 IfExpr, NumTeamsUpper, Builder.getInt32(1),
"numTeamsUpper");
9935 NumTeamsLower = Builder.CreateSelect(
9936 IfExpr, NumTeamsLower, Builder.getInt32(1),
"numTeamsLower");
9939 if (ThreadLimit ==
nullptr)
9940 ThreadLimit = Builder.getInt32(0);
9942 Value *ThreadNum = getOrCreateThreadID(Ident);
9944 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_teams_51),
9945 {Ident, ThreadNum, NumTeamsLower, NumTeamsUpper, ThreadLimit});
9948 InsertPointTy AllocaIP(AllocaBB, AllocaBB->
begin());
9949 InsertPointTy CodeGenIP(BodyBB, BodyBB->
begin());
9950 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
9954 OI.EntryBB = AllocaBB;
9956 OI.OuterAllocaBB = &OuterAllocaBB;
9960 InsertPointTy OuterAllocaIP(&OuterAllocaBB, OuterAllocaBB.
begin());
9962 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"gid",
true));
9964 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"tid",
true));
9966 auto HostPostOutlineCB = [
this, Ident,
9967 ToBeDeleted](
Function &OutlinedFn)
mutable {
9972 "there must be a single user for the outlined function");
9977 "Outlined function must have two or three arguments only");
9979 bool HasShared = OutlinedFn.
arg_size() == 3;
9987 assert(StaleCI &&
"Error while outlining - no CallInst user found for the "
9988 "outlined function.");
9989 Builder.SetInsertPoint(StaleCI);
9991 Ident, Builder.getInt32(StaleCI->
arg_size() - 2), &OutlinedFn};
9994 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
9995 omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
9999 I->eraseFromParent();
10002 if (!Config.isTargetDevice())
10003 OI.PostOutlineCB = HostPostOutlineCB;
10005 addOutlineInfo(std::move(OI));
10007 Builder.SetInsertPoint(ExitBB, ExitBB->
begin());
10009 return Builder.saveIP();
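// Illustrative example (added): the construct handled by createTeams. When
// only an upper bound is supplied, the lower bound defaults to it (see the
// NumTeamsLower handling above); an 'if' clause that evaluates to false forces
// a single team via the selects feeding __kmpc_push_num_teams_51.
static void teamsExample(int NTeams, int TL) {
#pragma omp teams num_teams(NTeams) thread_limit(TL)
  {
    // Teams region body; on the host it is outlined and passed to
    // __kmpc_fork_teams by the post-outline callback above.
  }
}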
10012OpenMPIRBuilder::InsertPointOrErrorTy
10013OpenMPIRBuilder::createDistribute(
const LocationDescription &
Loc,
10014 InsertPointTy OuterAllocaIP,
10015 BodyGenCallbackTy BodyGenCB) {
10016 if (!updateToLocation(
Loc))
10017 return InsertPointTy();
10019 BasicBlock *OuterAllocaBB = OuterAllocaIP.getBlock();
10021 if (OuterAllocaBB == Builder.GetInsertBlock()) {
10023 splitBB(Builder,
true,
"distribute.entry");
10024 Builder.SetInsertPoint(BodyBB, BodyBB->
begin());
10027 splitBB(Builder,
true,
"distribute.exit");
10029 splitBB(Builder,
true,
"distribute.body");
10031 splitBB(Builder,
true,
"distribute.alloca");
10034 InsertPointTy AllocaIP(AllocaBB, AllocaBB->
begin());
10035 InsertPointTy CodeGenIP(BodyBB, BodyBB->
begin());
10036 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
10041 if (Config.isTargetDevice()) {
10043 OI.OuterAllocaBB = OuterAllocaIP.getBlock();
10044 OI.EntryBB = AllocaBB;
10045 OI.ExitBB = ExitBB;
10047 addOutlineInfo(std::move(OI));
10049 Builder.SetInsertPoint(ExitBB, ExitBB->
begin());
10051 return Builder.saveIP();
10056 std::string VarName) {
10062 M, MapNamesArrayInit->
getType(),
10065 return MapNamesArrayGlobal;
10070void OpenMPIRBuilder::initializeTypes(
Module &M) {
10073 unsigned DefaultTargetAS = Config.getDefaultTargetAS();
10074#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
10075#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
10076 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
10077 VarName##PtrTy = PointerType::get(Ctx, DefaultTargetAS);
10078#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
10079 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
10080 VarName##Ptr = PointerType::get(Ctx, DefaultTargetAS);
10081#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
10082 T = StructType::getTypeByName(Ctx, StructName); \
10084 T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \
10086 VarName##Ptr = PointerType::get(Ctx, DefaultTargetAS);
10087#include "llvm/Frontend/OpenMP/OMPKinds.def"
10090void OpenMPIRBuilder::OutlineInfo::collectBlocks(
  while (!Worklist.empty()) {
10102 if (
BlockSet.insert(SuccBB).second)
10111 if (!Config.isGPU()) {
10126 Fn->
addFnAttr(
"uniform-work-group-size",
"true");
10127 Fn->
addFnAttr(Attribute::MustProgress);
10131void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata(
10132 EmitMetadataErrorReportFunctionTy &ErrorFn) {
10135 if (OffloadInfoManager.empty())
10139 SmallVector<std::pair<
const OffloadEntriesInfoManager::OffloadEntryInfo *,
10140 TargetRegionEntryInfo>,
10142 OrderedEntries(OffloadInfoManager.size());
10145 auto &&GetMDInt = [
this](
unsigned V) {
10152 NamedMDNode *MD = M.getOrInsertNamedMetadata(
"omp_offload.info");
10153 auto &&TargetRegionMetadataEmitter =
10154 [&
C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
10155 const TargetRegionEntryInfo &EntryInfo,
10156 const OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion &
E) {
10169 GetMDInt(
E.getKind()), GetMDInt(EntryInfo.DeviceID),
10170 GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
10171 GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
10172 GetMDInt(
E.getOrder())};
10175 OrderedEntries[
E.getOrder()] = std::make_pair(&
E, EntryInfo);
10181 OffloadInfoManager.actOnTargetRegionEntriesInfo(TargetRegionMetadataEmitter);
10184 auto &&DeviceGlobalVarMetadataEmitter =
10185 [&
C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
10187 const OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar &
E) {
10195 Metadata *
Ops[] = {GetMDInt(
E.getKind()), GetMDString(MangledName),
10196 GetMDInt(
E.getFlags()), GetMDInt(
E.getOrder())};
10199 TargetRegionEntryInfo varInfo(MangledName, 0, 0, 0);
10200 OrderedEntries[
E.getOrder()] = std::make_pair(&
E, varInfo);
10206 OffloadInfoManager.actOnDeviceGlobalVarEntriesInfo(
10207 DeviceGlobalVarMetadataEmitter);
  for (const auto &E : OrderedEntries) {
10210 assert(
E.first &&
"All ordered entries must exist!");
10211 if (
const auto *CE =
10214 if (!
CE->getID() || !
CE->getAddress()) {
10216 TargetRegionEntryInfo EntryInfo =
E.second;
10217 StringRef FnName = EntryInfo.ParentName;
10218 if (!M.getNamedValue(FnName))
10220 ErrorFn(EMIT_MD_TARGET_REGION_ERROR, EntryInfo);
10223 createOffloadEntry(
CE->getID(),
CE->getAddress(),
10226 }
else if (
const auto *CE =
dyn_cast<
10227 OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar>(
10229 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags =
10230 static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
>(
10233 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter:
10234 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo:
10235 if (Config.isTargetDevice() && Config.hasRequiresUnifiedSharedMemory())
10237 if (!
CE->getAddress()) {
10238 ErrorFn(EMIT_MD_DECLARE_TARGET_ERROR,
E.second);
10242 if (
CE->getVarSize() == 0)
10245 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink:
      assert(((Config.isTargetDevice() && !CE->getAddress()) ||
              (!Config.isTargetDevice() && CE->getAddress())) &&
             "Declare target link address is set.");
10249 if (Config.isTargetDevice())
10251 if (!
CE->getAddress()) {
10252 ErrorFn(EMIT_MD_GLOBAL_VAR_LINK_ERROR, TargetRegionEntryInfo());
10264 if ((
GV->hasLocalLinkage() ||
GV->hasHiddenVisibility()) &&
10265 Flags != OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10270 if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10271 createOffloadEntry(
CE->getAddress(),
CE->getAddress(),
CE->getVarSize(),
10272 Flags,
CE->getLinkage(),
CE->getVarName());
10274 createOffloadEntry(
CE->getAddress(),
CE->getAddress(),
CE->getVarSize(),
10275 Flags,
CE->getLinkage());
10286 if (Config.hasRequiresFlags() && !Config.isTargetDevice())
10291 OffloadEntriesInfoManager::OMPTargetGlobalRegisterRequires,
10292 Config.getRequiresFlags());
void TargetRegionEntryInfo::getTargetRegionEntryFnName(
    SmallVectorImpl<char> &Name, StringRef ParentName, unsigned DeviceID,
    unsigned FileID, unsigned Line, unsigned Count) {
  raw_svector_ostream OS(Name);
  OS << KernelNamePrefix << llvm::format("%x", DeviceID)
     << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  if (Count)
    OS << "_" << Count;
}
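// Illustrative sketch (added): the shape of the names produced above, e.g.
// "__omp_offloading_<device-id>_<file-id>_foo_l42" with "_<count>" appended
// for second and later regions at the same location. A plain-C++ model,
// assuming KernelNamePrefix == "__omp_offloading_"; assumes <cstdio> and
// <string>.
static std::string kernelNameExample(const std::string &ParentName,
                                     unsigned DeviceID, unsigned FileID,
                                     unsigned Line) {
  char Buf[64];
  std::snprintf(Buf, sizeof(Buf), "__omp_offloading_%x_%x_", DeviceID, FileID);
  return std::string(Buf) + ParentName + "_l" + std::to_string(Line);
}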
10305void OffloadEntriesInfoManager::getTargetRegionEntryFnName(
10307 unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
10308 TargetRegionEntryInfo::getTargetRegionEntryFnName(
10309 Name, EntryInfo.ParentName, EntryInfo.DeviceID, EntryInfo.FileID,
10310 EntryInfo.Line, NewCount);
10313TargetRegionEntryInfo
10314OpenMPIRBuilder::getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack,
10317 auto FileIDInfo = CallBack();
10323 FileID =
hash_value(std::get<0>(FileIDInfo));
10325 FileID =
ID.getFile();
10327 return TargetRegionEntryInfo(ParentName,
ID.getDevice(), FileID,
10328 std::get<1>(FileIDInfo));
unsigned OpenMPIRBuilder::getFlagMemberOffset() {
  unsigned Offset = 0;
  for (uint64_t Remain =
           static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>>(
               omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
       !(Remain & 1); Remain = Remain >> 1)
    Offset++;
  return Offset;
}

omp::OpenMPOffloadMappingFlags
OpenMPIRBuilder::getMemberOfFlag(unsigned Position) {
  return static_cast<omp::OpenMPOffloadMappingFlags>(
      (uint64_t(Position) + 1) << getFlagMemberOffset());
}
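// Illustrative sketch (added): the MEMBER_OF encoding used by getMemberOfFlag.
// The member index is biased by one and shifted into the high bits of the
// 64-bit map-type word; getFlagMemberOffset() counts the trailing zero bits of
// OMP_MAP_MEMBER_OF. The shift amount below is a caller-supplied value for the
// example, not a constant quoted from OMPConstants.h.
static unsigned long long memberOfFlagExample(unsigned Position,
                                              unsigned FlagMemberOffset) {
  return (static_cast<unsigned long long>(Position) + 1) << FlagMemberOffset;
}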
10348void OpenMPIRBuilder::setCorrectMemberOfFlag(
10354 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10356 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10363 Flags &=
~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
10364 Flags |= MemberOfFlag;
10367Constant *OpenMPIRBuilder::getAddrOfDeclareTargetVar(
10368 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
10369 OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
10370 bool IsDeclaration,
bool IsExternallyVisible,
10371 TargetRegionEntryInfo EntryInfo,
StringRef MangledName,
10372 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
10373 std::vector<Triple> TargetTriple,
Type *LlvmPtrTy,
10374 std::function<
Constant *()> GlobalInitializer,
10381 if (CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink ||
10382 ((CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo ||
10384 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter) &&
10385 Config.hasRequiresUnifiedSharedMemory())) {
10390 if (!IsExternallyVisible)
10391 OS <<
format(
"_%x", EntryInfo.FileID);
10392 OS <<
"_decl_tgt_ref_ptr";
10395 Value *
Ptr = M.getNamedValue(PtrName);
10399 Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName);
10404 if (!Config.isTargetDevice()) {
10405 if (GlobalInitializer)
10406 GV->setInitializer(GlobalInitializer());
10411 registerTargetGlobalVariable(
10412 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
10413 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
10423void OpenMPIRBuilder::registerTargetGlobalVariable(
10424 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
10425 OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
10426 bool IsDeclaration,
bool IsExternallyVisible,
10427 TargetRegionEntryInfo EntryInfo,
StringRef MangledName,
10428 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
10429 std::vector<Triple> TargetTriple,
10430 std::function<
Constant *()> GlobalInitializer,
10433 if (DeviceClause != OffloadEntriesInfoManager::OMPTargetDeviceClauseAny ||
10434 (TargetTriple.empty() && !Config.isTargetDevice()))
10437 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags;
10442 if ((CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo ||
10444 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter) &&
10445 !Config.hasRequiresUnifiedSharedMemory()) {
10446 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
10450 if (!IsDeclaration)
10452 M.getDataLayout().getTypeSizeInBits(LlvmVal->
getValueType()), 8);
10459 if (Config.isTargetDevice() &&
10463 if (!OffloadInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10466 std::string RefName = createPlatformSpecificName({
VarName,
"ref"});
10468 if (!M.getNamedValue(RefName)) {
10470 getOrCreateInternalVariable(Addr->
getType(), RefName);
10472 GvAddrRef->setConstant(
true);
10474 GvAddrRef->setInitializer(Addr);
10475 GeneratedRefs.push_back(GvAddrRef);
10479 if (CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink)
10480 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
10482 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
10484 if (Config.isTargetDevice()) {
10488 Addr = getAddrOfDeclareTargetVar(
10489 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
10490 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
10491 LlvmPtrTy, GlobalInitializer, VariableLinkage);
10494 VarSize = M.getDataLayout().getPointerSize();
10498 OffloadInfoManager.registerDeviceGlobalVarEntryInfo(VarName, Addr, VarSize,
10504void OpenMPIRBuilder::loadOffloadInfoMetadata(
Module &M) {
10508 NamedMDNode *MD = M.getNamedMetadata(ompOffloadInfoName);
10513 auto &&GetMDInt = [MN](
unsigned Idx) {
10518 auto &&GetMDString = [MN](
unsigned Idx) {
10520 return V->getString();
10523 switch (GetMDInt(0)) {
10527 case OffloadEntriesInfoManager::OffloadEntryInfo::
10528 OffloadingEntryInfoTargetRegion: {
10529 TargetRegionEntryInfo EntryInfo(GetMDString(3),
10534 OffloadInfoManager.initializeTargetRegionEntryInfo(EntryInfo,
10538 case OffloadEntriesInfoManager::OffloadEntryInfo::
10539 OffloadingEntryInfoDeviceGlobalVar:
10540 OffloadInfoManager.initializeDeviceGlobalVarEntryInfo(
10542 static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
>(
  if (HostFilePath.empty())
10556 if (std::error_code Err = Buf.getError()) {
10558 "OpenMPIRBuilder: " +
10566 if (std::error_code Err = M.getError()) {
10568 (
"error parsing host file inside of OpenMPIRBuilder: " + Err.message())
10572 loadOffloadInfoMetadata(*M.get());
10579bool OffloadEntriesInfoManager::empty()
const {
10580 return OffloadEntriesTargetRegion.empty() &&
10581 OffloadEntriesDeviceGlobalVar.empty();
10584unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
10585 const TargetRegionEntryInfo &EntryInfo)
const {
10586 auto It = OffloadEntriesTargetRegionCount.find(
10587 getTargetRegionEntryCountKey(EntryInfo));
10588 if (It == OffloadEntriesTargetRegionCount.end())
10593void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
10594 const TargetRegionEntryInfo &EntryInfo) {
10595 OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
10596 EntryInfo.Count + 1;
10600void OffloadEntriesInfoManager::initializeTargetRegionEntryInfo(
10601 const TargetRegionEntryInfo &EntryInfo,
unsigned Order) {
10602 OffloadEntriesTargetRegion[EntryInfo] =
10603 OffloadEntryInfoTargetRegion(Order,
nullptr,
nullptr,
10604 OMPTargetRegionEntryTargetRegion);
10605 ++OffloadingEntriesNum;
10608void OffloadEntriesInfoManager::registerTargetRegionEntryInfo(
10610 OMPTargetRegionEntryKind Flags) {
10611 assert(EntryInfo.Count == 0 &&
"expected default EntryInfo");
10614 EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);
10618 if (OMPBuilder->Config.isTargetDevice()) {
10620 if (!hasTargetRegionEntryInfo(EntryInfo)) {
10623 auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
10624 Entry.setAddress(Addr);
10626 Entry.setFlags(Flags);
10628 if (Flags == OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion &&
10629 hasTargetRegionEntryInfo(EntryInfo,
true))
10631 assert(!hasTargetRegionEntryInfo(EntryInfo) &&
10632 "Target region entry already registered!");
10633 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr,
ID, Flags);
10634 OffloadEntriesTargetRegion[EntryInfo] = Entry;
10635 ++OffloadingEntriesNum;
10637 incrementTargetRegionEntryInfoCount(EntryInfo);
10640bool OffloadEntriesInfoManager::hasTargetRegionEntryInfo(
10641 TargetRegionEntryInfo EntryInfo,
bool IgnoreAddressId)
const {
10644 EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);
10646 auto It = OffloadEntriesTargetRegion.find(EntryInfo);
10647 if (It == OffloadEntriesTargetRegion.end()) {
10651 if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
10656void OffloadEntriesInfoManager::actOnTargetRegionEntriesInfo(
10657 const OffloadTargetRegionEntryInfoActTy &Action) {
  for (const auto &It : OffloadEntriesTargetRegion) {
10660 Action(It.first, It.second);
10664void OffloadEntriesInfoManager::initializeDeviceGlobalVarEntryInfo(
10665 StringRef Name, OMPTargetGlobalVarEntryKind Flags,
unsigned Order) {
10666 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
10667 ++OffloadingEntriesNum;
10670void OffloadEntriesInfoManager::registerDeviceGlobalVarEntryInfo(
10673 if (OMPBuilder->Config.isTargetDevice()) {
10675 if (!hasDeviceGlobalVarEntryInfo(VarName))
10677 auto &Entry = OffloadEntriesDeviceGlobalVar[
VarName];
10678 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
10679 if (Entry.getVarSize() == 0) {
10680 Entry.setVarSize(VarSize);
10685 Entry.setVarSize(VarSize);
10687 Entry.setAddress(Addr);
10689 if (hasDeviceGlobalVarEntryInfo(VarName)) {
10690 auto &Entry = OffloadEntriesDeviceGlobalVar[
VarName];
10691 assert(Entry.isValid() && Entry.getFlags() == Flags &&
10692 "Entry not initialized!");
10693 if (Entry.getVarSize() == 0) {
10694 Entry.setVarSize(VarSize);
10699 if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10700 OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
10701 Addr, VarSize, Flags,
Linkage,
10704 OffloadEntriesDeviceGlobalVar.try_emplace(
10705 VarName, OffloadingEntriesNum, Addr, VarSize, Flags,
Linkage,
"");
10706 ++OffloadingEntriesNum;
10710void OffloadEntriesInfoManager::actOnDeviceGlobalVarEntriesInfo(
10711 const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
10714 Action(
E.getKey(),
E.getValue());
10721void CanonicalLoopInfo::collectControlBlocks(
10728 BBs.
append({getPreheader(), Header,
Cond, Latch,
Exit, getAfter()});
10731BasicBlock *CanonicalLoopInfo::getPreheader()
const {
10740void CanonicalLoopInfo::setTripCount(
Value *TripCount) {
10752void CanonicalLoopInfo::mapIndVar(
  for (Use &U : OldIV->uses()) {
10766 if (
User->getParent() == getCond())
10768 if (
User->getParent() == getLatch())
10774 Value *NewIV = Updater(OldIV);
  for (Use *U : ReplacableUses)
10785void CanonicalLoopInfo::assertOK()
const {
10798 "Preheader must terminate with unconditional branch");
10800 "Preheader must jump to header");
10804 "Header must terminate with unconditional branch");
10805 assert(Header->getSingleSuccessor() ==
Cond &&
10806 "Header must jump to exiting block");
10809 assert(
Cond->getSinglePredecessor() == Header &&
10810 "Exiting block only reachable from header");
10813 "Exiting block must terminate with conditional branch");
10815 "Exiting block must have two successors");
10817 "Exiting block's first successor jump to the body");
10819 "Exiting block's second successor must exit the loop");
10823 "Body only reachable from exiting block");
10828 "Latch must terminate with unconditional branch");
10837 "Exit block must terminate with unconditional branch");
10838 assert(
Exit->getSingleSuccessor() == After &&
10839 "Exit block must jump to after block");
10843 "After block only reachable from exit block");
10847 assert(IndVar &&
"Canonical induction variable not found?");
10849 "Induction variable must be an integer");
10851 "Induction variable must be a PHI in the loop header");
10857 auto *NextIndVar =
cast<PHINode>(IndVar)->getIncomingValue(1);
10864 Value *TripCount = getTripCount();
10865 assert(TripCount &&
"Loop trip count not found?");
10867 "Trip count and induction variable must have the same type");
10871 "Exit condition must be a signed less-than comparison");
10873 "Exit condition must compare the induction variable");
10875 "Exit condition must compare with the trip count");
10879void CanonicalLoopInfo::invalidate() {
static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier)
Determine which scheduling algorithm to use, determined from schedule clause arguments.
static Value * removeASCastIfPresent(Value *V)
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
static FunctionCallee getKmpcDistForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Value * createFakeIntVal(IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy OuterAllocaIP, llvm::SmallVectorImpl< Instruction * > &ToBeDeleted, OpenMPIRBuilder::InsertPointTy InnerAllocaIP, const Twine &Name="", bool AsPtr=true)
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop using depending on type.
static Expected< Function * > createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, StringRef FuncName, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void FixupDebugInfoForOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, Function *Func, DenseMap< Value *, std::tuple< Value *, unsigned > > &ValueReplacementMap)
static OMPScheduleType getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, bool HasOrderedClause)
Adds ordering modifier flags to schedule type.
static OMPScheduleType getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, bool HasSimdModifier, bool HasMonotonic, bool HasNonmonotonic, bool HasOrderedClause)
Adds monotonicity modifier flags to schedule type.
static void addSimdMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause)
Determine the schedule type using schedule and ordering clause arguments.
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
static llvm::CallInst * emitNoUnwindRuntimeCall(IRBuilder<> &Builder, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args, const llvm::Twine &Name)
static Error populateReductionFunction(Function *ReductionFunc, ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, IRBuilder<> &Builder, ArrayRef< bool > IsByRef, bool IsGPU)
static Function * getFreshReductionFunc(Module &M)
static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder, Function *Function)
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
static void checkReductionInfos(ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, bool IsGPU)
static Type * getOffloadingArrayType(Value *V)
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::TargetDataInfo &Info, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, Function *OutlinedFn, Constant *OutlinedFnID, SmallVectorImpl< Value * > &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB, const SmallVector< llvm::OpenMPIRBuilder::DependData > &Dependencies, bool HasNoWait)
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
static StructType * createTaskWithPrivatesTy(OpenMPIRBuilder &OMPIRBuilder, ArrayRef< Value * > OffloadingArraysToPrivatize)
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performant unroll factor for CLI.
static void workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident, Function &OutlinedFn, const SmallVector< Instruction *, 4 > &ToBeDeleted, WorksharingLoopType LoopType)
static Value * emitTaskDependencies(OpenMPIRBuilder &OMPBuilder, const SmallVectorImpl< OpenMPIRBuilder::DependData > &Dependencies)
static Error emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry, TargetRegionEntryInfo &EntryInfo, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void updateNVPTXAttr(Function &Kernel, StringRef Name, int32_t Value, bool Min)
static OpenMPIRBuilder::InsertPointTy getInsertPointAfterInstr(Instruction *I)
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOptLevel OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void addBasicBlockMetadata(BasicBlock *BB, ArrayRef< Metadata * > Properties)
Attach metadata Properties to the basic block described by BB.
static void restoreIPandDebugLoc(llvm::IRBuilderBase &Builder, llvm::IRBuilderBase::InsertPoint IP)
This is wrapper over IRBuilderBase::restoreIP that also restores the current debug location to the la...
static LoadInst * loadSharedDataFromTaskDescriptor(OpenMPIRBuilder &OMPIRBuilder, IRBuilderBase &Builder, Value *TaskWithPrivates, Type *TaskWithPrivatesTy)
Given a task descriptor, TaskWithPrivates, return the pointer to the block of pointers containing sha...
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
static FunctionCallee getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType)
static const omp::GV & getGridValue(const Triple &T, Function *Kernel)
static Function * emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI, StructType *PrivatesTy, StructType *TaskWithPrivatesTy, const size_t NumOffloadingArrays, const int SharedArgsOperandNo)
Create an entry point for a target task with the following.
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, Value *TripCount, Function &LoopBodyFn)
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr, Value *ThreadID, const SmallVector< Instruction *, 4 > &ToBeDeleted)
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, Instruction *PrivTID, AllocaInst *PrivTIDAddr, const SmallVector< Instruction *, 4 > &ToBeDeleted)
Class for arbitrary precision integers.
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
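A short sketch of the AllocaInst accessors listed above, assuming an IRBuilder positioned in an entry block; the helper name createPrivateSlot is made up for illustration.

#include <optional>
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Sketch: create a stack slot and query its alignment and allocation size.
static AllocaInst *createPrivateSlot(IRBuilder<> &Builder, Type *ElemTy,
                                     const DataLayout &DL) {
  AllocaInst *AI = Builder.CreateAlloca(ElemTy, /*ArraySize=*/nullptr, "priv");
  AI->setAlignment(DL.getPrefTypeAlign(ElemTy));
  if (std::optional<TypeSize> Size = AI->getAllocationSize(DL))
    (void)Size; // size in bytes, when it is a compile-time constant
  return AI;
}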
This class represents an incoming formal argument to a Function.
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
LLVM_ABI AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
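A hedged sketch of emitting one of the AtomicRMWInst::BinOp operations listed above through IRBuilder; the function name and the choice of monotonic ordering are illustrative only.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Sketch: emit 'atomicrmw max' (signed maximum) on an integer in memory.
static Value *emitAtomicMax(IRBuilder<> &Builder, Value *Ptr, Value *Update) {
  return Builder.CreateAtomicRMW(AtomicRMWInst::Max, Ptr, Update,
                                 MaybeAlign(), AtomicOrdering::Monotonic);
}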
This class holds the attributes for a particular argument, parameter, function, or return value.
LLVM_ABI AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
LLVM_ABI AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
LLVM Basic Block Representation.
LLVM_ABI void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
iterator begin()
Instruction iterator methods.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
reverse_iterator rbegin()
const Instruction & back() const
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
InstListType::reverse_iterator reverse_iterator
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
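The BasicBlock entries above cover the split-and-rewire pattern used throughout region outlining. Below is a self-contained sketch under the assumption that I is not a PHI node; insertBlockBefore is a hypothetical helper name.

#include "llvm/ADT/Twine.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Sketch: split BB at I and wedge a fresh block between the two halves.
static BasicBlock *insertBlockBefore(Instruction *I, const Twine &Name) {
  BasicBlock *BB = I->getParent();
  // Tail receives everything from I onwards; since I is past the PHI
  // section, Tail contains no PHI nodes and BB ends in 'br %Tail'.
  BasicBlock *Tail = BB->splitBasicBlock(I->getIterator(), Name + ".tail");
  BasicBlock *Mid =
      BasicBlock::Create(BB->getContext(), Name, BB->getParent(), Tail);
  cast<BranchInst>(BB->getTerminator())->setSuccessor(0, Mid); // BB -> Mid
  BranchInst::Create(Tail, Mid); // Mid falls through to Tail
  return Mid;
}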
Conditional or Unconditional Branch instruction.
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getTruncOrBitCast(Constant *C, Type *Ty)
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static LLVM_ABI Constant * getSizeOf(Type *Ty)
getSizeOf constant expr - computes the (alloc) size of a type (in address-units, not bits) in a targe...
static LLVM_ABI Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
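A small sketch combining several of the Constant factories above into one literal record; makeStringRecord and the { [N x i8], i32 } layout are illustrative assumptions, not a structure defined by this file.

#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

// Sketch: pack a NUL-terminated string constant together with its length.
static Constant *makeStringRecord(LLVMContext &Ctx, StringRef Text) {
  Constant *Str = ConstantDataArray::getString(Ctx, Text, /*AddNull=*/true);
  StructType *RecTy =
      StructType::get(Ctx, {Str->getType(), Type::getInt32Ty(Ctx)});
  return ConstantStruct::get(
      RecTy, {Str, ConstantInt::get(Type::getInt32Ty(Ctx), Text.size())});
}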
DILocalScope * getScope() const
Get the local scope for this variable.
DINodeArray getAnnotations() const
Subprogram description. Uses SubclassData1.
uint32_t getAlignInBits() const
StringRef getName() const
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Record of a variable value-assignment, aka a non-instruction representation of the dbg....
Analysis pass which computes a DominatorTree.
LLVM_ABI DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or a Error.
Error takeError()
Take ownership of the stored error.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & getEntryBlock() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
AttributeList getAttributes() const
Return the attribute list for this Function.
const Function & getFunction() const
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Type * getReturnType() const
Returns the type of the ret val.
void setCallingConv(CallingConv::ID CC)
Argument * getArg(unsigned i) const
bool hasMetadata() const
Return true if this value has any metadata attached to it.
LLVM_ABI void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
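A sketch of the Function creation and attribute APIs listed above, assuming a declaration named "callback" taking a single pointer; the name, signature, and attribute choices are placeholders.

#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Sketch: declare 'void @callback(ptr)' with internal linkage and attributes.
static Function *declareCallback(Module &M) {
  LLVMContext &Ctx = M.getContext();
  FunctionType *FTy = FunctionType::get(
      Type::getVoidTy(Ctx), {PointerType::getUnqual(Type::getInt8Ty(Ctx))},
      /*isVarArg=*/false);
  Function *F = Function::Create(FTy, GlobalValue::InternalLinkage,
                                 M.getDataLayout().getProgramAddressSpace(),
                                 "callback", &M);
  F->addFnAttr(Attribute::NoUnwind);
  F->addParamAttr(0, Attribute::NoAlias);
  F->setDSOLocal(true);
  return F;
}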
LinkageTypes getLinkage() const
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
PointerType * getType() const
Global values are always pointers.
@ HiddenVisibility
The GV is hidden.
@ ProtectedVisibility
The GV is protected.
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
@ PrivateLinkage
Like Internal, but omit from symbol table.
@ CommonLinkage
Tentative definitions.
@ InternalLinkage
Rename collisions when linking (static functions).
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AppendingLinkage
Special purpose, only applies to global arrays.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
LLVM_ABI void setInitializer(Constant *InitVal)
setInitializer - Sets the initializer for this global variable, removing any existing initializer if ...
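A brief sketch of the GlobalVariable and linkage entries above: emitting a private constant global for some precomputed initializer. The helper name emitPrivateGlobal and the unnamed_addr choice are assumptions for the example.

#include "llvm/IR/Constant.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Sketch: materialize Init as a private, constant, unnamed_addr global.
static GlobalVariable *emitPrivateGlobal(Module &M, Constant *Init,
                                         const Twine &Name) {
  auto *GV = new GlobalVariable(M, Init->getType(), /*isConstant=*/true,
                                GlobalValue::PrivateLinkage, Init, Name);
  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  return GV;
}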
InsertPoint - A saved insertion point.
BasicBlock * getBlock() const
BasicBlock::iterator getPoint() const
Common base class shared among various IRBuilders.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI const DebugLoc & getStableDebugLoc() const
Fetch the debug location for this node, unless this is a debug intrinsic, in which case fetch the deb...
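A minimal sketch of the save/restore dance that the InsertPoint and IRBuilderBase entries above (and the restoreIPandDebugLoc helper) revolve around; emitAtBlockEnd is an illustrative name.

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Sketch: remember where the builder was, emit elsewhere, then come back and
// restore both the insertion point and the debug location.
static void emitAtBlockEnd(IRBuilderBase &Builder, BasicBlock *BB) {
  IRBuilderBase::InsertPoint SavedIP = Builder.saveIP();
  DebugLoc SavedDL = Builder.getCurrentDebugLocation();
  Builder.SetInsertPoint(BB); // append to the end of BB
  // ... emit instructions here ...
  Builder.restoreIP(SavedIP);
  Builder.SetCurrentDebugLocation(SavedDL);
}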
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI void moveBeforePreserving(InstListType::iterator MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Analysis pass that exposes the LoopInfo for a function.
LLVM_ABI LoopInfo run(Function &F, FunctionAnalysisManager &AM)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
A Module instance is used to store all the information related to an LLVM module.
const Triple & getTargetTriple() const
Get the target triple which is a string describing the target host.
LLVMContext & getContext() const
Get the global data context.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
iterator_range< op_iterator > operands()
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Analysis pass that exposes the ScalarEvolution for a function.
LLVM_ABI ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
A vector that has set insertion semantics.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool remove_if(UnaryPredicate P)
Remove elements that match the given predicate.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
StringRef - Represent a constant reference to a string, i.e.
std::string str() const
str - Get the contents as an std::string.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Type * getElementType(unsigned N) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
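A sketch of SwitchInst::addCase in combination with IRBuilder; emitDispatch is a hypothetical helper and the i32 selector and 0-based case values are assumptions.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Sketch: dispatch on an i32 selector, one case per destination block.
static SwitchInst *emitDispatch(IRBuilder<> &Builder, Value *Selector,
                                BasicBlock *DefaultBB,
                                ArrayRef<BasicBlock *> Cases) {
  SwitchInst *SI = Builder.CreateSwitch(Selector, DefaultBB, Cases.size());
  for (const auto &En : llvm::enumerate(Cases))
    SI->addCase(Builder.getInt32(En.index()), En.value());
  return SI;
}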
Analysis pass providing the TargetTransformInfo.
LLVM_ABI Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
Analysis pass providing the TargetLibraryInfo.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(const Triple &TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
ArchType getArch() const
Get the parsed architecture type of this triple.
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
LLVM_ABI unsigned getIntegerBitWidth() const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isVoidTy() const
Return true if this is 'void'.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
LLVM_ABI bool canUnroll() const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
LLVM_ABI User * getUniqueUndroppableUser()
Return true if there is exactly one unique user of this value that cannot be dropped (that user can h...
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
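A sketch of the two use-replacement idioms listed above (replaceAllUsesWith and replaceUsesWithIf); replaceInOneFunction is a made-up name and the "restrict to one function" policy is only an example predicate.

#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Sketch: replace uses of Old with New, optionally only inside OnlyIn.
static void replaceInOneFunction(Value *Old, Value *New, Function *OnlyIn) {
  if (!OnlyIn) {
    Old->replaceAllUsesWith(New);
    return;
  }
  Old->replaceUsesWithIf(New, [OnlyIn](Use &U) {
    auto *UserI = dyn_cast<Instruction>(U.getUser());
    return UserI && UserI->getFunction() == OnlyIn;
  });
}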
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
A raw_ostream that writes to an SmallVector or SmallString.
The virtual file system interface.
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(const Twine &Name, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatile=false, bool IsText=true)
This is a convenience method that opens a file, gets its content and then closes the file.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
initializer< Ty > init(const Ty &Val)
LLVM_ABI void emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr, StringRef SectionName="llvm_offload_entries")
Create an offloading section struct used to register this global at runtime.
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
@ OMP_DEVICEID_UNDEF
Device ID if the device was not defined, runtime should get it from environment variables in the spec...
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
constexpr const GV & getAMDGPUGridValues()
static constexpr GV SPIRVGridValues
For generic SPIR-V GPUs.
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
Function * Kernel
Summary of a kernel (=entry point for target offloading).
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
LLVM_ABI std::error_code getUniqueID(const Twine Path, UniqueID &Result)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
unsigned getPointerAddressSpace(const Type *T)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
std::string utostr(uint64_t X, bool isNeg=false)
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
bool isa_and_nonnull(const Y &Val)
LLVM_ABI bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
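A hedged sketch of the SplitBlockAndInsertIfThenElse utility declared above: it materializes an if/else diamond in front of an existing instruction and hands back the two arm terminators. The helper name emitIfThenElse is illustrative.

#include "llvm/IR/IRBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;

// Sketch: split before SplitBefore and leave the builder in the 'then' arm.
static void emitIfThenElse(IRBuilderBase &Builder, Value *Cond,
                           Instruction *SplitBefore) {
  Instruction *ThenTerm = nullptr;
  Instruction *ElseTerm = nullptr;
  SplitBlockAndInsertIfThenElse(Cond, SplitBefore->getIterator(), &ThenTerm,
                                &ElseTerm);
  Builder.SetInsertPoint(ThenTerm); // emit the 'then' side here
}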
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
CodeGenOptLevel
Code generation optimization level.
LLVM_ABI bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ LLVM_MARK_AS_BITMASK_ENUM
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
DWARFExpression::Operation Op
LLVM_ABI void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
LLVM_ABI TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto predecessors(const MachineBasicBlock *BB)
LLVM_ABI Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
ConstantFoldInsertValueInstruction - Attempt to constant fold an insertvalue instruction with the spe...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
bool to_integer(StringRef S, N &Num, unsigned Base=0)
Convert the string S to an integer of the specified type using the radix Base. If Base is 0,...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
static const Target * lookupTarget(StringRef TripleStr, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...