diff --git a/clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.cpp b/clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.cpp index c5da66a1f28b6..c21b7cab1b8da 100644 --- a/clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.cpp +++ b/clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.cpp @@ -16,14 +16,14 @@ namespace clang::tidy::altera { void SingleWorkItemBarrierCheck::registerMatchers(MatchFinder *Finder) { // Find any function that calls barrier but does not call an ID function. - // hasAttr(attr::Kind::OpenCLKernel) restricts it to only kernel functions. + // hasAttr(attr::Kind::DeviceKernel) restricts it to only kernel functions. // FIXME: Have it accept all functions but check for a parameter that gets an // ID from one of the four ID functions. Finder->addMatcher( // Find function declarations... functionDecl( - // That are OpenCL kernels... - hasAttr(attr::Kind::OpenCLKernel), + // That are device kernels... + hasAttr(attr::Kind::DeviceKernel), // And call a barrier function (either 1.x or 2.x version)... forEachDescendant(callExpr(callee(functionDecl(hasAnyName( "barrier", "work_group_barrier")))) diff --git a/clang/include/clang/AST/GlobalDecl.h b/clang/include/clang/AST/GlobalDecl.h index baf5371d2682d..97caff0198cb0 100644 --- a/clang/include/clang/AST/GlobalDecl.h +++ b/clang/include/clang/AST/GlobalDecl.h @@ -164,7 +164,7 @@ class GlobalDecl { } static KernelReferenceKind getDefaultKernelReference(const FunctionDecl *D) { - return (D->hasAttr() || D->getLangOpts().CUDAIsDevice) + return (D->hasAttr() || D->getLangOpts().CUDAIsDevice) ? 
KernelReferenceKind::Kernel : KernelReferenceKind::Stub; } diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 06462b8a26bc0..3a4d6bf4c3ad2 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -190,8 +190,10 @@ def FunctionPointer : SubsetSubject; def OpenCLKernelFunction - : SubsetSubjecthasAttr()}], - "kernel functions">; + : SubsetSubjectgetASTContext().getLangOpts().OpenCL && + DeviceKernelAttr::isOpenCLSpelling( + S->getAttr()}], + "kernel functions">; // HasFunctionProto is a more strict version of FunctionLike, so it should // never be specified in a Subjects list along with FunctionLike (due to the @@ -1509,12 +1511,6 @@ def CUDAGridConstant : InheritableAttr { let Documentation = [CUDAGridConstantAttrDocs]; } -def NVPTXKernel : InheritableAttr, TargetSpecificAttr { - let Spellings = [Clang<"nvptx_kernel">]; - let Subjects = SubjectList<[Function]>; - let Documentation = [Undocumented]; -} - def HIPManaged : InheritableAttr { let Spellings = [GNU<"managed">, Declspec<"__managed__">]; let Subjects = SubjectList<[Var]>; @@ -1549,11 +1545,52 @@ def CUDAShared : InheritableAttr { } def : MutualExclusions<[CUDAConstant, CUDAShared, HIPManaged]>; -def SYCLKernel : InheritableAttr { - let Spellings = [Clang<"sycl_kernel">]; - let Subjects = SubjectList<[FunctionTmpl]>; - let LangOpts = [SYCLDevice]; - let Documentation = [SYCLKernelDocs]; +def DeviceKernel : DeclOrTypeAttr { + let Spellings = [Clang<"device_kernel">, Clang<"sycl_kernel">, + Clang<"nvptx_kernel">, Clang<"amdgpu_kernel">, + CustomKeyword<"__kernel">, CustomKeyword<"kernel">]; + let Documentation = [DeviceKernelDocs]; + let AdditionalMembers = + [{ + static inline bool isAMDGPUSpelling(const AttributeCommonInfo& A) { + return A.getAttributeSpellingListIndex() == GNU_amdgpu_kernel || + A.getAttributeSpellingListIndex() == CXX11_clang_amdgpu_kernel || + A.getAttributeSpellingListIndex() == C23_clang_amdgpu_kernel; + } + static 
inline bool isAMDGPUSpelling(const AttributeCommonInfo* A) { + if(!A) return false; + return isAMDGPUSpelling(*A); + } + static inline bool isNVPTXSpelling(const AttributeCommonInfo& A) { + return A.getAttributeSpellingListIndex() == GNU_nvptx_kernel || + A.getAttributeSpellingListIndex() == CXX11_clang_nvptx_kernel || + A.getAttributeSpellingListIndex() == C23_clang_nvptx_kernel; + } + static inline bool isNVPTXSpelling(const AttributeCommonInfo* A) { + if(!A) return false; + return isNVPTXSpelling(*A); + } + static inline bool isSYCLSpelling(const AttributeCommonInfo& A) { + return A.getAttributeSpellingListIndex() == GNU_sycl_kernel || + A.getAttributeSpellingListIndex() == CXX11_clang_sycl_kernel || + A.getAttributeSpellingListIndex() == C23_clang_sycl_kernel; + } + static inline bool isSYCLSpelling(const AttributeCommonInfo* A) { + if(!A) return false; + return isSYCLSpelling(*A); + } + static inline bool isOpenCLSpelling(const AttributeCommonInfo& A) { + // Tablegen strips underscores from spellings to build the spelling + // list, but here we have the same spelling with underscores and without, + // so handle that case manually. + return A.getAttributeSpellingListIndex() == Keyword_kernel || + A.getAttrName()->getName() == "kernel"; + } + static inline bool isOpenCLSpelling(const AttributeCommonInfo* A) { + if (!A) return false; + return isOpenCLSpelling(*A); + } +}]; } def SYCLKernelEntryPoint : InheritableAttr { @@ -1619,15 +1656,6 @@ def Allocating : TypeAttr { let Documentation = [AllocatingDocs]; } -// Similar to CUDA, OpenCL attributes do not receive a [[]] spelling because -// the specification does not expose them with one currently. 
-def OpenCLKernel : InheritableAttr { - let Spellings = [CustomKeyword<"__kernel">, CustomKeyword<"kernel">]; - let Subjects = SubjectList<[Function], ErrorDiag>; - let Documentation = [Undocumented]; - let SimpleHandler = 1; -} - def OpenCLUnrollHint : StmtAttr { let Spellings = [GNU<"opencl_unroll_hint">]; let Subjects = SubjectList<[ForStmt, CXXForRangeStmt, WhileStmt, DoStmt], @@ -2362,11 +2390,6 @@ def AMDGPUMaxNumWorkGroups : InheritableAttr { let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">; } -def AMDGPUKernelCall : DeclOrTypeAttr { - let Spellings = [Clang<"amdgpu_kernel">]; - let Documentation = [Undocumented]; -} - def BPFPreserveAccessIndex : InheritableAttr, TargetSpecificAttr { let Spellings = [Clang<"preserve_access_index">]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 65d66dd398ad1..2695ac06251ee 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -396,9 +396,13 @@ any option of a multiversioned function is undefined. }]; } -def SYCLKernelDocs : Documentation { +def DeviceKernelDocs : Documentation { let Category = DocCatFunction; + let Heading = "device_kernel, sycl_kernel, nvptx_kernel, amdgpu_kernel, " + "kernel, __kernel"; let Content = [{ +These attributes specify that the function represents a kernel for device offloading. +The specific semantics depend on the offloading language, target, and attribute spelling. The ``sycl_kernel`` attribute specifies that a function template will be used to outline device code and to generate an OpenCL kernel. 
Here is a code example of the SYCL program, which demonstrates the compiler's diff --git a/clang/include/clang/Basic/Specifiers.h b/clang/include/clang/Basic/Specifiers.h index 491badcc804e7..698fd9da5ced1 100644 --- a/clang/include/clang/Basic/Specifiers.h +++ b/clang/include/clang/Basic/Specifiers.h @@ -289,14 +289,13 @@ namespace clang { CC_AAPCS_VFP, // __attribute__((pcs("aapcs-vfp"))) CC_IntelOclBicc, // __attribute__((intel_ocl_bicc)) CC_SpirFunction, // default for OpenCL functions on SPIR target - CC_OpenCLKernel, // inferred for OpenCL kernels + CC_DeviceKernel, // __attribute__((device_kernel)) CC_Swift, // __attribute__((swiftcall)) CC_SwiftAsync, // __attribute__((swiftasynccall)) CC_PreserveMost, // __attribute__((preserve_most)) CC_PreserveAll, // __attribute__((preserve_all)) CC_AArch64VectorCall, // __attribute__((aarch64_vector_pcs)) CC_AArch64SVEPCS, // __attribute__((aarch64_sve_pcs)) - CC_AMDGPUKernelCall, // __attribute__((amdgpu_kernel)) CC_M68kRTD, // __attribute__((m68k_rtd)) CC_PreserveNone, // __attribute__((preserve_none)) CC_RISCVVectorCall, // __attribute__((riscv_vector_cc)) @@ -326,7 +325,7 @@ namespace clang { case CC_X86Pascal: case CC_X86VectorCall: case CC_SpirFunction: - case CC_OpenCLKernel: + case CC_DeviceKernel: case CC_Swift: case CC_SwiftAsync: case CC_M68kRTD: diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 8425e40567b27..aad2d82401111 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -3541,7 +3541,7 @@ bool FunctionDecl::isExternC() const { } bool FunctionDecl::isInExternCContext() const { - if (hasAttr()) + if (DeviceKernelAttr::isOpenCLSpelling(getAttr())) return true; return getLexicalDeclContext()->isExternCContext(); } @@ -5510,7 +5510,8 @@ FunctionDecl *FunctionDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID) { } bool FunctionDecl::isReferenceableKernel() const { - return hasAttr() || hasAttr(); + return hasAttr() || + DeviceKernelAttr::isOpenCLSpelling(getAttr()); } 
BlockDecl *BlockDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L) { diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 33a8728728574..db52c0d54db35 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -1556,7 +1556,8 @@ void CXXNameMangler::mangleUnqualifiedName( FD && FD->hasAttr() && GD.getKernelReferenceKind() == KernelReferenceKind::Stub; bool IsOCLDeviceStub = - FD && FD->hasAttr() && + FD && + DeviceKernelAttr::isOpenCLSpelling(FD->getAttr()) && GD.getKernelReferenceKind() == KernelReferenceKind::Stub; if (IsDeviceStub) mangleDeviceStubName(II); @@ -3529,10 +3530,9 @@ StringRef CXXNameMangler::getCallingConvQualifierName(CallingConv CC) { case CC_AAPCS_VFP: case CC_AArch64VectorCall: case CC_AArch64SVEPCS: - case CC_AMDGPUKernelCall: case CC_IntelOclBicc: case CC_SpirFunction: - case CC_OpenCLKernel: + case CC_DeviceKernel: case CC_PreserveMost: case CC_PreserveAll: case CC_M68kRTD: diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index add737b762ccc..e958498792bb0 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -1164,7 +1164,9 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(GlobalDecl GD, ->hasAttr())) && GD.getKernelReferenceKind() == KernelReferenceKind::Stub; bool IsOCLDeviceStub = - ND && isa(ND) && ND->hasAttr() && + ND && isa(ND) && + DeviceKernelAttr::isOpenCLSpelling( + ND->getAttr()) && GD.getKernelReferenceKind() == KernelReferenceKind::Stub; if (IsDeviceStub) mangleSourceName( diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index ccacaf29e001f..17f88f51448d9 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -3600,14 +3600,12 @@ StringRef FunctionType::getNameForCallConv(CallingConv CC) { return "aarch64_vector_pcs"; case CC_AArch64SVEPCS: return "aarch64_sve_pcs"; - case CC_AMDGPUKernelCall: - return "amdgpu_kernel"; case CC_IntelOclBicc: return 
"intel_ocl_bicc"; case CC_SpirFunction: return "spir_function"; - case CC_OpenCLKernel: - return "opencl_kernel"; + case CC_DeviceKernel: + return "device_kernel"; case CC_Swift: return "swiftcall"; case CC_SwiftAsync: @@ -4320,7 +4318,7 @@ bool AttributedType::isCallingConv() const { case attr::VectorCall: case attr::AArch64VectorPcs: case attr::AArch64SVEPcs: - case attr::AMDGPUKernelCall: + case attr::DeviceKernel: case attr::Pascal: case attr::MSABI: case attr::SysVABI: diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index cba1a2d98d660..1c92abba73905 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -1096,8 +1096,8 @@ void TypePrinter::printFunctionAfter(const FunctionType::ExtInfo &Info, case CC_AArch64SVEPCS: OS << "__attribute__((aarch64_sve_pcs))"; break; - case CC_AMDGPUKernelCall: - OS << "__attribute__((amdgpu_kernel))"; + case CC_DeviceKernel: + OS << "__attribute__((device_kernel))"; break; case CC_IntelOclBicc: OS << " __attribute__((intel_ocl_bicc))"; @@ -1112,7 +1112,6 @@ void TypePrinter::printFunctionAfter(const FunctionType::ExtInfo &Info, OS << " __attribute__((regcall))"; break; case CC_SpirFunction: - case CC_OpenCLKernel: // Do nothing. These CCs are not available as attributes. 
break; case CC_Swift: @@ -2065,7 +2064,9 @@ void TypePrinter::printAttributedAfter(const AttributedType *T, } case attr::AArch64VectorPcs: OS << "aarch64_vector_pcs"; break; case attr::AArch64SVEPcs: OS << "aarch64_sve_pcs"; break; - case attr::AMDGPUKernelCall: OS << "amdgpu_kernel"; break; + case attr::DeviceKernel: + OS << T->getAttr()->getSpelling(); + break; case attr::IntelOclBicc: OS << "inteloclbicc"; break; case attr::PreserveMost: OS << "preserve_most"; diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index e1f6c7b834dc7..fd2278a700fe9 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -1346,7 +1346,7 @@ AArch64TargetInfo::checkCallingConvention(CallingConv CC) const { case CC_PreserveMost: case CC_PreserveAll: case CC_PreserveNone: - case CC_OpenCLKernel: + case CC_DeviceKernel: case CC_AArch64VectorCall: case CC_AArch64SVEPCS: case CC_Win64: @@ -1704,7 +1704,7 @@ WindowsARM64TargetInfo::checkCallingConvention(CallingConv CC) const { case CC_X86FastCall: return CCCR_Ignore; case CC_C: - case CC_OpenCLKernel: + case CC_DeviceKernel: case CC_PreserveMost: case CC_PreserveAll: case CC_PreserveNone: diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h index 8ea544ba28b10..509128f3cf070 100644 --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -415,8 +415,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { default: return CCCR_Warning; case CC_C: - case CC_OpenCLKernel: - case CC_AMDGPUKernelCall: + case CC_DeviceKernel: return CCCR_OK; } } diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp index ca2c1ffbb0eb7..acf28b7a71454 100644 --- a/clang/lib/Basic/Targets/ARM.cpp +++ b/clang/lib/Basic/Targets/ARM.cpp @@ -1405,7 +1405,7 @@ ARMTargetInfo::checkCallingConvention(CallingConv CC) const { case CC_AAPCS_VFP: case CC_Swift: case CC_SwiftAsync: - case CC_OpenCLKernel: + 
case CC_DeviceKernel: return CCCR_OK; default: return CCCR_Warning; @@ -1480,7 +1480,7 @@ WindowsARMTargetInfo::checkCallingConvention(CallingConv CC) const { case CC_X86VectorCall: return CCCR_Ignore; case CC_C: - case CC_OpenCLKernel: + case CC_DeviceKernel: case CC_PreserveMost: case CC_PreserveAll: case CC_Swift: diff --git a/clang/lib/Basic/Targets/BPF.h b/clang/lib/Basic/Targets/BPF.h index d1f68b842348e..d9e5cf4d8a92f 100644 --- a/clang/lib/Basic/Targets/BPF.h +++ b/clang/lib/Basic/Targets/BPF.h @@ -94,7 +94,7 @@ class LLVM_LIBRARY_VISIBILITY BPFTargetInfo : public TargetInfo { default: return CCCR_Warning; case CC_C: - case CC_OpenCLKernel: + case CC_DeviceKernel: return CCCR_OK; } } diff --git a/clang/lib/Basic/Targets/Mips.cpp b/clang/lib/Basic/Targets/Mips.cpp index 0bf5a062d3192..971a62b6c4037 100644 --- a/clang/lib/Basic/Targets/Mips.cpp +++ b/clang/lib/Basic/Targets/Mips.cpp @@ -337,7 +337,7 @@ WindowsMipsTargetInfo::checkCallingConvention(CallingConv CC) const { case CC_X86VectorCall: return CCCR_Ignore; case CC_C: - case CC_OpenCLKernel: + case CC_DeviceKernel: case CC_PreserveMost: case CC_PreserveAll: case CC_Swift: diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h index bf249e271a870..1521b3e9eada3 100644 --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -191,7 +191,7 @@ class LLVM_LIBRARY_VISIBILITY BaseSPIRTargetInfo : public TargetInfo { } CallingConvCheckResult checkCallingConvention(CallingConv CC) const override { - return (CC == CC_SpirFunction || CC == CC_OpenCLKernel) ? CCCR_OK + return (CC == CC_SpirFunction || CC == CC_DeviceKernel) ? 
CCCR_OK : CCCR_Warning; } diff --git a/clang/lib/Basic/Targets/SystemZ.h b/clang/lib/Basic/Targets/SystemZ.h index 1f69530c4757f..8a54ca4b75d7b 100644 --- a/clang/lib/Basic/Targets/SystemZ.h +++ b/clang/lib/Basic/Targets/SystemZ.h @@ -244,7 +244,7 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo { switch (CC) { case CC_C: case CC_Swift: - case CC_OpenCLKernel: + case CC_DeviceKernel: return CCCR_OK; case CC_SwiftAsync: return CCCR_Error; diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index 780385f9c9bc5..5af5210d3b90f 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -408,10 +408,11 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { case CC_Swift: case CC_X86Pascal: case CC_IntelOclBicc: - case CC_OpenCLKernel: return CCCR_OK; case CC_SwiftAsync: return CCCR_Error; + case CC_DeviceKernel: + return IsOpenCL ? CCCR_OK : CCCR_Warning; default: return CCCR_Warning; } @@ -439,7 +440,13 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { uint64_t getPointerAlignV(LangAS AddrSpace) const override { return getPointerWidthV(AddrSpace); } + void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override { + TargetInfo::adjust(Diags, Opts); + IsOpenCL = Opts.OpenCL; + } +private: + bool IsOpenCL = false; }; // X86-32 generic target @@ -785,8 +792,9 @@ class LLVM_LIBRARY_VISIBILITY X86_64TargetInfo : public X86TargetInfo { case CC_PreserveAll: case CC_PreserveNone: case CC_X86RegCall: - case CC_OpenCLKernel: return CCCR_OK; + case CC_DeviceKernel: + return IsOpenCL ? 
CCCR_OK : CCCR_Warning; default: return CCCR_Warning; } @@ -817,7 +825,6 @@ class LLVM_LIBRARY_VISIBILITY X86_64TargetInfo : public X86TargetInfo { return X86TargetInfo::validateGlobalRegisterVariable(RegName, RegSize, HasSizeMismatch); } - void setMaxAtomicWidth() override { if (hasFeature("cx16")) MaxAtomicInlineWidth = 128; @@ -829,6 +836,14 @@ class LLVM_LIBRARY_VISIBILITY X86_64TargetInfo : public X86TargetInfo { size_t getMaxBitIntWidth() const override { return llvm::IntegerType::MAX_INT_BITS; } + + void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override { + TargetInfo::adjust(Diags, Opts); + IsOpenCL = Opts.OpenCL; + } + +private: + bool IsOpenCL = false; }; // x86-64 UEFI target @@ -914,7 +929,7 @@ class LLVM_LIBRARY_VISIBILITY WindowsX86_64TargetInfo case CC_Swift: case CC_SwiftAsync: case CC_X86RegCall: - case CC_OpenCLKernel: + case CC_DeviceKernel: return CCCR_OK; default: return CCCR_Warning; diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index bd920a2e3f2dd..3130059b00740 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -80,12 +80,19 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) { return llvm::CallingConv::AArch64_VectorCall; case CC_AArch64SVEPCS: return llvm::CallingConv::AArch64_SVE_VectorCall; - case CC_AMDGPUKernelCall: - return llvm::CallingConv::AMDGPU_KERNEL; case CC_SpirFunction: return llvm::CallingConv::SPIR_FUNC; - case CC_OpenCLKernel: - return CGM.getTargetCodeGenInfo().getOpenCLKernelCallingConv(); + case CC_DeviceKernel: { + if (CGM.getLangOpts().OpenCL) + return CGM.getTargetCodeGenInfo().getOpenCLKernelCallingConv(); + if (CGM.getTriple().isSPIROrSPIRV()) + return llvm::CallingConv::SPIR_KERNEL; + if (CGM.getTriple().isAMDGPU()) + return llvm::CallingConv::AMDGPU_KERNEL; + if (CGM.getTriple().isNVPTX()) + return llvm::CallingConv::PTX_Kernel; + llvm_unreachable("Unknown kernel calling convention"); + } case CC_PreserveMost: return 
llvm::CallingConv::PreserveMost; case CC_PreserveAll: @@ -283,8 +290,8 @@ static CallingConv getCallingConventionForDecl(const ObjCMethodDecl *D, if (D->hasAttr()) return CC_AArch64SVEPCS; - if (D->hasAttr()) - return CC_AMDGPUKernelCall; + if (D->hasAttr()) + return CC_DeviceKernel; if (D->hasAttr()) return CC_IntelOclBicc; @@ -532,7 +539,7 @@ CodeGenTypes::arrangeFunctionDeclaration(const GlobalDecl GD) { assert(isa(FTy)); setCUDAKernelCallingConvention(FTy, CGM, FD); - if (FD->hasAttr() && + if (DeviceKernelAttr::isOpenCLSpelling(FD->getAttr()) && GD.getKernelReferenceKind() == KernelReferenceKind::Stub) { const FunctionType *FT = FTy->getAs(); CGM.getTargetCodeGenInfo().setOCLKernelStubCallingConvention(FT); @@ -760,7 +767,7 @@ CodeGenTypes::arrangeSYCLKernelCallerDeclaration(QualType resultType, return arrangeLLVMFunctionInfo(GetReturnType(resultType), FnInfoOpts::None, argTypes, - FunctionType::ExtInfo(CC_OpenCLKernel), + FunctionType::ExtInfo(CC_DeviceKernel), /*paramInfos=*/{}, RequiredArgs::All); } @@ -2528,7 +2535,8 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, NumElemsParam); } - if (TargetDecl->hasAttr() && + if (DeviceKernelAttr::isOpenCLSpelling( + TargetDecl->getAttr()) && CallingConv != CallingConv::CC_C && CallingConv != CallingConv::CC_SpirFunction) { // Check CallingConv to avoid adding uniform-work-group-size attribute to @@ -2911,7 +2919,9 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, // > For arguments to a __kernel function declared to be a pointer to a // > data type, the OpenCL compiler can assume that the pointee is always // > appropriately aligned as required by the data type. 
- if (TargetDecl && TargetDecl->hasAttr() && + if (TargetDecl && + DeviceKernelAttr::isOpenCLSpelling( + TargetDecl->getAttr()) && ParamType->isPointerType()) { QualType PTy = ParamType->getPointeeType(); if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) { diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 3d20756774708..9cf46f13b7b94 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -1693,9 +1693,8 @@ static unsigned getDwarfCC(CallingConv CC) { return llvm::dwarf::DW_CC_LLVM_IntelOclBicc; case CC_SpirFunction: return llvm::dwarf::DW_CC_LLVM_SpirFunction; - case CC_OpenCLKernel: - case CC_AMDGPUKernelCall: - return llvm::dwarf::DW_CC_LLVM_OpenCLKernel; + case CC_DeviceKernel: + return llvm::dwarf::DW_CC_LLVM_DeviceKernel; case CC_Swift: return llvm::dwarf::DW_CC_LLVM_Swift; case CC_SwiftAsync: diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 7cb7ee20fcf6a..7919cfbb157ad 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -5887,7 +5887,7 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, GlobalDecl GD) { } static GlobalDecl getGlobalDeclForDirectCall(const FunctionDecl *FD) { - if (FD->hasAttr()) + if (DeviceKernelAttr::isOpenCLSpelling(FD->getAttr())) return GlobalDecl(FD, KernelReferenceKind::Stub); return GlobalDecl(FD); } @@ -6309,7 +6309,7 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const auto *FnType = cast(PointeeType); if (const auto *FD = dyn_cast_or_null(TargetDecl); - FD && FD->hasAttr()) + FD && DeviceKernelAttr::isOpenCLSpelling(FD->getAttr())) CGM.getTargetCodeGenInfo().setOCLKernelStubCallingConvention(FnType); // If we are checking indirect calls and this call is indirect, check that the diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 2256cc08e2212..ba13c114309b5 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ 
b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -621,7 +621,7 @@ CodeGenFunction::getUBSanFunctionTypeHash(QualType Ty) const { void CodeGenFunction::EmitKernelMetadata(const FunctionDecl *FD, llvm::Function *Fn) { - if (!FD->hasAttr() && !FD->hasAttr()) + if (!FD->hasAttr() && !FD->hasAttr()) return; llvm::LLVMContext &Context = getLLVMContext(); @@ -1588,7 +1588,8 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, // Implicit copy-assignment gets the same special treatment as implicit // copy-constructors. emitImplicitAssignmentOperatorBody(Args); - } else if (FD->hasAttr() && + } else if (DeviceKernelAttr::isOpenCLSpelling( + FD->getAttr()) && GD.getKernelReferenceKind() == KernelReferenceKind::Kernel) { CallArgList CallArgs; for (unsigned i = 0; i < Args.size(); ++i) { diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 16e010adbeb5f..ac290c9a763f2 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1912,7 +1912,9 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD, } else if (FD && FD->hasAttr() && GD.getKernelReferenceKind() == KernelReferenceKind::Stub) { Out << "__device_stub__" << II->getName(); - } else if (FD && FD->hasAttr() && + } else if (FD && + DeviceKernelAttr::isOpenCLSpelling( + FD->getAttr()) && GD.getKernelReferenceKind() == KernelReferenceKind::Stub) { Out << "__clang_ocl_kern_imp_" << II->getName(); } else { @@ -3927,7 +3929,8 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { // Ignore declarations, they will be emitted on their first use. 
if (const auto *FD = dyn_cast(Global)) { - if (FD->hasAttr() && FD->doesThisDeclarationHaveABody()) + if (DeviceKernelAttr::isOpenCLSpelling(FD->getAttr()) && + FD->doesThisDeclarationHaveABody()) addDeferredDeclToEmit(GlobalDecl(FD, KernelReferenceKind::Stub)); // Update deferred annotations with the latest declaration if the function @@ -4893,7 +4896,7 @@ CodeGenModule::GetAddrOfFunction(GlobalDecl GD, llvm::Type *Ty, bool ForVTable, if (!Ty) { const auto *FD = cast(GD.getDecl()); Ty = getTypes().ConvertType(FD->getType()); - if (FD->hasAttr() && + if (DeviceKernelAttr::isOpenCLSpelling(FD->getAttr()) && GD.getKernelReferenceKind() == KernelReferenceKind::Stub) { const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD); Ty = getTypes().GetFunctionType(FI); @@ -6177,7 +6180,7 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD, (CodeGenOpts.OptimizationLevel == 0) && !D->hasAttr(); - if (D->hasAttr()) { + if (DeviceKernelAttr::isOpenCLSpelling(D->getAttr())) { if (GD.getKernelReferenceKind() == KernelReferenceKind::Stub && !D->hasAttr() && !Fn->hasFnAttribute(llvm::Attribute::NoInline) && diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 7d176e421ac4e..f3df92c44bb6b 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -191,7 +191,7 @@ llvm::Value *TargetCodeGenInfo::createEnqueuedBlockKernel( auto *F = llvm::Function::Create(FT, llvm::GlobalValue::ExternalLinkage, Name, &CGF.CGM.getModule()); llvm::CallingConv::ID KernelCC = - CGF.getTypes().ClangCallConvToLLVMCallConv(CallingConv::CC_OpenCLKernel); + CGF.getTypes().ClangCallConvToLLVMCallConv(CallingConv::CC_DeviceKernel); F->setCallingConv(KernelCC); llvm::AttrBuilder KernelAttrs(C); diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index c8921c434db47..90ccb343b2214 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -337,7 +337,7 @@ 
static bool requiresAMDGPUProtectedVisibility(const Decl *D, return false; return !D->hasAttr() && - (D->hasAttr() || + (D->hasAttr() || (isa(D) && D->hasAttr()) || (isa(D) && (D->hasAttr() || D->hasAttr() || @@ -350,7 +350,7 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes( const auto *ReqdWGS = M.getLangOpts().OpenCL ? FD->getAttr() : nullptr; const bool IsOpenCLKernel = - M.getLangOpts().OpenCL && FD->hasAttr(); + M.getLangOpts().OpenCL && FD->hasAttr(); const bool IsHIPKernel = M.getLangOpts().HIP && FD->hasAttr(); const auto *FlatWGS = FD->getAttr(); @@ -572,7 +572,7 @@ bool AMDGPUTargetCodeGenInfo::shouldEmitDWARFBitFieldSeparators() const { void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention( const FunctionType *&FT) const { FT = getABIInfo().getContext().adjustFunctionType( - FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel)); + FT, FT->getExtInfo().withCallingConv(CC_DeviceKernel)); } /// Return IR struct type for rtinfo struct in rocm-device-libs used for device diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp index 25ab28c54b659..d9c3498233b14 100644 --- a/clang/lib/CodeGen/Targets/NVPTX.cpp +++ b/clang/lib/CodeGen/Targets/NVPTX.cpp @@ -259,40 +259,31 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( llvm::Function *F = cast(GV); - // Perform special handling in OpenCL mode - if (M.getLangOpts().OpenCL) { - // Use OpenCL function attributes to check for kernel functions + // Perform special handling in OpenCL/CUDA mode + if (M.getLangOpts().OpenCL || M.getLangOpts().CUDA) { + // Use function attributes to check for kernel functions // By default, all functions are device functions - if (FD->hasAttr()) { - // OpenCL __kernel functions get kernel metadata + if (FD->hasAttr() || FD->hasAttr()) { + // OpenCL/CUDA kernel functions get kernel metadata // Create !{, metadata !"kernel", i32 1} node - F->setCallingConv(llvm::CallingConv::PTX_Kernel); // And kernel functions are not subject to 
inlining F->addFnAttr(llvm::Attribute::NoInline); + if (FD->hasAttr()) { + SmallVector GCI; + for (auto IV : llvm::enumerate(FD->parameters())) + if (IV.value()->hasAttr()) + // For some reason arg indices are 1-based in NVVM + GCI.push_back(IV.index() + 1); + // Create !{, metadata !"kernel", i32 1} node + F->setCallingConv(llvm::CallingConv::PTX_Kernel); + addGridConstantNVVMMetadata(F, GCI); + } + if (CUDALaunchBoundsAttr *Attr = FD->getAttr()) + M.handleCUDALaunchBoundsAttr(F, Attr); } } - - // Perform special handling in CUDA mode. - if (M.getLangOpts().CUDA) { - // CUDA __global__ functions get a kernel metadata entry. Since - // __global__ functions cannot be called from the device, we do not - // need to set the noinline attribute. - if (FD->hasAttr()) { - SmallVector GCI; - for (auto IV : llvm::enumerate(FD->parameters())) - if (IV.value()->hasAttr()) - // For some reason arg indices are 1-based in NVVM - GCI.push_back(IV.index() + 1); - // Create !{, metadata !"kernel", i32 1} node - F->setCallingConv(llvm::CallingConv::PTX_Kernel); - addGridConstantNVVMMetadata(F, GCI); - } - if (CUDALaunchBoundsAttr *Attr = FD->getAttr()) - M.handleCUDALaunchBoundsAttr(F, Attr); - } - // Attach kernel metadata directly if compiling for NVPTX. - if (FD->hasAttr()) { + if (FD->hasAttr()) { F->setCallingConv(llvm::CallingConv::PTX_Kernel); } } diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp index f35c124f50aa0..9c186a16e787a 100644 --- a/clang/lib/CodeGen/Targets/SPIR.cpp +++ b/clang/lib/CodeGen/Targets/SPIR.cpp @@ -227,7 +227,7 @@ void SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention( // Convert HIP kernels to SPIR-V kernels. 
if (getABIInfo().getContext().getLangOpts().HIP) { FT = getABIInfo().getContext().adjustFunctionType( - FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel)); + FT, FT->getExtInfo().withCallingConv(CC_DeviceKernel)); return; } } diff --git a/clang/lib/CodeGen/Targets/TCE.cpp b/clang/lib/CodeGen/Targets/TCE.cpp index f3685ccd9825a..df49aea49a1e3 100644 --- a/clang/lib/CodeGen/Targets/TCE.cpp +++ b/clang/lib/CodeGen/Targets/TCE.cpp @@ -39,7 +39,7 @@ void TCETargetCodeGenInfo::setTargetAttributes( llvm::Function *F = cast(GV); if (M.getLangOpts().OpenCL) { - if (FD->hasAttr()) { + if (FD->hasAttr()) { // OpenCL C Kernel functions are not subject to inlining F->addFnAttr(llvm::Attribute::NoInline); const ReqdWorkGroupSizeAttr *Attr = FD->getAttr(); diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 814f81cb64cae..515dd37436e14 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -8790,7 +8790,7 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) { FunctionDecl *FD = getCurFunctionDecl(); // OpenCL v1.1 s6.5.2 and s6.5.3: no local or constant variables // in functions. - if (FD && !FD->hasAttr()) { + if (FD && !FD->hasAttr()) { if (T.getAddressSpace() == LangAS::opencl_constant) Diag(NewVD->getLocation(), diag::err_opencl_function_variable) << 0 /*non-kernel only*/ << "constant"; @@ -8802,7 +8802,7 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) { } // OpenCL v2.0 s6.5.2 and s6.5.3: local and constant variables must be // in the outermost scope of a kernel function. 
- if (FD && FD->hasAttr()) { + if (FD && FD->hasAttr()) { if (!getCurScope()->isFunctionScope()) { if (T.getAddressSpace() == LangAS::opencl_constant) Diag(NewVD->getLocation(), diag::err_opencl_addrspace_scope) @@ -10935,9 +10935,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, MarkUnusedFileScopedDecl(NewFD); - - - if (getLangOpts().OpenCL && NewFD->hasAttr()) { + if (getLangOpts().OpenCL && NewFD->hasAttr()) { // OpenCL v1.2 s6.8 static is invalid for kernel functions. if (SC == SC_Static) { Diag(D.getIdentifierLoc(), diag::err_static_kernel); @@ -12442,7 +12440,7 @@ void Sema::CheckMain(FunctionDecl *FD, const DeclSpec &DS) { if (getLangOpts().OpenCL) { Diag(FD->getLocation(), diag::err_opencl_no_main) - << FD->hasAttr(); + << FD->hasAttr(); FD->setInvalidDecl(); return; } @@ -15708,7 +15706,7 @@ ShouldWarnAboutMissingPrototype(const FunctionDecl *FD, return false; // Don't warn for OpenCL kernels. - if (FD->hasAttr()) + if (FD->hasAttr()) return false; // Don't warn on explicitly deleted functions. @@ -20602,7 +20600,7 @@ Sema::FunctionEmissionStatus Sema::getEmissionStatus(const FunctionDecl *FD, // SYCL functions can be template, so we check if they have appropriate // attribute prior to checking if it is a template. - if (LangOpts.SYCLIsDevice && FD->hasAttr()) + if (LangOpts.SYCLIsDevice && FD->hasAttr()) return FunctionEmissionStatus::Emitted; // Templates are emitted when they're instantiated. 
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 8ce51cc2882bf..9116d7afba78f 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -5088,8 +5088,8 @@ static void handleGlobalAttr(Sema &S, Decl *D, const ParsedAttr &AL) { if (FD->isInlineSpecified() && !S.getLangOpts().CUDAIsDevice) S.Diag(FD->getBeginLoc(), diag::warn_kern_is_inline) << FD; - if (AL.getKind() == ParsedAttr::AT_NVPTXKernel) - D->addAttr(::new (S.Context) NVPTXKernelAttr(S.Context, AL)); + if (AL.getKind() == ParsedAttr::AT_DeviceKernel) + D->addAttr(::new (S.Context) DeviceKernelAttr(S.Context, AL)); else D->addAttr(::new (S.Context) CUDAGlobalAttr(S.Context, AL)); // In host compilation the kernel is emitted as a stub function, which is @@ -5224,9 +5224,11 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) { case ParsedAttr::AT_AArch64SVEPcs: D->addAttr(::new (S.Context) AArch64SVEPcsAttr(S.Context, AL)); return; - case ParsedAttr::AT_AMDGPUKernelCall: - D->addAttr(::new (S.Context) AMDGPUKernelCallAttr(S.Context, AL)); + case ParsedAttr::AT_DeviceKernel: { + // The attribute should already be applied. 
+ assert(D->hasAttr<DeviceKernelAttr>() && "Expected attribute"); return; + } case ParsedAttr::AT_IntelOclBicc: D->addAttr(::new (S.Context) IntelOclBiccAttr(S.Context, AL)); return; @@ -5269,6 +5271,33 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) { } } +static void handleDeviceKernelAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + const auto *FD = dyn_cast_or_null<FunctionDecl>(D); + bool IsFunctionTemplate = FD && FD->getDescribedFunctionTemplate(); + if (S.getLangOpts().SYCLIsDevice) { + if (!IsFunctionTemplate) { + S.Diag(AL.getLoc(), diag::warn_attribute_wrong_decl_type_str) + << AL << AL.isRegularKeywordAttribute() << "function templates"; + } else { + S.SYCL().handleKernelAttr(D, AL); + } + } else if (DeviceKernelAttr::isSYCLSpelling(AL)) { + S.Diag(AL.getLoc(), diag::warn_attribute_ignored) << AL; + } else if (S.getASTContext().getTargetInfo().getTriple().isNVPTX()) { + handleGlobalAttr(S, D, AL); + } else { + // OpenCL C++ will throw a more specific error. + if (!S.getLangOpts().OpenCLCPlusPlus && (!FD || IsFunctionTemplate)) { + S.Diag(AL.getLoc(), diag::err_attribute_wrong_decl_type_str) + << AL << AL.isRegularKeywordAttribute() << "functions"; + } + handleSimpleAttribute<DeviceKernelAttr>(S, D, AL); + } + // Make sure we validate the CC with the target + // and warn/error if necessary. + handleCallConvAttr(S, D, AL); +} + static void handleSuppressAttr(Sema &S, Decl *D, const ParsedAttr &AL) { if (AL.getAttributeSpellingListIndex() == SuppressAttr::CXX11_gsl_suppress) { // Suppression attribute with GSL spelling requires at least 1 argument.
@@ -5433,9 +5462,6 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC, case ParsedAttr::AT_AArch64SVEPcs: CC = CC_AArch64SVEPCS; break; - case ParsedAttr::AT_AMDGPUKernelCall: - CC = CC_AMDGPUKernelCall; - break; case ParsedAttr::AT_RegCall: CC = CC_X86RegCall; break; @@ -5505,6 +5531,11 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC, llvm::Log2_64(ABIVLen) - 5); break; } + case ParsedAttr::AT_DeviceKernel: { + // Validation was handled in handleDeviceKernelAttr. + CC = CC_DeviceKernel; + break; + } default: llvm_unreachable("unexpected attribute kind"); } @@ -7109,9 +7140,6 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_EnumExtensibility: handleEnumExtensibilityAttr(S, D, AL); break; - case ParsedAttr::AT_SYCLKernel: - S.SYCL().handleKernelAttr(D, AL); - break; case ParsedAttr::AT_SYCLKernelEntryPoint: S.SYCL().handleKernelEntryPointAttr(D, AL); break; @@ -7136,7 +7164,6 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_CalledOnce: handleCalledOnceAttr(S, D, AL); break; - case ParsedAttr::AT_NVPTXKernel: case ParsedAttr::AT_CUDAGlobal: handleGlobalAttr(S, D, AL); break; @@ -7400,13 +7427,15 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_PreserveAll: case ParsedAttr::AT_AArch64VectorPcs: case ParsedAttr::AT_AArch64SVEPcs: - case ParsedAttr::AT_AMDGPUKernelCall: case ParsedAttr::AT_M68kRTD: case ParsedAttr::AT_PreserveNone: case ParsedAttr::AT_RISCVVectorCC: case ParsedAttr::AT_RISCVVLSCC: handleCallConvAttr(S, D, AL); break; + case ParsedAttr::AT_DeviceKernel: + handleDeviceKernelAttr(S, D, AL); + break; case ParsedAttr::AT_Suppress: handleSuppressAttr(S, D, AL); break; @@ -7719,9 +7748,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, static bool isKernelDecl(Decl *D) { const FunctionType *FnTy = D->getFunctionType(); - return 
D->hasAttr<OpenCLKernelAttr>() || - (FnTy && FnTy->getCallConv() == CallingConv::CC_AMDGPUKernelCall) || - D->hasAttr<CUDAGlobalAttr>() || D->getAttr<NVPTXKernelAttr>(); + return D->hasAttr<DeviceKernelAttr>() || + (FnTy && FnTy->getCallConv() == CallingConv::CC_DeviceKernel) || + D->hasAttr<CUDAGlobalAttr>(); } void Sema::ProcessDeclAttributeList( @@ -7748,7 +7777,7 @@ void Sema::ProcessDeclAttributeList( // good to have a way to specify "these attributes must appear as a group", // for these. Additionally, it would be good to have a way to specify "these // attribute must never appear as a group" for attributes like cold and hot. - if (!(D->hasAttr<OpenCLKernelAttr>() || + if (!(D->hasAttr<DeviceKernelAttr>() || (D->hasAttr<CUDAGlobalAttr>() && Context.getTargetInfo().getTriple().isSPIRV()))) { // These attributes cannot be applied to a non-kernel function. diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 1969d7b0ba837..3e03cb4bd5f99 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -199,7 +199,7 @@ void SemaSYCL::handleKernelAttr(Decl *D, const ParsedAttr &AL) { return; } - handleSimpleAttribute<SYCLKernelAttr>(*this, D, AL); + handleSimpleAttribute<DeviceKernelAttr>(*this, D, AL); } void SemaSYCL::handleKernelEntryPointAttr(Decl *D, const ParsedAttr &AL) { diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 7fbda2a804d75..7cb611d26e826 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -668,9 +668,9 @@ static void instantiateDependentAMDGPUMaxNumWorkGroupsAttr( // This doesn't take any template parameters, but we have a custom action that // needs to happen when the kernel itself is instantiated. We need to run the // ItaniumMangler to mark the names required to name this kernel.
-static void instantiateDependentSYCLKernelAttr( +static void instantiateDependentDeviceKernelAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, - const SYCLKernelAttr &Attr, Decl *New) { + const DeviceKernelAttr &Attr, Decl *New) { New->addAttr(Attr.clone(S.getASTContext())); } @@ -912,8 +912,8 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs, continue; } - if (auto *A = dyn_cast<SYCLKernelAttr>(TmplAttr)) { - instantiateDependentSYCLKernelAttr(*this, TemplateArgs, *A, New); + if (auto *A = dyn_cast<DeviceKernelAttr>(TmplAttr)) { + instantiateDependentDeviceKernelAttr(*this, TemplateArgs, *A, New); continue; } diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 874e41ac0b90c..31d132de18d68 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -134,7 +134,7 @@ static void diagnoseBadTypeAttribute(Sema &S, const ParsedAttr &attr, case ParsedAttr::AT_VectorCall: \ case ParsedAttr::AT_AArch64VectorPcs: \ case ParsedAttr::AT_AArch64SVEPcs: \ - case ParsedAttr::AT_AMDGPUKernelCall: \ + case ParsedAttr::AT_DeviceKernel: \ case ParsedAttr::AT_MSABI: \ case ParsedAttr::AT_SysVABI: \ case ParsedAttr::AT_Pcs: \ @@ -3754,18 +3754,7 @@ static CallingConv getCCForDeclaratorChunk( CallingConv CC = S.Context.getDefaultCallingConvention(FTI.isVariadic, IsCXXInstanceMethod); - // Attribute AT_OpenCLKernel affects the calling convention for SPIR - // and AMDGPU targets, hence it cannot be treated as a calling - // convention attribute. This is the simplest place to infer - // calling convention for OpenCL kernels.
- if (S.getLangOpts().OpenCL) { - for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) { - if (AL.getKind() == ParsedAttr::AT_OpenCLKernel) { - CC = CC_OpenCLKernel; - break; - } - } - } else if (S.getLangOpts().CUDA) { + if (S.getLangOpts().CUDA) { // If we're compiling CUDA/HIP code and targeting HIPSPV we need to make // sure the kernels will be marked with the right calling convention so that // they will be visible by the APIs that ingest SPIR-V. We do not do this @@ -3774,13 +3763,20 @@ static CallingConv getCCForDeclaratorChunk( if (Triple.isSPIRV() && Triple.getVendor() != llvm::Triple::AMD) { for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) { if (AL.getKind() == ParsedAttr::AT_CUDAGlobal) { - CC = CC_OpenCLKernel; + CC = CC_DeviceKernel; break; } } } } - + if (!S.getLangOpts().isSYCL()) { + for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) { + if (AL.getKind() == ParsedAttr::AT_DeviceKernel) { + CC = CC_DeviceKernel; + break; + } + } + } return CC; } @@ -7530,8 +7526,8 @@ static Attr *getCCTypeAttr(ASTContext &Ctx, ParsedAttr &Attr) { return createSimpleAttr(Ctx, Attr); case ParsedAttr::AT_ArmStreaming: return createSimpleAttr(Ctx, Attr); - case ParsedAttr::AT_AMDGPUKernelCall: - return createSimpleAttr(Ctx, Attr); + case ParsedAttr::AT_DeviceKernel: + return createSimpleAttr(Ctx, Attr); case ParsedAttr::AT_Pcs: { // The attribute may have had a fixit applied where we treated an // identifier as a string literal. The contents of the string are valid, @@ -8719,6 +8715,16 @@ static void HandleHLSLParamModifierAttr(TypeProcessingState &State, } } +static bool isMultiSubjectAttrAllowedOnType(const ParsedAttr &Attr) { + // The DeviceKernel attribute is shared for many targets, and + // it is only allowed to be a type attribute with the AMDGPU + // spelling, so skip processing the attr as a type attr + // unless it has that spelling. 
+ if (Attr.getKind() != ParsedAttr::AT_DeviceKernel) + return true; + return DeviceKernelAttr::isAMDGPUSpelling(Attr); +} + static void processTypeAttrs(TypeProcessingState &state, QualType &type, TypeAttrLocation TAL, const ParsedAttributesView &attrs, @@ -8972,6 +8978,9 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type, break; [[fallthrough]]; FUNCTION_TYPE_ATTRS_CASELIST: + if (!isMultiSubjectAttrAllowedOnType(attr)) + break; + attr.setUsedAsTypeAttr(); // Attributes with standard syntax have strict rules for what they diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test index 7affacb1a109a..af05deb3a13da 100644 --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -109,7 +109,6 @@ // CHECK-NEXT: NSConsumed (SubjectMatchRule_variable_is_parameter) // CHECK-NEXT: NSConsumesSelf (SubjectMatchRule_objc_method) // CHECK-NEXT: NSErrorDomain (SubjectMatchRule_enum) -// CHECK-NEXT: NVPTXKernel (SubjectMatchRule_function) // CHECK-NEXT: Naked (SubjectMatchRule_function) // CHECK-NEXT: NoBuiltin (SubjectMatchRule_function) // CHECK-NEXT: NoCommon (SubjectMatchRule_variable) diff --git a/clang/tools/libclang/CXType.cpp b/clang/tools/libclang/CXType.cpp index ffa942d10669c..f5907d74b0339 100644 --- a/clang/tools/libclang/CXType.cpp +++ b/clang/tools/libclang/CXType.cpp @@ -731,8 +731,8 @@ CXCallingConv clang_getFunctionTypeCallingConv(CXType X) { TCALLINGCONV(RISCVVLSCall_32768); TCALLINGCONV(RISCVVLSCall_65536); case CC_SpirFunction: return CXCallingConv_Unexposed; - case CC_AMDGPUKernelCall: return CXCallingConv_Unexposed; - case CC_OpenCLKernel: return CXCallingConv_Unexposed; + case CC_DeviceKernel: + return CXCallingConv_Unexposed; break; } #undef TCALLINGCONV diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.def b/llvm/include/llvm/BinaryFormat/Dwarf.def index 
e52324a8ebc12..803ed67d534ea 100644 --- a/llvm/include/llvm/BinaryFormat/Dwarf.def +++ b/llvm/include/llvm/BinaryFormat/Dwarf.def @@ -1117,7 +1117,7 @@ HANDLE_DW_CC(0xc3, LLVM_AAPCS) HANDLE_DW_CC(0xc4, LLVM_AAPCS_VFP) HANDLE_DW_CC(0xc5, LLVM_IntelOclBicc) HANDLE_DW_CC(0xc6, LLVM_SpirFunction) -HANDLE_DW_CC(0xc7, LLVM_OpenCLKernel) +HANDLE_DW_CC(0xc7, LLVM_DeviceKernel) HANDLE_DW_CC(0xc8, LLVM_Swift) HANDLE_DW_CC(0xc9, LLVM_PreserveMost) HANDLE_DW_CC(0xca, LLVM_PreserveAll) diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h index bd25f6c30ebf1..a760f773055d2 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h @@ -734,13 +734,15 @@ void DWARFTypePrinter::appendSubroutineNameAfter( OS << " __attribute__((intel_ocl_bicc))"; break; case dwarf::CallingConvention::DW_CC_LLVM_SpirFunction: - case dwarf::CallingConvention::DW_CC_LLVM_OpenCLKernel: - // These aren't available as attributes, but maybe we should still - // render them somehow? (Clang doesn't render them, but that's an issue + // This isn't available as an attribute, but maybe we should still + // render it somehow? 
(Clang doesn't render it, but that's an issue // for template names too - since then the DWARF names of templates // instantiated with function types with these calling conventions won't // have distinct names - so we'd need to fix that too) break; + case dwarf::CallingConvention::DW_CC_LLVM_DeviceKernel: + OS << " __attribute__((device_kernel))"; + break; case dwarf::CallingConvention::DW_CC_LLVM_Swift: // SwiftAsync missing OS << " __attribute__((swiftcall))"; diff --git a/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll b/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll index 1055abe6d3499..0f9a08a85a8cd 100644 --- a/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll +++ b/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll @@ -28,6 +28,6 @@ attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memo !2 = !{i32 7, !"Dwarf Version", i32 5} !3 = !{i32 2, !"Debug Info Version", i32 3} !4 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !5, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) -!5 = !DISubroutineType(cc: DW_CC_LLVM_OpenCLKernel, types: !6) +!5 = !DISubroutineType(cc: DW_CC_LLVM_DeviceKernel, types: !6) !6 = !{null} !7 = !{i32 1024, i32 1, i32 1}