diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index e7f861b0556ca8..4663bec7d4f69d 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -793,6 +793,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Rdm, W("EnableArm64Rd RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Sha1, W("EnableArm64Sha1"), 1, "Allows Arm64 Sha1+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Sha256, W("EnableArm64Sha256"), 1, "Allows Arm64 Sha256+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Rcpc, W("EnableArm64Rcpc"), 1, "Allows Arm64 Rcpc+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Rcpc2, W("EnableArm64Rcpc2"), 1, "Allows Arm64 Rcpc2+ hardware intrinsics to be disabled") #endif /// diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index 5aecf46bf3928c..2ffd9bf6873201 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -30,14 +30,15 @@ enum CORINFO_InstructionSet InstructionSet_Dczva=12, InstructionSet_Rcpc=13, InstructionSet_VectorT128=14, - InstructionSet_ArmBase_Arm64=15, - InstructionSet_AdvSimd_Arm64=16, - InstructionSet_Aes_Arm64=17, - InstructionSet_Crc32_Arm64=18, - InstructionSet_Dp_Arm64=19, - InstructionSet_Rdm_Arm64=20, - InstructionSet_Sha1_Arm64=21, - InstructionSet_Sha256_Arm64=22, + InstructionSet_Rcpc2=15, + InstructionSet_ArmBase_Arm64=16, + InstructionSet_AdvSimd_Arm64=17, + InstructionSet_Aes_Arm64=18, + InstructionSet_Crc32_Arm64=19, + InstructionSet_Dp_Arm64=20, + InstructionSet_Rdm_Arm64=21, + InstructionSet_Sha1_Arm64=22, + InstructionSet_Sha256_Arm64=23, #endif // TARGET_ARM64 #ifdef TARGET_AMD64 InstructionSet_X86Base=1, @@ -761,6 +762,8 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "Rcpc"; case InstructionSet_VectorT128 : return "VectorT128"; + case 
InstructionSet_Rcpc2 : + return "Rcpc2"; #endif // TARGET_ARM64 #ifdef TARGET_AMD64 case InstructionSet_X86Base : @@ -994,6 +997,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Atomics: return InstructionSet_Atomics; case READYTORUN_INSTRUCTION_Rcpc: return InstructionSet_Rcpc; case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128; + case READYTORUN_INSTRUCTION_Rcpc2: return InstructionSet_Rcpc2; #endif // TARGET_ARM64 #ifdef TARGET_AMD64 case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base; diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index d5d27b4b457f3a..70960ff577ee8a 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID; #define GUID_DEFINED #endif // !GUID_DEFINED -constexpr GUID JITEEVersionIdentifier = { /* 5bf301d6-d08e-4c74-ab9b-1d9c1975950f */ - 0x5bf301d6, - 0xd08e, - 0x4c74, - {0xab, 0x9b, 0x1d, 0x9c, 0x19, 0x75, 0x95, 0x0f} +constexpr GUID JITEEVersionIdentifier = { /* a2974440-e8ee-4d95-9e6e-799a330be1a0 */ + 0xa2974440, + 0xe8ee, + 0x4d95, + {0x9e, 0x6e, 0x79, 0x9a, 0x33, 0x0b, 0xe1, 0xa0} }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/inc/readytoruninstructionset.h b/src/coreclr/inc/readytoruninstructionset.h index 0a9a78e03f6c15..190fe4b516617e 100644 --- a/src/coreclr/inc/readytoruninstructionset.h +++ b/src/coreclr/inc/readytoruninstructionset.h @@ -50,6 +50,7 @@ enum ReadyToRunInstructionSet READYTORUN_INSTRUCTION_VectorT128=39, READYTORUN_INSTRUCTION_VectorT256=40, READYTORUN_INSTRUCTION_VectorT512=41, + READYTORUN_INSTRUCTION_Rcpc2=42, }; diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 5434ef7b722ef9..a57ba2e964a9c4 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ 
b/src/coreclr/jit/codegenarmarch.cpp @@ -1761,10 +1761,35 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree) bool addrIsInReg = tree->Addr()->isUsedFromReg(); bool addrIsAligned = ((tree->gtFlags & GTF_IND_UNALIGNED) == 0); - // on arm64-v8.3+ we can use ldap* instructions with acquire/release semantics to avoid + // On arm64-v8.3+ we can use ldap* instructions with acquire/release semantics to avoid // full memory barriers if mixed with STLR bool hasRcpc = compiler->compOpportunisticallyDependsOn(InstructionSet_Rcpc); + // On arm64-v8.4+ we can use ldapur* (acquire semantics, no full barrier) when the address is a contained, + // unscaled [base + simm9]; halfword and wider loads must be aligned, and ldapur only targets integer registers + bool hasRcpc2 = compiler->compOpportunisticallyDependsOn(InstructionSet_Rcpc2); + + bool handledWithLdapur = false; + if (hasRcpc2 && !addrIsInReg && tree->Addr()->OperIs(GT_LEA) && !tree->HasIndex() && (tree->Scale() == 1) && + emitter::emitIns_valid_imm_for_unscaled_ldst_offset(tree->Offset())) + { + if (ins == INS_ldrb) + { + ins = INS_ldapurb; + handledWithLdapur = true; + } + else if ((ins == INS_ldrh) && addrIsAligned) + { + ins = INS_ldapurh; + handledWithLdapur = true; + } + else if ((ins == INS_ldr) && addrIsAligned && genIsValidIntReg(targetReg)) + { + ins = INS_ldapur; + handledWithLdapur = true; + } + } + if ((ins == INS_ldrb) && addrIsInReg) { ins = hasRcpc ? INS_ldaprb : INS_ldarb; @@ -1777,7 +1802,7 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree) { ins = hasRcpc ? 
INS_ldapr : INS_ldar; } - else + else if (!handledWithLdapur) #endif // TARGET_ARM64 { emitBarrier = true; diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 5bb32461a9363f..3c892ec072fd6e 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -628,8 +628,8 @@ class emitter #define MAX_ENCODED_SIZE 15 #elif defined(TARGET_ARM64) #define INSTR_ENCODED_SIZE 4 - static_assert_no_msg(INS_count <= 512); - instruction _idIns : 9; + static_assert_no_msg(INS_count <= 1024); + instruction _idIns : 10; #elif defined(TARGET_LOONGARCH64) // TODO-LoongArch64: not include SIMD-vector. static_assert_no_msg(INS_count <= 512); @@ -712,7 +712,7 @@ class emitter // x86: 17 bits // amd64: 17 bits // arm: 16 bits - // arm64: 17 bits + // arm64: 18 bits // loongarch64: 14 bits // risc-v: 14 bits @@ -754,7 +754,7 @@ class emitter // x86: 38 bits // amd64: 38 bits // arm: 32 bits - // arm64: 31 bits + // arm64: 32 bits // loongarch64: 28 bits // risc-v: 28 bits @@ -763,10 +763,12 @@ class emitter unsigned _idLargeDsp : 1; // does a large displacement follow? unsigned _idLargeCall : 1; // large call descriptor used - unsigned _idBound : 1; // jump target / frame offset bound + unsigned _idBound : 1; // jump target / frame offset bound +#ifndef TARGET_ARMARCH unsigned _idCallRegPtr : 1; // IL indirect calls: addr in reg - unsigned _idCallAddr : 1; // IL indirect calls: can make a direct call to iiaAddr - unsigned _idNoGC : 1; // Some helpers don't get recorded in GC tables +#endif + unsigned _idCallAddr : 1; // IL indirect calls: can make a direct call to iiaAddr + unsigned _idNoGC : 1; // Some helpers don't get recorded in GC tables #if defined(TARGET_XARCH) unsigned _idEvexbContext : 1; // does EVEX.b need to be set. 
#endif // TARGET_XARCH @@ -1509,6 +1511,7 @@ class emitter _idBound = 1; } +#ifndef TARGET_ARMARCH bool idIsCallRegPtr() const { return _idCallRegPtr != 0; @@ -1517,6 +1520,7 @@ class emitter { _idCallRegPtr = 1; } +#endif // Only call instructions that call helper functions may be marked as "IsNoGC", indicating // that a thread executing such a call cannot be stopped for GC. Thus, in partially-interruptible diff --git a/src/coreclr/jit/emitarm.cpp b/src/coreclr/jit/emitarm.cpp index a0dc786782b0bc..b526eafa53e2e3 100644 --- a/src/coreclr/jit/emitarm.cpp +++ b/src/coreclr/jit/emitarm.cpp @@ -4770,8 +4770,6 @@ void emitter::emitIns_Call(EmitCallType callType, { /* This is an indirect call (either a virtual call or func ptr call) */ - id->idSetIsCallRegPtr(); - if (isJump) { ins = INS_bx; // INS_bx Reg diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index f01ad548d2ee04..86e23282f11de3 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1157,7 +1157,9 @@ emitAttr emitter::emitInsTargetRegSize(instrDesc* id) case INS_ldrb: case INS_strb: case INS_ldurb: + case INS_ldapurb: case INS_sturb: + case INS_stlurb: result = EA_4BYTE; break; @@ -1172,6 +1174,8 @@ emitAttr emitter::emitInsTargetRegSize(instrDesc* id) case INS_strh: case INS_ldurh: case INS_sturh: + case INS_ldapurh: + case INS_stlurh: result = EA_4BYTE; break; @@ -1209,6 +1213,8 @@ emitAttr emitter::emitInsTargetRegSize(instrDesc* id) case INS_str: case INS_ldur: case INS_stur: + case INS_ldapur: + case INS_stlur: result = id->idOpSize(); break; @@ -1237,7 +1243,9 @@ emitAttr emitter::emitInsLoadStoreSize(instrDesc* id) case INS_ldrb: case INS_strb: case INS_ldurb: + case INS_ldapurb: case INS_sturb: + case INS_stlurb: case INS_ldrsb: case INS_ldursb: result = EA_1BYTE; @@ -1252,6 +1260,8 @@ emitAttr emitter::emitInsLoadStoreSize(instrDesc* id) case INS_sturh: case INS_ldrsh: case INS_ldursh: + case INS_ldapurh: + case INS_stlurh: result = EA_2BYTE; 
break; @@ -1275,6 +1285,8 @@ emitAttr emitter::emitInsLoadStoreSize(instrDesc* id) case INS_str: case INS_ldur: case INS_stur: + case INS_ldapur: + case INS_stlur: result = id->idOpSize(); break; @@ -2372,6 +2384,12 @@ emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt) return false; } +// true if this 'imm' fits the signed 9-bit offset range (-256..+255) of an unscaled ldr/str instruction +/*static*/ bool emitter::emitIns_valid_imm_for_unscaled_ldst_offset(INT64 imm) +{ + return (imm >= -256) && (imm <= 255); +} + // true if this 'imm' can be encoded as the offset in a ldr/str instruction /*static*/ bool emitter::emitIns_valid_imm_for_ldst_offset(INT64 imm, emitAttr attr) { @@ -5505,6 +5523,8 @@ void emitter::emitIns_R_R_I( isLdSt = true; break; + case INS_ldapurb: + case INS_stlurb: case INS_ldurb: case INS_sturb: // size is ignored @@ -5522,7 +5542,9 @@ void emitter::emitIns_R_R_I( break; case INS_ldurh: + case INS_ldapurh: case INS_sturh: + case INS_stlurh: // size is ignored unscaledOp = true; scale = 0; @@ -5550,6 +5572,8 @@ void emitter::emitIns_R_R_I( case INS_ldur: case INS_stur: + case INS_ldapur: + case INS_stlur: // Is the target a vector register? 
if (isVectorRegister(reg1)) { @@ -8813,8 +8837,6 @@ void emitter::emitIns_Call(EmitCallType callType, { /* This is an indirect call (either a virtual call or func ptr call) */ - id->idSetIsCallRegPtr(); - if (isJump) { ins = INS_br_tail; // INS_br_tail Reg diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index ca205eef07b0fa..a971f56f9d2884 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -543,6 +543,9 @@ static bool emitIns_valid_imm_for_alu(INT64 imm, emitAttr size); // true if this 'imm' can be encoded as the offset in a ldr/str instruction static bool emitIns_valid_imm_for_ldst_offset(INT64 imm, emitAttr size); +// true if this 'imm' can be encoded as the offset in an unscaled ldr/str instruction +static bool emitIns_valid_imm_for_unscaled_ldst_offset(INT64 imm); + // true if this 'imm' can be encoded as a input operand to a ccmp instruction static bool emitIns_valid_imm_for_ccmp(INT64 imm); diff --git a/src/coreclr/jit/instrsarm64.h b/src/coreclr/jit/instrsarm64.h index ee7483d5257c80..39d729b52de92b 100644 --- a/src/coreclr/jit/instrsarm64.h +++ b/src/coreclr/jit/instrsarm64.h @@ -1053,17 +1053,15 @@ INST1(ldarb, "ldarb", LD, IF_LS_2A, 0x08DFFC00) INST1(ldarh, "ldarh", LD, IF_LS_2A, 0x48DFFC00) // ldarh Rt,[Xn] LS_2A 0100100011011111 111111nnnnnttttt 48DF FC00 - -INST1(ldapr, "ldapr", LD, IF_LS_2A, 0xB8BFC000) +INST1(ldapr, "ldapr", LD, IF_LS_2A, 0xB8BFC000) // ldapr Rt,[Xn] LS_2A 1X11100010111111 110000nnnnnttttt B8BF C000 Rm Rt Rn ARMv8.3 LRCPC -INST1(ldaprb, "ldaprb", LD, IF_LS_2A, 0x38BFC000) +INST1(ldaprb, "ldaprb", LD, IF_LS_2A, 0x38BFC000) // ldaprb Rt,[Xn] LS_2A 0011100010111111 110000nnnnnttttt 38BF C000 Rm Rt Rn ARMv8.3 LRCPC -INST1(ldaprh, "ldaprh", LD, IF_LS_2A, 0x78BFC000) +INST1(ldaprh, "ldaprh", LD, IF_LS_2A, 0x78BFC000) // ldaprh Rt,[Xn] LS_2A 0111100010111111 110000nnnnnttttt 78BF C000 Rm Rt Rn ARMv8.3 LRCPC - INST1(ldxr, "ldxr", LD, IF_LS_2A, 0x885F7C00) // ldxr Rt,[Xn] LS_2A 
1X00100001011111 011111nnnnnttttt 885F 7C00 @@ -1100,6 +1098,15 @@ INST1(ldursh, "ldursh", LD, IF_LS_2C, 0x78800000) INST1(ldursw, "ldursw", LD, IF_LS_2C, 0xB8800000) // ldursw Rt,[Xn+simm9] LS_2C 10111000100iiiii iiii00nnnnnttttt B880 0000 [Xn imm(-256..+255)] +INST1(ldapur, "ldapur", LD, IF_LS_2C, 0x99400000) + // ldapur Rt,[Xn+simm9] LS_2C 1X011001010iiiii iiii00nnnnnttttt 9940 0000 [Xn imm(-256..+255)] ARMv8.4 RCPC2 + +INST1(ldapurb, "ldapurb", LD, IF_LS_2C, 0x19400000) + // ldapurb Rt,[Xn+simm9] LS_2C 00011001010iiiii iiii00nnnnnttttt 1940 0000 [Xn imm(-256..+255)] ARMv8.4 RCPC2 + +INST1(ldapurh, "ldapurh", LD, IF_LS_2C, 0x59400000) + // ldapurh Rt,[Xn+simm9] LS_2C 01011001010iiiii iiii00nnnnnttttt 5940 0000 [Xn imm(-256..+255)] ARMv8.4 RCPC2 + INST1(stlr, "stlr", ST, IF_LS_2A, 0x889FFC00) // stlr Rt,[Xn] LS_2A 1X00100010011111 111111nnnnnttttt 889F FC00 @@ -1136,6 +1143,15 @@ INST1(sturb, "sturb", ST, IF_LS_2C, 0x38000000) INST1(sturh, "sturh", ST, IF_LS_2C, 0x78000000) // sturh Rt,[Xn+simm9] LS_2C 01111000000iiiii iiii00nnnnnttttt 7800 0000 [Xn imm(-256..+255)] +INST1(stlur, "stlur", ST, IF_LS_2C, 0x99000000) + // stlur Rt,[Xn+simm9] LS_2C 1X011001000iiiii iiii00nnnnnttttt 9900 0000 [Xn imm(-256..+255)] ARMv8.4 RCPC2 + +INST1(stlurb, "stlurb", ST, IF_LS_2C, 0x19000000) + // stlurb Rt,[Xn+simm9] LS_2C 00011001000iiiii iiii00nnnnnttttt 1900 0000 [Xn imm(-256..+255)] ARMv8.4 RCPC2 + +INST1(stlurh, "stlurh", ST, IF_LS_2C, 0x59000000) + // stlurh Rt,[Xn+simm9] LS_2C 01011001000iiiii iiii00nnnnnttttt 5900 0000 [Xn imm(-256..+255)] ARMv8.4 RCPC2 + INST1(casb, "casb", LD|ST, IF_LS_3E, 0x08A07C00) // casb Wm, Wt, [Xn] LS_3E 00001000101mmmmm 011111nnnnnttttt 08A0 7C00 Rm Rt Rn ARMv8.1 LSE Atomics diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 81df694f05e1ff..f5555afed31f45 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -6033,7 +6033,8 @@ bool Lowering::TryCreateAddrMode(GenTree* addr, bool isContainable, GenTree* 
par } #ifdef TARGET_ARM64 - if (parent->OperIsIndir() && parent->AsIndir()->IsVolatile()) + const bool hasRcpc2 = comp->compOpportunisticallyDependsOn(InstructionSet_Rcpc2); + if (parent->OperIsIndir() && parent->AsIndir()->IsVolatile() && !hasRcpc2) { // For Arm64 we avoid using LEA for volatile INDs // because we won't be able to use ldar/star @@ -6056,6 +6057,20 @@ bool Lowering::TryCreateAddrMode(GenTree* addr, bool isContainable, GenTree* par &scale, // scaling &offset); // displacement +#ifdef TARGET_ARM64 + if (parent->OperIsIndir() && parent->AsIndir()->IsVolatile()) + { + // Generally, we try to avoid creating addressing modes for volatile INDs so we can then use + // ldar/stlr instead of ldr/str + dmb. Although, with Arm 8.4+'s RCPC2 we can handle unscaled + // addressing modes (if the offset fits into 9 bits) + assert(hasRcpc2); + if ((scale > 1) || (!emitter::emitIns_valid_imm_for_unscaled_ldst_offset(offset)) || (index != nullptr)) + { + return false; + } + } +#endif + var_types targetType = parent->OperIsIndir() ? 
parent->TypeGet() : TYP_UNDEF; #ifdef TARGET_ARMARCH diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs index 28c19ee96de776..1e778e97c67f3e 100644 --- a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs +++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs @@ -220,6 +220,7 @@ private static class Arm64IntrinsicConstants public const int Atomics = 0x0080; public const int Rcpc = 0x0100; public const int VectorT128 = 0x0200; + public const int Rcpc2 = 0x0400; public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) { @@ -241,6 +242,8 @@ public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) builder.AddSupportedInstructionSet("lse"); if ((flags & Rcpc) != 0) builder.AddSupportedInstructionSet("rcpc"); + if ((flags & Rcpc2) != 0) + builder.AddSupportedInstructionSet("rcpc2"); } public static int FromInstructionSet(InstructionSet instructionSet) @@ -269,6 +272,7 @@ public static int FromInstructionSet(InstructionSet instructionSet) InstructionSet.ARM64_Sha256_Arm64 => Sha256, InstructionSet.ARM64_Atomics => Atomics, InstructionSet.ARM64_Rcpc => Rcpc, + InstructionSet.ARM64_Rcpc2 => Rcpc2, // Vector Sizes InstructionSet.ARM64_VectorT128 => VectorT128, diff --git a/src/coreclr/tools/Common/InstructionSetHelpers.cs b/src/coreclr/tools/Common/InstructionSetHelpers.cs index 089f112915572a..acf136d49d4d10 100644 --- a/src/coreclr/tools/Common/InstructionSetHelpers.cs +++ b/src/coreclr/tools/Common/InstructionSetHelpers.cs @@ -187,6 +187,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("dotprod"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("rdma"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("rcpc"); + 
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("rcpc2"); } // Vector can always be part of the optimistic set, we only want to optionally exclude it from the supported set diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs index 32b60ecbcda7da..51c1a4854d71f4 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs @@ -53,6 +53,7 @@ public enum ReadyToRunInstructionSet VectorT128=39, VectorT256=40, VectorT512=41, + Rcpc2=42, } } diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index f593808be32989..57629f66b9c6f5 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -45,6 +45,7 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.ARM64_Dczva: return null; case InstructionSet.ARM64_Rcpc: return ReadyToRunInstructionSet.Rcpc; case InstructionSet.ARM64_VectorT128: return ReadyToRunInstructionSet.VectorT128; + case InstructionSet.ARM64_Rcpc2: return ReadyToRunInstructionSet.Rcpc2; default: throw new Exception("Unknown instruction set"); } diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index f26abd68262391..d7bc2e56ebd2a4 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -30,6 +30,7 @@ public enum InstructionSet ARM64_Dczva = InstructionSet_ARM64.Dczva, ARM64_Rcpc = InstructionSet_ARM64.Rcpc, ARM64_VectorT128 = InstructionSet_ARM64.VectorT128, + ARM64_Rcpc2 = InstructionSet_ARM64.Rcpc2, ARM64_ArmBase_Arm64 
= InstructionSet_ARM64.ArmBase_Arm64, ARM64_AdvSimd_Arm64 = InstructionSet_ARM64.AdvSimd_Arm64, ARM64_Aes_Arm64 = InstructionSet_ARM64.Aes_Arm64, @@ -185,14 +186,15 @@ public enum InstructionSet_ARM64 Dczva = 12, Rcpc = 13, VectorT128 = 14, - ArmBase_Arm64 = 15, - AdvSimd_Arm64 = 16, - Aes_Arm64 = 17, - Crc32_Arm64 = 18, - Dp_Arm64 = 19, - Rdm_Arm64 = 20, - Sha1_Arm64 = 21, - Sha256_Arm64 = 22, + Rcpc2 = 15, + ArmBase_Arm64 = 16, + AdvSimd_Arm64 = 17, + Aes_Arm64 = 18, + Crc32_Arm64 = 19, + Dp_Arm64 = 20, + Rdm_Arm64 = 21, + Sha1_Arm64 = 22, + Sha256_Arm64 = 23, } public enum InstructionSet_X64 @@ -1147,7 +1149,7 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe { ("armv8.1-a", TargetArchitecture.ARM64), "armv8-a lse crc rdma" }, { ("armv8.2-a", TargetArchitecture.ARM64), "armv8.1-a" }, { ("armv8.3-a", TargetArchitecture.ARM64), "armv8.2-a rcpc" }, - { ("armv8.4-a", TargetArchitecture.ARM64), "armv8.3-a dotprod" }, + { ("armv8.4-a", TargetArchitecture.ARM64), "armv8.3-a dotprod rcpc2" }, { ("armv8.5-a", TargetArchitecture.ARM64), "armv8.4-a" }, { ("armv8.6-a", TargetArchitecture.ARM64), "armv8.5-a" }, { ("apple-m1", TargetArchitecture.ARM64), "armv8.5-a" }, @@ -1195,6 +1197,7 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("Dczva", "", InstructionSet.ARM64_Dczva, false); yield return new InstructionSetInfo("rcpc", "", InstructionSet.ARM64_Rcpc, true); yield return new InstructionSetInfo("vectort128", "VectorT128", InstructionSet.ARM64_VectorT128, true); + yield return new InstructionSetInfo("rcpc2", "", InstructionSet.ARM64_Rcpc2, true); break; case TargetArchitecture.X64: diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index 3c669e1ea95fea..ba4db589c81a0f 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ 
b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -167,6 +167,7 @@ instructionset ,ARM64 , , , ,Vector128 instructionset ,ARM64 , , , ,Dczva , instructionset ,ARM64 , ,Rcpc ,26 ,Rcpc ,rcpc instructionset ,ARM64 ,VectorT128 , ,39 ,VectorT128 ,vectort128 +instructionset ,ARM64 , ,Rcpc2 ,42 ,Rcpc2 ,rcpc2 instructionset64bit,ARM64 ,ArmBase instructionset64bit,ARM64 ,AdvSimd @@ -203,7 +204,7 @@ instructionsetgroup ,armv8-a ,ARM64 ,neon instructionsetgroup ,armv8.1-a ,ARM64 ,armv8-a lse crc rdma instructionsetgroup ,armv8.2-a ,ARM64 ,armv8.1-a instructionsetgroup ,armv8.3-a ,ARM64 ,armv8.2-a rcpc -instructionsetgroup ,armv8.4-a ,ARM64 ,armv8.3-a dotprod +instructionsetgroup ,armv8.4-a ,ARM64 ,armv8.3-a dotprod rcpc2 instructionsetgroup ,armv8.5-a ,ARM64 ,armv8.4-a instructionsetgroup ,armv8.6-a ,ARM64 ,armv8.5-a diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 6468794ad52171..8f8fa45d853dc6 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1492,6 +1492,11 @@ void EEJitManager::SetCpuInfo() CPUCompileFlags.Set(InstructionSet_Rcpc); } + if (((cpuFeatures & ARM64IntrinsicConstants_Rcpc2) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Rcpc2)) + { + CPUCompileFlags.Set(InstructionSet_Rcpc2); + } + if (((cpuFeatures & ARM64IntrinsicConstants_Crc32) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Crc32)) { CPUCompileFlags.Set(InstructionSet_Crc32); diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c index f637e58c10f14e..af122d4fbb1c07 100644 --- a/src/native/minipal/cpufeatures.c +++ b/src/native/minipal/cpufeatures.c @@ -28,6 +28,9 @@ #ifndef HWCAP_LRCPC #define HWCAP_LRCPC (1 << 15) #endif +#ifndef HWCAP_ILRCPC +#define HWCAP_ILRCPC (1 << 26) +#endif #ifndef HWCAP_ASIMDDP #define HWCAP_ASIMDDP (1 << 20) #endif @@ -335,8 +338,11 @@ int minipal_getcpufeatures(void) if (hwCap & HWCAP_ASIMDDP) result |= ARM64IntrinsicConstants_Dp; 
- if (hwCap & HWCAP_LRCPC) - result |= ARM64IntrinsicConstants_Rcpc; + if (hwCap & HWCAP_LRCPC) + result |= ARM64IntrinsicConstants_Rcpc; + + if (hwCap & HWCAP_ILRCPC) + result |= ARM64IntrinsicConstants_Rcpc2; if (hwCap & HWCAP_SHA1) result |= ARM64IntrinsicConstants_Sha1; @@ -379,6 +385,9 @@ int minipal_getcpufeatures(void) if ((sysctlbyname("hw.optional.arm.FEAT_LRCPC", &valueFromSysctl, &sz, NULL, 0) == 0) && (valueFromSysctl != 0)) result |= ARM64IntrinsicConstants_Rcpc; + + if ((sysctlbyname("hw.optional.arm.FEAT_LRCPC2", &valueFromSysctl, &sz, NULL, 0) == 0) && (valueFromSysctl != 0)) + result |= ARM64IntrinsicConstants_Rcpc2; #endif // HAVE_SYSCTLBYNAME // Every ARM64 CPU should support SIMD and FP @@ -418,6 +427,9 @@ int minipal_getcpufeatures(void) { result |= ARM64IntrinsicConstants_Rcpc; } + + // TODO: IsProcessorFeaturePresent doesn't support LRCPC2 yet. + #endif // TARGET_WINDOWS #endif // TARGET_ARM64 diff --git a/src/native/minipal/cpufeatures.h b/src/native/minipal/cpufeatures.h index 312bee84ace2bd..ead191f2921d9e 100644 --- a/src/native/minipal/cpufeatures.h +++ b/src/native/minipal/cpufeatures.h @@ -56,6 +56,7 @@ enum ARM64IntrinsicConstants ARM64IntrinsicConstants_Atomics = 0x0080, ARM64IntrinsicConstants_Rcpc = 0x0100, ARM64IntrinsicConstants_VectorT128 = 0x0200, + ARM64IntrinsicConstants_Rcpc2 = 0x0400, }; #include