From 90cb0d01bfba2af5c0504832d93da732c58350e9 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 15 Jun 2022 21:26:17 +0200 Subject: [PATCH 01/16] Don't use addressing modes for volatile loads for gc types --- src/coreclr/jit/lower.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index fb8e0104d34d7c..a2e195ad3ae522 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -5215,7 +5215,7 @@ bool Lowering::TryCreateAddrMode(GenTree* addr, bool isContainable, GenTree* par } #ifdef TARGET_ARM64 - if (parent->OperIsIndir() && parent->AsIndir()->IsVolatile() && !varTypeIsGC(addr)) + if (parent->OperIsIndir() && parent->AsIndir()->IsVolatile()) { // For Arm64 we avoid using LEA for volatile INDs // because we won't be able to use ldar/star From 08a61dbfdaa75a7941a939eb7a338cf0c31dca43 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 17 Jun 2022 00:54:42 +0200 Subject: [PATCH 02/16] print perfscore --- src/coreclr/scripts/superpmi_diffs.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/scripts/superpmi_diffs.py b/src/coreclr/scripts/superpmi_diffs.py index 47243ad43d3a23..33050897d0b2ed 100644 --- a/src/coreclr/scripts/superpmi_diffs.py +++ b/src/coreclr/scripts/superpmi_diffs.py @@ -207,6 +207,8 @@ def main(main_args): os.path.join(script_dir, "superpmi.py"), "asmdiffs", "--no_progress", + "-metrics", "CodeSize", + "-metrics", "PerfScore", "-core_root", core_root_dir, "-target_os", platform_name, "-target_arch", arch_name, From 1f76c43db0cc80909869c144affed7d0a14b153c Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 17 Jun 2022 01:48:59 +0200 Subject: [PATCH 03/16] fix assert --- src/coreclr/jit/gcinfo.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/gcinfo.cpp b/src/coreclr/jit/gcinfo.cpp index 1d0229fbf26f53..be152e4b38b105 100644 --- a/src/coreclr/jit/gcinfo.cpp +++ b/src/coreclr/jit/gcinfo.cpp @@ -302,9 +302,10 @@ GCInfo::WriteBarrierForm GCInfo::gcWriteBarrierFormFromTargetAddress(GenTree* tg GenTree* addOp2 = tgtAddr->AsOp()->gtGetOp2(); var_types addOp1Type = addOp1->TypeGet(); var_types addOp2Type = addOp2->TypeGet(); + if (addOp1Type == TYP_BYREF || addOp1Type == TYP_REF) { - assert(addOp2Type != TYP_BYREF && addOp2Type != TYP_REF); + assert(((addOp2Type != TYP_BYREF) || (addOp2->OperIs(GT_CNS_INT))) && (addOp2Type != TYP_REF)); tgtAddr = addOp1; simplifiedExpr = true; } From b0e0f8bd59eab12771f3899bcc4e8e8769489445 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 29 Jul 2023 20:07:39 +0200 Subject: [PATCH 04/16] stlur/ldupr --- src/coreclr/inc/clrconfigvalues.h | 1 + src/coreclr/inc/corinfoinstructionset.h | 20 ++++++++------ src/coreclr/inc/jiteeversionguid.h | 10 +++---- src/coreclr/inc/readytoruninstructionset.h | 1 + src/coreclr/jit/codegenarm64.cpp | 6 +++++ src/coreclr/jit/emit.h | 4 +-- src/coreclr/jit/instrsarm64.h | 26 +++++++++++++++---- .../Compiler/HardwareIntrinsicHelpers.cs | 4 +++ .../tools/Common/InstructionSetHelpers.cs | 1 + .../Runtime/ReadyToRunInstructionSet.cs | 1 + .../Runtime/ReadyToRunInstructionSetHelper.cs | 1 + .../JitInterface/CorInfoInstructionSet.cs | 21 ++++++++------- .../ThunkGenerator/InstructionSetDesc.txt | 1 + src/coreclr/vm/codeman.cpp | 5 ++++ src/native/minipal/cpufeatures.c | 10 +++++++ src/native/minipal/cpufeatures.h | 1 + 16 files changed, 84 insertions(+), 29 deletions(-) diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 401a4f9dd85ff0..4fa61ccd28d4dd 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -786,6 +786,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Rdm, W("EnableArm64Rd RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Sha1, W("EnableArm64Sha1"), 1, "Allows Arm64 Sha1+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Sha256, W("EnableArm64Sha256"), 1, "Allows Arm64 Sha256+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Rcpc, W("EnableArm64Rcpc"), 1, "Allows Arm64 Rcpc+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Rcpc2, W("EnableArm64Rcpc2"), 1, "Allows Arm64 Rcpc2+ hardware intrinsics to be disabled") #endif /// diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index 5aecf46bf3928c..2ffd9bf6873201 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -30,14 +30,15 @@ enum CORINFO_InstructionSet InstructionSet_Dczva=12, InstructionSet_Rcpc=13, InstructionSet_VectorT128=14, - InstructionSet_ArmBase_Arm64=15, - InstructionSet_AdvSimd_Arm64=16, - InstructionSet_Aes_Arm64=17, - InstructionSet_Crc32_Arm64=18, - InstructionSet_Dp_Arm64=19, - InstructionSet_Rdm_Arm64=20, - InstructionSet_Sha1_Arm64=21, - InstructionSet_Sha256_Arm64=22, + InstructionSet_Rcpc2=15, + InstructionSet_ArmBase_Arm64=16, + InstructionSet_AdvSimd_Arm64=17, + InstructionSet_Aes_Arm64=18, + InstructionSet_Crc32_Arm64=19, + InstructionSet_Dp_Arm64=20, + InstructionSet_Rdm_Arm64=21, + InstructionSet_Sha1_Arm64=22, + InstructionSet_Sha256_Arm64=23, #endif // TARGET_ARM64 #ifdef TARGET_AMD64 InstructionSet_X86Base=1, @@ -761,6 +762,8 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "Rcpc"; case InstructionSet_VectorT128 : return "VectorT128"; + case InstructionSet_Rcpc2 : + return "Rcpc2"; #endif // TARGET_ARM64 #ifdef TARGET_AMD64 case InstructionSet_X86Base : @@ -994,6 +997,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Atomics: return InstructionSet_Atomics; case READYTORUN_INSTRUCTION_Rcpc: return InstructionSet_Rcpc; case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128; + case READYTORUN_INSTRUCTION_Rcpc2: return InstructionSet_Rcpc2; #endif // TARGET_ARM64 #ifdef TARGET_AMD64 case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base; diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index bcd85a573d69bc..70960ff577ee8a 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID; #define GUID_DEFINED #endif // !GUID_DEFINED -constexpr GUID JITEEVersionIdentifier = { /* cef79bc8-29bf-4f7b-9d05-9fc06832098c */ - 0xcef79bc8, - 0x29bf, - 0x4f7b, - {0x9d, 0x05, 0x9f, 0xc0, 0x68, 0x32, 0x09, 0x8c} +constexpr GUID JITEEVersionIdentifier = { /* a2974440-e8ee-4d95-9e6e-799a330be1a0 */ + 0xa2974440, + 0xe8ee, + 0x4d95, + {0x9e, 0x6e, 0x79, 0x9a, 0x33, 0x0b, 0xe1, 0xa0} }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/inc/readytoruninstructionset.h b/src/coreclr/inc/readytoruninstructionset.h index 0a9a78e03f6c15..190fe4b516617e 100644 --- a/src/coreclr/inc/readytoruninstructionset.h +++ b/src/coreclr/inc/readytoruninstructionset.h @@ -50,6 +50,7 @@ enum ReadyToRunInstructionSet READYTORUN_INSTRUCTION_VectorT128=39, READYTORUN_INSTRUCTION_VectorT256=40, READYTORUN_INSTRUCTION_VectorT512=41, + READYTORUN_INSTRUCTION_Rcpc2=42, }; diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index f99602d21d953b..eba3900e99a4a6 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -5544,6 +5544,12 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_stlxrb, EA_4BYTE, REG_R8, REG_R5, REG_R14); theEmitter->emitIns_R_R_R(INS_stlxrh, EA_4BYTE, REG_R12, REG_R3, REG_R15); + // stlur Ws, Rt, [reg] + theEmitter->emitIns_R_R_R(INS_stlur, EA_8BYTE, REG_R1, REG_R9, REG_R8); + theEmitter->emitIns_R_R_R(INS_stlur, EA_4BYTE, REG_R3, REG_R7, REG_R13); + theEmitter->emitIns_R_R_R(INS_stlurb, EA_4BYTE, REG_R8, REG_R5, REG_R14); + theEmitter->emitIns_R_R_R(INS_stlurh, EA_4BYTE, REG_R12, REG_R3, REG_R15); + #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index fb3ba74e0e44b9..9aeae5209904ed 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -628,8 +628,8 @@ class emitter #define MAX_ENCODED_SIZE 15 #elif defined(TARGET_ARM64) #define INSTR_ENCODED_SIZE 4 - static_assert_no_msg(INS_count <= 512); - instruction _idIns : 9; + static_assert_no_msg(INS_count <= 1024); + instruction _idIns : 10; #elif defined(TARGET_LOONGARCH64) // TODO-LoongArch64: not include SIMD-vector. static_assert_no_msg(INS_count <= 512); diff --git a/src/coreclr/jit/instrsarm64.h b/src/coreclr/jit/instrsarm64.h index ee7483d5257c80..39d729b52de92b 100644 --- a/src/coreclr/jit/instrsarm64.h +++ b/src/coreclr/jit/instrsarm64.h @@ -1053,17 +1053,15 @@ INST1(ldarb, "ldarb", LD, IF_LS_2A, 0x08DFFC00) INST1(ldarh, "ldarh", LD, IF_LS_2A, 0x48DFFC00) // ldarh Rt,[Xn] LS_2A 0100100011011111 111111nnnnnttttt 48DF FC00 - -INST1(ldapr, "ldapr", LD, IF_LS_2A, 0xB8BFC000) +INST1(ldapr, "ldapr", LD, IF_LS_2A, 0xB8BFC000) // ldapr Rt,[Xn] LS_2A 1X11100010111111 110000nnnnnttttt B8BF C000 Rm Rt Rn ARMv8.3 LRCPC -INST1(ldaprb, "ldaprb", LD, IF_LS_2A, 0x38BFC000) +INST1(ldaprb, "ldaprb", LD, IF_LS_2A, 0x38BFC000) // ldaprb Rt,[Xn] LS_2A 0011100010111111 110000nnnnnttttt 38BF C000 Rm Rt Rn ARMv8.3 LRCPC -INST1(ldaprh, "ldaprh", LD, IF_LS_2A, 0x78BFC000) +INST1(ldaprh, "ldaprh", LD, IF_LS_2A, 0x78BFC000) // ldaprh Rt,[Xn] LS_2A 0111100010111111 110000nnnnnttttt 78BF C000 Rm Rt Rn ARMv8.3 LRCPC - INST1(ldxr, "ldxr", LD, IF_LS_2A, 0x885F7C00) // ldxr Rt,[Xn] LS_2A 1X00100001011111 011111nnnnnttttt 885F 7C00 @@ -1100,6 +1098,15 @@ INST1(ldursh, "ldursh", LD, IF_LS_2C, 0x78800000) INST1(ldursw, "ldursw", LD, IF_LS_2C, 0xB8800000) // ldursw Rt,[Xn+simm9] LS_2C 10111000100iiiii iiii00nnnnnttttt B880 0000 [Xn imm(-256..+255)] +INST1(ldapur, "ldapur", LD, IF_LS_2C, 0x99400000) + // ldapur Rt,[Xn+simm9] LS_2C 1X011001010iiiii iiii00nnnnnttttt 9940 0000 [Xn imm(-256..+255)] ARMv8.4 RCPC2 + +INST1(ldapurb, "ldapurb", LD, IF_LS_2C, 0x19400000) + // ldapurb Rt,[Xn+simm9] LS_2C 00011001010iiiii iiii00nnnnnttttt 1940 0000 [Xn imm(-256..+255)] ARMv8.4 RCPC2 + +INST1(ldapurh, "ldapurh", LD, IF_LS_2C, 0x59400000) + // ldapurh Rt,[Xn+simm9] LS_2C 01011001010iiiii iiii00nnnnnttttt 5940 0000 [Xn imm(-256..+255)] ARMv8.4 RCPC2 + INST1(stlr, "stlr", ST, IF_LS_2A, 0x889FFC00) // stlr Rt,[Xn] LS_2A 1X00100010011111 111111nnnnnttttt 889F FC00 @@ -1136,6 +1143,15 @@ INST1(sturb, "sturb", ST, IF_LS_2C, 0x38000000) INST1(sturh, "sturh", ST, IF_LS_2C, 0x78000000) // sturh Rt,[Xn+simm9] LS_2C 01111000000iiiii iiii00nnnnnttttt 7800 0000 [Xn imm(-256..+255)] +INST1(stlur, "stlur", ST, IF_LS_2C, 0x99000000) + // stlur Rt,[Xn+simm9] LS_2C 1X011001000iiiii iiii00nnnnnttttt 9900 0000 [Xn imm(-256..+255)] ARMv8.4 RCPC2 + +INST1(stlurb, "stlurb", ST, IF_LS_2C, 0x19000000) + // stlurb Rt,[Xn+simm9] LS_2C 00011001000iiiii iiii00nnnnnttttt 1900 0000 [Xn imm(-256..+255)] ARMv8.4 RCPC2 + +INST1(stlurh, "stlurh", ST, IF_LS_2C, 0x59000000) + // stlurh Rt,[Xn+simm9] LS_2C 01011001000iiiii iiii00nnnnnttttt 5900 0000 [Xn imm(-256..+255)] ARMv8.4 RCPC2 + INST1(casb, "casb", LD|ST, IF_LS_3E, 0x08A07C00) // casb Wm, Wt, [Xn] LS_3E 00001000101mmmmm 011111nnnnnttttt 08A0 7C00 Rm Rt Rn ARMv8.1 LSE Atomics diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs index 28c19ee96de776..1e778e97c67f3e 100644 --- a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs +++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs @@ -220,6 +220,7 @@ private static class Arm64IntrinsicConstants public const int Atomics = 0x0080; public const int Rcpc = 0x0100; public const int VectorT128 = 0x0200; + public const int Rcpc2 = 0x0400; public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) { @@ -241,6 +242,8 @@ public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) builder.AddSupportedInstructionSet("lse"); if ((flags & Rcpc) != 0) builder.AddSupportedInstructionSet("rcpc"); + if ((flags & Rcpc2) != 0) + builder.AddSupportedInstructionSet("rcpc2"); } public static int FromInstructionSet(InstructionSet instructionSet) @@ -269,6 +272,7 @@ public static int FromInstructionSet(InstructionSet instructionSet) InstructionSet.ARM64_Sha256_Arm64 => Sha256, InstructionSet.ARM64_Atomics => Atomics, InstructionSet.ARM64_Rcpc => Rcpc, + InstructionSet.ARM64_Rcpc2 => Rcpc2, // Vector Sizes InstructionSet.ARM64_VectorT128 => VectorT128, diff --git a/src/coreclr/tools/Common/InstructionSetHelpers.cs b/src/coreclr/tools/Common/InstructionSetHelpers.cs index 089f112915572a..acf136d49d4d10 100644 --- a/src/coreclr/tools/Common/InstructionSetHelpers.cs +++ b/src/coreclr/tools/Common/InstructionSetHelpers.cs @@ -187,6 +187,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("dotprod"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("rdma"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("rcpc"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("rcpc2"); } // Vector can always be part of the optimistic set, we only want to optionally exclude it from the supported set diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs index 32b60ecbcda7da..51c1a4854d71f4 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs @@ -53,6 +53,7 @@ public enum ReadyToRunInstructionSet VectorT128=39, VectorT256=40, VectorT512=41, + Rcpc2=42, } } diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index f593808be32989..57629f66b9c6f5 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -45,6 +45,7 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.ARM64_Dczva: return null; case InstructionSet.ARM64_Rcpc: return ReadyToRunInstructionSet.Rcpc; case InstructionSet.ARM64_VectorT128: return ReadyToRunInstructionSet.VectorT128; + case InstructionSet.ARM64_Rcpc2: return ReadyToRunInstructionSet.Rcpc2; default: throw new Exception("Unknown instruction set"); } diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index f26abd68262391..d7bc2e56ebd2a4 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -30,6 +30,7 @@ public enum InstructionSet ARM64_Dczva = InstructionSet_ARM64.Dczva, ARM64_Rcpc = InstructionSet_ARM64.Rcpc, ARM64_VectorT128 = InstructionSet_ARM64.VectorT128, + ARM64_Rcpc2 = InstructionSet_ARM64.Rcpc2, ARM64_ArmBase_Arm64 = InstructionSet_ARM64.ArmBase_Arm64, ARM64_AdvSimd_Arm64 = InstructionSet_ARM64.AdvSimd_Arm64, ARM64_Aes_Arm64 = InstructionSet_ARM64.Aes_Arm64, @@ -185,14 +186,15 @@ public enum InstructionSet_ARM64 Dczva = 12, Rcpc = 13, VectorT128 = 14, - ArmBase_Arm64 = 15, - AdvSimd_Arm64 = 16, - Aes_Arm64 = 17, - Crc32_Arm64 = 18, - Dp_Arm64 = 19, - Rdm_Arm64 = 20, - Sha1_Arm64 = 21, - Sha256_Arm64 = 22, + Rcpc2 = 15, + ArmBase_Arm64 = 16, + AdvSimd_Arm64 = 17, + Aes_Arm64 = 18, + Crc32_Arm64 = 19, + Dp_Arm64 = 20, + Rdm_Arm64 = 21, + Sha1_Arm64 = 22, + Sha256_Arm64 = 23, } public enum InstructionSet_X64 @@ -1147,7 +1149,7 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe { ("armv8.1-a", TargetArchitecture.ARM64), "armv8-a lse crc rdma" }, { ("armv8.2-a", TargetArchitecture.ARM64), "armv8.1-a" }, { ("armv8.3-a", TargetArchitecture.ARM64), "armv8.2-a rcpc" }, - { ("armv8.4-a", TargetArchitecture.ARM64), "armv8.3-a dotprod" }, + { ("armv8.4-a", TargetArchitecture.ARM64), "armv8.3-a dotprod rcpc2" }, { ("armv8.5-a", TargetArchitecture.ARM64), "armv8.4-a" }, { ("armv8.6-a", TargetArchitecture.ARM64), "armv8.5-a" }, { ("apple-m1", TargetArchitecture.ARM64), "armv8.5-a" }, @@ -1195,6 +1197,7 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("Dczva", "", InstructionSet.ARM64_Dczva, false); yield return new InstructionSetInfo("rcpc", "", InstructionSet.ARM64_Rcpc, true); yield return new InstructionSetInfo("vectort128", "VectorT128", InstructionSet.ARM64_VectorT128, true); + yield return new InstructionSetInfo("rcpc2", "", InstructionSet.ARM64_Rcpc2, true); break; case TargetArchitecture.X64: diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index 3c669e1ea95fea..ad1ca93b659ae1 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -167,6 +167,7 @@ instructionset ,ARM64 , , , ,Vector128 instructionset ,ARM64 , , , ,Dczva , instructionset ,ARM64 , ,Rcpc ,26 ,Rcpc ,rcpc instructionset ,ARM64 ,VectorT128 , ,39 ,VectorT128 ,vectort128 +instructionset ,ARM64 , ,Rcpc2 ,42 ,Rcpc2 ,rcpc2 instructionset64bit,ARM64 ,ArmBase instructionset64bit,ARM64 ,AdvSimd diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 6468794ad52171..8f8fa45d853dc6 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1492,6 +1492,11 @@ void EEJitManager::SetCpuInfo() CPUCompileFlags.Set(InstructionSet_Rcpc); } + if (((cpuFeatures & ARM64IntrinsicConstants_Rcpc2) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Rcpc2)) + { + CPUCompileFlags.Set(InstructionSet_Rcpc2); + } + if (((cpuFeatures & ARM64IntrinsicConstants_Crc32) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Crc32)) { CPUCompileFlags.Set(InstructionSet_Crc32); diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c index a606bfbe76b4dd..af6f3c40c4bd55 100644 --- a/src/native/minipal/cpufeatures.c +++ b/src/native/minipal/cpufeatures.c @@ -356,6 +356,10 @@ int minipal_getcpufeatures(void) if (hwCap & HWCAP_LRCPC) result |= ARM64IntrinsicConstants_Rcpc; #endif +#ifdef HWCAP_LRCPC2 + if (hwCap & HWCAP_LRCPC2) + result |= ARM64IntrinsicConstants_Rcpc2; +#endif #ifdef HWCAP_PMULL // if (hwCap & HWCAP_PMULL) // result |= ARM64IntrinsicConstants_???; @@ -472,6 +476,9 @@ int minipal_getcpufeatures(void) if ((sysctlbyname("hw.optional.arm.FEAT_LRCPC", &valueFromSysctl, &sz, NULL, 0) == 0) && (valueFromSysctl != 0)) result |= ARM64IntrinsicConstants_Rcpc; + + if ((sysctlbyname("hw.optional.arm.FEAT_LRCPC2", &valueFromSysctl, &sz, NULL, 0) == 0) && (valueFromSysctl != 0)) + result |= ARM64IntrinsicConstants_Rcpc2; #endif // HAVE_SYSCTLBYNAME // Every ARM64 CPU should support SIMD and FP @@ -511,6 +518,9 @@ int minipal_getcpufeatures(void) { result |= ARM64IntrinsicConstants_Rcpc; } + + // TODO: IsProcessorFeaturePresent doesn't support LRCPC2 yet. + #endif // TARGET_WINDOWS #endif // TARGET_ARM64 diff --git a/src/native/minipal/cpufeatures.h b/src/native/minipal/cpufeatures.h index 312bee84ace2bd..ead191f2921d9e 100644 --- a/src/native/minipal/cpufeatures.h +++ b/src/native/minipal/cpufeatures.h @@ -56,6 +56,7 @@ enum ARM64IntrinsicConstants ARM64IntrinsicConstants_Atomics = 0x0080, ARM64IntrinsicConstants_Rcpc = 0x0100, ARM64IntrinsicConstants_VectorT128 = 0x0200, + ARM64IntrinsicConstants_Rcpc2 = 0x0400, }; #include From 51c2902939a4b3d86f52a2af21aefb53d3bbb4c9 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 29 Jul 2023 20:38:50 +0200 Subject: [PATCH 05/16] fix compilation --- src/coreclr/jit/emit.h | 10 +++++++--- src/coreclr/jit/emitarm64.cpp | 2 ++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 9aeae5209904ed..fa960115496fa8 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -712,7 +712,7 @@ class emitter // x86: 17 bits // amd64: 17 bits // arm: 16 bits - // arm64: 17 bits + // arm64: 18 bits // loongarch64: 14 bits // risc-v: 14 bits @@ -754,7 +754,7 @@ class emitter // x86: 38 bits // amd64: 38 bits // arm: 32 bits - // arm64: 31 bits + // arm64: 32 bits // loongarch64: 28 bits // risc-v: 28 bits @@ -764,7 +764,9 @@ class emitter unsigned _idLargeCall : 1; // large call descriptor used unsigned _idBound : 1; // jump target / frame offset bound +#ifndef TARGET_ARMARCH unsigned _idCallRegPtr : 1; // IL indirect calls: addr in reg +#endif unsigned _idCallAddr : 1; // IL indirect calls: can make a direct call to iiaAddr unsigned _idNoGC : 1; // Some helpers don't get recorded in GC tables #if defined(TARGET_XARCH) @@ -806,7 +808,7 @@ class emitter // x86: 47 bits // amd64: 47 bits // arm: 48 bits - // arm64: 50 bits + // arm64: 51 bits // loongarch64: 46 bits // risc-v: 46 bits @@ -1509,6 +1511,7 @@ class emitter _idBound = 1; } +#ifndef TARGET_ARMARCH bool idIsCallRegPtr() const { return _idCallRegPtr != 0; @@ -1517,6 +1520,7 @@ class emitter { _idCallRegPtr = 1; } +#endif // Only call instructions that call helper functions may be marked as "IsNoGC", indicating // that a thread executing such a call cannot be stopped for GC. Thus, in partially-interruptible diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 78df291a965498..294385fdec5032 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -8807,7 +8807,9 @@ void emitter::emitIns_Call(EmitCallType callType, { /* This is an indirect call (either a virtual call or func ptr call) */ +#ifndef TARGET_ARMARCH id->idSetIsCallRegPtr(); +#endif if (isJump) { From bce421ca118e416ab456426be38a4a39e707243a Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 29 Jul 2023 22:23:35 +0200 Subject: [PATCH 06/16] use ldapur --- src/coreclr/jit/codegenarmarch.cpp | 31 +++++++++++++++++++++++++++--- src/coreclr/jit/emitarm64.cpp | 18 +++++++++++++++++ src/coreclr/jit/lower.cpp | 17 +++++++++++++++- 3 files changed, 62 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 5434ef7b722ef9..d0245ac6b7f71a 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -1761,9 +1761,34 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree) bool addrIsInReg = tree->Addr()->isUsedFromReg(); bool addrIsAligned = ((tree->gtFlags & GTF_IND_UNALIGNED) == 0); - // on arm64-v8.3+ we can use ldap* instructions with acquire/release semantics to avoid + // On arm64-v8.3+ we can use ldap* instructions with acquire/release semantics to avoid // full memory barriers if mixed with STLR - bool hasRcpc = compiler->compOpportunisticallyDependsOn(InstructionSet_Rcpc); + bool hasRcpc = compiler->compOpportunisticallyDependsOn(InstructionSet_Rcpc); + + // On arm64-v8.3+ we can use ldap* instructions with acquire/release semantics to avoid + // full memory barriers if address is contained and unscaled + bool hasRcpc2 = compiler->compOpportunisticallyDependsOn(InstructionSet_Rcpc2); + + bool handledWithLdapur = false; + if (hasRcpc2 && !addrIsInReg && tree->Addr()->OperIs(GT_LEA) && (tree->Scale() == 1) && + GetEmitter()->emitIns_valid_imm_for_ldst_offset(tree->Offset(), emitTypeSize(type))) + { + if (ins == INS_ldrb) + { + ins = INS_ldapurb; + handledWithLdapur = true; + } + else if (ins == INS_ldrh) + { + ins = INS_ldapurh; + handledWithLdapur = true; + } + else if (ins == INS_ldr) + { + ins = INS_ldapur; + handledWithLdapur = true; + } + } if ((ins == INS_ldrb) && addrIsInReg) { @@ -1777,7 +1802,7 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree) { ins = hasRcpc ? INS_ldapr : INS_ldar; } - else + else if (!handledWithLdapur) #endif // TARGET_ARM64 { emitBarrier = true; diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 294385fdec5032..8e65a9fb4fcfcf 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1154,7 +1154,9 @@ emitAttr emitter::emitInsTargetRegSize(instrDesc* id) case INS_ldrb: case INS_strb: case INS_ldurb: + case INS_ldapurb: case INS_sturb: + case INS_stlurb: result = EA_4BYTE; break; @@ -1169,6 +1171,8 @@ emitAttr emitter::emitInsTargetRegSize(instrDesc* id) case INS_strh: case INS_ldurh: case INS_sturh: + case INS_ldapurh: + case INS_stlurh: result = EA_4BYTE; break; @@ -1206,6 +1210,8 @@ emitAttr emitter::emitInsTargetRegSize(instrDesc* id) case INS_str: case INS_ldur: case INS_stur: + case INS_ldapur: + case INS_stlur: result = id->idOpSize(); break; @@ -1234,7 +1240,9 @@ emitAttr emitter::emitInsLoadStoreSize(instrDesc* id) case INS_ldrb: case INS_strb: case INS_ldurb: + case INS_ldapurb: case INS_sturb: + case INS_stlurb: case INS_ldrsb: case INS_ldursb: result = EA_1BYTE; @@ -1249,6 +1257,8 @@ emitAttr emitter::emitInsLoadStoreSize(instrDesc* id) case INS_sturh: case INS_ldrsh: case INS_ldursh: + case INS_ldapurh: + case INS_stlurh: result = EA_2BYTE; break; @@ -1272,6 +1282,8 @@ emitAttr emitter::emitInsLoadStoreSize(instrDesc* id) case INS_str: case INS_ldur: case INS_stur: + case INS_ldapur: + case INS_stlur: result = id->idOpSize(); break; @@ -5499,6 +5511,8 @@ void emitter::emitIns_R_R_I( isLdSt = true; break; + case INS_ldapurb: + case INS_stlurb: case INS_ldurb: case INS_sturb: // size is ignored @@ -5516,7 +5530,9 @@ void emitter::emitIns_R_R_I( break; case INS_ldurh: + case INS_ldapurh: case INS_sturh: + case INS_stlurh: // size is ignored unscaledOp = true; scale = 0; @@ -5544,6 +5560,8 @@ void emitter::emitIns_R_R_I( case INS_ldur: case INS_stur: + case INS_ldapur: + case INS_stlur: // Is the target a vector register? if (isVectorRegister(reg1)) { diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 81df694f05e1ff..25af3cacd871a8 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -6033,7 +6033,8 @@ bool Lowering::TryCreateAddrMode(GenTree* addr, bool isContainable, GenTree* par } #ifdef TARGET_ARM64 - if (parent->OperIsIndir() && parent->AsIndir()->IsVolatile()) + const bool hasRcpc2 = comp->compOpportunisticallyDependsOn(InstructionSet_Rcpc2); + if (parent->OperIsIndir() && parent->AsIndir()->IsVolatile() && !hasRcpc2) { // For Arm64 we avoid using LEA for volatile INDs // because we won't be able to use ldar/star @@ -6056,6 +6057,20 @@ bool Lowering::TryCreateAddrMode(GenTree* addr, bool isContainable, GenTree* par &scale, // scaling &offset); // displacement +#ifdef TARGET_ARM64 + if (parent->OperIsIndir() && parent->AsIndir()->IsVolatile()) + { + // Generally, we try to avoid creating addressing modes for volatile INDs so we can then use + // ldar/stlr with less strict semantics. Although, with Arm 8.4+'s RCPC2 we handle unscaled + // addressing modes (if the offset fits into 9 bits) + assert(hasRcpc2); + if ((scale > 1) || !emitter::emitIns_valid_imm_for_ldst_offset(offset, emitTypeSize(parent->TypeGet()))) + { + return false; + } + } +#endif + var_types targetType = parent->OperIsIndir() ? parent->TypeGet() : TYP_UNDEF; #ifdef TARGET_ARMARCH From 60d890200593258ed0e290cf338b873f3ddb095c Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 29 Jul 2023 22:42:06 +0200 Subject: [PATCH 07/16] Fix build --- src/coreclr/jit/codegenarmarch.cpp | 8 ++++---- src/coreclr/jit/emit.h | 6 +++--- src/coreclr/jit/emitarm.cpp | 2 -- src/coreclr/jit/emitarm64.cpp | 6 +----- src/coreclr/jit/lower.cpp | 2 +- 5 files changed, 9 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index d0245ac6b7f71a..0e266885de33dd 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -1763,7 +1763,7 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree) // On arm64-v8.3+ we can use ldap* instructions with acquire/release semantics to avoid // full memory barriers if mixed with STLR - bool hasRcpc = compiler->compOpportunisticallyDependsOn(InstructionSet_Rcpc); + bool hasRcpc = compiler->compOpportunisticallyDependsOn(InstructionSet_Rcpc); // On arm64-v8.3+ we can use ldap* instructions with acquire/release semantics to avoid // full memory barriers if address is contained and unscaled @@ -1775,17 +1775,17 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree) { if (ins == INS_ldrb) { - ins = INS_ldapurb; + ins = INS_ldapurb; handledWithLdapur = true; } else if (ins == INS_ldrh) { - ins = INS_ldapurh; + ins = INS_ldapurh; handledWithLdapur = true; } else if (ins == INS_ldr) { - ins = INS_ldapur; + ins = INS_ldapur; handledWithLdapur = true; } } diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index fa960115496fa8..5d1a0ed0fa2bb7 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -763,12 +763,12 @@ class emitter unsigned _idLargeDsp : 1; // does a large displacement follow? unsigned _idLargeCall : 1; // large call descriptor used - unsigned _idBound : 1; // jump target / frame offset bound + unsigned _idBound : 1; // jump target / frame offset bound #ifndef TARGET_ARMARCH unsigned _idCallRegPtr : 1; // IL indirect calls: addr in reg #endif - unsigned _idCallAddr : 1; // IL indirect calls: can make a direct call to iiaAddr - unsigned _idNoGC : 1; // Some helpers don't get recorded in GC tables + unsigned _idCallAddr : 1; // IL indirect calls: can make a direct call to iiaAddr + unsigned _idNoGC : 1; // Some helpers don't get recorded in GC tables #if defined(TARGET_XARCH) unsigned _idEvexbContext : 1; // does EVEX.b need to be set. #endif // TARGET_XARCH diff --git a/src/coreclr/jit/emitarm.cpp b/src/coreclr/jit/emitarm.cpp index 3099b4d869f289..8dcea0d030b4f3 100644 --- a/src/coreclr/jit/emitarm.cpp +++ b/src/coreclr/jit/emitarm.cpp @@ -4770,8 +4770,6 @@ void emitter::emitIns_Call(EmitCallType callType, { /* This is an indirect call (either a virtual call or func ptr call) */ - id->idSetIsCallRegPtr(); - if (isJump) { ins = INS_bx; // INS_bx Reg diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 8e65a9fb4fcfcf..14fe4c78956916 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -8823,11 +8823,7 @@ void emitter::emitIns_Call(EmitCallType callType, if (callType == EC_INDIR_R) { - /* This is an indirect call (either a virtual call or func ptr call) */ - -#ifndef TARGET_ARMARCH - id->idSetIsCallRegPtr(); -#endif +/* This is an indirect call (either a virtual call or func ptr call) */ if (isJump) { diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 25af3cacd871a8..e8385fcf37d2e3 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -6061,7 +6061,7 @@ bool Lowering::TryCreateAddrMode(GenTree* addr, bool isContainable, GenTree* par if (parent->OperIsIndir() && parent->AsIndir()->IsVolatile()) { // Generally, we try to avoid creating addressing modes for volatile INDs so we can then use - // ldar/stlr with less strict semantics. Although, with Arm 8.4+'s RCPC2 we handle unscaled + // ldar/stlr instead of ldr/str + dmb. Although, with Arm 8.4+'s RCPC2 we can handle unscaled // addressing modes (if the offset fits into 9 bits) assert(hasRcpc2); if ((scale > 1) || !emitter::emitIns_valid_imm_for_ldst_offset(offset, emitTypeSize(parent->TypeGet()))) From f743875c6745a5f089a79dbc9e74c75142f1503a Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Sat, 29 Jul 2023 22:57:28 +0200 Subject: [PATCH 08/16] Update codegenarm64.cpp --- src/coreclr/jit/codegenarm64.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index eba3900e99a4a6..f99602d21d953b 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -5544,12 +5544,6 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_stlxrb, EA_4BYTE, REG_R8, REG_R5, REG_R14); theEmitter->emitIns_R_R_R(INS_stlxrh, EA_4BYTE, REG_R12, REG_R3, REG_R15); - // stlur Ws, Rt, [reg] - theEmitter->emitIns_R_R_R(INS_stlur, EA_8BYTE, REG_R1, REG_R9, REG_R8); - theEmitter->emitIns_R_R_R(INS_stlur, EA_4BYTE, REG_R3, REG_R7, REG_R13); - theEmitter->emitIns_R_R_R(INS_stlurb, EA_4BYTE, REG_R8, REG_R5, REG_R14); - theEmitter->emitIns_R_R_R(INS_stlurh, EA_4BYTE, REG_R12, REG_R3, REG_R15); - #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS From 9f3039b07f2f6a97d7f68c6000972ac1f5316898 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Sat, 29 Jul 2023 22:58:07 +0200 Subject: [PATCH 09/16] Update codegenarmarch.cpp --- src/coreclr/jit/codegenarmarch.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 0e266885de33dd..b48a23f459f4a2 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -1765,8 +1765,8 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree) // full memory barriers if mixed with STLR bool hasRcpc = compiler->compOpportunisticallyDependsOn(InstructionSet_Rcpc); - // On arm64-v8.3+ we can use ldap* instructions with acquire/release semantics to avoid - // full memory barriers if address is contained and unscaled + // On arm64-v8.3+ we can use ldapur* instructions with acquire/release semantics to + // avoid full memory barriers if address is contained and unscaled bool hasRcpc2 = compiler->compOpportunisticallyDependsOn(InstructionSet_Rcpc2); bool handledWithLdapur = false; From cd9f9a5f6702715e9ae44a45b24fc9642d53837a Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 29 Jul 2023 23:16:58 +0200 Subject: [PATCH 10/16] Address feedback --- .../Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index ad1ca93b659ae1..ba4db589c81a0f 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -204,7 +204,7 @@ instructionsetgroup ,armv8-a ,ARM64 ,neon instructionsetgroup ,armv8.1-a ,ARM64 ,armv8-a lse crc rdma instructionsetgroup ,armv8.2-a ,ARM64 ,armv8.1-a instructionsetgroup ,armv8.3-a ,ARM64 ,armv8.2-a rcpc -instructionsetgroup ,armv8.4-a ,ARM64 ,armv8.3-a dotprod +instructionsetgroup ,armv8.4-a ,ARM64 ,armv8.3-a dotprod rcpc2 instructionsetgroup ,armv8.5-a ,ARM64 ,armv8.4-a instructionsetgroup ,armv8.6-a ,ARM64 ,armv8.5-a From 261cbf392a649cc0f9cf0aa56c62f6a54aecc983 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 29 Jul 2023 23:17:25 +0200 Subject: [PATCH 11/16] formatting --- src/coreclr/jit/emitarm64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 14fe4c78956916..3942bc17ed661b 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -8823,7 +8823,7 @@ void emitter::emitIns_Call(EmitCallType callType, if (callType == EC_INDIR_R) { -/* This is an indirect call (either a virtual call or func ptr call) */ + /* This is an indirect call (either a virtual call or func ptr call) */ if (isJump) { From 675a36abeecda86faeda235f6bfaf1bf749b0bd8 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 30 Jul 2023 16:33:29 +0200 Subject: [PATCH 12/16] Fix assert --- src/coreclr/jit/codegenarmarch.cpp | 2 +- src/coreclr/jit/emitarm64.cpp | 6 ++++++ src/coreclr/jit/emitarm64.h | 3 +++ src/coreclr/jit/lower.cpp | 2 +- 4 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index b48a23f459f4a2..cd59dd4826a24c 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -1771,7 +1771,7 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree) bool handledWithLdapur = false; if (hasRcpc2 && !addrIsInReg && tree->Addr()->OperIs(GT_LEA) && (tree->Scale() == 1) && - GetEmitter()->emitIns_valid_imm_for_ldst_offset(tree->Offset(), emitTypeSize(type))) + emitter::emitIns_valid_imm_for_unscaled_ldst_offset(tree->Offset())) { if (ins == INS_ldrb) { diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 3942bc17ed661b..271e43c8657c35 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -2381,6 +2381,12 @@ emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt) return false; } +// true if this 'imm' can be encoded as the offset in an unscaled ldr/str instruction +/*static*/ bool emitter::emitIns_valid_imm_for_unscaled_ldst_offset(INT64 imm) +{ + return (imm >= -256) && (imm <= 255); +} + // true if this 'imm' can be encoded as the offset in a ldr/str instruction /*static*/ bool emitter::emitIns_valid_imm_for_ldst_offset(INT64 imm, emitAttr attr) { diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index ca205eef07b0fa..a971f56f9d2884 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -543,6 +543,9 @@ static bool emitIns_valid_imm_for_alu(INT64 imm, emitAttr size); // true if this 'imm' can be encoded as the offset in a ldr/str instruction static bool emitIns_valid_imm_for_ldst_offset(INT64 imm, emitAttr size); +// true if this 'imm' can be encoded as the offset in an unscaled ldr/str instruction +static bool emitIns_valid_imm_for_unscaled_ldst_offset(INT64 imm); + // true if this 'imm' can be encoded as a input operand to a ccmp instruction static bool emitIns_valid_imm_for_ccmp(INT64 imm); diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index e8385fcf37d2e3..970133694ada85 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -6064,7 +6064,7 @@ bool Lowering::TryCreateAddrMode(GenTree* addr, bool isContainable, GenTree* par // ldar/stlr instead of ldr/str + dmb. Although, with Arm 8.4+'s RCPC2 we can handle unscaled // addressing modes (if the offset fits into 9 bits) assert(hasRcpc2); - if ((scale > 1) || !emitter::emitIns_valid_imm_for_ldst_offset(offset, emitTypeSize(parent->TypeGet()))) + if ((scale > 1) || !emitter::emitIns_valid_imm_for_unscaled_ldst_offset(offset)) { return false; } From 064ec923a148d3a71e30a2bf142ba9b80814eb5b Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 30 Jul 2023 16:40:22 +0200 Subject: [PATCH 13/16] Fix assert --- src/coreclr/jit/codegenarmarch.cpp | 2 +- src/coreclr/jit/lower.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index cd59dd4826a24c..69897c5c9cb628 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -1770,7 +1770,7 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree) bool hasRcpc2 = compiler->compOpportunisticallyDependsOn(InstructionSet_Rcpc2); bool handledWithLdapur = false; - if (hasRcpc2 && !addrIsInReg && tree->Addr()->OperIs(GT_LEA) && (tree->Scale() == 1) && + if (hasRcpc2 && !addrIsInReg && tree->Addr()->OperIs(GT_LEA) && !tree->HasIndex() && (tree->Scale() == 1) && emitter::emitIns_valid_imm_for_unscaled_ldst_offset(tree->Offset())) { if (ins == INS_ldrb) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 970133694ada85..97e0734a696b89 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -6064,7 +6064,7 @@ bool Lowering::TryCreateAddrMode(GenTree* addr, bool isContainable, GenTree* par // ldar/stlr instead of ldr/str + dmb. Although, with Arm 8.4+'s RCPC2 we can handle unscaled // addressing modes (if the offset fits into 9 bits) assert(hasRcpc2); - if ((scale > 1) || !emitter::emitIns_valid_imm_for_unscaled_ldst_offset(offset)) + if ((scale > 1) || !emitter::emitIns_valid_imm_for_unscaled_ldst_offset(offset) && (index == nullptr)) { return false; } From 68c2e0dc84785aeb9e29f045e5be1b7153264a32 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Sun, 30 Jul 2023 17:10:03 +0200 Subject: [PATCH 14/16] Update lower.cpp --- src/coreclr/jit/lower.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 97e0734a696b89..f5555afed31f45 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -6064,7 +6064,7 @@ bool Lowering::TryCreateAddrMode(GenTree* addr, bool isContainable, GenTree* par // ldar/stlr instead of ldr/str + dmb. Although, with Arm 8.4+'s RCPC2 we can handle unscaled // addressing modes (if the offset fits into 9 bits) assert(hasRcpc2); - if ((scale > 1) || !emitter::emitIns_valid_imm_for_unscaled_ldst_offset(offset) && (index == nullptr)) + if ((scale > 1) || (!emitter::emitIns_valid_imm_for_unscaled_ldst_offset(offset)) || (index != nullptr)) { return false; } From 411699fa5fbac6ebbf92ba6ae1e5ef8db9c4a9aa Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Sun, 30 Jul 2023 17:10:55 +0200 Subject: [PATCH 15/16] Update emit.h --- src/coreclr/jit/emit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 5d1a0ed0fa2bb7..e1d503443e8fc4 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -808,7 +808,7 @@ class emitter // x86: 47 bits // amd64: 47 bits // arm: 48 bits - // arm64: 51 bits + // arm64: 50 bits // loongarch64: 46 bits // risc-v: 46 bits From 33bc74794c15357176403dd347fb83d26b83a9ab Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 26 Aug 2023 20:38:32 +0200 Subject: [PATCH 16/16] Fix conflicts --- src/coreclr/jit/codegenarmarch.cpp | 2 +- src/native/minipal/cpufeatures.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 69897c5c9cb628..a57ba2e964a9c4 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -1765,7 +1765,7 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree) // full memory barriers if mixed with STLR bool hasRcpc = compiler->compOpportunisticallyDependsOn(InstructionSet_Rcpc); - // On arm64-v8.3+ we can use ldapur* instructions with acquire/release semantics to + // On arm64-v8.4+ we can use ldapur* instructions with acquire/release semantics to // avoid full memory barriers if address is contained and unscaled bool hasRcpc2 = compiler->compOpportunisticallyDependsOn(InstructionSet_Rcpc2); diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c index 72a83ece2a0765..af122d4fbb1c07 100644 --- a/src/native/minipal/cpufeatures.c +++ b/src/native/minipal/cpufeatures.c @@ -28,6 +28,9 @@ #ifndef HWCAP_LRCPC #define HWCAP_LRCPC (1 << 15) #endif +#ifndef HWCAP_ILRCPC +#define HWCAP_ILRCPC (1 << 26) +#endif #ifndef HWCAP_ASIMDDP #define HWCAP_ASIMDDP (1 << 20) #endif @@ -338,7 +341,7 @@ int minipal_getcpufeatures(void) if (hwCap & HWCAP_LRCPC) result |= ARM64IntrinsicConstants_Rcpc; - if (hwCap & HWCAP_LRCPC2) + if (hwCap & HWCAP_ILRCPC) result |= ARM64IntrinsicConstants_Rcpc2; if (hwCap & HWCAP_SHA1)