[compiler-rt] Add initial ARM64EC builtins support #139279

cjacek · 2025-05-09T15:12:42Z

Use the aarch64 variants of assembly functions.

Based on a patch by Billy Laws.

github-actions · 2025-05-09T15:15:00Z

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:

git-clang-format --diff HEAD~1 HEAD --extensions h,inc,c -- compiler-rt/lib/builtins/clear_cache.c compiler-rt/lib/builtins/cpu_model/aarch64.c compiler-rt/lib/builtins/cpu_model/aarch64.h compiler-rt/lib/builtins/fp_compare_impl.inc compiler-rt/lib/builtins/fp_lib.h compiler-rt/lib/builtins/udivmodti4.c compiler-rt/test/builtins/Unit/enable_execute_stack_test.c compiler-rt/test/builtins/Unit/fixunstfdi_test.c compiler-rt/test/builtins/Unit/multc3_test.c

View the diff from clang-format here.

diff --git a/compiler-rt/test/builtins/Unit/fixunstfdi_test.c b/compiler-rt/test/builtins/Unit/fixunstfdi_test.c
index 982f3a462..0a66602fa 100644
--- a/compiler-rt/test/builtins/Unit/fixunstfdi_test.c
+++ b/compiler-rt/test/builtins/Unit/fixunstfdi_test.c
@@ -6,7 +6,7 @@
 
 #if _ARCH_PPC || __aarch64__ || __arm64ec__
 
-#include "int_lib.h"
+#  include "int_lib.h"
 
 // Returns: convert a to a unsigned long long, rounding toward zero.
 //          Negative values all become zero.
@@ -36,77 +36,77 @@ char assumption_3[sizeof(long double)*CHAR_BIT == 128] = {0};
 int main()
 {
 #if _ARCH_PPC || __aarch64__ || __arm64ec__
-    if (test__fixunstfdi(0.0, 0))
-        return 1;
-
-    if (test__fixunstfdi(0.5, 0))
-        return 1;
-    if (test__fixunstfdi(0.99, 0))
-        return 1;
-    if (test__fixunstfdi(1.0, 1))
-        return 1;
-    if (test__fixunstfdi(1.5, 1))
-        return 1;
-    if (test__fixunstfdi(1.99, 1))
-        return 1;
-    if (test__fixunstfdi(2.0, 2))
-        return 1;
-    if (test__fixunstfdi(2.01, 2))
-        return 1;
-    if (test__fixunstfdi(-0.5, 0))
-        return 1;
-    if (test__fixunstfdi(-0.99, 0))
-        return 1;
-    if (test__fixunstfdi(-1.0, 0))
-        return 1;
-    if (test__fixunstfdi(-1.5, 0))
-        return 1;
-    if (test__fixunstfdi(-1.99, 0))
-        return 1;
-    if (test__fixunstfdi(-2.0, 0))
-        return 1;
-    if (test__fixunstfdi(-2.01, 0))
-        return 1;
-
-    if (test__fixunstfdi(0x1.FFFFFEp+62, 0x7FFFFF8000000000LL))
-        return 1;
-    if (test__fixunstfdi(0x1.FFFFFCp+62, 0x7FFFFF0000000000LL))
-        return 1;
-
-    if (test__fixunstfdi(-0x1.FFFFFEp+62, 0))
-        return 1;
-    if (test__fixunstfdi(-0x1.FFFFFCp+62, 0))
-        return 1;
-
-    if (test__fixunstfdi(0x1.FFFFFFFFFFFFFp+62, 0x7FFFFFFFFFFFFC00LL))
-        return 1;
-    if (test__fixunstfdi(0x1.FFFFFFFFFFFFEp+62, 0x7FFFFFFFFFFFF800LL))
-        return 1;
-
-    if (test__fixunstfdi(-0x1.FFFFFFFFFFFFFp+62, 0))
-        return 1;
-    if (test__fixunstfdi(-0x1.FFFFFFFFFFFFEp+62, 0))
-        return 1;
-
-    if (test__fixunstfdi(0x1.FFFFFFFFFFFFFFFEp+63L, 0xFFFFFFFFFFFFFFFFLL))
-        return 1;
-    if (test__fixunstfdi(0x1.0000000000000002p+63L, 0x8000000000000001LL))
-        return 1;
-    if (test__fixunstfdi(0x1.0000000000000000p+63L, 0x8000000000000000LL))
-        return 1;
-    if (test__fixunstfdi(0x1.FFFFFFFFFFFFFFFCp+62L, 0x7FFFFFFFFFFFFFFFLL))
-        return 1;
-    if (test__fixunstfdi(0x1.FFFFFFFFFFFFFFF8p+62L, 0x7FFFFFFFFFFFFFFELL))
-        return 1;
-    if (test__fixunstfdi(0x1.p+64L, 0xFFFFFFFFFFFFFFFFLL))
-        return 1;
-
-    if (test__fixunstfdi(-0x1.0000000000000000p+63L, 0))
-        return 1;
-    if (test__fixunstfdi(-0x1.FFFFFFFFFFFFFFFCp+62L, 0))
-        return 1;
-    if (test__fixunstfdi(-0x1.FFFFFFFFFFFFFFF8p+62L, 0))
-        return 1;
+  if (test__fixunstfdi(0.0, 0))
+    return 1;
+
+  if (test__fixunstfdi(0.5, 0))
+    return 1;
+  if (test__fixunstfdi(0.99, 0))
+    return 1;
+  if (test__fixunstfdi(1.0, 1))
+    return 1;
+  if (test__fixunstfdi(1.5, 1))
+    return 1;
+  if (test__fixunstfdi(1.99, 1))
+    return 1;
+  if (test__fixunstfdi(2.0, 2))
+    return 1;
+  if (test__fixunstfdi(2.01, 2))
+    return 1;
+  if (test__fixunstfdi(-0.5, 0))
+    return 1;
+  if (test__fixunstfdi(-0.99, 0))
+    return 1;
+  if (test__fixunstfdi(-1.0, 0))
+    return 1;
+  if (test__fixunstfdi(-1.5, 0))
+    return 1;
+  if (test__fixunstfdi(-1.99, 0))
+    return 1;
+  if (test__fixunstfdi(-2.0, 0))
+    return 1;
+  if (test__fixunstfdi(-2.01, 0))
+    return 1;
+
+  if (test__fixunstfdi(0x1.FFFFFEp+62, 0x7FFFFF8000000000LL))
+    return 1;
+  if (test__fixunstfdi(0x1.FFFFFCp+62, 0x7FFFFF0000000000LL))
+    return 1;
+
+  if (test__fixunstfdi(-0x1.FFFFFEp+62, 0))
+    return 1;
+  if (test__fixunstfdi(-0x1.FFFFFCp+62, 0))
+    return 1;
+
+  if (test__fixunstfdi(0x1.FFFFFFFFFFFFFp+62, 0x7FFFFFFFFFFFFC00LL))
+    return 1;
+  if (test__fixunstfdi(0x1.FFFFFFFFFFFFEp+62, 0x7FFFFFFFFFFFF800LL))
+    return 1;
+
+  if (test__fixunstfdi(-0x1.FFFFFFFFFFFFFp+62, 0))
+    return 1;
+  if (test__fixunstfdi(-0x1.FFFFFFFFFFFFEp+62, 0))
+    return 1;
+
+  if (test__fixunstfdi(0x1.FFFFFFFFFFFFFFFEp+63L, 0xFFFFFFFFFFFFFFFFLL))
+    return 1;
+  if (test__fixunstfdi(0x1.0000000000000002p+63L, 0x8000000000000001LL))
+    return 1;
+  if (test__fixunstfdi(0x1.0000000000000000p+63L, 0x8000000000000000LL))
+    return 1;
+  if (test__fixunstfdi(0x1.FFFFFFFFFFFFFFFCp+62L, 0x7FFFFFFFFFFFFFFFLL))
+    return 1;
+  if (test__fixunstfdi(0x1.FFFFFFFFFFFFFFF8p+62L, 0x7FFFFFFFFFFFFFFELL))
+    return 1;
+  if (test__fixunstfdi(0x1.p+64L, 0xFFFFFFFFFFFFFFFFLL))
+    return 1;
+
+  if (test__fixunstfdi(-0x1.0000000000000000p+63L, 0))
+    return 1;
+  if (test__fixunstfdi(-0x1.FFFFFFFFFFFFFFFCp+62L, 0))
+    return 1;
+  if (test__fixunstfdi(-0x1.FFFFFFFFFFFFFFF8p+62L, 0))
+    return 1;
 
 #else
     printf("skipped\n");
diff --git a/compiler-rt/test/builtins/Unit/multc3_test.c b/compiler-rt/test/builtins/Unit/multc3_test.c
index e9c99a72b..44707c75d 100644
--- a/compiler-rt/test/builtins/Unit/multc3_test.c
+++ b/compiler-rt/test/builtins/Unit/multc3_test.c
@@ -6,9 +6,9 @@
 
 #if _ARCH_PPC || __aarch64__ || __arm64ec__
 
-#include "int_lib.h"
-#include <math.h>
-#include <complex.h>
+#  include "int_lib.h"
+#  include <math.h>
+#  include <complex.h>
 
 // Returns: the product of a + ib and c + id
 
@@ -349,16 +349,14 @@ long double x[][2] =
 int main()
 {
 #if _ARCH_PPC || __aarch64__ || __arm64ec__
-    const unsigned N = sizeof(x) / sizeof(x[0]);
-    unsigned i, j;
-    for (i = 0; i < N; ++i)
-    {
-        for (j = 0; j < N; ++j)
-        {
-            if (test__multc3(x[i][0], x[i][1], x[j][0], x[j][1]))
-                return 1;
-        }
+  const unsigned N = sizeof(x) / sizeof(x[0]);
+  unsigned i, j;
+  for (i = 0; i < N; ++i) {
+    for (j = 0; j < N; ++j) {
+      if (test__multc3(x[i][0], x[i][1], x[j][0], x[j][1]))
+        return 1;
     }
+  }
 #else
     printf("skipped\n");
 #endif

cjacek · 2025-05-09T15:25:40Z

The code formatting errors relate to existing formatting around my changes. Including those in the diff would be misleading, in my opinion.

mstorsjo

Looking good overall; only some comments relating to clear_cache, both for the implementation and the test.

mstorsjo · 2025-05-14T08:55:03Z

compiler-rt/lib/builtins/clear_cache.c

@@ -122,7 +123,7 @@ void __clear_cache(void *start, void *end) {
    compilerrt_abort();
 #endif
  }
-#elif defined(__aarch64__) && !defined(__APPLE__)
+#elif (defined(__aarch64__) || defined(__arm64ec__)) && !defined(__APPLE__)


Isn't this case redundant here - isn't this #elif on the same level as the _WIN32 && (arm || aarch64 || arm64ec) at the start of the function?

On the other hand, it's probably ok in one sense, as I guess this implementation should be usable too - or is it? I'm not entirely sure if this uses some instruction which is not accessible from userspace on Windows (which could be why we're using FlushInstructionCache to begin with). Or perhaps that was required for 32-bit arm and we just kept doing the same for 64-bit too?

Good point, I think this is redundant. This came from Billy's original patch. I remember changing a similar case elsewhere but missed this one.

mstorsjo · 2025-05-14T08:57:55Z

compiler-rt/test/builtins/Unit/enable_execute_stack_test.c

@@ -10,9 +10,22 @@ extern void __enable_execute_stack(void* addr);

 typedef int (*pfunc)(void);

+#ifdef __x86_64__
+// On ARM64EC, we need the x86_64 version of this function, but the compiler


Doing this for all of x86_64 seems a bit far-reaching; wouldn't it be cleaner to restrict this case to arm64ec? Then again, extending it to all of x86_64 does give it more coverage and exposure, but ideally I wouldn't be touching the behaviour for other architectures in a patch like this.

I changed this to use __arm64ec__.

mstorsjo · 2025-05-14T08:58:51Z

compiler-rt/test/builtins/Unit/enable_execute_stack_test.c

@@ -31,22 +44,28 @@ int main()
 {
 #if defined(__ve__)
    unsigned char execution_buffer[128] __attribute__((__aligned__(8)));
+#elif defined(__x86_64__)
+  unsigned char execution_buffer[sizeof(func1)];


Why do we need to shrink the buffer here?

mstorsjo · 2025-05-14T08:59:58Z

compiler-rt/test/builtins/Unit/enable_execute_stack_test.c

-    pfunc f1 = (pfunc)memcpy_f(execution_buffer, func1, 128);
-    __clear_cache(execution_buffer, &execution_buffer[128]);
+    pfunc f1 =
+        (pfunc)memcpy_f(execution_buffer, func1, sizeof(execution_buffer));


Same here - shouldn't overreads here be equally problematic for arm64ec as for all the existing architectures?

My thinking was that using an array for the code made it easier to avoid overreads than on other platforms, but there’s no strong reason for it. I’ve reverted that in the new version. Thanks for the review!

Use the aarch64 variants of assembly functions. Based on a patch by Billy Laws.

mstorsjo

LGTM.

If you think it's more than just "based on a patch by", you could also consider adding a Co-authored-by: to share the authoring credit in a more formal way.

cjacek · 2025-05-15T09:47:38Z

I believe the meaning is the same (this is a modified version of Billy's patch), but I agree it's better to use the formalized form. I've updated the commit accordingly, thanks!

cjacek requested a review from mstorsjo May 9, 2025 15:12

llvmbot added compiler-rt compiler-rt:builtins labels May 9, 2025

mstorsjo approved these changes May 14, 2025

View reviewed changes

[compiler-rt] Add initial ARM64EC builtins support

48aa2a7

Use the aarch64 variants of assembly functions. Based on a patch by Billy Laws.

cjacek force-pushed the arm64ec-compiler-rt branch from 5d27c3f to 48aa2a7 Compare May 14, 2025 22:04

mstorsjo approved these changes May 15, 2025

View reviewed changes

cjacek merged commit 3764ba2 into llvm:main May 15, 2025
9 of 10 checks passed

cjacek deleted the arm64ec-compiler-rt branch May 15, 2025 09:43

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[compiler-rt] Add initial ARM64EC builtins support #139279

[compiler-rt] Add initial ARM64EC builtins support #139279

cjacek commented May 9, 2025

github-actions bot commented May 9, 2025

cjacek commented May 9, 2025

mstorsjo left a comment

mstorsjo May 14, 2025

cjacek May 14, 2025

mstorsjo May 14, 2025

cjacek May 14, 2025

mstorsjo May 14, 2025

mstorsjo May 14, 2025

cjacek May 14, 2025

mstorsjo left a comment

cjacek commented May 15, 2025

[compiler-rt] Add initial ARM64EC builtins support #139279

[compiler-rt] Add initial ARM64EC builtins support #139279

Conversation

cjacek commented May 9, 2025

github-actions bot commented May 9, 2025

cjacek commented May 9, 2025

mstorsjo left a comment

Choose a reason for hiding this comment

mstorsjo May 14, 2025

Choose a reason for hiding this comment

cjacek May 14, 2025

Choose a reason for hiding this comment

mstorsjo May 14, 2025

Choose a reason for hiding this comment

cjacek May 14, 2025

Choose a reason for hiding this comment

mstorsjo May 14, 2025

Choose a reason for hiding this comment

mstorsjo May 14, 2025

Choose a reason for hiding this comment

cjacek May 14, 2025

Choose a reason for hiding this comment

mstorsjo left a comment

Choose a reason for hiding this comment

cjacek commented May 15, 2025