-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[compiler-rt] Add initial ARM64EC builtins support #139279
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
You can test this locally with the following command:
git-clang-format --diff HEAD~1 HEAD --extensions h,inc,c -- compiler-rt/lib/builtins/clear_cache.c compiler-rt/lib/builtins/cpu_model/aarch64.c compiler-rt/lib/builtins/cpu_model/aarch64.h compiler-rt/lib/builtins/fp_compare_impl.inc compiler-rt/lib/builtins/fp_lib.h compiler-rt/lib/builtins/udivmodti4.c compiler-rt/test/builtins/Unit/enable_execute_stack_test.c compiler-rt/test/builtins/Unit/fixunstfdi_test.c compiler-rt/test/builtins/Unit/multc3_test.c
View the diff from clang-format here.
diff --git a/compiler-rt/test/builtins/Unit/fixunstfdi_test.c b/compiler-rt/test/builtins/Unit/fixunstfdi_test.c
index 982f3a462..0a66602fa 100644
--- a/compiler-rt/test/builtins/Unit/fixunstfdi_test.c
+++ b/compiler-rt/test/builtins/Unit/fixunstfdi_test.c
@@ -6,7 +6,7 @@
#if _ARCH_PPC || __aarch64__ || __arm64ec__
-#include "int_lib.h"
+# include "int_lib.h"
// Returns: convert a to a unsigned long long, rounding toward zero.
// Negative values all become zero.
@@ -36,77 +36,77 @@ char assumption_3[sizeof(long double)*CHAR_BIT == 128] = {0};
int main()
{
#if _ARCH_PPC || __aarch64__ || __arm64ec__
- if (test__fixunstfdi(0.0, 0))
- return 1;
-
- if (test__fixunstfdi(0.5, 0))
- return 1;
- if (test__fixunstfdi(0.99, 0))
- return 1;
- if (test__fixunstfdi(1.0, 1))
- return 1;
- if (test__fixunstfdi(1.5, 1))
- return 1;
- if (test__fixunstfdi(1.99, 1))
- return 1;
- if (test__fixunstfdi(2.0, 2))
- return 1;
- if (test__fixunstfdi(2.01, 2))
- return 1;
- if (test__fixunstfdi(-0.5, 0))
- return 1;
- if (test__fixunstfdi(-0.99, 0))
- return 1;
- if (test__fixunstfdi(-1.0, 0))
- return 1;
- if (test__fixunstfdi(-1.5, 0))
- return 1;
- if (test__fixunstfdi(-1.99, 0))
- return 1;
- if (test__fixunstfdi(-2.0, 0))
- return 1;
- if (test__fixunstfdi(-2.01, 0))
- return 1;
-
- if (test__fixunstfdi(0x1.FFFFFEp+62, 0x7FFFFF8000000000LL))
- return 1;
- if (test__fixunstfdi(0x1.FFFFFCp+62, 0x7FFFFF0000000000LL))
- return 1;
-
- if (test__fixunstfdi(-0x1.FFFFFEp+62, 0))
- return 1;
- if (test__fixunstfdi(-0x1.FFFFFCp+62, 0))
- return 1;
-
- if (test__fixunstfdi(0x1.FFFFFFFFFFFFFp+62, 0x7FFFFFFFFFFFFC00LL))
- return 1;
- if (test__fixunstfdi(0x1.FFFFFFFFFFFFEp+62, 0x7FFFFFFFFFFFF800LL))
- return 1;
-
- if (test__fixunstfdi(-0x1.FFFFFFFFFFFFFp+62, 0))
- return 1;
- if (test__fixunstfdi(-0x1.FFFFFFFFFFFFEp+62, 0))
- return 1;
-
- if (test__fixunstfdi(0x1.FFFFFFFFFFFFFFFEp+63L, 0xFFFFFFFFFFFFFFFFLL))
- return 1;
- if (test__fixunstfdi(0x1.0000000000000002p+63L, 0x8000000000000001LL))
- return 1;
- if (test__fixunstfdi(0x1.0000000000000000p+63L, 0x8000000000000000LL))
- return 1;
- if (test__fixunstfdi(0x1.FFFFFFFFFFFFFFFCp+62L, 0x7FFFFFFFFFFFFFFFLL))
- return 1;
- if (test__fixunstfdi(0x1.FFFFFFFFFFFFFFF8p+62L, 0x7FFFFFFFFFFFFFFELL))
- return 1;
- if (test__fixunstfdi(0x1.p+64L, 0xFFFFFFFFFFFFFFFFLL))
- return 1;
-
- if (test__fixunstfdi(-0x1.0000000000000000p+63L, 0))
- return 1;
- if (test__fixunstfdi(-0x1.FFFFFFFFFFFFFFFCp+62L, 0))
- return 1;
- if (test__fixunstfdi(-0x1.FFFFFFFFFFFFFFF8p+62L, 0))
- return 1;
+ if (test__fixunstfdi(0.0, 0))
+ return 1;
+
+ if (test__fixunstfdi(0.5, 0))
+ return 1;
+ if (test__fixunstfdi(0.99, 0))
+ return 1;
+ if (test__fixunstfdi(1.0, 1))
+ return 1;
+ if (test__fixunstfdi(1.5, 1))
+ return 1;
+ if (test__fixunstfdi(1.99, 1))
+ return 1;
+ if (test__fixunstfdi(2.0, 2))
+ return 1;
+ if (test__fixunstfdi(2.01, 2))
+ return 1;
+ if (test__fixunstfdi(-0.5, 0))
+ return 1;
+ if (test__fixunstfdi(-0.99, 0))
+ return 1;
+ if (test__fixunstfdi(-1.0, 0))
+ return 1;
+ if (test__fixunstfdi(-1.5, 0))
+ return 1;
+ if (test__fixunstfdi(-1.99, 0))
+ return 1;
+ if (test__fixunstfdi(-2.0, 0))
+ return 1;
+ if (test__fixunstfdi(-2.01, 0))
+ return 1;
+
+ if (test__fixunstfdi(0x1.FFFFFEp+62, 0x7FFFFF8000000000LL))
+ return 1;
+ if (test__fixunstfdi(0x1.FFFFFCp+62, 0x7FFFFF0000000000LL))
+ return 1;
+
+ if (test__fixunstfdi(-0x1.FFFFFEp+62, 0))
+ return 1;
+ if (test__fixunstfdi(-0x1.FFFFFCp+62, 0))
+ return 1;
+
+ if (test__fixunstfdi(0x1.FFFFFFFFFFFFFp+62, 0x7FFFFFFFFFFFFC00LL))
+ return 1;
+ if (test__fixunstfdi(0x1.FFFFFFFFFFFFEp+62, 0x7FFFFFFFFFFFF800LL))
+ return 1;
+
+ if (test__fixunstfdi(-0x1.FFFFFFFFFFFFFp+62, 0))
+ return 1;
+ if (test__fixunstfdi(-0x1.FFFFFFFFFFFFEp+62, 0))
+ return 1;
+
+ if (test__fixunstfdi(0x1.FFFFFFFFFFFFFFFEp+63L, 0xFFFFFFFFFFFFFFFFLL))
+ return 1;
+ if (test__fixunstfdi(0x1.0000000000000002p+63L, 0x8000000000000001LL))
+ return 1;
+ if (test__fixunstfdi(0x1.0000000000000000p+63L, 0x8000000000000000LL))
+ return 1;
+ if (test__fixunstfdi(0x1.FFFFFFFFFFFFFFFCp+62L, 0x7FFFFFFFFFFFFFFFLL))
+ return 1;
+ if (test__fixunstfdi(0x1.FFFFFFFFFFFFFFF8p+62L, 0x7FFFFFFFFFFFFFFELL))
+ return 1;
+ if (test__fixunstfdi(0x1.p+64L, 0xFFFFFFFFFFFFFFFFLL))
+ return 1;
+
+ if (test__fixunstfdi(-0x1.0000000000000000p+63L, 0))
+ return 1;
+ if (test__fixunstfdi(-0x1.FFFFFFFFFFFFFFFCp+62L, 0))
+ return 1;
+ if (test__fixunstfdi(-0x1.FFFFFFFFFFFFFFF8p+62L, 0))
+ return 1;
#else
printf("skipped\n");
diff --git a/compiler-rt/test/builtins/Unit/multc3_test.c b/compiler-rt/test/builtins/Unit/multc3_test.c
index e9c99a72b..44707c75d 100644
--- a/compiler-rt/test/builtins/Unit/multc3_test.c
+++ b/compiler-rt/test/builtins/Unit/multc3_test.c
@@ -6,9 +6,9 @@
#if _ARCH_PPC || __aarch64__ || __arm64ec__
-#include "int_lib.h"
-#include <math.h>
-#include <complex.h>
+# include "int_lib.h"
+# include <math.h>
+# include <complex.h>
// Returns: the product of a + ib and c + id
@@ -349,16 +349,14 @@ long double x[][2] =
int main()
{
#if _ARCH_PPC || __aarch64__ || __arm64ec__
- const unsigned N = sizeof(x) / sizeof(x[0]);
- unsigned i, j;
- for (i = 0; i < N; ++i)
- {
- for (j = 0; j < N; ++j)
- {
- if (test__multc3(x[i][0], x[i][1], x[j][0], x[j][1]))
- return 1;
- }
+ const unsigned N = sizeof(x) / sizeof(x[0]);
+ unsigned i, j;
+ for (i = 0; i < N; ++i) {
+ for (j = 0; j < N; ++j) {
+ if (test__multc3(x[i][0], x[i][1], x[j][0], x[j][1]))
+ return 1;
}
+ }
#else
printf("skipped\n");
#endif
|
The code formatting errors relate to existing formatting around my changes. Including those in the diff would be misleading, in my opinion. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looking good overall; only some comments relating to `clear_cache`, both for the implementation and the test.
@@ -122,7 +123,7 @@ void __clear_cache(void *start, void *end) { | |||
compilerrt_abort(); | |||
#endif | |||
} | |||
#elif defined(__aarch64__) && !defined(__APPLE__) | |||
#elif (defined(__aarch64__) || defined(__arm64ec__)) && !defined(__APPLE__) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Isn't this case redundant here - isn't this `#elif` on the same level as the `_WIN32 && (arm || aarch64 || arm64ec)` at the start of the function?
On the other hand, it's probably OK in one sense, as I guess this implementation should be usable too - or is it? I'm not entirely sure if this uses some instruction which is not accessible from userspace on Windows (which could be why we're using `FlushInstructionCache` to begin with). Or perhaps that was required for 32-bit ARM and we just kept doing the same for 64-bit too?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point, I think this is redundant. This came from Billy's original patch. I remember changing a similar case elsewhere but missed this one.
@@ -10,9 +10,22 @@ extern void __enable_execute_stack(void* addr); | |||
|
|||
typedef int (*pfunc)(void); | |||
|
|||
#ifdef __x86_64__ | |||
// On ARM64EC, we need the x86_64 version of this function, but the compiler |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Doing this for all of x86_64 seems a bit far-reaching; wouldn't it be cleaner to restrict this case to arm64ec? Then again, extending it to all of x86_64 does give it more coverage and exposure, but ideally I wouldn't be touching the behaviour for other architectures in a patch like this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I changed this to use `__arm64ec__`.
@@ -31,22 +44,28 @@ int main() | |||
{ | |||
#if defined(__ve__) | |||
unsigned char execution_buffer[128] __attribute__((__aligned__(8))); | |||
#elif defined(__x86_64__) | |||
unsigned char execution_buffer[sizeof(func1)]; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do we need to shrink the buffer here?
pfunc f1 = (pfunc)memcpy_f(execution_buffer, func1, 128); | ||
__clear_cache(execution_buffer, &execution_buffer[128]); | ||
pfunc f1 = | ||
(pfunc)memcpy_f(execution_buffer, func1, sizeof(execution_buffer)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same here - shouldn't overreads here be equally problematic for arm64ec as for all the existing architectures?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My thinking was that using an array for the code made it easier to avoid overreads than on other platforms, but there’s no strong reason for it. I’ve reverted that in the new version. Thanks for the review!
Use the aarch64 variants of assembly functions. Based on a patch by Billy Laws.
5d27c3f
to
48aa2a7
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
If you think it's more than just "based on a patch by", you could also consider adding a `Co-authored-by:` trailer to share the authoring credit in a more formal way.
I believe the meaning is the same (this is a modified version of Billy's patch), but I agree it's better to use the formalized form. I've updated the commit accordingly, thanks! |
Use the aarch64 variants of assembly functions.
Based on a patch by Billy Laws.