From e000d0ec03d12423ee3c75a1fb02f8324594430d Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Thu, 1 Sep 2022 13:14:34 +0100 Subject: [PATCH 1/3] gh-96143: Clear instruction cache after mprotect call --- Objects/perf_trampoline.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/Objects/perf_trampoline.c b/Objects/perf_trampoline.c index 2cbe3741f26fbc..389541f7d31585 100644 --- a/Objects/perf_trampoline.c +++ b/Objects/perf_trampoline.c @@ -185,6 +185,18 @@ struct trampoline_api_st { typedef struct trampoline_api_st trampoline_api_t; +#if defined(__clang__) || defined(__GNUC__) +extern void __clear_cache(void *, void*); +#endif + +static void invalidate_icache(char* begin, char*end) { +#if defined(__clang__) || defined(__GNUC__) + return __clear_cache(begin, end); +#else + return; +#endif +} + static perf_status_t perf_status = PERF_STATUS_NO_INIT; static Py_ssize_t extra_code_index = -1; static code_arena_t *code_arena; @@ -297,10 +309,6 @@ new_code_arena(void) memcpy(memory + i * code_size, start, code_size * sizeof(char)); } // Some systems may prevent us from creating executable code on the fly. - // TODO: Call icache invalidation intrinsics if available: - // __builtin___clear_cache/__clear_cache (depending if clang/gcc). This is - // technically not necessary but we could be missing something so better be - // safe. int res = mprotect(memory, mem_size, PROT_READ | PROT_EXEC); if (res == -1) { PyErr_SetFromErrno(PyExc_OSError); @@ -311,6 +319,12 @@ new_code_arena(void) return -1; } +#if defined(__arm__) || defined(__arm64__) || defined(__aarch64__) + // Before the JIT can run a block of code that has been emitted it must invalidate + // the instruction cache on some platforms like arm and aarch64. 
+ invalidate_icache(memory, memory + mem_size); +#endif + code_arena_t *new_arena = PyMem_RawCalloc(1, sizeof(code_arena_t)); if (new_arena == NULL) { PyErr_NoMemory(); From 33e10bfb2f18ba75cf899484a34ef3da6ac44546 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Thu, 1 Sep 2022 16:36:15 +0100 Subject: [PATCH 2/3] Add a bunch of macros Signed-off-by: Pablo Galindo --- Objects/perf_trampoline.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/Objects/perf_trampoline.c b/Objects/perf_trampoline.c index 389541f7d31585..20a715d5f01888 100644 --- a/Objects/perf_trampoline.c +++ b/Objects/perf_trampoline.c @@ -149,6 +149,22 @@ typedef enum { #include <sys/types.h> #include <unistd.h> +#if defined(__arm__) || defined(__arm64__) || defined(__aarch64__) +#define PY_HAVE_INVALIDATE_ICACHE + +#if defined(__clang__) || defined(__GNUC__) +extern void __clear_cache(void *, void*); +#endif + +static void invalidate_icache(char* begin, char*end) { +#if defined(__clang__) || defined(__GNUC__) + return __clear_cache(begin, end); +#else + return; +#endif +} +#endif + /* The function pointer is passed as last argument. The other three arguments * are passed in the same order as the function requires. This results in * shorter, more efficient ASM code for trampoline. 
@@ -185,17 +201,9 @@ struct trampoline_api_st { typedef struct trampoline_api_st trampoline_api_t; -#if defined(__clang__) || defined(__GNUC__) -extern void __clear_cache(void *, void*); -#endif +#ifdef PY_HAVE_INVALIDATE_ICACHE -static void invalidate_icache(char* begin, char*end) { -#if defined(__clang__) || defined(__GNUC__) - return __clear_cache(begin, end); -#else - return; #endif -} static perf_status_t perf_status = PERF_STATUS_NO_INIT; static Py_ssize_t extra_code_index = -1; @@ -319,7 +327,7 @@ new_code_arena(void) return -1; } -#if defined(__arm__) || defined(__arm64__) || defined(__aarch64__) +#ifdef PY_HAVE_INVALIDATE_ICACHE // Before the JIT can run a block of code that has been emitted it must invalidate // the instruction cache on some platforms like arm and aarch64. invalidate_icache(memory, memory + mem_size); From fefcef37e19951a1b3179caf8fcb5f394a053ee4 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Fri, 2 Sep 2022 12:04:03 +0100 Subject: [PATCH 3/3] Update Objects/perf_trampoline.c --- Objects/perf_trampoline.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/Objects/perf_trampoline.c b/Objects/perf_trampoline.c index 20a715d5f01888..161e0ef74cf1da 100644 --- a/Objects/perf_trampoline.c +++ b/Objects/perf_trampoline.c @@ -201,9 +201,6 @@ struct trampoline_api_st { typedef struct trampoline_api_st trampoline_api_t; -#ifdef PY_HAVE_INVALIDATE_ICACHE - -#endif static perf_status_t perf_status = PERF_STATUS_NO_INIT; static Py_ssize_t extra_code_index = -1;