From 52bb3da654eaac496a3f7cf0e5fe96b37ab4af20 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 21 Feb 2024 23:15:23 -0800 Subject: [PATCH 1/6] Implement the small code model for x86_64 and aarch64 macOS and Linux --- Python/jit.c | 143 +++++++++++++++++++++++++++++++++++++----- Tools/jit/_schema.py | 10 +++ Tools/jit/_targets.py | 68 ++++++++++++++++---- 3 files changed, 191 insertions(+), 30 deletions(-) diff --git a/Python/jit.c b/Python/jit.c index 839414bd810677..8a1196139ac379 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -47,18 +47,18 @@ jit_error(const char *message) PyErr_Format(PyExc_RuntimeWarning, "JIT %s (%d)", message, hint); } -static char * +static unsigned char * jit_alloc(size_t size) { assert(size); assert(size % get_page_size() == 0); #ifdef MS_WINDOWS int flags = MEM_COMMIT | MEM_RESERVE; - char *memory = VirtualAlloc(NULL, size, flags, PAGE_READWRITE); + unsigned char *memory = VirtualAlloc(NULL, size, flags, PAGE_READWRITE); int failed = memory == NULL; #else int flags = MAP_ANONYMOUS | MAP_PRIVATE; - char *memory = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0); + unsigned char *memory = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0); int failed = memory == MAP_FAILED; #endif if (failed) { @@ -69,7 +69,7 @@ jit_alloc(size_t size) } static int -jit_free(char *memory, size_t size) +jit_free(unsigned char *memory, size_t size) { assert(size); assert(size % get_page_size() == 0); @@ -86,7 +86,7 @@ jit_free(char *memory, size_t size) } static int -mark_executable(char *memory, size_t size) +mark_executable(unsigned char *memory, size_t size) { if (size == 0) { return 0; @@ -113,7 +113,7 @@ mark_executable(char *memory, size_t size) } static int -mark_readable(char *memory, size_t size) +mark_readable(unsigned char *memory, size_t size) { if (size == 0) { return 0; @@ -169,18 +169,20 @@ set_bits(uint32_t *loc, uint8_t loc_start, uint64_t value, uint8_t value_start, // Fill all of stencil's holes in the memory pointed to by 
base, using the // values in patches. static void -patch(char *base, const Stencil *stencil, uint64_t *patches) +patch(unsigned char *base, const Stencil *stencil, uint64_t *patches) { for (uint64_t i = 0; i < stencil->holes_size; i++) { const Hole *hole = &stencil->holes[i]; - void *location = base + hole->offset; + unsigned char *location = base + hole->offset; uint64_t value = patches[hole->value] + (uint64_t)hole->symbol + hole->addend; + uint8_t *loc8 = (uint8_t *)location; uint32_t *loc32 = (uint32_t *)location; uint64_t *loc64 = (uint64_t *)location; // LLD is a great reference for performing relocations... just keep in // mind that Tools/jit/build.py does filtering and preprocessing for us! // Here's a good place to start for each platform: // - aarch64-apple-darwin: + // - https://github.com/llvm/llvm-project/blob/main/lld/MachO/Arch/ARM64.cpp // - https://github.com/llvm/llvm-project/blob/main/lld/MachO/Arch/ARM64Common.cpp // - https://github.com/llvm/llvm-project/blob/main/lld/MachO/Arch/ARM64Common.h // - aarch64-unknown-linux-gnu: @@ -208,6 +210,60 @@ patch(char *base, const Stencil *stencil, uint64_t *patches) // 64-bit absolute address. *loc64 = value; continue; + case HoleKind_R_X86_64_GOTPCRELX: + case HoleKind_R_X86_64_REX_GOTPCRELX: + case HoleKind_X86_64_RELOC_GOT: + case HoleKind_X86_64_RELOC_GOT_LOAD: { + // 32-bit relative address. 
+ // Try to relax the GOT load into an immediate value: + uint64_t relaxed = *(uint64_t *)(value + 4) - 4; + if ((int64_t)relaxed - (int64_t)location >= -(1LL << 31) && + (int64_t)relaxed - (int64_t)location + 1 < (1LL << 31)) + { + if (loc8[-2] == 0x8B) { + // Before: mov eax, dword ptr [rip + AAA] + // After: lea eax, [rip + XXX] + assert(hole->kind == HoleKind_IMAGE_REL_AMD64_REL32 || + hole->kind == HoleKind_R_X86_64_GOTPCRELX || + hole->kind == HoleKind_R_X86_64_REX_GOTPCRELX || + hole->kind == HoleKind_X86_64_RELOC_GOT_LOAD); + loc8[-2] = 0x8D; + value = relaxed; + } + else if (loc8[-2] == 0xFF && loc8[-1] == 0x15) { + // Before: call qword ptr [rip + AAA] + // After: nop + // call XXX + assert(hole->kind == HoleKind_R_X86_64_GOTPCRELX || + hole->kind == HoleKind_X86_64_RELOC_GOT); + loc8[-2] = 0x90; + loc8[-1] = 0xE8; + value = relaxed; + } + else if (loc8[-2] == 0xFF && loc8[-1] == 0x25) { + // Before: jmp qword ptr [rip + AAA] + // After: nop + // jmp XXX + assert(hole->kind == HoleKind_IMAGE_REL_AMD64_REL32 || + hole->kind == HoleKind_R_X86_64_GOTPCRELX); + loc8[-2] = 0x90; + loc8[-1] = 0xE9; + value = relaxed; + } + } + } + // Fall through... + case HoleKind_R_X86_64_GOTPCREL: + case HoleKind_R_X86_64_PC32: + case HoleKind_X86_64_RELOC_SIGNED: + case HoleKind_X86_64_RELOC_BRANCH: + // 32-bit relative address. + value -= (uint64_t)location; + // Check that we're not out of range of 32 signed bits: + assert((int64_t)value >= -(1LL << 31)); + assert((int64_t)value < (1LL << 31)); + loc32[0] = (uint32_t)value; + continue; case HoleKind_R_AARCH64_CALL26: case HoleKind_R_AARCH64_JUMP26: // 28-bit relative branch. @@ -249,10 +305,61 @@ patch(char *base, const Stencil *stencil, uint64_t *patches) set_bits(loc32, 5, value, 48, 16); continue; case HoleKind_ARM64_RELOC_GOT_LOAD_PAGE21: + case HoleKind_R_AARCH64_ADR_GOT_PAGE: { // 21-bit count of pages between this page and an absolute address's // page... I know, I know, it's weird. 
Pairs nicely with // ARM64_RELOC_GOT_LOAD_PAGEOFF12 (below). - assert(IS_AARCH64_ADRP(*loc32)); + const Hole *next_hole = &stencil->holes[i + 1]; + if (i + 1 < stencil->holes_size && + (next_hole->kind == HoleKind_ARM64_RELOC_GOT_LOAD_PAGEOFF12 || + next_hole->kind == HoleKind_R_AARCH64_LD64_GOT_LO12_NC) && + next_hole->offset == hole->offset + 4 && + next_hole->symbol == hole->symbol && + next_hole->addend == hole->addend && + next_hole->value == hole->value) + { + assert(IS_AARCH64_ADRP(*loc32)); + unsigned char rd = get_bits(loc32[0], 0, 5); + assert(IS_AARCH64_LDR_OR_STR(loc32[1])); + unsigned char rt = get_bits(loc32[1], 0, 5); + unsigned char rn = get_bits(loc32[1], 5, 5); + assert(rd == rn && rn == rt); + uint64_t relaxed = *(uint64_t *)value; + if (relaxed < (1UL << 16)) { + // Before: adrp x0, AAA + // ldr x0, [x0 + BBB] + // After: movz x0, XXX + // nop + loc32[0] = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | rd; + loc32[1] = 0xD503201F; + i++; + continue; + } + if (relaxed < (1ULL << 32)) { + // Before: adrp x0, AAA + // ldr x0, [x0 + BBB] + // After: movz x0, XXX + // movk x0, YYY + loc32[0] = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | rd; + loc32[1] = 0xF2A00000 | (get_bits(relaxed, 16, 16) << 5) | rd; + i++; + continue; + } + relaxed = (uint64_t)value - (uint64_t)location; + if ((relaxed & 0x3) == 0 && + (int64_t)relaxed >= -(1L << 19) && + (int64_t)relaxed < (1L << 19)) + { + // Before: adrp x0, AAA + // ldr x0, [x0 + BBB] + // After: ldr x0, XXX + // nop + loc32[0] = 0x58000000 | (get_bits(relaxed, 2, 19) << 5) | rd; + loc32[1] = 0xD503201F; + i++; + continue; + } + } // Number of pages between this page and the value's page: value = (value >> 12) - ((uint64_t)location >> 12); // Check that we're not out of range of 21 signed bits: @@ -263,7 +370,9 @@ patch(char *base, const Stencil *stencil, uint64_t *patches) // value[2:21] goes in loc[5:26]: set_bits(loc32, 5, value, 2, 19); continue; + } case HoleKind_ARM64_RELOC_GOT_LOAD_PAGEOFF12: + 
case HoleKind_R_AARCH64_LD64_GOT_LO12_NC: // 12-bit low part of an absolute address. Pairs nicely with // ARM64_RELOC_GOT_LOAD_PAGE21 (above). assert(IS_AARCH64_LDR_OR_STR(*loc32) || IS_AARCH64_ADD_OR_SUB(*loc32)); @@ -285,7 +394,7 @@ patch(char *base, const Stencil *stencil, uint64_t *patches) } static void -copy_and_patch(char *base, const Stencil *stencil, uint64_t *patches) +copy_and_patch(unsigned char *base, const Stencil *stencil, uint64_t *patches) { memcpy(base, stencil->body, stencil->body_size); patch(base, stencil, patches); @@ -294,8 +403,8 @@ copy_and_patch(char *base, const Stencil *stencil, uint64_t *patches) static void emit(const StencilGroup *group, uint64_t patches[]) { - copy_and_patch((char *)patches[HoleValue_CODE], &group->code, patches); - copy_and_patch((char *)patches[HoleValue_DATA], &group->data, patches); + copy_and_patch((unsigned char *)patches[HoleValue_DATA], &group->data, patches); + copy_and_patch((unsigned char *)patches[HoleValue_CODE], &group->code, patches); } // Compiles executor in-place. Don't forget to call _PyJIT_Free later! 
@@ -316,14 +425,14 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size assert((page_size & (page_size - 1)) == 0); code_size += page_size - (code_size & (page_size - 1)); data_size += page_size - (data_size & (page_size - 1)); - char *memory = jit_alloc(code_size + data_size); + unsigned char *memory = jit_alloc(code_size + data_size); if (memory == NULL) { return -1; } // Loop again to emit the code: - char *code = memory; - char *data = memory + code_size; - char *top = code; + unsigned char *code = memory; + unsigned char *data = memory + code_size; + unsigned char *top = code; if (trace[0].opcode == _START_EXECUTOR) { // Don't want to execute this more than once: top += stencil_groups[_START_EXECUTOR].code.body_size; @@ -360,7 +469,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size void _PyJIT_Free(_PyExecutorObject *executor) { - char *memory = (char *)executor->jit_code; + unsigned char *memory = (unsigned char *)executor->jit_code; size_t size = executor->jit_size; if (memory) { executor->jit_code = NULL; diff --git a/Tools/jit/_schema.py b/Tools/jit/_schema.py index 8eeb78e6cd69ee..975ca650a13c1a 100644 --- a/Tools/jit/_schema.py +++ b/Tools/jit/_schema.py @@ -8,13 +8,23 @@ "IMAGE_REL_AMD64_ADDR64", "IMAGE_REL_I386_DIR32", "R_AARCH64_ABS64", + "R_AARCH64_ADR_GOT_PAGE", "R_AARCH64_CALL26", "R_AARCH64_JUMP26", + "R_AARCH64_LD64_GOT_LO12_NC", "R_AARCH64_MOVW_UABS_G0_NC", "R_AARCH64_MOVW_UABS_G1_NC", "R_AARCH64_MOVW_UABS_G2_NC", "R_AARCH64_MOVW_UABS_G3", "R_X86_64_64", + "R_X86_64_GOTPCREL", + "R_X86_64_GOTPCRELX", + "R_X86_64_PC32", + "R_X86_64_REX_GOTPCRELX", + "X86_64_RELOC_BRANCH", + "X86_64_RELOC_GOT", + "X86_64_RELOC_GOT_LOAD", + "X86_64_RELOC_SIGNED", "X86_64_RELOC_UNSIGNED", ] diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 51b091eb246413..50f98df8a553b7 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -37,6 +37,7 @@ class _Target(typing.Generic[_S, 
_R]): triple: str _: dataclasses.KW_ONLY alignment: int = 1 + args: typing.Sequence[str] = () prefix: str = "" debug: bool = False force: bool = False @@ -121,21 +122,14 @@ async def _compile( "-fno-asynchronous-unwind-tables", # SET_FUNCTION_ATTRIBUTE on 32-bit Windows debug builds: "-fno-jump-tables", - # Position-independent code adds indirection to every load and jump: - "-fno-pic", + "-fno-plt", # Don't make calls to weird stack-smashing canaries: "-fno-stack-protector", - # We have three options for code model: - # - "small": the default, assumes that code and data reside in the - # lowest 2GB of memory (128MB on aarch64) - # - "medium": assumes that code resides in the lowest 2GB of memory, - # and makes no assumptions about data (not available on aarch64) - # - "large": makes no assumptions about either code or data - "-mcmodel=large", "-o", f"{o}", "-std=c11", f"{c}", + *self.args, ] await _llvm.run("clang", args, echo=self.verbose) return await self._parse(o) @@ -284,7 +278,23 @@ def _handle_section( def _handle_relocation( self, base: int, relocation: _schema.ELFRelocation, raw: bytes ) -> _stencils.Hole: + symbol: str | None match relocation: + case { + "Addend": addend, + "Offset": offset, + "Symbol": {"Value": s}, + "Type": { + "Value": "R_AARCH64_ADR_GOT_PAGE" + | "R_AARCH64_LD64_GOT_LO12_NC" + | "R_X86_64_GOTPCREL" + | "R_X86_64_GOTPCRELX" + | "R_X86_64_REX_GOTPCRELX" as kind + }, + }: + offset += base + s = s.removeprefix(self.prefix) + value, symbol = _stencils.HoleValue.GOT, s case { "Addend": addend, "Offset": offset, @@ -356,6 +366,34 @@ def _handle_relocation( s = s.removeprefix(self.prefix) value, symbol = _stencils.HoleValue.GOT, s addend = 0 + case { + "Offset": offset, + "Symbol": {"Value": s}, + "Type": { + "Value": "X86_64_RELOC_GOT" | "X86_64_RELOC_GOT_LOAD" as kind + }, + }: + offset += base + s = s.removeprefix(self.prefix) + value, symbol = _stencils.HoleValue.GOT, s + addend = int.from_bytes(raw[offset : offset + 4], "little", 
signed=True) - 4 + case { + "Offset": offset, + "Section": {"Value": s}, + "Type": { + "Value": "X86_64_RELOC_SIGNED" as kind + }, + } | { + "Offset": offset, + "Symbol": {"Value": s}, + "Type": { + "Value": "X86_64_RELOC_BRANCH" | "X86_64_RELOC_SIGNED" as kind + }, + }: + offset += base + s = s.removeprefix(self.prefix) + value, symbol = _stencils.symbol_to_value(s) + addend = int.from_bytes(raw[offset : offset + 4], "little", signed=True) - 4 case { "Offset": offset, "Section": {"Value": s}, @@ -380,15 +418,19 @@ def _handle_relocation( def get_target(host: str) -> _COFF | _ELF | _MachO: """Build a _Target for the given host "triple" and options.""" if re.fullmatch(r"aarch64-apple-darwin.*", host): - return _MachO(host, alignment=8, prefix="_") + args = ["-mcmodel=large"] + return _MachO(host, alignment=8, args=args, prefix="_") if re.fullmatch(r"aarch64-.*-linux-gnu", host): - return _ELF(host, alignment=8) + args = ["-mcmodel=large"] + return _ELF(host, alignment=8, args=args) if re.fullmatch(r"i686-pc-windows-msvc", host): - return _COFF(host, prefix="_") + args = ["-mcmodel=large"] + return _COFF(host, args=args, prefix="_") if re.fullmatch(r"x86_64-apple-darwin.*", host): return _MachO(host, prefix="_") if re.fullmatch(r"x86_64-pc-windows-msvc", host): - return _COFF(host) + args = ["-mcmodel=large"] + return _COFF(host, args=args) if re.fullmatch(r"x86_64-.*-linux-gnu", host): return _ELF(host) raise ValueError(host) From 81fe5edbd01eba7af51b7db456e84a020d6aa292 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 21 Feb 2024 23:16:32 -0800 Subject: [PATCH 2/6] blacken --- Tools/jit/_targets.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 50f98df8a553b7..6f7b58a9b1dc2a 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -369,20 +369,18 @@ def _handle_relocation( case { "Offset": offset, "Symbol": {"Value": s}, - "Type": { - "Value": "X86_64_RELOC_GOT" 
| "X86_64_RELOC_GOT_LOAD" as kind - }, + "Type": {"Value": "X86_64_RELOC_GOT" | "X86_64_RELOC_GOT_LOAD" as kind}, }: offset += base s = s.removeprefix(self.prefix) value, symbol = _stencils.HoleValue.GOT, s - addend = int.from_bytes(raw[offset : offset + 4], "little", signed=True) - 4 + addend = ( + int.from_bytes(raw[offset : offset + 4], "little", signed=True) - 4 + ) case { "Offset": offset, "Section": {"Value": s}, - "Type": { - "Value": "X86_64_RELOC_SIGNED" as kind - }, + "Type": {"Value": "X86_64_RELOC_SIGNED" as kind}, } | { "Offset": offset, "Symbol": {"Value": s}, @@ -393,7 +391,9 @@ def _handle_relocation( offset += base s = s.removeprefix(self.prefix) value, symbol = _stencils.symbol_to_value(s) - addend = int.from_bytes(raw[offset : offset + 4], "little", signed=True) - 4 + addend = ( + int.from_bytes(raw[offset : offset + 4], "little", signed=True) - 4 + ) case { "Offset": offset, "Section": {"Value": s}, From 36de1cd4a879d9d6401008c5e52b7d75ac8c30f3 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 22 Feb 2024 00:11:32 -0800 Subject: [PATCH 3/6] Remove references to IMAGE_REL_AMD64_REL32 and clean up comments --- Python/jit.c | 34 ++++++---------------------------- 1 file changed, 6 insertions(+), 28 deletions(-) diff --git a/Python/jit.c b/Python/jit.c index 8a1196139ac379..07e5877cb1ce36 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -221,31 +221,18 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches) (int64_t)relaxed - (int64_t)location + 1 < (1LL << 31)) { if (loc8[-2] == 0x8B) { - // Before: mov eax, dword ptr [rip + AAA] - // After: lea eax, [rip + XXX] - assert(hole->kind == HoleKind_IMAGE_REL_AMD64_REL32 || - hole->kind == HoleKind_R_X86_64_GOTPCRELX || - hole->kind == HoleKind_R_X86_64_REX_GOTPCRELX || - hole->kind == HoleKind_X86_64_RELOC_GOT_LOAD); + // mov reg, dword ptr [rip + AAA] -> lea reg, [rip + XXX] loc8[-2] = 0x8D; value = relaxed; } else if (loc8[-2] == 0xFF && loc8[-1] == 0x15) { - // Before: call 
qword ptr [rip + AAA] - // After: nop - // call XXX - assert(hole->kind == HoleKind_R_X86_64_GOTPCRELX || - hole->kind == HoleKind_X86_64_RELOC_GOT); + // call qword ptr [rip + AAA] -> nop; call XXX loc8[-2] = 0x90; loc8[-1] = 0xE8; value = relaxed; } else if (loc8[-2] == 0xFF && loc8[-1] == 0x25) { - // Before: jmp qword ptr [rip + AAA] - // After: nop - // jmp XXX - assert(hole->kind == HoleKind_IMAGE_REL_AMD64_REL32 || - hole->kind == HoleKind_R_X86_64_GOTPCRELX); + // jmp qword ptr [rip + AAA] -> nop; jmp XXX loc8[-2] = 0x90; loc8[-1] = 0xE9; value = relaxed; @@ -326,20 +313,14 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches) assert(rd == rn && rn == rt); uint64_t relaxed = *(uint64_t *)value; if (relaxed < (1UL << 16)) { - // Before: adrp x0, AAA - // ldr x0, [x0 + BBB] - // After: movz x0, XXX - // nop + // adrp reg, AAA; ldr reg, [reg + BBB] -> movz reg, XXX; nop loc32[0] = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | rd; loc32[1] = 0xD503201F; i++; continue; } if (relaxed < (1ULL << 32)) { - // Before: adrp x0, AAA - // ldr x0, [x0 + BBB] - // After: movz x0, XXX - // movk x0, YYY + // adrp reg, AAA; ldr reg, [reg + BBB] -> movz reg, XXX; movk reg, YYY loc32[0] = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | rd; loc32[1] = 0xF2A00000 | (get_bits(relaxed, 16, 16) << 5) | rd; i++; @@ -350,10 +331,7 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches) (int64_t)relaxed >= -(1L << 19) && (int64_t)relaxed < (1L << 19)) { - // Before: adrp x0, AAA - // ldr x0, [x0 + BBB] - // After: ldr x0, XXX - // nop + // adrp reg, AAA; ldr reg, [reg + BBB] -> ldr reg, XXX; nop loc32[0] = 0x58000000 | (get_bits(relaxed, 2, 19) << 5) | rd; loc32[1] = 0xD503201F; i++; From 74860c12134a587d6b481e4112261fa4ba837685 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 22 Feb 2024 00:13:53 -0800 Subject: [PATCH 4/6] Add comment --- Python/jit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Python/jit.c b/Python/jit.c index
07e5877cb1ce36..8cc9bae6c79e4a 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -296,6 +296,7 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches) // 21-bit count of pages between this page and an absolute address's // page... I know, I know, it's weird. Pairs nicely with // ARM64_RELOC_GOT_LOAD_PAGEOFF12 (below). + // Try to relax the pair of GOT loads into an immediate value: const Hole *next_hole = &stencil->holes[i + 1]; if (i + 1 < stencil->holes_size && (next_hole->kind == HoleKind_ARM64_RELOC_GOT_LOAD_PAGEOFF12 || From aa53fab0ccc6ac98a704a3cecdfa0bbd6b4efdf9 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 22 Feb 2024 00:15:21 -0800 Subject: [PATCH 5/6] Move assert back --- Python/jit.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Python/jit.c b/Python/jit.c index 8cc9bae6c79e4a..552fb874690e24 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -292,11 +292,12 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches) set_bits(loc32, 5, value, 48, 16); continue; case HoleKind_ARM64_RELOC_GOT_LOAD_PAGE21: - case HoleKind_R_AARCH64_ADR_GOT_PAGE: { + case HoleKind_R_AARCH64_ADR_GOT_PAGE: // 21-bit count of pages between this page and an absolute address's // page... I know, I know, it's weird. Pairs nicely with // ARM64_RELOC_GOT_LOAD_PAGEOFF12 (below). 
// Try to relax the pair of GOT loads into an immediate value: + assert(IS_AARCH64_ADRP(*loc32)); const Hole *next_hole = &stencil->holes[i + 1]; if (i + 1 < stencil->holes_size && (next_hole->kind == HoleKind_ARM64_RELOC_GOT_LOAD_PAGEOFF12 || @@ -306,7 +307,6 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches) next_hole->addend == hole->addend && next_hole->value == hole->value) { - assert(IS_AARCH64_ADRP(*loc32)); unsigned char rd = get_bits(loc32[0], 0, 5); assert(IS_AARCH64_LDR_OR_STR(loc32[1])); unsigned char rt = get_bits(loc32[1], 0, 5); @@ -349,7 +349,6 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches) // value[2:21] goes in loc[5:26]: set_bits(loc32, 5, value, 2, 19); continue; - } case HoleKind_ARM64_RELOC_GOT_LOAD_PAGEOFF12: case HoleKind_R_AARCH64_LD64_GOT_LO12_NC: // 12-bit low part of an absolute address. Pairs nicely with From 462095c45048125e75953be2e2fbb2fa2db9fc9a Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 22 Feb 2024 00:16:05 -0800 Subject: [PATCH 6/6] fixup --- Python/jit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/jit.c b/Python/jit.c index 552fb874690e24..ac2c60ed925a26 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -296,8 +296,8 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches) // 21-bit count of pages between this page and an absolute address's // page... I know, I know, it's weird. Pairs nicely with // ARM64_RELOC_GOT_LOAD_PAGEOFF12 (below). - // Try to relax the pair of GOT loads into an immediate value: assert(IS_AARCH64_ADRP(*loc32)); + // Try to relax the pair of GOT loads into an immediate value: const Hole *next_hole = &stencil->holes[i + 1]; if (i + 1 < stencil->holes_size && (next_hole->kind == HoleKind_ARM64_RELOC_GOT_LOAD_PAGEOFF12 ||