[BOLT] Enable the hugify runtime on AArch64

Builds the hugify runtime entry point for AArch64/ARM64 in addition to x86-64,
adds skip diagnostics when writing allocatable sections, and moves the hugify
and user-func-reorder runtime tests out of the X86-only test directory.

Note: the platform guard in hugify.cpp parenthesizes the architecture
alternatives so that !defined(__APPLE__) applies to every architecture.

diff --git a/bolt/CMakeLists.txt b/bolt/CMakeLists.txt
index f5ffa81227064..73c3ab54722e9 100644
--- a/bolt/CMakeLists.txt
+++ b/bolt/CMakeLists.txt
@@ -136,7 +136,7 @@ if (LLVM_INCLUDE_TESTS)
 endif()
 
 if (BOLT_ENABLE_RUNTIME)
-  message(STATUS "Building BOLT runtime libraries for X86")
+  message(STATUS "Building BOLT runtime libraries for ${CMAKE_SYSTEM_PROCESSOR}")
   set(extra_args "")
   if(CMAKE_SYSROOT)
     list(APPEND extra_args -DCMAKE_SYSROOT=${CMAKE_SYSROOT})
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 3217dd4324bc7..60131f484f5cc 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -597,8 +597,9 @@ Error RewriteInstance::discoverStorage() {
 
   // Hugify: Additional huge page from left side due to
   // weird ASLR mapping addresses (4KB aligned)
-  if (opts::Hugify && !BC->HasFixedLoadAddress)
+  if (opts::Hugify && !BC->HasFixedLoadAddress) {
     NextAvailableAddress += BC->PageAlign;
+  }
 
   if (!opts::UseGnuStack && !BC->IsLinuxKernel) {
     // This is where the black magic happens. Creating PHDR table in a segment
@@ -5858,17 +5859,28 @@ void RewriteInstance::rewriteFile() {
 
   // Write all allocatable sections - reloc-mode text is written here as well
   for (BinarySection &Section : BC->allocatableSections()) {
-    if (!Section.isFinalized() || !Section.getOutputData())
+    if (!Section.isFinalized() || !Section.getOutputData()) {
+      LLVM_DEBUG(if (opts::Verbosity > 1) {
+        dbgs() << "BOLT-INFO: new section is not finalized or has no output "
+               << "data, skip " << Section.getName() << '\n';
+      });
       continue;
-    if (Section.isLinkOnly())
+    }
+    if (Section.isLinkOnly()) {
+      LLVM_DEBUG(if (opts::Verbosity > 1) {
+        dbgs() << "BOLT-INFO: new section is link only, skip "
+               << Section.getName() << '\n';
+      });
       continue;
+    }
 
     if (opts::Verbosity >= 1)
       BC->outs() << "BOLT: writing new section " << Section.getName()
                  << "\n data at 0x"
                  << Twine::utohexstr(Section.getAllocAddress()) << "\n of size "
                  << Section.getOutputSize() << "\n at offset "
-                 << Section.getOutputFileOffset() << '\n';
+                 << Section.getOutputFileOffset() << " with content size "
+                 << Section.getOutputContents().size() << '\n';
     OS.seek(Section.getOutputFileOffset());
     Section.write(OS);
   }
diff --git a/bolt/runtime/common.h b/bolt/runtime/common.h
index 9b9965bae524e..27d0830071067 100644
--- a/bolt/runtime/common.h
+++ b/bolt/runtime/common.h
@@ -151,10 +151,12 @@ struct timespec {
   uint64_t tv_nsec; /* nanoseconds */
 };
 
-#if defined(__aarch64__)
+#if defined(__aarch64__) || defined(__arm64__)
 #include "sys_aarch64.h"
-#else
+#elif defined(__x86_64__)
 #include "sys_x86_64.h"
+#else
+#error "For AArch64/ARM64 and X86_64 only."
 #endif
 
 constexpr uint32_t BufSize = 10240;
diff --git a/bolt/runtime/hugify.cpp b/bolt/runtime/hugify.cpp
index a89cba2243c48..67d5fa26007d2 100644
--- a/bolt/runtime/hugify.cpp
+++ b/bolt/runtime/hugify.cpp
@@ -6,7 +6,8 @@
 //
 //===---------------------------------------------------------------------===//
 
-#if defined (__x86_64__) && !defined(__APPLE__)
+#if (defined(__x86_64__) || defined(__aarch64__) || defined(__arm64__)) &&     \
+    !defined(__APPLE__)
 
 #include "common.h"
 
@@ -73,8 +74,10 @@ static bool hasPagecacheTHPSupport() {
   if (Res < 0)
     return false;
 
-  if (!strStr(Buf, "[always]") && !strStr(Buf, "[madvise]"))
+  if (!strStr(Buf, "[always]") && !strStr(Buf, "[madvise]")) {
+    DEBUG(report("[hugify] THP support is not enabled.\n");)
     return false;
+  }
 
   struct KernelVersionTy {
     uint32_t major;
@@ -167,12 +170,20 @@ extern "C" void __bolt_hugify_self_impl() {
 
 /// This is hooking ELF's entry, it needs to save all machine state.
 extern "C" __attribute((naked)) void __bolt_hugify_self() {
+  // clang-format off
 #if defined(__x86_64__)
   __asm__ __volatile__(SAVE_ALL "call __bolt_hugify_self_impl\n" RESTORE_ALL
-                       "jmp __bolt_hugify_start_program\n" ::
-                           :);
+                       "jmp __bolt_hugify_start_program\n"
+                       :::);
+#elif defined(__aarch64__) || defined(__arm64__)
+  __asm__ __volatile__(SAVE_ALL "bl __bolt_hugify_self_impl\n" RESTORE_ALL
+                       "adrp x16, __bolt_hugify_start_program\n"
+                       "add x16, x16, #:lo12:__bolt_hugify_start_program\n"
+                       "br x16\n"
+                       :::);
 #else
-  exit(1);
+  __exit(1);
 #endif
+  // clang-format on
 }
 #endif
diff --git a/bolt/test/runtime/X86/Inputs/user_func_order.txt b/bolt/test/runtime/Inputs/user_func_order.txt
similarity index 100%
rename from bolt/test/runtime/X86/Inputs/user_func_order.txt
rename to bolt/test/runtime/Inputs/user_func_order.txt
diff --git a/bolt/test/runtime/X86/hugify.c b/bolt/test/runtime/hugify.c
similarity index 54%
rename from bolt/test/runtime/X86/hugify.c
rename to bolt/test/runtime/hugify.c
index cfc0cb62652b9..a4a718a1160df 100644
--- a/bolt/test/runtime/X86/hugify.c
+++ b/bolt/test/runtime/hugify.c
@@ -11,17 +11,28 @@ int main(int argc, char **argv) {
 REQUIRES: system-linux,bolt-runtime
 
 RUN: %clang %cflags -no-pie %s -o %t.nopie.exe -Wl,-q
-RUN: %clang %cflags -fpic -pie %s -o %t.pie.exe -Wl,-q
+RUN: %clang %cflags -fpic %s -o %t.pie.exe -Wl,-q
 
 RUN: llvm-bolt %t.nopie.exe --lite=0 -o %t.nopie --hugify
 RUN: llvm-bolt %t.pie.exe --lite=0 -o %t.pie --hugify
 
+RUN: llvm-nm --numeric-sort --print-armap %t.nopie | \
+RUN:   FileCheck %s -check-prefix=CHECK-NM
 RUN: %t.nopie | FileCheck %s -check-prefix=CHECK-NOPIE
 
-CHECK-NOPIE: Hello world
-
+RUN: llvm-nm --numeric-sort --print-armap %t.pie | \
+RUN:   FileCheck %s -check-prefix=CHECK-NM
 RUN: %t.pie | FileCheck %s -check-prefix=CHECK-PIE
 
+CHECK-NM: W __hot_start
+CHECK-NM-NEXT: T _start
+CHECK-NM: T main
+CHECK-NM: W __hot_end
+CHECK-NM: t __bolt_hugify_start_program
+CHECK-NM-NEXT: W __bolt_runtime_start
+
+CHECK-NOPIE: Hello world
+
 CHECK-PIE: Hello world
 
 */
diff --git a/bolt/test/runtime/X86/user-func-reorder.c b/bolt/test/runtime/user-func-reorder.c
similarity index 59%
rename from bolt/test/runtime/X86/user-func-reorder.c
rename to bolt/test/runtime/user-func-reorder.c
index fcb92bca16259..c9e12ea652622 100644
--- a/bolt/test/runtime/X86/user-func-reorder.c
+++ b/bolt/test/runtime/user-func-reorder.c
@@ -5,9 +5,7 @@
  */
 #include <stdio.h>
 
-int foo(int x) {
-  return x + 1;
-}
+int foo(int x) { return x + 1; }
 
 int fib(int x) {
   if (x < 2)
@@ -15,9 +13,7 @@ int fib(int x) {
   return fib(x - 1) + fib(x - 2);
 }
 
-int bar(int x) {
-  return x - 1;
-}
+int bar(int x) { return x - 1; }
 
 int main(int argc, char **argv) {
   printf("fib(%d) = %d\n", argc, fib(argc));
@@ -31,14 +27,28 @@ RUN: %clang %cflags -no-pie %s -o %t.exe -Wl,-q
 
 RUN: llvm-bolt %t.exe --relocs=1 --lite --reorder-functions=user \
 RUN:   --hugify --function-order=%p/Inputs/user_func_order.txt -o %t
+RUN: llvm-bolt %t.exe --relocs=1 --lite --reorder-functions=user \
+RUN:   --function-order=%p/Inputs/user_func_order.txt -o %t.nohugify
 RUN: llvm-nm --numeric-sort --print-armap %t | \
 RUN:   FileCheck %s -check-prefix=CHECK-NM
 RUN: %t 1 2 3 | FileCheck %s -check-prefix=CHECK-OUTPUT
+RUN: llvm-nm --numeric-sort --print-armap %t.nohugify | \
+RUN:   FileCheck %s -check-prefix=CHECK-NM-NOHUGIFY
+RUN: %t.nohugify 1 2 3 | FileCheck %s -check-prefix=CHECK-OUTPUT-NOHUGIFY
+
 
 CHECK-NM: W __hot_start
 CHECK-NM: T main
 CHECK-NM-NEXT: T fib
 CHECK-NM-NEXT: W __hot_end
+CHECK-NM: t __bolt_hugify_start_program
+CHECK-NM-NEXT: W __bolt_runtime_start
+
+CHECK-NM-NOHUGIFY: W __hot_start
+CHECK-NM-NOHUGIFY: T main
+CHECK-NM-NOHUGIFY-NEXT: T fib
+CHECK-NM-NOHUGIFY-NEXT: W __hot_end
 
 CHECK-OUTPUT: fib(4) = 3
+CHECK-OUTPUT-NOHUGIFY: fib(4) = 3
 */