diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp
index f004a8eeea185..1793f4ff1f148 100644
--- a/bolt/lib/Passes/Inliner.cpp
+++ b/bolt/lib/Passes/Inliner.cpp
@@ -310,13 +310,13 @@ Inliner::inlineCall(BinaryBasicBlock &CallerBB,
       if (MIB.isPseudo(Inst))
         continue;
 
-      MIB.stripAnnotations(Inst, /*KeepTC=*/BC.isX86());
+      MIB.stripAnnotations(Inst, /*KeepTC=*/BC.isX86() || BC.isAArch64());
 
       // Fix branch target. Strictly speaking, we don't have to do this as
       // targets of direct branches will be fixed later and don't matter
       // in the CFG state. However, disassembly may look misleading, and
       // hence we do the fixing.
-      if (MIB.isBranch(Inst)) {
+      if (MIB.isBranch(Inst) && !MIB.isTailCall(Inst)) {
         assert(!MIB.isIndirectBranch(Inst) &&
                "unexpected indirect branch in callee");
         const BinaryBasicBlock *TargetBB =
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 7e08e5c81d26f..323cc5d6ef124 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -133,6 +133,43 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
 public:
   using MCPlusBuilder::MCPlusBuilder;
 
+  /// Return the register used as the stack pointer on AArch64.
+  MCPhysReg getStackPointer() const override { return AArch64::SP; }
+
+  /// Always false: this builder classifies no instruction as a push.
+  bool isPush(const MCInst &Inst) const override { return false; }
+
+  /// Always false: this builder classifies no instruction as a pop.
+  bool isPop(const MCInst &Inst) const override { return false; }
+
+  /// Create in \p Inst a direct, non-tail call to \p Target.
+  void createCall(MCInst &Inst, const MCSymbol *Target,
+                  MCContext *Ctx) override {
+    createDirectCall(Inst, Target, Ctx, /*IsTailCall=*/false);
+  }
+
+  /// Rewrite the tail call \p Inst into a regular call: B becomes BL and BR
+  /// becomes BLR, the tail-call annotation is removed and the offset cleared.
+  /// \returns false, leaving \p Inst unmodified, for any other opcode.
+  bool convertTailCallToCall(MCInst &Inst) override {
+    int NewOpcode;
+    switch (Inst.getOpcode()) {
+    default:
+      return false;
+    case AArch64::B:
+      NewOpcode = AArch64::BL;
+      break;
+    case AArch64::BR:
+      NewOpcode = AArch64::BLR;
+      break;
+    }
+
+    Inst.setOpcode(NewOpcode);
+    removeAnnotation(Inst, MCPlus::MCAnnotation::kTailCall);
+    clearOffset(Inst);
+    return true;
+  }
+
   bool equals(const MCTargetExpr &A, const MCTargetExpr &B,
               CompFuncTy Comp) const override {
     const auto &AArch64ExprA = cast<AArch64MCExpr>(A);
diff --git a/bolt/test/AArch64/inline-small-function-1.s b/bolt/test/AArch64/inline-small-function-1.s
new file mode 100644
index 0000000000000..3ea22a9915fb4
--- /dev/null
+++ b/bolt/test/AArch64/inline-small-function-1.s
@@ -0,0 +1,42 @@
+## This test checks that inline is properly handled by BOLT on aarch64.
+
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags -O0 %t.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt --inline-small-functions --print-inline --print-only=_Z3barP1A \
+# RUN:   %t.exe -o %t.bolt | FileCheck %s
+
+# CHECK: BOLT-INFO: inlined 0 calls at 1 call sites in 2 iteration(s). Change in binary size: 4 bytes.
+# CHECK: Binary Function "_Z3barP1A" after inlining {
+# CHECK-NOT: bl _Z3fooP1A
+# CHECK: ldr x8, [x0]
+# CHECK-NEXT: ldr w0, [x8]
+
+  .text
+  .globl _Z3fooP1A
+  .type _Z3fooP1A,@function
+_Z3fooP1A:
+  ldr x8, [x0]
+  ldr w0, [x8]
+  ret
+  .size _Z3fooP1A, .-_Z3fooP1A
+
+  .globl _Z3barP1A
+  .type _Z3barP1A,@function
+_Z3barP1A:
+  stp x29, x30, [sp, #-16]!
+  mov x29, sp
+  bl _Z3fooP1A
+  mul w0, w0, w0
+  ldp x29, x30, [sp], #16
+  ret
+  .size _Z3barP1A, .-_Z3barP1A
+
+  .globl main
+  .p2align 2
+  .type main,@function
+main:
+  mov w0, wzr
+  ret
+  .size main, .-main
diff --git a/bolt/test/AArch64/inline-small-function-2.s b/bolt/test/AArch64/inline-small-function-2.s
new file mode 100644
index 0000000000000..5eb7d391fd157
--- /dev/null
+++ b/bolt/test/AArch64/inline-small-function-2.s
@@ -0,0 +1,47 @@
+## This test checks that BOLT on aarch64 inlines a function ending in an
+## indirect tail call, turning the inlined `br` into a `blr`.
+
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags -O0 %t.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt --inline-small-functions --print-inline --print-only=test \
+# RUN:   %t.exe -o %t.bolt | FileCheck %s
+
+# CHECK: BOLT-INFO: inlined 0 calls at 1 call sites in 2 iteration(s). Change in binary size: 4 bytes.
+# CHECK: Binary Function "test" after inlining {
+# CHECK-NOT: bl indirect
+# CHECK: add w0, w1, w0
+# CHECK-NEXT: blr x2
+
+  .text
+  .globl indirect
+  .type indirect,@function
+indirect:
+  add w0, w1, w0
+  br x2
+  .size indirect, .-indirect
+
+  .globl test
+  .type test,@function
+test:
+  stp x29, x30, [sp, #-32]!
+  stp x20, x19, [sp, #16]
+  mov x29, sp
+  mov w19, w1
+  mov w20, w0
+  bl indirect
+  add w8, w19, w20
+  cmp w0, #0
+  csinc w0, w8, wzr, eq
+  ldp x20, x19, [sp, #16]
+  ldp x29, x30, [sp], #32
+  ret
+  .size test, .-test
+
+  .globl main
+  .type main,@function
+main:
+  mov w0, wzr
+  ret
+  .size main, .-main