From 04e2773dec62f05b7001095e897681d213dcffff Mon Sep 17 00:00:00 2001 From: Xiong Zhou Date: Mon, 17 Jun 2024 21:54:44 +0800 Subject: [PATCH] Add AArch64 support for inline. --- ...rch64-Add-AArch64-support-for-inline.patch | 274 ++++++++++++++++++ llvm-bolt.spec | 9 +- 2 files changed, 282 insertions(+), 1 deletion(-) create mode 100644 0003-AArch64-Add-AArch64-support-for-inline.patch diff --git a/0003-AArch64-Add-AArch64-support-for-inline.patch b/0003-AArch64-Add-AArch64-support-for-inline.patch new file mode 100644 index 0000000..cb64595 --- /dev/null +++ b/0003-AArch64-Add-AArch64-support-for-inline.patch @@ -0,0 +1,274 @@ +From a09ea2c3534d12f194f740180e09a229e0b2200f Mon Sep 17 00:00:00 2001 +From: xiongzhou4 +Date: Wed, 12 Jun 2024 17:12:36 +0800 +Subject: [PATCH 1/2] [AArch64] Add AArch64 support for inline. + +--- + bolt/include/bolt/Core/MCPlusBuilder.h | 5 +-- + bolt/lib/Passes/Inliner.cpp | 31 +++++++++++++++++++ + .../Target/AArch64/AArch64MCPlusBuilder.cpp | 10 ++++++ + bolt/test/AArch64/Inputs/inline-foo.c | 5 +++ + bolt/test/AArch64/Inputs/inline-main.c | 5 +++ + bolt/test/AArch64/Inputs/inlined.cpp | 23 ++++++++++++++ + bolt/test/AArch64/Inputs/inlinee.cpp | 3 ++ + bolt/test/AArch64/Inputs/jmp_opt.cpp | 7 +++++ + bolt/test/AArch64/Inputs/jmp_opt2.cpp | 3 ++ + bolt/test/AArch64/Inputs/jmp_opt3.cpp | 3 ++ + bolt/test/AArch64/inline-debug-info.test | 20 ++++++++++++ + bolt/test/AArch64/inlined-function-mixed.test | 11 +++++++ + bolt/test/AArch64/jmp-optimization.test | 14 +++++++++ + 13 files changed, 136 insertions(+), 4 deletions(-) + create mode 100644 bolt/test/AArch64/Inputs/inline-foo.c + create mode 100644 bolt/test/AArch64/Inputs/inline-main.c + create mode 100644 bolt/test/AArch64/Inputs/inlined.cpp + create mode 100644 bolt/test/AArch64/Inputs/inlinee.cpp + create mode 100644 bolt/test/AArch64/Inputs/jmp_opt.cpp + create mode 100644 bolt/test/AArch64/Inputs/jmp_opt2.cpp + create mode 100644 bolt/test/AArch64/Inputs/jmp_opt3.cpp + create 
mode 100644 bolt/test/AArch64/inline-debug-info.test + create mode 100644 bolt/test/AArch64/inlined-function-mixed.test + create mode 100644 bolt/test/AArch64/jmp-optimization.test + +diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h +index db3f7e7f1..56d0228cd 100644 +--- a/bolt/include/bolt/Core/MCPlusBuilder.h ++++ b/bolt/include/bolt/Core/MCPlusBuilder.h +@@ -573,10 +573,7 @@ public: + return 0; + } + +- virtual bool isPush(const MCInst &Inst) const { +- llvm_unreachable("not implemented"); +- return false; +- } ++ virtual bool isPush(const MCInst &Inst) const { return false; } + + /// Return the width, in bytes, of the memory access performed by \p Inst, if + /// this is a push instruction. Return zero otherwise. +diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp +index 8dcb8934f..67dd294fb 100644 +--- a/bolt/lib/Passes/Inliner.cpp ++++ b/bolt/lib/Passes/Inliner.cpp +@@ -465,6 +465,37 @@ bool Inliner::inlineCallsInFunction(BinaryFunction &Function) { + << ". Size change: " << SizeAfterInlining + << " bytes.\n"); + ++// Skip situations where some A64 instructions can't be inlined: ++// # Indirect branch, e.g., BR. ++// # Branch instructions but used to make a function call. 
++ if (BC.isAArch64()) { ++ auto &MIB = *BC.MIB; ++ bool skip = false; ++ for (const BinaryBasicBlock &BB : *TargetFunction) { ++ for (MCInst Inst : BB) { ++ if (MIB.isPseudo(Inst)) ++ continue; ++ ++ MIB.stripAnnotations(Inst, false); ++ ++ if (MIB.isBranch(Inst)) { ++ const BinaryBasicBlock *TargetBB = ++ TargetFunction->getBasicBlockForLabel(MIB.getTargetSymbol(Inst)); ++ if (MIB.isIndirectBranch(Inst) || !TargetBB) { ++ skip = true; ++ break; ++ } ++ } ++ } ++ if (skip) ++ break; ++ } ++ if (skip) { ++ ++InstIt; ++ continue; ++ } ++ } ++ + std::tie(BB, InstIt) = inlineCall(*BB, InstIt, *TargetFunction); + + DidInlining = true; +diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +index d109a5d52..acf21ba23 100644 +--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp ++++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +@@ -34,6 +34,8 @@ public: + const MCRegisterInfo *RegInfo) + : MCPlusBuilder(Analysis, Info, RegInfo) {} + ++ MCPhysReg getStackPointer() const override { return AArch64::SP; } ++ + bool equals(const MCTargetExpr &A, const MCTargetExpr &B, + CompFuncTy Comp) const override { + const auto &AArch64ExprA = cast<AArch64MCExpr>(A); +@@ -816,6 +818,14 @@ public: + + int getUncondBranchEncodingSize() const override { return 28; } + ++ bool createCall(MCInst &Inst, const MCSymbol *Target, ++ MCContext *Ctx) override { ++ Inst.setOpcode(AArch64::BL); ++ Inst.addOperand(MCOperand::createExpr( ++ MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx))); ++ return true; ++ } ++ + bool createTailCall(MCInst &Inst, const MCSymbol *Target, + MCContext *Ctx) override { + Inst.setOpcode(AArch64::B); +diff --git a/bolt/test/AArch64/Inputs/inline-foo.c b/bolt/test/AArch64/Inputs/inline-foo.c +new file mode 100644 +index 000000000..1307c13f2 +--- /dev/null ++++ b/bolt/test/AArch64/Inputs/inline-foo.c +@@ -0,0 +1,5 @@ ++#include "stub.h" ++ ++void foo() { ++ puts("Hello world!\n"); ++} +diff --git 
a/bolt/test/AArch64/Inputs/inline-main.c b/bolt/test/AArch64/Inputs/inline-main.c +new file mode 100644 +index 000000000..7853d2b63 +--- /dev/null ++++ b/bolt/test/AArch64/Inputs/inline-main.c +@@ -0,0 +1,5 @@ ++extern void foo(); ++int main() { ++ foo(); ++ return 0; ++} +diff --git a/bolt/test/AArch64/Inputs/inlined.cpp b/bolt/test/AArch64/Inputs/inlined.cpp +new file mode 100644 +index 000000000..a6ff9e262 +--- /dev/null ++++ b/bolt/test/AArch64/Inputs/inlined.cpp +@@ -0,0 +1,23 @@ ++extern "C" int printf(const char*, ...); ++extern const char* question(); ++ ++inline int answer() __attribute__((always_inline)); ++inline int answer() { return 42; } ++ ++int main(int argc, char *argv[]) { ++ int ans; ++ if (argc == 1) { ++ ans = 0; ++ } else { ++ ans = argc; ++ } ++ printf("%s\n", question()); ++ for (int i = 0; i < 10; ++i) { ++ int x = answer(); ++ int y = answer(); ++ ans += x - y; ++ } ++ // padding to make sure question() is inlineable ++ asm("nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;"); ++ return ans; ++} +diff --git a/bolt/test/AArch64/Inputs/inlinee.cpp b/bolt/test/AArch64/Inputs/inlinee.cpp +new file mode 100644 +index 000000000..edb7ab145 +--- /dev/null ++++ b/bolt/test/AArch64/Inputs/inlinee.cpp +@@ -0,0 +1,3 @@ ++const char* question() { ++ return "What do you get if you multiply six by nine?"; ++} +diff --git a/bolt/test/AArch64/Inputs/jmp_opt.cpp b/bolt/test/AArch64/Inputs/jmp_opt.cpp +new file mode 100644 +index 000000000..cd6d53c35 +--- /dev/null ++++ b/bolt/test/AArch64/Inputs/jmp_opt.cpp +@@ -0,0 +1,7 @@ ++int g(); ++ ++int main() { ++ int x = g(); ++ int y = x*x; ++ return y; ++} +diff --git a/bolt/test/AArch64/Inputs/jmp_opt2.cpp b/bolt/test/AArch64/Inputs/jmp_opt2.cpp +new file mode 100644 +index 000000000..80b853d63 +--- /dev/null ++++ b/bolt/test/AArch64/Inputs/jmp_opt2.cpp +@@ -0,0 +1,3 @@ ++int f() { ++ return 0; ++} +diff --git a/bolt/test/AArch64/Inputs/jmp_opt3.cpp b/bolt/test/AArch64/Inputs/jmp_opt3.cpp +new 
file mode 100644 +index 000000000..7fb551163 +--- /dev/null ++++ b/bolt/test/AArch64/Inputs/jmp_opt3.cpp +@@ -0,0 +1,3 @@ ++int f(); ++ ++int g() { return f(); } +diff --git a/bolt/test/AArch64/inline-debug-info.test b/bolt/test/AArch64/inline-debug-info.test +new file mode 100644 +index 000000000..e20e5e31e +--- /dev/null ++++ b/bolt/test/AArch64/inline-debug-info.test +@@ -0,0 +1,20 @@ ++## Check that BOLT correctly prints and updates debug info for inlined ++## functions. ++ ++# REQUIRES: system-linux ++ ++# RUN: %clang %cflags -O1 -g %p/Inputs/inline-main.c %p/Inputs/inline-foo.c \ ++# RUN: -I%p/../Inputs -o %t.exe -Wl,-q ++# RUN: llvm-bolt %t.exe --update-debug-sections --print-debug-info \ ++# RUN: --print-only=main --print-after-lowering --force-inline=foo \ ++# RUN: -o %t.bolt \ ++# RUN: | FileCheck %s ++ ++## The call to puts() should come from inline-foo.c: ++# CHECK: callq {{.*}} # debug line {{.*}}inline-foo.c:4:3 ++ ++# RUN: llvm-objdump --disassemble-symbols=main -d --line-numbers %t.bolt \ ++# RUN: | FileCheck %s -check-prefix=CHECK-OBJDUMP ++ ++## Dump of main() should include debug info from inline-foo.c after inlining: ++# CHECK-OBJDUMP: inline-foo.c:4 +diff --git a/bolt/test/AArch64/inlined-function-mixed.test b/bolt/test/AArch64/inlined-function-mixed.test +new file mode 100644 +index 000000000..5a87bdde9 +--- /dev/null ++++ b/bolt/test/AArch64/inlined-function-mixed.test +@@ -0,0 +1,11 @@ ++# Make sure inlining from a unit with debug info into unit without ++# debug info does not cause a crash. 
++ ++RUN: %clangxx %cxxflags %S/Inputs/inlined.cpp -c -o %T/inlined.o ++RUN: %clangxx %cxxflags %S/Inputs/inlinee.cpp -c -o %T/inlinee.o -g ++RUN: %clangxx %cxxflags %T/inlined.o %T/inlinee.o -o %t ++ ++RUN: llvm-bolt %t -o %t.bolt --update-debug-sections --reorder-blocks=reverse \ ++RUN: --inline-small-functions --force-inline=main | FileCheck %s ++ ++CHECK-NOT: BOLT: 0 out of {{.*}} functions were overwritten +diff --git a/bolt/test/AArch64/jmp-optimization.test b/bolt/test/AArch64/jmp-optimization.test +new file mode 100644 +index 000000000..92f4b9a14 +--- /dev/null ++++ b/bolt/test/AArch64/jmp-optimization.test +@@ -0,0 +1,14 @@ ++# Tests the optimization of functions that just do a tail call in the beginning. ++ ++# This test has commands that rely on shell capabilities that won't execute ++# correctly on Windows e.g. unsupported parameter expansion ++REQUIRES: shell ++ ++RUN: %clang %cflags -O2 %S/Inputs/jmp_opt{,2,3}.cpp -o %t ++RUN: llvm-bolt -inline-small-functions %t -o %t.bolt ++RUN: llvm-objdump -d %t.bolt --print-imm-hex | FileCheck %s ++ ++CHECK:
<main>: ++CHECK-NOT: call ++CHECK: xorl %eax, %eax ++CHECK: retq +-- +2.33.0 + diff --git a/llvm-bolt.spec b/llvm-bolt.spec index e6e1f93..b38d01b 100644 --- a/llvm-bolt.spec +++ b/llvm-bolt.spec @@ -22,7 +22,7 @@ Name: %{pkg_name} Version: %{bolt_version} -Release: 4 +Release: 5 Summary: BOLT is a post-link optimizer developed to speed up large applications License: Apache 2.0 URL: https://github.com/llvm/llvm-project/tree/main/bolt @@ -32,6 +32,7 @@ Source1: https://github.com/llvm/llvm-project/releases/download/llvmorg-% Patch1: 0001-Fix-trap-value-for-non-X86.patch Patch2: 0002-Add-test-for-emitting-trap-value.patch +Patch3: 0003-AArch64-Add-AArch64-support-for-inline.patch BuildRequires: gcc BuildRequires: gcc-c++ @@ -143,6 +144,12 @@ rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/%{_vpath_builddir}/%{_lib}/lib*.a %doc %{install_docdir} %changelog +* Tue Jun 18 2024 Xiong Zhou 17.0.6-5 +- Type:Feature +- ID:NA +- SUG:NA +- DESC: Add AArch64 support for inline. + * Tue Jun 18 2024 Xiong Zhou 17.0.6-4 - Type:Backport - ID:NA