Add AArch64 support for inline.
This commit is contained in:
parent
7e0b429d98
commit
04e2773dec
274
0003-AArch64-Add-AArch64-support-for-inline.patch
Normal file
274
0003-AArch64-Add-AArch64-support-for-inline.patch
Normal file
@ -0,0 +1,274 @@
|
||||
From a09ea2c3534d12f194f740180e09a229e0b2200f Mon Sep 17 00:00:00 2001
|
||||
From: xiongzhou4 <xiongzhou4@huawei.com>
|
||||
Date: Wed, 12 Jun 2024 17:12:36 +0800
|
||||
Subject: [PATCH 1/2] [AArch64] Add AArch64 support for inline.
|
||||
|
||||
---
|
||||
bolt/include/bolt/Core/MCPlusBuilder.h | 5 +--
|
||||
bolt/lib/Passes/Inliner.cpp | 31 +++++++++++++++++++
|
||||
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 10 ++++++
|
||||
bolt/test/AArch64/Inputs/inline-foo.c | 5 +++
|
||||
bolt/test/AArch64/Inputs/inline-main.c | 5 +++
|
||||
bolt/test/AArch64/Inputs/inlined.cpp | 23 ++++++++++++++
|
||||
bolt/test/AArch64/Inputs/inlinee.cpp | 3 ++
|
||||
bolt/test/AArch64/Inputs/jmp_opt.cpp | 7 +++++
|
||||
bolt/test/AArch64/Inputs/jmp_opt2.cpp | 3 ++
|
||||
bolt/test/AArch64/Inputs/jmp_opt3.cpp | 3 ++
|
||||
bolt/test/AArch64/inline-debug-info.test | 20 ++++++++++++
|
||||
bolt/test/AArch64/inlined-function-mixed.test | 11 +++++++
|
||||
bolt/test/AArch64/jmp-optimization.test | 14 +++++++++
|
||||
13 files changed, 136 insertions(+), 4 deletions(-)
|
||||
create mode 100644 bolt/test/AArch64/Inputs/inline-foo.c
|
||||
create mode 100644 bolt/test/AArch64/Inputs/inline-main.c
|
||||
create mode 100644 bolt/test/AArch64/Inputs/inlined.cpp
|
||||
create mode 100644 bolt/test/AArch64/Inputs/inlinee.cpp
|
||||
create mode 100644 bolt/test/AArch64/Inputs/jmp_opt.cpp
|
||||
create mode 100644 bolt/test/AArch64/Inputs/jmp_opt2.cpp
|
||||
create mode 100644 bolt/test/AArch64/Inputs/jmp_opt3.cpp
|
||||
create mode 100644 bolt/test/AArch64/inline-debug-info.test
|
||||
create mode 100644 bolt/test/AArch64/inlined-function-mixed.test
|
||||
create mode 100644 bolt/test/AArch64/jmp-optimization.test
|
||||
|
||||
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
|
||||
index db3f7e7f1..56d0228cd 100644
|
||||
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
|
||||
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
|
||||
@@ -573,10 +573,7 @@ public:
|
||||
return 0;
|
||||
}
|
||||
|
||||
- virtual bool isPush(const MCInst &Inst) const {
|
||||
- llvm_unreachable("not implemented");
|
||||
- return false;
|
||||
- }
|
||||
+ virtual bool isPush(const MCInst &Inst) const { return false; }
|
||||
|
||||
/// Return the width, in bytes, of the memory access performed by \p Inst, if
|
||||
/// this is a push instruction. Return zero otherwise.
|
||||
diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp
|
||||
index 8dcb8934f..67dd294fb 100644
|
||||
--- a/bolt/lib/Passes/Inliner.cpp
|
||||
+++ b/bolt/lib/Passes/Inliner.cpp
|
||||
@@ -465,6 +465,37 @@ bool Inliner::inlineCallsInFunction(BinaryFunction &Function) {
|
||||
<< ". Size change: " << SizeAfterInlining
|
||||
<< " bytes.\n");
|
||||
|
||||
+// Skip situations where some A64 instructions can't be inlined:
|
||||
+// # Indirect branch, e.g., BR.
|
||||
+// # Branch instructions that are used to make a function call.
|
||||
+ if (BC.isAArch64()) {
|
||||
+ auto &MIB = *BC.MIB;
|
||||
+ bool skip = false;
|
||||
+ for (const BinaryBasicBlock &BB : *TargetFunction) {
|
||||
+ for (MCInst Inst : BB) {
|
||||
+ if (MIB.isPseudo(Inst))
|
||||
+ continue;
|
||||
+
|
||||
+ MIB.stripAnnotations(Inst, false);
|
||||
+
|
||||
+ if (MIB.isBranch(Inst)) {
|
||||
+ const BinaryBasicBlock *TargetBB =
|
||||
+ TargetFunction->getBasicBlockForLabel(MIB.getTargetSymbol(Inst));
|
||||
+ if (MIB.isIndirectBranch(Inst) || !TargetBB) {
|
||||
+ skip = true;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ if (skip)
|
||||
+ break;
|
||||
+ }
|
||||
+ if (skip) {
|
||||
+ ++InstIt;
|
||||
+ continue;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
std::tie(BB, InstIt) = inlineCall(*BB, InstIt, *TargetFunction);
|
||||
|
||||
DidInlining = true;
|
||||
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
|
||||
index d109a5d52..acf21ba23 100644
|
||||
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
|
||||
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
|
||||
@@ -34,6 +34,8 @@ public:
|
||||
const MCRegisterInfo *RegInfo)
|
||||
: MCPlusBuilder(Analysis, Info, RegInfo) {}
|
||||
|
||||
+ MCPhysReg getStackPointer() const override { return AArch64::SP; }
|
||||
+
|
||||
bool equals(const MCTargetExpr &A, const MCTargetExpr &B,
|
||||
CompFuncTy Comp) const override {
|
||||
const auto &AArch64ExprA = cast<AArch64MCExpr>(A);
|
||||
@@ -816,6 +818,14 @@ public:
|
||||
|
||||
int getUncondBranchEncodingSize() const override { return 28; }
|
||||
|
||||
+ bool createCall(MCInst &Inst, const MCSymbol *Target,
|
||||
+ MCContext *Ctx) override {
|
||||
+ Inst.setOpcode(AArch64::BL);
|
||||
+ Inst.addOperand(MCOperand::createExpr(
|
||||
+ MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx)));
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
bool createTailCall(MCInst &Inst, const MCSymbol *Target,
|
||||
MCContext *Ctx) override {
|
||||
Inst.setOpcode(AArch64::B);
|
||||
diff --git a/bolt/test/AArch64/Inputs/inline-foo.c b/bolt/test/AArch64/Inputs/inline-foo.c
|
||||
new file mode 100644
|
||||
index 000000000..1307c13f2
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/Inputs/inline-foo.c
|
||||
@@ -0,0 +1,5 @@
|
||||
+#include "stub.h"
|
||||
+
|
||||
+void foo() {
|
||||
+ puts("Hello world!\n");
|
||||
+}
|
||||
diff --git a/bolt/test/AArch64/Inputs/inline-main.c b/bolt/test/AArch64/Inputs/inline-main.c
|
||||
new file mode 100644
|
||||
index 000000000..7853d2b63
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/Inputs/inline-main.c
|
||||
@@ -0,0 +1,5 @@
|
||||
+extern void foo();
|
||||
+int main() {
|
||||
+ foo();
|
||||
+ return 0;
|
||||
+}
|
||||
diff --git a/bolt/test/AArch64/Inputs/inlined.cpp b/bolt/test/AArch64/Inputs/inlined.cpp
|
||||
new file mode 100644
|
||||
index 000000000..a6ff9e262
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/Inputs/inlined.cpp
|
||||
@@ -0,0 +1,23 @@
|
||||
+extern "C" int printf(const char*, ...);
|
||||
+extern const char* question();
|
||||
+
|
||||
+inline int answer() __attribute__((always_inline));
|
||||
+inline int answer() { return 42; }
|
||||
+
|
||||
+int main(int argc, char *argv[]) {
|
||||
+ int ans;
|
||||
+ if (argc == 1) {
|
||||
+ ans = 0;
|
||||
+ } else {
|
||||
+ ans = argc;
|
||||
+ }
|
||||
+ printf("%s\n", question());
|
||||
+ for (int i = 0; i < 10; ++i) {
|
||||
+ int x = answer();
|
||||
+ int y = answer();
|
||||
+ ans += x - y;
|
||||
+ }
|
||||
+ // padding to make sure question() is inlineable
|
||||
+ asm("nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;");
|
||||
+ return ans;
|
||||
+}
|
||||
diff --git a/bolt/test/AArch64/Inputs/inlinee.cpp b/bolt/test/AArch64/Inputs/inlinee.cpp
|
||||
new file mode 100644
|
||||
index 000000000..edb7ab145
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/Inputs/inlinee.cpp
|
||||
@@ -0,0 +1,3 @@
|
||||
+const char* question() {
|
||||
+ return "What do you get if you multiply six by nine?";
|
||||
+}
|
||||
diff --git a/bolt/test/AArch64/Inputs/jmp_opt.cpp b/bolt/test/AArch64/Inputs/jmp_opt.cpp
|
||||
new file mode 100644
|
||||
index 000000000..cd6d53c35
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/Inputs/jmp_opt.cpp
|
||||
@@ -0,0 +1,7 @@
|
||||
+int g();
|
||||
+
|
||||
+int main() {
|
||||
+ int x = g();
|
||||
+ int y = x*x;
|
||||
+ return y;
|
||||
+}
|
||||
diff --git a/bolt/test/AArch64/Inputs/jmp_opt2.cpp b/bolt/test/AArch64/Inputs/jmp_opt2.cpp
|
||||
new file mode 100644
|
||||
index 000000000..80b853d63
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/Inputs/jmp_opt2.cpp
|
||||
@@ -0,0 +1,3 @@
|
||||
+int f() {
|
||||
+ return 0;
|
||||
+}
|
||||
diff --git a/bolt/test/AArch64/Inputs/jmp_opt3.cpp b/bolt/test/AArch64/Inputs/jmp_opt3.cpp
|
||||
new file mode 100644
|
||||
index 000000000..7fb551163
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/Inputs/jmp_opt3.cpp
|
||||
@@ -0,0 +1,3 @@
|
||||
+int f();
|
||||
+
|
||||
+int g() { return f(); }
|
||||
diff --git a/bolt/test/AArch64/inline-debug-info.test b/bolt/test/AArch64/inline-debug-info.test
|
||||
new file mode 100644
|
||||
index 000000000..e20e5e31e
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/inline-debug-info.test
|
||||
@@ -0,0 +1,20 @@
|
||||
+## Check that BOLT correctly prints and updates debug info for inlined
|
||||
+## functions.
|
||||
+
|
||||
+# REQUIRES: system-linux
|
||||
+
|
||||
+# RUN: %clang %cflags -O1 -g %p/Inputs/inline-main.c %p/Inputs/inline-foo.c \
|
||||
+# RUN: -I%p/../Inputs -o %t.exe -Wl,-q
|
||||
+# RUN: llvm-bolt %t.exe --update-debug-sections --print-debug-info \
|
||||
+# RUN: --print-only=main --print-after-lowering --force-inline=foo \
|
||||
+# RUN: -o %t.bolt \
|
||||
+# RUN: | FileCheck %s
|
||||
+
|
||||
+## The call to puts() should come from inline-foo.c:
|
||||
+# CHECK: callq {{.*}} # debug line {{.*}}inline-foo.c:4:3
|
||||
+
|
||||
+# RUN: llvm-objdump --disassemble-symbols=main -d --line-numbers %t.bolt \
|
||||
+# RUN: | FileCheck %s -check-prefix=CHECK-OBJDUMP
|
||||
+
|
||||
+## Dump of main() should include debug info from inline-foo.c after inlining:
|
||||
+# CHECK-OBJDUMP: inline-foo.c:4
|
||||
diff --git a/bolt/test/AArch64/inlined-function-mixed.test b/bolt/test/AArch64/inlined-function-mixed.test
|
||||
new file mode 100644
|
||||
index 000000000..5a87bdde9
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/inlined-function-mixed.test
|
||||
@@ -0,0 +1,11 @@
|
||||
+# Make sure inlining from a unit with debug info into a unit without
|
||||
+# debug info does not cause a crash.
|
||||
+
|
||||
+RUN: %clangxx %cxxflags %S/Inputs/inlined.cpp -c -o %T/inlined.o
|
||||
+RUN: %clangxx %cxxflags %S/Inputs/inlinee.cpp -c -o %T/inlinee.o -g
|
||||
+RUN: %clangxx %cxxflags %T/inlined.o %T/inlinee.o -o %t
|
||||
+
|
||||
+RUN: llvm-bolt %t -o %t.bolt --update-debug-sections --reorder-blocks=reverse \
|
||||
+RUN: --inline-small-functions --force-inline=main | FileCheck %s
|
||||
+
|
||||
+CHECK-NOT: BOLT: 0 out of {{.*}} functions were overwritten
|
||||
diff --git a/bolt/test/AArch64/jmp-optimization.test b/bolt/test/AArch64/jmp-optimization.test
|
||||
new file mode 100644
|
||||
index 000000000..92f4b9a14
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/jmp-optimization.test
|
||||
@@ -0,0 +1,14 @@
|
||||
+# Tests the optimization of functions that just do a tail call in the beginning.
|
||||
+
|
||||
+# This test has commands that rely on shell capabilities that won't execute
|
||||
+# correctly on Windows, e.g. unsupported parameter expansion.
|
||||
+REQUIRES: shell
|
||||
+
|
||||
+RUN: %clang %cflags -O2 %S/Inputs/jmp_opt{,2,3}.cpp -o %t
|
||||
+RUN: llvm-bolt -inline-small-functions %t -o %t.bolt
|
||||
+RUN: llvm-objdump -d %t.bolt --print-imm-hex | FileCheck %s
|
||||
+
|
||||
+CHECK: <main>:
|
||||
+CHECK-NOT: call
|
||||
+CHECK: xorl %eax, %eax
|
||||
+CHECK: retq
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -22,7 +22,7 @@
|
||||
|
||||
Name: %{pkg_name}
|
||||
Version: %{bolt_version}
|
||||
Release: 4
|
||||
Release: 5
|
||||
Summary: BOLT is a post-link optimizer developed to speed up large applications
|
||||
License: Apache 2.0
|
||||
URL: https://github.com/llvm/llvm-project/tree/main/bolt
|
||||
@ -32,6 +32,7 @@ Source1: https://github.com/llvm/llvm-project/releases/download/llvmorg-%
|
||||
|
||||
Patch1: 0001-Fix-trap-value-for-non-X86.patch
|
||||
Patch2: 0002-Add-test-for-emitting-trap-value.patch
|
||||
Patch3: 0003-AArch64-Add-AArch64-support-for-inline.patch
|
||||
|
||||
BuildRequires: gcc
|
||||
BuildRequires: gcc-c++
|
||||
@ -143,6 +144,12 @@ rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/%{_vpath_builddir}/%{_lib}/lib*.a
|
||||
%doc %{install_docdir}
|
||||
|
||||
%changelog
|
||||
* Tue Jun 18 2024 Xiong Zhou <xiongzhou4@huawei.com> 17.0.6-5
|
||||
- Type:Feature
|
||||
- ID:NA
|
||||
- SUG:NA
|
||||
- DESC: Add AArch64 support for inline.
|
||||
|
||||
* Tue Jun 18 2024 Xiong Zhou <xiongzhou4@huawei.com> 17.0.6-4
|
||||
- Type:Backport
|
||||
- ID:NA
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user