!46 Sync from openEuler-22.03-LTS-Next
From: @rfwang07 Reviewed-by: @li-yancheng Signed-off-by: @li-yancheng
This commit is contained in:
commit
4c2356043d
@ -1,135 +0,0 @@
|
||||
From c62ab1487115a74d72ad23fd89b42076d5726bde Mon Sep 17 00:00:00 2001
|
||||
From: xiongzhou4 <xiongzhou4@huawei.com>
|
||||
Date: Mon, 24 Jul 2023 19:47:46 +0800
|
||||
Subject: [PATCH] [AArch64] fix bug #55005 handle DW_CFA_GNU_NegateRAState.
|
||||
backport: https://reviews.llvm.org/rG9921197920fc3e9ad9605bd8fe0e835ca2dd41a5
|
||||
|
||||
---
|
||||
bolt/lib/Core/Exceptions.cpp | 19 ++++--
|
||||
.../Inputs/dw_cfa_gnu_window_save.yaml | 62 +++++++++++++++++++
|
||||
bolt/test/AArch64/dw_cfa_gnu_window_save.test | 8 +++
|
||||
3 files changed, 83 insertions(+), 6 deletions(-)
|
||||
create mode 100644 bolt/test/AArch64/Inputs/dw_cfa_gnu_window_save.yaml
|
||||
create mode 100644 bolt/test/AArch64/dw_cfa_gnu_window_save.test
|
||||
|
||||
diff --git a/bolt/lib/Core/Exceptions.cpp b/bolt/lib/Core/Exceptions.cpp
|
||||
index 79404ca87..b0aa8b990 100644
|
||||
--- a/bolt/lib/Core/Exceptions.cpp
|
||||
+++ b/bolt/lib/Core/Exceptions.cpp
|
||||
@@ -644,18 +644,25 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
|
||||
errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n";
|
||||
return false;
|
||||
case DW_CFA_GNU_window_save:
|
||||
+ // DW_CFA_GNU_window_save and DW_CFA_GNU_NegateRAState just use the same
|
||||
+ // id but mean different things. The latter is used in AArch64.
|
||||
+ if (Function.getBinaryContext().isAArch64()) {
|
||||
+ Function.addCFIInstruction(
|
||||
+ Offset, MCCFIInstruction::createNegateRAState(nullptr));
|
||||
+ break;
|
||||
+ }
|
||||
+ if (opts::Verbosity >= 1)
|
||||
+ errs() << "BOLT-WARNING: DW_CFA_GNU_window_save unimplemented\n";
|
||||
+ return false;
|
||||
case DW_CFA_lo_user:
|
||||
case DW_CFA_hi_user:
|
||||
- if (opts::Verbosity >= 1) {
|
||||
- errs() << "BOLT-WARNING: DW_CFA_GNU_* and DW_CFA_*_user "
|
||||
- "unimplemented\n";
|
||||
- }
|
||||
+ if (opts::Verbosity >= 1)
|
||||
+ errs() << "BOLT-WARNING: DW_CFA_*_user unimplemented\n";
|
||||
return false;
|
||||
default:
|
||||
- if (opts::Verbosity >= 1) {
|
||||
+ if (opts::Verbosity >= 1)
|
||||
errs() << "BOLT-WARNING: Unrecognized CFI instruction: " << Instr.Opcode
|
||||
<< '\n';
|
||||
- }
|
||||
return false;
|
||||
}
|
||||
|
||||
diff --git a/bolt/test/AArch64/Inputs/dw_cfa_gnu_window_save.yaml b/bolt/test/AArch64/Inputs/dw_cfa_gnu_window_save.yaml
|
||||
new file mode 100644
|
||||
index 000000000..faa32e089
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/Inputs/dw_cfa_gnu_window_save.yaml
|
||||
@@ -0,0 +1,62 @@
|
||||
+--- !ELF
|
||||
+FileHeader:
|
||||
+ Class: ELFCLASS64
|
||||
+ Data: ELFDATA2LSB
|
||||
+ Type: ET_EXEC
|
||||
+ Machine: EM_AARCH64
|
||||
+ Entry: 0x4100C0
|
||||
+ProgramHeaders:
|
||||
+ - Type: PT_LOAD
|
||||
+ Flags: [ PF_X, PF_R ]
|
||||
+ FirstSec: .init
|
||||
+ LastSec: .fini
|
||||
+ VAddr: 0x410000
|
||||
+ Align: 0x10000
|
||||
+Sections:
|
||||
+ - Name: .init
|
||||
+ Type: SHT_PROGBITS
|
||||
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
|
||||
+ Address: 0x410000
|
||||
+ AddressAlign: 0x4
|
||||
+ Offset: 0x10000
|
||||
+ Content: 3F2303D5FD7BBFA9FD0300913F000094FD7BC1A8BF2303D5C0035FD6
|
||||
+ - Name: .plt
|
||||
+ Type: SHT_PROGBITS
|
||||
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
|
||||
+ Address: 0x410020
|
||||
+ AddressAlign: 0x10
|
||||
+ Content: F07BBFA9700100F011FE47F910E23F9120021FD61F2003D51F2003D51F2003D590010090110240F91002009120021FD690010090110640F91022009120021FD690010090110A40F91042009120021FD6
|
||||
+ - Name: .text
|
||||
+ Type: SHT_PROGBITS
|
||||
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
|
||||
+ Address: 0x410080
|
||||
+ AddressAlign: 0x40
|
||||
+ Content: 00008052C0035FD61F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D55F2403D51D0080D21E0080D2E50300AAE10340F9E2230091E60300910000009000D00391030080D2040080D2D5FFFF97D8FFFF975F2403D5E2FFFF171F2003D55F2403D5C0035FD6600100F000F047F9400000B4D3FFFF17C0035FD61F2003D5800100908101009000800091218000913F0000EBC000005481000090210840F9610000B4F00301AA00021FD6C0035FD680010090810100900080009121800091210000CB22FC7FD3410C818B21FC4193C10000B482000090420C40F9620000B4F00302AA00021FD6C0035FD63F2303D5FD7BBEA9FD030091F30B00F9930100906082403980000035DEFFFF972000805260820039F30B40F9FD7BC2A8BF2303D5C0035FD65F2403D5E2FFFF17
|
||||
+ - Name: .fini
|
||||
+ Type: SHT_PROGBITS
|
||||
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
|
||||
+ Address: 0x4101CC
|
||||
+ AddressAlign: 0x4
|
||||
+ Content: 3F2303D5FD7BBFA9FD030091FD7BC1A8BF2303D5C0035FD6
|
||||
+ - Name: .eh_frame
|
||||
+ Type: SHT_PROGBITS
|
||||
+ Flags: [ SHF_ALLOC ]
|
||||
+ Address: 0x420068
|
||||
+ AddressAlign: 0x8
|
||||
+ Content: 1000000000000000017A520004781E011B0C1F0010000000180000003C00FFFF3C0000000041071E140000002C0000006800FFFF08000000000000000000000010000000440000007000FFFF300000000000000010000000580000008C00FFFF3C00000000000000240000006C000000B400FFFF3800000000412D410E209D049E0342930248DEDDD30E00412D0000001400000094000000C400FFFF08000000000000000000000010000000AC00000068FFFEFF080000000000000000000000
|
||||
+ - Name: .rela.text
|
||||
+ Type: SHT_RELA
|
||||
+ Flags: [ SHF_INFO_LINK ]
|
||||
+ Link: .symtab
|
||||
+ AddressAlign: 0x8
|
||||
+ Info: .text
|
||||
+Symbols:
|
||||
+ - Name: .text
|
||||
+ Type: STT_SECTION
|
||||
+ Section: .text
|
||||
+ Value: 0x410080
|
||||
+ - Name: _ITM_deregisterTMCloneTable
|
||||
+ Binding: STB_WEAK
|
||||
+ - Name: _ITM_registerTMCloneTable
|
||||
+ Binding: STB_WEAK
|
||||
+...
|
||||
diff --git a/bolt/test/AArch64/dw_cfa_gnu_window_save.test b/bolt/test/AArch64/dw_cfa_gnu_window_save.test
|
||||
new file mode 100644
|
||||
index 000000000..2e044b399
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/dw_cfa_gnu_window_save.test
|
||||
@@ -0,0 +1,8 @@
|
||||
+# Check that llvm-bolt can handle DW_CFA_GNU_window_save on AArch64.
|
||||
+
|
||||
+RUN: yaml2obj %p/Inputs/dw_cfa_gnu_window_save.yaml &> %t.exe
|
||||
+RUN: llvm-bolt %t.exe -o %t.bolt 2>&1 | FileCheck %s
|
||||
+
|
||||
+CHECK-NOT: paciasp
|
||||
+CHECK-NOT: autiasp
|
||||
+CHECK-NOT: ERROR: unable to fill CFI.
|
||||
--
|
||||
2.33.0
|
||||
|
||||
126
0001-Fix-trap-value-for-non-X86.patch
Normal file
126
0001-Fix-trap-value-for-non-X86.patch
Normal file
@ -0,0 +1,126 @@
|
||||
From 868d8c360b3e1e5f291cb3e0dae0777a4529228f Mon Sep 17 00:00:00 2001
|
||||
From: Denis Revunov <revunov.denis@huawei-partners.com>
|
||||
Date: Thu, 27 Jul 2023 11:48:08 -0400
|
||||
Subject: [PATCH] Fix trap value for non-X86
|
||||
|
||||
The trap value used by BOLT was assumed to be a single-byte instruction.
|
||||
This made some functions unaligned on AArch64 (e.g. in the exceptions-instrumentation test)
|
||||
and caused emission failures. Fix that by changing the fill value to a StringRef.
|
||||
|
||||
Reviewed By: rafauler
|
||||
|
||||
Differential Revision: https://reviews.llvm.org/D158191
|
||||
---
|
||||
bolt/include/bolt/Core/MCPlusBuilder.h | 9 ++++++---
|
||||
bolt/lib/Core/BinaryEmitter.cpp | 4 ++--
|
||||
bolt/lib/Rewrite/RewriteInstance.cpp | 6 ++++--
|
||||
bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp | 4 ++++
|
||||
bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp | 4 ++++
|
||||
bolt/lib/Target/X86/X86MCPlusBuilder.cpp | 2 +-
|
||||
6 files changed, 21 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
|
||||
index 56d0228cd..beb06751d 100644
|
||||
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
|
||||
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
|
||||
@@ -636,9 +636,12 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
- /// If non-zero, this is used to fill the executable space with instructions
|
||||
- /// that will trap. Defaults to 0.
|
||||
- virtual unsigned getTrapFillValue() const { return 0; }
|
||||
+ /// Used to fill the executable space with instructions
|
||||
+ /// that will trap.
|
||||
+ virtual StringRef getTrapFillValue() const {
|
||||
+ llvm_unreachable("not implemented");
|
||||
+ return StringRef();
|
||||
+ }
|
||||
|
||||
/// Interface and basic functionality of a MCInstMatcher. The idea is to make
|
||||
/// it easy to match one or more MCInsts against a tree-like pattern and
|
||||
diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp
|
||||
index c4129615a..df076c81d 100644
|
||||
--- a/bolt/lib/Core/BinaryEmitter.cpp
|
||||
+++ b/bolt/lib/Core/BinaryEmitter.cpp
|
||||
@@ -376,7 +376,7 @@ bool BinaryEmitter::emitFunction(BinaryFunction &Function,
|
||||
}
|
||||
|
||||
if (opts::MarkFuncs)
|
||||
- Streamer.emitIntValue(BC.MIB->getTrapFillValue(), 1);
|
||||
+ Streamer.emitBytes(BC.MIB->getTrapFillValue());
|
||||
|
||||
// Emit CFI end
|
||||
if (Function.hasCFI())
|
||||
@@ -420,7 +420,7 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
|
||||
// case, the call site entries in that LSDA have 0 as offset to the landing
|
||||
// pad, which the runtime interprets as "no handler". To prevent this,
|
||||
// insert some padding.
|
||||
- Streamer.emitIntValue(BC.MIB->getTrapFillValue(), 1);
|
||||
+ Streamer.emitBytes(BC.MIB->getTrapFillValue());
|
||||
}
|
||||
|
||||
// Track the first emitted instruction with debug info.
|
||||
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
|
||||
index fe8c134b8..c6ea0b009 100644
|
||||
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
|
||||
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
|
||||
@@ -5273,8 +5273,10 @@ void RewriteInstance::rewriteFile() {
|
||||
if (!BF.getFileOffset() || !BF.isEmitted())
|
||||
continue;
|
||||
OS.seek(BF.getFileOffset());
|
||||
- for (unsigned I = 0; I < BF.getMaxSize(); ++I)
|
||||
- OS.write((unsigned char)BC->MIB->getTrapFillValue());
|
||||
+ StringRef TrapInstr = BC->MIB->getTrapFillValue();
|
||||
+ unsigned NInstr = BF.getMaxSize() / TrapInstr.size();
|
||||
+ for (unsigned I = 0; I < NInstr; ++I)
|
||||
+ OS.write(TrapInstr.data(), TrapInstr.size());
|
||||
}
|
||||
OS.seek(SavedPos);
|
||||
}
|
||||
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
|
||||
index acf21ba23..cd66b654e 100644
|
||||
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
|
||||
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
|
||||
@@ -1142,6 +1142,10 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
+ StringRef getTrapFillValue() const override {
|
||||
+ return StringRef("\0\0\0\0", 4);
|
||||
+ }
|
||||
+
|
||||
bool createReturn(MCInst &Inst) const override {
|
||||
Inst.setOpcode(AArch64::RET);
|
||||
Inst.clear();
|
||||
diff --git a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
|
||||
index ec5bca852..badc1bde8 100644
|
||||
--- a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
|
||||
+++ b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
|
||||
@@ -171,6 +171,10 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
+ StringRef getTrapFillValue() const override {
|
||||
+ return StringRef("\0\0\0\0", 4);
|
||||
+ }
|
||||
+
|
||||
bool analyzeBranch(InstructionIterator Begin, InstructionIterator End,
|
||||
const MCSymbol *&TBB, const MCSymbol *&FBB,
|
||||
MCInst *&CondBranch,
|
||||
diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
|
||||
index 3ee161d0b..5e3c01a1c 100644
|
||||
--- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
|
||||
+++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
|
||||
@@ -397,7 +397,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
- unsigned getTrapFillValue() const override { return 0xCC; }
|
||||
+ StringRef getTrapFillValue() const override { return StringRef("\314", 1); }
|
||||
|
||||
struct IndJmpMatcherFrag1 : MCInstMatcher {
|
||||
std::unique_ptr<MCInstMatcher> Base;
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,465 +0,0 @@
|
||||
From 81a80dbe9f47f728bc593d05cd5708a653a23f1c Mon Sep 17 00:00:00 2001
|
||||
From: xiongzhou4 <xiongzhou4@huawei.com>
|
||||
Date: Mon, 11 Sep 2023 11:33:41 +0800
|
||||
Subject: [PATCH] [AArch64] Add AArch64 support for hugify.
|
||||
|
||||
---
|
||||
bolt/CMakeLists.txt | 4 +-
|
||||
bolt/runtime/CMakeLists.txt | 28 ++-
|
||||
bolt/runtime/common.h | 224 ++++++++++++++++++
|
||||
bolt/runtime/hugify.cpp | 21 +-
|
||||
.../AArch64/Inputs/user_func_order.txt | 2 +
|
||||
bolt/test/runtime/AArch64/user-func-reorder.c | 44 ++++
|
||||
6 files changed, 305 insertions(+), 18 deletions(-)
|
||||
create mode 100644 bolt/test/runtime/AArch64/Inputs/user_func_order.txt
|
||||
create mode 100644 bolt/test/runtime/AArch64/user-func-reorder.c
|
||||
|
||||
diff --git a/bolt/CMakeLists.txt b/bolt/CMakeLists.txt
|
||||
index a97878cd3..3de930496 100644
|
||||
--- a/bolt/CMakeLists.txt
|
||||
+++ b/bolt/CMakeLists.txt
|
||||
@@ -5,7 +5,7 @@ set(BOLT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
|
||||
set(BOLT_ENABLE_RUNTIME OFF)
|
||||
-if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
|
||||
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|aarch64")
|
||||
set(BOLT_ENABLE_RUNTIME ON)
|
||||
endif()
|
||||
|
||||
@@ -45,7 +45,7 @@ if (LLVM_INCLUDE_TESTS)
|
||||
endif()
|
||||
|
||||
if (BOLT_ENABLE_RUNTIME)
|
||||
- message(STATUS "Building BOLT runtime libraries for X86")
|
||||
+ message(STATUS "Building BOLT runtime libraries")
|
||||
ExternalProject_Add(bolt_rt
|
||||
SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/runtime"
|
||||
STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt_rt-stamps
|
||||
diff --git a/bolt/runtime/CMakeLists.txt b/bolt/runtime/CMakeLists.txt
|
||||
index 7c1b79af4..ee6ab7bd4 100644
|
||||
--- a/bolt/runtime/CMakeLists.txt
|
||||
+++ b/bolt/runtime/CMakeLists.txt
|
||||
@@ -10,10 +10,12 @@ check_include_files(elf.h HAVE_ELF_H)
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/config.h)
|
||||
|
||||
-add_library(bolt_rt_instr STATIC
|
||||
- instr.cpp
|
||||
- ${CMAKE_CURRENT_BINARY_DIR}/config.h
|
||||
- )
|
||||
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
|
||||
+ add_library(bolt_rt_instr STATIC
|
||||
+ instr.cpp
|
||||
+ ${CMAKE_CURRENT_BINARY_DIR}/config.h
|
||||
+ )
|
||||
+endif()
|
||||
add_library(bolt_rt_hugify STATIC
|
||||
hugify.cpp
|
||||
${CMAKE_CURRENT_BINARY_DIR}/config.h
|
||||
@@ -23,16 +25,24 @@ set(BOLT_RT_FLAGS
|
||||
-ffreestanding
|
||||
-fno-exceptions
|
||||
-fno-rtti
|
||||
- -fno-stack-protector
|
||||
- -mno-sse)
|
||||
+ -fno-stack-protector)
|
||||
+
|
||||
+# x86 exclusive option
|
||||
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
|
||||
+ list(APPEND BOLT_RT_FLAGS -mno-sse)
|
||||
+endif()
|
||||
|
||||
# Don't let the compiler think it can create calls to standard libs
|
||||
-target_compile_options(bolt_rt_instr PRIVATE ${BOLT_RT_FLAGS} -fPIE)
|
||||
-target_include_directories(bolt_rt_instr PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
|
||||
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
|
||||
+ target_compile_options(bolt_rt_instr PRIVATE ${BOLT_RT_FLAGS} -fPIE)
|
||||
+ target_include_directories(bolt_rt_instr PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
|
||||
+endif()
|
||||
target_compile_options(bolt_rt_hugify PRIVATE ${BOLT_RT_FLAGS})
|
||||
target_include_directories(bolt_rt_hugify PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
||||
-install(TARGETS bolt_rt_instr DESTINATION lib)
|
||||
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
|
||||
+ install(TARGETS bolt_rt_instr DESTINATION lib)
|
||||
+endif()
|
||||
install(TARGETS bolt_rt_hugify DESTINATION lib)
|
||||
|
||||
if (CMAKE_CXX_COMPILER_ID MATCHES ".*Clang.*")
|
||||
diff --git a/bolt/runtime/common.h b/bolt/runtime/common.h
|
||||
index 008dbb6c3..6869742e7 100644
|
||||
--- a/bolt/runtime/common.h
|
||||
+++ b/bolt/runtime/common.h
|
||||
@@ -39,6 +39,45 @@ typedef int int32_t;
|
||||
#endif
|
||||
|
||||
// Save all registers while keeping 16B stack alignment
|
||||
+#if defined (__aarch64__)
|
||||
+#define SAVE_ALL \
|
||||
+ "stp x0, x1, [sp, #-16]!\n" \
|
||||
+ "stp x2, x3, [sp, #-16]!\n" \
|
||||
+ "stp x4, x5, [sp, #-16]!\n" \
|
||||
+ "stp x6, x7, [sp, #-16]!\n" \
|
||||
+ "stp x8, x9, [sp, #-16]!\n" \
|
||||
+ "stp x10, x11, [sp, #-16]!\n" \
|
||||
+ "stp x12, x13, [sp, #-16]!\n" \
|
||||
+ "stp x14, x15, [sp, #-16]!\n" \
|
||||
+ "stp x16, x17, [sp, #-16]!\n" \
|
||||
+ "stp x18, x19, [sp, #-16]!\n" \
|
||||
+ "stp x20, x21, [sp, #-16]!\n" \
|
||||
+ "stp x22, x23, [sp, #-16]!\n" \
|
||||
+ "stp x24, x25, [sp, #-16]!\n" \
|
||||
+ "stp x26, x27, [sp, #-16]!\n" \
|
||||
+ "stp x28, x29, [sp, #-16]!\n" \
|
||||
+ "stp x30, xzr, [sp, #-16]!\n"
|
||||
+
|
||||
+// Mirrors SAVE_ALL
|
||||
+#define RESTORE_ALL \
|
||||
+ "ldp x30, xzr, [sp], #16\n" \
|
||||
+ "ldp x28, x29, [sp], #16\n" \
|
||||
+ "ldp x26, x27, [sp], #16\n" \
|
||||
+ "ldp x24, x25, [sp], #16\n" \
|
||||
+ "ldp x22, x23, [sp], #16\n" \
|
||||
+ "ldp x20, x21, [sp], #16\n" \
|
||||
+ "ldp x18, x19, [sp], #16\n" \
|
||||
+ "ldp x16, x17, [sp], #16\n" \
|
||||
+ "ldp x14, x15, [sp], #16\n" \
|
||||
+ "ldp x12, x13, [sp], #16\n" \
|
||||
+ "ldp x10, x11, [sp], #16\n" \
|
||||
+ "ldp x8, x9, [sp], #16\n" \
|
||||
+ "ldp x6, x7, [sp], #16\n" \
|
||||
+ "ldp x4, x5, [sp], #16\n" \
|
||||
+ "ldp x2, x3, [sp], #16\n" \
|
||||
+ "ldp x0, x1, [sp], #16\n"
|
||||
+
|
||||
+#else
|
||||
#define SAVE_ALL \
|
||||
"push %%rax\n" \
|
||||
"push %%rbx\n" \
|
||||
@@ -75,6 +114,7 @@ typedef int int32_t;
|
||||
"pop %%rcx\n" \
|
||||
"pop %%rbx\n" \
|
||||
"pop %%rax\n"
|
||||
+#endif
|
||||
|
||||
// Functions that are required by freestanding environment. Compiler may
|
||||
// generate calls to these implicitly.
|
||||
@@ -129,6 +169,189 @@ constexpr uint32_t BufSize = 10240;
|
||||
#define _STRINGIFY(x) #x
|
||||
#define STRINGIFY(x) _STRINGIFY(x)
|
||||
|
||||
+#if defined (__aarch64__)
|
||||
+// Declare some syscall wrappers we use throughout this code to avoid linking
|
||||
+// against system libc.
|
||||
+uint64_t __read(uint64_t fd, const void *buf, uint64_t count) {
|
||||
+ uint64_t ret;
|
||||
+ register uint64_t x0 __asm__("x0") = fd;
|
||||
+ register const void *x1 __asm__("x1") = buf;
|
||||
+ register uint64_t x2 __asm__("x2") = count;
|
||||
+ register uint32_t w8 __asm__("w8") = 63;
|
||||
+ __asm__ __volatile__("svc #0\n"
|
||||
+ "mov %0, x0"
|
||||
+ : "=r"(ret), "+r"(x0), "+r"(x1)
|
||||
+ : "r"(x2), "r"(w8)
|
||||
+ : "cc", "memory");
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+uint64_t __write(uint64_t fd, const void *buf, uint64_t count) {
|
||||
+ uint64_t ret;
|
||||
+ register uint64_t x0 __asm__("x0") = fd;
|
||||
+ register const void *x1 __asm__("x1") = buf;
|
||||
+ register uint64_t x2 __asm__("x2") = count;
|
||||
+ register uint32_t w8 __asm__("w8") = 64;
|
||||
+ __asm__ __volatile__("svc #0\n"
|
||||
+ "mov %0, x0"
|
||||
+ : "=r"(ret), "+r"(x0), "+r"(x1)
|
||||
+ : "r"(x2), "r"(w8)
|
||||
+ : "cc", "memory");
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+void *__mmap(uint64_t addr, uint64_t size, uint64_t prot, uint64_t flags,
|
||||
+ uint64_t fd, uint64_t offset) {
|
||||
+ void *ret;
|
||||
+ register uint64_t x0 __asm__("x0") = addr;
|
||||
+ register uint64_t x1 __asm__("x1") = size;
|
||||
+ register uint64_t x2 __asm__("x2") = prot;
|
||||
+ register uint64_t x3 __asm__("x3") = flags;
|
||||
+ register uint64_t x4 __asm__("x4") = fd;
|
||||
+ register uint64_t x5 __asm__("x5") = offset;
|
||||
+ register uint32_t w8 __asm__("w8") = 222;
|
||||
+ __asm__ __volatile__("svc #0\n"
|
||||
+ "mov %0, x0"
|
||||
+ : "=r"(ret), "+r"(x0), "+r"(x1)
|
||||
+ : "r"(x2), "r"(x3), "r"(x4), "r"(x5), "r"(w8)
|
||||
+ : "cc", "memory");
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+uint64_t __munmap(void *addr, uint64_t size) {
|
||||
+ uint64_t ret;
|
||||
+ register void *x0 __asm__("x0") = addr;
|
||||
+ register uint64_t x1 __asm__("x1") = size;
|
||||
+ register uint32_t w8 __asm__("w8") = 215;
|
||||
+ __asm__ __volatile__("svc #0\n"
|
||||
+ "mov %0, x0"
|
||||
+ : "=r"(ret), "+r"(x0), "+r"(x1)
|
||||
+ : "r"(w8)
|
||||
+ : "cc", "memory");
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+uint64_t __exit(uint64_t code) {
|
||||
+ uint64_t ret;
|
||||
+ register uint64_t x0 __asm__("x0") = code;
|
||||
+ register uint32_t w8 __asm__("w8") = 94;
|
||||
+ __asm__ __volatile__("svc #0\n"
|
||||
+ "mov %0, x0"
|
||||
+ : "=r"(ret), "+r"(x0)
|
||||
+ : "r"(w8)
|
||||
+ : "cc", "memory", "x1");
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+uint64_t __open(const char *pathname, uint64_t flags, uint64_t mode) {
|
||||
+ uint64_t ret;
|
||||
+ register int x0 __asm__("x0") = -100;
|
||||
+ register const char *x1 __asm__("x1") = pathname;
|
||||
+ register uint64_t x2 __asm__("x2") = flags;
|
||||
+ register uint64_t x3 __asm__("x3") = mode;
|
||||
+ register uint32_t w8 __asm__("w8") = 56;
|
||||
+ __asm__ __volatile__("svc #0\n"
|
||||
+ "mov %0, x0"
|
||||
+ : "=r"(ret), "+r"(x0), "+r"(x1)
|
||||
+ : "r"(x2), "r"(x3), "r"(w8)
|
||||
+ : "cc", "memory");
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+int __madvise(void *addr, size_t length, int advice) {
|
||||
+ int ret;
|
||||
+ register void *x0 __asm__("x0") = addr;
|
||||
+ register size_t x1 __asm__("x1") = length;
|
||||
+ register int x2 __asm__("x2") = advice;
|
||||
+ register uint32_t w8 __asm__("w8") = 233;
|
||||
+ __asm__ __volatile__("svc #0\n"
|
||||
+ "mov %w0, w0"
|
||||
+ : "=r"(ret), "+r"(x0), "+r"(x1)
|
||||
+ : "r"(x2), "r"(w8)
|
||||
+ : "cc", "memory");
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+int __mprotect(void *addr, size_t len, int prot) {
|
||||
+ int ret;
|
||||
+ register void *x0 __asm__("x0") = addr;
|
||||
+ register size_t x1 __asm__("x1") = len;
|
||||
+ register int x2 __asm__("x2") = prot;
|
||||
+ register uint32_t w8 __asm__("w8") = 226;
|
||||
+ __asm__ __volatile__("svc #0\n"
|
||||
+ "mov %w0, w0"
|
||||
+ : "=r"(ret), "+r"(x0), "+r"(x1)
|
||||
+ : "r"(x2), "r"(w8)
|
||||
+ : "cc", "memory");
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+// Helper functions for writing strings to the .fdata file. We intentionally
|
||||
+// avoid using libc names to make it clear it is our impl.
|
||||
+
|
||||
+/// Compare two strings, at most Num bytes.
|
||||
+int strnCmp(const char *Str1, const char *Str2, size_t Num) {
|
||||
+ while (Num && *Str1 && (*Str1 == *Str2)) {
|
||||
+ Num--;
|
||||
+ Str1++;
|
||||
+ Str2++;
|
||||
+ }
|
||||
+ if (Num == 0)
|
||||
+ return 0;
|
||||
+ return *(unsigned char *)Str1 - *(unsigned char *)Str2;
|
||||
+}
|
||||
+
|
||||
+uint32_t strLen(const char *Str) {
|
||||
+ uint32_t Size = 0;
|
||||
+ while (*Str++)
|
||||
+ ++Size;
|
||||
+ return Size;
|
||||
+}
|
||||
+
|
||||
+/// Write number Num using Base to the buffer in OutBuf, returns a pointer to
|
||||
+/// the end of the string.
|
||||
+char *intToStr(char *OutBuf, uint64_t Num, uint32_t Base) {
|
||||
+ const char *Chars = "0123456789abcdef";
|
||||
+ char Buf[21];
|
||||
+ char *Ptr = Buf;
|
||||
+ while (Num) {
|
||||
+ *Ptr++ = *(Chars + (Num % Base));
|
||||
+ Num /= Base;
|
||||
+ }
|
||||
+ if (Ptr == Buf) {
|
||||
+ *OutBuf++ = '0';
|
||||
+ return OutBuf;
|
||||
+ }
|
||||
+ while (Ptr != Buf)
|
||||
+ *OutBuf++ = *--Ptr;
|
||||
+
|
||||
+ return OutBuf;
|
||||
+}
|
||||
+
|
||||
+/// Copy Str to OutBuf, returns a pointer to the end of the copied string
|
||||
+char *strCopy(char *OutBuf, const char *Str, int32_t Size = BufSize) {
|
||||
+ while (*Str) {
|
||||
+ *OutBuf++ = *Str++;
|
||||
+ if (--Size <= 0)
|
||||
+ return OutBuf;
|
||||
+ }
|
||||
+ return OutBuf;
|
||||
+}
|
||||
+
|
||||
+void reportNumber(const char *Msg, uint64_t Num, uint32_t Base) {
|
||||
+ char Buf[BufSize];
|
||||
+ char *Ptr = Buf;
|
||||
+ Ptr = strCopy(Ptr, Msg, BufSize - 23);
|
||||
+ Ptr = intToStr(Ptr, Num, Base);
|
||||
+ Ptr = strCopy(Ptr, "\n");
|
||||
+ __write(2, Buf, Ptr - Buf);
|
||||
+}
|
||||
+
|
||||
+void reportError(const char *Msg, uint64_t Size) {
|
||||
+ __write(2, Msg, Size);
|
||||
+ __exit(1);
|
||||
+}
|
||||
+#else
|
||||
uint64_t __read(uint64_t fd, const void *buf, uint64_t count) {
|
||||
uint64_t ret;
|
||||
#if defined(__APPLE__)
|
||||
@@ -550,5 +773,6 @@ public:
|
||||
inline uint64_t alignTo(uint64_t Value, uint64_t Align) {
|
||||
return (Value + Align - 1) / Align * Align;
|
||||
}
|
||||
+#endif
|
||||
|
||||
} // anonymous namespace
|
||||
diff --git a/bolt/runtime/hugify.cpp b/bolt/runtime/hugify.cpp
|
||||
index 69e1a7e06..385e4d147 100644
|
||||
--- a/bolt/runtime/hugify.cpp
|
||||
+++ b/bolt/runtime/hugify.cpp
|
||||
@@ -6,26 +6,25 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
-#if defined (__x86_64__)
|
||||
#if !defined(__APPLE__)
|
||||
|
||||
#include "common.h"
|
||||
#include <sys/mman.h>
|
||||
|
||||
// Enables a very verbose logging to stderr useful when debugging
|
||||
-//#define ENABLE_DEBUG
|
||||
+// #define ENABLE_DEBUG
|
||||
|
||||
// Function pointers to init routines in the binary, so we can resume
|
||||
// regular execution of the function that we hooked.
|
||||
extern void (*__bolt_hugify_init_ptr)();
|
||||
|
||||
// The __hot_start and __hot_end symbols set by Bolt. We use them to figure
|
||||
-// out the rage for marking huge pages.
|
||||
+// out the range for marking huge pages.
|
||||
extern uint64_t __hot_start;
|
||||
extern uint64_t __hot_end;
|
||||
|
||||
#ifdef MADV_HUGEPAGE
|
||||
-/// Check whether the kernel supports THP via corresponding sysfs entry.
|
||||
+// Check whether the kernel supports THP via corresponding sysfs entry.
|
||||
static bool has_pagecache_thp_support() {
|
||||
char buf[256] = {0};
|
||||
const char *madviseStr = "always [madvise] never";
|
||||
@@ -116,14 +115,22 @@ extern "C" void __bolt_hugify_self_impl() {
|
||||
#endif
|
||||
}
|
||||
|
||||
-/// This is hooking ELF's entry, it needs to save all machine state.
|
||||
+// This is hooking ELF's entry, it needs to save all machine state.
|
||||
extern "C" __attribute((naked)) void __bolt_hugify_self() {
|
||||
+#if defined (__x86_64__)
|
||||
__asm__ __volatile__(SAVE_ALL
|
||||
"call __bolt_hugify_self_impl\n"
|
||||
RESTORE_ALL
|
||||
"jmp *__bolt_hugify_init_ptr(%%rip)\n"
|
||||
:::);
|
||||
-}
|
||||
-
|
||||
+#elif defined (__aarch64__)
|
||||
+ __asm__ __volatile__(SAVE_ALL
|
||||
+ "bl __bolt_hugify_self_impl\n"
|
||||
+ RESTORE_ALL
|
||||
+ "ldr x16, =__bolt_hugify_init_ptr\n"
|
||||
+ "ldr x16, [x16]\n"
|
||||
+ "br x16\n"
|
||||
+ :::);
|
||||
#endif
|
||||
+}
|
||||
#endif
|
||||
diff --git a/bolt/test/runtime/AArch64/Inputs/user_func_order.txt b/bolt/test/runtime/AArch64/Inputs/user_func_order.txt
|
||||
new file mode 100644
|
||||
index 000000000..48b76cd35
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/runtime/AArch64/Inputs/user_func_order.txt
|
||||
@@ -0,0 +1,2 @@
|
||||
+main
|
||||
+fib
|
||||
diff --git a/bolt/test/runtime/AArch64/user-func-reorder.c b/bolt/test/runtime/AArch64/user-func-reorder.c
|
||||
new file mode 100644
|
||||
index 000000000..fcb92bca1
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/runtime/AArch64/user-func-reorder.c
|
||||
@@ -0,0 +1,44 @@
|
||||
+/* Checks that BOLT correctly processes a user-provided function list file,
|
||||
+ * reorder functions according to this list, update hot_start and hot_end
|
||||
+ * symbols and insert a function to perform hot text mapping during program
|
||||
+ * startup.
|
||||
+ */
|
||||
+#include <stdio.h>
|
||||
+
|
||||
+int foo(int x) {
|
||||
+ return x + 1;
|
||||
+}
|
||||
+
|
||||
+int fib(int x) {
|
||||
+ if (x < 2)
|
||||
+ return x;
|
||||
+ return fib(x - 1) + fib(x - 2);
|
||||
+}
|
||||
+
|
||||
+int bar(int x) {
|
||||
+ return x - 1;
|
||||
+}
|
||||
+
|
||||
+int main(int argc, char **argv) {
|
||||
+ printf("fib(%d) = %d\n", argc, fib(argc));
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+REQUIRES: system-linux,bolt-runtime
|
||||
+
|
||||
+RUN: %clang %cflags -no-pie %s -o %t.exe -Wl,-q
|
||||
+
|
||||
+RUN: llvm-bolt %t.exe --relocs=1 --lite --reorder-functions=user \
|
||||
+RUN: --hugify --function-order=%p/Inputs/user_func_order.txt -o %t
|
||||
+RUN: llvm-nm --numeric-sort --print-armap %t | \
|
||||
+RUN: FileCheck %s -check-prefix=CHECK-NM
|
||||
+RUN: %t 1 2 3 | FileCheck %s -check-prefix=CHECK-OUTPUT
|
||||
+
|
||||
+CHECK-NM: W __hot_start
|
||||
+CHECK-NM: T main
|
||||
+CHECK-NM-NEXT: T fib
|
||||
+CHECK-NM-NEXT: W __hot_end
|
||||
+
|
||||
+CHECK-OUTPUT: fib(4) = 3
|
||||
+*/
|
||||
--
|
||||
2.33.0
|
||||
|
||||
44
0002-Add-test-for-emitting-trap-value.patch
Normal file
44
0002-Add-test-for-emitting-trap-value.patch
Normal file
@ -0,0 +1,44 @@
|
||||
From e4ae238a42296a84bc819dd1fb61f3c699952f17 Mon Sep 17 00:00:00 2001
|
||||
From: Denis Revunov <rnovds@gmail.com>
|
||||
Date: Thu, 17 Aug 2023 18:30:07 +0300
|
||||
Subject: [PATCH] Add test for emitting trap value
|
||||
|
||||
Reviewed By: rafauler
|
||||
|
||||
Differential Revision: https://reviews.llvm.org/D158191
|
||||
---
|
||||
bolt/test/runtime/mark-funcs.c | 22 ++++++++++++++++++++++
|
||||
1 file changed, 22 insertions(+)
|
||||
create mode 100644 bolt/test/runtime/mark-funcs.c
|
||||
|
||||
diff --git a/bolt/test/runtime/mark-funcs.c b/bolt/test/runtime/mark-funcs.c
|
||||
new file mode 100644
|
||||
index 000000000..a8586ca8b
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/runtime/mark-funcs.c
|
||||
@@ -0,0 +1,22 @@
|
||||
+#include <stdio.h>
|
||||
+
|
||||
+int dummy() {
|
||||
+ printf("Dummy called\n");
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+int main(int argc, char **argv) {
|
||||
+ if (dummy() != 0)
|
||||
+ return 1;
|
||||
+ printf("Main called\n");
|
||||
+ return 0;
|
||||
+}
|
||||
+// Check that emitting trap value works properly and
|
||||
+// does not break functions
|
||||
+// REQUIRES: system-linux
|
||||
+// RUN: %clangxx -Wl,-q %s -o %t.exe
|
||||
+// RUN: %t.exe | FileCheck %s
|
||||
+// CHECK: Dummy called
|
||||
+// CHECK-NEXT: Main called
|
||||
+// RUN: llvm-bolt %t.exe -o %t.exe.bolt -lite=false --mark-funcs
|
||||
+// RUN: %t.exe.bolt | FileCheck %s
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,259 +1,274 @@
|
||||
From b26ff1c328435d7b0ceccec1dcc25252821ad373 Mon Sep 17 00:00:00 2001
|
||||
From: xiongzhou4 <xiongzhou4@huawei.com>
|
||||
Date: Mon, 11 Sep 2023 14:43:12 +0800
|
||||
Subject: [PATCH] [AArch64] Add AArch64 support for inline.
|
||||
|
||||
---
|
||||
bolt/lib/Passes/Inliner.cpp | 31 +++++++++++++++++++
|
||||
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 12 +++++++
|
||||
bolt/test/AArch64/Inputs/inline-foo.c | 5 +++
|
||||
bolt/test/AArch64/Inputs/inline-main.c | 5 +++
|
||||
bolt/test/AArch64/Inputs/inlined.cpp | 23 ++++++++++++++
|
||||
bolt/test/AArch64/Inputs/inlinee.cpp | 3 ++
|
||||
bolt/test/AArch64/Inputs/jmp_opt.cpp | 7 +++++
|
||||
bolt/test/AArch64/Inputs/jmp_opt2.cpp | 3 ++
|
||||
bolt/test/AArch64/Inputs/jmp_opt3.cpp | 3 ++
|
||||
bolt/test/AArch64/inline-debug-info.test | 20 ++++++++++++
|
||||
bolt/test/AArch64/inlined-function-mixed.test | 11 +++++++
|
||||
bolt/test/AArch64/jmp-optimization.test | 14 +++++++++
|
||||
12 files changed, 137 insertions(+)
|
||||
create mode 100644 bolt/test/AArch64/Inputs/inline-foo.c
|
||||
create mode 100644 bolt/test/AArch64/Inputs/inline-main.c
|
||||
create mode 100644 bolt/test/AArch64/Inputs/inlined.cpp
|
||||
create mode 100644 bolt/test/AArch64/Inputs/inlinee.cpp
|
||||
create mode 100644 bolt/test/AArch64/Inputs/jmp_opt.cpp
|
||||
create mode 100644 bolt/test/AArch64/Inputs/jmp_opt2.cpp
|
||||
create mode 100644 bolt/test/AArch64/Inputs/jmp_opt3.cpp
|
||||
create mode 100644 bolt/test/AArch64/inline-debug-info.test
|
||||
create mode 100644 bolt/test/AArch64/inlined-function-mixed.test
|
||||
create mode 100644 bolt/test/AArch64/jmp-optimization.test
|
||||
|
||||
diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp
|
||||
index 04232bd3e..d009d59dc 100644
|
||||
--- a/bolt/lib/Passes/Inliner.cpp
|
||||
+++ b/bolt/lib/Passes/Inliner.cpp
|
||||
@@ -464,6 +464,37 @@ bool Inliner::inlineCallsInFunction(BinaryFunction &Function) {
|
||||
<< ". Size change: " << SizeAfterInlining
|
||||
<< " bytes.\n");
|
||||
|
||||
+// Skip situations where some A64 instructions can't be inlined:
|
||||
+// # Indirect branch, e.g., BR.
|
||||
+// # Branch instructions but used to make a function call.
|
||||
+ if (BC.isAArch64()) {
|
||||
+ auto &MIB = *BC.MIB;
|
||||
+ bool skip = false;
|
||||
+ for (const BinaryBasicBlock &BB : *TargetFunction) {
|
||||
+ for (MCInst Inst : BB) {
|
||||
+ if (MIB.isPseudo(Inst))
|
||||
+ continue;
|
||||
+
|
||||
+ MIB.stripAnnotations(Inst, false);
|
||||
+
|
||||
+ if (MIB.isBranch(Inst)) {
|
||||
+ const BinaryBasicBlock *TargetBB =
|
||||
+ TargetFunction->getBasicBlockForLabel(MIB.getTargetSymbol(Inst));
|
||||
+ if (MIB.isIndirectBranch(Inst) || !TargetBB) {
|
||||
+ skip = true;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ if (skip)
|
||||
+ break;
|
||||
+ }
|
||||
+ if (skip) {
|
||||
+ ++InstIt;
|
||||
+ continue;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
std::tie(BB, InstIt) = inlineCall(*BB, InstIt, *TargetFunction);
|
||||
|
||||
DidInlining = true;
|
||||
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
|
||||
index c736196a8..03b1b536f 100644
|
||||
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
|
||||
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
|
||||
@@ -34,6 +34,16 @@ public:
|
||||
const MCRegisterInfo *RegInfo)
|
||||
: MCPlusBuilder(Analysis, Info, RegInfo) {}
|
||||
|
||||
+ MCPhysReg getStackPointer() const override { return AArch64::SP; }
|
||||
+
|
||||
+ bool createCall(MCInst &Inst, const MCSymbol *Target,
|
||||
+ MCContext *Ctx) override {
|
||||
+ Inst.setOpcode(AArch64::BL);
|
||||
+ Inst.addOperand(MCOperand::createExpr(
|
||||
+ MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx)));
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
bool equals(const MCTargetExpr &A, const MCTargetExpr &B,
|
||||
CompFuncTy Comp) const override {
|
||||
const auto &AArch64ExprA = cast<AArch64MCExpr>(A);
|
||||
@@ -1103,6 +1113,8 @@ public:
|
||||
|
||||
bool isLeave(const MCInst &Inst) const override { return false; }
|
||||
|
||||
+ bool isPush(const MCInst &Inst) const override { return false; }
|
||||
+
|
||||
bool isPop(const MCInst &Inst) const override { return false; }
|
||||
|
||||
bool isPrefix(const MCInst &Inst) const override { return false; }
|
||||
diff --git a/bolt/test/AArch64/Inputs/inline-foo.c b/bolt/test/AArch64/Inputs/inline-foo.c
|
||||
new file mode 100644
|
||||
index 000000000..1307c13f2
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/Inputs/inline-foo.c
|
||||
@@ -0,0 +1,5 @@
|
||||
+#include "stub.h"
|
||||
+
|
||||
+void foo() {
|
||||
+ puts("Hello world!\n");
|
||||
+}
|
||||
diff --git a/bolt/test/AArch64/Inputs/inline-main.c b/bolt/test/AArch64/Inputs/inline-main.c
|
||||
new file mode 100644
|
||||
index 000000000..7853d2b63
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/Inputs/inline-main.c
|
||||
@@ -0,0 +1,5 @@
|
||||
+extern void foo();
|
||||
+int main() {
|
||||
+ foo();
|
||||
+ return 0;
|
||||
+}
|
||||
diff --git a/bolt/test/AArch64/Inputs/inlined.cpp b/bolt/test/AArch64/Inputs/inlined.cpp
|
||||
new file mode 100644
|
||||
index 000000000..a6ff9e262
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/Inputs/inlined.cpp
|
||||
@@ -0,0 +1,23 @@
|
||||
+extern "C" int printf(const char*, ...);
|
||||
+extern const char* question();
|
||||
+
|
||||
+inline int answer() __attribute__((always_inline));
|
||||
+inline int answer() { return 42; }
|
||||
+
|
||||
+int main(int argc, char *argv[]) {
|
||||
+ int ans;
|
||||
+ if (argc == 1) {
|
||||
+ ans = 0;
|
||||
+ } else {
|
||||
+ ans = argc;
|
||||
+ }
|
||||
+ printf("%s\n", question());
|
||||
+ for (int i = 0; i < 10; ++i) {
|
||||
+ int x = answer();
|
||||
+ int y = answer();
|
||||
+ ans += x - y;
|
||||
+ }
|
||||
+ // padding to make sure question() is inlineable
|
||||
+ asm("nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;");
|
||||
+ return ans;
|
||||
+}
|
||||
diff --git a/bolt/test/AArch64/Inputs/inlinee.cpp b/bolt/test/AArch64/Inputs/inlinee.cpp
|
||||
new file mode 100644
|
||||
index 000000000..edb7ab145
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/Inputs/inlinee.cpp
|
||||
@@ -0,0 +1,3 @@
|
||||
+const char* question() {
|
||||
+ return "What do you get if you multiply six by nine?";
|
||||
+}
|
||||
diff --git a/bolt/test/AArch64/Inputs/jmp_opt.cpp b/bolt/test/AArch64/Inputs/jmp_opt.cpp
|
||||
new file mode 100644
|
||||
index 000000000..cd6d53c35
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/Inputs/jmp_opt.cpp
|
||||
@@ -0,0 +1,7 @@
|
||||
+int g();
|
||||
+
|
||||
+int main() {
|
||||
+ int x = g();
|
||||
+ int y = x*x;
|
||||
+ return y;
|
||||
+}
|
||||
diff --git a/bolt/test/AArch64/Inputs/jmp_opt2.cpp b/bolt/test/AArch64/Inputs/jmp_opt2.cpp
|
||||
new file mode 100644
|
||||
index 000000000..80b853d63
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/Inputs/jmp_opt2.cpp
|
||||
@@ -0,0 +1,3 @@
|
||||
+int f() {
|
||||
+ return 0;
|
||||
+}
|
||||
diff --git a/bolt/test/AArch64/Inputs/jmp_opt3.cpp b/bolt/test/AArch64/Inputs/jmp_opt3.cpp
|
||||
new file mode 100644
|
||||
index 000000000..7fb551163
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/Inputs/jmp_opt3.cpp
|
||||
@@ -0,0 +1,3 @@
|
||||
+int f();
|
||||
+
|
||||
+int g() { return f(); }
|
||||
diff --git a/bolt/test/AArch64/inline-debug-info.test b/bolt/test/AArch64/inline-debug-info.test
|
||||
new file mode 100644
|
||||
index 000000000..e20e5e31e
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/inline-debug-info.test
|
||||
@@ -0,0 +1,20 @@
|
||||
+## Check that BOLT correctly prints and updates debug info for inlined
|
||||
+## functions.
|
||||
+
|
||||
+# REQUIRES: system-linux
|
||||
+
|
||||
+# RUN: %clang %cflags -O1 -g %p/Inputs/inline-main.c %p/Inputs/inline-foo.c \
|
||||
+# RUN: -I%p/../Inputs -o %t.exe -Wl,-q
|
||||
+# RUN: llvm-bolt %t.exe --update-debug-sections --print-debug-info \
|
||||
+# RUN: --print-only=main --print-after-lowering --force-inline=foo \
|
||||
+# RUN: -o %t.bolt \
|
||||
+# RUN: | FileCheck %s
|
||||
+
|
||||
+## The call to puts() should come from inline-foo.c:
|
||||
+# CHECK: callq {{.*}} # debug line {{.*}}inline-foo.c:4:3
|
||||
+
|
||||
+# RUN: llvm-objdump --disassemble-symbols=main -d --line-numbers %t.bolt \
|
||||
+# RUN: | FileCheck %s -check-prefix=CHECK-OBJDUMP
|
||||
+
|
||||
+## Dump of main() should include debug info from inline-foo.c after inlining:
|
||||
+# CHECK-OBJDUMP: inline-foo.c:4
|
||||
diff --git a/bolt/test/AArch64/inlined-function-mixed.test b/bolt/test/AArch64/inlined-function-mixed.test
|
||||
new file mode 100644
|
||||
index 000000000..5a87bdde9
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/inlined-function-mixed.test
|
||||
@@ -0,0 +1,11 @@
|
||||
+# Make sure inlining from a unit with debug info into unit without
|
||||
+# debug info does not cause a crash.
|
||||
+
|
||||
+RUN: %clangxx %cxxflags %S/Inputs/inlined.cpp -c -o %T/inlined.o
|
||||
+RUN: %clangxx %cxxflags %S/Inputs/inlinee.cpp -c -o %T/inlinee.o -g
|
||||
+RUN: %clangxx %cxxflags %T/inlined.o %T/inlinee.o -o %t
|
||||
+
|
||||
+RUN: llvm-bolt %t -o %t.bolt --update-debug-sections --reorder-blocks=reverse \
|
||||
+RUN: --inline-small-functions --force-inline=main | FileCheck %s
|
||||
+
|
||||
+CHECK-NOT: BOLT: 0 out of {{.*}} functions were overwritten
|
||||
diff --git a/bolt/test/AArch64/jmp-optimization.test b/bolt/test/AArch64/jmp-optimization.test
|
||||
new file mode 100644
|
||||
index 000000000..92f4b9a14
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/jmp-optimization.test
|
||||
@@ -0,0 +1,14 @@
|
||||
+# Tests the optimization of functions that just do a tail call in the beginning.
|
||||
+
|
||||
+# This test has commands that rely on shell capabilities that won't execute
|
||||
+# correctly on Windows e.g. unsupported parameter expansion
|
||||
+REQUIRES: shell
|
||||
+
|
||||
+RUN: %clang %cflags -O2 %S/Inputs/jmp_opt{,2,3}.cpp -o %t
|
||||
+RUN: llvm-bolt -inline-small-functions %t -o %t.bolt
|
||||
+RUN: llvm-objdump -d %t.bolt --print-imm-hex | FileCheck %s
|
||||
+
|
||||
+CHECK: <main>:
|
||||
+CHECK-NOT: call
|
||||
+CHECK: xorl %eax, %eax
|
||||
+CHECK: retq
|
||||
--
|
||||
2.33.0
|
||||
|
||||
From a09ea2c3534d12f194f740180e09a229e0b2200f Mon Sep 17 00:00:00 2001
|
||||
From: xiongzhou4 <xiongzhou4@huawei.com>
|
||||
Date: Wed, 12 Jun 2024 17:12:36 +0800
|
||||
Subject: [PATCH 1/2] [AArch64] Add AArch64 support for inline.
|
||||
|
||||
---
|
||||
bolt/include/bolt/Core/MCPlusBuilder.h | 5 +--
|
||||
bolt/lib/Passes/Inliner.cpp | 31 +++++++++++++++++++
|
||||
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 10 ++++++
|
||||
bolt/test/AArch64/Inputs/inline-foo.c | 5 +++
|
||||
bolt/test/AArch64/Inputs/inline-main.c | 5 +++
|
||||
bolt/test/AArch64/Inputs/inlined.cpp | 23 ++++++++++++++
|
||||
bolt/test/AArch64/Inputs/inlinee.cpp | 3 ++
|
||||
bolt/test/AArch64/Inputs/jmp_opt.cpp | 7 +++++
|
||||
bolt/test/AArch64/Inputs/jmp_opt2.cpp | 3 ++
|
||||
bolt/test/AArch64/Inputs/jmp_opt3.cpp | 3 ++
|
||||
bolt/test/AArch64/inline-debug-info.test | 20 ++++++++++++
|
||||
bolt/test/AArch64/inlined-function-mixed.test | 11 +++++++
|
||||
bolt/test/AArch64/jmp-optimization.test | 14 +++++++++
|
||||
13 files changed, 136 insertions(+), 4 deletions(-)
|
||||
create mode 100644 bolt/test/AArch64/Inputs/inline-foo.c
|
||||
create mode 100644 bolt/test/AArch64/Inputs/inline-main.c
|
||||
create mode 100644 bolt/test/AArch64/Inputs/inlined.cpp
|
||||
create mode 100644 bolt/test/AArch64/Inputs/inlinee.cpp
|
||||
create mode 100644 bolt/test/AArch64/Inputs/jmp_opt.cpp
|
||||
create mode 100644 bolt/test/AArch64/Inputs/jmp_opt2.cpp
|
||||
create mode 100644 bolt/test/AArch64/Inputs/jmp_opt3.cpp
|
||||
create mode 100644 bolt/test/AArch64/inline-debug-info.test
|
||||
create mode 100644 bolt/test/AArch64/inlined-function-mixed.test
|
||||
create mode 100644 bolt/test/AArch64/jmp-optimization.test
|
||||
|
||||
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
|
||||
index db3f7e7f1..56d0228cd 100644
|
||||
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
|
||||
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
|
||||
@@ -573,10 +573,7 @@ public:
|
||||
return 0;
|
||||
}
|
||||
|
||||
- virtual bool isPush(const MCInst &Inst) const {
|
||||
- llvm_unreachable("not implemented");
|
||||
- return false;
|
||||
- }
|
||||
+ virtual bool isPush(const MCInst &Inst) const { return false; }
|
||||
|
||||
/// Return the width, in bytes, of the memory access performed by \p Inst, if
|
||||
/// this is a push instruction. Return zero otherwise.
|
||||
diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp
|
||||
index 8dcb8934f..67dd294fb 100644
|
||||
--- a/bolt/lib/Passes/Inliner.cpp
|
||||
+++ b/bolt/lib/Passes/Inliner.cpp
|
||||
@@ -465,6 +465,37 @@ bool Inliner::inlineCallsInFunction(BinaryFunction &Function) {
|
||||
<< ". Size change: " << SizeAfterInlining
|
||||
<< " bytes.\n");
|
||||
|
||||
+// Skip callees that contain A64 instructions which can't be inlined:
|
||||
+// # Indirect branches, e.g., BR.
|
||||
+// # Branches used to make a function call (target is outside the callee).
|
||||
+ if (BC.isAArch64()) {
|
||||
+ auto &MIB = *BC.MIB;
|
||||
+ bool skip = false;
|
||||
+ for (const BinaryBasicBlock &BB : *TargetFunction) {
|
||||
+ for (MCInst Inst : BB) {
|
||||
+ if (MIB.isPseudo(Inst))
|
||||
+ continue;
|
||||
+
|
||||
+ MIB.stripAnnotations(Inst, false);
|
||||
+
|
||||
+ if (MIB.isBranch(Inst)) {
|
||||
+ const BinaryBasicBlock *TargetBB =
|
||||
+ TargetFunction->getBasicBlockForLabel(MIB.getTargetSymbol(Inst));
|
||||
+ if (MIB.isIndirectBranch(Inst) || !TargetBB) {
|
||||
+ skip = true;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ if (skip)
|
||||
+ break;
|
||||
+ }
|
||||
+ if (skip) {
|
||||
+ ++InstIt;
|
||||
+ continue;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
std::tie(BB, InstIt) = inlineCall(*BB, InstIt, *TargetFunction);
|
||||
|
||||
DidInlining = true;
|
||||
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
|
||||
index d109a5d52..acf21ba23 100644
|
||||
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
|
||||
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
|
||||
@@ -34,6 +34,8 @@ public:
|
||||
const MCRegisterInfo *RegInfo)
|
||||
: MCPlusBuilder(Analysis, Info, RegInfo) {}
|
||||
|
||||
+ MCPhysReg getStackPointer() const override { return AArch64::SP; }
|
||||
+
|
||||
bool equals(const MCTargetExpr &A, const MCTargetExpr &B,
|
||||
CompFuncTy Comp) const override {
|
||||
const auto &AArch64ExprA = cast<AArch64MCExpr>(A);
|
||||
@@ -816,6 +818,14 @@ public:
|
||||
|
||||
int getUncondBranchEncodingSize() const override { return 28; }
|
||||
|
||||
+ bool createCall(MCInst &Inst, const MCSymbol *Target,
|
||||
+ MCContext *Ctx) override {
|
||||
+ Inst.setOpcode(AArch64::BL);
|
||||
+ Inst.addOperand(MCOperand::createExpr(
|
||||
+ MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx)));
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
bool createTailCall(MCInst &Inst, const MCSymbol *Target,
|
||||
MCContext *Ctx) override {
|
||||
Inst.setOpcode(AArch64::B);
|
||||
diff --git a/bolt/test/AArch64/Inputs/inline-foo.c b/bolt/test/AArch64/Inputs/inline-foo.c
|
||||
new file mode 100644
|
||||
index 000000000..1307c13f2
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/Inputs/inline-foo.c
|
||||
@@ -0,0 +1,5 @@
|
||||
+#include "stub.h"
|
||||
+
|
||||
+void foo() {
|
||||
+ puts("Hello world!\n");
|
||||
+}
|
||||
diff --git a/bolt/test/AArch64/Inputs/inline-main.c b/bolt/test/AArch64/Inputs/inline-main.c
|
||||
new file mode 100644
|
||||
index 000000000..7853d2b63
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/Inputs/inline-main.c
|
||||
@@ -0,0 +1,5 @@
|
||||
+extern void foo();
|
||||
+int main() {
|
||||
+ foo();
|
||||
+ return 0;
|
||||
+}
|
||||
diff --git a/bolt/test/AArch64/Inputs/inlined.cpp b/bolt/test/AArch64/Inputs/inlined.cpp
|
||||
new file mode 100644
|
||||
index 000000000..a6ff9e262
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/Inputs/inlined.cpp
|
||||
@@ -0,0 +1,23 @@
|
||||
+extern "C" int printf(const char*, ...);
|
||||
+extern const char* question();
|
||||
+
|
||||
+inline int answer() __attribute__((always_inline));
|
||||
+inline int answer() { return 42; }
|
||||
+
|
||||
+int main(int argc, char *argv[]) {
|
||||
+ int ans;
|
||||
+ if (argc == 1) {
|
||||
+ ans = 0;
|
||||
+ } else {
|
||||
+ ans = argc;
|
||||
+ }
|
||||
+ printf("%s\n", question());
|
||||
+ for (int i = 0; i < 10; ++i) {
|
||||
+ int x = answer();
|
||||
+ int y = answer();
|
||||
+ ans += x - y;
|
||||
+ }
|
||||
+ // padding to make sure question() is inlineable
|
||||
+ asm("nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;");
|
||||
+ return ans;
|
||||
+}
|
||||
diff --git a/bolt/test/AArch64/Inputs/inlinee.cpp b/bolt/test/AArch64/Inputs/inlinee.cpp
|
||||
new file mode 100644
|
||||
index 000000000..edb7ab145
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/Inputs/inlinee.cpp
|
||||
@@ -0,0 +1,3 @@
|
||||
+const char* question() {
|
||||
+ return "What do you get if you multiply six by nine?";
|
||||
+}
|
||||
diff --git a/bolt/test/AArch64/Inputs/jmp_opt.cpp b/bolt/test/AArch64/Inputs/jmp_opt.cpp
|
||||
new file mode 100644
|
||||
index 000000000..cd6d53c35
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/Inputs/jmp_opt.cpp
|
||||
@@ -0,0 +1,7 @@
|
||||
+int g();
|
||||
+
|
||||
+int main() {
|
||||
+ int x = g();
|
||||
+ int y = x*x;
|
||||
+ return y;
|
||||
+}
|
||||
diff --git a/bolt/test/AArch64/Inputs/jmp_opt2.cpp b/bolt/test/AArch64/Inputs/jmp_opt2.cpp
|
||||
new file mode 100644
|
||||
index 000000000..80b853d63
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/Inputs/jmp_opt2.cpp
|
||||
@@ -0,0 +1,3 @@
|
||||
+int f() {
|
||||
+ return 0;
|
||||
+}
|
||||
diff --git a/bolt/test/AArch64/Inputs/jmp_opt3.cpp b/bolt/test/AArch64/Inputs/jmp_opt3.cpp
|
||||
new file mode 100644
|
||||
index 000000000..7fb551163
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/Inputs/jmp_opt3.cpp
|
||||
@@ -0,0 +1,3 @@
|
||||
+int f();
|
||||
+
|
||||
+int g() { return f(); }
|
||||
diff --git a/bolt/test/AArch64/inline-debug-info.test b/bolt/test/AArch64/inline-debug-info.test
|
||||
new file mode 100644
|
||||
index 000000000..e20e5e31e
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/inline-debug-info.test
|
||||
@@ -0,0 +1,20 @@
|
||||
+## Check that BOLT correctly prints and updates debug info for inlined
|
||||
+## functions.
|
||||
+
|
||||
+# REQUIRES: system-linux
|
||||
+
|
||||
+# RUN: %clang %cflags -O1 -g %p/Inputs/inline-main.c %p/Inputs/inline-foo.c \
|
||||
+# RUN: -I%p/../Inputs -o %t.exe -Wl,-q
|
||||
+# RUN: llvm-bolt %t.exe --update-debug-sections --print-debug-info \
|
||||
+# RUN: --print-only=main --print-after-lowering --force-inline=foo \
|
||||
+# RUN: -o %t.bolt \
|
||||
+# RUN: | FileCheck %s
|
||||
+
|
||||
+## The call to puts() should come from inline-foo.c:
|
||||
+# CHECK: callq {{.*}} # debug line {{.*}}inline-foo.c:4:3
|
||||
+
|
||||
+# RUN: llvm-objdump --disassemble-symbols=main -d --line-numbers %t.bolt \
|
||||
+# RUN: | FileCheck %s -check-prefix=CHECK-OBJDUMP
|
||||
+
|
||||
+## Dump of main() should include debug info from inline-foo.c after inlining:
|
||||
+# CHECK-OBJDUMP: inline-foo.c:4
|
||||
diff --git a/bolt/test/AArch64/inlined-function-mixed.test b/bolt/test/AArch64/inlined-function-mixed.test
|
||||
new file mode 100644
|
||||
index 000000000..5a87bdde9
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/inlined-function-mixed.test
|
||||
@@ -0,0 +1,11 @@
|
||||
+# Make sure inlining from a unit with debug info into unit without
|
||||
+# debug info does not cause a crash.
|
||||
+
|
||||
+RUN: %clangxx %cxxflags %S/Inputs/inlined.cpp -c -o %T/inlined.o
|
||||
+RUN: %clangxx %cxxflags %S/Inputs/inlinee.cpp -c -o %T/inlinee.o -g
|
||||
+RUN: %clangxx %cxxflags %T/inlined.o %T/inlinee.o -o %t
|
||||
+
|
||||
+RUN: llvm-bolt %t -o %t.bolt --update-debug-sections --reorder-blocks=reverse \
|
||||
+RUN: --inline-small-functions --force-inline=main | FileCheck %s
|
||||
+
|
||||
+CHECK-NOT: BOLT: 0 out of {{.*}} functions were overwritten
|
||||
diff --git a/bolt/test/AArch64/jmp-optimization.test b/bolt/test/AArch64/jmp-optimization.test
|
||||
new file mode 100644
|
||||
index 000000000..92f4b9a14
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/jmp-optimization.test
|
||||
@@ -0,0 +1,14 @@
|
||||
+# Tests the optimization of functions that just do a tail call in the beginning.
|
||||
+
|
||||
+# This test has commands that rely on shell capabilities that won't execute
|
||||
+# correctly on Windows e.g. unsupported parameter expansion
|
||||
+REQUIRES: shell
|
||||
+
|
||||
+RUN: %clang %cflags -O2 %S/Inputs/jmp_opt{,2,3}.cpp -o %t
|
||||
+RUN: llvm-bolt -inline-small-functions %t -o %t.bolt
|
||||
+RUN: llvm-objdump -d %t.bolt --print-imm-hex | FileCheck %s
|
||||
+
|
||||
+CHECK: <main>:
|
||||
+CHECK-NOT: call
|
||||
+CHECK: xorl %eax, %eax
|
||||
+CHECK: retq
|
||||
--
|
||||
2.33.0
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
170
0004-Bolt-Solving-pie-support-issue.patch
Normal file
170
0004-Bolt-Solving-pie-support-issue.patch
Normal file
@ -0,0 +1,170 @@
|
||||
From a28084a4adff2340dd02c2c0c42f4997f76b3ffa Mon Sep 17 00:00:00 2001
|
||||
From: rfwang07 <wangrufeng5@huawei.com>
|
||||
Date: Fri, 21 Jun 2024 11:16:44 +0800
|
||||
Subject: [PATCH] [Bolt] Solving pie support issue
|
||||
|
||||
---
|
||||
bolt/lib/Core/BinaryContext.cpp | 25 +++++++++++++++++++----
|
||||
bolt/test/perf2bolt/Inputs/perf_test.c | 26 ++++++++++++++++++++++++
|
||||
bolt/test/perf2bolt/Inputs/perf_test.lds | 13 ++++++++++++
|
||||
bolt/test/perf2bolt/lit.local.cfg | 4 ++++
|
||||
bolt/test/perf2bolt/perf_test.test | 17 ++++++++++++++++
|
||||
bolt/unittests/Core/BinaryContext.cpp | 21 +++++++++++++++++++
|
||||
6 files changed, 102 insertions(+), 4 deletions(-)
|
||||
create mode 100644 bolt/test/perf2bolt/Inputs/perf_test.c
|
||||
create mode 100644 bolt/test/perf2bolt/Inputs/perf_test.lds
|
||||
create mode 100644 bolt/test/perf2bolt/lit.local.cfg
|
||||
create mode 100644 bolt/test/perf2bolt/perf_test.test
|
||||
|
||||
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
|
||||
index 2d2b35ee2..ab9f0b844 100644
|
||||
--- a/bolt/lib/Core/BinaryContext.cpp
|
||||
+++ b/bolt/lib/Core/BinaryContext.cpp
|
||||
@@ -1880,10 +1880,27 @@ BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
|
||||
// Find a segment with a matching file offset.
|
||||
for (auto &KV : SegmentMapInfo) {
|
||||
const SegmentInfo &SegInfo = KV.second;
|
||||
- if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == FileOffset) {
|
||||
- // Use segment's aligned memory offset to calculate the base address.
|
||||
- const uint64_t MemOffset = alignDown(SegInfo.Address, SegInfo.Alignment);
|
||||
- return MMapAddress - MemOffset;
|
||||
+ // FileOffset is obtained from the perf event,
|
||||
+ // and it is equal to alignDown(SegInfo.FileOffset, pagesize).
|
||||
+ // If the pagesize is not equal to SegInfo.Alignment,
|
||||
+ // FileOffset and SegInfo.FileOffset should be aligned first
|
||||
+ // and then compared for equality.
|
||||
+ if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) ==
|
||||
+ alignDown(FileOffset, SegInfo.Alignment)) {
|
||||
+ // The function's offset from base address in VAS is aligned by pagesize
|
||||
+ // instead of SegInfo.Alignment. Pagesize can't be got from perf events.
|
||||
+ // However, the ELF document says that SegInfo.FileOffset should be equal
|
||||
+ // to SegInfo.Address, modulo the pagesize.
|
||||
+ // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf
|
||||
+
|
||||
+ // So alignDown(SegInfo.Address, pagesize) can be calculated by:
|
||||
+ // alignDown(SegInfo.Address, pagesize)
|
||||
+ // = SegInfo.Address - (SegInfo.Address % pagesize)
|
||||
+ // = SegInfo.Address - (SegInfo.FileOffset % pagesize)
|
||||
+ // = SegInfo.Address - SegInfo.FileOffset +
|
||||
+ // alignDown(SegInfo.FileOffset, pagesize)
|
||||
+ // = SegInfo.Address - SegInfo.FileOffset + FileOffset
|
||||
+ return MMapAddress - (SegInfo.Address - SegInfo.FileOffset + FileOffset);
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/bolt/test/perf2bolt/Inputs/perf_test.c b/bolt/test/perf2bolt/Inputs/perf_test.c
|
||||
new file mode 100644
|
||||
index 000000000..ff5ecf7a8
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/perf2bolt/Inputs/perf_test.c
|
||||
@@ -0,0 +1,26 @@
|
||||
+#include <stdio.h>
|
||||
+#include <stdlib.h>
|
||||
+#include <unistd.h>
|
||||
+
|
||||
+int add(int a, int b) { return a + b; }
|
||||
+int minus(int a, int b) { return a - b; }
|
||||
+int multiple(int a, int b) { return a * b; }
|
||||
+int divide(int a, int b) {
|
||||
+ if (b == 0)
|
||||
+ return 0;
|
||||
+ return a / b;
|
||||
+}
|
||||
+
|
||||
+int main() {
|
||||
+ int a = 16;
|
||||
+ int b = 8;
|
||||
+
|
||||
+ for (int i = 1; i < 100000; i++) {
|
||||
+ add(a, b);
|
||||
+ minus(a, b);
|
||||
+ multiple(a, b);
|
||||
+ divide(a, b);
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
diff --git a/bolt/test/perf2bolt/Inputs/perf_test.lds b/bolt/test/perf2bolt/Inputs/perf_test.lds
|
||||
new file mode 100644
|
||||
index 000000000..9cb4ebbf1
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/perf2bolt/Inputs/perf_test.lds
|
||||
@@ -0,0 +1,13 @@
|
||||
+SECTIONS {
|
||||
+ . = SIZEOF_HEADERS;
|
||||
+ .interp : { *(.interp) }
|
||||
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
|
||||
+ . = 0x212e8;
|
||||
+ .dynsym : { *(.dynsym) }
|
||||
+ . = 0x31860;
|
||||
+ .text : { *(.text*) }
|
||||
+ . = 0x41c20;
|
||||
+ .fini_array : { *(.fini_array) }
|
||||
+ . = 0x54e18;
|
||||
+ .data : { *(.data) }
|
||||
+}
|
||||
diff --git a/bolt/test/perf2bolt/lit.local.cfg b/bolt/test/perf2bolt/lit.local.cfg
|
||||
new file mode 100644
|
||||
index 000000000..87a96ec34
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/perf2bolt/lit.local.cfg
|
||||
@@ -0,0 +1,4 @@
|
||||
+import shutil
|
||||
+
|
||||
+if shutil.which("perf") != None:
|
||||
+ config.available_features.add("perf")
|
||||
diff --git a/bolt/test/perf2bolt/perf_test.test b/bolt/test/perf2bolt/perf_test.test
|
||||
new file mode 100644
|
||||
index 000000000..fe6e015ab
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/perf2bolt/perf_test.test
|
||||
@@ -0,0 +1,17 @@
|
||||
+# Check that perf2bolt can process a binary that was compiled as PIE
|
||||
+
|
||||
+REQUIRES: system-linux, perf
|
||||
+
|
||||
+RUN: %clang %S/Inputs/perf_test.c -fuse-ld=lld -Wl,--script=%S/Inputs/perf_test.lds -o %t
|
||||
+RUN: perf record -e cycles:u -o %t2 -- %t
|
||||
+RUN: perf2bolt %t -p=%t2 -o %t3 -nl -ignore-build-id 2>&1 | FileCheck %s
|
||||
+
|
||||
+CHECK-NOT: PERF2BOLT-ERROR
|
||||
+CHECK-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection.
|
||||
+
|
||||
+RUN: %clang %S/Inputs/perf_test.c -no-pie -fuse-ld=lld -o %t4
|
||||
+RUN: perf record -e cycles:u -o %t5 -- %t4
|
||||
+RUN: perf2bolt %t4 -p=%t5 -o %t6 -nl -ignore-build-id 2>&1 | FileCheck %s --check-prefix=CHECK-NO-PIE
|
||||
+
|
||||
+CHECK-NO-PIE-NOT: PERF2BOLT-ERROR
|
||||
+CHECK-NO-PIE-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection.
|
||||
diff --git a/bolt/unittests/Core/BinaryContext.cpp b/bolt/unittests/Core/BinaryContext.cpp
|
||||
index bac264141..5a80cb4a2 100644
|
||||
--- a/bolt/unittests/Core/BinaryContext.cpp
|
||||
+++ b/bolt/unittests/Core/BinaryContext.cpp
|
||||
@@ -83,3 +83,24 @@ TEST_P(BinaryContextTester, BaseAddress) {
|
||||
BaseAddress = BC->getBaseAddressForMapping(0x7f13f5556000, 0x137a000);
|
||||
ASSERT_FALSE(BaseAddress.has_value());
|
||||
}
|
||||
+
|
||||
+TEST_P(BinaryContextTester, BaseAddress2) {
|
||||
+ // Check that base address calculation is correct for a binary if the
|
||||
+ // alignment in the ELF file is different from the pagesize.
|
||||
+ // The segment layout is as follows:
|
||||
+ BC->SegmentMapInfo[0] = SegmentInfo{0, 0x2177c, 0, 0x2177c, 0x10000};
|
||||
+ BC->SegmentMapInfo[0x31860] =
|
||||
+ SegmentInfo{0x31860, 0x370, 0x21860, 0x370, 0x10000};
|
||||
+ BC->SegmentMapInfo[0x41c20] =
|
||||
+ SegmentInfo{0x41c20, 0x1f8, 0x21c20, 0x1f8, 0x10000};
|
||||
+ BC->SegmentMapInfo[0x54e18] =
|
||||
+ SegmentInfo{0x54e18, 0x51, 0x24e18, 0x51, 0x10000};
|
||||
+
|
||||
+ std::optional<uint64_t> BaseAddress =
|
||||
+ BC->getBaseAddressForMapping(0xaaaaea444000, 0x21000);
|
||||
+ ASSERT_TRUE(BaseAddress.has_value());
|
||||
+ ASSERT_EQ(*BaseAddress, 0xaaaaea413000ULL);
|
||||
+
|
||||
+ BaseAddress = BC->getBaseAddressForMapping(0xaaaaea444000, 0x11000);
|
||||
+ ASSERT_FALSE(BaseAddress.has_value());
|
||||
+}
|
||||
--
|
||||
2.39.2 (Apple Git-143)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
130
0005-BOLT-AArch64-Don-t-change-layout-in-PatchEntries.patch
Normal file
130
0005-BOLT-AArch64-Don-t-change-layout-in-PatchEntries.patch
Normal file
@ -0,0 +1,130 @@
|
||||
From 28e7e71251dc4b79c29aa0d4904cb424f9081455 Mon Sep 17 00:00:00 2001
|
||||
From: rfwang07 <wangrufeng5@huawei.com>
|
||||
Date: Fri, 21 Jun 2024 11:23:42 +0800
|
||||
Subject: [PATCH] [BOLT][AArch64] Don't change layout in PatchEntries
|
||||
|
||||
---
|
||||
bolt/lib/Passes/PatchEntries.cpp | 11 ++++++++
|
||||
bolt/test/AArch64/patch-entries.s | 36 ++++++++++++++++++++++++
|
||||
bolt/unittests/Core/BinaryContext.cpp | 40 +++++++++++++++++++++++++++
|
||||
3 files changed, 87 insertions(+)
|
||||
create mode 100644 bolt/test/AArch64/patch-entries.s
|
||||
|
||||
diff --git a/bolt/lib/Passes/PatchEntries.cpp b/bolt/lib/Passes/PatchEntries.cpp
|
||||
index 02a044d8b..ee7512d89 100644
|
||||
--- a/bolt/lib/Passes/PatchEntries.cpp
|
||||
+++ b/bolt/lib/Passes/PatchEntries.cpp
|
||||
@@ -98,6 +98,17 @@ void PatchEntries::runOnFunctions(BinaryContext &BC) {
|
||||
});
|
||||
|
||||
if (!Success) {
|
||||
+ // We can't change output layout for AArch64 due to LongJmp pass
|
||||
+ if (BC.isAArch64()) {
|
||||
+ if (opts::ForcePatch) {
|
||||
+ errs() << "BOLT-ERROR: unable to patch entries in " << Function
|
||||
+ << "\n";
|
||||
+ exit(1);
|
||||
+ }
|
||||
+
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
// If the original function entries cannot be patched, then we cannot
|
||||
// safely emit new function body.
|
||||
errs() << "BOLT-WARNING: failed to patch entries in " << Function
|
||||
diff --git a/bolt/test/AArch64/patch-entries.s b/bolt/test/AArch64/patch-entries.s
|
||||
new file mode 100644
|
||||
index 000000000..cf6f72a0b
|
||||
--- /dev/null
|
||||
+++ b/bolt/test/AArch64/patch-entries.s
|
||||
@@ -0,0 +1,36 @@
|
||||
+# This test checks patch entries functionality
|
||||
+
|
||||
+# REQUIRES: system-linux
|
||||
+
|
||||
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
|
||||
+# RUN: %s -o %t.o
|
||||
+# RUN: %clang %cflags -pie %t.o -o %t.exe -nostdlib -Wl,-q
|
||||
+# RUN: llvm-bolt %t.exe -o %t.bolt --use-old-text=0 --lite=0 --skip-funcs=_start
|
||||
+# RUN: llvm-objdump -dz %t.bolt | FileCheck %s
|
||||
+
|
||||
+# CHECK: <pathedEntries.org.0>:
|
||||
+# CHECK-NEXT: adrp x16, 0x[[#%x,ADRP:]]
|
||||
+# CHECK-NEXT: add x16, x16, #0x[[#%x,ADD:]]
|
||||
+# CHECK-NEXT: br x16
|
||||
+
|
||||
+# CHECK: [[#ADRP + ADD]] <pathedEntries>:
|
||||
+# CHECK-NEXT: [[#ADRP + ADD]]: {{.*}} ret
|
||||
+
|
||||
+.text
|
||||
+.balign 4
|
||||
+.global pathedEntries
|
||||
+.type pathedEntries, %function
|
||||
+pathedEntries:
|
||||
+ .rept 32
|
||||
+ nop
|
||||
+ .endr
|
||||
+ ret
|
||||
+.size pathedEntries, .-pathedEntries
|
||||
+
|
||||
+.global _start
|
||||
+.type _start, %function
|
||||
+_start:
|
||||
+ bl pathedEntries
|
||||
+ .inst 0xdeadbeef
|
||||
+ ret
|
||||
+.size _start, .-_start
|
||||
diff --git a/bolt/unittests/Core/BinaryContext.cpp b/bolt/unittests/Core/BinaryContext.cpp
|
||||
index 5a80cb4a2..7ac1c1435 100644
|
||||
--- a/bolt/unittests/Core/BinaryContext.cpp
|
||||
+++ b/bolt/unittests/Core/BinaryContext.cpp
|
||||
@@ -62,6 +62,46 @@ INSTANTIATE_TEST_SUITE_P(X86, BinaryContextTester,
|
||||
INSTANTIATE_TEST_SUITE_P(AArch64, BinaryContextTester,
|
||||
::testing::Values(Triple::aarch64));
|
||||
|
||||
+TEST_P(BinaryContextTester, FlushPendingRelocCALL26) {
|
||||
+ if (GetParam() != Triple::aarch64)
|
||||
+ GTEST_SKIP();
|
||||
+
|
||||
+ // This test checks that encodeValueAArch64 used by flushPendingRelocations
|
||||
+ // returns correctly encoded values for CALL26 relocation for both backward
|
||||
+ // and forward branches.
|
||||
+ //
|
||||
+ // The offsets layout is:
|
||||
+ // 4: func1
|
||||
+ // 8: bl func1
|
||||
+ // 12: bl func2
|
||||
+ // 16: func2
|
||||
+
|
||||
+ char Data[20] = {};
|
||||
+ BinarySection &BS = BC->registerOrUpdateSection(
|
||||
+ ".text", ELF::SHT_PROGBITS, ELF::SHF_EXECINSTR | ELF::SHF_ALLOC,
|
||||
+ (uint8_t *)Data, sizeof(Data), 4);
|
||||
+ MCSymbol *RelSymbol1 = BC->getOrCreateGlobalSymbol(4, "Func1");
|
||||
+ ASSERT_TRUE(RelSymbol1);
|
||||
+ BS.addRelocation(8, RelSymbol1, ELF::R_AARCH64_CALL26, 0, 0, true);
|
||||
+ MCSymbol *RelSymbol2 = BC->getOrCreateGlobalSymbol(16, "Func2");
|
||||
+ ASSERT_TRUE(RelSymbol2);
|
||||
+ BS.addRelocation(12, RelSymbol2, ELF::R_AARCH64_CALL26, 0, 0, true);
|
||||
+
|
||||
+ std::error_code EC;
|
||||
+ SmallVector<char> Vect(sizeof(Data));
|
||||
+ raw_svector_ostream OS(Vect);
|
||||
+
|
||||
+ BS.flushPendingRelocations(OS, [&](const MCSymbol *S) {
|
||||
+ return S == RelSymbol1 ? 4 : S == RelSymbol2 ? 16 : 0;
|
||||
+ });
|
||||
+
|
||||
+ const uint8_t Func1Call[4] = {255, 255, 255, 151};
|
||||
+ const uint8_t Func2Call[4] = {1, 0, 0, 148};
|
||||
+
|
||||
+ EXPECT_FALSE(memcmp(Func1Call, &Vect[8], 4)) << "Wrong backward call value\n";
|
||||
+ EXPECT_FALSE(memcmp(Func2Call, &Vect[12], 4)) << "Wrong forward call value\n";
|
||||
+}
|
||||
+
|
||||
#endif
|
||||
|
||||
TEST_P(BinaryContextTester, BaseAddress) {
|
||||
--
|
||||
2.39.2 (Apple Git-143)
|
||||
|
||||
1820
0006-AArch64-Add-CFG-block-count-correction-optimization.patch
Normal file
1820
0006-AArch64-Add-CFG-block-count-correction-optimization.patch
Normal file
File diff suppressed because it is too large
Load Diff
19
README.en.md
19
README.en.md
@ -1,22 +1,9 @@
|
||||
# llvm-bolt
|
||||
|
||||
#### Description
|
||||
llvm-bolt is a post-link optimizer developed to speed up large applications
|
||||
|
||||
#### Software Architecture
|
||||
Software architecture description
|
||||
|
||||
#### Installation
|
||||
|
||||
1. xxxx
|
||||
2. xxxx
|
||||
3. xxxx
|
||||
|
||||
#### Instructions
|
||||
|
||||
1. xxxx
|
||||
2. xxxx
|
||||
3. xxxx
|
||||
BOLT is a post-link optimizer developed to speed up large applications.
|
||||
It achieves the improvements by optimizing an application's code layout based
|
||||
on an execution profile gathered by a sampling profiler, such as the Linux perf tool.
|
||||
|
||||
#### Contribution
|
||||
|
||||
|
||||
20
README.md
20
README.md
@ -1,23 +1,9 @@
|
||||
# llvm-bolt
|
||||
|
||||
#### 介绍
|
||||
llvm-bolt is a post-link optimizer developed to speed up large applications
|
||||
|
||||
#### 软件架构
|
||||
软件架构说明
|
||||
|
||||
|
||||
#### 安装教程
|
||||
|
||||
1. xxxx
|
||||
2. xxxx
|
||||
3. xxxx
|
||||
|
||||
#### 使用说明
|
||||
|
||||
1. xxxx
|
||||
2. xxxx
|
||||
3. xxxx
|
||||
BOLT is a post-link optimizer developed to speed up large applications.
|
||||
It achieves the improvements by optimizing an application's code layout based
|
||||
on an execution profile gathered by a sampling profiler, such as the Linux perf tool.
|
||||
|
||||
#### 参与贡献
|
||||
|
||||
|
||||
@ -1,32 +1,41 @@
|
||||
%bcond_without sys_llvm
|
||||
%bcond_with check
|
||||
|
||||
%global maj_ver 15
|
||||
%global maj_ver 17
|
||||
%global min_ver 0
|
||||
%global patch_ver 7
|
||||
%global patch_ver 6
|
||||
%global bolt_version %{maj_ver}.%{min_ver}.%{patch_ver}
|
||||
%global bolt_srcdir llvm-project-%{bolt_version}.src
|
||||
|
||||
%if %{with sys_llvm}
|
||||
%global pkg_name llvm-bolt
|
||||
%global install_prefix %{_prefix}
|
||||
%else
|
||||
%global pkg_name llvm-bolt%{maj_ver}
|
||||
%global install_prefix %{_libdir}/llvm%{maj_ver}
|
||||
%endif
|
||||
|
||||
%global install_bindir %{install_prefix}/bin
|
||||
%global install_libdir %{install_prefix}/lib
|
||||
%global install_docdir %{install_prefix}/share/doc
|
||||
%global max_link_jobs 2
|
||||
|
||||
Name: llvm-bolt
|
||||
Name: %{pkg_name}
|
||||
Version: %{bolt_version}
|
||||
Release: 3
|
||||
Release: 7
|
||||
Summary: BOLT is a post-link optimizer developed to speed up large applications
|
||||
License: Apache 2.0
|
||||
URL: https://github.com/llvm/llvm-project/tree/main/bolt
|
||||
|
||||
Source0: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{bolt_version}/%{bolt_srcdir}.tar.xz
|
||||
Source1: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{bolt_version}/%{bolt_srcdir}.tar.xz.sig
|
||||
|
||||
Patch1: 0001-AArch64-fix-bug-55005-handle-DW_CFA_GNU_NegateRAState.patch
|
||||
Patch2: 0002-AArch64-Add-AArch64-support-for-hugify.patch
|
||||
Patch1: 0001-Fix-trap-value-for-non-X86.patch
|
||||
Patch2: 0002-Add-test-for-emitting-trap-value.patch
|
||||
Patch3: 0003-AArch64-Add-AArch64-support-for-inline.patch
|
||||
Patch4: 0004-Added-open-source-code-related-to-feature-extracting.patch
|
||||
Patch5: 0005-Add-block-correction-optimization.patch
|
||||
Patch4: 0004-Bolt-Solving-pie-support-issue.patch
|
||||
Patch5: 0005-BOLT-AArch64-Don-t-change-layout-in-PatchEntries.patch
|
||||
Patch6: 0006-AArch64-Add-CFG-block-count-correction-optimization.patch
|
||||
|
||||
BuildRequires: gcc
|
||||
BuildRequires: gcc-c++
|
||||
@ -77,6 +86,9 @@ Documentation for the BOLT optimizer
|
||||
-DLLVM_TARGETS_TO_BUILD="AArch64"
|
||||
%endif
|
||||
|
||||
# Set LD_LIBRARY_PATH now because we skip rpath generation and the build uses
|
||||
# some just built libraries.
|
||||
export LD_LIBRARY_PATH=%{_builddir}/%{bolt_srcdir}/%{_vpath_builddir}/%{_lib}
|
||||
%ninja_build bolt
|
||||
|
||||
%install
|
||||
@ -93,9 +105,9 @@ find %{buildroot}%{install_prefix} \
|
||||
! -name "libbolt_rt_instr.a" \
|
||||
-type f,l -exec rm -f '{}' \;
|
||||
|
||||
#
|
||||
rm -f %{_builddir}/%{bolt_srcdir}/lib/lib*.a
|
||||
|
||||
# Remove files installed during the build phase.
|
||||
rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/%{_vpath_builddir}/%{_lib}/lib*.a
|
||||
|
||||
# There is currently no upstream support for building HTML docs for BOLT
|
||||
install -d %{buildroot}%{install_docdir}
|
||||
mv bolt/README.md bolt/docs/*.md %{buildroot}%{install_docdir}
|
||||
@ -103,24 +115,17 @@ mv bolt/README.md bolt/docs/*.md %{buildroot}%{install_docdir}
|
||||
%check
|
||||
|
||||
%if %{with check}
|
||||
# Bolt makes incorrect assumptions on the location of libbolt_rt_*.a.
|
||||
mkdir -p %{_builddir}/%{bolt_srcdir}/lib
|
||||
ln -s %{buildroot}/%{install_libdir}/libbolt_rt_hugify.a %{_builddir}/%{bolt_srcdir}/lib
|
||||
%ifarch x86_64
|
||||
ln -s %{buildroot}/%{install_libdir}/libbolt_rt_instr.a %{_builddir}/%{bolt_srcdir}/lib
|
||||
%endif
|
||||
|
||||
%ifarch aarch64
|
||||
# Failing test cases on aarch64
|
||||
rm bolt/test/cache+-deprecated.test bolt/test/bolt-icf.test bolt/test/R_ABS.pic.lld.cpp
|
||||
%endif
|
||||
|
||||
export LD_LIBRARY_PATH=%{_builddir}/%{bolt_srcdir}/lib
|
||||
export LD_LIBRARY_PATH=%{_builddir}/%{bolt_srcdir}/%{_vpath_builddir}/%{_lib}
|
||||
export DESTDIR=%{buildroot}
|
||||
%ninja_build check
|
||||
%ninja_build check-bolt
|
||||
|
||||
# Remove files installed during the check phase.
|
||||
rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/lib/lib*.a
|
||||
rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/%{_vpath_builddir}/%{_lib}/lib*.a
|
||||
%endif
|
||||
|
||||
%files
|
||||
@ -130,9 +135,9 @@ rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/lib/lib*.a
|
||||
%{install_bindir}/merge-fdata
|
||||
%{install_bindir}/perf2bolt
|
||||
%{install_bindir}/llvm-bolt-heatmap
|
||||
%{install_libdir}/libbolt_rt_hugify.a
|
||||
|
||||
%ifarch x86_64
|
||||
%{install_libdir}/libbolt_rt_hugify.a
|
||||
%{install_libdir}/libbolt_rt_instr.a
|
||||
%endif
|
||||
|
||||
@ -142,12 +147,36 @@ rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/lib/lib*.a
|
||||
%doc %{install_docdir}
|
||||
|
||||
%changelog
|
||||
* Wed Jun 5 2024 Zhou Zeping <zhouzp610@126.com> 15.0.7-3
|
||||
- Type:Update
|
||||
* Fri Jul 12 2024 rfwang07 <wangrufeng5@huawei.com> 17.0.6-7
|
||||
- Type:Feature
|
||||
- ID:NA
|
||||
- SUG:NA
|
||||
- DESC: Add CFG block count correction optimization.
|
||||
|
||||
* Fri Jun 21 2024 rfwang07 <wangrufeng5@huawei.com> 17.0.6-6
|
||||
- Type:Backport
|
||||
- ID:NA
|
||||
- SUG:NA
|
||||
- DESC: Backport bugfix.
|
||||
|
||||
* Tue Jun 18 2024 Xiong Zhou <xiongzhou4@huawei.com> 17.0.6-5
|
||||
- Type:Feature
|
||||
- ID:NA
|
||||
- SUG:NA
|
||||
- DESC: Add AArch64 support for inline.
|
||||
|
||||
* Tue Jun 18 2024 Xiong Zhou <xiongzhou4@huawei.com> 17.0.6-4
|
||||
- Type:Backport
|
||||
- ID:NA
|
||||
- SUG:NA
|
||||
- DESC: Backport bugfix.
|
||||
|
||||
* Tue Jun 18 2024 Xiong Zhou <xiongzhou4@huawei.com> 17.0.6-3
|
||||
- Type:Update
|
||||
- ID:NA
|
||||
- SUG:NA
|
||||
- DESC: Update to version 17.0.6
|
||||
|
||||
* Thu Sep 7 2023 Xiong Zhou <xiongzhou4@huawei.com> 15.0.7-2
|
||||
- Type:Update
|
||||
- ID:NA
|
||||
|
||||
Binary file not shown.
BIN
llvm-project-17.0.6.src.tar.xz.sig
Normal file
BIN
llvm-project-17.0.6.src.tar.xz.sig
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user