115 lines
4.6 KiB
Diff
115 lines
4.6 KiB
Diff
|
|
From 92ae2027b9e9985f9f3ac90a007c9df452ea9cad Mon Sep 17 00:00:00 2001
|
||
|
|
From: liubo <liubo254@huawei.com>
|
||
|
|
Date: Sat, 13 Apr 2024 03:23:10 +0800
|
||
|
|
Subject: [PATCH] add libtcmalloc_2m.so in gperftools-libs rpm package
|
||
|
|
|
||
|
|
In the Ceph scenario, enabling tcmalloc huge pages can
|
||
|
|
reduce the TLB miss rate and improve performance.
|
||
|
|
|
||
|
|
However, tcmalloc does not support huge page release. Therefore,
|
||
|
|
release logic needs to be added. In this way, when
|
||
|
|
tcmalloc is using huge pages, the memory can be
|
||
|
|
released back to the OS, preventing memory overuse.
|
||
|
|
|
||
|
|
The libtcmalloc_2m.so file is added to tcmalloc to support
|
||
|
|
hugetlb and services that require 2 MB tcmalloc pages.
|
||
|
|
|
||
|
|
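The native libtcmalloc.so keeps its default page size; note that the span.h and system-alloc.cc changes are unconditional and apply to every build.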
|
||
|
|
|
||
|
|
Signed-off-by: liubo <liubo254@huawei.com>
|
||
|
|
---
|
||
|
|
Makefile.am | 10 ++++++++++
|
||
|
|
src/common.h | 13 +++++++++++--
|
||
|
|
src/span.h | 4 ++--
|
||
|
|
src/system-alloc.cc | 2 +-
|
||
|
|
4 files changed, 24 insertions(+), 5 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/Makefile.am b/Makefile.am
|
||
|
|
index 82be544..2443e80 100644
|
||
|
|
--- a/Makefile.am
|
||
|
|
+++ b/Makefile.am
|
||
|
|
@@ -967,6 +967,16 @@ libtcmalloc_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS) \
|
||
|
|
libtcmalloc_la_LDFLAGS = $(PTHREAD_CFLAGS) -version-info @TCMALLOC_SO_VERSION@
|
||
|
|
libtcmalloc_la_LIBADD = libtcmalloc_internal.la libmaybe_threads.la $(PTHREAD_LIBS)
|
||
|
|
|
||
|
|
+# Add libtcmalloc_2m.so, which uses 2 MB hugetlb pages for tcmalloc page allocation.
|
||
|
|
+lib_LTLIBRARIES += libtcmalloc_2m.la
|
||
|
|
+libtcmalloc_2m_la_SOURCES = $(TCMALLOC_CC) $(TCMALLOC_INCLUDES) \
|
||
|
|
+ $(HEAP_CHECKER_SOURCES) $(libtcmalloc_internal_la_SOURCES)
|
||
|
|
+libtcmalloc_2m_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS) \
|
||
|
|
+ $(MAYBE_NO_HEAP_CHECK) $(EMERGENCY_MALLOC_DEFINE) -DTCMALLOC_PAGE_SIZE_2M
|
||
|
|
+libtcmalloc_2m_la_LDFLAGS = $(PTHREAD_CFLAGS) -version-info @TCMALLOC_SO_VERSION@
|
||
|
|
+libtcmalloc_2m_la_LIBADD = libstacktrace.la libmaybe_threads.la $(PTHREAD_LIBS)
|
||
|
|
+
|
||
|
|
+
|
||
|
|
# same as above with without -DNDEBUG
|
||
|
|
noinst_LTLIBRARIES += libtcmalloc_internal_with_asserts.la
|
||
|
|
libtcmalloc_internal_with_asserts_la_SOURCES = $(libtcmalloc_internal_la_SOURCES)
|
||
|
|
diff --git a/src/common.h b/src/common.h
|
||
|
|
index caa3e4a..687b2c6 100644
|
||
|
|
--- a/src/common.h
|
||
|
|
+++ b/src/common.h
|
||
|
|
@@ -72,8 +72,10 @@ static const size_t kMinAlign = 16;
|
||
|
|
// the thread cache allowance to avoid passing more free ranges to and from
|
||
|
|
// central lists. Also, larger pages are less likely to get freed.
|
||
|
|
// These two factors cause a bounded increase in memory use.
|
||
|
|
-#if defined(TCMALLOC_PAGE_SIZE_SHIFT)
|
||
|
|
+#if defined(TCMALLOC_PAGE_SIZE_SHIFT) && !defined(TCMALLOC_PAGE_SIZE_2M)
|
||
|
|
static const size_t kPageShift = TCMALLOC_PAGE_SIZE_SHIFT;
|
||
|
|
+#elif defined(TCMALLOC_PAGE_SIZE_2M)
|
||
|
|
+static const size_t kPageShift = 21;
|
||
|
|
#else
|
||
|
|
static const size_t kPageShift = 13;
|
||
|
|
#endif
|
||
|
|
@@ -83,11 +85,18 @@ static const size_t kClassSizesMax = 128;
|
||
|
|
static const size_t kMaxThreadCacheSize = 4 << 20;
|
||
|
|
|
||
|
|
static const size_t kPageSize = 1 << kPageShift;
|
||
|
|
+#if defined(TCMALLOC_PAGE_SIZE_2M)
|
||
|
|
+static const size_t kMaxSize = 2 * 1024 * 1024;
|
||
|
|
+#else
|
||
|
|
static const size_t kMaxSize = 256 * 1024;
|
||
|
|
+#endif
|
||
|
|
static const size_t kAlignment = 8;
|
||
|
|
// For all span-lengths <= kMaxPages we keep an exact-size list in PageHeap.
|
||
|
|
+#if defined(TCMALLOC_PAGE_SIZE_2M)
|
||
|
|
+static const size_t kMaxPages = 1 << (21 - kPageShift);
|
||
|
|
+#else
|
||
|
|
static const size_t kMaxPages = 1 << (20 - kPageShift);
|
||
|
|
-
|
||
|
|
+#endif
|
||
|
|
// Default bound on the total amount of thread caches.
|
||
|
|
#ifdef TCMALLOC_SMALL_BUT_SLOW
|
||
|
|
// Make the overall thread cache no bigger than that of a single thread
|
||
|
|
diff --git a/src/span.h b/src/span.h
|
||
|
|
index 7068893..9c89edc 100644
|
||
|
|
--- a/src/span.h
|
||
|
|
+++ b/src/span.h
|
||
|
|
@@ -80,8 +80,8 @@ struct Span {
|
||
|
|
// iterator which lifetime is controlled explicitly.
|
||
|
|
char span_iter_space[sizeof(SpanSet::iterator)];
|
||
|
|
};
|
||
|
|
- unsigned int refcount : 16; // Number of non-free objects
|
||
|
|
- unsigned int sizeclass : 8; // Size-class for small objects (or 0)
|
||
|
|
+ unsigned int refcount; // Number of non-free objects
|
||
|
|
+ unsigned int sizeclass; // Size-class for small objects (or 0)
|
||
|
|
unsigned int location : 2; // Is the span on a freelist, and if so, which?
|
||
|
|
unsigned int sample : 1; // Sampled object?
|
||
|
|
bool has_span_iter : 1; // Iff span_iter_space has valid
|
||
|
|
diff --git a/src/system-alloc.cc b/src/system-alloc.cc
|
||
|
|
index 439ec69..b1bb7c9 100644
|
||
|
|
--- a/src/system-alloc.cc
|
||
|
|
+++ b/src/system-alloc.cc
|
||
|
|
@@ -548,7 +548,7 @@ bool TCMalloc_SystemRelease(void* start, size_t length) {
|
||
|
|
result = ret != MAP_FAILED;
|
||
|
|
#else
|
||
|
|
int ret = madvise(reinterpret_cast<char*>(new_start),
|
||
|
|
- new_end - new_start, MADV_FREE);
|
||
|
|
+ new_end - new_start, MADV_DONTNEED);
|
||
|
|
|
||
|
|
result = ret != -1;
|
||
|
|
#endif
|
||
|
|
--
|
||
|
|
2.23.0
|
||
|
|
|