Skip to content

Commit 3185acd

Browse files
committed
perf: add per-class mesh trigger
1 parent faf1172 commit 3185acd

File tree

5 files changed

+343
-64
lines changed

5 files changed

+343
-64
lines changed

src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ set(unit_src
3939
testing/unit/concurrent_mesh_test.cc
4040
testing/unit/mesh_memory_test.cc
4141
testing/unit/mesh_test.cc
42+
testing/unit/mesh_trigger_test.cc
4243
testing/unit/pending_list_test.cc
4344
testing/unit/rng_test.cc
4445
testing/unit/thread_exit_test.cc

src/global_heap.h

Lines changed: 16 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
#include "internal.h"
1515
#include "meshable_arena.h"
16+
#include "mesh_trigger.h"
1617
#include "mini_heap.h"
1718

1819
#include "heaplayers.h"
@@ -127,7 +128,12 @@ class GlobalHeap : public MeshableArena<PageSize> {
127128
}
128129
};
129130

130-
GlobalHeap() : Super(), _maxObjectSize(SizeMap::ByteSizeForClass(kNumBins - 1)), _lastMesh{time::now()} {
131+
GlobalHeap()
132+
: Super(),
133+
_maxObjectSize(SizeMap::ByteSizeForClass(kNumBins - 1)),
134+
_meshEventBudget(initMeshEventBudget()),
135+
_meshTrigger(_meshEventBudget),
136+
_lastMesh{time::now()} {
131137
}
132138

133139
inline void dumpStrings() const {
@@ -641,7 +647,7 @@ class GlobalHeap : public MeshableArena<PageSize> {
641647
// after call to meshLocked() completes src is a nullptr
642648
void ATTRIBUTE_NEVER_INLINE meshLocked(MiniHeapT *dst, MiniHeapT *&src);
643649

644-
inline void ATTRIBUTE_ALWAYS_INLINE maybeMesh() {
650+
inline void ATTRIBUTE_ALWAYS_INLINE maybeMesh(int preferredSizeClass = -1) {
645651
if (!kMeshingEnabled) {
646652
return;
647653
}
@@ -655,32 +661,12 @@ class GlobalHeap : public MeshableArena<PageSize> {
655661
return;
656662
}
657663

658-
const auto now = time::now();
659-
const auto lastMesh = _lastMesh.load(std::memory_order_acquire);
660-
auto duration = chrono::duration_cast<chrono::milliseconds>(now - lastMesh);
661-
662-
if (likely(duration < meshPeriodMs)) {
664+
size_t sizeClass = 0;
665+
if (!_meshTrigger.popRequested(preferredSizeClass, sizeClass)) {
663666
return;
664667
}
665668

666-
AllLocksGuard allLocks(_miniheapLocks, _largeAllocLock, _arenaLock);
667-
668-
{
669-
// ensure if two threads tried to grab the mesh lock at the same
670-
// time, the second one bows out gracefully without meshing
671-
// twice in a row.
672-
const auto lockedNow = time::now();
673-
const auto lockedLastMesh = _lastMesh.load(std::memory_order_relaxed);
674-
auto duration = chrono::duration_cast<chrono::milliseconds>(lockedNow - lockedLastMesh);
675-
676-
if (unlikely(duration < meshPeriodMs)) {
677-
return;
678-
}
679-
}
680-
681-
_lastMesh.store(now, std::memory_order_release);
682-
683-
meshAllSizeClassesLocked();
669+
processMeshRequest(sizeClass);
684670
}
685671

686672
inline bool okToProceed(void *ptr) const {
@@ -721,6 +707,8 @@ class GlobalHeap : public MeshableArena<PageSize> {
721707
}
722708

723709
private:
710+
static std::array<uint64_t, kNumBins> initMeshEventBudget();
711+
void processMeshRequest(size_t sizeClass);
724712
// check for meshes in all size classes -- must be called LOCKED
725713
void meshAllSizeClassesLocked();
726714
// meshSizeClassLocked returns the number of merged sets found
@@ -731,6 +719,9 @@ class GlobalHeap : public MeshableArena<PageSize> {
731719
atomic_size_t _meshPeriod{kDefaultMeshPeriod};
732720
std::atomic<std::chrono::milliseconds> _meshPeriodMs{kMeshPeriodMs};
733721

722+
std::array<uint64_t, kNumBins> _meshEventBudget{};
723+
internal::MeshTrigger<kNumBins> _meshTrigger{_meshEventBudget};
724+
734725
atomic_size_t ATTRIBUTE_ALIGNED(CACHELINE_SIZE) _lastMeshEffective{0};
735726

736727
// we want this on its own cacheline

src/global_heap_impl.h

Lines changed: 123 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,62 @@ MiniHeapID GetMiniHeapID(const MiniHeapT *mh) {
3636
return runtime<PageSize>().heap().miniheapIDFor(mh);
3737
}
3838

39+
template <size_t PageSize>
40+
std::array<uint64_t, kNumBins> GlobalHeap<PageSize>::initMeshEventBudget() {
41+
std::array<uint64_t, kNumBins> budgets{};
42+
constexpr uint64_t minBudget = PageSize;
43+
constexpr uint64_t maxBudget = static_cast<uint64_t>(PageSize) * 64ULL;
44+
for (size_t i = 0; i < kNumBins; i++) {
45+
const uint64_t objSize = static_cast<uint64_t>(SizeMap::ByteSizeForClass(static_cast<int32_t>(i)));
46+
uint64_t budget = objSize * 32;
47+
if (budget < minBudget) {
48+
budget = minBudget;
49+
} else if (budget > maxBudget) {
50+
budget = maxBudget;
51+
}
52+
budgets[i] = budget;
53+
}
54+
return budgets;
55+
}
56+
57+
template <size_t PageSize>
58+
struct MeshScratch {
59+
MergeSetArray<PageSize> &mergeSets;
60+
SplitArray<PageSize> &left;
61+
SplitArray<PageSize> &right;
62+
};
63+
64+
template <size_t PageSize>
65+
static MeshScratch<PageSize> getMeshScratch() {
66+
static MergeSetArray<PageSize> *MergeSetsPtr = []() {
67+
void *ptr =
68+
mmap(nullptr, sizeof(MergeSetArray<PageSize>), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
69+
hard_assert(ptr != MAP_FAILED);
70+
return new (ptr) MergeSetArray<PageSize>();
71+
}();
72+
static MergeSetArray<PageSize> &MergeSets = *MergeSetsPtr;
73+
74+
static SplitArray<PageSize> *LeftPtr = []() {
75+
void *ptr = mmap(nullptr, sizeof(SplitArray<PageSize>), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
76+
hard_assert(ptr != MAP_FAILED);
77+
return new (ptr) SplitArray<PageSize>();
78+
}();
79+
static SplitArray<PageSize> &Left = *LeftPtr;
80+
81+
static SplitArray<PageSize> *RightPtr = []() {
82+
void *ptr = mmap(nullptr, sizeof(SplitArray<PageSize>), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
83+
hard_assert(ptr != MAP_FAILED);
84+
return new (ptr) SplitArray<PageSize>();
85+
}();
86+
static SplitArray<PageSize> &Right = *RightPtr;
87+
88+
d_assert((reinterpret_cast<uintptr_t>(&MergeSets) & (getPageSize() - 1)) == 0);
89+
d_assert((reinterpret_cast<uintptr_t>(&Left) & (getPageSize() - 1)) == 0);
90+
d_assert((reinterpret_cast<uintptr_t>(&Right) & (getPageSize() - 1)) == 0);
91+
92+
return {MergeSets, Left, Right};
93+
}
94+
3995
template <size_t PageSize>
4096
void *GlobalHeap<PageSize>::malloc(size_t sz) {
4197
#ifndef NDEBUG
@@ -88,9 +144,14 @@ void GlobalHeap<PageSize>::freeFor(MiniHeapT *mh, void *ptr, size_t startEpoch)
88144

89145
d_assert(mh->maxCount() > 1);
90146

147+
const size_t objectSize = mh->objectSize();
148+
const size_t spanBytes = mh->spanSize();
149+
91150
auto freelistId = mh->freelistId();
92151
auto isAttached = mh->isAttached();
93152
auto sizeClass = mh->sizeClass();
153+
bool transitionedToPartial = false;
154+
bool becameEmpty = false;
94155

95156
// try to avoid storing to this cacheline; the branch is worth it to avoid
96157
// multi-threaded contention
@@ -151,6 +212,9 @@ void GlobalHeap<PageSize>::freeFor(MiniHeapT *mh, void *ptr, size_t startEpoch)
151212
isAttached = mh->isAttached();
152213

153214
if (!isAttached && (remaining == 0 || freelistId == list::Full)) {
215+
becameEmpty = remaining == 0;
216+
transitionedToPartial = remaining > 0 && freelistId == list::Full;
217+
shouldMesh = true;
154218
// this may free the miniheap -- we can't safely access it after
155219
// this point.
156220
postFreeLocked(mh, sizeClass, remaining);
@@ -173,6 +237,7 @@ void GlobalHeap<PageSize>::freeFor(MiniHeapT *mh, void *ptr, size_t startEpoch)
173237
// the exact crossing, the next free will catch it.
174238
if (isBelowPartialThreshold(remaining, mh->maxCount())) {
175239
tryPushPendingPartial(mh, sizeClass);
240+
transitionedToPartial = true;
176241
}
177242
shouldMesh = true;
178243
} else {
@@ -238,21 +303,67 @@ void GlobalHeap<PageSize>::freeFor(MiniHeapT *mh, void *ptr, size_t startEpoch)
238303
remaining = mh->inUseCount();
239304
postFreeLocked(mh, sizeClass, remaining);
240305
// Note: flushBinLocked deferred to next mesh cycle (requires arena lock)
306+
becameEmpty = true;
307+
shouldMesh = true;
241308
}
242309
} else {
243310
shouldMesh = !isAttached;
244311
}
245312
}
246313

247-
if (shouldMesh) {
248-
// Sample maybeMesh calls using pointer address bits to avoid overhead of
249-
// calling clock_gettime on every free. Check ~1 in 4096 frees.
250-
// Use bits 12-23 (above page offset, below typical allocation patterns).
251-
constexpr uintptr_t kMeshSampleMask = 0xFFF000;
252-
if (unlikely((reinterpret_cast<uintptr_t>(ptr) & kMeshSampleMask) == 0)) {
253-
maybeMesh();
314+
if (shouldMesh && !isAttached && sizeClass >= 0) {
315+
size_t delta = objectSize;
316+
if (transitionedToPartial || becameEmpty) {
317+
delta += spanBytes;
318+
}
319+
_meshTrigger.add(static_cast<size_t>(sizeClass), delta);
320+
maybeMesh(sizeClass);
321+
}
322+
}
323+
324+
template <size_t PageSize>
325+
void GlobalHeap<PageSize>::processMeshRequest(size_t sizeClass) {
326+
d_assert(sizeClass < kNumBins);
327+
auto scratch = getMeshScratch<PageSize>();
328+
const uint64_t budget = _meshTrigger.adjustedBudget(sizeClass);
329+
330+
size_t meshCount = 0;
331+
bool aboveThreshold = false;
332+
333+
{
334+
// Lock ordering: size-class lock -> arena lock -> epoch lock
335+
// This matches meshAllSizeClassesLocked (called via AllLocksGuard in mallctl)
336+
lock_guard<mutex> sizeLock(_miniheapLocks[sizeClass]);
337+
lock_guard<mutex> arenaLock(_arenaLock);
338+
339+
// if we have freed but not reset meshed mappings, this will reset
340+
// them to the identity mapping, ensuring we don't blow past our VMA
341+
// limit (which is why we set the force flag to true)
342+
Super::scavenge(true);
343+
344+
if (Super::aboveMeshThreshold()) {
345+
aboveThreshold = true;
346+
} else {
347+
// Acquire epoch lock last to match lock ordering in meshAllSizeClassesLocked
348+
lock_guard<EpochLock> epochLock(_meshEpoch);
349+
350+
drainPendingPartialLocked(sizeClass);
351+
flushBinLocked(sizeClass);
352+
meshCount = meshSizeClassLocked(sizeClass, scratch.mergeSets, scratch.left, scratch.right);
353+
madvise(&scratch.left, sizeof(scratch.left), MADV_DONTNEED);
354+
madvise(&scratch.right, sizeof(scratch.right), MADV_DONTNEED);
355+
madvise(&scratch.mergeSets, sizeof(scratch.mergeSets), MADV_DONTNEED);
356+
Super::scavenge(true);
254357
}
255358
}
359+
360+
if (!aboveThreshold) {
361+
_lastMeshEffective.store(meshCount > 0 ? 1 : 0, std::memory_order_release);
362+
_stats.meshCount += meshCount;
363+
}
364+
365+
_meshTrigger.onMeshComplete(sizeClass, meshCount > 0, budget);
366+
_lastMesh.store(time::now(), std::memory_order_release);
256367
}
257368

258369
template <size_t PageSize>
@@ -428,34 +539,7 @@ size_t GlobalHeap<PageSize>::meshSizeClassLocked(size_t sizeClass, MergeSetArray
428539

429540
template <size_t PageSize>
430541
void GlobalHeap<PageSize>::meshAllSizeClassesLocked() {
431-
static MergeSetArray<PageSize> *MergeSetsPtr = []() {
432-
void *ptr =
433-
mmap(nullptr, sizeof(MergeSetArray<PageSize>), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
434-
hard_assert(ptr != MAP_FAILED);
435-
return new (ptr) MergeSetArray<PageSize>();
436-
}();
437-
static MergeSetArray<PageSize> &MergeSets = *MergeSetsPtr;
438-
// static_assert(sizeof(MergeSets) == sizeof(void *) * 2 * 4096, "array too big");
439-
d_assert((reinterpret_cast<uintptr_t>(&MergeSets) & (getPageSize() - 1)) == 0);
440-
441-
static SplitArray<PageSize> *LeftPtr = []() {
442-
void *ptr = mmap(nullptr, sizeof(SplitArray<PageSize>), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
443-
hard_assert(ptr != MAP_FAILED);
444-
return new (ptr) SplitArray<PageSize>();
445-
}();
446-
static SplitArray<PageSize> &Left = *LeftPtr;
447-
448-
static SplitArray<PageSize> *RightPtr = []() {
449-
void *ptr = mmap(nullptr, sizeof(SplitArray<PageSize>), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
450-
hard_assert(ptr != MAP_FAILED);
451-
return new (ptr) SplitArray<PageSize>();
452-
}();
453-
static SplitArray<PageSize> &Right = *RightPtr;
454-
455-
// static_assert(sizeof(Left) == sizeof(void *) * 16384, "array too big");
456-
// static_assert(sizeof(Right) == sizeof(void *) * 16384, "array too big");
457-
d_assert((reinterpret_cast<uintptr_t>(&Left) & (getPageSize() - 1)) == 0);
458-
d_assert((reinterpret_cast<uintptr_t>(&Right) & (getPageSize() - 1)) == 0);
542+
auto scratch = getMeshScratch<PageSize>();
459543

460544
// if we have freed but not reset meshed mappings, this will reset
461545
// them to the identity mapping, ensuring we don't blow past our VMA
@@ -483,12 +567,12 @@ void GlobalHeap<PageSize>::meshAllSizeClassesLocked() {
483567
size_t totalMeshCount = 0;
484568

485569
for (size_t sizeClass = 0; sizeClass < kNumBins; sizeClass++) {
486-
totalMeshCount += meshSizeClassLocked(sizeClass, MergeSets, Left, Right);
570+
totalMeshCount += meshSizeClassLocked(sizeClass, scratch.mergeSets, scratch.left, scratch.right);
487571
}
488572

489-
madvise(&Left, sizeof(Left), MADV_DONTNEED);
490-
madvise(&Right, sizeof(Right), MADV_DONTNEED);
491-
madvise(&MergeSets, sizeof(MergeSets), MADV_DONTNEED);
573+
madvise(&scratch.left, sizeof(scratch.left), MADV_DONTNEED);
574+
madvise(&scratch.right, sizeof(scratch.right), MADV_DONTNEED);
575+
madvise(&scratch.mergeSets, sizeof(scratch.mergeSets), MADV_DONTNEED);
492576

493577
_lastMeshEffective = totalMeshCount > 256;
494578
_stats.meshCount += totalMeshCount;

0 commit comments

Comments
 (0)