Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions mysql-test/suite/sys_vars/r/all_vars.result
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ init_replica
init_replica
init_slave
init_slave
innodb_buffer_pool_lazy_latch_init
innodb_buffer_pool_lazy_latch_init
innodb_dedicated_server
innodb_dedicated_server
innodb_log_spin_cpu_abs_lwm
Expand Down
158 changes: 100 additions & 58 deletions storage/innobase/buf/buf0buf.cc
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ this program; if not, write to the Free Software Foundation, Inc.,
#include "srv0start.h"
#include "sync0sync.h"
#include "trx0trx.h"
#include "ut0cpu_cache.h"
#include "ut0new.h"

#include "scope_guard.h"
Expand Down Expand Up @@ -828,16 +829,49 @@ static void pfs_register_buffer_block(
}
#endif /* PFS_GROUP_BUFFER_SYNC */

/** Initializes a buffer control block when the buf_pool is created. */
static void buf_block_init(
buf_pool_t *buf_pool, /*!< in: buffer pool instance */
buf_block_t *block, /*!< in: pointer to control block */
byte *frame) /*!< in: pointer to buffer frame */
{
/** Creates the latches of a buffer control block (block->mutex, block->lock
and block->debug_latch) and then sets block->latches_initialized.
Called from buf_chunk_init() when lazy latch initialization is disabled, and
from buf_LRU_get_free_only() on first use of a block otherwise.
Asserts (ut_a) that the latches of this block have not been created yet.
@param[in,out] block buffer control block whose latches are to be created */
void buf_block_initialize_latches(buf_block_t *block) {
ut_a(!block->latches_initialized);

/* The caller must be in one of three race-free contexts, matching the three
alternatives asserted below:
- eager init during chunk creation at startup (srv_is_being_started);
- eager init during chunk creation for resize, under the instance's
free_list_mutex (see buf_chunk_init);
- lazy init on first use: the block has just been taken from the free list
and is owned exclusively by this thread (BUF_BLOCK_READY_FOR_USE), so it
is not yet reachable by any other thread. */
ut_ad(srv_is_being_started ||
mutex_own(&buf_pool_from_block(block)->free_list_mutex) ||
buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);

mutex_create(LATCH_ID_BUF_BLOCK_MUTEX, &block->mutex);

#if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC
/* If PFS_SKIP_BUFFER_MUTEX_RWLOCK is defined, skip registration of the
buffer block rwlock with performance schema.

If PFS_GROUP_BUFFER_SYNC is defined, skip the registration here since the
buffer block rwlock is registered in pfs_register_buffer_block().
NOTE(review): buf_chunk_init() calls pfs_register_buffer_block() right after
building the chunk; with lazy initialization that call happens before this
function runs for the block - confirm the registration still takes effect. */
rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, LATCH_ID_BUF_BLOCK_LOCK);
ut_d(rw_lock_create(PFS_NOT_INSTRUMENTED, &block->debug_latch,
LATCH_ID_BUF_BLOCK_DEBUG));
#else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
rw_lock_create(buf_block_lock_key, &block->lock, LATCH_ID_BUF_BLOCK_LOCK);
ut_d(rw_lock_create(buf_block_debug_latch_key, &block->debug_latch,
LATCH_ID_BUF_BLOCK_DEBUG));
#endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */

#ifdef UNIV_DEBUG
/* Record the latch ids on both rw-locks (debug builds only).
NOTE(review): the same ids are already passed to rw_lock_create() above;
confirm whether this explicit assignment is still required. */
block->lock.m_id = LATCH_ID_BUF_BLOCK_LOCK;
block->debug_latch.m_id = LATCH_ID_BUF_BLOCK_DEBUG;
#endif /* UNIV_DEBUG */

/* Mark the rw-lock as belonging to a buffer block. */
block->lock.is_block_lock = true;

ut_ad(rw_lock_validate(&block->lock));

/* Set last: teardown code frees the latches only when this flag is set. */
block->latches_initialized = true;
}

/** Lightweight initialization of a buffer control block: no latches created. */
static void buf_block_init_light(buf_pool_t *buf_pool, buf_block_t *block,
byte *frame) {
UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE);

/* This function should only be executed at database startup or by
buf_pool_resize(). Either way, adaptive hash index must not exist. */
block->ahi.assert_empty_on_init();

block->frame = frame;
Expand Down Expand Up @@ -867,34 +901,13 @@ static void buf_block_init(

page_zip_des_init(&block->page.zip);

mutex_create(LATCH_ID_BUF_BLOCK_MUTEX, &block->mutex);

#if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC
/* If PFS_SKIP_BUFFER_MUTEX_RWLOCK is defined, skip registration
of buffer block rwlock with performance schema.

If PFS_GROUP_BUFFER_SYNC is defined, skip the registration
since buffer block rwlock will be registered later in
pfs_register_buffer_block(). */

rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, LATCH_ID_BUF_BLOCK_LOCK);

ut_d(rw_lock_create(PFS_NOT_INSTRUMENTED, &block->debug_latch,
LATCH_ID_BUF_BLOCK_DEBUG));

#else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */

rw_lock_create(buf_block_lock_key, &block->lock, LATCH_ID_BUF_BLOCK_LOCK);

ut_d(rw_lock_create(buf_block_debug_latch_key, &block->debug_latch,
LATCH_ID_BUF_BLOCK_DEBUG));

#endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */

block->lock.is_block_lock = true;

ut_ad(rw_lock_validate(&(block->lock)));
/* Latches are NOT initialized here. Block creation happens either at
startup (no concurrency on this instance yet) or, for resize, under the
instance's free_list_mutex. */
ut_ad(srv_is_being_started || mutex_own(&buf_pool->free_list_mutex));
block->latches_initialized = false;
}

/* We maintain our private view of innobase_should_madvise_buf_pool() which we
initialize at the beginning of buf_pool_init() and then update when the
@@global.innodb_buffer_pool_in_core_file changes.
Expand Down Expand Up @@ -1081,7 +1094,8 @@ static buf_chunk_t *buf_chunk_init(
buf_chunk_t *chunk, /*!< out: chunk of buffers */
ulonglong mem_size, /*!< in: requested size in bytes */
bool populate, /*!< in: virtual page preallocation */
std::mutex *mutex) /*!< in,out: Mutex protecting chunk map. */
std::mutex *mutex) /*!< in,out: mutex protecting the chunk map, or
nullptr when no concurrency is possible */
{
buf_block_t *block;
byte *frame;
Expand Down Expand Up @@ -1143,7 +1157,15 @@ static buf_chunk_t *buf_chunk_init(
block = chunk->blocks;

for (i = chunk->size; i--;) {
buf_block_init(buf_pool, block, frame);
buf_block_init_light(buf_pool, block, frame);

/* When lazy latch initialization is disabled, create the latches now
(eager initialization, the original behavior). When enabled, they are
created on first use in buf_LRU_get_free_only(). */
if (!srv_buf_pool_lazy_latch_init) {
buf_block_initialize_latches(block);
}

UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE);

/* Add the block to the free list */
Expand All @@ -1157,6 +1179,9 @@ static buf_chunk_t *buf_chunk_init(
frame += UNIV_PAGE_SIZE;
}

/* buf_pool instances are created in parallel during buf_pool_init(), so the
caller passes a mutex to serialize inserts into the shared chunk map. During
resize there is no concurrency and mutex is nullptr. */
if (mutex != nullptr) {
mutex->lock();
}
Expand All @@ -1170,6 +1195,7 @@ static buf_chunk_t *buf_chunk_init(
#ifdef PFS_GROUP_BUFFER_SYNC
pfs_register_buffer_block(chunk);
#endif /* PFS_GROUP_BUFFER_SYNC */

return (chunk);
}

Expand Down Expand Up @@ -1280,7 +1306,8 @@ static void buf_pool_set_sizes(void) {
@param[in] buf_pool buffer pool instance
@param[in] buf_pool_size size in bytes
@param[in] instance_no id of the instance
@param[in,out] mutex Mutex to protect common data structures
@param[in,out] mutex mutex protecting the shared chunk map while
instances are created in parallel
@param[out] err DB_SUCCESS if all goes well
@param[in] populate virtual page preallocation */
static void buf_pool_create(buf_pool_t *buf_pool, ulint buf_pool_size,
Expand Down Expand Up @@ -1361,14 +1388,18 @@ static void buf_pool_create(buf_pool_t *buf_pool, ulint buf_pool_size,

do {
if (!buf_chunk_init(buf_pool, chunk, chunk_size, populate, mutex)) {
/* Failure cleanup at startup, under chunks_mutex. */
ut_ad(mutex_own(&buf_pool->chunks_mutex));
while (--chunk >= buf_pool->chunks) {
buf_block_t *block = chunk->blocks;

for (i = chunk->size; i--; block++) {
mutex_free(&block->mutex);
rw_lock_free(&block->lock);

ut_d(rw_lock_free(&block->debug_latch));
/* Only blocks whose latches were actually created (eagerly while the
chunk was built, or lazily on first use) need freeing. */
if (block->latches_initialized) {
mutex_free(&block->mutex);
rw_lock_free(&block->lock);
ut_d(rw_lock_free(&block->debug_latch));
}
}
buf_pool->deallocate_chunk(chunk);
}
Expand Down Expand Up @@ -1497,14 +1528,18 @@ static void buf_pool_free_instance(buf_pool_t *buf_pool) {
chunks = buf_pool->chunks;
chunk = chunks + buf_pool->n_chunks;

ut_ad(mutex_own(&buf_pool->chunks_mutex));

while (--chunk >= chunks) {
buf_block_t *block = chunk->blocks;

for (ulint i = chunk->size; i--; block++) {
mutex_free(&block->mutex);
rw_lock_free(&block->lock);

ut_d(rw_lock_free(&block->debug_latch));
/* Only blocks whose latches were actually created (eagerly while the
chunk was built, or lazily on first use) need to be freed. */
if (block->latches_initialized) {
mutex_free(&block->mutex);
rw_lock_free(&block->lock);
ut_d(rw_lock_free(&block->debug_latch));
}
}

buf_pool->deallocate_chunk(chunk);
Expand Down Expand Up @@ -1593,13 +1628,16 @@ dberr_t buf_pool_init(ulint total_size, bool populate, ulint n_instances) {

std::vector<IB_thread> threads;

std::mutex m;
/* Shared by the worker threads of this batch to serialize chunk-map
inserts. Placed in its own cache line so the contended lock word does not
false-share with neighbouring stack data. */
ut::Cacheline_aligned<std::mutex> m;

for (ulint id = i; id < n; ++id) {
threads.emplace_back(os_thread_create(buf_pool_create_thread_key, 0,
buf_pool_create, &buf_pool_ptr[id],
size, id, &m, std::ref(errs[id]),
populate));
threads.emplace_back(os_thread_create(
buf_pool_create_thread_key, 0, buf_pool_create, &buf_pool_ptr[id],
size, id, static_cast<std::mutex *>(&m), std::ref(errs[id]),
populate));
threads[id - i].start();
}

Expand Down Expand Up @@ -2485,20 +2523,24 @@ static void buf_pool_resize() {

ulint sum_freed = 0;

/* Resize holds the instance's free_list_mutex (and runs with
buf_pool_resizing set), so reading latches_initialized here is safe. */
ut_ad(buf_pool_resizing);
ut_ad(mutex_own(&buf_pool->free_list_mutex));

while (chunk < echunk) {
buf_block_t *block = chunk->blocks;

for (ulint j = chunk->size; j--; block++) {
mutex_free(&block->mutex);
rw_lock_free(&block->lock);

ut_d(rw_lock_free(&block->debug_latch));
if (block->latches_initialized) {
mutex_free(&block->mutex);
rw_lock_free(&block->lock);
ut_d(rw_lock_free(&block->debug_latch));
}
}

buf_pool->deallocate_chunk(chunk);

sum_freed += chunk->size;

++chunk;
}

Expand Down Expand Up @@ -3591,7 +3633,7 @@ buf_block_t *buf_block_from_ahi(const byte *ptr) {

buf_block_t *block = &chunk->blocks[offs];

/* The function buf_chunk_init() invokes buf_block_init() so that
/* The function buf_chunk_init() invokes buf_block_init_light() so that
block[n].frame == block->frame + n * UNIV_PAGE_SIZE. Check it. */
ut_ad(block->frame == page_align(ptr));
/* Read the state of the block without holding a mutex.
Expand Down
11 changes: 11 additions & 0 deletions storage/innobase/buf/buf0lru.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1235,6 +1235,17 @@ buf_block_t *buf_LRU_get_free_only(buf_pool_t *buf_pool) {

ut_ad(buf_pool_from_block(block) == buf_pool);

/* Initialize latches on first use. The block has just been removed from
the free list and is owned exclusively by this thread, so accessing the
non-atomic latches_initialized flag here is race-free; the prior thread's
initialization (if any) is visible through the free_list_mutex handoff.
No mutex protects this access - exclusive ownership does. */
ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
ut_ad(!block->page.in_free_list);
if (!block->latches_initialized) {
buf_block_initialize_latches(block);
}

return (block);
}

Expand Down
9 changes: 9 additions & 0 deletions storage/innobase/handler/ha_innodb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24162,6 +24162,14 @@ static MYSQL_SYSVAR_BOOL(
nullptr, nullptr, true);
#endif /* HAVE_LIBNUMA */

/** innodb_buffer_pool_lazy_latch_init: boolean system variable backed by
srv_buf_pool_lazy_latch_init. It is declared with PLUGIN_VAR_READONLY and its
default value is false, i.e. buffer block latches are created eagerly while
the buffer pool is built. */
static MYSQL_SYSVAR_BOOL(
buffer_pool_lazy_latch_init, srv_buf_pool_lazy_latch_init,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
"Create buffer pool block latches (mutex, rw-locks) lazily on first use"
" instead of eagerly while the buffer pool is built. Speeds up buffer"
" pool initialization for large pools. OFF by default.",
nullptr, nullptr, false);

static MYSQL_SYSVAR_BOOL(
api_enable_binlog, ib_binlog_enabled,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
Expand Down Expand Up @@ -24620,6 +24628,7 @@ static SYS_VAR *innobase_system_variables[] = {
#ifdef HAVE_LIBNUMA
MYSQL_SYSVAR(numa_interleave),
#endif /* HAVE_LIBNUMA */
MYSQL_SYSVAR(buffer_pool_lazy_latch_init),
MYSQL_SYSVAR(change_buffering),
MYSQL_SYSVAR(change_buffer_max_size),
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
Expand Down
14 changes: 14 additions & 0 deletions storage/innobase/include/buf0buf.h
Original file line number Diff line number Diff line change
Expand Up @@ -1908,6 +1908,16 @@ struct buf_block_t {
single thread. */
bool made_dirty_with_no_latch;

/** Whether this block's latches (mutex, lock, debug_latch) have been
created. See buf_block_initialize_latches().

Not atomic: every false->true transition happens either while the chunk is
being built in buf_chunk_init() (eager initialization: at startup, or under
free_list_mutex during resize), or while the block is owned exclusively by a
single thread (lazy initialization: just removed from the free list, not yet
in the page hash or LRU). In the lazy case, visibility to threads that later
reuse the block is carried by the buf_pool->free_list_mutex release/acquire
handoff. Teardown paths read the flag with no concurrent writers: instance
free and failed instance creation under chunks_mutex, resize under
free_list_mutex. */
bool latches_initialized{false};

#ifndef UNIV_HOTBACKUP
#ifdef UNIV_DEBUG
/** @name Debug fields */
Expand Down Expand Up @@ -2052,6 +2062,10 @@ static inline uint64_t buf_pool_hash_zip_frame(void *ptr) {
static inline uint64_t buf_pool_hash_zip(buf_block_t *b) {
return buf_pool_hash_zip_frame(b->frame);
}

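/** Creates the latches (mutex, lock, debug_latch) of a buffer block and marks
them as initialized. Used both for eager creation while a chunk is built and
for lazy creation on first use of the block.
@param[in,out] block buffer block whose latches have not been created yet */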
void buf_block_initialize_latches(buf_block_t *block);

/** @} */

/** A "Hazard Pointer" class used to iterate over page lists
Expand Down
5 changes: 5 additions & 0 deletions storage/innobase/include/srv0srv.h
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,11 @@ Currently we support native aio on windows and linux */
extern bool srv_use_native_aio;
extern bool srv_numa_interleave;

/** When true, buffer pool blocks are created with lightweight initialization
and their latches (mutex, rw-locks) are created lazily on first use. When
false (default), latches are created eagerly while the buffer pool is built. */
extern bool srv_buf_pool_lazy_latch_init;

/* The innodb_directories variable value. This a list of directories
deliminated by ';', i.e the FIL_PATH_SEPARATOR. */
extern char *srv_innodb_directories;
Expand Down
3 changes: 3 additions & 0 deletions storage/innobase/srv/srv0srv.cc
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,9 @@ bool srv_use_native_aio = false;

bool srv_numa_interleave = false;

/** See srv0srv.h. Default off: latches are created eagerly. */
bool srv_buf_pool_lazy_latch_init = false;

#ifdef UNIV_DEBUG
/** Force all user tables to use page compression. */
ulong srv_debug_compress;
Expand Down
Loading