From 6109084868c203cccbe44cb14437697158a1becf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Olchawa?= Date: Mon, 22 Jun 2026 16:45:24 +0200 Subject: [PATCH] PS-11120: Reduce contention on BUF_BLOCK_MUTEX by reading ahead the access_time This patch reduces contention inside buf_page_optimistic_get. That function acquires the BUF_BLOCK_MUTEX twice, but the second acquisition serves only to update the access time. Since the access time is updated only if it was zero, we can avoid acquiring the mutex when we already know that the access time is non-zero. We can learn this during the first acquisition of the mutex by reading the access time field of the page. Note: when we do acquire the mutex the second time, we re-check whether the access time is still zero anyway, so the patch is safe and significantly reduces the number of times we need to acquire the mutex. This is a contribution from: Anna Glasgall (with a minor fix). --- storage/innobase/buf/buf0buf.cc | 37 +++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 747e146424d8..3d80c087a736 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -4581,6 +4581,10 @@ bool buf_page_optimistic_get(ulint rw_latch, buf_block_t *block, buf_block_buf_fix_inc(block, ut::Location{file, line}); + /* Grab the access time while we have the mutex to potentially + avoid the need to acquire the mutex the second time (below). */ + auto access_time = buf_page_is_accessed(&block->page); + buf_page_mutex_exit(block); ut_ad(!ibuf_inside(mtr) || @@ -4627,15 +4631,35 @@ bool buf_page_optimistic_get(ulint rw_latch, buf_block_t *block, return (false); } - buf_page_mutex_enter(block); + /* Only grab the mutex to update access time if it was zero when we + checked earlier. This check is to reduce contention on page mutex + for hot pages (access time would be set only if it was zero anyway). 
*/ + if (access_time == std::chrono::steady_clock::time_point{}) { + buf_page_mutex_enter(block); - const auto access_time = buf_page_is_accessed(&block->page); + /* Refresh the access_time. Because of race condition we might see + that it's been set by other thread since the last time we checked. - buf_page_set_accessed(&block->page); + Note: it's important to update access_time variable because we use it + later to determine if it was the first page access and we should: + - try reading ahead next consecutive pages on the disk, + - update thd->access_distinct_page() statistics (trx != nullptr). - ut_ad(!block->page.file_page_was_freed); + Without this: + - we could be calling too many times the buf_read_ahead_linear + if the set of hot pages was changing over time, + - the sum of innodb_pages_distinct across many queries + could have the same page counted twice (so no longer would be + lower bound for the total number of unique page accesses). */ + access_time = buf_page_is_accessed(&block->page); - buf_page_mutex_exit(block); + /* This is no-op if access time was non-zero. */ + buf_page_set_accessed(&block->page); + + ut_ad(!block->page.file_page_was_freed); + + buf_page_mutex_exit(block); + } if (fetch_mode != Page_fetch::SCAN) { buf_page_make_young_if_needed(&block->page); @@ -4653,10 +4677,11 @@ bool buf_page_optimistic_get(ulint rw_latch, buf_block_t *block, trx_t *trx; if (access_time == std::chrono::steady_clock::time_point{}) { trx = innobase_get_trx_for_slow_log(); - /* In the case of a first access, try to apply linear read-ahead */ + /* In the case of a first access, try to apply linear read-ahead. */ buf_read_ahead_linear(block->page.id, block->page.size, ibuf_inside(mtr), trx); } else { + /* It's not the first page access (don't bump access_distinct_page). */ trx = nullptr; }