From 099fa2eb2499df28e0e18ea51e5891c2fe0a5bbc Mon Sep 17 00:00:00 2001 From: Curtis Anderson Date: Fri, 26 Jun 2026 16:25:54 -0700 Subject: [PATCH] test(kv_cache): force TORCH/CUPY availability off in tier-order test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit test_tier_order_includes_fake_gpu's baseline assertion (no 'gpu' tier on a freshly constructed cache) was fragile: MultiTierCache.__init__ adds the 'gpu' backend whenever the module-level TORCH_AVAILABLE or CUPY_AVAILABLE flag is true (cache.py:243), regardless of gpu_memory_gb. On a dev box or CI runner with torch in the venv — which is the normal case for this repo — the cache starts with a real GPU backend (with a 0-byte limit), and the test's ``assert 'gpu' not in tier_order_before`` fires. The test's *intent* is to exercise the fake-GPU injection path: "injecting a backend after construction promotes the tier order to ['gpu', 'cpu', 'nvme']". Whether the baseline cache happens to have a GPU backend at all is incidental, and depends on the runner's installed Python deps. Pin the test to the no-GPU-library precondition by monkeypatching TORCH_AVAILABLE and CUPY_AVAILABLE to False at the ``kv_cache.cache`` module before constructing the cache. The post-construction state then matches the test's stated baseline regardless of whether torch is installed in the test environment. Result: 239 passed + 1 failed → 240 passed in ``kv_cache_benchmark/tests/test_kv_cache.py``. (Separate question: should ``gpu_memory_gb=0`` skip the GPU backend in production? Today every allocation does a redundant ``_ensure_space_in_tier('gpu')`` check that always returns False before falling through to CPU. That looks like a wart but is a behavior change worth its own PR — out of scope for this test fix.) 
--- kv_cache_benchmark/tests/test_kv_cache.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/kv_cache_benchmark/tests/test_kv_cache.py b/kv_cache_benchmark/tests/test_kv_cache.py index 31d5b1af..71b42912 100644 --- a/kv_cache_benchmark/tests/test_kv_cache.py +++ b/kv_cache_benchmark/tests/test_kv_cache.py @@ -2623,11 +2623,24 @@ def test_demote_path_preserves_data(self, tiny_model_config): # Key was evicted entirely — that's also valid if NVMe was tiny print(" Key was evicted (deleted). Skipping data comparison.") - def test_tier_order_includes_fake_gpu(self, tiny_model_config): + def test_tier_order_includes_fake_gpu(self, tiny_model_config, monkeypatch): """ Confirm that injecting a GPU backend adds 'gpu' to the tier order, giving us the full 3-tier cascade path. + + Baseline assumption: a fresh MultiTierCache has no 'gpu' backend. + MultiTierCache.__init__ adds one whenever ``TORCH_AVAILABLE`` or + ``CUPY_AVAILABLE`` is true (cache.py:243), regardless of + ``gpu_memory_gb`` — so on a dev box / CI runner with torch in + the venv, the cache starts with a real GPU backend (0-byte limit) + and the baseline check would fail. Force both flags off for this + test so the post-construction state matches the test's stated + precondition. """ + import kv_cache.cache as cache_mod + monkeypatch.setattr(cache_mod, "TORCH_AVAILABLE", False) + monkeypatch.setattr(cache_mod, "CUPY_AVAILABLE", False) + cache = MultiTierCache( model_config=tiny_model_config, gpu_memory_gb=0,