diff --git a/kv_cache_benchmark/tests/test_kv_cache.py b/kv_cache_benchmark/tests/test_kv_cache.py index 31d5b1af..71b42912 100644 --- a/kv_cache_benchmark/tests/test_kv_cache.py +++ b/kv_cache_benchmark/tests/test_kv_cache.py @@ -2623,11 +2623,24 @@ def test_demote_path_preserves_data(self, tiny_model_config): # Key was evicted entirely — that's also valid if NVMe was tiny print(" Key was evicted (deleted). Skipping data comparison.") - def test_tier_order_includes_fake_gpu(self, tiny_model_config): + def test_tier_order_includes_fake_gpu(self, tiny_model_config, monkeypatch): """ Confirm that injecting a GPU backend adds 'gpu' to the tier order, giving us the full 3-tier cascade path. + + Baseline assumption: a fresh MultiTierCache has no 'gpu' backend. + MultiTierCache.__init__ adds one whenever ``TORCH_AVAILABLE`` or + ``CUPY_AVAILABLE`` is true (cache.py:243), regardless of + ``gpu_memory_gb`` — so on a dev box / CI runner with torch in + the venv, the cache starts with a real GPU backend (0-byte limit) + and the baseline check would fail. Force both flags off for this + test so the post-construction state matches the test's stated + precondition. """ + import kv_cache.cache as cache_mod + monkeypatch.setattr(cache_mod, "TORCH_AVAILABLE", False) + monkeypatch.setattr(cache_mod, "CUPY_AVAILABLE", False) + cache = MultiTierCache( model_config=tiny_model_config, gpu_memory_gb=0,