From 24de9109f792cb0bf9fe5267953f5748fba4fd00 Mon Sep 17 00:00:00 2001 From: ColinM-sys Date: Thu, 16 Apr 2026 18:45:31 -0400 Subject: [PATCH 1/4] fix(cache-middleware): bound cache size and enforce safe fuzzy threshold MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CacheMiddleware previously: 1. Used an unbounded dict for storage — sustained unique inputs would grow the cache without limit, a memory-exhaustion DoS surface. 2. Accepted any similarity_threshold in [0, 1]. Low values make cache poisoning trivial: a crafted input whose difflib ratio exceeds the threshold against a legitimate cached key hijacks the legitimate user's response. At threshold=0.5, most short/structured inputs collide on first try. Changes: - Back the cache with an OrderedDict. Evict the oldest entry (LRU) whenever the cache exceeds max_entries. A cache hit moves the entry to the MRU end so frequently-useful results are preferred on eviction. - Add max_entries to __init__ (default 1024, must be a positive int). - Reject similarity_threshold values below 0.85 at construct time with an actionable error. 1.0 remains recommended (exact match only). - Non-numeric threshold values now raise instead of failing later. Breaking change: existing configs that pass similarity_threshold < 0.85 will fail at construct time. This is intentional — any such value is a cache-poisoning foot-gun. The error message points at the two safe choices (use 1.0, or use a value >= 0.85). Test updates: - test_similarity_computation_for_different_thresholds / _fuzzy_match_caching / test_multiple_similar_entries / test_custom_initialization: bumped from 0.5/0.7/0.8 to values >= 0.85 so they exercise fuzzy matching without tripping the new floor. - Added TestSimilarityThresholdFloor: * below-floor values (0.0, 0.3, 0.5, 0.7, 0.84) are rejected * at/above-floor values (0.85, 0.9, 0.95, 1.0) are accepted * threshold > 1.0 is rejected * non-numeric threshold is rejected - Added TestMaxEntriesLruEviction: * default max_entries is positive * zero or negative max_entries is rejected * cache stays at max_entries under sustained unique input * hit promotes entry to MRU; later insert evicts the older one CWE-400 (resource exhaustion) + CWE-345 (insufficient data authenticity / cache-key confusion). Signed-off-by: ColinM-sys --- .../nat/middleware/cache/cache_middleware.py | 73 +++++++++- .../nat/middleware/test_cache_middleware.py | 133 ++++++++++++++++-- 2 files changed, 191 insertions(+), 15 deletions(-) diff --git a/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware.py b/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware.py index 0ed26e22df..afbc0cfae6 100644 --- a/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware.py +++ b/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware.py @@ -30,6 +30,7 @@ import json import logging +from collections import OrderedDict from collections.abc import AsyncIterator from typing import Any @@ -43,6 +44,21 @@ logger = logging.getLogger(__name__) +# Lower bound on fuzzy-match similarity to reduce the cache-poisoning surface. +# A threshold below this makes it trivial to craft an input that is "similar +# enough" to a legitimate user's cached key to hijack their response (for the +# current process, which is how the in-memory cache is scoped). 0.85 is the +# smallest value that we're comfortable shipping as an unconditional default; +# operators with strict needs should use 1.0 (exact match only). +_MIN_FUZZY_THRESHOLD = 0.85 + +# Default bound on cache size. The previous implementation used an unbounded +# dict which, under sustained unique input, grew without limit — a memory- +# exhaustion DoS and, combined with fuzzy matching, a long-lived surface for +# cross-request confusion. OrderedDict-backed LRU evicts the oldest entry +# when the cache exceeds this bound. +_DEFAULT_MAX_CACHE_ENTRIES = 1024 + class CacheMiddleware(FunctionMiddleware): """Cache middleware that memoizes function outputs based on input similarity. @@ -67,19 +83,51 @@ class CacheMiddleware(FunctionMiddleware): computation. """ - def __init__(self, *, enabled_mode: str, similarity_threshold: float) -> None: + def __init__( + self, + *, + enabled_mode: str, + similarity_threshold: float, + max_entries: int = _DEFAULT_MAX_CACHE_ENTRIES, + ) -> None: """Initialize the cache middleware. Args: enabled_mode: Either "always" or "eval". If "eval", only caches when Context.is_evaluating is True. - similarity_threshold: Similarity threshold between 0 and 1. - If 1.0, performs exact matching. Otherwise uses fuzzy matching. + similarity_threshold: Similarity threshold in [_MIN_FUZZY_THRESHOLD, 1.0]. + If 1.0, performs exact matching. Lower values enable difflib- + based fuzzy matching. Values below _MIN_FUZZY_THRESHOLD are + rejected to prevent cache-poisoning where a crafted input + collides with a legitimate user's cached key. + max_entries: Maximum number of cache entries. When exceeded, the + oldest entry is evicted (LRU). Defaults to + _DEFAULT_MAX_CACHE_ENTRIES. Must be >= 1. + + Raises: + ValueError: If similarity_threshold is outside [_MIN_FUZZY_THRESHOLD, 1.0] + or max_entries is not a positive integer. """ + if not isinstance(similarity_threshold, (int, float)): + raise ValueError( + f"similarity_threshold must be a number, got {type(similarity_threshold).__name__}") + if similarity_threshold < _MIN_FUZZY_THRESHOLD or similarity_threshold > 1.0: + raise ValueError( + f"similarity_threshold={similarity_threshold} is outside the safe range " + f"[{_MIN_FUZZY_THRESHOLD}, 1.0]. Lower values make cache-poisoning trivial — " + "a crafted input can collide with a legitimate user's cached key. Use 1.0 " + "for exact matching (recommended), or a value >= " + f"{_MIN_FUZZY_THRESHOLD} for fuzzy matching.") + if not isinstance(max_entries, int) or max_entries < 1: + raise ValueError(f"max_entries must be a positive integer, got {max_entries!r}") + super().__init__(is_final=True) self._enabled_mode = enabled_mode self._similarity_threshold = similarity_threshold - self._cache: dict[str, Any] = {} + # OrderedDict gives O(1) LRU: move_to_end() on hit, popitem(last=False) + # to evict the oldest when we exceed max_entries. + self._cache: OrderedDict[str, Any] = OrderedDict() + self._max_entries = max_entries # ==================== Abstract Method Implementations ==================== @@ -199,10 +247,13 @@ async def function_middleware_invoke(self, # Phase 1: Preprocess - look for a similar cached input similar_key = self._find_similar_key(input_str) if similar_key is not None: - # Cache hit - short-circuit and return cached output + # Cache hit - short-circuit and return cached output. + # Move the hit entry to the MRU end so LRU eviction prefers truly + # old entries, not just recently-useful ones. logger.debug("Cache hit for function %s with similarity %.2f", context.name, 1.0 if similar_key == input_str else self._similarity_threshold) + self._cache.move_to_end(similar_key) # Phase 4: Continue - return cached result return self._cache[similar_key] @@ -210,9 +261,17 @@ async def function_middleware_invoke(self, logger.debug("Cache miss for function %s", context.name) result = await call_next(*args, **kwargs) - # Phase 3: Postprocess - cache the result for future use + # Phase 3: Postprocess - cache the result for future use. Enforce the + # LRU bound BEFORE insert so the new entry always lands in a cache of + # size <= max_entries, preventing unbounded memory growth (DoS). self._cache[input_str] = result - logger.debug("Cached result for function %s", context.name) + self._cache.move_to_end(input_str) + while len(self._cache) > self._max_entries: + self._cache.popitem(last=False) + logger.debug("Cached result for function %s (size=%d/%d)", + context.name, + len(self._cache), + self._max_entries) # Phase 4: Continue - return the fresh result return result diff --git a/packages/nvidia_nat_core/tests/nat/middleware/test_cache_middleware.py b/packages/nvidia_nat_core/tests/nat/middleware/test_cache_middleware.py index c1e14f5de1..a98c85189e 100644 --- a/packages/nvidia_nat_core/tests/nat/middleware/test_cache_middleware.py +++ b/packages/nvidia_nat_core/tests/nat/middleware/test_cache_middleware.py @@ -62,7 +62,8 @@ def test_default_initialization(self): def test_custom_initialization(self): """Test custom initialization.""" - middleware = CacheMiddleware(enabled_mode="always", similarity_threshold=0.8) + # Use 0.9 (above the enforced minimum) to exercise non-default fuzzy mode. + middleware = CacheMiddleware(enabled_mode="always", similarity_threshold=0.9) # Check attributes are set assert hasattr(middleware, '_enabled_mode') assert hasattr(middleware, '_similarity_threshold') @@ -108,8 +109,12 @@ async def mock_next_call(*args, **kwargs): assert result3.result == "Result for test" async def test_fuzzy_match_caching(self, middleware_context): - """Test fuzzy matching with similarity_threshold < 1.0.""" - middleware = CacheMiddleware(enabled_mode="always", similarity_threshold=0.8) + """Test fuzzy matching with similarity_threshold < 1.0. + + Uses 0.9 (above the enforced minimum) — 0.8 is no longer a valid + threshold after the cache-poisoning hardening. + """ + middleware = CacheMiddleware(enabled_mode="always", similarity_threshold=0.9) call_count = 0 @@ -267,8 +272,10 @@ async def mock_next_call(*args, **kwargs): def test_similarity_computation_for_different_thresholds(self): """Test similarity computation for different thresholds.""" - # This is more of a unit test for the similarity logic - middleware = CacheMiddleware(enabled_mode="always", similarity_threshold=0.5) + # This is more of a unit test for the similarity logic. + # Uses 0.9 (above the enforced minimum) to exercise fuzzy matching + # without enabling cache-poisoning-prone low thresholds. + middleware = CacheMiddleware(enabled_mode="always", similarity_threshold=0.9) # Directly test internal methods # Add a cached entry @@ -278,14 +285,18 @@ def test_similarity_computation_for_different_thresholds(self): # Test various similarity levels # Exact match assert middleware._find_similar_key(test_key) == test_key # noqa - # Very similar + # Very similar (one char shorter, ~0.95 ratio) assert middleware._find_similar_key("hello worl") == test_key # noqa # Too different - use a completely different string assert middleware._find_similar_key("xyz123abc") is None # noqa async def test_multiple_similar_entries(self, middleware_context): - """Test behavior with multiple similar cached entries.""" - middleware = CacheMiddleware(enabled_mode="always", similarity_threshold=0.7) + """Test behavior with multiple similar cached entries. + + Uses 0.85 (the enforced minimum) instead of the original 0.7 — + below 0.85 is now rejected as a cache-poisoning risk. + """ + middleware = CacheMiddleware(enabled_mode="always", similarity_threshold=0.85) # Pre-populate cache with similar entries key1 = middleware._serialize_input( # noqa @@ -306,3 +317,109 @@ async def mock_next_call(*args, **kwargs): input_str = {"value": "test input X", "number": 42} await middleware.function_middleware_invoke(input_str, call_next=mock_next_call, context=middleware_context) # The exact behavior depends on which cached key is most similar + + +class TestSimilarityThresholdFloor: + """The constructor must reject similarity thresholds below the safe floor. + + Below ~0.85, crafting an input whose difflib ratio exceeds the threshold + against a legitimate cached key is trivial (small edits, common prefixes, + shared structural tokens). Accepting those values silently produces a + cache where one caller can hijack another caller's response. + """ + + @pytest.mark.parametrize("threshold", [0.0, 0.3, 0.5, 0.7, 0.84]) + def test_below_floor_is_rejected(self, threshold): + with pytest.raises(ValueError, match="outside the safe range"): + CacheMiddleware(enabled_mode="always", similarity_threshold=threshold) + + @pytest.mark.parametrize("threshold", [0.85, 0.9, 0.95, 1.0]) + def test_at_or_above_floor_is_allowed(self, threshold): + mw = CacheMiddleware(enabled_mode="always", similarity_threshold=threshold) + assert mw._similarity_threshold == threshold # noqa: SLF001 + + def test_threshold_above_one_is_rejected(self): + with pytest.raises(ValueError, match="outside the safe range"): + CacheMiddleware(enabled_mode="always", similarity_threshold=1.5) + + def test_threshold_non_numeric_is_rejected(self): + with pytest.raises(ValueError, match="must be a number"): + CacheMiddleware(enabled_mode="always", similarity_threshold="high") # type: ignore[arg-type] + + +class TestMaxEntriesLruEviction: + """The cache must bound its size to prevent memory-exhaustion DoS. + + The previous implementation used an unbounded dict; sustained unique + inputs would grow the cache without limit, eventually crashing the + process. LRU eviction ensures the cache stays within max_entries. + """ + + async def test_default_max_entries_is_positive(self): + mw = CacheMiddleware(enabled_mode="always", similarity_threshold=1.0) + assert mw._max_entries > 0 # noqa: SLF001 + + def test_zero_max_entries_is_rejected(self): + with pytest.raises(ValueError, match="positive integer"): + CacheMiddleware(enabled_mode="always", similarity_threshold=1.0, max_entries=0) + + def test_negative_max_entries_is_rejected(self): + with pytest.raises(ValueError, match="positive integer"): + CacheMiddleware(enabled_mode="always", similarity_threshold=1.0, max_entries=-5) + + async def test_cache_evicts_oldest_when_exceeding_max_entries(self, middleware_context): + """Insert more unique entries than max_entries; verify size stays bounded.""" + mw = CacheMiddleware( + enabled_mode="always", + similarity_threshold=1.0, # exact match keeps the test deterministic + max_entries=3, + ) + + call_count = 0 + + async def mock_next_call(*_args, **_kwargs): + nonlocal call_count + call_count += 1 + return _TestOutput(result=f"result_{call_count}") + + for i in range(10): + await mw.function_middleware_invoke( + {"value": f"unique_input_{i}"}, + call_next=mock_next_call, + context=middleware_context, + ) + + assert len(mw._cache) == 3 # noqa: SLF001 + # The MOST recent three inserts should be what's left. + latest_keys = list(mw._cache.keys()) # noqa: SLF001 + for i in range(7, 10): + assert any(f"unique_input_{i}" in k for k in latest_keys) + + async def test_cache_hit_promotes_entry_to_most_recently_used(self, middleware_context): + """A cache hit should move the entry to MRU so later evictions spare it.""" + mw = CacheMiddleware( + enabled_mode="always", + similarity_threshold=1.0, + max_entries=3, + ) + + async def mock_next_call(*_args, **_kwargs): + return _TestOutput(result="r") + + # Fill the cache with A, B, C (A is oldest) + for key in ("A", "B", "C"): + await mw.function_middleware_invoke( + {"value": key}, call_next=mock_next_call, context=middleware_context) + + # Hit A again — should promote A to the MRU end + await mw.function_middleware_invoke( + {"value": "A"}, call_next=mock_next_call, context=middleware_context) + + # Now insert D — B (now oldest) should be evicted, not A. + await mw.function_middleware_invoke( + {"value": "D"}, call_next=mock_next_call, context=middleware_context) + + keys = "".join(list(mw._cache.keys())) # noqa: SLF001 + assert '"value": "A"' in keys + assert '"value": "D"' in keys + assert '"value": "B"' not in keys From 9f60ef6b150bf3c81fc88b2a2e166c30120548d3 Mon Sep 17 00:00:00 2001 From: ColinM-sys Date: Thu, 16 Apr 2026 18:56:02 -0400 Subject: [PATCH 2/4] fix(cache-middleware): align config schema, reject bool/non-finite thresholds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per CodeRabbit on #1879: 1. cache_middleware_config.py was out of sync with the new constructor semantics — schema allowed similarity_threshold in [0, 1] and had no max_entries field at all, so config-driven instantiation could pass values the constructor now rejects. Re-export _MIN_FUZZY_THRESHOLD and _DEFAULT_MAX_CACHE_ENTRIES from the module and wire both constants into the schema: similarity_threshold: ge=_MIN_FUZZY_THRESHOLD, le=1.0 max_entries: ge=1, default=_DEFAULT_MAX_CACHE_ENTRIES Docstrings mirror the constructor's rationale (cache-poisoning for threshold, DoS for max_entries). 2. Constructor validation did not reject `bool` (since isinstance(True, int) is True in Python) or non-finite floats (NaN / +inf / -inf). Both are classic config-bug foot-guns: a boolean silently becoming 0 or 1.0, or a parser that hands back NaN from an upstream source, would slip past the range comparison. Add explicit: - bool rejection on similarity_threshold (before the number check) - math.isfinite() check on similarity_threshold - bool rejection on max_entries New tests: - test_threshold_bool_is_rejected (parametrized True / False) - test_threshold_non_finite_is_rejected (parametrized NaN / +inf / -inf) - test_bool_max_entries_is_rejected (parametrized True / False) Signed-off-by: ColinM-sys --- .../nat/middleware/cache/cache_middleware.py | 14 +++++++- .../cache/cache_middleware_config.py | 35 +++++++++++++++---- .../nat/middleware/test_cache_middleware.py | 23 ++++++++++++ 3 files changed, 64 insertions(+), 8 deletions(-) diff --git a/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware.py b/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware.py index afbc0cfae6..29090a9162 100644 --- a/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware.py +++ b/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware.py @@ -30,6 +30,7 @@ import json import logging +import math from collections import OrderedDict from collections.abc import AsyncIterator from typing import Any @@ -108,9 +109,19 @@ def __init__( ValueError: If similarity_threshold is outside [_MIN_FUZZY_THRESHOLD, 1.0] or max_entries is not a positive integer. """ + # Reject bool explicitly — `isinstance(True, int)` is True in Python, + # and `True`/`False` silently sneaking through as numeric is a classic + # config bug (user passes the wrong key, gets no error). Check bool + # FIRST so the "must be a number" message doesn't lie. + if isinstance(similarity_threshold, bool): + raise ValueError( + f"similarity_threshold must be a number, got bool ({similarity_threshold!r})") if not isinstance(similarity_threshold, (int, float)): raise ValueError( f"similarity_threshold must be a number, got {type(similarity_threshold).__name__}") + if not math.isfinite(similarity_threshold): + raise ValueError( + f"similarity_threshold must be finite, got {similarity_threshold!r}") if similarity_threshold < _MIN_FUZZY_THRESHOLD or similarity_threshold > 1.0: raise ValueError( f"similarity_threshold={similarity_threshold} is outside the safe range " @@ -118,7 +129,8 @@ def __init__( "a crafted input can collide with a legitimate user's cached key. Use 1.0 " "for exact matching (recommended), or a value >= " f"{_MIN_FUZZY_THRESHOLD} for fuzzy matching.") - if not isinstance(max_entries, int) or max_entries < 1: + # Same bool-as-int foot-gun applies to max_entries. + if isinstance(max_entries, bool) or not isinstance(max_entries, int) or max_entries < 1: raise ValueError(f"max_entries must be a positive integer, got {max_entries!r}") super().__init__(is_final=True) diff --git a/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware_config.py b/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware_config.py index 7f24f03485..316c2aecf6 100644 --- a/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware_config.py +++ b/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware_config.py @@ -19,6 +19,8 @@ from pydantic import Field from nat.data_models.middleware import FunctionMiddlewareBaseConfig +from nat.middleware.cache.cache_middleware import _DEFAULT_MAX_CACHE_ENTRIES +from nat.middleware.cache.cache_middleware import _MIN_FUZZY_THRESHOLD class CacheMiddlewareConfig(FunctionMiddlewareBaseConfig, name="cache"): @@ -31,14 +33,33 @@ class CacheMiddlewareConfig(FunctionMiddlewareBaseConfig, name="cache"): enabled_mode: Controls when caching is active: - "always": Cache is always enabled - "eval": Cache only active when Context.is_evaluating is True - similarity_threshold: Float between 0 and 1 for input matching: - - 1.0: Exact string matching (fastest) - - < 1.0: Fuzzy matching using difflib similarity + similarity_threshold: Float in [_MIN_FUZZY_THRESHOLD, 1.0] for input + matching: + - 1.0: Exact string matching (fastest, recommended) + - >= _MIN_FUZZY_THRESHOLD: Fuzzy matching via difflib. Values + below this bound are rejected as a cache-poisoning risk — + crafted inputs at lower thresholds can collide with a + legitimate user's cached key. + max_entries: Upper bound on cached entries. When exceeded, the + least-recently-used entry is evicted. Must be a positive int; + defaults to _DEFAULT_MAX_CACHE_ENTRIES. """ enabled_mode: Literal["always", "eval"] = Field( default="eval", description="When caching is enabled: 'always' or 'eval' (only during evaluation)") - similarity_threshold: float = Field(default=1.0, - ge=0.0, - le=1.0, - description="Similarity threshold between 0 and 1. Use 1.0 for exact matching") + similarity_threshold: float = Field( + default=1.0, + ge=_MIN_FUZZY_THRESHOLD, + le=1.0, + description=( + f"Similarity threshold in [{_MIN_FUZZY_THRESHOLD}, 1.0]. Use 1.0 for exact matching " + "(recommended). Lower values enable fuzzy matching but are bounded below to prevent " + "cache-poisoning collisions with legitimate cached keys."), + ) + max_entries: int = Field( + default=_DEFAULT_MAX_CACHE_ENTRIES, + ge=1, + description=("Maximum number of cache entries before LRU eviction. Must be >= 1. " + "Prevents memory-exhaustion DoS from unbounded cache growth under " + "sustained unique inputs."), + ) diff --git a/packages/nvidia_nat_core/tests/nat/middleware/test_cache_middleware.py b/packages/nvidia_nat_core/tests/nat/middleware/test_cache_middleware.py index a98c85189e..89b5b5aee6 100644 --- a/packages/nvidia_nat_core/tests/nat/middleware/test_cache_middleware.py +++ b/packages/nvidia_nat_core/tests/nat/middleware/test_cache_middleware.py @@ -346,6 +346,19 @@ def test_threshold_non_numeric_is_rejected(self): with pytest.raises(ValueError, match="must be a number"): CacheMiddleware(enabled_mode="always", similarity_threshold="high") # type: ignore[arg-type] + @pytest.mark.parametrize("bad_bool", [True, False]) + def test_threshold_bool_is_rejected(self, bad_bool): + """`isinstance(True, int)` is True in Python — reject bools explicitly + so a config with the wrong key type doesn't silently become 1.0 or 0.0.""" + with pytest.raises(ValueError, match="got bool"): + CacheMiddleware(enabled_mode="always", similarity_threshold=bad_bool) # type: ignore[arg-type] + + @pytest.mark.parametrize("bad_value", [float("nan"), float("inf"), float("-inf")]) + def test_threshold_non_finite_is_rejected(self, bad_value): + """NaN, +inf, -inf must be rejected before the range comparison.""" + with pytest.raises(ValueError, match="must be finite"): + CacheMiddleware(enabled_mode="always", similarity_threshold=bad_value) + class TestMaxEntriesLruEviction: """The cache must bound its size to prevent memory-exhaustion DoS. @@ -367,6 +380,16 @@ def test_negative_max_entries_is_rejected(self): with pytest.raises(ValueError, match="positive integer"): CacheMiddleware(enabled_mode="always", similarity_threshold=1.0, max_entries=-5) + @pytest.mark.parametrize("bad_bool", [True, False]) + def test_bool_max_entries_is_rejected(self, bad_bool): + """Same bool-as-int foot-gun protection as similarity_threshold.""" + with pytest.raises(ValueError, match="positive integer"): + CacheMiddleware( + enabled_mode="always", + similarity_threshold=1.0, + max_entries=bad_bool, # type: ignore[arg-type] + ) + async def test_cache_evicts_oldest_when_exceeding_max_entries(self, middleware_context): """Insert more unique entries than max_entries; verify size stays bounded.""" mw = CacheMiddleware( From ee05bcd928876421ea149d15da0a91f3af7f7ab5 Mon Sep 17 00:00:00 2001 From: ColinM-sys Date: Thu, 16 Apr 2026 19:05:14 -0400 Subject: [PATCH 3/4] fix(cache-middleware): wire max_entries through register.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per CodeRabbit on #1879: CacheMiddlewareConfig.max_entries was added to the config schema but register.py still instantiated CacheMiddleware with only (enabled_mode, similarity_threshold) — so a configured max_entries value was silently ignored and deployments always used the 1024 default. Pass config.max_entries through to the constructor so config-driven LRU sizing actually takes effect. Signed-off-by: ColinM-sys --- .../nvidia_nat_core/src/nat/middleware/cache/register.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/packages/nvidia_nat_core/src/nat/middleware/cache/register.py b/packages/nvidia_nat_core/src/nat/middleware/cache/register.py index 48d2702095..e8a77b997e 100644 --- a/packages/nvidia_nat_core/src/nat/middleware/cache/register.py +++ b/packages/nvidia_nat_core/src/nat/middleware/cache/register.py @@ -30,4 +30,8 @@ async def cache_middleware(config: CacheMiddlewareConfig, builder: Builder): Yields: A configured cache middleware instance """ - yield CacheMiddleware(enabled_mode=config.enabled_mode, similarity_threshold=config.similarity_threshold) + yield CacheMiddleware( + enabled_mode=config.enabled_mode, + similarity_threshold=config.similarity_threshold, + max_entries=config.max_entries, + ) From 2036cf2114a0a5986d5cf695437d060e99ffc4f1 Mon Sep 17 00:00:00 2001 From: ColinM-sys Date: Mon, 20 Apr 2026 19:04:40 -0400 Subject: [PATCH 4/4] refactor(cache-middleware): address dagardner-nv review feedback Per design discussion in #1888: - Drop similarity_threshold floor from 0.85 to 0 and remove manual type/range validation from the constructor; Pydantic enforces bounds at the config layer (CacheMiddlewareConfig) and anyone constructing CacheMiddleware directly is on their own - Update CacheMiddlewareConfig.similarity_threshold to ge=0 with updated description documenting the performance cost of low values and the cache-collision risk near 0 - Replace the manual difflib.SequenceMatcher loop in _find_similar_key with difflib.get_close_matches for cleaner, more idiomatic code - Remove constructor-level validation tests that no longer apply Signed-off-by: ColinM-sys --- .../nat/middleware/cache/cache_middleware.py | 69 ++++------------- .../cache/cache_middleware_config.py | 23 +++--- .../nat/middleware/test_cache_middleware.py | 75 +------------------ 3 files changed, 27 insertions(+), 140 deletions(-) diff --git a/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware.py b/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware.py index 29090a9162..70def61313 100644 --- a/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware.py +++ b/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware.py @@ -30,7 +30,6 @@ import json import logging -import math from collections import OrderedDict from collections.abc import AsyncIterator from typing import Any @@ -45,13 +44,6 @@ logger = logging.getLogger(__name__) -# Lower bound on fuzzy-match similarity to reduce the cache-poisoning surface. -# A threshold below this makes it trivial to craft an input that is "similar -# enough" to a legitimate user's cached key to hijack their response (for the -# current process, which is how the in-memory cache is scoped). 0.85 is the -# smallest value that we're comfortable shipping as an unconditional default; -# operators with strict needs should use 1.0 (exact match only). -_MIN_FUZZY_THRESHOLD = 0.85 # Default bound on cache size. The previous implementation used an unbounded # dict which, under sustained unique input, grew without limit — a memory- @@ -96,43 +88,17 @@ def __init__( Args: enabled_mode: Either "always" or "eval". If "eval", only caches when Context.is_evaluating is True. - similarity_threshold: Similarity threshold in [_MIN_FUZZY_THRESHOLD, 1.0]. - If 1.0, performs exact matching. Lower values enable difflib- - based fuzzy matching. Values below _MIN_FUZZY_THRESHOLD are - rejected to prevent cache-poisoning where a crafted input - collides with a legitimate user's cached key. + similarity_threshold: Similarity threshold in [0, 1.0]. If 1.0, + performs exact matching. Lower values enable difflib-based + fuzzy matching; note that difflib is quadratic in the worst + case, so large caches with low thresholds may have a + performance cost. Values near 0 increase the risk of cache + collisions where different inputs return the same cached + response. max_entries: Maximum number of cache entries. When exceeded, the oldest entry is evicted (LRU). Defaults to - _DEFAULT_MAX_CACHE_ENTRIES. Must be >= 1. - - Raises: - ValueError: If similarity_threshold is outside [_MIN_FUZZY_THRESHOLD, 1.0] - or max_entries is not a positive integer. + _DEFAULT_MAX_CACHE_ENTRIES. """ - # Reject bool explicitly — `isinstance(True, int)` is True in Python, - # and `True`/`False` silently sneaking through as numeric is a classic - # config bug (user passes the wrong key, gets no error). Check bool - # FIRST so the "must be a number" message doesn't lie. - if isinstance(similarity_threshold, bool): - raise ValueError( - f"similarity_threshold must be a number, got bool ({similarity_threshold!r})") - if not isinstance(similarity_threshold, (int, float)): - raise ValueError( - f"similarity_threshold must be a number, got {type(similarity_threshold).__name__}") - if not math.isfinite(similarity_threshold): - raise ValueError( - f"similarity_threshold must be finite, got {similarity_threshold!r}") - if similarity_threshold < _MIN_FUZZY_THRESHOLD or similarity_threshold > 1.0: - raise ValueError( - f"similarity_threshold={similarity_threshold} is outside the safe range " - f"[{_MIN_FUZZY_THRESHOLD}, 1.0]. Lower values make cache-poisoning trivial — " - "a crafted input can collide with a legitimate user's cached key. Use 1.0 " - "for exact matching (recommended), or a value >= " - f"{_MIN_FUZZY_THRESHOLD} for fuzzy matching.") - # Same bool-as-int foot-gun applies to max_entries. - if isinstance(max_entries, bool) or not isinstance(max_entries, int) or max_entries < 1: - raise ValueError(f"max_entries must be a positive integer, got {max_entries!r}") - super().__init__(is_final=True) self._enabled_mode = enabled_mode self._similarity_threshold = similarity_threshold @@ -202,22 +168,13 @@ def _find_similar_key(self, input_str: str) -> str | None: # Exact matching - fast path return input_str if input_str in self._cache else None - # Fuzzy matching using difflib import difflib - best_match = None - best_ratio = 0.0 - - for cached_key in self._cache: - # Use SequenceMatcher for similarity computation - matcher = difflib.SequenceMatcher(None, input_str, cached_key) - ratio = matcher.ratio() - - if ratio >= self._similarity_threshold and ratio > best_ratio: - best_ratio = ratio - best_match = cached_key - - return best_match + best_matches = difflib.get_close_matches( + input_str, self._cache.keys(), n=1, cutoff=self._similarity_threshold) + if best_matches: + return best_matches[0] + return None async def function_middleware_invoke(self, *args: Any, diff --git a/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware_config.py b/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware_config.py index 316c2aecf6..1ad122c773 100644 --- a/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware_config.py +++ b/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware_config.py @@ -20,7 +20,6 @@ from nat.data_models.middleware import FunctionMiddlewareBaseConfig from nat.middleware.cache.cache_middleware import _DEFAULT_MAX_CACHE_ENTRIES -from nat.middleware.cache.cache_middleware import _MIN_FUZZY_THRESHOLD class CacheMiddlewareConfig(FunctionMiddlewareBaseConfig, name="cache"): @@ -33,13 +32,13 @@ class CacheMiddlewareConfig(FunctionMiddlewareBaseConfig, name="cache"): enabled_mode: Controls when caching is active: - "always": Cache is always enabled - "eval": Cache only active when Context.is_evaluating is True - similarity_threshold: Float in [_MIN_FUZZY_THRESHOLD, 1.0] for input - matching: + similarity_threshold: Float in [0, 1.0] for input matching: - 1.0: Exact string matching (fastest, recommended) - - >= _MIN_FUZZY_THRESHOLD: Fuzzy matching via difflib. Values - below this bound are rejected as a cache-poisoning risk — - crafted inputs at lower thresholds can collide with a - legitimate user's cached key. + - < 1.0: Fuzzy matching via difflib. Note that difflib is + quadratic in the worst case, so large caches with low + thresholds may have a performance cost. Values near 0 + increase the risk of cache collisions where different + inputs return the same cached response. max_entries: Upper bound on cached entries. When exceeded, the least-recently-used entry is evicted. Must be a positive int; defaults to _DEFAULT_MAX_CACHE_ENTRIES. @@ -49,12 +48,14 @@ class CacheMiddlewareConfig(FunctionMiddlewareBaseConfig, name="cache"): default="eval", description="When caching is enabled: 'always' or 'eval' (only during evaluation)") similarity_threshold: float = Field( default=1.0, - ge=_MIN_FUZZY_THRESHOLD, + ge=0, le=1.0, description=( - f"Similarity threshold in [{_MIN_FUZZY_THRESHOLD}, 1.0]. Use 1.0 for exact matching " - "(recommended). Lower values enable fuzzy matching but are bounded below to prevent " - "cache-poisoning collisions with legitimate cached keys."), + "Similarity threshold in [0, 1.0]. Use 1.0 for exact matching (recommended). " + "Lower values enable fuzzy matching via difflib; note that difflib is quadratic " + "in the worst case, so large caches with low thresholds may have a performance " + "cost. Values near 0 increase the risk of cache collisions where different " + "inputs return the same cached response."), ) max_entries: int = Field( default=_DEFAULT_MAX_CACHE_ENTRIES, diff --git a/packages/nvidia_nat_core/tests/nat/middleware/test_cache_middleware.py b/packages/nvidia_nat_core/tests/nat/middleware/test_cache_middleware.py index 89b5b5aee6..aa5cb2a034 100644 --- a/packages/nvidia_nat_core/tests/nat/middleware/test_cache_middleware.py +++ b/packages/nvidia_nat_core/tests/nat/middleware/test_cache_middleware.py @@ -62,7 +62,6 @@ def test_default_initialization(self): def test_custom_initialization(self): """Test custom initialization.""" - # Use 0.9 (above the enforced minimum) to exercise non-default fuzzy mode. middleware = CacheMiddleware(enabled_mode="always", similarity_threshold=0.9) # Check attributes are set assert hasattr(middleware, '_enabled_mode') @@ -109,11 +108,7 @@ async def mock_next_call(*args, **kwargs): assert result3.result == "Result for test" async def test_fuzzy_match_caching(self, middleware_context): - """Test fuzzy matching with similarity_threshold < 1.0. - - Uses 0.9 (above the enforced minimum) — 0.8 is no longer a valid - threshold after the cache-poisoning hardening. - """ + """Test fuzzy matching with similarity_threshold < 1.0.""" middleware = CacheMiddleware(enabled_mode="always", similarity_threshold=0.9) call_count = 0 @@ -272,9 +267,6 @@ async def mock_next_call(*args, **kwargs): def test_similarity_computation_for_different_thresholds(self): """Test similarity computation for different thresholds.""" - # This is more of a unit test for the similarity logic. - # Uses 0.9 (above the enforced minimum) to exercise fuzzy matching - # without enabling cache-poisoning-prone low thresholds. middleware = CacheMiddleware(enabled_mode="always", similarity_threshold=0.9) # Directly test internal methods @@ -291,11 +283,7 @@ def test_similarity_computation_for_different_thresholds(self): assert middleware._find_similar_key("xyz123abc") is None # noqa async def test_multiple_similar_entries(self, middleware_context): - """Test behavior with multiple similar cached entries. - - Uses 0.85 (the enforced minimum) instead of the original 0.7 — - below 0.85 is now rejected as a cache-poisoning risk. - """ + """Test behavior with multiple similar cached entries.""" middleware = CacheMiddleware(enabled_mode="always", similarity_threshold=0.85) # Pre-populate cache with similar entries @@ -319,47 +307,6 @@ async def mock_next_call(*args, **kwargs): # The exact behavior depends on which cached key is most similar -class TestSimilarityThresholdFloor: - """The constructor must reject similarity thresholds below the safe floor. - - Below ~0.85, crafting an input whose difflib ratio exceeds the threshold - against a legitimate cached key is trivial (small edits, common prefixes, - shared structural tokens). Accepting those values silently produces a - cache where one caller can hijack another caller's response. - """ - - @pytest.mark.parametrize("threshold", [0.0, 0.3, 0.5, 0.7, 0.84]) - def test_below_floor_is_rejected(self, threshold): - with pytest.raises(ValueError, match="outside the safe range"): - CacheMiddleware(enabled_mode="always", similarity_threshold=threshold) - - @pytest.mark.parametrize("threshold", [0.85, 0.9, 0.95, 1.0]) - def test_at_or_above_floor_is_allowed(self, threshold): - mw = CacheMiddleware(enabled_mode="always", similarity_threshold=threshold) - assert mw._similarity_threshold == threshold # noqa: SLF001 - - def test_threshold_above_one_is_rejected(self): - with pytest.raises(ValueError, match="outside the safe range"): - CacheMiddleware(enabled_mode="always", similarity_threshold=1.5) - - def test_threshold_non_numeric_is_rejected(self): - with pytest.raises(ValueError, match="must be a number"): - CacheMiddleware(enabled_mode="always", similarity_threshold="high") # type: ignore[arg-type] - - @pytest.mark.parametrize("bad_bool", [True, False]) - def test_threshold_bool_is_rejected(self, bad_bool): - """`isinstance(True, int)` is True in Python — reject bools explicitly - so a config with the wrong key type doesn't silently become 1.0 or 0.0.""" - with pytest.raises(ValueError, match="got bool"): - CacheMiddleware(enabled_mode="always", similarity_threshold=bad_bool) # type: ignore[arg-type] - - @pytest.mark.parametrize("bad_value", [float("nan"), float("inf"), float("-inf")]) - def test_threshold_non_finite_is_rejected(self, bad_value): - """NaN, +inf, -inf must be rejected before the range comparison.""" - with pytest.raises(ValueError, match="must be finite"): - CacheMiddleware(enabled_mode="always", similarity_threshold=bad_value) - - class TestMaxEntriesLruEviction: """The cache must bound its size to prevent memory-exhaustion DoS. @@ -372,24 +319,6 @@ async def test_default_max_entries_is_positive(self): mw = CacheMiddleware(enabled_mode="always", similarity_threshold=1.0) assert mw._max_entries > 0 # noqa: SLF001 - def test_zero_max_entries_is_rejected(self): - with pytest.raises(ValueError, match="positive integer"): - CacheMiddleware(enabled_mode="always", similarity_threshold=1.0, max_entries=0) - - def test_negative_max_entries_is_rejected(self): - with pytest.raises(ValueError, match="positive integer"): - CacheMiddleware(enabled_mode="always", similarity_threshold=1.0, max_entries=-5) - - @pytest.mark.parametrize("bad_bool", [True, False]) - def test_bool_max_entries_is_rejected(self, bad_bool): - """Same bool-as-int foot-gun protection as similarity_threshold.""" - with pytest.raises(ValueError, match="positive integer"): - CacheMiddleware( - enabled_mode="always", - similarity_threshold=1.0, - max_entries=bad_bool, # type: ignore[arg-type] - ) - async def test_cache_evicts_oldest_when_exceeding_max_entries(self, middleware_context): """Insert more unique entries than max_entries; verify size stays bounded.""" mw = CacheMiddleware(