From 24de9109f792cb0bf9fe5267953f5748fba4fd00 Mon Sep 17 00:00:00 2001
From: ColinM-sys <cmcdonough@50words.com>
Date: Thu, 16 Apr 2026 18:45:31 -0400
Subject: [PATCH 1/4] fix(cache-middleware): bound cache size and enforce safe
 fuzzy threshold
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CacheMiddleware previously:
  1. Used an unbounded dict for storage — sustained unique inputs would
     grow the cache without limit, a memory-exhaustion DoS surface.
  2. Accepted any similarity_threshold in [0, 1]. Low values make cache
     poisoning trivial: a crafted input whose difflib ratio exceeds the
     threshold against a legitimate cached key hijacks the legitimate
     user's response. At threshold=0.5, most short/structured inputs
     collide on first try.

Changes:
  - Back the cache with an OrderedDict. Evict the oldest entry (LRU)
    whenever the cache exceeds max_entries. A cache hit moves the entry
    to the MRU end so frequently-useful results are preferred on eviction.
  - Add max_entries to __init__ (default 1024, must be a positive int).
  - Reject similarity_threshold values below 0.85 at construct time with
    an actionable error. 1.0 remains recommended (exact match only).
  - Non-numeric threshold values now raise instead of failing later.

Breaking change: existing configs that pass similarity_threshold < 0.85
will fail at construct time. This is intentional — any such value is a
cache-poisoning foot-gun. The error message points at the two safe
choices (use 1.0, or use a value >= 0.85).

Test updates:
  - test_similarity_computation_for_different_thresholds / _fuzzy_match_caching
    / test_multiple_similar_entries / test_custom_initialization: bumped
    from 0.5/0.7/0.8 to values >= 0.85 so they exercise fuzzy matching
    without tripping the new floor.
  - Added TestSimilarityThresholdFloor:
      * below-floor values (0.0, 0.3, 0.5, 0.7, 0.84) are rejected
      * at/above-floor values (0.85, 0.9, 0.95, 1.0) are accepted
      * threshold > 1.0 is rejected
      * non-numeric threshold is rejected
  - Added TestMaxEntriesLruEviction:
      * default max_entries is positive
      * zero or negative max_entries is rejected
      * cache stays at max_entries under sustained unique input
      * hit promotes entry to MRU; later insert evicts the older one

CWE-400 (resource exhaustion) + CWE-345 (insufficient data authenticity /
cache-key confusion).

Signed-off-by: ColinM-sys <cmcdonough@50words.com>
---
 .../nat/middleware/cache/cache_middleware.py  |  73 +++++++++-
 .../nat/middleware/test_cache_middleware.py   | 133 ++++++++++++++++--
 2 files changed, 191 insertions(+), 15 deletions(-)

diff --git a/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware.py b/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware.py
index 0ed26e22df..afbc0cfae6 100644
--- a/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware.py
+++ b/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware.py
@@ -30,6 +30,7 @@
 
 import json
 import logging
+from collections import OrderedDict
 from collections.abc import AsyncIterator
 from typing import Any
 
@@ -43,6 +44,21 @@
 
 logger = logging.getLogger(__name__)
 
+# Lower bound on fuzzy-match similarity to reduce the cache-poisoning surface.
+# A threshold below this makes it trivial to craft an input that is "similar
+# enough" to a legitimate user's cached key to hijack their response (for the
+# current process, which is how the in-memory cache is scoped). 0.85 is the
+# smallest value that we're comfortable shipping as an unconditional default;
+# operators with strict needs should use 1.0 (exact match only).
+_MIN_FUZZY_THRESHOLD = 0.85
+
+# Default bound on cache size. The previous implementation used an unbounded
+# dict which, under sustained unique input, grew without limit — a memory-
+# exhaustion DoS and, combined with fuzzy matching, a long-lived surface for
+# cross-request confusion. OrderedDict-backed LRU evicts the oldest entry
+# when the cache exceeds this bound.
+_DEFAULT_MAX_CACHE_ENTRIES = 1024
+
 
 class CacheMiddleware(FunctionMiddleware):
     """Cache middleware that memoizes function outputs based on input similarity.
@@ -67,19 +83,51 @@ class CacheMiddleware(FunctionMiddleware):
             computation.
     """
 
-    def __init__(self, *, enabled_mode: str, similarity_threshold: float) -> None:
+    def __init__(
+        self,
+        *,
+        enabled_mode: str,
+        similarity_threshold: float,
+        max_entries: int = _DEFAULT_MAX_CACHE_ENTRIES,
+    ) -> None:
         """Initialize the cache middleware.
 
         Args:
             enabled_mode: Either "always" or "eval". If "eval", only caches
                 when Context.is_evaluating is True.
-            similarity_threshold: Similarity threshold between 0 and 1.
-                If 1.0, performs exact matching. Otherwise uses fuzzy matching.
+            similarity_threshold: Similarity threshold in [_MIN_FUZZY_THRESHOLD, 1.0].
+                If 1.0, performs exact matching. Lower values enable difflib-
+                based fuzzy matching. Values below _MIN_FUZZY_THRESHOLD are
+                rejected to prevent cache-poisoning where a crafted input
+                collides with a legitimate user's cached key.
+            max_entries: Maximum number of cache entries. When exceeded, the
+                oldest entry is evicted (LRU). Defaults to
+                _DEFAULT_MAX_CACHE_ENTRIES. Must be >= 1.
+
+        Raises:
+            ValueError: If similarity_threshold is outside [_MIN_FUZZY_THRESHOLD, 1.0]
+                or max_entries is not a positive integer.
         """
+        if not isinstance(similarity_threshold, (int, float)):
+            raise ValueError(
+                f"similarity_threshold must be a number, got {type(similarity_threshold).__name__}")
+        if similarity_threshold < _MIN_FUZZY_THRESHOLD or similarity_threshold > 1.0:
+            raise ValueError(
+                f"similarity_threshold={similarity_threshold} is outside the safe range "
+                f"[{_MIN_FUZZY_THRESHOLD}, 1.0]. Lower values make cache-poisoning trivial — "
+                "a crafted input can collide with a legitimate user's cached key. Use 1.0 "
+                "for exact matching (recommended), or a value >= "
+                f"{_MIN_FUZZY_THRESHOLD} for fuzzy matching.")
+        if not isinstance(max_entries, int) or max_entries < 1:
+            raise ValueError(f"max_entries must be a positive integer, got {max_entries!r}")
+
         super().__init__(is_final=True)
         self._enabled_mode = enabled_mode
         self._similarity_threshold = similarity_threshold
-        self._cache: dict[str, Any] = {}
+        # OrderedDict gives O(1) LRU: move_to_end() on hit, popitem(last=False)
+        # to evict the oldest when we exceed max_entries.
+        self._cache: OrderedDict[str, Any] = OrderedDict()
+        self._max_entries = max_entries
 
     # ==================== Abstract Method Implementations ====================
 
@@ -199,10 +247,13 @@ async def function_middleware_invoke(self,
         # Phase 1: Preprocess - look for a similar cached input
         similar_key = self._find_similar_key(input_str)
         if similar_key is not None:
-            # Cache hit - short-circuit and return cached output
+            # Cache hit - short-circuit and return cached output.
+            # Move the hit entry to the MRU end so LRU eviction prefers truly
+            # old entries, not just recently-useful ones.
             logger.debug("Cache hit for function %s with similarity %.2f",
                          context.name,
                          1.0 if similar_key == input_str else self._similarity_threshold)
+            self._cache.move_to_end(similar_key)
             # Phase 4: Continue - return cached result
             return self._cache[similar_key]
 
@@ -210,9 +261,17 @@ async def function_middleware_invoke(self,
         logger.debug("Cache miss for function %s", context.name)
         result = await call_next(*args, **kwargs)
 
-        # Phase 3: Postprocess - cache the result for future use
+        # Phase 3: Postprocess - cache the result for future use. Enforce the
+        # LRU bound BEFORE insert so the new entry always lands in a cache of
+        # size <= max_entries, preventing unbounded memory growth (DoS).
         self._cache[input_str] = result
-        logger.debug("Cached result for function %s", context.name)
+        self._cache.move_to_end(input_str)
+        while len(self._cache) > self._max_entries:
+            self._cache.popitem(last=False)
+        logger.debug("Cached result for function %s (size=%d/%d)",
+                     context.name,
+                     len(self._cache),
+                     self._max_entries)
 
         # Phase 4: Continue - return the fresh result
         return result
diff --git a/packages/nvidia_nat_core/tests/nat/middleware/test_cache_middleware.py b/packages/nvidia_nat_core/tests/nat/middleware/test_cache_middleware.py
index c1e14f5de1..a98c85189e 100644
--- a/packages/nvidia_nat_core/tests/nat/middleware/test_cache_middleware.py
+++ b/packages/nvidia_nat_core/tests/nat/middleware/test_cache_middleware.py
@@ -62,7 +62,8 @@ def test_default_initialization(self):
 
     def test_custom_initialization(self):
         """Test custom initialization."""
-        middleware = CacheMiddleware(enabled_mode="always", similarity_threshold=0.8)
+        # Use 0.9 (above the enforced minimum) to exercise non-default fuzzy mode.
+        middleware = CacheMiddleware(enabled_mode="always", similarity_threshold=0.9)
         # Check attributes are set
         assert hasattr(middleware, '_enabled_mode')
         assert hasattr(middleware, '_similarity_threshold')
@@ -108,8 +109,12 @@ async def mock_next_call(*args, **kwargs):
         assert result3.result == "Result for test"
 
     async def test_fuzzy_match_caching(self, middleware_context):
-        """Test fuzzy matching with similarity_threshold < 1.0."""
-        middleware = CacheMiddleware(enabled_mode="always", similarity_threshold=0.8)
+        """Test fuzzy matching with similarity_threshold < 1.0.
+
+        Uses 0.9 (above the enforced minimum) — 0.8 is no longer a valid
+        threshold after the cache-poisoning hardening.
+        """
+        middleware = CacheMiddleware(enabled_mode="always", similarity_threshold=0.9)
 
         call_count = 0
 
@@ -267,8 +272,10 @@ async def mock_next_call(*args, **kwargs):
 
     def test_similarity_computation_for_different_thresholds(self):
         """Test similarity computation for different thresholds."""
-        # This is more of a unit test for the similarity logic
-        middleware = CacheMiddleware(enabled_mode="always", similarity_threshold=0.5)
+        # This is more of a unit test for the similarity logic.
+        # Uses 0.9 (above the enforced minimum) to exercise fuzzy matching
+        # without enabling cache-poisoning-prone low thresholds.
+        middleware = CacheMiddleware(enabled_mode="always", similarity_threshold=0.9)
 
         # Directly test internal methods
         # Add a cached entry
@@ -278,14 +285,18 @@ def test_similarity_computation_for_different_thresholds(self):
         # Test various similarity levels
         # Exact match
         assert middleware._find_similar_key(test_key) == test_key  # noqa
-        # Very similar
+        # Very similar (one char shorter, ~0.95 ratio)
         assert middleware._find_similar_key("hello worl") == test_key  # noqa
         # Too different - use a completely different string
         assert middleware._find_similar_key("xyz123abc") is None  # noqa
 
     async def test_multiple_similar_entries(self, middleware_context):
-        """Test behavior with multiple similar cached entries."""
-        middleware = CacheMiddleware(enabled_mode="always", similarity_threshold=0.7)
+        """Test behavior with multiple similar cached entries.
+
+        Uses 0.85 (the enforced minimum) instead of the original 0.7 —
+        below 0.85 is now rejected as a cache-poisoning risk.
+        """
+        middleware = CacheMiddleware(enabled_mode="always", similarity_threshold=0.85)
 
         # Pre-populate cache with similar entries
         key1 = middleware._serialize_input(  # noqa
@@ -306,3 +317,109 @@ async def mock_next_call(*args, **kwargs):
         input_str = {"value": "test input X", "number": 42}
         await middleware.function_middleware_invoke(input_str, call_next=mock_next_call, context=middleware_context)
         # The exact behavior depends on which cached key is most similar
+
+
+class TestSimilarityThresholdFloor:
+    """The constructor must reject similarity thresholds below the safe floor.
+
+    Below ~0.85, crafting an input whose difflib ratio exceeds the threshold
+    against a legitimate cached key is trivial (small edits, common prefixes,
+    shared structural tokens). Accepting those values silently produces a
+    cache where one caller can hijack another caller's response.
+    """
+
+    @pytest.mark.parametrize("threshold", [0.0, 0.3, 0.5, 0.7, 0.84])
+    def test_below_floor_is_rejected(self, threshold):
+        with pytest.raises(ValueError, match="outside the safe range"):
+            CacheMiddleware(enabled_mode="always", similarity_threshold=threshold)
+
+    @pytest.mark.parametrize("threshold", [0.85, 0.9, 0.95, 1.0])
+    def test_at_or_above_floor_is_allowed(self, threshold):
+        mw = CacheMiddleware(enabled_mode="always", similarity_threshold=threshold)
+        assert mw._similarity_threshold == threshold  # noqa: SLF001
+
+    def test_threshold_above_one_is_rejected(self):
+        with pytest.raises(ValueError, match="outside the safe range"):
+            CacheMiddleware(enabled_mode="always", similarity_threshold=1.5)
+
+    def test_threshold_non_numeric_is_rejected(self):
+        with pytest.raises(ValueError, match="must be a number"):
+            CacheMiddleware(enabled_mode="always", similarity_threshold="high")  # type: ignore[arg-type]
+
+
+class TestMaxEntriesLruEviction:
+    """The cache must bound its size to prevent memory-exhaustion DoS.
+
+    The previous implementation used an unbounded dict; sustained unique
+    inputs would grow the cache without limit, eventually crashing the
+    process. LRU eviction ensures the cache stays within max_entries.
+    """
+
+    async def test_default_max_entries_is_positive(self):
+        mw = CacheMiddleware(enabled_mode="always", similarity_threshold=1.0)
+        assert mw._max_entries > 0  # noqa: SLF001
+
+    def test_zero_max_entries_is_rejected(self):
+        with pytest.raises(ValueError, match="positive integer"):
+            CacheMiddleware(enabled_mode="always", similarity_threshold=1.0, max_entries=0)
+
+    def test_negative_max_entries_is_rejected(self):
+        with pytest.raises(ValueError, match="positive integer"):
+            CacheMiddleware(enabled_mode="always", similarity_threshold=1.0, max_entries=-5)
+
+    async def test_cache_evicts_oldest_when_exceeding_max_entries(self, middleware_context):
+        """Insert more unique entries than max_entries; verify size stays bounded."""
+        mw = CacheMiddleware(
+            enabled_mode="always",
+            similarity_threshold=1.0,  # exact match keeps the test deterministic
+            max_entries=3,
+        )
+
+        call_count = 0
+
+        async def mock_next_call(*_args, **_kwargs):
+            nonlocal call_count
+            call_count += 1
+            return _TestOutput(result=f"result_{call_count}")
+
+        for i in range(10):
+            await mw.function_middleware_invoke(
+                {"value": f"unique_input_{i}"},
+                call_next=mock_next_call,
+                context=middleware_context,
+            )
+
+        assert len(mw._cache) == 3  # noqa: SLF001
+        # The MOST recent three inserts should be what's left.
+        latest_keys = list(mw._cache.keys())  # noqa: SLF001
+        for i in range(7, 10):
+            assert any(f"unique_input_{i}" in k for k in latest_keys)
+
+    async def test_cache_hit_promotes_entry_to_most_recently_used(self, middleware_context):
+        """A cache hit should move the entry to MRU so later evictions spare it."""
+        mw = CacheMiddleware(
+            enabled_mode="always",
+            similarity_threshold=1.0,
+            max_entries=3,
+        )
+
+        async def mock_next_call(*_args, **_kwargs):
+            return _TestOutput(result="r")
+
+        # Fill the cache with A, B, C (A is oldest)
+        for key in ("A", "B", "C"):
+            await mw.function_middleware_invoke(
+                {"value": key}, call_next=mock_next_call, context=middleware_context)
+
+        # Hit A again — should promote A to the MRU end
+        await mw.function_middleware_invoke(
+            {"value": "A"}, call_next=mock_next_call, context=middleware_context)
+
+        # Now insert D — B (now oldest) should be evicted, not A.
+        await mw.function_middleware_invoke(
+            {"value": "D"}, call_next=mock_next_call, context=middleware_context)
+
+        keys = "".join(list(mw._cache.keys()))  # noqa: SLF001
+        assert '"value": "A"' in keys
+        assert '"value": "D"' in keys
+        assert '"value": "B"' not in keys

From 9f60ef6b150bf3c81fc88b2a2e166c30120548d3 Mon Sep 17 00:00:00 2001
From: ColinM-sys <cmcdonough@50words.com>
Date: Thu, 16 Apr 2026 18:56:02 -0400
Subject: [PATCH 2/4] fix(cache-middleware): align config schema, reject
 bool/non-finite thresholds
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Per CodeRabbit on #1879:

1. cache_middleware_config.py was out of sync with the new constructor
   semantics — schema allowed similarity_threshold in [0, 1] and had no
   max_entries field at all, so config-driven instantiation could pass
   values the constructor now rejects. Re-export
   _MIN_FUZZY_THRESHOLD and _DEFAULT_MAX_CACHE_ENTRIES from the module
   and wire both constants into the schema:
     similarity_threshold: ge=_MIN_FUZZY_THRESHOLD, le=1.0
     max_entries: ge=1, default=_DEFAULT_MAX_CACHE_ENTRIES
   Docstrings mirror the constructor's rationale (cache-poisoning for
   threshold, DoS for max_entries).

2. Constructor validation did not reject `bool` (since isinstance(True, int)
   is True in Python) or non-finite floats (NaN / +inf / -inf). Both are
   classic config-bug foot-guns: a boolean silently becoming 0 or 1.0,
   or a parser that hands back NaN from an upstream source, would slip
   past the range comparison. Add explicit:
     - bool rejection on similarity_threshold (before the number check)
     - math.isfinite() check on similarity_threshold
     - bool rejection on max_entries

New tests:
  - test_threshold_bool_is_rejected (parametrized True / False)
  - test_threshold_non_finite_is_rejected (parametrized NaN / +inf / -inf)
  - test_bool_max_entries_is_rejected (parametrized True / False)

Signed-off-by: ColinM-sys <cmcdonough@50words.com>
---
 .../nat/middleware/cache/cache_middleware.py  | 14 +++++++-
 .../cache/cache_middleware_config.py          | 35 +++++++++++++++----
 .../nat/middleware/test_cache_middleware.py   | 23 ++++++++++++
 3 files changed, 64 insertions(+), 8 deletions(-)

diff --git a/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware.py b/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware.py
index afbc0cfae6..29090a9162 100644
--- a/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware.py
+++ b/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware.py
@@ -30,6 +30,7 @@
 
 import json
 import logging
+import math
 from collections import OrderedDict
 from collections.abc import AsyncIterator
 from typing import Any
@@ -108,9 +109,19 @@ def __init__(
             ValueError: If similarity_threshold is outside [_MIN_FUZZY_THRESHOLD, 1.0]
                 or max_entries is not a positive integer.
         """
+        # Reject bool explicitly — `isinstance(True, int)` is True in Python,
+        # and `True`/`False` silently sneaking through as numeric is a classic
+        # config bug (user passes the wrong key, gets no error). Check bool
+        # FIRST so the "must be a number" message doesn't lie.
+        if isinstance(similarity_threshold, bool):
+            raise ValueError(
+                f"similarity_threshold must be a number, got bool ({similarity_threshold!r})")
         if not isinstance(similarity_threshold, (int, float)):
             raise ValueError(
                 f"similarity_threshold must be a number, got {type(similarity_threshold).__name__}")
+        if not math.isfinite(similarity_threshold):
+            raise ValueError(
+                f"similarity_threshold must be finite, got {similarity_threshold!r}")
         if similarity_threshold < _MIN_FUZZY_THRESHOLD or similarity_threshold > 1.0:
             raise ValueError(
                 f"similarity_threshold={similarity_threshold} is outside the safe range "
@@ -118,7 +129,8 @@ def __init__(
                 "a crafted input can collide with a legitimate user's cached key. Use 1.0 "
                 "for exact matching (recommended), or a value >= "
                 f"{_MIN_FUZZY_THRESHOLD} for fuzzy matching.")
-        if not isinstance(max_entries, int) or max_entries < 1:
+        # Same bool-as-int foot-gun applies to max_entries.
+        if isinstance(max_entries, bool) or not isinstance(max_entries, int) or max_entries < 1:
             raise ValueError(f"max_entries must be a positive integer, got {max_entries!r}")
 
         super().__init__(is_final=True)
diff --git a/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware_config.py b/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware_config.py
index 7f24f03485..316c2aecf6 100644
--- a/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware_config.py
+++ b/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware_config.py
@@ -19,6 +19,8 @@
 from pydantic import Field
 
 from nat.data_models.middleware import FunctionMiddlewareBaseConfig
+from nat.middleware.cache.cache_middleware import _DEFAULT_MAX_CACHE_ENTRIES
+from nat.middleware.cache.cache_middleware import _MIN_FUZZY_THRESHOLD
 
 
 class CacheMiddlewareConfig(FunctionMiddlewareBaseConfig, name="cache"):
@@ -31,14 +33,33 @@ class CacheMiddlewareConfig(FunctionMiddlewareBaseConfig, name="cache"):
         enabled_mode: Controls when caching is active:
             - "always": Cache is always enabled
             - "eval": Cache only active when Context.is_evaluating is True
-        similarity_threshold: Float between 0 and 1 for input matching:
-            - 1.0: Exact string matching (fastest)
-            - < 1.0: Fuzzy matching using difflib similarity
+        similarity_threshold: Float in [_MIN_FUZZY_THRESHOLD, 1.0] for input
+            matching:
+            - 1.0: Exact string matching (fastest, recommended)
+            - >= _MIN_FUZZY_THRESHOLD: Fuzzy matching via difflib. Values
+              below this bound are rejected as a cache-poisoning risk —
+              crafted inputs at lower thresholds can collide with a
+              legitimate user's cached key.
+        max_entries: Upper bound on cached entries. When exceeded, the
+            least-recently-used entry is evicted. Must be a positive int;
+            defaults to _DEFAULT_MAX_CACHE_ENTRIES.
     """
 
     enabled_mode: Literal["always", "eval"] = Field(
         default="eval", description="When caching is enabled: 'always' or 'eval' (only during evaluation)")
-    similarity_threshold: float = Field(default=1.0,
-                                        ge=0.0,
-                                        le=1.0,
-                                        description="Similarity threshold between 0 and 1. Use 1.0 for exact matching")
+    similarity_threshold: float = Field(
+        default=1.0,
+        ge=_MIN_FUZZY_THRESHOLD,
+        le=1.0,
+        description=(
+            f"Similarity threshold in [{_MIN_FUZZY_THRESHOLD}, 1.0]. Use 1.0 for exact matching "
+            "(recommended). Lower values enable fuzzy matching but are bounded below to prevent "
+            "cache-poisoning collisions with legitimate cached keys."),
+    )
+    max_entries: int = Field(
+        default=_DEFAULT_MAX_CACHE_ENTRIES,
+        ge=1,
+        description=("Maximum number of cache entries before LRU eviction. Must be >= 1. "
+                     "Prevents memory-exhaustion DoS from unbounded cache growth under "
+                     "sustained unique inputs."),
+    )
diff --git a/packages/nvidia_nat_core/tests/nat/middleware/test_cache_middleware.py b/packages/nvidia_nat_core/tests/nat/middleware/test_cache_middleware.py
index a98c85189e..89b5b5aee6 100644
--- a/packages/nvidia_nat_core/tests/nat/middleware/test_cache_middleware.py
+++ b/packages/nvidia_nat_core/tests/nat/middleware/test_cache_middleware.py
@@ -346,6 +346,19 @@ def test_threshold_non_numeric_is_rejected(self):
         with pytest.raises(ValueError, match="must be a number"):
             CacheMiddleware(enabled_mode="always", similarity_threshold="high")  # type: ignore[arg-type]
 
+    @pytest.mark.parametrize("bad_bool", [True, False])
+    def test_threshold_bool_is_rejected(self, bad_bool):
+        """`isinstance(True, int)` is True in Python — reject bools explicitly
+        so a config with the wrong key type doesn't silently become 1.0 or 0.0."""
+        with pytest.raises(ValueError, match="got bool"):
+            CacheMiddleware(enabled_mode="always", similarity_threshold=bad_bool)  # type: ignore[arg-type]
+
+    @pytest.mark.parametrize("bad_value", [float("nan"), float("inf"), float("-inf")])
+    def test_threshold_non_finite_is_rejected(self, bad_value):
+        """NaN, +inf, -inf must be rejected before the range comparison."""
+        with pytest.raises(ValueError, match="must be finite"):
+            CacheMiddleware(enabled_mode="always", similarity_threshold=bad_value)
+
 
 class TestMaxEntriesLruEviction:
     """The cache must bound its size to prevent memory-exhaustion DoS.
@@ -367,6 +380,16 @@ def test_negative_max_entries_is_rejected(self):
         with pytest.raises(ValueError, match="positive integer"):
             CacheMiddleware(enabled_mode="always", similarity_threshold=1.0, max_entries=-5)
 
+    @pytest.mark.parametrize("bad_bool", [True, False])
+    def test_bool_max_entries_is_rejected(self, bad_bool):
+        """Same bool-as-int foot-gun protection as similarity_threshold."""
+        with pytest.raises(ValueError, match="positive integer"):
+            CacheMiddleware(
+                enabled_mode="always",
+                similarity_threshold=1.0,
+                max_entries=bad_bool,  # type: ignore[arg-type]
+            )
+
     async def test_cache_evicts_oldest_when_exceeding_max_entries(self, middleware_context):
         """Insert more unique entries than max_entries; verify size stays bounded."""
         mw = CacheMiddleware(

From ee05bcd928876421ea149d15da0a91f3af7f7ab5 Mon Sep 17 00:00:00 2001
From: ColinM-sys <cmcdonough@50words.com>
Date: Thu, 16 Apr 2026 19:05:14 -0400
Subject: [PATCH 3/4] fix(cache-middleware): wire max_entries through
 register.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Per CodeRabbit on #1879: CacheMiddlewareConfig.max_entries was added
to the config schema but register.py still instantiated CacheMiddleware
with only (enabled_mode, similarity_threshold) — so a configured
max_entries value was silently ignored and deployments always used the
1024 default.

Pass config.max_entries through to the constructor so config-driven
LRU sizing actually takes effect.

Signed-off-by: ColinM-sys <cmcdonough@50words.com>
---
 .../nvidia_nat_core/src/nat/middleware/cache/register.py    | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/packages/nvidia_nat_core/src/nat/middleware/cache/register.py b/packages/nvidia_nat_core/src/nat/middleware/cache/register.py
index 48d2702095..e8a77b997e 100644
--- a/packages/nvidia_nat_core/src/nat/middleware/cache/register.py
+++ b/packages/nvidia_nat_core/src/nat/middleware/cache/register.py
@@ -30,4 +30,8 @@ async def cache_middleware(config: CacheMiddlewareConfig, builder: Builder):
     Yields:
         A configured cache middleware instance
     """
-    yield CacheMiddleware(enabled_mode=config.enabled_mode, similarity_threshold=config.similarity_threshold)
+    yield CacheMiddleware(
+        enabled_mode=config.enabled_mode,
+        similarity_threshold=config.similarity_threshold,
+        max_entries=config.max_entries,
+    )

From 2036cf2114a0a5986d5cf695437d060e99ffc4f1 Mon Sep 17 00:00:00 2001
From: ColinM-sys <cmcdonough@50words.com>
Date: Mon, 20 Apr 2026 19:04:40 -0400
Subject: [PATCH 4/4] refactor(cache-middleware): address dagardner-nv review
 feedback

Per design discussion in #1888:

- Drop similarity_threshold floor from 0.85 to 0 and remove manual
  type/range validation from the constructor; Pydantic enforces bounds
  at the config layer (CacheMiddlewareConfig) and anyone constructing
  CacheMiddleware directly is on their own
- Update CacheMiddlewareConfig.similarity_threshold to ge=0 with
  updated description documenting the performance cost of low values
  and the cache-collision risk near 0
- Replace the manual difflib.SequenceMatcher loop in _find_similar_key
  with difflib.get_close_matches for cleaner, more idiomatic code
- Remove constructor-level validation tests that no longer apply

Signed-off-by: ColinM-sys <cmcdonough@50words.com>
---
 .../nat/middleware/cache/cache_middleware.py  | 69 ++++-------------
 .../cache/cache_middleware_config.py          | 23 +++---
 .../nat/middleware/test_cache_middleware.py   | 75 +------------------
 3 files changed, 27 insertions(+), 140 deletions(-)

diff --git a/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware.py b/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware.py
index 29090a9162..70def61313 100644
--- a/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware.py
+++ b/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware.py
@@ -30,7 +30,6 @@
 
 import json
 import logging
-import math
 from collections import OrderedDict
 from collections.abc import AsyncIterator
 from typing import Any
@@ -45,13 +44,6 @@
 
 logger = logging.getLogger(__name__)
 
-# Lower bound on fuzzy-match similarity to reduce the cache-poisoning surface.
-# A threshold below this makes it trivial to craft an input that is "similar
-# enough" to a legitimate user's cached key to hijack their response (for the
-# current process, which is how the in-memory cache is scoped). 0.85 is the
-# smallest value that we're comfortable shipping as an unconditional default;
-# operators with strict needs should use 1.0 (exact match only).
-_MIN_FUZZY_THRESHOLD = 0.85
 
 # Default bound on cache size. The previous implementation used an unbounded
 # dict which, under sustained unique input, grew without limit — a memory-
@@ -96,43 +88,17 @@ def __init__(
         Args:
             enabled_mode: Either "always" or "eval". If "eval", only caches
                 when Context.is_evaluating is True.
-            similarity_threshold: Similarity threshold in [_MIN_FUZZY_THRESHOLD, 1.0].
-                If 1.0, performs exact matching. Lower values enable difflib-
-                based fuzzy matching. Values below _MIN_FUZZY_THRESHOLD are
-                rejected to prevent cache-poisoning where a crafted input
-                collides with a legitimate user's cached key.
+            similarity_threshold: Similarity threshold in [0, 1.0]. If 1.0,
+                performs exact matching. Lower values enable difflib-based
+                fuzzy matching; note that difflib is quadratic in the worst
+                case, so large caches with low thresholds may have a
+                performance cost. Values near 0 increase the risk of cache
+                collisions where different inputs return the same cached
+                response.
             max_entries: Maximum number of cache entries. When exceeded, the
                 oldest entry is evicted (LRU). Defaults to
-                _DEFAULT_MAX_CACHE_ENTRIES. Must be >= 1.
-
-        Raises:
-            ValueError: If similarity_threshold is outside [_MIN_FUZZY_THRESHOLD, 1.0]
-                or max_entries is not a positive integer.
+                _DEFAULT_MAX_CACHE_ENTRIES.
         """
-        # Reject bool explicitly — `isinstance(True, int)` is True in Python,
-        # and `True`/`False` silently sneaking through as numeric is a classic
-        # config bug (user passes the wrong key, gets no error). Check bool
-        # FIRST so the "must be a number" message doesn't lie.
-        if isinstance(similarity_threshold, bool):
-            raise ValueError(
-                f"similarity_threshold must be a number, got bool ({similarity_threshold!r})")
-        if not isinstance(similarity_threshold, (int, float)):
-            raise ValueError(
-                f"similarity_threshold must be a number, got {type(similarity_threshold).__name__}")
-        if not math.isfinite(similarity_threshold):
-            raise ValueError(
-                f"similarity_threshold must be finite, got {similarity_threshold!r}")
-        if similarity_threshold < _MIN_FUZZY_THRESHOLD or similarity_threshold > 1.0:
-            raise ValueError(
-                f"similarity_threshold={similarity_threshold} is outside the safe range "
-                f"[{_MIN_FUZZY_THRESHOLD}, 1.0]. Lower values make cache-poisoning trivial — "
-                "a crafted input can collide with a legitimate user's cached key. Use 1.0 "
-                "for exact matching (recommended), or a value >= "
-                f"{_MIN_FUZZY_THRESHOLD} for fuzzy matching.")
-        # Same bool-as-int foot-gun applies to max_entries.
-        if isinstance(max_entries, bool) or not isinstance(max_entries, int) or max_entries < 1:
-            raise ValueError(f"max_entries must be a positive integer, got {max_entries!r}")
-
         super().__init__(is_final=True)
         self._enabled_mode = enabled_mode
         self._similarity_threshold = similarity_threshold
@@ -202,22 +168,13 @@ def _find_similar_key(self, input_str: str) -> str | None:
             # Exact matching - fast path
             return input_str if input_str in self._cache else None
 
-        # Fuzzy matching using difflib
         import difflib
 
-        best_match = None
-        best_ratio = 0.0
-
-        for cached_key in self._cache:
-            # Use SequenceMatcher for similarity computation
-            matcher = difflib.SequenceMatcher(None, input_str, cached_key)
-            ratio = matcher.ratio()
-
-            if ratio >= self._similarity_threshold and ratio > best_ratio:
-                best_ratio = ratio
-                best_match = cached_key
-
-        return best_match
+        best_matches = difflib.get_close_matches(
+            input_str, self._cache.keys(), n=1, cutoff=self._similarity_threshold)
+        if best_matches:
+            return best_matches[0]
+        return None
 
     async def function_middleware_invoke(self,
                                          *args: Any,
diff --git a/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware_config.py b/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware_config.py
index 316c2aecf6..1ad122c773 100644
--- a/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware_config.py
+++ b/packages/nvidia_nat_core/src/nat/middleware/cache/cache_middleware_config.py
@@ -20,7 +20,6 @@
 
 from nat.data_models.middleware import FunctionMiddlewareBaseConfig
 from nat.middleware.cache.cache_middleware import _DEFAULT_MAX_CACHE_ENTRIES
-from nat.middleware.cache.cache_middleware import _MIN_FUZZY_THRESHOLD
 
 
 class CacheMiddlewareConfig(FunctionMiddlewareBaseConfig, name="cache"):
@@ -33,13 +32,13 @@ class CacheMiddlewareConfig(FunctionMiddlewareBaseConfig, name="cache"):
         enabled_mode: Controls when caching is active:
             - "always": Cache is always enabled
             - "eval": Cache only active when Context.is_evaluating is True
-        similarity_threshold: Float in [_MIN_FUZZY_THRESHOLD, 1.0] for input
-            matching:
+        similarity_threshold: Float in [0, 1.0] for input matching:
             - 1.0: Exact string matching (fastest, recommended)
-            - >= _MIN_FUZZY_THRESHOLD: Fuzzy matching via difflib. Values
-              below this bound are rejected as a cache-poisoning risk —
-              crafted inputs at lower thresholds can collide with a
-              legitimate user's cached key.
+            - < 1.0: Fuzzy matching via difflib. Note that difflib is
+              quadratic in the worst case, so large caches with low
+              thresholds may have a performance cost. Values near 0
+              increase the risk of cache collisions where different
+              inputs return the same cached response.
         max_entries: Upper bound on cached entries. When exceeded, the
             least-recently-used entry is evicted. Must be a positive int;
             defaults to _DEFAULT_MAX_CACHE_ENTRIES.
@@ -49,12 +48,14 @@ class CacheMiddlewareConfig(FunctionMiddlewareBaseConfig, name="cache"):
         default="eval", description="When caching is enabled: 'always' or 'eval' (only during evaluation)")
     similarity_threshold: float = Field(
         default=1.0,
-        ge=_MIN_FUZZY_THRESHOLD,
+        ge=0,
         le=1.0,
         description=(
-            f"Similarity threshold in [{_MIN_FUZZY_THRESHOLD}, 1.0]. Use 1.0 for exact matching "
-            "(recommended). Lower values enable fuzzy matching but are bounded below to prevent "
-            "cache-poisoning collisions with legitimate cached keys."),
+            "Similarity threshold in [0, 1.0]. Use 1.0 for exact matching (recommended). "
+            "Lower values enable fuzzy matching via difflib; note that difflib is quadratic "
+            "in the worst case, so large caches with low thresholds may have a performance "
+            "cost. Values near 0 increase the risk of cache collisions where different "
+            "inputs return the same cached response."),
     )
     max_entries: int = Field(
         default=_DEFAULT_MAX_CACHE_ENTRIES,
diff --git a/packages/nvidia_nat_core/tests/nat/middleware/test_cache_middleware.py b/packages/nvidia_nat_core/tests/nat/middleware/test_cache_middleware.py
index 89b5b5aee6..aa5cb2a034 100644
--- a/packages/nvidia_nat_core/tests/nat/middleware/test_cache_middleware.py
+++ b/packages/nvidia_nat_core/tests/nat/middleware/test_cache_middleware.py
@@ -62,7 +62,6 @@ def test_default_initialization(self):
 
     def test_custom_initialization(self):
         """Test custom initialization."""
-        # Use 0.9 (above the enforced minimum) to exercise non-default fuzzy mode.
         middleware = CacheMiddleware(enabled_mode="always", similarity_threshold=0.9)
         # Check attributes are set
         assert hasattr(middleware, '_enabled_mode')
@@ -109,11 +108,7 @@ async def mock_next_call(*args, **kwargs):
         assert result3.result == "Result for test"
 
     async def test_fuzzy_match_caching(self, middleware_context):
-        """Test fuzzy matching with similarity_threshold < 1.0.
-
-        Uses 0.9 (above the enforced minimum) — 0.8 is no longer a valid
-        threshold after the cache-poisoning hardening.
-        """
+        """Test fuzzy matching with similarity_threshold < 1.0."""
         middleware = CacheMiddleware(enabled_mode="always", similarity_threshold=0.9)
 
         call_count = 0
@@ -272,9 +267,6 @@ async def mock_next_call(*args, **kwargs):
 
     def test_similarity_computation_for_different_thresholds(self):
         """Test similarity computation for different thresholds."""
-        # This is more of a unit test for the similarity logic.
-        # Uses 0.9 (above the enforced minimum) to exercise fuzzy matching
-        # without enabling cache-poisoning-prone low thresholds.
         middleware = CacheMiddleware(enabled_mode="always", similarity_threshold=0.9)
 
         # Directly test internal methods
@@ -291,11 +283,7 @@ def test_similarity_computation_for_different_thresholds(self):
         assert middleware._find_similar_key("xyz123abc") is None  # noqa
 
     async def test_multiple_similar_entries(self, middleware_context):
-        """Test behavior with multiple similar cached entries.
-
-        Uses 0.85 (the enforced minimum) instead of the original 0.7 —
-        below 0.85 is now rejected as a cache-poisoning risk.
-        """
+        """Test behavior with multiple similar cached entries."""
         middleware = CacheMiddleware(enabled_mode="always", similarity_threshold=0.85)
 
         # Pre-populate cache with similar entries
@@ -319,47 +307,6 @@ async def mock_next_call(*args, **kwargs):
         # The exact behavior depends on which cached key is most similar
 
 
-class TestSimilarityThresholdFloor:
-    """The constructor must reject similarity thresholds below the safe floor.
-
-    Below ~0.85, crafting an input whose difflib ratio exceeds the threshold
-    against a legitimate cached key is trivial (small edits, common prefixes,
-    shared structural tokens). Accepting those values silently produces a
-    cache where one caller can hijack another caller's response.
-    """
-
-    @pytest.mark.parametrize("threshold", [0.0, 0.3, 0.5, 0.7, 0.84])
-    def test_below_floor_is_rejected(self, threshold):
-        with pytest.raises(ValueError, match="outside the safe range"):
-            CacheMiddleware(enabled_mode="always", similarity_threshold=threshold)
-
-    @pytest.mark.parametrize("threshold", [0.85, 0.9, 0.95, 1.0])
-    def test_at_or_above_floor_is_allowed(self, threshold):
-        mw = CacheMiddleware(enabled_mode="always", similarity_threshold=threshold)
-        assert mw._similarity_threshold == threshold  # noqa: SLF001
-
-    def test_threshold_above_one_is_rejected(self):
-        with pytest.raises(ValueError, match="outside the safe range"):
-            CacheMiddleware(enabled_mode="always", similarity_threshold=1.5)
-
-    def test_threshold_non_numeric_is_rejected(self):
-        with pytest.raises(ValueError, match="must be a number"):
-            CacheMiddleware(enabled_mode="always", similarity_threshold="high")  # type: ignore[arg-type]
-
-    @pytest.mark.parametrize("bad_bool", [True, False])
-    def test_threshold_bool_is_rejected(self, bad_bool):
-        """`isinstance(True, int)` is True in Python — reject bools explicitly
-        so a config with the wrong key type doesn't silently become 1.0 or 0.0."""
-        with pytest.raises(ValueError, match="got bool"):
-            CacheMiddleware(enabled_mode="always", similarity_threshold=bad_bool)  # type: ignore[arg-type]
-
-    @pytest.mark.parametrize("bad_value", [float("nan"), float("inf"), float("-inf")])
-    def test_threshold_non_finite_is_rejected(self, bad_value):
-        """NaN, +inf, -inf must be rejected before the range comparison."""
-        with pytest.raises(ValueError, match="must be finite"):
-            CacheMiddleware(enabled_mode="always", similarity_threshold=bad_value)
-
-
 class TestMaxEntriesLruEviction:
     """The cache must bound its size to prevent memory-exhaustion DoS.
 
@@ -372,24 +319,6 @@ async def test_default_max_entries_is_positive(self):
         mw = CacheMiddleware(enabled_mode="always", similarity_threshold=1.0)
         assert mw._max_entries > 0  # noqa: SLF001
 
-    def test_zero_max_entries_is_rejected(self):
-        with pytest.raises(ValueError, match="positive integer"):
-            CacheMiddleware(enabled_mode="always", similarity_threshold=1.0, max_entries=0)
-
-    def test_negative_max_entries_is_rejected(self):
-        with pytest.raises(ValueError, match="positive integer"):
-            CacheMiddleware(enabled_mode="always", similarity_threshold=1.0, max_entries=-5)
-
-    @pytest.mark.parametrize("bad_bool", [True, False])
-    def test_bool_max_entries_is_rejected(self, bad_bool):
-        """Same bool-as-int foot-gun protection as similarity_threshold."""
-        with pytest.raises(ValueError, match="positive integer"):
-            CacheMiddleware(
-                enabled_mode="always",
-                similarity_threshold=1.0,
-                max_entries=bad_bool,  # type: ignore[arg-type]
-            )
-
     async def test_cache_evicts_oldest_when_exceeding_max_entries(self, middleware_context):
         """Insert more unique entries than max_entries; verify size stays bounded."""
         mw = CacheMiddleware(