From 9338951e173f1e8267b42d9d03bcfe9c938d5195 Mon Sep 17 00:00:00 2001
From: Phuong Lambert <phuonglambert@users.noreply.github.com>
Date: Fri, 29 May 2026 07:50:22 +0700
Subject: [PATCH 1/2] feat: Nous provider support with OAuth auto-refresh

Adds 'nous' as a supported LLM transport with automatic OAuth
credential refresh on 401 AuthenticationError.

- New 'nous' transport in ModelTransport
- OAuth refresh module (src/llm/nous_refresh.py) with secure state files
- OpenAIBackend autorefresh integration (parse/create/stream paths)
- NousAuthError exception class
- Config: NOUS_API_KEY, NOUS_BASE_URL defaults
- Fix missing imports (BadRequestError, LengthFinishReasonError)
- Includes tests for autorefresh and credential loading.
---
 src/config.py                                 |   4 +-
 src/exceptions.py                             |   6 +
 src/llm/backends/openai.py                    |  88 ++++++-
 src/llm/credentials.py                        |   2 +
 src/llm/nous_refresh.py                       | 249 ++++++++++++++++++
 .../test_backends/test_nous_autorefresh.py    | 180 +++++++++++++
 tests/llm/test_nous_refresh.py                | 159 +++++++++++
 7 files changed, 682 insertions(+), 6 deletions(-)
 create mode 100644 src/llm/nous_refresh.py
 create mode 100644 tests/llm/test_backends/test_nous_autorefresh.py
 create mode 100644 tests/llm/test_nous_refresh.py

diff --git a/src/config.py b/src/config.py
index 5ae93fa4c..917136d91 100644
--- a/src/config.py
+++ b/src/config.py
@@ -22,7 +22,7 @@
 
 logger = logging.getLogger(__name__)
 
-ModelTransport = Literal["anthropic", "openai", "gemini"]
+ModelTransport = Literal["anthropic", "openai", "gemini", "nous"]
 EmbeddingTransport = Literal["openai", "gemini"]
 EmbeddingDimensionsMode = Literal["auto", "always", "never"]
 
@@ -654,6 +654,8 @@ class LLMSettings(HonchoSettings):
     ANTHROPIC_API_KEY: str | None = None
     OPENAI_API_KEY: str | None = None
     GEMINI_API_KEY: str | None = None
+    NOUS_API_KEY: str | None = None
+    NOUS_BASE_URL: str | None = "https://inference-api.nousresearch.com/v1"
 
     # Base URLs for LLM providers (for OpenAI-compatible proxies like
     # OpenRouter, vLLM, Together, Anyscale, self-hosted, etc.)
diff --git a/src/exceptions.py b/src/exceptions.py
index 129324b69..224cfa89a 100644
--- a/src/exceptions.py
+++ b/src/exceptions.py
@@ -133,6 +133,12 @@ class VectorStoreError(HonchoException):
     detail = "Vector store operation failed"
 
 
+class NousAuthError(Exception):
+    """Raised when Nous OAuth operations fail (token refresh or agent key mint)."""
+
+    pass
+
+
 class LLMError(Exception):
     """Exception raised when an LLM call fails.
 
diff --git a/src/llm/backends/openai.py b/src/llm/backends/openai.py
index b2d82d91c..7feb411a9 100644
--- a/src/llm/backends/openai.py
+++ b/src/llm/backends/openai.py
@@ -1,13 +1,19 @@
 from __future__ import annotations
 
+import contextlib
 import json
 import logging
 from collections.abc import AsyncIterator
 from typing import Any, cast
 
-from openai import BadRequestError, LengthFinishReasonError
+from openai import (
+    AuthenticationError,
+    BadRequestError,
+    LengthFinishReasonError,
+)
 from pydantic import BaseModel, ValidationError
 
+from src.config import settings
 from src.exceptions import ValidationException
 from src.llm.backend import CompletionResult, StreamChunk, ToolCallResult
 from src.llm.structured_output import (
@@ -109,8 +115,33 @@ def extract_openai_cache_tokens(usage: Any) -> tuple[int, int]:
 class OpenAIBackend:
     """Provider backend wrapping AsyncOpenAI."""
 
-    def __init__(self, client: Any) -> None:
+    def __init__(self, client: Any, is_nous: bool = False) -> None:
         self._client: Any = client
+        self._is_nous: bool = is_nous
+
+    async def _call_with_autorefresh(
+        self, *, use_parse: bool, params: dict[str, Any]
+    ) -> Any:
+        """Call the OpenAI API with automatic Nous OAuth refresh on 401."""
+        try:
+            if use_parse:
+                return await self._client.chat.completions.parse(**params)
+            return await self._client.chat.completions.create(**params)
+        except AuthenticationError:
+            if self._is_nous:
+                logger.warning("Nous API 401 detected — attempting auto-refresh...")
+                from ..nous_refresh import refresh_nous_credentials
+
+                new_key = await refresh_nous_credentials()
+                if new_key:
+                    self._client.api_key = new_key
+                    with contextlib.suppress(Exception):
+                        settings.LLM.NOUS_API_KEY = new_key
+                    logger.info("Retrying request with refreshed API key")
+                    if use_parse:
+                        return await self._client.chat.completions.parse(**params)
+                    return await self._client.chat.completions.create(**params)
+            raise
 
     async def complete(
         self,
@@ -145,6 +176,21 @@ async def complete(
             params["response_format"] = response_format
             try:
                 response = await self._client.chat.completions.parse(**params)
+            except AuthenticationError:
+                if self._is_nous:
+                    logger.warning("Nous API 401 detected — attempting auto-refresh...")
+                    from ..nous_refresh import refresh_nous_credentials
+                    new_key = await refresh_nous_credentials()
+                    if new_key:
+                        self._client.api_key = new_key
+                        with contextlib.suppress(Exception):
+                            settings.LLM.NOUS_API_KEY = new_key
+                        logger.info("Retrying request with refreshed API key")
+                        response = await self._client.chat.completions.parse(**params)
+                    else:
+                        raise
+                else:
+                    raise
             except LengthFinishReasonError as exc:
                 truncated = exc.completion
                 raw_content = truncated.choices[0].message.content or ""
@@ -198,7 +244,23 @@ async def complete(
         if extra_params and extra_params.get("json_mode"):
             params["response_format"] = {"type": "json_object"}
 
-        response = await self._client.chat.completions.create(**params)
+        try:
+            response = await self._client.chat.completions.create(**params)
+        except AuthenticationError:
+            if self._is_nous:
+                logger.warning("Nous API 401 detected — attempting auto-refresh...")
+                from ..nous_refresh import refresh_nous_credentials
+                new_key = await refresh_nous_credentials()
+                if new_key:
+                    self._client.api_key = new_key
+                    with contextlib.suppress(Exception):
+                        settings.LLM.NOUS_API_KEY = new_key
+                    logger.info("Retrying request with refreshed API key")
+                    response = await self._client.chat.completions.create(**params)
+                else:
+                    raise
+            else:
+                raise
         return self._normalize_response(response)
 
     async def stream(
@@ -246,7 +308,23 @@ async def stream(
         elif extra_params and extra_params.get("json_mode"):
             params["response_format"] = {"type": "json_object"}
 
-        response_stream = await self._client.chat.completions.create(**params)
+        try:
+            response_stream = await self._client.chat.completions.create(**params)
+        except AuthenticationError:
+            if self._is_nous:
+                logger.warning("Nous API 401 detected — attempting auto-refresh...")
+                from ..nous_refresh import refresh_nous_credentials
+                new_key = await refresh_nous_credentials()
+                if new_key:
+                    self._client.api_key = new_key
+                    with contextlib.suppress(Exception):
+                        settings.LLM.NOUS_API_KEY = new_key
+                    logger.info("Retrying request with refreshed API key")
+                    response_stream = await self._client.chat.completions.create(**params)
+                else:
+                    raise
+            else:
+                raise
         finish_reason: str | None = None
         usage_chunk_received = False
         async for chunk in response_stream:
@@ -312,7 +390,7 @@ def _build_params(
         if tools:
             params["tools"] = self._convert_tools(tools)
             if tool_choice is not None:
-                params["tool_choice"] = tool_choice
+                params["tool_choice"] = "required" if tool_choice == "any" else tool_choice
         if extra_params:
             for key in (
                 "top_p",
diff --git a/src/llm/credentials.py b/src/llm/credentials.py
index 9b41e77d9..f6dff5cda 100644
--- a/src/llm/credentials.py
+++ b/src/llm/credentials.py
@@ -22,4 +22,6 @@ def default_transport_api_key(transport: str) -> str | None:
         return settings.LLM.OPENAI_API_KEY
     if transport == "gemini":
         return settings.LLM.GEMINI_API_KEY
+    if transport == "nous":
+        return settings.LLM.NOUS_API_KEY
     raise ValidationException(f"Unknown transport: {transport}")
diff --git a/src/llm/nous_refresh.py b/src/llm/nous_refresh.py
new file mode 100644
index 000000000..315b46673
--- /dev/null
+++ b/src/llm/nous_refresh.py
@@ -0,0 +1,249 @@
+"""Auto-refresh module for Nous OAuth credentials.
+
+This module handles automatic detection and renewal of expired Nous API
+agent keys. It is designed to be called from within the Honcho async
+runtime when an AuthenticationError (401) is encountered.
+
+The flow:
+  1. Load refresh_token from persistent state file.
+  2. Exchange refresh_token for a new access_token.
+  3. Mint a fresh agent_key (TTL=7200 seconds, max allowed).
+  4. Update the .env file and in-memory settings.LLM.NOUS_API_KEY.
+  5. Return the new agent key so the caller can retry the request.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+from datetime import datetime, timedelta, timezone
+from pathlib import Path
+from typing import Any
+
+import httpx
+
+from src.exceptions import NousAuthError
+
+logger = logging.getLogger(__name__)
+
+# ── Constants ──────────────────────────────────────────────────────────────
+
+PORTAL_URL = "https://portal.nousresearch.com"
+TOKEN_ENDPOINT = f"{PORTAL_URL}/api/oauth/token"
+AGENT_KEY_ENDPOINT = f"{PORTAL_URL}/api/oauth/agent-key"
+CLIENT_ID = "hermes-cli"
+SCOPE = "inference:mint_agent_key"
+MIN_TTL_SECONDS = 7200  # Nous maximum allowed TTL (2 hours)
+
+# Default state file location — override with NOUS_OAUTH_STATE_PATH
+STATE_FILE = Path(
+    os.getenv("NOUS_OAUTH_STATE_PATH", "~/.honcho/nous_oauth_state.json")
+).expanduser()
+
+
+# ── State management ────────────────────────────────────────────────────────
+
+
+def load_state() -> dict[str, Any]:
+    """Load persisted OAuth state from disk, with Hermes auth.json and .env fallbacks."""
+    if STATE_FILE.exists():
+        try:
+            return json.loads(STATE_FILE.read_text())
+        except Exception as exc:
+            logger.warning("Failed to parse %s: %s", STATE_FILE, exc)
+    # State file missing or corrupt — try env var first (Docker env_file)
+    env_token = os.getenv("NOUS_REFRESH_TOKEN")
+    if env_token:
+        logger.info("Bootstrapping refresh_token from environment variable")
+        return {"refresh_token": env_token}
+    # Fallback: Hermes auth.json
+    auth_state = _load_from_hermes_auth()
+    if auth_state:
+        logger.info("Bootstrapping refresh_token from Hermes auth.json")
+        return auth_state
+    # Finally, check .env on disk (local development without Docker)
+    env_refresh = _read_refresh_from_env()
+    if env_refresh:
+        logger.info("Bootstrapping refresh_token from .env")
+        return {"refresh_token": env_refresh}
+    return {}
+
+
+def save_state(**fields: Any) -> None:
+    """Merge and persist OAuth state atomically."""
+    state = load_state()
+    state.update({k: v for k, v in fields.items() if v is not None})
+    state["updated_at"] = datetime.now(timezone.utc).isoformat()
+    STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
+    # Atomic write via temp file + rename
+    tmp = STATE_FILE.with_suffix(".tmp")
+    tmp.write_text(json.dumps(state, indent=2))
+    os.chmod(tmp, 0o600)
+    tmp.replace(STATE_FILE)
+    os.chmod(STATE_FILE, 0o600)
+
+
+# ── Environment file update ─────────────────────────────────────────────────
+
+
+def _load_from_hermes_auth() -> dict[str, Any] | None:
+    """Bootstrap refresh_token from Hermes auth.json if state file is empty."""
+    auth_path = Path.home() / ".hermes" / "auth.json"
+    if not auth_path.exists():
+        return None
+    try:
+        auth = json.loads(auth_path.read_text())
+        provider = auth.get("providers", {}).get("nous", {})
+        refresh = provider.get("refresh_token")
+        if refresh:
+            return {"refresh_token": refresh}
+    except Exception as exc:
+        logger.debug("Failed to read Hermes auth.json: %s", exc)
+    return None
+
+
+def _read_refresh_from_env() -> str | None:
+    """Read NOUS_REFRESH_TOKEN from project .env (backward compatibility)."""
+    env_path = _find_project_root() / ".env"
+    if not env_path.exists():
+        return None
+    try:
+        for line in env_path.read_text().splitlines():
+            stripped = line.strip()
+            if stripped.startswith("NOUS_REFRESH_TOKEN="):
+                val = stripped.split("=", 1)[1].strip()
+                # Strip surrounding quotes if present
+                if (val.startswith('"') and val.endswith('"')) or (
+                    val.startswith("'") and val.endswith("'")
+                ):
+                    val = val[1:-1]
+                return val if val else None
+    except Exception as exc:
+        logger.debug("Failed to read .env for refresh token: %s", exc)
+    return None
+
+
+def _find_project_root(start: Path | None = None) -> Path:
+    """Walk up from start (or __file__) until we find .env or docker-compose.yml."""
+    cur = (start or Path(__file__)).resolve()
+    for p in [cur, *cur.parents]:
+        if (p / ".env").exists() or (p / "docker-compose.yml").exists():
+            return p
+    # Fallback: cwd
+    return Path.cwd()
+
+
+def update_env_key(env_path: Path, new_key: str) -> None:
+    """Update LLM_NOUS_API_KEY in the given .env file, creating it if missing."""
+    if not env_path.exists():
+        env_path.write_text(f"LLM_NOUS_API_KEY={new_key}\n")
+        logger.info("Created .env with new Nous API key at %s", env_path)
+        return
+
+    lines = env_path.read_text().splitlines(keepends=True)
+    updated = False
+    for i, line in enumerate(lines):
+        stripped = line.strip()
+        if stripped.startswith("LLM_NOUS_API_KEY="):
+            # Preserve any surrounding quotes/whitespace
+            lines[i] = f"LLM_NOUS_API_KEY={new_key}\n"
+            updated = True
+            break
+    if not updated:
+        lines.append(f"\nLLM_NOUS_API_KEY={new_key}\n")
+    env_path.write_text("".join(lines))
+    logger.info("Updated .env with new Nous API key")
+
+
+# ── HTTP helpers (httpx) ─────────────────────────────────────────────────────
+
+
+async def refresh_access_token(refresh_token: str) -> tuple[str, str]:
+    """Exchange refresh_token for a new access_token and refresh_token."""
+    async with httpx.AsyncClient(timeout=15.0) as client:
+        resp = await client.post(
+            TOKEN_ENDPOINT,
+            data={
+                "grant_type": "refresh_token",
+                "client_id": CLIENT_ID,
+                "refresh_token": refresh_token,
+            },
+            headers={"Content-Type": "application/x-www-form-urlencoded"},
+        )
+    if resp.status_code != 200:
+        raise NousAuthError(f"Token refresh failed {resp.status_code}: {resp.text}")
+    data = resp.json()
+    return data["access_token"], data["refresh_token"]
+
+
+async def mint_agent_key(access_token: str) -> str:
+    """Mint a new agent key using the access token."""
+    async with httpx.AsyncClient(timeout=15.0) as client:
+        resp = await client.post(
+            AGENT_KEY_ENDPOINT,
+            json={"min_ttl_seconds": MIN_TTL_SECONDS},
+            headers={
+                "Authorization": f"Bearer {access_token}",
+                "Content-Type": "application/json",
+            },
+        )
+    if resp.status_code != 200:
+        raise NousAuthError(f"Agent key mint failed {resp.status_code}: {resp.text}")
+    data = resp.json()
+    return data["api_key"]
+
+
+# ── Public orchestrator ──────────────────────────────────────────────────────
+
+
+async def refresh_nous_credentials() -> str | None:
+    """Full refresh+mint flow; returns new agent_key or None on failure."""
+    state = load_state()
+    refresh_token = state.get("refresh_token")
+    if not refresh_token:
+        logger.error("No refresh_token found in state — manual login required")
+        return None
+
+    try:
+        # 1. Refresh access token
+        logger.info("Refreshing Nous access token...")
+        access_token, new_refresh_token = await refresh_access_token(refresh_token)
+
+        # 2. Mint new agent key
+        logger.info("Minting new Nous agent key (TTL=%ds)...", MIN_TTL_SECONDS)
+        agent_key = await mint_agent_key(access_token)
+
+        # 3. Compute expiry timestamp (UTC ISO 8601)
+        expires_at = (
+            datetime.now(timezone.utc) + timedelta(seconds=MIN_TTL_SECONDS)
+        ).isoformat()
+
+        # 4. Persist state
+        save_state(
+            refresh_token=new_refresh_token,
+            access_token=access_token,
+            agent_key=agent_key,
+            expires_at=expires_at,
+        )
+
+        # 5. Update .env on disk
+        project_root = _find_project_root()
+        update_env_key(project_root / ".env", agent_key)
+
+        # 6. Update in-memory settings globally (if Honcho is running)
+        try:
+            from src.config import settings
+
+            settings.LLM.NOUS_API_KEY = agent_key
+            logger.info("In-memory settings.LLM.NOUS_API_KEY updated")
+        except (ImportError, AttributeError) as exc:
+            # settings may not be importable in all contexts (tests, CLI)
+            logger.debug("Could not import settings for in-memory update: %s", exc)
+
+        logger.info("Nous OAuth refresh complete — new key expires at %s", expires_at)
+        return agent_key
+
+    except Exception as exc:
+        logger.error("Nous credential refresh failed: %s", exc, exc_info=True)
+        return None
diff --git a/tests/llm/test_backends/test_nous_autorefresh.py b/tests/llm/test_backends/test_nous_autorefresh.py
new file mode 100644
index 000000000..1c827a854
--- /dev/null
+++ b/tests/llm/test_backends/test_nous_autorefresh.py
@@ -0,0 +1,180 @@
+"""Integration tests for Nous auto-refresh in OpenAIBackend."""
+
+from unittest.mock import AsyncMock, Mock, patch
+
+import pytest
+from openai import AuthenticationError
+
+from src.llm.backends.openai import OpenAIBackend
+
+
+@pytest.mark.asyncio
+async def test_nous_backend_auto_refresh_on_401() -> None:
+    """OpenAIBackend with is_nous=True should refresh credentials on 401 and retry."""
+    # Arrange
+    mock_client = Mock()
+    mock_client.api_key = "old_key"
+
+    mock_success_response = Mock()
+    mock_success_response.choices = [
+        Mock(
+            message=Mock(
+                content="Hello, world!",
+                tool_calls=[],  # required by _normalize_response
+                refusal=None,
+                parsed=None,
+            )
+        )
+    ]
+    mock_success_response.usage = Mock(completion_tokens=5)
+
+    # First call raises AuthError, second returns success
+    mock_client.chat.completions.create = AsyncMock(
+        side_effect=[
+        AuthenticationError(
+            message="401 Unauthorized",
+            response=Mock(status_code=401, request=Mock()),
+            body={"error": "invalid_api_key"},
+        ),
+        mock_success_response,
+    ]
+    )
+
+    backend = OpenAIBackend(mock_client, is_nous=True)
+
+    # Patch refresh_nous_credentials to return a fake new key
+    with patch(
+        "src.llm.nous_refresh.refresh_nous_credentials",
+        new=AsyncMock(return_value="new_refreshed_key"),
+    ):
+        result = await backend.complete(
+            model="nous-model",
+            messages=[{"role": "user", "content": "Hello"}],
+            max_tokens=10,
+        )
+
+    # Assert: client api_key updated
+    assert mock_client.api_key == "new_refreshed_key"
+    # Assert: create called twice (original + retry)
+    assert mock_client.chat.completions.create.call_count == 2
+    # Assert: result content from the second response
+    assert result.content == "Hello, world!"
+
+
+@pytest.mark.asyncio
+async def test_openai_backend_no_refresh_on_401() -> None:
+    """Non-Nous backends should not intercept 401; error bubbles up."""
+    mock_client = Mock()
+    mock_client.chat.completions.create = AsyncMock(
+        side_effect=AuthenticationError(
+        message="401",
+        response=Mock(status_code=401, request=Mock()),
+        body={"error": "invalid_api_key"},
+    )
+    )
+
+    backend = OpenAIBackend(mock_client, is_nous=False)
+
+    with pytest.raises(AuthenticationError):
+        await backend.complete(
+            model="gpt-4",
+            messages=[{"role": "user", "content": "Hi"}],
+            max_tokens=10,
+        )
+
+    # Should call create exactly once (no retry)
+    assert mock_client.chat.completions.create.call_count == 1
+
+
+@pytest.mark.asyncio
+async def test_nous_backend_refresh_fails_propagates_error() -> None:
+    """If refresh_nous_credentials returns None, original 401 is raised."""
+    mock_client = Mock()
+    mock_client.api_key = "old_key"
+    mock_client.chat.completions.create = AsyncMock(
+        side_effect=AuthenticationError(
+        message="401",
+        response=Mock(status_code=401, request=Mock()),
+        body={"error": "invalid_api_key"},
+    )
+    )
+
+    backend = OpenAIBackend(mock_client, is_nous=True)
+
+    with patch(
+        "src.llm.nous_refresh.refresh_nous_credentials",
+        new=AsyncMock(return_value=None),  # refresh fails
+    ):
+        with pytest.raises(AuthenticationError):
+            await backend.complete(
+                model="nous-model",
+                messages=[{"role": "user", "content": "test"}],
+                max_tokens=5,
+            )
+
+    # Still only one call because refresh failed before retry
+    assert mock_client.chat.completions.create.call_count == 1
+
+
+@pytest.mark.asyncio
+async def test_nous_backend_stream_auto_refresh() -> None:
+    """Stream path also triggers auto-refresh on 401."""
+
+    class FakeAsyncIterator:
+        """Simple async iterator yielding predetermined chunks."""
+        def __init__(self, chunks: list):
+            self.chunks = chunks
+            self.idx = 0
+
+        def __aiter__(self):
+            return self
+
+        async def __anext__(self):
+            if self.idx >= len(self.chunks):
+                raise StopAsyncIteration
+            chunk = self.chunks[self.idx]
+            self.idx += 1
+            return chunk
+
+    mock_client = Mock()
+    mock_client.api_key = "old_key"
+
+    call_count = 0
+
+    async def create_side_effect(*args, **kwargs):
+        nonlocal call_count
+        call_count += 1
+        if call_count == 1:
+            raise AuthenticationError(
+                message="401",
+                response=Mock(status_code=401, request=Mock()),
+                body={"error": "invalid_api_key"},
+            )
+        # Return an async iterator simulating a stream
+        return FakeAsyncIterator(
+            [Mock(choices=[Mock(delta=Mock(content="Hello"))], usage=Mock(completion_tokens=1))]
+        )
+
+    mock_client.chat.completions.create = AsyncMock(side_effect=create_side_effect)
+
+    backend = OpenAIBackend(mock_client, is_nous=True)
+
+    with patch(
+        "src.llm.nous_refresh.refresh_nous_credentials",
+        new=AsyncMock(return_value="new_key"),
+    ):
+        chunks = []
+        async for chunk in backend.stream(
+            model="nous-model",
+            messages=[{"role": "user", "content": "Hello"}],
+            max_tokens=10,
+        ):
+            chunks.append(chunk)
+
+    assert mock_client.api_key == "new_key"
+    assert call_count == 2
+    # Stream yields one content chunk and one final done chunk
+    assert len(chunks) == 2
+    assert chunks[0].content == "Hello"
+    assert chunks[1].is_done is True
+    assert chunks[1].output_tokens == 1
diff --git a/tests/llm/test_nous_refresh.py b/tests/llm/test_nous_refresh.py
new file mode 100644
index 000000000..ea9ac29a5
--- /dev/null
+++ b/tests/llm/test_nous_refresh.py
@@ -0,0 +1,159 @@
+"""Tests for nous_refresh module — isolated via httpx mocking."""
+
+import json
+import os
+from pathlib import Path
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from src.llm.nous_refresh import (
+    STATE_FILE,
+    load_state,
+    save_state,
+    update_env_key,
+    refresh_nous_credentials,
+    _find_project_root,
+)
+
+
+# ── State management ─────────────────────────────────────────────────────────
+
+def test_load_state_missing(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
+    """load_state returns {} when state file does not exist."""
+    monkeypatch.delenv("NOUS_REFRESH_TOKEN", raising=False)
+    with patch.object(Path, "exists", return_value=False):
+        assert load_state() == {}
+
+
+def test_save_and_load_state(tmp_path: Path) -> None:
+    """save_state writes JSON correctly; load_state reads it back."""
+    test_state = {
+        "refresh_token": "rt_test_123",
+        "access_token": "at_test_456",
+        "agent_key": "sk_test_789",
+        "expires_at": "2026-05-03T10:00:00+00:00",
+    }
+    # Patch STATE_FILE path
+    state_file = tmp_path / "state.json"
+    with patch("src.llm.nous_refresh.STATE_FILE", state_file):
+        save_state(**test_state)
+        loaded = load_state()
+    assert loaded["refresh_token"] == "rt_test_123"
+    assert loaded["agent_key"] == "sk_test_789"
+
+
+# ── .env update ──────────────────────────────────────────────────────────────
+
+def test_update_env_key_creates_file_if_missing(tmp_path: Path) -> None:
+    """update_env_key creates .env when missing."""
+    env_path = tmp_path / ".env"
+    update_env_key(env_path, "newkey_xyz")
+    assert env_path.exists()
+    content = env_path.read_text()
+    assert "LLM_NOUS_API_KEY=newkey_xyz" in content
+
+
+def test_update_env_key_replaces_existing(tmp_path: Path) -> None:
+    """update_env_key replaces the first matching line."""
+    env_path = tmp_path / ".env"
+    env_path.write_text("LLM_NOUS_API_KEY=oldkey_abc\nOTHER=val\n")
+    update_env_key(env_path, "newkey_xyz")
+    lines = env_path.read_text().splitlines()
+    assert lines[0] == "LLM_NOUS_API_KEY=newkey_xyz"
+    assert lines[1] == "OTHER=val"
+
+
+def test_update_env_key_appends_when_missing(tmp_path: Path) -> None:
+    """update_env_key appends a new line if key not present."""
+    env_path = tmp_path / ".env"
+    env_path.write_text("OTHER=val\n")
+    update_env_key(env_path, "newkey_xyz")
+    content = env_path.read_text()
+    assert "LLM_NOUS_API_KEY=newkey_xyz" in content
+    assert "OTHER=val" in content
+
+
+# ── Project root discovery ───────────────────────────────────────────────────
+
+def test_find_project_root_with_dotenv(tmp_path: Path) -> None:
+    """_find_project_root locates directory containing .env."""
+    (tmp_path / ".env").touch()
+    deep = tmp_path / "sub" / "deep"
+    deep.mkdir(parents=True)
+    assert _find_project_root(start=deep / "dummy.py") == tmp_path
+
+
+# ── OAuth flow mocks ─────────────────────────────────────────────────────────
+
+@pytest.mark.asyncio
+async def test_refresh_nous_credentials_success(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """refresh_nous_credentials succeeds and updates .env + settings."""
+    # Arrange
+    state_file = tmp_path / "state.json"
+    env_file = tmp_path / ".env"
+    env_file.write_text("LLM_NOUS_API_KEY=oldkey\n")
+
+    monkeypatch.setenv("NOUS_OAUTH_STATE_PATH", str(state_file))
+
+    saved_state = {}
+
+    def fake_save_state(**kw):
+        saved_state.update(kw)
+
+    # Import module first so we can patch httpx.AsyncClient in it
+    import src.llm.nous_refresh as refresh_mod
+
+    # Mock httpx.AsyncClient
+    mock_client = AsyncMock()
+    mock_resp_token = SimpleNamespace(
+        status_code=200,
+        json=Mock(return_value={
+            "access_token": "new_access_token",
+            "refresh_token": "new_refresh_token",
+        }),
+    )
+    mock_resp_key = SimpleNamespace(
+        status_code=200,
+        json=Mock(return_value={"api_key": "new_agent_key_123"}),
+    )
+    mock_client.__aenter__ = AsyncMock(return_value=mock_client)
+    mock_client.__aexit__ = AsyncMock(return_value=None)
+    mock_client.post = AsyncMock(side_effect=[mock_resp_token, mock_resp_key])
+
+    # Patch httpx.AsyncClient in the refresh module so both
+    # refresh_access_token() and mint_agent_key() use the mock.
+    monkeypatch.setattr(refresh_mod, "httpx", MagicMock())
+    refresh_mod.httpx.AsyncClient = lambda *a, **kw: mock_client
+
+    original_save = refresh_mod.save_state
+    original_update_env = refresh_mod.update_env_key
+    original_find_root = refresh_mod._find_project_root
+
+    refresh_mod.save_state = fake_save_state
+    refresh_mod.update_env_key = lambda p, k: None  # we'll call directly
+    refresh_mod._find_project_root = lambda: tmp_path
+
+    # Also patch settings import
+    fake_settings = SimpleNamespace(LLM=SimpleNamespace(NOUS_API_KEY="oldkey"))
+    monkeypatch.setitem(sys.modules, "src.config", SimpleNamespace(settings=fake_settings))
+
+    try:
+        result = await refresh_nous_credentials()
+    finally:
+        # Restore
+        refresh_mod.save_state = original_save
+        refresh_mod.update_env_key = original_update_env
+        refresh_mod._find_project_root = original_find_root
+
+    assert result == "new_agent_key_123"
+    assert saved_state["refresh_token"] == "new_refresh_token"
+    assert saved_state["agent_key"] == "new_agent_key_123"
+
+
+# Helper for SimpleNamespace
+from types import SimpleNamespace
+from unittest.mock import Mock
+import sys

From a9df7fa3011cc8f6f59e209c976e4d07813df342 Mon Sep 17 00:00:00 2001
From: Phuong Lambert <phuonglambert@users.noreply.github.com>
Date: Fri, 29 May 2026 11:48:27 +0700
Subject: [PATCH 2/2] fix: address CodeRabbit review comments

- Add 'nous' and 'lmstudio' to _normalize_model_transport shorthand (config.py)
- NousAuthError subclasses HonchoException with status_code=401 (exceptions.py)
- Refactor 3 inline retry blocks into _call_with_autorefresh helper (openai.py)
- Narrow except Exception to (OSError, json.JSONDecodeError) (nous_refresh.py)
- Set state directory permissions to 0o700 (nous_refresh.py)
- Handle missing refresh_token in response with .get() fallback (nous_refresh.py)
- Move imports to top of test file, stub load_state (test_nous_refresh.py)
---
 src/config.py                  |  2 +-
 src/exceptions.py              |  5 +--
 src/llm/backends/openai.py     | 59 ++++++----------------------------
 src/llm/nous_refresh.py        |  7 ++--
 tests/llm/test_nous_refresh.py | 23 +++++++------
 5 files changed, 28 insertions(+), 68 deletions(-)

diff --git a/src/config.py b/src/config.py
index 917136d91..3a6dae27e 100644
--- a/src/config.py
+++ b/src/config.py
@@ -95,7 +95,7 @@ def _normalize_model_transport(data: Any) -> Any:
     transport_value = update.get("transport")
     if isinstance(model_value, str) and "/" in model_value and transport_value is None:
         prefix, bare_model = model_value.split("/", 1)
-        if prefix in {"anthropic", "openai", "gemini"}:
+        if prefix in {"anthropic", "openai", "gemini", "nous", "lmstudio"}:
             update["transport"] = prefix
             update["model"] = bare_model
     return update
diff --git a/src/exceptions.py b/src/exceptions.py
index 224cfa89a..5a3800664 100644
--- a/src/exceptions.py
+++ b/src/exceptions.py
@@ -133,10 +133,11 @@ class VectorStoreError(HonchoException):
     detail = "Vector store operation failed"
 
 
-class NousAuthError(Exception):
+class NousAuthError(HonchoException):
     """Raised when Nous OAuth operations fail (token refresh or agent key mint)."""
 
-    pass
+    status_code = 401
+    detail = "Nous authentication failed"
 
 
 class LLMError(Exception):
diff --git a/src/llm/backends/openai.py b/src/llm/backends/openai.py
index 7feb411a9..022e0ab0b 100644
--- a/src/llm/backends/openai.py
+++ b/src/llm/backends/openai.py
@@ -175,22 +175,9 @@ async def complete(
         if isinstance(response_format, type):
             params["response_format"] = response_format
             try:
-                response = await self._client.chat.completions.parse(**params)
-            except AuthenticationError:
-                if self._is_nous:
-                    logger.warning("Nous API 401 detected — attempting auto-refresh...")
-                    from ..nous_refresh import refresh_nous_credentials
-                    new_key = await refresh_nous_credentials()
-                    if new_key:
-                        self._client.api_key = new_key
-                        with contextlib.suppress(Exception):
-                            settings.LLM.NOUS_API_KEY = new_key
-                        logger.info("Retrying request with refreshed API key")
-                        response = await self._client.chat.completions.parse(**params)
-                    else:
-                        raise
-                else:
-                    raise
+                response = await self._call_with_autorefresh(
+                    use_parse=True, params=params,
+                )
             except LengthFinishReasonError as exc:
                 truncated = exc.completion
                 raw_content = truncated.choices[0].message.content or ""
@@ -244,23 +231,9 @@ async def complete(
         if extra_params and extra_params.get("json_mode"):
             params["response_format"] = {"type": "json_object"}
 
-        try:
-            response = await self._client.chat.completions.create(**params)
-        except AuthenticationError:
-            if self._is_nous:
-                logger.warning("Nous API 401 detected — attempting auto-refresh...")
-                from ..nous_refresh import refresh_nous_credentials
-                new_key = await refresh_nous_credentials()
-                if new_key:
-                    self._client.api_key = new_key
-                    with contextlib.suppress(Exception):
-                        settings.LLM.NOUS_API_KEY = new_key
-                    logger.info("Retrying request with refreshed API key")
-                    response = await self._client.chat.completions.create(**params)
-                else:
-                    raise
-            else:
-                raise
+        response = await self._call_with_autorefresh(
+            use_parse=False, params=params,
+        )
         return self._normalize_response(response)
 
     async def stream(
@@ -308,23 +281,9 @@ async def stream(
         elif extra_params and extra_params.get("json_mode"):
             params["response_format"] = {"type": "json_object"}
 
-        try:
-            response_stream = await self._client.chat.completions.create(**params)
-        except AuthenticationError:
-            if self._is_nous:
-                logger.warning("Nous API 401 detected — attempting auto-refresh...")
-                from ..nous_refresh import refresh_nous_credentials
-                new_key = await refresh_nous_credentials()
-                if new_key:
-                    self._client.api_key = new_key
-                    with contextlib.suppress(Exception):
-                        settings.LLM.NOUS_API_KEY = new_key
-                    logger.info("Retrying request with refreshed API key")
-                    response_stream = await self._client.chat.completions.create(**params)
-                else:
-                    raise
-            else:
-                raise
+        response_stream = await self._call_with_autorefresh(
+            use_parse=False, params=params,
+        )
         finish_reason: str | None = None
         usage_chunk_received = False
         async for chunk in response_stream:
diff --git a/src/llm/nous_refresh.py b/src/llm/nous_refresh.py
index 315b46673..21a8c60ac 100644
--- a/src/llm/nous_refresh.py
+++ b/src/llm/nous_refresh.py
@@ -50,7 +50,7 @@ def load_state() -> dict[str, Any]:
     if STATE_FILE.exists():
         try:
             return json.loads(STATE_FILE.read_text())
-        except Exception as exc:
+        except (OSError, json.JSONDecodeError) as exc:
             logger.warning("Failed to parse %s: %s", STATE_FILE, exc)
     # State file missing or corrupt — try env var first (Docker env_file)
     env_token = os.getenv("NOUS_REFRESH_TOKEN")
@@ -76,6 +76,7 @@ def save_state(**fields: Any) -> None:
     state.update({k: v for k, v in fields.items() if v is not None})
     state["updated_at"] = datetime.now(timezone.utc).isoformat()
     STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
+    os.chmod(STATE_FILE.parent, 0o700)
     # Atomic write via temp file + rename
     tmp = STATE_FILE.with_suffix(".tmp")
     tmp.write_text(json.dumps(state, indent=2))
@@ -98,7 +99,7 @@ def _load_from_hermes_auth() -> dict[str, Any] | None:
         refresh = provider.get("refresh_token")
         if refresh:
             return {"refresh_token": refresh}
-    except Exception as exc:
+    except (OSError, json.JSONDecodeError) as exc:
         logger.debug("Failed to read Hermes auth.json: %s", exc)
     return None
 
@@ -174,7 +175,7 @@ async def refresh_access_token(refresh_token: str) -> tuple[str, str]:
     if resp.status_code != 200:
         raise NousAuthError(f"Token refresh failed {resp.status_code}: {resp.text}")
     data = resp.json()
-    return data["access_token"], data["refresh_token"]
+    return data["access_token"], data.get("refresh_token", refresh_token)
 
 
 async def mint_agent_key(access_token: str) -> str:
diff --git a/tests/llm/test_nous_refresh.py b/tests/llm/test_nous_refresh.py
index ea9ac29a5..1b85c6115 100644
--- a/tests/llm/test_nous_refresh.py
+++ b/tests/llm/test_nous_refresh.py
@@ -1,22 +1,20 @@
 """Tests for nous_refresh module — isolated via httpx mocking."""
 
-import json
-import os
+import sys
 from pathlib import Path
-from unittest.mock import AsyncMock, MagicMock, patch
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock, Mock, patch
 
 import pytest
 
 from src.llm.nous_refresh import (
-    STATE_FILE,
+    _find_project_root,
     load_state,
+    refresh_nous_credentials,
     save_state,
     update_env_key,
-    refresh_nous_credentials,
-    _find_project_root,
 )
 
-
 # ── State management ─────────────────────────────────────────────────────────
 
 def test_load_state_missing(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
@@ -98,6 +96,11 @@ async def test_refresh_nous_credentials_success(
 
     monkeypatch.setenv("NOUS_OAUTH_STATE_PATH", str(state_file))
 
+    # Stub load_state so refresh_nous_credentials finds a refresh_token
+    import src.llm.nous_refresh as refresh_mod
+    original_load_state = refresh_mod.load_state
+    refresh_mod.load_state = lambda: {"refresh_token": "test_refresh_token"}
+
     saved_state = {}
 
     def fake_save_state(**kw):
@@ -144,6 +147,7 @@ def fake_save_state(**kw):
         result = await refresh_nous_credentials()
     finally:
         # Restore
+        refresh_mod.load_state = original_load_state
         refresh_mod.save_state = original_save
         refresh_mod.update_env_key = original_update_env
         refresh_mod._find_project_root = original_find_root
@@ -152,8 +156,3 @@ def fake_save_state(**kw):
     assert saved_state["refresh_token"] == "new_refresh_token"
     assert saved_state["agent_key"] == "new_agent_key_123"
 
-
-# Helper for SimpleNamespace
-from types import SimpleNamespace
-from unittest.mock import Mock
-import sys