Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

logger = logging.getLogger(__name__)

ModelTransport = Literal["anthropic", "openai", "gemini"]
ModelTransport = Literal["anthropic", "openai", "gemini", "nous"]
Comment thread
coderabbitai[bot] marked this conversation as resolved.
EmbeddingTransport = Literal["openai", "gemini"]
EmbeddingDimensionsMode = Literal["auto", "always", "never"]

Expand Down Expand Up @@ -95,7 +95,7 @@ def _normalize_model_transport(data: Any) -> Any:
transport_value = update.get("transport")
if isinstance(model_value, str) and "/" in model_value and transport_value is None:
prefix, bare_model = model_value.split("/", 1)
if prefix in {"anthropic", "openai", "gemini"}:
if prefix in {"anthropic", "openai", "gemini", "nous", "lmstudio"}:
update["transport"] = prefix
update["model"] = bare_model
return update
Expand Down Expand Up @@ -654,6 +654,8 @@ class LLMSettings(HonchoSettings):
ANTHROPIC_API_KEY: str | None = None
OPENAI_API_KEY: str | None = None
GEMINI_API_KEY: str | None = None
NOUS_API_KEY: str | None = None
NOUS_BASE_URL: str | None = "https://inference-api.nousresearch.com/v1"

# Base URLs for LLM providers (for OpenAI-compatible proxies like
# OpenRouter, vLLM, Together, Anyscale, self-hosted, etc.)
Expand Down
7 changes: 7 additions & 0 deletions src/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,13 @@ class VectorStoreError(HonchoException):
detail = "Vector store operation failed"


class NousAuthError(HonchoException):
    """Signal a failed Nous OAuth step.

    Raised by the credential-refresh helpers when either the refresh-token
    exchange or the agent-key mint request does not succeed.
    """

    # Values consumed by the HonchoException machinery for this error type.
    status_code = 401
    detail = "Nous authentication failed"


class LLMError(Exception):
"""Exception raised when an LLM call fails.

Expand Down
49 changes: 43 additions & 6 deletions src/llm/backends/openai.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
from __future__ import annotations

import contextlib
import json
import logging
from collections.abc import AsyncIterator
from typing import Any, cast

from openai import BadRequestError, LengthFinishReasonError
from openai import (
AuthenticationError,
BadRequestError,
LengthFinishReasonError,
)
from pydantic import BaseModel, ValidationError

from src.config import settings
from src.exceptions import ValidationException
from src.llm.backend import CompletionResult, StreamChunk, ToolCallResult
from src.llm.structured_output import (
Expand Down Expand Up @@ -109,8 +115,33 @@ def extract_openai_cache_tokens(usage: Any) -> tuple[int, int]:
class OpenAIBackend:
"""Provider backend wrapping AsyncOpenAI."""

def __init__(self, client: Any) -> None:
def __init__(self, client: Any, is_nous: bool = False) -> None:
self._client: Any = client
self._is_nous: bool = is_nous

async def _call_with_autorefresh(
    self, *, use_parse: bool, params: dict[str, Any]
) -> Any:
    """Call the OpenAI API with automatic Nous OAuth refresh on 401.

    Args:
        use_parse: When true, dispatch to ``chat.completions.parse``;
            otherwise to ``chat.completions.create``.
        params: Keyword arguments forwarded unchanged to the chosen call.

    Returns:
        Whatever the underlying client call returns.

    Raises:
        AuthenticationError: The original 401 when this backend is not a
            Nous backend, or when no fresh key could be obtained. Any error
            raised by the single retry propagates as-is.
    """

    async def _dispatch() -> Any:
        # Resolve the endpoint on every attempt so the retry goes through the
        # client after its api_key has been swapped.
        completions = self._client.chat.completions
        if use_parse:
            return await completions.parse(**params)
        return await completions.create(**params)

    try:
        return await _dispatch()
    except AuthenticationError:
        if not self._is_nous:
            raise
        logger.warning("Nous API 401 detected — attempting auto-refresh...")
        try:
            from ..nous_refresh import refresh_nous_credentials

            new_key = await refresh_nous_credentials()
        except Exception:
            # A broken refresh must not replace the 401 the caller expects.
            logger.exception("Nous credential refresh raised unexpectedly")
            new_key = None
        if not new_key:
            raise
        self._client.api_key = new_key
        try:
            settings.LLM.NOUS_API_KEY = new_key
        except Exception as exc:
            # Best effort only: the client already carries the new key.
            logger.debug("Could not update settings.LLM.NOUS_API_KEY: %s", exc)
        logger.info("Retrying request with refreshed API key")
        return await _dispatch()
Comment thread
coderabbitai[bot] marked this conversation as resolved.

async def complete(
self,
Expand Down Expand Up @@ -144,7 +175,9 @@ async def complete(
if isinstance(response_format, type):
params["response_format"] = response_format
try:
response = await self._client.chat.completions.parse(**params)
response = await self._call_with_autorefresh(
use_parse=True, params=params,
)
except LengthFinishReasonError as exc:
truncated = exc.completion
raw_content = truncated.choices[0].message.content or ""
Expand Down Expand Up @@ -198,7 +231,9 @@ async def complete(
if extra_params and extra_params.get("json_mode"):
params["response_format"] = {"type": "json_object"}

response = await self._client.chat.completions.create(**params)
response = await self._call_with_autorefresh(
use_parse=False, params=params,
)
return self._normalize_response(response)

async def stream(
Expand Down Expand Up @@ -246,7 +281,9 @@ async def stream(
elif extra_params and extra_params.get("json_mode"):
params["response_format"] = {"type": "json_object"}

response_stream = await self._client.chat.completions.create(**params)
response_stream = await self._call_with_autorefresh(
use_parse=False, params=params,
)
finish_reason: str | None = None
usage_chunk_received = False
async for chunk in response_stream:
Expand Down Expand Up @@ -312,7 +349,7 @@ def _build_params(
if tools:
params["tools"] = self._convert_tools(tools)
if tool_choice is not None:
params["tool_choice"] = tool_choice
params["tool_choice"] = "required" if tool_choice == "any" else tool_choice
if extra_params:
for key in (
"top_p",
Expand Down
2 changes: 2 additions & 0 deletions src/llm/credentials.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,6 @@ def default_transport_api_key(transport: str) -> str | None:
return settings.LLM.OPENAI_API_KEY
if transport == "gemini":
return settings.LLM.GEMINI_API_KEY
if transport == "nous":
return settings.LLM.NOUS_API_KEY
raise ValidationException(f"Unknown transport: {transport}")
250 changes: 250 additions & 0 deletions src/llm/nous_refresh.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
"""Auto-refresh module for Nous OAuth credentials.

This module handles automatic detection and renewal of expired Nous API
agent keys. It is designed to be called from within the Honcho async
runtime when an AuthenticationError (401) is encountered.

The flow:
1. Load refresh_token from persistent state file.
2. Exchange refresh_token for a new access_token.
3. Mint a fresh agent_key (TTL=7200 seconds, max allowed).
4. Update the .env file and in-memory settings.LLM.NOUS_API_KEY.
5. Return the new agent key so the caller can retry the request.
"""

from __future__ import annotations

import json
import logging
import os
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any

import httpx

from src.exceptions import NousAuthError

logger = logging.getLogger(__name__)

# ── Constants ──────────────────────────────────────────────────────────────

# Base URL of the Nous portal; the two OAuth endpoints below are derived from it.
PORTAL_URL = "https://portal.nousresearch.com"
TOKEN_ENDPOINT = f"{PORTAL_URL}/api/oauth/token"
AGENT_KEY_ENDPOINT = f"{PORTAL_URL}/api/oauth/agent-key"
# Client identifier sent with the refresh_token grant.
CLIENT_ID = "hermes-cli"
# NOTE(review): SCOPE is not referenced by the functions visible in this module — confirm it is still needed.
SCOPE = "inference:mint_agent_key"
# Sent as ``min_ttl_seconds`` when minting an agent key and reused to compute
# the stored expiry timestamp. 7200 s (2 hours) is the Nous maximum allowed
# TTL, so despite the "MIN" name this requests the longest-lived key.
MIN_TTL_SECONDS = 7200

# Default state file location — override with NOUS_OAUTH_STATE_PATH.
# Evaluated once, when this module is imported.
STATE_FILE = Path(
    os.getenv("NOUS_OAUTH_STATE_PATH", "~/.honcho/nous_oauth_state.json")
).expanduser()


# ── State management ────────────────────────────────────────────────────────


def load_state() -> dict[str, Any]:
    """Return the persisted Nous OAuth state, bootstrapping it when absent.

    Sources are consulted in order and the first usable one wins:

    1. ``STATE_FILE`` on disk (must contain a JSON object).
    2. The ``NOUS_REFRESH_TOKEN`` environment variable.
    3. Hermes ``auth.json`` (via ``_load_from_hermes_auth``).
    4. ``NOUS_REFRESH_TOKEN`` in the project ``.env`` file
       (via ``_read_refresh_from_env``).

    Returns:
        The state mapping, or an empty dict when no source yields anything.
    """
    if STATE_FILE.exists():
        try:
            loaded = json.loads(STATE_FILE.read_text())
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            logger.warning("Failed to parse %s: %s", STATE_FILE, exc)
        else:
            if isinstance(loaded, dict):
                return loaded
            # Valid JSON that is not an object would break every caller
            # that uses .get()/.update(); treat it like a corrupt file.
            logger.warning(
                "Ignoring %s: expected a JSON object, got %s",
                STATE_FILE,
                type(loaded).__name__,
            )
    # State file missing or corrupt — try env var first (Docker env_file)
    env_token = os.getenv("NOUS_REFRESH_TOKEN")
    if env_token:
        logger.info("Bootstrapping refresh_token from environment variable")
        return {"refresh_token": env_token}
    # Fallback: Hermes auth.json
    auth_state = _load_from_hermes_auth()
    if auth_state:
        logger.info("Bootstrapping refresh_token from Hermes auth.json")
        return auth_state
    # Finally, check .env on disk (local development without Docker)
    env_refresh = _read_refresh_from_env()
    if env_refresh:
        logger.info("Bootstrapping refresh_token from .env")
        return {"refresh_token": env_refresh}
    return {}


def save_state(**fields: Any) -> None:
    """Merge ``fields`` into the persisted OAuth state and write it atomically.

    Args:
        **fields: State entries to store. Entries whose value is ``None`` are
            ignored so they never overwrite an existing value.

    Raises:
        OSError: If the state directory or file cannot be written.
    """
    state = load_state()
    if not isinstance(state, dict):
        # Never let an unexpected on-disk shape block persisting fresh tokens.
        state = {}
    state.update({k: v for k, v in fields.items() if v is not None})
    state["updated_at"] = datetime.now(timezone.utc).isoformat()

    parent = STATE_FILE.parent
    parent.mkdir(parents=True, exist_ok=True)
    try:
        os.chmod(parent, 0o700)
    except OSError as exc:
        # Tightening is best effort: the directory may be a mounted volume or
        # otherwise not owned by this process, and that must not abort the save.
        logger.debug("Could not restrict permissions on %s: %s", parent, exc)

    # Atomic write via temp file + rename. The temp file is created 0600 from
    # the start so the tokens are never readable by other users, even briefly.
    tmp = STATE_FILE.with_suffix(".tmp")
    fd = os.open(tmp, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as handle:
        handle.write(json.dumps(state, indent=2))
    # A stale temp file left by an earlier crash keeps its old mode; reset it.
    os.chmod(tmp, 0o600)
    tmp.replace(STATE_FILE)
    os.chmod(STATE_FILE, 0o600)
Comment thread
coderabbitai[bot] marked this conversation as resolved.


# ── Bootstrap helpers and environment file update ───────────────────────────


def _load_from_hermes_auth() -> dict[str, Any] | None:
"""Bootstrap refresh_token from Hermes auth.json if state file is empty."""
auth_path = Path.home() / ".hermes" / "auth.json"
if not auth_path.exists():
return None
try:
auth = json.loads(auth_path.read_text())
provider = auth.get("providers", {}).get("nous", {})
refresh = provider.get("refresh_token")
if refresh:
return {"refresh_token": refresh}
except (OSError, json.JSONDecodeError) as exc:
logger.debug("Failed to read Hermes auth.json: %s", exc)
return None


def _read_refresh_from_env() -> str | None:
    """Look up ``NOUS_REFRESH_TOKEN`` in the project's ``.env`` file.

    Only the first line starting with ``NOUS_REFRESH_TOKEN=`` is considered.
    One pair of matching surrounding quotes is removed from its value.

    Returns:
        The token, or ``None`` when the file is missing, unreadable, has no
        such line, or the value is empty.
    """
    dotenv_file = _find_project_root() / ".env"
    if not dotenv_file.exists():
        return None

    marker = "NOUS_REFRESH_TOKEN="
    try:
        for raw_line in dotenv_file.read_text().splitlines():
            entry = raw_line.strip()
            if not entry.startswith(marker):
                continue
            token = entry[len(marker):].strip()
            # Strip surrounding quotes if present (one layer only)
            for quote in ('"', "'"):
                if token.startswith(quote) and token.endswith(quote):
                    token = token[1:-1]
                    break
            return token or None
    except Exception as exc:
        logger.debug("Failed to read .env for refresh token: %s", exc)
    return None


def _find_project_root(start: Path | None = None) -> Path:
"""Walk up from start (or __file__) until we find .env or docker-compose.yml."""
cur = (start or Path(__file__)).resolve()
for p in [cur, *cur.parents]:
if (p / ".env").exists() or (p / "docker-compose.yml").exists():
return p
# Fallback: cwd
return Path.cwd()


def update_env_key(env_path: Path, new_key: str) -> None:
    """Set ``LLM_NOUS_API_KEY`` in a dotenv file, creating the file if missing.

    Every existing assignment of the variable is rewritten, so a stale
    duplicate further down the file cannot shadow the new value. Assignments
    written as ``export LLM_NOUS_API_KEY=...`` or with spaces around ``=`` are
    recognised too; an ``export`` prefix is kept, while quotes and spacing
    around the old value are not. When the variable is absent, it is appended.

    Args:
        env_path: Path of the dotenv file to update.
        new_key: The API key to store.

    Raises:
        OSError: If the file cannot be read or written.
    """
    entry = f"LLM_NOUS_API_KEY={new_key}\n"
    if not env_path.exists():
        # The file holds a secret: create it owner-readable only.
        fd = os.open(env_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w") as handle:
            handle.write(entry)
        logger.info("Created .env with new Nous API key at %s", env_path)
        return

    lines = env_path.read_text().splitlines(keepends=True)
    updated = False
    for i, line in enumerate(lines):
        candidate = line.strip()
        prefix = ""
        if candidate.startswith("export "):
            prefix = "export "
            candidate = candidate[len(prefix):].lstrip()
        name, sep, _ = candidate.partition("=")
        if sep and name.strip() == "LLM_NOUS_API_KEY":
            lines[i] = prefix + entry
            updated = True
    if not updated:
        # Terminate a final line lacking a newline so the new entry starts on
        # its own line, without piling up blank lines on repeated appends.
        if lines and not lines[-1].endswith("\n"):
            lines[-1] += "\n"
        lines.append(entry)
    env_path.write_text("".join(lines))
    logger.info("Updated .env with new Nous API key")


# ── HTTP helpers (httpx) ─────────────────────────────────────────────────────


async def refresh_access_token(refresh_token: str) -> tuple[str, str]:
    """Exchange ``refresh_token`` for a new access token.

    Args:
        refresh_token: The currently stored OAuth refresh token.

    Returns:
        ``(access_token, refresh_token)``. The second item is the refresh
        token from the response, or the one passed in when the response does
        not carry a replacement.

    Raises:
        NousAuthError: On a non-200 response, or a 200 response whose body is
            not JSON or lacks ``access_token``.
    """
    async with httpx.AsyncClient(timeout=15.0) as client:
        resp = await client.post(
            TOKEN_ENDPOINT,
            data={
                "grant_type": "refresh_token",
                "client_id": CLIENT_ID,
                "refresh_token": refresh_token,
            },
            headers={"Content-Type": "application/x-www-form-urlencoded"},
        )
    if resp.status_code != 200:
        raise NousAuthError(f"Token refresh failed {resp.status_code}: {resp.text}")
    try:
        data = resp.json()
        access_token = data["access_token"]
    except (ValueError, KeyError, TypeError) as exc:
        # Report a malformed success body as an auth failure rather than
        # leaking a bare KeyError/JSON error to the caller.
        raise NousAuthError("Token refresh returned a malformed response") from exc
    # `or` also covers an explicit null refresh_token in the response.
    return access_token, data.get("refresh_token") or refresh_token


async def mint_agent_key(access_token: str) -> str:
    """Mint a new agent key using the access token.

    Args:
        access_token: Bearer token obtained from the refresh-token exchange.

    Returns:
        The ``api_key`` value from the agent-key endpoint response.

    Raises:
        NousAuthError: On a non-200 response, or a 200 response whose body is
            not JSON or lacks ``api_key``.
    """
    async with httpx.AsyncClient(timeout=15.0) as client:
        resp = await client.post(
            AGENT_KEY_ENDPOINT,
            json={"min_ttl_seconds": MIN_TTL_SECONDS},
            headers={
                "Authorization": f"Bearer {access_token}",
                "Content-Type": "application/json",
            },
        )
    if resp.status_code != 200:
        raise NousAuthError(f"Agent key mint failed {resp.status_code}: {resp.text}")
    try:
        return resp.json()["api_key"]
    except (ValueError, KeyError, TypeError) as exc:
        # Report a malformed success body as an auth failure rather than
        # leaking a bare KeyError/JSON error to the caller.
        raise NousAuthError("Agent key mint returned a malformed response") from exc


# ── Public orchestrator ──────────────────────────────────────────────────────


async def refresh_nous_credentials() -> str | None:
    """Run the full refresh + mint flow.

    Steps: load the stored refresh token, exchange it for an access token,
    mint an agent key, then persist the result (state file, ``.env``,
    in-memory settings).

    Obtaining the key is the only part that can fail the call. Persistence is
    best effort: once a key has been minted it is returned even when writing
    it to disk or to the settings object fails, so the caller can still retry
    its request.

    Returns:
        The new agent key, or ``None`` when no refresh token is available or
        the token exchange / key mint failed. Never raises.
    """
    try:
        state = load_state()
        refresh_token = state.get("refresh_token")
    except Exception:
        logger.exception("Failed to load Nous OAuth state")
        return None
    if not refresh_token:
        logger.error("No refresh_token found in state — manual login required")
        return None

    try:
        # 1. Refresh access token
        logger.info("Refreshing Nous access token...")
        access_token, new_refresh_token = await refresh_access_token(refresh_token)

        # Store a possibly rotated refresh token right away: if minting fails
        # below, the previous token may already be invalid on the server side.
        try:
            save_state(refresh_token=new_refresh_token, access_token=access_token)
        except Exception as exc:
            logger.warning("Could not persist refreshed Nous tokens: %s", exc)

        # 2. Mint new agent key
        logger.info("Minting new Nous agent key (TTL=%ds)...", MIN_TTL_SECONDS)
        agent_key = await mint_agent_key(access_token)
    except Exception as exc:
        logger.error("Nous credential refresh failed: %s", exc, exc_info=True)
        return None

    # 3. Compute expiry timestamp (UTC ISO 8601)
    expires_at = (
        datetime.now(timezone.utc) + timedelta(seconds=MIN_TTL_SECONDS)
    ).isoformat()

    # 4. Persist state (best effort — a read-only disk must not cost us the key)
    try:
        save_state(
            refresh_token=new_refresh_token,
            access_token=access_token,
            agent_key=agent_key,
            expires_at=expires_at,
        )
    except Exception as exc:
        logger.warning("Could not persist Nous OAuth state: %s", exc)

    # 5. Update .env on disk (best effort)
    try:
        update_env_key(_find_project_root() / ".env", agent_key)
    except Exception as exc:
        logger.warning("Could not update .env with new Nous API key: %s", exc)

    # 6. Update in-memory settings globally (if Honcho is running)
    try:
        from src.config import settings

        settings.LLM.NOUS_API_KEY = agent_key
        logger.info("In-memory settings.LLM.NOUS_API_KEY updated")
    except Exception as exc:
        # settings may not be importable or assignable in all contexts (tests, CLI)
        logger.debug("Could not update in-memory settings: %s", exc)

    logger.info("Nous OAuth refresh complete — new key expires at %s", expires_at)
    return agent_key
Loading