diff --git a/skillclaw/api_server.py b/skillclaw/api_server.py
index 7855574..70c4cec 100644
--- a/skillclaw/api_server.py
+++ b/skillclaw/api_server.py
@@ -30,6 +30,8 @@
 from .prm_scorer import PRMScorer
 from .skill_manager import SkillManager
 from .utils import run_llm
+from .protocols import anthropic_messages as anthropic_protocol
+from .protocols import openai_responses as responses_protocol
 
 logger = logging.getLogger(__name__)
 
@@ -90,6 +92,7 @@ def _normalize_assistant_content_parts(content: list[dict]) -> tuple[str, list[dict]]:
     return (" ".join(text_parts).strip(), tool_calls)
 
 
+_THINK_RE = re.compile(r"<think>.*?</think>", re.DOTALL)
 _TOOL_HANDLE_RE = re.compile(r"^call_(?:kimi|xml)_\d+$")
 _KIMI_TOOL_CALL_RE = re.compile(
@@ -1058,324 +1061,39 @@ def _rewrite_new_session_bootstrap_prompt(messages: list[dict]) -> tuple[list[dict], bool]:
 
 
 # ------------------------------------------------------------------ #
-# Anthropic ↔ OpenAI format helpers (for NanoClaw /v1/messages)      #
+# Protocol compatibility wrappers                                    #
 # ------------------------------------------------------------------ #
 
 
 def _anthropic_to_openai_body(body: dict[str, Any]) -> dict[str, Any]:
-    """Convert an Anthropic /v1/messages request body to OpenAI chat format."""
-    messages: list[dict] = list(body.get("messages", []))
-
-    # Anthropic puts the system prompt at top level; move it into messages[0].
-    system = body.get("system")
-    if system:
-        if isinstance(system, str):
-            system_text = system
-        elif isinstance(system, list):
-            system_text = " ".join(
-                blk.get("text", "") for blk in system if isinstance(blk, dict) and blk.get("type") == "text"
-            )
-        else:
-            system_text = str(system)
-        messages = [{"role": "system", "content": system_text}] + messages
-
-    # Flatten Anthropic content blocks → plain strings expected by OpenAI.
-    normalized: list[dict] = []
-    for msg in messages:
-        content = msg.get("content")
-        if isinstance(content, list):
-            text = " ".join(
-                blk.get("text", "") for blk in content if isinstance(blk, dict) and blk.get("type") == "text"
-            )
-            normalized.append({**msg, "content": text})
-        else:
-            normalized.append(msg)
-
-    openai_body: dict[str, Any] = {
-        "model": body.get("model", ""),
-        "messages": normalized,
-        "max_tokens": body.get("max_tokens", 2048),
-    }
-    for opt in ("temperature", "top_p", "stop_sequences", "stream"):
-        if opt in body:
-            key = "stop" if opt == "stop_sequences" else opt
-            openai_body[key] = body[opt]
-    return openai_body
+    return anthropic_protocol.to_openai_body(body)
 
 
 def _normalize_responses_content(content: Any) -> str:
-    """Flatten Responses-style content blocks to plain text."""
-    if isinstance(content, str):
-        return content
-    if isinstance(content, list):
-        parts: list[str] = []
-        for item in content:
-            if not isinstance(item, dict):
-                continue
-            item_type = item.get("type")
-            if item_type in {"input_text", "output_text", "text"}:
-                text = item.get("text")
-                if isinstance(text, str) and text:
-                    parts.append(text)
-        return " ".join(parts)
-    return str(content) if content is not None else ""
+    return responses_protocol.normalize_content_to_text(content)
 
 
 def _responses_tools_to_openai_tools(tools: Any) -> list[dict]:
-    """Convert Responses function-tool schemas to chat-completions tool schemas."""
-    converted: list[dict] = []
-    if not isinstance(tools, list):
-        return converted
-
-    for item in tools:
-        if not isinstance(item, dict):
-            continue
-        item_type = item.get("type")
-        if item_type == "function":
-            name = str(item.get("name") or "").strip()
-            if not name:
-                continue
-            converted.append(
-                {
-                    "type": "function",
-                    "function": {
-                        "name": name,
-                        "description": str(item.get("description") or ""),
-                        "parameters": item.get("parameters") or {"type": "object", "properties": {}},
-                    },
-                }
-            )
-            continue
-        if item_type == "function" or item.get("function"):
-            converted.append(item)
-    return converted
+    return responses_protocol.tools_to_openai_tools(tools)
 
 
 def _responses_to_openai_body(body: dict[str, Any], default_model: str) -> dict[str, Any]:
-    """Convert an OpenAI Responses request body to chat-completions format."""
-    raw_input = body.get("input")
-    if raw_input is None:
-        raise HTTPException(status_code=400, detail="input is required")
-
-    messages: list[dict] = []
-    instructions = body.get("instructions")
-    if instructions is not None:
-        messages.append({"role": "system", "content": _normalize_responses_content(instructions)})
-
-    def _append_tool_call(item: dict[str, Any]) -> None:
-        call_id = str(item.get("call_id") or item.get("id") or "").strip()
-        name = str(item.get("name") or "").strip()
-        arguments = item.get("arguments", "{}")
-        if isinstance(arguments, dict):
-            arguments = json.dumps(arguments, ensure_ascii=False)
-        elif not isinstance(arguments, str):
-            arguments = str(arguments)
-        arguments = arguments.strip() or "{}"
-        if not call_id or not name:
-            raise HTTPException(status_code=400, detail="function_call items require call_id and name")
-        messages.append(
-            {
-                "role": "assistant",
-                "content": "",
-                "tool_calls": [
-                    {
-                        "id": call_id,
-                        "type": "function",
-                        "function": {"name": name, "arguments": arguments},
-                    }
-                ],
-            }
-        )
-
-    def _append_tool_output(item: dict[str, Any]) -> None:
-        call_id = str(item.get("call_id") or item.get("tool_call_id") or "").strip()
-        if not call_id:
-            raise HTTPException(status_code=400, detail="function_call_output items require call_id")
-        output = item.get("output", "")
-        if output is None:
-            output = ""
-        if not isinstance(output, str):
-            output = str(output)
-        messages.append({"role": "tool", "tool_call_id": call_id, "content": output})
-
-    if isinstance(raw_input, str):
-        messages.append({"role": "user", "content": raw_input})
-    elif isinstance(raw_input, list):
-        for item in raw_input:
-            if isinstance(item, str):
-                messages.append({"role": "user", "content": item})
-                continue
-            if not isinstance(item, dict):
-                raise HTTPException(status_code=400, detail="input items must be strings or objects")
-
-            item_type = item.get("type")
-            if item_type == "function_call":
-                _append_tool_call(item)
-                continue
-            if item_type == "function_call_output":
-                _append_tool_output(item)
-                continue
-            if item_type == "reasoning":
-                continue
-
-            role = str(item.get("role") or "user").strip() or "user"
-            if role == "tool":
-                _append_tool_output(item)
-                continue
-            messages.append({"role": role, "content": _normalize_responses_content(item.get("content", ""))})
-    else:
-        raise HTTPException(status_code=400, detail="input must be a string or an array")
-
-    if not messages:
-        raise HTTPException(status_code=400, detail="input must produce at least one message")
-
-    openai_body: dict[str, Any] = {
-        "model": body.get("model") or default_model,
-        "messages": messages,
-    }
-    tools = _responses_tools_to_openai_tools(body.get("tools"))
-    if tools:
-        openai_body["tools"] = tools
-    if "temperature" in body:
-        openai_body["temperature"] = body["temperature"]
-    if "top_p" in body:
-        openai_body["top_p"] = body["top_p"]
-    if "tool_choice" in body:
-        openai_body["tool_choice"] = body["tool_choice"]
-    if "parallel_tool_calls" in body:
-        openai_body["parallel_tool_calls"] = body["parallel_tool_calls"]
-    if "max_output_tokens" in body:
-        openai_body["max_tokens"] = body["max_output_tokens"]
-    return openai_body
+    try:
+        return responses_protocol.to_openai_body(body, default_model)
+    except ValueError as exc:
+        raise HTTPException(status_code=400, detail=str(exc)) from exc
 
 
 def _responses_function_item_id(call_id: str, index: int) -> str:
-    raw = str(call_id or "").strip()
-    if raw.startswith("fc_"):
-        return raw
-    if raw.startswith("call_") and len(raw) > len("call_"):
-        return f"fc_{raw[len('call_') :]}"
-    cleaned = re.sub(r"[^A-Za-z0-9_-]", "", raw)
-    if cleaned:
-        return f"fc_{cleaned[:48]}"
-    return f"fc_{index}"
+    return responses_protocol.function_item_id(call_id, index)
 
 
 def _openai_chat_to_responses_payload(payload: dict[str, Any], model: str) -> dict[str, Any]:
-    """Convert a chat-completions payload to a Responses API payload."""
-    choice = payload.get("choices", [{}])[0]
-    message = choice.get("message", {}) if isinstance(choice.get("message"), dict) else {}
-    content_text = _flatten_message_content(message.get("content", ""))
-    tool_calls = list(message.get("tool_calls") or []) if isinstance(message.get("tool_calls"), list) else []
-
-    output_items: list[dict[str, Any]] = []
-    for idx, tc in enumerate(tool_calls):
-        if not isinstance(tc, dict):
-            continue
-        fn = tc.get("function", {}) if isinstance(tc.get("function"), dict) else {}
-        call_id = str(tc.get("id") or tc.get("call_id") or f"call_{idx}").strip() or f"call_{idx}"
-        arguments = fn.get("arguments", "{}")
-        if isinstance(arguments, dict):
-            arguments = json.dumps(arguments, ensure_ascii=False)
-        elif not isinstance(arguments, str):
-            arguments = str(arguments)
-        output_items.append(
-            {
-                "type": "function_call",
-                "id": _responses_function_item_id(call_id, idx),
-                "call_id": call_id,
-                "name": str(fn.get("name") or ""),
-                "arguments": arguments or "{}",
-                "status": "completed",
-            }
-        )
-
-    if content_text or not output_items:
-        output_items.append(
-            {
-                "id": f"msg_{payload.get('id') or 'skillclaw'}_{len(output_items)}",
-                "type": "message",
-                "role": "assistant",
-                "status": "completed",
-                "content": [{"type": "output_text", "text": content_text, "annotations": []}],
-            }
-        )
-
-    usage = payload.get("usage", {})
-    response_payload = {
-        "id": payload.get("id") or f"resp_skillclaw_{int(time.time() * 1000)}",
-        "object": "response",
-        "created_at": payload.get("created", int(time.time())),
-        "status": "completed",
-        "model": model,
-        "output": output_items,
-        "parallel_tool_calls": True,
-        "tool_choice": "auto",
-        "tools": [],
-        "usage": {
-            "input_tokens": int(usage.get("prompt_tokens", 0) or 0),
-            "output_tokens": int(usage.get("completion_tokens", 0) or 0),
-            "total_tokens": int(usage.get("total_tokens", 0) or 0),
-        },
-    }
-    if content_text:
-        response_payload["output_text"] = content_text
-    return response_payload
+    return responses_protocol.from_openai_chat_payload(payload, model)
 
 
 def _merge_previous_response_messages(
     previous_messages: list[dict[str, Any]],
     current_messages: list[dict[str, Any]],
 ) -> list[dict[str, Any]]:
     """Merge stored response history with the current turn's messages.
 
     If the current request provides a fresh system/instructions message, keep it
     as the new leading system prompt and drop older system prompts from history.
     """
     if not previous_messages:
         return list(current_messages)
     if not current_messages:
         return list(previous_messages)
 
     first = current_messages[0]
     if isinstance(first, dict) and first.get("role") == "system":
         history_without_system = [
             msg for msg in previous_messages if not (isinstance(msg, dict) and msg.get("role") == "system")
         ]
         return [first, *history_without_system, *current_messages[1:]]
 
     return [*previous_messages, *current_messages]
 
 
 def _openai_to_anthropic_response(openai_resp: dict[str, Any], model: str) -> dict[str, Any]:
-    """Convert an OpenAI chat completion response to Anthropic /v1/messages format."""
-    choice = openai_resp.get("choices", [{}])[0]
-    message = choice.get("message", {})
-    content_text = message.get("content") or ""
-    finish_reason = choice.get("finish_reason", "stop")
-
-    stop_reason_map = {
-        "stop": "end_turn",
-        "length": "max_tokens",
-        "tool_calls": "tool_use",
-        "content_filter": "stop_sequence",
-    }
-    stop_reason = stop_reason_map.get(finish_reason, "end_turn")
-
-    usage = openai_resp.get("usage", {})
-    return {
-        "id": openai_resp.get("id", "msg_skillclaw"),
-        "type": "message",
-        "role": "assistant",
-        "model": model,
-        "content": [{"type": "text", "text": content_text}],
-        "stop_reason": stop_reason,
-        "stop_sequence": None,
-        "usage": {
-            "input_tokens": usage.get("prompt_tokens", 0),
-            "output_tokens": usage.get("completion_tokens", 0),
-        },
-    }
+    return anthropic_protocol.from_openai_response(openai_resp, model)
 
 
 # ------------------------------------------------------------------ #
@@ -1596,6 +1314,7 @@ async def responses(
     request: Request,
     authorization: Optional[str] = Header(default=None),
     x_session_id: Optional[str] = Header(default=None),
+    codex_session_id: Optional[str] = Header(default=None, alias="session_id"),
     x_turn_type: Optional[str] = Header(default=None),
     x_session_done: Optional[str] = Header(default=None),
 ):
@@ -1604,6 +1323,15 @@ async def responses(
     await owner._check_auth(authorization)
     body = await request.json()
 
+    if owner._responses_native_enabled():
+        if bool(body.get("stream", False)):
+            return StreamingResponse(
+                owner._stream_llm_responses(body),
+                media_type="text/event-stream",
+            )
+        response_payload = await owner._forward_to_llm_responses(body)
+        return JSONResponse(content=response_payload)
+
     previous_response_id = str(body.get("previous_response_id") or "").strip()
     store_response = bool(body.get("store", True))
     openai_body = _responses_to_openai_body(body, owner._served_model)
@@ -1618,7 +1346,7 @@ async def responses(
         list(stored.get("messages") or []),
         list(openai_body.get("messages") or []),
     )
-    _raw_sid = x_session_id or body.get("session_id") or ""
+    _raw_sid = x_session_id or codex_session_id or body.get("session_id") or ""
    if _raw_sid:
         session_id = _raw_sid
     turn_type = _resolve_turn_type(x_turn_type, body.get("turn_type"), default="main")
@@ -2505,6 +2233,100 @@ async def _forward_to_llm(self, body: dict[str, Any]) -> dict[str, Any]:
             return await self._forward_to_llm_bedrock(body)
         return await self._forward_to_llm_openai(body)
 
+    def _responses_native_enabled(self) -> bool:
+        """Return whether /v1/responses should be forwarded as Responses API."""
+        return str(getattr(self.config, "llm_api_mode", "chat") or "chat").lower() == "responses"
+
+    def _prepare_responses_forward(self, body: dict[str, Any], *, stream: bool) -> tuple[str, dict[str, Any], dict[str, str]]:
+        """Build URL, body, and headers for native Responses forwarding.
+
+        Native mode intentionally keeps Responses-only tools (custom, web_search,
+        namespace, etc.) untouched instead of converting the request to chat.
+        """
+        api_base = self.config.llm_api_base.rstrip("/")
+        if not api_base:
+            raise HTTPException(
+                status_code=503,
+                detail="llm_api_base is not configured. Run 'skillclaw setup' first.",
+            )
+
+        send_body = {k: v for k, v in body.items() if k not in _NON_STANDARD_BODY_KEYS}
+        send_body["model"] = self.config.llm_model_id or body.get("model", "")
+        send_body["stream"] = stream
+
+        headers: dict[str, str] = {}
+        if self.config.llm_api_key:
+            headers["Authorization"] = f"Bearer {self.config.llm_api_key}"
+        return f"{api_base}/responses", send_body, headers
+
+    async def _forward_to_llm_responses(self, body: dict[str, Any]) -> dict[str, Any]:
+        """Forward a Codex Responses payload to an upstream Responses API."""
+        import httpx
+
+        url, send_body, headers = self._prepare_responses_forward(body, stream=False)
+
+        max_retries = 3
+        for attempt in range(max_retries):
+            try:
+                async with httpx.AsyncClient(timeout=_llm_request_timeout_seconds()) as client:
+                    resp = await client.post(
+                        url,
+                        json=send_body,
+                        headers=headers,
+                    )
+                    resp.raise_for_status()
+                    return resp.json()
+            except httpx.HTTPStatusError as e:
+                response_text = e.response.text[:200]
+                if attempt < max_retries - 1:
+                    wait = min(2**attempt + random.uniform(0, 1), 10)
+                    logger.warning(
+                        "[OpenClaw] upstream Responses error (attempt %d/%d), retrying in %.1fs: %s %s",
+                        attempt + 1,
+                        max_retries,
+                        wait,
+                        e.response.status_code,
+                        response_text,
+                    )
+                    await asyncio.sleep(wait)
+                    continue
+                logger.error("[OpenClaw] upstream Responses error: %s %s", e.response.status_code, response_text)
+                raise HTTPException(status_code=502, detail=f"Upstream Responses error: {e}") from e
+            except Exception as e:
+                if attempt < max_retries - 1:
+                    wait = min(2**attempt + random.uniform(0, 1), 10)
+                    logger.warning(
+                        "[OpenClaw] Responses forward failed (attempt %d/%d), retrying in %.1fs: %s",
+                        attempt + 1,
+                        max_retries,
+                        wait,
+                        e,
+                    )
+                    await asyncio.sleep(wait)
+                    continue
+                logger.error("[OpenClaw] Responses forward failed: %s", e, exc_info=True)
+                raise HTTPException(status_code=502, detail=f"Responses forward error: {e}") from e
+
+    async def _stream_llm_responses(self, body: dict[str, Any]):
+        """Passthrough upstream Responses SSE without aggregating or rewriting events."""
+        import httpx
+
+        url, send_body, headers = self._prepare_responses_forward(body, stream=True)
+        try:
+            async with httpx.AsyncClient(timeout=_llm_request_timeout_seconds()) as client:
+                async with client.stream("POST", url, json=send_body, headers=headers) as resp:
+                    resp.raise_for_status()
+                    async for chunk in resp.aiter_raw():
+                        if chunk:
+                            yield chunk
+        except httpx.HTTPStatusError as e:
+            response_text = e.response.text[:200]
+            logger.error("[OpenClaw] upstream Responses stream error: %s %s", e.response.status_code, response_text)
+            raise HTTPException(status_code=502, detail=f"Upstream Responses stream error: {e}") from e
+        except Exception as e:
+            logger.error("[OpenClaw] Responses stream failed: %s", e, exc_info=True)
+            raise HTTPException(status_code=502, detail=f"Responses stream error: {e}") from e
+
     async def _forward_to_llm_openai(self, body: dict[str, Any]) -> dict[str, Any]:
         """Forward to an OpenAI-compatible API."""
         import httpx
@@ -3053,178 +2875,14 @@ async def _stream_response(self, result: dict[str, Any]):
 
     async def _stream_responses_response(self, response_payload: dict[str, Any]):
         """Yield OpenAI Responses API-compatible SSE events."""
-        seq = 0
-
-        def _event(payload: dict[str, Any]) -> str:
-            nonlocal seq
-            payload["sequence_number"] = seq
-            seq += 1
-            return "data: " + json.dumps(payload, ensure_ascii=False) + "\n\n"
-
-        initial_response = dict(response_payload)
-        initial_response["status"] = "in_progress"
-        initial_response["output"] = []
-        initial_response["usage"] = None
-        yield _event({"type": "response.created", "response": initial_response})
-        yield _event({"type": "response.in_progress", "response": initial_response})
-
-        for index, item in enumerate(response_payload.get("output", [])):
-            yield _event(
-                {
-                    "type": "response.output_item.added",
-                    "output_index": index,
-                    "item": item,
-                }
-            )
-
-            if item.get("type") == "function_call":
-                arguments = str(item.get("arguments") or "")
-                if arguments:
-                    yield _event(
-                        {
-                            "type": "response.function_call_arguments.delta",
-                            "item_id": item.get("id", ""),
-                            "output_index": index,
-                            "delta": arguments,
-                        }
-                    )
-                yield _event(
-                    {
-                        "type": "response.function_call_arguments.done",
-                        "item_id": item.get("id", ""),
-                        "output_index": index,
-                        "arguments": arguments,
-                    }
-                )
-
-            if item.get("type") == "message":
-                for content_index, part in enumerate(item.get("content", [])):
-                    if part.get("type") != "output_text":
-                        continue
-                    item_id = str(item.get("id") or "")
-                    base_part = {
-                        "type": "output_text",
-                        "text": "",
-                        "annotations": [],
-                    }
-                    yield _event(
-                        {
-                            "type": "response.content_part.added",
-                            "output_index": index,
-                            "content_index": content_index,
-                            "item_id": item_id,
-                            "part": base_part,
-                        }
-                    )
-                    text = str(part.get("text") or "")
-                    if text:
-                        yield _event(
-                            {
-                                "type": "response.output_text.delta",
-                                "output_index": index,
-                                "content_index": content_index,
-                                "item_id": item_id,
-                                "delta": text,
-                                "logprobs": [],
-                            }
-                        )
-                    yield _event(
-                        {
-                            "type": "response.output_text.done",
-                            "output_index": index,
-                            "content_index": content_index,
-                            "item_id": item_id,
-                            "text": text,
-                            "logprobs": [],
-                        }
-                    )
-                    yield _event(
-                        {
-                            "type": "response.content_part.done",
-                            "output_index": index,
-                            "content_index": content_index,
-                            "item_id": item_id,
-                            "part": {
-                                "type": "output_text",
-                                "text": text,
-                                "annotations": [],
-                            },
-                        }
-                    )
-            yield _event(
-                {
-                    "type": "response.output_item.done",
-                    "output_index": index,
-                    "item": item,
-                }
-            )
-
-        yield _event({"type": "response.completed", "response": response_payload})
-        yield "data: [DONE]\n\n"
+        async for chunk in responses_protocol.stream_response(response_payload):
+            yield chunk
 
     async def _stream_anthropic_response(self, result: dict[str, Any], model: str):
         """Yield Anthropic-format SSE events from an internal result dict."""
-        payload = result["response"]
-        choice = payload.get("choices", [{}])[0]
-        message = choice.get("message", {})
-        content_text = message.get("content", "") or ""
-        finish_reason = choice.get("finish_reason", "stop")
-        stop_reason_map = {
-            "stop": "end_turn",
-            "length": "max_tokens",
-            "tool_calls": "tool_use",
-            "content_filter": "stop_sequence",
-        }
-        stop_reason = stop_reason_map.get(finish_reason, "end_turn")
-        usage = payload.get("usage", {})
-        msg_id = payload.get("id", "msg_skillclaw")
-
-        def _sse(event: str, data: dict) -> str:
-            return f"event: {event}\ndata: {json.dumps(data, ensure_ascii=False)}\n\n"
+        async for chunk in anthropic_protocol.stream_from_openai_result(result, model):
+            yield chunk
 
-        yield _sse(
-            "message_start",
-            {
-                "type": "message_start",
-                "message": {
-                    "id": msg_id,
-                    "type": "message",
-                    "role": "assistant",
-                    "content": [],
-                    "model": model,
-                    "stop_reason": None,
-                    "stop_sequence": None,
-                    "usage": {"input_tokens": usage.get("prompt_tokens", 0), "output_tokens": 0},
-                },
-            },
-        )
-        yield _sse(
-            "content_block_start",
-            {
-                "type": "content_block_start",
-                "index": 0,
-                "content_block": {"type": "text", "text": ""},
-            },
-        )
-        yield _sse("ping", {"type": "ping"})
-        yield _sse(
-            "content_block_delta",
-            {
-                "type": "content_block_delta",
-                "index": 0,
-                "delta": {"type": "text_delta", "text": content_text},
-            },
-        )
-        yield _sse("content_block_stop", {"type": "content_block_stop", "index": 0})
-        yield _sse(
-            "message_delta",
-            {
-                "type": "message_delta",
-                "delta": {"stop_reason": stop_reason, "stop_sequence": None},
-                "usage": {"output_tokens": usage.get("completion_tokens", 0)},
-            },
-        )
-        yield _sse("message_stop", {"type": "message_stop"})
 
     # ------------------------------------------------------------------ #
     # Lifecycle                                                          #
diff --git a/skillclaw/config.py b/skillclaw/config.py
index b5a5c46..afe06b3 100644
--- a/skillclaw/config.py
+++ b/skillclaw/config.py
@@ -62,6 +62,9 @@ class SkillClawConfig:
     llm_api_base: str = ""
     llm_api_key: str = ""
     llm_model_id: str = ""
+    # Upstream API surface: "chat" keeps the legacy chat-completions bridge;
+    # "responses" forwards Codex /v1/responses payloads to an upstream Responses API.
+    llm_api_mode: str = "chat"
 
     # ------------------------------------------------------------------ #
     # OpenRouter-specific (ignored for other providers)                  #
diff --git a/skillclaw/config_store.py b/skillclaw/config_store.py
index 95ef4f2..1902569 100644
--- a/skillclaw/config_store.py
+++ b/skillclaw/config_store.py
@@ -251,6 +251,7 @@ def to_skillclaw_config(self) -> SkillClawConfig:
         llm_api_base = llm.get("api_base", "")
         llm_api_key = llm.get("api_key", "")
         llm_model_id = llm.get("model_id", "")
+        llm_api_mode = str(llm.get("api_mode", "chat") or "chat")
         proxy = data.get("proxy", {})
         skills = data.get("skills", {})
         orouter = data.get("openrouter", {})
@@ -288,6 +289,7 @@ def to_skillclaw_config(self) -> SkillClawConfig:
             llm_api_base=llm_api_base,
             llm_api_key=llm_api_key,
             llm_model_id=llm_model_id,
+            llm_api_mode=llm_api_mode,
             bedrock_region=llm.get("bedrock_region") or data.get("bedrock_region", "us-east-1"),
             # OpenRouter
             openrouter_app_name=orouter.get("app_name", "SkillClaw"),
diff --git a/skillclaw/protocols/__init__.py b/skillclaw/protocols/__init__.py
new file mode 100644
index 0000000..362acd7
--- /dev/null
+++ b/skillclaw/protocols/__init__.py
@@ -0,0 +1 @@
+"""Protocol adapters for SkillClaw API compatibility layers."""
diff --git a/skillclaw/protocols/anthropic_messages.py b/skillclaw/protocols/anthropic_messages.py
new file mode 100644
index 0000000..00cef7f
--- /dev/null
+++ b/skillclaw/protocols/anthropic_messages.py
@@ -0,0 +1,322 @@
+"""Anthropic Messages compatibility for Claude Code style clients."""
+
+from __future__ import annotations
+
+import json
+from typing import Any, AsyncIterator
+
+from .common import json_dumps_tool_args, json_loads_tool_input
+
+_STOP_REASON_MAP = {
+    "stop": "end_turn",
+    "length": "max_tokens",
+    "tool_calls": "tool_use",
+    "content_filter": "stop_sequence",
+}
+
+
+def _flatten_tool_result_content(content: Any) -> str:
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        parts: list[str] = []
+        for item in content:
+            if isinstance(item, dict):
+                if item.get("type") in {"text", "input_text", "output_text"}:
+                    text = item.get("text")
+                    if isinstance(text, str):
+                        parts.append(text)
+                elif "content" in item:
+                    parts.append(_flatten_tool_result_content(item.get("content")))
+            elif item is not None:
+                parts.append(str(item))
+        return " ".join(part for part in parts if part)
+    return str(content) if content is not None else ""
+
+
+def _image_block_to_openai_part(block: dict[str, Any]) -> dict[str, Any] | None:
+    source = block.get("source") if isinstance(block.get("source"), dict) else {}
+    if source.get("type") == "base64":
+        media_type = str(source.get("media_type") or "image/png")
+        data = str(source.get("data") or "")
+        if data:
+            return {"type": "image_url", "image_url": {"url": f"data:{media_type};base64,{data}"}}
+    url = source.get("url") or block.get("url") or block.get("image_url")
+    if isinstance(url, str) and url:
+        return {"type": "image_url", "image_url": {"url": url}}
+    return None
+
+
+def _tools_to_openai_tools(tools: Any) -> list[dict[str, Any]]:
+    converted: list[dict[str, Any]] = []
+    if not isinstance(tools, list):
+        return converted
+    for item in tools:
+        if not isinstance(item, dict):
+            continue
+        item_type = str(item.get("type") or "").strip()
+        # Anthropic server tools are not client function tools; a chat upstream
+        # cannot execute them unless they are handled by a native protocol path.
+        if item_type.startswith("web_search") or item_type in {"server_tool_use", "web_search_tool_result"}:
+            continue
+        name = str(item.get("name") or "").strip()
+        if not name:
+            continue
+        converted.append(
+            {
+                "type": "function",
+                "function": {
+                    "name": name,
+                    "description": str(item.get("description") or ""),
+                    "parameters": item.get("input_schema") or {"type": "object", "properties": {}},
+                },
+            }
+        )
+    return converted
+
+
+def _tool_choice_to_openai(tool_choice: Any) -> Any:
+    if isinstance(tool_choice, str):
+        return "required" if tool_choice == "any" else tool_choice
+    if not isinstance(tool_choice, dict):
+        return tool_choice
+    choice_type = tool_choice.get("type")
+    if choice_type == "auto":
+        return "auto"
+    if choice_type == "any":
+        return "required"
+    if choice_type == "tool":
+        name = str(tool_choice.get("name") or "").strip()
+        if name:
+            return {"type": "function", "function": {"name": name}}
+    return tool_choice
+
+
+def to_openai_body(body: dict[str, Any]) -> dict[str, Any]:
+    """Convert an Anthropic /v1/messages request body to OpenAI chat format."""
+    messages: list[dict[str, Any]] = list(body.get("messages", []))
+
+    system = body.get("system")
+    if system:
+        if isinstance(system, str):
+            system_text = system
+        elif isinstance(system, list):
+            system_text = " ".join(
+                blk.get("text", "") for blk in system if isinstance(blk, dict) and blk.get("type") == "text"
+            )
+        else:
+            system_text = str(system)
+        messages = [{"role": "system", "content": system_text}] + messages
+
+    normalized: list[dict[str, Any]] = []
+    for msg in messages:
+        role = msg.get("role")
+        content = msg.get("content")
+        if not isinstance(content, list):
+            normalized.append(msg)
+            continue
+
+        text_parts: list[str] = []
+        content_parts: list[dict[str, Any]] = []
+        tool_calls: list[dict[str, Any]] = []
+        tool_results: list[dict[str, Any]] = []
+        for idx, block in enumerate(content):
+            if not isinstance(block, dict):
+                continue
+            block_type = block.get("type")
+            if block_type == "text":
+                text = block.get("text")
+                if isinstance(text, str) and text:
+                    text_parts.append(text)
+                    content_parts.append({"type": "text", "text": text})
+            elif block_type == "image":
+                image_part = _image_block_to_openai_part(block)
+                if image_part:
+                    content_parts.append(image_part)
+            elif block_type == "tool_use":
+                tool_calls.append(
+                    {
+                        "id": str(block.get("id") or f"toolu_{idx}"),
+                        "type": "function",
+                        "function": {
+                            "name": str(block.get("name") or "unknown_tool"),
+                            "arguments": json_dumps_tool_args(block.get("input")),
+                        },
+                    }
+                )
+            elif block_type == "tool_result":
+                tool_results.append(
+                    {
+                        "role": "tool",
+                        "tool_call_id": str(block.get("tool_use_id") or ""),
+                        "content": _flatten_tool_result_content(block.get("content")),
+                    }
+                )
+
+        text = " ".join(text_parts).strip()
+        has_image = any(part.get("type") == "image_url" for part in content_parts)
+        openai_content: str | list[dict[str, Any]] = content_parts if has_image else text
+        if role == "assistant":
+            assistant_msg = {**msg, "content": text}
+            if tool_calls:
+                assistant_msg["tool_calls"] = tool_calls
+            normalized.append(assistant_msg)
+            continue
+        if tool_results:
+            normalized.extend(tool_results)
+            if text:
+                normalized.append({**msg, "content": text})
+            continue
+        normalized.append({**msg, "content": openai_content})
+
+    openai_body: dict[str, Any] = {
+        "model": body.get("model", ""),
+        "messages": normalized,
+        "max_tokens": body.get("max_tokens", 2048),
+    }
+    tools = _tools_to_openai_tools(body.get("tools"))
+    if tools:
+        openai_body["tools"] = tools
+    if "tool_choice" in body:
+        openai_body["tool_choice"] = _tool_choice_to_openai(body.get("tool_choice"))
+    for opt in ("temperature", "top_p", "stop_sequences", "stream"):
+        if opt in body:
+            key = "stop" if opt == "stop_sequences" else opt
+            openai_body[key] = body[opt]
+    return openai_body
+
+
+def from_openai_response(openai_resp: dict[str, Any], model: str) -> dict[str, Any]:
+    """Convert an OpenAI chat completion response to Anthropic /v1/messages format."""
+    choice = openai_resp.get("choices", [{}])[0]
+    message = choice.get("message", {}) if isinstance(choice.get("message"), dict) else {}
+    content_text = message.get("content") or ""
+    raw_tool_calls = message.get("tool_calls")
+    tool_calls = raw_tool_calls if isinstance(raw_tool_calls, list) else []
+    finish_reason = choice.get("finish_reason", "stop")
+    stop_reason = "tool_use" if tool_calls else _STOP_REASON_MAP.get(finish_reason, "end_turn")
+
+    content_blocks: list[dict[str, Any]] = []
+    if content_text:
+        content_blocks.append({"type": "text", "text": content_text})
+    for idx, tool_call in enumerate(tool_calls):
+        if not isinstance(tool_call, dict):
+            continue
+        function = tool_call.get("function") if isinstance(tool_call.get("function"), dict) else {}
+        content_blocks.append(
+            {
+                "type": "tool_use",
+                "id": str(tool_call.get("id") or f"call_{idx}"),
+                "name": str(function.get("name") or "unknown_tool"),
+                "input": json_loads_tool_input(function.get("arguments")),
+            }
+        )
+    if not content_blocks:
+        content_blocks.append({"type": "text", "text": ""})
+
+    usage = openai_resp.get("usage", {})
+    return {
+        "id": openai_resp.get("id", "msg_skillclaw"),
+        "type": "message",
+        "role": "assistant",
+        "model": model,
+        "content": content_blocks,
+        "stop_reason": stop_reason,
+        "stop_sequence": None,
+        "usage": {
+            "input_tokens": usage.get("prompt_tokens", 0),
+            "output_tokens": usage.get("completion_tokens", 0),
+        },
+    }
+
+
+async def stream_from_openai_result(result: dict[str, Any], model: str) -> AsyncIterator[str]:
+    """Yield Anthropic-format SSE events from an internal OpenAI chat result."""
+    payload = result["response"]
+    anthropic_payload = from_openai_response(payload, model)
+    content_blocks = anthropic_payload.get("content", [])
+    stop_reason = anthropic_payload.get("stop_reason") or "end_turn"
+    usage = payload.get("usage", {})
+    msg_id = payload.get("id", "msg_skillclaw")
+
+    def sse(event: str, data: dict[str, Any]) -> str:
+        return f"event: {event}\ndata: {json.dumps(data, ensure_ascii=False)}\n\n"
+
+    yield sse(
+        "message_start",
+        {
+            "type": "message_start",
+            "message": {
+                "id": msg_id,
+                "type": "message",
+                "role": "assistant",
+                "content": [],
+                "model": model,
+                "stop_reason": None,
+                "stop_sequence": None,
+                "usage": {"input_tokens": usage.get("prompt_tokens", 0), "output_tokens": 0},
+            },
+        },
+    )
+    yield sse("ping", {"type": "ping"})
+
+    for index, block in enumerate(content_blocks):
+        block_type = block.get("type") if isinstance(block, dict) else None
+        if block_type == "tool_use":
+            input_obj = block.get("input") if isinstance(block.get("input"), dict) else {}
+            partial_json = json.dumps(input_obj, ensure_ascii=False, separators=(",", ":"))
+            yield sse(
+                "content_block_start",
+                {
+                    "type": "content_block_start",
+                    "index": index,
+                    "content_block": {
+                        "type": "tool_use",
+                        "id": block.get("id", f"call_{index}"),
+                        "name": block.get("name", "unknown_tool"),
+                        "input": {},
+                    },
+                },
+            )
+            if partial_json and partial_json != "{}":
+                yield sse(
+                    "content_block_delta",
+                    {
+                        "type": "content_block_delta",
+                        "index": index,
+                        "delta": {"type": "input_json_delta", "partial_json": partial_json},
+                    },
+                )
+            yield sse("content_block_stop", {"type": "content_block_stop", "index": index})
+            continue
+
+        text = str(block.get("text") or "") if isinstance(block, dict) else ""
+        yield sse(
+            "content_block_start",
+            {
+                "type": "content_block_start",
+                "index": index,
+                "content_block": {"type": "text", "text": ""},
+            },
+        )
+        if text:
+            yield sse(
+                "content_block_delta",
+                {
+                    "type": "content_block_delta",
+                    "index": index,
+                    "delta": {"type": "text_delta", "text": text},
+                },
+            )
+        yield sse("content_block_stop", {"type": "content_block_stop", "index": index})
+
+    yield sse(
+        "message_delta",
+        {
+            "type": "message_delta",
+            "delta": {"stop_reason": stop_reason, "stop_sequence": None},
+            "usage": {"output_tokens": usage.get("completion_tokens", 0)},
+        },
+    )
+    yield sse("message_stop", {"type": "message_stop"})
diff --git a/skillclaw/protocols/common.py b/skillclaw/protocols/common.py
new file mode 100644
index 0000000..12ffe77
--- /dev/null
+++ b/skillclaw/protocols/common.py
@@ -0,0 +1,28 @@
+"""Shared helpers for protocol adapters."""
+
+from __future__ import annotations
+
+import json
+from typing import Any
+
+
+def json_dumps_tool_args(value: Any) -> str:
+    """Return a JSON object string suitable for OpenAI function arguments."""
+    if isinstance(value, str):
+        stripped = value.strip()
+        return stripped or "{}"
+    try:
+        return json.dumps(value if value is not None else {}, ensure_ascii=False)
+    except Exception:
+        return "{}"
+
+
+def json_loads_tool_input(value: Any) -> dict[str, Any]:
+    """Parse OpenAI function arguments into a tool input object."""
+    if isinstance(value, str):
+        try:
+            parsed = json.loads(value or "{}")
+        except Exception:
+            return {}
+        return parsed if isinstance(parsed, dict) else {}
+    return value if isinstance(value, dict) else {}
diff --git a/skillclaw/protocols/openai_responses.py b/skillclaw/protocols/openai_responses.py
new file mode 100644
index 0000000..3691333
--- /dev/null
+++ b/skillclaw/protocols/openai_responses.py
@@ -0,0 +1,372 @@
+"""OpenAI Responses compatibility for Codex style clients."""
+
+from __future__ import annotations
+
+import json
+import re
+import time
+from typing import Any, AsyncIterator
+
+
+def normalize_content_to_text(content: Any) -> str:
+    """Flatten Responses-style content blocks to plain text."""
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        parts: list[str] = []
+        for item in content:
+            if not isinstance(item, dict):
+                continue
+            item_type = item.get("type")
+            if item_type in {"input_text", "output_text", "text"}:
+                text = item.get("text")
+                if isinstance(text, str) and text:
+                    parts.append(text)
+        return " ".join(parts)
+    return str(content) if content is not None else ""
+
+
+def content_to_openai_chat_content(content: Any) -> str | list[dict[str, Any]]:
+    """Convert Responses message content into chat-completions content."""
+    if isinstance(content, str):
+        return content
+    if not isinstance(content, list):
+        return str(content) if content is not None else ""
+
+    parts: list[dict[str, Any]] = []
+    for item in content:
+        if not isinstance(item, dict):
+            continue
+        item_type = item.get("type")
+        if item_type in {"input_text", "output_text", "text"}:
+            text = item.get("text")
+            if isinstance(text, str) and text:
+                parts.append({"type": "text", "text": text})
+        elif item_type in {"input_image", "image_url"}:
+            image_url = item.get("image_url") or item.get("url")
+            if isinstance(image_url, dict):
+                image_url = image_url.get("url")
+            if isinstance(image_url, str) and image_url:
+                parts.append({"type": "image_url", "image_url": {"url": image_url}})
+    if not parts:
+        return ""
+    if all(part.get("type") == "text" for part in parts):
+        return " ".join(str(part.get("text") or "") for part in parts).strip()
+    return parts
+
+
+def tool_choice_to_openai_chat(tool_choice: Any) -> Any:
+    """Convert Responses tool_choice to chat-completions tool_choice."""
+    if isinstance(tool_choice, str):
+        if tool_choice in {"auto", "none", "required"}:
+            return tool_choice
+        raise ValueError(f"Responses tool_choice {tool_choice!r} is not supported in chat bridge mode")
+    if not isinstance(tool_choice, dict):
+        return tool_choice
+
+    choice_type = str(tool_choice.get("type") or "").strip()
+    if choice_type == "function":
+        if isinstance(tool_choice.get("function"), dict):
+            return tool_choice
+        name = str(tool_choice.get("name") or "").strip()
+        if name:
+            return {"type": "function", "function": {"name": name}}
+    if choice_type in {"auto", "none", "required"}:
+        return choice_type
+    raise ValueError(f"Responses tool_choice type {choice_type!r} is not supported in chat bridge mode")
+
+
+def tools_to_openai_tools(tools: Any) -> list[dict[str, Any]]:
+    """Convert Responses function-tool schemas to chat-completions tool schemas."""
+    converted: list[dict[str, Any]] = []
+    if not isinstance(tools, list):
+        return converted
+
+    for item in tools:
+        if not isinstance(item, dict):
+            continue
+        item_type = item.get("type")
+        if item_type == "function":
+            name = str(item.get("name") or "").strip()
+            if not name:
+                continue
+            converted.append(
+                {
+                    "type": "function",
+                    "function": {
+                        "name": name,
+                        "description": str(item.get("description") or ""),
+                        "parameters": item.get("parameters") or {"type": "object", "properties": {}},
+                    },
+                }
+            )
+            continue
+        if item.get("function") and item_type in {None, "function"}:
+            converted.append(item)
+    return converted
+
+
+def to_openai_body(body: dict[str, Any], default_model: str) -> dict[str, Any]:
+    """Convert an OpenAI Responses request body to chat-completions format."""
+    raw_input = body.get("input")
+    if raw_input is None:
+        raise ValueError("input is required")
+
+    messages: list[dict[str, Any]] = []
+    instructions = body.get("instructions")
+    if instructions is not None:
+        messages.append({"role": "system", "content": normalize_content_to_text(instructions)})
+
+    def append_tool_call(item: dict[str, Any]) -> None:
+        call_id = str(item.get("call_id") or item.get("id") or "").strip()
+        name = str(item.get("name") or "").strip()
+        arguments = item.get("arguments", "{}")
+        if isinstance(arguments, dict):
+            arguments = json.dumps(arguments, ensure_ascii=False)
+        elif not isinstance(arguments, str):
+            arguments = str(arguments)
+        arguments = arguments.strip() or "{}"
+        if not call_id or not name:
+            raise ValueError("function_call items require call_id and name")
+        messages.append(
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [
+                    {
+                        "id": call_id,
+                        "type": "function",
+                        "function": {"name": name, "arguments": arguments},
+                    }
+                ],
+            }
+        )
+
+    def append_tool_output(item: dict[str, Any]) -> None:
+        call_id = str(item.get("call_id") or item.get("tool_call_id") or "").strip()
+        if not call_id:
+            raise ValueError("function_call_output items require call_id")
+        output = item.get("output", "")
+        if output is None:
+            output = ""
+        if not isinstance(output, str):
+            output = str(output)
+        messages.append({"role": "tool", "tool_call_id": call_id, "content": output})
+
+    if isinstance(raw_input, str):
+        messages.append({"role": "user", "content": raw_input})
+    elif isinstance(raw_input, list):
+        for item in raw_input:
+            if isinstance(item, str):
+                messages.append({"role": "user", "content": item})
+                continue
+            if not isinstance(item, dict):
+                raise ValueError("input items must be strings or objects")
+
+            item_type = item.get("type")
+            if item_type == "function_call":
+                append_tool_call(item)
+                continue
+            if item_type == "function_call_output":
+                append_tool_output(item)
+                continue
+            if item_type == "reasoning":
+                continue
+
+            role = str(item.get("role") or "user").strip() or "user"
+            if role == "developer":
+                role = "system"
+            if role == "tool":
+                append_tool_output(item)
+                continue
+            messages.append({"role": role, "content": content_to_openai_chat_content(item.get("content", ""))})
+    else:
+        raise ValueError("input must be a string or an array")
+
+    if not messages:
+        raise ValueError("input must produce at least one message")
+
+    openai_body: dict[str, Any] = {
+        "model": body.get("model") or default_model,
+        "messages": messages,
+    }
+    tools = tools_to_openai_tools(body.get("tools"))
+    if tools:
+        openai_body["tools"] = tools
+    if "temperature" in body:
+        openai_body["temperature"] = body["temperature"]
+    if "top_p" in body:
+        openai_body["top_p"] = body["top_p"]
+    if "tool_choice" in body:
+        openai_body["tool_choice"] = tool_choice_to_openai_chat(body["tool_choice"])
+    if "parallel_tool_calls" in body:
+        openai_body["parallel_tool_calls"] = body["parallel_tool_calls"]
+    if "max_output_tokens" in body:
+        openai_body["max_tokens"] = body["max_output_tokens"]
+    return openai_body
+
+
+def function_item_id(call_id: str, index: int) -> str:
+    raw = str(call_id or "").strip()
+    if raw.startswith("fc_"):
+        return raw
+    if raw.startswith("call_") and len(raw) > len("call_"):
+        return f"fc_{raw[len('call_') :]}"
+    cleaned = re.sub(r"[^A-Za-z0-9_-]", "", raw)
+    if cleaned:
+        return f"fc_{cleaned[:48]}"
+    return f"fc_{index}"
+
+
+def from_openai_chat_payload(payload: dict[str, Any], model: str) -> dict[str, Any]:
+    """Convert a chat-completions payload to a Responses API payload."""
+    choice = payload.get("choices", [{}])[0]
+    message = choice.get("message", {}) if isinstance(choice.get("message"), dict) else {}
+    content_text = normalize_content_to_text(message.get("content", ""))
+    tool_calls = list(message.get("tool_calls") or []) if isinstance(message.get("tool_calls"), list) else []
+
+    output_items: list[dict[str, Any]] = []
+    for idx, tool_call in enumerate(tool_calls):
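+        # Each chat tool_call becomes one Responses "function_call" output item.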
+        if not isinstance(tool_call, dict):
+            continue
+        function = tool_call.get("function", {}) if isinstance(tool_call.get("function"), dict) else {}
+        call_id = str(tool_call.get("id") or tool_call.get("call_id") or f"call_{idx}").strip() or f"call_{idx}"
+        arguments = function.get("arguments", "{}")
+        if isinstance(arguments, dict):
+            arguments = json.dumps(arguments, ensure_ascii=False)
+        elif not isinstance(arguments, str):
+            arguments = str(arguments)
+        output_items.append(
+            {
+                "type": "function_call",
+                "id": function_item_id(call_id, idx),
+                "call_id": call_id,
+                "name": str(function.get("name") or ""),
+                "arguments": arguments or "{}",
+                "status": "completed",
+            }
+        )
+
+    if content_text or not output_items:
+        output_items.append(
+            {
+                "id": f"msg_{payload.get('id') or 'skillclaw'}_{len(output_items)}",
+                "type": "message",
+                "role": "assistant",
+                "status": "completed",
+                "content": [{"type": "output_text", "text": content_text, "annotations": []}],
+            }
+        )
+
+    usage = payload.get("usage", {})
+    response_payload = {
+        "id": payload.get("id") or f"resp_skillclaw_{int(time.time() * 1000)}",
+        "object": "response",
+        "created_at": payload.get("created", int(time.time())),
+        "status": "completed",
+        "model": model,
+        "output": output_items,
+        "parallel_tool_calls": True,
+        "tool_choice": "auto",
+        "tools": [],
+        "usage": {
+            "input_tokens": int(usage.get("prompt_tokens", 0) or 0),
+            "output_tokens": int(usage.get("completion_tokens", 0) or 0),
+            "total_tokens": int(usage.get("total_tokens", 0) or 0),
+        },
+    }
+    if content_text:
+        response_payload["output_text"] = content_text
+    return response_payload
+
+
+async def stream_response(response_payload: dict[str, Any]) -> AsyncIterator[str]:
+    """Yield OpenAI Responses API-compatible SSE events."""
+    sequence_number = 0
+
+    def event(payload: dict[str, Any]) -> str:
+        nonlocal sequence_number
+        payload["sequence_number"] = sequence_number
+        sequence_number += 1
+        return "data: " + json.dumps(payload, ensure_ascii=False) + "\n\n"
+
+    initial_response = dict(response_payload)
+    initial_response["status"] = "in_progress"
+    initial_response["output"] = []
+    initial_response["usage"] = None
+    yield event({"type": "response.created", "response": initial_response})
+    yield event({"type": "response.in_progress", "response": initial_response})
+
+    for index, item in enumerate(response_payload.get("output", [])):
+        yield event({"type": "response.output_item.added", "output_index": index, "item": item})
+
+        if item.get("type") == "function_call":
+            arguments = str(item.get("arguments") or "")
+            if arguments:
+                yield event(
+                    {
+                        "type": "response.function_call_arguments.delta",
+                        "item_id": item.get("id", ""),
+                        "output_index": index,
+                        "delta": arguments,
+                    }
+                )
+            yield event(
+                {
+                    "type": "response.function_call_arguments.done",
+                    "item_id": item.get("id", ""),
+                    "output_index": index,
+                    "arguments": arguments,
+                }
+            )
+
+        if item.get("type") == "message":
+            for content_index, part in enumerate(item.get("content", [])):
+                if part.get("type") != "output_text":
+                    continue
+                item_id = str(item.get("id") or "")
+                base_part = {"type": "output_text", "text": "", "annotations": []}
+                yield event(
+                    {
+                        "type": "response.content_part.added",
+                        "output_index": index,
+                        "content_index": content_index,
+                        "item_id": item_id,
+                        "part": base_part,
+                    }
+                )
+                text = str(part.get("text") or "")
+                if text:
+                    yield event(
+                        {
+                            "type": "response.output_text.delta",
+                            "output_index": index,
+                            "content_index": content_index,
+                            "item_id": item_id,
+                            "delta": text,
"logprobs": [], + } + ) + yield event( + { + "type": "response.output_text.done", + "output_index": index, + "content_index": content_index, + "item_id": item_id, + "text": text, + "logprobs": [], + } + ) + yield event( + { + "type": "response.content_part.done", + "output_index": index, + "content_index": content_index, + "item_id": item_id, + "part": {"type": "output_text", "text": text, "annotations": []}, + } + ) + yield event({"type": "response.output_item.done", "output_index": index, "item": item}) + + yield event({"type": "response.completed", "response": response_payload}) + yield "data: [DONE]\n\n" diff --git a/skillclaw/setup_wizard.py b/skillclaw/setup_wizard.py index 71168f0..1197865 100644 --- a/skillclaw/setup_wizard.py +++ b/skillclaw/setup_wizard.py @@ -343,6 +343,7 @@ def run(self): proxy_config["port"] = proxy_port proxy_config.setdefault("host", "0.0.0.0") proxy_config["served_model_name"] = served_model_name or "skillclaw-model" + llm_api_mode = str(current_llm.get("api_mode", "chat") or "chat") data = { "claw_type": claw_type, "llm": { @@ -351,6 +352,7 @@ def run(self): "api_base": api_base, "api_key": api_key, "bedrock_region": bedrock_region, + "api_mode": llm_api_mode, }, "openrouter": openrouter_config, "proxy": proxy_config, diff --git a/tests/test_anthropic_messages.py b/tests/test_anthropic_messages.py new file mode 100644 index 0000000..19a2576 --- /dev/null +++ b/tests/test_anthropic_messages.py @@ -0,0 +1,294 @@ +from skillclaw.protocols import anthropic_messages + + +def test_anthropic_tool_result_blocks_convert_to_openai_tool_messages(): + body = { + "model": "claude-code-test", + "messages": [ + { + "role": "assistant", + "content": [ + {"type": "tool_use", "id": "toolu_1", "name": "Skill", "input": {"name": "debug"}} + ], + }, + { + "role": "user", + "content": [ + {"type": "tool_result", "tool_use_id": "toolu_1", "content": "Skill instructions"} + ], + }, + ], + } + + converted = anthropic_messages.to_openai_body(body) + + assert converted["messages"] == [ + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "toolu_1", + "type": "function", + "function": {"name": "Skill", "arguments": '{"name": "debug"}'}, + } + ], + }, + {"role": "tool", "tool_call_id": "toolu_1", "content": "Skill instructions"}, + ] + + +def test_openai_tool_calls_convert_to_anthropic_tool_use_blocks(): + openai_resp = { + "id": "chatcmpl_1", + "choices": [ + { + "finish_reason": "tool_calls", + "message": { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_1", + "type": "function", + "function": {"name": "Skill", "arguments": '{"name":"debug"}'}, + } + ], + }, + } + ], + "usage": {"prompt_tokens": 1, "completion_tokens": 2}, + } + + converted = anthropic_messages.from_openai_response(openai_resp, "claude-code-test") + + assert converted["stop_reason"] == "tool_use" + assert converted["content"] == [ + {"type": "tool_use", "id": "call_1", "name": "Skill", "input": {"name": "debug"}} + ] + + +async def _collect_stream_events(result, model): + events = [] + async for chunk in anthropic_messages.stream_from_openai_result(result, model): + if not chunk.startswith("event: "): + continue + header, data_line = chunk.strip().split("\n", 1) + events.append((header.removeprefix("event: "), data_line.removeprefix("data: "))) + return events + + +def test_streaming_openai_tool_calls_emit_anthropic_tool_use_events(): + import asyncio + import json + result = { + "response": { + "id": "chatcmpl_1", + "choices": [ + { + "finish_reason": "tool_calls", + 
"message": { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_1", + "type": "function", + "function": {"name": "Skill", "arguments": '{"name":"debug"}'}, + } + ], + }, + } + ], + "usage": {"prompt_tokens": 1, "completion_tokens": 2}, + } + } + + events = asyncio.run(_collect_stream_events(result, "claude-code-test")) + parsed = [(name, json.loads(data)) for name, data in events] + + assert any( + name == "content_block_start" + and payload["content_block"] == {"type": "tool_use", "id": "call_1", "name": "Skill", "input": {}} + for name, payload in parsed + ) + assert any( + name == "content_block_delta" + and payload["delta"] == {"type": "input_json_delta", "partial_json": '{"name":"debug"}'} + for name, payload in parsed + ) + + + +def test_streaming_openai_tool_calls_use_tool_use_stop_reason_even_if_finish_reason_is_stop(): + import asyncio + import json + result = { + "response": { + "id": "chatcmpl_1", + "choices": [ + { + "finish_reason": "stop", + "message": { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_1", + "type": "function", + "function": {"name": "Skill", "arguments": '{"name":"debug"}'}, + } + ], + }, + } + ], + "usage": {"prompt_tokens": 1, "completion_tokens": 2}, + } + } + + events = asyncio.run(_collect_stream_events(result, "claude-code-test")) + parsed = [(name, json.loads(data)) for name, data in events] + + assert any( + name == "message_delta" + and payload["delta"] == {"stop_reason": "tool_use", "stop_sequence": None} + for name, payload in parsed + ) + +def test_anthropic_system_blocks_preserve_text_and_cache_control(): + body = { + "model": "claude-code-test", + "system": [ + {"type": "text", "text": "You are Claude Code.", "cache_control": {"type": "ephemeral"}}, + {"type": "text", "text": "Use tools carefully."}, + ], + "messages": [{"role": "user", "content": "hi"}], + } + + converted = anthropic_messages.to_openai_body(body) + + assert converted["messages"][0] == { + "role": "system", + "content": "You are Claude Code. 
Use tools carefully.", + } + + +def test_anthropic_tools_and_tool_choice_convert_to_openai_function_schema(): + body = { + "model": "claude-code-test", + "messages": [{"role": "user", "content": "Use a skill"}], + "tools": [ + { + "name": "Skill", + "description": "Load a named skill", + "input_schema": { + "type": "object", + "properties": {"name": {"type": "string"}}, + "required": ["name"], + }, + } + ], + "tool_choice": {"type": "tool", "name": "Skill"}, + } + + converted = anthropic_messages.to_openai_body(body) + + assert converted["tools"] == [ + { + "type": "function", + "function": { + "name": "Skill", + "description": "Load a named skill", + "parameters": { + "type": "object", + "properties": {"name": {"type": "string"}}, + "required": ["name"], + }, + }, + } + ] + assert converted["tool_choice"] == {"type": "function", "function": {"name": "Skill"}} + + +def test_openai_response_with_tool_calls_uses_tool_use_stop_reason_even_if_finish_reason_is_stop(): + openai_resp = { + "id": "chatcmpl_1", + "choices": [ + { + "finish_reason": "stop", + "message": { + "role": "assistant", + "content": None, + "tool_calls": [ + { + "id": "call_1", + "type": "function", + "function": {"name": "Skill", "arguments": "{}"}, + } + ], + }, + } + ], + } + + converted = anthropic_messages.from_openai_response(openai_resp, "claude-code-test") + + assert converted["stop_reason"] == "tool_use" + + + +def test_anthropic_server_web_search_tool_is_not_converted_to_function_tool(): + body = { + "model": "claude-code-test", + "messages": [{"role": "user", "content": "search docs"}], + "tools": [ + {"type": "web_search_20250305", "name": "web_search"}, + {"name": "Skill", "description": "Load skill", "input_schema": {"type": "object"}}, + ], + } + + converted = anthropic_messages.to_openai_body(body) + + assert converted["tools"] == [ + { + "type": "function", + "function": { + "name": "Skill", + "description": "Load skill", + "parameters": {"type": "object"}, + }, + } + ] + +def test_anthropic_multimodal_image_input_converts_to_openai_chat_content_parts(): + body = { + "model": "claude-code-test", + "messages": [ + { + "role": "user", + "content": [ + {"type": "text", "text": "describe this image"}, + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/png", + "data": "AAAA", + }, + }, + ], + } + ], + } + + converted = anthropic_messages.to_openai_body(body) + + assert converted["messages"] == [ + { + "role": "user", + "content": [ + {"type": "text", "text": "describe this image"}, + {"type": "image_url", "image_url": {"url": "data:image/png;base64,AAAA"}}, + ], + } + ] diff --git a/tests/test_responses_codex.py b/tests/test_responses_codex.py new file mode 100644 index 0000000..47a12aa --- /dev/null +++ b/tests/test_responses_codex.py @@ -0,0 +1,233 @@ +from skillclaw.protocols import openai_responses + + +def test_codex_responses_message_items_convert_developer_to_system(): + body = { + "model": "skillclaw-model", + "instructions": "base instructions", + "input": [ + { + "type": "message", + "role": "developer", + "content": [{"type": "input_text", "text": "repo instructions"}], + }, + { + "type": "message", + "role": "user", + "content": [{"type": "input_text", "text": "do work"}], + }, + ], + } + + converted = openai_responses.to_openai_body(body, "fallback-model") + + assert converted["messages"] == [ + {"role": "system", "content": "base instructions"}, + {"role": "system", "content": "repo instructions"}, + {"role": "user", "content": "do work"}, + ] + + +def 
test_codex_responses_drops_non_chat_completion_tools_but_keeps_function_tools(): + body = { + "model": "skillclaw-model", + "input": "hi", + "tools": [ + {"type": "function", "name": "exec_command", "parameters": {"type": "object"}}, + {"type": "custom", "name": "js_repl"}, + {"type": "web_search"}, + {"type": "namespace", "name": "mcp__cccc__"}, + ], + } + + converted = openai_responses.to_openai_body(body, "fallback-model") + + assert converted["tools"] == [ + { + "type": "function", + "function": { + "name": "exec_command", + "description": "", + "parameters": {"type": "object"}, + }, + } + ] + + +async def _collect_response_stream(response_payload): + events = [] + async for chunk in openai_responses.stream_response(response_payload): + if chunk.startswith("data: ") and chunk.strip() != "data: [DONE]": + events.append(chunk.removeprefix("data: ").strip()) + elif chunk.strip() == "data: [DONE]": + events.append("[DONE]") + return events + + +def test_codex_responses_stream_includes_response_completed_and_done(): + import asyncio + import json + + response_payload = { + "id": "resp_1", + "object": "response", + "created_at": 0, + "status": "completed", + "model": "skillclaw-model", + "output": [ + { + "id": "msg_1", + "type": "message", + "role": "assistant", + "status": "completed", + "content": [{"type": "output_text", "text": "hi", "annotations": []}], + } + ], + "usage": {"input_tokens": 1, "output_tokens": 1, "total_tokens": 2}, + } + + events = asyncio.run(_collect_response_stream(response_payload)) + parsed = [json.loads(event) for event in events if event != "[DONE]"] + + assert any(event["type"] == "response.completed" for event in parsed) + assert events[-1] == "[DONE]" + + +def test_codex_responses_multimodal_image_input_converts_to_openai_chat_content_parts(): + body = { + "model": "skillclaw-model", + "input": [ + { + "type": "message", + "role": "user", + "content": [ + {"type": "input_text", "text": "describe this image"}, + {"type": "input_image", "image_url": "data:image/png;base64,AAAA"}, + ], + } + ], + } + + converted = openai_responses.to_openai_body(body, "fallback-model") + + assert converted["messages"] == [ + { + "role": "user", + "content": [ + {"type": "text", "text": "describe this image"}, + {"type": "image_url", "image_url": {"url": "data:image/png;base64,AAAA"}}, + ], + } + ] + + +def test_codex_responses_skill_function_call_and_output_convert_to_chat_tool_roundtrip(): + body = { + "model": "skillclaw-model", + "input": [ + { + "type": "function_call", + "call_id": "call_skill_1", + "name": "Skill", + "arguments": {"name": "paper-reviewer"}, + }, + { + "type": "function_call_output", + "call_id": "call_skill_1", + "output": "Loaded paper-reviewer skill instructions", + }, + { + "type": "message", + "role": "user", + "content": [{"type": "input_text", "text": "continue"}], + }, + ], + } + + converted = openai_responses.to_openai_body(body, "fallback-model") + + assert converted["messages"] == [ + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_skill_1", + "type": "function", + "function": {"name": "Skill", "arguments": '{"name": "paper-reviewer"}'}, + } + ], + }, + { + "role": "tool", + "tool_call_id": "call_skill_1", + "content": "Loaded paper-reviewer skill instructions", + }, + {"role": "user", "content": "continue"}, + ] + + +def test_openai_chat_tool_call_response_converts_to_codex_responses_function_call_item(): + payload = { + "id": "chatcmpl_tool", + "created": 1, + "choices": [ + { + "finish_reason": "tool_calls", + 
"message": { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_skill_1", + "type": "function", + "function": {"name": "Skill", "arguments": '{"name":"paper-reviewer"}'}, + } + ], + }, + } + ], + "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15}, + } + + converted = openai_responses.from_openai_chat_payload(payload, "skillclaw-model") + + assert converted["output"] == [ + { + "type": "function_call", + "id": "fc_skill_1", + "call_id": "call_skill_1", + "name": "Skill", + "arguments": '{"name":"paper-reviewer"}', + "status": "completed", + } + ] + + +def test_codex_responses_chat_bridge_converts_function_tool_choice_shortcut(): + body = { + "model": "skillclaw-model", + "input": "Use the skill", + "tools": [{"type": "function", "name": "Skill", "parameters": {"type": "object"}}], + "tool_choice": {"type": "function", "name": "Skill"}, + } + + converted = openai_responses.to_openai_body(body, "fallback-model") + + assert converted["tool_choice"] == {"type": "function", "function": {"name": "Skill"}} + + +def test_codex_responses_chat_bridge_rejects_native_tool_choice(): + body = { + "model": "skillclaw-model", + "input": "Use js", + "tools": [{"type": "custom", "name": "js_repl"}], + "tool_choice": {"type": "custom", "name": "js_repl"}, + } + + try: + openai_responses.to_openai_body(body, "fallback-model") + except ValueError as exc: + assert "not supported in chat bridge mode" in str(exc) + else: + raise AssertionError("expected native tool_choice to be rejected") diff --git a/tests/test_responses_native.py b/tests/test_responses_native.py new file mode 100644 index 0000000..ea5212e --- /dev/null +++ b/tests/test_responses_native.py @@ -0,0 +1,266 @@ +import httpx +import pytest + +from skillclaw.api_server import SkillClawAPIServer +from skillclaw.config import SkillClawConfig + + +@pytest.mark.asyncio +async def test_forward_to_llm_responses_preserves_codex_native_tools(monkeypatch): + captured = {} + + class FakeResponse: + def raise_for_status(self): + return None + + def json(self): + return { + "id": "resp_native", + "object": "response", + "status": "completed", + "output": [], + } + + class FakeAsyncClient: + def __init__(self, *args, **kwargs): + pass + + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc, tb): + return False + + async def post(self, url, json, headers): + captured["url"] = url + captured["json"] = json + captured["headers"] = headers + return FakeResponse() + + monkeypatch.setattr(httpx, "AsyncClient", FakeAsyncClient) + server = object.__new__(SkillClawAPIServer) + server.config = SkillClawConfig( + llm_api_base="http://upstream.test/v1", + llm_api_key="upstream-key", + llm_model_id="upstream-model", + llm_api_mode="responses", + ) + + body = { + "model": "skillclaw-model", + "input": "hi", + "stream": True, + "tools": [ + {"type": "function", "name": "exec_command", "parameters": {"type": "object"}}, + {"type": "custom", "name": "js_repl"}, + {"type": "web_search"}, + {"type": "namespace", "name": "mcp__cccc__"}, + ], + } + + result = await server._forward_to_llm_responses(body) + + assert result["id"] == "resp_native" + assert captured["url"] == "http://upstream.test/v1/responses" + assert captured["json"]["model"] == "upstream-model" + assert captured["json"]["stream"] is False + assert captured["json"]["tools"] == body["tools"] + assert captured["headers"] == {"Authorization": "Bearer upstream-key"} + + +@pytest.mark.asyncio +async def 
diff --git a/tests/test_responses_native.py b/tests/test_responses_native.py
new file mode 100644
index 0000000..ea5212e
--- /dev/null
+++ b/tests/test_responses_native.py
@@ -0,0 +1,266 @@
+import httpx
+import pytest
+
+from skillclaw.api_server import SkillClawAPIServer
+from skillclaw.config import SkillClawConfig
+
+
+@pytest.mark.asyncio
+async def test_forward_to_llm_responses_preserves_codex_native_tools(monkeypatch):
+    captured = {}
+
+    class FakeResponse:
+        def raise_for_status(self):
+            return None
+
+        def json(self):
+            return {
+                "id": "resp_native",
+                "object": "response",
+                "status": "completed",
+                "output": [],
+            }
+
+    class FakeAsyncClient:
+        def __init__(self, *args, **kwargs):
+            pass
+
+        async def __aenter__(self):
+            return self
+
+        async def __aexit__(self, exc_type, exc, tb):
+            return False
+
+        async def post(self, url, json, headers):
+            captured["url"] = url
+            captured["json"] = json
+            captured["headers"] = headers
+            return FakeResponse()
+
+    monkeypatch.setattr(httpx, "AsyncClient", FakeAsyncClient)
+    server = object.__new__(SkillClawAPIServer)
+    server.config = SkillClawConfig(
+        llm_api_base="http://upstream.test/v1",
+        llm_api_key="upstream-key",
+        llm_model_id="upstream-model",
+        llm_api_mode="responses",
+    )
+
+    body = {
+        "model": "skillclaw-model",
+        "input": "hi",
+        "stream": True,
+        "tools": [
+            {"type": "function", "name": "exec_command", "parameters": {"type": "object"}},
+            {"type": "custom", "name": "js_repl"},
+            {"type": "web_search"},
+            {"type": "namespace", "name": "mcp__cccc__"},
+        ],
+    }
+
+    result = await server._forward_to_llm_responses(body)
+
+    assert result["id"] == "resp_native"
+    assert captured["url"] == "http://upstream.test/v1/responses"
+    assert captured["json"]["model"] == "upstream-model"
+    assert captured["json"]["stream"] is False
+    assert captured["json"]["tools"] == body["tools"]
+    assert captured["headers"] == {"Authorization": "Bearer upstream-key"}
+
+
+@pytest.mark.asyncio
+async def test_responses_endpoint_uses_native_forward_when_enabled():
+    server = SkillClawAPIServer(
+        SkillClawConfig(
+            llm_api_mode="responses",
+            llm_api_base="http://upstream.test/v1",
+            llm_model_id="upstream-model",
+            proxy_api_key="skillclaw",
+            record_enabled=False,
+        )
+    )
+    seen = {}
+
+    async def fake_forward(body):
+        seen["body"] = body
+        return {
+            "id": "resp_native",
+            "object": "response",
+            "created_at": 0,
+            "status": "completed",
+            "model": "upstream-model",
+            "output": [
+                {
+                    "id": "msg_1",
+                    "type": "message",
+                    "role": "assistant",
+                    "status": "completed",
+                    "content": [{"type": "output_text", "text": "native ok", "annotations": []}],
+                }
+            ],
+            "usage": {"input_tokens": 1, "output_tokens": 1, "total_tokens": 2},
+        }
+
+    server._forward_to_llm_responses = fake_forward
+    client = httpx.AsyncClient(transport=httpx.ASGITransport(app=server.app), base_url="http://test")
+    try:
+        response = await client.post(
+            "/v1/responses",
+            headers={"Authorization": "Bearer skillclaw", "Session_id": "codex-session-1"},
+            json={
+                "model": "skillclaw-model",
+                "input": "hi",
+                "stream": False,
+                "tools": [{"type": "custom", "name": "js_repl"}],
+            },
+        )
+    finally:
+        await client.aclose()
+
+    assert response.status_code == 200
+    assert response.json()["id"] == "resp_native"
+    assert seen["body"]["tools"] == [{"type": "custom", "name": "js_repl"}]
+
+
+@pytest.mark.asyncio
+async def test_forward_to_llm_responses_stream_preserves_upstream_sse(monkeypatch):
+    captured = {}
+
+    class FakeStreamResponse:
+        def raise_for_status(self):
+            return None
+
+        async def aiter_raw(self):
+            yield b'data: {"type":"response.created"}\n\n'
+            yield b'data: {"type":"response.completed"}\n\n'
+            yield b'data: [DONE]\n\n'
+
+    class FakeStreamContext:
+        async def __aenter__(self):
+            return FakeStreamResponse()
+
+        async def __aexit__(self, exc_type, exc, tb):
+            return False
+
+    class FakeAsyncClient:
+        def __init__(self, *args, **kwargs):
+            pass
+
+        async def __aenter__(self):
+            return self
+
+        async def __aexit__(self, exc_type, exc, tb):
+            return False
+
+        def stream(self, method, url, json, headers):
+            captured["method"] = method
+            captured["url"] = url
+            captured["json"] = json
+            captured["headers"] = headers
+            return FakeStreamContext()
+
+    monkeypatch.setattr(httpx, "AsyncClient", FakeAsyncClient)
+    server = object.__new__(SkillClawAPIServer)
+    server.config = SkillClawConfig(
+        llm_api_base="http://upstream.test/v1",
+        llm_api_key="upstream-key",
+        llm_model_id="upstream-model",
+        llm_api_mode="responses",
+    )
+
+    body = {
+        "model": "skillclaw-model",
+        "input": "hi",
+        "stream": True,
+        "tools": [{"type": "custom", "name": "js_repl"}],
+    }
+
+    chunks = []
+    async for chunk in server._stream_llm_responses(body):
+        chunks.append(chunk)
+
+    assert chunks == [
+        b'data: {"type":"response.created"}\n\n',
+        b'data: {"type":"response.completed"}\n\n',
+        b'data: [DONE]\n\n',
+    ]
+    assert captured["method"] == "POST"
+    assert captured["url"] == "http://upstream.test/v1/responses"
+    assert captured["json"]["stream"] is True
+    assert captured["json"]["model"] == "upstream-model"
+    assert captured["json"]["tools"] == body["tools"]
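+
+
+# The fakes above pin the native forwarding contract: POST {api_base}/responses
+# with the served model name swapped for the upstream one, and the SSE bytes
+# surfaced without re-encoding. Reduced to a sketch (hypothetical, not the
+# shipped implementation):
+#
+#     async with client.stream("POST", url, json=send_body, headers=headers) as resp:
+#         async for raw in resp.aiter_raw():
+#             yield raw  # upstream SSE frames pass through verbatim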
{"type":"response.created","upstream":true}\n\n' + yield b'data: [DONE]\n\n' + + server._stream_llm_responses = fake_stream + client = httpx.AsyncClient(transport=httpx.ASGITransport(app=server.app), base_url="http://test") + try: + response = await client.post( + "/v1/responses", + headers={"Authorization": "Bearer skillclaw", "Session_id": "codex-session-1"}, + json={"model": "skillclaw-model", "input": "hi", "stream": True}, + ) + finally: + await client.aclose() + + assert response.status_code == 200 + assert response.text == 'data: {"type":"response.created","upstream":true}\n\ndata: [DONE]\n\n' + + +def test_prepare_responses_forward_keeps_native_codex_items_out_of_chat_conversion(): + server = object.__new__(SkillClawAPIServer) + server.config = SkillClawConfig( + llm_api_base="http://upstream.test/v1/", + llm_api_key="upstream-key", + llm_model_id="upstream-model", + llm_api_mode="responses", + ) + native_tools = [ + {"type": "custom", "name": "js_repl", "description": "Run JavaScript"}, + {"type": "web_search", "search_context_size": "medium"}, + { + "type": "namespace", + "name": "mcp__cccc__", + "tools": [{"name": "cccc_context_get", "input_schema": {"type": "object"}}], + }, + ] + body = { + "model": "skillclaw-model", + "input": [{"role": "user", "content": [{"type": "input_text", "text": "hi"}]}], + "tools": native_tools, + "tool_choice": {"type": "custom", "name": "js_repl"}, + "parallel_tool_calls": True, + "session_id": "private-session", + "session_done": False, + "turn_type": "user", + } + + url, send_body, headers = server._prepare_responses_forward(body, stream=True) + + assert url == "http://upstream.test/v1/responses" + assert headers == {"Authorization": "Bearer upstream-key"} + assert send_body["model"] == "upstream-model" + assert send_body["stream"] is True + assert send_body["tools"] is native_tools + assert send_body["tool_choice"] == {"type": "custom", "name": "js_repl"} + assert send_body["parallel_tool_calls"] is True + assert send_body["input"] == body["input"] + assert "messages" not in send_body + assert "session_id" not in send_body + assert "session_done" not in send_body + assert "turn_type" not in send_body diff --git a/tests/test_setup_wizard.py b/tests/test_setup_wizard.py new file mode 100644 index 0000000..2e1fa18 --- /dev/null +++ b/tests/test_setup_wizard.py @@ -0,0 +1,41 @@ +from skillclaw import setup_wizard +from skillclaw.setup_wizard import SetupWizard + + +def test_setup_wizard_preserves_existing_llm_api_mode(monkeypatch, tmp_path): + saved = {} + + class FakeConfigStore: + config_file = tmp_path / "config.yaml" + + def exists(self): + return True + + def load(self): + return { + "claw_type": "codex", + "llm": { + "provider": "custom", + "model_id": "upstream-model", + "api_base": "http://upstream.test/v1", + "api_key": "upstream-key", + "api_mode": "responses", + }, + "proxy": {"port": 30000, "served_model_name": "skillclaw-model"}, + "skills": {"enabled": False, "dir": str(tmp_path / "skills")}, + "prm": {"enabled": False}, + "sharing": {"enabled": False}, + } + + def save(self, data): + saved.update(data) + + monkeypatch.setattr(setup_wizard, "ConfigStore", FakeConfigStore) + monkeypatch.setattr(setup_wizard, "_prompt_choice", lambda msg, choices, default="": default) + monkeypatch.setattr(setup_wizard, "_prompt", lambda msg, default="", hide=False: default) + monkeypatch.setattr(setup_wizard, "_prompt_bool", lambda msg, default=False: default) + monkeypatch.setattr(setup_wizard, "_prompt_int", lambda msg, default=0: default) + + 
+    SetupWizard().run()
+
+    assert saved["llm"]["api_mode"] == "responses"