diff --git a/src/api/providers/__tests__/requesty.spec.ts b/src/api/providers/__tests__/requesty.spec.ts
index df799426a72..bca440de1eb 100644
--- a/src/api/providers/__tests__/requesty.spec.ts
+++ b/src/api/providers/__tests__/requesty.spec.ts
@@ -2,6 +2,7 @@
 
 import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI from "openai"
+import { t } from "i18next"
 
 import { TOOL_PROTOCOL } from "@roo-code/types"
 
@@ -378,6 +379,125 @@ describe("RequestyHandler", () => {
 			})
 		})
 	})
+
+	describe("reasoning-only response handling", () => {
+		it("should emit placeholder text when model returns only reasoning content", async () => {
+			const handler = new RequestyHandler(mockOptions)
+
+			// Mock stream that only returns reasoning content without actual text or tool calls
+			const mockStreamWithOnlyReasoning = {
+				async *[Symbol.asyncIterator]() {
+					yield {
+						id: "test-id",
+						choices: [
+							{
+								delta: {
+									reasoning_content: "I am thinking about how to respond...",
+								},
+							},
+						],
+					}
+					yield {
+						id: "test-id",
+						choices: [
+							{
+								delta: {
+									reasoning_content:
+										"The user wants me to use a tool, but I'll format it wrong: ",
+								},
+							},
+						],
+					}
+					yield {
+						id: "test-id",
+						choices: [{ delta: {} }],
+						usage: { prompt_tokens: 10, completion_tokens: 20 },
+					}
+				},
+			}
+			mockCreate.mockResolvedValue(mockStreamWithOnlyReasoning)
+
+			const systemPrompt = "test system prompt"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user" as const, content: "test message" }]
+
+			const chunks = []
+			for await (const chunk of handler.createMessage(systemPrompt, messages)) {
+				chunks.push(chunk)
+			}
+
+			// Expect two reasoning chunks, one fallback text chunk, and one usage chunk
+			expect(chunks).toHaveLength(4)
+			expect(chunks[0]).toEqual({ type: "reasoning", text: "I am thinking about how to respond..." })
+			expect(chunks[1]).toEqual({
+				type: "reasoning",
+				text: "The user wants me to use a tool, but I'll format it wrong: ",
+			})
+			// The fallback text to prevent empty response error
+			expect(chunks[2]).toEqual({
+				type: "text",
+				text: t("common:errors.gemini.thinking_complete_no_output"),
+			})
+			expect(chunks[3]).toMatchObject({
+				type: "usage",
+				inputTokens: 10,
+				outputTokens: 20,
+			})
+		})
+
+		it("should not emit placeholder when model returns actual content", async () => {
+			const handler = new RequestyHandler(mockOptions)
+
+			// Mock stream that returns both reasoning and text content
+			const mockStreamWithContent = {
+				async *[Symbol.asyncIterator]() {
+					yield {
+						id: "test-id",
+						choices: [
+							{
+								delta: {
+									reasoning_content: "Thinking...",
+								},
+							},
+						],
+					}
+					yield {
+						id: "test-id",
+						choices: [
+							{
+								delta: {
+									content: "Here is my actual response",
+								},
+							},
+						],
+					}
+					yield {
+						id: "test-id",
+						choices: [{ delta: {} }],
+						usage: { prompt_tokens: 10, completion_tokens: 20 },
+					}
+				},
+			}
+			mockCreate.mockResolvedValue(mockStreamWithContent)
+
+			const systemPrompt = "test system prompt"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user" as const, content: "test message" }]
+
+			const chunks = []
+			for await (const chunk of handler.createMessage(systemPrompt, messages)) {
+				chunks.push(chunk)
+			}
+
+			// Expect one reasoning chunk, one text chunk, and one usage chunk (no fallback)
+			expect(chunks).toHaveLength(3)
+			expect(chunks[0]).toEqual({ type: "reasoning", text: "Thinking..." })
+			expect(chunks[1]).toEqual({ type: "text", text: "Here is my actual response" })
+			expect(chunks[2]).toMatchObject({
+				type: "usage",
+				inputTokens: 10,
+				outputTokens: 20,
+			})
+		})
+	})
 })
 
 describe("completePrompt", () => {
diff --git a/src/api/providers/requesty.ts b/src/api/providers/requesty.ts
index b84a36bcc16..b28f64d1d70 100644
--- a/src/api/providers/requesty.ts
+++ b/src/api/providers/requesty.ts
@@ -1,5 +1,6 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI from "openai"
+import { t } from "i18next"
 
 import { type ModelInfo, requestyDefaultModelId, requestyDefaultModelInfo, TOOL_PROTOCOL } from "@roo-code/types"
 
@@ -166,19 +167,26 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan
 		}
 
 		let lastUsage: any = undefined
+		// Track whether we've received actual content vs just reasoning
+		let hasContent = false
+		let hasReasoning = false
+
 		for await (const chunk of stream) {
 			const delta = chunk.choices[0]?.delta
 
 			if (delta?.content) {
+				hasContent = true
 				yield { type: "text", text: delta.content }
 			}
 
 			if (delta && "reasoning_content" in delta && delta.reasoning_content) {
+				hasReasoning = true
 				yield { type: "reasoning", text: (delta.reasoning_content as string | undefined) || "" }
 			}
 
 			// Handle native tool calls
 			if (delta && "tool_calls" in delta && Array.isArray(delta.tool_calls)) {
+				hasContent = true
 				for (const toolCall of delta.tool_calls) {
 					yield {
 						type: "tool_call_partial",
@@ -195,6 +203,14 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan
 			}
 		}
 
+		// If model produced reasoning but no actual content (text or tool calls),
+		// emit a placeholder to prevent "empty assistant response" errors.
+		// This can happen when models output malformed tool call syntax in their
+		// reasoning/thinking content (e.g., tags).
+		if (hasReasoning && !hasContent) {
+			yield { type: "text", text: t("common:errors.gemini.thinking_complete_no_output") }
+		}
+
 		if (lastUsage) {
 			yield this.processUsageMetrics(lastUsage, info)
 		}