120 changes: 120 additions & 0 deletions src/api/providers/__tests__/requesty.spec.ts
@@ -2,6 +2,7 @@

import { Anthropic } from "@anthropic-ai/sdk"
import OpenAI from "openai"
import { t } from "i18next"

import { TOOL_PROTOCOL } from "@roo-code/types"

@@ -378,6 +379,125 @@ describe("RequestyHandler", () => {
})
})
})

describe("reasoning-only response handling", () => {
it("should emit placeholder text when model returns only reasoning content", async () => {
const handler = new RequestyHandler(mockOptions)

// Mock stream that only returns reasoning content without actual text or tool calls
const mockStreamWithOnlyReasoning = {
async *[Symbol.asyncIterator]() {
yield {
id: "test-id",
choices: [
{
delta: {
reasoning_content: "I am thinking about how to respond...",
},
},
],
}
yield {
id: "test-id",
choices: [
{
delta: {
reasoning_content:
"The user wants me to use a tool, but I'll format it wrong: <tool_call><function=get_weather>",
},
},
],
}
yield {
id: "test-id",
choices: [{ delta: {} }],
usage: { prompt_tokens: 10, completion_tokens: 20 },
}
},
}
mockCreate.mockResolvedValue(mockStreamWithOnlyReasoning)

const systemPrompt = "test system prompt"
const messages: Anthropic.Messages.MessageParam[] = [{ role: "user" as const, content: "test message" }]

const chunks = []
for await (const chunk of handler.createMessage(systemPrompt, messages)) {
chunks.push(chunk)
}

// Expect two reasoning chunks, one fallback text chunk, and one usage chunk
expect(chunks).toHaveLength(4)
expect(chunks[0]).toEqual({ type: "reasoning", text: "I am thinking about how to respond..." })
expect(chunks[1]).toEqual({
type: "reasoning",
text: "The user wants me to use a tool, but I'll format it wrong: <tool_call><function=get_weather>",
})
// Fallback text emitted to prevent an empty-response error
expect(chunks[2]).toEqual({
type: "text",
text: t("common:errors.gemini.thinking_complete_no_output"),
})
expect(chunks[3]).toMatchObject({
type: "usage",
inputTokens: 10,
outputTokens: 20,
})
})

it("should not emit placeholder when model returns actual content", async () => {
const handler = new RequestyHandler(mockOptions)

// Mock stream that returns both reasoning and text content
const mockStreamWithContent = {
async *[Symbol.asyncIterator]() {
yield {
id: "test-id",
choices: [
{
delta: {
reasoning_content: "Thinking...",
},
},
],
}
yield {
id: "test-id",
choices: [
{
delta: {
content: "Here is my actual response",
},
},
],
}
yield {
id: "test-id",
choices: [{ delta: {} }],
usage: { prompt_tokens: 10, completion_tokens: 20 },
}
},
}
mockCreate.mockResolvedValue(mockStreamWithContent)

const systemPrompt = "test system prompt"
const messages: Anthropic.Messages.MessageParam[] = [{ role: "user" as const, content: "test message" }]

const chunks = []
for await (const chunk of handler.createMessage(systemPrompt, messages)) {
chunks.push(chunk)
}

// Expect one reasoning chunk, one text chunk, and one usage chunk (no fallback)
expect(chunks).toHaveLength(3)
expect(chunks[0]).toEqual({ type: "reasoning", text: "Thinking..." })
expect(chunks[1]).toEqual({ type: "text", text: "Here is my actual response" })
expect(chunks[2]).toMatchObject({
type: "usage",
inputTokens: 10,
outputTokens: 20,
})
})
})
})

describe("completePrompt", () => {
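For reference, the behavior these tests pin down can be distilled into a few lines. The sketch below is a minimal standalone version with simplified chunk shapes and a hypothetical placeholder string; the real handler reads OpenAI-style stream chunks, also counts native tool calls as content, and yields the localized `common:errors.gemini.thinking_complete_no_output` string rather than a hard-coded one.

// Minimal sketch of the reasoning-only fallback, assuming simplified shapes.
type StreamDelta = { content?: string; reasoning_content?: string }
type ApiChunk = { type: "text" | "reasoning"; text: string }

// Hypothetical stand-in for the localized placeholder string.
const PLACEHOLDER = "Thinking complete, but no output was produced."

async function* withReasoningFallback(deltas: AsyncIterable<StreamDelta>): AsyncGenerator<ApiChunk> {
	let hasContent = false
	let hasReasoning = false

	for await (const delta of deltas) {
		if (delta.content) {
			hasContent = true
			yield { type: "text", text: delta.content }
		}
		if (delta.reasoning_content) {
			hasReasoning = true
			yield { type: "reasoning", text: delta.reasoning_content }
		}
	}

	// Reasoning arrived but no text (or tool calls, in the real handler):
	// emit a placeholder so callers never see an empty assistant response.
	if (hasReasoning && !hasContent) {
		yield { type: "text", text: PLACEHOLDER }
	}
}

Fed the reasoning-only stream from the first test, this yields two reasoning chunks followed by the placeholder text chunk; fed the second test's stream, it yields no placeholder.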
16 changes: 16 additions & 0 deletions src/api/providers/requesty.ts
@@ -1,5 +1,6 @@
import { Anthropic } from "@anthropic-ai/sdk"
import OpenAI from "openai"
import { t } from "i18next"

import { type ModelInfo, requestyDefaultModelId, requestyDefaultModelInfo, TOOL_PROTOCOL } from "@roo-code/types"

@@ -166,19 +167,26 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan
}
let lastUsage: any = undefined

// Track whether we've received actual content vs just reasoning
let hasContent = false
let hasReasoning = false

for await (const chunk of stream) {
const delta = chunk.choices[0]?.delta

if (delta?.content) {
hasContent = true
yield { type: "text", text: delta.content }
}

if (delta && "reasoning_content" in delta && delta.reasoning_content) {
hasReasoning = true
yield { type: "reasoning", text: (delta.reasoning_content as string | undefined) || "" }
}

// Handle native tool calls
if (delta && "tool_calls" in delta && Array.isArray(delta.tool_calls)) {
hasContent = true
for (const toolCall of delta.tool_calls) {
yield {
type: "tool_call_partial",
@@ -195,6 +203,14 @@
}
}

// If model produced reasoning but no actual content (text or tool calls),
// emit a placeholder to prevent "empty assistant response" errors.
// This can happen when models output malformed tool call syntax in their
// reasoning/thinking content (e.g., <tool_call><function=...> tags).
if (hasReasoning && !hasContent) {
yield { type: "text", text: t("common:errors.gemini.thinking_complete_no_output") }
}

if (lastUsage) {
yield this.processUsageMetrics(lastUsage, info)
}
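A note on the placeholder itself: rather than introducing a new translation key, the fallback reuses the existing Gemini-namespaced `common:errors.gemini.thinking_complete_no_output` key, which covers the same reasoning-without-output case. The `t("common:…")` call follows i18next's `namespace:key.path` convention; below is a minimal sketch of how such a key resolves, with hypothetical English copy (the real string ships in the project's locale files).

import i18next, { t } from "i18next"

async function main() {
	await i18next.init({
		lng: "en",
		ns: ["common"],
		resources: {
			en: {
				// Hypothetical English copy, for illustration only.
				common: {
					errors: {
						gemini: {
							thinking_complete_no_output: "Thinking complete, but no output was produced.",
						},
					},
				},
			},
		},
	})

	// "common:" selects the namespace; the rest is the nested key path.
	console.log(t("common:errors.gemini.thinking_complete_no_output"))
}

main()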