
Commit d274812

feat(deepseek): implement interleaved thinking mode for deepseek-reasoner (#9969)
1 parent f414ba4 commit d274812

File tree

6 files changed: +701 −77 lines

packages/types/src/providers/deepseek.ts

Lines changed: 5 additions & 1 deletion
@@ -1,6 +1,9 @@
 import type { ModelInfo } from "../model.js"

 // https://platform.deepseek.com/docs/api
+// preserveReasoning enables interleaved thinking mode for tool calls:
+// DeepSeek requires reasoning_content to be passed back during tool call
+// continuation within the same turn. See: https://api-docs.deepseek.com/guides/thinking_mode
 export type DeepSeekModelId = keyof typeof deepSeekModels

 export const deepSeekDefaultModelId: DeepSeekModelId = "deepseek-chat"
@@ -26,6 +29,7 @@ export const deepSeekModels = {
         supportsPromptCache: true,
         supportsNativeTools: true,
         defaultToolProtocol: "native",
+        preserveReasoning: true,
         inputPrice: 0.28, // $0.28 per million tokens (cache miss) - Updated Dec 9, 2025
         outputPrice: 0.42, // $0.42 per million tokens - Updated Dec 9, 2025
         cacheWritesPrice: 0.28, // $0.28 per million tokens (cache miss) - Updated Dec 9, 2025
@@ -35,4 +39,4 @@ export const deepSeekModels = {
 } as const satisfies Record<string, ModelInfo>

 // https://api-docs.deepseek.com/quick_start/parameter_settings
-export const DEEP_SEEK_DEFAULT_TEMPERATURE = 0
+export const DEEP_SEEK_DEFAULT_TEMPERATURE = 0.3
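
To make the preserveReasoning comment above concrete, here is a minimal sketch of a tool-call continuation against the DeepSeek API, assuming an OpenAI-compatible client. The message shapes follow the linked thinking-mode guide; continueAfterToolCall and the weather payload are illustrative and not part of this commit.

// Sketch: interleaved thinking continuation (illustrative; not from this commit).
// Per the thinking-mode guide, the assistant message that issued a tool call is
// replayed with its reasoning_content when the tool result is sent back in the same turn.
import OpenAI from "openai"

const client = new OpenAI({ baseURL: "https://api.deepseek.com", apiKey: process.env.DEEPSEEK_API_KEY })

async function continueAfterToolCall() {
    const messages: any[] = [{ role: "user", content: "What's the weather in SF?" }]
    const tools: any[] = [
        { type: "function", function: { name: "get_weather", description: "Get weather", parameters: { type: "object", properties: {} } } },
    ]

    const first = await client.chat.completions.create({ model: "deepseek-reasoner", messages, tools })
    const assistant: any = first.choices[0].message

    // preserveReasoning in practice: keep reasoning_content on the replayed assistant message
    messages.push({
        role: "assistant",
        content: assistant.content,
        reasoning_content: assistant.reasoning_content, // dropping this breaks same-turn continuation
        tool_calls: assistant.tool_calls,
    })
    messages.push({ role: "tool", tool_call_id: assistant.tool_calls[0].id, content: '{"temp_f":61}' })

    return client.chat.completions.create({ model: "deepseek-reasoner", messages })
}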

src/api/providers/__tests__/deepseek.spec.ts

Lines changed: 186 additions & 9 deletions
@@ -29,23 +29,75 @@ vi.mock("openai", () => {
             }
         }

+        // Check if this is a reasoning_content test by looking at the model
+        const isReasonerModel = options.model?.includes("deepseek-reasoner")
+        const isToolCallTest = options.tools?.length > 0
+
         // Return async iterator for streaming
         return {
             [Symbol.asyncIterator]: async function* () {
-                yield {
-                    choices: [
-                        {
-                            delta: { content: "Test response" },
-                            index: 0,
-                        },
-                    ],
-                    usage: null,
+                // For reasoner models, emit reasoning_content first
+                if (isReasonerModel) {
+                    yield {
+                        choices: [
+                            {
+                                delta: { reasoning_content: "Let me think about this..." },
+                                index: 0,
+                            },
+                        ],
+                        usage: null,
+                    }
+                    yield {
+                        choices: [
+                            {
+                                delta: { reasoning_content: " I'll analyze step by step." },
+                                index: 0,
+                            },
+                        ],
+                        usage: null,
+                    }
                 }
+
+                // For tool call tests with the reasoner, emit a tool call
+                if (isReasonerModel && isToolCallTest) {
+                    yield {
+                        choices: [
+                            {
+                                delta: {
+                                    tool_calls: [
+                                        {
+                                            index: 0,
+                                            id: "call_123",
+                                            function: {
+                                                name: "get_weather",
+                                                arguments: '{"location":"SF"}',
+                                            },
+                                        },
+                                    ],
+                                },
+                                index: 0,
+                            },
+                        ],
+                        usage: null,
+                    }
+                } else {
+                    yield {
+                        choices: [
+                            {
+                                delta: { content: "Test response" },
+                                index: 0,
+                            },
+                        ],
+                        usage: null,
+                    }
+                }
+
                 yield {
                     choices: [
                         {
                             delta: {},
                             index: 0,
+                            finish_reason: isToolCallTest ? "tool_calls" : "stop",
                         },
                     ],
                     usage: {
@@ -70,7 +122,7 @@ vi.mock("openai", () => {
 import OpenAI from "openai"
 import type { Anthropic } from "@anthropic-ai/sdk"

-import { deepSeekDefaultModelId } from "@roo-code/types"
+import { deepSeekDefaultModelId, type ModelInfo } from "@roo-code/types"

 import type { ApiHandlerOptions } from "../../../shared/api"
@@ -174,6 +226,27 @@ describe("DeepSeekHandler", () => {
         expect(model.info.supportsPromptCache).toBe(true)
     })

+    it("should have preserveReasoning enabled for deepseek-reasoner to support interleaved thinking", () => {
+        // This is critical for DeepSeek's interleaved thinking mode with tool calls.
+        // See: https://api-docs.deepseek.com/guides/thinking_mode
+        // The reasoning_content needs to be passed back during tool call continuation
+        // within the same turn for the model to continue reasoning properly.
+        const handlerWithReasoner = new DeepSeekHandler({
+            ...mockOptions,
+            apiModelId: "deepseek-reasoner",
+        })
+        const model = handlerWithReasoner.getModel()
+        // Cast to ModelInfo to access preserveReasoning, which is an optional property
+        expect((model.info as ModelInfo).preserveReasoning).toBe(true)
+    })
+
+    it("should NOT have preserveReasoning enabled for deepseek-chat", () => {
+        // deepseek-chat doesn't use thinking mode, so no need to preserve reasoning
+        const model = handler.getModel()
+        // Cast to ModelInfo to access preserveReasoning, which is an optional property
+        expect((model.info as ModelInfo).preserveReasoning).toBeUndefined()
+    })
+
     it("should return provided model ID with default model info if model does not exist", () => {
         const handlerWithInvalidModel = new DeepSeekHandler({
             ...mockOptions,
@@ -317,4 +390,108 @@ describe("DeepSeekHandler", () => {
             expect(result.cacheReadTokens).toBeUndefined()
         })
     })
+
+    describe("interleaved thinking mode", () => {
+        const systemPrompt = "You are a helpful assistant."
+        const messages: Anthropic.Messages.MessageParam[] = [
+            {
+                role: "user",
+                content: [
+                    {
+                        type: "text" as const,
+                        text: "Hello!",
+                    },
+                ],
+            },
+        ]
+
+        it("should handle reasoning_content in streaming responses for deepseek-reasoner", async () => {
+            const reasonerHandler = new DeepSeekHandler({
+                ...mockOptions,
+                apiModelId: "deepseek-reasoner",
+            })
+
+            const stream = reasonerHandler.createMessage(systemPrompt, messages)
+            const chunks: any[] = []
+            for await (const chunk of stream) {
+                chunks.push(chunk)
+            }
+
+            // Should have reasoning chunks
+            const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning")
+            expect(reasoningChunks.length).toBeGreaterThan(0)
+            expect(reasoningChunks[0].text).toBe("Let me think about this...")
+            expect(reasoningChunks[1].text).toBe(" I'll analyze step by step.")
+        })
+
+        it("should pass thinking parameter for deepseek-reasoner model", async () => {
+            const reasonerHandler = new DeepSeekHandler({
+                ...mockOptions,
+                apiModelId: "deepseek-reasoner",
+            })

+            const stream = reasonerHandler.createMessage(systemPrompt, messages)
+            for await (const _chunk of stream) {
+                // Consume the stream
+            }
+
+            // Verify that the thinking parameter was passed to the API
+            // Note: mockCreate receives two arguments - request options and path options
+            expect(mockCreate).toHaveBeenCalledWith(
+                expect.objectContaining({
+                    thinking: { type: "enabled" },
+                }),
+                {}, // Empty path options for non-Azure URLs
+            )
+        })
+
+        it("should NOT pass thinking parameter for deepseek-chat model", async () => {
+            const chatHandler = new DeepSeekHandler({
+                ...mockOptions,
+                apiModelId: "deepseek-chat",
+            })
+
+            const stream = chatHandler.createMessage(systemPrompt, messages)
+            for await (const _chunk of stream) {
+                // Consume the stream
+            }
+
+            // Verify that the thinking parameter was NOT passed to the API
+            const callArgs = mockCreate.mock.calls[0][0]
+            expect(callArgs.thinking).toBeUndefined()
+        })
+
+        it("should handle tool calls with reasoning_content", async () => {
+            const reasonerHandler = new DeepSeekHandler({
+                ...mockOptions,
+                apiModelId: "deepseek-reasoner",
+            })
+
+            const tools: any[] = [
+                {
+                    type: "function",
+                    function: {
+                        name: "get_weather",
+                        description: "Get weather",
+                        parameters: { type: "object", properties: {} },
+                    },
+                },
+            ]
+
+            const stream = reasonerHandler.createMessage(systemPrompt, messages, { taskId: "test", tools })
+            const chunks: any[] = []
+            for await (const chunk of stream) {
+                chunks.push(chunk)
+            }
+
+            // Should have reasoning chunks
+            const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning")
+            expect(reasoningChunks.length).toBeGreaterThan(0)
+
+            // Should have tool call chunks
+            const toolCallChunks = chunks.filter((chunk) => chunk.type === "tool_call_partial")
+            expect(toolCallChunks.length).toBeGreaterThan(0)
+            expect(toolCallChunks[0].name).toBe("get_weather")
+        })
+    })
 })
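
The chunk type names asserted above ("reasoning", "tool_call_partial", "text", "usage") come from the handler's ApiStream. A small consumer sketch follows; it is not part of this commit, and the chunk field names are assumed from these tests and from createMessage in the diff below.

// Illustrative stream consumer (not from this commit); chunk shapes assumed
// from the assertions above and from src/api/providers/deepseek.ts.
async function printStream(stream: AsyncIterable<any>) {
    for await (const chunk of stream) {
        switch (chunk.type) {
            case "reasoning": // interleaved thinking from delta.reasoning_content
                process.stdout.write(`[thinking] ${chunk.text}`)
                break
            case "text": // regular assistant output
                process.stdout.write(chunk.text)
                break
            case "tool_call_partial": // streamed tool-call fragments, keyed by index
                console.log(`tool call #${chunk.index}: ${chunk.name ?? ""} ${chunk.arguments ?? ""}`)
                break
            case "usage": // emitted once at the end from the final usage payload
                console.log(`input tokens: ${chunk.inputTokens}`)
                break
        }
    }
}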

src/api/providers/deepseek.ts

Lines changed: 110 additions & 3 deletions
@@ -1,11 +1,26 @@
-import { deepSeekModels, deepSeekDefaultModelId } from "@roo-code/types"
+import { Anthropic } from "@anthropic-ai/sdk"
+import OpenAI from "openai"
+
+import {
+    deepSeekModels,
+    deepSeekDefaultModelId,
+    DEEP_SEEK_DEFAULT_TEMPERATURE,
+    OPENAI_AZURE_AI_INFERENCE_PATH,
+} from "@roo-code/types"

 import type { ApiHandlerOptions } from "../../shared/api"

-import type { ApiStreamUsageChunk } from "../transform/stream"
+import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
 import { getModelParams } from "../transform/model-params"
+import { convertToR1Format } from "../transform/r1-format"

 import { OpenAiHandler } from "./openai"
+import type { ApiHandlerCreateMessageMetadata } from "../index"
+
+// Custom params type for DeepSeek to support thinking mode
+type DeepSeekChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParamsStreaming & {
+    thinking?: { type: "enabled" | "disabled" }
+}

 export class DeepSeekHandler extends OpenAiHandler {
     constructor(options: ApiHandlerOptions) {
@@ -26,8 +41,100 @@ export class DeepSeekHandler extends OpenAiHandler {
         return { id, info, ...params }
     }

+    override async *createMessage(
+        systemPrompt: string,
+        messages: Anthropic.Messages.MessageParam[],
+        metadata?: ApiHandlerCreateMessageMetadata,
+    ): ApiStream {
+        const modelId = this.options.apiModelId ?? deepSeekDefaultModelId
+        const { info: modelInfo } = this.getModel()
+
+        // Check if this is a thinking-enabled model (deepseek-reasoner)
+        const isThinkingModel = modelId.includes("deepseek-reasoner")
+
+        // Convert messages to R1 format (merges consecutive same-role messages).
+        // This is required for DeepSeek, which does not support successive messages with the same role.
+        const convertedMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
+
+        const requestOptions: DeepSeekChatCompletionParams = {
+            model: modelId,
+            temperature: this.options.modelTemperature ?? DEEP_SEEK_DEFAULT_TEMPERATURE,
+            messages: convertedMessages,
+            stream: true as const,
+            stream_options: { include_usage: true },
+            // Enable thinking mode for deepseek-reasoner
+            ...(isThinkingModel && { thinking: { type: "enabled" } }),
+            ...(metadata?.tools && { tools: this.convertToolsForOpenAI(metadata.tools) }),
+            ...(metadata?.tool_choice && { tool_choice: metadata.tool_choice }),
+            ...(metadata?.toolProtocol === "native" && {
+                parallel_tool_calls: metadata.parallelToolCalls ?? false,
+            }),
+        }
+
+        // Add max_tokens if needed
+        this.addMaxTokensIfNeeded(requestOptions, modelInfo)
+
+        // Check if base URL is Azure AI Inference (for DeepSeek via Azure)
+        const isAzureAiInference = this._isAzureAiInference(this.options.deepSeekBaseUrl)
+
+        let stream
+        try {
+            stream = await this.client.chat.completions.create(
+                requestOptions,
+                isAzureAiInference ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {},
+            )
+        } catch (error) {
+            const { handleOpenAIError } = await import("./utils/openai-error-handler")
+            throw handleOpenAIError(error, "DeepSeek")
+        }
+
+        let lastUsage
+
+        for await (const chunk of stream) {
+            const delta = chunk.choices?.[0]?.delta ?? {}
+
+            // Handle regular text content
+            if (delta.content) {
+                yield {
+                    type: "text",
+                    text: delta.content,
+                }
+            }
+
+            // Handle reasoning_content from DeepSeek's interleaved thinking;
+            // DeepSeek streams thinking content through this field.
+            if ("reasoning_content" in delta && delta.reasoning_content) {
+                yield {
+                    type: "reasoning",
+                    text: (delta.reasoning_content as string) || "",
+                }
+            }
+
+            // Handle tool calls
+            if (delta.tool_calls) {
+                for (const toolCall of delta.tool_calls) {
+                    yield {
+                        type: "tool_call_partial",
+                        index: toolCall.index,
+                        id: toolCall.id,
+                        name: toolCall.function?.name,
+                        arguments: toolCall.function?.arguments,
+                    }
+                }
+            }
+
+            if (chunk.usage) {
+                lastUsage = chunk.usage
+            }
+        }
+
+        if (lastUsage) {
+            yield this.processUsageMetrics(lastUsage, modelInfo)
+        }
+    }
+
     // Override to handle DeepSeek's usage metrics, including caching.
-    protected override processUsageMetrics(usage: any): ApiStreamUsageChunk {
+    protected override processUsageMetrics(usage: any, _modelInfo?: any): ApiStreamUsageChunk {
         return {
             type: "usage",
             inputTokens: usage?.prompt_tokens || 0,
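
convertToR1Format itself is not shown in this diff. Below is a minimal sketch of the merging behavior the createMessage comment relies on, under the assumption that merging simply concatenates consecutive same-role messages; mergeConsecutiveRoles is an illustrative name, not the real helper.

// Sketch of role merging (assumption; the real convertToR1Format lives in ../transform/r1-format).
type Msg = { role: "user" | "assistant"; content: string }

function mergeConsecutiveRoles(messages: Msg[]): Msg[] {
    const out: Msg[] = []
    for (const m of messages) {
        const last = out[out.length - 1]
        if (last && last.role === m.role) {
            last.content += "\n" + m.content // DeepSeek rejects successive same-role messages
        } else {
            out.push({ ...m })
        }
    }
    return out
}

// With the system prompt sent as a leading user message,
// [{ role: "user", content: systemPrompt }, { role: "user", content: "Hello!" }]
// collapses into a single user message.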

0 commit comments