diff --git a/.claude/skills/teach-me/SKILL.md b/.claude/skills/teach-me/SKILL.md
index 1900181a1..88c589825 100644
--- a/.claude/skills/teach-me/SKILL.md
+++ b/.claude/skills/teach-me/SKILL.md
@@ -41,7 +41,8 @@ All teach-me data is stored under `.claude/skills/teach-me/records/`:
 .claude/skills/teach-me/records/
 ├── learner-profile.md        # Cross-topic notes (created on first session)
 └── {topic-slug}/
-    └── session.md            # Learning state: concepts, status, notes
+    ├── session.md            # Learning state: concepts, status, notes
+    └── {topic-slug}-notes.md # Learner-facing summary notes (generated at session end)
 ```
 
 **Slug**: Topic in kebab-case, 2-5 words. Example: "Python decorators" → `python-decorators`
@@ -275,7 +276,8 @@ Update `session.md` after each round:
 When all concepts mastered or user ends session:
 
 1. Update `session.md` with final state.
-2. Update `.claude/skills/teach-me/records/learner-profile.md` (keep under 30 lines):
+2. **Generate learner-facing notes** — write `{topic-slug}-notes.md` in the topic directory. This is a standalone reference document the learner can review later. See "Notes Generation" below for format.
+3. Update `.claude/skills/teach-me/records/learner-profile.md` (keep under 30 lines):
 
 ```markdown
 # Learner Profile
@@ -293,7 +295,48 @@
 Updated: {timestamp}
 - Python decorators (8/10 concepts, 2025-01-15)
 ```
 
-3. Give a brief text summary of what was covered, key insights, and areas for further study.
+4. Give a brief text summary of what was covered, key insights, and areas for further study.
+
+## Notes Generation
+
+At session end, generate a learner-facing notes file at `{topic-slug}/{topic-slug}-notes.md`. This file is **written for the learner to review later**, not for the tutor. It should be self-contained and organized as a quick reference.
+
+### Notes Structure
+
+```markdown
+# {Topic} Core Notes
+
+## 1. {Section Name}
+{Key concept, mechanism, or principle}
+* **One-line summary**: {what it does / why it matters}
+* **Detail**: {brief explanation, 2-4 sentences max}
+* **Example** (if applicable): {code snippet, command, or concrete scenario}
+
+---
+
+## 2. {Section Name}
+...
+
+---
+
+## n. Practical Parameters / Cheat Sheet (if applicable)
+{Practical commands, config, or quick-reference table}
+
+| Parameter / Concept | What it does | Tuning tip |
+|---------------------|--------------|------------|
+| ...                 | ...          | ...        |
+```
+
+### Notes Writing Rules
+
+1. **Start with "what & why"** before "how". Each section should answer: what is this, why does it exist, and what problem does it solve?
+2. **Use analogies sparingly but effectively**. Only include an analogy if it clarifies a non-obvious mechanism (e.g., "PagedAttention is like OS virtual memory paging").
+3. **Include trade-offs**. Every optimization or design choice has a cost; always state it (e.g., "TP improves throughput but increases communication latency").
+4. **Keep code and command examples minimal**: under 10 lines, self-contained, with comments explaining the key flags.
+5. **Organize by concept dependency**, not by chronological teaching order. Foundation concepts first, advanced ones last.
+6. **No quiz questions, no misconceptions, no tutor-side notes**. This is a clean reference document.
+7. **Match the session language**. If the session was in Chinese, write the notes in Chinese (technical terms can stay in English).
+8. **Keep it under 150 lines**. If the notes get too long, the learner won't review them. Be ruthless about cutting fluff.
 
 ## Resuming Sessions
diff --git a/packages/@ant/model-provider/src/shared/__tests__/openaiConvertMessages.test.ts b/packages/@ant/model-provider/src/shared/__tests__/openaiConvertMessages.test.ts
index 27c792a5d..974849af9 100644
--- a/packages/@ant/model-provider/src/shared/__tests__/openaiConvertMessages.test.ts
+++ b/packages/@ant/model-provider/src/shared/__tests__/openaiConvertMessages.test.ts
@@ -121,7 +121,7 @@ describe('anthropicMessagesToOpenAI', () => {
     ])
   })
 
-  test('strips thinking blocks', () => {
+  test('preserves thinking blocks as reasoning_content', () => {
     const result = anthropicMessagesToOpenAI(
       [
         makeAssistantMsg([
@@ -131,7 +131,7 @@ describe('anthropicMessagesToOpenAI', () => {
       ],
       [] as any,
     )
-    expect(result).toEqual([{ role: 'assistant', content: 'visible response' }])
+    expect(result).toEqual([{ role: 'assistant', content: 'visible response', reasoning_content: 'internal thoughts...' }] as any)
   })
 
   test('handles full conversation with tools', () => {
@@ -299,7 +299,7 @@ describe('DeepSeek thinking mode (enableThinking)', () => {
     expect(assistant.reasoning_content).toBe('Let me reason about this...')
   })
 
-  test('drops thinking block when enableThinking is false (default)', () => {
+  test('preserves thinking block as reasoning_content even without enableThinking', () => {
     const result = anthropicMessagesToOpenAI(
       [
         makeAssistantMsg([
@@ -311,7 +311,7 @@ describe('DeepSeek thinking mode (enableThinking)', () => {
       ],
     )
     const assistant = result[0] as any
     expect(assistant.content).toBe('visible response')
-    expect(assistant.reasoning_content).toBeUndefined()
+    expect(assistant.reasoning_content).toBe('internal thoughts...')
   })
 
   test('preserves reasoning_content with tool_calls in same turn', () => {
@@ -352,7 +352,7 @@ describe('DeepSeek thinking mode (enableThinking)', () => {
     expect(assistant.tool_calls[0].function.name).toBe('get_weather')
   })
 
-  test('strips reasoning_content from previous turns', () => {
+  test('always preserves reasoning_content from all turns', () => {
     const result = anthropicMessagesToOpenAI(
       [
         // Turn 1: user → assistant (with thinking)
         makeUserMsg('question 1'),
         makeAssistantMsg([
           { type: 'thinking' as const, thinking: 'Turn 1 reasoning...' },
           { type: 'text', text: 'Turn 1 answer' },
         ]),
-        // Turn 2: new user message → previous reasoning should be stripped
+        // Turn 2: new user message → reasoning should still be preserved
+        // (DeepSeek requires reasoning_content to be passed back when tool calls are involved)
         makeUserMsg('question 2'),
         makeAssistantMsg([
           { type: 'thinking' as const, thinking: 'Turn 2 reasoning...' },
@@ -373,10 +374,9 @@ describe('DeepSeek thinking mode (enableThinking)', () => {
     )
     const assistants = result.filter(m => m.role === 'assistant')
 
-    // Turn 1 assistant: reasoning should be stripped (previous turn)
-    expect((assistants[0] as any).reasoning_content).toBeUndefined()
+    // Both turns preserve reasoning_content (DeepSeek API requires it for tool calls)
+    expect((assistants[0] as any).reasoning_content).toBe('Turn 1 reasoning...')
     expect((assistants[0] as any).content).toBe('Turn 1 answer')
-    // Turn 2 assistant: reasoning should be preserved (current turn)
     expect((assistants[1] as any).reasoning_content).toBe('Turn 2 reasoning...')
     expect((assistants[1] as any).content).toBe('Turn 2 answer')
   })
diff --git a/packages/@ant/model-provider/src/shared/openaiConvertMessages.ts b/packages/@ant/model-provider/src/shared/openaiConvertMessages.ts
index 2d7cf62ba..286ad55d7 100644
--- a/packages/@ant/model-provider/src/shared/openaiConvertMessages.ts
+++ b/packages/@ant/model-provider/src/shared/openaiConvertMessages.ts
@@ -26,16 +26,16 @@ export interface ConvertMessagesOptions {
  * - system prompt → role: "system" message prepended
  * - tool_use blocks → tool_calls[] on assistant message
  * - tool_result blocks → role: "tool" messages
- * - thinking blocks → silently dropped (or preserved as reasoning_content when enableThinking=true)
+ * - thinking blocks → preserved as reasoning_content (DeepSeek requires passing it back)
  * - cache_control → stripped
  */
 export function anthropicMessagesToOpenAI(
   messages: (UserMessage | AssistantMessage)[],
   systemPrompt: SystemPrompt,
-  options?: ConvertMessagesOptions,
+  // options retained for API compatibility; thinking blocks are now always preserved
+  _options?: ConvertMessagesOptions,
 ): ChatCompletionMessageParam[] {
   const result: ChatCompletionMessageParam[] = []
-  const enableThinking = options?.enableThinking ?? false
 
   // Prepend system prompt as system message
   const systemText = systemPromptToText(systemPrompt)
@@ -46,53 +46,13 @@
     } satisfies ChatCompletionSystemMessageParam)
   }
 
-  // When thinking mode is on, detect turn boundaries so that reasoning_content
-  // from *previous* user turns is stripped (saves bandwidth; DeepSeek ignores it).
-  // A "new turn" starts when a user text message appears after at least one assistant response.
-  const turnBoundaries = new Set<number>()
-  if (enableThinking) {
-    let hasSeenAssistant = false
-    for (let i = 0; i < messages.length; i++) {
-      const msg = messages[i]
-      if (msg.type === 'assistant') {
-        hasSeenAssistant = true
-      }
-      if (msg.type === 'user' && hasSeenAssistant) {
-        const content = msg.message.content
-        // A user message starts a new turn if it contains any non-tool_result content
-        // (text, image, or other media). Tool results alone do NOT start a new turn
-        // because they are continuations of the previous assistant tool call.
-        const startsNewUserTurn =
-          typeof content === 'string'
-            ? content.length > 0
-            : Array.isArray(content) &&
-              content.some(
-                (b: any) =>
-                  typeof b === 'string' ||
-                  (b &&
-                    typeof b === 'object' &&
-                    'type' in b &&
-                    b.type !== 'tool_result'),
-              )
-        if (startsNewUserTurn) {
-          turnBoundaries.add(i)
-        }
-      }
-    }
-  }
-
-  for (let i = 0; i < messages.length; i++) {
-    const msg = messages[i]
+  for (const msg of messages) {
     switch (msg.type) {
       case 'user':
         result.push(...convertInternalUserMessage(msg))
         break
       case 'assistant':
-        // Preserve reasoning_content unless we're before a turn boundary
-        // (i.e., from a previous user Q&A round)
-        const preserveReasoning =
-          enableThinking && !isBeforeAnyTurnBoundary(i, turnBoundaries)
-        result.push(...convertInternalAssistantMessage(msg, preserveReasoning))
+        result.push(...convertInternalAssistantMessage(msg))
         break
       default:
         break
@@ -107,17 +67,6 @@ function systemPromptToText(systemPrompt: SystemPrompt): string {
   return systemPrompt.filter(Boolean).join('\n\n')
 }
 
-/**
- * Check if index `i` falls before any turn boundary (i.e. it belongs to a previous turn).
- * A message at index i is "before" a boundary if there exists a boundary j where i < j.
- */
-function isBeforeAnyTurnBoundary(i: number, boundaries: Set<number>): boolean {
-  for (const b of boundaries) {
-    if (i < b) return true
-  }
-  return false
-}
-
 function convertInternalUserMessage(
   msg: UserMessage,
 ): ChatCompletionMessageParam[] {
@@ -213,7 +162,6 @@ function convertToolResult(
 
 function convertInternalAssistantMessage(
   msg: AssistantMessage,
-  preserveReasoning = false,
 ): ChatCompletionMessageParam[] {
   const content = msg.message.content
@@ -257,8 +205,10 @@
           typeof tu.input === 'string' ? tu.input : JSON.stringify(tu.input),
         },
       })
-    } else if (block.type === 'thinking' && preserveReasoning) {
-      // DeepSeek thinking mode: preserve reasoning_content for tool call iterations
+    } else if (block.type === 'thinking') {
+      // DeepSeek thinking mode: always preserve reasoning_content.
+      // DeepSeek requires reasoning_content to be passed back in subsequent requests,
+      // especially when tool calls are involved (returns 400 if missing).
       const thinkingText = (block as unknown as Record<string, unknown>)
         .thinking
       if (typeof thinkingText === 'string' && thinkingText) {
diff --git a/packages/builtin-tools/src/tools/FileEditTool/FileEditTool.ts b/packages/builtin-tools/src/tools/FileEditTool/FileEditTool.ts
index 42b00676b..0cad34958 100644
--- a/packages/builtin-tools/src/tools/FileEditTool/FileEditTool.ts
+++ b/packages/builtin-tools/src/tools/FileEditTool/FileEditTool.ts
@@ -273,18 +273,6 @@ export const FileEditTool = buildTool({
     }
 
     const readTimestamp = toolUseContext.readFileState.get(fullFilePath)
-    if (!readTimestamp || readTimestamp.isPartialView) {
-      return {
-        result: false,
-        behavior: 'ask',
-        message:
-          'File has not been read yet. Read it first before writing to it.',
-        meta: {
-          isFilePathAbsolute: String(isAbsolute(file_path)),
-        },
-        errorCode: 6,
-      }
-    }
 
     // Check if file exists and get its last modified time
     if (readTimestamp) {
diff --git a/packages/builtin-tools/src/tools/FileEditTool/UI.tsx b/packages/builtin-tools/src/tools/FileEditTool/UI.tsx
index 3fbd9a34b..417ffa3f5 100644
--- a/packages/builtin-tools/src/tools/FileEditTool/UI.tsx
+++ b/packages/builtin-tools/src/tools/FileEditTool/UI.tsx
@@ -186,14 +186,6 @@ export function renderToolUseErrorMessage(
     extractTag(result, 'tool_use_error')
   ) {
     const errorMessage = extractTag(result, 'tool_use_error')
-    // Show a less scary message for intended behavior
-    if (errorMessage?.includes('File has not been read yet')) {
-      return (
-        <Text>
-          File must be read first
-        </Text>
-      )
-    }
     if (errorMessage?.includes(FILE_NOT_FOUND_CWD_NOTE)) {
       return (
diff --git a/packages/builtin-tools/src/tools/FileWriteTool/FileWriteTool.ts b/packages/builtin-tools/src/tools/FileWriteTool/FileWriteTool.ts
index 399bab62e..009207472 100644
--- a/packages/builtin-tools/src/tools/FileWriteTool/FileWriteTool.ts
+++ b/packages/builtin-tools/src/tools/FileWriteTool/FileWriteTool.ts
@@ -196,25 +196,18 @@ export const FileWriteTool = buildTool({
     }
 
     const readTimestamp = toolUseContext.readFileState.get(fullFilePath)
-    if (!readTimestamp || readTimestamp.isPartialView) {
-      return {
-        result: false,
-        message:
-          'File has not been read yet. Read it first before writing to it.',
-        errorCode: 2,
-      }
-    }
 
     // Reuse mtime from the stat above — avoids a redundant statSync via
-    // getFileModificationTime. The readTimestamp guard above ensures this
-    // block is always reached when the file exists.
-    const lastWriteTime = Math.floor(fileMtimeMs)
-    if (lastWriteTime > readTimestamp.timestamp) {
-      return {
-        result: false,
-        message:
-          'File has been modified since read, either by the user or by a linter. Read it again before attempting to write it.',
-        errorCode: 3,
+    // getFileModificationTime.
+    if (readTimestamp) {
+      const lastWriteTime = Math.floor(fileMtimeMs)
+      if (lastWriteTime > readTimestamp.timestamp) {
+        return {
+          result: false,
+          message:
+            'File has been modified since read, either by the user or by a linter. Read it again before attempting to write it.',
+          errorCode: 3,
+        }
       }
     }
diff --git a/src/bootstrap/state.ts b/src/bootstrap/state.ts
index 66702cadf..90d613b61 100644
--- a/src/bootstrap/state.ts
+++ b/src/bootstrap/state.ts
@@ -235,11 +235,6 @@ type State = {
   // microcompact is first enabled, keep sending the header so mid-session
   // GrowthBook/settings toggles don't bust the prompt cache.
   cacheEditingHeaderLatched: boolean | null
-  // Sticky-on latch for clearing thinking from prior tool loops. Triggered
-  // when >1h since last API call (confirmed cache miss — no cache-hit
-  // benefit to keeping thinking). Once latched, stays on so the newly-warmed
-  // thinking-cleared cache isn't busted by flipping back to keep:'all'.
-  thinkingClearLatched: boolean | null
   // Current prompt ID (UUID) correlating a user prompt with subsequent OTel events
   promptId: string | null
   // Last API requestId for the main conversation chain (not subagents).
@@ -414,7 +409,6 @@ function getInitialState(): State {
     afkModeHeaderLatched: null,
     fastModeHeaderLatched: null,
     cacheEditingHeaderLatched: null,
-    thinkingClearLatched: null,
     // Current prompt ID
     promptId: null,
     lastMainRequestId: undefined,
@@ -1729,14 +1723,6 @@ export function setCacheEditingHeaderLatched(v: boolean): void {
   STATE.cacheEditingHeaderLatched = v
 }
 
-export function getThinkingClearLatched(): boolean | null {
-  return STATE.thinkingClearLatched
-}
-
-export function setThinkingClearLatched(v: boolean): void {
-  STATE.thinkingClearLatched = v
-}
-
 /**
  * Reset beta header latches to null. Called on /clear and /compact so a
  * fresh conversation gets fresh header evaluation.
@@ -1745,7 +1731,6 @@ export function clearBetaHeaderLatches(): void {
   STATE.afkModeHeaderLatched = null
   STATE.fastModeHeaderLatched = null
   STATE.cacheEditingHeaderLatched = null
-  STATE.thinkingClearLatched = null
 }
 
 export function getPromptId(): string | null {
diff --git a/src/constants/prompts.ts b/src/constants/prompts.ts
index ea8a5dc02..02b68f94f 100644
--- a/src/constants/prompts.ts
+++ b/src/constants/prompts.ts
@@ -614,17 +614,6 @@ ${CYBER_RISK_INSTRUCTION}`,
     'summarize_tool_results',
     () => SUMMARIZE_TOOL_RESULTS_SECTION,
   ),
-  // Numeric length anchors — research shows ~1.2% output token reduction vs
-  // qualitative "be concise". Ant-only to measure quality impact first.
-  ...(process.env.USER_TYPE === 'ant'
-    ? [
-        systemPromptSection(
-          'numeric_length_anchors',
-          () =>
-            'Length limits: keep text between tool calls to \u226425 words. Keep final responses to \u2264100 words unless the task requires more detail.',
-        ),
-      ]
-    : []),
   ...(feature('TOKEN_BUDGET')
     ? [
         // Cached unconditionally — the "When the user specifies..." phrasing
diff --git a/src/services/api/claude.ts b/src/services/api/claude.ts
index ddc814809..ec4dfaeab 100644
--- a/src/services/api/claude.ts
+++ b/src/services/api/claude.ts
@@ -124,14 +124,12 @@ import {
   getPromptCache1hAllowlist,
   getPromptCache1hEligible,
   getSessionId,
-  getThinkingClearLatched,
   setAfkModeHeaderLatched,
   setCacheEditingHeaderLatched,
   setFastModeHeaderLatched,
   setLastMainRequestId,
   setPromptCache1hAllowlist,
   setPromptCache1hEligible,
-  setThinkingClearLatched,
 } from 'src/bootstrap/state.js'
 import {
   AFK_MODE_BETA_HEADER,
@@ -1492,20 +1490,6 @@ async function* queryModel(
     }
   }
 
-  // Only latch from agentic queries so a classifier call doesn't flip the
-  // main thread's context_management mid-turn.
-  let thinkingClearLatched = getThinkingClearLatched() === true
-  if (!thinkingClearLatched && isAgenticQuery) {
-    const lastCompletion = getLastApiCompletionTimestamp()
-    if (
-      lastCompletion !== null &&
-      Date.now() - lastCompletion > CACHE_TTL_1HOUR_MS
-    ) {
-      thinkingClearLatched = true
-      setThinkingClearLatched(true)
-    }
-  }
-
   const effort = resolveAppliedEffort(options.model, options.effortValue)
 
   if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
@@ -1684,7 +1668,7 @@ async function* queryModel(
   const contextManagement = getAPIContextManagement({
     hasThinking,
     isRedactThinkingActive: betasParams.includes(REDACT_THINKING_BETA_HEADER),
-    clearAllThinking: thinkingClearLatched,
+    clearAllThinking: false,
   })
 
   const enablePromptCaching =
diff --git a/src/services/api/openai/__tests__/thinking.test.ts b/src/services/api/openai/__tests__/thinking.test.ts
index 9b8433282..5a51451a5 100644
--- a/src/services/api/openai/__tests__/thinking.test.ts
+++ b/src/services/api/openai/__tests__/thinking.test.ts
@@ -100,16 +100,28 @@ describe('isOpenAIThinkingEnabled', () => {
     expect(isOpenAIThinkingEnabled('TokenService/deepseek-v3.2')).toBe(true)
   })
 
-  test('returns false when model name is "deepseek-chat"', () => {
-    expect(isOpenAIThinkingEnabled('deepseek-chat')).toBe(false)
+  test('returns true when model name is "deepseek-chat"', () => {
+    expect(isOpenAIThinkingEnabled('deepseek-chat')).toBe(true)
   })
 
-  test('returns false when model name is "deepseek-v3"', () => {
-    expect(isOpenAIThinkingEnabled('deepseek-v3')).toBe(false)
+  test('returns true when model name is "deepseek-v3"', () => {
+    expect(isOpenAIThinkingEnabled('deepseek-v3')).toBe(true)
+  })
+
+  test('returns true when model name is "deepseek-v4"', () => {
+    expect(isOpenAIThinkingEnabled('deepseek-v4')).toBe(true)
+  })
+
+  test('returns true when model name is "deepseek-v4-pro"', () => {
+    expect(isOpenAIThinkingEnabled('deepseek-v4-pro')).toBe(true)
+  })
+
+  test('returns true when model name is "deepseek-r1"', () => {
+    expect(isOpenAIThinkingEnabled('deepseek-r1')).toBe(true)
   })
 
-  test('returns false when model name contains "deepseek" but not "reasoner" or "v3.2"', () => {
-    expect(isOpenAIThinkingEnabled('deepseek-coder')).toBe(false)
+  test('returns true when model name contains "deepseek"', () => {
+    expect(isOpenAIThinkingEnabled('deepseek-coder')).toBe(true)
   })
 
   test('returns false when model name is "gpt-4o"', () => {
@@ -126,6 +138,7 @@ describe('isOpenAIThinkingEnabled', () => {
     process.env.OPENAI_ENABLE_THINKING = '1'
     expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(true)
     expect(isOpenAIThinkingEnabled('deepseek-v3')).toBe(true)
+    expect(isOpenAIThinkingEnabled('qwen-3')).toBe(true)
   })
 
   test('OPENAI_ENABLE_THINKING=false disables thinking even for deepseek-reasoner', () => {
diff --git a/src/services/api/openai/requestBody.ts b/src/services/api/openai/requestBody.ts
index e8f93ecfa..09163c834 100644
--- a/src/services/api/openai/requestBody.ts
+++ b/src/services/api/openai/requestBody.ts
@@ -25,9 +25,9 @@ export function isOpenAIThinkingEnabled(model: string): boolean {
   if (isEnvDefinedFalsy(process.env.OPENAI_ENABLE_THINKING)) return false
   // Explicit enable
   if (isEnvTruthy(process.env.OPENAI_ENABLE_THINKING)) return true
-  // Auto-detect from model name (deepseek-reasoner and DeepSeek-V3.2 support thinking mode)
+  // Auto-detect from model name (all DeepSeek models support thinking mode)
   const modelLower = model.toLowerCase()
-  return modelLower.includes('deepseek-reasoner') || modelLower.includes('deepseek-v3.2')
+  return modelLower.includes('deepseek')
 }
 
 /**
diff --git a/src/services/api/src/bootstrap/state.ts b/src/services/api/src/bootstrap/state.ts
index 24331fe0d..ec9794128 100644
--- a/src/services/api/src/bootstrap/state.ts
+++ b/src/services/api/src/bootstrap/state.ts
@@ -6,14 +6,12 @@ export type getFastModeHeaderLatched = any;
 export type getLastApiCompletionTimestamp = any;
 export type getPromptCache1hAllowlist = any;
 export type getPromptCache1hEligible = any;
-export type getThinkingClearLatched = any;
 export type setAfkModeHeaderLatched = any;
 export type setCacheEditingHeaderLatched = any;
 export type setFastModeHeaderLatched = any;
 export type setLastMainRequestId = any;
 export type setPromptCache1hAllowlist = any;
 export type setPromptCache1hEligible = any;
-export type setThinkingClearLatched = any;
 export type addToTotalDurationState = any;
 export type consumePostCompaction = any;
 export type getIsNonInteractiveSession = any;
diff --git a/src/utils/effort.ts b/src/utils/effort.ts
index bb920b38c..4cf530995 100644
--- a/src/utils/effort.ts
+++ b/src/utils/effort.ts
@@ -348,13 +348,13 @@ export function getDefaultEffortForModel(
     model.toLowerCase().includes('opus-4-6')
   ) {
     if (isProSubscriber()) {
-      return 'medium'
+      return 'high'
     }
     if (
       getOpusDefaultEffortConfig().enabled &&
      (isMaxSubscriber() || isTeamSubscriber())
     ) {
-      return 'medium'
+      return 'high'
     }
   }