From 67de6bd2cffc3381f0f28fd3ffce043970611667 Mon Sep 17 00:00:00 2001 From: Kevin Codex Date: Wed, 22 Apr 2026 22:47:57 +0800 Subject: [PATCH] fix(openai-shim): echo reasoning_content on assistant tool-call messages for Moonshot (#828) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kimi / Moonshot's chat completions endpoint requires that every assistant message carrying tool_calls also carry reasoning_content when the "thinking" feature is active. When an agent sends prior-turn assistant history back (standard multi-turn / subagent / Explore patterns), the shim previously stripped the thinking block: case 'thinking': case 'redacted_thinking': // Strip thinking blocks for OpenAI-compatible providers. break That's correct for providers that would mis-interpret serialized tags, but Moonshot validates the schema strictly and rejects with: API Error: 400 {"error":{"message":"thinking is enabled but reasoning_content is missing in assistant tool call message at index N","type":"invalid_request_error"}} Reproducer: launch with Kimi profile, run any tool-using command (Explore, Bash, etc.) — every request after the first 400s. Fix: in convertMessages(), when the per-request flag preserveReasoningContent is set (only for Moonshot baseUrls today), attach the original thinking block's text as reasoning_content on the outgoing OpenAI-shaped assistant message. Other providers continue to strip (unknown-field rejection risk). OpenAIMessage type grows a reasoning_content?: string field. convertMessages() accepts an options object and threads the flag through; the only call site (_doOpenAIRequest) gates via isMoonshotBaseUrl(request.baseUrl). 
Tests (openaiShim.test.ts): - Moonshot: echoes reasoning_content on assistant tool-call messages (regression for the reported 400) - non-Moonshot providers do NOT receive reasoning_content (guards against leaking the field to strict-parse endpoints) Full suite: 1195/1195 pass under --max-concurrency=1. PR scan clean. Co-authored-by: OpenClaude --- src/services/api/openaiShim.test.ts | 133 ++++++++++++++++++++++++++++ src/services/api/openaiShim.ts | 32 ++++++- 2 files changed, 164 insertions(+), 1 deletion(-) diff --git a/src/services/api/openaiShim.test.ts b/src/services/api/openaiShim.test.ts index 5d3cb552..f83622a2 100644 --- a/src/services/api/openaiShim.test.ts +++ b/src/services/api/openaiShim.test.ts @@ -3343,6 +3343,139 @@ test('Moonshot: uses max_tokens (not max_completion_tokens) and strips store', a expect(requestBody?.store).toBeUndefined() }) +test('Moonshot: echoes reasoning_content on assistant tool-call messages', async () => { + // Regression for: "API Error: 400 {"error":{"message":"thinking is enabled + // but reasoning_content is missing in assistant tool call message at index + // N"}}" when the agent sends a prior-turn assistant response back to Kimi. + // The thinking block captured from the inbound response must round-trip + // as reasoning_content on the outgoing echoed assistant message. 
+ process.env.OPENAI_BASE_URL = 'https://api.moonshot.ai/v1' + process.env.OPENAI_API_KEY = 'sk-moonshot-test' + + let requestBody: Record<string, unknown> | undefined + globalThis.fetch = (async (_input, init) => { + requestBody = JSON.parse(String(init?.body)) + return new Response( + JSON.stringify({ + id: 'chatcmpl-1', + model: 'kimi-k2.6', + choices: [ + { message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' }, + ], + usage: { prompt_tokens: 3, completion_tokens: 1, total_tokens: 4 }, + }), + { headers: { 'Content-Type': 'application/json' } }, + ) + }) as FetchType + + const client = createOpenAIShimClient({}) as OpenAIShimClient + await client.beta.messages.create({ + model: 'kimi-k2.6', + system: 'you are kimi', + messages: [ + { role: 'user', content: 'check the logs' }, + { + role: 'assistant', + content: [ + { + type: 'thinking', + thinking: 'Need to inspect logs via Bash; running a cat.', + }, + { type: 'text', text: "I'll inspect the logs." }, + { + type: 'tool_use', + id: 'call_bash_1', + name: 'Bash', + input: { command: 'cat /tmp/app.log' }, + }, + ], + }, + { + role: 'user', + content: [ + { + type: 'tool_result', + tool_use_id: 'call_bash_1', + content: 'log line 1\nlog line 2', + }, + ], + }, + ], + max_tokens: 256, + stream: false, + }) + + const messages = requestBody?.messages as Array<Record<string, unknown>> + const assistantWithToolCall = messages.find( + m => m.role === 'assistant' && Array.isArray(m.tool_calls), + ) + expect(assistantWithToolCall).toBeDefined() + expect(assistantWithToolCall?.reasoning_content).toBe( + 'Need to inspect logs via Bash; running a cat.', + ) +}) + +test('non-Moonshot providers do NOT receive reasoning_content on assistant messages', async () => { + // Guard: only Moonshot opts in. DeepSeek/OpenRouter/etc. receive the + // outgoing assistant message without reasoning_content to avoid + // unknown-field rejections from strict servers.
+ process.env.OPENAI_BASE_URL = 'https://api.deepseek.com/v1' + process.env.OPENAI_API_KEY = 'sk-deepseek' + + let requestBody: Record<string, unknown> | undefined + globalThis.fetch = (async (_input, init) => { + requestBody = JSON.parse(String(init?.body)) + return new Response( + JSON.stringify({ + id: 'chatcmpl-1', + model: 'deepseek-chat', + choices: [ + { message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' }, + ], + usage: { prompt_tokens: 3, completion_tokens: 1, total_tokens: 4 }, + }), + { headers: { 'Content-Type': 'application/json' } }, + ) + }) as FetchType + + const client = createOpenAIShimClient({}) as OpenAIShimClient + await client.beta.messages.create({ + model: 'deepseek-chat', + system: 'test', + messages: [ + { role: 'user', content: 'hi' }, + { + role: 'assistant', + content: [ + { type: 'thinking', thinking: 'thought' }, + { type: 'text', text: 'hello' }, + { + type: 'tool_use', + id: 'call_1', + name: 'Bash', + input: { command: 'ls' }, + }, + ], + }, + { + role: 'user', + content: [ + { type: 'tool_result', tool_use_id: 'call_1', content: 'files' }, + ], + }, + ], + max_tokens: 32, + stream: false, + }) + + const messages = requestBody?.messages as Array<Record<string, unknown>> + const assistantWithToolCall = messages.find( + m => m.role === 'assistant' && Array.isArray(m.tool_calls), + ) + expect(assistantWithToolCall).toBeDefined() + expect(assistantWithToolCall?.reasoning_content).toBeUndefined() +}) + test('Moonshot: cn host is also detected', async () => { process.env.OPENAI_BASE_URL = 'https://api.moonshot.cn/v1' process.env.OPENAI_API_KEY = 'sk-moonshot-test' diff --git a/src/services/api/openaiShim.ts b/src/services/api/openaiShim.ts index c62b7f21..8f8500ba 100644 --- a/src/services/api/openaiShim.ts +++ b/src/services/api/openaiShim.ts @@ -218,6 +218,14 @@ interface OpenAIMessage { }> tool_call_id?: string name?: string + /** + * Per-assistant-message chain-of-thought, attached when echoing an + * assistant message back to providers that require it
(notably Moonshot: + * "thinking is enabled but reasoning_content is missing in assistant + * tool call message at index N" 400). Derived from the Anthropic thinking + * block captured when the original response was translated. + */ + reasoning_content?: string } interface OpenAITool { @@ -385,7 +393,9 @@ function convertMessages( content?: unknown }>, system: unknown, + options?: { preserveReasoningContent?: boolean }, ): OpenAIMessage[] { + const preserveReasoningContent = options?.preserveReasoningContent === true const result: OpenAIMessage[] = [] const knownToolCallIds = new Set() @@ -488,6 +498,21 @@ function convertMessages( })(), } + // Providers that validate reasoning continuity (Moonshot: "thinking + // is enabled but reasoning_content is missing in assistant tool call + // message at index N" 400) need the original chain-of-thought echoed + // back on each assistant message that carries a tool_call. We kept + // the thinking block on the Anthropic side; re-attach it here as the + // `reasoning_content` field on the outgoing OpenAI-shaped message. + // Gated per-provider because other endpoints either ignore the field + // (harmless) or strict-reject unknown fields (harmful). + if (preserveReasoningContent) { + const thinkingText = (thinkingBlock as { thinking?: string } | undefined)?.thinking + if (typeof thinkingText === 'string' && thinkingText.trim().length > 0) { + assistantMsg.reasoning_content = thinkingText + } + } + if (toolUses.length > 0) { const mappedToolCalls = toolUses .map( @@ -1460,7 +1485,12 @@ class OpenAIShimMessages { }>, request.resolvedModel, ) - const openaiMessages = convertMessages(compressedMessages, params.system) + const openaiMessages = convertMessages(compressedMessages, params.system, { + // Moonshot requires every assistant tool-call message to carry + // reasoning_content when its thinking feature is active. Echo it back + // from the thinking block we captured on the inbound response. 
+ preserveReasoningContent: isMoonshotBaseUrl(request.baseUrl), + }) const body: Record<string, unknown> = { model: request.resolvedModel,