From 67de6bd2cffc3381f0f28fd3ffce043970611667 Mon Sep 17 00:00:00 2001 From: Kevin Codex Date: Wed, 22 Apr 2026 22:47:57 +0800 Subject: [PATCH] fix(openai-shim): echo reasoning_content on assistant tool-call messages for Moonshot (#828) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kimi / Moonshot's chat completions endpoint requires that every assistant message carrying tool_calls also carry reasoning_content when the "thinking" feature is active. When an agent sends prior-turn assistant history back (standard multi-turn / subagent / Explore patterns), the shim previously stripped the thinking block: case 'thinking': case 'redacted_thinking': // Strip thinking blocks for OpenAI-compatible providers. break That's correct for providers that would mis-interpret serialized tags, but Moonshot validates the schema strictly and rejects with: API Error: 400 {"error":{"message":"thinking is enabled but reasoning_content is missing in assistant tool call message at index N","type":"invalid_request_error"}} Reproducer: launch with Kimi profile, run any tool-using command (Explore, Bash, etc.) — every request after the first 400s. Fix: in convertMessages(), when the per-request flag preserveReasoningContent is set (only for Moonshot baseUrls today), attach the original thinking block's text as reasoning_content on the outgoing OpenAI-shaped assistant message. Other providers continue to strip (unknown-field rejection risk). OpenAIMessage type grows a reasoning_content?: string field. convertMessages() accepts an options object and threads the flag through; the only call site (_doOpenAIRequest) gates via isMoonshotBaseUrl(request.baseUrl). 
Tests (openaiShim.test.ts): - Moonshot: echoes reasoning_content on assistant tool-call messages (regression for the reported 400) - non-Moonshot providers do NOT receive reasoning_content (guards against leaking the field to strict-parse endpoints) Full suite: 1195/1195 pass under --max-concurrency=1. PR scan clean. Co-authored-by: OpenClaude --- src/services/api/openaiShim.test.ts | 133 ++++++++++++++++++++++++++++ src/services/api/openaiShim.ts | 32 ++++++- 2 files changed, 164 insertions(+), 1 deletion(-) diff --git a/src/services/api/openaiShim.test.ts b/src/services/api/openaiShim.test.ts index 5d3cb552..f83622a2 100644 --- a/src/services/api/openaiShim.test.ts +++ b/src/services/api/openaiShim.test.ts @@ -3343,6 +3343,139 @@ test('Moonshot: uses max_tokens (not max_completion_tokens) and strips store', a expect(requestBody?.store).toBeUndefined() }) +test('Moonshot: echoes reasoning_content on assistant tool-call messages', async () => { + // Regression for: "API Error: 400 {"error":{"message":"thinking is enabled + // but reasoning_content is missing in assistant tool call message at index + // N"}}" when the agent sends a prior-turn assistant response back to Kimi. + // The thinking block captured from the inbound response must round-trip + // as reasoning_content on the outgoing echoed assistant message. 
+ process.env.OPENAI_BASE_URL = 'https://api.moonshot.ai/v1' + process.env.OPENAI_API_KEY = 'sk-moonshot-test' + + let requestBody: Record<string, unknown> | undefined + globalThis.fetch = (async (_input, init) => { + requestBody = JSON.parse(String(init?.body)) + return new Response( + JSON.stringify({ + id: 'chatcmpl-1', + model: 'kimi-k2.6', + choices: [ + { message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' }, + ], + usage: { prompt_tokens: 3, completion_tokens: 1, total_tokens: 4 }, + }), + { headers: { 'Content-Type': 'application/json' } }, + ) + }) as FetchType + + const client = createOpenAIShimClient({}) as OpenAIShimClient + await client.beta.messages.create({ + model: 'kimi-k2.6', + system: 'you are kimi', + messages: [ + { role: 'user', content: 'check the logs' }, + { + role: 'assistant', + content: [ + { + type: 'thinking', + thinking: 'Need to inspect logs via Bash; running a cat.', + }, + { type: 'text', text: "I'll inspect the logs." }, + { + type: 'tool_use', + id: 'call_bash_1', + name: 'Bash', + input: { command: 'cat /tmp/app.log' }, + }, + ], + }, + { + role: 'user', + content: [ + { + type: 'tool_result', + tool_use_id: 'call_bash_1', + content: 'log line 1\nlog line 2', + }, + ], + }, + ], + max_tokens: 256, + stream: false, + }) + + const messages = requestBody?.messages as Array<Record<string, unknown>> + const assistantWithToolCall = messages.find( + m => m.role === 'assistant' && Array.isArray(m.tool_calls), + ) + expect(assistantWithToolCall).toBeDefined() + expect(assistantWithToolCall?.reasoning_content).toBe( + 'Need to inspect logs via Bash; running a cat.', + ) +}) + +test('non-Moonshot providers do NOT receive reasoning_content on assistant messages', async () => { + // Guard: only Moonshot opts in. DeepSeek/OpenRouter/etc. receive the + // outgoing assistant message without reasoning_content to avoid + // unknown-field rejections from strict servers.
+ process.env.OPENAI_BASE_URL = 'https://api.deepseek.com/v1' + process.env.OPENAI_API_KEY = 'sk-deepseek' + + let requestBody: Record<string, unknown> | undefined + globalThis.fetch = (async (_input, init) => { + requestBody = JSON.parse(String(init?.body)) + return new Response( + JSON.stringify({ + id: 'chatcmpl-1', + model: 'deepseek-chat', + choices: [ + { message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' }, + ], + usage: { prompt_tokens: 3, completion_tokens: 1, total_tokens: 4 }, + }), + { headers: { 'Content-Type': 'application/json' } }, + ) + }) as FetchType + + const client = createOpenAIShimClient({}) as OpenAIShimClient + await client.beta.messages.create({ + model: 'deepseek-chat', + system: 'test', + messages: [ + { role: 'user', content: 'hi' }, + { + role: 'assistant', + content: [ + { type: 'thinking', thinking: 'thought' }, + { type: 'text', text: 'hello' }, + { + type: 'tool_use', + id: 'call_1', + name: 'Bash', + input: { command: 'ls' }, + }, + ], + }, + { + role: 'user', + content: [ + { type: 'tool_result', tool_use_id: 'call_1', content: 'files' }, + ], + }, + ], + max_tokens: 32, + stream: false, + }) + + const messages = requestBody?.messages as Array<Record<string, unknown>> + const assistantWithToolCall = messages.find( + m => m.role === 'assistant' && Array.isArray(m.tool_calls), + ) + expect(assistantWithToolCall).toBeDefined() + expect(assistantWithToolCall?.reasoning_content).toBeUndefined() +}) + test('Moonshot: cn host is also detected', async () => { process.env.OPENAI_BASE_URL = 'https://api.moonshot.cn/v1' process.env.OPENAI_API_KEY = 'sk-moonshot-test' diff --git a/src/services/api/openaiShim.ts b/src/services/api/openaiShim.ts index c62b7f21..8f8500ba 100644 --- a/src/services/api/openaiShim.ts +++ b/src/services/api/openaiShim.ts @@ -218,6 +218,14 @@ interface OpenAIMessage { }> tool_call_id?: string name?: string + /** + * Per-assistant-message chain-of-thought, attached when echoing an + * assistant message back to providers that require it
(notably Moonshot: + * "thinking is enabled but reasoning_content is missing in assistant + * tool call message at index N" 400). Derived from the Anthropic thinking + * block captured when the original response was translated. + */ + reasoning_content?: string } interface OpenAITool { @@ -385,7 +393,9 @@ function convertMessages( content?: unknown }>, system: unknown, + options?: { preserveReasoningContent?: boolean }, ): OpenAIMessage[] { + const preserveReasoningContent = options?.preserveReasoningContent === true const result: OpenAIMessage[] = [] const knownToolCallIds = new Set() @@ -488,6 +498,21 @@ function convertMessages( })(), } + // Providers that validate reasoning continuity (Moonshot: "thinking + // is enabled but reasoning_content is missing in assistant tool call + // message at index N" 400) need the original chain-of-thought echoed + // back on each assistant message that carries a tool_call. We kept + // the thinking block on the Anthropic side; re-attach it here as the + // `reasoning_content` field on the outgoing OpenAI-shaped message. + // Gated per-provider because other endpoints either ignore the field + // (harmless) or strict-reject unknown fields (harmful). + if (preserveReasoningContent) { + const thinkingText = (thinkingBlock as { thinking?: string } | undefined)?.thinking + if (typeof thinkingText === 'string' && thinkingText.trim().length > 0) { + assistantMsg.reasoning_content = thinkingText + } + } + if (toolUses.length > 0) { const mappedToolCalls = toolUses .map( @@ -1460,7 +1485,12 @@ class OpenAIShimMessages { }>, request.resolvedModel, ) - const openaiMessages = convertMessages(compressedMessages, params.system) + const openaiMessages = convertMessages(compressedMessages, params.system, { + // Moonshot requires every assistant tool-call message to carry + // reasoning_content when its thinking feature is active. Echo it back + // from the thinking block we captured on the inbound response. 
+ preserveReasoningContent: isMoonshotBaseUrl(request.baseUrl), + }) const body: Record<string, unknown> = { model: request.resolvedModel,