fix(openai-shim): echo reasoning_content on assistant tool-call messages for Moonshot (#828)
Kimi / Moonshot's chat completions endpoint requires that every assistant
message carrying tool_calls also carry reasoning_content when the
"thinking" feature is active. When an agent sends prior-turn assistant
history back (standard multi-turn / subagent / Explore patterns), the
shim previously stripped the thinking block:
case 'thinking':
case 'redacted_thinking':
// Strip thinking blocks for OpenAI-compatible providers.
break
That's correct for providers that would mis-interpret serialized
<thinking> tags, but Moonshot validates the schema strictly and rejects
with:
API Error: 400 {"error":{"message":"thinking is enabled but
reasoning_content is missing in assistant tool call message at
index N","type":"invalid_request_error"}}
Reproducer: launch with the Kimi profile and run any tool-using command
(Explore, Bash, etc.) — every request after the first fails with a 400.
Fix: in convertMessages(), when the per-request flag
preserveReasoningContent is set (only for Moonshot baseUrls today),
attach the original thinking block's text as reasoning_content on the
outgoing OpenAI-shaped assistant message. Other providers continue to
strip (unknown-field rejection risk).
OpenAIMessage type grows a reasoning_content?: string field.
convertMessages() accepts an options object and threads the flag
through; the only call site (_doOpenAIRequest) gates via
isMoonshotBaseUrl(request.baseUrl).
Tests (openaiShim.test.ts):
- Moonshot: echoes reasoning_content on assistant tool-call messages
(regression for the reported 400)
- non-Moonshot providers do NOT receive reasoning_content (guards
against leaking the field to strict-parse endpoints)
Full suite: 1195/1195 pass under --max-concurrency=1. PR scan clean.
Co-authored-by: OpenClaude <openclaude@gitlawb.com>
This commit is contained in:
@@ -3343,6 +3343,139 @@ test('Moonshot: uses max_tokens (not max_completion_tokens) and strips store', a
|
||||
expect(requestBody?.store).toBeUndefined()
|
||||
})
|
||||
|
||||
test('Moonshot: echoes reasoning_content on assistant tool-call messages', async () => {
|
||||
// Regression for: "API Error: 400 {"error":{"message":"thinking is enabled
|
||||
// but reasoning_content is missing in assistant tool call message at index
|
||||
// N"}}" when the agent sends a prior-turn assistant response back to Kimi.
|
||||
// The thinking block captured from the inbound response must round-trip
|
||||
// as reasoning_content on the outgoing echoed assistant message.
|
||||
process.env.OPENAI_BASE_URL = 'https://api.moonshot.ai/v1'
|
||||
process.env.OPENAI_API_KEY = 'sk-moonshot-test'
|
||||
|
||||
let requestBody: Record<string, unknown> | undefined
|
||||
globalThis.fetch = (async (_input, init) => {
|
||||
requestBody = JSON.parse(String(init?.body))
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
id: 'chatcmpl-1',
|
||||
model: 'kimi-k2.6',
|
||||
choices: [
|
||||
{ message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' },
|
||||
],
|
||||
usage: { prompt_tokens: 3, completion_tokens: 1, total_tokens: 4 },
|
||||
}),
|
||||
{ headers: { 'Content-Type': 'application/json' } },
|
||||
)
|
||||
}) as FetchType
|
||||
|
||||
const client = createOpenAIShimClient({}) as OpenAIShimClient
|
||||
await client.beta.messages.create({
|
||||
model: 'kimi-k2.6',
|
||||
system: 'you are kimi',
|
||||
messages: [
|
||||
{ role: 'user', content: 'check the logs' },
|
||||
{
|
||||
role: 'assistant',
|
||||
content: [
|
||||
{
|
||||
type: 'thinking',
|
||||
thinking: 'Need to inspect logs via Bash; running a cat.',
|
||||
},
|
||||
{ type: 'text', text: "I'll inspect the logs." },
|
||||
{
|
||||
type: 'tool_use',
|
||||
id: 'call_bash_1',
|
||||
name: 'Bash',
|
||||
input: { command: 'cat /tmp/app.log' },
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
content: [
|
||||
{
|
||||
type: 'tool_result',
|
||||
tool_use_id: 'call_bash_1',
|
||||
content: 'log line 1\nlog line 2',
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
max_tokens: 256,
|
||||
stream: false,
|
||||
})
|
||||
|
||||
const messages = requestBody?.messages as Array<Record<string, unknown>>
|
||||
const assistantWithToolCall = messages.find(
|
||||
m => m.role === 'assistant' && Array.isArray(m.tool_calls),
|
||||
)
|
||||
expect(assistantWithToolCall).toBeDefined()
|
||||
expect(assistantWithToolCall?.reasoning_content).toBe(
|
||||
'Need to inspect logs via Bash; running a cat.',
|
||||
)
|
||||
})
|
||||
|
||||
test('non-Moonshot providers do NOT receive reasoning_content on assistant messages', async () => {
|
||||
// Guard: only Moonshot opts in. DeepSeek/OpenRouter/etc. receive the
|
||||
// outgoing assistant message without reasoning_content to avoid
|
||||
// unknown-field rejections from strict servers.
|
||||
process.env.OPENAI_BASE_URL = 'https://api.deepseek.com/v1'
|
||||
process.env.OPENAI_API_KEY = 'sk-deepseek'
|
||||
|
||||
let requestBody: Record<string, unknown> | undefined
|
||||
globalThis.fetch = (async (_input, init) => {
|
||||
requestBody = JSON.parse(String(init?.body))
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
id: 'chatcmpl-1',
|
||||
model: 'deepseek-chat',
|
||||
choices: [
|
||||
{ message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' },
|
||||
],
|
||||
usage: { prompt_tokens: 3, completion_tokens: 1, total_tokens: 4 },
|
||||
}),
|
||||
{ headers: { 'Content-Type': 'application/json' } },
|
||||
)
|
||||
}) as FetchType
|
||||
|
||||
const client = createOpenAIShimClient({}) as OpenAIShimClient
|
||||
await client.beta.messages.create({
|
||||
model: 'deepseek-chat',
|
||||
system: 'test',
|
||||
messages: [
|
||||
{ role: 'user', content: 'hi' },
|
||||
{
|
||||
role: 'assistant',
|
||||
content: [
|
||||
{ type: 'thinking', thinking: 'thought' },
|
||||
{ type: 'text', text: 'hello' },
|
||||
{
|
||||
type: 'tool_use',
|
||||
id: 'call_1',
|
||||
name: 'Bash',
|
||||
input: { command: 'ls' },
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
content: [
|
||||
{ type: 'tool_result', tool_use_id: 'call_1', content: 'files' },
|
||||
],
|
||||
},
|
||||
],
|
||||
max_tokens: 32,
|
||||
stream: false,
|
||||
})
|
||||
|
||||
const messages = requestBody?.messages as Array<Record<string, unknown>>
|
||||
const assistantWithToolCall = messages.find(
|
||||
m => m.role === 'assistant' && Array.isArray(m.tool_calls),
|
||||
)
|
||||
expect(assistantWithToolCall).toBeDefined()
|
||||
expect(assistantWithToolCall?.reasoning_content).toBeUndefined()
|
||||
})
|
||||
|
||||
test('Moonshot: cn host is also detected', async () => {
|
||||
process.env.OPENAI_BASE_URL = 'https://api.moonshot.cn/v1'
|
||||
process.env.OPENAI_API_KEY = 'sk-moonshot-test'
|
||||
|
||||
@@ -218,6 +218,14 @@ interface OpenAIMessage {
|
||||
}>
|
||||
tool_call_id?: string
|
||||
name?: string
|
||||
/**
|
||||
* Per-assistant-message chain-of-thought, attached when echoing an
|
||||
* assistant message back to providers that require it (notably Moonshot:
|
||||
* "thinking is enabled but reasoning_content is missing in assistant
|
||||
* tool call message at index N" 400). Derived from the Anthropic thinking
|
||||
* block captured when the original response was translated.
|
||||
*/
|
||||
reasoning_content?: string
|
||||
}
|
||||
|
||||
interface OpenAITool {
|
||||
@@ -385,7 +393,9 @@ function convertMessages(
|
||||
content?: unknown
|
||||
}>,
|
||||
system: unknown,
|
||||
options?: { preserveReasoningContent?: boolean },
|
||||
): OpenAIMessage[] {
|
||||
const preserveReasoningContent = options?.preserveReasoningContent === true
|
||||
const result: OpenAIMessage[] = []
|
||||
const knownToolCallIds = new Set<string>()
|
||||
|
||||
@@ -488,6 +498,21 @@ function convertMessages(
|
||||
})(),
|
||||
}
|
||||
|
||||
// Providers that validate reasoning continuity (Moonshot: "thinking
|
||||
// is enabled but reasoning_content is missing in assistant tool call
|
||||
// message at index N" 400) need the original chain-of-thought echoed
|
||||
// back on each assistant message that carries a tool_call. We kept
|
||||
// the thinking block on the Anthropic side; re-attach it here as the
|
||||
// `reasoning_content` field on the outgoing OpenAI-shaped message.
|
||||
// Gated per-provider because other endpoints either ignore the field
|
||||
// (harmless) or strict-reject unknown fields (harmful).
|
||||
if (preserveReasoningContent) {
|
||||
const thinkingText = (thinkingBlock as { thinking?: string } | undefined)?.thinking
|
||||
if (typeof thinkingText === 'string' && thinkingText.trim().length > 0) {
|
||||
assistantMsg.reasoning_content = thinkingText
|
||||
}
|
||||
}
|
||||
|
||||
if (toolUses.length > 0) {
|
||||
const mappedToolCalls = toolUses
|
||||
.map(
|
||||
@@ -1460,7 +1485,12 @@ class OpenAIShimMessages {
|
||||
}>,
|
||||
request.resolvedModel,
|
||||
)
|
||||
const openaiMessages = convertMessages(compressedMessages, params.system)
|
||||
const openaiMessages = convertMessages(compressedMessages, params.system, {
|
||||
// Moonshot requires every assistant tool-call message to carry
|
||||
// reasoning_content when its thinking feature is active. Echo it back
|
||||
// from the thinking block we captured on the inbound response.
|
||||
preserveReasoningContent: isMoonshotBaseUrl(request.baseUrl),
|
||||
})
|
||||
|
||||
const body: Record<string, unknown> = {
|
||||
model: request.resolvedModel,
|
||||
|
||||
Reference in New Issue
Block a user