fix(openai-shim): echo reasoning_content on assistant tool-call messages for Moonshot

Kimi / Moonshot's chat completions endpoint requires that every assistant message carrying tool_calls also carry reasoning_content when the "thinking" feature is active. When an agent sends prior-turn assistant history back (standard multi-turn / subagent / Explore patterns), the shim previously stripped the thinking block:

    case 'thinking':
    case 'redacted_thinking':
      // Strip thinking blocks for OpenAI-compatible providers.
      break

That's correct for providers that would misinterpret serialized <thinking> tags, but Moonshot validates the schema strictly and rejects the request with:

    API Error: 400 {"error":{"message":"thinking is enabled but reasoning_content is missing in assistant tool call message at index N","type":"invalid_request_error"}}

Reproducer: launch with the Kimi profile and run any tool-using command (Explore, Bash, etc.) — every request after the first fails with a 400.

Fix: in convertMessages(), when the per-request flag preserveReasoningContent is set (only for Moonshot base URLs today), attach the original thinking block's text as reasoning_content on the outgoing OpenAI-shaped assistant message. Other providers continue to strip it (unknown-field rejection risk).

OpenAIMessage type grows a reasoning_content?: string field. convertMessages() accepts an options object and threads the flag through; the only call site (_doOpenAIRequest) gates via isMoonshotBaseUrl(request.baseUrl).

Tests (openaiShim.test.ts):
- Moonshot: echoes reasoning_content on assistant tool-call messages (regression for the reported 400)
- non-Moonshot providers do NOT receive reasoning_content (guards against leaking the field to strict-parse endpoints)

Full suite: 1195/1195 pass under --max-concurrency=1. PR scan clean.

Co-Authored-By: OpenClaude <openclaude@gitlawb.com>
2026-04-22 22:37:14 +08:00
parent 4d559c9135
commit ae3f0b2934
2 changed files with 164 additions and 1 deletions
--- a/src/services/api/openaiShim.test.ts
+++ b/src/services/api/openaiShim.test.ts
@@ -3343,6 +3343,139 @@ test('Moonshot: uses max_tokens (not max_completion_tokens) and strips store', a
  expect(requestBody?.store).toBeUndefined()
 })
 test('Moonshot: echoes reasoning_content on assistant tool-call messages', async () => {
  // Regression coverage for Moonshot's
  // "thinking is enabled but reasoning_content is missing in assistant tool
  // call message at index N" 400: when a prior-turn assistant message is sent
  // back, the text of its thinking block has to reappear as
  // reasoning_content on the outgoing assistant message.
  const priorThinking = 'Need to inspect logs via Bash; running a cat.'

  process.env.OPENAI_API_KEY = 'sk-moonshot-test'
  process.env.OPENAI_BASE_URL = 'https://api.moonshot.ai/v1'

  // Canned, non-streaming completion returned by the stubbed fetch.
  const cannedCompletion = JSON.stringify({
    id: 'chatcmpl-1',
    model: 'kimi-k2.6',
    choices: [
      { message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' },
    ],
    usage: { prompt_tokens: 3, completion_tokens: 1, total_tokens: 4 },
  })

  // The stub records the JSON body of the outgoing request for inspection.
  let capturedBody: Record<string, unknown> | undefined
  globalThis.fetch = (async (_url, options) => {
    capturedBody = JSON.parse(String(options?.body))
    return new Response(cannedCompletion, {
      headers: { 'Content-Type': 'application/json' },
    })
  }) as FetchType

  const shim = createOpenAIShimClient({}) as OpenAIShimClient
  await shim.beta.messages.create({
    model: 'kimi-k2.6',
    system: 'you are kimi',
    messages: [
      { role: 'user', content: 'check the logs' },
      {
        role: 'assistant',
        content: [
          { type: 'thinking', thinking: priorThinking },
          { type: 'text', text: "I'll inspect the logs." },
          {
            type: 'tool_use',
            id: 'call_bash_1',
            name: 'Bash',
            input: { command: 'cat /tmp/app.log' },
          },
        ],
      },
      {
        role: 'user',
        content: [
          {
            type: 'tool_result',
            tool_use_id: 'call_bash_1',
            content: 'log line 1\nlog line 2',
          },
        ],
      },
    ],
    max_tokens: 256,
    stream: false,
  })

  // Locate the echoed assistant turn: the one that carries a tool_calls array.
  const isAssistantToolCall = (entry: Record<string, unknown>) =>
    entry.role === 'assistant' && Array.isArray(entry.tool_calls)
  const outbound = capturedBody?.messages as Array<Record<string, unknown>>
  const echoedAssistant = outbound.find(isAssistantToolCall)

  expect(echoedAssistant).toBeDefined()
  expect(echoedAssistant?.reasoning_content).toBe(priorThinking)
 })
 test('non-Moonshot providers do NOT receive reasoning_content on assistant messages', async () => {
  // Guard for the opt-in: with a non-Moonshot base URL (DeepSeek here) the
  // outgoing assistant tool-call message must not carry reasoning_content,
  // so servers that strictly reject unknown fields are not affected.
  process.env.OPENAI_API_KEY = 'sk-deepseek'
  process.env.OPENAI_BASE_URL = 'https://api.deepseek.com/v1'

  // Canned, non-streaming completion returned by the stubbed fetch.
  const cannedCompletion = JSON.stringify({
    id: 'chatcmpl-1',
    model: 'deepseek-chat',
    choices: [
      { message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' },
    ],
    usage: { prompt_tokens: 3, completion_tokens: 1, total_tokens: 4 },
  })

  // The stub records the JSON body of the outgoing request for inspection.
  let capturedBody: Record<string, unknown> | undefined
  globalThis.fetch = (async (_url, options) => {
    capturedBody = JSON.parse(String(options?.body))
    return new Response(cannedCompletion, {
      headers: { 'Content-Type': 'application/json' },
    })
  }) as FetchType

  const shim = createOpenAIShimClient({}) as OpenAIShimClient
  await shim.beta.messages.create({
    model: 'deepseek-chat',
    system: 'test',
    messages: [
      { role: 'user', content: 'hi' },
      {
        role: 'assistant',
        content: [
          { type: 'thinking', thinking: 'thought' },
          { type: 'text', text: 'hello' },
          {
            type: 'tool_use',
            id: 'call_1',
            name: 'Bash',
            input: { command: 'ls' },
          },
        ],
      },
      {
        role: 'user',
        content: [
          { type: 'tool_result', tool_use_id: 'call_1', content: 'files' },
        ],
      },
    ],
    max_tokens: 32,
    stream: false,
  })

  // Locate the echoed assistant turn: the one that carries a tool_calls array.
  const isAssistantToolCall = (entry: Record<string, unknown>) =>
    entry.role === 'assistant' && Array.isArray(entry.tool_calls)
  const outbound = capturedBody?.messages as Array<Record<string, unknown>>
  const echoedAssistant = outbound.find(isAssistantToolCall)

  expect(echoedAssistant).toBeDefined()
  expect(echoedAssistant?.reasoning_content).toBeUndefined()
 })
 test('Moonshot: cn host is also detected', async () => {
  process.env.OPENAI_BASE_URL = 'https://api.moonshot.cn/v1'
  process.env.OPENAI_API_KEY = 'sk-moonshot-test'
--- a/src/services/api/openaiShim.ts
+++ b/src/services/api/openaiShim.ts
@@ -218,6 +218,14 @@ interface OpenAIMessage {
  }>
  tool_call_id?: string
  name?: string
  /**
   * Per-assistant-message chain-of-thought, attached when echoing an
   * assistant message back to providers that require it (notably Moonshot:
   * "thinking is enabled but reasoning_content is missing in assistant
   * tool call message at index N" 400). Derived from the Anthropic thinking
   * block captured when the original response was translated.
   */
  reasoning_content?: string
 }
 interface OpenAITool {
@@ -385,7 +393,9 @@ function convertMessages(
    content?: unknown
  }>,
  system: unknown,
  options?: { preserveReasoningContent?: boolean },
 ): OpenAIMessage[] {
  const preserveReasoningContent = options?.preserveReasoningContent === true
  const result: OpenAIMessage[] = []
  const knownToolCallIds = new Set<string>()
@@ -488,6 +498,21 @@ function convertMessages(
          })(),
        }
        // Providers that validate reasoning continuity (Moonshot: "thinking
        // is enabled but reasoning_content is missing in assistant tool call
        // message at index N" 400) need the original chain-of-thought echoed
        // back on each assistant message that carries a tool_call. The
        // thinking block is kept on the Anthropic side; its text is
        // re-attached here as the `reasoning_content` field on the outgoing
        // OpenAI-shaped message. NOTE: this check sits before the tool_calls
        // mapping below and is not itself conditioned on `toolUses`, so the
        // field is set whenever a non-blank thinking text is present.
        // Gated per-provider because other endpoints either ignore the field
        // (harmless) or strict-reject unknown fields (harmful).
        if (preserveReasoningContent) {
          const thinkingText = (thinkingBlock as { thinking?: string } | undefined)?.thinking
          if (typeof thinkingText === 'string' && thinkingText.trim().length > 0) {
            assistantMsg.reasoning_content = thinkingText
          }
        }
        if (toolUses.length > 0) {
          const mappedToolCalls = toolUses
            .map(
@@ -1460,7 +1485,12 @@ class OpenAIShimMessages {
      }>,
      request.resolvedModel,
    )
-    const openaiMessages = convertMessages(compressedMessages, params.system)
+    const openaiMessages = convertMessages(compressedMessages, params.system, {
      // Moonshot requires every assistant tool-call message to carry
      // reasoning_content when its thinking feature is active. Echo it back
      // from the thinking block we captured on the inbound response.
      preserveReasoningContent: isMoonshotBaseUrl(request.baseUrl),
    })
    const body: Record<string, unknown> = {
      model: request.resolvedModel,