fix(openai-shim): echo reasoning_content on assistant tool-call messages for Moonshot
Kimi / Moonshot's chat completions endpoint requires that every assistant
message carrying tool_calls also carry reasoning_content when the
"thinking" feature is active. When an agent sends prior-turn assistant
history back (standard multi-turn / subagent / Explore patterns), the
shim previously stripped the thinking block:
case 'thinking':
case 'redacted_thinking':
// Strip thinking blocks for OpenAI-compatible providers.
break
That's correct for providers that would misinterpret serialized
<thinking> tags, but Moonshot validates the schema strictly and rejects
the request with:
API Error: 400 {"error":{"message":"thinking is enabled but
reasoning_content is missing in assistant tool call message at
index N","type":"invalid_request_error"}}
Reproducer: launch with the Kimi profile and run any tool-using command
(Explore, Bash, etc.) — every request after the first one fails with HTTP 400.
Fix: in convertMessages(), when the per-request flag
preserveReasoningContent is set (only for Moonshot baseUrls today),
attach the original thinking block's text as reasoning_content on the
outgoing OpenAI-shaped assistant message. For all other providers the
shim continues to strip thinking blocks, because strict endpoints may
reject the unknown reasoning_content field.
OpenAIMessage type grows a reasoning_content?: string field.
convertMessages() accepts an options object and threads the flag
through; the only call site (_doOpenAIRequest) gates via
isMoonshotBaseUrl(request.baseUrl).
Tests (openaiShim.test.ts):
- Moonshot: echoes reasoning_content on assistant tool-call messages
(regression for the reported 400)
- non-Moonshot providers do NOT receive reasoning_content (guards
against leaking the field to strict-parse endpoints)
Full suite: 1195/1195 pass under --max-concurrency=1. PR scan clean.
Co-Authored-By: OpenClaude <openclaude@gitlawb.com>
This commit is contained in:
@@ -3343,6 +3343,139 @@ test('Moonshot: uses max_tokens (not max_completion_tokens) and strips store', a
|
|||||||
expect(requestBody?.store).toBeUndefined()
|
expect(requestBody?.store).toBeUndefined()
|
||||||
})
|
})
|
||||||
|
|
||||||
|
test('Moonshot: echoes reasoning_content on assistant tool-call messages', async () => {
|
||||||
|
// Regression for: "API Error: 400 {"error":{"message":"thinking is enabled
|
||||||
|
// but reasoning_content is missing in assistant tool call message at index
|
||||||
|
// N"}}" when the agent sends a prior-turn assistant response back to Kimi.
|
||||||
|
// The thinking block captured from the inbound response must round-trip
|
||||||
|
// as reasoning_content on the outgoing echoed assistant message.
|
||||||
|
process.env.OPENAI_BASE_URL = 'https://api.moonshot.ai/v1'
|
||||||
|
process.env.OPENAI_API_KEY = 'sk-moonshot-test'
|
||||||
|
|
||||||
|
let requestBody: Record<string, unknown> | undefined
|
||||||
|
globalThis.fetch = (async (_input, init) => {
|
||||||
|
requestBody = JSON.parse(String(init?.body))
|
||||||
|
return new Response(
|
||||||
|
JSON.stringify({
|
||||||
|
id: 'chatcmpl-1',
|
||||||
|
model: 'kimi-k2.6',
|
||||||
|
choices: [
|
||||||
|
{ message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' },
|
||||||
|
],
|
||||||
|
usage: { prompt_tokens: 3, completion_tokens: 1, total_tokens: 4 },
|
||||||
|
}),
|
||||||
|
{ headers: { 'Content-Type': 'application/json' } },
|
||||||
|
)
|
||||||
|
}) as FetchType
|
||||||
|
|
||||||
|
const client = createOpenAIShimClient({}) as OpenAIShimClient
|
||||||
|
await client.beta.messages.create({
|
||||||
|
model: 'kimi-k2.6',
|
||||||
|
system: 'you are kimi',
|
||||||
|
messages: [
|
||||||
|
{ role: 'user', content: 'check the logs' },
|
||||||
|
{
|
||||||
|
role: 'assistant',
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'thinking',
|
||||||
|
thinking: 'Need to inspect logs via Bash; running a cat.',
|
||||||
|
},
|
||||||
|
{ type: 'text', text: "I'll inspect the logs." },
|
||||||
|
{
|
||||||
|
type: 'tool_use',
|
||||||
|
id: 'call_bash_1',
|
||||||
|
name: 'Bash',
|
||||||
|
input: { command: 'cat /tmp/app.log' },
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'tool_result',
|
||||||
|
tool_use_id: 'call_bash_1',
|
||||||
|
content: 'log line 1\nlog line 2',
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
max_tokens: 256,
|
||||||
|
stream: false,
|
||||||
|
})
|
||||||
|
|
||||||
|
const messages = requestBody?.messages as Array<Record<string, unknown>>
|
||||||
|
const assistantWithToolCall = messages.find(
|
||||||
|
m => m.role === 'assistant' && Array.isArray(m.tool_calls),
|
||||||
|
)
|
||||||
|
expect(assistantWithToolCall).toBeDefined()
|
||||||
|
expect(assistantWithToolCall?.reasoning_content).toBe(
|
||||||
|
'Need to inspect logs via Bash; running a cat.',
|
||||||
|
)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('non-Moonshot providers do NOT receive reasoning_content on assistant messages', async () => {
|
||||||
|
// Guard: only Moonshot opts in. DeepSeek/OpenRouter/etc. receive the
|
||||||
|
// outgoing assistant message without reasoning_content to avoid
|
||||||
|
// unknown-field rejections from strict servers.
|
||||||
|
process.env.OPENAI_BASE_URL = 'https://api.deepseek.com/v1'
|
||||||
|
process.env.OPENAI_API_KEY = 'sk-deepseek'
|
||||||
|
|
||||||
|
let requestBody: Record<string, unknown> | undefined
|
||||||
|
globalThis.fetch = (async (_input, init) => {
|
||||||
|
requestBody = JSON.parse(String(init?.body))
|
||||||
|
return new Response(
|
||||||
|
JSON.stringify({
|
||||||
|
id: 'chatcmpl-1',
|
||||||
|
model: 'deepseek-chat',
|
||||||
|
choices: [
|
||||||
|
{ message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' },
|
||||||
|
],
|
||||||
|
usage: { prompt_tokens: 3, completion_tokens: 1, total_tokens: 4 },
|
||||||
|
}),
|
||||||
|
{ headers: { 'Content-Type': 'application/json' } },
|
||||||
|
)
|
||||||
|
}) as FetchType
|
||||||
|
|
||||||
|
const client = createOpenAIShimClient({}) as OpenAIShimClient
|
||||||
|
await client.beta.messages.create({
|
||||||
|
model: 'deepseek-chat',
|
||||||
|
system: 'test',
|
||||||
|
messages: [
|
||||||
|
{ role: 'user', content: 'hi' },
|
||||||
|
{
|
||||||
|
role: 'assistant',
|
||||||
|
content: [
|
||||||
|
{ type: 'thinking', thinking: 'thought' },
|
||||||
|
{ type: 'text', text: 'hello' },
|
||||||
|
{
|
||||||
|
type: 'tool_use',
|
||||||
|
id: 'call_1',
|
||||||
|
name: 'Bash',
|
||||||
|
input: { command: 'ls' },
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: [
|
||||||
|
{ type: 'tool_result', tool_use_id: 'call_1', content: 'files' },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
max_tokens: 32,
|
||||||
|
stream: false,
|
||||||
|
})
|
||||||
|
|
||||||
|
const messages = requestBody?.messages as Array<Record<string, unknown>>
|
||||||
|
const assistantWithToolCall = messages.find(
|
||||||
|
m => m.role === 'assistant' && Array.isArray(m.tool_calls),
|
||||||
|
)
|
||||||
|
expect(assistantWithToolCall).toBeDefined()
|
||||||
|
expect(assistantWithToolCall?.reasoning_content).toBeUndefined()
|
||||||
|
})
|
||||||
|
|
||||||
test('Moonshot: cn host is also detected', async () => {
|
test('Moonshot: cn host is also detected', async () => {
|
||||||
process.env.OPENAI_BASE_URL = 'https://api.moonshot.cn/v1'
|
process.env.OPENAI_BASE_URL = 'https://api.moonshot.cn/v1'
|
||||||
process.env.OPENAI_API_KEY = 'sk-moonshot-test'
|
process.env.OPENAI_API_KEY = 'sk-moonshot-test'
|
||||||
|
|||||||
@@ -218,6 +218,14 @@ interface OpenAIMessage {
|
|||||||
}>
|
}>
|
||||||
tool_call_id?: string
|
tool_call_id?: string
|
||||||
name?: string
|
name?: string
|
||||||
|
/**
|
||||||
|
* Per-assistant-message chain-of-thought, attached when echoing an
|
||||||
|
* assistant message back to providers that require it (notably Moonshot:
|
||||||
|
* "thinking is enabled but reasoning_content is missing in assistant
|
||||||
|
* tool call message at index N" 400). Derived from the Anthropic thinking
|
||||||
|
* block captured when the original response was translated.
|
||||||
|
*/
|
||||||
|
reasoning_content?: string
|
||||||
}
|
}
|
||||||
|
|
||||||
interface OpenAITool {
|
interface OpenAITool {
|
||||||
@@ -385,7 +393,9 @@ function convertMessages(
|
|||||||
content?: unknown
|
content?: unknown
|
||||||
}>,
|
}>,
|
||||||
system: unknown,
|
system: unknown,
|
||||||
|
options?: { preserveReasoningContent?: boolean },
|
||||||
): OpenAIMessage[] {
|
): OpenAIMessage[] {
|
||||||
|
const preserveReasoningContent = options?.preserveReasoningContent === true
|
||||||
const result: OpenAIMessage[] = []
|
const result: OpenAIMessage[] = []
|
||||||
const knownToolCallIds = new Set<string>()
|
const knownToolCallIds = new Set<string>()
|
||||||
|
|
||||||
@@ -488,6 +498,21 @@ function convertMessages(
|
|||||||
})(),
|
})(),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Providers that validate reasoning continuity (Moonshot: "thinking
|
||||||
|
// is enabled but reasoning_content is missing in assistant tool call
|
||||||
|
// message at index N" 400) need the original chain-of-thought echoed
|
||||||
|
// back on each assistant message that carries a tool_call. We kept
|
||||||
|
// the thinking block on the Anthropic side; re-attach it here as the
|
||||||
|
// `reasoning_content` field on the outgoing OpenAI-shaped message.
|
||||||
|
// Gated per-provider because other endpoints either ignore the field
|
||||||
|
// (harmless) or strict-reject unknown fields (harmful).
|
||||||
|
if (preserveReasoningContent) {
|
||||||
|
const thinkingText = (thinkingBlock as { thinking?: string } | undefined)?.thinking
|
||||||
|
if (typeof thinkingText === 'string' && thinkingText.trim().length > 0) {
|
||||||
|
assistantMsg.reasoning_content = thinkingText
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (toolUses.length > 0) {
|
if (toolUses.length > 0) {
|
||||||
const mappedToolCalls = toolUses
|
const mappedToolCalls = toolUses
|
||||||
.map(
|
.map(
|
||||||
@@ -1460,7 +1485,12 @@ class OpenAIShimMessages {
|
|||||||
}>,
|
}>,
|
||||||
request.resolvedModel,
|
request.resolvedModel,
|
||||||
)
|
)
|
||||||
const openaiMessages = convertMessages(compressedMessages, params.system)
|
const openaiMessages = convertMessages(compressedMessages, params.system, {
|
||||||
|
// Moonshot requires every assistant tool-call message to carry
|
||||||
|
// reasoning_content when its thinking feature is active. Echo it back
|
||||||
|
// from the thinking block we captured on the inbound response.
|
||||||
|
preserveReasoningContent: isMoonshotBaseUrl(request.baseUrl),
|
||||||
|
})
|
||||||
|
|
||||||
const body: Record<string, unknown> = {
|
const body: Record<string, unknown> = {
|
||||||
model: request.resolvedModel,
|
model: request.resolvedModel,
|
||||||
|
|||||||
Reference in New Issue
Block a user