fix(openai-shim): echo reasoning_content on assistant tool-call messages for Moonshot (#828)
Kimi / Moonshot's chat completions endpoint requires that every assistant
message carrying tool_calls also carry reasoning_content when the
"thinking" feature is active. When an agent sends prior-turn assistant
history back (standard multi-turn / subagent / Explore patterns), the
shim previously stripped the thinking block:
case 'thinking':
case 'redacted_thinking':
// Strip thinking blocks for OpenAI-compatible providers.
break
That's correct for providers that would mis-interpret serialized
<thinking> tags, but Moonshot validates the schema strictly and rejects
with:
API Error: 400 {"error":{"message":"thinking is enabled but
reasoning_content is missing in assistant tool call message at
index N","type":"invalid_request_error"}}
Reproducer: launch with the Kimi profile and run any tool-using command
(Explore, Bash, etc.) — every request after the first fails with a 400.
Fix: in convertMessages(), when the per-request flag
preserveReasoningContent is set (only for Moonshot baseUrls today),
attach the original thinking block's text as reasoning_content on the
outgoing OpenAI-shaped assistant message. Other providers continue to
strip (unknown-field rejection risk).
OpenAIMessage type grows a reasoning_content?: string field.
convertMessages() accepts an options object and threads the flag
through; the only call site (_doOpenAIRequest) gates via
isMoonshotBaseUrl(request.baseUrl).
Tests (openaiShim.test.ts):
- Moonshot: echoes reasoning_content on assistant tool-call messages
(regression for the reported 400)
- non-Moonshot providers do NOT receive reasoning_content (guards
against leaking the field to strict-parse endpoints)
Full suite: 1195/1195 pass under --max-concurrency=1. PR scan clean.
Co-authored-by: OpenClaude <openclaude@gitlawb.com>
This commit is contained in:
@@ -3343,6 +3343,139 @@ test('Moonshot: uses max_tokens (not max_completion_tokens) and strips store', a
|
||||
expect(requestBody?.store).toBeUndefined()
|
||||
})
|
||||
|
||||
test('Moonshot: echoes reasoning_content on assistant tool-call messages', async () => {
|
||||
// Regression for: "API Error: 400 {"error":{"message":"thinking is enabled
|
||||
// but reasoning_content is missing in assistant tool call message at index
|
||||
// N"}}" when the agent sends a prior-turn assistant response back to Kimi.
|
||||
// The thinking block captured from the inbound response must round-trip
|
||||
// as reasoning_content on the outgoing echoed assistant message.
|
||||
process.env.OPENAI_BASE_URL = 'https://api.moonshot.ai/v1'
|
||||
process.env.OPENAI_API_KEY = 'sk-moonshot-test'
|
||||
|
||||
let requestBody: Record<string, unknown> | undefined
|
||||
globalThis.fetch = (async (_input, init) => {
|
||||
requestBody = JSON.parse(String(init?.body))
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
id: 'chatcmpl-1',
|
||||
model: 'kimi-k2.6',
|
||||
choices: [
|
||||
{ message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' },
|
||||
],
|
||||
usage: { prompt_tokens: 3, completion_tokens: 1, total_tokens: 4 },
|
||||
}),
|
||||
{ headers: { 'Content-Type': 'application/json' } },
|
||||
)
|
||||
}) as FetchType
|
||||
|
||||
const client = createOpenAIShimClient({}) as OpenAIShimClient
|
||||
await client.beta.messages.create({
|
||||
model: 'kimi-k2.6',
|
||||
system: 'you are kimi',
|
||||
messages: [
|
||||
{ role: 'user', content: 'check the logs' },
|
||||
{
|
||||
role: 'assistant',
|
||||
content: [
|
||||
{
|
||||
type: 'thinking',
|
||||
thinking: 'Need to inspect logs via Bash; running a cat.',
|
||||
},
|
||||
{ type: 'text', text: "I'll inspect the logs." },
|
||||
{
|
||||
type: 'tool_use',
|
||||
id: 'call_bash_1',
|
||||
name: 'Bash',
|
||||
input: { command: 'cat /tmp/app.log' },
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
content: [
|
||||
{
|
||||
type: 'tool_result',
|
||||
tool_use_id: 'call_bash_1',
|
||||
content: 'log line 1\nlog line 2',
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
max_tokens: 256,
|
||||
stream: false,
|
||||
})
|
||||
|
||||
const messages = requestBody?.messages as Array<Record<string, unknown>>
|
||||
const assistantWithToolCall = messages.find(
|
||||
m => m.role === 'assistant' && Array.isArray(m.tool_calls),
|
||||
)
|
||||
expect(assistantWithToolCall).toBeDefined()
|
||||
expect(assistantWithToolCall?.reasoning_content).toBe(
|
||||
'Need to inspect logs via Bash; running a cat.',
|
||||
)
|
||||
})
|
||||
|
||||
test('non-Moonshot providers do NOT receive reasoning_content on assistant messages', async () => {
|
||||
// Guard: only Moonshot opts in. DeepSeek/OpenRouter/etc. receive the
|
||||
// outgoing assistant message without reasoning_content to avoid
|
||||
// unknown-field rejections from strict servers.
|
||||
process.env.OPENAI_BASE_URL = 'https://api.deepseek.com/v1'
|
||||
process.env.OPENAI_API_KEY = 'sk-deepseek'
|
||||
|
||||
let requestBody: Record<string, unknown> | undefined
|
||||
globalThis.fetch = (async (_input, init) => {
|
||||
requestBody = JSON.parse(String(init?.body))
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
id: 'chatcmpl-1',
|
||||
model: 'deepseek-chat',
|
||||
choices: [
|
||||
{ message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' },
|
||||
],
|
||||
usage: { prompt_tokens: 3, completion_tokens: 1, total_tokens: 4 },
|
||||
}),
|
||||
{ headers: { 'Content-Type': 'application/json' } },
|
||||
)
|
||||
}) as FetchType
|
||||
|
||||
const client = createOpenAIShimClient({}) as OpenAIShimClient
|
||||
await client.beta.messages.create({
|
||||
model: 'deepseek-chat',
|
||||
system: 'test',
|
||||
messages: [
|
||||
{ role: 'user', content: 'hi' },
|
||||
{
|
||||
role: 'assistant',
|
||||
content: [
|
||||
{ type: 'thinking', thinking: 'thought' },
|
||||
{ type: 'text', text: 'hello' },
|
||||
{
|
||||
type: 'tool_use',
|
||||
id: 'call_1',
|
||||
name: 'Bash',
|
||||
input: { command: 'ls' },
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
content: [
|
||||
{ type: 'tool_result', tool_use_id: 'call_1', content: 'files' },
|
||||
],
|
||||
},
|
||||
],
|
||||
max_tokens: 32,
|
||||
stream: false,
|
||||
})
|
||||
|
||||
const messages = requestBody?.messages as Array<Record<string, unknown>>
|
||||
const assistantWithToolCall = messages.find(
|
||||
m => m.role === 'assistant' && Array.isArray(m.tool_calls),
|
||||
)
|
||||
expect(assistantWithToolCall).toBeDefined()
|
||||
expect(assistantWithToolCall?.reasoning_content).toBeUndefined()
|
||||
})
|
||||
|
||||
test('Moonshot: cn host is also detected', async () => {
|
||||
process.env.OPENAI_BASE_URL = 'https://api.moonshot.cn/v1'
|
||||
process.env.OPENAI_API_KEY = 'sk-moonshot-test'
|
||||
|
||||
@@ -218,6 +218,14 @@ interface OpenAIMessage {
|
||||
}>
|
||||
tool_call_id?: string
|
||||
name?: string
|
||||
/**
|
||||
* Per-assistant-message chain-of-thought, attached when echoing an
|
||||
* assistant message back to providers that require it (notably Moonshot:
|
||||
* "thinking is enabled but reasoning_content is missing in assistant
|
||||
* tool call message at index N" 400). Derived from the Anthropic thinking
|
||||
* block captured when the original response was translated.
|
||||
*/
|
||||
reasoning_content?: string
|
||||
}
|
||||
|
||||
interface OpenAITool {
|
||||
@@ -385,7 +393,9 @@ function convertMessages(
|
||||
content?: unknown
|
||||
}>,
|
||||
system: unknown,
|
||||
options?: { preserveReasoningContent?: boolean },
|
||||
): OpenAIMessage[] {
|
||||
const preserveReasoningContent = options?.preserveReasoningContent === true
|
||||
const result: OpenAIMessage[] = []
|
||||
const knownToolCallIds = new Set<string>()
|
||||
|
||||
@@ -488,6 +498,21 @@ function convertMessages(
|
||||
})(),
|
||||
}
|
||||
|
||||
// Providers that validate reasoning continuity (Moonshot: "thinking
|
||||
// is enabled but reasoning_content is missing in assistant tool call
|
||||
// message at index N" 400) need the original chain-of-thought echoed
|
||||
// back on each assistant message that carries a tool_call. We kept
|
||||
// the thinking block on the Anthropic side; re-attach it here as the
|
||||
// `reasoning_content` field on the outgoing OpenAI-shaped message.
|
||||
// Gated per-provider because other endpoints either ignore the field
|
||||
// (harmless) or strict-reject unknown fields (harmful).
|
||||
if (preserveReasoningContent) {
|
||||
const thinkingText = (thinkingBlock as { thinking?: string } | undefined)?.thinking
|
||||
if (typeof thinkingText === 'string' && thinkingText.trim().length > 0) {
|
||||
assistantMsg.reasoning_content = thinkingText
|
||||
}
|
||||
}
|
||||
|
||||
if (toolUses.length > 0) {
|
||||
const mappedToolCalls = toolUses
|
||||
.map(
|
||||
@@ -1460,7 +1485,12 @@ class OpenAIShimMessages {
|
||||
}>,
|
||||
request.resolvedModel,
|
||||
)
|
||||
const openaiMessages = convertMessages(compressedMessages, params.system)
|
||||
const openaiMessages = convertMessages(compressedMessages, params.system, {
|
||||
// Moonshot requires every assistant tool-call message to carry
|
||||
// reasoning_content when its thinking feature is active. Echo it back
|
||||
// from the thinking block we captured on the inbound response.
|
||||
preserveReasoningContent: isMoonshotBaseUrl(request.baseUrl),
|
||||
})
|
||||
|
||||
const body: Record<string, unknown> = {
|
||||
model: request.resolvedModel,
|
||||
|
||||
Reference in New Issue
Block a user