fix(api): consolidate 3P provider compatibility fixes
- Strip the store field from the request body for local providers (Ollama, vLLM), which reject unknown JSON fields with 400 errors.
- Add context windows and output token limits for Gemini 3.x models (gemini-3-flash-preview, gemini-3.1-pro-preview, and their google/ OpenRouter variants).
- Preserve reasoning_content on assistant tool-call message replays for providers that require it (Kimi k2.5, DeepSeek reasoner).
- Use a conservative max_output_tokens fallback (default 4096, upper limit 16384) for unknown 3P models, to prevent vLLM/Ollama 400 errors caused by exceeding max_model_len.

Consolidates fixes from: #258, #268, #237, #643, #666, #677

Co-authored-by: auriti <auriti@users.noreply.github.com>
Co-authored-by: Gustavo-Falci <Gustavo-Falci@users.noreply.github.com>
Co-authored-by: lttlin <lttlin@users.noreply.github.com>
Co-authored-by: Durannd <Durannd@users.noreply.github.com>
This commit is contained in:
@@ -21,11 +21,11 @@ describe('Gemini store field fix', () => {
|
||||
// Source-level regression check: reads the shim's source text and verifies
// that the `delete body.store` guard covers Mistral, Gemini mode, and local
// providers. The assertions describe the post-change shim only; the earlier
// two-provider comment text and regex are superseded by the checks below.
test('isGeminiMode is imported and used in openaiShim', async () => {
  const content = await file('services/api/openaiShim.ts').text()

  // Verify the fix: store deletion should check for Gemini mode and local providers
  expect(content).toContain('isGeminiMode()')
  expect(content).toContain("Strip store for providers that don't recognize it")
  // Ensure the delete body.store is guarded for Mistral, Gemini, and local providers
  expect(content).toMatch(/isMistral\s*\|\|\s*isGeminiMode\(\)\s*\|\|\s*isLocal/)
})
|
||||
|
||||
test('store: false is still set by default (OpenAI needs it)', async () => {
|
||||
|
||||
@@ -3019,3 +3019,123 @@ test('preserves valid tool_result and drops orphan tool_result', async () => {
|
||||
const orphanMessage = toolMessages.find(m => m.tool_call_id === 'orphan_call_2')
|
||||
expect(orphanMessage).toBeUndefined()
|
||||
})
|
||||
|
||||
// With the OpenAI-compatible shim enabled and a localhost base URL, the JSON
// body handed to fetch must not carry a `store` key at all.
test('request body does not contain store field for local providers', async () => {
  process.env.CLAUDE_CODE_USE_OPENAI = '1'
  process.env.OPENAI_BASE_URL = 'http://localhost:11434/v1'

  // Minimal non-streaming chat-completion reply returned by the stubbed fetch.
  const cannedReply = JSON.stringify({
    id: 'chatcmpl-1',
    object: 'chat.completion',
    model: 'test-model',
    choices: [{ index: 0, message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' }],
    usage: { prompt_tokens: 10, completion_tokens: 2, total_tokens: 12 },
  })

  // Filled in by the stub with the parsed body of the outgoing request.
  let sentBody: Record<string, unknown> | undefined

  globalThis.fetch = (async (_input, init) => {
    sentBody = JSON.parse(String(init?.body))
    return new Response(cannedReply, {
      headers: { 'Content-Type': 'application/json' },
    })
  }) as FetchType

  const shim = createOpenAIShimClient({ defaultHeaders: {} }) as unknown as OpenAIShimClient
  const userTurn = { role: 'user', content: [{ type: 'text', text: 'hi' }] }
  await shim.beta.messages.create({
    model: 'some-model',
    messages: [userTurn],
    max_tokens: 64,
    stream: false,
  })

  // A request must have been captured before its keys can be inspected.
  expect(sentBody).toBeDefined()
  const hasStoreKey = 'store' in sentBody!
  expect(hasStoreKey).toBe(false)
})
|
||||
|
||||
// Replays a conversation whose assistant turn holds a thinking block followed
// by a tool_use block, then checks that the converted assistant tool-call
// message sent to the provider carries the thinking text as `reasoning_content`.
test('preserves reasoning_content on assistant messages with tool_calls during replay', async () => {
  process.env.CLAUDE_CODE_USE_OPENAI = '1'
  let requestBody: Record<string, unknown> | undefined

  // Stub fetch: record the parsed outgoing body and answer with a minimal
  // non-streaming chat completion.
  globalThis.fetch = (async (_input, init) => {
    requestBody = JSON.parse(String(init?.body))
    return new Response(
      JSON.stringify({
        id: 'chatcmpl-1',
        object: 'chat.completion',
        model: 'test-model',
        choices: [{ index: 0, message: { role: 'assistant', content: 'done' }, finish_reason: 'stop' }],
        usage: { prompt_tokens: 10, completion_tokens: 2, total_tokens: 12 },
      }),
      { headers: { 'Content-Type': 'application/json' } },
    )
  }) as FetchType

  const client = createOpenAIShimClient({ defaultHeaders: {} }) as unknown as OpenAIShimClient
  await client.beta.messages.create({
    model: 'kimi-k2.5',
    messages: [
      { role: 'user', content: [{ type: 'text', text: 'read file' }] },
      {
        role: 'assistant',
        content: [
          { type: 'thinking', thinking: 'I should use the read tool' },
          { type: 'tool_use', id: 'call_1', name: 'Read', input: { file_path: 'test.ts' } },
        ],
      },
      {
        role: 'user',
        content: [
          { type: 'tool_result', tool_use_id: 'call_1', content: 'file contents here' },
        ],
      },
    ],
    max_tokens: 64,
    stream: false,
  })

  // Fail with a clear assertion (rather than a TypeError inside `.find`) when
  // no request reached the stubbed fetch.
  expect(requestBody).toBeDefined()

  const messages = requestBody?.messages as Array<Record<string, unknown>>
  const assistantMsg = messages.find(m => m.role === 'assistant' && m.tool_calls)
  expect(assistantMsg).toBeDefined()
  expect(assistantMsg!.reasoning_content).toBe('I should use the read tool')
})
|
||||
|
||||
// Counterpart to the tool-call replay case: an assistant turn made of a
// thinking block plus plain text (no tool_use) must be sent without a
// `reasoning_content` field.
test('does not add reasoning_content on assistant messages without tool_calls', async () => {
  process.env.CLAUDE_CODE_USE_OPENAI = '1'
  let requestBody: Record<string, unknown> | undefined

  // Stub fetch: record the parsed outgoing body and answer with a minimal
  // non-streaming chat completion.
  globalThis.fetch = (async (_input, init) => {
    requestBody = JSON.parse(String(init?.body))
    return new Response(
      JSON.stringify({
        id: 'chatcmpl-1',
        object: 'chat.completion',
        model: 'test-model',
        choices: [{ index: 0, message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' }],
        usage: { prompt_tokens: 10, completion_tokens: 2, total_tokens: 12 },
      }),
      { headers: { 'Content-Type': 'application/json' } },
    )
  }) as FetchType

  const client = createOpenAIShimClient({ defaultHeaders: {} }) as unknown as OpenAIShimClient
  await client.beta.messages.create({
    model: 'deepseek-reasoner',
    messages: [
      { role: 'user', content: [{ type: 'text', text: 'explain' }] },
      {
        role: 'assistant',
        content: [
          { type: 'thinking', thinking: 'Let me think about this' },
          { type: 'text', text: 'Here is the explanation' },
        ],
      },
      { role: 'user', content: [{ type: 'text', text: 'thanks' }] },
    ],
    max_tokens: 64,
    stream: false,
  })

  // Fail with a clear assertion (rather than a TypeError inside `.find`) when
  // no request reached the stubbed fetch.
  expect(requestBody).toBeDefined()

  const messages = requestBody?.messages as Array<Record<string, unknown>>
  const assistantMsg = messages.find(m => m.role === 'assistant' && !m.tool_calls)
  expect(assistantMsg).toBeDefined()
  expect(assistantMsg!.reasoning_content).toBeUndefined()
})
|
||||
@@ -192,6 +192,7 @@ function sleepMs(ms: number): Promise<void> {
|
||||
interface OpenAIMessage {
|
||||
role: 'system' | 'user' | 'assistant' | 'tool'
|
||||
content?: string | Array<{ type: string; text?: string; image_url?: { url: string } }>
|
||||
reasoning_content?: string
|
||||
tool_calls?: Array<{
|
||||
id: string
|
||||
type: 'function'
|
||||
@@ -416,6 +417,16 @@ function convertMessages(
|
||||
}
|
||||
|
||||
if (toolUses.length > 0) {
|
||||
// Preserve thinking text as reasoning_content for providers that
|
||||
// require it on replayed assistant tool-call messages (e.g. Kimi,
|
||||
// DeepSeek). Without this, follow-up requests fail with 400:
|
||||
// "reasoning_content is missing in assistant tool call message".
|
||||
// Note: only the first thinking block per turn is captured (.find);
|
||||
// Anthropic's API typically produces one thinking block per turn.
|
||||
if (thinkingBlock) {
|
||||
assistantMsg.reasoning_content = (thinkingBlock as { thinking?: string }).thinking ?? ''
|
||||
}
|
||||
|
||||
assistantMsg.tool_calls = toolUses.map(
|
||||
(tu: {
|
||||
id?: string
|
||||
@@ -1345,9 +1356,10 @@ class OpenAIShimMessages {
|
||||
delete body.max_completion_tokens
|
||||
}
|
||||
|
||||
// mistral and gemini don't recognize body.store — Gemini returns 400
|
||||
// "Invalid JSON payload received. Unknown name 'store': Cannot find field."
|
||||
if (isMistral || isGeminiMode()) {
|
||||
// Strip store for providers that don't recognize it. Only OpenAI's own
|
||||
// API supports this field — Gemini returns 400, local servers (vLLM,
|
||||
// Ollama) reject unknown fields, and other providers silently ignore it.
|
||||
if (isMistral || isGeminiMode() || isLocal) {
|
||||
delete body.store
|
||||
}
|
||||
|
||||
|
||||
@@ -190,16 +190,20 @@ export function getModelMaxOutputTokens(model: string): {
|
||||
}
|
||||
|
||||
// OpenAI-compatible provider — use known output limits to avoid 400 errors
|
||||
if (
|
||||
const isOpenAICompatProvider =
|
||||
isEnvTruthy(process.env.CLAUDE_CODE_USE_OPENAI) ||
|
||||
isEnvTruthy(process.env.CLAUDE_CODE_USE_GEMINI) ||
|
||||
isEnvTruthy(process.env.CLAUDE_CODE_USE_GITHUB) ||
|
||||
isEnvTruthy(process.env.CLAUDE_CODE_USE_MISTRAL)
|
||||
) {
|
||||
if (isOpenAICompatProvider) {
|
||||
const openaiMax = getOpenAIMaxOutputTokens(model)
|
||||
if (openaiMax !== undefined) {
|
||||
return { default: openaiMax, upperLimit: openaiMax }
|
||||
}
|
||||
// Unknown 3P model — use conservative default to avoid vLLM/Ollama 400
|
||||
// errors when the default 32k exceeds the model's max_model_len.
|
||||
// Users can override with CLAUDE_CODE_MAX_OUTPUT_TOKENS.
|
||||
return { default: 4_096, upperLimit: 16_384 }
|
||||
}
|
||||
|
||||
const m = getCanonicalName(model)
|
||||
|
||||
@@ -177,14 +177,18 @@ const OPENAI_CONTEXT_WINDOWS: Record<string, number> = {
|
||||
'MiniMax-M2': 204_800,
|
||||
|
||||
// Google (via OpenRouter)
|
||||
'google/gemini-2.0-flash':1_048_576,
|
||||
'google/gemini-2.0-flash': 1_048_576,
|
||||
'google/gemini-2.5-pro': 1_048_576,
|
||||
'google/gemini-3-flash-preview': 1_048_576,
|
||||
'google/gemini-3.1-pro-preview': 1_048_576,
|
||||
|
||||
// Google (native via CLAUDE_CODE_USE_GEMINI)
|
||||
'gemini-2.0-flash': 1_048_576,
|
||||
'gemini-2.5-pro': 1_048_576,
|
||||
'gemini-2.5-flash': 1_048_576,
|
||||
'gemini-3-flash-preview': 1_048_576,
|
||||
'gemini-3.1-pro': 1_048_576,
|
||||
'gemini-3.1-pro-preview': 1_048_576,
|
||||
'gemini-3.1-flash-lite-preview': 1_048_576,
|
||||
|
||||
// Ollama local models
|
||||
@@ -331,12 +335,16 @@ const OPENAI_MAX_OUTPUT_TOKENS: Record<string, number> = {
|
||||
// Google (via OpenRouter)
|
||||
'google/gemini-2.0-flash': 8_192,
|
||||
'google/gemini-2.5-pro': 65_536,
|
||||
'google/gemini-3-flash-preview': 65_536,
|
||||
'google/gemini-3.1-pro-preview': 65_536,
|
||||
|
||||
// Google (native via CLAUDE_CODE_USE_GEMINI)
|
||||
'gemini-2.0-flash': 8_192,
|
||||
'gemini-2.5-pro': 65_536,
|
||||
'gemini-2.5-flash': 65_536,
|
||||
'gemini-3-flash-preview': 65_536,
|
||||
'gemini-3.1-pro': 65_536,
|
||||
'gemini-3.1-pro-preview': 65_536,
|
||||
'gemini-3.1-flash-lite-preview': 65_536,
|
||||
|
||||
// Ollama local models (conservative safe defaults)
|
||||
|
||||
Reference in New Issue
Block a user