From 02599e0b6f939f777a861d32ed99f893b20e214b Mon Sep 17 00:00:00 2001
From: Juan Camilo
Date: Mon, 20 Apr 2026 10:08:09 +0200
Subject: [PATCH] fix(api): consolidate 3P provider compatibility fixes

- Strip store field from request body for local providers (Ollama, vLLM)
  that reject unknown JSON fields with 400 errors
- Add Gemini 3.x model context windows and output token limits
  (gemini-3-flash-preview, gemini-3.1-pro-preview, google/ OpenRouter
  variants)
- Preserve reasoning_content on assistant tool-call message replays for
  providers that require it (Kimi k2.5, DeepSeek reasoner)
- Use conservative max_output_tokens fallback (4096/16384) for unknown
  3P models to prevent vLLM/Ollama 400 errors when the requested output
  exceeds max_model_len

Consolidates fixes from: #258, #268, #237, #643, #666, #677

Co-authored-by: auriti
Co-authored-by: Gustavo-Falci
Co-authored-by: lttlin
Co-authored-by: Durannd
---
 src/__tests__/bugfixes.test.ts          |   8 +-
 src/services/api/openaiShim.test.ts     | 120 ++++++++++++++++++++++++
 src/services/api/openaiShim.ts          |  18 +++-
 src/utils/context.ts                    |   8 +-
 src/utils/model/openaiContextWindows.ts |  36 ++++---
 5 files changed, 167 insertions(+), 23 deletions(-)

diff --git a/src/__tests__/bugfixes.test.ts b/src/__tests__/bugfixes.test.ts
index c028bdd9..60142d0a 100644
--- a/src/__tests__/bugfixes.test.ts
+++ b/src/__tests__/bugfixes.test.ts
@@ -21,11 +21,11 @@ describe('Gemini store field fix', () => {
   test('isGeminiMode is imported and used in openaiShim', async () => {
     const content = await file('services/api/openaiShim.ts').text()
 
-    // Verify the fix: store deletion should check for Gemini mode
+    // Verify the fix: store deletion should check for Gemini mode and local providers
     expect(content).toContain('isGeminiMode()')
-    expect(content).toContain("mistral and gemini don't recognize body.store")
-    // Ensure the delete body.store is guarded for both Mistral and Gemini
-    expect(content).toMatch(/isMistral\s*\|\|\s*isGeminiMode\(\)/)
+    expect(content).toContain("Strip store for providers that don't recognize it")
+    // Ensure the delete body.store is guarded for Mistral, Gemini, and local providers
+    expect(content).toMatch(/isMistral\s*\|\|\s*isGeminiMode\(\)\s*\|\|\s*isLocal/)
   })
 
   test('store: false is still set by default (OpenAI needs it)', async () => {
diff --git a/src/services/api/openaiShim.test.ts b/src/services/api/openaiShim.test.ts
index eaf8b590..41046a26 100644
--- a/src/services/api/openaiShim.test.ts
+++ b/src/services/api/openaiShim.test.ts
@@ -3019,3 +3019,123 @@ test('preserves valid tool_result and drops orphan tool_result', async () => {
   const orphanMessage = toolMessages.find(m => m.tool_call_id === 'orphan_call_2')
   expect(orphanMessage).toBeUndefined()
 })
+
+test('request body does not contain store field for local providers', async () => {
+  process.env.CLAUDE_CODE_USE_OPENAI = '1'
+  process.env.OPENAI_BASE_URL = 'http://localhost:11434/v1'
+  let requestBody: Record<string, unknown> | undefined
+
+  globalThis.fetch = (async (_input, init) => {
+    requestBody = JSON.parse(String(init?.body))
+    return new Response(
+      JSON.stringify({
+        id: 'chatcmpl-1',
+        object: 'chat.completion',
+        model: 'test-model',
+        choices: [{ index: 0, message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' }],
+        usage: { prompt_tokens: 10, completion_tokens: 2, total_tokens: 12 },
+      }),
+      { headers: { 'Content-Type': 'application/json' } },
+    )
+  }) as FetchType
+
+  const client = createOpenAIShimClient({ defaultHeaders: {} }) as unknown as OpenAIShimClient
+  await client.beta.messages.create({
+    model: 'some-model',
+    messages: [{ role: 'user', content: [{ type: 'text', text: 'hi' }] }],
+    max_tokens: 64,
+    stream: false,
+  })
+
+  expect(requestBody).toBeDefined()
+  expect('store' in requestBody!).toBe(false)
+})
+
+test('preserves reasoning_content on assistant messages with tool_calls during replay', async () => {
+  process.env.CLAUDE_CODE_USE_OPENAI = '1'
+  let requestBody: Record<string, unknown> | undefined
+
+  globalThis.fetch = (async (_input, init) => {
+    requestBody = JSON.parse(String(init?.body))
+    return new Response(
+      JSON.stringify({
+        id: 'chatcmpl-1',
+        object: 'chat.completion',
+        model: 'test-model',
+        choices: [{ index: 0, message: { role: 'assistant', content: 'done' }, finish_reason: 'stop' }],
+        usage: { prompt_tokens: 10, completion_tokens: 2, total_tokens: 12 },
+      }),
+      { headers: { 'Content-Type': 'application/json' } },
+    )
+  }) as FetchType
+
+  const client = createOpenAIShimClient({ defaultHeaders: {} }) as unknown as OpenAIShimClient
+  await client.beta.messages.create({
+    model: 'kimi-k2.5',
+    messages: [
+      { role: 'user', content: [{ type: 'text', text: 'read file' }] },
+      {
+        role: 'assistant',
+        content: [
+          { type: 'thinking', thinking: 'I should use the read tool' },
+          { type: 'tool_use', id: 'call_1', name: 'Read', input: { file_path: 'test.ts' } },
+        ],
+      },
+      {
+        role: 'user',
+        content: [
+          { type: 'tool_result', tool_use_id: 'call_1', content: 'file contents here' },
+        ],
+      },
+    ],
+    max_tokens: 64,
+    stream: false,
+  })
+
+  const messages = requestBody?.messages as Array<Record<string, unknown>>
+  const assistantMsg = messages.find(m => m.role === 'assistant' && m.tool_calls)
+  expect(assistantMsg).toBeDefined()
+  expect(assistantMsg!.reasoning_content).toBe('I should use the read tool')
+})
+
+test('does not add reasoning_content on assistant messages without tool_calls', async () => {
+  process.env.CLAUDE_CODE_USE_OPENAI = '1'
+  let requestBody: Record<string, unknown> | undefined
+
+  globalThis.fetch = (async (_input, init) => {
+    requestBody = JSON.parse(String(init?.body))
+    return new Response(
+      JSON.stringify({
+        id: 'chatcmpl-1',
+        object: 'chat.completion',
+        model: 'test-model',
+        choices: [{ index: 0, message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' }],
+        usage: { prompt_tokens: 10, completion_tokens: 2, total_tokens: 12 },
+      }),
+      { headers: { 'Content-Type': 'application/json' } },
+    )
+  }) as FetchType
+
+  const client = createOpenAIShimClient({ defaultHeaders: {} }) as unknown as OpenAIShimClient
+  await client.beta.messages.create({
+    model: 'deepseek-reasoner',
+    messages: [
+      { role: 'user', content: [{ type: 'text', text: 'explain' }] },
+      {
+        role: 'assistant',
+        content: [
+          { type: 'thinking', thinking: 'Let me think about this' },
+          { type: 'text', text: 'Here is the explanation' },
+        ],
+      },
+      { role: 'user', content: [{ type: 'text', text: 'thanks' }] },
+    ],
+    max_tokens: 64,
+    stream: false,
+  })
+
+  const messages = requestBody?.messages as Array<Record<string, unknown>>
+  const assistantMsg = messages.find(m => m.role === 'assistant' && !m.tool_calls)
+  expect(assistantMsg).toBeDefined()
+  expect(assistantMsg!.reasoning_content).toBeUndefined()
+})
\ No newline at end of file
diff --git a/src/services/api/openaiShim.ts b/src/services/api/openaiShim.ts
index 6c3c1ffa..cb827d3e 100644
--- a/src/services/api/openaiShim.ts
+++ b/src/services/api/openaiShim.ts
@@ -192,6 +192,7 @@ function sleepMs(ms: number): Promise<void> {
 interface OpenAIMessage {
   role: 'system' | 'user' | 'assistant' | 'tool'
   content?: string | Array<{ type: string; text?: string; image_url?: { url: string } }>
+  reasoning_content?: string
   tool_calls?: Array<{
     id: string
     type: 'function'
@@ -416,6 +417,16 @@ function convertMessages(
   }
 
   if (toolUses.length > 0) {
+    // Preserve thinking text as reasoning_content for providers that
+    // require it on replayed assistant tool-call messages (e.g. Kimi,
+    // DeepSeek). Without this, follow-up requests fail with 400:
+    // "reasoning_content is missing in assistant tool call message".
+    // Note: only the first thinking block per turn is captured (.find);
+    // Anthropic's API typically produces one thinking block per turn.
+    if (thinkingBlock) {
+      assistantMsg.reasoning_content = (thinkingBlock as { thinking?: string }).thinking ?? ''
+    }
+
     assistantMsg.tool_calls = toolUses.map(
       (tu: {
         id?: string
@@ -1345,9 +1356,10 @@ class OpenAIShimMessages {
       delete body.max_completion_tokens
     }
 
-    // mistral and gemini don't recognize body.store — Gemini returns 400
-    // "Invalid JSON payload received. Unknown name 'store': Cannot find field."
-    if (isMistral || isGeminiMode()) {
+    // Strip store for providers that don't recognize it. Only OpenAI's own
+    // API supports this field — Gemini returns 400, local servers (vLLM,
+    // Ollama) reject unknown fields, and other providers silently ignore it.
+    if (isMistral || isGeminiMode() || isLocal) {
       delete body.store
     }
 
diff --git a/src/utils/context.ts b/src/utils/context.ts
index 370ed5df..45c343e5 100644
--- a/src/utils/context.ts
+++ b/src/utils/context.ts
@@ -190,16 +190,20 @@ export function getModelMaxOutputTokens(model: string): {
   }
 
   // OpenAI-compatible provider — use known output limits to avoid 400 errors
-  if (
+  const isOpenAICompatProvider =
     isEnvTruthy(process.env.CLAUDE_CODE_USE_OPENAI) ||
     isEnvTruthy(process.env.CLAUDE_CODE_USE_GEMINI) ||
     isEnvTruthy(process.env.CLAUDE_CODE_USE_GITHUB) ||
     isEnvTruthy(process.env.CLAUDE_CODE_USE_MISTRAL)
-  ) {
+  if (isOpenAICompatProvider) {
     const openaiMax = getOpenAIMaxOutputTokens(model)
     if (openaiMax !== undefined) {
       return { default: openaiMax, upperLimit: openaiMax }
     }
+    // Unknown 3P model — use conservative default to avoid vLLM/Ollama 400
+    // errors when the default 32k exceeds the model's max_model_len.
+    // Users can override with CLAUDE_CODE_MAX_OUTPUT_TOKENS.
+    return { default: 4_096, upperLimit: 16_384 }
   }
 
   const m = getCanonicalName(model)
diff --git a/src/utils/model/openaiContextWindows.ts b/src/utils/model/openaiContextWindows.ts
index 7c9838fb..63f6961b 100644
--- a/src/utils/model/openaiContextWindows.ts
+++ b/src/utils/model/openaiContextWindows.ts
@@ -177,15 +177,19 @@ const OPENAI_CONTEXT_WINDOWS: Record<string, number> = {
   'MiniMax-M2': 204_800,
 
   // Google (via OpenRouter)
-  'google/gemini-2.0-flash':1_048_576,
-  'google/gemini-2.5-pro':  1_048_576,
+  'google/gemini-2.0-flash': 1_048_576,
+  'google/gemini-2.5-pro': 1_048_576,
+  'google/gemini-3-flash-preview': 1_048_576,
+  'google/gemini-3.1-pro-preview': 1_048_576,
 
   // Google (native via CLAUDE_CODE_USE_GEMINI)
-  'gemini-2.0-flash':              1_048_576,
-  'gemini-2.5-pro':                1_048_576,
-  'gemini-2.5-flash':              1_048_576,
-  'gemini-3.1-pro':                1_048_576,
-  'gemini-3.1-flash-lite-preview': 1_048_576,
+  'gemini-2.0-flash': 1_048_576,
+  'gemini-2.5-pro': 1_048_576,
+  'gemini-2.5-flash': 1_048_576,
+  'gemini-3-flash-preview': 1_048_576,
+  'gemini-3.1-pro': 1_048_576,
+  'gemini-3.1-pro-preview': 1_048_576,
+  'gemini-3.1-flash-lite-preview': 1_048_576,
 
   // Ollama local models
   // Llama 3.1+ models support 128k context natively (Meta official specs).
@@ -329,15 +333,19 @@ const OPENAI_MAX_OUTPUT_TOKENS: Record<string, number> = {
   'MiniMax-Vision-01-Fast': 16_384,
 
   // Google (via OpenRouter)
-  'google/gemini-2.0-flash':  8_192,
-  'google/gemini-2.5-pro':    65_536,
+  'google/gemini-2.0-flash': 8_192,
+  'google/gemini-2.5-pro': 65_536,
+  'google/gemini-3-flash-preview': 65_536,
+  'google/gemini-3.1-pro-preview': 65_536,
 
   // Google (native via CLAUDE_CODE_USE_GEMINI)
-  'gemini-2.0-flash':              8_192,
-  'gemini-2.5-pro':                65_536,
-  'gemini-2.5-flash':              65_536,
-  'gemini-3.1-pro':                65_536,
-  'gemini-3.1-flash-lite-preview': 65_536,
+  'gemini-2.0-flash': 8_192,
+  'gemini-2.5-pro': 65_536,
+  'gemini-2.5-flash': 65_536,
+  'gemini-3-flash-preview': 65_536,
+  'gemini-3.1-pro': 65_536,
+  'gemini-3.1-pro-preview': 65_536,
+  'gemini-3.1-flash-lite-preview': 65_536,
 
   // Ollama local models (conservative safe defaults)
   'llama3.3:70b': 4_096,
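-- 
Below the signature delimiter so patch tools ignore it: a minimal
standalone sketch of the new fallback behavior in getModelMaxOutputTokens.
The table values and the 4_096/16_384 fallback come from the diff above;
KNOWN_MAX_OUTPUT and maxOutputTokens are illustrative names, not
identifiers from the repo.

  // Sketch only, not the repo implementation.
  const KNOWN_MAX_OUTPUT: Record<string, number> = {
    'gemini-3-flash-preview': 65_536,
    'google/gemini-3.1-pro-preview': 65_536,
  }

  function maxOutputTokens(model: string): { default: number; upperLimit: number } {
    const known = KNOWN_MAX_OUTPUT[model]
    if (known !== undefined) {
      // Known model: pin both the default and the ceiling to its documented limit.
      return { default: known, upperLimit: known }
    }
    // Unknown 3P model: stay under typical vLLM/Ollama max_model_len values
    // instead of requesting the old 32k default, which triggered 400 errors.
    return { default: 4_096, upperLimit: 16_384 }
  }

  maxOutputTokens('gemini-3-flash-preview') // { default: 65536, upperLimit: 65536 }
  maxOutputTokens('some-local-model')       // { default: 4096, upperLimit: 16384 }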