From d5852ca73dd2c44f9f32ace7eda93058b57475e7 Mon Sep 17 00:00:00 2001
From: Juan Camilo Auriti
Date: Mon, 6 Apr 2026 00:47:11 +0200
Subject: [PATCH] fix: coalesce consecutive same-role messages for strict template models (#241)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Models served through Ollama/vLLM with strict Jinja templates
(Devstral, Mistral, etc.) require strict user↔assistant role
alternation and reject requests with consecutive messages of the same
role.

convertMessages() could produce consecutive user or assistant messages
in three scenarios: batched user input, text-only + tool_use assistant
turns, and tool result remainders followed by another user message.

Added a coalescing pass at the end of convertMessages() that merges
consecutive same-role messages (string concat or array concat),
preserving tool_calls on assistant messages. Tool and system messages
are excluded from coalescing as they have their own alternation rules.

Includes regression tests for both user and assistant coalescing.

Fixes #202
---
 src/services/api/openaiShim.test.ts | 77 +++++++++++++++++++++++++++++
 src/services/api/openaiShim.ts      | 36 +++++++++++++-
 2 files changed, 112 insertions(+), 1 deletion(-)

diff --git a/src/services/api/openaiShim.test.ts b/src/services/api/openaiShim.test.ts
index be975650..1bde1380 100644
--- a/src/services/api/openaiShim.test.ts
+++ b/src/services/api/openaiShim.test.ts
@@ -573,3 +573,80 @@ test('sanitizes malformed MCP tool schemas before sending them to OpenAI', async
   expect(properties?.priority?.enum).toEqual([0, 1, 2, 3])
   expect(properties?.priority).not.toHaveProperty('default')
 })
+
+// ---------------------------------------------------------------------------
+// Issue #202 — consecutive role coalescing (Devstral, Mistral strict templates)
+// ---------------------------------------------------------------------------
+
+function makeNonStreamResponse(content = 'ok'): Response {
+  return new Response(
+    JSON.stringify({
+      id: 'chatcmpl-test',
+      model: 'test-model',
+      choices: [{ message: { role: 'assistant', content }, finish_reason: 'stop' }],
+      usage: { prompt_tokens: 5, completion_tokens: 1, total_tokens: 6 },
+    }),
+    { headers: { 'Content-Type': 'application/json' } },
+  )
+}
+
+test('coalesces consecutive user messages to avoid alternation errors (issue #202)', async () => {
+  let sentMessages: Array<{ role: string; content: unknown }> | undefined
+
+  globalThis.fetch = (async (_input: unknown, init: RequestInit | undefined) => {
+    sentMessages = JSON.parse(String(init?.body)).messages
+    return makeNonStreamResponse()
+  }) as FetchType
+
+  const client = createOpenAIShimClient({}) as OpenAIShimClient
+
+  await client.beta.messages.create({
+    model: 'test-model',
+    system: 'sys',
+    messages: [
+      { role: 'user', content: 'first message' },
+      { role: 'user', content: 'second message' },
+    ],
+    max_tokens: 64,
+    stream: false,
+  })
+
+  expect(sentMessages?.length).toBe(2) // system + 1 merged user
+  expect(sentMessages?.[0]?.role).toBe('system')
+  expect(sentMessages?.[1]?.role).toBe('user')
+  const userContent = sentMessages?.[1]?.content as string
+  expect(userContent).toContain('first message')
+  expect(userContent).toContain('second message')
+})
+
+test('coalesces consecutive assistant messages preserving tool_calls (issue #202)', async () => {
+  let sentMessages: Array<{ role: string; content: unknown; tool_calls?: unknown[] }> | undefined
+
+  globalThis.fetch = (async (_input: unknown, init: RequestInit | undefined) => {
+    sentMessages = JSON.parse(String(init?.body)).messages
+    return makeNonStreamResponse()
+  }) as FetchType
+
+  const client = createOpenAIShimClient({}) as OpenAIShimClient
+
+  await client.beta.messages.create({
+    model: 'test-model',
+    system: 'sys',
+    messages: [
+      { role: 'user', content: 'go' },
+      { role: 'assistant', content: 'thinking...' },
+      {
+        role: 'assistant',
+        content: [{ type: 'tool_use', id: 'call_1', name: 'Bash', input: { command: 'ls' } }],
+      },
+      { role: 'user', content: [{ type: 'tool_result', tool_use_id: 'call_1', content: 'file.txt' }] },
+    ],
+    max_tokens: 64,
+    stream: false,
+  })
+
+  // system + user + merged assistant + tool
+  const assistantMsgs = sentMessages?.filter(m => m.role === 'assistant')
+  expect(assistantMsgs?.length).toBe(1) // two assistant turns merged into one
+  expect(assistantMsgs?.[0]?.tool_calls?.length).toBeGreaterThan(0)
+})
diff --git a/src/services/api/openaiShim.ts b/src/services/api/openaiShim.ts
index 29f0ce87..8c2e07e3 100644
--- a/src/services/api/openaiShim.ts
+++ b/src/services/api/openaiShim.ts
@@ -295,7 +295,41 @@ function convertMessages(
     }
   }
 
-  return result
+  // Coalescing pass: merge consecutive messages of the same role.
+  // OpenAI/vLLM/Ollama require strict user↔assistant alternation.
+  // Multiple consecutive tool messages are allowed (assistant → tool* → user).
+  // Consecutive user or assistant messages must be merged to avoid Jinja
+  // template errors like "roles must alternate" (Devstral, Mistral models).
+  const coalesced: OpenAIMessage[] = []
+  for (const msg of result) {
+    const prev = coalesced[coalesced.length - 1]
+
+    if (prev && prev.role === msg.role && msg.role !== 'tool' && msg.role !== 'system') {
+      const prevContent = prev.content
+      const curContent = msg.content
+
+      if (typeof prevContent === 'string' && typeof curContent === 'string') {
+        prev.content = prevContent + (prevContent && curContent ? '\n' : '') + curContent
+      } else {
+        const toArray = (
+          c: string | Array<{ type: string; text?: string; image_url?: { url: string } }> | undefined,
+        ): Array<{ type: string; text?: string; image_url?: { url: string } }> => {
+          if (!c) return []
+          if (typeof c === 'string') return c ? [{ type: 'text', text: c }] : []
+          return c
+        }
+        prev.content = [...toArray(prevContent), ...toArray(curContent)]
+      }
+
+      if (msg.tool_calls?.length) {
+        prev.tool_calls = [...(prev.tool_calls ?? []), ...msg.tool_calls]
+      }
+    } else {
+      coalesced.push(msg)
+    }
+  }
+
+  return coalesced
 }
 
 /**
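
Illustrative note (not part of the patch): a minimal sketch of the chat
payload the shim should emit for the issue #202 repro above once the two
assistant turns are coalesced. The OpenAIChatMessage type and the exact
tool_calls serialization are assumptions made for this example; only the
role sequence and the merging of content plus tool_calls mirror the
convertMessages() change in the diff.

type OpenAIChatMessage = {
  role: 'system' | 'user' | 'assistant' | 'tool'
  content: string
  tool_calls?: Array<{ id: string; type: 'function'; function: { name: string; arguments: string } }>
  tool_call_id?: string
}

// Expected request body after coalescing: roles run system -> user ->
// assistant -> tool, which strict Jinja templates (Devstral, Mistral) accept.
const coalescedMessages: OpenAIChatMessage[] = [
  { role: 'system', content: 'sys' },
  { role: 'user', content: 'go' },
  {
    role: 'assistant',
    content: 'thinking...', // text kept from the first assistant turn
    // tool_calls carried over from the second assistant turn during the merge
    tool_calls: [
      { id: 'call_1', type: 'function', function: { name: 'Bash', arguments: '{"command":"ls"}' } },
    ],
  },
  { role: 'tool', tool_call_id: 'call_1', content: 'file.txt' },
]

console.log(coalescedMessages.map(m => m.role).join(' -> ')) // system -> user -> assistant -> tool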