From d5852ca73dd2c44f9f32ace7eda93058b57475e7 Mon Sep 17 00:00:00 2001
From: Juan Camilo Auriti
Date: Mon, 6 Apr 2026 00:47:11 +0200
Subject: [PATCH] fix: coalesce consecutive same-role messages for strict template models (#241)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Models served through Ollama/vLLM with strict Jinja templates
(Devstral, Mistral, etc.) require strict user↔assistant role
alternation and reject requests with consecutive messages of the same
role.

convertMessages() could produce consecutive user or assistant messages
in three scenarios: batched user input, text-only + tool_use assistant
turns, and tool result remainders followed by another user message.

Added a coalescing pass at the end of convertMessages() that merges
consecutive same-role messages (string concat or array concat),
preserving tool_calls on assistant messages. Tool and system messages
are excluded from coalescing as they have their own alternation rules.

Includes regression tests for both user and assistant coalescing.

Fixes #202
---
 src/services/api/openaiShim.test.ts | 77 +++++++++++++++++++++++++++++
 src/services/api/openaiShim.ts      | 36 +++++++++++++-
 2 files changed, 112 insertions(+), 1 deletion(-)

diff --git a/src/services/api/openaiShim.test.ts b/src/services/api/openaiShim.test.ts
index be975650..1bde1380 100644
--- a/src/services/api/openaiShim.test.ts
+++ b/src/services/api/openaiShim.test.ts
@@ -573,3 +573,80 @@ test('sanitizes malformed MCP tool schemas before sending them to OpenAI', async
   expect(properties?.priority?.enum).toEqual([0, 1, 2, 3])
   expect(properties?.priority).not.toHaveProperty('default')
 })
+
+// ---------------------------------------------------------------------------
+// Issue #202 — consecutive role coalescing (Devstral, Mistral strict templates)
+// ---------------------------------------------------------------------------
+
+function makeNonStreamResponse(content = 'ok'): Response {
+  return new Response(
+    JSON.stringify({
+      id: 'chatcmpl-test',
+      model: 'test-model',
+      choices: [{ message: { role: 'assistant', content }, finish_reason: 'stop' }],
+      usage: { prompt_tokens: 5, completion_tokens: 1, total_tokens: 6 },
+    }),
+    { headers: { 'Content-Type': 'application/json' } },
+  )
+}
+
+test('coalesces consecutive user messages to avoid alternation errors (issue #202)', async () => {
+  let sentMessages: Array<{ role: string; content: unknown }> | undefined
+
+  globalThis.fetch = (async (_input: unknown, init: RequestInit | undefined) => {
+    sentMessages = JSON.parse(String(init?.body)).messages
+    return makeNonStreamResponse()
+  }) as FetchType
+
+  const client = createOpenAIShimClient({}) as OpenAIShimClient
+
+  await client.beta.messages.create({
+    model: 'test-model',
+    system: 'sys',
+    messages: [
+      { role: 'user', content: 'first message' },
+      { role: 'user', content: 'second message' },
+    ],
+    max_tokens: 64,
+    stream: false,
+  })
+
+  expect(sentMessages?.length).toBe(2) // system + 1 merged user
+  expect(sentMessages?.[0]?.role).toBe('system')
+  expect(sentMessages?.[1]?.role).toBe('user')
+  const userContent = sentMessages?.[1]?.content as string
+  expect(userContent).toContain('first message')
+  expect(userContent).toContain('second message')
+})
+
+test('coalesces consecutive assistant messages preserving tool_calls (issue #202)', async () => {
+  let sentMessages: Array<{ role: string; content: unknown; tool_calls?: unknown[] }> | undefined
+
+  globalThis.fetch = (async (_input: unknown, init: RequestInit | undefined) => {
+    sentMessages = JSON.parse(String(init?.body)).messages
+    return makeNonStreamResponse()
+  }) as FetchType
+
+  const client = createOpenAIShimClient({}) as OpenAIShimClient
+
+  await client.beta.messages.create({
+    model: 'test-model',
+    system: 'sys',
+    messages: [
+      { role: 'user', content: 'go' },
+      { role: 'assistant', content: 'thinking...' },
+      {
+        role: 'assistant',
+        content: [{ type: 'tool_use', id: 'call_1', name: 'Bash', input: { command: 'ls' } }],
+      },
+      { role: 'user', content: [{ type: 'tool_result', tool_use_id: 'call_1', content: 'file.txt' }] },
+    ],
+    max_tokens: 64,
+    stream: false,
+  })
+
+  // system + user + merged assistant + tool
+  const assistantMsgs = sentMessages?.filter(m => m.role === 'assistant')
+  expect(assistantMsgs?.length).toBe(1) // two assistant turns merged into one
+  expect(assistantMsgs?.[0]?.tool_calls?.length).toBeGreaterThan(0)
+})
diff --git a/src/services/api/openaiShim.ts b/src/services/api/openaiShim.ts
index 29f0ce87..8c2e07e3 100644
--- a/src/services/api/openaiShim.ts
+++ b/src/services/api/openaiShim.ts
@@ -295,7 +295,41 @@ function convertMessages(
     }
   }
 
-  return result
+  // Coalescing pass: merge consecutive messages of the same role.
+  // OpenAI/vLLM/Ollama require strict user↔assistant alternation.
+  // Multiple consecutive tool messages are allowed (assistant → tool* → user).
+  // Consecutive user or assistant messages must be merged to avoid Jinja
+  // template errors like "roles must alternate" (Devstral, Mistral models).
+  const coalesced: OpenAIMessage[] = []
+  for (const msg of result) {
+    const prev = coalesced[coalesced.length - 1]
+
+    if (prev && prev.role === msg.role && msg.role !== 'tool' && msg.role !== 'system') {
+      const prevContent = prev.content
+      const curContent = msg.content
+
+      if (typeof prevContent === 'string' && typeof curContent === 'string') {
+        prev.content = prevContent + (prevContent && curContent ? '\n' : '') + curContent
+      } else {
+        const toArray = (
+          c: string | Array<{ type: string; text?: string; image_url?: { url: string } }> | undefined,
+        ): Array<{ type: string; text?: string; image_url?: { url: string } }> => {
+          if (!c) return []
+          if (typeof c === 'string') return c ? [{ type: 'text', text: c }] : []
+          return c
+        }
+        prev.content = [...toArray(prevContent), ...toArray(curContent)]
+      }
+
+      if (msg.tool_calls?.length) {
+        prev.tool_calls = [...(prev.tool_calls ?? []), ...msg.tool_calls]
+      }
+    } else {
+      coalesced.push(msg)
+    }
+  }
+
+  return coalesced
 }
 
 /**
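
Illustrative note (not part of the patch): a minimal sketch of the chat
payload the shim should emit for the issue #202 repro above once the two
assistant turns are coalesced. The OpenAIChatMessage type and the exact
tool_calls serialization are assumptions made for this example; only the
role sequence and the merging of content plus tool_calls mirror the
convertMessages() change in the diff.

type OpenAIChatMessage = {
  role: 'system' | 'user' | 'assistant' | 'tool'
  content: string
  tool_calls?: Array<{ id: string; type: 'function'; function: { name: string; arguments: string } }>
  tool_call_id?: string
}

// Expected request body after coalescing: roles run system -> user ->
// assistant -> tool, which strict Jinja templates (Devstral, Mistral) accept.
const coalescedMessages: OpenAIChatMessage[] = [
  { role: 'system', content: 'sys' },
  { role: 'user', content: 'go' },
  {
    role: 'assistant',
    content: 'thinking...', // text kept from the first assistant turn
    // tool_calls carried over from the second assistant turn during the merge
    tool_calls: [
      { id: 'call_1', type: 'function', function: { name: 'Bash', arguments: '{"command":"ls"}' } },
    ],
  },
  { role: 'tool', tool_call_id: 'call_1', content: 'file.txt' },
]

console.log(coalescedMessages.map(m => m.role).join(' -> ')) // system -> user -> assistant -> tool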