fix: coalesce consecutive same-role messages for strict template models (#241)
Models served through Ollama/vLLM with strict Jinja templates (Devstral, Mistral, etc.) require strict user↔assistant role alternation and reject requests with consecutive messages of the same role. convertMessages() could produce consecutive user or assistant messages in three scenarios: batched user input, text-only + tool_use assistant turns, and tool result remainders followed by another user message. Added a coalescing pass at the end of convertMessages() that merges consecutive same-role messages (string concat or array concat), preserving tool_calls on assistant messages. Tool and system messages are excluded from coalescing as they have their own alternation rules. Includes regression tests for both user and assistant coalescing. Fixes #202
This commit is contained in:
committed by
GitHub
parent
c534aa5771
commit
d5852ca73d
@@ -573,3 +573,80 @@ test('sanitizes malformed MCP tool schemas before sending them to OpenAI', async
|
|||||||
expect(properties?.priority?.enum).toEqual([0, 1, 2, 3])
|
expect(properties?.priority?.enum).toEqual([0, 1, 2, 3])
|
||||||
expect(properties?.priority).not.toHaveProperty('default')
|
expect(properties?.priority).not.toHaveProperty('default')
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Issue #202 — consecutive role coalescing (Devstral, Mistral strict templates)
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
function makeNonStreamResponse(content = 'ok'): Response {
|
||||||
|
return new Response(
|
||||||
|
JSON.stringify({
|
||||||
|
id: 'chatcmpl-test',
|
||||||
|
model: 'test-model',
|
||||||
|
choices: [{ message: { role: 'assistant', content }, finish_reason: 'stop' }],
|
||||||
|
usage: { prompt_tokens: 5, completion_tokens: 1, total_tokens: 6 },
|
||||||
|
}),
|
||||||
|
{ headers: { 'Content-Type': 'application/json' } },
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
test('coalesces consecutive user messages to avoid alternation errors (issue #202)', async () => {
|
||||||
|
let sentMessages: Array<{ role: string; content: unknown }> | undefined
|
||||||
|
|
||||||
|
globalThis.fetch = (async (_input: unknown, init: RequestInit | undefined) => {
|
||||||
|
sentMessages = JSON.parse(String(init?.body)).messages
|
||||||
|
return makeNonStreamResponse()
|
||||||
|
}) as FetchType
|
||||||
|
|
||||||
|
const client = createOpenAIShimClient({}) as OpenAIShimClient
|
||||||
|
|
||||||
|
await client.beta.messages.create({
|
||||||
|
model: 'test-model',
|
||||||
|
system: 'sys',
|
||||||
|
messages: [
|
||||||
|
{ role: 'user', content: 'first message' },
|
||||||
|
{ role: 'user', content: 'second message' },
|
||||||
|
],
|
||||||
|
max_tokens: 64,
|
||||||
|
stream: false,
|
||||||
|
})
|
||||||
|
|
||||||
|
expect(sentMessages?.length).toBe(2) // system + 1 merged user
|
||||||
|
expect(sentMessages?.[0]?.role).toBe('system')
|
||||||
|
expect(sentMessages?.[1]?.role).toBe('user')
|
||||||
|
const userContent = sentMessages?.[1]?.content as string
|
||||||
|
expect(userContent).toContain('first message')
|
||||||
|
expect(userContent).toContain('second message')
|
||||||
|
})
|
||||||
|
|
||||||
|
test('coalesces consecutive assistant messages preserving tool_calls (issue #202)', async () => {
|
||||||
|
let sentMessages: Array<{ role: string; content: unknown; tool_calls?: unknown[] }> | undefined
|
||||||
|
|
||||||
|
globalThis.fetch = (async (_input: unknown, init: RequestInit | undefined) => {
|
||||||
|
sentMessages = JSON.parse(String(init?.body)).messages
|
||||||
|
return makeNonStreamResponse()
|
||||||
|
}) as FetchType
|
||||||
|
|
||||||
|
const client = createOpenAIShimClient({}) as OpenAIShimClient
|
||||||
|
|
||||||
|
await client.beta.messages.create({
|
||||||
|
model: 'test-model',
|
||||||
|
system: 'sys',
|
||||||
|
messages: [
|
||||||
|
{ role: 'user', content: 'go' },
|
||||||
|
{ role: 'assistant', content: 'thinking...' },
|
||||||
|
{
|
||||||
|
role: 'assistant',
|
||||||
|
content: [{ type: 'tool_use', id: 'call_1', name: 'Bash', input: { command: 'ls' } }],
|
||||||
|
},
|
||||||
|
{ role: 'user', content: [{ type: 'tool_result', tool_use_id: 'call_1', content: 'file.txt' }] },
|
||||||
|
],
|
||||||
|
max_tokens: 64,
|
||||||
|
stream: false,
|
||||||
|
})
|
||||||
|
|
||||||
|
// system + user + merged assistant + tool
|
||||||
|
const assistantMsgs = sentMessages?.filter(m => m.role === 'assistant')
|
||||||
|
expect(assistantMsgs?.length).toBe(1) // two assistant turns merged into one
|
||||||
|
expect(assistantMsgs?.[0]?.tool_calls?.length).toBeGreaterThan(0)
|
||||||
|
})
|
||||||
|
|||||||
@@ -295,7 +295,41 @@ function convertMessages(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return result
|
// Coalescing pass: merge consecutive messages of the same role.
|
||||||
|
// OpenAI/vLLM/Ollama require strict user↔assistant alternation.
|
||||||
|
// Multiple consecutive tool messages are allowed (assistant → tool* → user).
|
||||||
|
// Consecutive user or assistant messages must be merged to avoid Jinja
|
||||||
|
// template errors like "roles must alternate" (Devstral, Mistral models).
|
||||||
|
const coalesced: OpenAIMessage[] = []
|
||||||
|
for (const msg of result) {
|
||||||
|
const prev = coalesced[coalesced.length - 1]
|
||||||
|
|
||||||
|
if (prev && prev.role === msg.role && msg.role !== 'tool' && msg.role !== 'system') {
|
||||||
|
const prevContent = prev.content
|
||||||
|
const curContent = msg.content
|
||||||
|
|
||||||
|
if (typeof prevContent === 'string' && typeof curContent === 'string') {
|
||||||
|
prev.content = prevContent + (prevContent && curContent ? '\n' : '') + curContent
|
||||||
|
} else {
|
||||||
|
const toArray = (
|
||||||
|
c: string | Array<{ type: string; text?: string; image_url?: { url: string } }> | undefined,
|
||||||
|
): Array<{ type: string; text?: string; image_url?: { url: string } }> => {
|
||||||
|
if (!c) return []
|
||||||
|
if (typeof c === 'string') return c ? [{ type: 'text', text: c }] : []
|
||||||
|
return c
|
||||||
|
}
|
||||||
|
prev.content = [...toArray(prevContent), ...toArray(curContent)]
|
||||||
|
}
|
||||||
|
|
||||||
|
if (msg.tool_calls?.length) {
|
||||||
|
prev.tool_calls = [...(prev.tool_calls ?? []), ...msg.tool_calls]
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
coalesced.push(msg)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return coalesced
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
Reference in New Issue
Block a user