From 06e7684eb56df8e694ac784575e163641931c44c Mon Sep 17 00:00:00 2001
From: 3kin0x
Date: Tue, 21 Apr 2026 12:28:57 +0200
Subject: [PATCH] fix(api): ensure strict role sequence and filter empty assistant messages after interruption (#745 regression) (#794)

---
 src/services/api/openaiShim.test.ts |  91 +++++++++++
 src/services/api/openaiShim.ts      | 225 ++++++++++++++++++++--------
 2 files changed, 255 insertions(+), 61 deletions(-)

diff --git a/src/services/api/openaiShim.test.ts b/src/services/api/openaiShim.test.ts
index f4a6bfe9..e12daa10 100644
--- a/src/services/api/openaiShim.test.ts
+++ b/src/services/api/openaiShim.test.ts
@@ -3216,4 +3216,95 @@ test('preserves valid tool_result and drops orphan tool_result', async () => {
   const orphanMessage = toolMessages.find(m => m.tool_call_id === 'orphan_call_2')
   expect(orphanMessage).toBeUndefined()
+
+  // Actually, the semantic message IS injected here because the user block with orphan
+  // tool result is converted to:
+  // 1. Tool result (valid_call_1) -> role 'tool'
+  // 2. User content ("What happened?") -> role 'user'
+  // This triggers the tool -> assistant injection.
+  const assistantMessages = messages.filter(m => m.role === 'assistant')
+  expect(assistantMessages.some(m => m.content === '[Tool execution interrupted by user]')).toBe(true)
+})
+
+test('drops empty assistant message when only thinking block was present and stripped', async () => {
+  let requestBody: Record<string, unknown> | undefined
+
+  globalThis.fetch = (async (_input, init) => {
+    requestBody = JSON.parse(String(init?.body))
+    return new Response(JSON.stringify({
+      id: 'chatcmpl-1',
+      object: 'chat.completion',
+      created: 123456789,
+      model: 'mistral-large-latest',
+      choices: [{ message: { role: 'assistant', content: 'hi' }, finish_reason: 'stop' }],
+      usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }
+    }), { headers: { 'Content-Type': 'application/json' } })
+  }) as FetchType
+
+  const client = createOpenAIShimClient({}) as OpenAIShimClient
+
+  await client.beta.messages.create({
+    model: 'mistral-large-latest',
+    messages: [
+      { role: 'user', content: 'Initial' },
+      { role: 'assistant', content: [{ type: 'thinking', thinking: 'I am thinking...', signature: 'sig' }] },
+      { role: 'user', content: 'Interrupting query' },
+    ],
+    max_tokens: 64,
+    stream: false,
+  })
+
+  const messages = requestBody?.messages as Array<Record<string, unknown>>
+  // The assistant msg is dropped because thinking is stripped.
+  // The two user messages are coalesced.
+  expect(messages.length).toBe(1)
+  expect(messages[0].role).toBe('user')
+  expect(String(messages[0].content)).toContain('Initial')
+  expect(String(messages[0].content)).toContain('Interrupting query')
+})
+
+test('injects semantic assistant message when tool result is followed by user message', async () => {
+  let requestBody: Record<string, unknown> | undefined
+
+  globalThis.fetch = (async (_input, init) => {
+    requestBody = JSON.parse(String(init?.body))
+    return new Response(JSON.stringify({
+      id: 'chatcmpl-2',
+      object: 'chat.completion',
+      created: 123456789,
+      model: 'mistral-large-latest',
+      choices: [{ message: { role: 'assistant', content: 'hi' }, finish_reason: 'stop' }],
+      usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }
+    }), { headers: { 'Content-Type': 'application/json' } })
+  }) as FetchType
+
+  const client = createOpenAIShimClient({}) as OpenAIShimClient
+
+  await client.beta.messages.create({
+    model: 'mistral-large-latest',
+    messages: [
+      {
+        role: 'assistant',
+        content: [{ type: 'tool_use', id: 'call_1', name: 'search', input: {} }]
+      },
+      {
+        role: 'user',
+        content: [
+          { type: 'tool_result', tool_use_id: 'call_1', content: 'Result' }
+        ]
+      },
+      { role: 'user', content: 'Next user query' },
+    ],
+    max_tokens: 64,
+    stream: false,
+  })
+
+  const messages = requestBody?.messages as Array<Record<string, unknown>>
+  // Roles should be: assistant (tool_calls) -> tool -> assistant (semantic) -> user
+  const roles = messages.map(m => m.role)
+  expect(roles).toEqual(['assistant', 'tool', 'assistant', 'user'])
+
+  const semanticMsg = messages[2]
+  expect(semanticMsg.role).toBe('assistant')
+  expect(semanticMsg.content).toBe('[Tool execution interrupted by user]')
 })
diff --git a/src/services/api/openaiShim.ts b/src/services/api/openaiShim.ts
index 09c8ec31..3840b69d 100644
--- a/src/services/api/openaiShim.ts
+++ b/src/services/api/openaiShim.ts
@@ -347,19 +347,43 @@ function isGeminiMode(): boolean {
 }
 
 function convertMessages(
-  messages: Array<{ role: string; message?: { role?: string; content?: unknown }; content?: unknown }>,
+  messages: Array<{
+    role: string
+    message?: { role?: string; content?: unknown }
+    content?: unknown
+  }>,
   system: unknown,
 ): OpenAIMessage[] {
   const result: OpenAIMessage[] = []
   const knownToolCallIds = new Set<string>()
 
+  // Pre-scan for all tool results in the history to identify valid tool calls
+  const toolResultIds = new Set<string>()
+  for (const msg of messages) {
+    const inner = msg.message ?? msg
+    const content = (inner as { content?: unknown }).content
+    if (Array.isArray(content)) {
+      for (const block of content) {
+        if (
+          (block as { type?: string }).type === 'tool_result' &&
+          (block as { tool_use_id?: string }).tool_use_id
+        ) {
+          toolResultIds.add((block as { tool_use_id: string }).tool_use_id)
+        }
+      }
+    }
+  }
+
   // System message first
   const sysText = convertSystemPrompt(system)
   if (sysText) {
     result.push({ role: 'system', content: sysText })
   }
 
-  for (const msg of messages) {
+  for (let i = 0; i < messages.length; i++) {
+    const msg = messages[i]
+    const isLastInHistory = i === messages.length - 1
+
     // Claude Code wraps messages in { role, message: { role, content } }
     const inner = msg.message ?? msg
     const role = (inner as { role?: string }).role ?? msg.role
@@ -368,8 +392,12 @@ function convertMessages(
     if (role === 'user') {
       // Check for tool_result blocks in user messages
      if (Array.isArray(content)) {
-        const toolResults = content.filter((b: { type?: string }) => b.type === 'tool_result')
-        const otherContent = content.filter((b: { type?: string }) => b.type !== 'tool_result')
+        const toolResults = content.filter(
+          (b: { type?: string }) => b.type === 'tool_result',
+        )
+        const otherContent = content.filter(
+          (b: { type?: string }) => b.type !== 'tool_result',
+        )
 
         // Emit tool results as tool messages, but ONLY if we have a matching tool_use ID.
         // Mistral/OpenAI strictly require tool messages to follow an assistant message with tool_calls.
@@ -384,7 +412,9 @@ function convertMessages(
              content: convertToolResultContent(tr.content, tr.is_error),
            })
          } else {
-            logForDebugging(`Dropping orphan tool_result for ID: ${id} to prevent API error`)
+            logForDebugging(
+              `Dropping orphan tool_result for ID: ${id} to prevent API error`,
+            )
          }
        }
 
@@ -404,8 +434,12 @@ function convertMessages(
     } else if (role === 'assistant') {
       // Check for tool_use blocks
       if (Array.isArray(content)) {
-        const toolUses = content.filter((b: { type?: string }) => b.type === 'tool_use')
-        const thinkingBlock = content.find((b: { type?: string }) => b.type === 'thinking')
+        const toolUses = content.filter(
+          (b: { type?: string }) => b.type === 'tool_use',
+        )
+        const thinkingBlock = content.find(
+          (b: { type?: string }) => b.type === 'thinking',
+        )
         const textContent = content.filter(
           (b: { type?: string }) => b.type !== 'tool_use' && b.type !== 'thinking',
         )
@@ -414,70 +448,108 @@ function convertMessages(
           role: 'assistant',
           content: (() => {
             const c = convertContentBlocks(textContent)
-            return typeof c === 'string' ? c : Array.isArray(c) ? c.map((p: { text?: string }) => p.text ?? '').join('') : ''
+            return typeof c === 'string'
+              ? c
+              : Array.isArray(c)
+                ? c.map((p: { text?: string }) => p.text ?? '').join('')
+                : ''
           })(),
         }
 
         if (toolUses.length > 0) {
-          assistantMsg.tool_calls = toolUses.map(
-            (tu: {
-              id?: string
-              name?: string
-              input?: unknown
-              extra_content?: Record<string, unknown>
-              signature?: string
-            }) => {
-              const id = tu.id ?? `call_${crypto.randomUUID().replace(/-/g, '')}`
-              knownToolCallIds.add(id)
-              const toolCall: NonNullable<OpenAIMessage['tool_calls']>[number] = {
-                id,
-                type: 'function' as const,
-                function: {
-                  name: tu.name ?? 'unknown',
-                  arguments:
-                    typeof tu.input === 'string'
-                      ? tu.input
-                      : JSON.stringify(tu.input ?? {}),
-                },
-              }
+          const mappedToolCalls = toolUses
+            .map(
+              (tu: {
+                id?: string
+                name?: string
+                input?: unknown
+                extra_content?: Record<string, unknown>
+                signature?: string
+              }) => {
+                const id = tu.id ?? `call_${crypto.randomUUID().replace(/-/g, '')}`
 
-              // Preserve existing extra_content if present
-              if (tu.extra_content) {
-                toolCall.extra_content = { ...tu.extra_content }
-              }
+                // Only keep tool calls that have a corresponding result in the history,
+                // or if it's the last message (prefill scenario).
+                // Orphaned tool calls (e.g. from user interruption) cause 400 errors.
+                if (!toolResultIds.has(id) && !isLastInHistory) {
+                  return null
+                }
 
-              // Handle Gemini thought_signature
-              if (isGeminiMode()) {
-                // If the model provided a signature in the tool_use block itself (e.g. from a previous Turn/Step)
-                // Use thinkingBlock.signature for ALL tool calls in the same assistant turn if available.
-                // The API requires the same signature on every replayed function call part in a parallel set.
-                const signature = tu.signature ?? (thinkingBlock as any)?.signature
+                knownToolCallIds.add(id)
+                const toolCall: NonNullable<
+                  OpenAIMessage['tool_calls']
+                >[number] = {
+                  id,
+                  type: 'function' as const,
+                  function: {
+                    name: tu.name ?? 'unknown',
+                    arguments:
+                      typeof tu.input === 'string'
+                        ? tu.input
+                        : JSON.stringify(tu.input ?? {}),
+                  },
+                }
 
-                // Merge into existing google-specific metadata if present
-                const existingGoogle = (toolCall.extra_content?.google as Record<string, unknown>) ?? {}
-                toolCall.extra_content = {
-                  ...toolCall.extra_content,
-                  google: {
-                    ...existingGoogle,
-                    thought_signature: signature ?? "skip_thought_signature_validator"
-                  }
-                }
+                // Preserve existing extra_content if present
+                if (tu.extra_content) {
+                  toolCall.extra_content = { ...tu.extra_content }
+                }
+
+                // Handle Gemini thought_signature
+                if (isGeminiMode()) {
+                  // If the model provided a signature in the tool_use block itself (e.g. from a previous Turn/Step)
+                  // Use thinkingBlock.signature for ALL tool calls in the same assistant turn if available.
+                  // The API requires the same signature on every replayed function call part in a parallel set.
+                  const signature =
+                    tu.signature ?? (thinkingBlock as any)?.signature
+
+                  // Merge into existing google-specific metadata if present
+                  const existingGoogle =
+                    (toolCall.extra_content?.google as Record<
+                      string,
+                      unknown
+                    >) ?? {}
+                  toolCall.extra_content = {
+                    ...toolCall.extra_content,
+                    google: {
+                      ...existingGoogle,
+                      thought_signature:
+                        signature ?? 'skip_thought_signature_validator',
+                    },
+                  }
                 }
-              }
-              return toolCall
-            },
-          )
+                return toolCall
+              },
+            )
+            .filter((tc): tc is NonNullable<typeof tc> => tc !== null)
+
+          if (mappedToolCalls.length > 0) {
+            assistantMsg.tool_calls = mappedToolCalls
+          }
         }
-        result.push(assistantMsg)
+        // Only push assistant message if it has content or tool calls.
+        // Stripped thinking-only blocks from user interruptions are empty and cause 400s.
+        if (assistantMsg.content || assistantMsg.tool_calls?.length) {
+          result.push(assistantMsg)
+        }
       } else {
-        result.push({
+        const assistantMsg: OpenAIMessage = {
           role: 'assistant',
           content: (() => {
            const c = convertContentBlocks(content)
-            return typeof c === 'string' ? c : Array.isArray(c) ? c.map((p: { text?: string }) => p.text ?? '').join('') : ''
+            return typeof c === 'string'
+              ? c
+              : Array.isArray(c)
+                ? c.map((p: { text?: string }) => p.text ?? '').join('')
+                : ''
          })(),
-        })
+        }
+
+        if (assistantMsg.content) {
+          result.push(assistantMsg)
+        }
       }
     }
   }
@@ -491,25 +563,56 @@ function convertMessages(
   for (const msg of result) {
     const prev = coalesced[coalesced.length - 1]
-    if (prev && prev.role === msg.role && msg.role !== 'tool' && msg.role !== 'system') {
-      const prevContent = prev.content
+    // Mistral/Devstral: 'tool' message must be followed by an 'assistant' message.
+    // If a 'tool' result is followed by a 'user' message, we must inject a semantic
+    // assistant response to satisfy the strict role sequence:
+    //   ... -> assistant (calls) -> tool (results) -> assistant (semantic) -> user (next)
+    if (prev && prev.role === 'tool' && msg.role === 'user') {
+      coalesced.push({
+        role: 'assistant',
+        content: '[Tool execution interrupted by user]',
+      })
+    }
+
+    const lastAfterPossibleInjection = coalesced[coalesced.length - 1]
+    if (
+      lastAfterPossibleInjection &&
+      lastAfterPossibleInjection.role === msg.role &&
+      msg.role !== 'tool' &&
+      msg.role !== 'system'
+    ) {
+      const prevContent = lastAfterPossibleInjection.content
       const curContent = msg.content
       if (typeof prevContent === 'string' && typeof curContent === 'string') {
-        prev.content = prevContent + (prevContent && curContent ? '\n' : '') + curContent
+        lastAfterPossibleInjection.content =
+          prevContent + (prevContent && curContent ? '\n' : '') + curContent
       } else {
         const toArray = (
-          c: string | Array<{ type: string; text?: string; image_url?: { url: string } }> | undefined,
-        ): Array<{ type: string; text?: string; image_url?: { url: string } }> => {
+          c:
+            | string
+            | Array<{ type: string; text?: string; image_url?: { url: string } }>
+            | undefined,
+        ): Array<{
+          type: string
+          text?: string
+          image_url?: { url: string }
+        }> => {
           if (!c) return []
           if (typeof c === 'string') return c ? [{ type: 'text', text: c }] : []
           return c
         }
-        prev.content = [...toArray(prevContent), ...toArray(curContent)]
+        lastAfterPossibleInjection.content = [
+          ...toArray(prevContent),
+          ...toArray(curContent),
+        ]
       }
       if (msg.tool_calls?.length) {
-        prev.tool_calls = [...(prev.tool_calls ?? []), ...msg.tool_calls]
+        lastAfterPossibleInjection.tool_calls = [
+          ...(lastAfterPossibleInjection.tool_calls ?? []),
+          ...msg.tool_calls,
+        ]
      }
     } else {
       coalesced.push(msg)