diff --git a/src/services/api/openaiShim.test.ts b/src/services/api/openaiShim.test.ts index 101c2ae5..ed2911af 100644 --- a/src/services/api/openaiShim.test.ts +++ b/src/services/api/openaiShim.test.ts @@ -313,6 +313,57 @@ test('preserves Gemini tool call extra_content from streaming chunks', async () }) }) +test('strips thinking blocks from assistant messages instead of leaking them as text', async () => { + let requestBody: Record<string, unknown> | undefined + + globalThis.fetch = (async (_input, init) => { + requestBody = JSON.parse(String(init?.body)) + + return new Response( + JSON.stringify({ + id: 'chatcmpl-1', + model: 'gpt-4o', + choices: [ + { + message: { role: 'assistant', content: 'done' }, + finish_reason: 'stop', + }, + ], + usage: { prompt_tokens: 10, completion_tokens: 1, total_tokens: 11 }, + }), + { headers: { 'Content-Type': 'application/json' } }, + ) + }) as FetchType + + const client = createOpenAIShimClient({}) as OpenAIShimClient + + await client.beta.messages.create({ + model: 'gpt-4o', + system: 'test', + messages: [ + { role: 'user', content: 'hello' }, + { + role: 'assistant', + content: [ + { type: 'thinking', thinking: 'secret reasoning' }, + { type: 'text', text: 'visible reply' }, + ], + }, + { role: 'user', content: 'follow up' }, + ], + max_tokens: 64, + stream: false, + }) + + const msgs = requestBody?.messages as Array<{ role: string; content: string }> + const assistantMsg = msgs.find(m => m.role === 'assistant') + + // The assistant message should contain only the visible text, + // not secret reasoning + expect(assistantMsg?.content).toBe('visible reply') + expect(assistantMsg?.content).not.toContain('thinking') +}) + test('sanitizes malformed MCP tool schemas before sending them to OpenAI', async () => { let requestBody: Record<string, unknown> | undefined diff --git a/src/services/api/openaiShim.ts b/src/services/api/openaiShim.ts index 68cb31e9..420365a5 100644 --- a/src/services/api/openaiShim.ts +++ b/src/services/api/openaiShim.ts @@ -139,10 +139,12 
@@ function convertContentBlocks( // handled separately break case 'thinking': - // Append thinking as text with a marker for models that support reasoning - if (block.thinking) { - parts.push({ type: 'text', text: `${block.thinking}` }) - } + case 'redacted_thinking': + // Strip thinking blocks for OpenAI-compatible providers. + // These are Anthropic-specific content types that 3P providers + // don't understand. Serializing them as text corrupts + // multi-turn context: the model sees the tags as part of its + // previous reply and may mimic or misattribute them. break default: if (block.text) { diff --git a/src/utils/context.ts b/src/utils/context.ts index 4eae1782..dd35bf52 100644 --- a/src/utils/context.ts +++ b/src/utils/context.ts @@ -72,16 +72,23 @@ export function getContextWindowForModel( return 1_000_000 } - // OpenAI-compatible provider — use known context windows for the model - if ( + // OpenAI-compatible provider — use known context windows for the model. + // Unknown models get a conservative 8k default so auto-compact triggers + // before hitting a hard context_window_exceeded error (issue #248 finding 3). + const isOpenAIProvider = isEnvTruthy(process.env.CLAUDE_CODE_USE_OPENAI) || isEnvTruthy(process.env.CLAUDE_CODE_USE_GEMINI) || isEnvTruthy(process.env.CLAUDE_CODE_USE_GITHUB) - ) { + if (isOpenAIProvider) { const openaiWindow = getOpenAIContextWindow(model) if (openaiWindow !== undefined) { return openaiWindow } + console.error( + `[context] Warning: model "${model}" not in context window table — using conservative 8k default. 
` + + 'Add it to src/utils/model/openaiContextWindows.ts for accurate compaction.', + ) + return 8_000 } const cap = getModelCapability(model) diff --git a/src/utils/conversationRecovery.ts b/src/utils/conversationRecovery.ts index af5ea230..1138b469 100644 --- a/src/utils/conversationRecovery.ts +++ b/src/utils/conversationRecovery.ts @@ -24,6 +24,7 @@ import { type FileHistorySnapshot, } from './fileHistory.js' import { logError } from './log.js' +import { getAPIProvider } from './model/providers.js' import { createAssistantMessage, createUserMessage, @@ -145,6 +146,25 @@ export type DeserializeResult = { turnInterruptionState: TurnInterruptionState } +/** + * Remove thinking/redacted_thinking content blocks from assistant messages. + * Messages that become empty after stripping are removed entirely. + */ +function stripThinkingBlocks(messages: NormalizedMessage[]): NormalizedMessage[] { + return messages.reduce((acc, msg) => { + if (msg.type !== 'assistant' || !Array.isArray(msg.message?.content)) { + acc.push(msg) + return acc + } + const filtered = msg.message.content.filter( + (block: { type?: string }) => block.type !== 'thinking' && block.type !== 'redacted_thinking', + ) + if (filtered.length === 0) return acc + acc.push({ ...msg, message: { ...msg.message, content: filtered } }) + return acc + }, [] as NormalizedMessage[]) +} + /** * Deserializes messages from a log file into the format expected by the REPL. * Filters unresolved tool uses, orphaned thinking messages, and appends a @@ -195,10 +215,19 @@ export function deserializeMessagesWithInterruptDetection( filteredToolUses, ) as NormalizedMessage[] + // Strip thinking/redacted_thinking content blocks from assistant messages + // when resuming against a 3P provider. These Anthropic-specific blocks cause + // 400 errors or context corruption on OpenAI-compatible providers (issue #248 finding 5). 
+ const provider = getAPIProvider() + const isThirdPartyProvider = provider !== 'firstParty' && provider !== 'bedrock' && provider !== 'vertex' && provider !== 'foundry' + const thinkingStripped = isThirdPartyProvider + ? stripThinkingBlocks(filteredThinking) + : filteredThinking + // Filter out assistant messages with only whitespace text content. // This can happen when model outputs "\n\n" before thinking, user cancels mid-stream. const filteredMessages = filterWhitespaceOnlyAssistantMessages( - filteredThinking, + thinkingStripped, ) as NormalizedMessage[] const internalState = detectTurnInterruption(filteredMessages)