Compare commits

...

1 Commit

Author SHA1 Message Date
Juan Camilo
037a855528 fix: strip Anthropic-specific params from 3P provider paths
Three silent failure modes affecting all third-party provider users:

1. Thinking blocks serialized as <thinking> text corrupt multi-turn
   context — strip them instead of converting to raw text tags.

2. Unknown models fall through to 200k context window default, so
   auto-compact never triggers — use conservative 8k for unknown
   3P models with a warning log.

3. Session resume with thinking blocks causes 400 or context corruption
   on 3P providers — strip thinking/redacted_thinking content blocks
   from deserialized messages when resuming against a non-Anthropic
   provider.

Addresses findings 2, 3, and 5 from #248.
2026-04-03 14:05:34 +02:00
4 changed files with 97 additions and 8 deletions

View File

@@ -313,6 +313,57 @@ test('preserves Gemini tool call extra_content from streaming chunks', async ()
}) })
}) })
test('strips thinking blocks from assistant messages instead of leaking them as text', async () => {
  // Capture the outbound request body so we can inspect what the shim
  // actually sends to the OpenAI-compatible endpoint.
  let capturedBody: Record<string, unknown> | undefined

  const responsePayload = {
    id: 'chatcmpl-1',
    model: 'gpt-4o',
    choices: [
      {
        message: { role: 'assistant', content: 'done' },
        finish_reason: 'stop',
      },
    ],
    usage: { prompt_tokens: 10, completion_tokens: 1, total_tokens: 11 },
  }

  globalThis.fetch = (async (_input, init) => {
    capturedBody = JSON.parse(String(init?.body))
    return new Response(JSON.stringify(responsePayload), {
      headers: { 'Content-Type': 'application/json' },
    })
  }) as FetchType

  const client = createOpenAIShimClient({}) as OpenAIShimClient
  await client.beta.messages.create({
    model: 'gpt-4o',
    system: 'test',
    messages: [
      { role: 'user', content: 'hello' },
      {
        role: 'assistant',
        content: [
          { type: 'thinking', thinking: 'secret reasoning' },
          { type: 'text', text: 'visible reply' },
        ],
      },
      { role: 'user', content: 'follow up' },
    ],
    max_tokens: 64,
    stream: false,
  })

  const sentMessages = capturedBody?.messages as Array<{ role: string; content: string }>
  const assistantMsg = sentMessages.find(entry => entry.role === 'assistant')
  // The assistant message should contain only the visible text,
  // not <thinking>secret reasoning</thinking>
  expect(assistantMsg?.content).toBe('visible reply')
  expect(assistantMsg?.content).not.toContain('thinking')
})
test('sanitizes malformed MCP tool schemas before sending them to OpenAI', async () => { test('sanitizes malformed MCP tool schemas before sending them to OpenAI', async () => {
let requestBody: Record<string, unknown> | undefined let requestBody: Record<string, unknown> | undefined

View File

@@ -139,10 +139,12 @@ function convertContentBlocks(
// handled separately // handled separately
break break
case 'thinking': case 'thinking':
// Append thinking as text with a marker for models that support reasoning case 'redacted_thinking':
if (block.thinking) { // Strip thinking blocks for OpenAI-compatible providers.
parts.push({ type: 'text', text: `<thinking>${block.thinking}</thinking>` }) // These are Anthropic-specific content types that 3P providers
} // don't understand. Serializing them as <thinking> text corrupts
// multi-turn context: the model sees the tags as part of its
// previous reply and may mimic or misattribute them.
break break
default: default:
if (block.text) { if (block.text) {

View File

@@ -72,16 +72,23 @@ export function getContextWindowForModel(
return 1_000_000 return 1_000_000
} }
// OpenAI-compatible provider — use known context windows for the model // OpenAI-compatible provider — use known context windows for the model.
if ( // Unknown models get a conservative 8k default so auto-compact triggers
// before hitting a hard context_window_exceeded error (issue #248 finding 3).
const isOpenAIProvider =
isEnvTruthy(process.env.CLAUDE_CODE_USE_OPENAI) || isEnvTruthy(process.env.CLAUDE_CODE_USE_OPENAI) ||
isEnvTruthy(process.env.CLAUDE_CODE_USE_GEMINI) || isEnvTruthy(process.env.CLAUDE_CODE_USE_GEMINI) ||
isEnvTruthy(process.env.CLAUDE_CODE_USE_GITHUB) isEnvTruthy(process.env.CLAUDE_CODE_USE_GITHUB)
) { if (isOpenAIProvider) {
const openaiWindow = getOpenAIContextWindow(model) const openaiWindow = getOpenAIContextWindow(model)
if (openaiWindow !== undefined) { if (openaiWindow !== undefined) {
return openaiWindow return openaiWindow
} }
console.error(
`[context] Warning: model "${model}" not in context window table — using conservative 8k default. ` +
'Add it to src/utils/model/openaiContextWindows.ts for accurate compaction.',
)
return 8_000
} }
const cap = getModelCapability(model) const cap = getModelCapability(model)

View File

@@ -24,6 +24,7 @@ import {
type FileHistorySnapshot, type FileHistorySnapshot,
} from './fileHistory.js' } from './fileHistory.js'
import { logError } from './log.js' import { logError } from './log.js'
import { getAPIProvider } from './model/providers.js'
import { import {
createAssistantMessage, createAssistantMessage,
createUserMessage, createUserMessage,
@@ -145,6 +146,25 @@ export type DeserializeResult = {
turnInterruptionState: TurnInterruptionState turnInterruptionState: TurnInterruptionState
} }
/**
 * Remove thinking/redacted_thinking content blocks from assistant messages.
 * Messages that become empty after stripping are removed entirely.
 */
function stripThinkingBlocks(messages: NormalizedMessage[]): NormalizedMessage[] {
  return messages.flatMap(msg => {
    // Only assistant messages with array content can carry thinking blocks;
    // everything else passes through untouched.
    if (msg.type !== 'assistant' || !Array.isArray(msg.message?.content)) {
      return [msg]
    }
    const kept = msg.message.content.filter(
      (block: { type?: string }) => block.type !== 'thinking' && block.type !== 'redacted_thinking',
    )
    // Drop the message entirely when stripping left nothing behind.
    if (kept.length === 0) {
      return []
    }
    return [{ ...msg, message: { ...msg.message, content: kept } }]
  })
}
/** /**
* Deserializes messages from a log file into the format expected by the REPL. * Deserializes messages from a log file into the format expected by the REPL.
* Filters unresolved tool uses, orphaned thinking messages, and appends a * Filters unresolved tool uses, orphaned thinking messages, and appends a
@@ -195,10 +215,19 @@ export function deserializeMessagesWithInterruptDetection(
filteredToolUses, filteredToolUses,
) as NormalizedMessage[] ) as NormalizedMessage[]
// Strip thinking/redacted_thinking content blocks from assistant messages
// when resuming against a 3P provider. These Anthropic-specific blocks cause
// 400 errors or context corruption on OpenAI-compatible providers (issue #248 finding 5).
const provider = getAPIProvider()
const isThirdPartyProvider = provider !== 'firstParty' && provider !== 'bedrock' && provider !== 'vertex' && provider !== 'foundry'
const thinkingStripped = isThirdPartyProvider
? stripThinkingBlocks(filteredThinking)
: filteredThinking
// Filter out assistant messages with only whitespace text content. // Filter out assistant messages with only whitespace text content.
// This can happen when model outputs "\n\n" before thinking, user cancels mid-stream. // This can happen when model outputs "\n\n" before thinking, user cancels mid-stream.
const filteredMessages = filterWhitespaceOnlyAssistantMessages( const filteredMessages = filterWhitespaceOnlyAssistantMessages(
filteredThinking, thinkingStripped,
) as NormalizedMessage[] ) as NormalizedMessage[]
const internalState = detectTurnInterruption(filteredMessages) const internalState = detectTurnInterruption(filteredMessages)