Compare commits
1 Commits
fix/provid
...
fix/repl-a
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
037a855528 |
@@ -313,6 +313,57 @@ test('preserves Gemini tool call extra_content from streaming chunks', async ()
|
||||
})
|
||||
})
|
||||
|
||||
test('strips thinking blocks from assistant messages instead of leaking them as text', async () => {
  // Holds the JSON payload the shim hands to the stubbed fetch.
  let requestBody: Record<string, unknown> | undefined

  // Minimal non-streaming chat completion returned by the stub.
  const cannedCompletion = {
    id: 'chatcmpl-1',
    model: 'gpt-4o',
    choices: [
      {
        message: { role: 'assistant', content: 'done' },
        finish_reason: 'stop',
      },
    ],
    usage: { prompt_tokens: 10, completion_tokens: 1, total_tokens: 11 },
  }

  globalThis.fetch = (async (_input, init) => {
    requestBody = JSON.parse(String(init?.body))

    return new Response(JSON.stringify(cannedCompletion), {
      headers: { 'Content-Type': 'application/json' },
    })
  }) as FetchType

  const client = createOpenAIShimClient({}) as OpenAIShimClient

  // Prior assistant turn mixes a thinking block with ordinary visible text.
  const priorAssistantTurn = {
    role: 'assistant',
    content: [
      { type: 'thinking', thinking: 'secret reasoning' },
      { type: 'text', text: 'visible reply' },
    ],
  }

  await client.beta.messages.create({
    model: 'gpt-4o',
    system: 'test',
    messages: [
      { role: 'user', content: 'hello' },
      priorAssistantTurn,
      { role: 'user', content: 'follow up' },
    ],
    max_tokens: 64,
    stream: false,
  })

  const outgoing = requestBody?.messages as Array<{ role: string; content: string }>
  const forwardedAssistant = outgoing.find(entry => entry.role === 'assistant')

  // Only the visible text may be forwarded; the reasoning must not appear
  // as <thinking>secret reasoning</thinking> in the outgoing message.
  expect(forwardedAssistant?.content).toBe('visible reply')
  expect(forwardedAssistant?.content).not.toContain('thinking')
})
|
||||
|
||||
test('sanitizes malformed MCP tool schemas before sending them to OpenAI', async () => {
|
||||
let requestBody: Record<string, unknown> | undefined
|
||||
|
||||
|
||||
@@ -139,10 +139,12 @@ function convertContentBlocks(
|
||||
// handled separately
|
||||
break
|
||||
case 'thinking':
|
||||
// Append thinking as text with a marker for models that support reasoning
|
||||
if (block.thinking) {
|
||||
parts.push({ type: 'text', text: `<thinking>${block.thinking}</thinking>` })
|
||||
}
|
||||
case 'redacted_thinking':
|
||||
// Strip thinking blocks for OpenAI-compatible providers.
|
||||
// These are Anthropic-specific content types that 3P providers
|
||||
// don't understand. Serializing them as <thinking> text corrupts
|
||||
// multi-turn context: the model sees the tags as part of its
|
||||
// previous reply and may mimic or misattribute them.
|
||||
break
|
||||
default:
|
||||
if (block.text) {
|
||||
|
||||
@@ -72,16 +72,23 @@ export function getContextWindowForModel(
|
||||
return 1_000_000
|
||||
}
|
||||
|
||||
// OpenAI-compatible provider — use known context windows for the model
|
||||
if (
|
||||
// OpenAI-compatible provider — use known context windows for the model.
|
||||
// Unknown models get a conservative 8k default so auto-compact triggers
|
||||
// before hitting a hard context_window_exceeded error (issue #248 finding 3).
|
||||
const isOpenAIProvider =
|
||||
isEnvTruthy(process.env.CLAUDE_CODE_USE_OPENAI) ||
|
||||
isEnvTruthy(process.env.CLAUDE_CODE_USE_GEMINI) ||
|
||||
isEnvTruthy(process.env.CLAUDE_CODE_USE_GITHUB)
|
||||
) {
|
||||
if (isOpenAIProvider) {
|
||||
const openaiWindow = getOpenAIContextWindow(model)
|
||||
if (openaiWindow !== undefined) {
|
||||
return openaiWindow
|
||||
}
|
||||
console.error(
|
||||
`[context] Warning: model "${model}" not in context window table — using conservative 8k default. ` +
|
||||
'Add it to src/utils/model/openaiContextWindows.ts for accurate compaction.',
|
||||
)
|
||||
return 8_000
|
||||
}
|
||||
|
||||
const cap = getModelCapability(model)
|
||||
|
||||
@@ -24,6 +24,7 @@ import {
|
||||
type FileHistorySnapshot,
|
||||
} from './fileHistory.js'
|
||||
import { logError } from './log.js'
|
||||
import { getAPIProvider } from './model/providers.js'
|
||||
import {
|
||||
createAssistantMessage,
|
||||
createUserMessage,
|
||||
@@ -145,6 +146,25 @@ export type DeserializeResult = {
|
||||
turnInterruptionState: TurnInterruptionState
|
||||
}
|
||||
|
||||
/**
 * Remove `thinking` / `redacted_thinking` content blocks from assistant messages.
 *
 * - Non-assistant messages, and assistant messages whose content is not an
 *   array, are passed through untouched.
 * - Assistant messages whose content array ends up empty are dropped entirely.
 * - Assistant messages that contained no thinking blocks are returned by
 *   reference (no copy); messages that lost blocks are shallow-copied so the
 *   input is never mutated.
 * - Malformed entries (null, primitives) inside a content array are kept as-is
 *   rather than crashing the filter.
 *
 * @param messages Messages to sanitize; the array and its items are not mutated.
 * @returns A new array with thinking blocks removed from assistant messages.
 */
function stripThinkingBlocks(messages: NormalizedMessage[]): NormalizedMessage[] {
  // Defensive check: a null or non-object entry must not throw on `.type` access.
  const isThinkingBlock = (block: unknown): boolean => {
    if (typeof block !== 'object' || block === null) return false
    const kind = (block as { type?: unknown }).type
    return kind === 'thinking' || kind === 'redacted_thinking'
  }

  return messages.reduce<NormalizedMessage[]>((acc, msg) => {
    if (msg.type !== 'assistant' || !Array.isArray(msg.message?.content)) {
      acc.push(msg)
      return acc
    }

    const original = msg.message.content
    const kept = original.filter((block: unknown) => !isThinkingBlock(block))

    // Nothing left to send: drop the message entirely.
    if (kept.length === 0) return acc

    // No block was removed: reuse the original object instead of cloning it.
    if (kept.length === original.length) {
      acc.push(msg)
      return acc
    }

    acc.push({ ...msg, message: { ...msg.message, content: kept } })
    return acc
  }, [])
}
|
||||
|
||||
/**
|
||||
* Deserializes messages from a log file into the format expected by the REPL.
|
||||
* Filters unresolved tool uses, orphaned thinking messages, and appends a
|
||||
@@ -195,10 +215,19 @@ export function deserializeMessagesWithInterruptDetection(
|
||||
filteredToolUses,
|
||||
) as NormalizedMessage[]
|
||||
|
||||
// Strip thinking/redacted_thinking content blocks from assistant messages
|
||||
// when resuming against a 3P provider. These Anthropic-specific blocks cause
|
||||
// 400 errors or context corruption on OpenAI-compatible providers (issue #248 finding 5).
|
||||
const provider = getAPIProvider()
|
||||
const isThirdPartyProvider = provider !== 'firstParty' && provider !== 'bedrock' && provider !== 'vertex' && provider !== 'foundry'
|
||||
const thinkingStripped = isThirdPartyProvider
|
||||
? stripThinkingBlocks(filteredThinking)
|
||||
: filteredThinking
|
||||
|
||||
// Filter out assistant messages with only whitespace text content.
|
||||
// This can happen when model outputs "\n\n" before thinking, user cancels mid-stream.
|
||||
const filteredMessages = filterWhitespaceOnlyAssistantMessages(
|
||||
filteredThinking,
|
||||
thinkingStripped,
|
||||
) as NormalizedMessage[]
|
||||
|
||||
const internalState = detectTurnInterruption(filteredMessages)
|
||||
|
||||
Reference in New Issue
Block a user