diff --git a/src/Tool.ts b/src/Tool.ts
index 205cac09..3a89f2d4 100644
--- a/src/Tool.ts
+++ b/src/Tool.ts
@@ -290,6 +290,14 @@ export type ToolUseContext = {
    * resumeAgentBackground threads one reconstructed from sidechain records.
    */
   contentReplacementState?: ContentReplacementState
+  /**
+   * Interactive REPL only: mirror persisted tool-result replacements back
+   * into the live transcript so the original oversized payloads can be
+   * released from heap once the replacement decision is known.
+   */
+  syncToolResultReplacements?: (
+    replacements: ReadonlyMap<string, string>,
+  ) => void
   /**
    * Parent's rendered system prompt bytes, frozen at turn start.
    * Used by fork subagents to share the parent's prompt cache — re-calling
diff --git a/src/query.ts b/src/query.ts
index 07e8b6fa..c94abc96 100644
--- a/src/query.ts
+++ b/src/query.ts
@@ -376,7 +376,7 @@ async function* queryLoop(
   const persistReplacements =
     querySource.startsWith('agent:') ||
     querySource.startsWith('repl_main_thread')
-  messagesForQuery = await applyToolResultBudget(
+  const toolResultBudgetResult = await applyToolResultBudget(
     messagesForQuery,
     toolUseContext.contentReplacementState,
     persistReplacements
@@ -392,6 +392,12 @@
         .map(t => t.name),
     ),
   )
+  messagesForQuery = toolResultBudgetResult.messages
+  if (toolResultBudgetResult.newlyReplaced.length > 0) {
+    toolUseContext.syncToolResultReplacements?.(
+      toolUseContext.contentReplacementState?.replacements ?? new Map(),
+    )
+  }
   // Apply snip before microcompact (both may run — they are not mutually exclusive).
   // snipTokensFreed is plumbed to autocompact so its threshold check reflects
diff --git a/src/screens/REPL.tsx b/src/screens/REPL.tsx
index 93e61985..033fddfb 100644
--- a/src/screens/REPL.tsx
+++ b/src/screens/REPL.tsx
@@ -177,7 +177,7 @@ import { deserializeMessages } from '../utils/conversationRecovery.js';
 import { extractReadFilesFromMessages, extractBashToolsFromMessages } from '../utils/queryHelpers.js';
 import { resetMicrocompactState } from '../services/compact/microCompact.js';
 import { runPostCompactCleanup } from '../services/compact/postCompactCleanup.js';
-import { provisionContentReplacementState, reconstructContentReplacementState, type ContentReplacementRecord } from '../utils/toolResultStorage.js';
+import { applyToolResultReplacementsToMessages, provisionContentReplacementState, reconstructContentReplacementState, type ContentReplacementRecord } from '../utils/toolResultStorage.js';
 import { partialCompactConversation } from '../services/compact/compact.js';
 import type { LogOption } from '../types/logs.js';
 import type { AgentColorName } from '../tools/AgentTool/agentColorManager.js';
@@ -1174,7 +1174,11 @@ export function REPL({
     registerLeaderToolUseConfirmQueue(setToolUseConfirmQueue);
     return () => unregisterLeaderToolUseConfirmQueue();
   }, [setToolUseConfirmQueue]);
-  const [messages, rawSetMessages] = useState(initialMessages ?? []);
+  const [messages, rawSetMessages] = useState(() => {
+    if (!initialMessages) return [];
+    const initialReplacementState = provisionContentReplacementState(initialMessages, initialContentReplacements);
+    return initialReplacementState ? applyToolResultReplacementsToMessages(initialMessages, initialReplacementState.replacements) : initialMessages;
+  });
   const messagesRef = useRef(messages);
   // Stores the willowMode variant that was shown (or false if no hint shown).
   // Captured at hint_shown time so hint_converted telemetry reports the same
@@ -1226,6 +1230,10 @@
     }
     setUserInputOnProcessingRaw(input);
   }, []);
+  const syncToolResultReplacements = useCallback((replacements: ReadonlyMap<string, string>) => {
+    if (replacements.size === 0) return;
+    setMessages(current => applyToolResultReplacementsToMessages(current, replacements));
+  }, [setMessages]);
   // Fullscreen: track the unseen-divider position. dividerIndex changes
   // only ~twice/scroll-session (first scroll-away + repin). pillVisible
   // and stickyPrompt now live in FullscreenLayout — they subscribe to
@@ -1918,10 +1926,11 @@
       if (contentReplacementStateRef.current && entrypoint !== 'fork') {
         contentReplacementStateRef.current = reconstructContentReplacementState(messages, log.contentReplacements ?? []);
       }
+      const hydratedMessages = contentReplacementStateRef.current ? applyToolResultReplacementsToMessages(messages, contentReplacementStateRef.current.replacements) : messages;

       // Reset messages to the provided initial messages
       // Use a callback to ensure we're not dependent on stale state
-      setMessages(() => messages);
+      setMessages(() => hydratedMessages);

       // Clear any active tool JSX
       setToolJSX(null);
@@ -2513,9 +2522,10 @@
       resume,
       setConversationId,
       requestPrompt: feature('HOOK_PROMPTS') ?
        requestPrompt : undefined,
-      contentReplacementState: contentReplacementStateRef.current
+      contentReplacementState: contentReplacementStateRef.current,
+      syncToolResultReplacements
     };
-  }, [commands, combinedInitialTools, mainThreadAgentDefinition, debug, initialMcpClients, ideInstallationStatus, dynamicMcpConfig, theme, allowedAgentTypes, store, setAppState, reverify, addNotification, setMessages, onChangeDynamicMcpConfig, resume, requestPrompt, disabled, customSystemPrompt, appendSystemPrompt, setConversationId]);
+  }, [commands, combinedInitialTools, mainThreadAgentDefinition, debug, initialMcpClients, ideInstallationStatus, dynamicMcpConfig, theme, allowedAgentTypes, store, setAppState, reverify, addNotification, setMessages, onChangeDynamicMcpConfig, resume, requestPrompt, disabled, customSystemPrompt, appendSystemPrompt, setConversationId, syncToolResultReplacements]);

   // Session backgrounding (Ctrl+B to background/foreground)
   const handleBackgroundQuery = useCallback(() => {
diff --git a/src/services/api/codexShim.test.ts b/src/services/api/codexShim.test.ts
index 96ab999c..44f900ef 100644
--- a/src/services/api/codexShim.test.ts
+++ b/src/services/api/codexShim.test.ts
@@ -180,6 +180,47 @@ describe('Codex request translation', () => {
     ])
   })

+  test('sanitizes malformed enum/default values for Responses tool schemas', () => {
+    const tools = convertToolsToResponsesTools([
+      {
+        name: 'mcp__clientry__create_task',
+        description: 'Create a task',
+        input_schema: {
+          type: 'object',
+          properties: {
+            priority: {
+              type: 'integer',
+              description: 'Priority: 0=low, 1=medium, 2=high, 3=urgent',
+              default: true,
+              enum: [false, 0, 1, 2, 3],
+            },
+          },
+        },
+      },
+    ])
+
+    expect(tools).toEqual([
+      {
+        type: 'function',
+        name: 'mcp__clientry__create_task',
+        description: 'Create a task',
+        parameters: {
+          type: 'object',
+          properties: {
+            priority: {
+              type: 'integer',
+              description: 'Priority: 0=low, 1=medium, 2=high, 3=urgent',
+              enum: [0, 1, 2, 3],
+            },
+          },
+          required: ['priority'],
+          additionalProperties: false,
+        },
+        strict: true,
+      },
+    ])
+  })
+
   test('converts assistant tool use and user tool result into Responses items', () => {
     const items = convertAnthropicMessagesToResponsesInput([
       {
diff --git a/src/services/api/codexShim.ts b/src/services/api/codexShim.ts
index d6519b06..26ae237e 100644
--- a/src/services/api/codexShim.ts
+++ b/src/services/api/codexShim.ts
@@ -3,6 +3,7 @@ import type {
   ResolvedCodexCredentials,
   ResolvedProviderRequest,
 } from './providerConfig.js'
+import { sanitizeSchemaForOpenAICompat } from './openaiSchemaSanitizer.js'

 export interface AnthropicUsage {
   input_tokens: number
@@ -306,15 +307,8 @@
  * - Nested schemas (properties, items, anyOf/oneOf/allOf) are processed too
  */
 function enforceStrictSchema(schema: unknown): Record<string, unknown> {
-  if (!schema || typeof schema !== 'object' || Array.isArray(schema)) {
-    return (schema ?? {}) as Record<string, unknown>
-  }
+  const record = sanitizeSchemaForOpenAICompat(schema)

-  const record = { ...(schema as Record<string, unknown>) }
-
-  // Codex API strict schemas reject these JSON schema keywords
-  delete record.$schema
-  delete record.propertyNames
   // Codex Responses rejects JSON Schema's standard `uri` string format.
   // Keep URL validation in the tool layer and send a plain string here.
   if (record.format === 'uri') {
diff --git a/src/services/api/openaiSchemaSanitizer.ts b/src/services/api/openaiSchemaSanitizer.ts
new file mode 100644
index 00000000..7fc00eff
--- /dev/null
+++ b/src/services/api/openaiSchemaSanitizer.ts
@@ -0,0 +1,216 @@
+function isSchemaRecord(value: unknown): value is Record<string, unknown> {
+  return value !== null && typeof value === 'object' && !Array.isArray(value)
+}
+
+function deepEqualJsonValue(a: unknown, b: unknown): boolean {
+  if (Object.is(a, b)) return true
+  if (typeof a !== typeof b) return false
+
+  if (Array.isArray(a) && Array.isArray(b)) {
+    return (
+      a.length === b.length &&
+      a.every((value, index) => deepEqualJsonValue(value, b[index]))
+    )
+  }
+
+  if (isSchemaRecord(a) && isSchemaRecord(b)) {
+    const aKeys = Object.keys(a)
+    const bKeys = Object.keys(b)
+    return (
+      aKeys.length === bKeys.length &&
+      aKeys.every(key => key in b && deepEqualJsonValue(a[key], b[key]))
+    )
+  }
+
+  return false
+}
+
+function matchesJsonSchemaType(type: string, value: unknown): boolean {
+  switch (type) {
+    case 'string':
+      return typeof value === 'string'
+    case 'number':
+      return typeof value === 'number' && Number.isFinite(value)
+    case 'integer':
+      return typeof value === 'number' && Number.isInteger(value)
+    case 'boolean':
+      return typeof value === 'boolean'
+    case 'object':
+      return value !== null && typeof value === 'object' && !Array.isArray(value)
+    case 'array':
+      return Array.isArray(value)
+    case 'null':
+      return value === null
+    default:
+      return true
+  }
+}
+
+function getJsonSchemaTypes(record: Record<string, unknown>): string[] {
+  const raw = record.type
+  if (typeof raw === 'string') {
+    return [raw]
+  }
+  if (Array.isArray(raw)) {
+    return raw.filter((value): value is string => typeof value === 'string')
+  }
+  return []
+}
+
+function schemaAllowsValue(schema: Record<string, unknown>, value: unknown): boolean {
+  if (Array.isArray(schema.anyOf)) {
+    return schema.anyOf.some(item =>
+      schemaAllowsValue(sanitizeSchemaForOpenAICompat(item), value),
+    )
+  }
+
+  if (Array.isArray(schema.oneOf)) {
+    return (
+      schema.oneOf.filter(item =>
+        schemaAllowsValue(sanitizeSchemaForOpenAICompat(item), value),
+      ).length === 1
+    )
+  }
+
+  if (Array.isArray(schema.allOf)) {
+    return schema.allOf.every(item =>
+      schemaAllowsValue(sanitizeSchemaForOpenAICompat(item), value),
+    )
+  }
+
+  if ('const' in schema && !deepEqualJsonValue(schema.const, value)) {
+    return false
+  }
+
+  if (Array.isArray(schema.enum)) {
+    if (!schema.enum.some(item => deepEqualJsonValue(item, value))) {
+      return false
+    }
+  }
+
+  const types = getJsonSchemaTypes(schema)
+  if (types.length > 0 && !types.some(type => matchesJsonSchemaType(type, value))) {
+    return false
+  }
+
+  return true
+}
+
+function sanitizeTypeField(record: Record<string, unknown>): void {
+  const allowed = new Set([
+    'string',
+    'number',
+    'integer',
+    'boolean',
+    'object',
+    'array',
+    'null',
+  ])
+
+  const raw = record.type
+  if (typeof raw === 'string') {
+    if (!allowed.has(raw)) delete record.type
+    return
+  }
+
+  if (!Array.isArray(raw)) return
+
+  const filtered = raw.filter(
+    (value, index): value is string =>
+      typeof value === 'string' &&
+      allowed.has(value) &&
+      raw.indexOf(value) === index,
+  )
+
+  if (filtered.length === 0) {
+    delete record.type
+  } else if (filtered.length === 1) {
+    record.type = filtered[0]
+  } else {
+    record.type = filtered
+  }
+}
+
+/**
+ * Sanitize loose/invalid JSON Schema into a form OpenAI-compatible providers
+ * are more likely to accept. This is intentionally defensive for external MCP
+ * servers that may advertise imperfect schemas.
+ */
+export function sanitizeSchemaForOpenAICompat(
+  schema: unknown,
+): Record<string, unknown> {
+  if (!isSchemaRecord(schema)) {
+    return {}
+  }
+
+  const record = { ...schema }
+
+  delete record.$schema
+  delete record.propertyNames
+
+  sanitizeTypeField(record)
+
+  if (isSchemaRecord(record.properties)) {
+    const sanitizedProps: Record<string, unknown> = {}
+    for (const [key, value] of Object.entries(record.properties)) {
+      sanitizedProps[key] = sanitizeSchemaForOpenAICompat(value)
+    }
+    record.properties = sanitizedProps
+  }
+
+  if ('items' in record) {
+    if (Array.isArray(record.items)) {
+      record.items = record.items.map(item =>
+        sanitizeSchemaForOpenAICompat(item),
+      )
+    } else {
+      record.items = sanitizeSchemaForOpenAICompat(record.items)
+    }
+  }
+
+  for (const key of ['anyOf', 'oneOf', 'allOf'] as const) {
+    if (Array.isArray(record[key])) {
+      record[key] = record[key].map(item =>
+        sanitizeSchemaForOpenAICompat(item),
+      )
+    }
+  }
+
+  if (Array.isArray(record.required) && isSchemaRecord(record.properties)) {
+    record.required = record.required.filter(
+      (value): value is string =>
+        typeof value === 'string' && value in record.properties,
+    )
+  }
+
+  const schemaWithoutEnum = { ...record }
+  delete schemaWithoutEnum.enum
+
+  if (Array.isArray(record.enum)) {
+    const filteredEnum = record.enum.filter(value =>
+      schemaAllowsValue(schemaWithoutEnum, value),
+    )
+    if (filteredEnum.length > 0) {
+      record.enum = filteredEnum
+    } else {
+      delete record.enum
+    }
+  }
+
+  const schemaWithoutConst = { ...record }
+  delete schemaWithoutConst.const
+  if ('const' in record && !schemaAllowsValue(schemaWithoutConst, record.const)) {
+    delete record.const
+  }
+
+  const schemaWithoutDefault = { ...record }
+  delete schemaWithoutDefault.default
+  if (
+    'default' in record &&
+    !schemaAllowsValue(schemaWithoutDefault, record.default)
+  ) {
+    delete record.default
+  }
+
+  return record
+}
diff --git a/src/services/api/openaiShim.test.ts b/src/services/api/openaiShim.test.ts
index 6141bdb9..101c2ae5 100644
--- a/src/services/api/openaiShim.test.ts
+++ b/src/services/api/openaiShim.test.ts
@@ -312,3 +312,77 @@ test('preserves Gemini tool call extra_content from streaming chunks', async ()
     },
   })
 })
+
+test('sanitizes malformed MCP tool schemas before sending them to OpenAI', async () => {
+  let requestBody: Record<string, unknown> | undefined
+
+  globalThis.fetch = (async (_input, init) => {
+    requestBody = JSON.parse(String(init?.body))
+
+    return new Response(
+      JSON.stringify({
+        id: 'chatcmpl-1',
+        model: 'gpt-4o',
+        choices: [
+          {
+            message: {
+              role: 'assistant',
+              content: 'ok',
+            },
+            finish_reason: 'stop',
+          },
+        ],
+        usage: {
+          prompt_tokens: 10,
+          completion_tokens: 1,
+          total_tokens: 11,
+        },
+      }),
+      {
+        headers: {
+          'Content-Type': 'application/json',
+        },
+      },
+    )
+  }) as FetchType
+
+  const client = createOpenAIShimClient({}) as OpenAIShimClient
+
+  await client.beta.messages.create({
+    model: 'gpt-4o',
+    system: 'test system',
+    messages: [{ role: 'user', content: 'hello' }],
+    tools: [
+      {
+        name: 'mcp__clientry__create_task',
+        description: 'Create a task',
+        input_schema: {
+          type: 'object',
+          properties: {
+            priority: {
+              type: 'integer',
+              description: 'Priority: 0=low, 1=medium, 2=high, 3=urgent',
+              default: true,
+              enum: [false, 0, 1, 2, 3],
+            },
+          },
+        },
+      },
+    ],
+    max_tokens: 64,
+    stream: false,
+  })
+
+  const parameters = (
+    requestBody?.tools as Array<{ function?: { parameters?: Record<string, unknown> } }>
+  )?.[0]?.function?.parameters
+  const properties = parameters?.properties as
+    | Record<string, Record<string, unknown>>
+    | undefined
+
+  expect(parameters?.additionalProperties).toBe(false)
+  expect(parameters?.required).toEqual(['priority'])
+  expect(properties?.priority?.type).toBe('integer')
+  expect(properties?.priority?.enum).toEqual([0, 1, 2, 3])
+  expect(properties?.priority).not.toHaveProperty('default')
+})
diff --git a/src/services/api/openaiShim.ts b/src/services/api/openaiShim.ts
index 8ab97ac3..9acf6521 100644
--- a/src/services/api/openaiShim.ts
+++ b/src/services/api/openaiShim.ts
@@ -39,6 +39,7 @@ import {
   resolveProviderRequest,
 } from './providerConfig.js'
 import { redactSecretValueForDisplay } from '../../utils/providerProfile.js'
+import { sanitizeSchemaForOpenAICompat } from './openaiSchemaSanitizer.js'

 const GITHUB_MODELS_DEFAULT_BASE = 'https://models.github.ai/inference'
 const GITHUB_API_VERSION = '2022-11-28'
@@ -270,11 +271,7 @@
 function normalizeSchemaForOpenAI(
   schema: Record<string, unknown>,
   strict = true,
 ): Record<string, unknown> {
-  if (!schema || typeof schema !== 'object' || Array.isArray(schema)) {
-    return (schema ?? {}) as Record<string, unknown>
-  }
-
-  const record = { ...schema }
+  const record = sanitizeSchemaForOpenAICompat(schema)

   if (record.type === 'object' && record.properties) {
     const properties = record.properties as Record<string, Record<string, unknown>>
diff --git a/src/utils/toolResultStorage.test.ts b/src/utils/toolResultStorage.test.ts
new file mode 100644
index 00000000..8040f1ff
--- /dev/null
+++ b/src/utils/toolResultStorage.test.ts
@@ -0,0 +1,54 @@
+import { expect, test } from 'bun:test'
+
+import { createUserMessage } from './messages.ts'
+import { applyToolResultReplacementsToMessages } from './toolResultStorage.ts'
+
+test('applyToolResultReplacementsToMessages replaces matching tool results and preserves unrelated messages', () => {
+  const unrelated = createUserMessage({ content: 'keep me' })
+  const oversizedResult = createUserMessage({
+    content: [
+      {
+        type: 'tool_result',
+        tool_use_id: 'tool-1',
+        content: 'very large tool output',
+        is_error: false,
+      },
+    ],
+  })
+  const messages = [unrelated, oversizedResult]
+  const replacement = '\nOutput too large. Preview\n'
+
+  const next = applyToolResultReplacementsToMessages(
+    messages,
+    new Map([['tool-1', replacement]]),
+  )
+
+  expect(next).not.toBe(messages)
+  expect(next[0]).toBe(unrelated)
+  expect(next[1]).not.toBe(oversizedResult)
+  expect((next[1]!.message.content as Array<{ content: string }>)[0]!.content).toBe(
+    replacement,
+  )
+})
+
+test('applyToolResultReplacementsToMessages is idempotent when messages are already hydrated', () => {
+  const hydrated = createUserMessage({
+    content: [
+      {
+        type: 'tool_result',
+        tool_use_id: 'tool-1',
+        content: '\nPreview\n',
+        is_error: false,
+      },
+    ],
+  })
+  const messages = [hydrated]
+
+  const next = applyToolResultReplacementsToMessages(
+    messages,
+    new Map([['tool-1', '\nPreview\n']]),
+  )
+
+  expect(next).toBe(messages)
+})
diff --git a/src/utils/toolResultStorage.ts b/src/utils/toolResultStorage.ts
index f4dfef32..e5f16c1a 100644
--- a/src/utils/toolResultStorage.ts
+++ b/src/utils/toolResultStorage.ts
@@ -698,17 +698,22 @@ function selectFreshToReplace(
  */
 function replaceToolResultContents(
   messages: Message[],
-  replacementMap: Map<string, string>,
+  replacementMap: ReadonlyMap<string, string>,
 ): Message[] {
-  return messages.map(message => {
+  let changed = false
+  const nextMessages = messages.map(message => {
     if (message.type !== 'user' || !Array.isArray(message.message.content)) {
       return message
     }
     const content = message.message.content
     const needsReplace = content.some(
-      b => b.type === 'tool_result' && replacementMap.has(b.tool_use_id),
+      b =>
+        b.type === 'tool_result' &&
+        replacementMap.has(b.tool_use_id) &&
+        b.content !== replacementMap.get(b.tool_use_id),
     )
     if (!needsReplace) return message
+    changed = true
     return {
       ...message,
       message: {
@@ -716,13 +721,28 @@
         content: content.map(block => {
           if (block.type !== 'tool_result') return block
           const replacement = replacementMap.get(block.tool_use_id)
-          return replacement === undefined
+          return replacement === undefined || block.content === replacement
             ? block
             : { ...block, content: replacement }
         }),
       },
     }
   })
+  return changed ? nextMessages : messages
+}
+
+/**
+ * Mirror already-known tool-result replacements back into an in-memory
+ * transcript. Used by the interactive REPL so once a large result has been
+ * persisted/replaced for model use, the original oversized string can be
+ * dropped from live session state as well.
+ */
+export function applyToolResultReplacementsToMessages(
+  messages: Message[],
+  replacements: ReadonlyMap<string, string>,
+): Message[] {
+  if (replacements.size === 0) return messages
+  return replaceToolResultContents(messages, replacements)
 }

 async function buildReplacement(
@@ -926,13 +946,16 @@
 export async function applyToolResultBudget(
   messages: Message[],
   state: ContentReplacementState | undefined,
   writeToTranscript?: (records: ToolResultReplacementRecord[]) => void,
   skipToolNames?: ReadonlySet<string>,
-): Promise<Message[]> {
-  if (!state) return messages
+): Promise<{
+  messages: Message[]
+  newlyReplaced: ToolResultReplacementRecord[]
+}> {
+  if (!state) return { messages, newlyReplaced: [] }
   const result = await enforceToolResultBudget(messages, state, skipToolNames)
   if (result.newlyReplaced.length > 0) {
     writeToTranscript?.(result.newlyReplaced)
   }
-  return result.messages
+  return result
 }

 /**