From a6a3de5ac155fe9d00befbfcab98d439314effd8 Mon Sep 17 00:00:00 2001 From: viudes <57104322+andersonviudes@users.noreply.github.com> Date: Tue, 21 Apr 2026 06:36:26 -0300 Subject: [PATCH] feat(api): compress old tool_result content for small-context providers (#801) * feat(api): compress old tool_result content for small-context providers Adds a shim-layer pass that tiers tool_result content by age on providers with small effective context windows (Copilot gpt-4o 128k, Mistral, Ollama). Recent turns remain full; mid-tier results are truncated to 2k chars; older results are replaced with a stub that preserves tool name and arguments so the model can re-invoke if needed. Tier sizes auto-tune via getEffectiveContextWindowSize, same calculation used by auto-compact. Reuses COMPACTABLE_TOOLS and TOOL_RESULT_CLEARED_MESSAGE to complement (not duplicate) microCompact. Configurable via /config toolHistoryCompressionEnabled. Addresses active-session context accumulation on Copilot where microCompact's time-based trigger never fires, which surfaces as "tools appearing in a loop" and prompt_too_long errors after ~15 turns. * fix: config tool history --- .gitignore | 1 + src/components/Settings/Config.tsx | 21 + src/services/api/codexShim.ts | 5 +- src/services/api/compressToolHistory.test.ts | 572 ++++++++++++++++++ src/services/api/compressToolHistory.ts | 255 ++++++++ .../api/openaiShim.compression.test.ts | 317 ++++++++++ src/services/api/openaiShim.ts | 6 +- src/services/compact/microCompact.ts | 4 +- src/utils/config.ts | 3 + 9 files changed, 1179 insertions(+), 5 deletions(-) create mode 100644 src/services/api/compressToolHistory.test.ts create mode 100644 src/services/api/compressToolHistory.ts create mode 100644 src/services/api/openaiShim.compression.test.ts diff --git a/.gitignore b/.gitignore index 2d046b19..6ae40bc3 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ dist/ .openclaude-profile.json reports/ GEMINI.md +CLAUDE.md package-lock.json /.claude coverage/ diff --git a/src/components/Settings/Config.tsx b/src/components/Settings/Config.tsx index 9b48b8c4..31ed3ee4 100644 --- a/src/components/Settings/Config.tsx +++ b/src/components/Settings/Config.tsx @@ -281,6 +281,24 @@ export function Config({ enabled: autoCompactEnabled }); } + }, { + id: 'toolHistoryCompressionEnabled', + label: 'Tool history compression', + value: globalConfig.toolHistoryCompressionEnabled, + type: 'boolean' as const, + onChange(toolHistoryCompressionEnabled: boolean) { + saveGlobalConfig(current => ({ + ...current, + toolHistoryCompressionEnabled + })); + setGlobalConfig({ + ...getGlobalConfig(), + toolHistoryCompressionEnabled + }); + logEvent('tengu_tool_history_compression_setting_changed', { + enabled: toolHistoryCompressionEnabled + }); + } }, { id: 'spinnerTipsEnabled', label: 'Show tips', @@ -1158,6 +1176,9 @@ export function Config({ if (globalConfig.autoCompactEnabled !== initialConfig.current.autoCompactEnabled) { formattedChanges.push(`${globalConfig.autoCompactEnabled ? 'Enabled' : 'Disabled'} auto-compact`); } + if (globalConfig.toolHistoryCompressionEnabled !== initialConfig.current.toolHistoryCompressionEnabled) { + formattedChanges.push(`${globalConfig.toolHistoryCompressionEnabled ? 'Enabled' : 'Disabled'} tool history compression`); + } if (globalConfig.respectGitignore !== initialConfig.current.respectGitignore) { formattedChanges.push(`${globalConfig.respectGitignore ? 
'Enabled' : 'Disabled'} respect .gitignore in file picker`);
   }
diff --git a/src/services/api/codexShim.ts b/src/services/api/codexShim.ts
index 7ed29df5..ef8b7806 100644
--- a/src/services/api/codexShim.ts
+++ b/src/services/api/codexShim.ts
@@ -1,4 +1,5 @@
 import { APIError } from '@anthropic-ai/sdk'
+import { compressToolHistory } from './compressToolHistory.js'
 import { fetchWithProxyRetry } from './fetchWithProxyRetry.js'
 import type {
   ResolvedCodexCredentials,
@@ -484,13 +485,15 @@ export async function performCodexRequest(options: {
   defaultHeaders: Record<string, string>
   signal?: AbortSignal
 }): Promise<Response> {
-  const input = convertAnthropicMessagesToResponsesInput(
+  const compressedMessages = compressToolHistory(
     options.params.messages as Array<{
       role?: string
       message?: { role?: string; content?: unknown }
       content?: unknown
     }>,
+    options.request.resolvedModel,
   )
+  const input = convertAnthropicMessagesToResponsesInput(compressedMessages)
   const body: Record<string, unknown> = {
     model: options.request.resolvedModel,
     input: input.length > 0
diff --git a/src/services/api/compressToolHistory.test.ts b/src/services/api/compressToolHistory.test.ts
new file mode 100644
index 00000000..ed5f136f
--- /dev/null
+++ b/src/services/api/compressToolHistory.test.ts
@@ -0,0 +1,572 @@
+import { afterEach, beforeEach, expect, mock, test } from 'bun:test'
+import { compressToolHistory, getTiers } from './compressToolHistory.js'
+
+// Mock the two dependencies so tests are deterministic and don't read disk config.
+const mockState = {
+  enabled: true,
+  effectiveWindow: 100_000,
+}
+
+mock.module('../../utils/config.js', () => ({
+  getGlobalConfig: () => ({
+    toolHistoryCompressionEnabled: mockState.enabled,
+  }),
+}))
+
+mock.module('../compact/autoCompact.js', () => ({
+  getEffectiveContextWindowSize: () => mockState.effectiveWindow,
+}))
+
+beforeEach(() => {
+  mockState.enabled = true
+  mockState.effectiveWindow = 100_000
+})
+
+afterEach(() => {
+  mockState.enabled = true
+  mockState.effectiveWindow = 100_000
+})
+
+type Block = Record<string, unknown>
+type Msg = { role: string; content: Block[] | string }
+
+function bigText(n: number): string {
+  return 'x'.repeat(n)
+}
+
+function buildToolExchange(id: number, resultLength: number): Msg[] {
+  return [
+    {
+      role: 'assistant',
+      content: [
+        {
+          type: 'tool_use',
+          id: `toolu_${id}`,
+          name: 'Read',
+          input: { file_path: `/path/to/file${id}.ts` },
+        },
+      ],
+    },
+    {
+      role: 'user',
+      content: [
+        {
+          type: 'tool_result',
+          tool_use_id: `toolu_${id}`,
+          content: bigText(resultLength),
+        },
+      ],
+    },
+  ]
+}
+
+function buildConversation(numToolExchanges: number, resultLength = 5_000): Msg[] {
+  const out: Msg[] = [{ role: 'user', content: 'Initial request' }]
+  for (let i = 0; i < numToolExchanges; i++) {
+    out.push(...buildToolExchange(i, resultLength))
+  }
+  return out
+}
+
+function getResultMessages(messages: Msg[]): Msg[] {
+  return messages.filter(
+    m => Array.isArray(m.content) && m.content.some((b: any) => b.type === 'tool_result'),
+  )
+}
+
+function getResultBlock(msg: Msg): Block {
+  return (msg.content as Block[]).find((b: any) => b.type === 'tool_result') as Block
+}
+
+function getResultText(msg: Msg): string {
+  const block = getResultBlock(msg)
+  const c = block.content
+  if (typeof c === 'string') return c
+  if (Array.isArray(c)) {
+    return c
+      .filter((b: any) => b.type === 'text')
+      .map((b: any) => b.text)
+      .join('\n')
+  }
+  return ''
+}
+
+// ---------- getTiers ----------
+
+test('getTiers: < 16k window → recent=2, mid=3', () => {
+  expect(getTiers(8_000)).toEqual({ recent:
2, mid: 3 }) +}) + +test('getTiers: 16k–32k → recent=3, mid=5', () => { + expect(getTiers(20_000)).toEqual({ recent: 3, mid: 5 }) +}) + +test('getTiers: 32k–64k → recent=4, mid=8', () => { + expect(getTiers(48_000)).toEqual({ recent: 4, mid: 8 }) +}) + +test('getTiers: 64k–128k (Copilot gpt-4o) → recent=5, mid=10', () => { + expect(getTiers(100_000)).toEqual({ recent: 5, mid: 10 }) +}) + +test('getTiers: 128k–256k (Copilot Claude) → recent=8, mid=15', () => { + expect(getTiers(200_000)).toEqual({ recent: 8, mid: 15 }) +}) + +test('getTiers: 256k–500k → recent=12, mid=25', () => { + expect(getTiers(400_000)).toEqual({ recent: 12, mid: 25 }) +}) + +test('getTiers: ≥ 500k (gpt-4.1 1M) → recent=25, mid=50', () => { + expect(getTiers(1_000_000)).toEqual({ recent: 25, mid: 50 }) +}) + +// ---------- master switch ---------- + +test('pass-through when toolHistoryCompressionEnabled is false', () => { + mockState.enabled = false + const messages = buildConversation(20) + const result = compressToolHistory(messages, 'gpt-4o') + expect(result).toBe(messages) // same reference (no transformation) +}) + +test('pass-through when total tool_results <= recent tier', () => { + // 100k effective → recent=5; only 4 exchanges → no compression + const messages = buildConversation(4) + const result = compressToolHistory(messages, 'gpt-4o') + expect(result).toBe(messages) +}) + +// ---------- per-tier behavior ---------- + +test('recent tier: tool_result content untouched', () => { + // 100k effective → recent=5, mid=10. With 6 exchanges, only the oldest is touched. + const messages = buildConversation(6, 5_000) + const result = compressToolHistory(messages, 'gpt-4o') + const resultMsgs = getResultMessages(result) + + // Last 5 should be untouched (full 5000 chars) + for (let i = resultMsgs.length - 5; i < resultMsgs.length; i++) { + expect(getResultText(resultMsgs[i]).length).toBe(5_000) + } +}) + +test('mid tier: long content truncated to MID_MAX_CHARS with marker', () => { + // 100k → recent=5, mid=10. 10 exchanges: 5 recent + 5 mid (none old). + const messages = buildConversation(10, 5_000) + const result = compressToolHistory(messages, 'gpt-4o') + const resultMsgs = getResultMessages(result) + + // First 5 are mid tier — should be truncated to ~2000 chars + marker + for (let i = 0; i < 5; i++) { + const text = getResultText(resultMsgs[i]) + expect(text).toContain('[…truncated') + expect(text).toContain('chars from tool history]') + // Should be roughly 2000 chars + marker (under 2200) + expect(text.length).toBeLessThan(2_200) + expect(text.length).toBeGreaterThan(2_000) + } +}) + +test('mid tier: short content (< MID_MAX_CHARS) untouched', () => { + const messages = buildConversation(10, 500) // 500 < MID_MAX_CHARS + const result = compressToolHistory(messages, 'gpt-4o') + const resultMsgs = getResultMessages(result) + + for (let i = 0; i < 5; i++) { + expect(getResultText(resultMsgs[i])).toBe(bigText(500)) + } +}) + +test('old tier: content replaced with stub [name args={...} → N chars omitted]', () => { + // 100k → recent=5, mid=10, old=rest. 20 exchanges → 5 old + 10 mid + 5 recent. 
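+  // Tier math (from compressToolHistory): fromEnd = total - 1 - pos, so
+  // positions 0..4 have fromEnd 19..15, all ≥ recent + mid = 15 → old tier.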
+  const messages = buildConversation(20, 5_000)
+  const result = compressToolHistory(messages, 'gpt-4o')
+  const resultMsgs = getResultMessages(result)
+
+  // First 5 are old tier — should be stubs
+  for (let i = 0; i < 5; i++) {
+    const text = getResultText(resultMsgs[i])
+    expect(text).toMatch(/^\[Read args=\{.*\} → 5000 chars omitted\]$/)
+  }
+})
+
+test('old tier: stub args truncated to 200 chars', () => {
+  const longArg = bigText(500)
+  const messages: Msg[] = [
+    { role: 'user', content: 'start' },
+    {
+      role: 'assistant',
+      content: [
+        {
+          type: 'tool_use',
+          id: 'toolu_x',
+          name: 'Bash',
+          input: { command: longArg },
+        },
+      ],
+    },
+    {
+      role: 'user',
+      content: [
+        { type: 'tool_result', tool_use_id: 'toolu_x', content: 'output' },
+      ],
+    },
+    // Pad with enough recent exchanges to push the above into old tier
+    ...buildConversation(20, 100).slice(1),
+  ]
+  const result = compressToolHistory(messages, 'gpt-4o')
+  const resultMsgs = getResultMessages(result)
+  const text = getResultText(resultMsgs[0])
+
+  // Stub format: [Bash args=<truncated JSON> → N chars omitted]
+  // The args portion (between args= and →) must be ≤ 200 chars.
+  const argsMatch = text.match(/args=(.*?) →/)
+  expect(argsMatch).not.toBeNull()
+  expect(argsMatch![1].length).toBeLessThanOrEqual(200)
+})
+
+test('old tier: orphan tool_result (no matching tool_use) falls back to "tool"', () => {
+  const messages: Msg[] = [
+    { role: 'user', content: 'start' },
+    // Orphan: tool_result without matching tool_use in history
+    {
+      role: 'user',
+      content: [
+        { type: 'tool_result', tool_use_id: 'orphan_id', content: 'data' },
+      ],
+    },
+    ...buildConversation(20, 100).slice(1),
+  ]
+  const result = compressToolHistory(messages, 'gpt-4o')
+  const resultMsgs = getResultMessages(result)
+  const text = getResultText(resultMsgs[0])
+
+  expect(text).toMatch(/^\[tool args=\{\} → 4 chars omitted\]$/)
+})
+
+// ---------- structural preservation ----------
+
+test('tool_use blocks always preserved', () => {
+  const messages = buildConversation(20, 5_000)
+  const result = compressToolHistory(messages, 'gpt-4o')
+
+  const useCount = (msgs: Msg[]) =>
+    msgs.reduce((sum, m) => {
+      if (!Array.isArray(m.content)) return sum
+      return sum + m.content.filter((b: any) => b.type === 'tool_use').length
+    }, 0)
+
+  expect(useCount(result as Msg[])).toBe(useCount(messages))
+})
+
+test('text blocks always preserved', () => {
+  const messages: Msg[] = [
+    { role: 'user', content: 'first' },
+    {
+      role: 'assistant',
+      content: [
+        { type: 'text', text: 'reasoning before tool' },
+        { type: 'tool_use', id: 'toolu_1', name: 'Read', input: {} },
+      ],
+    },
+    {
+      role: 'user',
+      content: [{ type: 'tool_result', tool_use_id: 'toolu_1', content: bigText(5000) }],
+    },
+    ...buildConversation(20, 5_000).slice(1),
+  ]
+  const result = compressToolHistory(messages, 'gpt-4o')
+  const assistantMsg = (result as Msg[])[1]
+  const textBlock = (assistantMsg.content as Block[]).find((b: any) => b.type === 'text')
+
+  expect(textBlock).toEqual({ type: 'text', text: 'reasoning before tool' })
+})
+
+test('thinking blocks always preserved', () => {
+  const messages: Msg[] = [
+    { role: 'user', content: 'first' },
+    {
+      role: 'assistant',
+      content: [
+        { type: 'thinking', thinking: 'internal reasoning', signature: 'sig' },
+        { type: 'tool_use', id: 'toolu_1', name: 'Read', input: {} },
+      ],
+    },
+    {
+      role: 'user',
+      content: [{ type: 'tool_result', tool_use_id: 'toolu_1', content: bigText(5000) }],
+    },
+    ...buildConversation(20, 5_000).slice(1),
+  ]
+  const result = compressToolHistory(messages, 'gpt-4o')
+  const assistantMsg = (result as Msg[])[1]
+  const thinking = (assistantMsg.content as Block[]).find((b: any) => b.type === 'thinking')
+
+  expect(thinking).toEqual({
+    type: 'thinking',
+    thinking: 'internal reasoning',
+    signature: 'sig',
+  })
+})
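+
+// Structure preservation matters downstream: the OpenAI shim pairs every
+// assistant tool_call with a role:"tool" message, so only tool_result BODIES
+// are rewritten here (see openaiShim.compression.test.ts for the wire checks).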
+test('non-array content (string) handled gracefully', () => {
+  const messages: Msg[] = [
+    { role: 'user', content: 'plain string content' },
+    ...buildConversation(20, 100).slice(1),
+  ]
+  const result = compressToolHistory(messages, 'gpt-4o')
+  expect((result as Msg[])[0].content).toBe('plain string content')
+})
+
+test('empty content array handled gracefully', () => {
+  const messages: Msg[] = [
+    { role: 'user', content: [] },
+    ...buildConversation(20, 100).slice(1),
+  ]
+  expect(() => compressToolHistory(messages, 'gpt-4o')).not.toThrow()
+})
+
+// ---------- message shape compatibility ----------
+
+test('wrapped shape ({ message: { role, content } }) handled', () => {
+  type WrappedMsg = { message: { role: string; content: Block[] | string } }
+  const wrap = (m: Msg): WrappedMsg => ({ message: { role: m.role, content: m.content } })
+  const messages = buildConversation(20, 5_000).map(wrap)
+  const result = compressToolHistory(messages as any, 'gpt-4o')
+
+  // First wrapped tool-result message should have stub content (old tier)
+  const firstResultMsg = (result as WrappedMsg[]).find(
+    m =>
+      Array.isArray(m.message.content) &&
+      m.message.content.some((b: any) => b.type === 'tool_result'),
+  )
+  const block = (firstResultMsg!.message.content as Block[]).find(
+    (b: any) => b.type === 'tool_result',
+  ) as Block
+  const text = ((block.content as Block[])[0] as any).text
+  expect(text).toMatch(/^\[Read args=.*→ 5000 chars omitted\]$/)
+})
+
+test('flat shape ({ role, content }) handled', () => {
+  const messages = buildConversation(20, 5_000)
+  const result = compressToolHistory(messages, 'gpt-4o')
+  const resultMsgs = getResultMessages(result)
+
+  expect(getResultText(resultMsgs[0])).toMatch(/^\[Read args=.*→ 5000 chars omitted\]$/)
+})
+
+// ---------- tier boundary correctness ----------
+
+test('tier boundaries: 6 exchanges → 1 mid + 5 recent (recent=5)', () => {
+  const messages = buildConversation(6, 5_000)
+  const result = compressToolHistory(messages, 'gpt-4o')
+  const resultMsgs = getResultMessages(result)
+
+  // Oldest: mid (truncated)
+  expect(getResultText(resultMsgs[0])).toContain('[…truncated')
+  // Last 5: untouched
+  for (let i = 1; i < 6; i++) {
+    expect(getResultText(resultMsgs[i]).length).toBe(5_000)
+  }
+})
+
+test('tier boundaries: 16 exchanges → 1 old + 10 mid + 5 recent', () => {
+  const messages = buildConversation(16, 5_000)
+  const result = compressToolHistory(messages, 'gpt-4o')
+  const resultMsgs = getResultMessages(result)
+
+  // Oldest 1: stub (old tier)
+  expect(getResultText(resultMsgs[0])).toMatch(/^\[Read .*chars omitted\]$/)
+  // Next 10: mid (truncated)
+  for (let i = 1; i < 11; i++) {
+    expect(getResultText(resultMsgs[i])).toContain('[…truncated')
+  }
+  // Last 5: untouched
+  for (let i = 11; i < 16; i++) {
+    expect(getResultText(resultMsgs[i]).length).toBe(5_000)
+  }
+})
+
+test('large window (1M) with 30 exchanges: last 25 untouched (recent=25)', () => {
+  // ≥500k → recent=25, mid=50. 30 exchanges → 5 mid + 25 recent. None old.
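+  // Note: the 5 mid-tier results still exceed MID_MAX_CHARS (5_000 > 2_000),
+  // so they are truncated; only the trailing 25 stay byte-identical.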
+ mockState.effectiveWindow = 1_000_000 + const messages = buildConversation(30, 5_000) + const result = compressToolHistory(messages, 'gpt-4.1') + const resultMsgs = getResultMessages(result) + + // Last 25: untouched + for (let i = 5; i < 30; i++) { + expect(getResultText(resultMsgs[i]).length).toBe(5_000) + } +}) + +// ---------- attribute preservation ---------- + +test('is_error flag preserved in mid tier', () => { + const messages: Msg[] = [ + { role: 'user', content: 'start' }, + { + role: 'assistant', + content: [{ type: 'tool_use', id: 'toolu_err', name: 'Bash', input: {} }], + }, + { + role: 'user', + content: [ + { + type: 'tool_result', + tool_use_id: 'toolu_err', + is_error: true, + content: bigText(5_000), + }, + ], + }, + // Pad with enough recent exchanges to push the above into MID tier + ...buildConversation(10, 100).slice(1), + ] + const result = compressToolHistory(messages, 'gpt-4o') + const resultMsgs = getResultMessages(result) + const block = getResultBlock(resultMsgs[0]) as { is_error?: boolean; content: unknown } + + expect(block.is_error).toBe(true) + expect(getResultText(resultMsgs[0])).toContain('[…truncated') +}) + +test('is_error flag preserved in old tier (stub)', () => { + const messages: Msg[] = [ + { role: 'user', content: 'start' }, + { + role: 'assistant', + content: [{ type: 'tool_use', id: 'toolu_err', name: 'Bash', input: {} }], + }, + { + role: 'user', + content: [ + { + type: 'tool_result', + tool_use_id: 'toolu_err', + is_error: true, + content: bigText(5_000), + }, + ], + }, + ...buildConversation(20, 100).slice(1), + ] + const result = compressToolHistory(messages, 'gpt-4o') + const resultMsgs = getResultMessages(result) + const block = getResultBlock(resultMsgs[0]) as { is_error?: boolean; content: unknown } + + expect(block.is_error).toBe(true) + expect(getResultText(resultMsgs[0])).toMatch(/^\[Bash .*chars omitted\]$/) +}) + +// ---------- COMPACTABLE_TOOLS filter ---------- + +test('non-compactable tool (e.g. 
Task/Agent) is NEVER compressed', () => { + // Build conversation where the OLDEST exchange uses a non-compactable tool name + const messages: Msg[] = [ + { role: 'user', content: 'start' }, + { + role: 'assistant', + content: [ + { type: 'tool_use', id: 'task_1', name: 'Task', input: { goal: 'plan' } }, + ], + }, + { + role: 'user', + content: [ + { type: 'tool_result', tool_use_id: 'task_1', content: bigText(5_000) }, + ], + }, + // Pad with 20 compactable exchanges to push Task into old tier + ...buildConversation(20, 100).slice(1), + ] + const result = compressToolHistory(messages, 'gpt-4o') + const resultMsgs = getResultMessages(result) + + // First tool_result is for Task (non-compactable) → must remain full + expect(getResultText(resultMsgs[0]).length).toBe(5_000) + expect(getResultText(resultMsgs[0])).not.toContain('chars omitted') + expect(getResultText(resultMsgs[0])).not.toContain('[…truncated') +}) + +test('mcp__ prefixed tools ARE compactable (matches microCompact behavior)', () => { + const messages: Msg[] = [ + { role: 'user', content: 'start' }, + { + role: 'assistant', + content: [ + { type: 'tool_use', id: 'mcp_1', name: 'mcp__github__get_issue', input: {} }, + ], + }, + { + role: 'user', + content: [ + { type: 'tool_result', tool_use_id: 'mcp_1', content: bigText(5_000) }, + ], + }, + ...buildConversation(20, 100).slice(1), + ] + const result = compressToolHistory(messages, 'gpt-4o') + const resultMsgs = getResultMessages(result) + + // MCP tool result is compressed (gets stub since it's in old tier) + expect(getResultText(resultMsgs[0])).toMatch(/^\[mcp__github__get_issue .*chars omitted\]$/) +}) + +// ---------- skip already-cleared blocks ---------- + +test('blocks already cleared by microCompact are NOT re-compressed', () => { + const messages: Msg[] = [ + { role: 'user', content: 'start' }, + { + role: 'assistant', + content: [{ type: 'tool_use', id: 'cleared_1', name: 'Read', input: {} }], + }, + { + role: 'user', + content: [ + { + type: 'tool_result', + tool_use_id: 'cleared_1', + content: '[Old tool result content cleared]', // microCompact's marker + }, + ], + }, + ...buildConversation(20, 100).slice(1), + ] + const result = compressToolHistory(messages, 'gpt-4o') + const resultMsgs = getResultMessages(result) + + // Already-cleared marker survives untouched (no double processing) + expect(getResultText(resultMsgs[0])).toBe('[Old tool result content cleared]') +}) + +test('extra block attributes (e.g. 
cache_control) preserved across rewrites', () => { + const cacheControl = { type: 'ephemeral' } + const messages: Msg[] = [ + { role: 'user', content: 'start' }, + { + role: 'assistant', + content: [{ type: 'tool_use', id: 'toolu_cc', name: 'Read', input: {} }], + }, + { + role: 'user', + content: [ + { + type: 'tool_result', + tool_use_id: 'toolu_cc', + cache_control: cacheControl, + content: bigText(5_000), + }, + ], + }, + ...buildConversation(20, 100).slice(1), + ] + const result = compressToolHistory(messages, 'gpt-4o') + const resultMsgs = getResultMessages(result) + const block = getResultBlock(resultMsgs[0]) as { cache_control?: unknown } + + // The custom attribute survived the stub rewrite via ...block spread + expect(block.cache_control).toEqual(cacheControl) +}) diff --git a/src/services/api/compressToolHistory.ts b/src/services/api/compressToolHistory.ts new file mode 100644 index 00000000..465036f0 --- /dev/null +++ b/src/services/api/compressToolHistory.ts @@ -0,0 +1,255 @@ +/** + * Compresses old tool_result content for stateless OpenAI-compatible providers + * (Copilot, Mistral, Ollama). Preserves all conversation structure — tool_use, + * tool_result pairing, text, thinking, and is_error all survive intact. Only + * the BULK text of older tool_results is shrunk to delay context saturation. + * + * Tier sizes scale with the model's effective context window via + * getEffectiveContextWindowSize() — same calculation used by auto-compact, so + * the two systems stay aligned. + * + * Complements (does not replace) microCompact.ts: + * - microCompact: time/cache-based, runs from query.ts, binary clear/keep, + * limited to Claude (cache editing) or idle gaps (time-based). + * - compressToolHistory: size-based, runs at the shim layer, tiered + * compression, covers the gap for active sessions on non-Claude providers. + * + * Reuses isCompactableTool from microCompact to avoid touching tools the + * project already classifies as unsafe to compress (e.g. Task, Agent). + * Skips blocks already cleared by microCompact (TOOL_RESULT_CLEARED_MESSAGE). + * + * Anthropic native bypasses both shims, so it is unaffected by this module. + */ +import { getEffectiveContextWindowSize } from '../compact/autoCompact.js' +import { isCompactableTool } from '../compact/microCompact.js' +import { TOOL_RESULT_CLEARED_MESSAGE } from '../../utils/toolResultStorage.js' +import { getGlobalConfig } from '../../utils/config.js' + +// Mid-tier truncation budget. 2k chars ≈ 500 tokens, enough to preserve the +// shape of most tool outputs (file headers, command stderr, top grep hits) +// without ballooning context. Bump too high and the tier loses its purpose. +const MID_MAX_CHARS = 2_000 + +// Stub args budget. JSON.stringify of a typical tool input fits in 200 chars +// (file paths, short commands, small queries). Long inputs are rare and clamping +// here keeps the stub size bounded even when callers pass oversized arguments. +const STUB_ARGS_MAX_CHARS = 200 + +type AnyMessage = { + role?: string + message?: { role?: string; content?: unknown } + content?: unknown +} + +type ToolResultBlock = { + type: 'tool_result' + tool_use_id?: string + is_error?: boolean + content?: unknown +} + +type ToolUseBlock = { + type: 'tool_use' + id?: string + name?: string + input?: unknown +} + +type Tiers = { recent: number; mid: number } + +// Tier sizes scale with effective window. 
Targets roughly:
+// - recent tier stays under ~25% of available window (full fidelity kept)
+// - recent + mid tier stays under ~50% of available window (bounded bulk)
+// - everything older collapses to ~15-token stubs
+// Values assume ~5KB avg tool_result, which matches the Copilot default case
+// (parallel_tool_calls=true means multiple Read/Bash outputs per turn). For
+// ≥ 500k models the tiers are so generous that compression is effectively
+// inert for any realistic session — see compressToolHistory.test.ts.
+export function getTiers(effectiveWindow: number): Tiers {
+  if (effectiveWindow < 16_000) return { recent: 2, mid: 3 }
+  if (effectiveWindow < 32_000) return { recent: 3, mid: 5 }
+  if (effectiveWindow < 64_000) return { recent: 4, mid: 8 }
+  if (effectiveWindow < 128_000) return { recent: 5, mid: 10 }
+  if (effectiveWindow < 256_000) return { recent: 8, mid: 15 }
+  if (effectiveWindow < 500_000) return { recent: 12, mid: 25 }
+  return { recent: 25, mid: 50 }
+}
+
+function extractText(content: unknown): string {
+  if (typeof content === 'string') return content
+  if (Array.isArray(content)) {
+    return content
+      .filter(
+        (b: { type?: string; text?: string }) =>
+          b?.type === 'text' && typeof b.text === 'string',
+      )
+      .map((b: { text?: string }) => b.text ?? '')
+      .join('\n')
+  }
+  return ''
+}
+
+// Old-tier compression strategy. Replaces content entirely with a one-line
+// metadata marker ~10× more token-efficient than a 500-char truncation AND
+// unambiguous — partial truncations can look authoritative to the model. The
+// stub format encodes tool name + args so the model can re-invoke the same
+// tool if it needs the omitted output back.
+function buildStub(
+  block: ToolResultBlock,
+  toolUsesById: Map<string, ToolUseBlock>,
+): ToolResultBlock {
+  const original = extractText(block.content)
+  const toolUse = toolUsesById.get(block.tool_use_id ?? '')
+  const name = toolUse?.name ?? 'tool'
+  const args = toolUse?.input
+    ? JSON.stringify(toolUse.input).slice(0, STUB_ARGS_MAX_CHARS)
+    : '{}'
+  return {
+    ...block,
+    content: [
+      {
+        type: 'text',
+        text: `[${name} args=${args} → ${original.length} chars omitted]`,
+      },
+    ],
+  }
+}
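+
+// Example stub, matching the shape asserted in compressToolHistory.test.ts:
+//   [Read args={"file_path":"/path/to/file0.ts"} → 5000 chars omitted]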
+
+// Mid-tier compression. The trailing marker is load-bearing: without it, the
+// model can't distinguish "tool returned 2000 chars" from "tool returned 20k
+// chars that we cut to 2000". Distinguishing those matters for the model's
+// decision to re-invoke the tool.
+function truncateBlock(
+  block: ToolResultBlock,
+  maxChars: number,
+): ToolResultBlock {
+  const text = extractText(block.content)
+  if (text.length <= maxChars) return block
+  const omitted = text.length - maxChars
+  return {
+    ...block,
+    content: [
+      {
+        type: 'text',
+        text: `${text.slice(0, maxChars)}\n[…truncated ${omitted} chars from tool history]`,
+      },
+    ],
+  }
+}
+
+function getInner(msg: AnyMessage): { role?: string; content?: unknown } {
+  return (msg.message ?? msg) as { role?: string; content?: unknown }
+}
+
+function indexToolUses(messages: AnyMessage[]): Map<string, ToolUseBlock> {
+  const map = new Map<string, ToolUseBlock>()
+  for (const msg of messages) {
+    const content = getInner(msg).content
+    if (!Array.isArray(content)) continue
+    for (const b of content as Array<{ type?: string; id?: string }>) {
+      if (b?.type === 'tool_use' && b.id) {
+        map.set(b.id, b as ToolUseBlock)
+      }
+    }
+  }
+  return map
+}
+
+function indexToolResultMessages(messages: AnyMessage[]): number[] {
+  const indices: number[] = []
+  for (let i = 0; i < messages.length; i++) {
+    const inner = getInner(messages[i])
+    const role = inner.role ?? messages[i].role
+    const content = inner.content
+    if (
+      role === 'user' &&
+      Array.isArray(content) &&
+      content.some((b: { type?: string }) => b?.type === 'tool_result')
+    ) {
+      indices.push(i)
+    }
+  }
+  return indices
+}
+
+function rewriteMessage<T extends AnyMessage>(
+  msg: T,
+  newContent: unknown[],
+): T {
+  if (msg.message) {
+    return { ...msg, message: { ...msg.message, content: newContent } }
+  }
+  return { ...msg, content: newContent }
+}
+
+// microCompact.maybeTimeBasedMicrocompact may have already replaced old
+// tool_result content with TOOL_RESULT_CLEARED_MESSAGE before we see it.
+// Re-compressing produces a stub over a marker (e.g. `[Read args={} → 40
+// chars omitted]`), wasteful and less informative than the canonical marker.
+function isAlreadyCleared(block: ToolResultBlock): boolean {
+  const text = extractText(block.content)
+  return text === TOOL_RESULT_CLEARED_MESSAGE
+}
+
+function shouldCompressBlock(
+  block: ToolResultBlock,
+  toolUsesById: Map<string, ToolUseBlock>,
+): boolean {
+  if (isAlreadyCleared(block)) return false
+  const toolUse = toolUsesById.get(block.tool_use_id ?? '')
+  // Unknown tool name (orphan tool_result with no matching tool_use) falls
+  // through to compression with a generic "tool" stub. Safer default: the
+  // original tool_use vanished so there's no downstream use for the output.
+  if (!toolUse?.name) return true
+  // Respect microCompact's curated safe-to-compress set (Read/Bash/Grep/…/
+  // mcp__*) so user-facing flow tools (Task, Agent, custom) stay intact.
+  return isCompactableTool(toolUse.name)
+}
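+
+// Worked example for a 100k-window model (recent=5, mid=10): with 20 tool
+// results, positions 0-4 become stubs, 5-14 are truncated to MID_MAX_CHARS,
+// and 15-19 pass through untouched.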
+export function compressToolHistory<T extends AnyMessage>(
+  messages: T[],
+  model: string,
+): T[] {
+  // Master kill-switch. Returns the original reference so callers skip a
+  // defensive copy when the feature is disabled.
+  if (!getGlobalConfig().toolHistoryCompressionEnabled) return messages
+
+  const tiers = getTiers(getEffectiveContextWindowSize(model))
+
+  const toolResultIndices = indexToolResultMessages(messages)
+  const total = toolResultIndices.length
+  // If every tool-result fits in the recent tier, no boundary crosses; return
+  // the same reference for the same copy-elision reason.
+  if (total <= tiers.recent) return messages
+
+  // O(1) lookup: messageIndex → tool-result position (0 = oldest). Replaces
+  // the naive Array.indexOf(i) that was O(n²) across the .map below.
+  const positionByIndex = new Map<number, number>()
+  for (let pos = 0; pos < toolResultIndices.length; pos++) {
+    positionByIndex.set(toolResultIndices[pos], pos)
+  }
+
+  const toolUsesById = indexToolUses(messages)
+
+  return messages.map((msg, i) => {
+    const pos = positionByIndex.get(i)
+    if (pos === undefined) return msg
+
+    const fromEnd = total - 1 - pos
+    if (fromEnd < tiers.recent) return msg
+
+    const inMidWindow = fromEnd < tiers.recent + tiers.mid
+    const content = getInner(msg).content as unknown[]
+    const newContent = content.map(block => {
+      const b = block as { type?: string }
+      if (b?.type !== 'tool_result') return block
+      const tr = block as ToolResultBlock
+      if (!shouldCompressBlock(tr, toolUsesById)) return block
+      return inMidWindow
+        ? truncateBlock(tr, MID_MAX_CHARS)
+        : buildStub(tr, toolUsesById)
+    })
+
+    return rewriteMessage(msg, newContent)
+  })
+}
diff --git a/src/services/api/openaiShim.compression.test.ts b/src/services/api/openaiShim.compression.test.ts
new file mode 100644
index 00000000..b45811ff
--- /dev/null
+++ b/src/services/api/openaiShim.compression.test.ts
@@ -0,0 +1,317 @@
+import { afterEach, beforeEach, expect, mock, test } from 'bun:test'
+import { createOpenAIShimClient } from './openaiShim.js'
+
+type FetchType = typeof globalThis.fetch
+const originalFetch = globalThis.fetch
+
+const originalEnv = {
+  OPENAI_BASE_URL: process.env.OPENAI_BASE_URL,
+  OPENAI_API_KEY: process.env.OPENAI_API_KEY,
+  OPENAI_MODEL: process.env.OPENAI_MODEL,
+}
+
+// Mock config + autoCompact so the shim sees deterministic state.
+const mockState = {
+  enabled: true,
+  effectiveWindow: 100_000, // Copilot gpt-4o tier
+}
+
+mock.module('../../utils/config.js', () => ({
+  getGlobalConfig: () => ({
+    toolHistoryCompressionEnabled: mockState.enabled,
+    autoCompactEnabled: false,
+  }),
+}))
+
+mock.module('../compact/autoCompact.js', () => ({
+  getEffectiveContextWindowSize: () => mockState.effectiveWindow,
+}))
+
+type OpenAIShimClient = {
+  beta: {
+    messages: {
+      create: (
+        params: Record<string, unknown>,
+        options?: Record<string, unknown>,
+      ) => Promise<unknown>
+    }
+  }
+}
+
+function bigText(n: number): string {
+  return 'A'.repeat(n)
+}
+
+function buildToolExchange(id: number, resultLength: number) {
+  return [
+    {
+      role: 'assistant',
+      content: [
+        {
+          type: 'tool_use',
+          id: `toolu_${id}`,
+          name: 'Read',
+          input: { file_path: `/path/to/file${id}.ts` },
+        },
+      ],
+    },
+    {
+      role: 'user',
+      content: [
+        {
+          type: 'tool_result',
+          tool_use_id: `toolu_${id}`,
+          content: bigText(resultLength),
+        },
+      ],
+    },
+  ]
+}
+
+function buildLongConversation(numExchanges: number, resultLength = 5_000) {
+  const out: Array<{ role: string; content: unknown }> = [
+    { role: 'user', content: 'start the work' },
+  ]
+  for (let i = 0; i < numExchanges; i++) {
+    out.push(...buildToolExchange(i, resultLength))
+  }
+  return out
+}
+
+function makeFakeResponse(): Response {
+  return new Response(
+    JSON.stringify({
+      id: 'chatcmpl-1',
+      model: 'gpt-4o',
+      choices: [
+        {
+          message: { role: 'assistant', content: 'done' },
+          finish_reason: 'stop',
+        },
+      ],
+      usage: { prompt_tokens: 8, completion_tokens: 2, total_tokens: 10 },
+    }),
+    { headers: { 'Content-Type': 'application/json' } },
+  )
+}
+
+beforeEach(() => {
+  process.env.OPENAI_BASE_URL = 'http://example.test/v1'
+  process.env.OPENAI_API_KEY = 'test-key'
+  delete process.env.OPENAI_MODEL
+  mockState.enabled = true
+  mockState.effectiveWindow = 100_000
+})
+
+afterEach(() => {
+  if (originalEnv.OPENAI_BASE_URL === undefined) delete process.env.OPENAI_BASE_URL
+  else process.env.OPENAI_BASE_URL = originalEnv.OPENAI_BASE_URL
+  if (originalEnv.OPENAI_API_KEY === undefined) delete process.env.OPENAI_API_KEY
+  else process.env.OPENAI_API_KEY = originalEnv.OPENAI_API_KEY
+  if (originalEnv.OPENAI_MODEL === undefined) delete process.env.OPENAI_MODEL
+  else process.env.OPENAI_MODEL = originalEnv.OPENAI_MODEL
+  globalThis.fetch = originalFetch
+})
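+
+// Capture harness: stub globalThis.fetch, record the JSON body the shim would
+// send over the wire, and hand back a canned chat.completions response.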
+async function captureRequestBody(
+  messages: Array<{ role: string; content: unknown }>,
+  model: string,
+): Promise<Record<string, unknown>> {
+  let captured: Record<string, unknown> | undefined
+
+  globalThis.fetch = (async (_input, init) => {
+    captured = JSON.parse(String(init?.body))
+    return makeFakeResponse()
+  }) as FetchType
+
+  const client = createOpenAIShimClient({}) as OpenAIShimClient
+  await client.beta.messages.create({
+    model,
+    system: 'system prompt',
+    messages,
+  })
+
+  if (!captured) throw new Error('request not captured')
+  return captured
+}
+
+function getToolMessages(body: Record<string, unknown>): Array<{ content: string }> {
+  const messages = body.messages as Array<{ role: string; content: string }>
+  return messages.filter(m => m.role === 'tool')
+}
+
+function getAssistantToolCalls(body: Record<string, unknown>): unknown[] {
+  const messages = body.messages as Array<{
+    role: string
+    tool_calls?: unknown[]
+  }>
+  return messages
+    .filter(m => m.role === 'assistant' && Array.isArray(m.tool_calls))
+    .flatMap(m => m.tool_calls ?? [])
+}
+
+// ============================================================================
+// BUG REPRO: without compression, full tool history is resent every turn
+// ============================================================================
+
+test('BUG REPRO: without compression, all 30 tool results are sent at full size', async () => {
+  mockState.enabled = false
+  const messages = buildLongConversation(30, 5_000)
+
+  const body = await captureRequestBody(messages, 'gpt-4o')
+  const toolMessages = getToolMessages(body)
+  const payloadSize = JSON.stringify(body).length
+
+  // All 30 tool results present, none truncated
+  expect(toolMessages.length).toBe(30)
+  for (const m of toolMessages) {
+    expect(m.content.length).toBeGreaterThanOrEqual(5_000)
+    expect(m.content).not.toContain('[…truncated')
+    expect(m.content).not.toContain('chars omitted')
+  }
+
+  // Total payload is large (~150KB raw) — this is the cost being paid every turn
+  expect(payloadSize).toBeGreaterThan(150_000)
+})
+
+// ============================================================================
+// FIX: with compression, recent kept full, mid truncated, old stubbed
+// ============================================================================
+
+test('FIX: with compression on Copilot gpt-4o (tier 5/10/rest), 30 turns shrinks dramatically', async () => {
+  mockState.enabled = true
+  mockState.effectiveWindow = 100_000 // 64–128k → recent=5, mid=10
+  const messages = buildLongConversation(30, 5_000)
+
+  const body = await captureRequestBody(messages, 'gpt-4o')
+  const toolMessages = getToolMessages(body)
+  const payloadSize = JSON.stringify(body).length
+
+  // Structure preserved: still 30 tool messages, no orphan tool_calls
+  expect(toolMessages.length).toBe(30)
+  expect(getAssistantToolCalls(body).length).toBe(30)
+
+  // Tier breakdown (oldest → newest):
+  //   indices 0..14  → old tier (stubs)
+  //   indices 15..24 → mid tier (truncated)
+  //   indices 25..29 → recent (full)
+  for (let i = 0; i <= 14; i++) {
+    expect(toolMessages[i].content).toMatch(/^\[Read args=.*chars omitted\]$/)
+  }
+  for (let i = 15; i <= 24; i++) {
+    expect(toolMessages[i].content).toContain('[…truncated')
+  }
+  for (let i = 25; i <= 29; i++) {
+    expect(toolMessages[i].content.length).toBe(5_000)
+    expect(toolMessages[i].content).not.toContain('[…truncated')
+    expect(toolMessages[i].content).not.toContain('chars omitted')
+  }
+
+  // Significant reduction: from ~150KB to <60KB (10 mid×2KB + structure overhead)
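+  // Rough arithmetic: 5 recent × 5 KB + 10 mid × ~2 KB + 15 stubs × ~60 B
+  // ≈ 45 KB of tool content, plus JSON/tool_call scaffolding.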
+  expect(payloadSize).toBeLessThan(60_000)
+})
+
+// ============================================================================
+// FIX: large-context model gets generous tiers — compression effectively inert
+// ============================================================================
+
+test('FIX: gpt-4.1 (1M context) with 25 exchanges keeps all full (recent tier=25)', async () => {
+  mockState.enabled = true
+  mockState.effectiveWindow = 1_000_000 // ≥500k → recent=25, mid=50
+  const messages = buildLongConversation(25, 5_000)
+
+  const body = await captureRequestBody(messages, 'gpt-4.1')
+  const toolMessages = getToolMessages(body)
+
+  expect(toolMessages.length).toBe(25)
+  for (const m of toolMessages) {
+    expect(m.content.length).toBe(5_000)
+    expect(m.content).not.toContain('[…truncated')
+    expect(m.content).not.toContain('chars omitted')
+  }
+})
+
+test('FIX: gpt-4.1 (1M context) with 30 exchanges → only first 5 mid-truncated', async () => {
+  mockState.enabled = true
+  mockState.effectiveWindow = 1_000_000 // recent=25, mid=50
+  const messages = buildLongConversation(30, 5_000)
+
+  const body = await captureRequestBody(messages, 'gpt-4.1')
+  const toolMessages = getToolMessages(body)
+
+  // 30 total: indices 0..4 mid, indices 5..29 recent
+  for (let i = 0; i < 5; i++) {
+    expect(toolMessages[i].content).toContain('[…truncated')
+  }
+  for (let i = 5; i < 30; i++) {
+    expect(toolMessages[i].content.length).toBe(5_000)
+  }
+})
+
+// ============================================================================
+// FIX: stub preserves tool name and args — model can re-invoke if needed
+// ============================================================================
+
+test('FIX: stub format includes original tool name and arguments', async () => {
+  mockState.enabled = true
+  mockState.effectiveWindow = 100_000
+  const messages = buildLongConversation(30, 5_000)
+
+  const body = await captureRequestBody(messages, 'gpt-4o')
+  const toolMessages = getToolMessages(body)
+  const oldestStub = toolMessages[0].content
+
+  // Format: [<name> args=<args> → <N> chars omitted]
+  expect(oldestStub).toMatch(/^\[Read /)
+  expect(oldestStub).toMatch(/file_path/)
+  expect(oldestStub).toMatch(/→ 5000 chars omitted\]$/)
+})
+
+// ============================================================================
+// FIX: tool_use blocks (assistant tool_calls) are never modified
+// ============================================================================
+
+test('FIX: every tool_call retains its full id, name, and arguments', async () => {
+  mockState.enabled = true
+  mockState.effectiveWindow = 100_000
+  const messages = buildLongConversation(30, 5_000)
+
+  const body = await captureRequestBody(messages, 'gpt-4o')
+  const toolCalls = getAssistantToolCalls(body) as Array<{
+    id: string
+    function: { name: string; arguments: string }
+  }>
+
+  expect(toolCalls.length).toBe(30)
+  for (let i = 0; i < toolCalls.length; i++) {
+    expect(toolCalls[i].id).toBe(`toolu_${i}`)
+    expect(toolCalls[i].function.name).toBe('Read')
+    expect(JSON.parse(toolCalls[i].function.arguments)).toEqual({
+      file_path: `/path/to/file${i}.ts`,
+    })
+  }
+})
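+
+// 24_000 below is a representative value inside the 16k–32k band; getTiers
+// keys off the band, not the exact number.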
+// ============================================================================
+// FIX: small-context provider (Mistral 32k) gets aggressive compression
+// ============================================================================
+
+test('FIX: 32k window (Mistral tier) → recent=3 keeps last 3 only', async () => {
+  mockState.enabled = true
+  mockState.effectiveWindow = 24_000 // 16–32k → recent=3, mid=5
+  const messages = buildLongConversation(15, 3_000)
+
+  const body = await captureRequestBody(messages, 'mistral-large-latest')
+  const toolMessages = getToolMessages(body)
+
+  // 15 total: indices 0..6 old, 7..11 mid, 12..14 recent
+  for (let i = 0; i <= 6; i++) {
+    expect(toolMessages[i].content).toContain('chars omitted')
+  }
+  for (let i = 7; i <= 11; i++) {
+    expect(toolMessages[i].content).toContain('[…truncated')
+  }
+  for (let i = 12; i <= 14; i++) {
+    expect(toolMessages[i].content.length).toBe(3_000)
+  }
+})
diff --git a/src/services/api/openaiShim.ts b/src/services/api/openaiShim.ts
index e506fc97..09c8ec31 100644
--- a/src/services/api/openaiShim.ts
+++ b/src/services/api/openaiShim.ts
@@ -46,6 +46,7 @@ import {
   type AnthropicUsage,
   type ShimCreateParams,
 } from './codexShim.js'
+import { compressToolHistory } from './compressToolHistory.js'
 import { fetchWithProxyRetry } from './fetchWithProxyRetry.js'
 import {
   getLocalProviderRetryBaseUrls,
@@ -1299,14 +1300,15 @@ class OpenAIShimMessages {
     params: ShimCreateParams,
     options?: { signal?: AbortSignal; headers?: Record<string, string> },
   ): Promise<unknown> {
-    const openaiMessages = convertMessages(
+    const compressedMessages = compressToolHistory(
       params.messages as Array<{
         role: string
         message?: { role?: string; content?: unknown }
        content?: unknown
      }>,
-      params.system,
+      request.resolvedModel,
    )
+    const openaiMessages = convertMessages(compressedMessages, params.system)
 
     const body: Record<string, unknown> = {
       model: request.resolvedModel,
diff --git a/src/services/compact/microCompact.ts b/src/services/compact/microCompact.ts
index f5820005..fc8cdd62 100644
--- a/src/services/compact/microCompact.ts
+++ b/src/services/compact/microCompact.ts
@@ -38,7 +38,7 @@ export const TIME_BASED_MC_CLEARED_MESSAGE = '[Old tool result content cleared]'
 const IMAGE_MAX_TOKEN_SIZE = 2000
 // Only compact these built-in tools (MCP tools are also compactable via prefix match)
-const COMPACTABLE_TOOLS = new Set([
+export const COMPACTABLE_TOOLS = new Set([
   FILE_READ_TOOL_NAME,
   ...SHELL_TOOL_NAMES,
   GREP_TOOL_NAME,
@@ -51,7 +51,7 @@ const COMPACTABLE_TOOLS = new Set([
 const MCP_TOOL_PREFIX = 'mcp__'
 
-function isCompactableTool(name: string): boolean {
+export function isCompactableTool(name: string): boolean {
   return COMPACTABLE_TOOLS.has(name) || name.startsWith(MCP_TOOL_PREFIX)
 }
diff --git a/src/utils/config.ts b/src/utils/config.ts
index 1c999625..b4852b52 100644
--- a/src/utils/config.ts
+++ b/src/utils/config.ts
@@ -244,6 +244,7 @@ export type GlobalConfig = {
   bypassPermissionsModeAccepted?: boolean
   hasUsedBackslashReturn?: boolean
   autoCompactEnabled: boolean // Controls whether auto-compact is enabled
+  toolHistoryCompressionEnabled: boolean // Compress old tool_result content for small-context providers
   showTurnDuration: boolean // Controls whether to show turn duration message (e.g., "Cooked for 1m 6s")
   /**
    * @deprecated Use settings.env instead.
@@ -622,6 +623,7 @@ function createDefaultGlobalConfig(): GlobalConfig { verbose: false, editorMode: 'normal', autoCompactEnabled: true, + toolHistoryCompressionEnabled: true, showTurnDuration: true, hasSeenTasksHint: false, hasUsedStash: false, @@ -668,6 +670,7 @@ export const GLOBAL_CONFIG_KEYS = [ 'editorMode', 'hasUsedBackslashReturn', 'autoCompactEnabled', + 'toolHistoryCompressionEnabled', 'showTurnDuration', 'diffTool', 'env',