feat(api): compress old tool_result content for small-context providers (#801)
* feat(api): compress old tool_result content for small-context providers Adds a shim-layer pass that tiers tool_result content by age on providers with small effective context windows (Copilot gpt-4o 128k, Mistral, Ollama). Recent turns remain full; mid-tier results are truncated to 2k chars; older results are replaced with a stub that preserves tool name and arguments so the model can re-invoke if needed. Tier sizes auto-tune via getEffectiveContextWindowSize, same calculation used by auto-compact. Reuses COMPACTABLE_TOOLS and TOOL_RESULT_CLEARED_MESSAGE to complement (not duplicate) microCompact. Configurable via /config toolHistoryCompressionEnabled. Addresses active-session context accumulation on Copilot where microCompact's time-based trigger never fires, which surfaces as "tools appearing in a loop" and prompt_too_long errors after ~15 turns. * fix: config tool history
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -7,6 +7,7 @@ dist/
|
|||||||
.openclaude-profile.json
|
.openclaude-profile.json
|
||||||
reports/
|
reports/
|
||||||
GEMINI.md
|
GEMINI.md
|
||||||
|
CLAUDE.md
|
||||||
package-lock.json
|
package-lock.json
|
||||||
/.claude
|
/.claude
|
||||||
coverage/
|
coverage/
|
||||||
|
|||||||
@@ -281,6 +281,24 @@ export function Config({
|
|||||||
enabled: autoCompactEnabled
|
enabled: autoCompactEnabled
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
id: 'toolHistoryCompressionEnabled',
|
||||||
|
label: 'Tool history compression',
|
||||||
|
value: globalConfig.toolHistoryCompressionEnabled,
|
||||||
|
type: 'boolean' as const,
|
||||||
|
onChange(toolHistoryCompressionEnabled: boolean) {
|
||||||
|
saveGlobalConfig(current => ({
|
||||||
|
...current,
|
||||||
|
toolHistoryCompressionEnabled
|
||||||
|
}));
|
||||||
|
setGlobalConfig({
|
||||||
|
...getGlobalConfig(),
|
||||||
|
toolHistoryCompressionEnabled
|
||||||
|
});
|
||||||
|
logEvent('tengu_tool_history_compression_setting_changed', {
|
||||||
|
enabled: toolHistoryCompressionEnabled
|
||||||
|
});
|
||||||
|
}
|
||||||
}, {
|
}, {
|
||||||
id: 'spinnerTipsEnabled',
|
id: 'spinnerTipsEnabled',
|
||||||
label: 'Show tips',
|
label: 'Show tips',
|
||||||
@@ -1158,6 +1176,9 @@ export function Config({
|
|||||||
if (globalConfig.autoCompactEnabled !== initialConfig.current.autoCompactEnabled) {
|
if (globalConfig.autoCompactEnabled !== initialConfig.current.autoCompactEnabled) {
|
||||||
formattedChanges.push(`${globalConfig.autoCompactEnabled ? 'Enabled' : 'Disabled'} auto-compact`);
|
formattedChanges.push(`${globalConfig.autoCompactEnabled ? 'Enabled' : 'Disabled'} auto-compact`);
|
||||||
}
|
}
|
||||||
|
if (globalConfig.toolHistoryCompressionEnabled !== initialConfig.current.toolHistoryCompressionEnabled) {
|
||||||
|
formattedChanges.push(`${globalConfig.toolHistoryCompressionEnabled ? 'Enabled' : 'Disabled'} tool history compression`);
|
||||||
|
}
|
||||||
if (globalConfig.respectGitignore !== initialConfig.current.respectGitignore) {
|
if (globalConfig.respectGitignore !== initialConfig.current.respectGitignore) {
|
||||||
formattedChanges.push(`${globalConfig.respectGitignore ? 'Enabled' : 'Disabled'} respect .gitignore in file picker`);
|
formattedChanges.push(`${globalConfig.respectGitignore ? 'Enabled' : 'Disabled'} respect .gitignore in file picker`);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import { APIError } from '@anthropic-ai/sdk'
|
import { APIError } from '@anthropic-ai/sdk'
|
||||||
|
import { compressToolHistory } from './compressToolHistory.js'
|
||||||
import { fetchWithProxyRetry } from './fetchWithProxyRetry.js'
|
import { fetchWithProxyRetry } from './fetchWithProxyRetry.js'
|
||||||
import type {
|
import type {
|
||||||
ResolvedCodexCredentials,
|
ResolvedCodexCredentials,
|
||||||
@@ -484,13 +485,15 @@ export async function performCodexRequest(options: {
|
|||||||
defaultHeaders: Record<string, string>
|
defaultHeaders: Record<string, string>
|
||||||
signal?: AbortSignal
|
signal?: AbortSignal
|
||||||
}): Promise<Response> {
|
}): Promise<Response> {
|
||||||
const input = convertAnthropicMessagesToResponsesInput(
|
const compressedMessages = compressToolHistory(
|
||||||
options.params.messages as Array<{
|
options.params.messages as Array<{
|
||||||
role?: string
|
role?: string
|
||||||
message?: { role?: string; content?: unknown }
|
message?: { role?: string; content?: unknown }
|
||||||
content?: unknown
|
content?: unknown
|
||||||
}>,
|
}>,
|
||||||
|
options.request.resolvedModel,
|
||||||
)
|
)
|
||||||
|
const input = convertAnthropicMessagesToResponsesInput(compressedMessages)
|
||||||
const body: Record<string, unknown> = {
|
const body: Record<string, unknown> = {
|
||||||
model: options.request.resolvedModel,
|
model: options.request.resolvedModel,
|
||||||
input: input.length > 0
|
input: input.length > 0
|
||||||
|
|||||||
572
src/services/api/compressToolHistory.test.ts
Normal file
572
src/services/api/compressToolHistory.test.ts
Normal file
@@ -0,0 +1,572 @@
|
|||||||
|
import { afterEach, beforeEach, expect, mock, test } from 'bun:test'
|
||||||
|
import { compressToolHistory, getTiers } from './compressToolHistory.js'
|
||||||
|
|
||||||
|
// Mock the two dependencies so tests are deterministic and don't read disk config.
|
||||||
|
const mockState = {
|
||||||
|
enabled: true,
|
||||||
|
effectiveWindow: 100_000,
|
||||||
|
}
|
||||||
|
|
||||||
|
mock.module('../../utils/config.js', () => ({
|
||||||
|
getGlobalConfig: () => ({
|
||||||
|
toolHistoryCompressionEnabled: mockState.enabled,
|
||||||
|
}),
|
||||||
|
}))
|
||||||
|
|
||||||
|
mock.module('../compact/autoCompact.js', () => ({
|
||||||
|
getEffectiveContextWindowSize: () => mockState.effectiveWindow,
|
||||||
|
}))
|
||||||
|
|
||||||
|
beforeEach(() => {
|
||||||
|
mockState.enabled = true
|
||||||
|
mockState.effectiveWindow = 100_000
|
||||||
|
})
|
||||||
|
|
||||||
|
afterEach(() => {
|
||||||
|
mockState.enabled = true
|
||||||
|
mockState.effectiveWindow = 100_000
|
||||||
|
})
|
||||||
|
|
||||||
|
type Block = Record<string, unknown>
|
||||||
|
type Msg = { role: string; content: Block[] | string }
|
||||||
|
|
||||||
|
function bigText(n: number): string {
|
||||||
|
return 'x'.repeat(n)
|
||||||
|
}
|
||||||
|
|
||||||
|
function buildToolExchange(id: number, resultLength: number): Msg[] {
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
role: 'assistant',
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'tool_use',
|
||||||
|
id: `toolu_${id}`,
|
||||||
|
name: 'Read',
|
||||||
|
input: { file_path: `/path/to/file${id}.ts` },
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'tool_result',
|
||||||
|
tool_use_id: `toolu_${id}`,
|
||||||
|
content: bigText(resultLength),
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
function buildConversation(numToolExchanges: number, resultLength = 5_000): Msg[] {
|
||||||
|
const out: Msg[] = [{ role: 'user', content: 'Initial request' }]
|
||||||
|
for (let i = 0; i < numToolExchanges; i++) {
|
||||||
|
out.push(...buildToolExchange(i, resultLength))
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
function getResultMessages(messages: Msg[]): Msg[] {
|
||||||
|
return messages.filter(
|
||||||
|
m => Array.isArray(m.content) && m.content.some((b: any) => b.type === 'tool_result'),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
function getResultBlock(msg: Msg): Block {
|
||||||
|
return (msg.content as Block[]).find((b: any) => b.type === 'tool_result') as Block
|
||||||
|
}
|
||||||
|
|
||||||
|
function getResultText(msg: Msg): string {
|
||||||
|
const block = getResultBlock(msg)
|
||||||
|
const c = block.content
|
||||||
|
if (typeof c === 'string') return c
|
||||||
|
if (Array.isArray(c)) {
|
||||||
|
return c
|
||||||
|
.filter((b: any) => b.type === 'text')
|
||||||
|
.map((b: any) => b.text)
|
||||||
|
.join('\n')
|
||||||
|
}
|
||||||
|
return ''
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------- getTiers ----------
|
||||||
|
|
||||||
|
test('getTiers: < 16k window → recent=2, mid=3', () => {
|
||||||
|
expect(getTiers(8_000)).toEqual({ recent: 2, mid: 3 })
|
||||||
|
})
|
||||||
|
|
||||||
|
test('getTiers: 16k–32k → recent=3, mid=5', () => {
|
||||||
|
expect(getTiers(20_000)).toEqual({ recent: 3, mid: 5 })
|
||||||
|
})
|
||||||
|
|
||||||
|
test('getTiers: 32k–64k → recent=4, mid=8', () => {
|
||||||
|
expect(getTiers(48_000)).toEqual({ recent: 4, mid: 8 })
|
||||||
|
})
|
||||||
|
|
||||||
|
test('getTiers: 64k–128k (Copilot gpt-4o) → recent=5, mid=10', () => {
|
||||||
|
expect(getTiers(100_000)).toEqual({ recent: 5, mid: 10 })
|
||||||
|
})
|
||||||
|
|
||||||
|
test('getTiers: 128k–256k (Copilot Claude) → recent=8, mid=15', () => {
|
||||||
|
expect(getTiers(200_000)).toEqual({ recent: 8, mid: 15 })
|
||||||
|
})
|
||||||
|
|
||||||
|
test('getTiers: 256k–500k → recent=12, mid=25', () => {
|
||||||
|
expect(getTiers(400_000)).toEqual({ recent: 12, mid: 25 })
|
||||||
|
})
|
||||||
|
|
||||||
|
test('getTiers: ≥ 500k (gpt-4.1 1M) → recent=25, mid=50', () => {
|
||||||
|
expect(getTiers(1_000_000)).toEqual({ recent: 25, mid: 50 })
|
||||||
|
})
|
||||||
|
|
||||||
|
// ---------- master switch ----------
|
||||||
|
|
||||||
|
test('pass-through when toolHistoryCompressionEnabled is false', () => {
|
||||||
|
mockState.enabled = false
|
||||||
|
const messages = buildConversation(20)
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
expect(result).toBe(messages) // same reference (no transformation)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('pass-through when total tool_results <= recent tier', () => {
|
||||||
|
// 100k effective → recent=5; only 4 exchanges → no compression
|
||||||
|
const messages = buildConversation(4)
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
expect(result).toBe(messages)
|
||||||
|
})
|
||||||
|
|
||||||
|
// ---------- per-tier behavior ----------
|
||||||
|
|
||||||
|
test('recent tier: tool_result content untouched', () => {
|
||||||
|
// 100k effective → recent=5, mid=10. With 6 exchanges, only the oldest is touched.
|
||||||
|
const messages = buildConversation(6, 5_000)
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
|
||||||
|
// Last 5 should be untouched (full 5000 chars)
|
||||||
|
for (let i = resultMsgs.length - 5; i < resultMsgs.length; i++) {
|
||||||
|
expect(getResultText(resultMsgs[i]).length).toBe(5_000)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
test('mid tier: long content truncated to MID_MAX_CHARS with marker', () => {
|
||||||
|
// 100k → recent=5, mid=10. 10 exchanges: 5 recent + 5 mid (none old).
|
||||||
|
const messages = buildConversation(10, 5_000)
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
|
||||||
|
// First 5 are mid tier — should be truncated to ~2000 chars + marker
|
||||||
|
for (let i = 0; i < 5; i++) {
|
||||||
|
const text = getResultText(resultMsgs[i])
|
||||||
|
expect(text).toContain('[…truncated')
|
||||||
|
expect(text).toContain('chars from tool history]')
|
||||||
|
// Should be roughly 2000 chars + marker (under 2200)
|
||||||
|
expect(text.length).toBeLessThan(2_200)
|
||||||
|
expect(text.length).toBeGreaterThan(2_000)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
test('mid tier: short content (< MID_MAX_CHARS) untouched', () => {
|
||||||
|
const messages = buildConversation(10, 500) // 500 < MID_MAX_CHARS
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
|
||||||
|
for (let i = 0; i < 5; i++) {
|
||||||
|
expect(getResultText(resultMsgs[i])).toBe(bigText(500))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
test('old tier: content replaced with stub [name args={...} → N chars omitted]', () => {
|
||||||
|
// 100k → recent=5, mid=10, old=rest. 20 exchanges → 5 old + 10 mid + 5 recent.
|
||||||
|
const messages = buildConversation(20, 5_000)
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
|
||||||
|
// First 5 are old tier — should be stubs
|
||||||
|
for (let i = 0; i < 5; i++) {
|
||||||
|
const text = getResultText(resultMsgs[i])
|
||||||
|
expect(text).toMatch(/^\[Read args=\{.*\} → 5000 chars omitted\]$/)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
test('old tier: stub args truncated to 200 chars', () => {
|
||||||
|
const longArg = bigText(500)
|
||||||
|
const messages: Msg[] = [
|
||||||
|
{ role: 'user', content: 'start' },
|
||||||
|
{
|
||||||
|
role: 'assistant',
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'tool_use',
|
||||||
|
id: 'toolu_x',
|
||||||
|
name: 'Bash',
|
||||||
|
input: { command: longArg },
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: [
|
||||||
|
{ type: 'tool_result', tool_use_id: 'toolu_x', content: 'output' },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
// Pad with enough recent exchanges to push the above into old tier
|
||||||
|
...buildConversation(20, 100).slice(1),
|
||||||
|
]
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
const text = getResultText(resultMsgs[0])
|
||||||
|
|
||||||
|
// Stub format: [Bash args=<json≤200chars> → N chars omitted]
|
||||||
|
// The args portion (between args= and →) must be ≤ 200 chars.
|
||||||
|
const argsMatch = text.match(/args=(.*?) →/)
|
||||||
|
expect(argsMatch).not.toBeNull()
|
||||||
|
expect(argsMatch![1].length).toBeLessThanOrEqual(200)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('old tier: orphan tool_result (no matching tool_use) falls back to "tool"', () => {
|
||||||
|
const messages: Msg[] = [
|
||||||
|
{ role: 'user', content: 'start' },
|
||||||
|
// Orphan: tool_result without matching tool_use in history
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: [
|
||||||
|
{ type: 'tool_result', tool_use_id: 'orphan_id', content: 'data' },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
...buildConversation(20, 100).slice(1),
|
||||||
|
]
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
const text = getResultText(resultMsgs[0])
|
||||||
|
|
||||||
|
expect(text).toMatch(/^\[tool args=\{\} → 4 chars omitted\]$/)
|
||||||
|
})
|
||||||
|
|
||||||
|
// ---------- structural preservation ----------
|
||||||
|
|
||||||
|
test('tool_use blocks always preserved', () => {
|
||||||
|
const messages = buildConversation(20, 5_000)
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
|
||||||
|
const useCount = (msgs: Msg[]) =>
|
||||||
|
msgs.reduce((sum, m) => {
|
||||||
|
if (!Array.isArray(m.content)) return sum
|
||||||
|
return sum + m.content.filter((b: any) => b.type === 'tool_use').length
|
||||||
|
}, 0)
|
||||||
|
|
||||||
|
expect(useCount(result as Msg[])).toBe(useCount(messages))
|
||||||
|
})
|
||||||
|
|
||||||
|
test('text blocks always preserved', () => {
|
||||||
|
const messages: Msg[] = [
|
||||||
|
{ role: 'user', content: 'first' },
|
||||||
|
{
|
||||||
|
role: 'assistant',
|
||||||
|
content: [
|
||||||
|
{ type: 'text', text: 'reasoning before tool' },
|
||||||
|
{ type: 'tool_use', id: 'toolu_1', name: 'Read', input: {} },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: [{ type: 'tool_result', tool_use_id: 'toolu_1', content: bigText(5000) }],
|
||||||
|
},
|
||||||
|
...buildConversation(20, 5_000).slice(1),
|
||||||
|
]
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const assistantMsg = (result as Msg[])[1]
|
||||||
|
const textBlock = (assistantMsg.content as Block[]).find((b: any) => b.type === 'text')
|
||||||
|
|
||||||
|
expect(textBlock).toEqual({ type: 'text', text: 'reasoning before tool' })
|
||||||
|
})
|
||||||
|
|
||||||
|
test('thinking blocks always preserved', () => {
|
||||||
|
const messages: Msg[] = [
|
||||||
|
{ role: 'user', content: 'first' },
|
||||||
|
{
|
||||||
|
role: 'assistant',
|
||||||
|
content: [
|
||||||
|
{ type: 'thinking', thinking: 'internal reasoning', signature: 'sig' },
|
||||||
|
{ type: 'tool_use', id: 'toolu_1', name: 'Read', input: {} },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: [{ type: 'tool_result', tool_use_id: 'toolu_1', content: bigText(5000) }],
|
||||||
|
},
|
||||||
|
...buildConversation(20, 5_000).slice(1),
|
||||||
|
]
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const assistantMsg = (result as Msg[])[1]
|
||||||
|
const thinking = (assistantMsg.content as Block[]).find((b: any) => b.type === 'thinking')
|
||||||
|
|
||||||
|
expect(thinking).toEqual({
|
||||||
|
type: 'thinking',
|
||||||
|
thinking: 'internal reasoning',
|
||||||
|
signature: 'sig',
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
test('non-array content (string) handled gracefully', () => {
|
||||||
|
const messages: Msg[] = [
|
||||||
|
{ role: 'user', content: 'plain string content' },
|
||||||
|
...buildConversation(20, 100).slice(1),
|
||||||
|
]
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
expect((result as Msg[])[0].content).toBe('plain string content')
|
||||||
|
})
|
||||||
|
|
||||||
|
test('empty content array handled gracefully', () => {
|
||||||
|
const messages: Msg[] = [
|
||||||
|
{ role: 'user', content: [] },
|
||||||
|
...buildConversation(20, 100).slice(1),
|
||||||
|
]
|
||||||
|
expect(() => compressToolHistory(messages, 'gpt-4o')).not.toThrow()
|
||||||
|
})
|
||||||
|
|
||||||
|
// ---------- message shape compatibility ----------
|
||||||
|
|
||||||
|
test('wrapped shape ({ message: { role, content } }) handled', () => {
|
||||||
|
type WrappedMsg = { message: { role: string; content: Block[] | string } }
|
||||||
|
const wrap = (m: Msg): WrappedMsg => ({ message: { role: m.role, content: m.content } })
|
||||||
|
const messages = buildConversation(20, 5_000).map(wrap)
|
||||||
|
const result = compressToolHistory(messages as any, 'gpt-4o')
|
||||||
|
|
||||||
|
// First wrapped tool-result message should have stub content (old tier)
|
||||||
|
const firstResultMsg = (result as WrappedMsg[]).find(
|
||||||
|
m =>
|
||||||
|
Array.isArray(m.message.content) &&
|
||||||
|
m.message.content.some((b: any) => b.type === 'tool_result'),
|
||||||
|
)
|
||||||
|
const block = (firstResultMsg!.message.content as Block[]).find(
|
||||||
|
(b: any) => b.type === 'tool_result',
|
||||||
|
) as Block
|
||||||
|
const text = ((block.content as Block[])[0] as any).text
|
||||||
|
expect(text).toMatch(/^\[Read args=.*→ 5000 chars omitted\]$/)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('flat shape ({ role, content }) handled', () => {
|
||||||
|
const messages = buildConversation(20, 5_000)
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
|
||||||
|
expect(getResultText(resultMsgs[0])).toMatch(/^\[Read args=.*→ 5000 chars omitted\]$/)
|
||||||
|
})
|
||||||
|
|
||||||
|
// ---------- tier boundary correctness ----------
|
||||||
|
|
||||||
|
test('tier boundaries: 6 exchanges → 1 mid + 5 recent (recent=5)', () => {
|
||||||
|
const messages = buildConversation(6, 5_000)
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
|
||||||
|
// Oldest: mid (truncated)
|
||||||
|
expect(getResultText(resultMsgs[0])).toContain('[…truncated')
|
||||||
|
// Last 5: untouched
|
||||||
|
for (let i = 1; i < 6; i++) {
|
||||||
|
expect(getResultText(resultMsgs[i]).length).toBe(5_000)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
test('tier boundaries: 16 exchanges → 1 old + 10 mid + 5 recent', () => {
|
||||||
|
const messages = buildConversation(16, 5_000)
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
|
||||||
|
// Oldest 1: stub (old tier)
|
||||||
|
expect(getResultText(resultMsgs[0])).toMatch(/^\[Read .*chars omitted\]$/)
|
||||||
|
// Next 10: mid (truncated)
|
||||||
|
for (let i = 1; i < 11; i++) {
|
||||||
|
expect(getResultText(resultMsgs[i])).toContain('[…truncated')
|
||||||
|
}
|
||||||
|
// Last 5: untouched
|
||||||
|
for (let i = 11; i < 16; i++) {
|
||||||
|
expect(getResultText(resultMsgs[i]).length).toBe(5_000)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
test('large window (1M) with 30 exchanges: all untouched (recent=25 ≥ 30 - 5)', () => {
|
||||||
|
// ≥500k → recent=25, mid=50. 30 exchanges → 5 mid + 25 recent. None old.
|
||||||
|
mockState.effectiveWindow = 1_000_000
|
||||||
|
const messages = buildConversation(30, 5_000)
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4.1')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
|
||||||
|
// Last 25: untouched
|
||||||
|
for (let i = 5; i < 30; i++) {
|
||||||
|
expect(getResultText(resultMsgs[i]).length).toBe(5_000)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
// ---------- attribute preservation ----------
|
||||||
|
|
||||||
|
test('is_error flag preserved in mid tier', () => {
|
||||||
|
const messages: Msg[] = [
|
||||||
|
{ role: 'user', content: 'start' },
|
||||||
|
{
|
||||||
|
role: 'assistant',
|
||||||
|
content: [{ type: 'tool_use', id: 'toolu_err', name: 'Bash', input: {} }],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'tool_result',
|
||||||
|
tool_use_id: 'toolu_err',
|
||||||
|
is_error: true,
|
||||||
|
content: bigText(5_000),
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
// Pad with enough recent exchanges to push the above into MID tier
|
||||||
|
...buildConversation(10, 100).slice(1),
|
||||||
|
]
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
const block = getResultBlock(resultMsgs[0]) as { is_error?: boolean; content: unknown }
|
||||||
|
|
||||||
|
expect(block.is_error).toBe(true)
|
||||||
|
expect(getResultText(resultMsgs[0])).toContain('[…truncated')
|
||||||
|
})
|
||||||
|
|
||||||
|
test('is_error flag preserved in old tier (stub)', () => {
|
||||||
|
const messages: Msg[] = [
|
||||||
|
{ role: 'user', content: 'start' },
|
||||||
|
{
|
||||||
|
role: 'assistant',
|
||||||
|
content: [{ type: 'tool_use', id: 'toolu_err', name: 'Bash', input: {} }],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'tool_result',
|
||||||
|
tool_use_id: 'toolu_err',
|
||||||
|
is_error: true,
|
||||||
|
content: bigText(5_000),
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
...buildConversation(20, 100).slice(1),
|
||||||
|
]
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
const block = getResultBlock(resultMsgs[0]) as { is_error?: boolean; content: unknown }
|
||||||
|
|
||||||
|
expect(block.is_error).toBe(true)
|
||||||
|
expect(getResultText(resultMsgs[0])).toMatch(/^\[Bash .*chars omitted\]$/)
|
||||||
|
})
|
||||||
|
|
||||||
|
// ---------- COMPACTABLE_TOOLS filter ----------
|
||||||
|
|
||||||
|
test('non-compactable tool (e.g. Task/Agent) is NEVER compressed', () => {
|
||||||
|
// Build conversation where the OLDEST exchange uses a non-compactable tool name
|
||||||
|
const messages: Msg[] = [
|
||||||
|
{ role: 'user', content: 'start' },
|
||||||
|
{
|
||||||
|
role: 'assistant',
|
||||||
|
content: [
|
||||||
|
{ type: 'tool_use', id: 'task_1', name: 'Task', input: { goal: 'plan' } },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: [
|
||||||
|
{ type: 'tool_result', tool_use_id: 'task_1', content: bigText(5_000) },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
// Pad with 20 compactable exchanges to push Task into old tier
|
||||||
|
...buildConversation(20, 100).slice(1),
|
||||||
|
]
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
|
||||||
|
// First tool_result is for Task (non-compactable) → must remain full
|
||||||
|
expect(getResultText(resultMsgs[0]).length).toBe(5_000)
|
||||||
|
expect(getResultText(resultMsgs[0])).not.toContain('chars omitted')
|
||||||
|
expect(getResultText(resultMsgs[0])).not.toContain('[…truncated')
|
||||||
|
})
|
||||||
|
|
||||||
|
test('mcp__ prefixed tools ARE compactable (matches microCompact behavior)', () => {
|
||||||
|
const messages: Msg[] = [
|
||||||
|
{ role: 'user', content: 'start' },
|
||||||
|
{
|
||||||
|
role: 'assistant',
|
||||||
|
content: [
|
||||||
|
{ type: 'tool_use', id: 'mcp_1', name: 'mcp__github__get_issue', input: {} },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: [
|
||||||
|
{ type: 'tool_result', tool_use_id: 'mcp_1', content: bigText(5_000) },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
...buildConversation(20, 100).slice(1),
|
||||||
|
]
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
|
||||||
|
// MCP tool result is compressed (gets stub since it's in old tier)
|
||||||
|
expect(getResultText(resultMsgs[0])).toMatch(/^\[mcp__github__get_issue .*chars omitted\]$/)
|
||||||
|
})
|
||||||
|
|
||||||
|
// ---------- skip already-cleared blocks ----------
|
||||||
|
|
||||||
|
test('blocks already cleared by microCompact are NOT re-compressed', () => {
|
||||||
|
const messages: Msg[] = [
|
||||||
|
{ role: 'user', content: 'start' },
|
||||||
|
{
|
||||||
|
role: 'assistant',
|
||||||
|
content: [{ type: 'tool_use', id: 'cleared_1', name: 'Read', input: {} }],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'tool_result',
|
||||||
|
tool_use_id: 'cleared_1',
|
||||||
|
content: '[Old tool result content cleared]', // microCompact's marker
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
...buildConversation(20, 100).slice(1),
|
||||||
|
]
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
|
||||||
|
// Already-cleared marker survives untouched (no double processing)
|
||||||
|
expect(getResultText(resultMsgs[0])).toBe('[Old tool result content cleared]')
|
||||||
|
})
|
||||||
|
|
||||||
|
test('extra block attributes (e.g. cache_control) preserved across rewrites', () => {
|
||||||
|
const cacheControl = { type: 'ephemeral' }
|
||||||
|
const messages: Msg[] = [
|
||||||
|
{ role: 'user', content: 'start' },
|
||||||
|
{
|
||||||
|
role: 'assistant',
|
||||||
|
content: [{ type: 'tool_use', id: 'toolu_cc', name: 'Read', input: {} }],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'tool_result',
|
||||||
|
tool_use_id: 'toolu_cc',
|
||||||
|
cache_control: cacheControl,
|
||||||
|
content: bigText(5_000),
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
...buildConversation(20, 100).slice(1),
|
||||||
|
]
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
const block = getResultBlock(resultMsgs[0]) as { cache_control?: unknown }
|
||||||
|
|
||||||
|
// The custom attribute survived the stub rewrite via ...block spread
|
||||||
|
expect(block.cache_control).toEqual(cacheControl)
|
||||||
|
})
|
||||||
255
src/services/api/compressToolHistory.ts
Normal file
255
src/services/api/compressToolHistory.ts
Normal file
@@ -0,0 +1,255 @@
|
|||||||
|
/**
|
||||||
|
* Compresses old tool_result content for stateless OpenAI-compatible providers
|
||||||
|
* (Copilot, Mistral, Ollama). Preserves all conversation structure — tool_use,
|
||||||
|
* tool_result pairing, text, thinking, and is_error all survive intact. Only
|
||||||
|
* the BULK text of older tool_results is shrunk to delay context saturation.
|
||||||
|
*
|
||||||
|
* Tier sizes scale with the model's effective context window via
|
||||||
|
* getEffectiveContextWindowSize() — same calculation used by auto-compact, so
|
||||||
|
* the two systems stay aligned.
|
||||||
|
*
|
||||||
|
* Complements (does not replace) microCompact.ts:
|
||||||
|
* - microCompact: time/cache-based, runs from query.ts, binary clear/keep,
|
||||||
|
* limited to Claude (cache editing) or idle gaps (time-based).
|
||||||
|
* - compressToolHistory: size-based, runs at the shim layer, tiered
|
||||||
|
* compression, covers the gap for active sessions on non-Claude providers.
|
||||||
|
*
|
||||||
|
* Reuses isCompactableTool from microCompact to avoid touching tools the
|
||||||
|
* project already classifies as unsafe to compress (e.g. Task, Agent).
|
||||||
|
* Skips blocks already cleared by microCompact (TOOL_RESULT_CLEARED_MESSAGE).
|
||||||
|
*
|
||||||
|
* Anthropic native bypasses both shims, so it is unaffected by this module.
|
||||||
|
*/
|
||||||
|
import { getEffectiveContextWindowSize } from '../compact/autoCompact.js'
|
||||||
|
import { isCompactableTool } from '../compact/microCompact.js'
|
||||||
|
import { TOOL_RESULT_CLEARED_MESSAGE } from '../../utils/toolResultStorage.js'
|
||||||
|
import { getGlobalConfig } from '../../utils/config.js'
|
||||||
|
|
||||||
|
// Mid-tier truncation budget. 2k chars ≈ 500 tokens, enough to preserve the
|
||||||
|
// shape of most tool outputs (file headers, command stderr, top grep hits)
|
||||||
|
// without ballooning context. Bump too high and the tier loses its purpose.
|
||||||
|
const MID_MAX_CHARS = 2_000
|
||||||
|
|
||||||
|
// Stub args budget. JSON.stringify of a typical tool input fits in 200 chars
|
||||||
|
// (file paths, short commands, small queries). Long inputs are rare and clamping
|
||||||
|
// here keeps the stub size bounded even when callers pass oversized arguments.
|
||||||
|
const STUB_ARGS_MAX_CHARS = 200
|
||||||
|
|
||||||
|
type AnyMessage = {
|
||||||
|
role?: string
|
||||||
|
message?: { role?: string; content?: unknown }
|
||||||
|
content?: unknown
|
||||||
|
}
|
||||||
|
|
||||||
|
type ToolResultBlock = {
|
||||||
|
type: 'tool_result'
|
||||||
|
tool_use_id?: string
|
||||||
|
is_error?: boolean
|
||||||
|
content?: unknown
|
||||||
|
}
|
||||||
|
|
||||||
|
type ToolUseBlock = {
|
||||||
|
type: 'tool_use'
|
||||||
|
id?: string
|
||||||
|
name?: string
|
||||||
|
input?: unknown
|
||||||
|
}
|
||||||
|
|
||||||
|
type Tiers = { recent: number; mid: number }
|
||||||
|
|
||||||
|
// Tier sizes scale with effective window. Targets roughly:
|
||||||
|
// - recent tier stays under ~25% of available window (full fidelity kept)
|
||||||
|
// - recent + mid tier stays under ~50% of available window (bounded bulk)
|
||||||
|
// - everything older collapses to ~15-token stubs
|
||||||
|
// Values assume ~5KB avg tool_result, which matches the Copilot default case
|
||||||
|
// (parallel_tool_calls=true means multiple Read/Bash outputs per turn). For
|
||||||
|
// ≥ 500k models the tiers are so generous that compression is effectively
|
||||||
|
// inert for any realistic session — see compressToolHistory.test.ts.
|
||||||
|
export function getTiers(effectiveWindow: number): Tiers {
|
||||||
|
if (effectiveWindow < 16_000) return { recent: 2, mid: 3 }
|
||||||
|
if (effectiveWindow < 32_000) return { recent: 3, mid: 5 }
|
||||||
|
if (effectiveWindow < 64_000) return { recent: 4, mid: 8 }
|
||||||
|
if (effectiveWindow < 128_000) return { recent: 5, mid: 10 }
|
||||||
|
if (effectiveWindow < 256_000) return { recent: 8, mid: 15 }
|
||||||
|
if (effectiveWindow < 500_000) return { recent: 12, mid: 25 }
|
||||||
|
return { recent: 25, mid: 50 }
|
||||||
|
}
|
||||||
|
|
||||||
|
function extractText(content: unknown): string {
|
||||||
|
if (typeof content === 'string') return content
|
||||||
|
if (Array.isArray(content)) {
|
||||||
|
return content
|
||||||
|
.filter(
|
||||||
|
(b: { type?: string; text?: string }) =>
|
||||||
|
b?.type === 'text' && typeof b.text === 'string',
|
||||||
|
)
|
||||||
|
.map((b: { text?: string }) => b.text ?? '')
|
||||||
|
.join('\n')
|
||||||
|
}
|
||||||
|
return ''
|
||||||
|
}
|
||||||
|
|
||||||
|
// Old-tier compression strategy. Replaces content entirely with a one-line
|
||||||
|
// metadata marker ~10× more token-efficient than a 500-char truncation AND
|
||||||
|
// unambiguous — partial truncations can look authoritative to the model. The
|
||||||
|
// stub format encodes tool name + args so the model can re-invoke the same
|
||||||
|
// tool if it needs the omitted output back.
|
||||||
|
function buildStub(
|
||||||
|
block: ToolResultBlock,
|
||||||
|
toolUsesById: Map<string, ToolUseBlock>,
|
||||||
|
): ToolResultBlock {
|
||||||
|
const original = extractText(block.content)
|
||||||
|
const toolUse = toolUsesById.get(block.tool_use_id ?? '')
|
||||||
|
const name = toolUse?.name ?? 'tool'
|
||||||
|
const args = toolUse?.input
|
||||||
|
? JSON.stringify(toolUse.input).slice(0, STUB_ARGS_MAX_CHARS)
|
||||||
|
: '{}'
|
||||||
|
return {
|
||||||
|
...block,
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'text',
|
||||||
|
text: `[${name} args=${args} → ${original.length} chars omitted]`,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mid-tier compression. The trailing marker is load-bearing: without it, the
|
||||||
|
// model can't distinguish "tool returned 2000 chars" from "tool returned 20k
|
||||||
|
// chars that we cut to 2000". Distinguishing those matters for the model's
|
||||||
|
// decision to re-invoke the tool.
|
||||||
|
function truncateBlock(
|
||||||
|
block: ToolResultBlock,
|
||||||
|
maxChars: number,
|
||||||
|
): ToolResultBlock {
|
||||||
|
const text = extractText(block.content)
|
||||||
|
if (text.length <= maxChars) return block
|
||||||
|
const omitted = text.length - maxChars
|
||||||
|
return {
|
||||||
|
...block,
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'text',
|
||||||
|
text: `${text.slice(0, maxChars)}\n[…truncated ${omitted} chars from tool history]`,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function getInner(msg: AnyMessage): { role?: string; content?: unknown } {
|
||||||
|
return (msg.message ?? msg) as { role?: string; content?: unknown }
|
||||||
|
}
|
||||||
|
|
||||||
|
function indexToolUses(messages: AnyMessage[]): Map<string, ToolUseBlock> {
|
||||||
|
const map = new Map<string, ToolUseBlock>()
|
||||||
|
for (const msg of messages) {
|
||||||
|
const content = getInner(msg).content
|
||||||
|
if (!Array.isArray(content)) continue
|
||||||
|
for (const b of content as Array<{ type?: string; id?: string }>) {
|
||||||
|
if (b?.type === 'tool_use' && b.id) {
|
||||||
|
map.set(b.id, b as ToolUseBlock)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return map
|
||||||
|
}
|
||||||
|
|
||||||
|
function indexToolResultMessages(messages: AnyMessage[]): number[] {
|
||||||
|
const indices: number[] = []
|
||||||
|
for (let i = 0; i < messages.length; i++) {
|
||||||
|
const inner = getInner(messages[i])
|
||||||
|
const role = inner.role ?? messages[i].role
|
||||||
|
const content = inner.content
|
||||||
|
if (
|
||||||
|
role === 'user' &&
|
||||||
|
Array.isArray(content) &&
|
||||||
|
content.some((b: { type?: string }) => b?.type === 'tool_result')
|
||||||
|
) {
|
||||||
|
indices.push(i)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return indices
|
||||||
|
}
|
||||||
|
|
||||||
|
function rewriteMessage<T extends AnyMessage>(
|
||||||
|
msg: T,
|
||||||
|
newContent: unknown[],
|
||||||
|
): T {
|
||||||
|
if (msg.message) {
|
||||||
|
return { ...msg, message: { ...msg.message, content: newContent } }
|
||||||
|
}
|
||||||
|
return { ...msg, content: newContent }
|
||||||
|
}
|
||||||
|
|
||||||
|
// microCompact.maybeTimeBasedMicrocompact may have already replaced old
|
||||||
|
// tool_result content with TOOL_RESULT_CLEARED_MESSAGE before we see it.
|
||||||
|
// Re-compressing produces a stub over a marker (e.g. `[Read args={} → 40
|
||||||
|
// chars omitted]`), wasteful and less informative than the canonical marker.
|
||||||
|
function isAlreadyCleared(block: ToolResultBlock): boolean {
|
||||||
|
const text = extractText(block.content)
|
||||||
|
return text === TOOL_RESULT_CLEARED_MESSAGE
|
||||||
|
}
|
||||||
|
|
||||||
|
function shouldCompressBlock(
|
||||||
|
block: ToolResultBlock,
|
||||||
|
toolUsesById: Map<string, ToolUseBlock>,
|
||||||
|
): boolean {
|
||||||
|
if (isAlreadyCleared(block)) return false
|
||||||
|
const toolUse = toolUsesById.get(block.tool_use_id ?? '')
|
||||||
|
// Unknown tool name (orphan tool_result with no matching tool_use) falls
|
||||||
|
// through to compression with a generic "tool" stub. Safer default: the
|
||||||
|
// original tool_use vanished so there's no downstream use for the output.
|
||||||
|
if (!toolUse?.name) return true
|
||||||
|
// Respect microCompact's curated safe-to-compress set (Read/Bash/Grep/…/
|
||||||
|
// mcp__*) so user-facing flow tools (Task, Agent, custom) stay intact.
|
||||||
|
return isCompactableTool(toolUse.name)
|
||||||
|
}
|
||||||
|
|
||||||
|
export function compressToolHistory<T extends AnyMessage>(
|
||||||
|
messages: T[],
|
||||||
|
model: string,
|
||||||
|
): T[] {
|
||||||
|
// Master kill-switch. Returns the original reference so callers skip a
|
||||||
|
// defensive copy when the feature is disabled.
|
||||||
|
if (!getGlobalConfig().toolHistoryCompressionEnabled) return messages
|
||||||
|
|
||||||
|
const tiers = getTiers(getEffectiveContextWindowSize(model))
|
||||||
|
|
||||||
|
const toolResultIndices = indexToolResultMessages(messages)
|
||||||
|
const total = toolResultIndices.length
|
||||||
|
// If every tool-result fits in the recent tier, no boundary crosses; return
|
||||||
|
// the same reference for the same copy-elision reason.
|
||||||
|
if (total <= tiers.recent) return messages
|
||||||
|
|
||||||
|
// O(1) lookup: messageIndex → tool-result position (0 = oldest). Replaces
|
||||||
|
// the naive Array.indexOf(i) that was O(n²) across the .map below.
|
||||||
|
const positionByIndex = new Map<number, number>()
|
||||||
|
for (let pos = 0; pos < toolResultIndices.length; pos++) {
|
||||||
|
positionByIndex.set(toolResultIndices[pos], pos)
|
||||||
|
}
|
||||||
|
|
||||||
|
const toolUsesById = indexToolUses(messages)
|
||||||
|
|
||||||
|
return messages.map((msg, i) => {
|
||||||
|
const pos = positionByIndex.get(i)
|
||||||
|
if (pos === undefined) return msg
|
||||||
|
|
||||||
|
const fromEnd = total - 1 - pos
|
||||||
|
if (fromEnd < tiers.recent) return msg
|
||||||
|
|
||||||
|
const inMidWindow = fromEnd < tiers.recent + tiers.mid
|
||||||
|
const content = getInner(msg).content as unknown[]
|
||||||
|
const newContent = content.map(block => {
|
||||||
|
const b = block as { type?: string }
|
||||||
|
if (b?.type !== 'tool_result') return block
|
||||||
|
const tr = block as ToolResultBlock
|
||||||
|
if (!shouldCompressBlock(tr, toolUsesById)) return block
|
||||||
|
return inMidWindow
|
||||||
|
? truncateBlock(tr, MID_MAX_CHARS)
|
||||||
|
: buildStub(tr, toolUsesById)
|
||||||
|
})
|
||||||
|
|
||||||
|
return rewriteMessage(msg, newContent)
|
||||||
|
})
|
||||||
|
}
|
||||||
317
src/services/api/openaiShim.compression.test.ts
Normal file
317
src/services/api/openaiShim.compression.test.ts
Normal file
@@ -0,0 +1,317 @@
|
|||||||
|
import { afterEach, beforeEach, expect, mock, test } from 'bun:test'
|
||||||
|
import { createOpenAIShimClient } from './openaiShim.js'
|
||||||
|
|
||||||
|
type FetchType = typeof globalThis.fetch
// Captured once at module load so afterEach can restore the real fetch.
const originalFetch = globalThis.fetch

// Snapshot of the env vars this suite mutates; restored verbatim in afterEach.
const originalEnv = {
  OPENAI_BASE_URL: process.env.OPENAI_BASE_URL,
  OPENAI_API_KEY: process.env.OPENAI_API_KEY,
  OPENAI_MODEL: process.env.OPENAI_MODEL,
}

// Mock config + autoCompact so the shim sees deterministic state.
// Tests toggle these fields to simulate different providers/windows.
const mockState = {
  enabled: true,
  effectiveWindow: 100_000, // Copilot gpt-4o tier
}
|
||||||
|
|
||||||
|
// Route the shim's config reads through mockState so each test can flip
// compression on/off without touching real user config on disk.
mock.module('../../utils/config.js', () => ({
  getGlobalConfig: () => ({
    toolHistoryCompressionEnabled: mockState.enabled,
    autoCompactEnabled: false,
  }),
}))

// Pin the effective context window so tier selection is deterministic.
mock.module('../compact/autoCompact.js', () => ({
  getEffectiveContextWindowSize: () => mockState.effectiveWindow,
}))
|
||||||
|
|
||||||
|
// Minimal structural view of the shim client — only the call path these
// tests exercise (beta.messages.create).
type OpenAIShimClient = {
  beta: {
    messages: {
      create: (
        params: Record<string, unknown>,
        options?: Record<string, unknown>,
      ) => Promise<unknown>
    }
  }
}
|
||||||
|
|
||||||
|
function bigText(n: number): string {
|
||||||
|
return 'A'.repeat(n)
|
||||||
|
}
|
||||||
|
|
||||||
|
function buildToolExchange(id: number, resultLength: number) {
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
role: 'assistant',
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'tool_use',
|
||||||
|
id: `toolu_${id}`,
|
||||||
|
name: 'Read',
|
||||||
|
input: { file_path: `/path/to/file${id}.ts` },
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'tool_result',
|
||||||
|
tool_use_id: `toolu_${id}`,
|
||||||
|
content: bigText(resultLength),
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
function buildLongConversation(numExchanges: number, resultLength = 5_000) {
|
||||||
|
const out: Array<{ role: string; content: unknown }> = [
|
||||||
|
{ role: 'user', content: 'start the work' },
|
||||||
|
]
|
||||||
|
for (let i = 0; i < numExchanges; i++) {
|
||||||
|
out.push(...buildToolExchange(i, resultLength))
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
function makeFakeResponse(): Response {
|
||||||
|
return new Response(
|
||||||
|
JSON.stringify({
|
||||||
|
id: 'chatcmpl-1',
|
||||||
|
model: 'gpt-4o',
|
||||||
|
choices: [
|
||||||
|
{
|
||||||
|
message: { role: 'assistant', content: 'done' },
|
||||||
|
finish_reason: 'stop',
|
||||||
|
},
|
||||||
|
],
|
||||||
|
usage: { prompt_tokens: 8, completion_tokens: 2, total_tokens: 10 },
|
||||||
|
}),
|
||||||
|
{ headers: { 'Content-Type': 'application/json' } },
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
beforeEach(() => {
  // Point the shim at a dummy endpoint (no real network) and reset the mock
  // toggles to the common Copilot gpt-4o case.
  process.env.OPENAI_BASE_URL = 'http://example.test/v1'
  process.env.OPENAI_API_KEY = 'test-key'
  delete process.env.OPENAI_MODEL
  mockState.enabled = true
  mockState.effectiveWindow = 100_000
})

afterEach(() => {
  // Restore each env var exactly as found — deleting ones that were unset —
  // then un-stub global fetch.
  if (originalEnv.OPENAI_BASE_URL === undefined) delete process.env.OPENAI_BASE_URL
  else process.env.OPENAI_BASE_URL = originalEnv.OPENAI_BASE_URL
  if (originalEnv.OPENAI_API_KEY === undefined) delete process.env.OPENAI_API_KEY
  else process.env.OPENAI_API_KEY = originalEnv.OPENAI_API_KEY
  if (originalEnv.OPENAI_MODEL === undefined) delete process.env.OPENAI_MODEL
  else process.env.OPENAI_MODEL = originalEnv.OPENAI_MODEL
  globalThis.fetch = originalFetch
})
|
||||||
|
|
||||||
|
async function captureRequestBody(
|
||||||
|
messages: Array<{ role: string; content: unknown }>,
|
||||||
|
model: string,
|
||||||
|
): Promise<Record<string, unknown>> {
|
||||||
|
let captured: Record<string, unknown> | undefined
|
||||||
|
|
||||||
|
globalThis.fetch = (async (_input, init) => {
|
||||||
|
captured = JSON.parse(String(init?.body))
|
||||||
|
return makeFakeResponse()
|
||||||
|
}) as FetchType
|
||||||
|
|
||||||
|
const client = createOpenAIShimClient({}) as OpenAIShimClient
|
||||||
|
await client.beta.messages.create({
|
||||||
|
model,
|
||||||
|
system: 'system prompt',
|
||||||
|
messages,
|
||||||
|
})
|
||||||
|
|
||||||
|
if (!captured) throw new Error('request not captured')
|
||||||
|
return captured
|
||||||
|
}
|
||||||
|
|
||||||
|
function getToolMessages(body: Record<string, unknown>): Array<{ content: string }> {
|
||||||
|
const messages = body.messages as Array<{ role: string; content: string }>
|
||||||
|
return messages.filter(m => m.role === 'tool')
|
||||||
|
}
|
||||||
|
|
||||||
|
function getAssistantToolCalls(body: Record<string, unknown>): unknown[] {
|
||||||
|
const messages = body.messages as Array<{
|
||||||
|
role: string
|
||||||
|
tool_calls?: unknown[]
|
||||||
|
}>
|
||||||
|
return messages
|
||||||
|
.filter(m => m.role === 'assistant' && Array.isArray(m.tool_calls))
|
||||||
|
.flatMap(m => m.tool_calls ?? [])
|
||||||
|
}
|
||||||
|
|
||||||
|
// ============================================================================
// BUG REPRO: without compression, full tool history is resent every turn
// ============================================================================

test('BUG REPRO: without compression, all 30 tool results are sent at full size', async () => {
  // Feature off → shim must forward the transcript untouched.
  mockState.enabled = false
  const messages = buildLongConversation(30, 5_000)

  const body = await captureRequestBody(messages, 'gpt-4o')
  const toolMessages = getToolMessages(body)
  const payloadSize = JSON.stringify(body).length

  // All 30 tool results present, none truncated
  expect(toolMessages.length).toBe(30)
  for (const m of toolMessages) {
    expect(m.content.length).toBeGreaterThanOrEqual(5_000)
    expect(m.content).not.toContain('[…truncated')
    expect(m.content).not.toContain('chars omitted')
  }

  // Total payload is large (~150KB raw) — this is the cost being paid every turn
  expect(payloadSize).toBeGreaterThan(150_000)
})
|
||||||
|
|
||||||
|
// ============================================================================
// FIX: with compression, recent kept full, mid truncated, old stubbed
// ============================================================================

test('FIX: with compression on Copilot gpt-4o (tier 5/10/rest), 30 turns shrinks dramatically', async () => {
  mockState.enabled = true
  mockState.effectiveWindow = 100_000 // 64–128k → recent=5, mid=10
  const messages = buildLongConversation(30, 5_000)

  const body = await captureRequestBody(messages, 'gpt-4o')
  const toolMessages = getToolMessages(body)
  const payloadSize = JSON.stringify(body).length

  // Structure preserved: still 30 tool messages, no orphan tool_calls
  expect(toolMessages.length).toBe(30)
  expect(getAssistantToolCalls(body).length).toBe(30)

  // Tier breakdown (oldest → newest):
  //   indices 0..14  → old tier (stubs)
  //   indices 15..24 → mid tier (truncated)
  //   indices 25..29 → recent (full)
  for (let i = 0; i <= 14; i++) {
    expect(toolMessages[i].content).toMatch(/^\[Read args=.*chars omitted\]$/)
  }
  for (let i = 15; i <= 24; i++) {
    expect(toolMessages[i].content).toContain('[…truncated')
  }
  for (let i = 25; i <= 29; i++) {
    expect(toolMessages[i].content.length).toBe(5_000)
    expect(toolMessages[i].content).not.toContain('[…truncated')
    expect(toolMessages[i].content).not.toContain('chars omitted')
  }

  // Significant reduction: from ~150KB to <60KB (10 mid×2KB + structure overhead)
  expect(payloadSize).toBeLessThan(60_000)
})
|
||||||
|
|
||||||
|
// ============================================================================
// FIX: large-context model gets generous tiers — compression effectively inert
// ============================================================================

test('FIX: gpt-4.1 (1M context) with 25 exchanges keeps all full (recent tier=25)', async () => {
  mockState.enabled = true
  mockState.effectiveWindow = 1_000_000 // ≥500k → recent=25, mid=50
  const messages = buildLongConversation(25, 5_000)

  const body = await captureRequestBody(messages, 'gpt-4.1')
  const toolMessages = getToolMessages(body)

  // Exactly at the recent-tier boundary: everything stays verbatim.
  expect(toolMessages.length).toBe(25)
  for (const m of toolMessages) {
    expect(m.content.length).toBe(5_000)
    expect(m.content).not.toContain('[…truncated')
    expect(m.content).not.toContain('chars omitted')
  }
})

test('FIX: gpt-4.1 (1M context) with 30 exchanges → only first 5 mid-truncated', async () => {
  mockState.enabled = true
  mockState.effectiveWindow = 1_000_000 // recent=25, mid=50
  const messages = buildLongConversation(30, 5_000)

  const body = await captureRequestBody(messages, 'gpt-4.1')
  const toolMessages = getToolMessages(body)

  // 30 total: indices 0..4 mid, indices 5..29 recent
  for (let i = 0; i < 5; i++) {
    expect(toolMessages[i].content).toContain('[…truncated')
  }
  for (let i = 5; i < 30; i++) {
    expect(toolMessages[i].content.length).toBe(5_000)
  }
})
|
||||||
|
|
||||||
|
// ============================================================================
// FIX: stub preserves tool name and args — model can re-invoke if needed
// ============================================================================

test('FIX: stub format includes original tool name and arguments', async () => {
  mockState.enabled = true
  mockState.effectiveWindow = 100_000
  const messages = buildLongConversation(30, 5_000)

  const body = await captureRequestBody(messages, 'gpt-4o')
  const toolMessages = getToolMessages(body)
  // index 0 is the oldest tool_result → deepest (stub) tier.
  const oldestStub = toolMessages[0].content

  // Format: [<tool_name> args=<json> → <N> chars omitted]
  expect(oldestStub).toMatch(/^\[Read /)
  expect(oldestStub).toMatch(/file_path/)
  expect(oldestStub).toMatch(/→ 5000 chars omitted\]$/)
})
|
||||||
|
|
||||||
|
// ============================================================================
// FIX: tool_use blocks (assistant tool_calls) are never modified
// ============================================================================

test('FIX: every tool_call retains its full id, name, and arguments', async () => {
  mockState.enabled = true
  mockState.effectiveWindow = 100_000
  const messages = buildLongConversation(30, 5_000)

  const body = await captureRequestBody(messages, 'gpt-4o')
  const toolCalls = getAssistantToolCalls(body) as Array<{
    id: string
    function: { name: string; arguments: string }
  }>

  // Compression must only touch tool_result content — every assistant
  // tool_call survives byte-for-byte so the transcript stays well-formed.
  expect(toolCalls.length).toBe(30)
  for (let i = 0; i < toolCalls.length; i++) {
    expect(toolCalls[i].id).toBe(`toolu_${i}`)
    expect(toolCalls[i].function.name).toBe('Read')
    expect(JSON.parse(toolCalls[i].function.arguments)).toEqual({
      file_path: `/path/to/file${i}.ts`,
    })
  }
})
|
||||||
|
|
||||||
|
// ============================================================================
// FIX: small-context provider (Mistral 32k) gets aggressive compression
// ============================================================================

test('FIX: 32k window (Mistral tier) → recent=3 keeps last 3 only', async () => {
  mockState.enabled = true
  mockState.effectiveWindow = 24_000 // 16–32k → recent=3, mid=5
  const messages = buildLongConversation(15, 3_000)

  const body = await captureRequestBody(messages, 'mistral-large-latest')
  const toolMessages = getToolMessages(body)

  // 15 total: indices 0..6 old, 7..11 mid, 12..14 recent
  for (let i = 0; i <= 6; i++) {
    expect(toolMessages[i].content).toContain('chars omitted')
  }
  for (let i = 7; i <= 11; i++) {
    expect(toolMessages[i].content).toContain('[…truncated')
  }
  for (let i = 12; i <= 14; i++) {
    expect(toolMessages[i].content.length).toBe(3_000)
  }
})
|
||||||
@@ -46,6 +46,7 @@ import {
|
|||||||
type AnthropicUsage,
|
type AnthropicUsage,
|
||||||
type ShimCreateParams,
|
type ShimCreateParams,
|
||||||
} from './codexShim.js'
|
} from './codexShim.js'
|
||||||
|
import { compressToolHistory } from './compressToolHistory.js'
|
||||||
import { fetchWithProxyRetry } from './fetchWithProxyRetry.js'
|
import { fetchWithProxyRetry } from './fetchWithProxyRetry.js'
|
||||||
import {
|
import {
|
||||||
getLocalProviderRetryBaseUrls,
|
getLocalProviderRetryBaseUrls,
|
||||||
@@ -1299,14 +1300,15 @@ class OpenAIShimMessages {
|
|||||||
params: ShimCreateParams,
|
params: ShimCreateParams,
|
||||||
options?: { signal?: AbortSignal; headers?: Record<string, string> },
|
options?: { signal?: AbortSignal; headers?: Record<string, string> },
|
||||||
): Promise<Response> {
|
): Promise<Response> {
|
||||||
const openaiMessages = convertMessages(
|
const compressedMessages = compressToolHistory(
|
||||||
params.messages as Array<{
|
params.messages as Array<{
|
||||||
role: string
|
role: string
|
||||||
message?: { role?: string; content?: unknown }
|
message?: { role?: string; content?: unknown }
|
||||||
content?: unknown
|
content?: unknown
|
||||||
}>,
|
}>,
|
||||||
params.system,
|
request.resolvedModel,
|
||||||
)
|
)
|
||||||
|
const openaiMessages = convertMessages(compressedMessages, params.system)
|
||||||
|
|
||||||
const body: Record<string, unknown> = {
|
const body: Record<string, unknown> = {
|
||||||
model: request.resolvedModel,
|
model: request.resolvedModel,
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ export const TIME_BASED_MC_CLEARED_MESSAGE = '[Old tool result content cleared]'
|
|||||||
const IMAGE_MAX_TOKEN_SIZE = 2000
|
const IMAGE_MAX_TOKEN_SIZE = 2000
|
||||||
|
|
||||||
// Only compact these built-in tools (MCP tools are also compactable via prefix match)
|
// Only compact these built-in tools (MCP tools are also compactable via prefix match)
|
||||||
const COMPACTABLE_TOOLS = new Set<string>([
|
export const COMPACTABLE_TOOLS = new Set<string>([
|
||||||
FILE_READ_TOOL_NAME,
|
FILE_READ_TOOL_NAME,
|
||||||
...SHELL_TOOL_NAMES,
|
...SHELL_TOOL_NAMES,
|
||||||
GREP_TOOL_NAME,
|
GREP_TOOL_NAME,
|
||||||
@@ -51,7 +51,7 @@ const COMPACTABLE_TOOLS = new Set<string>([
|
|||||||
|
|
||||||
const MCP_TOOL_PREFIX = 'mcp__'
|
const MCP_TOOL_PREFIX = 'mcp__'
|
||||||
|
|
||||||
function isCompactableTool(name: string): boolean {
|
export function isCompactableTool(name: string): boolean {
|
||||||
return COMPACTABLE_TOOLS.has(name) || name.startsWith(MCP_TOOL_PREFIX)
|
return COMPACTABLE_TOOLS.has(name) || name.startsWith(MCP_TOOL_PREFIX)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -244,6 +244,7 @@ export type GlobalConfig = {
|
|||||||
bypassPermissionsModeAccepted?: boolean
|
bypassPermissionsModeAccepted?: boolean
|
||||||
hasUsedBackslashReturn?: boolean
|
hasUsedBackslashReturn?: boolean
|
||||||
autoCompactEnabled: boolean // Controls whether auto-compact is enabled
|
autoCompactEnabled: boolean // Controls whether auto-compact is enabled
|
||||||
|
toolHistoryCompressionEnabled: boolean // Compress old tool_result content for small-context providers
|
||||||
showTurnDuration: boolean // Controls whether to show turn duration message (e.g., "Cooked for 1m 6s")
|
showTurnDuration: boolean // Controls whether to show turn duration message (e.g., "Cooked for 1m 6s")
|
||||||
/**
|
/**
|
||||||
* @deprecated Use settings.env instead.
|
* @deprecated Use settings.env instead.
|
||||||
@@ -622,6 +623,7 @@ function createDefaultGlobalConfig(): GlobalConfig {
|
|||||||
verbose: false,
|
verbose: false,
|
||||||
editorMode: 'normal',
|
editorMode: 'normal',
|
||||||
autoCompactEnabled: true,
|
autoCompactEnabled: true,
|
||||||
|
toolHistoryCompressionEnabled: true,
|
||||||
showTurnDuration: true,
|
showTurnDuration: true,
|
||||||
hasSeenTasksHint: false,
|
hasSeenTasksHint: false,
|
||||||
hasUsedStash: false,
|
hasUsedStash: false,
|
||||||
@@ -668,6 +670,7 @@ export const GLOBAL_CONFIG_KEYS = [
|
|||||||
'editorMode',
|
'editorMode',
|
||||||
'hasUsedBackslashReturn',
|
'hasUsedBackslashReturn',
|
||||||
'autoCompactEnabled',
|
'autoCompactEnabled',
|
||||||
|
'toolHistoryCompressionEnabled',
|
||||||
'showTurnDuration',
|
'showTurnDuration',
|
||||||
'diffTool',
|
'diffTool',
|
||||||
'env',
|
'env',
|
||||||
|
|||||||
Reference in New Issue
Block a user