feat(api): compress old tool_result content for small-context providers (#801)
* feat(api): compress old tool_result content for small-context providers Adds a shim-layer pass that tiers tool_result content by age on providers with small effective context windows (Copilot gpt-4o 128k, Mistral, Ollama). Recent turns remain full; mid-tier results are truncated to 2k chars; older results are replaced with a stub that preserves tool name and arguments so the model can re-invoke if needed. Tier sizes auto-tune via getEffectiveContextWindowSize, same calculation used by auto-compact. Reuses COMPACTABLE_TOOLS and TOOL_RESULT_CLEARED_MESSAGE to complement (not duplicate) microCompact. Configurable via /config toolHistoryCompressionEnabled. Addresses active-session context accumulation on Copilot where microCompact's time-based trigger never fires, which surfaces as "tools appearing in a loop" and prompt_too_long errors after ~15 turns. * fix: config tool history
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -7,6 +7,7 @@ dist/
|
|||||||
.openclaude-profile.json
|
.openclaude-profile.json
|
||||||
reports/
|
reports/
|
||||||
GEMINI.md
|
GEMINI.md
|
||||||
|
CLAUDE.md
|
||||||
package-lock.json
|
package-lock.json
|
||||||
/.claude
|
/.claude
|
||||||
coverage/
|
coverage/
|
||||||
|
|||||||
@@ -281,6 +281,24 @@ export function Config({
|
|||||||
enabled: autoCompactEnabled
|
enabled: autoCompactEnabled
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
id: 'toolHistoryCompressionEnabled',
|
||||||
|
label: 'Tool history compression',
|
||||||
|
value: globalConfig.toolHistoryCompressionEnabled,
|
||||||
|
type: 'boolean' as const,
|
||||||
|
onChange(toolHistoryCompressionEnabled: boolean) {
|
||||||
|
saveGlobalConfig(current => ({
|
||||||
|
...current,
|
||||||
|
toolHistoryCompressionEnabled
|
||||||
|
}));
|
||||||
|
setGlobalConfig({
|
||||||
|
...getGlobalConfig(),
|
||||||
|
toolHistoryCompressionEnabled
|
||||||
|
});
|
||||||
|
logEvent('tengu_tool_history_compression_setting_changed', {
|
||||||
|
enabled: toolHistoryCompressionEnabled
|
||||||
|
});
|
||||||
|
}
|
||||||
}, {
|
}, {
|
||||||
id: 'spinnerTipsEnabled',
|
id: 'spinnerTipsEnabled',
|
||||||
label: 'Show tips',
|
label: 'Show tips',
|
||||||
@@ -1158,6 +1176,9 @@ export function Config({
|
|||||||
if (globalConfig.autoCompactEnabled !== initialConfig.current.autoCompactEnabled) {
|
if (globalConfig.autoCompactEnabled !== initialConfig.current.autoCompactEnabled) {
|
||||||
formattedChanges.push(`${globalConfig.autoCompactEnabled ? 'Enabled' : 'Disabled'} auto-compact`);
|
formattedChanges.push(`${globalConfig.autoCompactEnabled ? 'Enabled' : 'Disabled'} auto-compact`);
|
||||||
}
|
}
|
||||||
|
if (globalConfig.toolHistoryCompressionEnabled !== initialConfig.current.toolHistoryCompressionEnabled) {
|
||||||
|
formattedChanges.push(`${globalConfig.toolHistoryCompressionEnabled ? 'Enabled' : 'Disabled'} tool history compression`);
|
||||||
|
}
|
||||||
if (globalConfig.respectGitignore !== initialConfig.current.respectGitignore) {
|
if (globalConfig.respectGitignore !== initialConfig.current.respectGitignore) {
|
||||||
formattedChanges.push(`${globalConfig.respectGitignore ? 'Enabled' : 'Disabled'} respect .gitignore in file picker`);
|
formattedChanges.push(`${globalConfig.respectGitignore ? 'Enabled' : 'Disabled'} respect .gitignore in file picker`);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import { APIError } from '@anthropic-ai/sdk'
|
import { APIError } from '@anthropic-ai/sdk'
|
||||||
|
import { compressToolHistory } from './compressToolHistory.js'
|
||||||
import { fetchWithProxyRetry } from './fetchWithProxyRetry.js'
|
import { fetchWithProxyRetry } from './fetchWithProxyRetry.js'
|
||||||
import type {
|
import type {
|
||||||
ResolvedCodexCredentials,
|
ResolvedCodexCredentials,
|
||||||
@@ -484,13 +485,15 @@ export async function performCodexRequest(options: {
|
|||||||
defaultHeaders: Record<string, string>
|
defaultHeaders: Record<string, string>
|
||||||
signal?: AbortSignal
|
signal?: AbortSignal
|
||||||
}): Promise<Response> {
|
}): Promise<Response> {
|
||||||
const input = convertAnthropicMessagesToResponsesInput(
|
const compressedMessages = compressToolHistory(
|
||||||
options.params.messages as Array<{
|
options.params.messages as Array<{
|
||||||
role?: string
|
role?: string
|
||||||
message?: { role?: string; content?: unknown }
|
message?: { role?: string; content?: unknown }
|
||||||
content?: unknown
|
content?: unknown
|
||||||
}>,
|
}>,
|
||||||
|
options.request.resolvedModel,
|
||||||
)
|
)
|
||||||
|
const input = convertAnthropicMessagesToResponsesInput(compressedMessages)
|
||||||
const body: Record<string, unknown> = {
|
const body: Record<string, unknown> = {
|
||||||
model: options.request.resolvedModel,
|
model: options.request.resolvedModel,
|
||||||
input: input.length > 0
|
input: input.length > 0
|
||||||
|
|||||||
572
src/services/api/compressToolHistory.test.ts
Normal file
572
src/services/api/compressToolHistory.test.ts
Normal file
@@ -0,0 +1,572 @@
|
|||||||
|
import { afterEach, beforeEach, expect, mock, test } from 'bun:test'
|
||||||
|
import { compressToolHistory, getTiers } from './compressToolHistory.js'
|
||||||
|
|
||||||
|
// Mock the two dependencies so tests are deterministic and don't read disk config.
|
||||||
|
const mockState = {
|
||||||
|
enabled: true,
|
||||||
|
effectiveWindow: 100_000,
|
||||||
|
}
|
||||||
|
|
||||||
|
mock.module('../../utils/config.js', () => ({
|
||||||
|
getGlobalConfig: () => ({
|
||||||
|
toolHistoryCompressionEnabled: mockState.enabled,
|
||||||
|
}),
|
||||||
|
}))
|
||||||
|
|
||||||
|
mock.module('../compact/autoCompact.js', () => ({
|
||||||
|
getEffectiveContextWindowSize: () => mockState.effectiveWindow,
|
||||||
|
}))
|
||||||
|
|
||||||
|
beforeEach(() => {
|
||||||
|
mockState.enabled = true
|
||||||
|
mockState.effectiveWindow = 100_000
|
||||||
|
})
|
||||||
|
|
||||||
|
afterEach(() => {
|
||||||
|
mockState.enabled = true
|
||||||
|
mockState.effectiveWindow = 100_000
|
||||||
|
})
|
||||||
|
|
||||||
|
type Block = Record<string, unknown>
|
||||||
|
type Msg = { role: string; content: Block[] | string }
|
||||||
|
|
||||||
|
function bigText(n: number): string {
|
||||||
|
return 'x'.repeat(n)
|
||||||
|
}
|
||||||
|
|
||||||
|
function buildToolExchange(id: number, resultLength: number): Msg[] {
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
role: 'assistant',
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'tool_use',
|
||||||
|
id: `toolu_${id}`,
|
||||||
|
name: 'Read',
|
||||||
|
input: { file_path: `/path/to/file${id}.ts` },
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'tool_result',
|
||||||
|
tool_use_id: `toolu_${id}`,
|
||||||
|
content: bigText(resultLength),
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
function buildConversation(numToolExchanges: number, resultLength = 5_000): Msg[] {
|
||||||
|
const out: Msg[] = [{ role: 'user', content: 'Initial request' }]
|
||||||
|
for (let i = 0; i < numToolExchanges; i++) {
|
||||||
|
out.push(...buildToolExchange(i, resultLength))
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
function getResultMessages(messages: Msg[]): Msg[] {
|
||||||
|
return messages.filter(
|
||||||
|
m => Array.isArray(m.content) && m.content.some((b: any) => b.type === 'tool_result'),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
function getResultBlock(msg: Msg): Block {
|
||||||
|
return (msg.content as Block[]).find((b: any) => b.type === 'tool_result') as Block
|
||||||
|
}
|
||||||
|
|
||||||
|
function getResultText(msg: Msg): string {
|
||||||
|
const block = getResultBlock(msg)
|
||||||
|
const c = block.content
|
||||||
|
if (typeof c === 'string') return c
|
||||||
|
if (Array.isArray(c)) {
|
||||||
|
return c
|
||||||
|
.filter((b: any) => b.type === 'text')
|
||||||
|
.map((b: any) => b.text)
|
||||||
|
.join('\n')
|
||||||
|
}
|
||||||
|
return ''
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------- getTiers ----------
|
||||||
|
|
||||||
|
test('getTiers: < 16k window → recent=2, mid=3', () => {
|
||||||
|
expect(getTiers(8_000)).toEqual({ recent: 2, mid: 3 })
|
||||||
|
})
|
||||||
|
|
||||||
|
test('getTiers: 16k–32k → recent=3, mid=5', () => {
|
||||||
|
expect(getTiers(20_000)).toEqual({ recent: 3, mid: 5 })
|
||||||
|
})
|
||||||
|
|
||||||
|
test('getTiers: 32k–64k → recent=4, mid=8', () => {
|
||||||
|
expect(getTiers(48_000)).toEqual({ recent: 4, mid: 8 })
|
||||||
|
})
|
||||||
|
|
||||||
|
test('getTiers: 64k–128k (Copilot gpt-4o) → recent=5, mid=10', () => {
|
||||||
|
expect(getTiers(100_000)).toEqual({ recent: 5, mid: 10 })
|
||||||
|
})
|
||||||
|
|
||||||
|
test('getTiers: 128k–256k (Copilot Claude) → recent=8, mid=15', () => {
|
||||||
|
expect(getTiers(200_000)).toEqual({ recent: 8, mid: 15 })
|
||||||
|
})
|
||||||
|
|
||||||
|
test('getTiers: 256k–500k → recent=12, mid=25', () => {
|
||||||
|
expect(getTiers(400_000)).toEqual({ recent: 12, mid: 25 })
|
||||||
|
})
|
||||||
|
|
||||||
|
test('getTiers: ≥ 500k (gpt-4.1 1M) → recent=25, mid=50', () => {
|
||||||
|
expect(getTiers(1_000_000)).toEqual({ recent: 25, mid: 50 })
|
||||||
|
})
|
||||||
|
|
||||||
|
// ---------- master switch ----------
|
||||||
|
|
||||||
|
test('pass-through when toolHistoryCompressionEnabled is false', () => {
|
||||||
|
mockState.enabled = false
|
||||||
|
const messages = buildConversation(20)
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
expect(result).toBe(messages) // same reference (no transformation)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('pass-through when total tool_results <= recent tier', () => {
|
||||||
|
// 100k effective → recent=5; only 4 exchanges → no compression
|
||||||
|
const messages = buildConversation(4)
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
expect(result).toBe(messages)
|
||||||
|
})
|
||||||
|
|
||||||
|
// ---------- per-tier behavior ----------
|
||||||
|
|
||||||
|
test('recent tier: tool_result content untouched', () => {
|
||||||
|
// 100k effective → recent=5, mid=10. With 6 exchanges, only the oldest is touched.
|
||||||
|
const messages = buildConversation(6, 5_000)
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
|
||||||
|
// Last 5 should be untouched (full 5000 chars)
|
||||||
|
for (let i = resultMsgs.length - 5; i < resultMsgs.length; i++) {
|
||||||
|
expect(getResultText(resultMsgs[i]).length).toBe(5_000)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
test('mid tier: long content truncated to MID_MAX_CHARS with marker', () => {
|
||||||
|
// 100k → recent=5, mid=10. 10 exchanges: 5 recent + 5 mid (none old).
|
||||||
|
const messages = buildConversation(10, 5_000)
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
|
||||||
|
// First 5 are mid tier — should be truncated to ~2000 chars + marker
|
||||||
|
for (let i = 0; i < 5; i++) {
|
||||||
|
const text = getResultText(resultMsgs[i])
|
||||||
|
expect(text).toContain('[…truncated')
|
||||||
|
expect(text).toContain('chars from tool history]')
|
||||||
|
// Should be roughly 2000 chars + marker (under 2200)
|
||||||
|
expect(text.length).toBeLessThan(2_200)
|
||||||
|
expect(text.length).toBeGreaterThan(2_000)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
test('mid tier: short content (< MID_MAX_CHARS) untouched', () => {
|
||||||
|
const messages = buildConversation(10, 500) // 500 < MID_MAX_CHARS
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
|
||||||
|
for (let i = 0; i < 5; i++) {
|
||||||
|
expect(getResultText(resultMsgs[i])).toBe(bigText(500))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
test('old tier: content replaced with stub [name args={...} → N chars omitted]', () => {
|
||||||
|
// 100k → recent=5, mid=10, old=rest. 20 exchanges → 5 old + 10 mid + 5 recent.
|
||||||
|
const messages = buildConversation(20, 5_000)
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
|
||||||
|
// First 5 are old tier — should be stubs
|
||||||
|
for (let i = 0; i < 5; i++) {
|
||||||
|
const text = getResultText(resultMsgs[i])
|
||||||
|
expect(text).toMatch(/^\[Read args=\{.*\} → 5000 chars omitted\]$/)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
test('old tier: stub args truncated to 200 chars', () => {
|
||||||
|
const longArg = bigText(500)
|
||||||
|
const messages: Msg[] = [
|
||||||
|
{ role: 'user', content: 'start' },
|
||||||
|
{
|
||||||
|
role: 'assistant',
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'tool_use',
|
||||||
|
id: 'toolu_x',
|
||||||
|
name: 'Bash',
|
||||||
|
input: { command: longArg },
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: [
|
||||||
|
{ type: 'tool_result', tool_use_id: 'toolu_x', content: 'output' },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
// Pad with enough recent exchanges to push the above into old tier
|
||||||
|
...buildConversation(20, 100).slice(1),
|
||||||
|
]
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
const text = getResultText(resultMsgs[0])
|
||||||
|
|
||||||
|
// Stub format: [Bash args=<json≤200chars> → N chars omitted]
|
||||||
|
// The args portion (between args= and →) must be ≤ 200 chars.
|
||||||
|
const argsMatch = text.match(/args=(.*?) →/)
|
||||||
|
expect(argsMatch).not.toBeNull()
|
||||||
|
expect(argsMatch![1].length).toBeLessThanOrEqual(200)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('old tier: orphan tool_result (no matching tool_use) falls back to "tool"', () => {
|
||||||
|
const messages: Msg[] = [
|
||||||
|
{ role: 'user', content: 'start' },
|
||||||
|
// Orphan: tool_result without matching tool_use in history
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: [
|
||||||
|
{ type: 'tool_result', tool_use_id: 'orphan_id', content: 'data' },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
...buildConversation(20, 100).slice(1),
|
||||||
|
]
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
const text = getResultText(resultMsgs[0])
|
||||||
|
|
||||||
|
expect(text).toMatch(/^\[tool args=\{\} → 4 chars omitted\]$/)
|
||||||
|
})
|
||||||
|
|
||||||
|
// ---------- structural preservation ----------
|
||||||
|
|
||||||
|
test('tool_use blocks always preserved', () => {
|
||||||
|
const messages = buildConversation(20, 5_000)
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
|
||||||
|
const useCount = (msgs: Msg[]) =>
|
||||||
|
msgs.reduce((sum, m) => {
|
||||||
|
if (!Array.isArray(m.content)) return sum
|
||||||
|
return sum + m.content.filter((b: any) => b.type === 'tool_use').length
|
||||||
|
}, 0)
|
||||||
|
|
||||||
|
expect(useCount(result as Msg[])).toBe(useCount(messages))
|
||||||
|
})
|
||||||
|
|
||||||
|
test('text blocks always preserved', () => {
|
||||||
|
const messages: Msg[] = [
|
||||||
|
{ role: 'user', content: 'first' },
|
||||||
|
{
|
||||||
|
role: 'assistant',
|
||||||
|
content: [
|
||||||
|
{ type: 'text', text: 'reasoning before tool' },
|
||||||
|
{ type: 'tool_use', id: 'toolu_1', name: 'Read', input: {} },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: [{ type: 'tool_result', tool_use_id: 'toolu_1', content: bigText(5000) }],
|
||||||
|
},
|
||||||
|
...buildConversation(20, 5_000).slice(1),
|
||||||
|
]
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const assistantMsg = (result as Msg[])[1]
|
||||||
|
const textBlock = (assistantMsg.content as Block[]).find((b: any) => b.type === 'text')
|
||||||
|
|
||||||
|
expect(textBlock).toEqual({ type: 'text', text: 'reasoning before tool' })
|
||||||
|
})
|
||||||
|
|
||||||
|
test('thinking blocks always preserved', () => {
|
||||||
|
const messages: Msg[] = [
|
||||||
|
{ role: 'user', content: 'first' },
|
||||||
|
{
|
||||||
|
role: 'assistant',
|
||||||
|
content: [
|
||||||
|
{ type: 'thinking', thinking: 'internal reasoning', signature: 'sig' },
|
||||||
|
{ type: 'tool_use', id: 'toolu_1', name: 'Read', input: {} },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: [{ type: 'tool_result', tool_use_id: 'toolu_1', content: bigText(5000) }],
|
||||||
|
},
|
||||||
|
...buildConversation(20, 5_000).slice(1),
|
||||||
|
]
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const assistantMsg = (result as Msg[])[1]
|
||||||
|
const thinking = (assistantMsg.content as Block[]).find((b: any) => b.type === 'thinking')
|
||||||
|
|
||||||
|
expect(thinking).toEqual({
|
||||||
|
type: 'thinking',
|
||||||
|
thinking: 'internal reasoning',
|
||||||
|
signature: 'sig',
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
test('non-array content (string) handled gracefully', () => {
|
||||||
|
const messages: Msg[] = [
|
||||||
|
{ role: 'user', content: 'plain string content' },
|
||||||
|
...buildConversation(20, 100).slice(1),
|
||||||
|
]
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
expect((result as Msg[])[0].content).toBe('plain string content')
|
||||||
|
})
|
||||||
|
|
||||||
|
test('empty content array handled gracefully', () => {
|
||||||
|
const messages: Msg[] = [
|
||||||
|
{ role: 'user', content: [] },
|
||||||
|
...buildConversation(20, 100).slice(1),
|
||||||
|
]
|
||||||
|
expect(() => compressToolHistory(messages, 'gpt-4o')).not.toThrow()
|
||||||
|
})
|
||||||
|
|
||||||
|
// ---------- message shape compatibility ----------
|
||||||
|
|
||||||
|
test('wrapped shape ({ message: { role, content } }) handled', () => {
|
||||||
|
type WrappedMsg = { message: { role: string; content: Block[] | string } }
|
||||||
|
const wrap = (m: Msg): WrappedMsg => ({ message: { role: m.role, content: m.content } })
|
||||||
|
const messages = buildConversation(20, 5_000).map(wrap)
|
||||||
|
const result = compressToolHistory(messages as any, 'gpt-4o')
|
||||||
|
|
||||||
|
// First wrapped tool-result message should have stub content (old tier)
|
||||||
|
const firstResultMsg = (result as WrappedMsg[]).find(
|
||||||
|
m =>
|
||||||
|
Array.isArray(m.message.content) &&
|
||||||
|
m.message.content.some((b: any) => b.type === 'tool_result'),
|
||||||
|
)
|
||||||
|
const block = (firstResultMsg!.message.content as Block[]).find(
|
||||||
|
(b: any) => b.type === 'tool_result',
|
||||||
|
) as Block
|
||||||
|
const text = ((block.content as Block[])[0] as any).text
|
||||||
|
expect(text).toMatch(/^\[Read args=.*→ 5000 chars omitted\]$/)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('flat shape ({ role, content }) handled', () => {
|
||||||
|
const messages = buildConversation(20, 5_000)
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
|
||||||
|
expect(getResultText(resultMsgs[0])).toMatch(/^\[Read args=.*→ 5000 chars omitted\]$/)
|
||||||
|
})
|
||||||
|
|
||||||
|
// ---------- tier boundary correctness ----------
|
||||||
|
|
||||||
|
test('tier boundaries: 6 exchanges → 1 mid + 5 recent (recent=5)', () => {
|
||||||
|
const messages = buildConversation(6, 5_000)
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
|
||||||
|
// Oldest: mid (truncated)
|
||||||
|
expect(getResultText(resultMsgs[0])).toContain('[…truncated')
|
||||||
|
// Last 5: untouched
|
||||||
|
for (let i = 1; i < 6; i++) {
|
||||||
|
expect(getResultText(resultMsgs[i]).length).toBe(5_000)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
test('tier boundaries: 16 exchanges → 1 old + 10 mid + 5 recent', () => {
|
||||||
|
const messages = buildConversation(16, 5_000)
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
|
||||||
|
// Oldest 1: stub (old tier)
|
||||||
|
expect(getResultText(resultMsgs[0])).toMatch(/^\[Read .*chars omitted\]$/)
|
||||||
|
// Next 10: mid (truncated)
|
||||||
|
for (let i = 1; i < 11; i++) {
|
||||||
|
expect(getResultText(resultMsgs[i])).toContain('[…truncated')
|
||||||
|
}
|
||||||
|
// Last 5: untouched
|
||||||
|
for (let i = 11; i < 16; i++) {
|
||||||
|
expect(getResultText(resultMsgs[i]).length).toBe(5_000)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
test('large window (1M) with 30 exchanges: all untouched (recent=25 ≥ 30 - 5)', () => {
|
||||||
|
// ≥500k → recent=25, mid=50. 30 exchanges → 5 mid + 25 recent. None old.
|
||||||
|
mockState.effectiveWindow = 1_000_000
|
||||||
|
const messages = buildConversation(30, 5_000)
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4.1')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
|
||||||
|
// Last 25: untouched
|
||||||
|
for (let i = 5; i < 30; i++) {
|
||||||
|
expect(getResultText(resultMsgs[i]).length).toBe(5_000)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
// ---------- attribute preservation ----------
|
||||||
|
|
||||||
|
test('is_error flag preserved in mid tier', () => {
|
||||||
|
const messages: Msg[] = [
|
||||||
|
{ role: 'user', content: 'start' },
|
||||||
|
{
|
||||||
|
role: 'assistant',
|
||||||
|
content: [{ type: 'tool_use', id: 'toolu_err', name: 'Bash', input: {} }],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'tool_result',
|
||||||
|
tool_use_id: 'toolu_err',
|
||||||
|
is_error: true,
|
||||||
|
content: bigText(5_000),
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
// Pad with enough recent exchanges to push the above into MID tier
|
||||||
|
...buildConversation(10, 100).slice(1),
|
||||||
|
]
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
const block = getResultBlock(resultMsgs[0]) as { is_error?: boolean; content: unknown }
|
||||||
|
|
||||||
|
expect(block.is_error).toBe(true)
|
||||||
|
expect(getResultText(resultMsgs[0])).toContain('[…truncated')
|
||||||
|
})
|
||||||
|
|
||||||
|
test('is_error flag preserved in old tier (stub)', () => {
|
||||||
|
const messages: Msg[] = [
|
||||||
|
{ role: 'user', content: 'start' },
|
||||||
|
{
|
||||||
|
role: 'assistant',
|
||||||
|
content: [{ type: 'tool_use', id: 'toolu_err', name: 'Bash', input: {} }],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'tool_result',
|
||||||
|
tool_use_id: 'toolu_err',
|
||||||
|
is_error: true,
|
||||||
|
content: bigText(5_000),
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
...buildConversation(20, 100).slice(1),
|
||||||
|
]
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
const block = getResultBlock(resultMsgs[0]) as { is_error?: boolean; content: unknown }
|
||||||
|
|
||||||
|
expect(block.is_error).toBe(true)
|
||||||
|
expect(getResultText(resultMsgs[0])).toMatch(/^\[Bash .*chars omitted\]$/)
|
||||||
|
})
|
||||||
|
|
||||||
|
// ---------- COMPACTABLE_TOOLS filter ----------
|
||||||
|
|
||||||
|
test('non-compactable tool (e.g. Task/Agent) is NEVER compressed', () => {
|
||||||
|
// Build conversation where the OLDEST exchange uses a non-compactable tool name
|
||||||
|
const messages: Msg[] = [
|
||||||
|
{ role: 'user', content: 'start' },
|
||||||
|
{
|
||||||
|
role: 'assistant',
|
||||||
|
content: [
|
||||||
|
{ type: 'tool_use', id: 'task_1', name: 'Task', input: { goal: 'plan' } },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: [
|
||||||
|
{ type: 'tool_result', tool_use_id: 'task_1', content: bigText(5_000) },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
// Pad with 20 compactable exchanges to push Task into old tier
|
||||||
|
...buildConversation(20, 100).slice(1),
|
||||||
|
]
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
|
||||||
|
// First tool_result is for Task (non-compactable) → must remain full
|
||||||
|
expect(getResultText(resultMsgs[0]).length).toBe(5_000)
|
||||||
|
expect(getResultText(resultMsgs[0])).not.toContain('chars omitted')
|
||||||
|
expect(getResultText(resultMsgs[0])).not.toContain('[…truncated')
|
||||||
|
})
|
||||||
|
|
||||||
|
test('mcp__ prefixed tools ARE compactable (matches microCompact behavior)', () => {
|
||||||
|
const messages: Msg[] = [
|
||||||
|
{ role: 'user', content: 'start' },
|
||||||
|
{
|
||||||
|
role: 'assistant',
|
||||||
|
content: [
|
||||||
|
{ type: 'tool_use', id: 'mcp_1', name: 'mcp__github__get_issue', input: {} },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: [
|
||||||
|
{ type: 'tool_result', tool_use_id: 'mcp_1', content: bigText(5_000) },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
...buildConversation(20, 100).slice(1),
|
||||||
|
]
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
|
||||||
|
// MCP tool result is compressed (gets stub since it's in old tier)
|
||||||
|
expect(getResultText(resultMsgs[0])).toMatch(/^\[mcp__github__get_issue .*chars omitted\]$/)
|
||||||
|
})
|
||||||
|
|
||||||
|
// ---------- skip already-cleared blocks ----------
|
||||||
|
|
||||||
|
test('blocks already cleared by microCompact are NOT re-compressed', () => {
|
||||||
|
const messages: Msg[] = [
|
||||||
|
{ role: 'user', content: 'start' },
|
||||||
|
{
|
||||||
|
role: 'assistant',
|
||||||
|
content: [{ type: 'tool_use', id: 'cleared_1', name: 'Read', input: {} }],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'tool_result',
|
||||||
|
tool_use_id: 'cleared_1',
|
||||||
|
content: '[Old tool result content cleared]', // microCompact's marker
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
...buildConversation(20, 100).slice(1),
|
||||||
|
]
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
|
||||||
|
// Already-cleared marker survives untouched (no double processing)
|
||||||
|
expect(getResultText(resultMsgs[0])).toBe('[Old tool result content cleared]')
|
||||||
|
})
|
||||||
|
|
||||||
|
test('extra block attributes (e.g. cache_control) preserved across rewrites', () => {
|
||||||
|
const cacheControl = { type: 'ephemeral' }
|
||||||
|
const messages: Msg[] = [
|
||||||
|
{ role: 'user', content: 'start' },
|
||||||
|
{
|
||||||
|
role: 'assistant',
|
||||||
|
content: [{ type: 'tool_use', id: 'toolu_cc', name: 'Read', input: {} }],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'tool_result',
|
||||||
|
tool_use_id: 'toolu_cc',
|
||||||
|
cache_control: cacheControl,
|
||||||
|
content: bigText(5_000),
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
...buildConversation(20, 100).slice(1),
|
||||||
|
]
|
||||||
|
const result = compressToolHistory(messages, 'gpt-4o')
|
||||||
|
const resultMsgs = getResultMessages(result)
|
||||||
|
const block = getResultBlock(resultMsgs[0]) as { cache_control?: unknown }
|
||||||
|
|
||||||
|
// The custom attribute survived the stub rewrite via ...block spread
|
||||||
|
expect(block.cache_control).toEqual(cacheControl)
|
||||||
|
})
|
||||||
255
src/services/api/compressToolHistory.ts
Normal file
255
src/services/api/compressToolHistory.ts
Normal file
@@ -0,0 +1,255 @@
|
|||||||
|
/**
|
||||||
|
* Compresses old tool_result content for stateless OpenAI-compatible providers
|
||||||
|
* (Copilot, Mistral, Ollama). Preserves all conversation structure — tool_use,
|
||||||
|
* tool_result pairing, text, thinking, and is_error all survive intact. Only
|
||||||
|
* the BULK text of older tool_results is shrunk to delay context saturation.
|
||||||
|
*
|
||||||
|
* Tier sizes scale with the model's effective context window via
|
||||||
|
* getEffectiveContextWindowSize() — same calculation used by auto-compact, so
|
||||||
|
* the two systems stay aligned.
|
||||||
|
*
|
||||||
|
* Complements (does not replace) microCompact.ts:
|
||||||
|
* - microCompact: time/cache-based, runs from query.ts, binary clear/keep,
|
||||||
|
* limited to Claude (cache editing) or idle gaps (time-based).
|
||||||
|
* - compressToolHistory: size-based, runs at the shim layer, tiered
|
||||||
|
* compression, covers the gap for active sessions on non-Claude providers.
|
||||||
|
*
|
||||||
|
* Reuses isCompactableTool from microCompact to avoid touching tools the
|
||||||
|
* project already classifies as unsafe to compress (e.g. Task, Agent).
|
||||||
|
* Skips blocks already cleared by microCompact (TOOL_RESULT_CLEARED_MESSAGE).
|
||||||
|
*
|
||||||
|
* Anthropic native bypasses both shims, so it is unaffected by this module.
|
||||||
|
*/
|
||||||
|
import { getEffectiveContextWindowSize } from '../compact/autoCompact.js'
|
||||||
|
import { isCompactableTool } from '../compact/microCompact.js'
|
||||||
|
import { TOOL_RESULT_CLEARED_MESSAGE } from '../../utils/toolResultStorage.js'
|
||||||
|
import { getGlobalConfig } from '../../utils/config.js'
|
||||||
|
|
||||||
|
// Mid-tier truncation budget. 2k chars ≈ 500 tokens, enough to preserve the
|
||||||
|
// shape of most tool outputs (file headers, command stderr, top grep hits)
|
||||||
|
// without ballooning context. Bump too high and the tier loses its purpose.
|
||||||
|
const MID_MAX_CHARS = 2_000
|
||||||
|
|
||||||
|
// Stub args budget. JSON.stringify of a typical tool input fits in 200 chars
|
||||||
|
// (file paths, short commands, small queries). Long inputs are rare and clamping
|
||||||
|
// here keeps the stub size bounded even when callers pass oversized arguments.
|
||||||
|
const STUB_ARGS_MAX_CHARS = 200
|
||||||
|
|
||||||
|
type AnyMessage = {
|
||||||
|
role?: string
|
||||||
|
message?: { role?: string; content?: unknown }
|
||||||
|
content?: unknown
|
||||||
|
}
|
||||||
|
|
||||||
|
type ToolResultBlock = {
|
||||||
|
type: 'tool_result'
|
||||||
|
tool_use_id?: string
|
||||||
|
is_error?: boolean
|
||||||
|
content?: unknown
|
||||||
|
}
|
||||||
|
|
||||||
|
type ToolUseBlock = {
|
||||||
|
type: 'tool_use'
|
||||||
|
id?: string
|
||||||
|
name?: string
|
||||||
|
input?: unknown
|
||||||
|
}
|
||||||
|
|
||||||
|
type Tiers = { recent: number; mid: number }
|
||||||
|
|
||||||
|
// Tier sizes scale with effective window. Targets roughly:
|
||||||
|
// - recent tier stays under ~25% of available window (full fidelity kept)
|
||||||
|
// - recent + mid tier stays under ~50% of available window (bounded bulk)
|
||||||
|
// - everything older collapses to ~15-token stubs
|
||||||
|
// Values assume ~5KB avg tool_result, which matches the Copilot default case
|
||||||
|
// (parallel_tool_calls=true means multiple Read/Bash outputs per turn). For
|
||||||
|
// ≥ 500k models the tiers are so generous that compression is effectively
|
||||||
|
// inert for any realistic session — see compressToolHistory.test.ts.
|
||||||
|
export function getTiers(effectiveWindow: number): Tiers {
|
||||||
|
if (effectiveWindow < 16_000) return { recent: 2, mid: 3 }
|
||||||
|
if (effectiveWindow < 32_000) return { recent: 3, mid: 5 }
|
||||||
|
if (effectiveWindow < 64_000) return { recent: 4, mid: 8 }
|
||||||
|
if (effectiveWindow < 128_000) return { recent: 5, mid: 10 }
|
||||||
|
if (effectiveWindow < 256_000) return { recent: 8, mid: 15 }
|
||||||
|
if (effectiveWindow < 500_000) return { recent: 12, mid: 25 }
|
||||||
|
return { recent: 25, mid: 50 }
|
||||||
|
}
|
||||||
|
|
||||||
|
function extractText(content: unknown): string {
|
||||||
|
if (typeof content === 'string') return content
|
||||||
|
if (Array.isArray(content)) {
|
||||||
|
return content
|
||||||
|
.filter(
|
||||||
|
(b: { type?: string; text?: string }) =>
|
||||||
|
b?.type === 'text' && typeof b.text === 'string',
|
||||||
|
)
|
||||||
|
.map((b: { text?: string }) => b.text ?? '')
|
||||||
|
.join('\n')
|
||||||
|
}
|
||||||
|
return ''
|
||||||
|
}
|
||||||
|
|
||||||
|
// Old-tier compression strategy. Replaces content entirely with a one-line
|
||||||
|
// metadata marker ~10× more token-efficient than a 500-char truncation AND
|
||||||
|
// unambiguous — partial truncations can look authoritative to the model. The
|
||||||
|
// stub format encodes tool name + args so the model can re-invoke the same
|
||||||
|
// tool if it needs the omitted output back.
|
||||||
|
function buildStub(
|
||||||
|
block: ToolResultBlock,
|
||||||
|
toolUsesById: Map<string, ToolUseBlock>,
|
||||||
|
): ToolResultBlock {
|
||||||
|
const original = extractText(block.content)
|
||||||
|
const toolUse = toolUsesById.get(block.tool_use_id ?? '')
|
||||||
|
const name = toolUse?.name ?? 'tool'
|
||||||
|
const args = toolUse?.input
|
||||||
|
? JSON.stringify(toolUse.input).slice(0, STUB_ARGS_MAX_CHARS)
|
||||||
|
: '{}'
|
||||||
|
return {
|
||||||
|
...block,
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'text',
|
||||||
|
text: `[${name} args=${args} → ${original.length} chars omitted]`,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mid-tier compression. The trailing marker is load-bearing: without it, the
|
||||||
|
// model can't distinguish "tool returned 2000 chars" from "tool returned 20k
|
||||||
|
// chars that we cut to 2000". Distinguishing those matters for the model's
|
||||||
|
// decision to re-invoke the tool.
|
||||||
|
function truncateBlock(
|
||||||
|
block: ToolResultBlock,
|
||||||
|
maxChars: number,
|
||||||
|
): ToolResultBlock {
|
||||||
|
const text = extractText(block.content)
|
||||||
|
if (text.length <= maxChars) return block
|
||||||
|
const omitted = text.length - maxChars
|
||||||
|
return {
|
||||||
|
...block,
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'text',
|
||||||
|
text: `${text.slice(0, maxChars)}\n[…truncated ${omitted} chars from tool history]`,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function getInner(msg: AnyMessage): { role?: string; content?: unknown } {
|
||||||
|
return (msg.message ?? msg) as { role?: string; content?: unknown }
|
||||||
|
}
|
||||||
|
|
||||||
|
function indexToolUses(messages: AnyMessage[]): Map<string, ToolUseBlock> {
|
||||||
|
const map = new Map<string, ToolUseBlock>()
|
||||||
|
for (const msg of messages) {
|
||||||
|
const content = getInner(msg).content
|
||||||
|
if (!Array.isArray(content)) continue
|
||||||
|
for (const b of content as Array<{ type?: string; id?: string }>) {
|
||||||
|
if (b?.type === 'tool_use' && b.id) {
|
||||||
|
map.set(b.id, b as ToolUseBlock)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return map
|
||||||
|
}
|
||||||
|
|
||||||
|
function indexToolResultMessages(messages: AnyMessage[]): number[] {
|
||||||
|
const indices: number[] = []
|
||||||
|
for (let i = 0; i < messages.length; i++) {
|
||||||
|
const inner = getInner(messages[i])
|
||||||
|
const role = inner.role ?? messages[i].role
|
||||||
|
const content = inner.content
|
||||||
|
if (
|
||||||
|
role === 'user' &&
|
||||||
|
Array.isArray(content) &&
|
||||||
|
content.some((b: { type?: string }) => b?.type === 'tool_result')
|
||||||
|
) {
|
||||||
|
indices.push(i)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return indices
|
||||||
|
}
|
||||||
|
|
||||||
|
function rewriteMessage<T extends AnyMessage>(
|
||||||
|
msg: T,
|
||||||
|
newContent: unknown[],
|
||||||
|
): T {
|
||||||
|
if (msg.message) {
|
||||||
|
return { ...msg, message: { ...msg.message, content: newContent } }
|
||||||
|
}
|
||||||
|
return { ...msg, content: newContent }
|
||||||
|
}
|
||||||
|
|
||||||
|
// microCompact.maybeTimeBasedMicrocompact may have already replaced old
|
||||||
|
// tool_result content with TOOL_RESULT_CLEARED_MESSAGE before we see it.
|
||||||
|
// Re-compressing produces a stub over a marker (e.g. `[Read args={} → 40
|
||||||
|
// chars omitted]`), wasteful and less informative than the canonical marker.
|
||||||
|
function isAlreadyCleared(block: ToolResultBlock): boolean {
|
||||||
|
const text = extractText(block.content)
|
||||||
|
return text === TOOL_RESULT_CLEARED_MESSAGE
|
||||||
|
}
|
||||||
|
|
||||||
|
function shouldCompressBlock(
|
||||||
|
block: ToolResultBlock,
|
||||||
|
toolUsesById: Map<string, ToolUseBlock>,
|
||||||
|
): boolean {
|
||||||
|
if (isAlreadyCleared(block)) return false
|
||||||
|
const toolUse = toolUsesById.get(block.tool_use_id ?? '')
|
||||||
|
// Unknown tool name (orphan tool_result with no matching tool_use) falls
|
||||||
|
// through to compression with a generic "tool" stub. Safer default: the
|
||||||
|
// original tool_use vanished so there's no downstream use for the output.
|
||||||
|
if (!toolUse?.name) return true
|
||||||
|
// Respect microCompact's curated safe-to-compress set (Read/Bash/Grep/…/
|
||||||
|
// mcp__*) so user-facing flow tools (Task, Agent, custom) stay intact.
|
||||||
|
return isCompactableTool(toolUse.name)
|
||||||
|
}
|
||||||
|
|
||||||
|
export function compressToolHistory<T extends AnyMessage>(
|
||||||
|
messages: T[],
|
||||||
|
model: string,
|
||||||
|
): T[] {
|
||||||
|
// Master kill-switch. Returns the original reference so callers skip a
|
||||||
|
// defensive copy when the feature is disabled.
|
||||||
|
if (!getGlobalConfig().toolHistoryCompressionEnabled) return messages
|
||||||
|
|
||||||
|
const tiers = getTiers(getEffectiveContextWindowSize(model))
|
||||||
|
|
||||||
|
const toolResultIndices = indexToolResultMessages(messages)
|
||||||
|
const total = toolResultIndices.length
|
||||||
|
// If every tool-result fits in the recent tier, no boundary crosses; return
|
||||||
|
// the same reference for the same copy-elision reason.
|
||||||
|
if (total <= tiers.recent) return messages
|
||||||
|
|
||||||
|
// O(1) lookup: messageIndex → tool-result position (0 = oldest). Replaces
|
||||||
|
// the naive Array.indexOf(i) that was O(n²) across the .map below.
|
||||||
|
const positionByIndex = new Map<number, number>()
|
||||||
|
for (let pos = 0; pos < toolResultIndices.length; pos++) {
|
||||||
|
positionByIndex.set(toolResultIndices[pos], pos)
|
||||||
|
}
|
||||||
|
|
||||||
|
const toolUsesById = indexToolUses(messages)
|
||||||
|
|
||||||
|
return messages.map((msg, i) => {
|
||||||
|
const pos = positionByIndex.get(i)
|
||||||
|
if (pos === undefined) return msg
|
||||||
|
|
||||||
|
const fromEnd = total - 1 - pos
|
||||||
|
if (fromEnd < tiers.recent) return msg
|
||||||
|
|
||||||
|
const inMidWindow = fromEnd < tiers.recent + tiers.mid
|
||||||
|
const content = getInner(msg).content as unknown[]
|
||||||
|
const newContent = content.map(block => {
|
||||||
|
const b = block as { type?: string }
|
||||||
|
if (b?.type !== 'tool_result') return block
|
||||||
|
const tr = block as ToolResultBlock
|
||||||
|
if (!shouldCompressBlock(tr, toolUsesById)) return block
|
||||||
|
return inMidWindow
|
||||||
|
? truncateBlock(tr, MID_MAX_CHARS)
|
||||||
|
: buildStub(tr, toolUsesById)
|
||||||
|
})
|
||||||
|
|
||||||
|
return rewriteMessage(msg, newContent)
|
||||||
|
})
|
||||||
|
}
|
||||||
317
src/services/api/openaiShim.compression.test.ts
Normal file
317
src/services/api/openaiShim.compression.test.ts
Normal file
@@ -0,0 +1,317 @@
|
|||||||
|
import { afterEach, beforeEach, expect, mock, test } from 'bun:test'
|
||||||
|
import { createOpenAIShimClient } from './openaiShim.js'
|
||||||
|
|
||||||
|
type FetchType = typeof globalThis.fetch
// Captured once at module load so afterEach can restore the real fetch.
const originalFetch = globalThis.fetch

// Snapshot of the env vars this suite mutates; restored verbatim in afterEach.
const originalEnv = {
  OPENAI_BASE_URL: process.env.OPENAI_BASE_URL,
  OPENAI_API_KEY: process.env.OPENAI_API_KEY,
  OPENAI_MODEL: process.env.OPENAI_MODEL,
}

// Mock config + autoCompact so the shim sees deterministic state.
// Tests toggle these fields to simulate different providers/windows.
const mockState = {
  enabled: true,
  effectiveWindow: 100_000, // Copilot gpt-4o tier
}
|
||||||
|
|
||||||
|
// Route the shim's config reads through mockState so each test can flip
// compression on/off without touching real user config on disk.
mock.module('../../utils/config.js', () => ({
  getGlobalConfig: () => ({
    toolHistoryCompressionEnabled: mockState.enabled,
    autoCompactEnabled: false,
  }),
}))

// Pin the effective context window so tier selection is deterministic.
mock.module('../compact/autoCompact.js', () => ({
  getEffectiveContextWindowSize: () => mockState.effectiveWindow,
}))
|
||||||
|
|
||||||
|
// Minimal structural view of the shim client — only the call path these
// tests exercise (beta.messages.create).
type OpenAIShimClient = {
  beta: {
    messages: {
      create: (
        params: Record<string, unknown>,
        options?: Record<string, unknown>,
      ) => Promise<unknown>
    }
  }
}
|
||||||
|
|
||||||
|
function bigText(n: number): string {
|
||||||
|
return 'A'.repeat(n)
|
||||||
|
}
|
||||||
|
|
||||||
|
function buildToolExchange(id: number, resultLength: number) {
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
role: 'assistant',
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'tool_use',
|
||||||
|
id: `toolu_${id}`,
|
||||||
|
name: 'Read',
|
||||||
|
input: { file_path: `/path/to/file${id}.ts` },
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'tool_result',
|
||||||
|
tool_use_id: `toolu_${id}`,
|
||||||
|
content: bigText(resultLength),
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
function buildLongConversation(numExchanges: number, resultLength = 5_000) {
|
||||||
|
const out: Array<{ role: string; content: unknown }> = [
|
||||||
|
{ role: 'user', content: 'start the work' },
|
||||||
|
]
|
||||||
|
for (let i = 0; i < numExchanges; i++) {
|
||||||
|
out.push(...buildToolExchange(i, resultLength))
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
function makeFakeResponse(): Response {
|
||||||
|
return new Response(
|
||||||
|
JSON.stringify({
|
||||||
|
id: 'chatcmpl-1',
|
||||||
|
model: 'gpt-4o',
|
||||||
|
choices: [
|
||||||
|
{
|
||||||
|
message: { role: 'assistant', content: 'done' },
|
||||||
|
finish_reason: 'stop',
|
||||||
|
},
|
||||||
|
],
|
||||||
|
usage: { prompt_tokens: 8, completion_tokens: 2, total_tokens: 10 },
|
||||||
|
}),
|
||||||
|
{ headers: { 'Content-Type': 'application/json' } },
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
beforeEach(() => {
  // Point the shim at a dummy endpoint (no real network) and reset the mock
  // toggles to the common Copilot gpt-4o case.
  process.env.OPENAI_BASE_URL = 'http://example.test/v1'
  process.env.OPENAI_API_KEY = 'test-key'
  delete process.env.OPENAI_MODEL
  mockState.enabled = true
  mockState.effectiveWindow = 100_000
})

afterEach(() => {
  // Restore each env var exactly as found — deleting ones that were unset —
  // then un-stub global fetch.
  if (originalEnv.OPENAI_BASE_URL === undefined) delete process.env.OPENAI_BASE_URL
  else process.env.OPENAI_BASE_URL = originalEnv.OPENAI_BASE_URL
  if (originalEnv.OPENAI_API_KEY === undefined) delete process.env.OPENAI_API_KEY
  else process.env.OPENAI_API_KEY = originalEnv.OPENAI_API_KEY
  if (originalEnv.OPENAI_MODEL === undefined) delete process.env.OPENAI_MODEL
  else process.env.OPENAI_MODEL = originalEnv.OPENAI_MODEL
  globalThis.fetch = originalFetch
})
|
||||||
|
|
||||||
|
async function captureRequestBody(
|
||||||
|
messages: Array<{ role: string; content: unknown }>,
|
||||||
|
model: string,
|
||||||
|
): Promise<Record<string, unknown>> {
|
||||||
|
let captured: Record<string, unknown> | undefined
|
||||||
|
|
||||||
|
globalThis.fetch = (async (_input, init) => {
|
||||||
|
captured = JSON.parse(String(init?.body))
|
||||||
|
return makeFakeResponse()
|
||||||
|
}) as FetchType
|
||||||
|
|
||||||
|
const client = createOpenAIShimClient({}) as OpenAIShimClient
|
||||||
|
await client.beta.messages.create({
|
||||||
|
model,
|
||||||
|
system: 'system prompt',
|
||||||
|
messages,
|
||||||
|
})
|
||||||
|
|
||||||
|
if (!captured) throw new Error('request not captured')
|
||||||
|
return captured
|
||||||
|
}
|
||||||
|
|
||||||
|
function getToolMessages(body: Record<string, unknown>): Array<{ content: string }> {
|
||||||
|
const messages = body.messages as Array<{ role: string; content: string }>
|
||||||
|
return messages.filter(m => m.role === 'tool')
|
||||||
|
}
|
||||||
|
|
||||||
|
function getAssistantToolCalls(body: Record<string, unknown>): unknown[] {
|
||||||
|
const messages = body.messages as Array<{
|
||||||
|
role: string
|
||||||
|
tool_calls?: unknown[]
|
||||||
|
}>
|
||||||
|
return messages
|
||||||
|
.filter(m => m.role === 'assistant' && Array.isArray(m.tool_calls))
|
||||||
|
.flatMap(m => m.tool_calls ?? [])
|
||||||
|
}
|
||||||
|
|
||||||
|
// ============================================================================
// BUG REPRO: without compression, full tool history is resent every turn
// ============================================================================

test('BUG REPRO: without compression, all 30 tool results are sent at full size', async () => {
  // Feature off → shim must forward the transcript untouched.
  mockState.enabled = false
  const messages = buildLongConversation(30, 5_000)

  const body = await captureRequestBody(messages, 'gpt-4o')
  const toolMessages = getToolMessages(body)
  const payloadSize = JSON.stringify(body).length

  // All 30 tool results present, none truncated
  expect(toolMessages.length).toBe(30)
  for (const m of toolMessages) {
    expect(m.content.length).toBeGreaterThanOrEqual(5_000)
    expect(m.content).not.toContain('[…truncated')
    expect(m.content).not.toContain('chars omitted')
  }

  // Total payload is large (~150KB raw) — this is the cost being paid every turn
  expect(payloadSize).toBeGreaterThan(150_000)
})
|
||||||
|
|
||||||
|
// ============================================================================
// FIX: with compression, recent kept full, mid truncated, old stubbed
// ============================================================================

test('FIX: with compression on Copilot gpt-4o (tier 5/10/rest), 30 turns shrinks dramatically', async () => {
  mockState.enabled = true
  mockState.effectiveWindow = 100_000 // 64–128k → recent=5, mid=10
  const messages = buildLongConversation(30, 5_000)

  const body = await captureRequestBody(messages, 'gpt-4o')
  const toolMessages = getToolMessages(body)
  const payloadSize = JSON.stringify(body).length

  // Structure preserved: still 30 tool messages, no orphan tool_calls
  expect(toolMessages.length).toBe(30)
  expect(getAssistantToolCalls(body).length).toBe(30)

  // Tier breakdown (oldest → newest):
  //   indices 0..14  → old tier (stubs)
  //   indices 15..24 → mid tier (truncated)
  //   indices 25..29 → recent (full)
  for (let i = 0; i <= 14; i++) {
    expect(toolMessages[i].content).toMatch(/^\[Read args=.*chars omitted\]$/)
  }
  for (let i = 15; i <= 24; i++) {
    expect(toolMessages[i].content).toContain('[…truncated')
  }
  for (let i = 25; i <= 29; i++) {
    expect(toolMessages[i].content.length).toBe(5_000)
    expect(toolMessages[i].content).not.toContain('[…truncated')
    expect(toolMessages[i].content).not.toContain('chars omitted')
  }

  // Significant reduction: from ~150KB to <60KB (10 mid×2KB + structure overhead)
  expect(payloadSize).toBeLessThan(60_000)
})
|
||||||
|
|
||||||
|
// ============================================================================
// FIX: large-context model gets generous tiers — compression effectively inert
// ============================================================================

test('FIX: gpt-4.1 (1M context) with 25 exchanges keeps all full (recent tier=25)', async () => {
  mockState.enabled = true
  mockState.effectiveWindow = 1_000_000 // ≥500k → recent=25, mid=50
  const messages = buildLongConversation(25, 5_000)

  const body = await captureRequestBody(messages, 'gpt-4.1')
  const toolMessages = getToolMessages(body)

  // Exactly at the recent-tier boundary: everything stays verbatim.
  expect(toolMessages.length).toBe(25)
  for (const m of toolMessages) {
    expect(m.content.length).toBe(5_000)
    expect(m.content).not.toContain('[…truncated')
    expect(m.content).not.toContain('chars omitted')
  }
})

test('FIX: gpt-4.1 (1M context) with 30 exchanges → only first 5 mid-truncated', async () => {
  mockState.enabled = true
  mockState.effectiveWindow = 1_000_000 // recent=25, mid=50
  const messages = buildLongConversation(30, 5_000)

  const body = await captureRequestBody(messages, 'gpt-4.1')
  const toolMessages = getToolMessages(body)

  // 30 total: indices 0..4 mid, indices 5..29 recent
  for (let i = 0; i < 5; i++) {
    expect(toolMessages[i].content).toContain('[…truncated')
  }
  for (let i = 5; i < 30; i++) {
    expect(toolMessages[i].content.length).toBe(5_000)
  }
})
|
||||||
|
|
||||||
|
// ============================================================================
// FIX: stub preserves tool name and args — model can re-invoke if needed
// ============================================================================

test('FIX: stub format includes original tool name and arguments', async () => {
  mockState.enabled = true
  mockState.effectiveWindow = 100_000
  const messages = buildLongConversation(30, 5_000)

  const body = await captureRequestBody(messages, 'gpt-4o')
  const toolMessages = getToolMessages(body)
  // index 0 is the oldest tool_result → deepest (stub) tier.
  const oldestStub = toolMessages[0].content

  // Format: [<tool_name> args=<json> → <N> chars omitted]
  expect(oldestStub).toMatch(/^\[Read /)
  expect(oldestStub).toMatch(/file_path/)
  expect(oldestStub).toMatch(/→ 5000 chars omitted\]$/)
})
|
||||||
|
|
||||||
|
// ============================================================================
// FIX: tool_use blocks (assistant tool_calls) are never modified
// ============================================================================

test('FIX: every tool_call retains its full id, name, and arguments', async () => {
  mockState.enabled = true
  mockState.effectiveWindow = 100_000
  const messages = buildLongConversation(30, 5_000)

  const body = await captureRequestBody(messages, 'gpt-4o')
  const toolCalls = getAssistantToolCalls(body) as Array<{
    id: string
    function: { name: string; arguments: string }
  }>

  // Compression must only touch tool_result content — every assistant
  // tool_call survives byte-for-byte so the transcript stays well-formed.
  expect(toolCalls.length).toBe(30)
  for (let i = 0; i < toolCalls.length; i++) {
    expect(toolCalls[i].id).toBe(`toolu_${i}`)
    expect(toolCalls[i].function.name).toBe('Read')
    expect(JSON.parse(toolCalls[i].function.arguments)).toEqual({
      file_path: `/path/to/file${i}.ts`,
    })
  }
})
|
||||||
|
|
||||||
|
// ============================================================================
// FIX: small-context provider (Mistral 32k) gets aggressive compression
// ============================================================================

test('FIX: 32k window (Mistral tier) → recent=3 keeps last 3 only', async () => {
  mockState.enabled = true
  mockState.effectiveWindow = 24_000 // 16–32k → recent=3, mid=5
  const messages = buildLongConversation(15, 3_000)

  const body = await captureRequestBody(messages, 'mistral-large-latest')
  const toolMessages = getToolMessages(body)

  // 15 total: indices 0..6 old, 7..11 mid, 12..14 recent
  for (let i = 0; i <= 6; i++) {
    expect(toolMessages[i].content).toContain('chars omitted')
  }
  for (let i = 7; i <= 11; i++) {
    expect(toolMessages[i].content).toContain('[…truncated')
  }
  for (let i = 12; i <= 14; i++) {
    expect(toolMessages[i].content.length).toBe(3_000)
  }
})
|
||||||
@@ -46,6 +46,7 @@ import {
|
|||||||
type AnthropicUsage,
|
type AnthropicUsage,
|
||||||
type ShimCreateParams,
|
type ShimCreateParams,
|
||||||
} from './codexShim.js'
|
} from './codexShim.js'
|
||||||
|
import { compressToolHistory } from './compressToolHistory.js'
|
||||||
import { fetchWithProxyRetry } from './fetchWithProxyRetry.js'
|
import { fetchWithProxyRetry } from './fetchWithProxyRetry.js'
|
||||||
import {
|
import {
|
||||||
getLocalProviderRetryBaseUrls,
|
getLocalProviderRetryBaseUrls,
|
||||||
@@ -1299,14 +1300,15 @@ class OpenAIShimMessages {
|
|||||||
params: ShimCreateParams,
|
params: ShimCreateParams,
|
||||||
options?: { signal?: AbortSignal; headers?: Record<string, string> },
|
options?: { signal?: AbortSignal; headers?: Record<string, string> },
|
||||||
): Promise<Response> {
|
): Promise<Response> {
|
||||||
const openaiMessages = convertMessages(
|
const compressedMessages = compressToolHistory(
|
||||||
params.messages as Array<{
|
params.messages as Array<{
|
||||||
role: string
|
role: string
|
||||||
message?: { role?: string; content?: unknown }
|
message?: { role?: string; content?: unknown }
|
||||||
content?: unknown
|
content?: unknown
|
||||||
}>,
|
}>,
|
||||||
params.system,
|
request.resolvedModel,
|
||||||
)
|
)
|
||||||
|
const openaiMessages = convertMessages(compressedMessages, params.system)
|
||||||
|
|
||||||
const body: Record<string, unknown> = {
|
const body: Record<string, unknown> = {
|
||||||
model: request.resolvedModel,
|
model: request.resolvedModel,
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ export const TIME_BASED_MC_CLEARED_MESSAGE = '[Old tool result content cleared]'
|
|||||||
const IMAGE_MAX_TOKEN_SIZE = 2000
|
const IMAGE_MAX_TOKEN_SIZE = 2000
|
||||||
|
|
||||||
// Only compact these built-in tools (MCP tools are also compactable via prefix match)
|
// Only compact these built-in tools (MCP tools are also compactable via prefix match)
|
||||||
const COMPACTABLE_TOOLS = new Set<string>([
|
export const COMPACTABLE_TOOLS = new Set<string>([
|
||||||
FILE_READ_TOOL_NAME,
|
FILE_READ_TOOL_NAME,
|
||||||
...SHELL_TOOL_NAMES,
|
...SHELL_TOOL_NAMES,
|
||||||
GREP_TOOL_NAME,
|
GREP_TOOL_NAME,
|
||||||
@@ -51,7 +51,7 @@ const COMPACTABLE_TOOLS = new Set<string>([
|
|||||||
|
|
||||||
const MCP_TOOL_PREFIX = 'mcp__'
|
const MCP_TOOL_PREFIX = 'mcp__'
|
||||||
|
|
||||||
function isCompactableTool(name: string): boolean {
|
export function isCompactableTool(name: string): boolean {
|
||||||
return COMPACTABLE_TOOLS.has(name) || name.startsWith(MCP_TOOL_PREFIX)
|
return COMPACTABLE_TOOLS.has(name) || name.startsWith(MCP_TOOL_PREFIX)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -244,6 +244,7 @@ export type GlobalConfig = {
|
|||||||
bypassPermissionsModeAccepted?: boolean
|
bypassPermissionsModeAccepted?: boolean
|
||||||
hasUsedBackslashReturn?: boolean
|
hasUsedBackslashReturn?: boolean
|
||||||
autoCompactEnabled: boolean // Controls whether auto-compact is enabled
|
autoCompactEnabled: boolean // Controls whether auto-compact is enabled
|
||||||
|
toolHistoryCompressionEnabled: boolean // Compress old tool_result content for small-context providers
|
||||||
showTurnDuration: boolean // Controls whether to show turn duration message (e.g., "Cooked for 1m 6s")
|
showTurnDuration: boolean // Controls whether to show turn duration message (e.g., "Cooked for 1m 6s")
|
||||||
/**
|
/**
|
||||||
* @deprecated Use settings.env instead.
|
* @deprecated Use settings.env instead.
|
||||||
@@ -622,6 +623,7 @@ function createDefaultGlobalConfig(): GlobalConfig {
|
|||||||
verbose: false,
|
verbose: false,
|
||||||
editorMode: 'normal',
|
editorMode: 'normal',
|
||||||
autoCompactEnabled: true,
|
autoCompactEnabled: true,
|
||||||
|
toolHistoryCompressionEnabled: true,
|
||||||
showTurnDuration: true,
|
showTurnDuration: true,
|
||||||
hasSeenTasksHint: false,
|
hasSeenTasksHint: false,
|
||||||
hasUsedStash: false,
|
hasUsedStash: false,
|
||||||
@@ -668,6 +670,7 @@ export const GLOBAL_CONFIG_KEYS = [
|
|||||||
'editorMode',
|
'editorMode',
|
||||||
'hasUsedBackslashReturn',
|
'hasUsedBackslashReturn',
|
||||||
'autoCompactEnabled',
|
'autoCompactEnabled',
|
||||||
|
'toolHistoryCompressionEnabled',
|
||||||
'showTurnDuration',
|
'showTurnDuration',
|
||||||
'diffTool',
|
'diffTool',
|
||||||
'env',
|
'env',
|
||||||
|
|||||||
Reference in New Issue
Block a user