feat: context preloading and hybrid context strategy (#860)

* feat: context preloading and hybrid context strategy

PR 2D - Section 2.7, 2.8:
- Add contextPreload.ts with pattern-based prediction
- Add hybridContextStrategy.ts with cache/fresh balancing
- Optimize for cost vs accuracy
- Add comprehensive tests (13 passing)

* feat: wire hybrid context strategy into API path

- Apply hybrid strategy after normalizeMessagesForAPI
- Feature-flag controlled (HYBRID_CONTEXT_STRATEGY)
- Optimizes cache/fresh balance for API requests

* fix: resolve PR 2D blocking issues

- Fix predictContextNeeds self-assign bug (matchedCategory = category)
- Add test for non-empty predictedNeed
- Preserve conversation tail in hybridStrategy (never drop last 3 messages)
- Add comment for hardcoded 200k cap in claude.ts

Fixes reviewer feedback from gnanam1990 and Vasanthdev2004

* fix: preserve tool_use/tool_result chains in hybridStrategy

- Increase MIN_TAIL to 5 (tool_use -> tool_result -> assistant -> user -> next)
- Add getMessageChain() to preserve paired messages
- Chains kept together in final selection

* fix: PR 860 - tool_use/tool_result pairing and safe token counting

Blocking:
- getMessageChain() now pairs by tool_use.id (block ID) not msg.message.id
- Find tool_use blocks by id, pair with tool_result having matching tool_use_id
- Fixes a tool_result surviving while its paired tool_use is dropped

- Token counting now includes array content (tool_use, tool_result, thinking),
- not just string content; this prevents undercounting the prompt size

- Deduplicate messages by UUID when combining chains + split + tail
- Prevents duplicate messages in final request

Non-blocking:
- Add regression test for tool_use/tool_result pairing

* fix: PR 860 - account for actual structured payload size in token counting

Blocking:
- getMessageTokenCount now calculates actual token count for structured blocks
- tool_use: uses JSON.stringify(input).length / 4 + base
- tool_result: counts actual content (string or array of text blocks)
- thinking: counts actual thinking text length / 4
- The is_error flag adds a small overhead

Non-blocking:
- Add tests for large tool_use input and large thinking blocks
This commit is contained in:
ArkhAngelLifeJiggy
2026-04-29 08:49:46 +01:00
committed by GitHub
parent 91f93ce615
commit 92d297e50e
5 changed files with 800 additions and 0 deletions

View File

@@ -0,0 +1,230 @@
import { describe, expect, it } from 'bun:test'
import {
splitContext,
applyHybridStrategy,
optimizeForCost,
optimizeForAccuracy,
getHybridStats,
} from './hybridContextStrategy.js'
/**
 * Builds a minimal message fixture for the hybrid-context tests.
 *
 * The wrapper's `sender` mirrors `role`, and every fixture shares the same
 * placeholder id (`'test'`); no `uuid` field is set.
 *
 * @param role - Value stored as both `message.role` and `sender`.
 * @param content - Plain string content of the message.
 * @param createdAt - Timestamp stored as `message.created_at`; defaults to the
 *   time of the call.
 * @returns A loosely typed `{ message, sender }` object.
 */
function createMessage(role: string, content: string, createdAt: number = Date.now()): any {
  const message = { role, content, id: 'test', type: 'message', created_at: createdAt }
  return { message, sender: role }
}
describe('hybridContextStrategy', () => {
// splitContext: smoke tests over the token counters and the two partitions it returns.
describe('splitContext', () => {
  const ONE_DAY_MS = 86400000

  it('splits context into cached and fresh', () => {
    // A system prompt stamped one day in the past, then two turns stamped "now".
    const agedSystemPrompt = createMessage('system', 'System prompt', Date.now() - ONE_DAY_MS)
    const conversation = [
      agedSystemPrompt,
      createMessage('user', 'Hello'),
      createMessage('assistant', 'Hi there'),
    ]

    const { cachedTokens, freshTokens, totalTokens } = splitContext(conversation, {
      cacheWeight: 0.4,
      freshWeight: 0.6,
      maxTotalTokens: 10000,
    })

    // The assertions allow either side to be zero but require a positive total.
    expect(cachedTokens).toBeGreaterThanOrEqual(0)
    expect(freshTokens).toBeGreaterThanOrEqual(0)
    expect(totalTokens).toBeGreaterThan(0)
  })

  it('respects weight configuration', () => {
    const oldSystem = createMessage('system', 'Old system', Date.now() - ONE_DAY_MS)
    const recentUser = createMessage('user', 'Recent message', Date.now())

    const partition = splitContext([oldSystem, recentUser], {
      cacheWeight: 0.5,
      freshWeight: 0.5,
      maxTotalTokens: 10000,
    })

    // NOTE(review): only checks that both partitions exist; the effect of the
    // 50/50 weights themselves is not asserted here.
    expect(partition.cached).toBeDefined()
    expect(partition.fresh).toBeDefined()
  })
})
// applyHybridStrategy: checks the shape of the result (selection, strategy label, cost).
describe('applyHybridStrategy', () => {
  // Returns a fresh options object on every call so tests never share an instance.
  const evenSplit = () => ({ cacheWeight: 0.5, freshWeight: 0.5, maxTotalTokens: 10000 })

  it('applies strategy and returns messages', () => {
    const exchange = [
      createMessage('user', 'Message 1'),
      createMessage('assistant', 'Response 1'),
    ]
    const knownStrategies = ['cache_heavy', 'fresh_heavy', 'balanced']

    const outcome = applyHybridStrategy(exchange, evenSplit())

    // At least one message must be selected, and the label must be a known one.
    expect(outcome.selectedMessages.length).toBeGreaterThan(0)
    expect(knownStrategies).toContain(outcome.strategy)
  })

  it('calculates estimated cost', () => {
    const singleTurn = [createMessage('user', 'Test message')]

    const outcome = applyHybridStrategy(singleTurn, evenSplit())

    // Only requires a non-negative estimate; no specific value is pinned.
    expect(outcome.estimatedCost).toBeGreaterThanOrEqual(0)
  })
})
// optimizeForCost: smoke test for the cost-oriented selection entry point.
describe('optimizeForCost', () => {
  it('returns messages within budget', () => {
    // Second argument is presumably the cost budget (per the test name) — confirm
    // units against the implementation.
    const costLimit = 0.001
    const exchange = [
      createMessage('user', 'Message 1'),
      createMessage('assistant', 'Response 1'),
    ]

    const kept = optimizeForCost(exchange, costLimit)

    // NOTE(review): a length is never negative, so this only verifies that the
    // call returns something with a length; it does not check the budget.
    expect(kept.length).toBeGreaterThanOrEqual(0)
  })
})
// optimizeForAccuracy: smoke test for the accuracy-oriented selection entry point.
describe('optimizeForAccuracy', () => {
  it('optimizes for accuracy with token limit', () => {
    // Second argument is presumably the token limit (per the test name) — confirm
    // against the implementation.
    const tokenLimit = 5000
    const exchange = [
      createMessage('user', 'Message 1'),
      createMessage('assistant', 'Response 1'),
    ]

    const kept = optimizeForAccuracy(exchange, tokenLimit)

    // With two short messages and this limit, something must be retained.
    expect(kept.length).toBeGreaterThan(0)
  })
})
// getHybridStats: derives ratio/total statistics from a splitContext result.
describe('getHybridStats', () => {
  it('returns statistics', () => {
    // A system message stamped one day in the past, then a user turn stamped "now".
    const agedSystem = createMessage('system', 'System', Date.now() - 86400000)
    const greeting = createMessage('user', 'Hello')

    const partition = splitContext([agedSystem, greeting], {
      cacheWeight: 0.5,
      freshWeight: 0.5,
      maxTotalTokens: 10000,
    })
    const { cacheRatio, freshRatio, totalTokens } = getHybridStats(partition)

    // Ratios only need to be non-negative; the total must be positive.
    expect(cacheRatio).toBeGreaterThanOrEqual(0)
    expect(freshRatio).toBeGreaterThanOrEqual(0)
    expect(totalTokens).toBeGreaterThan(0)
  })
})
// Structured-content cases: tool_use/tool_result survival and token counting for
// array-valued message content.
describe('tool_use/tool_result pairing', () => {
  // True when any selected message carries a structured content block of the given type.
  const selectionHasBlock = (selected: any[], blockType: string): boolean =>
    selected.some(
      entry =>
        Array.isArray(entry.message?.content) &&
        entry.message.content.some((block: any) => block.type === blockType)
    )

  it('preserves tool_use and tool_result together', () => {
    const sharedToolUseId = 'tool-use-123'

    // Assistant issues a tool call ...
    const toolCall = {
      type: 'assistant',
      uuid: 'uuid-1',
      message: {
        role: 'assistant',
        content: [{ type: 'tool_use', id: sharedToolUseId, name: 'Read' }],
        id: 'msg-1',
        created_at: 1000,
      },
    }
    // ... the user turn answers it via the matching tool_use_id ...
    const toolOutcome = {
      type: 'user',
      uuid: 'uuid-2',
      message: {
        role: 'user',
        content: [{ type: 'tool_result', tool_use_id: sharedToolUseId, content: 'file content' }],
        id: 'msg-2',
        created_at: 2000,
      },
    }
    // ... and the assistant follows up with plain string content.
    const followUp = {
      type: 'assistant',
      uuid: 'uuid-3',
      message: {
        role: 'assistant',
        content: 'Response after tool',
        id: 'msg-3',
        created_at: 3000,
      },
    }
    const transcript: any[] = [toolCall, toolOutcome, followUp]

    const outcome = applyHybridStrategy(transcript, {
      cacheWeight: 0.5,
      freshWeight: 0.5,
      maxTotalTokens: 10000,
    })

    // NOTE(review): the two block types are checked independently; with this
    // budget the selection may never be forced to drop anything — confirm the
    // regression path is actually exercised.
    expect(selectionHasBlock(outcome.selectedMessages, 'tool_use')).toBe(true)
    expect(selectionHasBlock(outcome.selectedMessages, 'tool_result')).toBe(true)
  })

  it('accounts for large tool_use input in token counting', () => {
    // 5000-character payload inside the tool_use input object.
    const bulkyPayload = 'x'.repeat(5000)
    const transcript: any[] = [
      {
        type: 'assistant',
        message: {
          role: 'assistant',
          content: [
            { type: 'tool_use', id: 'tu1', name: 'Edit', input: { path: 'test.js', content: bulkyPayload } },
          ],
          created_at: 1000,
        },
      },
    ]

    const outcome = applyHybridStrategy(transcript, {
      cacheWeight: 0.5,
      freshWeight: 0.5,
      maxTotalTokens: 20000,
    })

    // The reported total must reflect the payload size, not a small flat estimate.
    expect(outcome.totalTokens).toBeGreaterThan(1000)
  })

  it('accounts for large thinking blocks in token counting', () => {
    // 9000 characters of thinking text ('Thinking ' x 1000) plus a short text block.
    const verboseThinking = 'Thinking '.repeat(1000)
    const transcript: any[] = [
      {
        type: 'assistant',
        message: {
          role: 'assistant',
          content: [
            { type: 'thinking', thinking: verboseThinking },
            { type: 'text', text: 'Final response' },
          ],
          created_at: 1000,
        },
      },
    ]

    const outcome = applyHybridStrategy(transcript, {
      cacheWeight: 0.5,
      freshWeight: 0.5,
      maxTotalTokens: 20000,
    })

    // The reported total must grow with the thinking text.
    expect(outcome.totalTokens).toBeGreaterThan(500)
  })
})
})