feat: context preloading and hybrid context strategy (#860)
* feat: context preloading and hybrid context strategy

  PR 2D - Section 2.7, 2.8:
  - Add contextPreload.ts with pattern-based prediction
  - Add hybridContextStrategy.ts with cache/fresh balancing
  - Optimize for cost vs accuracy
  - Add comprehensive tests (13 passing)

* feat: wire hybrid context strategy into API path

  - Apply hybrid strategy after normalizeMessagesForAPI
  - Feature-flag controlled (HYBRID_CONTEXT_STRATEGY)
  - Optimizes cache/fresh balance for API requests

* fix: resolve PR 2D blocking issues

  - Fix predictContextNeeds self-assign bug (matchedCategory = category)
  - Add test for non-empty predictedNeed
  - Preserve conversation tail in hybridStrategy (never drop last 3 messages)
  - Add comment for hardcoded 200k cap in claude.ts

  Fixes reviewer feedback from gnanam1990 and Vasanthdev2004

* fix: preserve tool_use/tool_result chains in hybridStrategy

  - Increase MIN_TAIL to 5 (tool_use -> tool_result -> assistant -> user -> next)
  - Add getMessageChain() to preserve paired messages
  - Chains kept together in final selection

* fix: PR 860 - tool_use/tool_result pairing and safe token counting

  Blocking:
  - getMessageChain() now pairs by tool_use.id (block ID) not msg.message.id
  - Find tool_use blocks by id, pair with tool_result having matching tool_use_id
  - Fixes tool_result surviving while paired tool_use dropped
  - Token counting now includes array content (tool_use, tool_result, thinking)
  - Not just string content, prevents undercounting prompt size
  - Deduplicate messages by UUID when combining chains + split + tail
  - Prevents duplicate messages in final request

  Non-blocking:
  - Add regression test for tool_use/tool_result pairing

* fix: PR 860 - account for actual structured payload size in token counting

  Blocking:
  - getMessageTokenCount now calculates actual token count for structured blocks
  - tool_use: uses JSON.stringify(input).length / 4 + base
  - tool_result: counts actual content (string or array of text blocks)
  - thinking: counts actual thinking text length / 4
  - is_error flag adds small overhead

  Non-blocking:
  - Add tests for large tool_use input and large thinking blocks
This commit is contained in:
committed by
GitHub
parent
91f93ce615
commit
92d297e50e
230
src/utils/hybridContextStrategy.test.ts
Normal file
230
src/utils/hybridContextStrategy.test.ts
Normal file
@@ -0,0 +1,230 @@
|
||||
import { describe, expect, it } from 'bun:test'
|
||||
import {
|
||||
splitContext,
|
||||
applyHybridStrategy,
|
||||
optimizeForCost,
|
||||
optimizeForAccuracy,
|
||||
getHybridStats,
|
||||
} from './hybridContextStrategy.js'
|
||||
|
||||
// Monotonic counter so every fixture built by createMessage gets a distinct,
// deterministic uuid within a test run.
let createMessageSeq = 0

/**
 * Builds a minimal message fixture for the hybrid context strategy tests.
 *
 * Each fixture carries a unique `uuid`, in line with the hand-written
 * tool_use/tool_result fixtures in this file. Per the change description the
 * strategy de-duplicates selected messages by UUID, so fixtures without one
 * would be indistinguishable to that logic (NOTE(review): confirm against
 * hybridContextStrategy.ts).
 *
 * @param role - Role stored both as `message.role` and as `sender`.
 * @param content - Plain string content of the message.
 * @param createdAt - Value for `message.created_at`; defaults to the current
 *   time in epoch milliseconds.
 * @returns A loosely typed fixture object. The return type stays `any` so the
 *   fixture can be handed to the functions under test without reproducing the
 *   project's message type here.
 */
function createMessage(role: string, content: string, createdAt: number = Date.now()): any {
  createMessageSeq += 1
  return {
    uuid: `test-uuid-${createMessageSeq}`,
    message: { role, content, id: 'test', type: 'message', created_at: createdAt },
    sender: role,
  }
}
|
||||
|
||||
describe('hybridContextStrategy', () => {
  // One day in milliseconds; passed to createMessage to back-date a fixture.
  const DAY_MS = 86400000

  // Builds a fresh strategy config per call so no two tests share an object.
  const makeConfig = (cacheWeight: number, freshWeight: number, maxTotalTokens: number) => ({
    cacheWeight,
    freshWeight,
    maxTotalTokens,
  })

  // True when any selected message has array content holding a block of `blockType`.
  const hasBlockOfType = (selected: any[], blockType: string): boolean =>
    selected.some(entry => {
      const content = entry.message?.content
      return Array.isArray(content) && content.some((block: any) => block.type === blockType)
    })

  describe('splitContext', () => {
    it('splits context into cached and fresh', () => {
      const conversation = [
        createMessage('system', 'System prompt', Date.now() - DAY_MS),
        createMessage('user', 'Hello'),
        createMessage('assistant', 'Hi there'),
      ]

      const split = splitContext(conversation, makeConfig(0.4, 0.6, 10000))

      // Only sanity bounds are checked here: non-negative buckets, positive total.
      expect(split.cachedTokens).toBeGreaterThanOrEqual(0)
      expect(split.freshTokens).toBeGreaterThanOrEqual(0)
      expect(split.totalTokens).toBeGreaterThan(0)
    })

    it('respects weight configuration', () => {
      const conversation = [
        createMessage('system', 'Old system', Date.now() - DAY_MS),
        createMessage('user', 'Recent message', Date.now()),
      ]

      const split = splitContext(conversation, makeConfig(0.5, 0.5, 10000))

      // NOTE(review): this only checks that both buckets exist; it does not
      // verify that the weights influence the split.
      expect(split.cached).toBeDefined()
      expect(split.fresh).toBeDefined()
    })
  })

  describe('applyHybridStrategy', () => {
    it('applies strategy and returns messages', () => {
      const conversation = [
        createMessage('user', 'Message 1'),
        createMessage('assistant', 'Response 1'),
      ]

      const outcome = applyHybridStrategy(conversation, makeConfig(0.5, 0.5, 10000))

      const knownStrategies = ['cache_heavy', 'fresh_heavy', 'balanced']
      expect(outcome.selectedMessages.length).toBeGreaterThan(0)
      expect(knownStrategies).toContain(outcome.strategy)
    })

    it('calculates estimated cost', () => {
      const conversation = [createMessage('user', 'Test message')]

      const outcome = applyHybridStrategy(conversation, makeConfig(0.5, 0.5, 10000))

      expect(outcome.estimatedCost).toBeGreaterThanOrEqual(0)
    })
  })

  describe('optimizeForCost', () => {
    it('returns messages within budget', () => {
      const conversation = [
        createMessage('user', 'Message 1'),
        createMessage('assistant', 'Response 1'),
      ]

      const kept = optimizeForCost(conversation, 0.001)

      // NOTE(review): a length is never negative, so this effectively only
      // checks that the call completes and returns something with a length.
      expect(kept.length).toBeGreaterThanOrEqual(0)
    })
  })

  describe('optimizeForAccuracy', () => {
    it('optimizes for accuracy with token limit', () => {
      const conversation = [
        createMessage('user', 'Message 1'),
        createMessage('assistant', 'Response 1'),
      ]

      const kept = optimizeForAccuracy(conversation, 5000)

      expect(kept.length).toBeGreaterThan(0)
    })
  })

  describe('getHybridStats', () => {
    it('returns statistics', () => {
      const conversation = [
        createMessage('system', 'System', Date.now() - DAY_MS),
        createMessage('user', 'Hello'),
      ]

      // Stats are derived from a split produced by splitContext.
      const stats = getHybridStats(splitContext(conversation, makeConfig(0.5, 0.5, 10000)))

      expect(stats.cacheRatio).toBeGreaterThanOrEqual(0)
      expect(stats.freshRatio).toBeGreaterThanOrEqual(0)
      expect(stats.totalTokens).toBeGreaterThan(0)
    })
  })

  describe('tool_use/tool_result pairing', () => {
    it('preserves tool_use and tool_result together', () => {
      const toolUseId = 'tool-use-123'

      // Assistant turn that issues the tool call.
      const toolCall = {
        type: 'assistant',
        uuid: 'uuid-1',
        message: {
          role: 'assistant',
          content: [{ type: 'tool_use', id: toolUseId, name: 'Read' }],
          id: 'msg-1',
          created_at: 1000,
        },
      }
      // User turn carrying the result, linked through tool_use_id.
      const toolOutput = {
        type: 'user',
        uuid: 'uuid-2',
        message: {
          role: 'user',
          content: [{ type: 'tool_result', tool_use_id: toolUseId, content: 'file content' }],
          id: 'msg-2',
          created_at: 2000,
        },
      }
      // Plain-text assistant follow-up.
      const followUp = {
        type: 'assistant',
        uuid: 'uuid-3',
        message: {
          role: 'assistant',
          content: 'Response after tool',
          id: 'msg-3',
          created_at: 3000,
        },
      }
      const conversation = [toolCall, toolOutput, followUp] as any[]

      const outcome = applyHybridStrategy(conversation, makeConfig(0.5, 0.5, 10000))

      // Both halves of the pair must be present in the selection.
      expect(hasBlockOfType(outcome.selectedMessages, 'tool_use')).toBe(true)
      expect(hasBlockOfType(outcome.selectedMessages, 'tool_result')).toBe(true)
    })

    it('accounts for large tool_use input in token counting', () => {
      // 5000-character payload placed inside the tool_use input.
      const largeInput = 'x'.repeat(5000)
      const conversation = [
        {
          type: 'assistant',
          message: {
            role: 'assistant',
            content: [
              { type: 'tool_use', id: 'tu1', name: 'Edit', input: { path: 'test.js', content: largeInput } },
            ],
            created_at: 1000,
          },
        },
      ] as any[]

      const outcome = applyHybridStrategy(conversation, makeConfig(0.5, 0.5, 20000))

      expect(outcome.totalTokens).toBeGreaterThan(1000)
    })

    it('accounts for large thinking blocks in token counting', () => {
      // 'Thinking ' is 9 characters, so this is a 9000-character thinking block.
      const longThinking = 'Thinking '.repeat(1000)
      const conversation = [
        {
          type: 'assistant',
          message: {
            role: 'assistant',
            content: [
              { type: 'thinking', thinking: longThinking },
              { type: 'text', text: 'Final response' },
            ],
            created_at: 1000,
          },
        },
      ] as any[]

      const outcome = applyHybridStrategy(conversation, makeConfig(0.5, 0.5, 20000))

      expect(outcome.totalTokens).toBeGreaterThan(500)
    })
  })
})
|
||||
Reference in New Issue
Block a user