feat: context preloading and hybrid context strategy (#860)

* feat: context preloading and hybrid context strategy

  PR 2D - Section 2.7, 2.8:
  - Add contextPreload.ts with pattern-based prediction
  - Add hybridContextStrategy.ts with cache/fresh balancing
  - Optimize for cost vs accuracy
  - Add comprehensive tests (13 passing)

* feat: wire hybrid context strategy into API path

  - Apply hybrid strategy after normalizeMessagesForAPI
  - Feature-flag controlled (HYBRID_CONTEXT_STRATEGY)
  - Optimizes cache/fresh balance for API requests

* fix: resolve PR 2D blocking issues

  - Fix predictContextNeeds self-assign bug (matchedCategory = category)
  - Add test for non-empty predictedNeed
  - Preserve conversation tail in hybridStrategy (never drop last 3 messages)
  - Add comment for hardcoded 200k cap in claude.ts

  Fixes reviewer feedback from gnanam1990 and Vasanthdev2004

* fix: preserve tool_use/tool_result chains in hybridStrategy

  - Increase MIN_TAIL to 5 (tool_use -> tool_result -> assistant -> user -> next)
  - Add getMessageChain() to preserve paired messages
  - Chains kept together in final selection

* fix: PR 860 - tool_use/tool_result pairing and safe token counting

  Blocking:
  - getMessageChain() now pairs by tool_use.id (block ID) not msg.message.id
  - Find tool_use blocks by id, pair with tool_result having matching tool_use_id
  - Fixes tool_result surviving while paired tool_use dropped
  - Token counting now includes array content (tool_use, tool_result, thinking)
  - Not just string content, prevents undercounting prompt size
  - Deduplicate messages by UUID when combining chains + split + tail
  - Prevents duplicate messages in final request

  Non-blocking:
  - Add regression test for tool_use/tool_result pairing

* fix: PR 860 - account for actual structured payload size in token counting

  Blocking:
  - getMessageTokenCount now calculates actual token count for structured blocks
  - tool_use: uses JSON.stringify(input).length / 4 + base
  - tool_result: counts actual content (string or array of text blocks)
  - thinking: counts actual thinking text length / 4
  - is_error flag adds small overhead

  Non-blocking:
  - Add tests for large tool_use input and large thinking blocks
2026-04-29 08:49:46 +01:00
parent 91f93ce615
commit 92d297e50e
5 changed files with 800 additions and 0 deletions
--- a/src/utils/hybridContextStrategy.test.ts
+++ b/src/utils/hybridContextStrategy.test.ts
@@ -0,0 +1,230 @@
+import { describe, expect, it } from 'bun:test'
+import {
+  splitContext,
+  applyHybridStrategy,
+  optimizeForCost,
+  optimizeForAccuracy,
+  getHybridStats,
+} from './hybridContextStrategy.js'
+
+function createMessage(role: string, content: string, createdAt: number = Date.now()): any { // Test fixture builder: wraps role/content in a message-shaped object; createdAt defaults to the time of the call.
+  return {
+    message: { role, content, id: 'test', type: 'message', created_at: createdAt }, // every fixture shares the same id, 'test'
+    sender: role, // sender simply mirrors the role argument
+  } // NOTE(review): no top-level `uuid` is set, unlike the hand-built fixtures in the pairing test below — confirm the strategy tolerates messages without one.
+}
+
+describe('hybridContextStrategy', () => {
+  describe('splitContext', () => { // Smoke tests: check the shape of splitContext's result rather than exact values.
+    it('splits context into cached and fresh', () => {
+      const messages = [
+        createMessage('system', 'System prompt', Date.now() - 86400000), // 86400000 ms = 24 h before now
+        createMessage('user', 'Hello'), // createdAt omitted, so it defaults to the current time
+        createMessage('assistant', 'Hi there'),
+      ]
+
+      const split = splitContext(messages, {
+        cacheWeight: 0.4,
+        freshWeight: 0.6,
+        maxTotalTokens: 10000,
+      })
+
+      expect(split.cachedTokens).toBeGreaterThanOrEqual(0) // lower bounds only; which message lands in which bucket is not pinned
+      expect(split.freshTokens).toBeGreaterThanOrEqual(0)
+      expect(split.totalTokens).toBeGreaterThan(0) // the only assertion here that requires some tokens to be counted
+    })
+
+    it('respects weight configuration', () => {
+      const messages = [
+        createMessage('system', 'Old system', Date.now() - 86400000), // one message dated 24 h ago...
+        createMessage('user', 'Recent message', Date.now()), // ...and one dated now
+      ]
+
+      const split = splitContext(messages, {
+        cacheWeight: 0.5,
+        freshWeight: 0.5,
+        maxTotalTokens: 10000,
+      })
+
+      expect(split.cached).toBeDefined() // NOTE(review): only checks that the fields exist; the 0.5/0.5 weights named in the title are never verified
+      expect(split.fresh).toBeDefined()
+    })
+  })
+
+  describe('applyHybridStrategy', () => { // Smoke tests for the fields read from applyHybridStrategy's result: selectedMessages, strategy, estimatedCost.
+    it('applies strategy and returns messages', () => {
+      const messages = [
+        createMessage('user', 'Message 1'),
+        createMessage('assistant', 'Response 1'),
+      ]
+
+      const result = applyHybridStrategy(messages, {
+        cacheWeight: 0.5,
+        freshWeight: 0.5,
+        maxTotalTokens: 10000,
+      })
+
+      expect(result.selectedMessages.length).toBeGreaterThan(0) // at least one input message must survive selection
+      expect(['cache_heavy', 'fresh_heavy', 'balanced']).toContain(result.strategy) // strategy must be one of these three labels; which one is not pinned
+    })
+
+    it('calculates estimated cost', () => {
+      const messages = [
+        createMessage('user', 'Test message'),
+      ]
+
+      const result = applyHybridStrategy(messages, {
+        cacheWeight: 0.5,
+        freshWeight: 0.5,
+        maxTotalTokens: 10000,
+      })
+
+      expect(result.estimatedCost).toBeGreaterThanOrEqual(0) // only asserts a non-negative number; no expected cost is computed
+    })
+  })
+
+  describe('optimizeForCost', () => { // Single smoke test: optimizeForCost is called and its result's length is read.
+    it('returns messages within budget', () => {
+      const messages = [
+        createMessage('user', 'Message 1'),
+        createMessage('assistant', 'Response 1'),
+      ]
+
+      const result = optimizeForCost(messages, 0.001) // second argument is presumably the cost budget named in the title — confirm against the implementation
+
+      expect(result.length).toBeGreaterThanOrEqual(0) // NOTE(review): a length is never negative, so this cannot fail; the "within budget" claim is not actually checked
+    })
+  })
+
+  describe('optimizeForAccuracy', () => { // Single smoke test: optimizeForAccuracy must return a non-empty result for two short messages.
+    it('optimizes for accuracy with token limit', () => {
+      const messages = [
+        createMessage('user', 'Message 1'),
+        createMessage('assistant', 'Response 1'),
+      ]
+
+      const result = optimizeForAccuracy(messages, 5000) // 5000 is presumably the token limit named in the title — confirm against the implementation
+
+      expect(result.length).toBeGreaterThan(0) // only non-emptiness is asserted; the limit itself is not checked
+    })
+  })
+
+  describe('getHybridStats', () => { // Smoke test: feeds a splitContext result into getHybridStats and checks lower bounds on three fields.
+    it('returns statistics', () => {
+      const messages = [
+        createMessage('system', 'System', Date.now() - 86400000), // dated 24 h before now
+        createMessage('user', 'Hello'),
+      ]
+
+      const split = splitContext(messages, { cacheWeight: 0.5, freshWeight: 0.5, maxTotalTokens: 10000 })
+      const stats = getHybridStats(split) // stats are derived from the split, not from the raw messages
+
+      expect(stats.cacheRatio).toBeGreaterThanOrEqual(0) // ratios are only bounded from below; no upper bound or sum is asserted
+      expect(stats.freshRatio).toBeGreaterThanOrEqual(0)
+      expect(stats.totalTokens).toBeGreaterThan(0)
+    })
+  })
+
+  describe('tool_use/tool_result pairing', () => { // Regression tests using hand-built fixtures with array content (tool_use, tool_result, thinking, text blocks).
+    it('preserves tool_use and tool_result together', () => {
+      const toolUseId = 'tool-use-123' // shared by the tool_use block's id and the tool_result block's tool_use_id
+      const messages = [
+        {
+          type: 'assistant',
+          uuid: 'uuid-1',
+          message: {
+            role: 'assistant',
+            content: [{ type: 'tool_use', id: toolUseId, name: 'Read' }], // the tool call; note that no `input` field is provided
+            id: 'msg-1',
+            created_at: 1000,
+          },
+        },
+        {
+          type: 'user',
+          uuid: 'uuid-2',
+          message: {
+            role: 'user',
+            content: [{ type: 'tool_result', tool_use_id: toolUseId, content: 'file content' }], // the matching result, linked by tool_use_id
+            id: 'msg-2',
+            created_at: 2000,
+          },
+        },
+        {
+          type: 'assistant',
+          uuid: 'uuid-3',
+          message: {
+            role: 'assistant',
+            content: 'Response after tool', // plain string content, unlike the two array-content messages above
+            id: 'msg-3',
+            created_at: 3000,
+          },
+        },
+      ] as any[]
+
+      const result = applyHybridStrategy(messages, {
+        cacheWeight: 0.5,
+        freshWeight: 0.5,
+        maxTotalTokens: 10000, // NOTE(review): likely far more than these three short messages need, so nothing may ever be dropped — confirm this exercises the pairing logic
+      })
+
+      const hasToolUse = result.selectedMessages.some( // true if any selected message carries a tool_use block
+        m => Array.isArray(m.message?.content) && m.message.content.some((b: any) => b.type === 'tool_use')
+      )
+      const hasToolResult = result.selectedMessages.some( // true if any selected message carries a tool_result block
+        m => Array.isArray(m.message?.content) && m.message.content.some((b: any) => b.type === 'tool_result')
+      )
+
+      expect(hasToolUse).toBe(true) // both halves of the pair must be present in the selection
+      expect(hasToolResult).toBe(true)
+    })
+
+    it('accounts for large tool_use input in token counting', () => {
+      const largeInput = 'x'.repeat(5000) // 5000-character payload placed inside the tool_use input
+      const messages = [
+        {
+          type: 'assistant', // NOTE(review): this fixture has no uuid or message.id — confirm the strategy handles that
+          message: {
+            role: 'assistant',
+            content: [
+              { type: 'tool_use', id: 'tu1', name: 'Edit', input: { path: 'test.js', content: largeInput } },
+            ],
+            created_at: 1000,
+          },
+        },
+      ] as any[]
+
+      const result = applyHybridStrategy(messages, {
+        cacheWeight: 0.5,
+        freshWeight: 0.5,
+        maxTotalTokens: 20000,
+      })
+
+      expect(result.totalTokens).toBeGreaterThan(1000) // presumably relies on a chars/4-style estimate (5000 / 4 = 1250) — confirm against the token counter
+    })
+
+    it('accounts for large thinking blocks in token counting', () => {
+      const longThinking = 'Thinking '.repeat(1000) // 9 characters x 1000 = 9000 characters
+      const messages = [
+        {
+          type: 'assistant', // NOTE(review): this fixture has no uuid or message.id — confirm the strategy handles that
+          message: {
+            role: 'assistant',
+            content: [
+              { type: 'thinking', thinking: longThinking },
+              { type: 'text', text: 'Final response' },
+            ],
+            created_at: 1000,
+          },
+        },
+      ] as any[]
+
+      const result = applyHybridStrategy(messages, {
+        cacheWeight: 0.5,
+        freshWeight: 0.5,
+        maxTotalTokens: 20000,
+      })
+
+      expect(result.totalTokens).toBeGreaterThan(500) // a loose floor; the thinking text must contribute to the count for this to pass
+    })
+  })
+})