feat: context preloading and hybrid context strategy (#860)

* feat: context preloading and hybrid context strategy

PR 2D - Section 2.7, 2.8:
- Add contextPreload.ts with pattern-based prediction
- Add hybridContextStrategy.ts with cache/fresh balancing
- Optimize the trade-off between API cost and context accuracy
- Add comprehensive tests (13 passing)

* feat: wire hybrid context strategy into API path

- Apply hybrid strategy after normalizeMessagesForAPI
- Feature-flag controlled (HYBRID_CONTEXT_STRATEGY)
- Optimizes cache/fresh balance for API requests

* fix: resolve PR 2D blocking issues

- Fix predictContextNeeds self-assign bug (matchedCategory = category)
- Add test for non-empty predictedNeed
- Preserve conversation tail in hybridStrategy (never drop last 3 messages)
- Add comment for hardcoded 200k cap in claude.ts

Fixes reviewer feedback from gnanam1990 and Vasanthdev2004

* fix: preserve tool_use/tool_result chains in hybridStrategy

- Increase MIN_TAIL to 5 (tool_use -> tool_result -> assistant -> user -> next)
- Add getMessageChain() to preserve paired messages
- Chains kept together in final selection

* fix: PR 860 - tool_use/tool_result pairing and safe token counting

Blocking:
- getMessageChain() now pairs by tool_use.id (block ID) not msg.message.id
- Find tool_use blocks by id, pair with tool_result having matching tool_use_id
- Fixes tool_result surviving while paired tool_use dropped

- Token counting now includes array content (tool_use, tool_result, thinking)
- No longer counts only string content; this prevents undercounting the prompt size

- Deduplicate messages by UUID when combining chains + split + tail
- Prevents duplicate messages in final request

Non-blocking:
- Add regression test for tool_use/tool_result pairing

* fix: PR 860 - account for actual structured payload size in token counting

Blocking:
- getMessageTokenCount now calculates actual token count for structured blocks
- tool_use: uses JSON.stringify(input).length / 4 + base
- tool_result: counts actual content (string or array of text blocks)
- thinking: counts actual thinking text length / 4
- is_error flag adds small overhead

Non-blocking:
- Add tests for large tool_use input and large thinking blocks
This commit is contained in:
ArkhAngelLifeJiggy
2026-04-29 08:49:46 +01:00
committed by GitHub
parent 91f93ce615
commit 92d297e50e
5 changed files with 800 additions and 0 deletions

145
src/utils/contextPreload.ts Normal file
View File

@@ -0,0 +1,145 @@
/**
* Context Pre-loading - Production Grade
*
* Proactively loads relevant context before it's needed.
* Prediction based on conversation patterns.
*/
import { roughTokenCountEstimation } from '../services/tokenEstimation.js'
import type { Message } from '../types/message.js'
/** Tunable limits and thresholds for the preload strategy. */
export interface PreloadConfig {
  // Token budget for preloaded context; read with a `?? 30000` fallback in preloadContext.
  maxPreloadTokens: number
  // NOTE(review): declared but not read anywhere in this file — confirm intended use.
  predictionWindow?: number
  // Minimum keyword-match confidence for a category to count; defaults to 0.5.
  confidenceThreshold?: number
}
/** Result of predicting which context the next turn will likely need. */
export interface PreloadPrediction {
  // Deduplicated context-need labels (e.g. 'file_context') from matching patterns.
  predictedNeed: string[]
  // Fraction of the matched category's keywords found in the query (0..1).
  confidence: number
  // Always returned empty by predictContextNeeds in this file.
  suggestedMessages: Message[]
}
/** One observed query-category → needed-context association. */
export interface ConversationPattern {
  // Category key from PATTERN_KEYWORDS (e.g. 'debug'), not the raw user text.
  userQuery: string
  // Context-need labels extracted from the paired assistant reply.
  neededContext: string[]
  // Always 1 as produced by analyzeConversationPatterns; no aggregation is done here.
  frequency: number
}
// Keyword triggers per query category; matching is substring-based on the
// lowercased query, so e.g. 'what' also matches 'whatever'.
const PATTERN_KEYWORDS: Record<string, string[]> = {
  'code': ['code', 'function', 'implement', 'write'],
  'debug': ['error', 'bug', 'fix', 'issue', 'debug'],
  'refactor': ['refactor', 'improve', 'clean', 'optimize'],
  'test': ['test', 'spec', 'coverage', 'verify'],
  'explain': ['explain', 'what', 'how', 'why', 'describe'],
  'search': ['find', 'search', 'look', 'grep', 'glob'],
}
/**
 * Scan the tail of the conversation for query-category patterns.
 *
 * Walks the last 10 messages in adjacent pairs, classifies each pair's
 * first message against PATTERN_KEYWORDS, and records which context the
 * following message appeared to rely on.
 *
 * NOTE(review): pairs are positional — message i is treated as the user
 * query and i+1 as the assistant reply without checking roles; confirm
 * the caller guarantees alternating user/assistant ordering.
 *
 * @param messages Full conversation history; only the last 10 are used.
 * @returns One pattern entry (frequency always 1) per category match.
 */
export function analyzeConversationPatterns(messages: Message[]): ConversationPattern[] {
  const patterns: ConversationPattern[] = []
  const recentMessages = messages.slice(-10)
  for (let i = 0; i < recentMessages.length - 1; i++) {
    const userMsg = recentMessages[i]
    const assistantMsg = recentMessages[i + 1]
    const userContent = typeof userMsg.message?.content === 'string' ? userMsg.message.content : ''
    const assistantContent = typeof assistantMsg.message?.content === 'string' ? assistantMsg.message.content : ''
    // Hoisted out of the category loop: lowercase once per message instead
    // of once per keyword comparison (was recomputed for every keyword).
    const userLower = userContent.toLowerCase()
    for (const [category, keywords] of Object.entries(PATTERN_KEYWORDS)) {
      if (keywords.some(k => userLower.includes(k))) {
        patterns.push({
          userQuery: category,
          neededContext: extractContextNeeds(assistantContent),
          frequency: 1,
        })
      }
    }
  }
  return patterns
}
/**
 * Map keyword occurrences in an assistant reply to the context-need
 * labels that reply appears to rely on. Matching is case-sensitive
 * substring search; result order follows the marker table.
 */
function extractContextNeeds(content: string): string[] {
  const markers: ReadonlyArray<[keyword: string, need: string]> = [
    ['file', 'file_context'],
    ['function', 'function_defs'],
    ['error', 'error_history'],
    ['test', 'test_files'],
  ]
  return markers
    .filter(([keyword]) => content.includes(keyword))
    .map(([, need]) => need)
}
/**
 * Predict which context categories the current query will need.
 *
 * Classifies the query against PATTERN_KEYWORDS (best-scoring category at
 * or above the confidence threshold wins), then unions the neededContext
 * of all historical patterns recorded for that category.
 *
 * @param currentQuery Raw user query text.
 * @param patterns Patterns from analyzeConversationPatterns.
 * @param config Uses confidenceThreshold (default 0.5).
 * @returns Deduplicated predicted needs, the winning confidence, and an
 *          always-empty suggestedMessages list.
 */
export function predictContextNeeds(
  currentQuery: string,
  patterns: ConversationPattern[],
  config: PreloadConfig,
): PreloadPrediction {
  const threshold = config.confidenceThreshold ?? 0.5
  // Hoisted out of the loops: the query only needs lowercasing once
  // (was recomputed for every keyword of every category).
  const queryLower = currentQuery.toLowerCase()
  let matchedCategory = ''
  let highestConfidence = 0
  for (const [category, keywords] of Object.entries(PATTERN_KEYWORDS)) {
    const matches = keywords.filter(k => queryLower.includes(k)).length
    const confidence = matches / keywords.length
    // Strict '>' keeps the first-seen category on ties, matching object key order.
    if (confidence > highestConfidence && confidence >= threshold) {
      highestConfidence = confidence
      matchedCategory = category
    }
  }
  // No category above threshold leaves matchedCategory '' and yields no needs.
  const relevantPatterns = patterns.filter(p => p.userQuery === matchedCategory)
  const allNeeds = relevantPatterns.flatMap(p => p.neededContext)
  return {
    predictedNeed: [...new Set(allNeeds)],
    confidence: highestConfidence,
    suggestedMessages: [],
  }
}
/**
 * Select messages to preload under a token budget.
 *
 * Messages whose string content mentions any predicted need are ranked
 * first; within each tier, newer messages (higher created_at) come first.
 * Messages are then taken greedily until adding the next one would exceed
 * config.maxPreloadTokens (falling back to 30000 when nullish).
 */
export function preloadContext(
  availableContext: Message[],
  prediction: PreloadPrediction,
  config: PreloadConfig,
): Message[] {
  const budget = config.maxPreloadTokens ?? 30000
  const needs = prediction.predictedNeed

  const textOf = (msg: Message): string =>
    typeof msg.message?.content === 'string' ? msg.message.content : ''
  const tierOf = (msg: Message): number =>
    needs.some(need => textOf(msg).includes(need)) ? 1 : 0

  // Copy before sorting so the caller's array is never mutated.
  const ordered = [...availableContext].sort((first, second) => {
    const tierDiff = tierOf(second) - tierOf(first)
    if (tierDiff !== 0) return tierDiff
    return (second.message?.created_at ?? 0) - (first.message?.created_at ?? 0)
  })

  const chosen: Message[] = []
  let spent = 0
  for (const candidate of ordered) {
    // Non-string content counts as zero tokens here — only string bodies are estimated.
    const cost = roughTokenCountEstimation(textOf(candidate))
    if (spent + cost > budget) break
    chosen.push(candidate)
    spent += cost
  }
  return chosen
}
/**
 * Bundle the three preload phases (analyze → predict → preload) into one
 * object whose predict/preload members are bound to a shared config.
 */
export function createPreloadStrategy(config: PreloadConfig) {
  const predict = (query: string, patterns: ConversationPattern[]) =>
    predictContextNeeds(query, patterns, config)
  const preload = (context: Message[], prediction: PreloadPrediction) =>
    preloadContext(context, prediction, config)
  return { analyze: analyzeConversationPatterns, predict, preload }
}