feat: context preloading and hybrid context strategy (#860)

* feat: context preloading and hybrid context strategy

PR 2D - Section 2.7, 2.8:
- Add contextPreload.ts with pattern-based prediction
- Add hybridContextStrategy.ts with cache/fresh balancing
- Optimize the trade-off between API cost and context accuracy
- Add comprehensive tests (13 passing)

* feat: wire hybrid context strategy into API path

- Apply hybrid strategy after normalizeMessagesForAPI
- Feature-flag controlled (HYBRID_CONTEXT_STRATEGY)
- Optimizes cache/fresh balance for API requests

* fix: resolve PR 2D blocking issues

- Fix predictContextNeeds self-assign bug (matchedCategory = category)
- Add test for non-empty predictedNeed
- Preserve conversation tail in hybridStrategy (never drop last 3 messages)
- Add comment for hardcoded 200k cap in claude.ts

Fixes reviewer feedback from gnanam1990 and Vasanthdev2004

* fix: preserve tool_use/tool_result chains in hybridStrategy

- Increase MIN_TAIL to 5 (tool_use -> tool_result -> assistant -> user -> next)
- Add getMessageChain() to preserve paired messages
- Chains kept together in final selection

* fix: PR 860 - tool_use/tool_result pairing and safe token counting

Blocking:
- getMessageChain() now pairs by tool_use.id (block ID) not msg.message.id
- Find tool_use blocks by id, pair with tool_result having matching tool_use_id
- Fixes tool_result surviving while paired tool_use dropped

- Token counting now includes array content (tool_use, tool_result, thinking)
- No longer counts only string content; this prevents undercounting the prompt size

- Deduplicate messages by UUID when combining chains + split + tail
- Prevents duplicate messages in final request

Non-blocking:
- Add regression test for tool_use/tool_result pairing

* fix: PR 860 - account for actual structured payload size in token counting

Blocking:
- getMessageTokenCount now calculates actual token count for structured blocks
- tool_use: uses JSON.stringify(input).length / 4 + base
- tool_result: counts actual content (string or array of text blocks)
- thinking: counts actual thinking text length / 4
- is_error flag adds small overhead

Non-blocking:
- Add tests for large tool_use input and large thinking blocks
This commit is contained in:
ArkhAngelLifeJiggy
2026-04-29 08:49:46 +01:00
committed by GitHub
parent 91f93ce615
commit 92d297e50e
5 changed files with 800 additions and 0 deletions

145
src/utils/contextPreload.ts Normal file
View File

@@ -0,0 +1,145 @@
/**
* Context Pre-loading - Production Grade
*
* Proactively loads relevant context before it's needed.
* Prediction based on conversation patterns.
*/
import { roughTokenCountEstimation } from '../services/tokenEstimation.js'
import type { Message } from '../types/message.js'
/** Tunable limits and thresholds for the preload strategy. */
export interface PreloadConfig {
  // Token budget for preloaded context; read with a `?? 30000` fallback in preloadContext.
  maxPreloadTokens: number
  // NOTE(review): declared but not read anywhere in this file — confirm intended use.
  predictionWindow?: number
  // Minimum keyword-match confidence for a category to count; defaults to 0.5.
  confidenceThreshold?: number
}
/** Result of predicting which context the next turn will likely need. */
export interface PreloadPrediction {
  // Deduplicated context-need labels (e.g. 'file_context') from matching patterns.
  predictedNeed: string[]
  // Fraction of the matched category's keywords found in the query (0..1).
  confidence: number
  // Always returned empty by predictContextNeeds in this file.
  suggestedMessages: Message[]
}
/** One observed query-category → needed-context association. */
export interface ConversationPattern {
  // Category key from PATTERN_KEYWORDS (e.g. 'debug'), not the raw user text.
  userQuery: string
  // Context-need labels extracted from the paired assistant reply.
  neededContext: string[]
  // Always 1 as produced by analyzeConversationPatterns; no aggregation is done here.
  frequency: number
}
// Keyword triggers per query category; matching is substring-based on the
// lowercased query, so e.g. 'what' also matches 'whatever'.
const PATTERN_KEYWORDS: Record<string, string[]> = {
  'code': ['code', 'function', 'implement', 'write'],
  'debug': ['error', 'bug', 'fix', 'issue', 'debug'],
  'refactor': ['refactor', 'improve', 'clean', 'optimize'],
  'test': ['test', 'spec', 'coverage', 'verify'],
  'explain': ['explain', 'what', 'how', 'why', 'describe'],
  'search': ['find', 'search', 'look', 'grep', 'glob'],
}
/**
 * Scan the tail of the conversation for query-category patterns.
 *
 * Walks the last 10 messages in adjacent pairs, classifies each pair's
 * first message against PATTERN_KEYWORDS, and records which context the
 * following message appeared to rely on.
 *
 * NOTE(review): pairs are positional — message i is treated as the user
 * query and i+1 as the assistant reply without checking roles; confirm
 * the caller guarantees alternating user/assistant ordering.
 *
 * @param messages Full conversation history; only the last 10 are used.
 * @returns One pattern entry (frequency always 1) per category match.
 */
export function analyzeConversationPatterns(messages: Message[]): ConversationPattern[] {
  const patterns: ConversationPattern[] = []
  const recentMessages = messages.slice(-10)
  for (let i = 0; i < recentMessages.length - 1; i++) {
    const userMsg = recentMessages[i]
    const assistantMsg = recentMessages[i + 1]
    const userContent = typeof userMsg.message?.content === 'string' ? userMsg.message.content : ''
    const assistantContent = typeof assistantMsg.message?.content === 'string' ? assistantMsg.message.content : ''
    // Hoisted out of the category loop: lowercase once per message instead
    // of once per keyword comparison (was recomputed for every keyword).
    const userLower = userContent.toLowerCase()
    for (const [category, keywords] of Object.entries(PATTERN_KEYWORDS)) {
      if (keywords.some(k => userLower.includes(k))) {
        patterns.push({
          userQuery: category,
          neededContext: extractContextNeeds(assistantContent),
          frequency: 1,
        })
      }
    }
  }
  return patterns
}
/**
 * Map keyword occurrences in an assistant reply to the context-need
 * labels that reply appears to rely on. Matching is case-sensitive
 * substring search; result order follows the marker table.
 */
function extractContextNeeds(content: string): string[] {
  const markers: ReadonlyArray<[keyword: string, need: string]> = [
    ['file', 'file_context'],
    ['function', 'function_defs'],
    ['error', 'error_history'],
    ['test', 'test_files'],
  ]
  return markers
    .filter(([keyword]) => content.includes(keyword))
    .map(([, need]) => need)
}
/**
 * Predict which context categories the current query will need.
 *
 * Classifies the query against PATTERN_KEYWORDS (best-scoring category at
 * or above the confidence threshold wins), then unions the neededContext
 * of all historical patterns recorded for that category.
 *
 * @param currentQuery Raw user query text.
 * @param patterns Patterns from analyzeConversationPatterns.
 * @param config Uses confidenceThreshold (default 0.5).
 * @returns Deduplicated predicted needs, the winning confidence, and an
 *          always-empty suggestedMessages list.
 */
export function predictContextNeeds(
  currentQuery: string,
  patterns: ConversationPattern[],
  config: PreloadConfig,
): PreloadPrediction {
  const threshold = config.confidenceThreshold ?? 0.5
  // Hoisted out of the loops: the query only needs lowercasing once
  // (was recomputed for every keyword of every category).
  const queryLower = currentQuery.toLowerCase()
  let matchedCategory = ''
  let highestConfidence = 0
  for (const [category, keywords] of Object.entries(PATTERN_KEYWORDS)) {
    const matches = keywords.filter(k => queryLower.includes(k)).length
    const confidence = matches / keywords.length
    // Strict '>' keeps the first-seen category on ties, matching object key order.
    if (confidence > highestConfidence && confidence >= threshold) {
      highestConfidence = confidence
      matchedCategory = category
    }
  }
  // No category above threshold leaves matchedCategory '' and yields no needs.
  const relevantPatterns = patterns.filter(p => p.userQuery === matchedCategory)
  const allNeeds = relevantPatterns.flatMap(p => p.neededContext)
  return {
    predictedNeed: [...new Set(allNeeds)],
    confidence: highestConfidence,
    suggestedMessages: [],
  }
}
/**
 * Select messages to preload under a token budget.
 *
 * Messages whose string content mentions any predicted need are ranked
 * first; within each tier, newer messages (higher created_at) come first.
 * Messages are then taken greedily until adding the next one would exceed
 * config.maxPreloadTokens (falling back to 30000 when nullish).
 */
export function preloadContext(
  availableContext: Message[],
  prediction: PreloadPrediction,
  config: PreloadConfig,
): Message[] {
  const budget = config.maxPreloadTokens ?? 30000
  const needs = prediction.predictedNeed

  const textOf = (msg: Message): string =>
    typeof msg.message?.content === 'string' ? msg.message.content : ''
  const tierOf = (msg: Message): number =>
    needs.some(need => textOf(msg).includes(need)) ? 1 : 0

  // Copy before sorting so the caller's array is never mutated.
  const ordered = [...availableContext].sort((first, second) => {
    const tierDiff = tierOf(second) - tierOf(first)
    if (tierDiff !== 0) return tierDiff
    return (second.message?.created_at ?? 0) - (first.message?.created_at ?? 0)
  })

  const chosen: Message[] = []
  let spent = 0
  for (const candidate of ordered) {
    // Non-string content counts as zero tokens here — only string bodies are estimated.
    const cost = roughTokenCountEstimation(textOf(candidate))
    if (spent + cost > budget) break
    chosen.push(candidate)
    spent += cost
  }
  return chosen
}
/**
 * Bundle the three preload phases (analyze → predict → preload) into one
 * object whose predict/preload members are bound to a shared config.
 */
export function createPreloadStrategy(config: PreloadConfig) {
  const predict = (query: string, patterns: ConversationPattern[]) =>
    predictContextNeeds(query, patterns, config)
  const preload = (context: Message[], prediction: PreloadPrediction) =>
    preloadContext(context, prediction, config)
  return { analyze: analyzeConversationPatterns, predict, preload }
}