feat: context preloading and hybrid context strategy (#860)
* feat: context preloading and hybrid context strategy

  PR 2D - Sections 2.7, 2.8:
  - Add contextPreload.ts with pattern-based prediction
  - Add hybridContextStrategy.ts with cache/fresh balancing
  - Optimize for cost vs. accuracy
  - Add comprehensive tests (13 passing)

* feat: wire hybrid context strategy into API path

  - Apply the hybrid strategy after normalizeMessagesForAPI
  - Feature-flag controlled (HYBRID_CONTEXT_STRATEGY)
  - Optimizes the cache/fresh balance for API requests

* fix: resolve PR 2D blocking issues

  - Fix predictContextNeeds self-assign bug (matchedCategory = category)
  - Add a test for non-empty predictedNeed
  - Preserve the conversation tail in hybridStrategy (never drop the last 3 messages)
  - Add a comment for the hardcoded 200k cap in claude.ts

  Fixes reviewer feedback from gnanam1990 and Vasanthdev2004

* fix: preserve tool_use/tool_result chains in hybridStrategy

  - Increase MIN_TAIL to 5 (tool_use -> tool_result -> assistant -> user -> next)
  - Add getMessageChain() to preserve paired messages
  - Chains are kept together in the final selection

* fix: PR 860 - tool_use/tool_result pairing and safe token counting

  Blocking:
  - getMessageChain() now pairs by tool_use.id (the block ID), not msg.message.id
  - Find tool_use blocks by id and pair each with the tool_result whose tool_use_id matches
  - Fixes a tool_result surviving while its paired tool_use was dropped
  - Token counting now includes array content (tool_use, tool_result, thinking), not just string content, preventing undercounting of the prompt size
  - Deduplicate messages by UUID when combining chains + split + tail, preventing duplicate messages in the final request

  Non-blocking:
  - Add a regression test for tool_use/tool_result pairing

* fix: PR 860 - account for actual structured payload size in token counting

  Blocking:
  - getMessageTokenCount now calculates the actual token count for structured blocks
  - tool_use: uses JSON.stringify(input).length / 4 + base
  - tool_result: counts the actual content (string or array of text blocks)
  - thinking: counts the actual thinking text length / 4
  - is_error flag adds a small overhead
  (a hedged sketch of this estimation appears right after the commit message)

  Non-blocking:
  - Add tests for large tool_use input and large thinking blocks
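The last two commits above describe how structured content blocks are priced during token counting. The following is a minimal TypeScript sketch of those rules, not the PR's actual getMessageTokenCount: the ContentBlock shapes, the divide-by-4 character heuristic, and the BLOCK_BASE_OVERHEAD / IS_ERROR_OVERHEAD constants are assumptions for illustration.

// Sketch only: estimate tokens for one message's content, including structured blocks.
// The block shapes below follow the Anthropic-style message format; the
// divide-by-4 heuristic and the overhead constants are assumed values.
type ContentBlock =
  | { type: 'text'; text: string }
  | { type: 'thinking'; thinking: string }
  | { type: 'tool_use'; id: string; name: string; input: unknown }
  | { type: 'tool_result'; tool_use_id: string; is_error?: boolean; content: string | { type: 'text'; text: string }[] }

const CHARS_PER_TOKEN = 4       // rough heuristic, not a real tokenizer
const BLOCK_BASE_OVERHEAD = 10  // assumed fixed cost per structured block envelope
const IS_ERROR_OVERHEAD = 2     // assumed small cost for the is_error flag

function estimateMessageTokens(content: string | ContentBlock[]): number {
  if (typeof content === 'string') return Math.ceil(content.length / CHARS_PER_TOKEN)

  let total = 0
  for (const block of content) {
    switch (block.type) {
      case 'text':
        total += Math.ceil(block.text.length / CHARS_PER_TOKEN)
        break
      case 'thinking':
        // thinking: count the actual thinking text length / 4
        total += Math.ceil(block.thinking.length / CHARS_PER_TOKEN)
        break
      case 'tool_use':
        // tool_use: serialized input length / 4 plus a base overhead
        total += Math.ceil(JSON.stringify(block.input ?? {}).length / CHARS_PER_TOKEN) + BLOCK_BASE_OVERHEAD
        break
      case 'tool_result': {
        // tool_result: count the actual content, string or array of text blocks
        const text = typeof block.content === 'string'
          ? block.content
          : block.content.map(c => c.text).join('')
        total += Math.ceil(text.length / CHARS_PER_TOKEN) + BLOCK_BASE_OVERHEAD
        if (block.is_error) total += IS_ERROR_OVERHEAD
        break
      }
    }
  }
  return total
}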
parent 91f93ce615
commit 92d297e50e
145 src/utils/contextPreload.ts Normal file
@@ -0,0 +1,145 @@
/**
 * Context Pre-loading - Production Grade
 *
 * Proactively loads relevant context before it's needed.
 * Prediction based on conversation patterns.
 */

import { roughTokenCountEstimation } from '../services/tokenEstimation.js'
import type { Message } from '../types/message.js'

export interface PreloadConfig {
  maxPreloadTokens: number
  predictionWindow?: number
  confidenceThreshold?: number
}

export interface PreloadPrediction {
  predictedNeed: string[]
  confidence: number
  suggestedMessages: Message[]
}

export interface ConversationPattern {
  userQuery: string
  neededContext: string[]
  frequency: number
}

const PATTERN_KEYWORDS: Record<string, string[]> = {
  'code': ['code', 'function', 'implement', 'write'],
  'debug': ['error', 'bug', 'fix', 'issue', 'debug'],
  'refactor': ['refactor', 'improve', 'clean', 'optimize'],
  'test': ['test', 'spec', 'coverage', 'verify'],
  'explain': ['explain', 'what', 'how', 'why', 'describe'],
  'search': ['find', 'search', 'look', 'grep', 'glob'],
}

// Scans the recent conversation and records which context categories tend to
// follow each kind of user query (assumes user/assistant turns alternate).
export function analyzeConversationPatterns(messages: Message[]): ConversationPattern[] {
  const patterns: ConversationPattern[] = []
  const recentMessages = messages.slice(-10)

  for (let i = 0; i < recentMessages.length - 1; i++) {
    const userMsg = recentMessages[i]
    const assistantMsg = recentMessages[i + 1]

    const userContent = typeof userMsg.message?.content === 'string' ? userMsg.message.content : ''
    const assistantContent = typeof assistantMsg.message?.content === 'string' ? assistantMsg.message.content : ''

    for (const [category, keywords] of Object.entries(PATTERN_KEYWORDS)) {
      if (keywords.some(k => userContent.toLowerCase().includes(k))) {
        patterns.push({
          userQuery: category,
          neededContext: extractContextNeeds(assistantContent),
          frequency: 1,
        })
      }
    }
  }

  return patterns
}

// Maps assistant response text to the coarse context categories it referenced.
function extractContextNeeds(content: string): string[] {
  const needs: string[] = []
  if (content.includes('file')) needs.push('file_context')
  if (content.includes('function')) needs.push('function_defs')
  if (content.includes('error')) needs.push('error_history')
  if (content.includes('test')) needs.push('test_files')
  return needs
}

// Scores the current query against each keyword category and returns the
// context needs previously observed for the best-matching category.
export function predictContextNeeds(
  currentQuery: string,
  patterns: ConversationPattern[],
  config: PreloadConfig,
): PreloadPrediction {
  const threshold = config.confidenceThreshold ?? 0.5
  let matchedCategory = ''
  let highestConfidence = 0

  for (const [category, keywords] of Object.entries(PATTERN_KEYWORDS)) {
    const matches = keywords.filter(k => currentQuery.toLowerCase().includes(k)).length
    const confidence = matches / keywords.length

    if (confidence > highestConfidence && confidence >= threshold) {
      highestConfidence = confidence
      matchedCategory = category
    }
  }

  const relevantPatterns = patterns.filter(p => p.userQuery === matchedCategory)
  const allNeeds = relevantPatterns.flatMap(p => p.neededContext)

  return {
    predictedNeed: [...new Set(allNeeds)],
    confidence: highestConfidence,
    suggestedMessages: [],
  }
}

// Greedily selects messages to preload: predicted-need matches first, then the
// most recent, stopping once the preload token budget is reached.
export function preloadContext(
  availableContext: Message[],
  prediction: PreloadPrediction,
  config: PreloadConfig,
): Message[] {
  const targetTokens = config.maxPreloadTokens ?? 30000
  const selected: Message[] = []
  let usedTokens = 0

  const priorityTypes = prediction.predictedNeed

  const sorted = [...availableContext].sort((a, b) => {
    const aContent = typeof a.message?.content === 'string' ? a.message.content : ''
    const bContent = typeof b.message?.content === 'string' ? b.message.content : ''

    const aPriority = priorityTypes.some(t => aContent.includes(t)) ? 1 : 0
    const bPriority = priorityTypes.some(t => bContent.includes(t)) ? 1 : 0

    if (bPriority !== aPriority) return bPriority - aPriority
    return (b.message?.created_at ?? 0) - (a.message?.created_at ?? 0)
  })

  for (const msg of sorted) {
    const tokens = roughTokenCountEstimation(
      typeof msg.message?.content === 'string' ? msg.message.content : ''
    )

    if (usedTokens + tokens > targetTokens) break

    selected.push(msg)
    usedTokens += tokens
  }

  return selected
}

// Bundles analyze/predict/preload into a single strategy bound to one config.
export function createPreloadStrategy(config: PreloadConfig) {
  return {
    analyze: analyzeConversationPatterns,
    predict: (query: string, patterns: ConversationPattern[]) =>
      predictContextNeeds(query, patterns, config),
    preload: (context: Message[], prediction: PreloadPrediction) =>
      preloadContext(context, prediction, config),
  }
}
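For reference, a small usage sketch of the module above. The message literals are simplified stand-ins for the project's real Message type (hence the loose cast), and the import paths assume a caller sitting next to src/utils/contextPreload.ts.

import { createPreloadStrategy } from './contextPreload.js'
import type { Message } from '../types/message.js'

// Simplified stand-ins for real Message objects.
const history = [
  { message: { content: 'please fix the error in parser.ts', created_at: 1 } },
  { message: { content: 'The error came from a missing test file', created_at: 2 } },
] as unknown as Message[]

const strategy = createPreloadStrategy({ maxPreloadTokens: 30000, confidenceThreshold: 0.4 })
const patterns = strategy.analyze(history)                          // recent query -> context patterns
const prediction = strategy.predict('fix the bug in the tests', patterns)
const preloaded = strategy.preload(history, prediction)             // messages within the token budget
// prediction.predictedNeed lists the context categories seen for the matched query type,
// and preloaded contains the highest-priority messages that fit under maxPreloadTokens.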