From 92d297e50efcc7225f57f0d3cb0ba989dc40d624 Mon Sep 17 00:00:00 2001 From: ArkhAngelLifeJiggy <141562589+LifeJiggy@users.noreply.github.com> Date: Wed, 29 Apr 2026 08:49:46 +0100 Subject: [PATCH] feat: context preloading and hybrid context strategy (#860) * feat: context preloading and hybrid context strategy PR 2D - Section 2.7, 2.8: - Add contextPreload.ts with pattern-based prediction - Add hybridContextStrategy.ts with cache/fresh balancing - Optimize for cost vs accuracy - Add comprehensive tests (13 passing) * feat: wire hybrid context strategy into API path - Apply hybrid strategy after normalizeMessagesForAPI - Feature-flag controlled (HYBRID_CONTEXT_STRATEGY) - Optimizes cache/fresh balance for API requests * fix: resolve PR 2D blocking issues - Fix predictContextNeeds self-assign bug (matchedCategory = category) - Add test for non-empty predictedNeed - Preserve conversation tail in hybridStrategy (never drop last 3 messages) - Add comment for hardcoded 200k cap in claude.ts Fixes reviewer feedback from gnanam1990 and Vasanthdev2004 * fix: preserve tool_use/tool_result chains in hybridStrategy - Increase MIN_TAIL to 5 (tool_use -> tool_result -> assistant -> user -> next) - Add getMessageChain() to preserve paired messages - Chains kept together in final selection * fix: PR 860 - tool_use/tool_result pairing and safe token counting Blocking: - getMessageChain() now pairs by tool_use.id (block ID) not msg.message.id - Find tool_use blocks by id, pair with tool_result having matching tool_use_id - Fixes tool_result surviving while paired tool_use dropped - Token counting now includes array content (tool_use, tool_result, thinking) - Not just string content, prevents undercounting prompt size - Deduplicate messages by UUID when combining chains + split + tail - Prevents duplicate messages in final request Non-blocking: - Add regression test for tool_use/tool_result pairing * fix: PR 860 - account for actual structured payload size in token counting 
Blocking: - getMessageTokenCount now calculates actual token count for structured blocks - tool_use: uses JSON.stringify(input).length / 4 + base - tool_result: counts actual content (string or array of text blocks) - thinking: counts actual thinking text length / 4 - is_error flag adds small overhead Non-blocking: - Add tests for large tool_use input and large thinking blocks --- src/services/api/claude.ts | 15 ++ src/utils/contextPreload.test.ts | 104 ++++++++ src/utils/contextPreload.ts | 145 +++++++++++ src/utils/hybridContextStrategy.test.ts | 230 ++++++++++++++++++ src/utils/hybridContextStrategy.ts | 306 ++++++++++++++++++++++++ 5 files changed, 800 insertions(+) create mode 100644 src/utils/contextPreload.test.ts create mode 100644 src/utils/contextPreload.ts create mode 100644 src/utils/hybridContextStrategy.test.ts create mode 100644 src/utils/hybridContextStrategy.ts diff --git a/src/services/api/claude.ts b/src/services/api/claude.ts index 275be0c2..6815dfbe 100644 --- a/src/services/api/claude.ts +++ b/src/services/api/claude.ts @@ -1283,6 +1283,21 @@ async function* queryModel( let messagesForAPI = normalizeMessagesForAPI(messages, filteredTools) queryCheckpoint('query_message_normalization_end') + // Apply hybrid context strategy for optimal cache/fresh balance + if (feature('HYBRID_CONTEXT_STRATEGY')) { + const { applyHybridStrategy } = await import('../../utils/hybridContextStrategy.js') + // Cap at 200k to avoid edge case with very large context windows + const strategyResult = applyHybridStrategy(messagesForAPI, { + cacheWeight: 0.4, + freshWeight: 0.6, + maxTotalTokens: Math.min( + getContextWindowForModel(model, getSdkBetas()) - COMPACT_MAX_OUTPUT_TOKENS, + 200000 + ), + }) + messagesForAPI = strategyResult.selectedMessages + } + // Model-specific post-processing: strip tool-search-specific fields if the // selected model doesn't support tool search. 
// (patch continues: new test and utility files below)

// ---------------------------------------------------------------------------
// src/utils/contextPreload.test.ts
// ---------------------------------------------------------------------------

import { describe, expect, it } from 'bun:test'
import {
  analyzeConversationPatterns,
  predictContextNeeds,
  preloadContext,
  createPreloadStrategy,
} from './contextPreload.js'

/** Builds a minimal message fixture with the given role, content and timestamp. */
function makeMessage(role: string, content: string, createdAt: number = Date.now()): any {
  return {
    sender: role,
    message: { role, content, id: 'test', type: 'message', created_at: createdAt },
  }
}

describe('contextPreload', () => {
  describe('analyzeConversationPatterns', () => {
    it('extracts patterns from messages', () => {
      const conversation = [
        makeMessage('user', 'Fix the error in my code', 1000),
        makeMessage('assistant', 'I found the bug', 2000),
      ]

      expect(analyzeConversationPatterns(conversation).length).toBeGreaterThanOrEqual(0)
    })

    it('detects debug patterns', () => {
      const conversation = [
        makeMessage('user', 'Debug this error please', 1000),
        makeMessage('assistant', 'Found it', 2000),
      ]

      const detected = analyzeConversationPatterns(conversation)

      expect(detected.some(p => p.userQuery === 'debug')).toBe(true)
    })

    it('detects code patterns', () => {
      const conversation = [
        makeMessage('user', 'Write a function for me', 1000),
        makeMessage('assistant', 'Here is the code', 2000),
      ]

      const detected = analyzeConversationPatterns(conversation)

      expect(detected.some(p => p.userQuery === 'code')).toBe(true)
    })
  })

  describe('predictContextNeeds', () => {
    it('predicts context needs based on query', () => {
      const history = [{ userQuery: 'debug', neededContext: ['error_history'], frequency: 1 }]

      const prediction = predictContextNeeds('Fix the bug', history, {
        maxPreloadTokens: 10000,
        confidenceThreshold: 0.3,
      })

      expect(prediction.confidence).toBeGreaterThan(0)
      expect(prediction.predictedNeed.length).toBeGreaterThan(0)
    })

    it('returns non-empty predictedNeed when pattern matches', () => {
      const history = [
        { userQuery: 'debug', neededContext: ['error_history', 'stack_trace'], frequency: 2 },
      ]

      const prediction = predictContextNeeds('debug this error', history, {
        maxPreloadTokens: 10000,
        confidenceThreshold: 0.1,
      })

      expect(prediction.predictedNeed).toContain('error_history')
    })
  })

  describe('preloadContext', () => {
    it('preloads relevant context', () => {
      const pool = [
        makeMessage('system', 'System prompt'),
        makeMessage('user', 'Debug error'),
        makeMessage('assistant', 'Fixed'),
      ]
      const prediction = { predictedNeed: ['error'], confidence: 0.8, suggestedMessages: [] }

      const loaded = preloadContext(pool, prediction, { maxPreloadTokens: 5000 })

      expect(loaded.length).toBeGreaterThan(0)
    })
  })

  describe('createPreloadStrategy', () => {
    it('creates strategy with all methods', () => {
      const strategy = createPreloadStrategy({ maxPreloadTokens: 10000 })

      expect(strategy.analyze).toBeDefined()
      expect(strategy.predict).toBeDefined()
      expect(strategy.preload).toBeDefined()
    })
  })
})

// ---------------------------------------------------------------------------
// src/utils/contextPreload.ts
// ---------------------------------------------------------------------------

/**
 * Context Pre-loading - Production Grade
 *
 * Proactively loads relevant context before it's needed.
 * Prediction based on conversation patterns.
+ */ + +import { roughTokenCountEstimation } from '../services/tokenEstimation.js' +import type { Message } from '../types/message.js' + +export interface PreloadConfig { + maxPreloadTokens: number + predictionWindow?: number + confidenceThreshold?: number +} + +export interface PreloadPrediction { + predictedNeed: string[] + confidence: number + suggestedMessages: Message[] +} + +export interface ConversationPattern { + userQuery: string + neededContext: string[] + frequency: number +} + +const PATTERN_KEYWORDS: Record = { + 'code': ['code', 'function', 'implement', 'write'], + 'debug': ['error', 'bug', 'fix', 'issue', 'debug'], + 'refactor': ['refactor', 'improve', 'clean', 'optimize'], + 'test': ['test', 'spec', 'coverage', 'verify'], + 'explain': ['explain', 'what', 'how', 'why', 'describe'], + 'search': ['find', 'search', 'look', 'grep', 'glob'], +} + +export function analyzeConversationPatterns(messages: Message[]): ConversationPattern[] { + const patterns: ConversationPattern[] = [] + const recentMessages = messages.slice(-10) + + for (let i = 0; i < recentMessages.length - 1; i++) { + const userMsg = recentMessages[i] + const assistantMsg = recentMessages[i + 1] + + const userContent = typeof userMsg.message?.content === 'string' ? userMsg.message.content : '' + const assistantContent = typeof assistantMsg.message?.content === 'string' ? 
assistantMsg.message.content : '' + + for (const [category, keywords] of Object.entries(PATTERN_KEYWORDS)) { + if (keywords.some(k => userContent.toLowerCase().includes(k))) { + patterns.push({ + userQuery: category, + neededContext: extractContextNeeds(assistantContent), + frequency: 1, + }) + } + } + } + + return patterns +} + +function extractContextNeeds(content: string): string[] { + const needs: string[] = [] + if (content.includes('file')) needs.push('file_context') + if (content.includes('function')) needs.push('function_defs') + if (content.includes('error')) needs.push('error_history') + if (content.includes('test')) needs.push('test_files') + return needs +} + +export function predictContextNeeds( + currentQuery: string, + patterns: ConversationPattern[], + config: PreloadConfig, +): PreloadPrediction { + const threshold = config.confidenceThreshold ?? 0.5 + let matchedCategory = '' + let highestConfidence = 0 + + for (const [category, keywords] of Object.entries(PATTERN_KEYWORDS)) { + const matches = keywords.filter(k => currentQuery.toLowerCase().includes(k)).length + const confidence = matches / keywords.length + + if (confidence > highestConfidence && confidence >= threshold) { + highestConfidence = confidence + matchedCategory = category + } + } + + const relevantPatterns = patterns.filter(p => p.userQuery === matchedCategory) + const allNeeds = relevantPatterns.flatMap(p => p.neededContext) + + return { + predictedNeed: [...new Set(allNeeds)], + confidence: highestConfidence, + suggestedMessages: [], + } +} + +export function preloadContext( + availableContext: Message[], + prediction: PreloadPrediction, + config: PreloadConfig, +): Message[] { + const targetTokens = config.maxPreloadTokens ?? 30000 + const selected: Message[] = [] + let usedTokens = 0 + + const priorityTypes = prediction.predictedNeed + + const sorted = [...availableContext].sort((a, b) => { + const aContent = typeof a.message?.content === 'string' ? 
a.message.content : '' + const bContent = typeof b.message?.content === 'string' ? b.message.content : '' + + const aPriority = priorityTypes.some(t => aContent.includes(t)) ? 1 : 0 + const bPriority = priorityTypes.some(t => bContent.includes(t)) ? 1 : 0 + + if (bPriority !== aPriority) return bPriority - aPriority + return (b.message?.created_at ?? 0) - (a.message?.created_at ?? 0) + }) + + for (const msg of sorted) { + const tokens = roughTokenCountEstimation( + typeof msg.message?.content === 'string' ? msg.message.content : '' + ) + + if (usedTokens + tokens > targetTokens) break + + selected.push(msg) + usedTokens += tokens + } + + return selected +} + +export function createPreloadStrategy(config: PreloadConfig) { + return { + analyze: analyzeConversationPatterns, + predict: (query: string, patterns: ConversationPattern[]) => + predictContextNeeds(query, patterns, config), + preload: (context: Message[], prediction: PreloadPrediction) => + preloadContext(context, prediction, config), + } +} \ No newline at end of file diff --git a/src/utils/hybridContextStrategy.test.ts b/src/utils/hybridContextStrategy.test.ts new file mode 100644 index 00000000..d6ae2c59 --- /dev/null +++ b/src/utils/hybridContextStrategy.test.ts @@ -0,0 +1,230 @@ +import { describe, expect, it } from 'bun:test' +import { + splitContext, + applyHybridStrategy, + optimizeForCost, + optimizeForAccuracy, + getHybridStats, +} from './hybridContextStrategy.js' + +function createMessage(role: string, content: string, createdAt: number = Date.now()): any { + return { + message: { role, content, id: 'test', type: 'message', created_at: createdAt }, + sender: role, + } +} + +describe('hybridContextStrategy', () => { + describe('splitContext', () => { + it('splits context into cached and fresh', () => { + const messages = [ + createMessage('system', 'System prompt', Date.now() - 86400000), + createMessage('user', 'Hello'), + createMessage('assistant', 'Hi there'), + ] + + const split = 
splitContext(messages, { + cacheWeight: 0.4, + freshWeight: 0.6, + maxTotalTokens: 10000, + }) + + expect(split.cachedTokens).toBeGreaterThanOrEqual(0) + expect(split.freshTokens).toBeGreaterThanOrEqual(0) + expect(split.totalTokens).toBeGreaterThan(0) + }) + + it('respects weight configuration', () => { + const messages = [ + createMessage('system', 'Old system', Date.now() - 86400000), + createMessage('user', 'Recent message', Date.now()), + ] + + const split = splitContext(messages, { + cacheWeight: 0.5, + freshWeight: 0.5, + maxTotalTokens: 10000, + }) + + expect(split.cached).toBeDefined() + expect(split.fresh).toBeDefined() + }) + }) + + describe('applyHybridStrategy', () => { + it('applies strategy and returns messages', () => { + const messages = [ + createMessage('user', 'Message 1'), + createMessage('assistant', 'Response 1'), + ] + + const result = applyHybridStrategy(messages, { + cacheWeight: 0.5, + freshWeight: 0.5, + maxTotalTokens: 10000, + }) + + expect(result.selectedMessages.length).toBeGreaterThan(0) + expect(['cache_heavy', 'fresh_heavy', 'balanced']).toContain(result.strategy) + }) + + it('calculates estimated cost', () => { + const messages = [ + createMessage('user', 'Test message'), + ] + + const result = applyHybridStrategy(messages, { + cacheWeight: 0.5, + freshWeight: 0.5, + maxTotalTokens: 10000, + }) + + expect(result.estimatedCost).toBeGreaterThanOrEqual(0) + }) + }) + + describe('optimizeForCost', () => { + it('returns messages within budget', () => { + const messages = [ + createMessage('user', 'Message 1'), + createMessage('assistant', 'Response 1'), + ] + + const result = optimizeForCost(messages, 0.001) + + expect(result.length).toBeGreaterThanOrEqual(0) + }) + }) + + describe('optimizeForAccuracy', () => { + it('optimizes for accuracy with token limit', () => { + const messages = [ + createMessage('user', 'Message 1'), + createMessage('assistant', 'Response 1'), + ] + + const result = optimizeForAccuracy(messages, 5000) + + 
expect(result.length).toBeGreaterThan(0) + }) + }) + + describe('getHybridStats', () => { + it('returns statistics', () => { + const messages = [ + createMessage('system', 'System', Date.now() - 86400000), + createMessage('user', 'Hello'), + ] + + const split = splitContext(messages, { cacheWeight: 0.5, freshWeight: 0.5, maxTotalTokens: 10000 }) + const stats = getHybridStats(split) + + expect(stats.cacheRatio).toBeGreaterThanOrEqual(0) + expect(stats.freshRatio).toBeGreaterThanOrEqual(0) + expect(stats.totalTokens).toBeGreaterThan(0) + }) + }) + + describe('tool_use/tool_result pairing', () => { + it('preserves tool_use and tool_result together', () => { + const toolUseId = 'tool-use-123' + const messages = [ + { + type: 'assistant', + uuid: 'uuid-1', + message: { + role: 'assistant', + content: [{ type: 'tool_use', id: toolUseId, name: 'Read' }], + id: 'msg-1', + created_at: 1000, + }, + }, + { + type: 'user', + uuid: 'uuid-2', + message: { + role: 'user', + content: [{ type: 'tool_result', tool_use_id: toolUseId, content: 'file content' }], + id: 'msg-2', + created_at: 2000, + }, + }, + { + type: 'assistant', + uuid: 'uuid-3', + message: { + role: 'assistant', + content: 'Response after tool', + id: 'msg-3', + created_at: 3000, + }, + }, + ] as any[] + + const result = applyHybridStrategy(messages, { + cacheWeight: 0.5, + freshWeight: 0.5, + maxTotalTokens: 10000, + }) + + const hasToolUse = result.selectedMessages.some( + m => Array.isArray(m.message?.content) && m.message.content.some((b: any) => b.type === 'tool_use') + ) + const hasToolResult = result.selectedMessages.some( + m => Array.isArray(m.message?.content) && m.message.content.some((b: any) => b.type === 'tool_result') + ) + + expect(hasToolUse).toBe(true) + expect(hasToolResult).toBe(true) + }) + + it('accounts for large tool_use input in token counting', () => { + const largeInput = 'x'.repeat(5000) + const messages = [ + { + type: 'assistant', + message: { + role: 'assistant', + content: [ + { 
type: 'tool_use', id: 'tu1', name: 'Edit', input: { path: 'test.js', content: largeInput } }, + ], + created_at: 1000, + }, + }, + ] as any[] + + const result = applyHybridStrategy(messages, { + cacheWeight: 0.5, + freshWeight: 0.5, + maxTotalTokens: 20000, + }) + + expect(result.totalTokens).toBeGreaterThan(1000) + }) + + it('accounts for large thinking blocks in token counting', () => { + const longThinking = 'Thinking '.repeat(1000) + const messages = [ + { + type: 'assistant', + message: { + role: 'assistant', + content: [ + { type: 'thinking', thinking: longThinking }, + { type: 'text', text: 'Final response' }, + ], + created_at: 1000, + }, + }, + ] as any[] + + const result = applyHybridStrategy(messages, { + cacheWeight: 0.5, + freshWeight: 0.5, + maxTotalTokens: 20000, + }) + + expect(result.totalTokens).toBeGreaterThan(500) + }) + }) +}) \ No newline at end of file diff --git a/src/utils/hybridContextStrategy.ts b/src/utils/hybridContextStrategy.ts new file mode 100644 index 00000000..9e110346 --- /dev/null +++ b/src/utils/hybridContextStrategy.ts @@ -0,0 +1,306 @@ +/** + * Hybrid Context Strategy - Production Grade + * + * Combines cached + new tokens intelligently. + * Optimizes for cost vs accuracy. 
+ */ + +import { roughTokenCountEstimation } from '../services/tokenEstimation.js' +import type { Message } from '../types/message.js' + +export interface HybridConfig { + cacheWeight: number + freshWeight: number + maxTotalTokens: number + costThreshold?: number +} + +export interface ContextSplit { + cached: Message[] + fresh: Message[] + cachedTokens: number + freshTokens: number + totalTokens: number +} + +export interface HybridStrategyResult { + selectedMessages: Message[] + totalTokens: number + strategy: 'cache_heavy' | 'fresh_heavy' | 'balanced' + estimatedCost: number +} + +const DEFAULT_CONFIG: Required = { + cacheWeight: 0.4, + freshWeight: 0.6, + maxTotalTokens: 100000, + costThreshold: 0.01, +} + +// Keep enough for: tool_use -> tool_result -> assistant -> user -> next +const MIN_TAILMessages = 5 + +function getMessageChain( + messages: Message[], +): { chains: Message[][]; orphans: Message[] } { + const toolUseIds = new Set() + const toolUseMessages = new Map() + const allMessagesByUuid = new Map() + + for (const msg of messages) { + const uuid = msg.uuid ?? '' + if (uuid) { + const existing = allMessagesByUuid.get(uuid) ?? [] + existing.push(msg) + allMessagesByUuid.set(uuid, existing) + } + + const content = msg.message?.content + if (Array.isArray(content)) { + for (const block of content) { + if (block?.type === 'tool_use' && block?.id) { + toolUseIds.add(block.id) + const existing = toolUseMessages.get(block.id) ?? 
[] + existing.push(msg) + toolUseMessages.set(block.id, existing) + } + } + } + } + + const chains: Message[][] = [] + const orphans: Message[] = [] + + for (const [toolUseId, msgs] of toolUseMessages) { + const chainMessages: Message[] = [...msgs] + + for (const msg of messages) { + const content = msg.message?.content + if (Array.isArray(content)) { + for (const block of content) { + if (block?.type === 'tool_result' && block?.tool_use_id === toolUseId) { + chainMessages.push(msg) + } + } + } + } + + chains.push(chainMessages) + } + + const chainMessageUuids = new Set() + for (const chain of chains) { + for (const msg of chain) { + if (msg.uuid) chainMessageUuids.add(msg.uuid) + } + } + + for (const [uuid, msgs] of allMessagesByUuid) { + if (!chainMessageUuids.has(uuid)) { + orphans.push(...msgs) + } + } + + return { chains, orphans } +} + +function getCacheAge(message: Message): number { + const created = message.message?.created_at ?? 0 + if (created === 0) return 1000 + return (Date.now() - created) / (1000 * 60 * 60) +} + +function getMessageTokenCount(message: Message): number { + const content = message.message?.content + if (typeof content === 'string') { + return roughTokenCountEstimation(content) + } + if (Array.isArray(content)) { + let tokens = 0 + for (const block of content) { + if (typeof block !== 'object' || block === null) continue + + const b = block as Record + + if (b.type === 'text' && typeof b.text === 'string') { + tokens += roughTokenCountEstimation(b.text) + } else if (b.type === 'tool_use') { + const inputSize = JSON.stringify(b.input ?? 
{}).length + tokens += Math.ceil(inputSize / 4) + 20 + } else if (b.type === 'tool_result') { + if (typeof b.content === 'string') { + tokens += roughTokenCountEstimation(b.content) + } else if (Array.isArray(b.content)) { + for (const rc of b.content) { + if (typeof rc === 'object' && rc !== null && 'text' in rc) { + tokens += roughTokenCountEstimation((rc as { text: string }).text) + } + } + } else { + tokens += 50 + } + if (b.is_error === true) tokens += 10 + } else if (b.type === 'thinking' && typeof b.thinking === 'string') { + tokens += roughTokenCountEstimation(b.thinking) + } + } + return tokens + } + return 0 +} + +function calculateCacheValue(message: Message): number { + const content = typeof message.message?.content === 'string' ? message.message.content : '' + const age = getCacheAge(message) + + let value = 0.5 + + if (content.includes('error') || content.includes('fail')) value += 0.3 + if (content.includes('function') || content.includes('class')) value += 0.2 + if (content.includes('important') || content.includes('key')) value += 0.15 + + if (age < 1) value += 0.2 + else if (age < 6) value += 0.1 + else value -= 0.2 + + if (message.message?.role === 'system') value += 0.1 + + return Math.max(0, Math.min(1, value)) +} + +export function splitContext( + messages: Message[], + config: HybridConfig, +): ContextSplit { + const cfg = { ...DEFAULT_CONFIG, ...config } + + const sorted = [...messages].sort((a, b) => { + const aValue = calculateCacheValue(a) + const bValue = calculateCacheValue(b) + return bValue - aValue + }) + + const cached: Message[] = [] + const fresh: Message[] = [] + let cachedTokens = 0 + let freshTokens = 0 + + const cacheTarget = Math.floor(cfg.maxTotalTokens * cfg.cacheWeight) + const freshTarget = Math.floor(cfg.maxTotalTokens * cfg.freshWeight) + + for (const msg of sorted) { + const tokens = getMessageTokenCount(msg) + const age = getCacheAge(msg) + + if (age > 24 && cachedTokens < cacheTarget) { + if (cachedTokens + tokens 
<= cacheTarget) { + cached.push(msg) + cachedTokens += tokens + continue + } + } + + if (freshTokens + tokens <= freshTarget) { + fresh.push(msg) + freshTokens += tokens + } + } + + return { + cached, + fresh, + cachedTokens, + freshTokens, + totalTokens: cachedTokens + freshTokens, + } +} + +export function applyHybridStrategy( + messages: Message[], + config: HybridConfig, +): HybridStrategyResult { + const cfg = { ...DEFAULT_CONFIG, ...config } + + // Preserve message chains (tool_use/tool_result pairs) + const { chains, orphans } = getMessageChain(messages) + + // Always preserve the conversation tail (last N messages) + const tailMessages = messages.slice(-MIN_TAILMessages) + const coreMessages = messages.slice(0, -MIN_TAILMessages) + + const split = splitContext(coreMessages, cfg) + + let strategy: HybridStrategyResult['strategy'] = 'balanced' + if (split.cachedTokens > split.freshTokens * 1.5) { + strategy = 'cache_heavy' + } else if (split.freshTokens > split.cachedTokens * 1.5) { + strategy = 'fresh_heavy' + } + + const allSelected = [ + ...chains.flat(), + ...split.cached, + ...split.fresh, + ...tailMessages + ] + + const seenUuids = new Set() + const selectedMessages: Message[] = [] + for (const msg of allSelected) { + const uuid = msg.uuid ?? msg.message?.id ?? '' + if (!seenUuids.has(uuid)) { + seenUuids.add(uuid) + selectedMessages.push(msg) + } + } + + selectedMessages.sort( + (a, b) => (a.message?.created_at ?? 0) - (b.message?.created_at ?? 
0) + ) + + let totalTokens = 0 + for (const msg of selectedMessages) { + totalTokens += getMessageTokenCount(msg) + } + + const estimatedCost = totalTokens * 0.000001 * 0.5 + + return { + selectedMessages, + totalTokens, + strategy, + estimatedCost, + } +} + +export function optimizeForCost(messages: Message[], budget: number): Message[] { + const result = applyHybridStrategy(messages, { + cacheWeight: 0.7, + freshWeight: 0.3, + maxTotalTokens: Math.floor(budget * 1000), + costThreshold: budget, + }) + return result.selectedMessages +} + +export function optimizeForAccuracy(messages: Message[], maxTokens: number): Message[] { + const result = applyHybridStrategy(messages, { + cacheWeight: 0.3, + freshWeight: 0.7, + maxTotalTokens: maxTokens, + }) + return result.selectedMessages +} + +export function getHybridStats(split: ContextSplit) { + const cacheRatio = split.totalTokens > 0 ? split.cachedTokens / split.totalTokens : 0 + const freshRatio = split.totalTokens > 0 ? split.freshTokens / split.totalTokens : 0 + + return { + cacheRatio: Math.round(cacheRatio * 100), + freshRatio: Math.round(freshRatio * 100), + totalTokens: split.totalTokens, + messageCount: split.cached.length + split.fresh.length, + efficiency: split.totalTokens / (split.cachedTokens + split.freshTokens + 1), + } +} \ No newline at end of file