From 92d297e50efcc7225f57f0d3cb0ba989dc40d624 Mon Sep 17 00:00:00 2001 From: ArkhAngelLifeJiggy <141562589+LifeJiggy@users.noreply.github.com> Date: Wed, 29 Apr 2026 08:49:46 +0100 Subject: [PATCH] feat: context preloading and hybrid context strategy (#860) * feat: context preloading and hybrid context strategy PR 2D - Section 2.7, 2.8: - Add contextPreload.ts with pattern-based prediction - Add hybridContextStrategy.ts with cache/fresh balancing - Optimize for cost vs accuracy - Add comprehensive tests (13 passing) * feat: wire hybrid context strategy into API path - Apply hybrid strategy after normalizeMessagesForAPI - Feature-flag controlled (HYBRID_CONTEXT_STRATEGY) - Optimizes cache/fresh balance for API requests * fix: resolve PR 2D blocking issues - Fix predictContextNeeds self-assign bug (matchedCategory = category) - Add test for non-empty predictedNeed - Preserve conversation tail in hybridStrategy (never drop last 3 messages) - Add comment for hardcoded 200k cap in claude.ts Fixes reviewer feedback from gnanam1990 and Vasanthdev2004 * fix: preserve tool_use/tool_result chains in hybridStrategy - Increase MIN_TAIL to 5 (tool_use -> tool_result -> assistant -> user -> next) - Add getMessageChain() to preserve paired messages - Chains kept together in final selection * fix: PR 860 - tool_use/tool_result pairing and safe token counting Blocking: - getMessageChain() now pairs by tool_use.id (block ID) not msg.message.id - Find tool_use blocks by id, pair with tool_result having matching tool_use_id - Fixes tool_result surviving while paired tool_use dropped - Token counting now includes array content (tool_use, tool_result, thinking) - Not just string content, prevents undercounting prompt size - Deduplicate messages by UUID when combining chains + split + tail - Prevents duplicate messages in final request Non-blocking: - Add regression test for tool_use/tool_result pairing * fix: PR 860 - account for actual structured payload size in token counting 
Blocking: - getMessageTokenCount now calculates actual token count for structured blocks - tool_use: uses JSON.stringify(input).length / 4 + base - tool_result: counts actual content (string or array of text blocks) - thinking: counts actual thinking text length / 4 - is_error flag adds small overhead Non-blocking: - Add tests for large tool_use input and large thinking blocks --- src/services/api/claude.ts | 15 ++ src/utils/contextPreload.test.ts | 104 ++++++++ src/utils/contextPreload.ts | 145 +++++++++++ src/utils/hybridContextStrategy.test.ts | 230 ++++++++++++++++++ src/utils/hybridContextStrategy.ts | 306 ++++++++++++++++++++++++ 5 files changed, 800 insertions(+) create mode 100644 src/utils/contextPreload.test.ts create mode 100644 src/utils/contextPreload.ts create mode 100644 src/utils/hybridContextStrategy.test.ts create mode 100644 src/utils/hybridContextStrategy.ts diff --git a/src/services/api/claude.ts b/src/services/api/claude.ts index 275be0c2..6815dfbe 100644 --- a/src/services/api/claude.ts +++ b/src/services/api/claude.ts @@ -1283,6 +1283,21 @@ async function* queryModel( let messagesForAPI = normalizeMessagesForAPI(messages, filteredTools) queryCheckpoint('query_message_normalization_end') + // Apply hybrid context strategy for optimal cache/fresh balance + if (feature('HYBRID_CONTEXT_STRATEGY')) { + const { applyHybridStrategy } = await import('../../utils/hybridContextStrategy.js') + // Cap at 200k to avoid edge case with very large context windows + const strategyResult = applyHybridStrategy(messagesForAPI, { + cacheWeight: 0.4, + freshWeight: 0.6, + maxTotalTokens: Math.min( + getContextWindowForModel(model, getSdkBetas()) - COMPACT_MAX_OUTPUT_TOKENS, + 200000 + ), + }) + messagesForAPI = strategyResult.selectedMessages + } + // Model-specific post-processing: strip tool-search-specific fields if the // selected model doesn't support tool search. 
// (patch continues: new test and utility files below)

// ---------------------------------------------------------------------------
// src/utils/contextPreload.test.ts
// ---------------------------------------------------------------------------

import { describe, expect, it } from 'bun:test'
import {
  analyzeConversationPatterns,
  predictContextNeeds,
  preloadContext,
  createPreloadStrategy,
} from './contextPreload.js'

/** Builds a minimal message fixture with the given role, content and timestamp. */
function makeMessage(role: string, content: string, createdAt: number = Date.now()): any {
  return {
    sender: role,
    message: { role, content, id: 'test', type: 'message', created_at: createdAt },
  }
}

describe('contextPreload', () => {
  describe('analyzeConversationPatterns', () => {
    it('extracts patterns from messages', () => {
      const conversation = [
        makeMessage('user', 'Fix the error in my code', 1000),
        makeMessage('assistant', 'I found the bug', 2000),
      ]

      expect(analyzeConversationPatterns(conversation).length).toBeGreaterThanOrEqual(0)
    })

    it('detects debug patterns', () => {
      const conversation = [
        makeMessage('user', 'Debug this error please', 1000),
        makeMessage('assistant', 'Found it', 2000),
      ]

      const detected = analyzeConversationPatterns(conversation)

      expect(detected.some(p => p.userQuery === 'debug')).toBe(true)
    })

    it('detects code patterns', () => {
      const conversation = [
        makeMessage('user', 'Write a function for me', 1000),
        makeMessage('assistant', 'Here is the code', 2000),
      ]

      const detected = analyzeConversationPatterns(conversation)

      expect(detected.some(p => p.userQuery === 'code')).toBe(true)
    })
  })

  describe('predictContextNeeds', () => {
    it('predicts context needs based on query', () => {
      const history = [{ userQuery: 'debug', neededContext: ['error_history'], frequency: 1 }]

      const prediction = predictContextNeeds('Fix the bug', history, {
        maxPreloadTokens: 10000,
        confidenceThreshold: 0.3,
      })

      expect(prediction.confidence).toBeGreaterThan(0)
      expect(prediction.predictedNeed.length).toBeGreaterThan(0)
    })

    it('returns non-empty predictedNeed when pattern matches', () => {
      const history = [
        { userQuery: 'debug', neededContext: ['error_history', 'stack_trace'], frequency: 2 },
      ]

      const prediction = predictContextNeeds('debug this error', history, {
        maxPreloadTokens: 10000,
        confidenceThreshold: 0.1,
      })

      expect(prediction.predictedNeed).toContain('error_history')
    })
  })

  describe('preloadContext', () => {
    it('preloads relevant context', () => {
      const pool = [
        makeMessage('system', 'System prompt'),
        makeMessage('user', 'Debug error'),
        makeMessage('assistant', 'Fixed'),
      ]
      const prediction = { predictedNeed: ['error'], confidence: 0.8, suggestedMessages: [] }

      const loaded = preloadContext(pool, prediction, { maxPreloadTokens: 5000 })

      expect(loaded.length).toBeGreaterThan(0)
    })
  })

  describe('createPreloadStrategy', () => {
    it('creates strategy with all methods', () => {
      const strategy = createPreloadStrategy({ maxPreloadTokens: 10000 })

      expect(strategy.analyze).toBeDefined()
      expect(strategy.predict).toBeDefined()
      expect(strategy.preload).toBeDefined()
    })
  })
})

// ---------------------------------------------------------------------------
// src/utils/contextPreload.ts
// ---------------------------------------------------------------------------

/**
 * Context Pre-loading - Production Grade
 *
 * Proactively loads relevant context before it's needed.
 * Prediction based on conversation patterns.
+ */ + +import { roughTokenCountEstimation } from '../services/tokenEstimation.js' +import type { Message } from '../types/message.js' + +export interface PreloadConfig { + maxPreloadTokens: number + predictionWindow?: number + confidenceThreshold?: number +} + +export interface PreloadPrediction { + predictedNeed: string[] + confidence: number + suggestedMessages: Message[] +} + +export interface ConversationPattern { + userQuery: string + neededContext: string[] + frequency: number +} + +const PATTERN_KEYWORDS: Record = { + 'code': ['code', 'function', 'implement', 'write'], + 'debug': ['error', 'bug', 'fix', 'issue', 'debug'], + 'refactor': ['refactor', 'improve', 'clean', 'optimize'], + 'test': ['test', 'spec', 'coverage', 'verify'], + 'explain': ['explain', 'what', 'how', 'why', 'describe'], + 'search': ['find', 'search', 'look', 'grep', 'glob'], +} + +export function analyzeConversationPatterns(messages: Message[]): ConversationPattern[] { + const patterns: ConversationPattern[] = [] + const recentMessages = messages.slice(-10) + + for (let i = 0; i < recentMessages.length - 1; i++) { + const userMsg = recentMessages[i] + const assistantMsg = recentMessages[i + 1] + + const userContent = typeof userMsg.message?.content === 'string' ? userMsg.message.content : '' + const assistantContent = typeof assistantMsg.message?.content === 'string' ? 
assistantMsg.message.content : '' + + for (const [category, keywords] of Object.entries(PATTERN_KEYWORDS)) { + if (keywords.some(k => userContent.toLowerCase().includes(k))) { + patterns.push({ + userQuery: category, + neededContext: extractContextNeeds(assistantContent), + frequency: 1, + }) + } + } + } + + return patterns +} + +function extractContextNeeds(content: string): string[] { + const needs: string[] = [] + if (content.includes('file')) needs.push('file_context') + if (content.includes('function')) needs.push('function_defs') + if (content.includes('error')) needs.push('error_history') + if (content.includes('test')) needs.push('test_files') + return needs +} + +export function predictContextNeeds( + currentQuery: string, + patterns: ConversationPattern[], + config: PreloadConfig, +): PreloadPrediction { + const threshold = config.confidenceThreshold ?? 0.5 + let matchedCategory = '' + let highestConfidence = 0 + + for (const [category, keywords] of Object.entries(PATTERN_KEYWORDS)) { + const matches = keywords.filter(k => currentQuery.toLowerCase().includes(k)).length + const confidence = matches / keywords.length + + if (confidence > highestConfidence && confidence >= threshold) { + highestConfidence = confidence + matchedCategory = category + } + } + + const relevantPatterns = patterns.filter(p => p.userQuery === matchedCategory) + const allNeeds = relevantPatterns.flatMap(p => p.neededContext) + + return { + predictedNeed: [...new Set(allNeeds)], + confidence: highestConfidence, + suggestedMessages: [], + } +} + +export function preloadContext( + availableContext: Message[], + prediction: PreloadPrediction, + config: PreloadConfig, +): Message[] { + const targetTokens = config.maxPreloadTokens ?? 30000 + const selected: Message[] = [] + let usedTokens = 0 + + const priorityTypes = prediction.predictedNeed + + const sorted = [...availableContext].sort((a, b) => { + const aContent = typeof a.message?.content === 'string' ? 
a.message.content : '' + const bContent = typeof b.message?.content === 'string' ? b.message.content : '' + + const aPriority = priorityTypes.some(t => aContent.includes(t)) ? 1 : 0 + const bPriority = priorityTypes.some(t => bContent.includes(t)) ? 1 : 0 + + if (bPriority !== aPriority) return bPriority - aPriority + return (b.message?.created_at ?? 0) - (a.message?.created_at ?? 0) + }) + + for (const msg of sorted) { + const tokens = roughTokenCountEstimation( + typeof msg.message?.content === 'string' ? msg.message.content : '' + ) + + if (usedTokens + tokens > targetTokens) break + + selected.push(msg) + usedTokens += tokens + } + + return selected +} + +export function createPreloadStrategy(config: PreloadConfig) { + return { + analyze: analyzeConversationPatterns, + predict: (query: string, patterns: ConversationPattern[]) => + predictContextNeeds(query, patterns, config), + preload: (context: Message[], prediction: PreloadPrediction) => + preloadContext(context, prediction, config), + } +} \ No newline at end of file diff --git a/src/utils/hybridContextStrategy.test.ts b/src/utils/hybridContextStrategy.test.ts new file mode 100644 index 00000000..d6ae2c59 --- /dev/null +++ b/src/utils/hybridContextStrategy.test.ts @@ -0,0 +1,230 @@ +import { describe, expect, it } from 'bun:test' +import { + splitContext, + applyHybridStrategy, + optimizeForCost, + optimizeForAccuracy, + getHybridStats, +} from './hybridContextStrategy.js' + +function createMessage(role: string, content: string, createdAt: number = Date.now()): any { + return { + message: { role, content, id: 'test', type: 'message', created_at: createdAt }, + sender: role, + } +} + +describe('hybridContextStrategy', () => { + describe('splitContext', () => { + it('splits context into cached and fresh', () => { + const messages = [ + createMessage('system', 'System prompt', Date.now() - 86400000), + createMessage('user', 'Hello'), + createMessage('assistant', 'Hi there'), + ] + + const split = 
splitContext(messages, { + cacheWeight: 0.4, + freshWeight: 0.6, + maxTotalTokens: 10000, + }) + + expect(split.cachedTokens).toBeGreaterThanOrEqual(0) + expect(split.freshTokens).toBeGreaterThanOrEqual(0) + expect(split.totalTokens).toBeGreaterThan(0) + }) + + it('respects weight configuration', () => { + const messages = [ + createMessage('system', 'Old system', Date.now() - 86400000), + createMessage('user', 'Recent message', Date.now()), + ] + + const split = splitContext(messages, { + cacheWeight: 0.5, + freshWeight: 0.5, + maxTotalTokens: 10000, + }) + + expect(split.cached).toBeDefined() + expect(split.fresh).toBeDefined() + }) + }) + + describe('applyHybridStrategy', () => { + it('applies strategy and returns messages', () => { + const messages = [ + createMessage('user', 'Message 1'), + createMessage('assistant', 'Response 1'), + ] + + const result = applyHybridStrategy(messages, { + cacheWeight: 0.5, + freshWeight: 0.5, + maxTotalTokens: 10000, + }) + + expect(result.selectedMessages.length).toBeGreaterThan(0) + expect(['cache_heavy', 'fresh_heavy', 'balanced']).toContain(result.strategy) + }) + + it('calculates estimated cost', () => { + const messages = [ + createMessage('user', 'Test message'), + ] + + const result = applyHybridStrategy(messages, { + cacheWeight: 0.5, + freshWeight: 0.5, + maxTotalTokens: 10000, + }) + + expect(result.estimatedCost).toBeGreaterThanOrEqual(0) + }) + }) + + describe('optimizeForCost', () => { + it('returns messages within budget', () => { + const messages = [ + createMessage('user', 'Message 1'), + createMessage('assistant', 'Response 1'), + ] + + const result = optimizeForCost(messages, 0.001) + + expect(result.length).toBeGreaterThanOrEqual(0) + }) + }) + + describe('optimizeForAccuracy', () => { + it('optimizes for accuracy with token limit', () => { + const messages = [ + createMessage('user', 'Message 1'), + createMessage('assistant', 'Response 1'), + ] + + const result = optimizeForAccuracy(messages, 5000) + + 
expect(result.length).toBeGreaterThan(0) + }) + }) + + describe('getHybridStats', () => { + it('returns statistics', () => { + const messages = [ + createMessage('system', 'System', Date.now() - 86400000), + createMessage('user', 'Hello'), + ] + + const split = splitContext(messages, { cacheWeight: 0.5, freshWeight: 0.5, maxTotalTokens: 10000 }) + const stats = getHybridStats(split) + + expect(stats.cacheRatio).toBeGreaterThanOrEqual(0) + expect(stats.freshRatio).toBeGreaterThanOrEqual(0) + expect(stats.totalTokens).toBeGreaterThan(0) + }) + }) + + describe('tool_use/tool_result pairing', () => { + it('preserves tool_use and tool_result together', () => { + const toolUseId = 'tool-use-123' + const messages = [ + { + type: 'assistant', + uuid: 'uuid-1', + message: { + role: 'assistant', + content: [{ type: 'tool_use', id: toolUseId, name: 'Read' }], + id: 'msg-1', + created_at: 1000, + }, + }, + { + type: 'user', + uuid: 'uuid-2', + message: { + role: 'user', + content: [{ type: 'tool_result', tool_use_id: toolUseId, content: 'file content' }], + id: 'msg-2', + created_at: 2000, + }, + }, + { + type: 'assistant', + uuid: 'uuid-3', + message: { + role: 'assistant', + content: 'Response after tool', + id: 'msg-3', + created_at: 3000, + }, + }, + ] as any[] + + const result = applyHybridStrategy(messages, { + cacheWeight: 0.5, + freshWeight: 0.5, + maxTotalTokens: 10000, + }) + + const hasToolUse = result.selectedMessages.some( + m => Array.isArray(m.message?.content) && m.message.content.some((b: any) => b.type === 'tool_use') + ) + const hasToolResult = result.selectedMessages.some( + m => Array.isArray(m.message?.content) && m.message.content.some((b: any) => b.type === 'tool_result') + ) + + expect(hasToolUse).toBe(true) + expect(hasToolResult).toBe(true) + }) + + it('accounts for large tool_use input in token counting', () => { + const largeInput = 'x'.repeat(5000) + const messages = [ + { + type: 'assistant', + message: { + role: 'assistant', + content: [ + { 
type: 'tool_use', id: 'tu1', name: 'Edit', input: { path: 'test.js', content: largeInput } }, + ], + created_at: 1000, + }, + }, + ] as any[] + + const result = applyHybridStrategy(messages, { + cacheWeight: 0.5, + freshWeight: 0.5, + maxTotalTokens: 20000, + }) + + expect(result.totalTokens).toBeGreaterThan(1000) + }) + + it('accounts for large thinking blocks in token counting', () => { + const longThinking = 'Thinking '.repeat(1000) + const messages = [ + { + type: 'assistant', + message: { + role: 'assistant', + content: [ + { type: 'thinking', thinking: longThinking }, + { type: 'text', text: 'Final response' }, + ], + created_at: 1000, + }, + }, + ] as any[] + + const result = applyHybridStrategy(messages, { + cacheWeight: 0.5, + freshWeight: 0.5, + maxTotalTokens: 20000, + }) + + expect(result.totalTokens).toBeGreaterThan(500) + }) + }) +}) \ No newline at end of file diff --git a/src/utils/hybridContextStrategy.ts b/src/utils/hybridContextStrategy.ts new file mode 100644 index 00000000..9e110346 --- /dev/null +++ b/src/utils/hybridContextStrategy.ts @@ -0,0 +1,306 @@ +/** + * Hybrid Context Strategy - Production Grade + * + * Combines cached + new tokens intelligently. + * Optimizes for cost vs accuracy. 
+ */ + +import { roughTokenCountEstimation } from '../services/tokenEstimation.js' +import type { Message } from '../types/message.js' + +export interface HybridConfig { + cacheWeight: number + freshWeight: number + maxTotalTokens: number + costThreshold?: number +} + +export interface ContextSplit { + cached: Message[] + fresh: Message[] + cachedTokens: number + freshTokens: number + totalTokens: number +} + +export interface HybridStrategyResult { + selectedMessages: Message[] + totalTokens: number + strategy: 'cache_heavy' | 'fresh_heavy' | 'balanced' + estimatedCost: number +} + +const DEFAULT_CONFIG: Required = { + cacheWeight: 0.4, + freshWeight: 0.6, + maxTotalTokens: 100000, + costThreshold: 0.01, +} + +// Keep enough for: tool_use -> tool_result -> assistant -> user -> next +const MIN_TAILMessages = 5 + +function getMessageChain( + messages: Message[], +): { chains: Message[][]; orphans: Message[] } { + const toolUseIds = new Set() + const toolUseMessages = new Map() + const allMessagesByUuid = new Map() + + for (const msg of messages) { + const uuid = msg.uuid ?? '' + if (uuid) { + const existing = allMessagesByUuid.get(uuid) ?? [] + existing.push(msg) + allMessagesByUuid.set(uuid, existing) + } + + const content = msg.message?.content + if (Array.isArray(content)) { + for (const block of content) { + if (block?.type === 'tool_use' && block?.id) { + toolUseIds.add(block.id) + const existing = toolUseMessages.get(block.id) ?? 
[] + existing.push(msg) + toolUseMessages.set(block.id, existing) + } + } + } + } + + const chains: Message[][] = [] + const orphans: Message[] = [] + + for (const [toolUseId, msgs] of toolUseMessages) { + const chainMessages: Message[] = [...msgs] + + for (const msg of messages) { + const content = msg.message?.content + if (Array.isArray(content)) { + for (const block of content) { + if (block?.type === 'tool_result' && block?.tool_use_id === toolUseId) { + chainMessages.push(msg) + } + } + } + } + + chains.push(chainMessages) + } + + const chainMessageUuids = new Set() + for (const chain of chains) { + for (const msg of chain) { + if (msg.uuid) chainMessageUuids.add(msg.uuid) + } + } + + for (const [uuid, msgs] of allMessagesByUuid) { + if (!chainMessageUuids.has(uuid)) { + orphans.push(...msgs) + } + } + + return { chains, orphans } +} + +function getCacheAge(message: Message): number { + const created = message.message?.created_at ?? 0 + if (created === 0) return 1000 + return (Date.now() - created) / (1000 * 60 * 60) +} + +function getMessageTokenCount(message: Message): number { + const content = message.message?.content + if (typeof content === 'string') { + return roughTokenCountEstimation(content) + } + if (Array.isArray(content)) { + let tokens = 0 + for (const block of content) { + if (typeof block !== 'object' || block === null) continue + + const b = block as Record + + if (b.type === 'text' && typeof b.text === 'string') { + tokens += roughTokenCountEstimation(b.text) + } else if (b.type === 'tool_use') { + const inputSize = JSON.stringify(b.input ?? 
{}).length + tokens += Math.ceil(inputSize / 4) + 20 + } else if (b.type === 'tool_result') { + if (typeof b.content === 'string') { + tokens += roughTokenCountEstimation(b.content) + } else if (Array.isArray(b.content)) { + for (const rc of b.content) { + if (typeof rc === 'object' && rc !== null && 'text' in rc) { + tokens += roughTokenCountEstimation((rc as { text: string }).text) + } + } + } else { + tokens += 50 + } + if (b.is_error === true) tokens += 10 + } else if (b.type === 'thinking' && typeof b.thinking === 'string') { + tokens += roughTokenCountEstimation(b.thinking) + } + } + return tokens + } + return 0 +} + +function calculateCacheValue(message: Message): number { + const content = typeof message.message?.content === 'string' ? message.message.content : '' + const age = getCacheAge(message) + + let value = 0.5 + + if (content.includes('error') || content.includes('fail')) value += 0.3 + if (content.includes('function') || content.includes('class')) value += 0.2 + if (content.includes('important') || content.includes('key')) value += 0.15 + + if (age < 1) value += 0.2 + else if (age < 6) value += 0.1 + else value -= 0.2 + + if (message.message?.role === 'system') value += 0.1 + + return Math.max(0, Math.min(1, value)) +} + +export function splitContext( + messages: Message[], + config: HybridConfig, +): ContextSplit { + const cfg = { ...DEFAULT_CONFIG, ...config } + + const sorted = [...messages].sort((a, b) => { + const aValue = calculateCacheValue(a) + const bValue = calculateCacheValue(b) + return bValue - aValue + }) + + const cached: Message[] = [] + const fresh: Message[] = [] + let cachedTokens = 0 + let freshTokens = 0 + + const cacheTarget = Math.floor(cfg.maxTotalTokens * cfg.cacheWeight) + const freshTarget = Math.floor(cfg.maxTotalTokens * cfg.freshWeight) + + for (const msg of sorted) { + const tokens = getMessageTokenCount(msg) + const age = getCacheAge(msg) + + if (age > 24 && cachedTokens < cacheTarget) { + if (cachedTokens + tokens 
<= cacheTarget) { + cached.push(msg) + cachedTokens += tokens + continue + } + } + + if (freshTokens + tokens <= freshTarget) { + fresh.push(msg) + freshTokens += tokens + } + } + + return { + cached, + fresh, + cachedTokens, + freshTokens, + totalTokens: cachedTokens + freshTokens, + } +} + +export function applyHybridStrategy( + messages: Message[], + config: HybridConfig, +): HybridStrategyResult { + const cfg = { ...DEFAULT_CONFIG, ...config } + + // Preserve message chains (tool_use/tool_result pairs) + const { chains, orphans } = getMessageChain(messages) + + // Always preserve the conversation tail (last N messages) + const tailMessages = messages.slice(-MIN_TAILMessages) + const coreMessages = messages.slice(0, -MIN_TAILMessages) + + const split = splitContext(coreMessages, cfg) + + let strategy: HybridStrategyResult['strategy'] = 'balanced' + if (split.cachedTokens > split.freshTokens * 1.5) { + strategy = 'cache_heavy' + } else if (split.freshTokens > split.cachedTokens * 1.5) { + strategy = 'fresh_heavy' + } + + const allSelected = [ + ...chains.flat(), + ...split.cached, + ...split.fresh, + ...tailMessages + ] + + const seenUuids = new Set() + const selectedMessages: Message[] = [] + for (const msg of allSelected) { + const uuid = msg.uuid ?? msg.message?.id ?? '' + if (!seenUuids.has(uuid)) { + seenUuids.add(uuid) + selectedMessages.push(msg) + } + } + + selectedMessages.sort( + (a, b) => (a.message?.created_at ?? 0) - (b.message?.created_at ?? 
0) + ) + + let totalTokens = 0 + for (const msg of selectedMessages) { + totalTokens += getMessageTokenCount(msg) + } + + const estimatedCost = totalTokens * 0.000001 * 0.5 + + return { + selectedMessages, + totalTokens, + strategy, + estimatedCost, + } +} + +export function optimizeForCost(messages: Message[], budget: number): Message[] { + const result = applyHybridStrategy(messages, { + cacheWeight: 0.7, + freshWeight: 0.3, + maxTotalTokens: Math.floor(budget * 1000), + costThreshold: budget, + }) + return result.selectedMessages +} + +export function optimizeForAccuracy(messages: Message[], maxTokens: number): Message[] { + const result = applyHybridStrategy(messages, { + cacheWeight: 0.3, + freshWeight: 0.7, + maxTotalTokens: maxTokens, + }) + return result.selectedMessages +} + +export function getHybridStats(split: ContextSplit) { + const cacheRatio = split.totalTokens > 0 ? split.cachedTokens / split.totalTokens : 0 + const freshRatio = split.totalTokens > 0 ? split.freshTokens / split.totalTokens : 0 + + return { + cacheRatio: Math.round(cacheRatio * 100), + freshRatio: Math.round(freshRatio * 100), + totalTokens: split.totalTokens, + messageCount: split.cached.length + split.fresh.length, + efficiency: split.totalTokens / (split.cachedTokens + split.freshTokens + 1), + } +} \ No newline at end of file