feat: add thinking token extraction (#798)
* feat: add thinking token tracking and historical analytics
  - extractThinkingTokens(): separate thinking from output tokens
  - TokenUsageTracker class for historical analytics
  - Track: cache hit rate, most used model, requests per hour/day
  - Analytics: average tokens per request, totals
  - Add tests (7 passing)
  PR 4B: Features 1.10 + 1.11

* refactor: extract thinking and analytics to separate files
  - Create thinkingTokenExtractor.ts with ThinkingTokenAnalyzer
  - Create tokenAnalytics.ts with TokenUsageTracker
  - Add production-grade methods and tests
  - Update test imports
This commit is contained in:
committed by
GitHub
parent
761924daa7
commit
268c0398e4
106
src/utils/thinkingTokenExtractor.test.ts
Normal file
106
src/utils/thinkingTokenExtractor.test.ts
Normal file
@@ -0,0 +1,106 @@
|
||||
import { describe, expect, it } from 'bun:test'
|
||||
import { ThinkingTokenAnalyzer } from './thinkingTokenExtractor.js'
|
||||
|
||||
// Unit tests for ThinkingTokenAnalyzer (extract / analyze / hasSignificantThinking).
// Messages are built as minimal object literals and cast via `as any` because
// the full AssistantMessage shape is not needed for these cases.
describe('ThinkingTokenAnalyzer', () => {
  describe('extract', () => {
    it('extracts thinking and output separately', () => {
      const message = {
        type: 'assistant',
        message: {
          content: [
            { type: 'thinking', thinking: 'Let me think about this...' },
            { type: 'text', text: 'Here is my answer.' },
          ],
        },
      } as any

      const result = ThinkingTokenAnalyzer.extract(message)

      expect(result.thinking).toBeGreaterThan(0)
      expect(result.output).toBeGreaterThan(0)
      expect(result.total).toBe(result.thinking + result.output)
    })

    it('handles no thinking', () => {
      const message = {
        type: 'assistant',
        message: {
          content: [{ type: 'text', text: 'Hello world' }],
        },
      } as any

      const result = ThinkingTokenAnalyzer.extract(message)

      expect(result.thinking).toBe(0)
      expect(result.output).toBeGreaterThan(0)
    })

    it('handles redacted thinking', () => {
      // Redacted blocks carry their payload in `data`, not `thinking`.
      const message = {
        type: 'assistant',
        message: {
          content: [
            { type: 'redacted_thinking', data: '[thinking hidden]' },
            { type: 'text', text: 'Answer here.' },
          ],
        },
      } as any

      const result = ThinkingTokenAnalyzer.extract(message)

      expect(result.thinking).toBeGreaterThan(0)
      expect(result.output).toBeGreaterThan(0)
    })
  })

  describe('analyze', () => {
    it('calculates percentages', () => {
      const message = {
        type: 'assistant',
        message: {
          content: [
            { type: 'thinking', thinking: 'Thinking1 Thinking2 Thinking3' },
            { type: 'text', text: 'Output1 Output2' },
          ],
        },
      } as any

      const analysis = ThinkingTokenAnalyzer.analyze(message)

      expect(analysis.hasThinking).toBe(true)
      expect(analysis.thinkingPercentage).toBeGreaterThan(0)
      expect(analysis.outputPercentage).toBeGreaterThan(0)
      expect(analysis.reasoningComplexity).toBeTruthy()
    })
  })

  describe('hasSignificantThinking', () => {
    it('detects significant thinking', () => {
      // 500 chars of thinking vs a 5-char output puts the thinking share
      // far above the 20% threshold.
      const message = {
        type: 'assistant',
        message: {
          content: [
            { type: 'thinking', thinking: 'x'.repeat(500) },
            { type: 'text', text: 'short' },
          ],
        },
      } as any

      expect(ThinkingTokenAnalyzer.hasSignificantThinking(message, 20)).toBe(true)
    })

    it('rejects minimal thinking', () => {
      const message = {
        type: 'assistant',
        message: {
          content: [
            { type: 'thinking', thinking: 'a' },
            { type: 'text', text: 'much longer output text here with more content' },
          ],
        },
      } as any

      expect(ThinkingTokenAnalyzer.hasSignificantThinking(message, 20)).toBe(false)
    })
  })
})
|
||||
192
src/utils/thinkingTokenExtractor.ts
Normal file
192
src/utils/thinkingTokenExtractor.ts
Normal file
@@ -0,0 +1,192 @@
|
||||
/**
|
||||
* Thinking Token Extractor - Production-grade thinking token analysis
|
||||
*
|
||||
* Extracts and analyzes thinking tokens from assistant messages.
|
||||
* Provides detailed breakdown, statistics, and insights.
|
||||
*/
|
||||
|
||||
import { roughTokenCountEstimation } from '../services/tokenEstimation.js'
|
||||
import { jsonStringify } from './slowOperations.js'
|
||||
import type { AssistantMessage, Message } from '../types/message.js'
|
||||
|
||||
/** A single thinking-type content block with its estimated token count. */
export interface ThinkingBlock {
  type: 'thinking' | 'redacted_thinking'
  content: string
  tokens: number
}

/** A single visible (output) content block with its estimated token count. */
export interface OutputBlock {
  type: 'text' | 'tool_use'
  content: string
  tokens: number
}

/** Per-message split of estimated tokens into thinking vs output. */
export interface ThinkingTokenBreakdown {
  thinking: number
  output: number
  // Always thinking + output.
  total: number
  thinkingBlocks: ThinkingBlock[]
  outputBlocks: OutputBlock[]
}

/** Aggregate insights derived from a ThinkingTokenBreakdown by analyze(). */
export interface ThinkingAnalysis {
  hasThinking: boolean
  // Percentages (0-100) rounded to one decimal place.
  thinkingPercentage: number
  outputPercentage: number
  blockCount: number
  avgThinkingBlockSize: number
  avgOutputBlockSize: number
  totalTextLength: number
  reasoningComplexity: 'low' | 'medium' | 'high'
}
|
||||
|
||||
export class ThinkingTokenAnalyzer {
|
||||
/**
|
||||
* Extract detailed thinking vs output breakdown
|
||||
*/
|
||||
static extract(message: AssistantMessage): ThinkingTokenBreakdown {
|
||||
const thinkingBlocks: ThinkingBlock[] = []
|
||||
const outputBlocks: OutputBlock[] = []
|
||||
let thinking = 0
|
||||
let output = 0
|
||||
|
||||
for (const block of message.message.content) {
|
||||
if (block.type === 'thinking') {
|
||||
const tokens = roughTokenCountEstimation(block.thinking)
|
||||
thinking += tokens
|
||||
thinkingBlocks.push({
|
||||
type: 'thinking',
|
||||
content: block.thinking,
|
||||
tokens,
|
||||
})
|
||||
} else if (block.type === 'redacted_thinking') {
|
||||
const tokens = roughTokenCountEstimation(block.data)
|
||||
thinking += tokens
|
||||
thinkingBlocks.push({
|
||||
type: 'redacted_thinking',
|
||||
content: block.data,
|
||||
tokens,
|
||||
})
|
||||
} else if (block.type === 'text') {
|
||||
const tokens = roughTokenCountEstimation(block.text)
|
||||
output += tokens
|
||||
outputBlocks.push({
|
||||
type: 'text',
|
||||
content: block.text,
|
||||
tokens,
|
||||
})
|
||||
} else if (block.type === 'tool_use') {
|
||||
const content = jsonStringify(block.input)
|
||||
const tokens = roughTokenCountEstimation(content)
|
||||
output += tokens
|
||||
outputBlocks.push({
|
||||
type: 'tool_use',
|
||||
content,
|
||||
tokens,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
thinking,
|
||||
output,
|
||||
total: thinking + output,
|
||||
thinkingBlocks,
|
||||
outputBlocks,
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple extraction for quick use
|
||||
*/
|
||||
static extractSimple(message: AssistantMessage): ThinkingTokenBreakdown {
|
||||
return this.extract(message)
|
||||
}
|
||||
|
||||
/**
|
||||
* Analyze thinking patterns and provide insights
|
||||
*/
|
||||
static analyze(message: AssistantMessage): ThinkingAnalysis {
|
||||
const breakdown = this.extract(message)
|
||||
const { thinking, output, total, thinkingBlocks, outputBlocks } = breakdown
|
||||
|
||||
const hasThinking = thinking > 0
|
||||
const thinkingPercentage = total > 0 ? (thinking / total) * 100 : 0
|
||||
const outputPercentage = total > 0 ? (output / total) * 100 : 0
|
||||
|
||||
const avgThinkingBlockSize = thinkingBlocks.length > 0
|
||||
? thinkingBlocks.reduce((sum, b) => sum + b.tokens, 0) / thinkingBlocks.length
|
||||
: 0
|
||||
|
||||
const avgOutputBlockSize = outputBlocks.length > 0
|
||||
? outputBlocks.reduce((sum, b) => sum + b.tokens, 0) / outputBlocks.length
|
||||
: 0
|
||||
|
||||
const totalTextLength = [...thinkingBlocks, ...outputBlocks].reduce(
|
||||
(sum, b) => sum + b.content.length,
|
||||
0,
|
||||
)
|
||||
|
||||
// Complexity based on thinking percentage and block count
|
||||
let reasoningComplexity: 'low' | 'medium' | 'high' = 'low'
|
||||
if (thinkingPercentage > 30 || thinkingBlocks.length > 5) {
|
||||
reasoningComplexity = 'high'
|
||||
} else if (thinkingPercentage > 10 || thinkingBlocks.length > 2) {
|
||||
reasoningComplexity = 'medium'
|
||||
}
|
||||
|
||||
return {
|
||||
hasThinking,
|
||||
thinkingPercentage: Math.round(thinkingPercentage * 10) / 10,
|
||||
outputPercentage: Math.round(outputPercentage * 10) / 10,
|
||||
blockCount: thinkingBlocks.length + outputBlocks.length,
|
||||
avgThinkingBlockSize: Math.round(avgThinkingBlockSize),
|
||||
avgOutputBlockSize: Math.round(avgOutputBlockSize),
|
||||
totalTextLength,
|
||||
reasoningComplexity,
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if message has significant thinking
|
||||
*/
|
||||
static hasSignificantThinking(
|
||||
message: AssistantMessage,
|
||||
thresholdPercent = 20,
|
||||
): boolean {
|
||||
const analysis = this.analyze(message)
|
||||
return analysis.thinkingPercentage >= thresholdPercent
|
||||
}
|
||||
|
||||
/**
|
||||
* Get thinking-only messages from an array
|
||||
*/
|
||||
static filterThinkingMessages(messages: Message[]): AssistantMessage[] {
|
||||
return messages
|
||||
.filter((m): m is AssistantMessage => m.type === 'assistant')
|
||||
.filter(m => this.hasSignificantThinking(m))
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate total thinking tokens across messages
|
||||
*/
|
||||
static totalThinkingTokens(messages: Message[]): number {
|
||||
return messages
|
||||
.filter((m): m is AssistantMessage => m.type === 'assistant')
|
||||
.reduce((sum, m) => sum + this.extract(m).thinking, 0)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Legacy export for backward compatibility
|
||||
*/
|
||||
export function extractThinkingTokens(
|
||||
message: AssistantMessage,
|
||||
): { thinking: number; output: number; total: number } {
|
||||
const result = ThinkingTokenAnalyzer.extract(message)
|
||||
return {
|
||||
thinking: result.thinking,
|
||||
output: result.output,
|
||||
total: result.total,
|
||||
}
|
||||
}
|
||||
69
src/utils/thinkingTokens.test.ts
Normal file
69
src/utils/thinkingTokens.test.ts
Normal file
@@ -0,0 +1,69 @@
|
||||
import { describe, expect, it } from 'bun:test'
|
||||
import { extractThinkingTokens } from './tokens.js'
|
||||
|
||||
// Unit tests for the legacy extractThinkingTokens() helper.
// NOTE(review): this imports from './tokens.js' even though the refactor also
// added the function to thinkingTokenExtractor.ts — confirm which copy is
// intended to remain canonical.
describe('extractThinkingTokens', () => {
  it('extracts thinking and output separately', () => {
    const message = {
      type: 'assistant',
      message: {
        content: [
          { type: 'thinking', thinking: 'Let me think about this...' },
          { type: 'text', text: 'Here is my answer.' },
        ],
      },
    } as any

    const result = extractThinkingTokens(message)

    expect(result.thinking).toBeGreaterThan(0)
    expect(result.output).toBeGreaterThan(0)
    expect(result.total).toBe(result.thinking + result.output)
  })

  it('handles no thinking', () => {
    const message = {
      type: 'assistant',
      message: {
        content: [{ type: 'text', text: 'Hello world' }],
      },
    } as any

    const result = extractThinkingTokens(message)

    expect(result.thinking).toBe(0)
    expect(result.output).toBeGreaterThan(0)
  })

  it('handles redacted thinking', () => {
    // Redacted blocks carry their payload in `data`, not `thinking`.
    const message = {
      type: 'assistant',
      message: {
        content: [
          { type: 'redacted_thinking', data: '[thinking hidden]' },
          { type: 'text', text: 'Answer here.' },
        ],
      },
    } as any

    const result = extractThinkingTokens(message)

    expect(result.thinking).toBeGreaterThan(0)
    expect(result.output).toBeGreaterThan(0)
  })

  it('handles tool use', () => {
    // tool_use input is serialized and counted as output tokens.
    const message = {
      type: 'assistant',
      message: {
        content: [
          { type: 'tool_use', id: 'tool_1', name: 'bash', input: { cmd: 'echo test' } },
          { type: 'text', text: 'Ran command.' },
        ],
      },
    } as any

    const result = extractThinkingTokens(message)

    expect(result.output).toBeGreaterThan(0)
  })
})
|
||||
84
src/utils/tokenAnalytics.test.ts
Normal file
84
src/utils/tokenAnalytics.test.ts
Normal file
@@ -0,0 +1,84 @@
|
||||
import { describe, expect, it, beforeEach } from 'bun:test'
|
||||
import { TokenUsageTracker } from './tokenAnalytics.js'
|
||||
|
||||
// Unit tests for TokenUsageTracker: recording, analytics aggregation,
// cache-hit rate, model counting, bounded history, and clearing.
describe('TokenUsageTracker', () => {
  let tracker: TokenUsageTracker

  beforeEach(() => {
    // Fresh tracker per test; 100-entry cap is plenty for these cases.
    tracker = new TokenUsageTracker(100)
  })

  it('records token usage', () => {
    tracker.record({
      input_tokens: 1000,
      output_tokens: 500,
      cache_read_input_tokens: 200,
      cache_creation_input_tokens: 100,
      model: 'claude-sonnet-4-5-20250514',
    })

    expect(tracker.size).toBe(1)
  })

  it('calculates analytics', () => {
    tracker.record({
      input_tokens: 1000,
      output_tokens: 500,
      model: 'claude-sonnet-4-5-20250514',
    })

    tracker.record({
      input_tokens: 2000,
      output_tokens: 300,
      model: 'claude-sonnet-4-5-20250514',
    })

    const analytics = tracker.getAnalytics()

    expect(analytics.totalRequests).toBe(2)
    expect(analytics.totalInputTokens).toBe(3000)
    expect(analytics.totalOutputTokens).toBe(800)
    expect(analytics.averageInputPerRequest).toBe(1500)
    expect(analytics.averageOutputPerRequest).toBe(400)
  })

  it('tracks cache hit rate', () => {
    tracker.record({
      input_tokens: 1000,
      output_tokens: 500,
      cache_read_input_tokens: 500, // 500 of 2000 total tokens => 25% hit rate
      model: 'claude-sonnet-4-5-20250514',
    })

    const analytics = tracker.getAnalytics()

    expect(analytics.cacheHitRate).toBeGreaterThan(0)
  })

  it('tracks most used model', () => {
    tracker.record({ input_tokens: 1000, output_tokens: 100, model: 'sonnet' })
    tracker.record({ input_tokens: 1000, output_tokens: 100, model: 'sonnet' })
    tracker.record({ input_tokens: 1000, output_tokens: 100, model: 'opus' })

    expect(tracker.getAnalytics().mostUsedModel).toBe('sonnet')
  })

  it('respects max entries limit', () => {
    // Cap of 3: the two oldest of five records must be dropped.
    const smallTracker = new TokenUsageTracker(3)

    smallTracker.record({ input_tokens: 1, output_tokens: 1, model: 'a' })
    smallTracker.record({ input_tokens: 2, output_tokens: 2, model: 'b' })
    smallTracker.record({ input_tokens: 3, output_tokens: 3, model: 'c' })
    smallTracker.record({ input_tokens: 4, output_tokens: 4, model: 'd' })
    smallTracker.record({ input_tokens: 5, output_tokens: 5, model: 'e' })

    expect(smallTracker.size).toBe(3)
  })

  it('clears history', () => {
    tracker.record({ input_tokens: 1000, output_tokens: 100, model: 'test' })
    tracker.clear()

    expect(tracker.size).toBe(0)
  })
})
|
||||
211
src/utils/tokenAnalytics.ts
Normal file
211
src/utils/tokenAnalytics.ts
Normal file
@@ -0,0 +1,211 @@
|
||||
/**
|
||||
* Token Analytics - Historical token usage tracking and analysis
|
||||
*
|
||||
* Tracks token usage patterns over time for cost optimization
|
||||
* and capacity planning.
|
||||
*/
|
||||
|
||||
import type { BetaUsage as Usage } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
|
||||
|
||||
export interface TokenUsageEntry {
|
||||
timestamp: number
|
||||
inputTokens: number
|
||||
outputTokens: number
|
||||
cacheReadTokens: number
|
||||
cacheCreationTokens: number
|
||||
model: string
|
||||
}
|
||||
|
||||
export interface TokenAnalytics {
|
||||
totalRequests: number
|
||||
totalInputTokens: number
|
||||
totalOutputTokens: number
|
||||
totalCacheRead: number
|
||||
totalCacheCreation: number
|
||||
averageInputPerRequest: number
|
||||
averageOutputPerRequest: number
|
||||
cacheHitRate: number
|
||||
mostUsedModel: string
|
||||
requestsLastHour: number
|
||||
requestsLastDay: number
|
||||
}
|
||||
|
||||
/**
|
||||
* Historical Token Analytics Tracker
|
||||
*
|
||||
* Tracks token usage patterns over time for analytics,
|
||||
* cost optimization, and capacity planning.
|
||||
*/
|
||||
export class TokenUsageTracker {
|
||||
private history: TokenUsageEntry[] = []
|
||||
private readonly maxEntries: number
|
||||
|
||||
constructor(maxEntries = 1000) {
|
||||
this.maxEntries = maxEntries
|
||||
}
|
||||
|
||||
/**
|
||||
* Record a token usage event from API response.
|
||||
*/
|
||||
record(usage: {
|
||||
input_tokens: number
|
||||
output_tokens: number
|
||||
cache_read_input_tokens?: number
|
||||
cache_creation_input_tokens?: number
|
||||
model: string
|
||||
}): void {
|
||||
const entry: TokenUsageEntry = {
|
||||
timestamp: Date.now(),
|
||||
inputTokens: usage.input_tokens,
|
||||
outputTokens: usage.output_tokens,
|
||||
cacheReadTokens: usage.cache_read_input_tokens ?? 0,
|
||||
cacheCreationTokens: usage.cache_creation_input_tokens ?? 0,
|
||||
model: usage.model,
|
||||
}
|
||||
|
||||
this.history.push(entry)
|
||||
|
||||
if (this.history.length > this.maxEntries) {
|
||||
this.history = this.history.slice(-this.maxEntries)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get analytics summary for all recorded usage.
|
||||
*/
|
||||
getAnalytics(): TokenAnalytics {
|
||||
if (this.history.length === 0) {
|
||||
return {
|
||||
totalRequests: 0,
|
||||
totalInputTokens: 0,
|
||||
totalOutputTokens: 0,
|
||||
totalCacheRead: 0,
|
||||
totalCacheCreation: 0,
|
||||
averageInputPerRequest: 0,
|
||||
averageOutputPerRequest: 0,
|
||||
cacheHitRate: 0,
|
||||
mostUsedModel: 'unknown',
|
||||
requestsLastHour: 0,
|
||||
requestsLastDay: 0,
|
||||
}
|
||||
}
|
||||
|
||||
const now = Date.now()
|
||||
const hourAgo = now - 60 * 60 * 1000
|
||||
const dayAgo = now - 24 * 60 * 60 * 1000
|
||||
|
||||
let totalInput = 0
|
||||
let totalOutput = 0
|
||||
let totalCacheRead = 0
|
||||
let totalCacheCreation = 0
|
||||
const modelCounts = new Map<string, number>()
|
||||
let requestsLastHour = 0
|
||||
let requestsLastDay = 0
|
||||
|
||||
for (const entry of this.history) {
|
||||
totalInput += entry.inputTokens
|
||||
totalOutput += entry.outputTokens
|
||||
totalCacheRead += entry.cacheReadTokens
|
||||
totalCacheCreation += entry.cacheCreationTokens
|
||||
|
||||
modelCounts.set(entry.model, (modelCounts.get(entry.model) ?? 0) + 1)
|
||||
|
||||
if (entry.timestamp >= hourAgo) requestsLastHour++
|
||||
if (entry.timestamp >= dayAgo) requestsLastDay++
|
||||
}
|
||||
|
||||
let mostUsedModel = 'unknown'
|
||||
let maxCount = 0
|
||||
for (const [model, count] of modelCounts) {
|
||||
if (count > maxCount) {
|
||||
maxCount = count
|
||||
mostUsedModel = model
|
||||
}
|
||||
}
|
||||
|
||||
const totalRequests = this.history.length
|
||||
const totalCache = totalCacheRead + totalCacheCreation
|
||||
const totalTokens = totalInput + totalOutput + totalCache
|
||||
const cacheHitRate = totalTokens > 0 ? (totalCacheRead / totalTokens) * 100 : 0
|
||||
|
||||
return {
|
||||
totalRequests,
|
||||
totalInputTokens: totalInput,
|
||||
totalOutputTokens: totalOutput,
|
||||
totalCacheRead,
|
||||
totalCacheCreation,
|
||||
averageInputPerRequest: Math.round(totalInput / totalRequests),
|
||||
averageOutputPerRequest: Math.round(totalOutput / totalRequests),
|
||||
cacheHitRate: Math.round(cacheHitRate),
|
||||
mostUsedModel,
|
||||
requestsLastHour,
|
||||
requestsLastDay,
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get recent entries within time window.
|
||||
*/
|
||||
getRecent(windowMs: number): TokenUsageEntry[] {
|
||||
const cutoff = Date.now() - windowMs
|
||||
return this.history.filter(e => e.timestamp >= cutoff)
|
||||
}
|
||||
|
||||
/**
|
||||
* Get entries for a specific model
|
||||
*/
|
||||
getByModel(model: string): TokenUsageEntry[] {
|
||||
return this.history.filter(e => e.model === model)
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate cost estimate (approximate)
|
||||
*/
|
||||
estimateCost(): { input: number; output: number; cache: number } {
|
||||
const analytics = this.getAnalytics()
|
||||
|
||||
// Approximate pricing (adjust as needed)
|
||||
const inputCost = analytics.totalInputTokens * 0.00015
|
||||
const outputCost = analytics.totalOutputTokens * 0.0006
|
||||
const cacheCost = analytics.totalCacheRead * 0.000075
|
||||
|
||||
return {
|
||||
input: Math.round(inputCost * 100) / 100,
|
||||
output: Math.round(outputCost * 100) / 100,
|
||||
cache: Math.round(cacheCost * 100) / 100,
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear history.
|
||||
*/
|
||||
clear(): void {
|
||||
this.history = []
|
||||
}
|
||||
|
||||
/**
|
||||
* Get history size.
|
||||
*/
|
||||
get size(): number {
|
||||
return this.history.length
|
||||
}
|
||||
|
||||
/**
|
||||
* Export history as JSON
|
||||
*/
|
||||
export(): string {
|
||||
return JSON.stringify(this.history, null, 2)
|
||||
}
|
||||
|
||||
/**
|
||||
* Import history from JSON
|
||||
*/
|
||||
import(json: string): void {
|
||||
try {
|
||||
const entries = JSON.parse(json) as TokenUsageEntry[]
|
||||
this.history = entries.slice(-this.maxEntries)
|
||||
} catch {
|
||||
// Invalid JSON, ignore
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
import type { BetaUsage as Usage } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
|
||||
import { roughTokenCountEstimationForMessages } from '../services/tokenEstimation.js'
|
||||
import { roughTokenCountEstimation, roughTokenCountEstimationForMessages } from '../services/tokenEstimation.js'
|
||||
import type { AssistantMessage, Message } from '../types/message.js'
|
||||
import { SYNTHETIC_MESSAGES, SYNTHETIC_MODEL } from './messages.js'
|
||||
import { jsonStringify } from './slowOperations.js'
|
||||
@@ -198,6 +198,198 @@ export function getAssistantMessageContentLength(
|
||||
return contentLength
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract thinking tokens from an assistant message.
|
||||
* Returns breakdown of thinking vs output tokens.
|
||||
*/
|
||||
export function extractThinkingTokens(
|
||||
message: AssistantMessage,
|
||||
): { thinking: number; output: number; total: number } {
|
||||
let thinking = 0
|
||||
let output = 0
|
||||
|
||||
for (const block of message.message.content) {
|
||||
if (block.type === 'thinking') {
|
||||
thinking += roughTokenCountEstimation(block.thinking)
|
||||
} else if (block.type === 'redacted_thinking') {
|
||||
thinking += roughTokenCountEstimation(block.data)
|
||||
} else if (block.type === 'text') {
|
||||
output += roughTokenCountEstimation(block.text)
|
||||
} else if (block.type === 'tool_use') {
|
||||
output += roughTokenCountEstimation(jsonStringify(block.input))
|
||||
}
|
||||
}
|
||||
|
||||
return { thinking, output, total: thinking + output }
|
||||
}
|
||||
|
||||
/**
|
||||
* Token usage history entry for tracking patterns over time.
|
||||
*/
|
||||
export interface TokenUsageEntry {
|
||||
timestamp: number
|
||||
inputTokens: number
|
||||
outputTokens: number
|
||||
cacheReadTokens: number
|
||||
cacheCreationTokens: number
|
||||
model: string
|
||||
}
|
||||
|
||||
/**
|
||||
* Token analytics summary from historical data.
|
||||
*/
|
||||
export interface TokenAnalytics {
|
||||
totalRequests: number
|
||||
totalInputTokens: number
|
||||
totalOutputTokens: number
|
||||
totalCacheRead: number
|
||||
totalCacheCreation: number
|
||||
averageInputPerRequest: number
|
||||
averageOutputPerRequest: number
|
||||
cacheHitRate: number
|
||||
mostUsedModel: string
|
||||
requestsLastHour: number
|
||||
requestsLastDay: number
|
||||
}
|
||||
|
||||
/**
|
||||
* Historical Token Analytics Tracker
|
||||
*
|
||||
* Tracks token usage patterns over time for analytics,
|
||||
* cost optimization, and capacity planning.
|
||||
*/
|
||||
export class TokenUsageTracker {
|
||||
private history: TokenUsageEntry[] = []
|
||||
private readonly maxEntries: number
|
||||
|
||||
constructor(maxEntries = 1000) {
|
||||
this.maxEntries = maxEntries
|
||||
}
|
||||
|
||||
/**
|
||||
* Record a token usage event from API response.
|
||||
*/
|
||||
record(usage: {
|
||||
input_tokens: number
|
||||
output_tokens: number
|
||||
cache_read_input_tokens?: number
|
||||
cache_creation_input_tokens?: number
|
||||
model: string
|
||||
}): void {
|
||||
const entry: TokenUsageEntry = {
|
||||
timestamp: Date.now(),
|
||||
inputTokens: usage.input_tokens,
|
||||
outputTokens: usage.output_tokens,
|
||||
cacheReadTokens: usage.cache_read_input_tokens ?? 0,
|
||||
cacheCreationTokens: usage.cache_creation_input_tokens ?? 0,
|
||||
model: usage.model,
|
||||
}
|
||||
|
||||
this.history.push(entry)
|
||||
|
||||
// Trim old entries
|
||||
if (this.history.length > this.maxEntries) {
|
||||
this.history = this.history.slice(-this.maxEntries)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get analytics summary for all recorded usage.
|
||||
*/
|
||||
getAnalytics(): TokenAnalytics {
|
||||
if (this.history.length === 0) {
|
||||
return {
|
||||
totalRequests: 0,
|
||||
totalInputTokens: 0,
|
||||
totalOutputTokens: 0,
|
||||
totalCacheRead: 0,
|
||||
totalCacheCreation: 0,
|
||||
averageInputPerRequest: 0,
|
||||
averageOutputPerRequest: 0,
|
||||
cacheHitRate: 0,
|
||||
mostUsedModel: 'unknown',
|
||||
requestsLastHour: 0,
|
||||
requestsLastDay: 0,
|
||||
}
|
||||
}
|
||||
|
||||
const now = Date.now()
|
||||
const hourAgo = now - 60 * 60 * 1000
|
||||
const dayAgo = now - 24 * 60 * 60 * 1000
|
||||
|
||||
let totalInput = 0
|
||||
let totalOutput = 0
|
||||
let totalCacheRead = 0
|
||||
let totalCacheCreation = 0
|
||||
let modelCounts = new Map<string, number>()
|
||||
let requestsLastHour = 0
|
||||
let requestsLastDay = 0
|
||||
|
||||
for (const entry of this.history) {
|
||||
totalInput += entry.inputTokens
|
||||
totalOutput += entry.outputTokens
|
||||
totalCacheRead += entry.cacheReadTokens
|
||||
totalCacheCreation += entry.cacheCreationTokens
|
||||
|
||||
modelCounts.set(entry.model, (modelCounts.get(entry.model) ?? 0) + 1)
|
||||
|
||||
if (entry.timestamp >= hourAgo) requestsLastHour++
|
||||
if (entry.timestamp >= dayAgo) requestsLastDay++
|
||||
}
|
||||
|
||||
// Find most used model
|
||||
let mostUsedModel = 'unknown'
|
||||
let maxCount = 0
|
||||
for (const [model, count] of modelCounts) {
|
||||
if (count > maxCount) {
|
||||
maxCount = count
|
||||
mostUsedModel = model
|
||||
}
|
||||
}
|
||||
|
||||
const totalRequests = this.history.length
|
||||
const totalCache = totalCacheRead + totalCacheCreation
|
||||
const totalTokens = totalInput + totalOutput + totalCache
|
||||
const cacheHitRate = totalTokens > 0 ? (totalCacheRead / totalTokens) * 100 : 0
|
||||
|
||||
return {
|
||||
totalRequests,
|
||||
totalInputTokens: totalInput,
|
||||
totalOutputTokens: totalOutput,
|
||||
totalCacheRead,
|
||||
totalCacheCreation,
|
||||
averageInputPerRequest: Math.round(totalInput / totalRequests),
|
||||
averageOutputPerRequest: Math.round(totalOutput / totalRequests),
|
||||
cacheHitRate: Math.round(cacheHitRate),
|
||||
mostUsedModel,
|
||||
requestsLastHour,
|
||||
requestsLastDay,
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get recent entries within time window.
|
||||
*/
|
||||
getRecent(windowMs: number): TokenUsageEntry[] {
|
||||
const cutoff = Date.now() - windowMs
|
||||
return this.history.filter(e => e.timestamp >= cutoff)
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear history.
|
||||
*/
|
||||
clear(): void {
|
||||
this.history = []
|
||||
}
|
||||
|
||||
/**
|
||||
* Get history size.
|
||||
*/
|
||||
get size(): number {
|
||||
return this.history.length
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the current context window size in tokens.
|
||||
*
|
||||
|
||||
Reference in New Issue
Block a user