feat: add thinking token extraction (#798)
* feat: add thinking token tracking and historical analytics - extractThinkingTokens(): separate thinking from output tokens - TokenUsageTracker class for historical analytics - Track: cache hit rate, most used model, requests per hour/day - Analytics: average tokens per request, totals - Add tests (7 passing) PR 4B: Features 1.10 + 1.11 * refactor: extract thinking and analytics to separate files - Create thinkingTokenExtractor.ts with ThinkingTokenAnalyzer - Create tokenAnalytics.ts with TokenUsageTracker - Add production-grade methods and tests - Update test imports
This commit is contained in:
committed by
GitHub
parent
761924daa7
commit
268c0398e4
106
src/utils/thinkingTokenExtractor.test.ts
Normal file
106
src/utils/thinkingTokenExtractor.test.ts
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
import { describe, expect, it } from 'bun:test'
|
||||||
|
import { ThinkingTokenAnalyzer } from './thinkingTokenExtractor.js'
|
||||||
|
|
||||||
|
describe('ThinkingTokenAnalyzer', () => {
|
||||||
|
describe('extract', () => {
|
||||||
|
it('extracts thinking and output separately', () => {
|
||||||
|
const message = {
|
||||||
|
type: 'assistant',
|
||||||
|
message: {
|
||||||
|
content: [
|
||||||
|
{ type: 'thinking', thinking: 'Let me think about this...' },
|
||||||
|
{ type: 'text', text: 'Here is my answer.' },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
} as any
|
||||||
|
|
||||||
|
const result = ThinkingTokenAnalyzer.extract(message)
|
||||||
|
|
||||||
|
expect(result.thinking).toBeGreaterThan(0)
|
||||||
|
expect(result.output).toBeGreaterThan(0)
|
||||||
|
expect(result.total).toBe(result.thinking + result.output)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('handles no thinking', () => {
|
||||||
|
const message = {
|
||||||
|
type: 'assistant',
|
||||||
|
message: {
|
||||||
|
content: [{ type: 'text', text: 'Hello world' }],
|
||||||
|
},
|
||||||
|
} as any
|
||||||
|
|
||||||
|
const result = ThinkingTokenAnalyzer.extract(message)
|
||||||
|
|
||||||
|
expect(result.thinking).toBe(0)
|
||||||
|
expect(result.output).toBeGreaterThan(0)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('handles redacted thinking', () => {
|
||||||
|
const message = {
|
||||||
|
type: 'assistant',
|
||||||
|
message: {
|
||||||
|
content: [
|
||||||
|
{ type: 'redacted_thinking', data: '[thinking hidden]' },
|
||||||
|
{ type: 'text', text: 'Answer here.' },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
} as any
|
||||||
|
|
||||||
|
const result = ThinkingTokenAnalyzer.extract(message)
|
||||||
|
|
||||||
|
expect(result.thinking).toBeGreaterThan(0)
|
||||||
|
expect(result.output).toBeGreaterThan(0)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('analyze', () => {
|
||||||
|
it('calculates percentages', () => {
|
||||||
|
const message = {
|
||||||
|
type: 'assistant',
|
||||||
|
message: {
|
||||||
|
content: [
|
||||||
|
{ type: 'thinking', thinking: 'Thinking1 Thinking2 Thinking3' },
|
||||||
|
{ type: 'text', text: 'Output1 Output2' },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
} as any
|
||||||
|
|
||||||
|
const analysis = ThinkingTokenAnalyzer.analyze(message)
|
||||||
|
|
||||||
|
expect(analysis.hasThinking).toBe(true)
|
||||||
|
expect(analysis.thinkingPercentage).toBeGreaterThan(0)
|
||||||
|
expect(analysis.outputPercentage).toBeGreaterThan(0)
|
||||||
|
expect(analysis.reasoningComplexity).toBeTruthy()
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('hasSignificantThinking', () => {
|
||||||
|
it('detects significant thinking', () => {
|
||||||
|
const message = {
|
||||||
|
type: 'assistant',
|
||||||
|
message: {
|
||||||
|
content: [
|
||||||
|
{ type: 'thinking', thinking: 'x'.repeat(500) },
|
||||||
|
{ type: 'text', text: 'short' },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
} as any
|
||||||
|
|
||||||
|
expect(ThinkingTokenAnalyzer.hasSignificantThinking(message, 20)).toBe(true)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('rejects minimal thinking', () => {
|
||||||
|
const message = {
|
||||||
|
type: 'assistant',
|
||||||
|
message: {
|
||||||
|
content: [
|
||||||
|
{ type: 'thinking', thinking: 'a' },
|
||||||
|
{ type: 'text', text: 'much longer output text here with more content' },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
} as any
|
||||||
|
|
||||||
|
expect(ThinkingTokenAnalyzer.hasSignificantThinking(message, 20)).toBe(false)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
192
src/utils/thinkingTokenExtractor.ts
Normal file
192
src/utils/thinkingTokenExtractor.ts
Normal file
@@ -0,0 +1,192 @@
|
|||||||
|
/**
|
||||||
|
* Thinking Token Extractor - Production-grade thinking token analysis
|
||||||
|
*
|
||||||
|
* Extracts and analyzes thinking tokens from assistant messages.
|
||||||
|
* Provides detailed breakdown, statistics, and insights.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { roughTokenCountEstimation } from '../services/tokenEstimation.js'
|
||||||
|
import { jsonStringify } from './slowOperations.js'
|
||||||
|
import type { AssistantMessage, Message } from '../types/message.js'
|
||||||
|
|
||||||
|
/** One thinking-type content block with its estimated token count. */
export interface ThinkingBlock {
  type: 'thinking' | 'redacted_thinking'
  // Raw thinking text, or the opaque `data` payload for redacted blocks.
  content: string
  // Rough estimate (via roughTokenCountEstimation), not an exact API count.
  tokens: number
}

/** One visible output content block with its estimated token count. */
export interface OutputBlock {
  type: 'text' | 'tool_use'
  // Text content, or the JSON-serialized tool input for tool_use blocks.
  content: string
  tokens: number
}

/** Breakdown of a message's estimated tokens into thinking vs output. */
export interface ThinkingTokenBreakdown {
  thinking: number
  output: number
  // Invariant: total === thinking + output.
  total: number
  thinkingBlocks: ThinkingBlock[]
  outputBlocks: OutputBlock[]
}

/** Derived statistics over a message's thinking usage. */
export interface ThinkingAnalysis {
  hasThinking: boolean
  // Percentages of `total`, rounded to one decimal place.
  thinkingPercentage: number
  outputPercentage: number
  blockCount: number
  // Average tokens per block, rounded to the nearest integer.
  avgThinkingBlockSize: number
  avgOutputBlockSize: number
  totalTextLength: number
  // Heuristic bucket driven by thinking share and thinking-block count.
  reasoningComplexity: 'low' | 'medium' | 'high'
}
|
||||||
|
|
||||||
|
export class ThinkingTokenAnalyzer {
|
||||||
|
/**
|
||||||
|
* Extract detailed thinking vs output breakdown
|
||||||
|
*/
|
||||||
|
static extract(message: AssistantMessage): ThinkingTokenBreakdown {
|
||||||
|
const thinkingBlocks: ThinkingBlock[] = []
|
||||||
|
const outputBlocks: OutputBlock[] = []
|
||||||
|
let thinking = 0
|
||||||
|
let output = 0
|
||||||
|
|
||||||
|
for (const block of message.message.content) {
|
||||||
|
if (block.type === 'thinking') {
|
||||||
|
const tokens = roughTokenCountEstimation(block.thinking)
|
||||||
|
thinking += tokens
|
||||||
|
thinkingBlocks.push({
|
||||||
|
type: 'thinking',
|
||||||
|
content: block.thinking,
|
||||||
|
tokens,
|
||||||
|
})
|
||||||
|
} else if (block.type === 'redacted_thinking') {
|
||||||
|
const tokens = roughTokenCountEstimation(block.data)
|
||||||
|
thinking += tokens
|
||||||
|
thinkingBlocks.push({
|
||||||
|
type: 'redacted_thinking',
|
||||||
|
content: block.data,
|
||||||
|
tokens,
|
||||||
|
})
|
||||||
|
} else if (block.type === 'text') {
|
||||||
|
const tokens = roughTokenCountEstimation(block.text)
|
||||||
|
output += tokens
|
||||||
|
outputBlocks.push({
|
||||||
|
type: 'text',
|
||||||
|
content: block.text,
|
||||||
|
tokens,
|
||||||
|
})
|
||||||
|
} else if (block.type === 'tool_use') {
|
||||||
|
const content = jsonStringify(block.input)
|
||||||
|
const tokens = roughTokenCountEstimation(content)
|
||||||
|
output += tokens
|
||||||
|
outputBlocks.push({
|
||||||
|
type: 'tool_use',
|
||||||
|
content,
|
||||||
|
tokens,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
thinking,
|
||||||
|
output,
|
||||||
|
total: thinking + output,
|
||||||
|
thinkingBlocks,
|
||||||
|
outputBlocks,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Simple extraction for quick use
|
||||||
|
*/
|
||||||
|
static extractSimple(message: AssistantMessage): ThinkingTokenBreakdown {
|
||||||
|
return this.extract(message)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Analyze thinking patterns and provide insights
|
||||||
|
*/
|
||||||
|
static analyze(message: AssistantMessage): ThinkingAnalysis {
|
||||||
|
const breakdown = this.extract(message)
|
||||||
|
const { thinking, output, total, thinkingBlocks, outputBlocks } = breakdown
|
||||||
|
|
||||||
|
const hasThinking = thinking > 0
|
||||||
|
const thinkingPercentage = total > 0 ? (thinking / total) * 100 : 0
|
||||||
|
const outputPercentage = total > 0 ? (output / total) * 100 : 0
|
||||||
|
|
||||||
|
const avgThinkingBlockSize = thinkingBlocks.length > 0
|
||||||
|
? thinkingBlocks.reduce((sum, b) => sum + b.tokens, 0) / thinkingBlocks.length
|
||||||
|
: 0
|
||||||
|
|
||||||
|
const avgOutputBlockSize = outputBlocks.length > 0
|
||||||
|
? outputBlocks.reduce((sum, b) => sum + b.tokens, 0) / outputBlocks.length
|
||||||
|
: 0
|
||||||
|
|
||||||
|
const totalTextLength = [...thinkingBlocks, ...outputBlocks].reduce(
|
||||||
|
(sum, b) => sum + b.content.length,
|
||||||
|
0,
|
||||||
|
)
|
||||||
|
|
||||||
|
// Complexity based on thinking percentage and block count
|
||||||
|
let reasoningComplexity: 'low' | 'medium' | 'high' = 'low'
|
||||||
|
if (thinkingPercentage > 30 || thinkingBlocks.length > 5) {
|
||||||
|
reasoningComplexity = 'high'
|
||||||
|
} else if (thinkingPercentage > 10 || thinkingBlocks.length > 2) {
|
||||||
|
reasoningComplexity = 'medium'
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
hasThinking,
|
||||||
|
thinkingPercentage: Math.round(thinkingPercentage * 10) / 10,
|
||||||
|
outputPercentage: Math.round(outputPercentage * 10) / 10,
|
||||||
|
blockCount: thinkingBlocks.length + outputBlocks.length,
|
||||||
|
avgThinkingBlockSize: Math.round(avgThinkingBlockSize),
|
||||||
|
avgOutputBlockSize: Math.round(avgOutputBlockSize),
|
||||||
|
totalTextLength,
|
||||||
|
reasoningComplexity,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if message has significant thinking
|
||||||
|
*/
|
||||||
|
static hasSignificantThinking(
|
||||||
|
message: AssistantMessage,
|
||||||
|
thresholdPercent = 20,
|
||||||
|
): boolean {
|
||||||
|
const analysis = this.analyze(message)
|
||||||
|
return analysis.thinkingPercentage >= thresholdPercent
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get thinking-only messages from an array
|
||||||
|
*/
|
||||||
|
static filterThinkingMessages(messages: Message[]): AssistantMessage[] {
|
||||||
|
return messages
|
||||||
|
.filter((m): m is AssistantMessage => m.type === 'assistant')
|
||||||
|
.filter(m => this.hasSignificantThinking(m))
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculate total thinking tokens across messages
|
||||||
|
*/
|
||||||
|
static totalThinkingTokens(messages: Message[]): number {
|
||||||
|
return messages
|
||||||
|
.filter((m): m is AssistantMessage => m.type === 'assistant')
|
||||||
|
.reduce((sum, m) => sum + this.extract(m).thinking, 0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Legacy export for backward compatibility
|
||||||
|
*/
|
||||||
|
export function extractThinkingTokens(
|
||||||
|
message: AssistantMessage,
|
||||||
|
): { thinking: number; output: number; total: number } {
|
||||||
|
const result = ThinkingTokenAnalyzer.extract(message)
|
||||||
|
return {
|
||||||
|
thinking: result.thinking,
|
||||||
|
output: result.output,
|
||||||
|
total: result.total,
|
||||||
|
}
|
||||||
|
}
|
||||||
69
src/utils/thinkingTokens.test.ts
Normal file
69
src/utils/thinkingTokens.test.ts
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
import { describe, expect, it } from 'bun:test'
|
||||||
|
import { extractThinkingTokens } from './tokens.js'
|
||||||
|
|
||||||
|
describe('extractThinkingTokens', () => {
|
||||||
|
it('extracts thinking and output separately', () => {
|
||||||
|
const message = {
|
||||||
|
type: 'assistant',
|
||||||
|
message: {
|
||||||
|
content: [
|
||||||
|
{ type: 'thinking', thinking: 'Let me think about this...' },
|
||||||
|
{ type: 'text', text: 'Here is my answer.' },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
} as any
|
||||||
|
|
||||||
|
const result = extractThinkingTokens(message)
|
||||||
|
|
||||||
|
expect(result.thinking).toBeGreaterThan(0)
|
||||||
|
expect(result.output).toBeGreaterThan(0)
|
||||||
|
expect(result.total).toBe(result.thinking + result.output)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('handles no thinking', () => {
|
||||||
|
const message = {
|
||||||
|
type: 'assistant',
|
||||||
|
message: {
|
||||||
|
content: [{ type: 'text', text: 'Hello world' }],
|
||||||
|
},
|
||||||
|
} as any
|
||||||
|
|
||||||
|
const result = extractThinkingTokens(message)
|
||||||
|
|
||||||
|
expect(result.thinking).toBe(0)
|
||||||
|
expect(result.output).toBeGreaterThan(0)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('handles redacted thinking', () => {
|
||||||
|
const message = {
|
||||||
|
type: 'assistant',
|
||||||
|
message: {
|
||||||
|
content: [
|
||||||
|
{ type: 'redacted_thinking', data: '[thinking hidden]' },
|
||||||
|
{ type: 'text', text: 'Answer here.' },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
} as any
|
||||||
|
|
||||||
|
const result = extractThinkingTokens(message)
|
||||||
|
|
||||||
|
expect(result.thinking).toBeGreaterThan(0)
|
||||||
|
expect(result.output).toBeGreaterThan(0)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('handles tool use', () => {
|
||||||
|
const message = {
|
||||||
|
type: 'assistant',
|
||||||
|
message: {
|
||||||
|
content: [
|
||||||
|
{ type: 'tool_use', id: 'tool_1', name: 'bash', input: { cmd: 'echo test' } },
|
||||||
|
{ type: 'text', text: 'Ran command.' },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
} as any
|
||||||
|
|
||||||
|
const result = extractThinkingTokens(message)
|
||||||
|
|
||||||
|
expect(result.output).toBeGreaterThan(0)
|
||||||
|
})
|
||||||
|
})
|
||||||
84
src/utils/tokenAnalytics.test.ts
Normal file
84
src/utils/tokenAnalytics.test.ts
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
import { describe, expect, it, beforeEach } from 'bun:test'
|
||||||
|
import { TokenUsageTracker } from './tokenAnalytics.js'
|
||||||
|
|
||||||
|
describe('TokenUsageTracker', () => {
|
||||||
|
let tracker: TokenUsageTracker
|
||||||
|
|
||||||
|
beforeEach(() => {
|
||||||
|
tracker = new TokenUsageTracker(100)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('records token usage', () => {
|
||||||
|
tracker.record({
|
||||||
|
input_tokens: 1000,
|
||||||
|
output_tokens: 500,
|
||||||
|
cache_read_input_tokens: 200,
|
||||||
|
cache_creation_input_tokens: 100,
|
||||||
|
model: 'claude-sonnet-4-5-20250514',
|
||||||
|
})
|
||||||
|
|
||||||
|
expect(tracker.size).toBe(1)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('calculates analytics', () => {
|
||||||
|
tracker.record({
|
||||||
|
input_tokens: 1000,
|
||||||
|
output_tokens: 500,
|
||||||
|
model: 'claude-sonnet-4-5-20250514',
|
||||||
|
})
|
||||||
|
|
||||||
|
tracker.record({
|
||||||
|
input_tokens: 2000,
|
||||||
|
output_tokens: 300,
|
||||||
|
model: 'claude-sonnet-4-5-20250514',
|
||||||
|
})
|
||||||
|
|
||||||
|
const analytics = tracker.getAnalytics()
|
||||||
|
|
||||||
|
expect(analytics.totalRequests).toBe(2)
|
||||||
|
expect(analytics.totalInputTokens).toBe(3000)
|
||||||
|
expect(analytics.totalOutputTokens).toBe(800)
|
||||||
|
expect(analytics.averageInputPerRequest).toBe(1500)
|
||||||
|
expect(analytics.averageOutputPerRequest).toBe(400)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('tracks cache hit rate', () => {
|
||||||
|
tracker.record({
|
||||||
|
input_tokens: 1000,
|
||||||
|
output_tokens: 500,
|
||||||
|
cache_read_input_tokens: 500, // 33% cache
|
||||||
|
model: 'claude-sonnet-4-5-20250514',
|
||||||
|
})
|
||||||
|
|
||||||
|
const analytics = tracker.getAnalytics()
|
||||||
|
|
||||||
|
expect(analytics.cacheHitRate).toBeGreaterThan(0)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('tracks most used model', () => {
|
||||||
|
tracker.record({ input_tokens: 1000, output_tokens: 100, model: 'sonnet' })
|
||||||
|
tracker.record({ input_tokens: 1000, output_tokens: 100, model: 'sonnet' })
|
||||||
|
tracker.record({ input_tokens: 1000, output_tokens: 100, model: 'opus' })
|
||||||
|
|
||||||
|
expect(tracker.getAnalytics().mostUsedModel).toBe('sonnet')
|
||||||
|
})
|
||||||
|
|
||||||
|
it('respects max entries limit', () => {
|
||||||
|
const smallTracker = new TokenUsageTracker(3)
|
||||||
|
|
||||||
|
smallTracker.record({ input_tokens: 1, output_tokens: 1, model: 'a' })
|
||||||
|
smallTracker.record({ input_tokens: 2, output_tokens: 2, model: 'b' })
|
||||||
|
smallTracker.record({ input_tokens: 3, output_tokens: 3, model: 'c' })
|
||||||
|
smallTracker.record({ input_tokens: 4, output_tokens: 4, model: 'd' })
|
||||||
|
smallTracker.record({ input_tokens: 5, output_tokens: 5, model: 'e' })
|
||||||
|
|
||||||
|
expect(smallTracker.size).toBe(3)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('clears history', () => {
|
||||||
|
tracker.record({ input_tokens: 1000, output_tokens: 100, model: 'test' })
|
||||||
|
tracker.clear()
|
||||||
|
|
||||||
|
expect(tracker.size).toBe(0)
|
||||||
|
})
|
||||||
|
})
|
||||||
211
src/utils/tokenAnalytics.ts
Normal file
211
src/utils/tokenAnalytics.ts
Normal file
@@ -0,0 +1,211 @@
|
|||||||
|
/**
|
||||||
|
* Token Analytics - Historical token usage tracking and analysis
|
||||||
|
*
|
||||||
|
* Tracks token usage patterns over time for cost optimization
|
||||||
|
* and capacity planning.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import type { BetaUsage as Usage } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
|
||||||
|
|
||||||
|
/** A single recorded per-request usage sample. */
export interface TokenUsageEntry {
  // Date.now() at record() time.
  timestamp: number
  inputTokens: number
  outputTokens: number
  // 0 when the API response omitted the cache counts.
  cacheReadTokens: number
  cacheCreationTokens: number
  model: string
}

/** Aggregated analytics derived from the recorded history. */
export interface TokenAnalytics {
  totalRequests: number
  totalInputTokens: number
  totalOutputTokens: number
  totalCacheRead: number
  totalCacheCreation: number
  // Rounded to the nearest integer.
  averageInputPerRequest: number
  averageOutputPerRequest: number
  // Percent (0-100) of all tokens served from cache reads; rounded.
  cacheHitRate: number
  // 'unknown' when the history is empty.
  mostUsedModel: string
  requestsLastHour: number
  requestsLastDay: number
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Historical Token Analytics Tracker
|
||||||
|
*
|
||||||
|
* Tracks token usage patterns over time for analytics,
|
||||||
|
* cost optimization, and capacity planning.
|
||||||
|
*/
|
||||||
|
export class TokenUsageTracker {
|
||||||
|
private history: TokenUsageEntry[] = []
|
||||||
|
private readonly maxEntries: number
|
||||||
|
|
||||||
|
constructor(maxEntries = 1000) {
|
||||||
|
this.maxEntries = maxEntries
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Record a token usage event from API response.
|
||||||
|
*/
|
||||||
|
record(usage: {
|
||||||
|
input_tokens: number
|
||||||
|
output_tokens: number
|
||||||
|
cache_read_input_tokens?: number
|
||||||
|
cache_creation_input_tokens?: number
|
||||||
|
model: string
|
||||||
|
}): void {
|
||||||
|
const entry: TokenUsageEntry = {
|
||||||
|
timestamp: Date.now(),
|
||||||
|
inputTokens: usage.input_tokens,
|
||||||
|
outputTokens: usage.output_tokens,
|
||||||
|
cacheReadTokens: usage.cache_read_input_tokens ?? 0,
|
||||||
|
cacheCreationTokens: usage.cache_creation_input_tokens ?? 0,
|
||||||
|
model: usage.model,
|
||||||
|
}
|
||||||
|
|
||||||
|
this.history.push(entry)
|
||||||
|
|
||||||
|
if (this.history.length > this.maxEntries) {
|
||||||
|
this.history = this.history.slice(-this.maxEntries)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get analytics summary for all recorded usage.
|
||||||
|
*/
|
||||||
|
getAnalytics(): TokenAnalytics {
|
||||||
|
if (this.history.length === 0) {
|
||||||
|
return {
|
||||||
|
totalRequests: 0,
|
||||||
|
totalInputTokens: 0,
|
||||||
|
totalOutputTokens: 0,
|
||||||
|
totalCacheRead: 0,
|
||||||
|
totalCacheCreation: 0,
|
||||||
|
averageInputPerRequest: 0,
|
||||||
|
averageOutputPerRequest: 0,
|
||||||
|
cacheHitRate: 0,
|
||||||
|
mostUsedModel: 'unknown',
|
||||||
|
requestsLastHour: 0,
|
||||||
|
requestsLastDay: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const now = Date.now()
|
||||||
|
const hourAgo = now - 60 * 60 * 1000
|
||||||
|
const dayAgo = now - 24 * 60 * 60 * 1000
|
||||||
|
|
||||||
|
let totalInput = 0
|
||||||
|
let totalOutput = 0
|
||||||
|
let totalCacheRead = 0
|
||||||
|
let totalCacheCreation = 0
|
||||||
|
const modelCounts = new Map<string, number>()
|
||||||
|
let requestsLastHour = 0
|
||||||
|
let requestsLastDay = 0
|
||||||
|
|
||||||
|
for (const entry of this.history) {
|
||||||
|
totalInput += entry.inputTokens
|
||||||
|
totalOutput += entry.outputTokens
|
||||||
|
totalCacheRead += entry.cacheReadTokens
|
||||||
|
totalCacheCreation += entry.cacheCreationTokens
|
||||||
|
|
||||||
|
modelCounts.set(entry.model, (modelCounts.get(entry.model) ?? 0) + 1)
|
||||||
|
|
||||||
|
if (entry.timestamp >= hourAgo) requestsLastHour++
|
||||||
|
if (entry.timestamp >= dayAgo) requestsLastDay++
|
||||||
|
}
|
||||||
|
|
||||||
|
let mostUsedModel = 'unknown'
|
||||||
|
let maxCount = 0
|
||||||
|
for (const [model, count] of modelCounts) {
|
||||||
|
if (count > maxCount) {
|
||||||
|
maxCount = count
|
||||||
|
mostUsedModel = model
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const totalRequests = this.history.length
|
||||||
|
const totalCache = totalCacheRead + totalCacheCreation
|
||||||
|
const totalTokens = totalInput + totalOutput + totalCache
|
||||||
|
const cacheHitRate = totalTokens > 0 ? (totalCacheRead / totalTokens) * 100 : 0
|
||||||
|
|
||||||
|
return {
|
||||||
|
totalRequests,
|
||||||
|
totalInputTokens: totalInput,
|
||||||
|
totalOutputTokens: totalOutput,
|
||||||
|
totalCacheRead,
|
||||||
|
totalCacheCreation,
|
||||||
|
averageInputPerRequest: Math.round(totalInput / totalRequests),
|
||||||
|
averageOutputPerRequest: Math.round(totalOutput / totalRequests),
|
||||||
|
cacheHitRate: Math.round(cacheHitRate),
|
||||||
|
mostUsedModel,
|
||||||
|
requestsLastHour,
|
||||||
|
requestsLastDay,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get recent entries within time window.
|
||||||
|
*/
|
||||||
|
getRecent(windowMs: number): TokenUsageEntry[] {
|
||||||
|
const cutoff = Date.now() - windowMs
|
||||||
|
return this.history.filter(e => e.timestamp >= cutoff)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get entries for a specific model
|
||||||
|
*/
|
||||||
|
getByModel(model: string): TokenUsageEntry[] {
|
||||||
|
return this.history.filter(e => e.model === model)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculate cost estimate (approximate)
|
||||||
|
*/
|
||||||
|
estimateCost(): { input: number; output: number; cache: number } {
|
||||||
|
const analytics = this.getAnalytics()
|
||||||
|
|
||||||
|
// Approximate pricing (adjust as needed)
|
||||||
|
const inputCost = analytics.totalInputTokens * 0.00015
|
||||||
|
const outputCost = analytics.totalOutputTokens * 0.0006
|
||||||
|
const cacheCost = analytics.totalCacheRead * 0.000075
|
||||||
|
|
||||||
|
return {
|
||||||
|
input: Math.round(inputCost * 100) / 100,
|
||||||
|
output: Math.round(outputCost * 100) / 100,
|
||||||
|
cache: Math.round(cacheCost * 100) / 100,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clear history.
|
||||||
|
*/
|
||||||
|
clear(): void {
|
||||||
|
this.history = []
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get history size.
|
||||||
|
*/
|
||||||
|
get size(): number {
|
||||||
|
return this.history.length
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Export history as JSON
|
||||||
|
*/
|
||||||
|
export(): string {
|
||||||
|
return JSON.stringify(this.history, null, 2)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Import history from JSON
|
||||||
|
*/
|
||||||
|
import(json: string): void {
|
||||||
|
try {
|
||||||
|
const entries = JSON.parse(json) as TokenUsageEntry[]
|
||||||
|
this.history = entries.slice(-this.maxEntries)
|
||||||
|
} catch {
|
||||||
|
// Invalid JSON, ignore
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
import type { BetaUsage as Usage } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
|
import type { BetaUsage as Usage } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
|
||||||
import { roughTokenCountEstimationForMessages } from '../services/tokenEstimation.js'
|
import { roughTokenCountEstimation, roughTokenCountEstimationForMessages } from '../services/tokenEstimation.js'
|
||||||
import type { AssistantMessage, Message } from '../types/message.js'
|
import type { AssistantMessage, Message } from '../types/message.js'
|
||||||
import { SYNTHETIC_MESSAGES, SYNTHETIC_MODEL } from './messages.js'
|
import { SYNTHETIC_MESSAGES, SYNTHETIC_MODEL } from './messages.js'
|
||||||
import { jsonStringify } from './slowOperations.js'
|
import { jsonStringify } from './slowOperations.js'
|
||||||
@@ -198,6 +198,198 @@ export function getAssistantMessageContentLength(
|
|||||||
return contentLength
|
return contentLength
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract thinking tokens from an assistant message.
|
||||||
|
* Returns breakdown of thinking vs output tokens.
|
||||||
|
*/
|
||||||
|
export function extractThinkingTokens(
|
||||||
|
message: AssistantMessage,
|
||||||
|
): { thinking: number; output: number; total: number } {
|
||||||
|
let thinking = 0
|
||||||
|
let output = 0
|
||||||
|
|
||||||
|
for (const block of message.message.content) {
|
||||||
|
if (block.type === 'thinking') {
|
||||||
|
thinking += roughTokenCountEstimation(block.thinking)
|
||||||
|
} else if (block.type === 'redacted_thinking') {
|
||||||
|
thinking += roughTokenCountEstimation(block.data)
|
||||||
|
} else if (block.type === 'text') {
|
||||||
|
output += roughTokenCountEstimation(block.text)
|
||||||
|
} else if (block.type === 'tool_use') {
|
||||||
|
output += roughTokenCountEstimation(jsonStringify(block.input))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return { thinking, output, total: thinking + output }
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Token usage history entry for tracking patterns over time.
 */
export interface TokenUsageEntry {
  // Date.now() at record() time.
  timestamp: number
  inputTokens: number
  outputTokens: number
  // 0 when the API response omitted the cache counts.
  cacheReadTokens: number
  cacheCreationTokens: number
  model: string
}

/**
 * Token analytics summary derived from historical data.
 */
export interface TokenAnalytics {
  totalRequests: number
  totalInputTokens: number
  totalOutputTokens: number
  totalCacheRead: number
  totalCacheCreation: number
  // Rounded to the nearest integer.
  averageInputPerRequest: number
  averageOutputPerRequest: number
  // Percent (0-100) of all tokens served from cache reads; rounded.
  cacheHitRate: number
  // 'unknown' when the history is empty.
  mostUsedModel: string
  requestsLastHour: number
  requestsLastDay: number
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Historical Token Analytics Tracker
|
||||||
|
*
|
||||||
|
* Tracks token usage patterns over time for analytics,
|
||||||
|
* cost optimization, and capacity planning.
|
||||||
|
*/
|
||||||
|
export class TokenUsageTracker {
|
||||||
|
private history: TokenUsageEntry[] = []
|
||||||
|
private readonly maxEntries: number
|
||||||
|
|
||||||
|
constructor(maxEntries = 1000) {
|
||||||
|
this.maxEntries = maxEntries
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Record a token usage event from API response.
|
||||||
|
*/
|
||||||
|
record(usage: {
|
||||||
|
input_tokens: number
|
||||||
|
output_tokens: number
|
||||||
|
cache_read_input_tokens?: number
|
||||||
|
cache_creation_input_tokens?: number
|
||||||
|
model: string
|
||||||
|
}): void {
|
||||||
|
const entry: TokenUsageEntry = {
|
||||||
|
timestamp: Date.now(),
|
||||||
|
inputTokens: usage.input_tokens,
|
||||||
|
outputTokens: usage.output_tokens,
|
||||||
|
cacheReadTokens: usage.cache_read_input_tokens ?? 0,
|
||||||
|
cacheCreationTokens: usage.cache_creation_input_tokens ?? 0,
|
||||||
|
model: usage.model,
|
||||||
|
}
|
||||||
|
|
||||||
|
this.history.push(entry)
|
||||||
|
|
||||||
|
// Trim old entries
|
||||||
|
if (this.history.length > this.maxEntries) {
|
||||||
|
this.history = this.history.slice(-this.maxEntries)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get analytics summary for all recorded usage.
|
||||||
|
*/
|
||||||
|
getAnalytics(): TokenAnalytics {
|
||||||
|
if (this.history.length === 0) {
|
||||||
|
return {
|
||||||
|
totalRequests: 0,
|
||||||
|
totalInputTokens: 0,
|
||||||
|
totalOutputTokens: 0,
|
||||||
|
totalCacheRead: 0,
|
||||||
|
totalCacheCreation: 0,
|
||||||
|
averageInputPerRequest: 0,
|
||||||
|
averageOutputPerRequest: 0,
|
||||||
|
cacheHitRate: 0,
|
||||||
|
mostUsedModel: 'unknown',
|
||||||
|
requestsLastHour: 0,
|
||||||
|
requestsLastDay: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const now = Date.now()
|
||||||
|
const hourAgo = now - 60 * 60 * 1000
|
||||||
|
const dayAgo = now - 24 * 60 * 60 * 1000
|
||||||
|
|
||||||
|
let totalInput = 0
|
||||||
|
let totalOutput = 0
|
||||||
|
let totalCacheRead = 0
|
||||||
|
let totalCacheCreation = 0
|
||||||
|
let modelCounts = new Map<string, number>()
|
||||||
|
let requestsLastHour = 0
|
||||||
|
let requestsLastDay = 0
|
||||||
|
|
||||||
|
for (const entry of this.history) {
|
||||||
|
totalInput += entry.inputTokens
|
||||||
|
totalOutput += entry.outputTokens
|
||||||
|
totalCacheRead += entry.cacheReadTokens
|
||||||
|
totalCacheCreation += entry.cacheCreationTokens
|
||||||
|
|
||||||
|
modelCounts.set(entry.model, (modelCounts.get(entry.model) ?? 0) + 1)
|
||||||
|
|
||||||
|
if (entry.timestamp >= hourAgo) requestsLastHour++
|
||||||
|
if (entry.timestamp >= dayAgo) requestsLastDay++
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find most used model
|
||||||
|
let mostUsedModel = 'unknown'
|
||||||
|
let maxCount = 0
|
||||||
|
for (const [model, count] of modelCounts) {
|
||||||
|
if (count > maxCount) {
|
||||||
|
maxCount = count
|
||||||
|
mostUsedModel = model
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const totalRequests = this.history.length
|
||||||
|
const totalCache = totalCacheRead + totalCacheCreation
|
||||||
|
const totalTokens = totalInput + totalOutput + totalCache
|
||||||
|
const cacheHitRate = totalTokens > 0 ? (totalCacheRead / totalTokens) * 100 : 0
|
||||||
|
|
||||||
|
return {
|
||||||
|
totalRequests,
|
||||||
|
totalInputTokens: totalInput,
|
||||||
|
totalOutputTokens: totalOutput,
|
||||||
|
totalCacheRead,
|
||||||
|
totalCacheCreation,
|
||||||
|
averageInputPerRequest: Math.round(totalInput / totalRequests),
|
||||||
|
averageOutputPerRequest: Math.round(totalOutput / totalRequests),
|
||||||
|
cacheHitRate: Math.round(cacheHitRate),
|
||||||
|
mostUsedModel,
|
||||||
|
requestsLastHour,
|
||||||
|
requestsLastDay,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get recent entries within time window.
|
||||||
|
*/
|
||||||
|
getRecent(windowMs: number): TokenUsageEntry[] {
|
||||||
|
const cutoff = Date.now() - windowMs
|
||||||
|
return this.history.filter(e => e.timestamp >= cutoff)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clear history.
|
||||||
|
*/
|
||||||
|
clear(): void {
|
||||||
|
this.history = []
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get history size.
|
||||||
|
*/
|
||||||
|
get size(): number {
|
||||||
|
return this.history.length
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the current context window size in tokens.
|
* Get the current context window size in tokens.
|
||||||
*
|
*
|
||||||
|
|||||||
Reference in New Issue
Block a user