diff --git a/src/utils/thinkingTokenExtractor.test.ts b/src/utils/thinkingTokenExtractor.test.ts new file mode 100644 index 00000000..a8e418b3 --- /dev/null +++ b/src/utils/thinkingTokenExtractor.test.ts @@ -0,0 +1,106 @@ +import { describe, expect, it } from 'bun:test' +import { ThinkingTokenAnalyzer } from './thinkingTokenExtractor.js' + +describe('ThinkingTokenAnalyzer', () => { + describe('extract', () => { + it('extracts thinking and output separately', () => { + const message = { + type: 'assistant', + message: { + content: [ + { type: 'thinking', thinking: 'Let me think about this...' }, + { type: 'text', text: 'Here is my answer.' }, + ], + }, + } as any + + const result = ThinkingTokenAnalyzer.extract(message) + + expect(result.thinking).toBeGreaterThan(0) + expect(result.output).toBeGreaterThan(0) + expect(result.total).toBe(result.thinking + result.output) + }) + + it('handles no thinking', () => { + const message = { + type: 'assistant', + message: { + content: [{ type: 'text', text: 'Hello world' }], + }, + } as any + + const result = ThinkingTokenAnalyzer.extract(message) + + expect(result.thinking).toBe(0) + expect(result.output).toBeGreaterThan(0) + }) + + it('handles redacted thinking', () => { + const message = { + type: 'assistant', + message: { + content: [ + { type: 'redacted_thinking', data: '[thinking hidden]' }, + { type: 'text', text: 'Answer here.' 
}, + ], + }, + } as any + + const result = ThinkingTokenAnalyzer.extract(message) + + expect(result.thinking).toBeGreaterThan(0) + expect(result.output).toBeGreaterThan(0) + }) + }) + + describe('analyze', () => { + it('calculates percentages', () => { + const message = { + type: 'assistant', + message: { + content: [ + { type: 'thinking', thinking: 'Thinking1 Thinking2 Thinking3' }, + { type: 'text', text: 'Output1 Output2' }, + ], + }, + } as any + + const analysis = ThinkingTokenAnalyzer.analyze(message) + + expect(analysis.hasThinking).toBe(true) + expect(analysis.thinkingPercentage).toBeGreaterThan(0) + expect(analysis.outputPercentage).toBeGreaterThan(0) + expect(analysis.reasoningComplexity).toBeTruthy() + }) + }) + + describe('hasSignificantThinking', () => { + it('detects significant thinking', () => { + const message = { + type: 'assistant', + message: { + content: [ + { type: 'thinking', thinking: 'x'.repeat(500) }, + { type: 'text', text: 'short' }, + ], + }, + } as any + + expect(ThinkingTokenAnalyzer.hasSignificantThinking(message, 20)).toBe(true) + }) + + it('rejects minimal thinking', () => { + const message = { + type: 'assistant', + message: { + content: [ + { type: 'thinking', thinking: 'a' }, + { type: 'text', text: 'much longer output text here with more content' }, + ], + }, + } as any + + expect(ThinkingTokenAnalyzer.hasSignificantThinking(message, 20)).toBe(false) + }) + }) +}) \ No newline at end of file diff --git a/src/utils/thinkingTokenExtractor.ts b/src/utils/thinkingTokenExtractor.ts new file mode 100644 index 00000000..f2f43149 --- /dev/null +++ b/src/utils/thinkingTokenExtractor.ts @@ -0,0 +1,192 @@ +/** + * Thinking Token Extractor - Production-grade thinking token analysis + * + * Extracts and analyzes thinking tokens from assistant messages. + * Provides detailed breakdown, statistics, and insights. 
+ */ + +import { roughTokenCountEstimation } from '../services/tokenEstimation.js' +import { jsonStringify } from './slowOperations.js' +import type { AssistantMessage, Message } from '../types/message.js' + +export interface ThinkingBlock { + type: 'thinking' | 'redacted_thinking' + content: string + tokens: number +} + +export interface OutputBlock { + type: 'text' | 'tool_use' + content: string + tokens: number +} + +export interface ThinkingTokenBreakdown { + thinking: number + output: number + total: number + thinkingBlocks: ThinkingBlock[] + outputBlocks: OutputBlock[] +} + +export interface ThinkingAnalysis { + hasThinking: boolean + thinkingPercentage: number + outputPercentage: number + blockCount: number + avgThinkingBlockSize: number + avgOutputBlockSize: number + totalTextLength: number + reasoningComplexity: 'low' | 'medium' | 'high' +} + +export class ThinkingTokenAnalyzer { + /** + * Extract detailed thinking vs output breakdown + */ + static extract(message: AssistantMessage): ThinkingTokenBreakdown { + const thinkingBlocks: ThinkingBlock[] = [] + const outputBlocks: OutputBlock[] = [] + let thinking = 0 + let output = 0 + + for (const block of message.message.content) { + if (block.type === 'thinking') { + const tokens = roughTokenCountEstimation(block.thinking) + thinking += tokens + thinkingBlocks.push({ + type: 'thinking', + content: block.thinking, + tokens, + }) + } else if (block.type === 'redacted_thinking') { + const tokens = roughTokenCountEstimation(block.data) + thinking += tokens + thinkingBlocks.push({ + type: 'redacted_thinking', + content: block.data, + tokens, + }) + } else if (block.type === 'text') { + const tokens = roughTokenCountEstimation(block.text) + output += tokens + outputBlocks.push({ + type: 'text', + content: block.text, + tokens, + }) + } else if (block.type === 'tool_use') { + const content = jsonStringify(block.input) + const tokens = roughTokenCountEstimation(content) + output += tokens + outputBlocks.push({ + 
type: 'tool_use', + content, + tokens, + }) + } + } + + return { + thinking, + output, + total: thinking + output, + thinkingBlocks, + outputBlocks, + } + } + + /** + * Simple extraction for quick use + */ + static extractSimple(message: AssistantMessage): ThinkingTokenBreakdown { + return this.extract(message) + } + + /** + * Analyze thinking patterns and provide insights + */ + static analyze(message: AssistantMessage): ThinkingAnalysis { + const breakdown = this.extract(message) + const { thinking, output, total, thinkingBlocks, outputBlocks } = breakdown + + const hasThinking = thinking > 0 + const thinkingPercentage = total > 0 ? (thinking / total) * 100 : 0 + const outputPercentage = total > 0 ? (output / total) * 100 : 0 + + const avgThinkingBlockSize = thinkingBlocks.length > 0 + ? thinkingBlocks.reduce((sum, b) => sum + b.tokens, 0) / thinkingBlocks.length + : 0 + + const avgOutputBlockSize = outputBlocks.length > 0 + ? outputBlocks.reduce((sum, b) => sum + b.tokens, 0) / outputBlocks.length + : 0 + + const totalTextLength = [...thinkingBlocks, ...outputBlocks].reduce( + (sum, b) => sum + b.content.length, + 0, + ) + + // Complexity based on thinking percentage and block count + let reasoningComplexity: 'low' | 'medium' | 'high' = 'low' + if (thinkingPercentage > 30 || thinkingBlocks.length > 5) { + reasoningComplexity = 'high' + } else if (thinkingPercentage > 10 || thinkingBlocks.length > 2) { + reasoningComplexity = 'medium' + } + + return { + hasThinking, + thinkingPercentage: Math.round(thinkingPercentage * 10) / 10, + outputPercentage: Math.round(outputPercentage * 10) / 10, + blockCount: thinkingBlocks.length + outputBlocks.length, + avgThinkingBlockSize: Math.round(avgThinkingBlockSize), + avgOutputBlockSize: Math.round(avgOutputBlockSize), + totalTextLength, + reasoningComplexity, + } + } + + /** + * Check if message has significant thinking + */ + static hasSignificantThinking( + message: AssistantMessage, + thresholdPercent = 20, + ): boolean { 
+ const analysis = this.analyze(message) + return analysis.thinkingPercentage >= thresholdPercent + } + + /** + * Get thinking-only messages from an array + */ + static filterThinkingMessages(messages: Message[]): AssistantMessage[] { + return messages + .filter((m): m is AssistantMessage => m.type === 'assistant') + .filter(m => this.hasSignificantThinking(m)) + } + + /** + * Calculate total thinking tokens across messages + */ + static totalThinkingTokens(messages: Message[]): number { + return messages + .filter((m): m is AssistantMessage => m.type === 'assistant') + .reduce((sum, m) => sum + this.extract(m).thinking, 0) + } +} + +/** + * Legacy export for backward compatibility + */ +export function extractThinkingTokens( + message: AssistantMessage, +): { thinking: number; output: number; total: number } { + const result = ThinkingTokenAnalyzer.extract(message) + return { + thinking: result.thinking, + output: result.output, + total: result.total, + } +} \ No newline at end of file diff --git a/src/utils/thinkingTokens.test.ts b/src/utils/thinkingTokens.test.ts new file mode 100644 index 00000000..329fe958 --- /dev/null +++ b/src/utils/thinkingTokens.test.ts @@ -0,0 +1,69 @@ +import { describe, expect, it } from 'bun:test' +import { extractThinkingTokens } from './tokens.js' + +describe('extractThinkingTokens', () => { + it('extracts thinking and output separately', () => { + const message = { + type: 'assistant', + message: { + content: [ + { type: 'thinking', thinking: 'Let me think about this...' }, + { type: 'text', text: 'Here is my answer.' 
}, + ], + }, + } as any + + const result = extractThinkingTokens(message) + + expect(result.thinking).toBeGreaterThan(0) + expect(result.output).toBeGreaterThan(0) + expect(result.total).toBe(result.thinking + result.output) + }) + + it('handles no thinking', () => { + const message = { + type: 'assistant', + message: { + content: [{ type: 'text', text: 'Hello world' }], + }, + } as any + + const result = extractThinkingTokens(message) + + expect(result.thinking).toBe(0) + expect(result.output).toBeGreaterThan(0) + }) + + it('handles redacted thinking', () => { + const message = { + type: 'assistant', + message: { + content: [ + { type: 'redacted_thinking', data: '[thinking hidden]' }, + { type: 'text', text: 'Answer here.' }, + ], + }, + } as any + + const result = extractThinkingTokens(message) + + expect(result.thinking).toBeGreaterThan(0) + expect(result.output).toBeGreaterThan(0) + }) + + it('handles tool use', () => { + const message = { + type: 'assistant', + message: { + content: [ + { type: 'tool_use', id: 'tool_1', name: 'bash', input: { cmd: 'echo test' } }, + { type: 'text', text: 'Ran command.' 
},
+        ],
+      },
+    } as any
+
+    const result = extractThinkingTokens(message)
+
+    expect(result.output).toBeGreaterThan(0)
+  })
+})
\ No newline at end of file
diff --git a/src/utils/tokenAnalytics.test.ts b/src/utils/tokenAnalytics.test.ts
new file mode 100644
index 00000000..ccca4fae
--- /dev/null
+++ b/src/utils/tokenAnalytics.test.ts
@@ -0,0 +1,84 @@
+import { describe, expect, it, beforeEach } from 'bun:test'
+import { TokenUsageTracker } from './tokenAnalytics.js'
+
+describe('TokenUsageTracker', () => {
+  let tracker: TokenUsageTracker
+
+  beforeEach(() => {
+    tracker = new TokenUsageTracker(100)
+  })
+
+  it('records token usage', () => {
+    tracker.record({
+      input_tokens: 1000,
+      output_tokens: 500,
+      cache_read_input_tokens: 200,
+      cache_creation_input_tokens: 100,
+      model: 'claude-sonnet-4-5-20250514',
+    })
+
+    expect(tracker.size).toBe(1)
+  })
+
+  it('calculates analytics', () => {
+    tracker.record({
+      input_tokens: 1000,
+      output_tokens: 500,
+      model: 'claude-sonnet-4-5-20250514',
+    })
+
+    tracker.record({
+      input_tokens: 2000,
+      output_tokens: 300,
+      model: 'claude-sonnet-4-5-20250514',
+    })
+
+    const analytics = tracker.getAnalytics()
+
+    expect(analytics.totalRequests).toBe(2)
+    expect(analytics.totalInputTokens).toBe(3000)
+    expect(analytics.totalOutputTokens).toBe(800)
+    expect(analytics.averageInputPerRequest).toBe(1500)
+    expect(analytics.averageOutputPerRequest).toBe(400)
+  })
+
+  it('tracks cache hit rate', () => {
+    tracker.record({
+      input_tokens: 1000,
+      output_tokens: 500,
+      cache_read_input_tokens: 500, // cacheHitRate = 500 / (1000 + 500 + 500) total tokens = 25%
+      model: 'claude-sonnet-4-5-20250514',
+    })
+
+    const analytics = tracker.getAnalytics()
+
+    expect(analytics.cacheHitRate).toBeGreaterThan(0)
+  })
+
+  it('tracks most used model', () => {
+    tracker.record({ input_tokens: 1000, output_tokens: 100, model: 'sonnet' })
+    tracker.record({ input_tokens: 1000, output_tokens: 100, model: 'sonnet' })
+    tracker.record({ input_tokens: 1000, output_tokens: 100, model: 'opus' })
+
+    
expect(tracker.getAnalytics().mostUsedModel).toBe('sonnet')
+  })
+
+  it('respects max entries limit', () => {
+    const smallTracker = new TokenUsageTracker(3)
+
+    smallTracker.record({ input_tokens: 1, output_tokens: 1, model: 'a' })
+    smallTracker.record({ input_tokens: 2, output_tokens: 2, model: 'b' })
+    smallTracker.record({ input_tokens: 3, output_tokens: 3, model: 'c' })
+    smallTracker.record({ input_tokens: 4, output_tokens: 4, model: 'd' })
+    smallTracker.record({ input_tokens: 5, output_tokens: 5, model: 'e' })
+
+    expect(smallTracker.size).toBe(3)
+  })
+
+  it('clears history', () => {
+    tracker.record({ input_tokens: 1000, output_tokens: 100, model: 'test' })
+    tracker.clear()
+
+    expect(tracker.size).toBe(0)
+  })
+})
\ No newline at end of file
diff --git a/src/utils/tokenAnalytics.ts b/src/utils/tokenAnalytics.ts
new file mode 100644
index 00000000..3f11855c
--- /dev/null
+++ b/src/utils/tokenAnalytics.ts
@@ -0,0 +1,211 @@
+/**
+ * Token Analytics - Historical token usage tracking and analysis
+ *
+ * Tracks token usage patterns over time for cost optimization
+ * and capacity planning.
+ */
+
+import type { BetaUsage as Usage } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
+
+export interface TokenUsageEntry {
+  timestamp: number
+  inputTokens: number
+  outputTokens: number
+  cacheReadTokens: number
+  cacheCreationTokens: number
+  model: string
+}
+
+export interface TokenAnalytics {
+  totalRequests: number
+  totalInputTokens: number
+  totalOutputTokens: number
+  totalCacheRead: number
+  totalCacheCreation: number
+  averageInputPerRequest: number
+  averageOutputPerRequest: number
+  cacheHitRate: number
+  mostUsedModel: string
+  requestsLastHour: number
+  requestsLastDay: number
+}
+
+/**
+ * Historical Token Analytics Tracker
+ *
+ * Tracks token usage patterns over time for analytics,
+ * cost optimization, and capacity planning.
+ */ +export class TokenUsageTracker { + private history: TokenUsageEntry[] = [] + private readonly maxEntries: number + + constructor(maxEntries = 1000) { + this.maxEntries = maxEntries + } + + /** + * Record a token usage event from API response. + */ + record(usage: { + input_tokens: number + output_tokens: number + cache_read_input_tokens?: number + cache_creation_input_tokens?: number + model: string + }): void { + const entry: TokenUsageEntry = { + timestamp: Date.now(), + inputTokens: usage.input_tokens, + outputTokens: usage.output_tokens, + cacheReadTokens: usage.cache_read_input_tokens ?? 0, + cacheCreationTokens: usage.cache_creation_input_tokens ?? 0, + model: usage.model, + } + + this.history.push(entry) + + if (this.history.length > this.maxEntries) { + this.history = this.history.slice(-this.maxEntries) + } + } + + /** + * Get analytics summary for all recorded usage. + */ + getAnalytics(): TokenAnalytics { + if (this.history.length === 0) { + return { + totalRequests: 0, + totalInputTokens: 0, + totalOutputTokens: 0, + totalCacheRead: 0, + totalCacheCreation: 0, + averageInputPerRequest: 0, + averageOutputPerRequest: 0, + cacheHitRate: 0, + mostUsedModel: 'unknown', + requestsLastHour: 0, + requestsLastDay: 0, + } + } + + const now = Date.now() + const hourAgo = now - 60 * 60 * 1000 + const dayAgo = now - 24 * 60 * 60 * 1000 + + let totalInput = 0 + let totalOutput = 0 + let totalCacheRead = 0 + let totalCacheCreation = 0 + const modelCounts = new Map() + let requestsLastHour = 0 + let requestsLastDay = 0 + + for (const entry of this.history) { + totalInput += entry.inputTokens + totalOutput += entry.outputTokens + totalCacheRead += entry.cacheReadTokens + totalCacheCreation += entry.cacheCreationTokens + + modelCounts.set(entry.model, (modelCounts.get(entry.model) ?? 
0) + 1) + + if (entry.timestamp >= hourAgo) requestsLastHour++ + if (entry.timestamp >= dayAgo) requestsLastDay++ + } + + let mostUsedModel = 'unknown' + let maxCount = 0 + for (const [model, count] of modelCounts) { + if (count > maxCount) { + maxCount = count + mostUsedModel = model + } + } + + const totalRequests = this.history.length + const totalCache = totalCacheRead + totalCacheCreation + const totalTokens = totalInput + totalOutput + totalCache + const cacheHitRate = totalTokens > 0 ? (totalCacheRead / totalTokens) * 100 : 0 + + return { + totalRequests, + totalInputTokens: totalInput, + totalOutputTokens: totalOutput, + totalCacheRead, + totalCacheCreation, + averageInputPerRequest: Math.round(totalInput / totalRequests), + averageOutputPerRequest: Math.round(totalOutput / totalRequests), + cacheHitRate: Math.round(cacheHitRate), + mostUsedModel, + requestsLastHour, + requestsLastDay, + } + } + + /** + * Get recent entries within time window. + */ + getRecent(windowMs: number): TokenUsageEntry[] { + const cutoff = Date.now() - windowMs + return this.history.filter(e => e.timestamp >= cutoff) + } + + /** + * Get entries for a specific model + */ + getByModel(model: string): TokenUsageEntry[] { + return this.history.filter(e => e.model === model) + } + + /** + * Calculate cost estimate (approximate) + */ + estimateCost(): { input: number; output: number; cache: number } { + const analytics = this.getAnalytics() + + // Approximate pricing (adjust as needed) + const inputCost = analytics.totalInputTokens * 0.00015 + const outputCost = analytics.totalOutputTokens * 0.0006 + const cacheCost = analytics.totalCacheRead * 0.000075 + + return { + input: Math.round(inputCost * 100) / 100, + output: Math.round(outputCost * 100) / 100, + cache: Math.round(cacheCost * 100) / 100, + } + } + + /** + * Clear history. + */ + clear(): void { + this.history = [] + } + + /** + * Get history size. 
+ */ + get size(): number { + return this.history.length + } + + /** + * Export history as JSON + */ + export(): string { + return JSON.stringify(this.history, null, 2) + } + + /** + * Import history from JSON + */ + import(json: string): void { + try { + const entries = JSON.parse(json) as TokenUsageEntry[] + this.history = entries.slice(-this.maxEntries) + } catch { + // Invalid JSON, ignore + } + } +} \ No newline at end of file diff --git a/src/utils/tokens.ts b/src/utils/tokens.ts index c56da552..021fc756 100644 --- a/src/utils/tokens.ts +++ b/src/utils/tokens.ts @@ -1,5 +1,5 @@ import type { BetaUsage as Usage } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs' -import { roughTokenCountEstimationForMessages } from '../services/tokenEstimation.js' +import { roughTokenCountEstimation, roughTokenCountEstimationForMessages } from '../services/tokenEstimation.js' import type { AssistantMessage, Message } from '../types/message.js' import { SYNTHETIC_MESSAGES, SYNTHETIC_MODEL } from './messages.js' import { jsonStringify } from './slowOperations.js' @@ -198,6 +198,198 @@ export function getAssistantMessageContentLength( return contentLength } +/** + * Extract thinking tokens from an assistant message. + * Returns breakdown of thinking vs output tokens. 
+ */ +export function extractThinkingTokens( + message: AssistantMessage, +): { thinking: number; output: number; total: number } { + let thinking = 0 + let output = 0 + + for (const block of message.message.content) { + if (block.type === 'thinking') { + thinking += roughTokenCountEstimation(block.thinking) + } else if (block.type === 'redacted_thinking') { + thinking += roughTokenCountEstimation(block.data) + } else if (block.type === 'text') { + output += roughTokenCountEstimation(block.text) + } else if (block.type === 'tool_use') { + output += roughTokenCountEstimation(jsonStringify(block.input)) + } + } + + return { thinking, output, total: thinking + output } +} + +/** + * Token usage history entry for tracking patterns over time. + */ +export interface TokenUsageEntry { + timestamp: number + inputTokens: number + outputTokens: number + cacheReadTokens: number + cacheCreationTokens: number + model: string +} + +/** + * Token analytics summary from historical data. + */ +export interface TokenAnalytics { + totalRequests: number + totalInputTokens: number + totalOutputTokens: number + totalCacheRead: number + totalCacheCreation: number + averageInputPerRequest: number + averageOutputPerRequest: number + cacheHitRate: number + mostUsedModel: string + requestsLastHour: number + requestsLastDay: number +} + +/** + * Historical Token Analytics Tracker + * + * Tracks token usage patterns over time for analytics, + * cost optimization, and capacity planning. + */ +export class TokenUsageTracker { + private history: TokenUsageEntry[] = [] + private readonly maxEntries: number + + constructor(maxEntries = 1000) { + this.maxEntries = maxEntries + } + + /** + * Record a token usage event from API response. 
+ */ + record(usage: { + input_tokens: number + output_tokens: number + cache_read_input_tokens?: number + cache_creation_input_tokens?: number + model: string + }): void { + const entry: TokenUsageEntry = { + timestamp: Date.now(), + inputTokens: usage.input_tokens, + outputTokens: usage.output_tokens, + cacheReadTokens: usage.cache_read_input_tokens ?? 0, + cacheCreationTokens: usage.cache_creation_input_tokens ?? 0, + model: usage.model, + } + + this.history.push(entry) + + // Trim old entries + if (this.history.length > this.maxEntries) { + this.history = this.history.slice(-this.maxEntries) + } + } + + /** + * Get analytics summary for all recorded usage. + */ + getAnalytics(): TokenAnalytics { + if (this.history.length === 0) { + return { + totalRequests: 0, + totalInputTokens: 0, + totalOutputTokens: 0, + totalCacheRead: 0, + totalCacheCreation: 0, + averageInputPerRequest: 0, + averageOutputPerRequest: 0, + cacheHitRate: 0, + mostUsedModel: 'unknown', + requestsLastHour: 0, + requestsLastDay: 0, + } + } + + const now = Date.now() + const hourAgo = now - 60 * 60 * 1000 + const dayAgo = now - 24 * 60 * 60 * 1000 + + let totalInput = 0 + let totalOutput = 0 + let totalCacheRead = 0 + let totalCacheCreation = 0 + let modelCounts = new Map() + let requestsLastHour = 0 + let requestsLastDay = 0 + + for (const entry of this.history) { + totalInput += entry.inputTokens + totalOutput += entry.outputTokens + totalCacheRead += entry.cacheReadTokens + totalCacheCreation += entry.cacheCreationTokens + + modelCounts.set(entry.model, (modelCounts.get(entry.model) ?? 
0) + 1) + + if (entry.timestamp >= hourAgo) requestsLastHour++ + if (entry.timestamp >= dayAgo) requestsLastDay++ + } + + // Find most used model + let mostUsedModel = 'unknown' + let maxCount = 0 + for (const [model, count] of modelCounts) { + if (count > maxCount) { + maxCount = count + mostUsedModel = model + } + } + + const totalRequests = this.history.length + const totalCache = totalCacheRead + totalCacheCreation + const totalTokens = totalInput + totalOutput + totalCache + const cacheHitRate = totalTokens > 0 ? (totalCacheRead / totalTokens) * 100 : 0 + + return { + totalRequests, + totalInputTokens: totalInput, + totalOutputTokens: totalOutput, + totalCacheRead, + totalCacheCreation, + averageInputPerRequest: Math.round(totalInput / totalRequests), + averageOutputPerRequest: Math.round(totalOutput / totalRequests), + cacheHitRate: Math.round(cacheHitRate), + mostUsedModel, + requestsLastHour, + requestsLastDay, + } + } + + /** + * Get recent entries within time window. + */ + getRecent(windowMs: number): TokenUsageEntry[] { + const cutoff = Date.now() - windowMs + return this.history.filter(e => e.timestamp >= cutoff) + } + + /** + * Clear history. + */ + clear(): void { + this.history = [] + } + + /** + * Get history size. + */ + get size(): number { + return this.history.length + } +} + /** * Get the current context window size in tokens. *