feat: add thinking token extraction (#798)
* feat: add thinking token tracking and historical analytics
  - extractThinkingTokens(): separate thinking from output tokens
  - TokenUsageTracker class for historical analytics
  - Track: cache hit rate, most used model, requests per hour/day
  - Analytics: average tokens per request, totals
  - Add tests (7 passing)
  PR 4B: Features 1.10 + 1.11

* refactor: extract thinking and analytics to separate files
  - Create thinkingTokenExtractor.ts with ThinkingTokenAnalyzer
  - Create tokenAnalytics.ts with TokenUsageTracker
  - Add production-grade methods and tests
  - Update test imports
This commit is contained in:
committed by
GitHub
parent
761924daa7
commit
268c0398e4
106
src/utils/thinkingTokenExtractor.test.ts
Normal file
106
src/utils/thinkingTokenExtractor.test.ts
Normal file
@@ -0,0 +1,106 @@
|
||||
import { describe, expect, it } from 'bun:test'
|
||||
import { ThinkingTokenAnalyzer } from './thinkingTokenExtractor.js'
|
||||
|
||||
// Unit tests for ThinkingTokenAnalyzer (extract / analyze / hasSignificantThinking).
// Messages are built as minimal object literals and cast via `as any` because
// the full AssistantMessage shape is not needed for these cases.
describe('ThinkingTokenAnalyzer', () => {
  describe('extract', () => {
    it('extracts thinking and output separately', () => {
      const message = {
        type: 'assistant',
        message: {
          content: [
            { type: 'thinking', thinking: 'Let me think about this...' },
            { type: 'text', text: 'Here is my answer.' },
          ],
        },
      } as any

      const result = ThinkingTokenAnalyzer.extract(message)

      expect(result.thinking).toBeGreaterThan(0)
      expect(result.output).toBeGreaterThan(0)
      expect(result.total).toBe(result.thinking + result.output)
    })

    it('handles no thinking', () => {
      const message = {
        type: 'assistant',
        message: {
          content: [{ type: 'text', text: 'Hello world' }],
        },
      } as any

      const result = ThinkingTokenAnalyzer.extract(message)

      expect(result.thinking).toBe(0)
      expect(result.output).toBeGreaterThan(0)
    })

    it('handles redacted thinking', () => {
      // Redacted blocks carry their payload in `data`, not `thinking`.
      const message = {
        type: 'assistant',
        message: {
          content: [
            { type: 'redacted_thinking', data: '[thinking hidden]' },
            { type: 'text', text: 'Answer here.' },
          ],
        },
      } as any

      const result = ThinkingTokenAnalyzer.extract(message)

      expect(result.thinking).toBeGreaterThan(0)
      expect(result.output).toBeGreaterThan(0)
    })
  })

  describe('analyze', () => {
    it('calculates percentages', () => {
      const message = {
        type: 'assistant',
        message: {
          content: [
            { type: 'thinking', thinking: 'Thinking1 Thinking2 Thinking3' },
            { type: 'text', text: 'Output1 Output2' },
          ],
        },
      } as any

      const analysis = ThinkingTokenAnalyzer.analyze(message)

      expect(analysis.hasThinking).toBe(true)
      expect(analysis.thinkingPercentage).toBeGreaterThan(0)
      expect(analysis.outputPercentage).toBeGreaterThan(0)
      expect(analysis.reasoningComplexity).toBeTruthy()
    })
  })

  describe('hasSignificantThinking', () => {
    it('detects significant thinking', () => {
      // 500 chars of thinking vs a 5-char output puts the thinking share
      // far above the 20% threshold.
      const message = {
        type: 'assistant',
        message: {
          content: [
            { type: 'thinking', thinking: 'x'.repeat(500) },
            { type: 'text', text: 'short' },
          ],
        },
      } as any

      expect(ThinkingTokenAnalyzer.hasSignificantThinking(message, 20)).toBe(true)
    })

    it('rejects minimal thinking', () => {
      const message = {
        type: 'assistant',
        message: {
          content: [
            { type: 'thinking', thinking: 'a' },
            { type: 'text', text: 'much longer output text here with more content' },
          ],
        },
      } as any

      expect(ThinkingTokenAnalyzer.hasSignificantThinking(message, 20)).toBe(false)
    })
  })
})
|
||||
192
src/utils/thinkingTokenExtractor.ts
Normal file
192
src/utils/thinkingTokenExtractor.ts
Normal file
@@ -0,0 +1,192 @@
|
||||
/**
|
||||
* Thinking Token Extractor - Production-grade thinking token analysis
|
||||
*
|
||||
* Extracts and analyzes thinking tokens from assistant messages.
|
||||
* Provides detailed breakdown, statistics, and insights.
|
||||
*/
|
||||
|
||||
import { roughTokenCountEstimation } from '../services/tokenEstimation.js'
|
||||
import { jsonStringify } from './slowOperations.js'
|
||||
import type { AssistantMessage, Message } from '../types/message.js'
|
||||
|
||||
/** A single thinking-type content block with its estimated token count. */
export interface ThinkingBlock {
  type: 'thinking' | 'redacted_thinking'
  content: string
  tokens: number
}

/** A single visible (output) content block with its estimated token count. */
export interface OutputBlock {
  type: 'text' | 'tool_use'
  content: string
  tokens: number
}

/** Per-message split of estimated tokens into thinking vs output. */
export interface ThinkingTokenBreakdown {
  thinking: number
  output: number
  // Always thinking + output.
  total: number
  thinkingBlocks: ThinkingBlock[]
  outputBlocks: OutputBlock[]
}

/** Aggregate insights derived from a ThinkingTokenBreakdown by analyze(). */
export interface ThinkingAnalysis {
  hasThinking: boolean
  // Percentages (0-100) rounded to one decimal place.
  thinkingPercentage: number
  outputPercentage: number
  blockCount: number
  avgThinkingBlockSize: number
  avgOutputBlockSize: number
  totalTextLength: number
  reasoningComplexity: 'low' | 'medium' | 'high'
}
|
||||
|
||||
export class ThinkingTokenAnalyzer {
|
||||
/**
|
||||
* Extract detailed thinking vs output breakdown
|
||||
*/
|
||||
static extract(message: AssistantMessage): ThinkingTokenBreakdown {
|
||||
const thinkingBlocks: ThinkingBlock[] = []
|
||||
const outputBlocks: OutputBlock[] = []
|
||||
let thinking = 0
|
||||
let output = 0
|
||||
|
||||
for (const block of message.message.content) {
|
||||
if (block.type === 'thinking') {
|
||||
const tokens = roughTokenCountEstimation(block.thinking)
|
||||
thinking += tokens
|
||||
thinkingBlocks.push({
|
||||
type: 'thinking',
|
||||
content: block.thinking,
|
||||
tokens,
|
||||
})
|
||||
} else if (block.type === 'redacted_thinking') {
|
||||
const tokens = roughTokenCountEstimation(block.data)
|
||||
thinking += tokens
|
||||
thinkingBlocks.push({
|
||||
type: 'redacted_thinking',
|
||||
content: block.data,
|
||||
tokens,
|
||||
})
|
||||
} else if (block.type === 'text') {
|
||||
const tokens = roughTokenCountEstimation(block.text)
|
||||
output += tokens
|
||||
outputBlocks.push({
|
||||
type: 'text',
|
||||
content: block.text,
|
||||
tokens,
|
||||
})
|
||||
} else if (block.type === 'tool_use') {
|
||||
const content = jsonStringify(block.input)
|
||||
const tokens = roughTokenCountEstimation(content)
|
||||
output += tokens
|
||||
outputBlocks.push({
|
||||
type: 'tool_use',
|
||||
content,
|
||||
tokens,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
thinking,
|
||||
output,
|
||||
total: thinking + output,
|
||||
thinkingBlocks,
|
||||
outputBlocks,
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple extraction for quick use
|
||||
*/
|
||||
static extractSimple(message: AssistantMessage): ThinkingTokenBreakdown {
|
||||
return this.extract(message)
|
||||
}
|
||||
|
||||
/**
|
||||
* Analyze thinking patterns and provide insights
|
||||
*/
|
||||
static analyze(message: AssistantMessage): ThinkingAnalysis {
|
||||
const breakdown = this.extract(message)
|
||||
const { thinking, output, total, thinkingBlocks, outputBlocks } = breakdown
|
||||
|
||||
const hasThinking = thinking > 0
|
||||
const thinkingPercentage = total > 0 ? (thinking / total) * 100 : 0
|
||||
const outputPercentage = total > 0 ? (output / total) * 100 : 0
|
||||
|
||||
const avgThinkingBlockSize = thinkingBlocks.length > 0
|
||||
? thinkingBlocks.reduce((sum, b) => sum + b.tokens, 0) / thinkingBlocks.length
|
||||
: 0
|
||||
|
||||
const avgOutputBlockSize = outputBlocks.length > 0
|
||||
? outputBlocks.reduce((sum, b) => sum + b.tokens, 0) / outputBlocks.length
|
||||
: 0
|
||||
|
||||
const totalTextLength = [...thinkingBlocks, ...outputBlocks].reduce(
|
||||
(sum, b) => sum + b.content.length,
|
||||
0,
|
||||
)
|
||||
|
||||
// Complexity based on thinking percentage and block count
|
||||
let reasoningComplexity: 'low' | 'medium' | 'high' = 'low'
|
||||
if (thinkingPercentage > 30 || thinkingBlocks.length > 5) {
|
||||
reasoningComplexity = 'high'
|
||||
} else if (thinkingPercentage > 10 || thinkingBlocks.length > 2) {
|
||||
reasoningComplexity = 'medium'
|
||||
}
|
||||
|
||||
return {
|
||||
hasThinking,
|
||||
thinkingPercentage: Math.round(thinkingPercentage * 10) / 10,
|
||||
outputPercentage: Math.round(outputPercentage * 10) / 10,
|
||||
blockCount: thinkingBlocks.length + outputBlocks.length,
|
||||
avgThinkingBlockSize: Math.round(avgThinkingBlockSize),
|
||||
avgOutputBlockSize: Math.round(avgOutputBlockSize),
|
||||
totalTextLength,
|
||||
reasoningComplexity,
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if message has significant thinking
|
||||
*/
|
||||
static hasSignificantThinking(
|
||||
message: AssistantMessage,
|
||||
thresholdPercent = 20,
|
||||
): boolean {
|
||||
const analysis = this.analyze(message)
|
||||
return analysis.thinkingPercentage >= thresholdPercent
|
||||
}
|
||||
|
||||
/**
|
||||
* Get thinking-only messages from an array
|
||||
*/
|
||||
static filterThinkingMessages(messages: Message[]): AssistantMessage[] {
|
||||
return messages
|
||||
.filter((m): m is AssistantMessage => m.type === 'assistant')
|
||||
.filter(m => this.hasSignificantThinking(m))
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate total thinking tokens across messages
|
||||
*/
|
||||
static totalThinkingTokens(messages: Message[]): number {
|
||||
return messages
|
||||
.filter((m): m is AssistantMessage => m.type === 'assistant')
|
||||
.reduce((sum, m) => sum + this.extract(m).thinking, 0)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Legacy export for backward compatibility
|
||||
*/
|
||||
export function extractThinkingTokens(
|
||||
message: AssistantMessage,
|
||||
): { thinking: number; output: number; total: number } {
|
||||
const result = ThinkingTokenAnalyzer.extract(message)
|
||||
return {
|
||||
thinking: result.thinking,
|
||||
output: result.output,
|
||||
total: result.total,
|
||||
}
|
||||
}
|
||||
69
src/utils/thinkingTokens.test.ts
Normal file
69
src/utils/thinkingTokens.test.ts
Normal file
@@ -0,0 +1,69 @@
|
||||
import { describe, expect, it } from 'bun:test'
|
||||
import { extractThinkingTokens } from './tokens.js'
|
||||
|
||||
// Unit tests for the legacy extractThinkingTokens() helper.
// NOTE(review): this imports from './tokens.js' even though the refactor also
// added the function to thinkingTokenExtractor.ts — confirm which copy is
// intended to remain canonical.
describe('extractThinkingTokens', () => {
  it('extracts thinking and output separately', () => {
    const message = {
      type: 'assistant',
      message: {
        content: [
          { type: 'thinking', thinking: 'Let me think about this...' },
          { type: 'text', text: 'Here is my answer.' },
        ],
      },
    } as any

    const result = extractThinkingTokens(message)

    expect(result.thinking).toBeGreaterThan(0)
    expect(result.output).toBeGreaterThan(0)
    expect(result.total).toBe(result.thinking + result.output)
  })

  it('handles no thinking', () => {
    const message = {
      type: 'assistant',
      message: {
        content: [{ type: 'text', text: 'Hello world' }],
      },
    } as any

    const result = extractThinkingTokens(message)

    expect(result.thinking).toBe(0)
    expect(result.output).toBeGreaterThan(0)
  })

  it('handles redacted thinking', () => {
    // Redacted blocks carry their payload in `data`, not `thinking`.
    const message = {
      type: 'assistant',
      message: {
        content: [
          { type: 'redacted_thinking', data: '[thinking hidden]' },
          { type: 'text', text: 'Answer here.' },
        ],
      },
    } as any

    const result = extractThinkingTokens(message)

    expect(result.thinking).toBeGreaterThan(0)
    expect(result.output).toBeGreaterThan(0)
  })

  it('handles tool use', () => {
    // tool_use input is serialized and counted as output tokens.
    const message = {
      type: 'assistant',
      message: {
        content: [
          { type: 'tool_use', id: 'tool_1', name: 'bash', input: { cmd: 'echo test' } },
          { type: 'text', text: 'Ran command.' },
        ],
      },
    } as any

    const result = extractThinkingTokens(message)

    expect(result.output).toBeGreaterThan(0)
  })
})
|
||||
84
src/utils/tokenAnalytics.test.ts
Normal file
84
src/utils/tokenAnalytics.test.ts
Normal file
@@ -0,0 +1,84 @@
|
||||
import { describe, expect, it, beforeEach } from 'bun:test'
|
||||
import { TokenUsageTracker } from './tokenAnalytics.js'
|
||||
|
||||
// Unit tests for TokenUsageTracker: recording, analytics aggregation,
// cache-hit rate, model counting, bounded history, and clearing.
describe('TokenUsageTracker', () => {
  let tracker: TokenUsageTracker

  beforeEach(() => {
    // Fresh tracker per test; 100-entry cap is plenty for these cases.
    tracker = new TokenUsageTracker(100)
  })

  it('records token usage', () => {
    tracker.record({
      input_tokens: 1000,
      output_tokens: 500,
      cache_read_input_tokens: 200,
      cache_creation_input_tokens: 100,
      model: 'claude-sonnet-4-5-20250514',
    })

    expect(tracker.size).toBe(1)
  })

  it('calculates analytics', () => {
    tracker.record({
      input_tokens: 1000,
      output_tokens: 500,
      model: 'claude-sonnet-4-5-20250514',
    })

    tracker.record({
      input_tokens: 2000,
      output_tokens: 300,
      model: 'claude-sonnet-4-5-20250514',
    })

    const analytics = tracker.getAnalytics()

    expect(analytics.totalRequests).toBe(2)
    expect(analytics.totalInputTokens).toBe(3000)
    expect(analytics.totalOutputTokens).toBe(800)
    expect(analytics.averageInputPerRequest).toBe(1500)
    expect(analytics.averageOutputPerRequest).toBe(400)
  })

  it('tracks cache hit rate', () => {
    tracker.record({
      input_tokens: 1000,
      output_tokens: 500,
      cache_read_input_tokens: 500, // 500 of 2000 total tokens => 25% hit rate
      model: 'claude-sonnet-4-5-20250514',
    })

    const analytics = tracker.getAnalytics()

    expect(analytics.cacheHitRate).toBeGreaterThan(0)
  })

  it('tracks most used model', () => {
    tracker.record({ input_tokens: 1000, output_tokens: 100, model: 'sonnet' })
    tracker.record({ input_tokens: 1000, output_tokens: 100, model: 'sonnet' })
    tracker.record({ input_tokens: 1000, output_tokens: 100, model: 'opus' })

    expect(tracker.getAnalytics().mostUsedModel).toBe('sonnet')
  })

  it('respects max entries limit', () => {
    // Cap of 3: the two oldest of five records must be dropped.
    const smallTracker = new TokenUsageTracker(3)

    smallTracker.record({ input_tokens: 1, output_tokens: 1, model: 'a' })
    smallTracker.record({ input_tokens: 2, output_tokens: 2, model: 'b' })
    smallTracker.record({ input_tokens: 3, output_tokens: 3, model: 'c' })
    smallTracker.record({ input_tokens: 4, output_tokens: 4, model: 'd' })
    smallTracker.record({ input_tokens: 5, output_tokens: 5, model: 'e' })

    expect(smallTracker.size).toBe(3)
  })

  it('clears history', () => {
    tracker.record({ input_tokens: 1000, output_tokens: 100, model: 'test' })
    tracker.clear()

    expect(tracker.size).toBe(0)
  })
})
|
||||
211
src/utils/tokenAnalytics.ts
Normal file
211
src/utils/tokenAnalytics.ts
Normal file
@@ -0,0 +1,211 @@
|
||||
/**
|
||||
* Token Analytics - Historical token usage tracking and analysis
|
||||
*
|
||||
* Tracks token usage patterns over time for cost optimization
|
||||
* and capacity planning.
|
||||
*/
|
||||
|
||||
import type { BetaUsage as Usage } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
|
||||
|
||||
export interface TokenUsageEntry {
|
||||
timestamp: number
|
||||
inputTokens: number
|
||||
outputTokens: number
|
||||
cacheReadTokens: number
|
||||
cacheCreationTokens: number
|
||||
model: string
|
||||
}
|
||||
|
||||
export interface TokenAnalytics {
|
||||
totalRequests: number
|
||||
totalInputTokens: number
|
||||
totalOutputTokens: number
|
||||
totalCacheRead: number
|
||||
totalCacheCreation: number
|
||||
averageInputPerRequest: number
|
||||
averageOutputPerRequest: number
|
||||
cacheHitRate: number
|
||||
mostUsedModel: string
|
||||
requestsLastHour: number
|
||||
requestsLastDay: number
|
||||
}
|
||||
|
||||
/**
|
||||
* Historical Token Analytics Tracker
|
||||
*
|
||||
* Tracks token usage patterns over time for analytics,
|
||||
* cost optimization, and capacity planning.
|
||||
*/
|
||||
export class TokenUsageTracker {
|
||||
private history: TokenUsageEntry[] = []
|
||||
private readonly maxEntries: number
|
||||
|
||||
constructor(maxEntries = 1000) {
|
||||
this.maxEntries = maxEntries
|
||||
}
|
||||
|
||||
/**
|
||||
* Record a token usage event from API response.
|
||||
*/
|
||||
record(usage: {
|
||||
input_tokens: number
|
||||
output_tokens: number
|
||||
cache_read_input_tokens?: number
|
||||
cache_creation_input_tokens?: number
|
||||
model: string
|
||||
}): void {
|
||||
const entry: TokenUsageEntry = {
|
||||
timestamp: Date.now(),
|
||||
inputTokens: usage.input_tokens,
|
||||
outputTokens: usage.output_tokens,
|
||||
cacheReadTokens: usage.cache_read_input_tokens ?? 0,
|
||||
cacheCreationTokens: usage.cache_creation_input_tokens ?? 0,
|
||||
model: usage.model,
|
||||
}
|
||||
|
||||
this.history.push(entry)
|
||||
|
||||
if (this.history.length > this.maxEntries) {
|
||||
this.history = this.history.slice(-this.maxEntries)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get analytics summary for all recorded usage.
|
||||
*/
|
||||
getAnalytics(): TokenAnalytics {
|
||||
if (this.history.length === 0) {
|
||||
return {
|
||||
totalRequests: 0,
|
||||
totalInputTokens: 0,
|
||||
totalOutputTokens: 0,
|
||||
totalCacheRead: 0,
|
||||
totalCacheCreation: 0,
|
||||
averageInputPerRequest: 0,
|
||||
averageOutputPerRequest: 0,
|
||||
cacheHitRate: 0,
|
||||
mostUsedModel: 'unknown',
|
||||
requestsLastHour: 0,
|
||||
requestsLastDay: 0,
|
||||
}
|
||||
}
|
||||
|
||||
const now = Date.now()
|
||||
const hourAgo = now - 60 * 60 * 1000
|
||||
const dayAgo = now - 24 * 60 * 60 * 1000
|
||||
|
||||
let totalInput = 0
|
||||
let totalOutput = 0
|
||||
let totalCacheRead = 0
|
||||
let totalCacheCreation = 0
|
||||
const modelCounts = new Map<string, number>()
|
||||
let requestsLastHour = 0
|
||||
let requestsLastDay = 0
|
||||
|
||||
for (const entry of this.history) {
|
||||
totalInput += entry.inputTokens
|
||||
totalOutput += entry.outputTokens
|
||||
totalCacheRead += entry.cacheReadTokens
|
||||
totalCacheCreation += entry.cacheCreationTokens
|
||||
|
||||
modelCounts.set(entry.model, (modelCounts.get(entry.model) ?? 0) + 1)
|
||||
|
||||
if (entry.timestamp >= hourAgo) requestsLastHour++
|
||||
if (entry.timestamp >= dayAgo) requestsLastDay++
|
||||
}
|
||||
|
||||
let mostUsedModel = 'unknown'
|
||||
let maxCount = 0
|
||||
for (const [model, count] of modelCounts) {
|
||||
if (count > maxCount) {
|
||||
maxCount = count
|
||||
mostUsedModel = model
|
||||
}
|
||||
}
|
||||
|
||||
const totalRequests = this.history.length
|
||||
const totalCache = totalCacheRead + totalCacheCreation
|
||||
const totalTokens = totalInput + totalOutput + totalCache
|
||||
const cacheHitRate = totalTokens > 0 ? (totalCacheRead / totalTokens) * 100 : 0
|
||||
|
||||
return {
|
||||
totalRequests,
|
||||
totalInputTokens: totalInput,
|
||||
totalOutputTokens: totalOutput,
|
||||
totalCacheRead,
|
||||
totalCacheCreation,
|
||||
averageInputPerRequest: Math.round(totalInput / totalRequests),
|
||||
averageOutputPerRequest: Math.round(totalOutput / totalRequests),
|
||||
cacheHitRate: Math.round(cacheHitRate),
|
||||
mostUsedModel,
|
||||
requestsLastHour,
|
||||
requestsLastDay,
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get recent entries within time window.
|
||||
*/
|
||||
getRecent(windowMs: number): TokenUsageEntry[] {
|
||||
const cutoff = Date.now() - windowMs
|
||||
return this.history.filter(e => e.timestamp >= cutoff)
|
||||
}
|
||||
|
||||
/**
|
||||
* Get entries for a specific model
|
||||
*/
|
||||
getByModel(model: string): TokenUsageEntry[] {
|
||||
return this.history.filter(e => e.model === model)
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate cost estimate (approximate)
|
||||
*/
|
||||
estimateCost(): { input: number; output: number; cache: number } {
|
||||
const analytics = this.getAnalytics()
|
||||
|
||||
// Approximate pricing (adjust as needed)
|
||||
const inputCost = analytics.totalInputTokens * 0.00015
|
||||
const outputCost = analytics.totalOutputTokens * 0.0006
|
||||
const cacheCost = analytics.totalCacheRead * 0.000075
|
||||
|
||||
return {
|
||||
input: Math.round(inputCost * 100) / 100,
|
||||
output: Math.round(outputCost * 100) / 100,
|
||||
cache: Math.round(cacheCost * 100) / 100,
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear history.
|
||||
*/
|
||||
clear(): void {
|
||||
this.history = []
|
||||
}
|
||||
|
||||
/**
|
||||
* Get history size.
|
||||
*/
|
||||
get size(): number {
|
||||
return this.history.length
|
||||
}
|
||||
|
||||
/**
|
||||
* Export history as JSON
|
||||
*/
|
||||
export(): string {
|
||||
return JSON.stringify(this.history, null, 2)
|
||||
}
|
||||
|
||||
/**
|
||||
* Import history from JSON
|
||||
*/
|
||||
import(json: string): void {
|
||||
try {
|
||||
const entries = JSON.parse(json) as TokenUsageEntry[]
|
||||
this.history = entries.slice(-this.maxEntries)
|
||||
} catch {
|
||||
// Invalid JSON, ignore
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
import type { BetaUsage as Usage } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
|
||||
import { roughTokenCountEstimationForMessages } from '../services/tokenEstimation.js'
|
||||
import { roughTokenCountEstimation, roughTokenCountEstimationForMessages } from '../services/tokenEstimation.js'
|
||||
import type { AssistantMessage, Message } from '../types/message.js'
|
||||
import { SYNTHETIC_MESSAGES, SYNTHETIC_MODEL } from './messages.js'
|
||||
import { jsonStringify } from './slowOperations.js'
|
||||
@@ -198,6 +198,198 @@ export function getAssistantMessageContentLength(
|
||||
return contentLength
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract thinking tokens from an assistant message.
|
||||
* Returns breakdown of thinking vs output tokens.
|
||||
*/
|
||||
export function extractThinkingTokens(
|
||||
message: AssistantMessage,
|
||||
): { thinking: number; output: number; total: number } {
|
||||
let thinking = 0
|
||||
let output = 0
|
||||
|
||||
for (const block of message.message.content) {
|
||||
if (block.type === 'thinking') {
|
||||
thinking += roughTokenCountEstimation(block.thinking)
|
||||
} else if (block.type === 'redacted_thinking') {
|
||||
thinking += roughTokenCountEstimation(block.data)
|
||||
} else if (block.type === 'text') {
|
||||
output += roughTokenCountEstimation(block.text)
|
||||
} else if (block.type === 'tool_use') {
|
||||
output += roughTokenCountEstimation(jsonStringify(block.input))
|
||||
}
|
||||
}
|
||||
|
||||
return { thinking, output, total: thinking + output }
|
||||
}
|
||||
|
||||
/**
|
||||
* Token usage history entry for tracking patterns over time.
|
||||
*/
|
||||
export interface TokenUsageEntry {
|
||||
timestamp: number
|
||||
inputTokens: number
|
||||
outputTokens: number
|
||||
cacheReadTokens: number
|
||||
cacheCreationTokens: number
|
||||
model: string
|
||||
}
|
||||
|
||||
/**
|
||||
* Token analytics summary from historical data.
|
||||
*/
|
||||
export interface TokenAnalytics {
|
||||
totalRequests: number
|
||||
totalInputTokens: number
|
||||
totalOutputTokens: number
|
||||
totalCacheRead: number
|
||||
totalCacheCreation: number
|
||||
averageInputPerRequest: number
|
||||
averageOutputPerRequest: number
|
||||
cacheHitRate: number
|
||||
mostUsedModel: string
|
||||
requestsLastHour: number
|
||||
requestsLastDay: number
|
||||
}
|
||||
|
||||
/**
|
||||
* Historical Token Analytics Tracker
|
||||
*
|
||||
* Tracks token usage patterns over time for analytics,
|
||||
* cost optimization, and capacity planning.
|
||||
*/
|
||||
export class TokenUsageTracker {
|
||||
private history: TokenUsageEntry[] = []
|
||||
private readonly maxEntries: number
|
||||
|
||||
constructor(maxEntries = 1000) {
|
||||
this.maxEntries = maxEntries
|
||||
}
|
||||
|
||||
/**
|
||||
* Record a token usage event from API response.
|
||||
*/
|
||||
record(usage: {
|
||||
input_tokens: number
|
||||
output_tokens: number
|
||||
cache_read_input_tokens?: number
|
||||
cache_creation_input_tokens?: number
|
||||
model: string
|
||||
}): void {
|
||||
const entry: TokenUsageEntry = {
|
||||
timestamp: Date.now(),
|
||||
inputTokens: usage.input_tokens,
|
||||
outputTokens: usage.output_tokens,
|
||||
cacheReadTokens: usage.cache_read_input_tokens ?? 0,
|
||||
cacheCreationTokens: usage.cache_creation_input_tokens ?? 0,
|
||||
model: usage.model,
|
||||
}
|
||||
|
||||
this.history.push(entry)
|
||||
|
||||
// Trim old entries
|
||||
if (this.history.length > this.maxEntries) {
|
||||
this.history = this.history.slice(-this.maxEntries)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get analytics summary for all recorded usage.
|
||||
*/
|
||||
getAnalytics(): TokenAnalytics {
|
||||
if (this.history.length === 0) {
|
||||
return {
|
||||
totalRequests: 0,
|
||||
totalInputTokens: 0,
|
||||
totalOutputTokens: 0,
|
||||
totalCacheRead: 0,
|
||||
totalCacheCreation: 0,
|
||||
averageInputPerRequest: 0,
|
||||
averageOutputPerRequest: 0,
|
||||
cacheHitRate: 0,
|
||||
mostUsedModel: 'unknown',
|
||||
requestsLastHour: 0,
|
||||
requestsLastDay: 0,
|
||||
}
|
||||
}
|
||||
|
||||
const now = Date.now()
|
||||
const hourAgo = now - 60 * 60 * 1000
|
||||
const dayAgo = now - 24 * 60 * 60 * 1000
|
||||
|
||||
let totalInput = 0
|
||||
let totalOutput = 0
|
||||
let totalCacheRead = 0
|
||||
let totalCacheCreation = 0
|
||||
let modelCounts = new Map<string, number>()
|
||||
let requestsLastHour = 0
|
||||
let requestsLastDay = 0
|
||||
|
||||
for (const entry of this.history) {
|
||||
totalInput += entry.inputTokens
|
||||
totalOutput += entry.outputTokens
|
||||
totalCacheRead += entry.cacheReadTokens
|
||||
totalCacheCreation += entry.cacheCreationTokens
|
||||
|
||||
modelCounts.set(entry.model, (modelCounts.get(entry.model) ?? 0) + 1)
|
||||
|
||||
if (entry.timestamp >= hourAgo) requestsLastHour++
|
||||
if (entry.timestamp >= dayAgo) requestsLastDay++
|
||||
}
|
||||
|
||||
// Find most used model
|
||||
let mostUsedModel = 'unknown'
|
||||
let maxCount = 0
|
||||
for (const [model, count] of modelCounts) {
|
||||
if (count > maxCount) {
|
||||
maxCount = count
|
||||
mostUsedModel = model
|
||||
}
|
||||
}
|
||||
|
||||
const totalRequests = this.history.length
|
||||
const totalCache = totalCacheRead + totalCacheCreation
|
||||
const totalTokens = totalInput + totalOutput + totalCache
|
||||
const cacheHitRate = totalTokens > 0 ? (totalCacheRead / totalTokens) * 100 : 0
|
||||
|
||||
return {
|
||||
totalRequests,
|
||||
totalInputTokens: totalInput,
|
||||
totalOutputTokens: totalOutput,
|
||||
totalCacheRead,
|
||||
totalCacheCreation,
|
||||
averageInputPerRequest: Math.round(totalInput / totalRequests),
|
||||
averageOutputPerRequest: Math.round(totalOutput / totalRequests),
|
||||
cacheHitRate: Math.round(cacheHitRate),
|
||||
mostUsedModel,
|
||||
requestsLastHour,
|
||||
requestsLastDay,
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get recent entries within time window.
|
||||
*/
|
||||
getRecent(windowMs: number): TokenUsageEntry[] {
|
||||
const cutoff = Date.now() - windowMs
|
||||
return this.history.filter(e => e.timestamp >= cutoff)
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear history.
|
||||
*/
|
||||
clear(): void {
|
||||
this.history = []
|
||||
}
|
||||
|
||||
/**
|
||||
* Get history size.
|
||||
*/
|
||||
get size(): number {
|
||||
return this.history.length
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the current context window size in tokens.
|
||||
*
|
||||
|
||||
Reference in New Issue
Block a user