feat: add thinking token extraction (#798)
* feat: add thinking token tracking and historical analytics - extractThinkingTokens(): separate thinking from output tokens - TokenUsageTracker class for historical analytics - Track: cache hit rate, most used model, requests per hour/day - Analytics: average tokens per request, totals - Add tests (7 passing) PR 4B: Features 1.10 + 1.11 * refactor: extract thinking and analytics to separate files - Create thinkingTokenExtractor.ts with ThinkingTokenAnalyzer - Create tokenAnalytics.ts with TokenUsageTracker - Add production-grade methods and tests - Update test imports
This commit is contained in:
committed by
GitHub
parent
761924daa7
commit
268c0398e4
106
src/utils/thinkingTokenExtractor.test.ts
Normal file
106
src/utils/thinkingTokenExtractor.test.ts
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
import { describe, expect, it } from 'bun:test'
|
||||||
|
import { ThinkingTokenAnalyzer } from './thinkingTokenExtractor.js'
|
||||||
|
|
||||||
|
describe('ThinkingTokenAnalyzer', () => {
|
||||||
|
describe('extract', () => {
|
||||||
|
it('extracts thinking and output separately', () => {
|
||||||
|
const message = {
|
||||||
|
type: 'assistant',
|
||||||
|
message: {
|
||||||
|
content: [
|
||||||
|
{ type: 'thinking', thinking: 'Let me think about this...' },
|
||||||
|
{ type: 'text', text: 'Here is my answer.' },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
} as any
|
||||||
|
|
||||||
|
const result = ThinkingTokenAnalyzer.extract(message)
|
||||||
|
|
||||||
|
expect(result.thinking).toBeGreaterThan(0)
|
||||||
|
expect(result.output).toBeGreaterThan(0)
|
||||||
|
expect(result.total).toBe(result.thinking + result.output)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('handles no thinking', () => {
|
||||||
|
const message = {
|
||||||
|
type: 'assistant',
|
||||||
|
message: {
|
||||||
|
content: [{ type: 'text', text: 'Hello world' }],
|
||||||
|
},
|
||||||
|
} as any
|
||||||
|
|
||||||
|
const result = ThinkingTokenAnalyzer.extract(message)
|
||||||
|
|
||||||
|
expect(result.thinking).toBe(0)
|
||||||
|
expect(result.output).toBeGreaterThan(0)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('handles redacted thinking', () => {
|
||||||
|
const message = {
|
||||||
|
type: 'assistant',
|
||||||
|
message: {
|
||||||
|
content: [
|
||||||
|
{ type: 'redacted_thinking', data: '[thinking hidden]' },
|
||||||
|
{ type: 'text', text: 'Answer here.' },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
} as any
|
||||||
|
|
||||||
|
const result = ThinkingTokenAnalyzer.extract(message)
|
||||||
|
|
||||||
|
expect(result.thinking).toBeGreaterThan(0)
|
||||||
|
expect(result.output).toBeGreaterThan(0)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('analyze', () => {
|
||||||
|
it('calculates percentages', () => {
|
||||||
|
const message = {
|
||||||
|
type: 'assistant',
|
||||||
|
message: {
|
||||||
|
content: [
|
||||||
|
{ type: 'thinking', thinking: 'Thinking1 Thinking2 Thinking3' },
|
||||||
|
{ type: 'text', text: 'Output1 Output2' },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
} as any
|
||||||
|
|
||||||
|
const analysis = ThinkingTokenAnalyzer.analyze(message)
|
||||||
|
|
||||||
|
expect(analysis.hasThinking).toBe(true)
|
||||||
|
expect(analysis.thinkingPercentage).toBeGreaterThan(0)
|
||||||
|
expect(analysis.outputPercentage).toBeGreaterThan(0)
|
||||||
|
expect(analysis.reasoningComplexity).toBeTruthy()
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('hasSignificantThinking', () => {
|
||||||
|
it('detects significant thinking', () => {
|
||||||
|
const message = {
|
||||||
|
type: 'assistant',
|
||||||
|
message: {
|
||||||
|
content: [
|
||||||
|
{ type: 'thinking', thinking: 'x'.repeat(500) },
|
||||||
|
{ type: 'text', text: 'short' },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
} as any
|
||||||
|
|
||||||
|
expect(ThinkingTokenAnalyzer.hasSignificantThinking(message, 20)).toBe(true)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('rejects minimal thinking', () => {
|
||||||
|
const message = {
|
||||||
|
type: 'assistant',
|
||||||
|
message: {
|
||||||
|
content: [
|
||||||
|
{ type: 'thinking', thinking: 'a' },
|
||||||
|
{ type: 'text', text: 'much longer output text here with more content' },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
} as any
|
||||||
|
|
||||||
|
expect(ThinkingTokenAnalyzer.hasSignificantThinking(message, 20)).toBe(false)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
192
src/utils/thinkingTokenExtractor.ts
Normal file
192
src/utils/thinkingTokenExtractor.ts
Normal file
@@ -0,0 +1,192 @@
|
|||||||
|
/**
|
||||||
|
* Thinking Token Extractor - Production-grade thinking token analysis
|
||||||
|
*
|
||||||
|
* Extracts and analyzes thinking tokens from assistant messages.
|
||||||
|
* Provides detailed breakdown, statistics, and insights.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { roughTokenCountEstimation } from '../services/tokenEstimation.js'
|
||||||
|
import { jsonStringify } from './slowOperations.js'
|
||||||
|
import type { AssistantMessage, Message } from '../types/message.js'
|
||||||
|
|
||||||
|
/** One thinking-type content block with its estimated token count. */
export interface ThinkingBlock {
  type: 'thinking' | 'redacted_thinking'
  // Raw thinking text, or the opaque `data` payload for redacted blocks.
  content: string
  // Rough estimate (via roughTokenCountEstimation), not an exact API count.
  tokens: number
}

/** One visible output content block with its estimated token count. */
export interface OutputBlock {
  type: 'text' | 'tool_use'
  // Text content, or the JSON-serialized tool input for tool_use blocks.
  content: string
  tokens: number
}

/** Breakdown of a message's estimated tokens into thinking vs output. */
export interface ThinkingTokenBreakdown {
  thinking: number
  output: number
  // Invariant: total === thinking + output.
  total: number
  thinkingBlocks: ThinkingBlock[]
  outputBlocks: OutputBlock[]
}

/** Derived statistics over a message's thinking usage. */
export interface ThinkingAnalysis {
  hasThinking: boolean
  // Percentages of `total`, rounded to one decimal place.
  thinkingPercentage: number
  outputPercentage: number
  blockCount: number
  // Average tokens per block, rounded to the nearest integer.
  avgThinkingBlockSize: number
  avgOutputBlockSize: number
  totalTextLength: number
  // Heuristic bucket driven by thinking share and thinking-block count.
  reasoningComplexity: 'low' | 'medium' | 'high'
}
|
||||||
|
|
||||||
|
export class ThinkingTokenAnalyzer {
|
||||||
|
/**
|
||||||
|
* Extract detailed thinking vs output breakdown
|
||||||
|
*/
|
||||||
|
static extract(message: AssistantMessage): ThinkingTokenBreakdown {
|
||||||
|
const thinkingBlocks: ThinkingBlock[] = []
|
||||||
|
const outputBlocks: OutputBlock[] = []
|
||||||
|
let thinking = 0
|
||||||
|
let output = 0
|
||||||
|
|
||||||
|
for (const block of message.message.content) {
|
||||||
|
if (block.type === 'thinking') {
|
||||||
|
const tokens = roughTokenCountEstimation(block.thinking)
|
||||||
|
thinking += tokens
|
||||||
|
thinkingBlocks.push({
|
||||||
|
type: 'thinking',
|
||||||
|
content: block.thinking,
|
||||||
|
tokens,
|
||||||
|
})
|
||||||
|
} else if (block.type === 'redacted_thinking') {
|
||||||
|
const tokens = roughTokenCountEstimation(block.data)
|
||||||
|
thinking += tokens
|
||||||
|
thinkingBlocks.push({
|
||||||
|
type: 'redacted_thinking',
|
||||||
|
content: block.data,
|
||||||
|
tokens,
|
||||||
|
})
|
||||||
|
} else if (block.type === 'text') {
|
||||||
|
const tokens = roughTokenCountEstimation(block.text)
|
||||||
|
output += tokens
|
||||||
|
outputBlocks.push({
|
||||||
|
type: 'text',
|
||||||
|
content: block.text,
|
||||||
|
tokens,
|
||||||
|
})
|
||||||
|
} else if (block.type === 'tool_use') {
|
||||||
|
const content = jsonStringify(block.input)
|
||||||
|
const tokens = roughTokenCountEstimation(content)
|
||||||
|
output += tokens
|
||||||
|
outputBlocks.push({
|
||||||
|
type: 'tool_use',
|
||||||
|
content,
|
||||||
|
tokens,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
thinking,
|
||||||
|
output,
|
||||||
|
total: thinking + output,
|
||||||
|
thinkingBlocks,
|
||||||
|
outputBlocks,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Simple extraction for quick use
|
||||||
|
*/
|
||||||
|
static extractSimple(message: AssistantMessage): ThinkingTokenBreakdown {
|
||||||
|
return this.extract(message)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Analyze thinking patterns and provide insights
|
||||||
|
*/
|
||||||
|
static analyze(message: AssistantMessage): ThinkingAnalysis {
|
||||||
|
const breakdown = this.extract(message)
|
||||||
|
const { thinking, output, total, thinkingBlocks, outputBlocks } = breakdown
|
||||||
|
|
||||||
|
const hasThinking = thinking > 0
|
||||||
|
const thinkingPercentage = total > 0 ? (thinking / total) * 100 : 0
|
||||||
|
const outputPercentage = total > 0 ? (output / total) * 100 : 0
|
||||||
|
|
||||||
|
const avgThinkingBlockSize = thinkingBlocks.length > 0
|
||||||
|
? thinkingBlocks.reduce((sum, b) => sum + b.tokens, 0) / thinkingBlocks.length
|
||||||
|
: 0
|
||||||
|
|
||||||
|
const avgOutputBlockSize = outputBlocks.length > 0
|
||||||
|
? outputBlocks.reduce((sum, b) => sum + b.tokens, 0) / outputBlocks.length
|
||||||
|
: 0
|
||||||
|
|
||||||
|
const totalTextLength = [...thinkingBlocks, ...outputBlocks].reduce(
|
||||||
|
(sum, b) => sum + b.content.length,
|
||||||
|
0,
|
||||||
|
)
|
||||||
|
|
||||||
|
// Complexity based on thinking percentage and block count
|
||||||
|
let reasoningComplexity: 'low' | 'medium' | 'high' = 'low'
|
||||||
|
if (thinkingPercentage > 30 || thinkingBlocks.length > 5) {
|
||||||
|
reasoningComplexity = 'high'
|
||||||
|
} else if (thinkingPercentage > 10 || thinkingBlocks.length > 2) {
|
||||||
|
reasoningComplexity = 'medium'
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
hasThinking,
|
||||||
|
thinkingPercentage: Math.round(thinkingPercentage * 10) / 10,
|
||||||
|
outputPercentage: Math.round(outputPercentage * 10) / 10,
|
||||||
|
blockCount: thinkingBlocks.length + outputBlocks.length,
|
||||||
|
avgThinkingBlockSize: Math.round(avgThinkingBlockSize),
|
||||||
|
avgOutputBlockSize: Math.round(avgOutputBlockSize),
|
||||||
|
totalTextLength,
|
||||||
|
reasoningComplexity,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if message has significant thinking
|
||||||
|
*/
|
||||||
|
static hasSignificantThinking(
|
||||||
|
message: AssistantMessage,
|
||||||
|
thresholdPercent = 20,
|
||||||
|
): boolean {
|
||||||
|
const analysis = this.analyze(message)
|
||||||
|
return analysis.thinkingPercentage >= thresholdPercent
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get thinking-only messages from an array
|
||||||
|
*/
|
||||||
|
static filterThinkingMessages(messages: Message[]): AssistantMessage[] {
|
||||||
|
return messages
|
||||||
|
.filter((m): m is AssistantMessage => m.type === 'assistant')
|
||||||
|
.filter(m => this.hasSignificantThinking(m))
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculate total thinking tokens across messages
|
||||||
|
*/
|
||||||
|
static totalThinkingTokens(messages: Message[]): number {
|
||||||
|
return messages
|
||||||
|
.filter((m): m is AssistantMessage => m.type === 'assistant')
|
||||||
|
.reduce((sum, m) => sum + this.extract(m).thinking, 0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Legacy export for backward compatibility
|
||||||
|
*/
|
||||||
|
export function extractThinkingTokens(
|
||||||
|
message: AssistantMessage,
|
||||||
|
): { thinking: number; output: number; total: number } {
|
||||||
|
const result = ThinkingTokenAnalyzer.extract(message)
|
||||||
|
return {
|
||||||
|
thinking: result.thinking,
|
||||||
|
output: result.output,
|
||||||
|
total: result.total,
|
||||||
|
}
|
||||||
|
}
|
||||||
69
src/utils/thinkingTokens.test.ts
Normal file
69
src/utils/thinkingTokens.test.ts
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
import { describe, expect, it } from 'bun:test'
|
||||||
|
import { extractThinkingTokens } from './tokens.js'
|
||||||
|
|
||||||
|
describe('extractThinkingTokens', () => {
|
||||||
|
it('extracts thinking and output separately', () => {
|
||||||
|
const message = {
|
||||||
|
type: 'assistant',
|
||||||
|
message: {
|
||||||
|
content: [
|
||||||
|
{ type: 'thinking', thinking: 'Let me think about this...' },
|
||||||
|
{ type: 'text', text: 'Here is my answer.' },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
} as any
|
||||||
|
|
||||||
|
const result = extractThinkingTokens(message)
|
||||||
|
|
||||||
|
expect(result.thinking).toBeGreaterThan(0)
|
||||||
|
expect(result.output).toBeGreaterThan(0)
|
||||||
|
expect(result.total).toBe(result.thinking + result.output)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('handles no thinking', () => {
|
||||||
|
const message = {
|
||||||
|
type: 'assistant',
|
||||||
|
message: {
|
||||||
|
content: [{ type: 'text', text: 'Hello world' }],
|
||||||
|
},
|
||||||
|
} as any
|
||||||
|
|
||||||
|
const result = extractThinkingTokens(message)
|
||||||
|
|
||||||
|
expect(result.thinking).toBe(0)
|
||||||
|
expect(result.output).toBeGreaterThan(0)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('handles redacted thinking', () => {
|
||||||
|
const message = {
|
||||||
|
type: 'assistant',
|
||||||
|
message: {
|
||||||
|
content: [
|
||||||
|
{ type: 'redacted_thinking', data: '[thinking hidden]' },
|
||||||
|
{ type: 'text', text: 'Answer here.' },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
} as any
|
||||||
|
|
||||||
|
const result = extractThinkingTokens(message)
|
||||||
|
|
||||||
|
expect(result.thinking).toBeGreaterThan(0)
|
||||||
|
expect(result.output).toBeGreaterThan(0)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('handles tool use', () => {
|
||||||
|
const message = {
|
||||||
|
type: 'assistant',
|
||||||
|
message: {
|
||||||
|
content: [
|
||||||
|
{ type: 'tool_use', id: 'tool_1', name: 'bash', input: { cmd: 'echo test' } },
|
||||||
|
{ type: 'text', text: 'Ran command.' },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
} as any
|
||||||
|
|
||||||
|
const result = extractThinkingTokens(message)
|
||||||
|
|
||||||
|
expect(result.output).toBeGreaterThan(0)
|
||||||
|
})
|
||||||
|
})
|
||||||
84
src/utils/tokenAnalytics.test.ts
Normal file
84
src/utils/tokenAnalytics.test.ts
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
import { describe, expect, it, beforeEach } from 'bun:test'
|
||||||
|
import { TokenUsageTracker } from './tokenAnalytics.js'
|
||||||
|
|
||||||
|
describe('TokenUsageTracker', () => {
|
||||||
|
let tracker: TokenUsageTracker
|
||||||
|
|
||||||
|
beforeEach(() => {
|
||||||
|
tracker = new TokenUsageTracker(100)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('records token usage', () => {
|
||||||
|
tracker.record({
|
||||||
|
input_tokens: 1000,
|
||||||
|
output_tokens: 500,
|
||||||
|
cache_read_input_tokens: 200,
|
||||||
|
cache_creation_input_tokens: 100,
|
||||||
|
model: 'claude-sonnet-4-5-20250514',
|
||||||
|
})
|
||||||
|
|
||||||
|
expect(tracker.size).toBe(1)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('calculates analytics', () => {
|
||||||
|
tracker.record({
|
||||||
|
input_tokens: 1000,
|
||||||
|
output_tokens: 500,
|
||||||
|
model: 'claude-sonnet-4-5-20250514',
|
||||||
|
})
|
||||||
|
|
||||||
|
tracker.record({
|
||||||
|
input_tokens: 2000,
|
||||||
|
output_tokens: 300,
|
||||||
|
model: 'claude-sonnet-4-5-20250514',
|
||||||
|
})
|
||||||
|
|
||||||
|
const analytics = tracker.getAnalytics()
|
||||||
|
|
||||||
|
expect(analytics.totalRequests).toBe(2)
|
||||||
|
expect(analytics.totalInputTokens).toBe(3000)
|
||||||
|
expect(analytics.totalOutputTokens).toBe(800)
|
||||||
|
expect(analytics.averageInputPerRequest).toBe(1500)
|
||||||
|
expect(analytics.averageOutputPerRequest).toBe(400)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('tracks cache hit rate', () => {
|
||||||
|
tracker.record({
|
||||||
|
input_tokens: 1000,
|
||||||
|
output_tokens: 500,
|
||||||
|
cache_read_input_tokens: 500, // 33% cache
|
||||||
|
model: 'claude-sonnet-4-5-20250514',
|
||||||
|
})
|
||||||
|
|
||||||
|
const analytics = tracker.getAnalytics()
|
||||||
|
|
||||||
|
expect(analytics.cacheHitRate).toBeGreaterThan(0)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('tracks most used model', () => {
|
||||||
|
tracker.record({ input_tokens: 1000, output_tokens: 100, model: 'sonnet' })
|
||||||
|
tracker.record({ input_tokens: 1000, output_tokens: 100, model: 'sonnet' })
|
||||||
|
tracker.record({ input_tokens: 1000, output_tokens: 100, model: 'opus' })
|
||||||
|
|
||||||
|
expect(tracker.getAnalytics().mostUsedModel).toBe('sonnet')
|
||||||
|
})
|
||||||
|
|
||||||
|
it('respects max entries limit', () => {
|
||||||
|
const smallTracker = new TokenUsageTracker(3)
|
||||||
|
|
||||||
|
smallTracker.record({ input_tokens: 1, output_tokens: 1, model: 'a' })
|
||||||
|
smallTracker.record({ input_tokens: 2, output_tokens: 2, model: 'b' })
|
||||||
|
smallTracker.record({ input_tokens: 3, output_tokens: 3, model: 'c' })
|
||||||
|
smallTracker.record({ input_tokens: 4, output_tokens: 4, model: 'd' })
|
||||||
|
smallTracker.record({ input_tokens: 5, output_tokens: 5, model: 'e' })
|
||||||
|
|
||||||
|
expect(smallTracker.size).toBe(3)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('clears history', () => {
|
||||||
|
tracker.record({ input_tokens: 1000, output_tokens: 100, model: 'test' })
|
||||||
|
tracker.clear()
|
||||||
|
|
||||||
|
expect(tracker.size).toBe(0)
|
||||||
|
})
|
||||||
|
})
|
||||||
211
src/utils/tokenAnalytics.ts
Normal file
211
src/utils/tokenAnalytics.ts
Normal file
@@ -0,0 +1,211 @@
|
|||||||
|
/**
|
||||||
|
* Token Analytics - Historical token usage tracking and analysis
|
||||||
|
*
|
||||||
|
* Tracks token usage patterns over time for cost optimization
|
||||||
|
* and capacity planning.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import type { BetaUsage as Usage } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
|
||||||
|
|
||||||
|
/** A single recorded per-request usage sample. */
export interface TokenUsageEntry {
  // Date.now() at record() time.
  timestamp: number
  inputTokens: number
  outputTokens: number
  // 0 when the API response omitted the cache counts.
  cacheReadTokens: number
  cacheCreationTokens: number
  model: string
}

/** Aggregated analytics derived from the recorded history. */
export interface TokenAnalytics {
  totalRequests: number
  totalInputTokens: number
  totalOutputTokens: number
  totalCacheRead: number
  totalCacheCreation: number
  // Rounded to the nearest integer.
  averageInputPerRequest: number
  averageOutputPerRequest: number
  // Percent (0-100) of all tokens served from cache reads; rounded.
  cacheHitRate: number
  // 'unknown' when the history is empty.
  mostUsedModel: string
  requestsLastHour: number
  requestsLastDay: number
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Historical Token Analytics Tracker
|
||||||
|
*
|
||||||
|
* Tracks token usage patterns over time for analytics,
|
||||||
|
* cost optimization, and capacity planning.
|
||||||
|
*/
|
||||||
|
export class TokenUsageTracker {
|
||||||
|
private history: TokenUsageEntry[] = []
|
||||||
|
private readonly maxEntries: number
|
||||||
|
|
||||||
|
constructor(maxEntries = 1000) {
|
||||||
|
this.maxEntries = maxEntries
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Record a token usage event from API response.
|
||||||
|
*/
|
||||||
|
record(usage: {
|
||||||
|
input_tokens: number
|
||||||
|
output_tokens: number
|
||||||
|
cache_read_input_tokens?: number
|
||||||
|
cache_creation_input_tokens?: number
|
||||||
|
model: string
|
||||||
|
}): void {
|
||||||
|
const entry: TokenUsageEntry = {
|
||||||
|
timestamp: Date.now(),
|
||||||
|
inputTokens: usage.input_tokens,
|
||||||
|
outputTokens: usage.output_tokens,
|
||||||
|
cacheReadTokens: usage.cache_read_input_tokens ?? 0,
|
||||||
|
cacheCreationTokens: usage.cache_creation_input_tokens ?? 0,
|
||||||
|
model: usage.model,
|
||||||
|
}
|
||||||
|
|
||||||
|
this.history.push(entry)
|
||||||
|
|
||||||
|
if (this.history.length > this.maxEntries) {
|
||||||
|
this.history = this.history.slice(-this.maxEntries)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get analytics summary for all recorded usage.
|
||||||
|
*/
|
||||||
|
getAnalytics(): TokenAnalytics {
|
||||||
|
if (this.history.length === 0) {
|
||||||
|
return {
|
||||||
|
totalRequests: 0,
|
||||||
|
totalInputTokens: 0,
|
||||||
|
totalOutputTokens: 0,
|
||||||
|
totalCacheRead: 0,
|
||||||
|
totalCacheCreation: 0,
|
||||||
|
averageInputPerRequest: 0,
|
||||||
|
averageOutputPerRequest: 0,
|
||||||
|
cacheHitRate: 0,
|
||||||
|
mostUsedModel: 'unknown',
|
||||||
|
requestsLastHour: 0,
|
||||||
|
requestsLastDay: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const now = Date.now()
|
||||||
|
const hourAgo = now - 60 * 60 * 1000
|
||||||
|
const dayAgo = now - 24 * 60 * 60 * 1000
|
||||||
|
|
||||||
|
let totalInput = 0
|
||||||
|
let totalOutput = 0
|
||||||
|
let totalCacheRead = 0
|
||||||
|
let totalCacheCreation = 0
|
||||||
|
const modelCounts = new Map<string, number>()
|
||||||
|
let requestsLastHour = 0
|
||||||
|
let requestsLastDay = 0
|
||||||
|
|
||||||
|
for (const entry of this.history) {
|
||||||
|
totalInput += entry.inputTokens
|
||||||
|
totalOutput += entry.outputTokens
|
||||||
|
totalCacheRead += entry.cacheReadTokens
|
||||||
|
totalCacheCreation += entry.cacheCreationTokens
|
||||||
|
|
||||||
|
modelCounts.set(entry.model, (modelCounts.get(entry.model) ?? 0) + 1)
|
||||||
|
|
||||||
|
if (entry.timestamp >= hourAgo) requestsLastHour++
|
||||||
|
if (entry.timestamp >= dayAgo) requestsLastDay++
|
||||||
|
}
|
||||||
|
|
||||||
|
let mostUsedModel = 'unknown'
|
||||||
|
let maxCount = 0
|
||||||
|
for (const [model, count] of modelCounts) {
|
||||||
|
if (count > maxCount) {
|
||||||
|
maxCount = count
|
||||||
|
mostUsedModel = model
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const totalRequests = this.history.length
|
||||||
|
const totalCache = totalCacheRead + totalCacheCreation
|
||||||
|
const totalTokens = totalInput + totalOutput + totalCache
|
||||||
|
const cacheHitRate = totalTokens > 0 ? (totalCacheRead / totalTokens) * 100 : 0
|
||||||
|
|
||||||
|
return {
|
||||||
|
totalRequests,
|
||||||
|
totalInputTokens: totalInput,
|
||||||
|
totalOutputTokens: totalOutput,
|
||||||
|
totalCacheRead,
|
||||||
|
totalCacheCreation,
|
||||||
|
averageInputPerRequest: Math.round(totalInput / totalRequests),
|
||||||
|
averageOutputPerRequest: Math.round(totalOutput / totalRequests),
|
||||||
|
cacheHitRate: Math.round(cacheHitRate),
|
||||||
|
mostUsedModel,
|
||||||
|
requestsLastHour,
|
||||||
|
requestsLastDay,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get recent entries within time window.
|
||||||
|
*/
|
||||||
|
getRecent(windowMs: number): TokenUsageEntry[] {
|
||||||
|
const cutoff = Date.now() - windowMs
|
||||||
|
return this.history.filter(e => e.timestamp >= cutoff)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get entries for a specific model
|
||||||
|
*/
|
||||||
|
getByModel(model: string): TokenUsageEntry[] {
|
||||||
|
return this.history.filter(e => e.model === model)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculate cost estimate (approximate)
|
||||||
|
*/
|
||||||
|
estimateCost(): { input: number; output: number; cache: number } {
|
||||||
|
const analytics = this.getAnalytics()
|
||||||
|
|
||||||
|
// Approximate pricing (adjust as needed)
|
||||||
|
const inputCost = analytics.totalInputTokens * 0.00015
|
||||||
|
const outputCost = analytics.totalOutputTokens * 0.0006
|
||||||
|
const cacheCost = analytics.totalCacheRead * 0.000075
|
||||||
|
|
||||||
|
return {
|
||||||
|
input: Math.round(inputCost * 100) / 100,
|
||||||
|
output: Math.round(outputCost * 100) / 100,
|
||||||
|
cache: Math.round(cacheCost * 100) / 100,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clear history.
|
||||||
|
*/
|
||||||
|
clear(): void {
|
||||||
|
this.history = []
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get history size.
|
||||||
|
*/
|
||||||
|
get size(): number {
|
||||||
|
return this.history.length
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Export history as JSON
|
||||||
|
*/
|
||||||
|
export(): string {
|
||||||
|
return JSON.stringify(this.history, null, 2)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Import history from JSON
|
||||||
|
*/
|
||||||
|
import(json: string): void {
|
||||||
|
try {
|
||||||
|
const entries = JSON.parse(json) as TokenUsageEntry[]
|
||||||
|
this.history = entries.slice(-this.maxEntries)
|
||||||
|
} catch {
|
||||||
|
// Invalid JSON, ignore
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
import type { BetaUsage as Usage } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
|
import type { BetaUsage as Usage } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
|
||||||
import { roughTokenCountEstimationForMessages } from '../services/tokenEstimation.js'
|
import { roughTokenCountEstimation, roughTokenCountEstimationForMessages } from '../services/tokenEstimation.js'
|
||||||
import type { AssistantMessage, Message } from '../types/message.js'
|
import type { AssistantMessage, Message } from '../types/message.js'
|
||||||
import { SYNTHETIC_MESSAGES, SYNTHETIC_MODEL } from './messages.js'
|
import { SYNTHETIC_MESSAGES, SYNTHETIC_MODEL } from './messages.js'
|
||||||
import { jsonStringify } from './slowOperations.js'
|
import { jsonStringify } from './slowOperations.js'
|
||||||
@@ -198,6 +198,198 @@ export function getAssistantMessageContentLength(
|
|||||||
return contentLength
|
return contentLength
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract thinking tokens from an assistant message.
|
||||||
|
* Returns breakdown of thinking vs output tokens.
|
||||||
|
*/
|
||||||
|
export function extractThinkingTokens(
|
||||||
|
message: AssistantMessage,
|
||||||
|
): { thinking: number; output: number; total: number } {
|
||||||
|
let thinking = 0
|
||||||
|
let output = 0
|
||||||
|
|
||||||
|
for (const block of message.message.content) {
|
||||||
|
if (block.type === 'thinking') {
|
||||||
|
thinking += roughTokenCountEstimation(block.thinking)
|
||||||
|
} else if (block.type === 'redacted_thinking') {
|
||||||
|
thinking += roughTokenCountEstimation(block.data)
|
||||||
|
} else if (block.type === 'text') {
|
||||||
|
output += roughTokenCountEstimation(block.text)
|
||||||
|
} else if (block.type === 'tool_use') {
|
||||||
|
output += roughTokenCountEstimation(jsonStringify(block.input))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return { thinking, output, total: thinking + output }
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Token usage history entry for tracking patterns over time.
 */
export interface TokenUsageEntry {
  // Date.now() at record() time.
  timestamp: number
  inputTokens: number
  outputTokens: number
  // 0 when the API response omitted the cache counts.
  cacheReadTokens: number
  cacheCreationTokens: number
  model: string
}

/**
 * Token analytics summary derived from historical data.
 */
export interface TokenAnalytics {
  totalRequests: number
  totalInputTokens: number
  totalOutputTokens: number
  totalCacheRead: number
  totalCacheCreation: number
  // Rounded to the nearest integer.
  averageInputPerRequest: number
  averageOutputPerRequest: number
  // Percent (0-100) of all tokens served from cache reads; rounded.
  cacheHitRate: number
  // 'unknown' when the history is empty.
  mostUsedModel: string
  requestsLastHour: number
  requestsLastDay: number
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Historical Token Analytics Tracker
|
||||||
|
*
|
||||||
|
* Tracks token usage patterns over time for analytics,
|
||||||
|
* cost optimization, and capacity planning.
|
||||||
|
*/
|
||||||
|
export class TokenUsageTracker {
|
||||||
|
private history: TokenUsageEntry[] = []
|
||||||
|
private readonly maxEntries: number
|
||||||
|
|
||||||
|
constructor(maxEntries = 1000) {
|
||||||
|
this.maxEntries = maxEntries
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Record a token usage event from API response.
|
||||||
|
*/
|
||||||
|
record(usage: {
|
||||||
|
input_tokens: number
|
||||||
|
output_tokens: number
|
||||||
|
cache_read_input_tokens?: number
|
||||||
|
cache_creation_input_tokens?: number
|
||||||
|
model: string
|
||||||
|
}): void {
|
||||||
|
const entry: TokenUsageEntry = {
|
||||||
|
timestamp: Date.now(),
|
||||||
|
inputTokens: usage.input_tokens,
|
||||||
|
outputTokens: usage.output_tokens,
|
||||||
|
cacheReadTokens: usage.cache_read_input_tokens ?? 0,
|
||||||
|
cacheCreationTokens: usage.cache_creation_input_tokens ?? 0,
|
||||||
|
model: usage.model,
|
||||||
|
}
|
||||||
|
|
||||||
|
this.history.push(entry)
|
||||||
|
|
||||||
|
// Trim old entries
|
||||||
|
if (this.history.length > this.maxEntries) {
|
||||||
|
this.history = this.history.slice(-this.maxEntries)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get analytics summary for all recorded usage.
|
||||||
|
*/
|
||||||
|
getAnalytics(): TokenAnalytics {
|
||||||
|
if (this.history.length === 0) {
|
||||||
|
return {
|
||||||
|
totalRequests: 0,
|
||||||
|
totalInputTokens: 0,
|
||||||
|
totalOutputTokens: 0,
|
||||||
|
totalCacheRead: 0,
|
||||||
|
totalCacheCreation: 0,
|
||||||
|
averageInputPerRequest: 0,
|
||||||
|
averageOutputPerRequest: 0,
|
||||||
|
cacheHitRate: 0,
|
||||||
|
mostUsedModel: 'unknown',
|
||||||
|
requestsLastHour: 0,
|
||||||
|
requestsLastDay: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const now = Date.now()
|
||||||
|
const hourAgo = now - 60 * 60 * 1000
|
||||||
|
const dayAgo = now - 24 * 60 * 60 * 1000
|
||||||
|
|
||||||
|
let totalInput = 0
|
||||||
|
let totalOutput = 0
|
||||||
|
let totalCacheRead = 0
|
||||||
|
let totalCacheCreation = 0
|
||||||
|
let modelCounts = new Map<string, number>()
|
||||||
|
let requestsLastHour = 0
|
||||||
|
let requestsLastDay = 0
|
||||||
|
|
||||||
|
for (const entry of this.history) {
|
||||||
|
totalInput += entry.inputTokens
|
||||||
|
totalOutput += entry.outputTokens
|
||||||
|
totalCacheRead += entry.cacheReadTokens
|
||||||
|
totalCacheCreation += entry.cacheCreationTokens
|
||||||
|
|
||||||
|
modelCounts.set(entry.model, (modelCounts.get(entry.model) ?? 0) + 1)
|
||||||
|
|
||||||
|
if (entry.timestamp >= hourAgo) requestsLastHour++
|
||||||
|
if (entry.timestamp >= dayAgo) requestsLastDay++
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find most used model
|
||||||
|
let mostUsedModel = 'unknown'
|
||||||
|
let maxCount = 0
|
||||||
|
for (const [model, count] of modelCounts) {
|
||||||
|
if (count > maxCount) {
|
||||||
|
maxCount = count
|
||||||
|
mostUsedModel = model
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const totalRequests = this.history.length
|
||||||
|
const totalCache = totalCacheRead + totalCacheCreation
|
||||||
|
const totalTokens = totalInput + totalOutput + totalCache
|
||||||
|
const cacheHitRate = totalTokens > 0 ? (totalCacheRead / totalTokens) * 100 : 0
|
||||||
|
|
||||||
|
return {
|
||||||
|
totalRequests,
|
||||||
|
totalInputTokens: totalInput,
|
||||||
|
totalOutputTokens: totalOutput,
|
||||||
|
totalCacheRead,
|
||||||
|
totalCacheCreation,
|
||||||
|
averageInputPerRequest: Math.round(totalInput / totalRequests),
|
||||||
|
averageOutputPerRequest: Math.round(totalOutput / totalRequests),
|
||||||
|
cacheHitRate: Math.round(cacheHitRate),
|
||||||
|
mostUsedModel,
|
||||||
|
requestsLastHour,
|
||||||
|
requestsLastDay,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get recent entries within time window.
|
||||||
|
*/
|
||||||
|
getRecent(windowMs: number): TokenUsageEntry[] {
|
||||||
|
const cutoff = Date.now() - windowMs
|
||||||
|
return this.history.filter(e => e.timestamp >= cutoff)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clear history.
|
||||||
|
*/
|
||||||
|
clear(): void {
|
||||||
|
this.history = []
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get history size.
|
||||||
|
*/
|
||||||
|
get size(): number {
|
||||||
|
return this.history.length
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the current context window size in tokens.
|
* Get the current context window size in tokens.
|
||||||
*
|
*
|
||||||
|
|||||||
Reference in New Issue
Block a user