feat: add thinking token extraction (#798)

* feat: add thinking token tracking and historical analytics

- extractThinkingTokens(): separate thinking from output tokens
- TokenUsageTracker class for historical analytics
- Track: cache hit rate, most used model, requests per hour/day
- Analytics: average tokens per request, totals
- Add tests (7 passing)

PR 4B: Features 1.10 + 1.11

* refactor: extract thinking and analytics to separate files

- Create thinkingTokenExtractor.ts with ThinkingTokenAnalyzer
- Create tokenAnalytics.ts with TokenUsageTracker
- Add production-grade methods and tests
- Update test imports
This commit is contained in:
ArkhAngelLifeJiggy
2026-04-21 16:25:12 +01:00
committed by GitHub
parent 761924daa7
commit 268c0398e4
6 changed files with 855 additions and 1 deletion

View File

@@ -0,0 +1,106 @@
import { describe, expect, it } from 'bun:test'
import { ThinkingTokenAnalyzer } from './thinkingTokenExtractor.js'

// Builds the minimal assistant-message shape the analyzer reads.
const asst = (content: unknown[]) =>
  ({ type: 'assistant', message: { content } }) as any

describe('ThinkingTokenAnalyzer', () => {
  describe('extract', () => {
    it('extracts thinking and output separately', () => {
      const result = ThinkingTokenAnalyzer.extract(
        asst([
          { type: 'thinking', thinking: 'Let me think about this...' },
          { type: 'text', text: 'Here is my answer.' },
        ]),
      )
      expect(result.thinking).toBeGreaterThan(0)
      expect(result.output).toBeGreaterThan(0)
      expect(result.total).toBe(result.thinking + result.output)
    })

    it('handles no thinking', () => {
      const result = ThinkingTokenAnalyzer.extract(
        asst([{ type: 'text', text: 'Hello world' }]),
      )
      expect(result.thinking).toBe(0)
      expect(result.output).toBeGreaterThan(0)
    })

    it('handles redacted thinking', () => {
      const result = ThinkingTokenAnalyzer.extract(
        asst([
          { type: 'redacted_thinking', data: '[thinking hidden]' },
          { type: 'text', text: 'Answer here.' },
        ]),
      )
      expect(result.thinking).toBeGreaterThan(0)
      expect(result.output).toBeGreaterThan(0)
    })
  })

  describe('analyze', () => {
    it('calculates percentages', () => {
      const analysis = ThinkingTokenAnalyzer.analyze(
        asst([
          { type: 'thinking', thinking: 'Thinking1 Thinking2 Thinking3' },
          { type: 'text', text: 'Output1 Output2' },
        ]),
      )
      expect(analysis.hasThinking).toBe(true)
      expect(analysis.thinkingPercentage).toBeGreaterThan(0)
      expect(analysis.outputPercentage).toBeGreaterThan(0)
      expect(analysis.reasoningComplexity).toBeTruthy()
    })
  })

  describe('hasSignificantThinking', () => {
    it('detects significant thinking', () => {
      const heavy = asst([
        { type: 'thinking', thinking: 'x'.repeat(500) },
        { type: 'text', text: 'short' },
      ])
      expect(ThinkingTokenAnalyzer.hasSignificantThinking(heavy, 20)).toBe(true)
    })

    it('rejects minimal thinking', () => {
      const light = asst([
        { type: 'thinking', thinking: 'a' },
        { type: 'text', text: 'much longer output text here with more content' },
      ])
      expect(ThinkingTokenAnalyzer.hasSignificantThinking(light, 20)).toBe(false)
    })
  })
})

View File

@@ -0,0 +1,192 @@
/**
* Thinking Token Extractor - Production-grade thinking token analysis
*
* Extracts and analyzes thinking tokens from assistant messages.
* Provides detailed breakdown, statistics, and insights.
*/
import { roughTokenCountEstimation } from '../services/tokenEstimation.js'
import { jsonStringify } from './slowOperations.js'
import type { AssistantMessage, Message } from '../types/message.js'
/** One reasoning block pulled from an assistant message's content. */
export interface ThinkingBlock {
  /** 'thinking' carries readable text; 'redacted_thinking' carries the opaque `data` payload. */
  type: 'thinking' | 'redacted_thinking'
  /** Raw block text (the `thinking` string, or the redacted `data` string). */
  content: string
  /** Estimated token count for `content` (rough estimation, not an exact tokenizer count). */
  tokens: number
}

/** One visible-output block (text or tool call) from an assistant message. */
export interface OutputBlock {
  type: 'text' | 'tool_use'
  /** Text content, or the JSON-stringified tool input for 'tool_use' blocks. */
  content: string
  /** Estimated token count for `content`. */
  tokens: number
}

/** Result of ThinkingTokenAnalyzer.extract: thinking-vs-output token split. */
export interface ThinkingTokenBreakdown {
  /** Estimated tokens across all thinking/redacted_thinking blocks. */
  thinking: number
  /** Estimated tokens across all text/tool_use blocks. */
  output: number
  /** thinking + output. */
  total: number
  /** Per-block detail for the thinking side. */
  thinkingBlocks: ThinkingBlock[]
  /** Per-block detail for the output side. */
  outputBlocks: OutputBlock[]
}

/** Result of ThinkingTokenAnalyzer.analyze: derived thinking-usage insights. */
export interface ThinkingAnalysis {
  /** True when any thinking tokens were found. */
  hasThinking: boolean
  /** Thinking share of total tokens, rounded to one decimal place (0-100). */
  thinkingPercentage: number
  /** Output share of total tokens, rounded to one decimal place (0-100). */
  outputPercentage: number
  /** Total number of content blocks (thinking + output). */
  blockCount: number
  /** Mean tokens per thinking block, rounded to the nearest integer. */
  avgThinkingBlockSize: number
  /** Mean tokens per output block, rounded to the nearest integer. */
  avgOutputBlockSize: number
  /** Combined character length of all block contents. */
  totalTextLength: number
  /** Heuristic: >30% thinking or >5 thinking blocks = high; >10% or >2 blocks = medium. */
  reasoningComplexity: 'low' | 'medium' | 'high'
}
export class ThinkingTokenAnalyzer {
  /**
   * Extract detailed thinking vs output breakdown.
   *
   * Walks every content block, estimates its token count, and buckets it as
   * thinking (thinking / redacted_thinking) or output (text / tool_use).
   * Unrecognized block types are ignored.
   */
  static extract(message: AssistantMessage): ThinkingTokenBreakdown {
    const thinkingBlocks: ThinkingBlock[] = []
    const outputBlocks: OutputBlock[] = []

    for (const part of message.message.content) {
      switch (part.type) {
        case 'thinking':
          thinkingBlocks.push({
            type: 'thinking',
            content: part.thinking,
            tokens: roughTokenCountEstimation(part.thinking),
          })
          break
        case 'redacted_thinking':
          thinkingBlocks.push({
            type: 'redacted_thinking',
            content: part.data,
            tokens: roughTokenCountEstimation(part.data),
          })
          break
        case 'text':
          outputBlocks.push({
            type: 'text',
            content: part.text,
            tokens: roughTokenCountEstimation(part.text),
          })
          break
        case 'tool_use': {
          // Tool calls count toward output; serialize the input to estimate size.
          const serialized = jsonStringify(part.input)
          outputBlocks.push({
            type: 'tool_use',
            content: serialized,
            tokens: roughTokenCountEstimation(serialized),
          })
          break
        }
      }
    }

    const thinking = thinkingBlocks.reduce((sum, b) => sum + b.tokens, 0)
    const output = outputBlocks.reduce((sum, b) => sum + b.tokens, 0)
    return {
      thinking,
      output,
      total: thinking + output,
      thinkingBlocks,
      outputBlocks,
    }
  }

  /**
   * Simple extraction for quick use (alias of extract).
   */
  static extractSimple(message: AssistantMessage): ThinkingTokenBreakdown {
    return this.extract(message)
  }

  /**
   * Analyze thinking patterns and provide insights: percentages (one decimal),
   * per-side average block sizes (rounded), total text length, and a
   * heuristic complexity bucket.
   */
  static analyze(message: AssistantMessage): ThinkingAnalysis {
    const { thinking, output, total, thinkingBlocks, outputBlocks } =
      this.extract(message)

    // Share of the total token count, as a 0-100 percentage.
    const share = (tokens: number): number =>
      total > 0 ? (tokens / total) * 100 : 0
    // Mean token count per block; 0 for an empty side.
    const meanTokens = (blocks: Array<{ tokens: number }>): number =>
      blocks.length > 0
        ? blocks.reduce((sum, b) => sum + b.tokens, 0) / blocks.length
        : 0

    const thinkingPercentage = share(thinking)
    const outputPercentage = share(output)

    let totalTextLength = 0
    for (const b of thinkingBlocks) totalTextLength += b.content.length
    for (const b of outputBlocks) totalTextLength += b.content.length

    // Complexity based on thinking percentage and block count.
    const reasoningComplexity: 'low' | 'medium' | 'high' =
      thinkingPercentage > 30 || thinkingBlocks.length > 5
        ? 'high'
        : thinkingPercentage > 10 || thinkingBlocks.length > 2
          ? 'medium'
          : 'low'

    return {
      hasThinking: thinking > 0,
      thinkingPercentage: Math.round(thinkingPercentage * 10) / 10,
      outputPercentage: Math.round(outputPercentage * 10) / 10,
      blockCount: thinkingBlocks.length + outputBlocks.length,
      avgThinkingBlockSize: Math.round(meanTokens(thinkingBlocks)),
      avgOutputBlockSize: Math.round(meanTokens(outputBlocks)),
      totalTextLength,
      reasoningComplexity,
    }
  }

  /**
   * Check if message has significant thinking (thinking share >= threshold,
   * compared against the rounded percentage).
   */
  static hasSignificantThinking(
    message: AssistantMessage,
    thresholdPercent = 20,
  ): boolean {
    return this.analyze(message).thinkingPercentage >= thresholdPercent
  }

  /**
   * Get assistant messages with significant thinking (default 20% threshold).
   */
  static filterThinkingMessages(messages: Message[]): AssistantMessage[] {
    const significant: AssistantMessage[] = []
    for (const m of messages) {
      if (m.type === 'assistant' && this.hasSignificantThinking(m)) {
        significant.push(m)
      }
    }
    return significant
  }

  /**
   * Calculate total thinking tokens across messages.
   */
  static totalThinkingTokens(messages: Message[]): number {
    let sum = 0
    for (const m of messages) {
      if (m.type === 'assistant') {
        sum += this.extract(m).thinking
      }
    }
    return sum
  }
}
/**
 * Legacy export for backward compatibility.
 * Delegates to ThinkingTokenAnalyzer.extract and strips the per-block detail.
 */
export function extractThinkingTokens(
  message: AssistantMessage,
): { thinking: number; output: number; total: number } {
  const { thinking, output, total } = ThinkingTokenAnalyzer.extract(message)
  return { thinking, output, total }
}

View File

@@ -0,0 +1,69 @@
import { describe, expect, it } from 'bun:test'
import { extractThinkingTokens } from './tokens.js'

// Wraps content blocks in the minimal assistant-message shape the extractor expects.
function asAssistantMessage(content: unknown[]) {
  return { type: 'assistant', message: { content } } as any
}

describe('extractThinkingTokens', () => {
  it('extracts thinking and output separately', () => {
    const result = extractThinkingTokens(
      asAssistantMessage([
        { type: 'thinking', thinking: 'Let me think about this...' },
        { type: 'text', text: 'Here is my answer.' },
      ]),
    )
    expect(result.thinking).toBeGreaterThan(0)
    expect(result.output).toBeGreaterThan(0)
    expect(result.total).toBe(result.thinking + result.output)
  })

  it('handles no thinking', () => {
    const result = extractThinkingTokens(
      asAssistantMessage([{ type: 'text', text: 'Hello world' }]),
    )
    expect(result.thinking).toBe(0)
    expect(result.output).toBeGreaterThan(0)
  })

  it('handles redacted thinking', () => {
    const result = extractThinkingTokens(
      asAssistantMessage([
        { type: 'redacted_thinking', data: '[thinking hidden]' },
        { type: 'text', text: 'Answer here.' },
      ]),
    )
    expect(result.thinking).toBeGreaterThan(0)
    expect(result.output).toBeGreaterThan(0)
  })

  it('handles tool use', () => {
    const result = extractThinkingTokens(
      asAssistantMessage([
        { type: 'tool_use', id: 'tool_1', name: 'bash', input: { cmd: 'echo test' } },
        { type: 'text', text: 'Ran command.' },
      ]),
    )
    expect(result.output).toBeGreaterThan(0)
  })
})

View File

@@ -0,0 +1,84 @@
import { beforeEach, describe, expect, it } from 'bun:test'
import { TokenUsageTracker } from './tokenAnalytics.js'

describe('TokenUsageTracker', () => {
  let usageTracker: TokenUsageTracker

  beforeEach(() => {
    usageTracker = new TokenUsageTracker(100)
  })

  it('records token usage', () => {
    usageTracker.record({
      input_tokens: 1000,
      output_tokens: 500,
      cache_read_input_tokens: 200,
      cache_creation_input_tokens: 100,
      model: 'claude-sonnet-4-5-20250514',
    })
    expect(usageTracker.size).toBe(1)
  })

  it('calculates analytics', () => {
    for (const [input, output] of [[1000, 500], [2000, 300]] as const) {
      usageTracker.record({
        input_tokens: input,
        output_tokens: output,
        model: 'claude-sonnet-4-5-20250514',
      })
    }
    const analytics = usageTracker.getAnalytics()
    expect(analytics.totalRequests).toBe(2)
    expect(analytics.totalInputTokens).toBe(3000)
    expect(analytics.totalOutputTokens).toBe(800)
    expect(analytics.averageInputPerRequest).toBe(1500)
    expect(analytics.averageOutputPerRequest).toBe(400)
  })

  it('tracks cache hit rate', () => {
    usageTracker.record({
      input_tokens: 1000,
      output_tokens: 500,
      cache_read_input_tokens: 500, // 33% cache
      model: 'claude-sonnet-4-5-20250514',
    })
    expect(usageTracker.getAnalytics().cacheHitRate).toBeGreaterThan(0)
  })

  it('tracks most used model', () => {
    for (const model of ['sonnet', 'sonnet', 'opus']) {
      usageTracker.record({ input_tokens: 1000, output_tokens: 100, model })
    }
    expect(usageTracker.getAnalytics().mostUsedModel).toBe('sonnet')
  })

  it('respects max entries limit', () => {
    const smallTracker = new TokenUsageTracker(3)
    ;['a', 'b', 'c', 'd', 'e'].forEach((model, i) => {
      smallTracker.record({ input_tokens: i + 1, output_tokens: i + 1, model })
    })
    expect(smallTracker.size).toBe(3)
  })

  it('clears history', () => {
    usageTracker.record({ input_tokens: 1000, output_tokens: 100, model: 'test' })
    usageTracker.clear()
    expect(usageTracker.size).toBe(0)
  })
})

211
src/utils/tokenAnalytics.ts Normal file
View File

@@ -0,0 +1,211 @@
/**
* Token Analytics - Historical token usage tracking and analysis
*
* Tracks token usage patterns over time for cost optimization
* and capacity planning.
*/
import type { BetaUsage as Usage } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
/** One recorded usage event (snake_case API fields normalized to camelCase). */
export interface TokenUsageEntry {
  /** Epoch milliseconds (Date.now()) at the moment record() was called. */
  timestamp: number
  inputTokens: number
  outputTokens: number
  /** Tokens read from prompt cache; 0 when the caller omitted the field. */
  cacheReadTokens: number
  /** Tokens spent creating cache entries; 0 when the caller omitted the field. */
  cacheCreationTokens: number
  /** Model identifier as supplied by the caller. */
  model: string
}

/** Aggregated summary computed over the tracker's retained history. */
export interface TokenAnalytics {
  totalRequests: number
  totalInputTokens: number
  totalOutputTokens: number
  totalCacheRead: number
  totalCacheCreation: number
  /** Rounded mean input tokens per request; 0 when history is empty. */
  averageInputPerRequest: number
  /** Rounded mean output tokens per request; 0 when history is empty. */
  averageOutputPerRequest: number
  /** Rounded percentage of cache-read tokens over all tokens seen (0-100). */
  cacheHitRate: number
  /** Model with the most recorded requests; 'unknown' when history is empty. */
  mostUsedModel: string
  requestsLastHour: number
  requestsLastDay: number
}
/**
 * Historical Token Analytics Tracker
 *
 * Tracks token usage patterns over time for analytics,
 * cost optimization, and capacity planning.
 */
export class TokenUsageTracker {
  /** Usage events in insertion order; trimmed to the newest `maxEntries`. */
  private history: TokenUsageEntry[] = []
  private readonly maxEntries: number

  /**
   * @param maxEntries Maximum history entries retained (oldest dropped first).
   */
  constructor(maxEntries = 1000) {
    this.maxEntries = maxEntries
  }

  /**
   * Record a token usage event from API response.
   * Optional cache counters default to 0 when absent.
   */
  record(usage: {
    input_tokens: number
    output_tokens: number
    cache_read_input_tokens?: number
    cache_creation_input_tokens?: number
    model: string
  }): void {
    const entry: TokenUsageEntry = {
      timestamp: Date.now(),
      inputTokens: usage.input_tokens,
      outputTokens: usage.output_tokens,
      cacheReadTokens: usage.cache_read_input_tokens ?? 0,
      cacheCreationTokens: usage.cache_creation_input_tokens ?? 0,
      model: usage.model,
    }
    this.history.push(entry)
    // Keep only the newest maxEntries events.
    if (this.history.length > this.maxEntries) {
      this.history = this.history.slice(-this.maxEntries)
    }
  }

  /**
   * Get analytics summary for all recorded usage.
   * Returns a zeroed summary (mostUsedModel 'unknown') when history is empty.
   */
  getAnalytics(): TokenAnalytics {
    if (this.history.length === 0) {
      return {
        totalRequests: 0,
        totalInputTokens: 0,
        totalOutputTokens: 0,
        totalCacheRead: 0,
        totalCacheCreation: 0,
        averageInputPerRequest: 0,
        averageOutputPerRequest: 0,
        cacheHitRate: 0,
        mostUsedModel: 'unknown',
        requestsLastHour: 0,
        requestsLastDay: 0,
      }
    }
    const now = Date.now()
    const hourAgo = now - 60 * 60 * 1000
    const dayAgo = now - 24 * 60 * 60 * 1000
    let totalInput = 0
    let totalOutput = 0
    let totalCacheRead = 0
    let totalCacheCreation = 0
    const modelCounts = new Map<string, number>()
    let requestsLastHour = 0
    let requestsLastDay = 0
    // Single pass: accumulate totals, per-model counts, and recency counters.
    for (const entry of this.history) {
      totalInput += entry.inputTokens
      totalOutput += entry.outputTokens
      totalCacheRead += entry.cacheReadTokens
      totalCacheCreation += entry.cacheCreationTokens
      modelCounts.set(entry.model, (modelCounts.get(entry.model) ?? 0) + 1)
      if (entry.timestamp >= hourAgo) requestsLastHour++
      if (entry.timestamp >= dayAgo) requestsLastDay++
    }
    // Find most used model (first-seen wins on ties, matching Map iteration order).
    let mostUsedModel = 'unknown'
    let maxCount = 0
    for (const [model, count] of modelCounts) {
      if (count > maxCount) {
        maxCount = count
        mostUsedModel = model
      }
    }
    const totalRequests = this.history.length
    const totalCache = totalCacheRead + totalCacheCreation
    const totalTokens = totalInput + totalOutput + totalCache
    // Hit rate = cache-read tokens as a share of every token seen (input + output + cache).
    const cacheHitRate = totalTokens > 0 ? (totalCacheRead / totalTokens) * 100 : 0
    return {
      totalRequests,
      totalInputTokens: totalInput,
      totalOutputTokens: totalOutput,
      totalCacheRead,
      totalCacheCreation,
      averageInputPerRequest: Math.round(totalInput / totalRequests),
      averageOutputPerRequest: Math.round(totalOutput / totalRequests),
      cacheHitRate: Math.round(cacheHitRate),
      mostUsedModel,
      requestsLastHour,
      requestsLastDay,
    }
  }

  /**
   * Get recent entries within time window.
   * @param windowMs Window length in milliseconds, ending now.
   */
  getRecent(windowMs: number): TokenUsageEntry[] {
    const cutoff = Date.now() - windowMs
    return this.history.filter(e => e.timestamp >= cutoff)
  }

  /**
   * Get entries for a specific model.
   */
  getByModel(model: string): TokenUsageEntry[] {
    return this.history.filter(e => e.model === model)
  }

  /**
   * Calculate cost estimate (approximate).
   * NOTE(review): per-token rates are hard-coded approximations — verify
   * against current pricing before relying on these figures.
   */
  estimateCost(): { input: number; output: number; cache: number } {
    const analytics = this.getAnalytics()
    // Approximate pricing (adjust as needed); results rounded to cents.
    const inputCost = analytics.totalInputTokens * 0.00015
    const outputCost = analytics.totalOutputTokens * 0.0006
    const cacheCost = analytics.totalCacheRead * 0.000075
    return {
      input: Math.round(inputCost * 100) / 100,
      output: Math.round(outputCost * 100) / 100,
      cache: Math.round(cacheCost * 100) / 100,
    }
  }

  /**
   * Clear history.
   */
  clear(): void {
    this.history = []
  }

  /**
   * Get history size.
   */
  get size(): number {
    return this.history.length
  }

  /**
   * Export history as pretty-printed JSON.
   */
  export(): string {
    return JSON.stringify(this.history, null, 2)
  }

  /**
   * Import history from JSON produced by {@link export}.
   *
   * Fix: the parsed payload is now validated before replacing history —
   * non-array payloads are rejected and entries that do not match the
   * TokenUsageEntry shape are dropped, instead of blindly trusting
   * JSON.parse output.
   */
  import(json: string): void {
    try {
      const parsed: unknown = JSON.parse(json)
      if (!Array.isArray(parsed)) return
      this.history = parsed.filter(isTokenUsageEntry).slice(-this.maxEntries)
    } catch {
      // Invalid JSON, ignore (best-effort import)
    }
  }
}

/** Runtime shape check for history entries parsed from untrusted JSON. */
function isTokenUsageEntry(value: unknown): value is TokenUsageEntry {
  if (typeof value !== 'object' || value === null) return false
  const e = value as Record<string, unknown>
  return (
    typeof e.timestamp === 'number' &&
    typeof e.inputTokens === 'number' &&
    typeof e.outputTokens === 'number' &&
    typeof e.cacheReadTokens === 'number' &&
    typeof e.cacheCreationTokens === 'number' &&
    typeof e.model === 'string'
  )
}

View File

@@ -1,5 +1,5 @@
import type { BetaUsage as Usage } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
import { roughTokenCountEstimationForMessages } from '../services/tokenEstimation.js'
import { roughTokenCountEstimation, roughTokenCountEstimationForMessages } from '../services/tokenEstimation.js'
import type { AssistantMessage, Message } from '../types/message.js'
import { SYNTHETIC_MESSAGES, SYNTHETIC_MODEL } from './messages.js'
import { jsonStringify } from './slowOperations.js'
@@ -198,6 +198,198 @@ export function getAssistantMessageContentLength(
return contentLength
}
/**
 * Extract thinking tokens from an assistant message.
 * Returns breakdown of thinking vs output tokens; unrecognized block
 * types contribute nothing.
 */
export function extractThinkingTokens(
  message: AssistantMessage,
): { thinking: number; output: number; total: number } {
  let thinkingTotal = 0
  let outputTotal = 0
  for (const part of message.message.content) {
    switch (part.type) {
      case 'thinking':
        thinkingTotal += roughTokenCountEstimation(part.thinking)
        break
      case 'redacted_thinking':
        thinkingTotal += roughTokenCountEstimation(part.data)
        break
      case 'text':
        outputTotal += roughTokenCountEstimation(part.text)
        break
      case 'tool_use':
        // Tool calls count toward output; serialize the input to estimate size.
        outputTotal += roughTokenCountEstimation(jsonStringify(part.input))
        break
    }
  }
  return {
    thinking: thinkingTotal,
    output: outputTotal,
    total: thinkingTotal + outputTotal,
  }
}
/**
 * Token usage history entry for tracking patterns over time.
 * Field names normalize the API's snake_case counters to camelCase.
 */
export interface TokenUsageEntry {
  /** Epoch milliseconds (Date.now()) at the moment record() was called. */
  timestamp: number
  inputTokens: number
  outputTokens: number
  /** Tokens read from prompt cache; 0 when the caller omitted the field. */
  cacheReadTokens: number
  /** Tokens spent creating cache entries; 0 when the caller omitted the field. */
  cacheCreationTokens: number
  /** Model identifier as supplied by the caller. */
  model: string
}
/**
 * Token analytics summary from historical data.
 */
export interface TokenAnalytics {
  totalRequests: number
  totalInputTokens: number
  totalOutputTokens: number
  totalCacheRead: number
  totalCacheCreation: number
  /** Rounded mean input tokens per request; 0 when history is empty. */
  averageInputPerRequest: number
  /** Rounded mean output tokens per request; 0 when history is empty. */
  averageOutputPerRequest: number
  /** Rounded percentage of cache-read tokens over all tokens seen (0-100). */
  cacheHitRate: number
  /** Model with the most recorded requests; 'unknown' when history is empty. */
  mostUsedModel: string
  requestsLastHour: number
  requestsLastDay: number
}
/**
 * Historical Token Analytics Tracker
 *
 * Tracks token usage patterns over time for analytics,
 * cost optimization, and capacity planning.
 */
export class TokenUsageTracker {
  /** Usage events in insertion order; trimmed to the newest `maxEntries`. */
  private history: TokenUsageEntry[] = []
  private readonly maxEntries: number

  /**
   * @param maxEntries Maximum history entries retained (oldest dropped first).
   */
  constructor(maxEntries = 1000) {
    this.maxEntries = maxEntries
  }

  /**
   * Record a token usage event from API response.
   * Optional cache counters default to 0 when absent.
   */
  record(usage: {
    input_tokens: number
    output_tokens: number
    cache_read_input_tokens?: number
    cache_creation_input_tokens?: number
    model: string
  }): void {
    const entry: TokenUsageEntry = {
      timestamp: Date.now(),
      inputTokens: usage.input_tokens,
      outputTokens: usage.output_tokens,
      cacheReadTokens: usage.cache_read_input_tokens ?? 0,
      cacheCreationTokens: usage.cache_creation_input_tokens ?? 0,
      model: usage.model,
    }
    this.history.push(entry)
    // Trim old entries so at most maxEntries are retained.
    if (this.history.length > this.maxEntries) {
      this.history = this.history.slice(-this.maxEntries)
    }
  }

  /**
   * Get analytics summary for all recorded usage.
   * Returns a zeroed summary (mostUsedModel 'unknown') when history is empty.
   */
  getAnalytics(): TokenAnalytics {
    if (this.history.length === 0) {
      return {
        totalRequests: 0,
        totalInputTokens: 0,
        totalOutputTokens: 0,
        totalCacheRead: 0,
        totalCacheCreation: 0,
        averageInputPerRequest: 0,
        averageOutputPerRequest: 0,
        cacheHitRate: 0,
        mostUsedModel: 'unknown',
        requestsLastHour: 0,
        requestsLastDay: 0,
      }
    }
    const now = Date.now()
    const hourAgo = now - 60 * 60 * 1000
    const dayAgo = now - 24 * 60 * 60 * 1000
    let totalInput = 0
    let totalOutput = 0
    let totalCacheRead = 0
    let totalCacheCreation = 0
    // const (was `let`): the Map binding is never reassigned, only mutated.
    const modelCounts = new Map<string, number>()
    let requestsLastHour = 0
    let requestsLastDay = 0
    // Single pass: accumulate totals, per-model counts, and recency counters.
    for (const entry of this.history) {
      totalInput += entry.inputTokens
      totalOutput += entry.outputTokens
      totalCacheRead += entry.cacheReadTokens
      totalCacheCreation += entry.cacheCreationTokens
      modelCounts.set(entry.model, (modelCounts.get(entry.model) ?? 0) + 1)
      if (entry.timestamp >= hourAgo) requestsLastHour++
      if (entry.timestamp >= dayAgo) requestsLastDay++
    }
    // Find most used model (first-seen wins on ties, matching Map iteration order).
    let mostUsedModel = 'unknown'
    let maxCount = 0
    for (const [model, count] of modelCounts) {
      if (count > maxCount) {
        maxCount = count
        mostUsedModel = model
      }
    }
    const totalRequests = this.history.length
    const totalCache = totalCacheRead + totalCacheCreation
    const totalTokens = totalInput + totalOutput + totalCache
    // Hit rate = cache-read tokens as a share of every token seen (input + output + cache).
    const cacheHitRate = totalTokens > 0 ? (totalCacheRead / totalTokens) * 100 : 0
    return {
      totalRequests,
      totalInputTokens: totalInput,
      totalOutputTokens: totalOutput,
      totalCacheRead,
      totalCacheCreation,
      averageInputPerRequest: Math.round(totalInput / totalRequests),
      averageOutputPerRequest: Math.round(totalOutput / totalRequests),
      cacheHitRate: Math.round(cacheHitRate),
      mostUsedModel,
      requestsLastHour,
      requestsLastDay,
    }
  }

  /**
   * Get recent entries within time window.
   * @param windowMs Window length in milliseconds, ending now.
   */
  getRecent(windowMs: number): TokenUsageEntry[] {
    const cutoff = Date.now() - windowMs
    return this.history.filter(e => e.timestamp >= cutoff)
  }

  /**
   * Clear history.
   */
  clear(): void {
    this.history = []
  }

  /**
   * Get history size.
   */
  get size(): number {
    return this.history.length
  }
}
/**
* Get the current context window size in tokens.
*