Files
orcs-code/src/cost-tracker.ts
viudes 9e23c2bec4 feat(api): expose cache metrics in REPL + normalize across providers (#813)
* feat(api): expose cache metrics in REPL + /cache-stats command

* fix(api): normalize Kimi/DeepSeek/Gemini cache fields through shim layer

* test(api): cover /cache-stats rendering + fix CacheMetrics docstring drift

* fix(api): always reset cache turn counter + include date in /cache-stats rows

* refactor(api): unify shim usage builder + add cost-tracker wiring test

* fix(api): classify private-IP/self-hosted OpenAI endpoints as N/A instead of cold

* fix(api): require colon guard on IPv6 ULA prefix to avoid public-host over-match

* perf(api): ring buffer for cache history + hit rate clamp + .localhost TLD

* fix(api): null guards on formatters + document Codex Responses API shape

* fix(api): defensive start-of-turn reset + config gate fallback + env var docs

* fix(api): trust forwarded cache data on self-hosted URLs (data-driven)

* refactor(api): delegate streaming Responses usage to shared makeUsage helper
2026-04-25 12:38:25 +08:00

394 lines
13 KiB
TypeScript

import type { BetaUsage as Usage } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
import chalk from 'chalk'
import {
extractCacheMetrics,
resolveCacheProvider,
} from './services/api/cacheMetrics.js'
import {
recordRequest as recordCacheRequest,
resetSessionCacheStats,
} from './services/api/cacheStatsTracker.js'
import { getAPIProvider, isGithubNativeAnthropicMode } from './utils/model/providers.js'
import {
addToTotalCostState,
addToTotalLinesChanged,
getCostCounter,
getModelUsage,
getSdkBetas,
getSessionId,
getTokenCounter,
getTotalAPIDuration,
getTotalAPIDurationWithoutRetries,
getTotalCacheCreationInputTokens,
getTotalCacheReadInputTokens,
getTotalCostUSD,
getTotalDuration,
getTotalInputTokens,
getTotalLinesAdded,
getTotalLinesRemoved,
getTotalOutputTokens,
getTotalToolDuration,
getTotalWebSearchRequests,
getUsageForModel,
hasUnknownModelCost,
resetCostState as baseResetCostState,
resetStateForTests,
setCostStateForRestore,
setHasUnknownModelCost,
} from './bootstrap/state.js'
import type { ModelUsage } from './entrypoints/agentSdkTypes.js'
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
logEvent,
} from './services/analytics/index.js'
import { getAdvisorUsage } from './utils/advisor.js'
import {
getCurrentProjectConfig,
saveCurrentProjectConfig,
} from './utils/config.js'
import {
getContextWindowForModel,
getModelMaxOutputTokens,
} from './utils/context.js'
import { isFastModeEnabled } from './utils/fastMode.js'
import { formatDuration, formatNumber } from './utils/format.js'
import type { FpsMetrics } from './utils/fpsTracker.js'
import { getCanonicalName } from './utils/model/model.js'
import { calculateUSDCost } from './utils/modelCost.js'
export {
getTotalCostUSD as getTotalCost,
getTotalDuration,
getTotalAPIDuration,
getTotalAPIDurationWithoutRetries,
addToTotalLinesChanged,
getTotalLinesAdded,
getTotalLinesRemoved,
getTotalInputTokens,
getTotalOutputTokens,
getTotalCacheReadInputTokens,
getTotalCacheCreationInputTokens,
getTotalWebSearchRequests,
formatCost,
hasUnknownModelCost,
resetStateForTests,
setHasUnknownModelCost,
getModelUsage,
getUsageForModel,
}
/**
 * Drop-in replacement for bootstrap's resetCostState() that additionally
 * zeroes the cache-stats tracker. Exported under the original name so every
 * existing caller (/clear, /compact, session switches) picks up the cache
 * reset without any call-site changes.
 */
export function resetCostState(): void {
  baseResetCostState()
  resetSessionCacheStats()
}
/**
 * Shape of the per-session cost totals persisted in the project config.
 * Produced by getStoredSessionCosts() when reading back, and consumed by
 * setCostStateForRestore() when resuming a session.
 */
type StoredCostState = {
  // Accumulated spend for the session, in USD.
  totalCostUSD: number
  // API time including retries (contrast with the WithoutRetries field).
  totalAPIDuration: number
  totalAPIDurationWithoutRetries: number
  // Time spent in tool executions.
  totalToolDuration: number
  totalLinesAdded: number
  totalLinesRemoved: number
  // Restored verbatim from config; undefined when never recorded.
  lastDuration: number | undefined
  // Per-model usage; contextWindow/maxOutputTokens are recomputed from the
  // model at load time rather than persisted (see getStoredSessionCosts).
  modelUsage: { [modelName: string]: ModelUsage } | undefined
}
/**
 * Read previously-persisted cost totals for `sessionId` from the project
 * config. Yields undefined when the stored data belongs to a different
 * session. Call this BEFORE saveCurrentSessionCosts() overwrites the config.
 */
export function getStoredSessionCosts(
  sessionId: string,
): StoredCostState | undefined {
  const config = getCurrentProjectConfig()

  // Stored totals are only meaningful for the session that wrote them.
  if (config.lastSessionId !== sessionId) {
    return undefined
  }

  // Re-attach context window / max output token limits — these are derived
  // from the model definition rather than persisted alongside the counts.
  const storedUsage = config.lastModelUsage
  const modelUsage: { [modelName: string]: ModelUsage } | undefined =
    storedUsage
      ? Object.fromEntries(
          Object.entries(storedUsage).map(([model, usage]) => [
            model,
            {
              ...usage,
              contextWindow: getContextWindowForModel(model, getSdkBetas()),
              maxOutputTokens: getModelMaxOutputTokens(model).default,
            },
          ]),
        )
      : undefined

  return {
    totalCostUSD: config.lastCost ?? 0,
    totalAPIDuration: config.lastAPIDuration ?? 0,
    totalAPIDurationWithoutRetries: config.lastAPIDurationWithoutRetries ?? 0,
    totalToolDuration: config.lastToolDuration ?? 0,
    totalLinesAdded: config.lastLinesAdded ?? 0,
    totalLinesRemoved: config.lastLinesRemoved ?? 0,
    lastDuration: config.lastDuration,
    modelUsage,
  }
}
/**
 * Attempt to resume cost tracking for a session. No-op unless `sessionId`
 * matches the last session persisted to the project config.
 * @returns true when stored cost state was found and applied, false otherwise
 */
export function restoreCostStateForSession(sessionId: string): boolean {
  const stored = getStoredSessionCosts(sessionId)
  if (stored === undefined) {
    return false
  }
  setCostStateForRestore(stored)
  return true
}
/**
* Saves the current session's costs to project config.
* Call this before switching sessions to avoid losing accumulated costs.
*/
export function saveCurrentSessionCosts(fpsMetrics?: FpsMetrics): void {
saveCurrentProjectConfig(current => ({
...current,
lastCost: getTotalCostUSD(),
lastAPIDuration: getTotalAPIDuration(),
lastAPIDurationWithoutRetries: getTotalAPIDurationWithoutRetries(),
lastToolDuration: getTotalToolDuration(),
lastDuration: getTotalDuration(),
lastLinesAdded: getTotalLinesAdded(),
lastLinesRemoved: getTotalLinesRemoved(),
lastTotalInputTokens: getTotalInputTokens(),
lastTotalOutputTokens: getTotalOutputTokens(),
lastTotalCacheCreationInputTokens: getTotalCacheCreationInputTokens(),
lastTotalCacheReadInputTokens: getTotalCacheReadInputTokens(),
lastTotalWebSearchRequests: getTotalWebSearchRequests(),
lastFpsAverage: fpsMetrics?.averageFps,
lastFpsLow1Pct: fpsMetrics?.low1PctFps,
lastModelUsage: Object.fromEntries(
Object.entries(getModelUsage()).map(([model, usage]) => [
model,
{
inputTokens: usage.inputTokens,
outputTokens: usage.outputTokens,
cacheReadInputTokens: usage.cacheReadInputTokens,
cacheCreationInputTokens: usage.cacheCreationInputTokens,
webSearchRequests: usage.webSearchRequests,
costUSD: usage.costUSD,
},
]),
),
lastSessionId: getSessionId(),
}))
}
/**
 * Render a dollar amount. Costs above $0.50 are shown rounded to the cent
 * with two decimals; smaller costs keep `maxDecimalPlaces` digits so tiny
 * per-request charges don't collapse to $0.00.
 */
function formatCost(cost: number, maxDecimalPlaces: number = 4): string {
  if (cost > 0.5) {
    const cents = Math.round(cost * 100) / 100
    return `$${cents.toFixed(2)}`
  }
  return `$${cost.toFixed(maxDecimalPlaces)}`
}
/**
 * Summarize per-model token usage for display. Entries sharing a canonical
 * (short) model name are merged into one row; cache read/write and web
 * search counts are appended only when non-zero.
 */
function formatModelUsage(): string {
  const rawUsage = getModelUsage()
  if (Object.keys(rawUsage).length === 0) {
    return 'Usage: 0 input, 0 output'
  }

  // Fold raw entries into one bucket per canonical model name. Map keeps
  // insertion order, matching the original object iteration order.
  const buckets = new Map<string, ModelUsage>()
  for (const [model, usage] of Object.entries(rawUsage)) {
    const shortName = getCanonicalName(model)
    let bucket = buckets.get(shortName)
    if (bucket === undefined) {
      bucket = {
        inputTokens: 0,
        outputTokens: 0,
        cacheReadInputTokens: 0,
        cacheCreationInputTokens: 0,
        webSearchRequests: 0,
        costUSD: 0,
        contextWindow: 0,
        maxOutputTokens: 0,
      }
      buckets.set(shortName, bucket)
    }
    bucket.inputTokens += usage.inputTokens
    bucket.outputTokens += usage.outputTokens
    bucket.cacheReadInputTokens += usage.cacheReadInputTokens
    bucket.cacheCreationInputTokens += usage.cacheCreationInputTokens
    bucket.webSearchRequests += usage.webSearchRequests
    bucket.costUSD += usage.costUSD
  }

  const lines = ['Usage by model:']
  for (const [shortName, usage] of buckets) {
    const parts = [
      ` ${formatNumber(usage.inputTokens)} input, ` +
        `${formatNumber(usage.outputTokens)} output`,
    ]
    if (usage.cacheReadInputTokens > 0) {
      parts.push(`, ${formatNumber(usage.cacheReadInputTokens)} cache read`)
    }
    if (usage.cacheCreationInputTokens > 0) {
      parts.push(`, ${formatNumber(usage.cacheCreationInputTokens)} cache write`)
    }
    if (usage.webSearchRequests > 0) {
      parts.push(`, ${formatNumber(usage.webSearchRequests)} web search`)
    }
    parts.push(` (${formatCost(usage.costUSD)})`)
    lines.push(`${shortName}:`.padStart(21) + parts.join(''))
  }
  return lines.join('\n')
}
/**
 * Build the dimmed multi-line session summary: total spend (with an
 * inaccuracy caveat when unknown models were used), API vs wall-clock
 * duration, line-change counts, and the per-model usage table.
 */
export function formatTotalCost(): string {
  const added = getTotalLinesAdded()
  const removed = getTotalLinesRemoved()
  const unknownModelSuffix = hasUnknownModelCost()
    ? ' (costs may be inaccurate due to usage of unknown models)'
    : ''
  const summary = [
    `Total cost: ${formatCost(getTotalCostUSD())}${unknownModelSuffix}`,
    `Total duration (API): ${formatDuration(getTotalAPIDuration())}`,
    `Total duration (wall): ${formatDuration(getTotalDuration())}`,
    `Total code changes: ${added} ${added === 1 ? 'line' : 'lines'} added, ${removed} ${removed === 1 ? 'line' : 'lines'} removed`,
    formatModelUsage(),
  ].join('\n')
  return chalk.dim(summary)
}
/**
 * Round `number` using `precision` as a scale factor — e.g. precision 100
 * keeps two decimal places, precision 10 keeps one.
 */
function round(number: number, precision: number): number {
  const scaled = Math.round(number * precision)
  return scaled / precision
}
// Env-gated verbose token usage log. OPENCLAUDE_LOG_TOKEN_USAGE is treated
// as a loose boolean: after trimming and lowercasing, any non-empty value
// other than "0"/"false"/"off" enables it. `verbose` is the documented
// keyword, but `1`/`true` work too for parity with other OPENCLAUDE_* flags.
function shouldLogTokenUsageVerbose(): boolean {
  const raw = process.env.OPENCLAUDE_LOG_TOKEN_USAGE
  const normalized = (raw ?? '').trim().toLowerCase()
  if (normalized === '') {
    return false
  }
  return !['0', 'false', 'off'].includes(normalized)
}
/**
 * Accumulate one request's usage into the per-model totals.
 * Mutates (and returns) the existing ModelUsage record when `model` is
 * already tracked; otherwise starts from a zeroed record. The context
 * window and max output tokens are model properties, not accumulators, so
 * they are refreshed on every call.
 */
function addToTotalModelUsage(
  cost: number,
  usage: Usage,
  model: string,
): ModelUsage {
  const zeroed: ModelUsage = {
    inputTokens: 0,
    outputTokens: 0,
    cacheReadInputTokens: 0,
    cacheCreationInputTokens: 0,
    webSearchRequests: 0,
    costUSD: 0,
    contextWindow: 0,
    maxOutputTokens: 0,
  }
  const totals = getUsageForModel(model) ?? zeroed
  // Cache fields are optional on the Anthropic usage shape — count 0 when absent.
  totals.inputTokens += usage.input_tokens
  totals.outputTokens += usage.output_tokens
  totals.cacheReadInputTokens += usage.cache_read_input_tokens ?? 0
  totals.cacheCreationInputTokens += usage.cache_creation_input_tokens ?? 0
  totals.webSearchRequests += usage.server_tool_use?.web_search_requests ?? 0
  totals.costUSD += cost
  // Overwrite (not accumulate) model-derived limits.
  totals.contextWindow = getContextWindowForModel(model, getSdkBetas())
  totals.maxOutputTokens = getModelMaxOutputTokens(model).default
  return totals
}
/**
 * Record one completed API request against the session totals.
 *
 * Side effects, in order: per-model usage accumulation + global cost state,
 * normalized cache-metric recording, an optional env-gated stderr usage log,
 * cost/token metric counters, and — for advisor-tool usage embedded in
 * `usage` — an analytics event plus a recursive roll-up of advisor costs.
 *
 * @param cost  pre-computed USD cost of this request
 * @param usage Anthropic-shaped usage payload (already normalized upstream
 *              by the provider shim layer — see comment below)
 * @param model model identifier the request ran against
 * @returns the total cost added, including recursively-added advisor costs
 */
export function addToTotalSessionCost(
  cost: number,
  usage: Usage,
  model: string,
): number {
  // Fold this request into the per-model accumulator, then the cost state.
  const modelUsage = addToTotalModelUsage(cost, usage, model)
  addToTotalCostState(cost, modelUsage, model)
  // Record normalized cache metrics for REPL display + /cache-stats.
  // Resolved from the current process provider — at this point `usage` has
  // already been Anthropic-shaped by the shim layer, so we feed the
  // corresponding bucket (anthropic / copilot-claude / openai-like) to the
  // extractor. For providers that genuinely don't report cache data
  // (vanilla Copilot, Ollama), resolveCacheProvider steers us to
  // supported:false so the UI shows "N/A" instead of lying with "0%".
  const cacheProvider = resolveCacheProvider(getAPIProvider(), {
    githubNativeAnthropic: isGithubNativeAnthropicMode(model),
    openAiBaseUrl: process.env.OPENAI_BASE_URL ?? process.env.OPENAI_API_BASE,
  })
  const cacheMetrics = extractCacheMetrics(
    usage as unknown as Record<string, unknown>,
    cacheProvider,
  )
  recordCacheRequest(cacheMetrics, model)
  // Opt-in structured per-request debug log on stderr. Power-user knob, not
  // shown in the REPL — complements CLAUDE_CODE_ENABLE_TOKEN_USAGE_ATTACHMENT
  // (which is model-facing). Any truthy value except "0"/"false" enables it.
  if (shouldLogTokenUsageVerbose()) {
    process.stderr.write(
      JSON.stringify({
        tag: 'openclaude.tokenUsage',
        model,
        provider: cacheProvider,
        input_tokens: usage.input_tokens,
        output_tokens: usage.output_tokens,
        cache_read_input_tokens: usage.cache_read_input_tokens ?? 0,
        cache_creation_input_tokens: usage.cache_creation_input_tokens ?? 0,
        cache_supported: cacheMetrics.supported,
        cache_hit_rate: cacheMetrics.hitRate,
        cost_usd: cost,
      }) + '\n',
    )
  }
  // Tag metric counters with speed only when fast mode applied to this
  // particular request.
  const attrs =
    isFastModeEnabled() && usage.speed === 'fast'
      ? { model, speed: 'fast' }
      : { model }
  getCostCounter()?.add(cost, attrs)
  getTokenCounter()?.add(usage.input_tokens, { ...attrs, type: 'input' })
  getTokenCounter()?.add(usage.output_tokens, { ...attrs, type: 'output' })
  getTokenCounter()?.add(usage.cache_read_input_tokens ?? 0, {
    ...attrs,
    type: 'cacheRead',
  })
  getTokenCounter()?.add(usage.cache_creation_input_tokens ?? 0, {
    ...attrs,
    type: 'cacheCreation',
  })
  // Advisor-tool usage rides along inside `usage`: cost each advisor model
  // separately, emit an analytics event, and recurse so advisor tokens also
  // land in per-model totals and counters. The recursion terminates when
  // getAdvisorUsage() yields nothing for the advisor's own usage payload —
  // NOTE(review): assumed from the call shape; confirm in utils/advisor.
  let totalCost = cost
  for (const advisorUsage of getAdvisorUsage(usage)) {
    const advisorCost = calculateUSDCost(advisorUsage.model, advisorUsage)
    logEvent('tengu_advisor_tool_token_usage', {
      advisor_model:
        advisorUsage.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      input_tokens: advisorUsage.input_tokens,
      output_tokens: advisorUsage.output_tokens,
      cache_read_input_tokens: advisorUsage.cache_read_input_tokens ?? 0,
      cache_creation_input_tokens:
        advisorUsage.cache_creation_input_tokens ?? 0,
      // Micro-USD integer for analytics (avoids float cost fields).
      cost_usd_micros: Math.round(advisorCost * 1_000_000),
    })
    totalCost += addToTotalSessionCost(
      advisorCost,
      advisorUsage,
      advisorUsage.model,
    )
  }
  return totalCost
}