diff --git a/src/cost-tracker.ts b/src/cost-tracker.ts
index b03184c6..56920c5a 100644
--- a/src/cost-tracker.ts
+++ b/src/cost-tracker.ts
@@ -181,7 +181,7 @@ function formatCost(cost: number, maxDecimalPlaces: number = 4): string {
 function formatModelUsage(): string {
   const modelUsageMap = getModelUsage()
   if (Object.keys(modelUsageMap).length === 0) {
-    return 'Usage: 0 input, 0 output, 0 cache read, 0 cache write'
+    return 'Usage: 0 input, 0 output'
   }
 
   // Accumulate usage by short name
@@ -211,15 +211,19 @@ function formatModelUsage(): string {
 
   let result = 'Usage by model:'
   for (const [shortName, usage] of Object.entries(usageByShortName)) {
-    const usageString =
+    let usageString =
       ` ${formatNumber(usage.inputTokens)} input, ` +
-      `${formatNumber(usage.outputTokens)} output, ` +
-      `${formatNumber(usage.cacheReadInputTokens)} cache read, ` +
-      `${formatNumber(usage.cacheCreationInputTokens)} cache write` +
-      (usage.webSearchRequests > 0
-        ? `, ${formatNumber(usage.webSearchRequests)} web search`
-        : '') +
-      ` (${formatCost(usage.costUSD)})`
+      `${formatNumber(usage.outputTokens)} output`
+    if (usage.cacheReadInputTokens > 0) {
+      usageString += `, ${formatNumber(usage.cacheReadInputTokens)} cache read`
+    }
+    if (usage.cacheCreationInputTokens > 0) {
+      usageString += `, ${formatNumber(usage.cacheCreationInputTokens)} cache write`
+    }
+    if (usage.webSearchRequests > 0) {
+      usageString += `, ${formatNumber(usage.webSearchRequests)} web search`
+    }
+    usageString += ` (${formatCost(usage.costUSD)})`
     result += `\n` + `${shortName}:`.padStart(21) + usageString
   }
   return result
diff --git a/src/services/api/codexShim.ts b/src/services/api/codexShim.ts
index 4b7260e7..4c823a3d 100644
--- a/src/services/api/codexShim.ts
+++ b/src/services/api/codexShim.ts
@@ -80,12 +80,17 @@ type CodexSseEvent = {
 function makeUsage(usage?: {
   input_tokens?: number
   output_tokens?: number
+  input_tokens_details?: { cached_tokens?: number }
+  prompt_tokens_details?: { cached_tokens?: number }
 }): AnthropicUsage {
   return {
     input_tokens: usage?.input_tokens ?? 0,
     output_tokens: usage?.output_tokens ?? 0,
     cache_creation_input_tokens: 0,
-    cache_read_input_tokens: 0,
+    cache_read_input_tokens:
+      usage?.input_tokens_details?.cached_tokens ??
+      usage?.prompt_tokens_details?.cached_tokens ??
+      0,
   }
 }
 
@@ -890,8 +895,16 @@
       stop_sequence: null,
     },
     usage: {
-      input_tokens: finalResponse?.usage?.input_tokens ?? 0,
+      // Subtract cached tokens: OpenAI includes them in input_tokens,
+      // but Anthropic convention treats input_tokens as non-cached only.
+      input_tokens: (finalResponse?.usage?.input_tokens ?? 0) -
+        (finalResponse?.usage?.input_tokens_details?.cached_tokens ??
+          finalResponse?.usage?.prompt_tokens_details?.cached_tokens ?? 0),
       output_tokens: finalResponse?.usage?.output_tokens ?? 0,
+      cache_read_input_tokens:
+        finalResponse?.usage?.input_tokens_details?.cached_tokens ??
+        finalResponse?.usage?.prompt_tokens_details?.cached_tokens ??
+        0,
     },
   }
   yield { type: 'message_stop' }
diff --git a/src/services/api/openaiShim.ts b/src/services/api/openaiShim.ts
index 727e4ca9..978ecf57 100644
--- a/src/services/api/openaiShim.ts
+++ b/src/services/api/openaiShim.ts
@@ -564,11 +564,14 @@ function convertChunkUsage(
 ): Partial<AnthropicUsage> | undefined {
   if (!usage) return undefined
 
+  const cached = usage.prompt_tokens_details?.cached_tokens ?? 0
   return {
-    input_tokens: usage.prompt_tokens ?? 0,
+    // Subtract cached tokens: OpenAI includes them in prompt_tokens,
+    // but Anthropic convention treats input_tokens as non-cached only.
+    input_tokens: (usage.prompt_tokens ?? 0) - cached,
     output_tokens: usage.completion_tokens ?? 0,
     cache_creation_input_tokens: 0,
-    cache_read_input_tokens: usage.prompt_tokens_details?.cached_tokens ?? 0,
+    cache_read_input_tokens: cached,
   }
 }
 