feat(api): expose cache metrics in REPL + normalize across providers (#813)
* feat(api): expose cache metrics in REPL + /cache-stats command
* fix(api): normalize Kimi/DeepSeek/Gemini cache fields through shim layer
* test(api): cover /cache-stats rendering + fix CacheMetrics docstring drift
* fix(api): always reset cache turn counter + include date in /cache-stats rows
* refactor(api): unify shim usage builder + add cost-tracker wiring test
* fix(api): classify private-IP/self-hosted OpenAI endpoints as N/A instead of cold
* fix(api): require colon guard on IPv6 ULA prefix to avoid public-host over-match
* perf(api): ring buffer for cache history + hit rate clamp + .localhost TLD
* fix(api): null guards on formatters + document Codex Responses API shape
* fix(api): defensive start-of-turn reset + config gate fallback + env var docs
* fix(api): trust forwarded cache data on self-hosted URLs (data-driven)
* refactor(api): delegate streaming Responses usage to shared makeUsage helper
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import { APIError } from '@anthropic-ai/sdk'
|
||||
import { buildAnthropicUsageFromRawUsage } from './cacheMetrics.js'
|
||||
import { compressToolHistory } from './compressToolHistory.js'
|
||||
import { fetchWithProxyRetry } from './fetchWithProxyRetry.js'
|
||||
import type {
|
||||
@@ -78,21 +79,12 @@ type CodexSseEvent = {
|
||||
data: Record<string, any>
|
||||
}
|
||||
|
||||
function makeUsage(usage?: {
|
||||
input_tokens?: number
|
||||
output_tokens?: number
|
||||
input_tokens_details?: { cached_tokens?: number }
|
||||
prompt_tokens_details?: { cached_tokens?: number }
|
||||
}): AnthropicUsage {
|
||||
return {
|
||||
input_tokens: usage?.input_tokens ?? 0,
|
||||
output_tokens: usage?.output_tokens ?? 0,
|
||||
cache_creation_input_tokens: 0,
|
||||
cache_read_input_tokens:
|
||||
usage?.input_tokens_details?.cached_tokens ??
|
||||
usage?.prompt_tokens_details?.cached_tokens ??
|
||||
0,
|
||||
}
|
||||
function makeUsage(usage?: Record<string, unknown>): AnthropicUsage {
|
||||
// Single source of truth for raw → Anthropic shape. Lives in
|
||||
// cacheMetrics.ts alongside the raw-shape extractor so any new
|
||||
// provider quirk requires a one-file change and the integration test
|
||||
// can call the exact same function instead of re-implementing it.
|
||||
return buildAnthropicUsageFromRawUsage(usage)
|
||||
}
|
||||
|
||||
function makeMessageId(): string {
|
||||
@@ -911,18 +903,14 @@ export async function* codexStreamToAnthropic(
|
||||
stop_reason: determineStopReason(finalResponse, sawToolUse),
|
||||
stop_sequence: null,
|
||||
},
|
||||
usage: {
|
||||
// Subtract cached tokens: OpenAI includes them in input_tokens,
|
||||
// but Anthropic convention treats input_tokens as non-cached only.
|
||||
input_tokens: (finalResponse?.usage?.input_tokens ?? 0) -
|
||||
(finalResponse?.usage?.input_tokens_details?.cached_tokens ??
|
||||
finalResponse?.usage?.prompt_tokens_details?.cached_tokens ?? 0),
|
||||
output_tokens: finalResponse?.usage?.output_tokens ?? 0,
|
||||
cache_read_input_tokens:
|
||||
finalResponse?.usage?.input_tokens_details?.cached_tokens ??
|
||||
finalResponse?.usage?.prompt_tokens_details?.cached_tokens ??
|
||||
0,
|
||||
},
|
||||
// Delegate to the shared normalizer so the streaming message_delta
|
||||
// path uses the same raw→Anthropic conversion as makeUsage() above
|
||||
// and the non-streaming response converter below. Previously this
|
||||
// block had its own inline subtraction that missed Kimi / DeepSeek
|
||||
// / Gemini raw shapes that the shared helper handles.
|
||||
usage: makeUsage(
|
||||
finalResponse?.usage as Record<string, unknown> | undefined,
|
||||
),
|
||||
}
|
||||
yield { type: 'message_stop' }
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user