feat(api): expose cache metrics in REPL + normalize across providers (#813)

* feat(api): expose cache metrics in REPL + /cache-stats command

* fix(api): normalize Kimi/DeepSeek/Gemini cache fields through shim layer

* test(api): cover /cache-stats rendering + fix CacheMetrics docstring drift

* fix(api): always reset cache turn counter + include date in /cache-stats rows

* refactor(api): unify shim usage builder + add cost-tracker wiring test

* fix(api): classify private-IP/self-hosted OpenAI endpoints as N/A instead of cold (sketched after this list)

* fix(api): require colon guard on IPv6 ULA prefix to avoid public-host over-match

* perf(api): ring buffer for cache history + hit rate clamp + .localhost TLD (sketched after this list)

* fix(api): null guards on formatters + document Codex Responses API shape

* fix(api): defensive start-of-turn reset + config gate fallback + env var docs

* fix(api): trust forwarded cache data on self-hosted URLs (data-driven)

* refactor(api): delegate streaming Responses usage to shared makeUsage helper
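
Two of the fixes above are easiest to see in miniature. First, the private-IP/self-hosted endpoint classification with the IPv6 ULA colon guard reduces to a hostname check. A minimal sketch, with the helper name isSelfHostedEndpoint and the exact prefix list assumed rather than taken from this PR's code:

    function isSelfHostedEndpoint(baseUrl: string): boolean {
      let host: string
      try {
        host = new URL(baseUrl).hostname.toLowerCase()
      } catch {
        return false // unparseable URL: treat as public, not self-hosted
      }
      host = host.replace(/^\[|\]$/g, '') // strip IPv6 brackets
      return (
        host === 'localhost' ||
        host.endsWith('.localhost') ||               // .localhost TLD
        host === '::1' ||                            // IPv6 loopback
        /^127\./.test(host) ||                       // IPv4 loopback
        /^10\./.test(host) ||                        // RFC 1918 10/8
        /^192\.168\./.test(host) ||                  // RFC 1918 192.168/16
        /^172\.(1[6-9]|2\d|3[01])\./.test(host) ||   // RFC 1918 172.16/12
        // ULA fc00::/7 - the trailing colon is the "colon guard": without
        // it, a public hostname such as fc00bank.example would also match.
        /^f[cd][0-9a-f]{2}:/.test(host)
      )
    }

Second, the ring buffer for cache history keeps a fixed window of per-turn hit rates, and the clamp stops a provider that reports more cached than input tokens from pushing the rate over 100%. Again a sketch with assumed names (CacheHistory, capacity of 50), not the PR's actual implementation:

    class CacheHistory {
      private buf: number[] = []
      private next = 0
      private filled = false
      constructor(private readonly capacity: number = 50) {}
      push(hitRate: number): void {
        // Clamp to [0, 1] before storing.
        this.buf[this.next] = Math.min(1, Math.max(0, hitRate))
        this.next = (this.next + 1) % this.capacity
        if (this.next === 0) this.filled = true
      }
      values(): number[] {
        // Oldest-first view of the retained window.
        return this.filled
          ? [...this.buf.slice(this.next), ...this.buf.slice(0, this.next)]
          : this.buf.slice(0, this.next)
      }
    }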
viudes
2026-04-25 01:38:25 -03:00
committed by GitHub
parent 9070220292
commit 9e23c2bec4
20 changed files with 2749 additions and 46 deletions


@@ -1,4 +1,5 @@
 import { APIError } from '@anthropic-ai/sdk'
+import { buildAnthropicUsageFromRawUsage } from './cacheMetrics.js'
 import { compressToolHistory } from './compressToolHistory.js'
 import { fetchWithProxyRetry } from './fetchWithProxyRetry.js'
 import type {
@@ -78,21 +79,12 @@ type CodexSseEvent = {
   data: Record<string, any>
 }
-function makeUsage(usage?: {
-  input_tokens?: number
-  output_tokens?: number
-  input_tokens_details?: { cached_tokens?: number }
-  prompt_tokens_details?: { cached_tokens?: number }
-}): AnthropicUsage {
-  return {
-    input_tokens: usage?.input_tokens ?? 0,
-    output_tokens: usage?.output_tokens ?? 0,
-    cache_creation_input_tokens: 0,
-    cache_read_input_tokens:
-      usage?.input_tokens_details?.cached_tokens ??
-      usage?.prompt_tokens_details?.cached_tokens ??
-      0,
-  }
+function makeUsage(usage?: Record<string, unknown>): AnthropicUsage {
+  // Single source of truth for raw → Anthropic shape. Lives in
+  // cacheMetrics.ts alongside the raw-shape extractor so any new
+  // provider quirk requires a one-file change and the integration test
+  // can call the exact same function instead of re-implementing it.
+  return buildAnthropicUsageFromRawUsage(usage)
 }

 function makeMessageId(): string {
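
The hunk above only shows the call site. Based on the raw shapes named in the commit messages, the shared normalizer in cacheMetrics.ts plausibly looks like the sketch below; the DeepSeek-, Gemini-, and Kimi-style field names are assumptions drawn from those providers' public APIs, not copied from this PR, and the cached-token subtraction mirrors the inline comment removed in the next hunk:

    function num(v: unknown): number {
      return typeof v === 'number' ? v : 0
    }

    export function buildAnthropicUsageFromRawUsage(
      usage?: Record<string, unknown>,
    ): AnthropicUsage {
      const details = (usage?.input_tokens_details ??
        usage?.prompt_tokens_details) as Record<string, unknown> | undefined
      // || chaining treats 0 as "not reported" and falls through to the
      // next candidate field.
      const cached =
        num(details?.cached_tokens) ||              // OpenAI Responses shape
        num(usage?.prompt_cache_hit_tokens) ||      // DeepSeek-style (assumed)
        num(usage?.cached_content_token_count) ||   // Gemini-style (assumed)
        num(usage?.cached_tokens)                   // flat Kimi-style (assumed)
      const rawInput = num(usage?.input_tokens ?? usage?.prompt_tokens)
      return {
        // Anthropic convention treats input_tokens as non-cached only, so
        // subtract the cached count from the provider's raw input total.
        input_tokens: Math.max(0, rawInput - cached),
        output_tokens: num(usage?.output_tokens ?? usage?.completion_tokens),
        cache_creation_input_tokens: 0,
        cache_read_input_tokens: cached,
      }
    }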
@@ -911,18 +903,14 @@ export async function* codexStreamToAnthropic(
       stop_reason: determineStopReason(finalResponse, sawToolUse),
       stop_sequence: null,
     },
-    usage: {
-      // Subtract cached tokens: OpenAI includes them in input_tokens,
-      // but Anthropic convention treats input_tokens as non-cached only.
-      input_tokens: (finalResponse?.usage?.input_tokens ?? 0) -
-        (finalResponse?.usage?.input_tokens_details?.cached_tokens ??
-          finalResponse?.usage?.prompt_tokens_details?.cached_tokens ?? 0),
-      output_tokens: finalResponse?.usage?.output_tokens ?? 0,
-      cache_read_input_tokens:
-        finalResponse?.usage?.input_tokens_details?.cached_tokens ??
-        finalResponse?.usage?.prompt_tokens_details?.cached_tokens ??
-        0,
-    },
+    // Delegate to the shared normalizer so the streaming message_delta
+    // path uses the same raw→Anthropic conversion as makeUsage() above
+    // and the non-streaming response converter below. Previously this
+    // block had its own inline subtraction that missed Kimi / DeepSeek
+    // / Gemini raw shapes that the shared helper handles.
+    usage: makeUsage(
+      finalResponse?.usage as Record<string, unknown> | undefined,
+    ),
   }

   yield { type: 'message_stop' }