feat(api): expose cache metrics in REPL + normalize across providers (#813)

* feat(api): expose cache metrics in REPL + /cache-stats command

* fix(api): normalize Kimi/DeepSeek/Gemini cache fields through shim layer

* test(api): cover /cache-stats rendering + fix CacheMetrics docstring drift

* fix(api): always reset cache turn counter + include date in /cache-stats rows

* refactor(api): unify shim usage builder + add cost-tracker wiring test

* fix(api): classify private-IP/self-hosted OpenAI endpoints as N/A instead of cold (sketched after this list)

* fix(api): require colon guard on IPv6 ULA prefix to avoid public-host over-match

* perf(api): ring buffer for cache history + hit rate clamp + .localhost TLD (sketched after this list)

* fix(api): null guards on formatters + document Codex Responses API shape

* fix(api): defensive start-of-turn reset + config gate fallback + env var docs

* fix(api): trust forwarded cache data on self-hosted URLs (data-driven)

* refactor(api): delegate streaming Responses usage to shared makeUsage helper
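
Two of the fixes above are easiest to see in miniature. First, the private-IP/self-hosted endpoint classification with the IPv6 ULA colon guard reduces to a hostname check. A minimal sketch, with the helper name isSelfHostedEndpoint and the exact prefix list assumed rather than taken from this PR's code:

    function isSelfHostedEndpoint(baseUrl: string): boolean {
      let host: string
      try {
        host = new URL(baseUrl).hostname.toLowerCase()
      } catch {
        return false // unparseable URL: treat as public, not self-hosted
      }
      host = host.replace(/^\[|\]$/g, '') // strip IPv6 brackets
      return (
        host === 'localhost' ||
        host.endsWith('.localhost') ||               // .localhost TLD
        host === '::1' ||                            // IPv6 loopback
        /^127\./.test(host) ||                       // IPv4 loopback
        /^10\./.test(host) ||                        // RFC 1918 10/8
        /^192\.168\./.test(host) ||                  // RFC 1918 192.168/16
        /^172\.(1[6-9]|2\d|3[01])\./.test(host) ||   // RFC 1918 172.16/12
        // ULA fc00::/7 - the trailing colon is the "colon guard": without
        // it, a public hostname such as fc00bank.example would also match.
        /^f[cd][0-9a-f]{2}:/.test(host)
      )
    }

Second, the ring buffer for cache history keeps a fixed window of per-turn hit rates, and the clamp stops a provider that reports more cached than input tokens from pushing the rate over 100%. Again a sketch with assumed names (CacheHistory, capacity of 50), not the PR's actual implementation:

    class CacheHistory {
      private buf: number[] = []
      private next = 0
      private filled = false
      constructor(private readonly capacity: number = 50) {}
      push(hitRate: number): void {
        // Clamp to [0, 1] before storing.
        this.buf[this.next] = Math.min(1, Math.max(0, hitRate))
        this.next = (this.next + 1) % this.capacity
        if (this.next === 0) this.filled = true
      }
      values(): number[] {
        // Oldest-first view of the retained window.
        return this.filled
          ? [...this.buf.slice(this.next), ...this.buf.slice(0, this.next)]
          : this.buf.slice(0, this.next)
      }
    }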
viudes
2026-04-25 01:38:25 -03:00
committed by GitHub
parent 9070220292
commit 9e23c2bec4
20 changed files with 2749 additions and 46 deletions


@@ -1,4 +1,5 @@
 import { APIError } from '@anthropic-ai/sdk'
+import { buildAnthropicUsageFromRawUsage } from './cacheMetrics.js'
 import { compressToolHistory } from './compressToolHistory.js'
 import { fetchWithProxyRetry } from './fetchWithProxyRetry.js'
 import type {
@@ -78,21 +79,12 @@ type CodexSseEvent = {
   data: Record<string, any>
 }
-function makeUsage(usage?: {
-  input_tokens?: number
-  output_tokens?: number
-  input_tokens_details?: { cached_tokens?: number }
-  prompt_tokens_details?: { cached_tokens?: number }
-}): AnthropicUsage {
-  return {
-    input_tokens: usage?.input_tokens ?? 0,
-    output_tokens: usage?.output_tokens ?? 0,
-    cache_creation_input_tokens: 0,
-    cache_read_input_tokens:
-      usage?.input_tokens_details?.cached_tokens ??
-      usage?.prompt_tokens_details?.cached_tokens ??
-      0,
-  }
+function makeUsage(usage?: Record<string, unknown>): AnthropicUsage {
+  // Single source of truth for raw → Anthropic shape. Lives in
+  // cacheMetrics.ts alongside the raw-shape extractor so any new
+  // provider quirk requires a one-file change and the integration test
+  // can call the exact same function instead of re-implementing it.
+  return buildAnthropicUsageFromRawUsage(usage)
 }

 function makeMessageId(): string {
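
The hunk above only shows the call site. Based on the raw shapes named in the commit messages, the shared normalizer in cacheMetrics.ts plausibly looks like the sketch below; the DeepSeek-, Gemini-, and Kimi-style field names are assumptions drawn from those providers' public APIs, not copied from this PR, and the cached-token subtraction mirrors the inline comment removed in the next hunk:

    function num(v: unknown): number {
      return typeof v === 'number' ? v : 0
    }

    export function buildAnthropicUsageFromRawUsage(
      usage?: Record<string, unknown>,
    ): AnthropicUsage {
      const details = (usage?.input_tokens_details ??
        usage?.prompt_tokens_details) as Record<string, unknown> | undefined
      // || chaining treats 0 as "not reported" and falls through to the
      // next candidate field.
      const cached =
        num(details?.cached_tokens) ||              // OpenAI Responses shape
        num(usage?.prompt_cache_hit_tokens) ||      // DeepSeek-style (assumed)
        num(usage?.cached_content_token_count) ||   // Gemini-style (assumed)
        num(usage?.cached_tokens)                   // flat Kimi-style (assumed)
      const rawInput = num(usage?.input_tokens ?? usage?.prompt_tokens)
      return {
        // Anthropic convention treats input_tokens as non-cached only, so
        // subtract the cached count from the provider's raw input total.
        input_tokens: Math.max(0, rawInput - cached),
        output_tokens: num(usage?.output_tokens ?? usage?.completion_tokens),
        cache_creation_input_tokens: 0,
        cache_read_input_tokens: cached,
      }
    }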
@@ -911,18 +903,14 @@ export async function* codexStreamToAnthropic(
       stop_reason: determineStopReason(finalResponse, sawToolUse),
       stop_sequence: null,
     },
-    usage: {
-      // Subtract cached tokens: OpenAI includes them in input_tokens,
-      // but Anthropic convention treats input_tokens as non-cached only.
-      input_tokens: (finalResponse?.usage?.input_tokens ?? 0) -
-        (finalResponse?.usage?.input_tokens_details?.cached_tokens ??
-          finalResponse?.usage?.prompt_tokens_details?.cached_tokens ?? 0),
-      output_tokens: finalResponse?.usage?.output_tokens ?? 0,
-      cache_read_input_tokens:
-        finalResponse?.usage?.input_tokens_details?.cached_tokens ??
-        finalResponse?.usage?.prompt_tokens_details?.cached_tokens ??
-        0,
-    },
+    // Delegate to the shared normalizer so the streaming message_delta
+    // path uses the same raw→Anthropic conversion as makeUsage() above
+    // and the non-streaming response converter below. Previously this
+    // block had its own inline subtraction that missed Kimi / DeepSeek
+    // / Gemini raw shapes that the shared helper handles.
+    usage: makeUsage(
+      finalResponse?.usage as Record<string, unknown> | undefined,
+    ),
   }

   yield { type: 'message_stop' }