feat(api): expose cache metrics in REPL + normalize across providers (#813)

* feat(api): expose cache metrics in REPL + /cache-stats command

* fix(api): normalize Kimi/DeepSeek/Gemini cache fields through shim layer

* test(api): cover /cache-stats rendering + fix CacheMetrics docstring drift

* fix(api): always reset cache turn counter + include date in /cache-stats rows

* refactor(api): unify shim usage builder + add cost-tracker wiring test

* fix(api): classify private-IP/self-hosted OpenAI endpoints as N/A instead of cold

* fix(api): require colon guard on IPv6 ULA prefix to avoid public-host over-match

* perf(api): ring buffer for cache history + hit rate clamp + .localhost TLD

* fix(api): null guards on formatters + document Codex Responses API shape

* fix(api): defensive start-of-turn reset + config gate fallback + env var docs

* fix(api): trust forwarded cache data on self-hosted URLs (data-driven)

* refactor(api): delegate streaming Responses usage to shared makeUsage helper
This commit is contained in:
viudes
2026-04-25 01:38:25 -03:00
committed by GitHub
parent 9070220292
commit 9e23c2bec4
20 changed files with 2749 additions and 46 deletions

View File

@@ -1,5 +1,14 @@
import type { BetaUsage as Usage } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
import chalk from 'chalk'
import {
extractCacheMetrics,
resolveCacheProvider,
} from './services/api/cacheMetrics.js'
import {
recordRequest as recordCacheRequest,
resetSessionCacheStats,
} from './services/api/cacheStatsTracker.js'
import { getAPIProvider, isGithubNativeAnthropicMode } from './utils/model/providers.js'
import {
addToTotalCostState,
addToTotalLinesChanged,
@@ -22,7 +31,7 @@ import {
getTotalWebSearchRequests,
getUsageForModel,
hasUnknownModelCost,
resetCostState,
resetCostState as baseResetCostState,
resetStateForTests,
setCostStateForRestore,
setHasUnknownModelCost,
@@ -62,12 +71,22 @@ export {
formatCost,
hasUnknownModelCost,
resetStateForTests,
resetCostState,
setHasUnknownModelCost,
getModelUsage,
getUsageForModel,
}
/**
 * Drop-in replacement for bootstrap's resetCostState(): zeroes the cost
 * counters and the session cache-stats tracker together. Re-exported under
 * the original name so /clear, /compact and session switches pick up the
 * cache reset for free — no call-site changes required anywhere else.
 */
export function resetCostState(): void {
  baseResetCostState()
  resetSessionCacheStats()
}
type StoredCostState = {
totalCostUSD: number
totalAPIDuration: number
@@ -251,6 +270,16 @@ function round(number: number, precision: number): number {
return Math.round(number * precision) / precision
}
// Env-gated verbose token usage log. The flag is effectively boolean: any
// non-empty value that is not an explicit "off" spelling enables it.
// `verbose` is the documented keyword, but `1`/`true` (etc.) work too,
// matching the ergonomics of the other OPENCLAUDE_* flags.
function shouldLogTokenUsageVerbose(): boolean {
  const flag = (process.env.OPENCLAUDE_LOG_TOKEN_USAGE ?? '').trim().toLowerCase()
  if (flag === '') return false
  // Only these spellings explicitly disable the log; anything else enables it.
  const offValues = ['0', 'false', 'off']
  return !offValues.includes(flag)
}
function addToTotalModelUsage(
cost: number,
usage: Usage,
@@ -287,6 +316,43 @@ export function addToTotalSessionCost(
const modelUsage = addToTotalModelUsage(cost, usage, model)
addToTotalCostState(cost, modelUsage, model)
// Record normalized cache metrics for REPL display + /cache-stats.
// Resolved from the current process provider — at this point `usage` has
// already been Anthropic-shaped by the shim layer, so we feed the
// corresponding bucket (anthropic / copilot-claude / openai-like) to the
// extractor. For providers that genuinely don't report cache data
// (vanilla Copilot, Ollama), resolveCacheProvider steers us to
// supported:false so the UI shows "N/A" instead of lying with "0%".
const cacheProvider = resolveCacheProvider(getAPIProvider(), {
githubNativeAnthropic: isGithubNativeAnthropicMode(model),
openAiBaseUrl: process.env.OPENAI_BASE_URL ?? process.env.OPENAI_API_BASE,
})
const cacheMetrics = extractCacheMetrics(
usage as unknown as Record<string, unknown>,
cacheProvider,
)
recordCacheRequest(cacheMetrics, model)
// Opt-in structured per-request debug log on stderr. Power-user knob, not
// shown in the REPL — complements CLAUDE_CODE_ENABLE_TOKEN_USAGE_ATTACHMENT
// (which is model-facing). Any truthy value except "0"/"false" enables it.
if (shouldLogTokenUsageVerbose()) {
process.stderr.write(
JSON.stringify({
tag: 'openclaude.tokenUsage',
model,
provider: cacheProvider,
input_tokens: usage.input_tokens,
output_tokens: usage.output_tokens,
cache_read_input_tokens: usage.cache_read_input_tokens ?? 0,
cache_creation_input_tokens: usage.cache_creation_input_tokens ?? 0,
cache_supported: cacheMetrics.supported,
cache_hit_rate: cacheMetrics.hitRate,
cost_usd: cost,
}) + '\n',
)
}
const attrs =
isFastModeEnabled() && usage.speed === 'fast'
? { model, speed: 'fast' }