feat(api): expose cache metrics in REPL + normalize across providers (#813)

* feat(api): expose cache metrics in REPL + /cache-stats command * fix(api): normalize Kimi/DeepSeek/Gemini cache fields through shim layer * test(api): cover /cache-stats rendering + fix CacheMetrics docstring drift * fix(api): always reset cache turn counter + include date in /cache-stats rows * refactor(api): unify shim usage builder + add cost-tracker wiring test * fix(api): classify private-IP/self-hosted OpenAI endpoints as N/A instead of cold * fix(api): require colon guard on IPv6 ULA prefix to avoid public-host over-match * perf(api): ring buffer for cache history + hit rate clamp + .localhost TLD * fix(api): null guards on formatters + document Codex Responses API shape * fix(api): defensive start-of-turn reset + config gate fallback + env var docs * fix(api): trust forwarded cache data on self-hosted URLs (data-driven) * refactor(api): delegate streaming Responses usage to shared makeUsage helper
2026-04-25 01:38:25 -03:00
parent 9070220292
commit 9e23c2bec4
20 changed files with 2749 additions and 46 deletions
--- a/src/commands/cacheStats/cacheStats.test.ts
+++ b/src/commands/cacheStats/cacheStats.test.ts
@@ -0,0 +1,157 @@
+/**
+ * Tests for `/cache-stats` command rendering.
+ *
+ * The command has non-trivial string formatting (timestamp slicing, model
+ * label padding, conditional N/A footnote, recent-rows cap) which can
+ * silently regress — these substring and regex assertions keep it honest.
+ */
+import { beforeEach, describe, expect, test } from 'bun:test'
+import type { CacheMetrics } from '../../services/api/cacheMetrics.js'
+import {
+  _setHistoryCapForTesting,
+  recordRequest,
+  resetSessionCacheStats,
+} from '../../services/api/cacheStatsTracker.js'
+import { call } from './cacheStats.js'
+
+/**
+ * Builds a `CacheMetrics` fixture for a provider that reports cache usage:
+ * counters start at zero with no hit rate, and `partial` overrides whichever
+ * fields the test cares about.
+ */
+function supported(partial: Partial<CacheMetrics>): CacheMetrics {
+  const baseline: CacheMetrics = {
+    read: 0,
+    created: 0,
+    total: 0,
+    hitRate: null,
+    supported: true,
+  }
+  return { ...baseline, ...partial }
+}
+
+// Fixture for a request whose metrics are flagged `supported: false`:
+// all counters zeroed and no hit rate. Used to trigger the N/A footnote.
+const UNSUPPORTED: CacheMetrics = {
+  read: 0,
+  created: 0,
+  total: 0,
+  hitRate: null,
+  supported: false,
+}
+
+// call() accepts a context as its second parameter, but the /cache-stats
+// implementation ignores its arguments. An empty object cast to that
+// parameter's type lets the test invoke call() without building a real
+// REPL context.
+const EMPTY_CTX = {} as Parameters<typeof call>[1]
+
+// Runs /cache-stats with empty args and unwraps the text payload. Any
+// other result type is a contract violation, so it fails loudly here
+// instead of making every assertion re-check the discriminant.
+async function runCommand(): Promise<string> {
+  const outcome = await call('', EMPTY_CTX)
+  if (outcome.type === 'text') {
+    return outcome.value
+  }
+  throw new Error(
+    `cacheStats command must return type:'text', got ${outcome.type}`,
+  )
+}
+
+// Runs before every test so recorded requests do not leak between cases.
+// NOTE(review): 500 is presumably the tracker's normal history cap, restored
+// here in case another test lowered it — confirm against cacheStatsTracker.
+beforeEach(() => {
+  resetSessionCacheStats()
+  _setHistoryCapForTesting(500)
+})
+
+describe('/cache-stats — empty session', () => {
+  test('shows friendly "no requests yet" message', async () => {
+    // No recordRequest() call here: the command should return its hint text.
+    const output = await runCommand()
+    const expectedFragments = ['No API requests yet this session', '/cache-stats']
+    for (const fragment of expectedFragments) {
+      expect(output).toContain(fragment)
+    }
+  })
+})
+
+describe('/cache-stats — supported-only session', () => {
+  test('renders Cache stats header, turn and session summaries', async () => {
+    const metrics = supported({ read: 500, total: 1_000, hitRate: 0.5 })
+    recordRequest(metrics, 'claude-sonnet-4')
+
+    const output = await runCommand()
+
+    // Title and both summary lines first, then what the recent-requests
+    // row contributes: the model label and the compact metric text.
+    const expectedFragments = [
+      'Cache stats',
+      'Current turn:',
+      'Session total:',
+      'claude-sonnet-4',
+      'read',
+    ]
+    for (const fragment of expectedFragments) {
+      expect(output).toContain(fragment)
+    }
+  })
+
+  test('omits the N/A footnote when every row is supported', async () => {
+    const metrics = supported({ read: 200, total: 400, hitRate: 0.5 })
+    recordRequest(metrics, 'model-A')
+
+    const output = await runCommand()
+
+    expect(output).not.toContain('N/A rows')
+  })
+})
+
+describe('/cache-stats — mixed supported + unsupported', () => {
+  test('renders N/A footnote when any row is unsupported', async () => {
+    // One request without cache reporting, followed by one with it.
+    recordRequest(UNSUPPORTED, 'gpt-4-copilot')
+    const reporting = supported({ read: 100, total: 500, hitRate: 0.2 })
+    recordRequest(reporting, 'claude-sonnet-4')
+
+    const output = await runCommand()
+
+    // The footnote explains N/A and names the example providers.
+    const footnoteFragments = [
+      'N/A rows: provider API does not expose cache usage',
+      'GitHub Copilot',
+      'Ollama',
+    ]
+    for (const fragment of footnoteFragments) {
+      expect(output).toContain(fragment)
+    }
+  })
+})
+
+describe('/cache-stats — recent-rows cap', () => {
+  test('caps the breakdown at 20 rows and reports omitted count', async () => {
+    for (let i = 0; i < 25; i++) {
+      recordRequest(
+        supported({ read: i, total: 100, hitRate: i / 100 }),
+        `model-${i}`,
+      )
+    }
+    const value = await runCommand()
+    // 20 shown, 5 omitted from the oldest end.
+    expect(value).toContain('(20 of 25, 5 older omitted)')
+    // Every omitted row (model-0..model-4) must be absent, not just the
+    // first. The trailing space is the first padding character after the
+    // label, so `model-1 ` cannot accidentally match `model-10`.
+    for (let i = 0; i < 5; i++) {
+      expect(value).not.toContain(`model-${i} `)
+    }
+    // Both ends of the retained window (model-5..model-24) must be present,
+    // which pins the cut to exactly the five oldest rows.
+    expect(value).toContain('model-5 ')
+    expect(value).toContain('model-24')
+  })
+
+  test('does not mention "older omitted" when all rows fit', async () => {
+    for (let i = 0; i < 5; i++) {
+      recordRequest(supported({ read: i, total: 10 }), `m${i}`)
+    }
+    const value = await runCommand()
+    expect(value).not.toContain('older omitted')
+    // The row count is shown on its own when nothing was dropped.
+    expect(value).toContain('(5)')
+  })
+})
+
+describe('/cache-stats — model label rendering', () => {
+  test('truncates long model labels to fit the column width', async () => {
+    // cacheStats.ts renders the label in a fixed 28-character column.
+    const columnWidth = 28
+    const oversizedLabel = 'some-extremely-long-model-identifier-that-wraps'
+    const metrics = supported({ read: 10, total: 100, hitRate: 0.1 })
+    recordRequest(metrics, oversizedLabel)
+
+    const output = await runCommand()
+
+    // The leading 28 characters survive...
+    expect(output).toContain(oversizedLabel.slice(0, columnWidth))
+    // ...but the untruncated label must not (that would mean no slicing).
+    expect(output).not.toContain(oversizedLabel)
+  })
+})
+
+describe('/cache-stats — timestamp rendering', () => {
+  test('renders each row with full date and time (YYYY-MM-DD HH:MM:SS)', async () => {
+    recordRequest(supported({ read: 5, total: 10, hitRate: 0.5 }), 'claude-x')
+    const output = await runCommand()
+
+    // The real clock is in play, so assert the shape of the timestamp
+    // rather than a specific value.
+    const dateAndTime = /\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}/
+    expect(output).toMatch(dateAndTime)
+
+    // A row whose number is followed directly by a time-of-day (no date)
+    // would mean the formatter was shortened again — guard against that.
+    const bareTimeRow = /\n\s*#\s*\d+\s+\d{2}:\d{2}:\d{2}\s/
+    expect(bareTimeRow.test(output)).toBe(false)
+  })
+})
--- a/src/commands/cacheStats/cacheStats.ts
+++ b/src/commands/cacheStats/cacheStats.ts
@@ -0,0 +1,74 @@
+import {
+  getCacheStatsHistory,
+  getCurrentTurnCacheMetrics,
+  getSessionCacheMetrics,
+  type CacheStatsEntry,
+} from '../../services/api/cacheStatsTracker.js'
+import {
+  formatCacheMetricsCompact,
+  formatCacheMetricsFull,
+  type CacheMetrics,
+} from '../../services/api/cacheMetrics.js'
+import type { LocalCommandCall } from '../../types/command.js'
+
+// Upper bound on rows in the per-request breakdown, to keep the output
+// readable. It is applied with `history.slice(-MAX_RECENT_ROWS)`, so the
+// newest entries are the ones kept. Users wanting the full history can
+// rely on OPENCLAUDE_LOG_TOKEN_USAGE=verbose for structured per-request
+// stderr output.
+const MAX_RECENT_ROWS = 20
+
+/**
+ * Renders one row of the recent-requests table.
+ *
+ * @param entry - Recorded request; its timestamp, label and metrics are shown.
+ * @param idx - Zero-based position of the entry; displayed one-based.
+ * @returns `  #<n>  <YYYY-MM-DD HH:MM:SS>  <label, 28 chars>  <compact metrics>`.
+ */
+function formatRow(entry: CacheStatsEntry, idx: number): string {
+  // The date is kept alongside the time of day: a long session can cross
+  // midnight, and two rows on different days could otherwise show the same
+  // HH:MM:SS and make the wrong one look newest. toISOString() yields UTC.
+  const isoStamp = new Date(entry.timestamp).toISOString()
+  const datePart = isoStamp.slice(0, 10)
+  const timePart = isoStamp.slice(11, 19)
+  const metricsText = formatCacheMetricsCompact(entry.metrics)
+
+  // Right-align the row number in 3 columns; force the label into exactly
+  // 28 columns (truncate long names, pad short ones) so the table lines up.
+  const rowNumber = String(idx + 1).padStart(3)
+  const modelColumn = entry.label.slice(0, 28).padEnd(28)
+
+  return `  #${rowNumber}  ${datePart} ${timePart}  ${modelColumn}  ${metricsText}`
+}
+
+/**
+ * Builds one summary line: `label` padded to an 18-character column,
+ * followed by the full-format rendering of `m`.
+ */
+function summarize(label: string, m: CacheMetrics): string {
+  const heading = label.padEnd(18)
+  return heading + formatCacheMetricsFull(m)
+}
+
+/**
+ * `/cache-stats` entry point: renders the current-turn and session cache
+ * summaries followed by a table of the most recent requests.
+ *
+ * @returns A `text` result. With no recorded requests it is a short hint.
+ *   Otherwise it holds both summaries, at most `MAX_RECENT_ROWS` request
+ *   rows (newest kept), and — when any shown row lacks cache reporting —
+ *   a footnote explaining what N/A means.
+ */
+export const call: LocalCommandCall = async () => {
+  const history = getCacheStatsHistory()
+  const session = getSessionCacheMetrics()
+  const turn = getCurrentTurnCacheMetrics()
+
+  if (history.length === 0) {
+    return {
+      type: 'text',
+      value:
+        'Cache stats\n  No API requests yet this session.\n  Start a turn and re-run /cache-stats to see results.',
+    }
+  }
+
+  const recent = history.slice(-MAX_RECENT_ROWS)
+  const omitted = history.length - recent.length
+  const countSuffix =
+    omitted > 0 ? ` of ${history.length}, ${omitted} older omitted` : ''
+
+  // Column header built from the same widths formatRow emits: a 4-wide
+  // "#NNN" cell, a 19-wide `YYYY-MM-DD HH:MM:SS` timestamp and a 28-wide
+  // model label, each separated by two spaces. The previous literal still
+  // reserved only 8 columns for the time, which pushed the "model" and
+  // "cache" headings 11 columns left of their data.
+  const header = `  ${'#'.padEnd(4)}  ${'time'.padEnd(19)}  ${'model'.padEnd(28)}  cache`
+
+  const lines: string[] = ['Cache stats', '']
+  lines.push(summarize('Current turn:', turn))
+  lines.push(summarize('Session total:', session))
+  lines.push('')
+  lines.push(`Recent requests (${recent.length}${countSuffix}):`)
+  lines.push(header)
+  for (const [i, entry] of recent.entries()) {
+    // Number rows by their position in the full history, not within the
+    // visible window, so numbering stays stable when older rows are cut.
+    lines.push(formatRow(entry, omitted + i))
+  }
+
+  // Honesty footnote — providers without cache reporting (vanilla Copilot,
+  // Ollama) show [Cache: N/A] rather than a fake 0%. Tell the user so they
+  // don't read "N/A" as "broken". Only the rows actually shown are checked.
+  const hasUnsupported = recent.some((e) => !e.metrics.supported)
+  if (hasUnsupported) {
+    lines.push('')
+    lines.push(
+      '  N/A rows: provider API does not expose cache usage (GitHub Copilot, Ollama).',
+    )
+    lines.push(
+      '  The request still ran normally — only the metric is unavailable.',
+    )
+  }
+
+  return { type: 'text', value: lines.join('\n') }
+}
--- a/src/commands/cacheStats/index.ts
+++ b/src/commands/cacheStats/index.ts
@@ -0,0 +1,24 @@
+/**
+ * /cache-stats — per-session cache diagnostics.
+ *
+ * Always-on diagnostic command (no toggle) that surfaces the metrics
+ * tracked in `cacheStatsTracker.ts`. Breaks cache usage down by request
+ * and also reports the session-wide aggregate — useful when the user
+ * suspects a cache bust (e.g. after /reload-plugins) and wants to see
+ * whether recent turns still hit the cache.
+ *
+ * Lazy-loaded (implementation in cacheStats.ts) to keep startup time
+ * minimal — same pattern used by /cost and /cache-probe.
+ */
+import type { Command } from '../../commands.js'
+
+// Command descriptor for /cache-stats. Only metadata lives in this module;
+// `satisfies Command` type-checks the shape while keeping the literal types.
+const cacheStats = {
+  type: 'local',
+  name: 'cache-stats',
+  description:
+    'Show per-turn and session cache hit/miss stats (works across all providers)',
+  supportsNonInteractive: true,
+  // Dynamic import: the implementation module is only loaded when load()
+  // is invoked.
+  load: () => import('./cacheStats.js'),
+} satisfies Command
+
+export default cacheStats