feat(api): expose cache metrics in REPL + normalize across providers (#813)
* feat(api): expose cache metrics in REPL + /cache-stats command * fix(api): normalize Kimi/DeepSeek/Gemini cache fields through shim layer * test(api): cover /cache-stats rendering + fix CacheMetrics docstring drift * fix(api): always reset cache turn counter + include date in /cache-stats rows * refactor(api): unify shim usage builder + add cost-tracker wiring test * fix(api): classify private-IP/self-hosted OpenAI endpoints as N/A instead of cold * fix(api): require colon guard on IPv6 ULA prefix to avoid public-host over-match * perf(api): ring buffer for cache history + hit rate clamp + .localhost TLD * fix(api): null guards on formatters + document Codex Responses API shape * fix(api): defensive start-of-turn reset + config gate fallback + env var docs * fix(api): trust forwarded cache data on self-hosted URLs (data-driven) * refactor(api): delegate streaming Responses usage to shared makeUsage helper
This commit is contained in:
782
src/services/api/cacheMetrics.test.ts
Normal file
782
src/services/api/cacheMetrics.test.ts
Normal file
@@ -0,0 +1,782 @@
|
||||
import { expect, test, describe } from 'bun:test'
|
||||
import {
|
||||
extractCacheMetrics,
|
||||
extractCacheReadFromRawUsage,
|
||||
resolveCacheProvider,
|
||||
formatCacheMetricsCompact,
|
||||
formatCacheMetricsFull,
|
||||
addCacheMetrics,
|
||||
} from './cacheMetrics.js'
|
||||
|
||||
describe('extractCacheMetrics — Anthropic (firstParty/bedrock/vertex/foundry)', () => {
|
||||
test('reports read/created separately and computes hit rate over total input', () => {
|
||||
const usage = {
|
||||
input_tokens: 300,
|
||||
output_tokens: 100,
|
||||
cache_read_input_tokens: 800,
|
||||
cache_creation_input_tokens: 200,
|
||||
}
|
||||
const m = extractCacheMetrics(usage, 'anthropic')
|
||||
expect(m.supported).toBe(true)
|
||||
expect(m.read).toBe(800)
|
||||
expect(m.created).toBe(200)
|
||||
// total = fresh(300) + created(200) + read(800) = 1300
|
||||
expect(m.total).toBe(1300)
|
||||
expect(m.hitRate).toBeCloseTo(800 / 1300, 4)
|
||||
})
|
||||
|
||||
test('returns cold metrics when no cache activity yet', () => {
|
||||
const m = extractCacheMetrics({ input_tokens: 500 }, 'anthropic')
|
||||
expect(m.supported).toBe(true)
|
||||
expect(m.read).toBe(0)
|
||||
expect(m.created).toBe(0)
|
||||
expect(m.hitRate).toBe(0)
|
||||
})
|
||||
|
||||
test('null hit rate when usage has no input at all', () => {
|
||||
const m = extractCacheMetrics({}, 'anthropic')
|
||||
expect(m.supported).toBe(true)
|
||||
expect(m.hitRate).toBeNull()
|
||||
})
|
||||
})
|
||||
|
||||
// NOTE: OpenAI/Codex/Kimi/DeepSeek/Gemini raw shapes are now tested through
|
||||
// extractCacheReadFromRawUsage (below). extractCacheMetrics sees the
|
||||
// post-shim Anthropic shape for every provider, so the tests here verify
|
||||
// that the shape lookup works uniformly against the shimmed fields.
|
||||
|
||||
describe('extractCacheMetrics — post-shim Anthropic shape (applies to all providers)', () => {
|
||||
test('OpenAI post-shim (openai bucket) — reads Anthropic fields injected by convertChunkUsage', () => {
|
||||
// This is what cost-tracker actually sees for OpenAI upstreams: the
|
||||
// shim has already subtracted cached from prompt_tokens and moved it
|
||||
// to cache_read_input_tokens.
|
||||
const shimmed = {
|
||||
input_tokens: 800, // fresh = 2000 - 1200
|
||||
output_tokens: 300,
|
||||
cache_creation_input_tokens: 0,
|
||||
cache_read_input_tokens: 1_200,
|
||||
}
|
||||
const m = extractCacheMetrics(shimmed, 'openai')
|
||||
expect(m.supported).toBe(true)
|
||||
expect(m.read).toBe(1_200)
|
||||
expect(m.created).toBe(0)
|
||||
expect(m.total).toBe(2_000) // 800 fresh + 1200 read
|
||||
expect(m.hitRate).toBe(0.6)
|
||||
})
|
||||
|
||||
test('Codex post-shim — same Anthropic shape as OpenAI', () => {
|
||||
const shimmed = {
|
||||
input_tokens: 900, // 1500 - 600
|
||||
cache_creation_input_tokens: 0,
|
||||
cache_read_input_tokens: 600,
|
||||
}
|
||||
const m = extractCacheMetrics(shimmed, 'codex')
|
||||
expect(m.read).toBe(600)
|
||||
expect(m.total).toBe(1_500)
|
||||
expect(m.hitRate).toBe(0.4)
|
||||
})
|
||||
|
||||
test('Kimi post-shim — shim moved top-level cached_tokens into Anthropic field', () => {
|
||||
const shimmed = {
|
||||
input_tokens: 600, // 1000 - 400
|
||||
cache_creation_input_tokens: 0,
|
||||
cache_read_input_tokens: 400,
|
||||
}
|
||||
const m = extractCacheMetrics(shimmed, 'kimi')
|
||||
expect(m.read).toBe(400)
|
||||
expect(m.total).toBe(1_000)
|
||||
expect(m.hitRate).toBe(0.4)
|
||||
})
|
||||
|
||||
test('DeepSeek post-shim — hit moved to cache_read_input_tokens, miss to input_tokens', () => {
|
||||
const shimmed = {
|
||||
input_tokens: 300, // miss
|
||||
cache_creation_input_tokens: 0,
|
||||
cache_read_input_tokens: 700, // hit
|
||||
}
|
||||
const m = extractCacheMetrics(shimmed, 'deepseek')
|
||||
expect(m.read).toBe(700)
|
||||
expect(m.total).toBe(1_000)
|
||||
expect(m.hitRate).toBe(0.7)
|
||||
})
|
||||
|
||||
test('Gemini post-shim — cached_content_token_count moved to cache_read_input_tokens', () => {
|
||||
const shimmed = {
|
||||
input_tokens: 800, // 4000 - 3200
|
||||
cache_creation_input_tokens: 0,
|
||||
cache_read_input_tokens: 3_200,
|
||||
}
|
||||
const m = extractCacheMetrics(shimmed, 'gemini')
|
||||
expect(m.read).toBe(3_200)
|
||||
expect(m.total).toBe(4_000)
|
||||
expect(m.hitRate).toBe(0.8)
|
||||
})
|
||||
})
|
||||
|
||||
describe('extractCacheReadFromRawUsage — single source of truth for shim layer', () => {
|
||||
test('Anthropic-native passthrough: cache_read_input_tokens', () => {
|
||||
expect(
|
||||
extractCacheReadFromRawUsage({ cache_read_input_tokens: 1_500 }),
|
||||
).toBe(1_500)
|
||||
})
|
||||
|
||||
test('OpenAI Chat Completions: prompt_tokens_details.cached_tokens', () => {
|
||||
expect(
|
||||
extractCacheReadFromRawUsage({
|
||||
prompt_tokens: 2_000,
|
||||
prompt_tokens_details: { cached_tokens: 1_200 },
|
||||
}),
|
||||
).toBe(1_200)
|
||||
})
|
||||
|
||||
test('Codex Responses API: input_tokens_details.cached_tokens', () => {
|
||||
expect(
|
||||
extractCacheReadFromRawUsage({
|
||||
input_tokens: 1_500,
|
||||
input_tokens_details: { cached_tokens: 600 },
|
||||
}),
|
||||
).toBe(600)
|
||||
})
|
||||
|
||||
test('Kimi / Moonshot: top-level cached_tokens', () => {
|
||||
expect(
|
||||
extractCacheReadFromRawUsage({ prompt_tokens: 1_000, cached_tokens: 400 }),
|
||||
).toBe(400)
|
||||
})
|
||||
|
||||
test('DeepSeek: prompt_cache_hit_tokens', () => {
|
||||
expect(
|
||||
extractCacheReadFromRawUsage({
|
||||
prompt_cache_hit_tokens: 700,
|
||||
prompt_cache_miss_tokens: 300,
|
||||
}),
|
||||
).toBe(700)
|
||||
})
|
||||
|
||||
test('Gemini: cached_content_token_count', () => {
|
||||
expect(
|
||||
extractCacheReadFromRawUsage({
|
||||
prompt_token_count: 4_000,
|
||||
cached_content_token_count: 3_200,
|
||||
}),
|
||||
).toBe(3_200)
|
||||
})
|
||||
|
||||
test('no cache fields at all → 0 (Copilot/Ollama/unknown shape)', () => {
|
||||
expect(extractCacheReadFromRawUsage({ prompt_tokens: 500 })).toBe(0)
|
||||
})
|
||||
|
||||
test('Anthropic field wins over OpenAI field when both present', () => {
|
||||
// Shouldn't happen in practice, but if usage was double-annotated we
|
||||
// trust the Anthropic-native number (it's the more authoritative one).
|
||||
expect(
|
||||
extractCacheReadFromRawUsage({
|
||||
cache_read_input_tokens: 999,
|
||||
prompt_tokens_details: { cached_tokens: 111 },
|
||||
}),
|
||||
).toBe(999)
|
||||
})
|
||||
|
||||
test('null/undefined/non-object → 0', () => {
|
||||
expect(extractCacheReadFromRawUsage(null)).toBe(0)
|
||||
expect(extractCacheReadFromRawUsage(undefined)).toBe(0)
|
||||
expect(extractCacheReadFromRawUsage('nope' as unknown as never)).toBe(0)
|
||||
})
|
||||
})
|
||||
|
||||
describe('extractCacheMetrics — Copilot / Ollama (unsupported)', () => {
|
||||
test('returns supported:false with all zeros and null hitRate for Copilot', () => {
|
||||
const m = extractCacheMetrics({ prompt_tokens: 1000 }, 'copilot')
|
||||
expect(m.supported).toBe(false)
|
||||
expect(m.read).toBe(0)
|
||||
expect(m.created).toBe(0)
|
||||
expect(m.hitRate).toBeNull()
|
||||
})
|
||||
|
||||
test('returns supported:false for Ollama', () => {
|
||||
const m = extractCacheMetrics({ prompt_tokens: 42 }, 'ollama')
|
||||
expect(m.supported).toBe(false)
|
||||
expect(m.hitRate).toBeNull()
|
||||
})
|
||||
|
||||
test('Copilot serving Claude (copilot-claude) is supported and uses Anthropic fields', () => {
|
||||
const usage = {
|
||||
input_tokens: 200,
|
||||
cache_read_input_tokens: 800,
|
||||
cache_creation_input_tokens: 100,
|
||||
}
|
||||
const m = extractCacheMetrics(usage, 'copilot-claude')
|
||||
expect(m.supported).toBe(true)
|
||||
expect(m.read).toBe(800)
|
||||
expect(m.created).toBe(100)
|
||||
expect(m.total).toBe(1_100)
|
||||
})
|
||||
})
|
||||
|
||||
describe('extractCacheMetrics — bad/empty input', () => {
|
||||
test('null usage returns unsupported', () => {
|
||||
expect(extractCacheMetrics(null, 'anthropic').supported).toBe(false)
|
||||
})
|
||||
|
||||
test('non-object usage returns unsupported', () => {
|
||||
expect(extractCacheMetrics('oops' as unknown as never, 'openai').supported).toBe(
|
||||
false,
|
||||
)
|
||||
})
|
||||
})
|
||||
|
||||
describe('resolveCacheProvider', () => {
|
||||
test('firstParty → anthropic', () => {
|
||||
expect(resolveCacheProvider('firstParty')).toBe('anthropic')
|
||||
})
|
||||
test('bedrock/vertex/foundry → anthropic', () => {
|
||||
expect(resolveCacheProvider('bedrock')).toBe('anthropic')
|
||||
expect(resolveCacheProvider('vertex')).toBe('anthropic')
|
||||
expect(resolveCacheProvider('foundry')).toBe('anthropic')
|
||||
})
|
||||
test('github without claude hint → copilot (unsupported)', () => {
|
||||
expect(resolveCacheProvider('github')).toBe('copilot')
|
||||
})
|
||||
test('github with claude hint → copilot-claude', () => {
|
||||
expect(
|
||||
resolveCacheProvider('github', { githubNativeAnthropic: true }),
|
||||
).toBe('copilot-claude')
|
||||
})
|
||||
test('openai with localhost / loopback → self-hosted', () => {
|
||||
// These used to return 'ollama'; the bucket is now 'self-hosted'
|
||||
// because not every local OpenAI-compatible server is Ollama
|
||||
// (could be vLLM, LM Studio, LocalAI, text-generation-webui).
|
||||
// Both buckets collapse to supported=false downstream.
|
||||
expect(
|
||||
resolveCacheProvider('openai', { openAiBaseUrl: 'http://localhost:8080/v1' }),
|
||||
).toBe('self-hosted')
|
||||
expect(
|
||||
resolveCacheProvider('openai', { openAiBaseUrl: 'http://127.0.0.1:1234/v1' }),
|
||||
).toBe('self-hosted')
|
||||
// Localhost:11434 hits the self-hosted branch first — 'ollama' only
|
||||
// kicks in when the :11434 port appears on a public-looking URL
|
||||
// (which would be unusual but still deserves honest classification).
|
||||
expect(
|
||||
resolveCacheProvider('openai', { openAiBaseUrl: 'http://localhost:11434/v1' }),
|
||||
).toBe('self-hosted')
|
||||
expect(
|
||||
resolveCacheProvider('openai', { openAiBaseUrl: 'http://[::1]:5000/v1' }),
|
||||
).toBe('self-hosted')
|
||||
})
|
||||
|
||||
test('openai on RFC1918 private IP → self-hosted (pre-fix: misclassified as openai)', () => {
|
||||
// These are the exact cases the reviewer flagged. Before this fix,
|
||||
// a vLLM / LocalAI server on a LAN address fell through to the
|
||||
// 'openai' branch and /cache-stats showed '[Cache: cold]' — which
|
||||
// users read as "my cache is broken" when the provider simply
|
||||
// didn't report cache fields. Now they land in 'self-hosted' and
|
||||
// /cache-stats shows '[Cache: N/A]'.
|
||||
expect(
|
||||
resolveCacheProvider('openai', { openAiBaseUrl: 'http://192.168.1.50:8000/v1' }),
|
||||
).toBe('self-hosted')
|
||||
expect(
|
||||
resolveCacheProvider('openai', { openAiBaseUrl: 'http://10.0.0.7:8080/v1' }),
|
||||
).toBe('self-hosted')
|
||||
expect(
|
||||
resolveCacheProvider('openai', { openAiBaseUrl: 'http://172.20.0.3:5000/v1' }),
|
||||
).toBe('self-hosted')
|
||||
})
|
||||
|
||||
test('openai on link-local / CGNAT → self-hosted', () => {
|
||||
expect(
|
||||
resolveCacheProvider('openai', { openAiBaseUrl: 'http://169.254.169.254/v1' }),
|
||||
).toBe('self-hosted')
|
||||
expect(
|
||||
resolveCacheProvider('openai', { openAiBaseUrl: 'http://100.64.1.5:8000/v1' }),
|
||||
).toBe('self-hosted')
|
||||
})
|
||||
|
||||
test('openai on reserved TLD (.local / .internal / .lan / .home.arpa) → self-hosted', () => {
|
||||
// Per RFC 6761 (.local/mDNS), RFC 8375 (.home.arpa), and widely
|
||||
// used .internal / .lan conventions. These never resolve publicly.
|
||||
expect(
|
||||
resolveCacheProvider('openai', { openAiBaseUrl: 'http://llm.internal:5000/v1' }),
|
||||
).toBe('self-hosted')
|
||||
expect(
|
||||
resolveCacheProvider('openai', { openAiBaseUrl: 'http://llm.local:8080/v1' }),
|
||||
).toBe('self-hosted')
|
||||
expect(
|
||||
resolveCacheProvider('openai', { openAiBaseUrl: 'http://vllm.home.arpa/v1' }),
|
||||
).toBe('self-hosted')
|
||||
expect(
|
||||
resolveCacheProvider('openai', { openAiBaseUrl: 'http://box.lan:1234/v1' }),
|
||||
).toBe('self-hosted')
|
||||
})
|
||||
|
||||
test('openai on IPv6 local / link-local → self-hosted', () => {
|
||||
expect(
|
||||
resolveCacheProvider('openai', { openAiBaseUrl: 'http://[fe80::1]:8000/v1' }),
|
||||
).toBe('self-hosted')
|
||||
expect(
|
||||
resolveCacheProvider('openai', { openAiBaseUrl: 'http://[fd12:3456::7]:8080/v1' }),
|
||||
).toBe('self-hosted')
|
||||
expect(
|
||||
resolveCacheProvider('openai', { openAiBaseUrl: 'http://[fc00::1]:8080/v1' }),
|
||||
).toBe('self-hosted')
|
||||
})
|
||||
|
||||
test('IPv6 ULA prefix (fc/fd) does NOT over-match public hostnames', () => {
|
||||
// Regression guard: an early version of isLocalOrPrivateUrl checked
|
||||
// `h.startsWith('fc')` / `startsWith('fd')` without a colon guard,
|
||||
// which misclassified legitimate public hosts whose names happen to
|
||||
// begin with those letters. The fix requires a colon in the match
|
||||
// so only real IPv6 literals hit the branch.
|
||||
expect(
|
||||
resolveCacheProvider('openai', {
|
||||
openAiBaseUrl: 'https://fc-api.example.com/v1',
|
||||
}),
|
||||
).toBe('openai')
|
||||
expect(
|
||||
resolveCacheProvider('openai', {
|
||||
openAiBaseUrl: 'https://fd-hosted.example.com/v1',
|
||||
}),
|
||||
).toBe('openai')
|
||||
// Same goes for names that look like hex prefixes but aren't IPv6.
|
||||
expect(
|
||||
resolveCacheProvider('openai', {
|
||||
openAiBaseUrl: 'https://fcbench.net/v1',
|
||||
}),
|
||||
).toBe('openai')
|
||||
})
|
||||
|
||||
test('openai with :11434 on a public host → ollama (default-port heuristic)', () => {
|
||||
// Contrived but the heuristic should still fire — someone running
|
||||
// Ollama behind a reverse proxy with port preserved.
|
||||
expect(
|
||||
resolveCacheProvider('openai', {
|
||||
openAiBaseUrl: 'https://ollama.example.com:11434/v1',
|
||||
}),
|
||||
).toBe('ollama')
|
||||
})
|
||||
|
||||
test('openai with moonshot URL → kimi', () => {
|
||||
expect(
|
||||
resolveCacheProvider('openai', { openAiBaseUrl: 'https://api.moonshot.ai/v1' }),
|
||||
).toBe('kimi')
|
||||
})
|
||||
test('openai with deepseek URL → deepseek', () => {
|
||||
expect(
|
||||
resolveCacheProvider('openai', { openAiBaseUrl: 'https://api.deepseek.com/v1' }),
|
||||
).toBe('deepseek')
|
||||
})
|
||||
test('private IP beats hosted-keyword matching (self-hosted takes priority)', () => {
|
||||
// A pathological URL: a private-IP host whose path string contains
|
||||
// "deepseek". Self-hosted detection must run FIRST so the URL
|
||||
// classifies honestly — the path alone doesn't prove the upstream
|
||||
// is the real DeepSeek API.
|
||||
expect(
|
||||
resolveCacheProvider('openai', {
|
||||
openAiBaseUrl: 'http://10.0.0.5:8000/v1/deepseek-proxy',
|
||||
}),
|
||||
).toBe('self-hosted')
|
||||
})
|
||||
test('plain openai remains openai', () => {
|
||||
expect(
|
||||
resolveCacheProvider('openai', { openAiBaseUrl: 'https://api.openai.com/v1' }),
|
||||
).toBe('openai')
|
||||
})
|
||||
test('unparseable base URL falls back to substring heuristic', () => {
|
||||
// Bare host:port without a scheme is common in misconfigured env.
|
||||
// We can't URL-parse it, but we still honor the "localhost" hint so
|
||||
// a broken config doesn't silently masquerade as cache-capable.
|
||||
expect(
|
||||
resolveCacheProvider('openai', { openAiBaseUrl: 'localhost:8000' }),
|
||||
).toBe('self-hosted')
|
||||
// An unparseable and opaque string falls through to plain 'openai'
|
||||
// (best-effort — nothing we can infer from "foo-bar-baz").
|
||||
expect(
|
||||
resolveCacheProvider('openai', { openAiBaseUrl: '???' }),
|
||||
).toBe('openai')
|
||||
})
|
||||
test('empty base URL → plain openai', () => {
|
||||
// No hint at all: assume the canonical api.openai.com.
|
||||
expect(resolveCacheProvider('openai')).toBe('openai')
|
||||
expect(
|
||||
resolveCacheProvider('openai', { openAiBaseUrl: '' }),
|
||||
).toBe('openai')
|
||||
})
|
||||
test('codex → codex', () => {
|
||||
expect(resolveCacheProvider('codex')).toBe('codex')
|
||||
})
|
||||
test('gemini → gemini', () => {
|
||||
expect(resolveCacheProvider('gemini')).toBe('gemini')
|
||||
})
|
||||
})
|
||||
|
||||
describe('resolveCacheProvider — .localhost TLD (RFC 6761)', () => {
|
||||
test('subdomains of .localhost classify as self-hosted', () => {
|
||||
// Chrome, Firefox, and systemd-resolved all natively resolve
|
||||
// *.localhost to 127.0.0.1. Kubernetes Ingress and docker-compose
|
||||
// setups commonly use app.localhost, api.localhost, etc.
|
||||
expect(
|
||||
resolveCacheProvider('openai', {
|
||||
openAiBaseUrl: 'http://app.localhost:3000/v1',
|
||||
}),
|
||||
).toBe('self-hosted')
|
||||
expect(
|
||||
resolveCacheProvider('openai', {
|
||||
openAiBaseUrl: 'http://api.localhost/v1',
|
||||
}),
|
||||
).toBe('self-hosted')
|
||||
expect(
|
||||
resolveCacheProvider('openai', {
|
||||
openAiBaseUrl: 'http://llm.dev.localhost:8080/v1',
|
||||
}),
|
||||
).toBe('self-hosted')
|
||||
})
|
||||
|
||||
test('.localhost TLD does NOT match substring collisions', () => {
|
||||
// Guard against regressions where `localhost` would match via
|
||||
// substring rather than TLD semantics. `localhostify.com` and
|
||||
// `mylocalhost.net` must stay on the public `openai` path.
|
||||
expect(
|
||||
resolveCacheProvider('openai', {
|
||||
openAiBaseUrl: 'https://localhostify.com/v1',
|
||||
}),
|
||||
).toBe('openai')
|
||||
expect(
|
||||
resolveCacheProvider('openai', {
|
||||
openAiBaseUrl: 'https://mylocalhost.net/v1',
|
||||
}),
|
||||
).toBe('openai')
|
||||
})
|
||||
})
|
||||
|
||||
describe('extractCacheMetrics — hit rate clamp', () => {
|
||||
test('hitRate is clamped to 1.0 on pathological input (read > total)', () => {
|
||||
// Defensive guard: with valid non-negative inputs the math enforces
|
||||
// read <= total, so hitRate cannot exceed 1. But an upstream shim
|
||||
// bug (e.g. reading a negative `fresh` from a future provider) could
|
||||
// break the invariant. `Math.min(1, read/total)` caps the display at
|
||||
// 100% rather than letting a `read=800 total=500` case render as
|
||||
// "hit 160%" or (worse) null, which would hide the anomaly.
|
||||
const metrics = extractCacheMetrics(
|
||||
{
|
||||
cache_read_input_tokens: 800,
|
||||
cache_creation_input_tokens: 0,
|
||||
// asNumber keeps finite negatives, so fresh = -500 → total =
|
||||
// 800 + 0 + (-500) = 300, read=800 → raw ratio 2.67, clamp to 1.
|
||||
input_tokens: -500,
|
||||
} as unknown as Record<string, unknown>,
|
||||
'anthropic',
|
||||
)
|
||||
expect(metrics.supported).toBe(true)
|
||||
expect(metrics.hitRate).toBe(1)
|
||||
})
|
||||
|
||||
test('normal inputs still yield accurate fractional hit rates', () => {
|
||||
// Regression: clamp must not perturb the happy path.
|
||||
const metrics = extractCacheMetrics(
|
||||
{
|
||||
cache_read_input_tokens: 300,
|
||||
cache_creation_input_tokens: 0,
|
||||
input_tokens: 700,
|
||||
},
|
||||
'anthropic',
|
||||
)
|
||||
expect(metrics.hitRate).toBeCloseTo(0.3, 5)
|
||||
})
|
||||
})
|
||||
|
||||
describe('extractCacheMetrics — self-hosted bucket (data-driven)', () => {
|
||||
test('vanilla self-hosted endpoint without cache fields → unsupported / N/A', () => {
|
||||
// vLLM, LocalAI, text-generation-webui, etc. emit no cache fields
|
||||
// at all. With read=created=0 we mark unsupported so the REPL shows
|
||||
// honest '[Cache: N/A]' instead of a fabricated 0%.
|
||||
const metrics = extractCacheMetrics(
|
||||
{ input_tokens: 1_000, output_tokens: 200 },
|
||||
'self-hosted',
|
||||
)
|
||||
expect(metrics.supported).toBe(false)
|
||||
expect(metrics.hitRate).toBeNull()
|
||||
expect(metrics.read).toBe(0)
|
||||
expect(metrics.created).toBe(0)
|
||||
})
|
||||
|
||||
test('internal reverse proxy forwarding real cache data → supported', () => {
|
||||
// Review-blocker regression guard: an enterprise setup with an
|
||||
// internal proxy on a private URL (e.g. `http://llm.internal:5000/v1`)
|
||||
// forwarding to OpenAI / Kimi / DeepSeek / Gemini WILL deliver real
|
||||
// cache fields via the shim. Pre-fix we would discard them because
|
||||
// the URL heuristic classified the endpoint as 'self-hosted'. Now
|
||||
// the data itself decides: any non-zero cache activity flows through
|
||||
// the same normalization as an OpenAI bucket.
|
||||
const shimmed = {
|
||||
input_tokens: 800, // fresh (post-shim, cached already subtracted)
|
||||
cache_read_input_tokens: 1_200, // shim extracted from upstream
|
||||
cache_creation_input_tokens: 0,
|
||||
}
|
||||
const metrics = extractCacheMetrics(shimmed, 'self-hosted')
|
||||
expect(metrics.supported).toBe(true)
|
||||
expect(metrics.read).toBe(1_200)
|
||||
expect(metrics.total).toBe(2_000)
|
||||
expect(metrics.hitRate).toBe(0.6)
|
||||
})
|
||||
|
||||
test('proxy with cache_creation but zero cache_read → still supported', () => {
|
||||
// Mirror of the above for the first-call / cold-cache scenario:
|
||||
// Anthropic-compatible upstreams emit creation tokens on the first
|
||||
// request that primes the cache. Self-hosted proxy must preserve
|
||||
// that signal, not swallow it because read is still 0.
|
||||
const shimmed = {
|
||||
input_tokens: 500,
|
||||
cache_read_input_tokens: 0,
|
||||
cache_creation_input_tokens: 800,
|
||||
}
|
||||
const metrics = extractCacheMetrics(shimmed, 'self-hosted')
|
||||
expect(metrics.supported).toBe(true)
|
||||
expect(metrics.created).toBe(800)
|
||||
expect(metrics.read).toBe(0)
|
||||
})
|
||||
})
|
||||
|
||||
describe('formatCacheMetrics — defensive null/undefined guards', () => {
|
||||
test('formatCacheMetricsCompact returns N/A for undefined input', () => {
|
||||
// Signature says `CacheMetrics` but runtime bug on a failed API
|
||||
// response could leave the caller with nothing. The formatter
|
||||
// should degrade gracefully rather than throw on `.supported`.
|
||||
expect(formatCacheMetricsCompact(undefined)).toBe('[Cache: N/A]')
|
||||
expect(formatCacheMetricsCompact(null as unknown as undefined)).toBe(
|
||||
'[Cache: N/A]',
|
||||
)
|
||||
})
|
||||
|
||||
test('formatCacheMetricsFull returns N/A for undefined input', () => {
|
||||
expect(formatCacheMetricsFull(undefined)).toBe('[Cache: N/A]')
|
||||
expect(formatCacheMetricsFull(null as unknown as undefined)).toBe(
|
||||
'[Cache: N/A]',
|
||||
)
|
||||
})
|
||||
})
|
||||
|
||||
describe('formatCacheMetricsCompact — self-hosted display paths', () => {
|
||||
test('vanilla self-hosted (no cache data) renders as N/A', () => {
|
||||
const metrics = extractCacheMetrics(
|
||||
{ input_tokens: 500 },
|
||||
'self-hosted',
|
||||
)
|
||||
expect(formatCacheMetricsCompact(metrics)).toBe('[Cache: N/A]')
|
||||
expect(formatCacheMetricsFull(metrics)).toBe('[Cache: N/A]')
|
||||
})
|
||||
|
||||
test('self-hosted proxy with forwarded cache data renders real metrics', () => {
|
||||
// Full display-path regression guard for the review-blocker fix:
|
||||
// the user must see the real hit rate that the upstream emitted,
|
||||
// not a silent N/A because the URL looked private.
|
||||
const metrics = extractCacheMetrics(
|
||||
{
|
||||
input_tokens: 800,
|
||||
cache_read_input_tokens: 1_200,
|
||||
cache_creation_input_tokens: 0,
|
||||
},
|
||||
'self-hosted',
|
||||
)
|
||||
expect(formatCacheMetricsCompact(metrics)).toBe('[Cache: 1.2k read • hit 60%]')
|
||||
expect(formatCacheMetricsFull(metrics)).toBe(
|
||||
'[Cache: read=1.2k created=0 hit=60%]',
|
||||
)
|
||||
})
|
||||
})
|
||||
|
||||
describe('formatCacheMetricsCompact — snapshot-stable output', () => {
|
||||
test('supported with reads shows "k" abbreviation and hit rate', () => {
|
||||
const out = formatCacheMetricsCompact({
|
||||
read: 1_234,
|
||||
created: 0,
|
||||
total: 10_000,
|
||||
hitRate: 0.1234,
|
||||
supported: true,
|
||||
})
|
||||
expect(out).toBe('[Cache: 1.2k read • hit 12%]')
|
||||
})
|
||||
|
||||
test('supported with no cache activity renders "cold"', () => {
|
||||
const out = formatCacheMetricsCompact({
|
||||
read: 0,
|
||||
created: 0,
|
||||
total: 500,
|
||||
hitRate: 0,
|
||||
supported: true,
|
||||
})
|
||||
expect(out).toBe('[Cache: cold]')
|
||||
})
|
||||
|
||||
test('unsupported renders "N/A"', () => {
|
||||
const out = formatCacheMetricsCompact({
|
||||
read: 0,
|
||||
created: 0,
|
||||
total: 0,
|
||||
hitRate: null,
|
||||
supported: false,
|
||||
})
|
||||
expect(out).toBe('[Cache: N/A]')
|
||||
})
|
||||
|
||||
test('small numbers render without abbreviation', () => {
|
||||
const out = formatCacheMetricsCompact({
|
||||
read: 42,
|
||||
created: 0,
|
||||
total: 100,
|
||||
hitRate: 0.42,
|
||||
supported: true,
|
||||
})
|
||||
expect(out).toBe('[Cache: 42 read • hit 42%]')
|
||||
})
|
||||
})
|
||||
|
||||
describe('formatCacheMetricsFull — snapshot-stable output', () => {
|
||||
test('supported shows all fields', () => {
|
||||
const out = formatCacheMetricsFull({
|
||||
read: 1_234,
|
||||
created: 250,
|
||||
total: 10_000,
|
||||
hitRate: 0.1234,
|
||||
supported: true,
|
||||
})
|
||||
expect(out).toBe('[Cache: read=1.2k created=250 hit=12%]')
|
||||
})
|
||||
|
||||
test('null hit rate renders n/a', () => {
|
||||
const out = formatCacheMetricsFull({
|
||||
read: 0,
|
||||
created: 0,
|
||||
total: 0,
|
||||
hitRate: null,
|
||||
supported: true,
|
||||
})
|
||||
expect(out).toBe('[Cache: read=0 created=0 hit=n/a]')
|
||||
})
|
||||
|
||||
test('unsupported renders "N/A"', () => {
|
||||
const out = formatCacheMetricsFull({
|
||||
read: 0,
|
||||
created: 0,
|
||||
total: 0,
|
||||
hitRate: null,
|
||||
supported: false,
|
||||
})
|
||||
expect(out).toBe('[Cache: N/A]')
|
||||
})
|
||||
})
|
||||
|
||||
describe('hit-rate edge cases (plan-mandated coverage)', () => {
|
||||
test('0 read / 0 created on supported provider → hitRate = 0 (not null) when total > 0', () => {
|
||||
const m = extractCacheMetrics({ input_tokens: 500 }, 'anthropic')
|
||||
expect(m.read).toBe(0)
|
||||
expect(m.created).toBe(0)
|
||||
expect(m.hitRate).toBe(0)
|
||||
})
|
||||
|
||||
test('read only (no created) computes proportion correctly', () => {
|
||||
const m = extractCacheMetrics(
|
||||
{ input_tokens: 0, cache_read_input_tokens: 800, cache_creation_input_tokens: 0 },
|
||||
'anthropic',
|
||||
)
|
||||
expect(m.read).toBe(800)
|
||||
expect(m.created).toBe(0)
|
||||
expect(m.total).toBe(800)
|
||||
expect(m.hitRate).toBe(1)
|
||||
})
|
||||
|
||||
test('created only (first turn — no reads yet) gives 0 hit rate', () => {
|
||||
const m = extractCacheMetrics(
|
||||
{
|
||||
input_tokens: 200,
|
||||
cache_read_input_tokens: 0,
|
||||
cache_creation_input_tokens: 1_000,
|
||||
},
|
||||
'anthropic',
|
||||
)
|
||||
expect(m.read).toBe(0)
|
||||
expect(m.created).toBe(1_000)
|
||||
expect(m.total).toBe(1_200)
|
||||
expect(m.hitRate).toBe(0)
|
||||
})
|
||||
|
||||
test('mixed read + created + fresh input — full denominator', () => {
|
||||
const m = extractCacheMetrics(
|
||||
{
|
||||
input_tokens: 500,
|
||||
cache_read_input_tokens: 3_000,
|
||||
cache_creation_input_tokens: 1_500,
|
||||
},
|
||||
'anthropic',
|
||||
)
|
||||
// Denominator = fresh(500) + created(1500) + read(3000) = 5_000
|
||||
// Hit = read/total = 3000 / 5000 = 0.6
|
||||
expect(m.total).toBe(5_000)
|
||||
expect(m.hitRate).toBe(0.6)
|
||||
})
|
||||
|
||||
test('N/A (unsupported provider) preserves null hit-rate even with populated usage', () => {
|
||||
// Simulate a Copilot usage payload that might look like OpenAI shape —
|
||||
// we must NOT try to read it and must report supported:false.
|
||||
const m = extractCacheMetrics(
|
||||
{ prompt_tokens: 5_000, prompt_tokens_details: { cached_tokens: 2_000 } },
|
||||
'copilot',
|
||||
)
|
||||
expect(m.supported).toBe(false)
|
||||
expect(m.read).toBe(0)
|
||||
expect(m.hitRate).toBeNull()
|
||||
})
|
||||
})
|
||||
|
||||
describe('addCacheMetrics — session aggregation', () => {
|
||||
test('sums read/created/total and recomputes hit rate', () => {
|
||||
const a = {
|
||||
read: 100,
|
||||
created: 50,
|
||||
total: 300,
|
||||
hitRate: 100 / 300,
|
||||
supported: true,
|
||||
}
|
||||
const b = {
|
||||
read: 200,
|
||||
created: 0,
|
||||
total: 400,
|
||||
hitRate: 0.5,
|
||||
supported: true,
|
||||
}
|
||||
const sum = addCacheMetrics(a, b)
|
||||
expect(sum.read).toBe(300)
|
||||
expect(sum.created).toBe(50)
|
||||
expect(sum.total).toBe(700)
|
||||
expect(sum.hitRate).toBeCloseTo(300 / 700, 5)
|
||||
})
|
||||
|
||||
test('unsupported + supported = supported (so we never lose honest data)', () => {
|
||||
const unsupported = {
|
||||
read: 0,
|
||||
created: 0,
|
||||
total: 0,
|
||||
hitRate: null,
|
||||
supported: false,
|
||||
}
|
||||
const supported = {
|
||||
read: 10,
|
||||
created: 0,
|
||||
total: 100,
|
||||
hitRate: 0.1,
|
||||
supported: true,
|
||||
}
|
||||
expect(addCacheMetrics(unsupported, supported)).toBe(supported)
|
||||
expect(addCacheMetrics(supported, unsupported)).toBe(supported)
|
||||
})
|
||||
|
||||
test('unsupported + unsupported = unsupported', () => {
|
||||
const u = {
|
||||
read: 0,
|
||||
created: 0,
|
||||
total: 0,
|
||||
hitRate: null,
|
||||
supported: false,
|
||||
}
|
||||
const sum = addCacheMetrics(u, u)
|
||||
expect(sum.supported).toBe(false)
|
||||
})
|
||||
})
|
||||
Reference in New Issue
Block a user