Files
orcs-code/src/cost-tracker.cacheIntegration.test.ts
viudes 9e23c2bec4 feat(api): expose cache metrics in REPL + normalize across providers (#813)
* feat(api): expose cache metrics in REPL + /cache-stats command

* fix(api): normalize Kimi/DeepSeek/Gemini cache fields through shim layer

* test(api): cover /cache-stats rendering + fix CacheMetrics docstring drift

* fix(api): always reset cache turn counter + include date in /cache-stats rows

* refactor(api): unify shim usage builder + add cost-tracker wiring test

* fix(api): classify private-IP/self-hosted OpenAI endpoints as N/A instead of cold

* fix(api): require colon guard on IPv6 ULA prefix to avoid public-host over-match

* perf(api): ring buffer for cache history + hit rate clamp + .localhost TLD

* fix(api): null guards on formatters + document Codex Responses API shape

* fix(api): defensive start-of-turn reset + config gate fallback + env var docs

* fix(api): trust forwarded cache data on self-hosted URLs (data-driven)

* refactor(api): delegate streaming Responses usage to shared makeUsage helper
2026-04-25 12:38:25 +08:00

129 lines
4.4 KiB
TypeScript

/**
* Integration test for cost-tracker → cacheStatsTracker wiring.
*
* The unit tests in services/api/cacheMetrics.test.ts and
* services/api/cacheStatsTracker.test.ts verify that each piece works
* in isolation. This file verifies that they're ACTUALLY CONNECTED —
* that `addToTotalSessionCost` resolves the provider, extracts metrics,
* and records them on the tracker on every call. Without this test, a
* future refactor could silently unwire the call chain (wrong param
* order, renamed symbol, removed call) and every individual unit test
* would still pass while `/cache-stats` showed empty data.
*
* We use real state — `resetCostState` + `getCurrentTurnCacheMetrics` —
* rather than mocking the tracker module. Fewer moving parts, and the
* test fails for the right reason if anyone breaks the wrapping.
*/
import { beforeEach, describe, expect, test } from 'bun:test'
import { addToTotalSessionCost, resetCostState } from './cost-tracker.js'
import {
getCurrentTurnCacheMetrics,
getSessionCacheMetrics,
} from './services/api/cacheStatsTracker.js'
// BetaUsage-compatible shape — minimum fields addToTotalSessionCost
// needs to run without throwing. Cache fields are the ones we care
// about here; input/output go into model cost calc.
function anthropicUsage(partial: {
input?: number
output?: number
cacheRead?: number
cacheCreation?: number
}): Parameters<typeof addToTotalSessionCost>[1] {
return {
input_tokens: partial.input ?? 0,
output_tokens: partial.output ?? 0,
cache_read_input_tokens: partial.cacheRead ?? 0,
cache_creation_input_tokens: partial.cacheCreation ?? 0,
// BetaUsage has several other optional fields; they're not read by
// the cache-tracking path so we leave them undefined.
} as Parameters<typeof addToTotalSessionCost>[1]
}
beforeEach(() => {
// resetCostState is the wrapped version that ALSO clears the cache
// tracker — this line is itself part of what we're verifying.
resetCostState()
})
describe('addToTotalSessionCost → cacheStatsTracker wiring', () => {
test('records normalized cache metrics on the tracker for each call', () => {
addToTotalSessionCost(
0.01,
anthropicUsage({
input: 200,
output: 50,
cacheRead: 800,
cacheCreation: 100,
}),
'claude-sonnet-4',
)
const turn = getCurrentTurnCacheMetrics()
expect(turn.supported).toBe(true)
expect(turn.read).toBe(800)
expect(turn.created).toBe(100)
// total = fresh(200) + read(800) + created(100) = 1100
expect(turn.total).toBe(1_100)
// hitRate = read / total = 800 / 1100 ≈ 0.727
expect(turn.hitRate).toBeCloseTo(800 / 1_100, 4)
})
test('session aggregate accumulates across multiple API calls', () => {
addToTotalSessionCost(
0.01,
anthropicUsage({ input: 100, cacheRead: 400 }),
'claude-sonnet-4',
)
addToTotalSessionCost(
0.02,
anthropicUsage({ input: 200, cacheRead: 600 }),
'claude-sonnet-4',
)
const session = getSessionCacheMetrics()
expect(session.read).toBe(1_000)
// total = (100+400) + (200+600) = 1300
expect(session.total).toBe(1_300)
expect(session.hitRate).toBeCloseTo(1_000 / 1_300, 4)
})
test('cold turn (no cache read/created) still records as supported', () => {
addToTotalSessionCost(
0.005,
anthropicUsage({ input: 500, output: 100 }),
'claude-sonnet-4',
)
const turn = getCurrentTurnCacheMetrics()
expect(turn.supported).toBe(true)
expect(turn.read).toBe(0)
expect(turn.created).toBe(0)
expect(turn.total).toBe(500)
// hitRate computed against a non-zero total is 0, not null — empty
// cache on a cacheable provider is a legitimate "no-hit" signal.
expect(turn.hitRate).toBe(0)
})
})
describe('resetCostState wrapper also clears cache tracker', () => {
test('resetCostState() zeros both cost counters and cache stats', () => {
// Populate both systems
addToTotalSessionCost(
0.01,
anthropicUsage({ input: 100, cacheRead: 500 }),
'claude-sonnet-4',
)
expect(getSessionCacheMetrics().read).toBe(500)
// resetCostState is the WRAPPED version — bootstrap's
// resetCostState cleared cost state historically but not cache
// stats. The wrapper in cost-tracker.ts adds the second call.
resetCostState()
const session = getSessionCacheMetrics()
expect(session.read).toBe(0)
expect(session.supported).toBe(false)
})
})