feat(api): expose cache metrics in REPL + normalize across providers (#813)

* feat(api): expose cache metrics in REPL + /cache-stats command

* fix(api): normalize Kimi/DeepSeek/Gemini cache fields through shim layer

* test(api): cover /cache-stats rendering + fix CacheMetrics docstring drift

* fix(api): always reset cache turn counter + include date in /cache-stats rows

* refactor(api): unify shim usage builder + add cost-tracker wiring test

* fix(api): classify private-IP/self-hosted OpenAI endpoints as N/A instead of cold

* fix(api): require colon guard on IPv6 ULA prefix to avoid public-host over-match

* perf(api): ring buffer for cache history + hit rate clamp + .localhost TLD

* fix(api): null guards on formatters + document Codex Responses API shape

* fix(api): defensive start-of-turn reset + config gate fallback + env var docs

* fix(api): trust forwarded cache data on self-hosted URLs (data-driven)

* refactor(api): delegate streaming Responses usage to shared makeUsage helper
This commit is contained in:
viudes
2026-04-25 01:38:25 -03:00
committed by GitHub
parent 9070220292
commit 9e23c2bec4
20 changed files with 2749 additions and 46 deletions

View File

@@ -133,6 +133,8 @@ import { hasConsoleBillingAccess } from '../utils/billing.js';
import { logEvent, type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } from 'src/services/analytics/index.js';
import { getFeatureValue_CACHED_MAY_BE_STALE } from 'src/services/analytics/growthbook.js';
import { textForResubmit, handleMessageFromStream, type StreamingToolUse, type StreamingThinking, isCompactBoundaryMessage, getMessagesAfterCompactBoundary, getContentText, createUserMessage, createAssistantMessage, createTurnDurationMessage, createAgentsKilledMessage, createApiMetricsMessage, createSystemMessage, createCommandInputMessage, formatCommandInputTags } from '../utils/messages.js';
import { getCurrentTurnCacheMetrics, resetCurrentTurn } from '../services/api/cacheStatsTracker.js';
import { formatCacheMetricsCompact, formatCacheMetricsFull } from '../services/api/cacheMetrics.js';
import { generateSessionTitle } from '../utils/sessionTitle.js';
import { BASH_INPUT_TAG, COMMAND_MESSAGE_TAG, COMMAND_NAME_TAG, LOCAL_COMMAND_STDOUT_TAG } from '../constants/xml.js';
import { escapeXml } from '../utils/xml.js';
@@ -2921,6 +2923,13 @@ export function REPL({
// isLoading is derived from queryGuard — tryStart() above already
// transitioned dispatching→running, so no setter call needed here.
resetTimingRefs();
// Start-of-turn cache tracker reset. The end-of-turn path at the
// bottom of this function already resets, but mirror the call here
// so a turn that never reaches end-of-turn (crash, unhandled
// rejection, process exit) still starts clean on the next one.
// Idempotent with respect to the end-of-turn reset — double-reset
// is a no-op.
resetCurrentTurn();
setMessages(oldMessages => [...oldMessages, ...newMessages]);
responseLengthRef.current = 0;
if (feature('TOKEN_BUDGET')) {
@@ -3019,6 +3028,38 @@ export function REPL({
setMessages(prev => [...prev, createTurnDurationMessage(turnDurationMs, budgetInfo, count(prev, isLoggableMessage))]);
}
}
// Cache stats line — controlled by `/config showCacheStats`. Shows
// per-query read/hit stats using the provider-normalized metrics
// from cacheStatsTracker. 'off' skips, 'compact' gives a one-liner,
// 'full' gives a breakdown. Display is skipped when the user
// aborted or proactive mode is active — but the counter reset
// below still runs in those cases.
if (!abortController.signal.aborted && !proactiveActive) {
// Defensive default: config layer already merges 'compact' from
// DEFAULT_GLOBAL_CONFIG (see config.ts:1494) for configs that
// predate this feature, so `mode` should always be defined.
// The `?? 'compact'` fallback covers pathological cases — a
// corrupt config read that returned an empty object, or a
// race between writer and reader — where the merge didn't
// land. Rendering the line is the safer failure mode than
// silently hiding it.
const mode = getGlobalConfig().showCacheStats ?? 'compact';
if (mode !== 'off') {
const turnMetrics = getCurrentTurnCacheMetrics();
// Skip rendering if the turn recorded no API activity at all —
// avoids a spurious "[Cache: cold]" on local-only commands.
if (turnMetrics.supported || turnMetrics.read > 0 || turnMetrics.total > 0) {
const line = mode === 'full' ? formatCacheMetricsFull(turnMetrics) : formatCacheMetricsCompact(turnMetrics);
setMessages(prev => [...prev, createSystemMessage(line, 'info')]);
}
}
}
// Reset turn counters UNCONDITIONALLY — users routinely interrupt
// (Ctrl+C) mid-turn, and if we kept the reset gated on
// !aborted, the in-flight turn's metrics would leak into the
// next turn's aggregate. Proactive turns also need the reset so
// their metrics don't pile onto the following user turn.
resetCurrentTurn();
// Clear the controller so CancelRequestHandler's canCancelRunningTask
// reads false at the idle prompt. Without this, the stale non-aborted
// controller makes ctrl+c fire onCancel() (aborting nothing) instead of