feat(api): expose cache metrics in REPL + normalize across providers (#813)
* feat(api): expose cache metrics in REPL + /cache-stats command
* fix(api): normalize Kimi/DeepSeek/Gemini cache fields through shim layer
* test(api): cover /cache-stats rendering + fix CacheMetrics docstring drift
* fix(api): always reset cache turn counter + include date in /cache-stats rows
* refactor(api): unify shim usage builder + add cost-tracker wiring test
* fix(api): classify private-IP/self-hosted OpenAI endpoints as N/A instead of cold
* fix(api): require colon guard on IPv6 ULA prefix to avoid public-host over-match
* perf(api): ring buffer for cache history + hit rate clamp + .localhost TLD
* fix(api): null guards on formatters + document Codex Responses API shape
* fix(api): defensive start-of-turn reset + config gate fallback + env var docs
* fix(api): trust forwarded cache data on self-hosted URLs (data-driven)
* refactor(api): delegate streaming Responses usage to shared makeUsage helper
This commit is contained in:
@@ -133,6 +133,8 @@ import { hasConsoleBillingAccess } from '../utils/billing.js';
|
||||
import { logEvent, type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } from 'src/services/analytics/index.js';
|
||||
import { getFeatureValue_CACHED_MAY_BE_STALE } from 'src/services/analytics/growthbook.js';
|
||||
import { textForResubmit, handleMessageFromStream, type StreamingToolUse, type StreamingThinking, isCompactBoundaryMessage, getMessagesAfterCompactBoundary, getContentText, createUserMessage, createAssistantMessage, createTurnDurationMessage, createAgentsKilledMessage, createApiMetricsMessage, createSystemMessage, createCommandInputMessage, formatCommandInputTags } from '../utils/messages.js';
|
||||
import { getCurrentTurnCacheMetrics, resetCurrentTurn } from '../services/api/cacheStatsTracker.js';
|
||||
import { formatCacheMetricsCompact, formatCacheMetricsFull } from '../services/api/cacheMetrics.js';
|
||||
import { generateSessionTitle } from '../utils/sessionTitle.js';
|
||||
import { BASH_INPUT_TAG, COMMAND_MESSAGE_TAG, COMMAND_NAME_TAG, LOCAL_COMMAND_STDOUT_TAG } from '../constants/xml.js';
|
||||
import { escapeXml } from '../utils/xml.js';
|
||||
@@ -2921,6 +2923,13 @@ export function REPL({
|
||||
// isLoading is derived from queryGuard — tryStart() above already
|
||||
// transitioned dispatching→running, so no setter call needed here.
|
||||
resetTimingRefs();
|
||||
// Start-of-turn cache tracker reset. The end-of-turn path at the
|
||||
// bottom of this function already resets, but mirror the call here
|
||||
// so a turn that never reaches end-of-turn (crash, unhandled
|
||||
// rejection, process exit) still starts clean on the next one.
|
||||
// Idempotent with respect to the end-of-turn reset — double-reset
|
||||
// is a no-op.
|
||||
resetCurrentTurn();
|
||||
setMessages(oldMessages => [...oldMessages, ...newMessages]);
|
||||
responseLengthRef.current = 0;
|
||||
if (feature('TOKEN_BUDGET')) {
|
||||
@@ -3019,6 +3028,38 @@ export function REPL({
|
||||
setMessages(prev => [...prev, createTurnDurationMessage(turnDurationMs, budgetInfo, count(prev, isLoggableMessage))]);
|
||||
}
|
||||
}
|
||||
// Cache stats line — controlled by `/config showCacheStats`. Shows
|
||||
// per-query read/hit stats using the provider-normalized metrics
|
||||
// from cacheStatsTracker. 'off' skips, 'compact' gives a one-liner,
|
||||
// 'full' gives a breakdown. Display is skipped when the user
|
||||
// aborted or proactive mode is active — but the counter reset
|
||||
// below still runs in those cases.
|
||||
if (!abortController.signal.aborted && !proactiveActive) {
|
||||
// Defensive default: config layer already merges 'compact' from
|
||||
// DEFAULT_GLOBAL_CONFIG (see config.ts:1494) for configs that
|
||||
// predate this feature, so `mode` should always be defined.
|
||||
// The `?? 'compact'` fallback covers pathological cases — a
|
||||
// corrupt config read that returned an empty object, or a
|
||||
// race between writer and reader — where the merge didn't
|
||||
// land. Rendering the line is a safer failure mode than
|
||||
// silently hiding it.
|
||||
const mode = getGlobalConfig().showCacheStats ?? 'compact';
|
||||
if (mode !== 'off') {
|
||||
const turnMetrics = getCurrentTurnCacheMetrics();
|
||||
// Skip rendering if the turn recorded no API activity at all —
|
||||
// avoids a spurious "[Cache: cold]" on local-only commands.
|
||||
if (turnMetrics.supported || turnMetrics.read > 0 || turnMetrics.total > 0) {
|
||||
const line = mode === 'full' ? formatCacheMetricsFull(turnMetrics) : formatCacheMetricsCompact(turnMetrics);
|
||||
setMessages(prev => [...prev, createSystemMessage(line, 'info')]);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Reset turn counters UNCONDITIONALLY — users routinely interrupt
|
||||
// (Ctrl+C) mid-turn, and if we kept the reset gated on
|
||||
// !aborted, the in-flight turn's metrics would leak into the
|
||||
// next turn's aggregate. Proactive turns also need the reset so
|
||||
// their metrics don't pile onto the following user turn.
|
||||
resetCurrentTurn();
|
||||
// Clear the controller so CancelRequestHandler's canCancelRunningTask
|
||||
// reads false at the idle prompt. Without this, the stale non-aborted
|
||||
// controller makes ctrl+c fire onCancel() (aborting nothing) instead of
|
||||
|
||||
Reference in New Issue
Block a user