feat(api): expose cache metrics in REPL + normalize across providers (#813)
* feat(api): expose cache metrics in REPL + /cache-stats command
* fix(api): normalize Kimi/DeepSeek/Gemini cache fields through shim layer
* test(api): cover /cache-stats rendering + fix CacheMetrics docstring drift
* fix(api): always reset cache turn counter + include date in /cache-stats rows
* refactor(api): unify shim usage builder + add cost-tracker wiring test
* fix(api): classify private-IP/self-hosted OpenAI endpoints as N/A instead of cold
* fix(api): require colon guard on IPv6 ULA prefix to avoid public-host over-match
* perf(api): ring buffer for cache history + hit rate clamp + .localhost TLD
* fix(api): null guards on formatters + document Codex Responses API shape
* fix(api): defensive start-of-turn reset + config gate fallback + env var docs
* fix(api): trust forwarded cache data on self-hosted URLs (data-driven)
* refactor(api): delegate streaming Responses usage to shared makeUsage helper
This commit is contained in:
@@ -46,6 +46,7 @@ import {
|
||||
type AnthropicUsage,
|
||||
type ShimCreateParams,
|
||||
} from './codexShim.js'
|
||||
import { buildAnthropicUsageFromRawUsage } from './cacheMetrics.js'
|
||||
import { compressToolHistory } from './compressToolHistory.js'
|
||||
import { fetchWithProxyRetry } from './fetchWithProxyRetry.js'
|
||||
import {
|
||||
@@ -845,16 +846,12 @@ function convertChunkUsage(
|
||||
usage: OpenAIStreamChunk['usage'] | undefined,
|
||||
): Partial<AnthropicUsage> | undefined {
|
||||
if (!usage) return undefined
|
||||
|
||||
const cached = usage.prompt_tokens_details?.cached_tokens ?? 0
|
||||
return {
|
||||
// Subtract cached tokens: OpenAI includes them in prompt_tokens,
|
||||
// but Anthropic convention treats input_tokens as non-cached only.
|
||||
input_tokens: (usage.prompt_tokens ?? 0) - cached,
|
||||
output_tokens: usage.completion_tokens ?? 0,
|
||||
cache_creation_input_tokens: 0,
|
||||
cache_read_input_tokens: cached,
|
||||
}
|
||||
// Delegates to the shared helper so this path, codexShim.makeUsage,
|
||||
// the non-streaming response below, and the integration tests all
|
||||
// produce byte-identical output for the same raw input.
|
||||
return buildAnthropicUsageFromRawUsage(
|
||||
usage as unknown as Record<string, unknown>,
|
||||
)
|
||||
}
|
||||
|
||||
const JSON_REPAIR_SUFFIXES = [
|
||||
@@ -2154,12 +2151,9 @@ class OpenAIShimMessages {
|
||||
model: data.model ?? model,
|
||||
stop_reason: stopReason,
|
||||
stop_sequence: null,
|
||||
// Keep only the normalized usage built by the shared helper. The rendered
// diff showed the removed hand-built inline object AND this added helper
// call as two `usage:` keys in one object literal — a duplicate key is
// last-wins at runtime and rejected under strict TS, and the commit
// ("unify shim usage builder") keeps the helper-based side.
usage: buildAnthropicUsageFromRawUsage(
  data.usage as unknown as Record<string, unknown> | undefined,
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user