Compare commits

...

3 Commits

Author SHA1 Message Date
gnanam1990
effa6ef83d fix(errors): show actual host in 404 message instead of Ollama hint (#926)
When an OpenAI-compatible provider returns a 404, the user-facing error
message hardcoded "for Ollama: http://127.0.0.1:11434/v1" as a hint
regardless of the configured base URL. Users on remote providers
(NVIDIA NIM, OpenRouter, etc.) read this as the app ignoring their
custom OPENAI_BASE_URL and routing to localhost.

Plumb the request URL through the classifier and marker so the
user-facing message can name the actual host. Localhost endpoints keep
the existing Ollama-flavored guidance for backward compatibility.

- classifyOpenAIHttpFailure now accepts an optional url and produces a
  host-aware hint for non-localhost 404s
- the [openai_category=...] marker carries an optional host segment
- mapOpenAICompatibilityFailureToAssistantMessage branches on host to
  show "Endpoint at <host> returned 404. Verify OPENAI_BASE_URL is
  correct and the selected model (<model>) is supported by this
  provider." for remote URLs
- backward compatibility preserved when no URL is available
2026-04-28 08:58:04 +05:30
viudes
6ea3eb6483 feat(api): deterministic request-body serialization via stableStringify (#882)
* feat(api): deterministic request-body serialization via stableStringify

Add `stableStringify` helper that emits JSON with object keys sorted
lexicographically at every depth (arrays preserved). Adopt it in the
OpenAI-compatible shim and the Codex Responses-API shim for the outgoing
request body.

WHY: OpenAI / Kimi / DeepSeek / Codex use implicit prefix caching keyed
on exact request bytes. Spurious insertion-order differences in
spread-merged body objects otherwise invalidate the cache on every turn.
Also a pre-requisite for Anthropic `cache_control` breakpoint hits.

Byte-equivalent to `JSON.stringify` when keys already happen to be in
lexical insertion order, so strictly additive across providers.

* fix(api): preserve circular-ref TypeError in stableStringify + cover GitHub fallback

Replace two-pass sortingReplacer approach with a single-pass deepSort that
tracks ancestor objects via WeakSet, throwing TypeError on cycles (same
contract as native JSON.stringify) and correctly handling DAGs via
try/finally cleanup. Switch the GitHub Copilot /responses fallback in
openaiShim.ts from JSON.stringify to stableStringify so that path is also
byte-stable for prefix caching.

Regression coverage added: top-level cycle, deep nested cycle, DAG safety.

* fix(api): align stableStringify with native JSON.stringify pre-processing

Replicate native JSON.stringify pre-processing inside deepSort so
serialization output matches native behavior beyond key ordering:

- invoke toJSON(key) when present (Date, URL, user classes); pass ''
  at top-level, property name for nested values, index string for
  array elements
- unbox Number/String/Boolean wrappers via valueOf() so new Boolean(false)
  doesn't get truthy-coerced
- run cycle detection on the post-toJSON value so a toJSON returning
  an ancestor still throws TypeError; DAGs continue to not throw
- drop properties whose toJSON returns undefined, matching native

Add focused stableStringify.test.ts (21 cases) asserting equality with
JSON.stringify across toJSON paths, wrapper unboxing, cycle/DAG handling,
and sortKeysDeep parity.
2026-04-27 23:33:15 +08:00
vrdons
f699c1f2fc fix routing path (#923) 2026-04-27 20:05:17 +08:00
10 changed files with 622 additions and 15 deletions

View File

@@ -170,7 +170,7 @@ For best results, use models with strong tool/function calling support.
OpenClaude can route different agents to different models through settings-based routing. This is useful for cost optimization or splitting work by model strength. OpenClaude can route different agents to different models through settings-based routing. This is useful for cost optimization or splitting work by model strength.
Add to `~/.claude/settings.json`: Add to `~/.openclaude.json`:
```json ```json
{ {

View File

@@ -2,6 +2,7 @@ import { APIError } from '@anthropic-ai/sdk'
import { buildAnthropicUsageFromRawUsage } from './cacheMetrics.js' import { buildAnthropicUsageFromRawUsage } from './cacheMetrics.js'
import { compressToolHistory } from './compressToolHistory.js' import { compressToolHistory } from './compressToolHistory.js'
import { fetchWithProxyRetry } from './fetchWithProxyRetry.js' import { fetchWithProxyRetry } from './fetchWithProxyRetry.js'
import { stableStringify } from '../../utils/stableStringify.js'
import type { import type {
ResolvedCodexCredentials, ResolvedCodexCredentials,
ResolvedProviderRequest, ResolvedProviderRequest,
@@ -559,7 +560,9 @@ export async function performCodexRequest(options: {
{ {
method: 'POST', method: 'POST',
headers, headers,
body: JSON.stringify(body), // WHY: byte-identity required for implicit prefix caching on
// OpenAI Responses API. See src/utils/stableStringify.ts.
body: stableStringify(body),
signal: options.signal, signal: options.signal,
}, },
) )

View File

@@ -28,6 +28,38 @@ test('maps endpoint_not_found category markers to actionable setup guidance', ()
expect(text).toContain('/v1') expect(text).toContain('/v1')
}) })
test('endpoint_not_found from a remote host shows the actual host, not Ollama (issue #926)', () => {
const error = APIError.generate(
404,
undefined,
'OpenAI API error 404: Not Found [openai_category=endpoint_not_found,host=integrate.api.nvidia.com] Hint: Endpoint at integrate.api.nvidia.com returned 404.',
new Headers(),
)
const message = getAssistantMessageFromError(error, 'moonshotai/kimi-k2.5-thinking')
const text = getFirstText(message)
expect(text).toContain('integrate.api.nvidia.com')
expect(text).toContain('moonshotai/kimi-k2.5-thinking')
expect(text).not.toContain('Ollama')
expect(text).not.toContain('11434')
})
test('endpoint_not_found without a host falls back to the Ollama-aware message', () => {
const error = APIError.generate(
404,
undefined,
'OpenAI API error 404: Not Found [openai_category=endpoint_not_found] Hint: Confirm OPENAI_BASE_URL includes /v1.',
new Headers(),
)
const message = getAssistantMessageFromError(error, 'qwen2.5-coder:7b')
const text = getFirstText(message)
expect(text).toContain('Provider endpoint was not found')
expect(text).toContain('Ollama')
})
test('maps tool_call_incompatible category markers to model/tool guidance', () => { test('maps tool_call_incompatible category markers to model/tool guidance', () => {
const error = APIError.generate( const error = APIError.generate(
400, 400,

View File

@@ -51,7 +51,9 @@ import {
import { shouldProcessRateLimits } from '../rateLimitMocking.js' // Used for /mock-limits command import { shouldProcessRateLimits } from '../rateLimitMocking.js' // Used for /mock-limits command
import { extractConnectionErrorDetails, formatAPIError } from './errorUtils.js' import { extractConnectionErrorDetails, formatAPIError } from './errorUtils.js'
import { import {
extractOpenAICategoryHost,
extractOpenAICategoryMarker, extractOpenAICategoryMarker,
isLocalhostLikeHost,
type OpenAICompatibilityFailureCategory, type OpenAICompatibilityFailureCategory,
} from './openaiErrorClassification.js' } from './openaiErrorClassification.js'
@@ -68,25 +70,29 @@ function mapOpenAICompatibilityFailureToAssistantMessage(options: {
category: OpenAICompatibilityFailureCategory category: OpenAICompatibilityFailureCategory
model: string model: string
rawMessage: string rawMessage: string
host?: string
}): AssistantMessage { }): AssistantMessage {
const switchCmd = getIsNonInteractiveSession() ? '--model' : '/model' const switchCmd = getIsNonInteractiveSession() ? '--model' : '/model'
const compactHint = getIsNonInteractiveSession() const compactHint = getIsNonInteractiveSession()
? 'Reduce prompt size or start a new session.' ? 'Reduce prompt size or start a new session.'
: 'Run /compact or start a new session with /new.' : 'Run /compact or start a new session with /new.'
const isLocalhost = options.host === undefined || isLocalhostLikeHost(options.host)
switch (options.category) { switch (options.category) {
case 'localhost_resolution_failed': case 'localhost_resolution_failed':
case 'connection_refused': case 'connection_refused':
return createAssistantAPIErrorMessage({ return createAssistantAPIErrorMessage({
content: content: isLocalhost
'Could not connect to the local OpenAI-compatible provider. Ensure the local server is running, then use OPENAI_BASE_URL=http://127.0.0.1:11434/v1 for Ollama.', ? 'Could not connect to the local OpenAI-compatible provider. Ensure the local server is running, then use OPENAI_BASE_URL=http://127.0.0.1:11434/v1 for Ollama.'
: `Could not connect to the provider at ${options.host}. Verify OPENAI_BASE_URL is correct and that the host is reachable.`,
error: 'unknown', error: 'unknown',
}) })
case 'endpoint_not_found': case 'endpoint_not_found':
return createAssistantAPIErrorMessage({ return createAssistantAPIErrorMessage({
content: content: isLocalhost
'Provider endpoint was not found. Confirm OPENAI_BASE_URL targets an OpenAI-compatible /v1 endpoint (for Ollama: http://127.0.0.1:11434/v1).', ? 'Provider endpoint was not found. Confirm OPENAI_BASE_URL targets an OpenAI-compatible /v1 endpoint (for Ollama: http://127.0.0.1:11434/v1).'
: `Provider endpoint at ${options.host} returned 404. Verify OPENAI_BASE_URL is correct and that the selected model (${options.model}) is supported by this provider.`,
error: 'invalid_request', error: 'invalid_request',
}) })
@@ -567,6 +573,7 @@ export function getAssistantMessageFromError(
category: openaiCategory, category: openaiCategory,
model, model,
rawMessage: error.message, rawMessage: error.message,
host: extractOpenAICategoryHost(error.message),
}) })
} }
} }

View File

@@ -4,8 +4,10 @@ import {
buildOpenAICompatibilityErrorMessage, buildOpenAICompatibilityErrorMessage,
classifyOpenAIHttpFailure, classifyOpenAIHttpFailure,
classifyOpenAINetworkFailure, classifyOpenAINetworkFailure,
extractOpenAICategoryHost,
extractOpenAICategoryMarker, extractOpenAICategoryMarker,
formatOpenAICategoryMarker, formatOpenAICategoryMarker,
isLocalhostLikeHost,
} from './openaiErrorClassification.js' } from './openaiErrorClassification.js'
test('classifies localhost ECONNREFUSED as connection_refused', () => { test('classifies localhost ECONNREFUSED as connection_refused', () => {
@@ -95,3 +97,58 @@ test('ignores unknown category markers during extraction', () => {
const malformed = 'OpenAI API error 500 [openai_category=totally_fake_category]' const malformed = 'OpenAI API error 500 [openai_category=totally_fake_category]'
expect(extractOpenAICategoryMarker(malformed)).toBeUndefined() expect(extractOpenAICategoryMarker(malformed)).toBeUndefined()
}) })
test('endpoint_not_found 404 from a remote host gets a host-aware hint (issue #926)', () => {
const failure = classifyOpenAIHttpFailure({
status: 404,
body: 'Not Found',
url: 'https://integrate.api.nvidia.com/v1/chat/completions',
})
expect(failure.category).toBe('endpoint_not_found')
expect(failure.requestUrl).toBe('https://integrate.api.nvidia.com/v1/chat/completions')
expect(failure.hint).toContain('integrate.api.nvidia.com')
expect(failure.hint).not.toContain('local providers')
})
test('endpoint_not_found 404 from localhost keeps the Ollama-flavored hint', () => {
const failure = classifyOpenAIHttpFailure({
status: 404,
body: 'Not Found',
url: 'http://127.0.0.1:11434/v1/chat/completions',
})
expect(failure.category).toBe('endpoint_not_found')
expect(failure.hint).toContain('local providers')
})
test('marker round-trip preserves host segment', () => {
const formatted = buildOpenAICompatibilityErrorMessage(
'OpenAI API error 404: Not Found',
{
category: 'endpoint_not_found',
hint: 'Endpoint at integrate.api.nvidia.com returned 404.',
requestUrl: 'https://integrate.api.nvidia.com/v1/chat/completions',
},
)
expect(formatted).toContain('[openai_category=endpoint_not_found,host=integrate.api.nvidia.com]')
expect(extractOpenAICategoryMarker(formatted)).toBe('endpoint_not_found')
expect(extractOpenAICategoryHost(formatted)).toBe('integrate.api.nvidia.com')
})
test('marker without host stays backward-compatible', () => {
const marker = formatOpenAICategoryMarker('endpoint_not_found')
expect(marker).toBe('[openai_category=endpoint_not_found]')
expect(extractOpenAICategoryMarker(marker)).toBe('endpoint_not_found')
expect(extractOpenAICategoryHost(marker)).toBeUndefined()
})
test('isLocalhostLikeHost matches loopback variants', () => {
expect(isLocalhostLikeHost('localhost')).toBe(true)
expect(isLocalhostLikeHost('127.0.0.1')).toBe(true)
expect(isLocalhostLikeHost('127.0.0.5')).toBe(true)
expect(isLocalhostLikeHost('::1')).toBe(true)
expect(isLocalhostLikeHost('integrate.api.nvidia.com')).toBe(false)
expect(isLocalhostLikeHost(undefined)).toBe(false)
})

View File

@@ -21,6 +21,7 @@ export type OpenAICompatibilityFailure = {
hint?: string hint?: string
code?: string code?: string
status?: number status?: number
requestUrl?: string
} }
const OPENAI_CATEGORY_MARKER_PREFIX = '[openai_category=' const OPENAI_CATEGORY_MARKER_PREFIX = '[openai_category='
@@ -96,6 +97,11 @@ function isLocalhostLikeHostname(hostname: string | null): boolean {
return /^127\./.test(hostname) return /^127\./.test(hostname)
} }
export function isLocalhostLikeHost(host: string | null | undefined): boolean {
if (!host) return false
return isLocalhostLikeHostname(host.toLowerCase())
}
function isContextOverflowMessage(body: string): boolean { function isContextOverflowMessage(body: string): boolean {
const lower = body.toLowerCase() const lower = body.toLowerCase()
return ( return (
@@ -149,14 +155,18 @@ function isModelNotFoundMessage(body: string): boolean {
export function formatOpenAICategoryMarker( export function formatOpenAICategoryMarker(
category: OpenAICompatibilityFailureCategory, category: OpenAICompatibilityFailureCategory,
host?: string,
): string { ): string {
if (host && /^[A-Za-z0-9.\-:]+$/.test(host)) {
return `${OPENAI_CATEGORY_MARKER_PREFIX}${category},host=${host}]`
}
return `${OPENAI_CATEGORY_MARKER_PREFIX}${category}]` return `${OPENAI_CATEGORY_MARKER_PREFIX}${category}]`
} }
export function extractOpenAICategoryMarker( export function extractOpenAICategoryMarker(
message: string, message: string,
): OpenAICompatibilityFailureCategory | undefined { ): OpenAICompatibilityFailureCategory | undefined {
const match = message.match(/\[openai_category=([a-z_]+)]/) const match = message.match(/\[openai_category=([a-z_]+)(?:,host=[^\]]+)?]/)
const category = match?.[1] const category = match?.[1]
if (!category || !isOpenAICompatibilityFailureCategory(category)) { if (!category || !isOpenAICompatibilityFailureCategory(category)) {
@@ -166,11 +176,17 @@ export function extractOpenAICategoryMarker(
return category return category
} }
export function extractOpenAICategoryHost(message: string): string | undefined {
const match = message.match(/\[openai_category=[a-z_]+,host=([A-Za-z0-9.\-:]+)]/)
return match?.[1]
}
export function buildOpenAICompatibilityErrorMessage( export function buildOpenAICompatibilityErrorMessage(
baseMessage: string, baseMessage: string,
failure: Pick<OpenAICompatibilityFailure, 'category' | 'hint'>, failure: Pick<OpenAICompatibilityFailure, 'category' | 'hint' | 'requestUrl'>,
): string { ): string {
const marker = formatOpenAICategoryMarker(failure.category) const host = failure.requestUrl ? getHostname(failure.requestUrl) ?? undefined : undefined
const marker = formatOpenAICategoryMarker(failure.category, host)
const hint = failure.hint ? ` Hint: ${failure.hint}` : '' const hint = failure.hint ? ` Hint: ${failure.hint}` : ''
return `${baseMessage} ${marker}${hint}` return `${baseMessage} ${marker}${hint}`
} }
@@ -247,8 +263,11 @@ export function classifyOpenAINetworkFailure(
export function classifyOpenAIHttpFailure(options: { export function classifyOpenAIHttpFailure(options: {
status: number status: number
body: string body: string
url?: string
}): OpenAICompatibilityFailure { }): OpenAICompatibilityFailure {
const body = options.body ?? '' const body = options.body ?? ''
const hostname = options.url ? getHostname(options.url) : null
const isLocalHost = isLocalhostLikeHostname(hostname)
if (options.status === 401 || options.status === 403) { if (options.status === 401 || options.status === 403) {
return { return {
@@ -284,13 +303,17 @@ export function classifyOpenAIHttpFailure(options: {
} }
if (options.status === 404) { if (options.status === 404) {
const isRemote = hostname !== null && !isLocalHost
return { return {
source: 'http', source: 'http',
category: 'endpoint_not_found', category: 'endpoint_not_found',
retryable: false, retryable: false,
status: options.status, status: options.status,
message: body, message: body,
hint: 'Endpoint was not found. Confirm OPENAI_BASE_URL includes /v1 for OpenAI-compatible local providers.', requestUrl: options.url,
hint: isRemote
? `Endpoint at ${hostname} returned 404. Verify OPENAI_BASE_URL is correct and the requested model is supported by this provider.`
: 'Endpoint was not found. Confirm OPENAI_BASE_URL includes /v1 for OpenAI-compatible local providers.',
} }
} }

View File

@@ -74,7 +74,12 @@ import {
hasToolFieldMapping, hasToolFieldMapping,
} from './toolArgumentNormalization.js' } from './toolArgumentNormalization.js'
import { logApiCallStart, logApiCallEnd } from '../../utils/requestLogging.js' import { logApiCallStart, logApiCallEnd } from '../../utils/requestLogging.js'
import { createStreamState, processStreamChunk, getStreamStats } from '../../utils/streamingOptimizer.js' import {
createStreamState,
processStreamChunk,
getStreamStats,
} from '../../utils/streamingOptimizer.js'
import { stableStringify } from '../../utils/stableStringify.js'
type SecretValueSource = Partial<{ type SecretValueSource = Partial<{
OPENAI_API_KEY: string OPENAI_API_KEY: string
@@ -1852,12 +1857,17 @@ class OpenAIShimMessages {
return false return false
} }
let serializedBody = JSON.stringify( // WHY: byte-identity required for implicit prefix caching in
// OpenAI/Kimi/DeepSeek. stableStringify sorts object keys at every
// depth so spurious insertion-order differences across rebuilds of
// `body` (spread-merge, conditional assignments above) don't bust
// the provider's prefix hash.
let serializedBody = stableStringify(
request.transport === 'responses' ? buildResponsesBody() : body, request.transport === 'responses' ? buildResponsesBody() : body,
) )
const refreshSerializedBody = (): void => { const refreshSerializedBody = (): void => {
serializedBody = JSON.stringify( serializedBody = stableStringify(
request.transport === 'responses' ? buildResponsesBody() : body, request.transport === 'responses' ? buildResponsesBody() : body,
) )
} }
@@ -1925,7 +1935,9 @@ class OpenAIShimMessages {
classifyOpenAIHttpFailure({ classifyOpenAIHttpFailure({
status, status,
body: errorBody, body: errorBody,
url: requestUrl,
}) })
const failureWithUrl = { ...failure, requestUrl: failure.requestUrl ?? requestUrl }
const redactedUrl = redactUrlForDiagnostics(requestUrl) const redactedUrl = redactUrlForDiagnostics(requestUrl)
logForDebugging( logForDebugging(
@@ -1938,7 +1950,7 @@ class OpenAIShimMessages {
parsedBody, parsedBody,
buildOpenAICompatibilityErrorMessage( buildOpenAICompatibilityErrorMessage(
`OpenAI API error ${status}: ${errorBody}${rateHint}`, `OpenAI API error ${status}: ${errorBody}${rateHint}`,
failure, failureWithUrl,
), ),
responseHeaders, responseHeaders,
) )
@@ -2036,7 +2048,7 @@ class OpenAIShimMessages {
responsesResponse = await fetchWithProxyRetry(responsesUrl, { responsesResponse = await fetchWithProxyRetry(responsesUrl, {
method: 'POST', method: 'POST',
headers, headers,
body: JSON.stringify(responsesBody), body: stableStringify(responsesBody),
signal: options?.signal, signal: options?.signal,
}) })
} catch (error) { } catch (error) {

View File

@@ -0,0 +1,142 @@
import { describe, expect, test } from 'bun:test'
import { sortKeysDeep, stableStringify } from './stableStringify.js'
// Byte-level stability is the contract under test. Implicit prefix
// caching (OpenAI / Kimi / DeepSeek / Codex) and Anthropic cache_control
// breakpoints both key on the exact request bytes, so semantically-equal
// inputs must serialize to byte-identical output — across repeated
// invocations and across key-order permutations.
describe('stableStringify', () => {
  test('two invocations with the same object produce identical strings', () => {
    const first = stableStringify({ b: 1, a: 2 })
    const second = stableStringify({ b: 1, a: 2 })
    expect(first).toBe(second)
  })

  test('key order at the top level does not affect output', () => {
    const ab = stableStringify({ a: 1, b: 2 })
    const ba = stableStringify({ b: 2, a: 1 })
    expect(ab).toBe(ba)
  })

  test('key order at nested depths does not affect output', () => {
    const left = { outer: { z: 1, a: 2, m: { b: 3, a: 4 } } }
    const right = { outer: { m: { a: 4, b: 3 }, a: 2, z: 1 } }
    expect(stableStringify(left)).toBe(stableStringify(right))
  })

  test('array element order IS preserved (semantic in API contracts)', () => {
    const forward = stableStringify({ messages: ['a', 'b', 'c'] })
    const backward = stableStringify({ messages: ['c', 'b', 'a'] })
    expect(forward).not.toBe(backward)
  })

  test('arrays of objects have keys sorted inside each element', () => {
    const serialized = stableStringify({
      tools: [
        { name: 'Bash', description: 'run' },
        { description: 'read', name: 'Read' },
      ],
    })
    expect(serialized).toBe(
      '{"tools":[{"description":"run","name":"Bash"},{"description":"read","name":"Read"}]}',
    )
  })

  test('undefined values are omitted (matches JSON.stringify)', () => {
    expect(stableStringify({ a: undefined, b: 1 })).toBe('{"b":1}')
  })

  test('primitive and null pass through unchanged', () => {
    expect(stableStringify(null)).toBe('null')
    expect(stableStringify(42)).toBe('42')
    expect(stableStringify('x')).toBe('"x"')
    expect(stableStringify(true)).toBe('true')
  })

  test('throws TypeError on circular structures (same behavior as JSON.stringify)', () => {
    const cyclic: Record<string, unknown> = {}
    cyclic.self = cyclic
    // Engines word the message differently (V8 vs Bun/JSC), so only the
    // error class is pinned — the same contract native JSON.stringify gives.
    expect(() => stableStringify(cyclic)).toThrow(TypeError)
    expect(() => JSON.stringify(cyclic)).toThrow(TypeError)
  })

  test('throws TypeError on circular references nested deep in the graph', () => {
    const leaf: Record<string, unknown> = { val: 1 }
    const root = { a: { b: leaf } }
    leaf.cycle = root
    expect(() => stableStringify(root)).toThrow(TypeError)
  })

  test('does not throw on DAGs (same object referenced from multiple keys)', () => {
    const shared = { x: 1 }
    // Native JSON.stringify accepts repeated acyclic references, so
    // stableStringify must as well.
    expect(() => stableStringify({ a: shared, b: shared })).not.toThrow()
    expect(stableStringify({ a: shared, b: shared })).toBe(
      '{"a":{"x":1},"b":{"x":1}}',
    )
  })
})
describe('sortKeysDeep', () => {
  test('returns an object with sorted keys at every depth', () => {
    const result = sortKeysDeep({
      b: 1,
      a: { y: 2, x: { d: 3, c: 4 } },
    }) as Record<string, unknown>
    expect(Object.keys(result)).toEqual(['a', 'b'])
    const nested = result.a as Record<string, unknown>
    expect(Object.keys(nested)).toEqual(['x', 'y'])
  })

  test('arrays are preserved element-wise', () => {
    const result = sortKeysDeep([
      { b: 1, a: 2 },
      { d: 3, c: 4 },
    ]) as Array<Record<string, unknown>>
    expect(Object.keys(result[0]!)).toEqual(['a', 'b'])
    expect(Object.keys(result[1]!)).toEqual(['c', 'd'])
  })
})
describe('prefix caching invariants — end-to-end', () => {
  // Mirrors the shape a real OpenAI-compatible request body takes on its
  // way upstream. The two fixtures are the same payload assembled with
  // different key insertion orders; stableStringify must collapse both
  // to identical bytes.
  const orderedVariant = {
    model: 'gpt-4o-mini',
    stream: true,
    messages: [
      { role: 'system', content: 'you are helpful' },
      { role: 'user', content: 'hi' },
    ],
    tools: [{ name: 't', description: 'x' }],
    temperature: 0.7,
    top_p: 1,
  }
  const shuffledVariant = {
    top_p: 1,
    temperature: 0.7,
    tools: [{ description: 'x', name: 't' }],
    messages: [
      { content: 'you are helpful', role: 'system' },
      { content: 'hi', role: 'user' },
    ],
    stream: true,
    model: 'gpt-4o-mini',
  }

  test('two spread-merged request bodies produce identical stable bytes', () => {
    expect(stableStringify(orderedVariant)).toBe(stableStringify(shuffledVariant))
  })

  test('calling stableStringify twice yields identical bytes (idempotent)', () => {
    expect(stableStringify(orderedVariant)).toBe(stableStringify(orderedVariant))
  })
})

View File

@@ -0,0 +1,199 @@
import { describe, expect, test } from 'bun:test'
import { sortKeysDeep, stableStringify } from './stableStringify'
/**
 * Contract under test: `stableStringify(input)` equals
 * `JSON.stringify(input)` for every value where the latter is
 * well-defined, except that object keys come out lexicographically
 * sorted at every depth. This suite covers the native pre-processing
 * steps — `toJSON(key)` invocation and primitive-wrapper unboxing —
 * that the deep-sort pass has to reproduce faithfully.
 */
describe('stableStringify — toJSON semantics', () => {
  test('Date at top level → ISO string', () => {
    const when = new Date('2024-01-02T03:04:05.678Z')
    expect(stableStringify(when)).toBe(JSON.stringify(when))
  })

  test('Date nested in object → ISO string + sorted keys', () => {
    const when = new Date('2024-01-02T03:04:05.678Z')
    const input = { z: 1, when, a: 'x' }
    const expected = `{"a":"x","when":${JSON.stringify(when.toISOString())},"z":1}`
    expect(stableStringify(input)).toBe(expected)
  })

  test('Date inside an array → each element converted', () => {
    const first = new Date('2024-01-02T03:04:05.678Z')
    const second = new Date('2025-06-07T08:09:10.111Z')
    const input = [first, second]
    expect(stableStringify(input)).toBe(JSON.stringify(input))
  })

  test('URL value serializes via URL.prototype.toJSON', () => {
    const link = new URL('https://example.com/path?q=1')
    expect(stableStringify(link)).toBe(JSON.stringify(link))
    expect(stableStringify({ url: link })).toBe(JSON.stringify({ url: link }))
  })

  test('custom class with toJSON returning a plain object → keys sorted', () => {
    class Thing {
      toJSON() {
        return { z: 1, a: 2, m: 3 }
      }
    }
    expect(stableStringify(new Thing())).toBe('{"a":2,"m":3,"z":1}')
  })

  test('toJSON(key) receives the property name for object values', () => {
    const observedKeys: string[] = []
    class Trace {
      toJSON(k: string) {
        observedKeys.push(k)
        return k
      }
    }
    const probe = new Trace()
    stableStringify({ alpha: probe, beta: probe })
    // Keys are visited in sorted order, so alpha is serialized first.
    expect(observedKeys).toEqual(['alpha', 'beta'])
  })

  test('toJSON(key) receives the array index as a string for array elements', () => {
    const observedKeys: string[] = []
    class Trace {
      toJSON(k: string) {
        observedKeys.push(k)
        return k
      }
    }
    const probe = new Trace()
    stableStringify([probe, probe, probe])
    expect(observedKeys).toEqual(['0', '1', '2'])
  })

  test('toJSON(key) receives empty string at top level', () => {
    let observed: string | undefined
    class Trace {
      toJSON(k: string) {
        observed = k
        return 'ok'
      }
    }
    stableStringify(new Trace())
    expect(observed).toBe('')
  })

  test('toJSON returning undefined drops the property (matches native)', () => {
    class Hidden {
      toJSON() {
        return undefined
      }
    }
    const input = { a: 1, gone: new Hidden(), b: 2 }
    expect(stableStringify(input)).toBe(JSON.stringify(input))
    expect(stableStringify(input)).toBe('{"a":1,"b":2}')
  })

  test('nested mix: object with a Date field and a regular field → keys sorted, Date as ISO', () => {
    const when = new Date('2024-01-02T03:04:05.678Z')
    const input = { z: { when, a: 1 }, a: 'first' }
    expect(stableStringify(input)).toBe(
      `{"a":"first","z":{"a":1,"when":${JSON.stringify(when.toISOString())}}}`,
    )
  })
})
describe('stableStringify — primitive wrapper unboxing', () => {
  test('new Number at top level → numeric primitive', () => {
    const boxed = new Number(42)
    expect(stableStringify(boxed)).toBe(JSON.stringify(boxed))
    expect(stableStringify(boxed)).toBe('42')
  })

  test('new String at top level → string primitive', () => {
    const boxed = new String('hello')
    expect(stableStringify(boxed)).toBe(JSON.stringify(boxed))
    expect(stableStringify(boxed)).toBe('"hello"')
  })

  test('new Boolean at top level → boolean primitive', () => {
    const boxed = new Boolean(true)
    expect(stableStringify(boxed)).toBe(JSON.stringify(boxed))
    expect(stableStringify(boxed)).toBe('true')
  })

  test('new Boolean(false) at top level → false', () => {
    const boxed = new Boolean(false)
    expect(stableStringify(boxed)).toBe(JSON.stringify(boxed))
    expect(stableStringify(boxed)).toBe('false')
  })

  test('boxed wrappers as object values → primitives + sorted keys', () => {
    const input = {
      z: new Number(1),
      a: new String('x'),
      m: new Boolean(false),
    }
    expect(stableStringify(input)).toBe('{"a":"x","m":false,"z":1}')
    // Parity with native output modulo key order.
    expect(JSON.parse(stableStringify(input))).toEqual(
      JSON.parse(JSON.stringify(input)),
    )
  })
})
describe('stableStringify — cycles vs DAGs', () => {
  test('top-level cycle throws TypeError (regression guard)', () => {
    const cyclic: Record<string, unknown> = { a: 1 }
    cyclic.self = cyclic
    expect(() => stableStringify(cyclic)).toThrow(TypeError)
  })

  test('deep cycle throws TypeError', () => {
    const first: Record<string, unknown> = { name: 'a' }
    const second: Record<string, unknown> = { name: 'b' }
    first.next = second
    second.back = first
    expect(() => stableStringify(first)).toThrow(TypeError)
  })

  test('toJSON returning an ancestor still triggers the cycle check', () => {
    type Node = { name: string; child?: { toJSON(): Node } }
    const parent: Node = { name: 'parent' }
    parent.child = {
      toJSON() {
        return parent
      },
    }
    expect(() => stableStringify(parent)).toThrow(TypeError)
  })

  test('DAG (same object referenced twice via different keys) does NOT throw', () => {
    const shared = { v: 1 }
    const input = { left: shared, right: shared }
    expect(() => stableStringify(input)).not.toThrow()
    expect(stableStringify(input)).toBe('{"left":{"v":1},"right":{"v":1}}')
  })

  test('DAG of arrays does NOT throw', () => {
    const shared = [1, 2, 3]
    const input = { a: shared, b: shared }
    expect(() => stableStringify(input)).not.toThrow()
    expect(stableStringify(input)).toBe('{"a":[1,2,3],"b":[1,2,3]}')
  })
})
describe('sortKeysDeep — same toJSON/unbox semantics', () => {
  test('returns the post-toJSON, post-unbox sorted shape', () => {
    const when = new Date('2024-01-02T03:04:05.678Z')
    const result = sortKeysDeep({ z: 1, a: new Number(7), when }) as Record<
      string,
      unknown
    >
    expect(result).toEqual({ a: 7, when: when.toISOString(), z: 1 })
    // The clone's key iteration order is lexicographic.
    expect(Object.keys(result)).toEqual(['a', 'when', 'z'])
  })
})

View File

@@ -0,0 +1,132 @@
/**
* Deterministic JSON serialization.
*
* WHY: OpenAI / Kimi / DeepSeek / Codex all use **implicit prefix caching**
* — the server hashes the request prefix and reuses cached reasoning if
* the bytes match exactly. Even a trivial key-order difference between
* two otherwise-identical requests invalidates the hash and forces a
* full re-parse.
*
* This is also a pre-requisite for Anthropic / Bedrock / Vertex
* `cache_control` breakpoints: ephemeral cache entries match on exact
* content, so a re-ordered object literal busts the breakpoint.
*
* `JSON.stringify` is nondeterministic across engines and across
* successive iterations when objects carry keys added at different
* times (V8 preserves insertion order, which is the common failure
* mode when building a body from spread-merged configs).
*
* This helper recursively sorts object keys. Arrays preserve order
* (element order IS semantically significant in message/content arrays).
*
* Complements `sortKeysDeep` in src/services/remoteManagedSettings and
* src/services/policyLimits. Those two are INTENTIONALLY separate:
* - remoteManagedSettings: matches Python `json.dumps(sort_keys=True)`
* byte-for-byte to validate server-computed checksums. Must NOT
* drop undefined (Python preserves null).
* - policyLimits: uses `localeCompare` (keeps legacy behavior; locale-
* sensitive but stable for a given runtime).
* - this module (stableStringify): byte-identity for API body caching.
* Drops undefined to match `JSON.stringify` — the openaiShim/codexShim
* body is always downstream of `JSON.stringify` semantics.
* Do not consolidate without auditing the 3 callers — each has a
* different server-compat contract.
*/
/**
 * Serializes `value` to a byte-stable JSON string.
 *
 * Guarantees:
 * - Object keys are emitted in lexicographic order at every depth.
 * - Array element order is preserved.
 * - Undefined values are dropped (matching `JSON.stringify`).
 * - `space` controls indentation exactly as in `JSON.stringify`
 *   (omitted → compact output).
 *
 * Native `JSON.stringify` pre-processing is applied before sorting:
 * `toJSON(key)` is invoked where defined (own or inherited — covers
 * `Date`, `URL`, and user classes), with the property name for nested
 * object values, the index string for array elements, and `''` at the
 * top level; boxed `Number`/`String`/`Boolean` wrappers are unboxed.
 * Both conversions run before the array/object dispatch, so the value
 * actually walked is the post-conversion form, and a `toJSON` that
 * returns `undefined` drops the property from its parent.
 *
 * Cycle handling matches the native contract: circular references
 * throw `TypeError`, while DAGs (the same object reachable through
 * multiple keys) serialize normally. `deepSort` tracks the current
 * ancestor path in a `WeakSet` and unwinds it in `finally`, so cleanup
 * happens even when an exception propagates.
 */
export function stableStringify(value: unknown, space?: number): string {
  const normalized = deepSort(value, new WeakSet(), '')
  return JSON.stringify(normalized, null, space)
}
/**
 * Produces a deep-sorted clone of `value`: object keys lexicographic at
 * every depth, array order untouched. Useful when a caller needs the
 * sorted shape itself — e.g. to hand to a downstream serializer with a
 * custom replacer or spacing.
 *
 * The same `toJSON(key)` invocation and primitive-wrapper unboxing as
 * `stableStringify` are applied, so the returned shape mirrors what
 * native `JSON.stringify` would have walked.
 */
export function sortKeysDeep<T>(value: T): T {
  const ancestors = new WeakSet<object>()
  return deepSort(value, ancestors, '') as T
}
/**
 * Single-pass recursive walk that returns a sorted clone of `value`.
 *
 * @param value     the node being converted (may be replaced by its
 *                  toJSON()/unboxed form before dispatch)
 * @param ancestors WeakSet of objects/arrays on the CURRENT path from
 *                  the root — used for cycle detection only
 * @param key       the key under which `value` sits in its parent:
 *                  property name, array index as a string, or '' at the
 *                  top level (matches native toJSON semantics)
 * @returns the sorted clone, or `undefined` for values native
 *          JSON.stringify would drop
 * @throws TypeError on circular structures (native contract)
 */
function deepSort(
  value: unknown,
  ancestors: WeakSet<object>,
  key: string,
): unknown {
  // Step 1: invoke toJSON(key) if present — matches native pre-processing.
  if (
    value !== null &&
    typeof value === 'object' &&
    typeof (value as { toJSON?: unknown }).toJSON === 'function'
  ) {
    value = (value as { toJSON: (k: string) => unknown }).toJSON(key)
  }
  // Step 2: unbox primitive wrappers. Boolean goes through valueOf() so
  // new Boolean(false) isn't truthy-coerced to true.
  if (value instanceof Number) value = Number(value)
  else if (value instanceof String) value = String(value)
  else if (value instanceof Boolean) value = Boolean(value.valueOf())
  // Step 3: primitives short-circuit (post-toJSON the value may now be one).
  if (value === null || typeof value !== 'object') return value
  // Step 4: cycle check on the post-toJSON value, BEFORE the array/object
  // dispatch. BUG FIX: the check previously ran only in the plain-object
  // branch, so a circular ARRAY (arr.push(arr), or two arrays referencing
  // each other) recursed until stack overflow (RangeError) instead of
  // throwing TypeError like native JSON.stringify. Tracking arrays in
  // `ancestors` too restores the native contract; the finally-cleanup
  // still lets DAGs — including DAGs of arrays — serialize without
  // throwing.
  if (ancestors.has(value)) {
    throw new TypeError('Converting circular structure to JSON')
  }
  ancestors.add(value)
  try {
    // Step 5a: arrays — preserve element order; element key is the index
    // as a string.
    if (Array.isArray(value)) {
      return value.map((element, index) =>
        deepSort(element, ancestors, String(index)),
      )
    }
    // Step 5b: plain objects — rebuild with keys in lexicographic order,
    // dropping properties that serialize to undefined (matches native).
    const sorted: Record<string, unknown> = {}
    for (const k of Object.keys(value as Record<string, unknown>).sort()) {
      const child = deepSort(
        (value as Record<string, unknown>)[k],
        ancestors,
        k,
      )
      if (child === undefined) continue
      sorted[k] = child
    }
    return sorted
  } finally {
    // Always unwind the path marker — even on exception — so siblings
    // that share this object (DAGs) are not misreported as cycles.
    ancestors.delete(value)
  }
}