fix: disable experimental API betas by default, reduce side query token usage, standardize Headers type (#281)

* fix: disable experimental API betas by default to prevent 500 errors

Tool search (defer_loading), global cache scope, and context management
betas require internal Anthropic server-side support. External accounts
receive a 500 Internal Server Error when these beta headers are sent.

Set CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=true by default in the CLI
entrypoint. Users with internal access can opt back in with =false.

Also includes: cache key stability fixes (Sonnet 1M latch, system-before-
messages key ordering, resume fingerprint isMeta skip), sideQuery default
cleanup, and /dream command.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* refactor: standardize API headers to Headers type and enable tengu feature flags by default

* fix: address PR review — dream lock, MCP betas guard, redundant Partial

- Call recordConsolidation() programmatically in /dream instead of
  delegating to the model prompt (unreliable)
- Add CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS guard to MCP entrypoint
  (was only in CLI entrypoint, causing 500s in MCP server mode)
- Remove redundant ? markers from SecretValueSource Partial<{}> type

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
step325
2026-04-03 19:40:07 +02:00
committed by GitHub
parent afed73fa5a
commit 70cfa61582
16 changed files with 124 additions and 26 deletions

View File

@@ -1466,6 +1466,10 @@ async function* queryModel(
}
}
// Latch Sonnet 1M experiment at query start so mid-retry GB refreshes
// don't flip the beta header and bust the cache key.
const sonnet1mExpLatched = getSonnet1mExpTreatmentEnabled(options.model)
const effort = resolveAppliedEffort(options.model, options.effortValue)
if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
@@ -1549,11 +1553,9 @@ async function* queryModel(
const paramsFromContext = (retryContext: RetryContext) => {
const betasParams = [...betas]
// Append 1M beta dynamically for the Sonnet 1M experiment.
if (
!betasParams.includes(CONTEXT_1M_BETA_HEADER) &&
getSonnet1mExpTreatmentEnabled(retryContext.model)
) {
// Append 1M beta from the latched experiment state (computed once before
// the closure to avoid mid-retry GB flips changing the cache key).
if (!betasParams.includes(CONTEXT_1M_BETA_HEADER) && sonnet1mExpLatched) {
betasParams.push(CONTEXT_1M_BETA_HEADER)
}
@@ -1709,6 +1711,13 @@ async function* queryModel(
return {
model: normalizeModelStringForAPI(options.model),
// IMPORTANT: `system` must appear before `messages` in the object literal.
// JSON.stringify preserves insertion order. The native Bun attestation
// (Attestation.zig) overwrites the FIRST `cch=00000` sentinel in the
// serialized body. If `messages` is serialized first and conversation
// history contains this literal string, the wrong occurrence is replaced,
// producing a different system prompt on each request and breaking cache.
system,
messages: addCacheBreakpoints(
messagesForAPI,
enablePromptCaching,
@@ -1718,7 +1727,6 @@ async function* queryModel(
consumedPinnedEdits,
options.skipCacheWrite,
),
system,
tools: allTools,
tool_choice: options.toolChoice,
...(useBetas && { betas: betasParams }),

View File

@@ -563,7 +563,7 @@ export async function performCodexRequest(options: {
throw APIError.generate(
response.status, errorResponse,
`Codex API error ${response.status}: ${errorBody}`,
response.headers as unknown as Record<string, string>,
response.headers as unknown as Headers,
)
}
@@ -646,7 +646,7 @@ export async function collectCodexCompletedResponse(
if (event.event === 'response.failed') {
const msg = event.data?.response?.error?.message ??
event.data?.error?.message ?? 'Codex response failed'
throw APIError.generate(500, undefined, msg, {} as Record<string, string>)
throw APIError.generate(500, undefined, msg, new Headers())
}
if (
@@ -661,7 +661,7 @@ export async function collectCodexCompletedResponse(
if (!completedResponse) {
throw APIError.generate(
500, undefined, 'Codex response ended without a completed payload',
{} as Record<string, string>,
new Headers(),
)
}
@@ -820,7 +820,7 @@ export async function* codexStreamToAnthropic(
if (event.event === 'response.failed') {
const msg = payload?.response?.error?.message ??
payload?.error?.message ?? 'Codex response failed'
throw APIError.generate(500, undefined, msg, {} as Record<string, string>)
throw APIError.generate(500, undefined, msg, new Headers())
}
}

View File

@@ -41,6 +41,13 @@ import {
import { sanitizeSchemaForOpenAICompat } from '../../utils/schemaSanitizer.js'
import { redactSecretValueForDisplay } from '../../utils/providerProfile.js'
type SecretValueSource = Partial<{
OPENAI_API_KEY: string
CODEX_API_KEY: string
GEMINI_API_KEY: string
GOOGLE_API_KEY: string
}>
const GITHUB_MODELS_DEFAULT_BASE = 'https://models.github.ai/inference'
const GITHUB_API_VERSION = '2022-11-28'
const GITHUB_429_MAX_RETRIES = 3
@@ -750,7 +757,7 @@ class OpenAIShimMessages {
? ` or place a Codex auth.json at ${credentials.authPath}`
: ''
const safeModel =
redactSecretValueForDisplay(request.requestedModel, process.env) ??
redactSecretValueForDisplay(request.requestedModel, process.env as SecretValueSource) ??
'the requested model'
throw new Error(
`Codex auth is required for ${safeModel}. Set CODEX_API_KEY${authHint}.`,
@@ -941,13 +948,13 @@ class OpenAIShimMessages {
response.status,
errorResponse,
`OpenAI API error ${response.status}: ${errorBody}${rateHint}`,
response.headers as unknown as Record<string, string>,
response.headers as unknown as Headers,
)
}
throw APIError.generate(
500, undefined, 'OpenAI shim: request loop exited unexpectedly',
{} as Record<string, string>,
new Headers(),
)
}