fix: disable experimental API betas by default, reduce side query token usage, standardize Headers type (#281)
* fix: disable experimental API betas by default to prevent 500 errors

  Tool search (defer_loading), global cache scope, and context management betas require internal Anthropic server-side support. External accounts receive 500 Internal Server Error when these are sent.

  Set CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=true by default in the CLI entrypoint. Users with internal access can opt back in with =false.

  Also includes: cache key stability fixes (Sonnet 1M latch, system-before-messages key ordering, resume fingerprint isMeta skip), sideQuery default cleanup, and /dream command.

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* refactor: standardize API headers to Headers type and enable tengu feature flags by default

* fix: address PR review — dream lock, MCP betas guard, redundant Partial

  - Call recordConsolidation() programmatically in /dream instead of delegating to model prompt (unreliable)
  - Add CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS guard to MCP entrypoint (was only in CLI entrypoint, causing 500s in MCP server mode)
  - Remove redundant ? markers from SecretValueSource Partial<{}> type

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1466,6 +1466,10 @@ async function* queryModel(
|
||||
}
|
||||
}
|
||||
|
||||
// Latch Sonnet 1M experiment at query start so mid-retry GB refreshes
|
||||
// don't flip the beta header and bust the cache key.
|
||||
const sonnet1mExpLatched = getSonnet1mExpTreatmentEnabled(options.model)
|
||||
|
||||
const effort = resolveAppliedEffort(options.model, options.effortValue)
|
||||
|
||||
if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
|
||||
@@ -1549,11 +1553,9 @@ async function* queryModel(
|
||||
const paramsFromContext = (retryContext: RetryContext) => {
|
||||
const betasParams = [...betas]
|
||||
|
||||
// Append 1M beta dynamically for the Sonnet 1M experiment.
|
||||
if (
|
||||
!betasParams.includes(CONTEXT_1M_BETA_HEADER) &&
|
||||
getSonnet1mExpTreatmentEnabled(retryContext.model)
|
||||
) {
|
||||
// Append 1M beta from the latched experiment state (computed once before
|
||||
// the closure to avoid mid-retry GB flips changing the cache key).
|
||||
if (!betasParams.includes(CONTEXT_1M_BETA_HEADER) && sonnet1mExpLatched) {
|
||||
betasParams.push(CONTEXT_1M_BETA_HEADER)
|
||||
}
|
||||
|
||||
@@ -1709,6 +1711,13 @@ async function* queryModel(
|
||||
|
||||
return {
|
||||
model: normalizeModelStringForAPI(options.model),
|
||||
// IMPORTANT: `system` must appear before `messages` in the object literal.
|
||||
// JSON.stringify preserves insertion order. The native Bun attestation
|
||||
// (Attestation.zig) overwrites the FIRST `cch=00000` sentinel in the
|
||||
// serialized body. If `messages` is serialized first and conversation
|
||||
// history contains this literal string, the wrong occurrence is replaced,
|
||||
// producing a different system prompt on each request and breaking cache.
|
||||
system,
|
||||
messages: addCacheBreakpoints(
|
||||
messagesForAPI,
|
||||
enablePromptCaching,
|
||||
@@ -1718,7 +1727,6 @@ async function* queryModel(
|
||||
consumedPinnedEdits,
|
||||
options.skipCacheWrite,
|
||||
),
|
||||
system,
|
||||
tools: allTools,
|
||||
tool_choice: options.toolChoice,
|
||||
...(useBetas && { betas: betasParams }),
|
||||
|
||||
Reference in New Issue
Block a user