fix: disable experimental API betas by default, reduce side query token usage, standardize Headers type (#281)

* fix: disable experimental API betas by default to prevent 500 errors

Tool search (defer_loading), global cache scope, and context management
betas require internal Anthropic server-side support. External accounts
receive a 500 Internal Server Error when these betas are sent.

Set CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=true by default in the CLI
entrypoint. Users with internal access can opt back in by setting
CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=false.

Also includes: cache key stability fixes (Sonnet 1M latch, system-before-
messages key ordering, resume fingerprint isMeta skip), sideQuery default
cleanup, and /dream command.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* refactor: standardize API headers to Headers type and enable tengu feature flags by default

* fix: address PR review — dream lock, MCP betas guard, redundant Partial

- Call recordConsolidation() programmatically in /dream instead of
  delegating it to the model prompt, which is unreliable
- Add CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS guard to MCP entrypoint
  (was only in CLI entrypoint, causing 500s in MCP server mode)
- Remove redundant ? markers from SecretValueSource Partial<{}> type

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
step325
2026-04-03 19:40:07 +02:00
committed by GitHub
parent afed73fa5a
commit 70cfa61582
16 changed files with 124 additions and 26 deletions

View File

@@ -181,8 +181,6 @@ export function createChromeContext(
usage?: { input_tokens: number; output_tokens: number }
}> => {
// sideQuery handles OAuth attribution fingerprint, proxy, model betas.
// skipSystemPromptPrefix: the lightning prompt is complete on its own;
// the CLI prefix would dilute the batching instructions.
// tools: [] is load-bearing — without it Sonnet emits
// <function_calls> XML before the text commands. Original
// lightning-harness.js (apps repo) does the same.
@@ -193,7 +191,6 @@ export function createChromeContext(
max_tokens: req.max_tokens,
stop_sequences: req.stop_sequences,
signal: req.signal,
skipSystemPromptPrefix: true,
tools: [],
querySource: 'chrome_mcp',
})

View File

@@ -16,7 +16,13 @@ export const FINGERPRINT_SALT = '59cf53e54c78'
export function extractFirstMessageText(
messages: (UserMessage | AssistantMessage)[],
): string {
const firstUserMessage = messages.find(msg => msg.type === 'user')
// Skip isMeta messages (system-injected attachments) so the fingerprint
// reflects the actual user input. On --resume, reorderAttachmentsForAPI
// can bubble meta messages before the real first user message, changing
// the fingerprint and breaking cache.
const firstUserMessage =
messages.find(msg => msg.type === 'user' && !msg.isMeta) ??
messages.find(msg => msg.type === 'user')
if (!firstUserMessage) {
return ''
}

View File

@@ -39,7 +39,7 @@ export function isMcpInstructionsDeltaEnabled(): boolean {
if (isEnvDefinedFalsy(process.env.CLAUDE_CODE_MCP_INSTR_DELTA)) return false
return (
process.env.USER_TYPE === 'ant' ||
getFeatureValue_CACHED_MAY_BE_STALE('tengu_basalt_3kr', false)
getFeatureValue_CACHED_MAY_BE_STALE('tengu_basalt_3kr', true)
)
}

View File

@@ -780,7 +780,6 @@ async function classifyYoloActionXml(
model,
max_tokens: (mode === 'fast' ? 256 : 64) + thinkingPadding,
system: systemBlocks,
skipSystemPromptPrefix: true,
temperature: 0,
thinking: disableThinking,
messages: [
@@ -867,7 +866,6 @@ async function classifyYoloActionXml(
model,
max_tokens: 4096 + thinkingPadding,
system: systemBlocks,
skipSystemPromptPrefix: true,
temperature: 0,
thinking: disableThinking,
messages: [
@@ -1141,7 +1139,6 @@ export async function classifyYoloAction(
cache_control: getCacheControl({ querySource: 'auto_mode' }),
},
],
skipSystemPromptPrefix: true,
temperature: 0,
thinking: disableThinking,
messages: [

View File

@@ -51,7 +51,7 @@ export type SideQueryOptions = {
maxRetries?: number
/** Abort signal */
signal?: AbortSignal
/** Skip CLI system prompt prefix (keeps attribution header for OAuth). For internal classifiers that provide their own prompt. */
/** Skip CLI system prompt prefix (keeps attribution header for OAuth). Default true — side queries are internal classifiers with their own prompt. Set false only for queries that need the full "You are Claude Code…" prefix. */
skipSystemPromptPrefix?: boolean
/** Temperature override */
temperature?: number
@@ -115,7 +115,7 @@ export async function sideQuery(opts: SideQueryOptions): Promise<BetaMessage> {
max_tokens = 1024,
maxRetries = 2,
signal,
skipSystemPromptPrefix,
skipSystemPromptPrefix = true,
temperature,
thinking,
stop_sequences,