fix: disable experimental API betas by default, reduce side query token usage, standardize Headers type (#281)

* fix: disable experimental API betas by default to prevent 500 errors

Tool search (defer_loading), global cache scope, and context management
betas require internal Anthropic server-side support. External accounts
receive 500 Internal Server Error when these are sent.

Set CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=true by default in the CLI
entrypoint. Users with internal access can opt back in with =false.

Also includes: cache key stability fixes (Sonnet 1M latch, system-before-
messages key ordering, resume fingerprint isMeta skip), sideQuery default
cleanup, and /dream command.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* refactor: standardize API headers to Headers type and enable tengu feature flags by default

* fix: address PR review — dream lock, MCP betas guard, redundant Partial

- Call recordConsolidation() programmatically in /dream instead of
  delegating to model prompt (unreliable)
- Add CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS guard to MCP entrypoint
  (was only in CLI entrypoint, causing 500s in MCP server mode)
- Remove redundant ? markers from SecretValueSource Partial<{}> type

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
step325
2026-04-03 19:40:07 +02:00
committed by GitHub
parent afed73fa5a
commit 70cfa61582
16 changed files with 124 additions and 26 deletions

View File

@@ -116,7 +116,6 @@ export async function autoModeCritiqueHandler(options: {
querySource: 'auto_mode_critique',
model,
system: CRITIQUE_SYSTEM_PROMPT,
skipSystemPromptPrefix: true,
max_tokens: 4096,
messages: [
{

View File

@@ -17,6 +17,7 @@ import config from './commands/config/index.js'
import { context, contextNonInteractive } from './commands/context/index.js'
import cost from './commands/cost/index.js'
import diff from './commands/diff/index.js'
import dream from './commands/dream/index.js'
import ctx_viz from './commands/ctx_viz/index.js'
import doctor from './commands/doctor/index.js'
import onboardGithub from './commands/onboard-github/index.js'
@@ -274,6 +275,7 @@ const COMMANDS = memoize((): Command[] => [
contextNonInteractive,
cost,
diff,
dream,
doctor,
effort,
exit,

View File

@@ -0,0 +1,68 @@
import type { ContentBlockParam } from '@anthropic-ai/sdk/resources/messages.mjs'
import type { Command } from '../../commands.js'
import { isAutoMemoryEnabled, getAutoMemPath } from '../../memdir/paths.js'
import { getProjectDir } from '../../utils/sessionStorage.js'
import { getOriginalCwd, getSessionId } from '../../bootstrap/state.js'
import { buildConsolidationPrompt } from '../../services/autoDream/consolidationPrompt.js'
import {
readLastConsolidatedAt,
listSessionsTouchedSince,
recordConsolidation,
} from '../../services/autoDream/consolidationLock.js'
// Slash command /dream: manually trigger memory consolidation. Gathers the
// sessions touched since the last consolidation run, builds the consolidation
// prompt for the model, and records the run timestamp so the automatic
// dream scheduler knows a manual run just happened.
const command = {
  type: 'prompt',
  name: 'dream',
  description:
    'Run memory consolidation — synthesize recent sessions into durable memories',
  isEnabled: () => isAutoMemoryEnabled(),
  progressMessage: 'consolidating memories',
  contentLength: 0,
  source: 'builtin',
  async getPromptForCommand(): Promise<ContentBlockParam[]> {
    const memoryRoot = getAutoMemPath()
    const transcriptDir = getProjectDir(getOriginalCwd())

    // Best-effort read of the lock file; any failure is treated as "never ran".
    let lastRunAt = 0
    try {
      lastRunAt = await readLastConsolidatedAt()
    } catch {
      lastRunAt = 0
    }

    // Best-effort session listing; failures degrade to an empty list.
    let touchedSessions: string[] = []
    try {
      touchedSessions = await listSessionsTouchedSince(lastRunAt)
    } catch {
      touchedSessions = []
    }

    // Exclude the session issuing /dream. If no other sessions were touched,
    // fall back to consolidating the current session alone.
    const selfId = getSessionId()
    const otherSessions = touchedSessions.filter(id => id !== selfId)
    const sessionIds = otherSessions.length > 0 ? otherSessions : [selfId]

    const lastRunLabel =
      lastRunAt > 0
        ? `${((Date.now() - lastRunAt) / 3_600_000).toFixed(1)}h ago`
        : 'never'
    const extra = `
**Manually triggered by user via /dream.**
Sessions since last consolidation (${sessionIds.length}, last run: ${lastRunLabel}):
${sessionIds.map(id => `- ${id}`).join('\n')}`

    const prompt = buildConsolidationPrompt(memoryRoot, transcriptDir, extra)

    // Record consolidation timestamp programmatically so auto-dream
    // knows when the last manual run happened.
    await recordConsolidation()

    return [{ type: 'text', text: prompt }]
  },
} satisfies Command

export default command

View File

@@ -0,0 +1 @@
// Barrel file: re-export the /dream command so the registry can import the directory.
export { default } from './dream.js'

View File

@@ -10,6 +10,13 @@ import {
redactSecretValueForDisplay,
} from '../utils/providerProfile.js'
// OpenClaude: disable experimental API betas by default.
// Tool search (defer_loading), global cache scope, and context management
// require internal API support not available to external accounts → 500.
// Users can opt-in with CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=false.
// eslint-disable-next-line custom-rules/no-top-level-side-effects
process.env.CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS ??= 'true'
// Bugfix for corepack auto-pinning, which adds yarnpkg to peoples' package.jsons
// eslint-disable-next-line custom-rules/no-top-level-side-effects
process.env.COREPACK_ENABLE_AUTO_PIN = '0';

View File

@@ -1,3 +1,10 @@
// OpenClaude: disable experimental API betas by default.
// Tool search (defer_loading), global cache scope, and context management
// require internal API support not available to external accounts → 500.
// Users can opt-in with CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=false.
// eslint-disable-next-line custom-rules/no-top-level-side-effects
process.env.CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS ??= 'true'
import { Server } from '@modelcontextprotocol/sdk/server/index.js'
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
import {

View File

@@ -98,7 +98,6 @@ async function selectRelevantMemories(
const result = await sideQuery({
model: getDefaultSonnetModel(),
system: SELECT_MEMORIES_SYSTEM_PROMPT,
skipSystemPromptPrefix: true,
messages: [
{
role: 'user',

View File

@@ -1466,6 +1466,10 @@ async function* queryModel(
}
}
// Latch Sonnet 1M experiment at query start so mid-retry GB refreshes
// don't flip the beta header and bust the cache key.
const sonnet1mExpLatched = getSonnet1mExpTreatmentEnabled(options.model)
const effort = resolveAppliedEffort(options.model, options.effortValue)
if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
@@ -1549,11 +1553,9 @@ async function* queryModel(
const paramsFromContext = (retryContext: RetryContext) => {
const betasParams = [...betas]
// Append 1M beta dynamically for the Sonnet 1M experiment.
if (
!betasParams.includes(CONTEXT_1M_BETA_HEADER) &&
getSonnet1mExpTreatmentEnabled(retryContext.model)
) {
// Append 1M beta from the latched experiment state (computed once before
// the closure to avoid mid-retry GB flips changing the cache key).
if (!betasParams.includes(CONTEXT_1M_BETA_HEADER) && sonnet1mExpLatched) {
betasParams.push(CONTEXT_1M_BETA_HEADER)
}
@@ -1709,6 +1711,13 @@ async function* queryModel(
return {
model: normalizeModelStringForAPI(options.model),
// IMPORTANT: `system` must appear before `messages` in the object literal.
// JSON.stringify preserves insertion order. The native Bun attestation
// (Attestation.zig) overwrites the FIRST `cch=00000` sentinel in the
// serialized body. If `messages` is serialized first and conversation
// history contains this literal string, the wrong occurrence is replaced,
// producing a different system prompt on each request and breaking cache.
system,
messages: addCacheBreakpoints(
messagesForAPI,
enablePromptCaching,
@@ -1718,7 +1727,6 @@ async function* queryModel(
consumedPinnedEdits,
options.skipCacheWrite,
),
system,
tools: allTools,
tool_choice: options.toolChoice,
...(useBetas && { betas: betasParams }),

View File

@@ -563,7 +563,7 @@ export async function performCodexRequest(options: {
throw APIError.generate(
response.status, errorResponse,
`Codex API error ${response.status}: ${errorBody}`,
response.headers as unknown as Record<string, string>,
response.headers as unknown as Headers,
)
}
@@ -646,7 +646,7 @@ export async function collectCodexCompletedResponse(
if (event.event === 'response.failed') {
const msg = event.data?.response?.error?.message ??
event.data?.error?.message ?? 'Codex response failed'
throw APIError.generate(500, undefined, msg, {} as Record<string, string>)
throw APIError.generate(500, undefined, msg, new Headers())
}
if (
@@ -661,7 +661,7 @@ export async function collectCodexCompletedResponse(
if (!completedResponse) {
throw APIError.generate(
500, undefined, 'Codex response ended without a completed payload',
{} as Record<string, string>,
new Headers(),
)
}
@@ -820,7 +820,7 @@ export async function* codexStreamToAnthropic(
if (event.event === 'response.failed') {
const msg = payload?.response?.error?.message ??
payload?.error?.message ?? 'Codex response failed'
throw APIError.generate(500, undefined, msg, {} as Record<string, string>)
throw APIError.generate(500, undefined, msg, new Headers())
}
}

View File

@@ -41,6 +41,13 @@ import {
import { sanitizeSchemaForOpenAICompat } from '../../utils/schemaSanitizer.js'
import { redactSecretValueForDisplay } from '../../utils/providerProfile.js'
type SecretValueSource = Partial<{
OPENAI_API_KEY: string
CODEX_API_KEY: string
GEMINI_API_KEY: string
GOOGLE_API_KEY: string
}>
const GITHUB_MODELS_DEFAULT_BASE = 'https://models.github.ai/inference'
const GITHUB_API_VERSION = '2022-11-28'
const GITHUB_429_MAX_RETRIES = 3
@@ -750,7 +757,7 @@ class OpenAIShimMessages {
? ` or place a Codex auth.json at ${credentials.authPath}`
: ''
const safeModel =
redactSecretValueForDisplay(request.requestedModel, process.env) ??
redactSecretValueForDisplay(request.requestedModel, process.env as SecretValueSource) ??
'the requested model'
throw new Error(
`Codex auth is required for ${safeModel}. Set CODEX_API_KEY${authHint}.`,
@@ -941,13 +948,13 @@ class OpenAIShimMessages {
response.status,
errorResponse,
`OpenAI API error ${response.status}: ${errorBody}${rateHint}`,
response.headers as unknown as Record<string, string>,
response.headers as unknown as Headers,
)
}
throw APIError.generate(
500, undefined, 'OpenAI shim: request loop exited unexpectedly',
{} as Record<string, string>,
new Headers(),
)
}

View File

@@ -60,7 +60,7 @@ export function shouldInjectAgentListInMessages(): boolean {
if (isEnvTruthy(process.env.CLAUDE_CODE_AGENT_LIST_IN_MESSAGES)) return true
if (isEnvDefinedFalsy(process.env.CLAUDE_CODE_AGENT_LIST_IN_MESSAGES))
return false
return getFeatureValue_CACHED_MAY_BE_STALE('tengu_agent_list_attach', false)
return getFeatureValue_CACHED_MAY_BE_STALE('tengu_agent_list_attach', true)
}
export async function getPrompt(

View File

@@ -181,8 +181,6 @@ export function createChromeContext(
usage?: { input_tokens: number; output_tokens: number }
}> => {
// sideQuery handles OAuth attribution fingerprint, proxy, model betas.
// skipSystemPromptPrefix: the lightning prompt is complete on its own;
// the CLI prefix would dilute the batching instructions.
// tools: [] is load-bearing — without it Sonnet emits
// <function_calls> XML before the text commands. Original
// lightning-harness.js (apps repo) does the same.
@@ -193,7 +191,6 @@ export function createChromeContext(
max_tokens: req.max_tokens,
stop_sequences: req.stop_sequences,
signal: req.signal,
skipSystemPromptPrefix: true,
tools: [],
querySource: 'chrome_mcp',
})

View File

@@ -16,7 +16,13 @@ export const FINGERPRINT_SALT = '59cf53e54c78'
export function extractFirstMessageText(
messages: (UserMessage | AssistantMessage)[],
): string {
const firstUserMessage = messages.find(msg => msg.type === 'user')
// Skip isMeta messages (system-injected attachments) so the fingerprint
// reflects the actual user input. On --resume, reorderAttachmentsForAPI
// can bubble meta messages before the real first user message, changing
// the fingerprint and breaking cache.
const firstUserMessage =
messages.find(msg => msg.type === 'user' && !msg.isMeta) ??
messages.find(msg => msg.type === 'user')
if (!firstUserMessage) {
return ''
}

View File

@@ -39,7 +39,7 @@ export function isMcpInstructionsDeltaEnabled(): boolean {
if (isEnvDefinedFalsy(process.env.CLAUDE_CODE_MCP_INSTR_DELTA)) return false
return (
process.env.USER_TYPE === 'ant' ||
getFeatureValue_CACHED_MAY_BE_STALE('tengu_basalt_3kr', false)
getFeatureValue_CACHED_MAY_BE_STALE('tengu_basalt_3kr', true)
)
}

View File

@@ -780,7 +780,6 @@ async function classifyYoloActionXml(
model,
max_tokens: (mode === 'fast' ? 256 : 64) + thinkingPadding,
system: systemBlocks,
skipSystemPromptPrefix: true,
temperature: 0,
thinking: disableThinking,
messages: [
@@ -867,7 +866,6 @@ async function classifyYoloActionXml(
model,
max_tokens: 4096 + thinkingPadding,
system: systemBlocks,
skipSystemPromptPrefix: true,
temperature: 0,
thinking: disableThinking,
messages: [
@@ -1141,7 +1139,6 @@ export async function classifyYoloAction(
cache_control: getCacheControl({ querySource: 'auto_mode' }),
},
],
skipSystemPromptPrefix: true,
temperature: 0,
thinking: disableThinking,
messages: [

View File

@@ -51,7 +51,7 @@ export type SideQueryOptions = {
maxRetries?: number
/** Abort signal */
signal?: AbortSignal
/** Skip CLI system prompt prefix (keeps attribution header for OAuth). For internal classifiers that provide their own prompt. */
/** Skip CLI system prompt prefix (keeps attribution header for OAuth). Default true — side queries are internal classifiers with their own prompt. Set false only for queries that need the full "You are Claude Code…" prefix. */
skipSystemPromptPrefix?: boolean
/** Temperature override */
temperature?: number
@@ -115,7 +115,7 @@ export async function sideQuery(opts: SideQueryOptions): Promise<BetaMessage> {
max_tokens = 1024,
maxRetries = 2,
signal,
skipSystemPromptPrefix,
skipSystemPromptPrefix = true,
temperature,
thinking,
stop_sequences,