fix: disable experimental API betas by default, reduce side query token usage, standardize Headers type (#281)
* fix: disable experimental API betas by default to prevent 500 errors

  Tool search (defer_loading), global cache scope, and context management betas require internal Anthropic server-side support. External accounts receive 500 Internal Server Error when these are sent.

  Set CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=true by default in the CLI entrypoint. Users with internal access can opt back in with CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=false.

  Also includes: cache key stability fixes (Sonnet 1M latch, system-before-messages key ordering, resume fingerprint isMeta skip), sideQuery default cleanup, and /dream command.

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* refactor: standardize API headers to Headers type and enable tengu feature flags by default

* fix: address PR review — dream lock, MCP betas guard, redundant Partial

  - Call recordConsolidation() programmatically in /dream instead of delegating to model prompt (unreliable)
  - Add CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS guard to MCP entrypoint (was only in CLI entrypoint, causing 500s in MCP server mode)
  - Remove redundant ? markers from SecretValueSource Partial<{}> type

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -116,7 +116,6 @@ export async function autoModeCritiqueHandler(options: {
|
||||
querySource: 'auto_mode_critique',
|
||||
model,
|
||||
system: CRITIQUE_SYSTEM_PROMPT,
|
||||
skipSystemPromptPrefix: true,
|
||||
max_tokens: 4096,
|
||||
messages: [
|
||||
{
|
||||
|
||||
@@ -17,6 +17,7 @@ import config from './commands/config/index.js'
|
||||
import { context, contextNonInteractive } from './commands/context/index.js'
|
||||
import cost from './commands/cost/index.js'
|
||||
import diff from './commands/diff/index.js'
|
||||
import dream from './commands/dream/index.js'
|
||||
import ctx_viz from './commands/ctx_viz/index.js'
|
||||
import doctor from './commands/doctor/index.js'
|
||||
import onboardGithub from './commands/onboard-github/index.js'
|
||||
@@ -274,6 +275,7 @@ const COMMANDS = memoize((): Command[] => [
|
||||
contextNonInteractive,
|
||||
cost,
|
||||
diff,
|
||||
dream,
|
||||
doctor,
|
||||
effort,
|
||||
exit,
|
||||
|
||||
68
src/commands/dream/dream.ts
Normal file
68
src/commands/dream/dream.ts
Normal file
@@ -0,0 +1,68 @@
|
||||
import type { ContentBlockParam } from '@anthropic-ai/sdk/resources/messages.mjs'
|
||||
import type { Command } from '../../commands.js'
|
||||
import { isAutoMemoryEnabled, getAutoMemPath } from '../../memdir/paths.js'
|
||||
import { getProjectDir } from '../../utils/sessionStorage.js'
|
||||
import { getOriginalCwd, getSessionId } from '../../bootstrap/state.js'
|
||||
import { buildConsolidationPrompt } from '../../services/autoDream/consolidationPrompt.js'
|
||||
import {
|
||||
readLastConsolidatedAt,
|
||||
listSessionsTouchedSince,
|
||||
recordConsolidation,
|
||||
} from '../../services/autoDream/consolidationLock.js'
|
||||
|
||||
/**
 * `/dream` — builtin prompt command that lets the user trigger memory
 * consolidation by hand. It is only offered when `isAutoMemoryEnabled()`
 * reports true.
 */
const command = {
  type: 'prompt',
  name: 'dream',
  description:
    'Run memory consolidation — synthesize recent sessions into durable memories',
  isEnabled: () => isAutoMemoryEnabled(),
  progressMessage: 'consolidating memories',
  contentLength: 0,
  source: 'builtin',

  /**
   * Builds the consolidation prompt for a manual run.
   *
   * Looks up when consolidation last ran and which sessions were touched
   * since then, appends that as an extra section to the prompt produced by
   * `buildConsolidationPrompt`, records the consolidation, and returns the
   * prompt as a single text block.
   *
   * @returns One `text` content block holding the full consolidation prompt.
   */
  async getPromptForCommand(): Promise<ContentBlockParam[]> {
    const memoryRoot = getAutoMemPath()
    const transcriptDir = getProjectDir(getOriginalCwd())

    // Best-effort lookup: if the last-run timestamp cannot be read, treat it
    // as 0, which is reported below as "never".
    let lastConsolidatedAt = 0
    try {
      lastConsolidatedAt = await readLastConsolidatedAt()
    } catch {
      // Keep the 0 default.
    }

    // Best-effort lookup: a failed listing is treated as "no sessions".
    let touchedSessions: string[] = []
    try {
      touchedSessions = await listSessionsTouchedSince(lastConsolidatedAt)
    } catch {
      // Keep the empty default.
    }

    // Leave the running session out of the list; if that leaves nothing,
    // fall back to the running session alone so the list is never empty.
    const currentSession = getSessionId()
    const otherSessions = touchedSessions.filter(id => id !== currentSession)
    const sessionIds: string[] =
      otherSessions.length > 0 ? otherSessions : [currentSession]

    // Human-readable age of the previous run (3_600_000 ms per hour).
    let lastRunLabel = 'never'
    if (lastConsolidatedAt > 0) {
      const elapsedHours = (Date.now() - lastConsolidatedAt) / 3_600_000
      lastRunLabel = `${elapsedHours.toFixed(1)}h ago`
    }

    const sessionBullets = sessionIds.map(id => `- ${id}`).join('\n')
    const extra = `
**Manually triggered by user via /dream.**

Sessions since last consolidation (${sessionIds.length}, last run: ${lastRunLabel}):
${sessionBullets}`

    const prompt = buildConsolidationPrompt(memoryRoot, transcriptDir, extra)

    // Stamp the consolidation from code (not via the prompt) so auto-dream
    // can tell when the most recent manual run took place.
    await recordConsolidation()

    return [{ type: 'text', text: prompt }]
  },
} satisfies Command
|
||||
|
||||
export default command
|
||||
1
src/commands/dream/index.ts
Normal file
1
src/commands/dream/index.ts
Normal file
@@ -0,0 +1 @@
|
||||
export { default } from './dream.js'
|
||||
@@ -10,6 +10,13 @@ import {
|
||||
redactSecretValueForDisplay,
|
||||
} from '../utils/providerProfile.js'
|
||||
|
||||
// OpenClaude: disable experimental API betas by default.
|
||||
// Tool search (defer_loading), global cache scope, and context management
|
||||
// require internal API support not available to external accounts → 500.
|
||||
// Users can opt-in with CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=false.
|
||||
// eslint-disable-next-line custom-rules/no-top-level-side-effects
|
||||
process.env.CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS ??= 'true'
|
||||
|
||||
// Bugfix for corepack auto-pinning, which adds yarnpkg to peoples' package.jsons
|
||||
// eslint-disable-next-line custom-rules/no-top-level-side-effects
|
||||
process.env.COREPACK_ENABLE_AUTO_PIN = '0';
|
||||
|
||||
@@ -1,3 +1,10 @@
|
||||
// OpenClaude: disable experimental API betas by default.
|
||||
// Tool search (defer_loading), global cache scope, and context management
|
||||
// require internal API support not available to external accounts → 500.
|
||||
// Users can opt-in with CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=false.
|
||||
// eslint-disable-next-line custom-rules/no-top-level-side-effects
|
||||
process.env.CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS ??= 'true'
|
||||
|
||||
import { Server } from '@modelcontextprotocol/sdk/server/index.js'
|
||||
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
|
||||
import {
|
||||
|
||||
@@ -98,7 +98,6 @@ async function selectRelevantMemories(
|
||||
const result = await sideQuery({
|
||||
model: getDefaultSonnetModel(),
|
||||
system: SELECT_MEMORIES_SYSTEM_PROMPT,
|
||||
skipSystemPromptPrefix: true,
|
||||
messages: [
|
||||
{
|
||||
role: 'user',
|
||||
|
||||
@@ -1466,6 +1466,10 @@ async function* queryModel(
|
||||
}
|
||||
}
|
||||
|
||||
// Latch Sonnet 1M experiment at query start so mid-retry GB refreshes
|
||||
// don't flip the beta header and bust the cache key.
|
||||
const sonnet1mExpLatched = getSonnet1mExpTreatmentEnabled(options.model)
|
||||
|
||||
const effort = resolveAppliedEffort(options.model, options.effortValue)
|
||||
|
||||
if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
|
||||
@@ -1549,11 +1553,9 @@ async function* queryModel(
|
||||
const paramsFromContext = (retryContext: RetryContext) => {
|
||||
const betasParams = [...betas]
|
||||
|
||||
// Append 1M beta dynamically for the Sonnet 1M experiment.
|
||||
if (
|
||||
!betasParams.includes(CONTEXT_1M_BETA_HEADER) &&
|
||||
getSonnet1mExpTreatmentEnabled(retryContext.model)
|
||||
) {
|
||||
// Append 1M beta from the latched experiment state (computed once before
|
||||
// the closure to avoid mid-retry GB flips changing the cache key).
|
||||
if (!betasParams.includes(CONTEXT_1M_BETA_HEADER) && sonnet1mExpLatched) {
|
||||
betasParams.push(CONTEXT_1M_BETA_HEADER)
|
||||
}
|
||||
|
||||
@@ -1709,6 +1711,13 @@ async function* queryModel(
|
||||
|
||||
return {
|
||||
model: normalizeModelStringForAPI(options.model),
|
||||
// IMPORTANT: `system` must appear before `messages` in the object literal.
|
||||
// JSON.stringify preserves insertion order. The native Bun attestation
|
||||
// (Attestation.zig) overwrites the FIRST `cch=00000` sentinel in the
|
||||
// serialized body. If `messages` is serialized first and conversation
|
||||
// history contains this literal string, the wrong occurrence is replaced,
|
||||
// producing a different system prompt on each request and breaking cache.
|
||||
system,
|
||||
messages: addCacheBreakpoints(
|
||||
messagesForAPI,
|
||||
enablePromptCaching,
|
||||
@@ -1718,7 +1727,6 @@ async function* queryModel(
|
||||
consumedPinnedEdits,
|
||||
options.skipCacheWrite,
|
||||
),
|
||||
system,
|
||||
tools: allTools,
|
||||
tool_choice: options.toolChoice,
|
||||
...(useBetas && { betas: betasParams }),
|
||||
|
||||
@@ -563,7 +563,7 @@ export async function performCodexRequest(options: {
|
||||
throw APIError.generate(
|
||||
response.status, errorResponse,
|
||||
`Codex API error ${response.status}: ${errorBody}`,
|
||||
response.headers as unknown as Record<string, string>,
|
||||
response.headers as unknown as Headers,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -646,7 +646,7 @@ export async function collectCodexCompletedResponse(
|
||||
if (event.event === 'response.failed') {
|
||||
const msg = event.data?.response?.error?.message ??
|
||||
event.data?.error?.message ?? 'Codex response failed'
|
||||
throw APIError.generate(500, undefined, msg, {} as Record<string, string>)
|
||||
throw APIError.generate(500, undefined, msg, new Headers())
|
||||
}
|
||||
|
||||
if (
|
||||
@@ -661,7 +661,7 @@ export async function collectCodexCompletedResponse(
|
||||
if (!completedResponse) {
|
||||
throw APIError.generate(
|
||||
500, undefined, 'Codex response ended without a completed payload',
|
||||
{} as Record<string, string>,
|
||||
new Headers(),
|
||||
)
|
||||
}
|
||||
|
||||
@@ -820,7 +820,7 @@ export async function* codexStreamToAnthropic(
|
||||
if (event.event === 'response.failed') {
|
||||
const msg = payload?.response?.error?.message ??
|
||||
payload?.error?.message ?? 'Codex response failed'
|
||||
throw APIError.generate(500, undefined, msg, {} as Record<string, string>)
|
||||
throw APIError.generate(500, undefined, msg, new Headers())
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -41,6 +41,13 @@ import {
|
||||
import { sanitizeSchemaForOpenAICompat } from '../../utils/schemaSanitizer.js'
|
||||
import { redactSecretValueForDisplay } from '../../utils/providerProfile.js'
|
||||
|
||||
type SecretValueSource = Partial<{
|
||||
OPENAI_API_KEY: string
|
||||
CODEX_API_KEY: string
|
||||
GEMINI_API_KEY: string
|
||||
GOOGLE_API_KEY: string
|
||||
}>
|
||||
|
||||
const GITHUB_MODELS_DEFAULT_BASE = 'https://models.github.ai/inference'
|
||||
const GITHUB_API_VERSION = '2022-11-28'
|
||||
const GITHUB_429_MAX_RETRIES = 3
|
||||
@@ -750,7 +757,7 @@ class OpenAIShimMessages {
|
||||
? ` or place a Codex auth.json at ${credentials.authPath}`
|
||||
: ''
|
||||
const safeModel =
|
||||
redactSecretValueForDisplay(request.requestedModel, process.env) ??
|
||||
redactSecretValueForDisplay(request.requestedModel, process.env as SecretValueSource) ??
|
||||
'the requested model'
|
||||
throw new Error(
|
||||
`Codex auth is required for ${safeModel}. Set CODEX_API_KEY${authHint}.`,
|
||||
@@ -941,13 +948,13 @@ class OpenAIShimMessages {
|
||||
response.status,
|
||||
errorResponse,
|
||||
`OpenAI API error ${response.status}: ${errorBody}${rateHint}`,
|
||||
response.headers as unknown as Record<string, string>,
|
||||
response.headers as unknown as Headers,
|
||||
)
|
||||
}
|
||||
|
||||
throw APIError.generate(
|
||||
500, undefined, 'OpenAI shim: request loop exited unexpectedly',
|
||||
{} as Record<string, string>,
|
||||
new Headers(),
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -60,7 +60,7 @@ export function shouldInjectAgentListInMessages(): boolean {
|
||||
if (isEnvTruthy(process.env.CLAUDE_CODE_AGENT_LIST_IN_MESSAGES)) return true
|
||||
if (isEnvDefinedFalsy(process.env.CLAUDE_CODE_AGENT_LIST_IN_MESSAGES))
|
||||
return false
|
||||
return getFeatureValue_CACHED_MAY_BE_STALE('tengu_agent_list_attach', false)
|
||||
return getFeatureValue_CACHED_MAY_BE_STALE('tengu_agent_list_attach', true)
|
||||
}
|
||||
|
||||
export async function getPrompt(
|
||||
|
||||
@@ -181,8 +181,6 @@ export function createChromeContext(
|
||||
usage?: { input_tokens: number; output_tokens: number }
|
||||
}> => {
|
||||
// sideQuery handles OAuth attribution fingerprint, proxy, model betas.
|
||||
// skipSystemPromptPrefix: the lightning prompt is complete on its own;
|
||||
// the CLI prefix would dilute the batching instructions.
|
||||
// tools: [] is load-bearing — without it Sonnet emits
|
||||
// <function_calls> XML before the text commands. Original
|
||||
// lightning-harness.js (apps repo) does the same.
|
||||
@@ -193,7 +191,6 @@ export function createChromeContext(
|
||||
max_tokens: req.max_tokens,
|
||||
stop_sequences: req.stop_sequences,
|
||||
signal: req.signal,
|
||||
skipSystemPromptPrefix: true,
|
||||
tools: [],
|
||||
querySource: 'chrome_mcp',
|
||||
})
|
||||
|
||||
@@ -16,7 +16,13 @@ export const FINGERPRINT_SALT = '59cf53e54c78'
|
||||
export function extractFirstMessageText(
|
||||
messages: (UserMessage | AssistantMessage)[],
|
||||
): string {
|
||||
const firstUserMessage = messages.find(msg => msg.type === 'user')
|
||||
// Skip isMeta messages (system-injected attachments) so the fingerprint
|
||||
// reflects the actual user input. On --resume, reorderAttachmentsForAPI
|
||||
// can bubble meta messages before the real first user message, changing
|
||||
// the fingerprint and breaking cache.
|
||||
const firstUserMessage =
|
||||
messages.find(msg => msg.type === 'user' && !msg.isMeta) ??
|
||||
messages.find(msg => msg.type === 'user')
|
||||
if (!firstUserMessage) {
|
||||
return ''
|
||||
}
|
||||
|
||||
@@ -39,7 +39,7 @@ export function isMcpInstructionsDeltaEnabled(): boolean {
|
||||
if (isEnvDefinedFalsy(process.env.CLAUDE_CODE_MCP_INSTR_DELTA)) return false
|
||||
return (
|
||||
process.env.USER_TYPE === 'ant' ||
|
||||
getFeatureValue_CACHED_MAY_BE_STALE('tengu_basalt_3kr', false)
|
||||
getFeatureValue_CACHED_MAY_BE_STALE('tengu_basalt_3kr', true)
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -780,7 +780,6 @@ async function classifyYoloActionXml(
|
||||
model,
|
||||
max_tokens: (mode === 'fast' ? 256 : 64) + thinkingPadding,
|
||||
system: systemBlocks,
|
||||
skipSystemPromptPrefix: true,
|
||||
temperature: 0,
|
||||
thinking: disableThinking,
|
||||
messages: [
|
||||
@@ -867,7 +866,6 @@ async function classifyYoloActionXml(
|
||||
model,
|
||||
max_tokens: 4096 + thinkingPadding,
|
||||
system: systemBlocks,
|
||||
skipSystemPromptPrefix: true,
|
||||
temperature: 0,
|
||||
thinking: disableThinking,
|
||||
messages: [
|
||||
@@ -1141,7 +1139,6 @@ export async function classifyYoloAction(
|
||||
cache_control: getCacheControl({ querySource: 'auto_mode' }),
|
||||
},
|
||||
],
|
||||
skipSystemPromptPrefix: true,
|
||||
temperature: 0,
|
||||
thinking: disableThinking,
|
||||
messages: [
|
||||
|
||||
@@ -51,7 +51,7 @@ export type SideQueryOptions = {
|
||||
maxRetries?: number
|
||||
/** Abort signal */
|
||||
signal?: AbortSignal
|
||||
/** Skip CLI system prompt prefix (keeps attribution header for OAuth). For internal classifiers that provide their own prompt. */
|
||||
/** Skip CLI system prompt prefix (keeps attribution header for OAuth). Default true — side queries are internal classifiers with their own prompt. Set false only for queries that need the full "You are Claude Code…" prefix. */
|
||||
skipSystemPromptPrefix?: boolean
|
||||
/** Temperature override */
|
||||
temperature?: number
|
||||
@@ -115,7 +115,7 @@ export async function sideQuery(opts: SideQueryOptions): Promise<BetaMessage> {
|
||||
max_tokens = 1024,
|
||||
maxRetries = 2,
|
||||
signal,
|
||||
skipSystemPromptPrefix,
|
||||
skipSystemPromptPrefix = true,
|
||||
temperature,
|
||||
thinking,
|
||||
stop_sequences,
|
||||
|
||||
Reference in New Issue
Block a user