diff --git a/src/cli/handlers/autoMode.ts b/src/cli/handlers/autoMode.ts
index fb2c3d2e..86bff197 100644
--- a/src/cli/handlers/autoMode.ts
+++ b/src/cli/handlers/autoMode.ts
@@ -116,7 +116,6 @@ export async function autoModeCritiqueHandler(options: {
       querySource: 'auto_mode_critique',
       model,
       system: CRITIQUE_SYSTEM_PROMPT,
-      skipSystemPromptPrefix: true,
       max_tokens: 4096,
       messages: [
         {
diff --git a/src/commands.ts b/src/commands.ts
index a11f3be2..7b8034e8 100644
--- a/src/commands.ts
+++ b/src/commands.ts
@@ -17,6 +17,7 @@ import config from './commands/config/index.js'
 import { context, contextNonInteractive } from './commands/context/index.js'
 import cost from './commands/cost/index.js'
 import diff from './commands/diff/index.js'
+import dream from './commands/dream/index.js'
 import ctx_viz from './commands/ctx_viz/index.js'
 import doctor from './commands/doctor/index.js'
 import onboardGithub from './commands/onboard-github/index.js'
@@ -274,6 +275,7 @@ const COMMANDS = memoize((): Command[] => [
   contextNonInteractive,
   cost,
   diff,
+  dream,
   doctor,
   effort,
   exit,
diff --git a/src/commands/dream/dream.ts b/src/commands/dream/dream.ts
new file mode 100644
index 00000000..085b7fde
--- /dev/null
+++ b/src/commands/dream/dream.ts
@@ -0,0 +1,109 @@
+import type { ContentBlockParam } from '@anthropic-ai/sdk/resources/messages.mjs'
+import type { Command } from '../../commands.js'
+import { isAutoMemoryEnabled, getAutoMemPath } from '../../memdir/paths.js'
+import { getProjectDir } from '../../utils/sessionStorage.js'
+import { getOriginalCwd, getSessionId } from '../../bootstrap/state.js'
+import { buildConsolidationPrompt } from '../../services/autoDream/consolidationPrompt.js'
+import {
+  readLastConsolidatedAt,
+  listSessionsTouchedSince,
+  recordConsolidation,
+} from '../../services/autoDream/consolidationLock.js'
+
+const MS_PER_HOUR = 3_600_000
+
+/**
+ * Formats how long ago the previous consolidation ran, for the prompt header.
+ *
+ * @param lastAt - Timestamp of the previous run in epoch milliseconds; any
+ *   value that is not greater than zero is reported as "never".
+ * @param now - Current time in epoch milliseconds.
+ * @returns `'never'`, or the elapsed time formatted like `'3.5h ago'`.
+ */
+function describeLastRun(lastAt: number, now: number): string {
+  if (!(lastAt > 0)) {
+    return 'never'
+  }
+  // Clamp so a timestamp ahead of the local clock (e.g. clock skew) renders
+  // as "0.0h ago" rather than a negative age.
+  const elapsedMs = Math.max(0, now - lastAt)
+  return `${(elapsedMs / MS_PER_HOUR).toFixed(1)}h ago`
+}
+
+/**
+ * `/dream` prompt command: manually triggers memory consolidation.
+ *
+ * Enabled only while auto-memory is enabled. Its prompt is the output of
+ * `buildConsolidationPrompt` for the memory root and transcript directory,
+ * extended with a note that the run was user-triggered and the list of
+ * session ids to review.
+ */
+const command = {
+  type: 'prompt',
+  name: 'dream',
+  description:
+    'Run memory consolidation — synthesize recent sessions into durable memories',
+  isEnabled: () => isAutoMemoryEnabled(),
+  progressMessage: 'consolidating memories',
+  contentLength: 0,
+  source: 'builtin',
+  /**
+   * Builds the consolidation prompt for a manual run and records the run.
+   *
+   * Reading the previous timestamp and listing sessions are best-effort
+   * (their errors are swallowed); every other call, including
+   * `recordConsolidation`, is allowed to throw.
+   *
+   * @returns A single text block containing the prompt.
+   */
+  async getPromptForCommand(): Promise<ContentBlockParam[]> {
+    const memoryRoot = getAutoMemPath()
+    const transcriptDir = getProjectDir(getOriginalCwd())
+
+    // Best-effort: an unreadable timestamp is treated as "never consolidated".
+    let lastAt: number
+    try {
+      lastAt = await readLastConsolidatedAt()
+    } catch {
+      lastAt = 0
+    }
+
+    // Best-effort: if sessions cannot be listed, continue with none and let
+    // the current-session fallback below apply.
+    let touchedSessionIds: string[]
+    try {
+      touchedSessionIds = await listSessionsTouchedSince(lastAt)
+    } catch {
+      touchedSessionIds = []
+    }
+
+    // Prefer sessions other than this one; when there are none, list the
+    // current session so the prompt never carries an empty list.
+    const currentSession = getSessionId()
+    const otherSessionIds = touchedSessionIds.filter(
+      id => id !== currentSession,
+    )
+    const sessionIds: string[] =
+      otherSessionIds.length > 0 ? otherSessionIds : [currentSession]
+
+    const lastRun = describeLastRun(lastAt, Date.now())
+
+    const extra = `
+**Manually triggered by user via /dream.**
+
+Sessions since last consolidation (${sessionIds.length}, last run: ${lastRun}):
+${sessionIds.map(id => `- ${id}`).join('\n')}`
+
+    const prompt = buildConsolidationPrompt(memoryRoot, transcriptDir, extra)
+
+    // Record the consolidation timestamp programmatically so auto-dream
+    // knows when the last manual run happened.
+    // NOTE(review): this runs while the prompt is being built, i.e. before
+    // the model has acted on it — confirm that is the intended moment.
+    await recordConsolidation()
+
+    return [{ type: 'text', text: prompt }]
+  },
+} satisfies Command
+
+export default command
diff --git a/src/commands/dream/index.ts b/src/commands/dream/index.ts
new file mode 100644
index 00000000..b00d5d21
--- /dev/null
+++ b/src/commands/dream/index.ts
@@ -0,0 +1 @@
+export { default } from './dream.js'
diff --git a/src/entrypoints/cli.tsx b/src/entrypoints/cli.tsx
index 39614f19..e67a8764 100644
--- a/src/entrypoints/cli.tsx
+++ b/src/entrypoints/cli.tsx
@@ -10,6 +10,13 @@ import {
   redactSecretValueForDisplay,
 } from '../utils/providerProfile.js'
 
+// OpenClaude: disable experimental API betas by default.
+// Tool search (defer_loading), global cache scope, and context management
+// require internal API support not available to external accounts → 500.
+// Users can opt-in with CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=false. +// eslint-disable-next-line custom-rules/no-top-level-side-effects +process.env.CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS ??= 'true' + // Bugfix for corepack auto-pinning, which adds yarnpkg to peoples' package.jsons // eslint-disable-next-line custom-rules/no-top-level-side-effects process.env.COREPACK_ENABLE_AUTO_PIN = '0'; diff --git a/src/entrypoints/mcp.ts b/src/entrypoints/mcp.ts index deaf9d6a..05421b4f 100644 --- a/src/entrypoints/mcp.ts +++ b/src/entrypoints/mcp.ts @@ -1,3 +1,10 @@ +// OpenClaude: disable experimental API betas by default. +// Tool search (defer_loading), global cache scope, and context management +// require internal API support not available to external accounts → 500. +// Users can opt-in with CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=false. +// eslint-disable-next-line custom-rules/no-top-level-side-effects +process.env.CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS ??= 'true' + import { Server } from '@modelcontextprotocol/sdk/server/index.js' import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js' import { diff --git a/src/memdir/findRelevantMemories.ts b/src/memdir/findRelevantMemories.ts index c239e0a3..78b6f844 100644 --- a/src/memdir/findRelevantMemories.ts +++ b/src/memdir/findRelevantMemories.ts @@ -98,7 +98,6 @@ async function selectRelevantMemories( const result = await sideQuery({ model: getDefaultSonnetModel(), system: SELECT_MEMORIES_SYSTEM_PROMPT, - skipSystemPromptPrefix: true, messages: [ { role: 'user', diff --git a/src/services/api/claude.ts b/src/services/api/claude.ts index 32416683..e6576de9 100644 --- a/src/services/api/claude.ts +++ b/src/services/api/claude.ts @@ -1466,6 +1466,10 @@ async function* queryModel( } } + // Latch Sonnet 1M experiment at query start so mid-retry GB refreshes + // don't flip the beta header and bust the cache key. 
+ const sonnet1mExpLatched = getSonnet1mExpTreatmentEnabled(options.model) + const effort = resolveAppliedEffort(options.model, options.effortValue) if (feature('PROMPT_CACHE_BREAK_DETECTION')) { @@ -1549,11 +1553,9 @@ async function* queryModel( const paramsFromContext = (retryContext: RetryContext) => { const betasParams = [...betas] - // Append 1M beta dynamically for the Sonnet 1M experiment. - if ( - !betasParams.includes(CONTEXT_1M_BETA_HEADER) && - getSonnet1mExpTreatmentEnabled(retryContext.model) - ) { + // Append 1M beta from the latched experiment state (computed once before + // the closure to avoid mid-retry GB flips changing the cache key). + if (!betasParams.includes(CONTEXT_1M_BETA_HEADER) && sonnet1mExpLatched) { betasParams.push(CONTEXT_1M_BETA_HEADER) } @@ -1709,6 +1711,13 @@ async function* queryModel( return { model: normalizeModelStringForAPI(options.model), + // IMPORTANT: `system` must appear before `messages` in the object literal. + // JSON.stringify preserves insertion order. The native Bun attestation + // (Attestation.zig) overwrites the FIRST `cch=00000` sentinel in the + // serialized body. If `messages` is serialized first and conversation + // history contains this literal string, the wrong occurrence is replaced, + // producing a different system prompt on each request and breaking cache. 
+ system, messages: addCacheBreakpoints( messagesForAPI, enablePromptCaching, @@ -1718,7 +1727,6 @@ async function* queryModel( consumedPinnedEdits, options.skipCacheWrite, ), - system, tools: allTools, tool_choice: options.toolChoice, ...(useBetas && { betas: betasParams }), diff --git a/src/services/api/codexShim.ts b/src/services/api/codexShim.ts index 7b66ac5d..27ec6f2a 100644 --- a/src/services/api/codexShim.ts +++ b/src/services/api/codexShim.ts @@ -563,7 +563,7 @@ export async function performCodexRequest(options: { throw APIError.generate( response.status, errorResponse, `Codex API error ${response.status}: ${errorBody}`, - response.headers as unknown as Record, + response.headers as unknown as Headers, ) } @@ -646,7 +646,7 @@ export async function collectCodexCompletedResponse( if (event.event === 'response.failed') { const msg = event.data?.response?.error?.message ?? event.data?.error?.message ?? 'Codex response failed' - throw APIError.generate(500, undefined, msg, {} as Record) + throw APIError.generate(500, undefined, msg, new Headers()) } if ( @@ -661,7 +661,7 @@ export async function collectCodexCompletedResponse( if (!completedResponse) { throw APIError.generate( 500, undefined, 'Codex response ended without a completed payload', - {} as Record, + new Headers(), ) } @@ -820,7 +820,7 @@ export async function* codexStreamToAnthropic( if (event.event === 'response.failed') { const msg = payload?.response?.error?.message ?? payload?.error?.message ?? 
'Codex response failed' - throw APIError.generate(500, undefined, msg, {} as Record) + throw APIError.generate(500, undefined, msg, new Headers()) } } diff --git a/src/services/api/openaiShim.ts b/src/services/api/openaiShim.ts index b8407be3..f1e6ed82 100644 --- a/src/services/api/openaiShim.ts +++ b/src/services/api/openaiShim.ts @@ -41,6 +41,13 @@ import { import { sanitizeSchemaForOpenAICompat } from '../../utils/schemaSanitizer.js' import { redactSecretValueForDisplay } from '../../utils/providerProfile.js' +type SecretValueSource = Partial<{ + OPENAI_API_KEY: string + CODEX_API_KEY: string + GEMINI_API_KEY: string + GOOGLE_API_KEY: string +}> + const GITHUB_MODELS_DEFAULT_BASE = 'https://models.github.ai/inference' const GITHUB_API_VERSION = '2022-11-28' const GITHUB_429_MAX_RETRIES = 3 @@ -750,7 +757,7 @@ class OpenAIShimMessages { ? ` or place a Codex auth.json at ${credentials.authPath}` : '' const safeModel = - redactSecretValueForDisplay(request.requestedModel, process.env) ?? + redactSecretValueForDisplay(request.requestedModel, process.env as SecretValueSource) ?? 'the requested model' throw new Error( `Codex auth is required for ${safeModel}. 
Set CODEX_API_KEY${authHint}.`, @@ -941,13 +948,13 @@ class OpenAIShimMessages { response.status, errorResponse, `OpenAI API error ${response.status}: ${errorBody}${rateHint}`, - response.headers as unknown as Record, + response.headers as unknown as Headers, ) } throw APIError.generate( 500, undefined, 'OpenAI shim: request loop exited unexpectedly', - {} as Record, + new Headers(), ) } diff --git a/src/tools/AgentTool/prompt.ts b/src/tools/AgentTool/prompt.ts index 2a051768..2328c51c 100644 --- a/src/tools/AgentTool/prompt.ts +++ b/src/tools/AgentTool/prompt.ts @@ -60,7 +60,7 @@ export function shouldInjectAgentListInMessages(): boolean { if (isEnvTruthy(process.env.CLAUDE_CODE_AGENT_LIST_IN_MESSAGES)) return true if (isEnvDefinedFalsy(process.env.CLAUDE_CODE_AGENT_LIST_IN_MESSAGES)) return false - return getFeatureValue_CACHED_MAY_BE_STALE('tengu_agent_list_attach', false) + return getFeatureValue_CACHED_MAY_BE_STALE('tengu_agent_list_attach', true) } export async function getPrompt( diff --git a/src/utils/claudeInChrome/mcpServer.ts b/src/utils/claudeInChrome/mcpServer.ts index 4195d2c4..8bbd3d2d 100644 --- a/src/utils/claudeInChrome/mcpServer.ts +++ b/src/utils/claudeInChrome/mcpServer.ts @@ -181,8 +181,6 @@ export function createChromeContext( usage?: { input_tokens: number; output_tokens: number } }> => { // sideQuery handles OAuth attribution fingerprint, proxy, model betas. - // skipSystemPromptPrefix: the lightning prompt is complete on its own; - // the CLI prefix would dilute the batching instructions. // tools: [] is load-bearing — without it Sonnet emits // XML before the text commands. Original // lightning-harness.js (apps repo) does the same. 
@@ -193,7 +191,6 @@ export function createChromeContext( max_tokens: req.max_tokens, stop_sequences: req.stop_sequences, signal: req.signal, - skipSystemPromptPrefix: true, tools: [], querySource: 'chrome_mcp', }) diff --git a/src/utils/fingerprint.ts b/src/utils/fingerprint.ts index 17906ebd..d65cf309 100644 --- a/src/utils/fingerprint.ts +++ b/src/utils/fingerprint.ts @@ -16,7 +16,13 @@ export const FINGERPRINT_SALT = '59cf53e54c78' export function extractFirstMessageText( messages: (UserMessage | AssistantMessage)[], ): string { - const firstUserMessage = messages.find(msg => msg.type === 'user') + // Skip isMeta messages (system-injected attachments) so the fingerprint + // reflects the actual user input. On --resume, reorderAttachmentsForAPI + // can bubble meta messages before the real first user message, changing + // the fingerprint and breaking cache. + const firstUserMessage = + messages.find(msg => msg.type === 'user' && !msg.isMeta) ?? + messages.find(msg => msg.type === 'user') if (!firstUserMessage) { return '' } diff --git a/src/utils/mcpInstructionsDelta.ts b/src/utils/mcpInstructionsDelta.ts index acac0c1e..23dce08b 100644 --- a/src/utils/mcpInstructionsDelta.ts +++ b/src/utils/mcpInstructionsDelta.ts @@ -39,7 +39,7 @@ export function isMcpInstructionsDeltaEnabled(): boolean { if (isEnvDefinedFalsy(process.env.CLAUDE_CODE_MCP_INSTR_DELTA)) return false return ( process.env.USER_TYPE === 'ant' || - getFeatureValue_CACHED_MAY_BE_STALE('tengu_basalt_3kr', false) + getFeatureValue_CACHED_MAY_BE_STALE('tengu_basalt_3kr', true) ) } diff --git a/src/utils/permissions/yoloClassifier.ts b/src/utils/permissions/yoloClassifier.ts index 1ec78b51..77ee5187 100644 --- a/src/utils/permissions/yoloClassifier.ts +++ b/src/utils/permissions/yoloClassifier.ts @@ -780,7 +780,6 @@ async function classifyYoloActionXml( model, max_tokens: (mode === 'fast' ? 
256 : 64) + thinkingPadding, system: systemBlocks, - skipSystemPromptPrefix: true, temperature: 0, thinking: disableThinking, messages: [ @@ -867,7 +866,6 @@ async function classifyYoloActionXml( model, max_tokens: 4096 + thinkingPadding, system: systemBlocks, - skipSystemPromptPrefix: true, temperature: 0, thinking: disableThinking, messages: [ @@ -1141,7 +1139,6 @@ export async function classifyYoloAction( cache_control: getCacheControl({ querySource: 'auto_mode' }), }, ], - skipSystemPromptPrefix: true, temperature: 0, thinking: disableThinking, messages: [ diff --git a/src/utils/sideQuery.ts b/src/utils/sideQuery.ts index 4e6d4d73..55bba2c0 100644 --- a/src/utils/sideQuery.ts +++ b/src/utils/sideQuery.ts @@ -51,7 +51,7 @@ export type SideQueryOptions = { maxRetries?: number /** Abort signal */ signal?: AbortSignal - /** Skip CLI system prompt prefix (keeps attribution header for OAuth). For internal classifiers that provide their own prompt. */ + /** Skip CLI system prompt prefix (keeps attribution header for OAuth). Default true — side queries are internal classifiers with their own prompt. Set false only for queries that need the full "You are Claude Code…" prefix. */ skipSystemPromptPrefix?: boolean /** Temperature override */ temperature?: number @@ -115,7 +115,7 @@ export async function sideQuery(opts: SideQueryOptions): Promise { max_tokens = 1024, maxRetries = 2, signal, - skipSystemPromptPrefix, + skipSystemPromptPrefix = true, temperature, thinking, stop_sequences,