diff --git a/src/QueryEngine.ts b/src/QueryEngine.ts index 0a80c613..d5a12001 100644 --- a/src/QueryEngine.ts +++ b/src/QueryEngine.ts @@ -46,6 +46,7 @@ import type { AttributionState } from './utils/commitAttribution.js' import { getGlobalConfig } from './utils/config.js' import { getCwd } from './utils/cwd.js' import { isBareMode, isEnvTruthy } from './utils/envUtils.js' +import { logForDebugging } from './utils/debug.js' import { getFastModeState } from './utils/fastMode.js' import { type FileHistoryState, @@ -695,9 +696,11 @@ export class QueryEngine { // progress are now recorded inline (their switch cases below), but // this flush still matters for the preservedSegment tail walk. // If the SDK subprocess restarts before then (claude-desktop kills - // between turns), tailUuid points to a never-written message → - // applyPreservedSegmentRelinks fails its tail→head walk → returns - // without pruning → resume loads full pre-compact history. + // between turns), tailUuid can point to a never-written message. In + // that case strip preservedSegment before transcript persistence so + // resume falls back to ordinary boundary pruning instead of relying on + // broken relink metadata. + let transcriptMessage = message if ( persistSession && message.type === 'system' && @@ -710,10 +713,21 @@ export class QueryEngine { ) if (tailIdx !== -1) { await recordTranscript(this.mutableMessages.slice(0, tailIdx + 1)) + } else { + transcriptMessage = { + ...message, + compactMetadata: { + ...message.compactMetadata, + preservedSegment: undefined, + }, + } + logForDebugging( + `[QueryEngine] stripped preservedSegment before transcript write; missing tail ${tailUuid}`, + ) } } } - messages.push(message) + messages.push(transcriptMessage) if (persistSession) { // Fire-and-forget for assistant messages. claude.ts yields one // assistant message per content block, then mutates the last diff --git a/src/main.tsx b/src/main.tsx index 219d7f09..5aeee505 100644 --- a/src/main.tsx +++ b/src/main.tsx @@ -3137,7 +3137,7 @@ async function run(): Promise { }); } logError(error); - process.exit(1); + return await exitWithError(root, errorMessage(error), () => gracefulShutdown(1)); } } else if (feature('DIRECT_CONNECT') && _pendingConnect?.url) { // `claude connect ` — full interactive TUI connected to a remote server @@ -3644,7 +3644,7 @@ async function run(): Promise { success: false }); logError(error); - await exitWithError(root, `Unable to load transcript from file: ${options.resume}`, () => gracefulShutdown(1)); + await exitWithError(root, errorMessage(error), () => gracefulShutdown(1)); } } } @@ -3686,7 +3686,7 @@ async function run(): Promise { success: false }); logError(error); - await exitWithError(root, `Failed to resume session ${sessionId}`); + await exitWithError(root, errorMessage(error)); } } diff --git a/src/screens/ResumeConversation.tsx b/src/screens/ResumeConversation.tsx index 38e01bb9..7e290d25 100644 --- a/src/screens/ResumeConversation.tsx +++ b/src/screens/ResumeConversation.tsx @@ -25,6 +25,7 @@ import { renameRecordingForSession } from '../utils/asciicast.js'; import { updateSessionName } from '../utils/concurrentSessions.js'; import { loadConversationForResume } from '../utils/conversationRecovery.js'; import { checkCrossProjectResume } from '../utils/crossProjectResume.js'; +import { errorMessage } from '../utils/errors.js'; import type { FileHistorySnapshot } from '../utils/fileHistory.js'; import { logError } from '../utils/log.js'; import { createSystemMessage } from '../utils/messages.js'; @@ -101,6 +102,7 @@ export function ResumeConversation({ agentColor?: AgentColorName; mainThreadAgentDefinition?: AgentDefinition; } | null>(null); + const [resumeError, setResumeError] = React.useState(null); const [crossProjectCommand, setCrossProjectCommand] = React.useState(null); const sessionLogResultRef = React.useRef(null); // Mirror of logs.length so loadMoreLogs can compute value indices outside @@ -176,6 +178,7 @@ export function ResumeConversation({ process.exit(1); } async function onSelect(log_0: LogOption) { + setResumeError(null); setResuming(true); const resumeStart = performance.now(); const crossProjectCheck = checkCrossProjectResume(log_0, showAllProjects, worktreePaths); @@ -287,7 +290,8 @@ export function ResumeConversation({ success: false }); logError(e as Error); - throw e; + setResumeError(errorMessage(e)); + setResuming(false); } } if (crossProjectCommand) { @@ -308,10 +312,18 @@ export function ResumeConversation({ Resuming conversation… ; } + const resumeErrorBanner = resumeError ? + Failed to resume conversation. + {resumeError} + Choose a different conversation to continue. + : null; if (filteredLogs.length === 0) { return ; } - return loadLogs(showAllProjects) : undefined} onLoadMore={loadMoreLogs} initialSearchQuery={initialSearchQuery} showAllProjects={showAllProjects} onToggleAllProjects={handleToggleAllProjects} onAgenticSearch={agenticSessionSearch} />; + return + {resumeErrorBanner} + loadLogs(showAllProjects) : undefined} onLoadMore={loadMoreLogs} initialSearchQuery={initialSearchQuery} showAllProjects={showAllProjects} onToggleAllProjects={handleToggleAllProjects} onAgenticSearch={agenticSessionSearch} /> + ; } function NoConversationsMessage() { const $ = _c(2); diff --git a/src/utils/conversationRecovery.hooks.test.ts b/src/utils/conversationRecovery.hooks.test.ts new file mode 100644 index 00000000..4f466aae --- /dev/null +++ b/src/utils/conversationRecovery.hooks.test.ts @@ -0,0 +1,71 @@ +/** + * Hook-side-effect regression lives in a separate file with no static import of + * conversationRecovery so Bun's mock.module can replace sessionStart before + * that module is first loaded. + */ +import { afterEach, expect, mock, test } from 'bun:test' +import { mkdtemp, rm, writeFile } from 'node:fs/promises' +import { tmpdir } from 'node:os' +import { join } from 'node:path' + +const tempDirs: string[] = [] +const originalSimple = process.env.CLAUDE_CODE_SIMPLE +const sessionId = '00000000-0000-4000-8000-000000001999' +const ts = '2026-04-02T00:00:00.000Z' + +function id(n: number): string { + return `00000000-0000-4000-8000-${String(n).padStart(12, '0')}` +} + +function user(uuid: string, content: string) { + return { + type: 'user', + uuid, + parentUuid: null, + timestamp: ts, + cwd: '/tmp', + userType: 'external', + sessionId, + version: 'test', + isSidechain: false, + isMeta: false, + message: { + role: 'user', + content, + }, + } +} + +async function writeJsonl(entry: unknown): Promise { + const dir = await mkdtemp(join(tmpdir(), 'openclaude-conversation-recovery-hooks-')) + tempDirs.push(dir) + const filePath = join(dir, 'resume.jsonl') + await writeFile(filePath, `${JSON.stringify(entry)}\n`) + return filePath +} + +afterEach(async () => { + mock.restore() + process.env.CLAUDE_CODE_SIMPLE = originalSimple + await Promise.all(tempDirs.splice(0).map(dir => rm(dir, { recursive: true, force: true }))) +}) + +test('loadConversationForResume rejects oversized transcripts before resume hooks run', async () => { + delete process.env.CLAUDE_CODE_SIMPLE + const hugeContent = 'x'.repeat(8 * 1024 * 1024 + 32 * 1024) + const path = await writeJsonl(user(id(3), hugeContent)) + const hookSpy = mock(() => Promise.resolve([{ type: 'hook' }])) + + mock.module('./sessionStart.js', () => ({ + processSessionStartHooks: hookSpy, + })) + + const { loadConversationForResume, ResumeTranscriptTooLargeError } = await import( + './conversationRecovery.ts' + ) + + await expect(loadConversationForResume('fixture', path)).rejects.toBeInstanceOf( + ResumeTranscriptTooLargeError, + ) + expect(hookSpy).not.toHaveBeenCalled() +}) diff --git a/src/utils/conversationRecovery.test.ts b/src/utils/conversationRecovery.test.ts new file mode 100644 index 00000000..cd9e7bd3 --- /dev/null +++ b/src/utils/conversationRecovery.test.ts @@ -0,0 +1,79 @@ +import { afterEach, expect, test } from 'bun:test' +import { mkdtemp, rm, writeFile } from 'node:fs/promises' +import { tmpdir } from 'node:os' +import { join } from 'node:path' + +import { + loadConversationForResume, + ResumeTranscriptTooLargeError, +} from './conversationRecovery.ts' + +const tempDirs: string[] = [] +const originalSimple = process.env.CLAUDE_CODE_SIMPLE +const sessionId = '00000000-0000-4000-8000-000000001999' +const ts = '2026-04-02T00:00:00.000Z' + +function id(n: number): string { + return `00000000-0000-4000-8000-${String(n).padStart(12, '0')}` +} + +function user(uuid: string, content: string) { + return { + type: 'user', + uuid, + parentUuid: null, + timestamp: ts, + cwd: '/tmp', + userType: 'external', + sessionId, + version: 'test', + isSidechain: false, + isMeta: false, + message: { + role: 'user', + content, + }, + } +} + +async function writeJsonl(entry: unknown): Promise { + const dir = await mkdtemp(join(tmpdir(), 'openclaude-conversation-recovery-')) + tempDirs.push(dir) + const filePath = join(dir, 'resume.jsonl') + await writeFile(filePath, `${JSON.stringify(entry)}\n`) + return filePath +} + +afterEach(async () => { + process.env.CLAUDE_CODE_SIMPLE = originalSimple + await Promise.all(tempDirs.splice(0).map(dir => rm(dir, { recursive: true, force: true }))) +}) + +test('loadConversationForResume accepts a small transcript from jsonl path', async () => { + process.env.CLAUDE_CODE_SIMPLE = '1' + const path = await writeJsonl(user(id(1), 'hello')) + + const result = await loadConversationForResume('fixture', path) + expect(result).not.toBeNull() + expect(result?.sessionId).toBe(sessionId) + expect(result?.messages.length).toBeGreaterThan(0) +}) + +test('loadConversationForResume rejects oversized reconstructed transcripts', async () => { + process.env.CLAUDE_CODE_SIMPLE = '1' + const hugeContent = 'x'.repeat(8 * 1024 * 1024 + 32 * 1024) + const path = await writeJsonl(user(id(2), hugeContent)) + + let caught: unknown + try { + await loadConversationForResume('fixture', path) + } catch (error) { + caught = error + } + + expect(caught).toBeInstanceOf(ResumeTranscriptTooLargeError) + expect((caught as Error).message).toContain( + 'Reconstructed transcript is too large to resume safely', + ) +}) + diff --git a/src/utils/conversationRecovery.ts b/src/utils/conversationRecovery.ts index af5ea230..938502bf 100644 --- a/src/utils/conversationRecovery.ts +++ b/src/utils/conversationRecovery.ts @@ -47,6 +47,7 @@ import { loadTranscriptFile, removeExtraFields, } from './sessionStorage.js' +import { jsonStringify } from './slowOperations.js' import type { ContentReplacementRecord } from './toolResultStorage.js' // Dead code elimination: ant-only tool names are conditionally required so @@ -71,6 +72,37 @@ const SEND_USER_FILE_TOOL_NAME: string | null = feature('KAIROS') : null /* eslint-enable @typescript-eslint/no-require-imports */ +// Hard cap for reconstructed resume payloads before REPL boot. 8 MiB keeps +// resume bounded well below the multi-GB failure mode we saw while leaving +// enough room for normal compacted sessions plus resume hook context. +const MAX_RESUME_MESSAGE_BYTES = 8 * 1024 * 1024 + +export class ResumeTranscriptTooLargeError extends Error { + constructor( + readonly bytes: number, + readonly maxBytes: number, + readonly messageCount: number, + ) { + super( + `Reconstructed transcript is too large to resume safely (${( + bytes / (1024 * 1024) + ).toFixed(1)} MiB > ${(maxBytes / (1024 * 1024)).toFixed(1)} MiB, ${messageCount} messages).`, + ) + this.name = 'ResumeTranscriptTooLargeError' + } +} + +function assertResumeMessageSize(messages: Message[]): void { + const bytes = Buffer.byteLength(jsonStringify(messages), 'utf8') + if (bytes > MAX_RESUME_MESSAGE_BYTES) { + throw new ResumeTranscriptTooLargeError( + bytes, + MAX_RESUME_MESSAGE_BYTES, + messages.length, + ) + } +} + /** * Transforms legacy attachment types to current types for backward compatibility */ @@ -561,11 +593,16 @@ export async function loadConversationForResume( const deserialized = deserializeMessagesWithInterruptDetection(messages!) messages = deserialized.messages + // Reject oversized resumes before running side-effectful resume hooks. + assertResumeMessageSize(messages) + // Process session start hooks for resume const hookMessages = await processSessionStartHooks('resume', { sessionId }) - // Append hook messages to the conversation + // Append hook messages to the conversation and guard again in case hook + // output itself pushes the session over the safe resume limit. messages.push(...hookMessages) + assertResumeMessageSize(messages) return { messages, diff --git a/src/utils/sessionStorage.test.ts b/src/utils/sessionStorage.test.ts new file mode 100644 index 00000000..085a395a --- /dev/null +++ b/src/utils/sessionStorage.test.ts @@ -0,0 +1,196 @@ +import { afterEach, expect, test } from 'bun:test' +import { mkdtemp, rm, writeFile } from 'node:fs/promises' +import { tmpdir } from 'node:os' +import { join } from 'node:path' + +import { buildConversationChain, loadTranscriptFile } from './sessionStorage.ts' + +const tempDirs: string[] = [] +const sessionId = '00000000-0000-4000-8000-000000000999' +const ts = '2026-04-02T00:00:00.000Z' + +function id(n: number): string { + return `00000000-0000-4000-8000-${String(n).padStart(12, '0')}` +} + +function base(uuid: string, parentUuid: string | null) { + return { + uuid, + parentUuid, + timestamp: ts, + cwd: '/tmp', + userType: 'external', + sessionId, + version: 'test', + isSidechain: false, + } +} + +function user(uuid: string, parentUuid: string | null, content: string) { + return { + ...base(uuid, parentUuid), + type: 'user', + isMeta: false, + message: { + role: 'user', + content, + }, + } +} + +function assistant(uuid: string, parentUuid: string | null, text: string) { + return { + ...base(uuid, parentUuid), + type: 'assistant', + message: { + id: uuid, + type: 'message', + role: 'assistant', + content: [{ type: 'text', text }], + model: 'test-model', + stop_reason: 'end_turn', + usage: { + input_tokens: 1, + output_tokens: 1, + cache_creation_input_tokens: 0, + cache_read_input_tokens: 0, + }, + }, + } +} + +function compactBoundary( + uuid: string, + parentUuid: string | null, + preservedSegment: { + headUuid: string + anchorUuid: string + tailUuid: string + }, +) { + return { + ...base(uuid, parentUuid), + type: 'system', + subtype: 'compact_boundary', + level: 'info', + isMeta: false, + content: 'Conversation compacted', + compactMetadata: { + trigger: 'manual', + preTokens: 123, + preservedSegment, + }, + } +} + +async function writeJsonl(entries: unknown[]): Promise { + const dir = await mkdtemp(join(tmpdir(), 'openclaude-session-storage-')) + tempDirs.push(dir) + const filePath = join(dir, 'session.jsonl') + await writeFile(filePath, `${entries.map(e => JSON.stringify(e)).join('\n')}\n`) + return filePath +} + +afterEach(async () => { + await Promise.all(tempDirs.splice(0).map(dir => rm(dir, { recursive: true, force: true }))) +}) + +test('loadTranscriptFile fails closed when preserved-segment tail is missing', async () => { + const oldUser = user(id(1), null, 'old user') + const oldAssistant = assistant(id(2), id(1), 'old assistant') + const preservedHead = assistant(id(3), id(2), 'preserved head') + const boundary = compactBoundary(id(4), id(2), { + headUuid: id(3), + anchorUuid: id(5), + tailUuid: id(30), + }) + const summary = user(id(5), id(4), 'summary') + + const filePath = await writeJsonl([ + oldUser, + oldAssistant, + preservedHead, + boundary, + summary, + ]) + + const { messages } = await loadTranscriptFile(filePath) + expect(messages.has(id(1))).toBe(false) + expect(messages.has(id(2))).toBe(false) + expect(messages.has(id(3))).toBe(false) + expect(messages.has(id(4))).toBe(true) + expect(messages.has(id(5))).toBe(true) + + const chain = buildConversationChain(messages, messages.get(id(5))!) + expect(chain.map(message => message.uuid)).toEqual([id(4), id(5)]) +}) + +test('loadTranscriptFile preserves and relinks a valid preserved segment', async () => { + const oldUser = user(id(11), null, 'old user') + const oldAssistant = assistant(id(12), id(11), 'old assistant') + const preservedHead = assistant(id(13), id(12), 'preserved head') + const preservedTail = assistant(id(14), id(13), 'preserved tail') + const boundary = compactBoundary(id(15), id(12), { + headUuid: id(13), + anchorUuid: id(16), + tailUuid: id(14), + }) + const summary = user(id(16), id(15), 'summary') + + const filePath = await writeJsonl([ + oldUser, + oldAssistant, + preservedHead, + preservedTail, + boundary, + summary, + ]) + + const { messages } = await loadTranscriptFile(filePath) + expect(messages.has(id(11))).toBe(false) + expect(messages.has(id(12))).toBe(false) + expect(messages.has(id(13))).toBe(true) + expect(messages.has(id(14))).toBe(true) + expect(messages.get(id(13))?.parentUuid).toBe(id(16)) + expect(messages.get(id(14))?.parentUuid).toBe(id(13)) + + const chain = buildConversationChain(messages, messages.get(id(14))!) + expect(chain.map(message => message.uuid)).toEqual([ + id(15), + id(16), + id(13), + id(14), + ]) +}) + +test('loadTranscriptFile fails closed when preserved-segment anchor is missing', async () => { + // Models the case where the compact boundary was written but the post-boundary + // summary/anchor message never made it to disk. + const oldUser = user(id(21), null, 'old user') + const oldAssistant = assistant(id(22), id(21), 'old assistant') + const preservedHead = assistant(id(23), id(22), 'preserved head') + const preservedTail = assistant(id(24), id(23), 'preserved tail') + const boundary = compactBoundary(id(25), id(22), { + headUuid: id(23), + anchorUuid: id(26), + tailUuid: id(24), + }) + + const filePath = await writeJsonl([ + oldUser, + oldAssistant, + preservedHead, + preservedTail, + boundary, + ]) + + const { messages } = await loadTranscriptFile(filePath) + expect(messages.has(id(21))).toBe(false) + expect(messages.has(id(22))).toBe(false) + expect(messages.has(id(23))).toBe(false) + expect(messages.has(id(24))).toBe(false) + expect(messages.has(id(25))).toBe(true) + + const chain = buildConversationChain(messages, messages.get(id(25))!) + expect(chain.map(message => message.uuid)).toEqual([id(25)]) +}) diff --git a/src/utils/sessionStorage.ts b/src/utils/sessionStorage.ts index 6d775d6c..8753c239 100644 --- a/src/utils/sessionStorage.ts +++ b/src/utils/sessionStorage.ts @@ -1838,7 +1838,10 @@ export function removeExtraFields( */ function applyPreservedSegmentRelinks( messages: Map, -): void { +): { + relinkFailed: boolean +} { + let relinkFailed = false type Seg = NonNullable< SystemCompactBoundaryMessage['compactMetadata']['preservedSegment'] > @@ -1863,46 +1866,100 @@ function applyPreservedSegmentRelinks( i++ } // No seg anywhere → no-op. findUnresolvedToolUse etc. read the full map. - if (!lastSeg) return + if (!lastSeg) return { relinkFailed } // Seg stale (no-seg boundary came after): skip relink, still prune at // absolute — otherwise the stale preserved chain becomes a phantom leaf. const segIsLive = lastSegBoundaryIdx === absoluteLastBoundaryIdx - // Validate tail→head BEFORE mutating so malformed metadata is a true - // no-op (walk stops at headUuid, doesn't need the relink to run first). + // Validate tail→head BEFORE mutating so malformed metadata never keeps + // the full pre-compact history alive on resume. If the walk breaks, mark + // the relink as failed and fall through to absolute-boundary pruning. const preservedUuids = new Set() if (segIsLive) { const walkSeen = new Set() + const tailInTranscript = messages.has(lastSeg.tailUuid) + const headInTranscript = messages.has(lastSeg.headUuid) + const anchorInTranscript = messages.has(lastSeg.anchorUuid) let cur = messages.get(lastSeg.tailUuid) let reachedHead = false - while (cur && !walkSeen.has(cur.uuid)) { + let failureKind: + | 'missing_tail' + | 'missing_parent' + | 'null_parent_before_head' + | 'cycle_before_head' + | 'missing_anchor' = 'missing_tail' + let lastSeenUuid: UUID | undefined + let lastSeenType: TranscriptMessage['type'] | undefined + let breakParentUuid: UUID | null | undefined + + while (cur) { + if (walkSeen.has(cur.uuid)) { + failureKind = 'cycle_before_head' + break + } walkSeen.add(cur.uuid) preservedUuids.add(cur.uuid) + lastSeenUuid = cur.uuid + lastSeenType = cur.type if (cur.uuid === lastSeg.headUuid) { reachedHead = true break } - cur = cur.parentUuid ? messages.get(cur.parentUuid) : undefined + breakParentUuid = cur.parentUuid + if (!breakParentUuid) { + failureKind = 'null_parent_before_head' + break + } + const next = messages.get(breakParentUuid) + if (!next) { + failureKind = 'missing_parent' + break + } + cur = next } - if (!reachedHead) { + + if (!reachedHead || !anchorInTranscript) { + if (!anchorInTranscript && reachedHead) { + failureKind = 'missing_anchor' + } // tail→head walk broke — a UUID in the preserved segment isn't in the - // transcript. Returning here skips the prune below, so resume loads - // the full pre-compact history. Known cause: mid-turn-yielded - // attachment pushed to mutableMessages but never recordTranscript'd - // (SDK subprocess restarted before next turn's qe:420 flush). + // transcript. Fail closed: keep only the post-boundary chain instead of + // loading the full pre-compact history on resume. + relinkFailed = true + preservedUuids.clear() logEvent('tengu_relink_walk_broken', { - tailInTranscript: messages.has(lastSeg.tailUuid), - headInTranscript: messages.has(lastSeg.headUuid), - anchorInTranscript: messages.has(lastSeg.anchorUuid), + failureKind: + failureKind as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + tailInTranscript, + headInTranscript, + anchorInTranscript, + walkSteps: walkSeen.size, + transcriptSize: messages.size, + tailIndex: entryIndex.get(lastSeg.tailUuid), + headIndex: entryIndex.get(lastSeg.headUuid), + anchorIndex: entryIndex.get(lastSeg.anchorUuid), + lastSeenType, + breakParentInTranscript: Boolean( + breakParentUuid && messages.has(breakParentUuid), + ), + breakParentIsNull: breakParentUuid === null, + }) + logForDiagnosticsNoPII('warn', 'relink_walk_broken', { + failureKind, + tailInTranscript, + headInTranscript, + anchorInTranscript, walkSteps: walkSeen.size, transcriptSize: messages.size, }) - return + logForDebugging( + `[sessionStorage] preserved-segment relink failed: kind=${failureKind} tail=${lastSeg.tailUuid} head=${lastSeg.headUuid} anchor=${lastSeg.anchorUuid} lastSeen=${lastSeenUuid ?? 'none'} breakParent=${breakParentUuid ?? 'null'}`, + ) } } - if (segIsLive) { + if (segIsLive && !relinkFailed) { const head = messages.get(lastSeg.headUuid) if (head) { messages.set(lastSeg.headUuid, { @@ -1953,6 +2010,7 @@ function applyPreservedSegmentRelinks( } } for (const uuid of toDelete) messages.delete(uuid) + return { relinkFailed } } /** @@ -3701,7 +3759,12 @@ export async function loadTranscriptFile( // File doesn't exist or can't be read } - applyPreservedSegmentRelinks(messages) + const { relinkFailed } = applyPreservedSegmentRelinks(messages) + if (relinkFailed) { + logForDiagnosticsNoPII('warn', 'resume_relink_fail_closed', { + transcriptSize: messages.size, + }) + } applySnipRemovals(messages) // Compute leaf UUIDs once at load time