Limit auto-mode classifier transcript growth (#277)

* Limit auto-mode classifier transcript growth
* Release persisted tool results from transcript state

---------

Co-authored-by: pr0ln <pr0ln@pr0lnui-Macmini.local>
2026-04-04 10:24:14 +09:00
parent e5c9a6f629
commit fb221baa21
5 changed files with 332 additions and 89 deletions
--- a/src/components/messages/UserToolResultMessage/UserToolSuccessMessage.tsx
+++ b/src/components/messages/UserToolResultMessage/UserToolSuccessMessage.tsx
--- a/src/utils/permissions/yoloClassifier.test.ts
+++ b/src/utils/permissions/yoloClassifier.test.ts
@@ -0,0 +1,79 @@
+import { describe, expect, test } from 'bun:test'
+
+import { buildTranscriptForClassifier } from './yoloClassifier.js'
+
+const tools = [
+  {
+    name: 'Bash',
+    aliases: [],
+    toAutoClassifierInput(input: Record<string, unknown>) {
+      return String(input.command ?? '')
+    },
+  },
+] as any
+
+describe('buildTranscriptForClassifier', () => {
+  test('keeps the most recent transcript entries within budget', () => {
+    const messages = [
+      {
+        type: 'user',
+        message: {
+          content: 'old-user',
+        },
+      },
+      {
+        type: 'assistant',
+        message: {
+          content: [
+            {
+              type: 'tool_use',
+              name: 'Bash',
+              input: { command: 'old-tool' },
+            },
+          ],
+        },
+      },
+      {
+        type: 'user',
+        message: {
+          content: 'new-user',
+        },
+      },
+      {
+        type: 'assistant',
+        message: {
+          content: [
+            {
+              type: 'tool_use',
+              name: 'Bash',
+              input: { command: 'new-tool' },
+            },
+          ],
+        },
+      },
+    ] as any
+
+    // Derive the budget from the serialized form of the two newest messages
+    // instead of hard-coding a character count. A fixed budget of 32 only
+    // fits the plain-text lines ("User: new-user\n" + "Bash new-tool\n" = 29
+    // chars); the JSONL form of the same two lines needs at least 40, so the
+    // assertion on 'new-user' would fail there.
+    // NOTE(review): whether the JSONL format can be active under test depends
+    // on isJsonlTranscriptEnabled(), which is not visible here — confirm.
+    const recentOnly = buildTranscriptForClassifier(messages.slice(2), tools)
+    const transcript = buildTranscriptForClassifier(
+      messages,
+      tools,
+      recentOnly.length,
+    )
+
+    // An exact-fit budget must keep both recent entries, in chronological
+    // order, and nothing older.
+    expect(transcript).toBe(recentOnly)
+    expect(transcript).toContain('new-user')
+    expect(transcript).toContain('new-tool')
+    expect(transcript).not.toContain('old-user')
+    expect(transcript).not.toContain('old-tool')
+  })
+
+  test('truncates oversized user blocks before serialization', () => {
+    // A single 40_000-char user message is larger than the per-value cap, so
+    // the serialized line should be cut down and carry the truncation marker.
+    const messages = [
+      {
+        type: 'user',
+        message: {
+          content: 'x'.repeat(40_000),
+        },
+      },
+    ] as any
+
+    const transcript = buildTranscriptForClassifier(messages, tools)
+
+    expect(transcript.length).toBeLessThan(33_000)
+    expect(transcript).toContain('[truncated ')
+  })
+})
--- a/src/utils/permissions/yoloClassifier.ts
+++ b/src/utils/permissions/yoloClassifier.ts
@@ -68,6 +68,9 @@ const ANTHROPIC_PERMISSIONS_TEMPLATE: string =
    : ''
 /* eslint-enable custom-rules/no-process-env-top-level, @typescript-eslint/no-require-imports */

+// Upper bound on the serialized classifier transcript, measured with string
+// `.length`. It is the default `maxChars` of buildTranscriptForClassifier;
+// classifyYoloAction subtracts the pending action's length from it before
+// serializing the history.
+const MAX_CLASSIFIER_TRANSCRIPT_CHARS = 200_000
+// Longest string value truncateClassifierValue returns unchanged. Longer
+// values are cut to this length and suffixed with a "[truncated N chars]"
+// marker.
+const MAX_CLASSIFIER_BLOCK_VALUE_CHARS = 32_000
+
 function isUsingExternalPermissions(): boolean {
  if (process.env.USER_TYPE !== 'ant') return true
  const config = getFeatureValue_CACHED_MAY_BE_STALE(
@@ -293,6 +296,64 @@ export type TranscriptEntry = {
  content: TranscriptBlock[]
 }

+/**
+ * Convert a single conversation message into a classifier transcript entry.
+ *
+ * - Assistant messages contribute only their tool_use blocks.
+ * - User messages contribute their text (string content or text blocks).
+ * - `queued_command` attachments contribute the prompt as one user text block
+ *   (text blocks of an array prompt are joined with newlines).
+ *
+ * @returns the entry, or null when the message has nothing to contribute.
+ */
+function messageToTranscriptEntry(msg: Message): TranscriptEntry | null {
+  if (msg.type === 'assistant') {
+    // Only include tool_use blocks — assistant text is model-authored
+    // and could be crafted to influence the classifier's decision.
+    const toolUses: TranscriptBlock[] = []
+    for (const part of msg.message.content) {
+      if (part.type !== 'tool_use') continue
+      toolUses.push({ type: 'tool_use', name: part.name, input: part.input })
+    }
+    if (toolUses.length === 0) return null
+    return { role: 'assistant', content: toolUses }
+  }
+
+  if (msg.type === 'user') {
+    const body = msg.message.content
+    const texts: TranscriptBlock[] = []
+    if (typeof body === 'string') {
+      texts.push({ type: 'text', text: body })
+    } else if (Array.isArray(body)) {
+      for (const part of body) {
+        if (part.type !== 'text') continue
+        texts.push({ type: 'text', text: part.text })
+      }
+    }
+    if (texts.length === 0) return null
+    return { role: 'user', content: texts }
+  }
+
+  // Anything else only matters when it is a queued command attachment.
+  if (msg.type !== 'attachment' || msg.attachment.type !== 'queued_command') {
+    return null
+  }
+
+  const prompt = msg.attachment.prompt
+  if (typeof prompt === 'string') {
+    // A string prompt is forwarded verbatim, even when empty.
+    return { role: 'user', content: [{ type: 'text', text: prompt }] }
+  }
+  if (!Array.isArray(prompt)) return null
+
+  const joined = prompt
+    .filter(
+      (part): part is { type: 'text'; text: string } => part.type === 'text',
+    )
+    .map(part => part.text)
+    .join('\n')
+  // An array prompt with no text (or only empty text) yields no entry.
+  if (joined === '') return null
+  return { role: 'user', content: [{ type: 'text', text: joined }] }
+}
+
 /**
 * Build transcript entries from messages.
 * Includes user text messages and assistant tool_use blocks (excluding assistant text).
@@ -302,58 +363,9 @@ export type TranscriptEntry = {
 export function buildTranscriptEntries(messages: Message[]): TranscriptEntry[] {
  const transcript: TranscriptEntry[] = []
  for (const msg of messages) {
-    if (msg.type === 'attachment' && msg.attachment.type === 'queued_command') {
-      const prompt = msg.attachment.prompt
-      let text: string | null = null
-      if (typeof prompt === 'string') {
-        text = prompt
-      } else if (Array.isArray(prompt)) {
-        text =
-          prompt
-            .filter(
-              (block): block is { type: 'text'; text: string } =>
-                block.type === 'text',
-            )
-            .map(block => block.text)
-            .join('\n') || null
-      }
-      if (text !== null) {
-        transcript.push({
-          role: 'user',
-          content: [{ type: 'text', text }],
-        })
-      }
-    } else if (msg.type === 'user') {
-      const content = msg.message.content
-      const textBlocks: TranscriptBlock[] = []
-      if (typeof content === 'string') {
-        textBlocks.push({ type: 'text', text: content })
-      } else if (Array.isArray(content)) {
-        for (const block of content) {
-          if (block.type === 'text') {
-            textBlocks.push({ type: 'text', text: block.text })
-          }
-        }
-      }
-      if (textBlocks.length > 0) {
-        transcript.push({ role: 'user', content: textBlocks })
-      }
-    } else if (msg.type === 'assistant') {
-      const blocks: TranscriptBlock[] = []
-      for (const block of msg.message.content) {
-        // Only include tool_use blocks — assistant text is model-authored
-        // and could be crafted to influence the classifier's decision.
-        if (block.type === 'tool_use') {
-          blocks.push({
-            type: 'tool_use',
-            name: block.name,
-            input: block.input,
-          })
-        }
-      }
-      if (blocks.length > 0) {
-        transcript.push({ role: 'assistant', content: blocks })
-      }
+    const entry = messageToTranscriptEntry(msg)
+    if (entry) {
+      transcript.push(entry)
    }
  }
  return transcript
@@ -372,6 +384,17 @@ function buildToolLookup(tools: Tools): ToolLookup {
  return map
 }

+/**
+ * Cap a string value at MAX_CLASSIFIER_BLOCK_VALUE_CHARS code units.
+ *
+ * Values within the limit are returned unchanged. Longer values keep their
+ * head and get a `… [truncated N chars]` suffix, where N is the number of
+ * code units dropped.
+ *
+ * @param value - the already-encoded block value to bound.
+ * @returns the original value, or its truncated head plus the marker.
+ */
+function truncateClassifierValue(value: string): string {
+  if (value.length <= MAX_CLASSIFIER_BLOCK_VALUE_CHARS) {
+    return value
+  }
+  let cut = MAX_CLASSIFIER_BLOCK_VALUE_CHARS
+  // `slice` works on UTF-16 code units. If the last kept unit is a high
+  // surrogate, its low half would be cut off, leaving a lone surrogate in
+  // the prompt — back off by one so the pair is dropped as a whole.
+  const lastKept = value.charCodeAt(cut - 1)
+  if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
+    cut -= 1
+  }
+  const omitted = value.length - cut
+  return value.slice(0, cut) + `… [truncated ${omitted} chars]`
+}
+
 /**
 * Serialize a single transcript block as a JSONL dict line: `{"Bash":"ls"}`
 * for tool calls, `{"user":"text"}` for user text. The tool value is the
@@ -410,15 +433,22 @@ function toCompactBlock(
    }
    if (encoded === '') return ''
    if (isJsonlTranscriptEnabled()) {
-      return jsonStringify({ [block.name]: encoded }) + '\n'
+      const jsonlValue =
+        typeof encoded === 'string'
+          ? truncateClassifierValue(encoded)
+          : encoded
+      return jsonStringify({ [block.name]: jsonlValue }) + '\n'
    }
-    const s = typeof encoded === 'string' ? encoded : jsonStringify(encoded)
+    const s =
+      typeof encoded === 'string'
+        ? truncateClassifierValue(encoded)
+        : jsonStringify(encoded)
    return `${block.name} ${s}\n`
  }
  if (block.type === 'text' && role === 'user') {
    return isJsonlTranscriptEnabled()
-      ? jsonStringify({ user: block.text }) + '\n'
-      : `User: ${block.text}\n`
+      ? jsonStringify({ user: truncateClassifierValue(block.text) }) + '\n'
+      : `User: ${truncateClassifierValue(block.text)}\n`
  }
  return ''
 }
@@ -427,6 +457,96 @@ function toCompact(entry: TranscriptEntry, lookup: ToolLookup): string {
  return entry.content.map(b => toCompactBlock(b, entry.role, lookup)).join('')
 }

+/**
+ * Serialize the most recent part of the conversation for the classifier,
+ * keeping the total serialized length within `maxChars`.
+ *
+ * Messages are visited newest-first, converted with messageToTranscriptEntry
+ * and serialized block by block with toCompactBlock. Whole entries are kept
+ * until the next one would exceed the budget. The result is returned in
+ * chronological order.
+ *
+ * @param messages - conversation messages, oldest first.
+ * @param tools - tools used to build the lookup passed to toCompactBlock.
+ * @param maxChars - budget for the summed `.length` of all kept blocks.
+ * @returns the kept blocks as text params, their summed lengths split by
+ *   role, the number of kept entries, and whether the walk stopped early.
+ */
+function serializeTranscriptForClassifier(
+  messages: Message[],
+  tools: Tools,
+  maxChars: number,
+): {
+  userContentBlocks: Anthropic.TextBlockParam[]
+  promptLengths: {
+    toolCalls: number
+    userPrompts: number
+  }
+  transcriptEntries: number
+  truncated: boolean
+} {
+  const lookup = buildToolLookup(tools)
+  // One inner array per kept entry, stored newest-first.
+  const keptEntries: Array<Array<{ role: TranscriptEntry['role']; text: string }>> =
+    []
+  let totalChars = 0
+  let truncated = false
+
+  // Walk backwards so the newest messages claim the budget first.
+  for (let i = messages.length - 1; i >= 0; i--) {
+    const entry = messageToTranscriptEntry(messages[i]!)
+    if (!entry) continue
+
+    const serializedBlocks: Array<{
+      role: TranscriptEntry['role']
+      text: string
+    }> = []
+    let entryChars = 0
+
+    for (const block of entry.content) {
+      const serialized = toCompactBlock(block, entry.role, lookup)
+      // Blocks that serialize to nothing neither count nor get emitted.
+      if (serialized === '') continue
+      serializedBlocks.push({ role: entry.role, text: serialized })
+      entryChars += serialized.length
+    }
+    if (serializedBlocks.length === 0) continue
+
+    if (totalChars + entryChars > maxChars) {
+      // Nothing has been kept yet, so this is the newest serializable entry
+      // and it alone is over budget. Salvage the blocks that fit.
+      if (totalChars === 0) {
+        const partialEntry: typeof serializedBlocks = []
+        let partialChars = 0
+        // Scan newest block first. A block that does not fit is skipped, not
+        // treated as a stop, so the kept blocks may be non-contiguous.
+        for (let j = serializedBlocks.length - 1; j >= 0; j--) {
+          const serialized = serializedBlocks[j]!
+          if (partialChars + serialized.text.length > maxChars) continue
+          // unshift keeps the surviving blocks in their original order.
+          partialEntry.unshift(serialized)
+          partialChars += serialized.text.length
+        }
+        if (partialEntry.length > 0) {
+          keptEntries.push(partialEntry)
+          totalChars += partialChars
+        }
+      }
+      truncated = true
+      break
+    }
+
+    keptEntries.push(serializedBlocks)
+    totalChars += entryChars
+    if (totalChars >= maxChars) {
+      // Budget exactly used up. Truncation is reported whenever older
+      // messages remain unvisited, whether or not they would have produced
+      // any transcript output.
+      truncated = i > 0
+      break
+    }
+  }
+
+  const userContentBlocks: Anthropic.TextBlockParam[] = []
+  let userPromptsLength = 0
+  let toolCallsLength = 0
+
+  // keptEntries is newest-first; iterate in reverse to emit oldest-first.
+  for (let i = keptEntries.length - 1; i >= 0; i--) {
+    for (const block of keptEntries[i]!) {
+      userContentBlocks.push({ type: 'text' as const, text: block.text })
+      // Every non-user role is counted towards the tool-call length.
+      if (block.role === 'user') {
+        userPromptsLength += block.text.length
+      } else {
+        toolCallsLength += block.text.length
+      }
+    }
+  }
+
+  return {
+    userContentBlocks,
+    promptLengths: {
+      toolCalls: toolCallsLength,
+      userPrompts: userPromptsLength,
+    },
+    transcriptEntries: keptEntries.length,
+    truncated,
+  }
+}
+
 /**
 * Build a compact transcript string including user messages and assistant tool_use blocks.
 * Used by AgentTool for handoff classification.
@@ -434,10 +554,10 @@ function toCompact(entry: TranscriptEntry, lookup: ToolLookup): string {
 export function buildTranscriptForClassifier(
  messages: Message[],
  tools: Tools,
+  maxChars: number = MAX_CLASSIFIER_TRANSCRIPT_CHARS,
 ): string {
-  const lookup = buildToolLookup(tools)
-  return buildTranscriptEntries(messages)
-    .map(e => toCompact(e, lookup))
+  return serializeTranscriptForClassifier(messages, tools, maxChars)
+    .userContentBlocks.map(block => block.text)
    .join('')
 }

@@ -1027,34 +1147,24 @@ export async function classifyYoloAction(
  }

  const systemPrompt = await buildYoloSystemPrompt(context)
-  const transcriptEntries = buildTranscriptEntries(messages)
+  const transcriptBudget = Math.max(
+    0,
+    MAX_CLASSIFIER_TRANSCRIPT_CHARS - actionCompact.length,
+  )
+  const serializedTranscript = serializeTranscriptForClassifier(
+    messages,
+    tools,
+    transcriptBudget,
+  )
  const claudeMdMessage = buildClaudeMdMessage()
  const prefixMessages: Anthropic.MessageParam[] = claudeMdMessage
    ? [claudeMdMessage]
    : []

-  let toolCallsLength = actionCompact.length
-  let userPromptsLength = 0
-  const userContentBlocks: Anthropic.TextBlockParam[] = []
-  for (const entry of transcriptEntries) {
-    for (const block of entry.content) {
-      const serialized = toCompactBlock(block, entry.role, lookup)
-      if (serialized === '') continue
-      switch (entry.role) {
-        case 'user':
-          userPromptsLength += serialized.length
-          break
-        case 'assistant':
-          toolCallsLength += serialized.length
-          break
-        default: {
-          const _exhaustive: never = entry.role
-          void _exhaustive
-        }
-      }
-      userContentBlocks.push({ type: 'text' as const, text: serialized })
-    }
-  }
+  const toolCallsLength =
+    actionCompact.length + serializedTranscript.promptLengths.toolCalls
+  const userPromptsLength = serializedTranscript.promptLengths.userPrompts
+  const userContentBlocks = [...serializedTranscript.userContentBlocks]

  const userPrompt = userContentBlocks.map(b => b.text).join('') + actionCompact
  const promptLengths = {
@@ -1080,7 +1190,8 @@ export async function classifyYoloAction(
        `(sys=${promptLengths.systemPrompt} ` +
        `tools=${promptLengths.toolCalls} ` +
        `user=${promptLengths.userPrompts}) ` +
-        `transcriptEntries=${transcriptEntries.length} ` +
+        `transcriptEntries=${serializedTranscript.transcriptEntries} ` +
+        `truncated=${serializedTranscript.truncated} ` +
        `messages=${messages.length}`,
    )
    logForDebugging(
@@ -1119,7 +1230,7 @@ export async function classifyYoloAction(
        mainLoopTokens: mainLoopTokens ?? tokenCountWithEstimation(messages),
        classifierChars,
        classifierTokensEst,
-        transcriptEntries: transcriptEntries.length,
+        transcriptEntries: serializedTranscript.transcriptEntries,
        messages: messages.length,
        action: actionCompact,
      },
@@ -1274,7 +1385,7 @@ export async function classifyYoloAction(
        mainLoopTokens,
        classifierChars,
        classifierTokensEst,
-        transcriptEntries: transcriptEntries.length,
+        transcriptEntries: serializedTranscript.transcriptEntries,
        messages: messages.length,
        action: actionCompact,
        model,
--- a/src/utils/toolResultStorage.test.ts
+++ b/src/utils/toolResultStorage.test.ts
@@ -14,6 +14,10 @@ test('applyToolResultReplacementsToMessages replaces matching tool results and p
        is_error: false,
      },
    ],
+    toolUseResult: {
+      stdout: 'very large tool output',
+      stderr: '',
+    },
  })
  const messages = [unrelated, oversizedResult]
  const replacement =
@@ -30,6 +34,7 @@ test('applyToolResultReplacementsToMessages replaces matching tool results and p
  expect((next[1]!.message.content as Array<{ content: string }>)[0]!.content).toBe(
    replacement,
  )
+  expect(next[1]!.toolUseResult).toBeUndefined()
 })

 test('applyToolResultReplacementsToMessages is idempotent when messages are already hydrated', () => {
--- a/src/utils/toolResultStorage.ts
+++ b/src/utils/toolResultStorage.ts
@@ -726,6 +726,11 @@ function replaceToolResultContents(
            : { ...block, content: replacement }
        }),
      },
+      // Drop the original tool payload once the model-facing content has been
+      // replaced with a persisted preview. Keeping both defeats the memory
+      // savings for long sessions because the live transcript still retains
+      // the oversized structured result.
+      toolUseResult: undefined,
    }
  })
  return changed ? nextMessages : messages