From fb221baa21250ae618000e79c9b4824d9ed1d12d Mon Sep 17 00:00:00 2001 From: pr0ln Date: Sat, 4 Apr 2026 10:24:14 +0900 Subject: [PATCH] Limit auto-mode classifier transcript growth (#277) * Limit auto-mode classifier transcript growth * Release persisted tool results from transcript state --------- Co-authored-by: pr0ln --- .../UserToolSuccessMessage.tsx | 51 +++- src/utils/permissions/yoloClassifier.test.ts | 79 +++++ src/utils/permissions/yoloClassifier.ts | 281 ++++++++++++------ src/utils/toolResultStorage.test.ts | 5 + src/utils/toolResultStorage.ts | 5 + 5 files changed, 332 insertions(+), 89 deletions(-) create mode 100644 src/utils/permissions/yoloClassifier.test.ts diff --git a/src/components/messages/UserToolResultMessage/UserToolSuccessMessage.tsx b/src/components/messages/UserToolResultMessage/UserToolSuccessMessage.tsx index 593061fc..06f42d6d 100644 --- a/src/components/messages/UserToolResultMessage/UserToolSuccessMessage.tsx +++ b/src/components/messages/UserToolResultMessage/UserToolSuccessMessage.tsx @@ -7,7 +7,7 @@ import { useAppState } from '../../../state/AppState.js'; import { filterToolProgressMessages, type Tool, type Tools } from '../../../Tool.js'; import type { NormalizedUserMessage, ProgressMessage } from '../../../types/message.js'; import { deleteClassifierApproval, getClassifierApproval, getYoloClassifierApproval } from '../../../utils/classifierApprovals.js'; -import type { buildMessageLookups } from '../../../utils/messages.js'; +import { extractTag, type buildMessageLookups } from '../../../utils/messages.js'; import { MessageResponse } from '../../MessageResponse.js'; import { HookProgressMessage } from '../HookProgressMessage.js'; type Props = { @@ -49,8 +49,34 @@ export function UserToolSuccessMessage({ React.useEffect(() => { deleteClassifierApproval(toolUseID); }, [toolUseID]); + + const fallbackContent = React.useMemo(() => { + if (!Array.isArray(message.message.content)) return null; + const toolResultBlock = message.message.content.find(block => block.type === 'tool_result'); + if (!toolResultBlock || typeof toolResultBlock.content !== 'string') { + return null; + } + return extractTag(toolResultBlock.content, 'persisted-output') ?? toolResultBlock.content; + }, [message.message.content]); if (!message.toolUseResult || !tool) { - return null; + return fallbackContent ? + + {fallbackContent} + {feature('BASH_CLASSIFIER') ? classifierRule && + + {figures.tick} + {' Auto-approved · matched '} + {`"${classifierRule}"`} + + : null} + {feature('TRANSCRIPT_CLASSIFIER') ? yoloReason && + Allowed by auto mode classifier + : null} + + + + + : null; } // Resumed transcripts deserialize toolUseResult via raw JSON.parse with no @@ -59,7 +85,24 @@ export function UserToolSuccessMessage({ // Validate against outputSchema before rendering — mirrors CollapsedReadSearchContent. const parsedOutput = tool.outputSchema?.safeParse(message.toolUseResult); if (parsedOutput && !parsedOutput.success) { - return null; + return fallbackContent ? + + {fallbackContent} + {feature('BASH_CLASSIFIER') ? classifierRule && + + {figures.tick} + {' Auto-approved · matched '} + {`"${classifierRule}"`} + + : null} + {feature('TRANSCRIPT_CLASSIFIER') ? yoloReason && + Allowed by auto mode classifier + : null} + + + + + : null; } const toolResult = parsedOutput?.data ?? message.toolUseResult; const renderedMessage = tool.renderToolResultMessage?.(toolResult as never, filterToolProgressMessages(progressMessagesForMessage), { @@ -101,4 +144,4 @@ export function UserToolSuccessMessage({ ; } -//# sourceMappingURL=data:application/json;charset=utf-8;base64,{"version":3,"names":["feature","figures","React","SentryErrorBoundary","Box","Text","useTheme","useAppState","filterToolProgressMessages","Tool","Tools","NormalizedUserMessage","ProgressMessage","deleteClassifierApproval","getClassifierApproval","getYoloClassifierApproval","buildMessageLookups","MessageResponse","HookProgressMessage","Props","message","lookups","ReturnType","toolUseID","progressMessagesForMessage","style","tool","tools","verbose","width","isTranscriptMode","UserToolSuccessMessage","ReactNode","theme","isBriefOnly","s","classifierRule","useState","yoloReason","useEffect","toolUseResult","parsedOutput","outputSchema","safeParse","success","toolResult","data","renderedMessage","renderToolResultMessage","input","toolUseByToolUseID","get","rendersAsAssistantText","userFacingName","undefined","tick"],"sources":["UserToolSuccessMessage.tsx"],"sourcesContent":["import { feature } from 'bun:bundle'\nimport figures from 'figures'\nimport * as React from 'react'\nimport { SentryErrorBoundary } from 'src/components/SentryErrorBoundary.js'\nimport { Box, Text, useTheme } from '../../../ink.js'\nimport { useAppState } from '../../../state/AppState.js'\nimport {\n  filterToolProgressMessages,\n  type Tool,\n  type Tools,\n} from '../../../Tool.js'\nimport type {\n  NormalizedUserMessage,\n  ProgressMessage,\n} from '../../../types/message.js'\nimport {\n  deleteClassifierApproval,\n  getClassifierApproval,\n  getYoloClassifierApproval,\n} from '../../../utils/classifierApprovals.js'\nimport type { buildMessageLookups } from '../../../utils/messages.js'\nimport { MessageResponse } from '../../MessageResponse.js'\nimport { HookProgressMessage } from '../HookProgressMessage.js'\n\ntype Props = {\n  message: NormalizedUserMessage\n  lookups: ReturnType<typeof buildMessageLookups>\n  toolUseID: string\n  progressMessagesForMessage: ProgressMessage[]\n  style?: 'condensed'\n  tool?: Tool\n  tools: Tools\n  verbose: boolean\n  width: number | string\n  isTranscriptMode?: boolean\n}\n\nexport function UserToolSuccessMessage({\n  message,\n  lookups,\n  toolUseID,\n  progressMessagesForMessage,\n  style,\n  tool,\n  tools,\n  verbose,\n  width,\n  isTranscriptMode,\n}: Props): React.ReactNode {\n  const [theme] = useTheme()\n  // Hook stays inside feature() ternary so external builds don't pay a\n  // per-scrollback-message store subscription — same pattern as\n  // UserPromptMessage.tsx.\n  const isBriefOnly =\n    feature('KAIROS') || feature('KAIROS_BRIEF')\n      ? // biome-ignore lint/correctness/useHookAtTopLevel: feature() is a compile-time constant\n        useAppState(s => s.isBriefOnly)\n      : false\n\n  // Capture classifier approval once on mount, then delete from Map to prevent linear growth.\n  // useState lazy initializer ensures the value persists across re-renders.\n  const [classifierRule] = React.useState(() =>\n    getClassifierApproval(toolUseID),\n  )\n  const [yoloReason] = React.useState(() =>\n    getYoloClassifierApproval(toolUseID),\n  )\n  React.useEffect(() => {\n    deleteClassifierApproval(toolUseID)\n  }, [toolUseID])\n\n  if (!message.toolUseResult || !tool) {\n    return null\n  }\n\n  // Resumed transcripts deserialize toolUseResult via raw JSON.parse with no\n  // validation (parseJSONL). A partial/corrupt/old-format result crashes\n  // renderToolResultMessage on first field access (anthropics/claude-code#39817).\n  // Validate against outputSchema before rendering — mirrors CollapsedReadSearchContent.\n  const parsedOutput = tool.outputSchema?.safeParse(message.toolUseResult)\n  if (parsedOutput && !parsedOutput.success) {\n    return null\n  }\n  const toolResult = parsedOutput?.data ?? message.toolUseResult\n\n  const renderedMessage =\n    tool.renderToolResultMessage?.(\n      toolResult as never,\n      filterToolProgressMessages(progressMessagesForMessage),\n      {\n        style,\n        theme,\n        tools,\n        verbose,\n        isTranscriptMode,\n        isBriefOnly,\n        input: lookups.toolUseByToolUseID.get(toolUseID)?.input,\n      },\n    ) ?? null\n\n  // Don't render anything if the tool result message is null\n  if (renderedMessage === null) {\n    return null\n  }\n\n  // Tools that return '' from userFacingName opt out of tool chrome and\n  // render like plain assistant text. Skip the tool-result width constraint\n  // so MarkdownTable's SAFETY_MARGIN=4 (tuned for the assistant-text 2-col\n  // dot gutter) holds — otherwise tables wrap their box-drawing chars.\n  const rendersAsAssistantText = tool.userFacingName(undefined) === ''\n\n  return (\n    <Box flexDirection=\"column\">\n      <Box\n        flexDirection=\"column\"\n        width={rendersAsAssistantText ? undefined : width}\n      >\n        {renderedMessage}\n        {feature('BASH_CLASSIFIER')\n          ? classifierRule && (\n              <MessageResponse height={1}>\n                <Text dimColor>\n                  <Text color=\"success\">{figures.tick}</Text>\n                  {' Auto-approved \\u00b7 matched '}\n                  {`\"${classifierRule}\"`}\n                </Text>\n              </MessageResponse>\n            )\n          : null}\n        {feature('TRANSCRIPT_CLASSIFIER')\n          ? yoloReason && (\n              <MessageResponse height={1}>\n                <Text dimColor>Allowed by auto mode classifier</Text>\n              </MessageResponse>\n            )\n          : null}\n      </Box>\n      <SentryErrorBoundary>\n        <HookProgressMessage\n          hookEvent=\"PostToolUse\"\n          lookups={lookups}\n          toolUseID={toolUseID}\n          verbose={verbose}\n          isTranscriptMode={isTranscriptMode}\n        />\n      </SentryErrorBoundary>\n    </Box>\n  )\n}\n"],"mappings":"AAAA,SAASA,OAAO,QAAQ,YAAY;AACpC,OAAOC,OAAO,MAAM,SAAS;AAC7B,OAAO,KAAKC,KAAK,MAAM,OAAO;AAC9B,SAASC,mBAAmB,QAAQ,uCAAuC;AAC3E,SAASC,GAAG,EAAEC,IAAI,EAAEC,QAAQ,QAAQ,iBAAiB;AACrD,SAASC,WAAW,QAAQ,4BAA4B;AACxD,SACEC,0BAA0B,EAC1B,KAAKC,IAAI,EACT,KAAKC,KAAK,QACL,kBAAkB;AACzB,cACEC,qBAAqB,EACrBC,eAAe,QACV,2BAA2B;AAClC,SACEC,wBAAwB,EACxBC,qBAAqB,EACrBC,yBAAyB,QACpB,uCAAuC;AAC9C,cAAcC,mBAAmB,QAAQ,4BAA4B;AACrE,SAASC,eAAe,QAAQ,0BAA0B;AAC1D,SAASC,mBAAmB,QAAQ,2BAA2B;AAE/D,KAAKC,KAAK,GAAG;EACXC,OAAO,EAAET,qBAAqB;EAC9BU,OAAO,EAAEC,UAAU,CAAC,OAAON,mBAAmB,CAAC;EAC/CO,SAAS,EAAE,MAAM;EACjBC,0BAA0B,EAAEZ,eAAe,EAAE;EAC7Ca,KAAK,CAAC,EAAE,WAAW;EACnBC,IAAI,CAAC,EAAEjB,IAAI;EACXkB,KAAK,EAAEjB,KAAK;EACZkB,OAAO,EAAE,OAAO;EAChBC,KAAK,EAAE,MAAM,GAAG,MAAM;EACtBC,gBAAgB,CAAC,EAAE,OAAO;AAC5B,CAAC;AAED,OAAO,SAASC,sBAAsBA,CAAC;EACrCX,OAAO;EACPC,OAAO;EACPE,SAAS;EACTC,0BAA0B;EAC1BC,KAAK;EACLC,IAAI;EACJC,KAAK;EACLC,OAAO;EACPC,KAAK;EACLC;AACK,CAAN,EAAEX,KAAK,CAAC,EAAEjB,KAAK,CAAC8B,SAAS,CAAC;EACzB,MAAM,CAACC,KAAK,CAAC,GAAG3B,QAAQ,CAAC,CAAC;EAC1B;EACA;EACA;EACA,MAAM4B,WAAW,GACflC,OAAO,CAAC,QAAQ,CAAC,IAAIA,OAAO,CAAC,cAAc,CAAC;EACxC;EACAO,WAAW,CAAC4B,CAAC,IAAIA,CAAC,CAACD,WAAW,CAAC,GAC/B,KAAK;;EAEX;EACA;EACA,MAAM,CAACE,cAAc,CAAC,GAAGlC,KAAK,CAACmC,QAAQ,CAAC,MACtCvB,qBAAqB,CAACS,SAAS,CACjC,CAAC;EACD,MAAM,CAACe,UAAU,CAAC,GAAGpC,KAAK,CAACmC,QAAQ,CAAC,MAClCtB,yBAAyB,CAACQ,SAAS,CACrC,CAAC;EACDrB,KAAK,CAACqC,SAAS,CAAC,MAAM;IACpB1B,wBAAwB,CAACU,SAAS,CAAC;EACrC,CAAC,EAAE,CAACA,SAAS,CAAC,CAAC;EAEf,IAAI,CAACH,OAAO,CAACoB,aAAa,IAAI,CAACd,IAAI,EAAE;IACnC,OAAO,IAAI;EACb;;EAEA;EACA;EACA;EACA;EACA,MAAMe,YAAY,GAAGf,IAAI,CAACgB,YAAY,EAAEC,SAAS,CAACvB,OAAO,CAACoB,aAAa,CAAC;EACxE,IAAIC,YAAY,IAAI,CAACA,YAAY,CAACG,OAAO,EAAE;IACzC,OAAO,IAAI;EACb;EACA,MAAMC,UAAU,GAAGJ,YAAY,EAAEK,IAAI,IAAI1B,OAAO,CAACoB,aAAa;EAE9D,MAAMO,eAAe,GACnBrB,IAAI,CAACsB,uBAAuB,GAC1BH,UAAU,IAAI,KAAK,EACnBrC,0BAA0B,CAACgB,0BAA0B,CAAC,EACtD;IACEC,KAAK;IACLQ,KAAK;IACLN,KAAK;IACLC,OAAO;IACPE,gBAAgB;IAChBI,WAAW;IACXe,KAAK,EAAE5B,OAAO,CAAC6B,kBAAkB,CAACC,GAAG,CAAC5B,SAAS,CAAC,EAAE0B;EACpD,CACF,CAAC,IAAI,IAAI;;EAEX;EACA,IAAIF,eAAe,KAAK,IAAI,EAAE;IAC5B,OAAO,IAAI;EACb;;EAEA;EACA;EACA;EACA;EACA,MAAMK,sBAAsB,GAAG1B,IAAI,CAAC2B,cAAc,CAACC,SAAS,CAAC,KAAK,EAAE;EAEpE,OACE,CAAC,GAAG,CAAC,aAAa,CAAC,QAAQ;AAC/B,MAAM,CAAC,GAAG,CACF,aAAa,CAAC,QAAQ,CACtB,KAAK,CAAC,CAACF,sBAAsB,GAAGE,SAAS,GAAGzB,KAAK,CAAC;AAE1D,QAAQ,CAACkB,eAAe;AACxB,QAAQ,CAAC/C,OAAO,CAAC,iBAAiB,CAAC,GACvBoC,cAAc,IACZ,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;AACzC,gBAAgB,CAAC,IAAI,CAAC,QAAQ;AAC9B,kBAAkB,CAAC,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAACnC,OAAO,CAACsD,IAAI,CAAC,EAAE,IAAI;AAC5D,kBAAkB,CAAC,gCAAgC;AACnD,kBAAkB,CAAC,IAAInB,cAAc,GAAG;AACxC,gBAAgB,EAAE,IAAI;AACtB,cAAc,EAAE,eAAe,CAClB,GACD,IAAI;AAChB,QAAQ,CAACpC,OAAO,CAAC,uBAAuB,CAAC,GAC7BsC,UAAU,IACR,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;AACzC,gBAAgB,CAAC,IAAI,CAAC,QAAQ,CAAC,+BAA+B,EAAE,IAAI;AACpE,cAAc,EAAE,eAAe,CAClB,GACD,IAAI;AAChB,MAAM,EAAE,GAAG;AACX,MAAM,CAAC,mBAAmB;AAC1B,QAAQ,CAAC,mBAAmB,CAClB,SAAS,CAAC,aAAa,CACvB,OAAO,CAAC,CAACjB,OAAO,CAAC,CACjB,SAAS,CAAC,CAACE,SAAS,CAAC,CACrB,OAAO,CAAC,CAACK,OAAO,CAAC,CACjB,gBAAgB,CAAC,CAACE,gBAAgB,CAAC;AAE7C,MAAM,EAAE,mBAAmB;AAC3B,IAAI,EAAE,GAAG,CAAC;AAEV","ignoreList":[]} \ No newline at end of file +//# sourceMappingURL=data:application/json;charset=utf-8;base64,{"version":3,"names":["feature","figures","React","SentryErrorBoundary","Box","Text","useTheme","useAppState","filterToolProgressMessages","Tool","Tools","NormalizedUserMessage","ProgressMessage","deleteClassifierApproval","getClassifierApproval","getYoloClassifierApproval","buildMessageLookups","MessageResponse","HookProgressMessage","Props","message","lookups","ReturnType","toolUseID","progressMessagesForMessage","style","tool","tools","verbose","width","isTranscriptMode","UserToolSuccessMessage","ReactNode","theme","isBriefOnly","s","classifierRule","useState","yoloReason","useEffect","toolUseResult","parsedOutput","outputSchema","safeParse","success","toolResult","data","renderedMessage","renderToolResultMessage","input","toolUseByToolUseID","get","rendersAsAssistantText","userFacingName","undefined","tick"],"sources":["UserToolSuccessMessage.tsx"],"sourcesContent":["import { feature } from 'bun:bundle'\nimport figures from 'figures'\nimport * as React from 'react'\nimport { SentryErrorBoundary } from 'src/components/SentryErrorBoundary.js'\nimport { Box, Text, useTheme } from '../../../ink.js'\nimport { useAppState } from '../../../state/AppState.js'\nimport {\n  filterToolProgressMessages,\n  type Tool,\n  type Tools,\n} from '../../../Tool.js'\nimport type {\n  NormalizedUserMessage,\n  ProgressMessage,\n} from '../../../types/message.js'\nimport {\n  deleteClassifierApproval,\n  getClassifierApproval,\n  getYoloClassifierApproval,\n} from '../../../utils/classifierApprovals.js'\nimport type { buildMessageLookups } from '../../../utils/messages.js'\nimport { MessageResponse } from '../../MessageResponse.js'\nimport { HookProgressMessage } from '../HookProgressMessage.js'\n\ntype Props = {\n  message: NormalizedUserMessage\n  lookups: ReturnType<typeof buildMessageLookups>\n  toolUseID: string\n  progressMessagesForMessage: ProgressMessage[]\n  style?: 'condensed'\n  tool?: Tool\n  tools: Tools\n  verbose: boolean\n  width: number | string\n  isTranscriptMode?: boolean\n}\n\nexport function UserToolSuccessMessage({\n  message,\n  lookups,\n  toolUseID,\n  progressMessagesForMessage,\n  style,\n  tool,\n  tools,\n  verbose,\n  width,\n  isTranscriptMode,\n}: Props): React.ReactNode {\n  const [theme] = useTheme()\n  // Hook stays inside feature() ternary so external builds don't pay a\n  // per-scrollback-message store subscription — same pattern as\n  // UserPromptMessage.tsx.\n  const isBriefOnly =\n    feature('KAIROS') || feature('KAIROS_BRIEF')\n      ? // biome-ignore lint/correctness/useHookAtTopLevel: feature() is a compile-time constant\n        useAppState(s => s.isBriefOnly)\n      : false\n\n  // Capture classifier approval once on mount, then delete from Map to prevent linear growth.\n  // useState lazy initializer ensures the value persists across re-renders.\n  const [classifierRule] = React.useState(() =>\n    getClassifierApproval(toolUseID),\n  )\n  const [yoloReason] = React.useState(() =>\n    getYoloClassifierApproval(toolUseID),\n  )\n  React.useEffect(() => {\n    deleteClassifierApproval(toolUseID)\n  }, [toolUseID])\n\n  if (!message.toolUseResult || !tool) {\n    return null\n  }\n\n  // Resumed transcripts deserialize toolUseResult via raw JSON.parse with no\n  // validation (parseJSONL). A partial/corrupt/old-format result crashes\n  // renderToolResultMessage on first field access (anthropics/claude-code#39817).\n  // Validate against outputSchema before rendering — mirrors CollapsedReadSearchContent.\n  const parsedOutput = tool.outputSchema?.safeParse(message.toolUseResult)\n  if (parsedOutput && !parsedOutput.success) {\n    return null\n  }\n  const toolResult = parsedOutput?.data ?? message.toolUseResult\n\n  const renderedMessage =\n    tool.renderToolResultMessage?.(\n      toolResult as never,\n      filterToolProgressMessages(progressMessagesForMessage),\n      {\n        style,\n        theme,\n        tools,\n        verbose,\n        isTranscriptMode,\n        isBriefOnly,\n        input: lookups.toolUseByToolUseID.get(toolUseID)?.input,\n      },\n    ) ?? null\n\n  // Don't render anything if the tool result message is null\n  if (renderedMessage === null) {\n    return null\n  }\n\n  // Tools that return '' from userFacingName opt out of tool chrome and\n  // render like plain assistant text. Skip the tool-result width constraint\n  // so MarkdownTable's SAFETY_MARGIN=4 (tuned for the assistant-text 2-col\n  // dot gutter) holds — otherwise tables wrap their box-drawing chars.\n  const rendersAsAssistantText = tool.userFacingName(undefined) === ''\n\n  return (\n    <Box flexDirection=\"column\">\n      <Box\n        flexDirection=\"column\"\n        width={rendersAsAssistantText ? undefined : width}\n      >\n        {renderedMessage}\n        {feature('BASH_CLASSIFIER')\n          ? classifierRule && (\n              <MessageResponse height={1}>\n                <Text dimColor>\n                  <Text color=\"success\">{figures.tick}</Text>\n                  {' Auto-approved \\u00b7 matched '}\n                  {`\"${classifierRule}\"`}\n                </Text>\n              </MessageResponse>\n            )\n          : null}\n        {feature('TRANSCRIPT_CLASSIFIER')\n          ? yoloReason && (\n              <MessageResponse height={1}>\n                <Text dimColor>Allowed by auto mode classifier</Text>\n              </MessageResponse>\n            )\n          : null}\n      </Box>\n      <SentryErrorBoundary>\n        <HookProgressMessage\n          hookEvent=\"PostToolUse\"\n          lookups={lookups}\n          toolUseID={toolUseID}\n          verbose={verbose}\n          isTranscriptMode={isTranscriptMode}\n        />\n      </SentryErrorBoundary>\n    </Box>\n  )\n}\n"],"mappings":"AAAA,SAASA,OAAO,QAAQ,YAAY;AACpC,OAAOC,OAAO,MAAM,SAAS;AAC7B,OAAO,KAAKC,KAAK,MAAM,OAAO;AAC9B,SAASC,mBAAmB,QAAQ,uCAAuC;AAC3E,SAASC,GAAG,EAAEC,IAAI,EAAEC,QAAQ,QAAQ,iBAAiB;AACrD,SAASC,WAAW,QAAQ,4BAA4B;AACxD,SACEC,0BAA0B,EAC1B,KAAKC,IAAI,EACT,KAAKC,KAAK,QACL,kBAAkB;AACzB,cACEC,qBAAqB,EACrBC,eAAe,QACV,2BAA2B;AAClC,SACEC,wBAAwB,EACxBC,qBAAqB,EACrBC,yBAAyB,QACpB,uCAAuC;AAC9C,cAAcC,mBAAmB,QAAQ,4BAA4B;AACrE,SAASC,eAAe,QAAQ,0BAA0B;AAC1D,SAASC,mBAAmB,QAAQ,2BAA2B;AAE/D,KAAKC,KAAK,GAAG;EACXC,OAAO,EAAET,qBAAqB;EAC9BU,OAAO,EAAEC,UAAU,CAAC,OAAON,mBAAmB,CAAC;EAC/CO,SAAS,EAAE,MAAM;EACjBC,0BAA0B,EAAEZ,eAAe,EAAE;EAC7Ca,KAAK,CAAC,EAAE,WAAW;EACnBC,IAAI,CAAC,EAAEjB,IAAI;EACXkB,KAAK,EAAEjB,KAAK;EACZkB,OAAO,EAAE,OAAO;EAChBC,KAAK,EAAE,MAAM,GAAG,MAAM;EACtBC,gBAAgB,CAAC,EAAE,OAAO;AAC5B,CAAC;AAED,OAAO,SAASC,sBAAsBA,CAAC;EACrCX,OAAO;EACPC,OAAO;EACPE,SAAS;EACTC,0BAA0B;EAC1BC,KAAK;EACLC,IAAI;EACJC,KAAK;EACLC,OAAO;EACPC,KAAK;EACLC;AACK,CAAN,EAAEX,KAAK,CAAC,EAAEjB,KAAK,CAAC8B,SAAS,CAAC;EACzB,MAAM,CAACC,KAAK,CAAC,GAAG3B,QAAQ,CAAC,CAAC;EAC1B;EACA;EACA;EACA,MAAM4B,WAAW,GACflC,OAAO,CAAC,QAAQ,CAAC,IAAIA,OAAO,CAAC,cAAc,CAAC;EACxC;EACAO,WAAW,CAAC4B,CAAC,IAAIA,CAAC,CAACD,WAAW,CAAC,GAC/B,KAAK;;EAEX;EACA;EACA,MAAM,CAACE,cAAc,CAAC,GAAGlC,KAAK,CAACmC,QAAQ,CAAC,MACtCvB,qBAAqB,CAACS,SAAS,CACjC,CAAC;EACD,MAAM,CAACe,UAAU,CAAC,GAAGpC,KAAK,CAACmC,QAAQ,CAAC,MAClCtB,yBAAyB,CAACQ,SAAS,CACrC,CAAC;EACDrB,KAAK,CAACqC,SAAS,CAAC,MAAM;IACpB1B,wBAAwB,CAACU,SAAS,CAAC;EACrC,CAAC,EAAE,CAACA,SAAS,CAAC,CAAC;EAEf,IAAI,CAACH,OAAO,CAACoB,aAAa,IAAI,CAACd,IAAI,EAAE;IACnC,OAAO,IAAI;EACb;;EAEA;EACA;EACA;EACA;EACA,MAAMe,YAAY,GAAGf,IAAI,CAACgB,YAAY,EAAEC,SAAS,CAACvB,OAAO,CAACoB,aAAa,CAAC;EACxE,IAAIC,YAAY,IAAI,CAACA,YAAY,CAACG,OAAO,EAAE;IACzC,OAAO,IAAI;EACb;EACA,MAAMC,UAAU,GAAGJ,YAAY,EAAEK,IAAI,IAAI1B,OAAO,CAACoB,aAAa;EAE9D,MAAMO,eAAe,GACnBrB,IAAI,CAACsB,uBAAuB,GAC1BH,UAAU,IAAI,KAAK,EACnBrC,0BAA0B,CAACgB,0BAA0B,CAAC,EACtD;IACEC,KAAK;IACLQ,KAAK;IACLN,KAAK;IACLC,OAAO;IACPE,gBAAgB;IAChBI,WAAW;IACXe,KAAK,EAAE5B,OAAO,CAAC6B,kBAAkB,CAACC,GAAG,CAAC5B,SAAS,CAAC,EAAE0B;EACpD,CACF,CAAC,IAAI,IAAI;;EAEX;EACA,IAAIF,eAAe,KAAK,IAAI,EAAE;IAC5B,OAAO,IAAI;EACb;;EAEA;EACA;EACA;EACA;EACA,MAAMK,sBAAsB,GAAG1B,IAAI,CAAC2B,cAAc,CAACC,SAAS,CAAC,KAAK,EAAE;EAEpE,OACE,CAAC,GAAG,CAAC,aAAa,CAAC,QAAQ;AAC/B,MAAM,CAAC,GAAG,CACF,aAAa,CAAC,QAAQ,CACtB,KAAK,CAAC,CAACF,sBAAsB,GAAGE,SAAS,GAAGzB,KAAK,CAAC;AAE1D,QAAQ,CAACkB,eAAe;AACxB,QAAQ,CAAC/C,OAAO,CAAC,iBAAiB,CAAC,GACvBoC,cAAc,IACZ,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;AACzC,gBAAgB,CAAC,IAAI,CAAC,QAAQ;AAC9B,kBAAkB,CAAC,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAACnC,OAAO,CAACsD,IAAI,CAAC,EAAE,IAAI;AAC5D,kBAAkB,CAAC,gCAAgC;AACnD,kBAAkB,CAAC,IAAInB,cAAc,GAAG;AACxC,gBAAgB,EAAE,IAAI;AACtB,cAAc,EAAE,eAAe,CAClB,GACD,IAAI;AAChB,QAAQ,CAACpC,OAAO,CAAC,uBAAuB,CAAC,GAC7BsC,UAAU,IACR,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;AACzC,gBAAgB,CAAC,IAAI,CAAC,QAAQ,CAAC,+BAA+B,EAAE,IAAI;AACpE,cAAc,EAAE,eAAe,CAClB,GACD,IAAI;AAChB,MAAM,EAAE,GAAG;AACX,MAAM,CAAC,mBAAmB;AAC1B,QAAQ,CAAC,mBAAmB,CAClB,SAAS,CAAC,aAAa,CACvB,OAAO,CAAC,CAACjB,OAAO,CAAC,CACjB,SAAS,CAAC,CAACE,SAAS,CAAC,CACrB,OAAO,CAAC,CAACK,OAAO,CAAC,CACjB,gBAAgB,CAAC,CAACE,gBAAgB,CAAC;AAE7C,MAAM,EAAE,mBAAmB;AAC3B,IAAI,EAAE,GAAG,CAAC;AAEV","ignoreList":[]} diff --git a/src/utils/permissions/yoloClassifier.test.ts b/src/utils/permissions/yoloClassifier.test.ts new file mode 100644 index 00000000..cf97c5ad --- /dev/null +++ b/src/utils/permissions/yoloClassifier.test.ts @@ -0,0 +1,79 @@ +import { describe, expect, test } from 'bun:test' + +import { buildTranscriptForClassifier } from './yoloClassifier.js' + +const tools = [ + { + name: 'Bash', + aliases: [], + toAutoClassifierInput(input: Record) { + return String(input.command ?? '') + }, + }, +] as any + +describe('buildTranscriptForClassifier', () => { + test('keeps the most recent transcript entries within budget', () => { + const messages = [ + { + type: 'user', + message: { + content: 'old-user', + }, + }, + { + type: 'assistant', + message: { + content: [ + { + type: 'tool_use', + name: 'Bash', + input: { command: 'old-tool' }, + }, + ], + }, + }, + { + type: 'user', + message: { + content: 'new-user', + }, + }, + { + type: 'assistant', + message: { + content: [ + { + type: 'tool_use', + name: 'Bash', + input: { command: 'new-tool' }, + }, + ], + }, + }, + ] as any + + const transcript = buildTranscriptForClassifier(messages, tools, 32) + + expect(transcript).toContain('new-user') + expect(transcript).toContain('new-tool') + expect(transcript).not.toContain('old-user') + expect(transcript).not.toContain('old-tool') + }) + + test('truncates oversized user blocks before serialization', () => { + const messages = [ + { + type: 'user', + message: { + content: 'x'.repeat(40_000), + }, + }, + ] as any + + const transcript = buildTranscriptForClassifier(messages, tools) + + expect(transcript.length).toBeLessThan(33_000) + expect(transcript).toContain('[truncated ') + }) +}) diff --git a/src/utils/permissions/yoloClassifier.ts b/src/utils/permissions/yoloClassifier.ts index 77ee5187..ea7b9e40 100644 --- a/src/utils/permissions/yoloClassifier.ts +++ b/src/utils/permissions/yoloClassifier.ts @@ -68,6 +68,9 @@ const ANTHROPIC_PERMISSIONS_TEMPLATE: string = : '' /* eslint-enable custom-rules/no-process-env-top-level, @typescript-eslint/no-require-imports */ +const MAX_CLASSIFIER_TRANSCRIPT_CHARS = 200_000 +const MAX_CLASSIFIER_BLOCK_VALUE_CHARS = 32_000 + function isUsingExternalPermissions(): boolean { if (process.env.USER_TYPE !== 'ant') return true const config = getFeatureValue_CACHED_MAY_BE_STALE( @@ -293,6 +296,64 @@ export type TranscriptEntry = { content: TranscriptBlock[] } +function messageToTranscriptEntry(msg: Message): TranscriptEntry | null { + if (msg.type === 'attachment' && msg.attachment.type === 'queued_command') { + const prompt = msg.attachment.prompt + let text: string | null = null + if (typeof prompt === 'string') { + text = prompt + } else if (Array.isArray(prompt)) { + text = + prompt + .filter( + (block): block is { type: 'text'; text: string } => + block.type === 'text', + ) + .map(block => block.text) + .join('\n') || null + } + return text === null + ? null + : { + role: 'user', + content: [{ type: 'text', text }], + } + } + + if (msg.type === 'user') { + const content = msg.message.content + const textBlocks: TranscriptBlock[] = [] + if (typeof content === 'string') { + textBlocks.push({ type: 'text', text: content }) + } else if (Array.isArray(content)) { + for (const block of content) { + if (block.type === 'text') { + textBlocks.push({ type: 'text', text: block.text }) + } + } + } + return textBlocks.length > 0 ? { role: 'user', content: textBlocks } : null + } + + if (msg.type === 'assistant') { + const blocks: TranscriptBlock[] = [] + for (const block of msg.message.content) { + // Only include tool_use blocks — assistant text is model-authored + // and could be crafted to influence the classifier's decision. + if (block.type === 'tool_use') { + blocks.push({ + type: 'tool_use', + name: block.name, + input: block.input, + }) + } + } + return blocks.length > 0 ? { role: 'assistant', content: blocks } : null + } + + return null +} + /** * Build transcript entries from messages. * Includes user text messages and assistant tool_use blocks (excluding assistant text). @@ -302,58 +363,9 @@ export type TranscriptEntry = { export function buildTranscriptEntries(messages: Message[]): TranscriptEntry[] { const transcript: TranscriptEntry[] = [] for (const msg of messages) { - if (msg.type === 'attachment' && msg.attachment.type === 'queued_command') { - const prompt = msg.attachment.prompt - let text: string | null = null - if (typeof prompt === 'string') { - text = prompt - } else if (Array.isArray(prompt)) { - text = - prompt - .filter( - (block): block is { type: 'text'; text: string } => - block.type === 'text', - ) - .map(block => block.text) - .join('\n') || null - } - if (text !== null) { - transcript.push({ - role: 'user', - content: [{ type: 'text', text }], - }) - } - } else if (msg.type === 'user') { - const content = msg.message.content - const textBlocks: TranscriptBlock[] = [] - if (typeof content === 'string') { - textBlocks.push({ type: 'text', text: content }) - } else if (Array.isArray(content)) { - for (const block of content) { - if (block.type === 'text') { - textBlocks.push({ type: 'text', text: block.text }) - } - } - } - if (textBlocks.length > 0) { - transcript.push({ role: 'user', content: textBlocks }) - } - } else if (msg.type === 'assistant') { - const blocks: TranscriptBlock[] = [] - for (const block of msg.message.content) { - // Only include tool_use blocks — assistant text is model-authored - // and could be crafted to influence the classifier's decision. - if (block.type === 'tool_use') { - blocks.push({ - type: 'tool_use', - name: block.name, - input: block.input, - }) - } - } - if (blocks.length > 0) { - transcript.push({ role: 'assistant', content: blocks }) - } + const entry = messageToTranscriptEntry(msg) + if (entry) { + transcript.push(entry) } } return transcript @@ -372,6 +384,17 @@ function buildToolLookup(tools: Tools): ToolLookup { return map } +function truncateClassifierValue(value: string): string { + if (value.length <= MAX_CLASSIFIER_BLOCK_VALUE_CHARS) { + return value + } + const omitted = value.length - MAX_CLASSIFIER_BLOCK_VALUE_CHARS + return ( + value.slice(0, MAX_CLASSIFIER_BLOCK_VALUE_CHARS) + + `… [truncated ${omitted} chars]` + ) +} + /** * Serialize a single transcript block as a JSONL dict line: `{"Bash":"ls"}` * for tool calls, `{"user":"text"}` for user text. The tool value is the @@ -410,15 +433,22 @@ function toCompactBlock( } if (encoded === '') return '' if (isJsonlTranscriptEnabled()) { - return jsonStringify({ [block.name]: encoded }) + '\n' + const jsonlValue = + typeof encoded === 'string' + ? truncateClassifierValue(encoded) + : encoded + return jsonStringify({ [block.name]: jsonlValue }) + '\n' } - const s = typeof encoded === 'string' ? encoded : jsonStringify(encoded) + const s = + typeof encoded === 'string' + ? truncateClassifierValue(encoded) + : jsonStringify(encoded) return `${block.name} ${s}\n` } if (block.type === 'text' && role === 'user') { return isJsonlTranscriptEnabled() - ? jsonStringify({ user: block.text }) + '\n' - : `User: ${block.text}\n` + ? jsonStringify({ user: truncateClassifierValue(block.text) }) + '\n' + : `User: ${truncateClassifierValue(block.text)}\n` } return '' } @@ -427,6 +457,96 @@ function toCompact(entry: TranscriptEntry, lookup: ToolLookup): string { return entry.content.map(b => toCompactBlock(b, entry.role, lookup)).join('') } +function serializeTranscriptForClassifier( + messages: Message[], + tools: Tools, + maxChars: number, +): { + userContentBlocks: Anthropic.TextBlockParam[] + promptLengths: { + toolCalls: number + userPrompts: number + } + transcriptEntries: number + truncated: boolean +} { + const lookup = buildToolLookup(tools) + const keptEntries: Array> = + [] + let totalChars = 0 + let truncated = false + + for (let i = messages.length - 1; i >= 0; i--) { + const entry = messageToTranscriptEntry(messages[i]!) + if (!entry) continue + + const serializedBlocks: Array<{ + role: TranscriptEntry['role'] + text: string + }> = [] + let entryChars = 0 + + for (const block of entry.content) { + const serialized = toCompactBlock(block, entry.role, lookup) + if (serialized === '') continue + serializedBlocks.push({ role: entry.role, text: serialized }) + entryChars += serialized.length + } + if (serializedBlocks.length === 0) continue + + if (totalChars + entryChars > maxChars) { + if (totalChars === 0) { + const partialEntry: typeof serializedBlocks = [] + let partialChars = 0 + for (let j = serializedBlocks.length - 1; j >= 0; j--) { + const serialized = serializedBlocks[j]! + if (partialChars + serialized.text.length > maxChars) continue + partialEntry.unshift(serialized) + partialChars += serialized.text.length + } + if (partialEntry.length > 0) { + keptEntries.push(partialEntry) + totalChars += partialChars + } + } + truncated = true + break + } + + keptEntries.push(serializedBlocks) + totalChars += entryChars + if (totalChars >= maxChars) { + truncated = i > 0 + break + } + } + + const userContentBlocks: Anthropic.TextBlockParam[] = [] + let userPromptsLength = 0 + let toolCallsLength = 0 + + for (let i = keptEntries.length - 1; i >= 0; i--) { + for (const block of keptEntries[i]!) { + userContentBlocks.push({ type: 'text' as const, text: block.text }) + if (block.role === 'user') { + userPromptsLength += block.text.length + } else { + toolCallsLength += block.text.length + } + } + } + + return { + userContentBlocks, + promptLengths: { + toolCalls: toolCallsLength, + userPrompts: userPromptsLength, + }, + transcriptEntries: keptEntries.length, + truncated, + } +} + /** * Build a compact transcript string including user messages and assistant tool_use blocks. * Used by AgentTool for handoff classification. @@ -434,10 +554,10 @@ function toCompact(entry: TranscriptEntry, lookup: ToolLookup): string { export function buildTranscriptForClassifier( messages: Message[], tools: Tools, + maxChars: number = MAX_CLASSIFIER_TRANSCRIPT_CHARS, ): string { - const lookup = buildToolLookup(tools) - return buildTranscriptEntries(messages) - .map(e => toCompact(e, lookup)) + return serializeTranscriptForClassifier(messages, tools, maxChars) + .userContentBlocks.map(block => block.text) .join('') } @@ -1027,34 +1147,24 @@ export async function classifyYoloAction( } const systemPrompt = await buildYoloSystemPrompt(context) - const transcriptEntries = buildTranscriptEntries(messages) + const transcriptBudget = Math.max( + 0, + MAX_CLASSIFIER_TRANSCRIPT_CHARS - actionCompact.length, + ) + const serializedTranscript = serializeTranscriptForClassifier( + messages, + tools, + transcriptBudget, + ) const claudeMdMessage = buildClaudeMdMessage() const prefixMessages: Anthropic.MessageParam[] = claudeMdMessage ? [claudeMdMessage] : [] - let toolCallsLength = actionCompact.length - let userPromptsLength = 0 - const userContentBlocks: Anthropic.TextBlockParam[] = [] - for (const entry of transcriptEntries) { - for (const block of entry.content) { - const serialized = toCompactBlock(block, entry.role, lookup) - if (serialized === '') continue - switch (entry.role) { - case 'user': - userPromptsLength += serialized.length - break - case 'assistant': - toolCallsLength += serialized.length - break - default: { - const _exhaustive: never = entry.role - void _exhaustive - } - } - userContentBlocks.push({ type: 'text' as const, text: serialized }) - } - } + const toolCallsLength = + actionCompact.length + serializedTranscript.promptLengths.toolCalls + const userPromptsLength = serializedTranscript.promptLengths.userPrompts + const userContentBlocks = [...serializedTranscript.userContentBlocks] const userPrompt = userContentBlocks.map(b => b.text).join('') + actionCompact const promptLengths = { @@ -1080,7 +1190,8 @@ export async function classifyYoloAction( `(sys=${promptLengths.systemPrompt} ` + `tools=${promptLengths.toolCalls} ` + `user=${promptLengths.userPrompts}) ` + - `transcriptEntries=${transcriptEntries.length} ` + + `transcriptEntries=${serializedTranscript.transcriptEntries} ` + + `truncated=${serializedTranscript.truncated} ` + `messages=${messages.length}`, ) logForDebugging( @@ -1119,7 +1230,7 @@ export async function classifyYoloAction( mainLoopTokens: mainLoopTokens ?? tokenCountWithEstimation(messages), classifierChars, classifierTokensEst, - transcriptEntries: transcriptEntries.length, + transcriptEntries: serializedTranscript.transcriptEntries, messages: messages.length, action: actionCompact, }, @@ -1274,7 +1385,7 @@ export async function classifyYoloAction( mainLoopTokens, classifierChars, classifierTokensEst, - transcriptEntries: transcriptEntries.length, + transcriptEntries: serializedTranscript.transcriptEntries, messages: messages.length, action: actionCompact, model, diff --git a/src/utils/toolResultStorage.test.ts b/src/utils/toolResultStorage.test.ts index 8040f1ff..8e5dca1b 100644 --- a/src/utils/toolResultStorage.test.ts +++ b/src/utils/toolResultStorage.test.ts @@ -14,6 +14,10 @@ test('applyToolResultReplacementsToMessages replaces matching tool results and p is_error: false, }, ], + toolUseResult: { + stdout: 'very large tool output', + stderr: '', + }, }) const messages = [unrelated, oversizedResult] const replacement = @@ -30,6 +34,7 @@ test('applyToolResultReplacementsToMessages replaces matching tool results and p expect((next[1]!.message.content as Array<{ content: string }>)[0]!.content).toBe( replacement, ) + expect(next[1]!.toolUseResult).toBeUndefined() }) test('applyToolResultReplacementsToMessages is idempotent when messages are already hydrated', () => { diff --git a/src/utils/toolResultStorage.ts b/src/utils/toolResultStorage.ts index e5f16c1a..9efc8070 100644 --- a/src/utils/toolResultStorage.ts +++ b/src/utils/toolResultStorage.ts @@ -726,6 +726,11 @@ function replaceToolResultContents( : { ...block, content: replacement } }), }, + // Drop the original tool payload once the model-facing content has been + // replaced with a persisted preview. Keeping both defeats the memory + // savings for long sessions because the live transcript still retains + // the oversized structured result. + toolUseResult: undefined, } }) return changed ? nextMessages : messages