Limit auto-mode classifier transcript growth (#277)
* Limit auto-mode classifier transcript growth
* Release persisted tool results from transcript state

---------

Co-authored-by: pr0ln <pr0ln@pr0lnui-Macmini.local>
This commit is contained in:
File diff suppressed because one or more lines are too long
79
src/utils/permissions/yoloClassifier.test.ts
Normal file
79
src/utils/permissions/yoloClassifier.test.ts
Normal file
@@ -0,0 +1,79 @@
|
||||
import { describe, expect, test } from 'bun:test'
|
||||
|
||||
import { buildTranscriptForClassifier } from './yoloClassifier.js'
|
||||
|
||||
// Minimal stand-in for the tool registry: a single `Bash` tool whose
// classifier input is just the `command` string (empty when absent).
// Cast to `any` because the test only supplies the fields it sets here,
// not a complete tool definition.
const tools = [
  {
    name: 'Bash',
    aliases: [],
    toAutoClassifierInput(input: Record<string, unknown>) {
      return String(input.command ?? '')
    },
  },
] as any
|
||||
|
||||
describe('buildTranscriptForClassifier', () => {
  test('keeps the most recent transcript entries within budget', () => {
    const messages = [
      {
        type: 'user',
        message: {
          content: 'old-user',
        },
      },
      {
        type: 'assistant',
        message: {
          content: [
            {
              type: 'tool_use',
              name: 'Bash',
              input: { command: 'old-tool' },
            },
          ],
        },
      },
      {
        type: 'user',
        message: {
          content: 'new-user',
        },
      },
      {
        type: 'assistant',
        message: {
          content: [
            {
              type: 'tool_use',
              name: 'Bash',
              input: { command: 'new-tool' },
            },
          ],
        },
      },
    ] as any

    // Budget chosen so the assertion holds for both serialization formats:
    //  - plain:  "Bash new-tool\n" (14) + "User: new-user\n" (15) = 29, and
    //            adding "Bash old-tool\n" (14) would make 43 > 40;
    //  - JSONL:  '{"Bash":"new-tool"}\n' (20) + '{"user":"new-user"}\n' (20)
    //            = 40, which fits exactly (assumes compact JSON output).
    // A budget of 32 only fits the two newest entries in the plain format.
    const transcript = buildTranscriptForClassifier(messages, tools, 40)

    expect(transcript).toContain('new-user')
    expect(transcript).toContain('new-tool')
    expect(transcript).not.toContain('old-user')
    expect(transcript).not.toContain('old-tool')
  })

  test('truncates oversized user blocks before serialization', () => {
    const messages = [
      {
        type: 'user',
        message: {
          content: 'x'.repeat(40_000),
        },
      },
    ] as any

    const transcript = buildTranscriptForClassifier(messages, tools)

    expect(transcript.length).toBeLessThan(33_000)
    expect(transcript).toContain('[truncated ')
    // 40_000 input chars minus the 32_000-char per-block cap.
    expect(transcript).toContain('[truncated 8000 chars]')
  })
})
|
||||
@@ -68,6 +68,9 @@ const ANTHROPIC_PERMISSIONS_TEMPLATE: string =
|
||||
: ''
|
||||
/* eslint-enable custom-rules/no-process-env-top-level, @typescript-eslint/no-require-imports */
|
||||
|
||||
/**
 * Upper bound, in characters, on the serialized transcript handed to the
 * classifier. Used as the default budget of `buildTranscriptForClassifier`;
 * `classifyYoloAction` subtracts the length of the pending action from it.
 */
const MAX_CLASSIFIER_TRANSCRIPT_CHARS = 200_000

/**
 * Upper bound, in characters, on a single serialized string value (one user
 * text block or one string-encoded tool input) before it is truncated.
 */
const MAX_CLASSIFIER_BLOCK_VALUE_CHARS = 32_000
|
||||
|
||||
function isUsingExternalPermissions(): boolean {
|
||||
if (process.env.USER_TYPE !== 'ant') return true
|
||||
const config = getFeatureValue_CACHED_MAY_BE_STALE(
|
||||
@@ -293,6 +296,64 @@ export type TranscriptEntry = {
|
||||
content: TranscriptBlock[]
|
||||
}
|
||||
|
||||
/**
 * Convert a single conversation message into a classifier transcript entry.
 *
 * - `attachment` messages of type `queued_command` become a user entry that
 *   carries the prompt text. Array prompts contribute only their text blocks,
 *   joined with newlines.
 * - `user` messages contribute their text content; non-text blocks are
 *   skipped.
 * - `assistant` messages contribute only their `tool_use` blocks.
 *
 * @param msg The message to convert.
 * @returns The transcript entry, or `null` when the message contributes
 * nothing (no text / no tool_use blocks, or an unhandled message type).
 */
function messageToTranscriptEntry(msg: Message): TranscriptEntry | null {
  if (msg.type === 'attachment' && msg.attachment.type === 'queued_command') {
    const prompt = msg.attachment.prompt
    let text: string | null = null
    if (typeof prompt === 'string') {
      text = prompt
    } else if (Array.isArray(prompt)) {
      // An array prompt with no text (or only empty text) yields null.
      text =
        prompt
          .filter(
            (block): block is { type: 'text'; text: string } =>
              block.type === 'text',
          )
          .map(block => block.text)
          .join('\n') || null
    }
    return text === null
      ? null
      : {
          role: 'user',
          content: [{ type: 'text', text }],
        }
  }

  if (msg.type === 'user') {
    const content = msg.message.content
    const textBlocks: TranscriptBlock[] = []
    if (typeof content === 'string') {
      textBlocks.push({ type: 'text', text: content })
    } else if (Array.isArray(content)) {
      for (const block of content) {
        if (block.type === 'text') {
          textBlocks.push({ type: 'text', text: block.text })
        }
      }
    }
    return textBlocks.length > 0 ? { role: 'user', content: textBlocks } : null
  }

  if (msg.type === 'assistant') {
    const blocks: TranscriptBlock[] = []
    for (const block of msg.message.content) {
      // Only include tool_use blocks — assistant text is model-authored
      // and could be crafted to influence the classifier's decision.
      if (block.type === 'tool_use') {
        blocks.push({
          type: 'tool_use',
          name: block.name,
          input: block.input,
        })
      }
    }
    return blocks.length > 0 ? { role: 'assistant', content: blocks } : null
  }

  return null
}
|
||||
|
||||
/**
|
||||
* Build transcript entries from messages.
|
||||
* Includes user text messages and assistant tool_use blocks (excluding assistant text).
|
||||
@@ -302,58 +363,9 @@ export type TranscriptEntry = {
|
||||
export function buildTranscriptEntries(messages: Message[]): TranscriptEntry[] {
|
||||
const transcript: TranscriptEntry[] = []
|
||||
for (const msg of messages) {
|
||||
if (msg.type === 'attachment' && msg.attachment.type === 'queued_command') {
|
||||
const prompt = msg.attachment.prompt
|
||||
let text: string | null = null
|
||||
if (typeof prompt === 'string') {
|
||||
text = prompt
|
||||
} else if (Array.isArray(prompt)) {
|
||||
text =
|
||||
prompt
|
||||
.filter(
|
||||
(block): block is { type: 'text'; text: string } =>
|
||||
block.type === 'text',
|
||||
)
|
||||
.map(block => block.text)
|
||||
.join('\n') || null
|
||||
}
|
||||
if (text !== null) {
|
||||
transcript.push({
|
||||
role: 'user',
|
||||
content: [{ type: 'text', text }],
|
||||
})
|
||||
}
|
||||
} else if (msg.type === 'user') {
|
||||
const content = msg.message.content
|
||||
const textBlocks: TranscriptBlock[] = []
|
||||
if (typeof content === 'string') {
|
||||
textBlocks.push({ type: 'text', text: content })
|
||||
} else if (Array.isArray(content)) {
|
||||
for (const block of content) {
|
||||
if (block.type === 'text') {
|
||||
textBlocks.push({ type: 'text', text: block.text })
|
||||
}
|
||||
}
|
||||
}
|
||||
if (textBlocks.length > 0) {
|
||||
transcript.push({ role: 'user', content: textBlocks })
|
||||
}
|
||||
} else if (msg.type === 'assistant') {
|
||||
const blocks: TranscriptBlock[] = []
|
||||
for (const block of msg.message.content) {
|
||||
// Only include tool_use blocks — assistant text is model-authored
|
||||
// and could be crafted to influence the classifier's decision.
|
||||
if (block.type === 'tool_use') {
|
||||
blocks.push({
|
||||
type: 'tool_use',
|
||||
name: block.name,
|
||||
input: block.input,
|
||||
})
|
||||
}
|
||||
}
|
||||
if (blocks.length > 0) {
|
||||
transcript.push({ role: 'assistant', content: blocks })
|
||||
}
|
||||
const entry = messageToTranscriptEntry(msg)
|
||||
if (entry) {
|
||||
transcript.push(entry)
|
||||
}
|
||||
}
|
||||
return transcript
|
||||
@@ -372,6 +384,17 @@ function buildToolLookup(tools: Tools): ToolLookup {
|
||||
return map
|
||||
}
|
||||
|
||||
/**
 * Cap a string value at `maxChars` UTF-16 code units, appending a marker
 * that states how many code units were dropped.
 *
 * The cut never lands between the two halves of a surrogate pair: if it
 * would, the cut moves back by one so the result contains no lone high
 * surrogate (which would otherwise be serialized as a broken character).
 *
 * @param value The string to bound.
 * @param maxChars Maximum number of code units to keep from `value`;
 * defaults to `MAX_CLASSIFIER_BLOCK_VALUE_CHARS`. Negative or fractional
 * values are clamped/floored.
 * @returns `value` unchanged when it fits, otherwise the kept prefix followed
 * by `… [truncated N chars]`.
 */
function truncateClassifierValue(
  value: string,
  maxChars: number = MAX_CLASSIFIER_BLOCK_VALUE_CHARS,
): string {
  const limit = Math.max(0, Math.floor(maxChars))
  if (value.length <= limit) {
    return value
  }

  let cut = limit
  if (cut > 0) {
    const lastKept = value.charCodeAt(cut - 1)
    const firstDropped = value.charCodeAt(cut)
    const splitsSurrogatePair =
      lastKept >= 0xd800 &&
      lastKept <= 0xdbff &&
      firstDropped >= 0xdc00 &&
      firstDropped <= 0xdfff
    if (splitsSurrogatePair) {
      cut -= 1
    }
  }

  const omitted = value.length - cut
  return value.slice(0, cut) + `… [truncated ${omitted} chars]`
}
|
||||
|
||||
/**
|
||||
* Serialize a single transcript block as a JSONL dict line: `{"Bash":"ls"}`
|
||||
* for tool calls, `{"user":"text"}` for user text. The tool value is the
|
||||
@@ -410,15 +433,22 @@ function toCompactBlock(
|
||||
}
|
||||
if (encoded === '') return ''
|
||||
if (isJsonlTranscriptEnabled()) {
|
||||
return jsonStringify({ [block.name]: encoded }) + '\n'
|
||||
const jsonlValue =
|
||||
typeof encoded === 'string'
|
||||
? truncateClassifierValue(encoded)
|
||||
: encoded
|
||||
return jsonStringify({ [block.name]: jsonlValue }) + '\n'
|
||||
}
|
||||
const s = typeof encoded === 'string' ? encoded : jsonStringify(encoded)
|
||||
const s =
|
||||
typeof encoded === 'string'
|
||||
? truncateClassifierValue(encoded)
|
||||
: jsonStringify(encoded)
|
||||
return `${block.name} ${s}\n`
|
||||
}
|
||||
if (block.type === 'text' && role === 'user') {
|
||||
return isJsonlTranscriptEnabled()
|
||||
? jsonStringify({ user: block.text }) + '\n'
|
||||
: `User: ${block.text}\n`
|
||||
? jsonStringify({ user: truncateClassifierValue(block.text) }) + '\n'
|
||||
: `User: ${truncateClassifierValue(block.text)}\n`
|
||||
}
|
||||
return ''
|
||||
}
|
||||
@@ -427,6 +457,96 @@ function toCompact(entry: TranscriptEntry, lookup: ToolLookup): string {
|
||||
return entry.content.map(b => toCompactBlock(b, entry.role, lookup)).join('')
|
||||
}
|
||||
|
||||
/**
 * Serialize the most recent part of the conversation for the classifier
 * while keeping the total serialized size within `maxChars`.
 *
 * Messages are walked from newest to oldest. Each message is converted with
 * `messageToTranscriptEntry` and its blocks serialized with `toCompactBlock`;
 * messages that produce no entry, and blocks that serialize to an empty
 * string, are skipped. Entries are kept whole as long as they fit. The walk
 * stops at the first entry that would exceed the budget. If that entry is the
 * newest one (nothing kept yet), the blocks of that entry that individually
 * still fit are kept, scanning from its last block backwards.
 *
 * @param messages Conversation messages in chronological order.
 * @param tools Tools used to build the lookup passed to `toCompactBlock`.
 * @param maxChars Budget for the summed length of all serialized blocks.
 * @returns
 * - `userContentBlocks`: kept blocks as text params, in chronological order.
 * - `promptLengths`: summed serialized length of user blocks (`userPrompts`)
 *   and of all other blocks (`toolCalls`).
 * - `transcriptEntries`: number of kept entries (a partially kept entry
 *   counts as one).
 * - `truncated`: true when an entry was dropped or cut for budget reasons, or
 *   when the budget was reached while older messages remained unvisited.
 */
function serializeTranscriptForClassifier(
  messages: Message[],
  tools: Tools,
  maxChars: number,
): {
  userContentBlocks: Anthropic.TextBlockParam[]
  promptLengths: {
    toolCalls: number
    userPrompts: number
  }
  transcriptEntries: number
  truncated: boolean
} {
  type SerializedBlock = { role: TranscriptEntry['role']; text: string }

  const lookup = buildToolLookup(tools)
  // Kept entries in newest-first order; reversed when building the output.
  const keptEntries: SerializedBlock[][] = []
  let totalChars = 0
  let truncated = false

  for (let i = messages.length - 1; i >= 0; i--) {
    const entry = messageToTranscriptEntry(messages[i]!)
    if (!entry) continue

    const serializedBlocks: SerializedBlock[] = []
    let entryChars = 0

    for (const block of entry.content) {
      const serialized = toCompactBlock(block, entry.role, lookup)
      if (serialized === '') continue
      serializedBlocks.push({ role: entry.role, text: serialized })
      entryChars += serialized.length
    }
    if (serializedBlocks.length === 0) continue

    if (totalChars + entryChars > maxChars) {
      if (totalChars === 0) {
        // The newest entry alone is over budget: keep the blocks that fit,
        // preferring later blocks, rather than returning nothing.
        const partialEntry: SerializedBlock[] = []
        let partialChars = 0
        for (let j = serializedBlocks.length - 1; j >= 0; j--) {
          const serialized = serializedBlocks[j]!
          if (partialChars + serialized.text.length > maxChars) continue
          // unshift keeps the blocks in their original order.
          partialEntry.unshift(serialized)
          partialChars += serialized.text.length
        }
        if (partialEntry.length > 0) {
          keptEntries.push(partialEntry)
          totalChars += partialChars
        }
      }
      truncated = true
      break
    }

    keptEntries.push(serializedBlocks)
    totalChars += entryChars
    if (totalChars >= maxChars) {
      // Budget exactly used up; older messages (if any) are not visited.
      truncated = i > 0
      break
    }
  }

  const userContentBlocks: Anthropic.TextBlockParam[] = []
  let userPromptsLength = 0
  let toolCallsLength = 0

  // Walk the newest-first list backwards to emit chronological order.
  for (let i = keptEntries.length - 1; i >= 0; i--) {
    for (const block of keptEntries[i]!) {
      userContentBlocks.push({ type: 'text' as const, text: block.text })
      if (block.role === 'user') {
        userPromptsLength += block.text.length
      } else {
        toolCallsLength += block.text.length
      }
    }
  }

  return {
    userContentBlocks,
    promptLengths: {
      toolCalls: toolCallsLength,
      userPrompts: userPromptsLength,
    },
    transcriptEntries: keptEntries.length,
    truncated,
  }
}
|
||||
|
||||
/**
|
||||
* Build a compact transcript string including user messages and assistant tool_use blocks.
|
||||
* Used by AgentTool for handoff classification.
|
||||
@@ -434,10 +554,10 @@ function toCompact(entry: TranscriptEntry, lookup: ToolLookup): string {
|
||||
export function buildTranscriptForClassifier(
  messages: Message[],
  tools: Tools,
  maxChars: number = MAX_CLASSIFIER_TRANSCRIPT_CHARS,
): string {
  // Delegate to the budgeted serializer and concatenate the kept blocks
  // (already in chronological order) into a single string.
  return serializeTranscriptForClassifier(messages, tools, maxChars)
    .userContentBlocks.map(block => block.text)
    .join('')
}
|
||||
|
||||
@@ -1027,34 +1147,24 @@ export async function classifyYoloAction(
|
||||
}
|
||||
|
||||
const systemPrompt = await buildYoloSystemPrompt(context)
|
||||
const transcriptEntries = buildTranscriptEntries(messages)
|
||||
const transcriptBudget = Math.max(
|
||||
0,
|
||||
MAX_CLASSIFIER_TRANSCRIPT_CHARS - actionCompact.length,
|
||||
)
|
||||
const serializedTranscript = serializeTranscriptForClassifier(
|
||||
messages,
|
||||
tools,
|
||||
transcriptBudget,
|
||||
)
|
||||
const claudeMdMessage = buildClaudeMdMessage()
|
||||
const prefixMessages: Anthropic.MessageParam[] = claudeMdMessage
|
||||
? [claudeMdMessage]
|
||||
: []
|
||||
|
||||
let toolCallsLength = actionCompact.length
|
||||
let userPromptsLength = 0
|
||||
const userContentBlocks: Anthropic.TextBlockParam[] = []
|
||||
for (const entry of transcriptEntries) {
|
||||
for (const block of entry.content) {
|
||||
const serialized = toCompactBlock(block, entry.role, lookup)
|
||||
if (serialized === '') continue
|
||||
switch (entry.role) {
|
||||
case 'user':
|
||||
userPromptsLength += serialized.length
|
||||
break
|
||||
case 'assistant':
|
||||
toolCallsLength += serialized.length
|
||||
break
|
||||
default: {
|
||||
const _exhaustive: never = entry.role
|
||||
void _exhaustive
|
||||
}
|
||||
}
|
||||
userContentBlocks.push({ type: 'text' as const, text: serialized })
|
||||
}
|
||||
}
|
||||
const toolCallsLength =
|
||||
actionCompact.length + serializedTranscript.promptLengths.toolCalls
|
||||
const userPromptsLength = serializedTranscript.promptLengths.userPrompts
|
||||
const userContentBlocks = [...serializedTranscript.userContentBlocks]
|
||||
|
||||
const userPrompt = userContentBlocks.map(b => b.text).join('') + actionCompact
|
||||
const promptLengths = {
|
||||
@@ -1080,7 +1190,8 @@ export async function classifyYoloAction(
|
||||
`(sys=${promptLengths.systemPrompt} ` +
|
||||
`tools=${promptLengths.toolCalls} ` +
|
||||
`user=${promptLengths.userPrompts}) ` +
|
||||
`transcriptEntries=${transcriptEntries.length} ` +
|
||||
`transcriptEntries=${serializedTranscript.transcriptEntries} ` +
|
||||
`truncated=${serializedTranscript.truncated} ` +
|
||||
`messages=${messages.length}`,
|
||||
)
|
||||
logForDebugging(
|
||||
@@ -1119,7 +1230,7 @@ export async function classifyYoloAction(
|
||||
mainLoopTokens: mainLoopTokens ?? tokenCountWithEstimation(messages),
|
||||
classifierChars,
|
||||
classifierTokensEst,
|
||||
transcriptEntries: transcriptEntries.length,
|
||||
transcriptEntries: serializedTranscript.transcriptEntries,
|
||||
messages: messages.length,
|
||||
action: actionCompact,
|
||||
},
|
||||
@@ -1274,7 +1385,7 @@ export async function classifyYoloAction(
|
||||
mainLoopTokens,
|
||||
classifierChars,
|
||||
classifierTokensEst,
|
||||
transcriptEntries: transcriptEntries.length,
|
||||
transcriptEntries: serializedTranscript.transcriptEntries,
|
||||
messages: messages.length,
|
||||
action: actionCompact,
|
||||
model,
|
||||
|
||||
@@ -14,6 +14,10 @@ test('applyToolResultReplacementsToMessages replaces matching tool results and p
|
||||
is_error: false,
|
||||
},
|
||||
],
|
||||
toolUseResult: {
|
||||
stdout: 'very large tool output',
|
||||
stderr: '',
|
||||
},
|
||||
})
|
||||
const messages = [unrelated, oversizedResult]
|
||||
const replacement =
|
||||
@@ -30,6 +34,7 @@ test('applyToolResultReplacementsToMessages replaces matching tool results and p
|
||||
expect((next[1]!.message.content as Array<{ content: string }>)[0]!.content).toBe(
|
||||
replacement,
|
||||
)
|
||||
expect(next[1]!.toolUseResult).toBeUndefined()
|
||||
})
|
||||
|
||||
test('applyToolResultReplacementsToMessages is idempotent when messages are already hydrated', () => {
|
||||
|
||||
@@ -726,6 +726,11 @@ function replaceToolResultContents(
|
||||
: { ...block, content: replacement }
|
||||
}),
|
||||
},
|
||||
// Drop the original tool payload once the model-facing content has been
|
||||
// replaced with a persisted preview. Keeping both defeats the memory
|
||||
// savings for long sessions because the live transcript still retains
|
||||
// the oversized structured result.
|
||||
toolUseResult: undefined,
|
||||
}
|
||||
})
|
||||
return changed ? nextMessages : messages
|
||||
|
||||
Reference in New Issue
Block a user