fix: normalize malformed Bash tool arguments from OpenAI-compatible providers (#385)

* fix: normalize malformed Bash tool arguments from OpenAI-compatible providers

* fix: keep invalid Bash tool args from becoming commands

* fix: preserve malformed Bash JSON literals

* test: stabilize rebased PR 385 checks

* test: isolate provider profile env assertions

* fix: extend tool argument normalization to all tools and harden edge cases

- Extend STRING_ARGUMENT_TOOL_FIELDS to normalize Read, Write, Edit,
  Glob, and Grep plain-string arguments (fixes "Invalid tool parameters"
  errors reported by VennDev)
- Normalize streaming Bash args regardless of finish_reason, not only
  when finish_reason is 'tool_calls'
- Broaden isLikelyStructuredObjectLiteral to catch malformed object-shaped
  strings like {command:"pwd"} and {'command':'pwd'} (fixes CR2 from
  Vasanthdev2004)
- Apply blank/object-literal guard to all tools, not just Bash
- Extract duplicated JSON repair suffix combinations into shared constant
- Add 32 isolated unit tests for toolArgumentNormalization

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix: skip streaming normalization on finish_reason length

Truncated tool calls (finish_reason: 'length') now preserve the raw
buffer instead of normalizing into executable commands, preventing
incomplete commands from becoming runnable.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix: comprehensive tool argument normalization hardening

- Remove all { raw: ... } returns that caused InputValidationError with
  z.strictObject schemas — return {} instead for clean Zod errors
- Extend normalizeAtStop buffering to all mapped tools (Read, Write,
  Edit, Glob, Grep) so streaming paths also get normalized
- Make repairPossiblyTruncatedObjectJson generic — repair any valid
  JSON object, not just ones with a command field
- Export hasToolFieldMapping for streaming normalizeAtStop decision
- Skip normalization on finish_reason: length to preserve raw truncated
  buffer
- Update all test expectations to match new behavior

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
KRATOS
2026-04-06 19:38:45 +05:30
committed by GitHub
parent 1e057025d6
commit b4bd95b477
8 changed files with 1539 additions and 53 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -42,6 +42,10 @@ import {
} from './providerConfig.js'
import { sanitizeSchemaForOpenAICompat } from '../../utils/schemaSanitizer.js'
import { redactSecretValueForDisplay } from '../../utils/providerProfile.js'
import {
normalizeToolArguments,
hasToolFieldMapping,
} from './toolArgumentNormalization.js'
type SecretValueSource = Partial<{
OPENAI_API_KEY: string
@@ -514,6 +518,30 @@ function convertChunkUsage(
}
}
const JSON_REPAIR_SUFFIXES = [
'}', '"}', ']}', '"]}', '}}', '"}}', ']}}', '"]}}', '"]}]}', '}]}'
]
function repairPossiblyTruncatedObjectJson(raw: string): string | null {
try {
const parsed = JSON.parse(raw)
return parsed && typeof parsed === 'object' && !Array.isArray(parsed)
? raw
: null
} catch {
for (const combo of JSON_REPAIR_SUFFIXES) {
try {
const repaired = raw + combo
const parsed = JSON.parse(repaired)
if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
return repaired
}
} catch {}
}
return null
}
}
/**
* Async generator that transforms an OpenAI SSE stream into
* Anthropic-format BetaRawMessageStreamEvent objects.
@@ -524,7 +552,16 @@ async function* openaiStreamToAnthropic(
): AsyncGenerator<AnthropicStreamEvent> {
const messageId = makeMessageId()
let contentBlockIndex = 0
const activeToolCalls = new Map<number, { id: string; name: string; index: number; jsonBuffer: string }>()
const activeToolCalls = new Map<
number,
{
id: string
name: string
index: number
jsonBuffer: string
normalizeAtStop: boolean
}
>()
let hasEmittedContentStart = false
let hasEmittedThinkingStart = false
let hasClosedThinking = false
@@ -647,11 +684,14 @@ async function* openaiStreamToAnthropic(
}
const toolBlockIndex = contentBlockIndex
const initialArguments = tc.function.arguments ?? ''
const normalizeAtStop = hasToolFieldMapping(tc.function.name)
activeToolCalls.set(tc.index, {
id: tc.id,
name: tc.function.name,
index: toolBlockIndex,
jsonBuffer: tc.function.arguments ?? '',
jsonBuffer: initialArguments,
normalizeAtStop,
})
yield {
@@ -675,7 +715,7 @@ async function* openaiStreamToAnthropic(
contentBlockIndex++
// Emit any initial arguments
if (tc.function.arguments) {
if (tc.function.arguments && !normalizeAtStop) {
yield {
type: 'content_block_delta',
index: toolBlockIndex,
@@ -692,6 +732,11 @@ async function* openaiStreamToAnthropic(
if (tc.function.arguments) {
active.jsonBuffer += tc.function.arguments
}
if (active.normalizeAtStop) {
continue
}
yield {
type: 'content_block_delta',
index: active.index,
@@ -725,16 +770,44 @@ async function* openaiStreamToAnthropic(
}
// Close active tool calls
for (const [, tc] of activeToolCalls) {
if (tc.normalizeAtStop) {
let partialJson: string
if (choice.finish_reason === 'length') {
// Truncated by max tokens — preserve raw buffer to avoid
// turning an incomplete tool call into an executable command
partialJson = tc.jsonBuffer
} else {
const repairedStructuredJson = repairPossiblyTruncatedObjectJson(
tc.jsonBuffer,
)
if (repairedStructuredJson) {
partialJson = repairedStructuredJson
} else {
partialJson = JSON.stringify(
normalizeToolArguments(tc.name, tc.jsonBuffer),
)
}
}
yield {
type: 'content_block_delta',
index: tc.index,
delta: {
type: 'input_json_delta',
partial_json: partialJson,
},
}
yield { type: 'content_block_stop', index: tc.index }
continue
}
let suffixToAdd = ''
if (tc.jsonBuffer) {
try {
JSON.parse(tc.jsonBuffer)
} catch {
const str = tc.jsonBuffer.trimEnd()
const combinations = [
'}', '"}', ']}', '"]}', '}}', '"}}', ']}}', '"]}}', '"]}]}', '}]}'
]
for (const combo of combinations) {
for (const combo of JSON_REPAIR_SUFFIXES) {
try {
JSON.parse(str + combo)
suffixToAdd = combo
@@ -1181,12 +1254,10 @@ class OpenAIShimMessages {
if (choice?.message?.tool_calls) {
for (const tc of choice.message.tool_calls) {
let input: unknown
try {
input = JSON.parse(tc.function.arguments)
} catch {
input = { raw: tc.function.arguments }
}
const input = normalizeToolArguments(
tc.function.name,
tc.function.arguments,
)
content.push({
type: 'tool_use',
id: tc.id,

View File

@@ -0,0 +1,180 @@
import { describe, expect, test } from 'bun:test'
import { normalizeToolArguments } from './toolArgumentNormalization'
describe('normalizeToolArguments', () => {
describe('Bash tool', () => {
test('wraps plain string into { command }', () => {
expect(normalizeToolArguments('Bash', 'pwd')).toEqual({ command: 'pwd' })
})
test('wraps multi-word command', () => {
expect(normalizeToolArguments('Bash', 'ls -la /tmp')).toEqual({
command: 'ls -la /tmp',
})
})
test('passes through structured JSON object', () => {
expect(
normalizeToolArguments('Bash', '{"command":"echo hi"}'),
).toEqual({ command: 'echo hi' })
})
test('returns empty object for blank string', () => {
expect(normalizeToolArguments('Bash', '')).toEqual({})
expect(normalizeToolArguments('Bash', ' ')).toEqual({})
})
test('returns parsed blank for JSON-encoded blank string', () => {
expect(normalizeToolArguments('Bash', '""')).toEqual('')
expect(normalizeToolArguments('Bash', '" "')).toEqual(' ')
})
test('returns empty object for malformed structured object literal', () => {
expect(normalizeToolArguments('Bash', '{ "command": "pwd"')).toEqual({})
})
test.each([
['{command:"pwd"}'],
["{'command':'pwd'}"],
['{command: pwd}'],
])(
'returns empty object for malformed object-shaped string %s (does not wrap into command)',
(input) => {
expect(normalizeToolArguments('Bash', input)).toEqual({})
},
)
test.each([
['false', false],
['null', null],
['[]', [] as unknown[]],
['0', 0],
['true', true],
['123', 123],
])(
'preserves JSON literal %s as-is (does not wrap into command)',
(input, expected) => {
expect(normalizeToolArguments('Bash', input)).toEqual(expected)
},
)
test('wraps JSON-encoded string into { command }', () => {
expect(normalizeToolArguments('Bash', '"pwd"')).toEqual({
command: 'pwd',
})
})
})
describe('undefined arguments', () => {
test('returns empty object for undefined', () => {
expect(normalizeToolArguments('Bash', undefined)).toEqual({})
expect(normalizeToolArguments('UnknownTool', undefined)).toEqual({})
})
})
describe('Read tool', () => {
test('wraps plain string into { file_path }', () => {
expect(normalizeToolArguments('Read', '/home/user/file.txt')).toEqual({
file_path: '/home/user/file.txt',
})
})
test('wraps JSON-encoded string into { file_path }', () => {
expect(normalizeToolArguments('Read', '"/home/user/file.txt"')).toEqual({
file_path: '/home/user/file.txt',
})
})
test('passes through structured JSON object', () => {
expect(
normalizeToolArguments('Read', '{"file_path":"/tmp/f.txt","limit":10}'),
).toEqual({ file_path: '/tmp/f.txt', limit: 10 })
})
})
describe('Write tool', () => {
test('wraps plain string into { file_path }', () => {
expect(normalizeToolArguments('Write', '/tmp/out.txt')).toEqual({
file_path: '/tmp/out.txt',
})
})
test('passes through structured JSON object', () => {
expect(
normalizeToolArguments(
'Write',
'{"file_path":"/tmp/out.txt","content":"hello"}',
),
).toEqual({ file_path: '/tmp/out.txt', content: 'hello' })
})
})
describe('Edit tool', () => {
test('wraps plain string into { file_path }', () => {
expect(normalizeToolArguments('Edit', '/tmp/edit.ts')).toEqual({
file_path: '/tmp/edit.ts',
})
})
test('passes through structured JSON object', () => {
expect(
normalizeToolArguments(
'Edit',
'{"file_path":"/tmp/f.ts","old_string":"a","new_string":"b"}',
),
).toEqual({ file_path: '/tmp/f.ts', old_string: 'a', new_string: 'b' })
})
})
describe('Glob tool', () => {
test('wraps plain string into { pattern }', () => {
expect(normalizeToolArguments('Glob', '**/*.ts')).toEqual({
pattern: '**/*.ts',
})
})
test('passes through structured JSON object', () => {
expect(
normalizeToolArguments('Glob', '{"pattern":"*.js","path":"/src"}'),
).toEqual({ pattern: '*.js', path: '/src' })
})
})
describe('Grep tool', () => {
test('wraps plain string into { pattern }', () => {
expect(normalizeToolArguments('Grep', 'TODO')).toEqual({
pattern: 'TODO',
})
})
test('passes through structured JSON object', () => {
expect(
normalizeToolArguments('Grep', '{"pattern":"fixme","path":"/src"}'),
).toEqual({ pattern: 'fixme', path: '/src' })
})
})
describe('unknown tools', () => {
test('returns empty object for plain string (no known field mapping)', () => {
expect(normalizeToolArguments('UnknownTool', 'some value')).toEqual({})
})
test('passes through structured JSON object', () => {
expect(
normalizeToolArguments('UnknownTool', '{"key":"val"}'),
).toEqual({ key: 'val' })
})
test('preserves JSON literals as-is', () => {
expect(normalizeToolArguments('UnknownTool', 'false')).toEqual(false)
expect(normalizeToolArguments('UnknownTool', 'null')).toEqual(null)
expect(normalizeToolArguments('UnknownTool', '[]')).toEqual([])
})
test('returns parsed string for JSON-encoded string on unknown tools', () => {
expect(normalizeToolArguments('UnknownTool', '"hello"')).toEqual(
'hello',
)
})
})
})

View File

@@ -0,0 +1,69 @@
const STRING_ARGUMENT_TOOL_FIELDS: Record<string, string> = {
Bash: 'command',
Read: 'file_path',
Write: 'file_path',
Edit: 'file_path',
Glob: 'pattern',
Grep: 'pattern',
}
function isBlankString(value: string): boolean {
return value.trim().length === 0
}
function isLikelyStructuredObjectLiteral(value: string): boolean {
// Match object-like patterns with key-value syntax:
// {"key":, {key:, {'key':, { "key" :, etc.
// But NOT bash compound commands like { pwd; } or { echo hi; }
return /^\s*\{\s*['"]?\w+['"]?\s*:/.test(value)
}
function isRecord(value: unknown): value is Record<string, unknown> {
return typeof value === 'object' && value !== null && !Array.isArray(value)
}
function getPlainStringToolArgumentField(toolName: string): string | null {
return STRING_ARGUMENT_TOOL_FIELDS[toolName] ?? null
}
export function hasToolFieldMapping(toolName: string): boolean {
return toolName in STRING_ARGUMENT_TOOL_FIELDS
}
function wrapPlainStringToolArguments(
toolName: string,
value: string,
): Record<string, string> | null {
const field = getPlainStringToolArgumentField(toolName)
if (!field) return null
return { [field]: value }
}
export function normalizeToolArguments(
toolName: string,
rawArguments: string | undefined,
): unknown {
if (rawArguments === undefined) return {}
try {
const parsed = JSON.parse(rawArguments)
if (isRecord(parsed)) {
return parsed
}
// Parsed as a non-object JSON value (string, number, boolean, null, array)
if (typeof parsed === 'string' && !isBlankString(parsed)) {
return wrapPlainStringToolArguments(toolName, parsed) ?? parsed
}
// For blank strings, booleans, null, arrays — pass through as-is
// and let Zod schema validation produce a meaningful error
return parsed
} catch {
// rawArguments is not valid JSON — treat as a plain string
if (isBlankString(rawArguments) || isLikelyStructuredObjectLiteral(rawArguments)) {
// Blank or looks like a malformed object literal — don't wrap into
// a tool field to avoid turning garbage into executable input
return {}
}
return wrapPlainStringToolArguments(toolName, rawArguments) ?? {}
}
}