fix: normalize malformed Bash tool arguments from OpenAI-compatible providers (#385)

* fix: normalize malformed Bash tool arguments from OpenAI-compatible providers * fix: keep invalid Bash tool args from becoming commands * fix: preserve malformed Bash JSON literals * test: stabilize rebased PR 385 checks * test: isolate provider profile env assertions * fix: extend tool argument normalization to all tools and harden edge cases - Extend STRING_ARGUMENT_TOOL_FIELDS to normalize Read, Write, Edit, Glob, and Grep plain-string arguments (fixes "Invalid tool parameters" errors reported by VennDev) - Normalize streaming Bash args regardless of finish_reason, not only when finish_reason is 'tool_calls' - Broaden isLikelyStructuredObjectLiteral to catch malformed object-shaped strings like {command:"pwd"} and {'command':'pwd'} (fixes CR2 from Vasanthdev2004) - Apply blank/object-literal guard to all tools, not just Bash - Extract duplicated JSON repair suffix combinations into shared constant - Add 32 isolated unit tests for toolArgumentNormalization Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: skip streaming normalization on finish_reason length Truncated tool calls (finish_reason: 'length') now preserve the raw buffer instead of normalizing into executable commands, preventing incomplete commands from becoming runnable. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: comprehensive tool argument normalization hardening - Remove all { raw: ... 
} returns that caused InputValidationError with z.strictObject schemas — return {} instead for clean Zod errors - Extend normalizeAtStop buffering to all mapped tools (Read, Write, Edit, Glob, Grep) so streaming paths also get normalized - Make repairPossiblyTruncatedObjectJson generic — repair any valid JSON object, not just ones with a command field - Export hasToolFieldMapping for streaming normalizeAtStop decision - Skip normalization on finish_reason: length to preserve raw truncated buffer - Update all test expectations to match new behavior Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-06 19:38:45 +05:30
parent 1e057025d6
commit b4bd95b477
8 changed files with 1539 additions and 53 deletions
--- a/src/services/api/openaiShim.test.ts
+++ b/src/services/api/openaiShim.test.ts
--- a/src/services/api/openaiShim.ts
+++ b/src/services/api/openaiShim.ts
@@ -42,6 +42,10 @@ import {
 } from './providerConfig.js'
 import { sanitizeSchemaForOpenAICompat } from '../../utils/schemaSanitizer.js'
 import { redactSecretValueForDisplay } from '../../utils/providerProfile.js'
+import {
+  normalizeToolArguments,
+  hasToolFieldMapping,
+} from './toolArgumentNormalization.js'

 type SecretValueSource = Partial<{
  OPENAI_API_KEY: string
@@ -514,6 +518,30 @@ function convertChunkUsage(
  }
 }

+// Closing sequences appended, in this order, when completing truncated JSON.
+const JSON_REPAIR_SUFFIXES = ['}', '"}', ']}', '"]}', '}}', '"}}', ']}}', '"]}}', '"]}]}', '}]}']
+
+/**
+ * Returns `raw` if it already parses as a JSON object; otherwise `raw` plus
+ * the first entry of JSON_REPAIR_SUFFIXES that makes it parse as one.
+ * Returns null when neither holds, or when `raw` is valid non-object JSON.
+ */
+function repairPossiblyTruncatedObjectJson(raw: string): string | null {
+  // true: JSON object; false: some other JSON value; null: not valid JSON.
+  const classify = (text: string): boolean | null => {
+    try {
+      const value: unknown = JSON.parse(text)
+      return Boolean(value) && typeof value === 'object' && !Array.isArray(value)
+    } catch {
+      return null
+    }
+  }
+  const verdict = classify(raw)
+  if (verdict !== null) return verdict ? raw : null
+  const suffix = JSON_REPAIR_SUFFIXES.find(candidate => classify(raw + candidate) === true)
+  return suffix === undefined ? null : raw + suffix
+}
+
 /**
 * Async generator that transforms an OpenAI SSE stream into
 * Anthropic-format BetaRawMessageStreamEvent objects.
@@ -524,7 +552,16 @@ async function* openaiStreamToAnthropic(
 ): AsyncGenerator<AnthropicStreamEvent> {
  const messageId = makeMessageId()
  let contentBlockIndex = 0
-  const activeToolCalls = new Map<number, { id: string; name: string; index: number; jsonBuffer: string }>()
+  const activeToolCalls = new Map<
+    number,
+    {
+      id: string
+      name: string
+      index: number
+      jsonBuffer: string
+      normalizeAtStop: boolean
+    }
+  >()
  let hasEmittedContentStart = false
  let hasEmittedThinkingStart = false
  let hasClosedThinking = false
@@ -647,11 +684,14 @@ async function* openaiStreamToAnthropic(
              }

              const toolBlockIndex = contentBlockIndex
+              const initialArguments = tc.function.arguments ?? ''
+              const normalizeAtStop = hasToolFieldMapping(tc.function.name)
              activeToolCalls.set(tc.index, {
                id: tc.id,
                name: tc.function.name,
                index: toolBlockIndex,
-                jsonBuffer: tc.function.arguments ?? '',
+                jsonBuffer: initialArguments,
+                normalizeAtStop,
              })

              yield {
@@ -675,7 +715,7 @@ async function* openaiStreamToAnthropic(
              contentBlockIndex++

              // Emit any initial arguments
-              if (tc.function.arguments) {
+              if (tc.function.arguments && !normalizeAtStop) {
                yield {
                  type: 'content_block_delta',
                  index: toolBlockIndex,
@@ -692,6 +732,11 @@ async function* openaiStreamToAnthropic(
                if (tc.function.arguments) {
                  active.jsonBuffer += tc.function.arguments
                }
+
+                if (active.normalizeAtStop) {
+                  continue
+                }
+
                yield {
                  type: 'content_block_delta',
                  index: active.index,
@@ -725,16 +770,44 @@ async function* openaiStreamToAnthropic(
          }
          // Close active tool calls
          for (const [, tc] of activeToolCalls) {
+            if (tc.normalizeAtStop) {
+              let partialJson: string
+              if (choice.finish_reason === 'length') {
+                // Truncated by max tokens — preserve raw buffer to avoid
+                // turning an incomplete tool call into an executable command
+                partialJson = tc.jsonBuffer
+              } else {
+                const repairedStructuredJson = repairPossiblyTruncatedObjectJson(
+                  tc.jsonBuffer,
+                )
+                if (repairedStructuredJson) {
+                  partialJson = repairedStructuredJson
+                } else {
+                  partialJson = JSON.stringify(
+                    normalizeToolArguments(tc.name, tc.jsonBuffer),
+                  )
+                }
+              }
+
+              yield {
+                type: 'content_block_delta',
+                index: tc.index,
+                delta: {
+                  type: 'input_json_delta',
+                  partial_json: partialJson,
+                },
+              }
+              yield { type: 'content_block_stop', index: tc.index }
+              continue
+            }
+
            let suffixToAdd = ''
            if (tc.jsonBuffer) {
              try {
                JSON.parse(tc.jsonBuffer)
              } catch {
                const str = tc.jsonBuffer.trimEnd()
-                const combinations = [
-                  '}', '"}', ']}', '"]}', '}}', '"}}', ']}}', '"]}}', '"]}]}', '}]}'
-                ]
-                for (const combo of combinations) {
+                for (const combo of JSON_REPAIR_SUFFIXES) {
                  try {
                    JSON.parse(str + combo)
                    suffixToAdd = combo
@@ -1181,12 +1254,10 @@ class OpenAIShimMessages {

    if (choice?.message?.tool_calls) {
      for (const tc of choice.message.tool_calls) {
-        let input: unknown
-        try {
-          input = JSON.parse(tc.function.arguments)
-        } catch {
-          input = { raw: tc.function.arguments }
-        }
+        const input = normalizeToolArguments(
+          tc.function.name,
+          tc.function.arguments,
+        )
        content.push({
          type: 'tool_use',
          id: tc.id,
--- a/src/services/api/toolArgumentNormalization.test.ts
+++ b/src/services/api/toolArgumentNormalization.test.ts
@@ -0,0 +1,180 @@
+import { describe, expect, test } from 'bun:test'
+import { normalizeToolArguments } from './toolArgumentNormalization'
+
+describe('normalizeToolArguments', () => { // pins how raw tool-call argument strings are normalized, per tool name
+  describe('Bash tool', () => { // a bare string maps to { command }
+    test('wraps plain string into { command }', () => {
+      expect(normalizeToolArguments('Bash', 'pwd')).toEqual({ command: 'pwd' })
+    })
+
+    test('wraps multi-word command', () => {
+      expect(normalizeToolArguments('Bash', 'ls -la /tmp')).toEqual({
+        command: 'ls -la /tmp',
+      })
+    })
+
+    test('passes through structured JSON object', () => {
+      expect(
+        normalizeToolArguments('Bash', '{"command":"echo hi"}'),
+      ).toEqual({ command: 'echo hi' })
+    })
+
+    test('returns empty object for blank string', () => {
+      expect(normalizeToolArguments('Bash', '')).toEqual({})
+      expect(normalizeToolArguments('Bash', '   ')).toEqual({})
+    })
+
+    test('returns parsed blank for JSON-encoded blank string', () => { // valid JSON, so the parsed blank string comes back unwrapped
+      expect(normalizeToolArguments('Bash', '""')).toEqual('')
+      expect(normalizeToolArguments('Bash', '"  "')).toEqual('  ')
+    })
+
+    test('returns empty object for malformed structured object literal', () => {
+      expect(normalizeToolArguments('Bash', '{ "command": "pwd"')).toEqual({})
+    })
+
+    test.each([ // inputs that are not valid JSON but are object-shaped
+      ['{command:"pwd"}'],
+      ["{'command':'pwd'}"],
+      ['{command: pwd}'],
+    ])(
+      'returns empty object for malformed object-shaped string %s (does not wrap into command)',
+      (input) => {
+        expect(normalizeToolArguments('Bash', input)).toEqual({})
+      },
+    )
+
+    test.each([ // [raw argument string, expected parsed value] for valid non-object JSON
+      ['false', false],
+      ['null', null],
+      ['[]', [] as unknown[]],
+      ['0', 0],
+      ['true', true],
+      ['123', 123],
+    ])(
+      'preserves JSON literal %s as-is (does not wrap into command)',
+      (input, expected) => {
+        expect(normalizeToolArguments('Bash', input)).toEqual(expected)
+      },
+    )
+
+    test('wraps JSON-encoded string into { command }', () => {
+      expect(normalizeToolArguments('Bash', '"pwd"')).toEqual({
+        command: 'pwd',
+      })
+    })
+  })
+
+  describe('undefined arguments', () => { // missing arguments yield {} whether or not the tool is known
+    test('returns empty object for undefined', () => {
+      expect(normalizeToolArguments('Bash', undefined)).toEqual({})
+      expect(normalizeToolArguments('UnknownTool', undefined)).toEqual({})
+    })
+  })
+
+  describe('Read tool', () => { // a bare string maps to { file_path }
+    test('wraps plain string into { file_path }', () => {
+      expect(normalizeToolArguments('Read', '/home/user/file.txt')).toEqual({
+        file_path: '/home/user/file.txt',
+      })
+    })
+
+    test('wraps JSON-encoded string into { file_path }', () => {
+      expect(normalizeToolArguments('Read', '"/home/user/file.txt"')).toEqual({
+        file_path: '/home/user/file.txt',
+      })
+    })
+
+    test('passes through structured JSON object', () => {
+      expect(
+        normalizeToolArguments('Read', '{"file_path":"/tmp/f.txt","limit":10}'),
+      ).toEqual({ file_path: '/tmp/f.txt', limit: 10 })
+    })
+  })
+
+  describe('Write tool', () => { // a bare string maps to { file_path }
+    test('wraps plain string into { file_path }', () => {
+      expect(normalizeToolArguments('Write', '/tmp/out.txt')).toEqual({
+        file_path: '/tmp/out.txt',
+      })
+    })
+
+    test('passes through structured JSON object', () => {
+      expect(
+        normalizeToolArguments(
+          'Write',
+          '{"file_path":"/tmp/out.txt","content":"hello"}',
+        ),
+      ).toEqual({ file_path: '/tmp/out.txt', content: 'hello' })
+    })
+  })
+
+  describe('Edit tool', () => { // a bare string maps to { file_path }
+    test('wraps plain string into { file_path }', () => {
+      expect(normalizeToolArguments('Edit', '/tmp/edit.ts')).toEqual({
+        file_path: '/tmp/edit.ts',
+      })
+    })
+
+    test('passes through structured JSON object', () => {
+      expect(
+        normalizeToolArguments(
+          'Edit',
+          '{"file_path":"/tmp/f.ts","old_string":"a","new_string":"b"}',
+        ),
+      ).toEqual({ file_path: '/tmp/f.ts', old_string: 'a', new_string: 'b' })
+    })
+  })
+
+  describe('Glob tool', () => { // a bare string maps to { pattern }
+    test('wraps plain string into { pattern }', () => {
+      expect(normalizeToolArguments('Glob', '**/*.ts')).toEqual({
+        pattern: '**/*.ts',
+      })
+    })
+
+    test('passes through structured JSON object', () => {
+      expect(
+        normalizeToolArguments('Glob', '{"pattern":"*.js","path":"/src"}'),
+      ).toEqual({ pattern: '*.js', path: '/src' })
+    })
+  })
+
+  describe('Grep tool', () => { // a bare string maps to { pattern }
+    test('wraps plain string into { pattern }', () => {
+      expect(normalizeToolArguments('Grep', 'TODO')).toEqual({
+        pattern: 'TODO',
+      })
+    })
+
+    test('passes through structured JSON object', () => {
+      expect(
+        normalizeToolArguments('Grep', '{"pattern":"fixme","path":"/src"}'),
+      ).toEqual({ pattern: 'fixme', path: '/src' })
+    })
+  })
+
+  describe('unknown tools', () => { // tools without a field mapping never get a wrapped string
+    test('returns empty object for plain string (no known field mapping)', () => {
+      expect(normalizeToolArguments('UnknownTool', 'some value')).toEqual({})
+    })
+
+    test('passes through structured JSON object', () => {
+      expect(
+        normalizeToolArguments('UnknownTool', '{"key":"val"}'),
+      ).toEqual({ key: 'val' })
+    })
+
+    test('preserves JSON literals as-is', () => {
+      expect(normalizeToolArguments('UnknownTool', 'false')).toEqual(false)
+      expect(normalizeToolArguments('UnknownTool', 'null')).toEqual(null)
+      expect(normalizeToolArguments('UnknownTool', '[]')).toEqual([])
+    })
+
+    test('returns parsed string for JSON-encoded string on unknown tools', () => {
+      expect(normalizeToolArguments('UnknownTool', '"hello"')).toEqual(
+        'hello',
+      )
+    })
+  })
+})
--- a/src/services/api/toolArgumentNormalization.ts
+++ b/src/services/api/toolArgumentNormalization.ts
@@ -0,0 +1,104 @@
+/**
+ * Tools that accept a bare string in place of a JSON object, mapped to the
+ * input field that string is assigned to.
+ *
+ * Stored in a Map rather than an object literal so that lookups keyed by a
+ * model-supplied tool name can only match these entries, never inherited
+ * Object.prototype members such as "toString" or "constructor".
+ */
+const STRING_ARGUMENT_TOOL_FIELDS: ReadonlyMap<string, string> = new Map([
+  ['Bash', 'command'],
+  ['Read', 'file_path'],
+  ['Write', 'file_path'],
+  ['Edit', 'file_path'],
+  ['Glob', 'pattern'],
+  ['Grep', 'pattern'],
+])
+
+/** True when `value` is empty or contains only whitespace. */
+function isBlankString(value: string): boolean {
+  return value.trim().length === 0
+}
+
+/**
+ * Heuristic for text that starts like an object literal with a key:
+ * {"key":, {key:, {'key':, { "key" :, etc.
+ * Bash compound commands like { pwd; } or { echo hi; } do not match.
+ */
+function isLikelyStructuredObjectLiteral(value: string): boolean {
+  return /^\s*\{\s*['"]?\w+['"]?\s*:/.test(value)
+}
+
+/** Type guard for non-null, non-array objects. */
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === 'object' && value !== null && !Array.isArray(value)
+}
+
+/** Returns the field a bare string maps to for `toolName`, or null if unmapped. */
+function getPlainStringToolArgumentField(toolName: string): string | null {
+  return STRING_ARGUMENT_TOOL_FIELDS.get(toolName) ?? null
+}
+
+/** True when `toolName` has a bare-string field mapping of its own. */
+export function hasToolFieldMapping(toolName: string): boolean {
+  return STRING_ARGUMENT_TOOL_FIELDS.has(toolName)
+}
+
+/** Wraps `value` as `{ [field]: value }` for a mapped tool; null otherwise. */
+function wrapPlainStringToolArguments(
+  toolName: string,
+  value: string,
+): Record<string, string> | null {
+  const field = getPlainStringToolArgumentField(toolName)
+  if (field === null) return null
+  return { [field]: value }
+}
+
+/**
+ * Converts the raw `arguments` payload of a tool call into a tool input.
+ *
+ * - `undefined` yields `{}`.
+ * - A JSON object is returned as parsed.
+ * - A non-blank JSON string is wrapped into the tool's mapped field, or
+ *   returned as the parsed string when the tool has no mapping.
+ * - Any other JSON value (blank string, number, boolean, null, array) is
+ *   returned as parsed so that schema validation reports the error.
+ * - Text that is not JSON is wrapped into the mapped field, except that
+ *   blank text, text resembling an object literal, and unmapped tools all
+ *   yield `{}` so garbage never becomes executable input.
+ * - A value that is not a string at runtime is returned as-is when it is a
+ *   plain object, and replaced by `{}` otherwise.
+ *
+ * @param toolName - Tool name as reported by the provider.
+ * @param rawArguments - Unparsed arguments string, if any.
+ * @returns The normalized tool input.
+ */
+export function normalizeToolArguments(
+  toolName: string,
+  rawArguments: string | undefined,
+): unknown {
+  // Declared as a string, but the value is read from provider JSON; an
+  // object here would otherwise throw a TypeError in the fallback path.
+  const raw: unknown = rawArguments
+  if (raw === undefined) return {}
+  if (typeof raw !== 'string') return isRecord(raw) ? raw : {}
+
+  let parsed: unknown
+  try {
+    parsed = JSON.parse(raw)
+  } catch {
+    // Not valid JSON — treat as a plain string, but never wrap blank text
+    // or something that looks like a malformed object literal.
+    if (isBlankString(raw) || isLikelyStructuredObjectLiteral(raw)) {
+      return {}
+    }
+    return wrapPlainStringToolArguments(toolName, raw) ?? {}
+  }
+
+  if (isRecord(parsed)) return parsed
+  if (typeof parsed === 'string' && !isBlankString(parsed)) {
+    return wrapPlainStringToolArguments(toolName, parsed) ?? parsed
+  }
+  // Blank strings, numbers, booleans, null and arrays pass through as-is.
+  return parsed
+}