fix: restore Grep and Glob reliability on OpenAI paths (#461)

* fix: restore Grep and Glob reliability on OpenAI paths Preserve Grep and Glob pattern fields during OpenAI/Codex schema sanitization, and fall back to system ripgrep when the packaged binary is missing. This keeps search tool schemas intact and improves Linux usability for npm/source installs. Co-Authored-By: Claude Opus 4.6 <noreply@openclaude.dev> * test: clean up ripgrep fallback test helpers Remove the unused ripgrepCommand import and normalize mocked builtin ripgrep paths so the test behaves consistently across platforms. Co-Authored-By: Claude Opus 4.6 <noreply@openclaude.dev> * test: remove duplicate Codex URI schema case Drop the duplicated WebFetch URI-format test in codexShim.test.ts so test names stay unique and failures remain easier to read. Co-Authored-By: Claude Opus 4.6 <noreply@openclaude.dev> * test: stabilize ripgrep fallback coverage Avoid fs/module mocking in ripgrep fallback tests by extracting the config selection logic into a pure helper. This preserves the fallback coverage while removing the test interaction that caused the narrowed Bun hang repro. Co-Authored-By: Claude Opus 4.6 <noreply@openclaude.dev> * test: tighten ripgrep and schema coverage Align the ripgrep fallback test with the actual auto-fallback branch, clean up strict typing in schema sanitizer tests, and tighten ripgrep error narrowing for type safety. Co-Authored-By: Claude Opus 4.6 <noreply@openclaude.dev> --------- Co-authored-by: Claude Opus 4.6 <noreply@openclaude.dev>
2026-04-07 16:26:00 +07:00
parent b07bafa5bd
commit 600c01faf7
6 changed files with 366 additions and 28 deletions
--- a/src/services/api/codexShim.test.ts
+++ b/src/services/api/codexShim.test.ts
@@ -201,6 +201,117 @@ describe('Codex request translation', () => {
    ])
  })

+  test('preserves Grep tool pattern field in Codex strict schemas', () => {
+    const tools = convertToolsToResponsesTools([
+      {
+        name: 'Grep',
+        description: 'Search file contents',
+        input_schema: {
+          type: 'object',
+          properties: {
+            pattern: { type: 'string', description: 'Search pattern' },
+            path: { type: 'string' },
+          },
+          required: ['pattern'],
+          additionalProperties: false,
+        },
+      },
+    ])
+
+    expect(tools).toEqual([
+      {
+        type: 'function',
+        name: 'Grep',
+        description: 'Search file contents',
+        parameters: {
+          type: 'object',
+          properties: {
+            pattern: { type: 'string', description: 'Search pattern' },
+            path: { type: 'string' },
+          },
+          required: ['pattern', 'path'],
+          additionalProperties: false,
+        },
+        strict: true,
+      },
+    ])
+  })
+
+  test('preserves Glob tool pattern field in Codex strict schemas', () => {
+    const tools = convertToolsToResponsesTools([
+      {
+        name: 'Glob',
+        description: 'Find files by pattern',
+        input_schema: {
+          type: 'object',
+          properties: {
+            pattern: { type: 'string', description: 'Glob pattern' },
+            path: { type: 'string' },
+          },
+          required: ['pattern'],
+          additionalProperties: false,
+        },
+      },
+    ])
+
+    expect(tools).toEqual([
+      {
+        type: 'function',
+        name: 'Glob',
+        description: 'Find files by pattern',
+        parameters: {
+          type: 'object',
+          properties: {
+            pattern: { type: 'string', description: 'Glob pattern' },
+            path: { type: 'string' },
+          },
+          required: ['pattern', 'path'],
+          additionalProperties: false,
+        },
+        strict: true,
+      },
+    ])
+  })
+
+  test('strips validator pattern keyword but keeps string field named pattern in Codex schemas', () => {
+    const tools = convertToolsToResponsesTools([
+      {
+        name: 'RegexProbe',
+        description: 'Probe regex schema handling',
+        input_schema: {
+          type: 'object',
+          properties: {
+            pattern: {
+              type: 'string',
+              pattern: '^[a-z]+$',
+            },
+          },
+          required: ['pattern'],
+          additionalProperties: false,
+        },
+      },
+    ])
+
+    expect(tools).toEqual([
+      {
+        type: 'function',
+        name: 'RegexProbe',
+        description: 'Probe regex schema handling',
+        parameters: {
+          type: 'object',
+          properties: {
+            pattern: {
+              type: 'string',
+            },
+          },
+          required: ['pattern'],
+          additionalProperties: false,
+        },
+        strict: true,
+      },
+    ])
+  })
+
  test('removes unsupported uri format from strict Responses schemas', () => {
    const tools = convertToolsToResponsesTools([
      {
--- a/src/services/api/openaiShim.test.ts
+++ b/src/services/api/openaiShim.test.ts
@@ -261,6 +261,73 @@ test('preserves Gemini tool call extra_content in follow-up requests', async ()
  })
 })

+test('preserves Grep tool pattern field in OpenAI-compatible schemas', async () => {
+  let requestBody: Record<string, unknown> | undefined
+
+  globalThis.fetch = (async (_input, init) => {
+    requestBody = JSON.parse(String(init?.body))
+
+    return new Response(
+      JSON.stringify({
+        id: 'chatcmpl-grep-schema',
+        model: 'qwen/qwen3.6-plus',
+        choices: [
+          {
+            message: {
+              role: 'assistant',
+              content: 'done',
+            },
+            finish_reason: 'stop',
+          },
+        ],
+        usage: {
+          prompt_tokens: 12,
+          completion_tokens: 4,
+          total_tokens: 16,
+        },
+      }),
+      {
+        headers: {
+          'Content-Type': 'application/json',
+        },
+      },
+    )
+  }) as FetchType
+
+  const client = createOpenAIShimClient({}) as OpenAIShimClient
+
+  await client.beta.messages.create({
+    model: 'qwen/qwen3.6-plus',
+    system: 'test system',
+    messages: [{ role: 'user', content: 'Use Grep' }],
+    tools: [
+      {
+        name: 'Grep',
+        description: 'Search file contents',
+        input_schema: {
+          type: 'object',
+          properties: {
+            pattern: { type: 'string', description: 'Search pattern' },
+            path: { type: 'string' },
+          },
+          required: ['pattern'],
+          additionalProperties: false,
+        },
+      },
+    ],
+    max_tokens: 64,
+    stream: false,
+  })
+
+  const tools = requestBody?.tools as Array<Record<string, unknown>> | undefined
+  const grepTool = tools?.find(tool => (tool.function as Record<string, unknown>)?.name === 'Grep') as
+    | { function?: { parameters?: { properties?: Record<string, unknown>; required?: string[] } } }
+    | undefined
+
+  expect(Object.keys(grepTool?.function?.parameters?.properties ?? {})).toContain('pattern')
+  expect(grepTool?.function?.parameters?.required).toContain('pattern')
+})
+
 test('does not infer Gemini mode from OPENAI_BASE_URL path substrings', async () => {
  let capturedAuthorization: string | null = null

--- a/src/utils/ripgrep.test.ts
+++ b/src/utils/ripgrep.test.ts
@@ -1,11 +1,52 @@
 import { expect, test } from 'bun:test'
+import path from 'path'

-import { wrapRipgrepUnavailableError } from './ripgrep.ts'
+import { resolveRipgrepConfig, wrapRipgrepUnavailableError } from './ripgrep.js'
+
+const MOCK_BUILTIN_PATH = path.normalize(
+  process.platform === 'win32'
+    ? `vendor/ripgrep/${process.arch}-win32/rg.exe`
+    : `vendor/ripgrep/${process.arch}-${process.platform}/rg`,
+)
+
+test('ripgrepCommand falls back to system rg when builtin binary is missing', () => {
+  const config = resolveRipgrepConfig({
+    userWantsSystemRipgrep: false,
+    bundledMode: false,
+    builtinCommand: MOCK_BUILTIN_PATH,
+    builtinExists: false,
+    systemExecutablePath: '/usr/bin/rg',
+    processExecPath: '/fake/bun',
+  })
+
+  expect(config).toMatchObject({
+    mode: 'system',
+    command: 'rg',
+    args: [],
+  })
+})
+
+test('ripgrepCommand keeps builtin mode when bundled binary exists', () => {
+  const config = resolveRipgrepConfig({
+    userWantsSystemRipgrep: false,
+    bundledMode: false,
+    builtinCommand: MOCK_BUILTIN_PATH,
+    builtinExists: true,
+    systemExecutablePath: '/usr/bin/rg',
+    processExecPath: '/fake/bun',
+  })
+
+  expect(config).toMatchObject({
+    mode: 'builtin',
+    command: MOCK_BUILTIN_PATH,
+    args: [],
+  })
+})

 test('wrapRipgrepUnavailableError explains missing packaged fallback', () => {
  const error = wrapRipgrepUnavailableError(
    { code: 'ENOENT', message: 'spawn rg ENOENT' },
-    { mode: 'builtin', command: 'C:\\fake\\vendor\\ripgrep\\rg.exe' },
+    { mode: 'builtin', command: 'C:\\fake\\vendor\\ripgrep\\rg.exe', args: [] },
    'win32',
  )

@@ -18,7 +59,7 @@ test('wrapRipgrepUnavailableError explains missing packaged fallback', () => {
 test('wrapRipgrepUnavailableError explains missing system ripgrep', () => {
  const error = wrapRipgrepUnavailableError(
    { code: 'ENOENT', message: 'spawn rg ENOENT' },
-    { mode: 'system', command: 'rg' },
+    { mode: 'system', command: 'rg', args: [] },
    'linux',
  )

--- a/src/utils/ripgrep.ts
+++ b/src/utils/ripgrep.ts
@@ -1,5 +1,6 @@
 import type { ChildProcess, ExecFileException } from 'child_process'
 import { execFile, spawn } from 'child_process'
+import { existsSync } from 'fs'
 import memoize from 'lodash-es/memoize.js'
 import { homedir } from 'os'
 import * as path from 'path'
@@ -30,40 +31,72 @@ type RipgrepConfig = {

 type RipgrepErrorLike = Pick<NodeJS.ErrnoException, 'code' | 'message'>

-const getRipgrepConfig = memoize((): RipgrepConfig => {
-  const userWantsSystemRipgrep = isEnvDefinedFalsy(
-    process.env.USE_BUILTIN_RIPGREP,
-  )
+function isErrnoException(error: unknown): error is NodeJS.ErrnoException {
+  return error instanceof Error
+}

-  // Try system ripgrep if user wants it
-  if (userWantsSystemRipgrep) {
-    const { cmd: systemPath } = findExecutable('rg', [])
-    if (systemPath !== 'rg') {
-      // SECURITY: Use command name 'rg' instead of systemPath to prevent PATH hijacking
-      // If we used systemPath, a malicious ./rg.exe in current directory could be executed
-      // Using just 'rg' lets the OS resolve it safely with NoDefaultCurrentDirectoryInExePath protection
-      return { mode: 'system', command: 'rg', args: [] }
-    }
+type ResolveRipgrepConfigArgs = {
+  userWantsSystemRipgrep: boolean
+  bundledMode: boolean
+  builtinCommand: string
+  builtinExists: boolean
+  systemExecutablePath: string
+  processExecPath?: string
+}
+
+export function resolveRipgrepConfig({
+  userWantsSystemRipgrep,
+  bundledMode,
+  builtinCommand,
+  builtinExists,
+  systemExecutablePath,
+  processExecPath = process.execPath,
+}: ResolveRipgrepConfigArgs): RipgrepConfig {
+  if (userWantsSystemRipgrep && systemExecutablePath !== 'rg') {
+    // SECURITY: Use command name 'rg' instead of systemExecutablePath to prevent PATH hijacking
+    return { mode: 'system', command: 'rg', args: [] }
  }

-  // In bundled (native) mode, ripgrep is statically compiled into bun-internal
-  // and dispatches based on argv[0]. We spawn ourselves with argv0='rg'.
-  if (isInBundledMode()) {
+  if (bundledMode) {
    return {
      mode: 'embedded',
-      command: process.execPath,
+      command: processExecPath,
      args: ['--no-config'],
      argv0: 'rg',
    }
  }

+  if (builtinExists) {
+    return { mode: 'builtin', command: builtinCommand, args: [] }
+  }
+
+  if (systemExecutablePath !== 'rg') {
+    return { mode: 'system', command: 'rg', args: [] }
+  }
+
+  return { mode: 'builtin', command: builtinCommand, args: [] }
+}
+
+const getRipgrepConfig = memoize((): RipgrepConfig => {
+  const userWantsSystemRipgrep = isEnvDefinedFalsy(
+    process.env.USE_BUILTIN_RIPGREP,
+  )
+  const bundledMode = isInBundledMode()
  const rgRoot = path.resolve(__dirname, 'vendor', 'ripgrep')
-  const command =
+  const builtinCommand =
    process.platform === 'win32'
      ? path.resolve(rgRoot, `${process.arch}-win32`, 'rg.exe')
      : path.resolve(rgRoot, `${process.arch}-${process.platform}`, 'rg')
+  const builtinExists = existsSync(builtinCommand)
+  const { cmd: systemExecutablePath } = findExecutable('rg', [])

-  return { mode: 'builtin', command, args: [] }
+  return resolveRipgrepConfig({
+    userWantsSystemRipgrep,
+    bundledMode,
+    builtinCommand,
+    builtinExists,
+    systemExecutablePath,
+  })
 })

 export function ripgrepCommand(): {
@@ -324,7 +357,9 @@ async function ripGrepFileCount(
      if (settled) return
      settled = true
      reject(
-        err.code === 'ENOENT' ? wrapRipgrepUnavailableError(err) : err,
+        isErrnoException(err) && err.code === 'ENOENT'
+          ? wrapRipgrepUnavailableError(err)
+          : err,
      )
    })
  })
@@ -388,7 +423,9 @@ export async function ripGrepStream(
      if (settled) return
      settled = true
      reject(
-        err.code === 'ENOENT' ? wrapRipgrepUnavailableError(err) : err,
+        isErrnoException(err) && err.code === 'ENOENT'
+          ? wrapRipgrepUnavailableError(err)
+          : err,
      )
    })
  })
@@ -436,7 +473,9 @@ export async function ripGrep(
      const CRITICAL_ERROR_CODES = ['ENOENT', 'EACCES', 'EPERM']
      if (CRITICAL_ERROR_CODES.includes(error.code as string)) {
        reject(
-          error.code === 'ENOENT' ? wrapRipgrepUnavailableError(error) : error,
+          isErrnoException(error) && error.code === 'ENOENT'
+            ? wrapRipgrepUnavailableError(error)
+            : error,
        )
        return
      }
--- a/src/utils/schemaSanitizer.test.ts
+++ b/src/utils/schemaSanitizer.test.ts
@@ -0,0 +1,68 @@
+import { describe, expect, test } from 'bun:test'
+
+import { sanitizeSchemaForOpenAICompat } from './schemaSanitizer'
+
+describe('sanitizeSchemaForOpenAICompat', () => {
+  test('preserves Grep-like properties.pattern while keeping it required', () => {
+    const schema = {
+      type: 'object',
+      properties: {
+        pattern: {
+          type: 'string',
+          description: 'The regular expression pattern to search for in file contents',
+        },
+        path: { type: 'string' },
+        glob: { type: 'string' },
+      },
+      required: ['pattern'],
+    }
+
+    const sanitized = sanitizeSchemaForOpenAICompat(schema)
+    const properties = sanitized.properties as Record<string, unknown> | undefined
+
+    expect(Object.keys(properties ?? {})).toEqual(['pattern', 'path', 'glob'])
+    expect(properties?.pattern).toEqual({
+      type: 'string',
+      description: 'The regular expression pattern to search for in file contents',
+    })
+    expect(sanitized.required).toEqual(['pattern'])
+  })
+
+  test('preserves Glob-like properties.pattern while keeping it required', () => {
+    const schema = {
+      type: 'object',
+      properties: {
+        pattern: {
+          type: 'string',
+          description: 'The glob pattern to match files against',
+        },
+        path: { type: 'string' },
+      },
+      required: ['pattern'],
+    }
+
+    const sanitized = sanitizeSchemaForOpenAICompat(schema)
+    const properties = sanitized.properties as Record<string, unknown> | undefined
+
+    expect(Object.keys(properties ?? {})).toEqual(['pattern', 'path'])
+    expect(properties?.pattern).toEqual({
+      type: 'string',
+      description: 'The glob pattern to match files against',
+    })
+    expect(sanitized.required).toEqual(['pattern'])
+  })
+
+  test('strips JSON Schema validator pattern from string schemas', () => {
+    const schema = {
+      type: 'string',
+      pattern: '^[a-z]+$',
+      minLength: 1,
+    }
+
+    const sanitized = sanitizeSchemaForOpenAICompat(schema)
+
+    expect(sanitized).toEqual({
+      type: 'string',
+    })
+  })
+})
--- a/src/utils/schemaSanitizer.ts
+++ b/src/utils/schemaSanitizer.ts
@@ -33,6 +33,15 @@ function stripSchemaKeywords(schema: unknown, keywords: Set<string>): unknown {

  const result: Record<string, unknown> = {}
  for (const [key, value] of Object.entries(schema)) {
+    if (key === 'properties' && isSchemaRecord(value)) {
+      const sanitizedProps: Record<string, unknown> = {}
+      for (const [propName, propSchema] of Object.entries(value)) {
+        sanitizedProps[propName] = stripSchemaKeywords(propSchema, keywords)
+      }
+      result[key] = sanitizedProps
+      continue
+    }
+
    if (keywords.has(key)) {
      continue
    }
@@ -215,10 +224,13 @@ export function sanitizeSchemaForOpenAICompat(
    }
  }

-  if (Array.isArray(record.required) && isSchemaRecord(record.properties)) {
+  const properties = isSchemaRecord(record.properties)
+    ? record.properties
+    : undefined
+
+  if (Array.isArray(record.required) && properties) {
    record.required = record.required.filter(
-      (value): value is string =>
-        typeof value === 'string' && value in record.properties,
+      (value): value is string => typeof value === 'string' && value in properties,
    )
  }