fix: restore Grep and Glob reliability on OpenAI paths (#461)

* fix: restore Grep and Glob reliability on OpenAI paths

  Preserve Grep and Glob pattern fields during OpenAI/Codex schema sanitization, and fall back to system ripgrep when the packaged binary is missing. This keeps search tool schemas intact and improves Linux usability for npm/source installs.

  Co-Authored-By: Claude Opus 4.6 <noreply@openclaude.dev>

* test: clean up ripgrep fallback test helpers

  Remove the unused ripgrepCommand import and normalize mocked builtin ripgrep paths so the test behaves consistently across platforms.

  Co-Authored-By: Claude Opus 4.6 <noreply@openclaude.dev>

* test: remove duplicate Codex URI schema case

  Drop the duplicated WebFetch URI-format test in codexShim.test.ts so test names stay unique and failures remain easier to read.

  Co-Authored-By: Claude Opus 4.6 <noreply@openclaude.dev>

* test: stabilize ripgrep fallback coverage

  Avoid fs/module mocking in ripgrep fallback tests by extracting the config selection logic into a pure helper. This preserves the fallback coverage while removing the test interaction that caused the narrowed Bun hang repro.

  Co-Authored-By: Claude Opus 4.6 <noreply@openclaude.dev>

* test: tighten ripgrep and schema coverage

  Align the ripgrep fallback test with the actual auto-fallback branch, clean up strict typing in schema sanitizer tests, and tighten ripgrep error narrowing for type safety.

  Co-Authored-By: Claude Opus 4.6 <noreply@openclaude.dev>

---------

Co-authored-by: Claude Opus 4.6 <noreply@openclaude.dev>
2026-04-07 16:26:00 +07:00
parent b07bafa5bd
commit 600c01faf7
6 changed files with 366 additions and 28 deletions
--- a/src/utils/ripgrep.test.ts
+++ b/src/utils/ripgrep.test.ts
@@ -1,11 +1,52 @@
 import { expect, test } from 'bun:test'
+import path from 'path'

-import { wrapRipgrepUnavailableError } from './ripgrep.ts'
+import { resolveRipgrepConfig, wrapRipgrepUnavailableError } from './ripgrep.js'
+
+const MOCK_BUILTIN_PATH = path.normalize(
+  process.platform === 'win32'
+    ? `vendor/ripgrep/${process.arch}-win32/rg.exe`
+    : `vendor/ripgrep/${process.arch}-${process.platform}/rg`,
+)
+
+test('ripgrepCommand falls back to system rg when builtin binary is missing', () => {
+  const config = resolveRipgrepConfig({
+    userWantsSystemRipgrep: false,
+    bundledMode: false,
+    builtinCommand: MOCK_BUILTIN_PATH,
+    builtinExists: false,
+    systemExecutablePath: '/usr/bin/rg',
+    processExecPath: '/fake/bun',
+  })
+
+  expect(config).toMatchObject({
+    mode: 'system',
+    command: 'rg',
+    args: [],
+  })
+})
+
+test('ripgrepCommand keeps builtin mode when bundled binary exists', () => {
+  const config = resolveRipgrepConfig({
+    userWantsSystemRipgrep: false,
+    bundledMode: false,
+    builtinCommand: MOCK_BUILTIN_PATH,
+    builtinExists: true,
+    systemExecutablePath: '/usr/bin/rg',
+    processExecPath: '/fake/bun',
+  })
+
+  expect(config).toMatchObject({
+    mode: 'builtin',
+    command: MOCK_BUILTIN_PATH,
+    args: [],
+  })
+})

 test('wrapRipgrepUnavailableError explains missing packaged fallback', () => {
  const error = wrapRipgrepUnavailableError(
    { code: 'ENOENT', message: 'spawn rg ENOENT' },
-    { mode: 'builtin', command: 'C:\\fake\\vendor\\ripgrep\\rg.exe' },
+    { mode: 'builtin', command: 'C:\\fake\\vendor\\ripgrep\\rg.exe', args: [] },
    'win32',
  )

@@ -18,7 +59,7 @@ test('wrapRipgrepUnavailableError explains missing packaged fallback', () => {
 test('wrapRipgrepUnavailableError explains missing system ripgrep', () => {
  const error = wrapRipgrepUnavailableError(
    { code: 'ENOENT', message: 'spawn rg ENOENT' },
-    { mode: 'system', command: 'rg' },
+    { mode: 'system', command: 'rg', args: [] },
    'linux',
  )

--- a/src/utils/ripgrep.ts
+++ b/src/utils/ripgrep.ts
@@ -1,5 +1,6 @@
 import type { ChildProcess, ExecFileException } from 'child_process'
 import { execFile, spawn } from 'child_process'
+import { existsSync } from 'fs'
 import memoize from 'lodash-es/memoize.js'
 import { homedir } from 'os'
 import * as path from 'path'
@@ -30,40 +31,72 @@ type RipgrepConfig = {

 type RipgrepErrorLike = Pick<NodeJS.ErrnoException, 'code' | 'message'>

-const getRipgrepConfig = memoize((): RipgrepConfig => {
-  const userWantsSystemRipgrep = isEnvDefinedFalsy(
-    process.env.USE_BUILTIN_RIPGREP,
-  )
+function isErrnoException(error: unknown): error is NodeJS.ErrnoException {
+  return error instanceof Error
+}

-  // Try system ripgrep if user wants it
-  if (userWantsSystemRipgrep) {
-    const { cmd: systemPath } = findExecutable('rg', [])
-    if (systemPath !== 'rg') {
-      // SECURITY: Use command name 'rg' instead of systemPath to prevent PATH hijacking
-      // If we used systemPath, a malicious ./rg.exe in current directory could be executed
-      // Using just 'rg' lets the OS resolve it safely with NoDefaultCurrentDirectoryInExePath protection
-      return { mode: 'system', command: 'rg', args: [] }
-    }
+type ResolveRipgrepConfigArgs = {
+  userWantsSystemRipgrep: boolean
+  bundledMode: boolean
+  builtinCommand: string
+  builtinExists: boolean
+  systemExecutablePath: string
+  processExecPath?: string
+}
+
+export function resolveRipgrepConfig({
+  userWantsSystemRipgrep,
+  bundledMode,
+  builtinCommand,
+  builtinExists,
+  systemExecutablePath,
+  processExecPath = process.execPath,
+}: ResolveRipgrepConfigArgs): RipgrepConfig {
+  if (userWantsSystemRipgrep && systemExecutablePath !== 'rg') {
+    // SECURITY: Use command name 'rg' instead of systemExecutablePath to prevent PATH hijacking
+    return { mode: 'system', command: 'rg', args: [] }
  }

-  // In bundled (native) mode, ripgrep is statically compiled into bun-internal
-  // and dispatches based on argv[0]. We spawn ourselves with argv0='rg'.
-  if (isInBundledMode()) {
+  if (bundledMode) {
    return {
      mode: 'embedded',
-      command: process.execPath,
+      command: processExecPath,
      args: ['--no-config'],
      argv0: 'rg',
    }
  }

+  if (builtinExists) {
+    return { mode: 'builtin', command: builtinCommand, args: [] }
+  }
+
+  if (systemExecutablePath !== 'rg') {
+    return { mode: 'system', command: 'rg', args: [] }
+  }
+
+  return { mode: 'builtin', command: builtinCommand, args: [] }
+}
+
+const getRipgrepConfig = memoize((): RipgrepConfig => {
+  const userWantsSystemRipgrep = isEnvDefinedFalsy(
+    process.env.USE_BUILTIN_RIPGREP,
+  )
+  const bundledMode = isInBundledMode()
  const rgRoot = path.resolve(__dirname, 'vendor', 'ripgrep')
-  const command =
+  const builtinCommand =
    process.platform === 'win32'
      ? path.resolve(rgRoot, `${process.arch}-win32`, 'rg.exe')
      : path.resolve(rgRoot, `${process.arch}-${process.platform}`, 'rg')
+  const builtinExists = existsSync(builtinCommand)
+  const { cmd: systemExecutablePath } = findExecutable('rg', [])

-  return { mode: 'builtin', command, args: [] }
+  return resolveRipgrepConfig({
+    userWantsSystemRipgrep,
+    bundledMode,
+    builtinCommand,
+    builtinExists,
+    systemExecutablePath,
+  })
 })

 export function ripgrepCommand(): {
@@ -324,7 +357,9 @@ async function ripGrepFileCount(
      if (settled) return
      settled = true
      reject(
-        err.code === 'ENOENT' ? wrapRipgrepUnavailableError(err) : err,
+        isErrnoException(err) && err.code === 'ENOENT'
+          ? wrapRipgrepUnavailableError(err)
+          : err,
      )
    })
  })
@@ -388,7 +423,9 @@ export async function ripGrepStream(
      if (settled) return
      settled = true
      reject(
-        err.code === 'ENOENT' ? wrapRipgrepUnavailableError(err) : err,
+        isErrnoException(err) && err.code === 'ENOENT'
+          ? wrapRipgrepUnavailableError(err)
+          : err,
      )
    })
  })
@@ -436,7 +473,9 @@ export async function ripGrep(
      const CRITICAL_ERROR_CODES = ['ENOENT', 'EACCES', 'EPERM']
      if (CRITICAL_ERROR_CODES.includes(error.code as string)) {
        reject(
-          error.code === 'ENOENT' ? wrapRipgrepUnavailableError(error) : error,
+          isErrnoException(error) && error.code === 'ENOENT'
+            ? wrapRipgrepUnavailableError(error)
+            : error,
        )
        return
      }
--- a/src/utils/schemaSanitizer.test.ts
+++ b/src/utils/schemaSanitizer.test.ts
@@ -0,0 +1,68 @@
+import { describe, expect, test } from 'bun:test'
+
+import { sanitizeSchemaForOpenAICompat } from './schemaSanitizer'
+
+describe('sanitizeSchemaForOpenAICompat', () => {
+  test('preserves Grep-like properties.pattern while keeping it required', () => {
+    const schema = {
+      type: 'object',
+      properties: {
+        pattern: {
+          type: 'string',
+          description: 'The regular expression pattern to search for in file contents',
+        },
+        path: { type: 'string' },
+        glob: { type: 'string' },
+      },
+      required: ['pattern'],
+    }
+
+    const sanitized = sanitizeSchemaForOpenAICompat(schema)
+    const properties = sanitized.properties as Record<string, unknown> | undefined
+
+    expect(Object.keys(properties ?? {})).toEqual(['pattern', 'path', 'glob'])
+    expect(properties?.pattern).toEqual({
+      type: 'string',
+      description: 'The regular expression pattern to search for in file contents',
+    })
+    expect(sanitized.required).toEqual(['pattern'])
+  })
+
+  test('preserves Glob-like properties.pattern while keeping it required', () => {
+    const schema = {
+      type: 'object',
+      properties: {
+        pattern: {
+          type: 'string',
+          description: 'The glob pattern to match files against',
+        },
+        path: { type: 'string' },
+      },
+      required: ['pattern'],
+    }
+
+    const sanitized = sanitizeSchemaForOpenAICompat(schema)
+    const properties = sanitized.properties as Record<string, unknown> | undefined
+
+    expect(Object.keys(properties ?? {})).toEqual(['pattern', 'path'])
+    expect(properties?.pattern).toEqual({
+      type: 'string',
+      description: 'The glob pattern to match files against',
+    })
+    expect(sanitized.required).toEqual(['pattern'])
+  })
+
+  test('strips JSON Schema validator pattern from string schemas', () => {
+    const schema = {
+      type: 'string',
+      pattern: '^[a-z]+$',
+      minLength: 1,
+    }
+
+    const sanitized = sanitizeSchemaForOpenAICompat(schema)
+
+    expect(sanitized).toEqual({
+      type: 'string',
+    })
+  })
+})
--- a/src/utils/schemaSanitizer.ts
+++ b/src/utils/schemaSanitizer.ts
@@ -33,6 +33,15 @@ function stripSchemaKeywords(schema: unknown, keywords: Set<string>): unknown {

  const result: Record<string, unknown> = {}
  for (const [key, value] of Object.entries(schema)) {
+    if (key === 'properties' && isSchemaRecord(value)) {
+      const sanitizedProps: Record<string, unknown> = {}
+      for (const [propName, propSchema] of Object.entries(value)) {
+        sanitizedProps[propName] = stripSchemaKeywords(propSchema, keywords)
+      }
+      result[key] = sanitizedProps
+      continue
+    }
+
    if (keywords.has(key)) {
      continue
    }
@@ -215,10 +224,13 @@ export function sanitizeSchemaForOpenAICompat(
    }
  }

-  if (Array.isArray(record.required) && isSchemaRecord(record.properties)) {
+  const properties = isSchemaRecord(record.properties)
+    ? record.properties
+    : undefined
+
+  if (Array.isArray(record.required) && properties) {
    record.required = record.required.filter(
-      (value): value is string =>
-        typeof value === 'string' && value in record.properties,
+      (value): value is string => typeof value === 'string' && value in properties,
    )
  }