fix: restore Grep and Glob reliability on OpenAI paths (#461)

* fix: restore Grep and Glob reliability on OpenAI paths

Preserve Grep and Glob pattern fields during OpenAI/Codex schema sanitization, and fall back to system ripgrep when the packaged binary is missing. This keeps search tool schemas intact and improves Linux usability for npm/source installs.

Co-Authored-By: Claude Opus 4.6 <noreply@openclaude.dev>

* test: clean up ripgrep fallback test helpers

Remove the unused ripgrepCommand import and normalize mocked builtin ripgrep paths so the test behaves consistently across platforms.

Co-Authored-By: Claude Opus 4.6 <noreply@openclaude.dev>

* test: remove duplicate Codex URI schema case

Drop the duplicated WebFetch URI-format test in codexShim.test.ts so test names stay unique and failures remain easier to read.

Co-Authored-By: Claude Opus 4.6 <noreply@openclaude.dev>

* test: stabilize ripgrep fallback coverage

Avoid fs/module mocking in ripgrep fallback tests by extracting the config selection logic into a pure helper. This preserves the fallback coverage while removing the test interaction that caused the narrowed Bun hang repro.

Co-Authored-By: Claude Opus 4.6 <noreply@openclaude.dev>

* test: tighten ripgrep and schema coverage

Align the ripgrep fallback test with the actual auto-fallback branch, clean up strict typing in schema sanitizer tests, and tighten ripgrep error narrowing for type safety.

Co-Authored-By: Claude Opus 4.6 <noreply@openclaude.dev>

---------

Co-authored-by: Claude Opus 4.6 <noreply@openclaude.dev>
This commit is contained in:
ibaaaaal
2026-04-07 16:26:00 +07:00
committed by GitHub
parent b07bafa5bd
commit 600c01faf7
6 changed files with 366 additions and 28 deletions

View File

@@ -201,6 +201,117 @@ describe('Codex request translation', () => {
])
})
test('preserves Grep tool pattern field in Codex strict schemas', () => {
const tools = convertToolsToResponsesTools([
{
name: 'Grep',
description: 'Search file contents',
input_schema: {
type: 'object',
properties: {
pattern: { type: 'string', description: 'Search pattern' },
path: { type: 'string' },
},
required: ['pattern'],
additionalProperties: false,
},
},
])
expect(tools).toEqual([
{
type: 'function',
name: 'Grep',
description: 'Search file contents',
parameters: {
type: 'object',
properties: {
pattern: { type: 'string', description: 'Search pattern' },
path: { type: 'string' },
},
required: ['pattern', 'path'],
additionalProperties: false,
},
strict: true,
},
])
})
test('preserves Glob tool pattern field in Codex strict schemas', () => {
const tools = convertToolsToResponsesTools([
{
name: 'Glob',
description: 'Find files by pattern',
input_schema: {
type: 'object',
properties: {
pattern: { type: 'string', description: 'Glob pattern' },
path: { type: 'string' },
},
required: ['pattern'],
additionalProperties: false,
},
},
])
expect(tools).toEqual([
{
type: 'function',
name: 'Glob',
description: 'Find files by pattern',
parameters: {
type: 'object',
properties: {
pattern: { type: 'string', description: 'Glob pattern' },
path: { type: 'string' },
},
required: ['pattern', 'path'],
additionalProperties: false,
},
strict: true,
},
])
})
test('strips validator pattern keyword but keeps string field named pattern in Codex schemas', () => {
const tools = convertToolsToResponsesTools([
{
name: 'RegexProbe',
description: 'Probe regex schema handling',
input_schema: {
type: 'object',
properties: {
pattern: {
type: 'string',
pattern: '^[a-z]+$',
},
},
required: ['pattern'],
additionalProperties: false,
},
},
])
expect(tools).toEqual([
{
type: 'function',
name: 'RegexProbe',
description: 'Probe regex schema handling',
parameters: {
type: 'object',
properties: {
pattern: {
type: 'string',
},
},
required: ['pattern'],
additionalProperties: false,
},
strict: true,
},
])
})
test('removes unsupported uri format from strict Responses schemas', () => {
const tools = convertToolsToResponsesTools([
{

View File

@@ -261,6 +261,73 @@ test('preserves Gemini tool call extra_content in follow-up requests', async ()
})
})
test('preserves Grep tool pattern field in OpenAI-compatible schemas', async () => {
let requestBody: Record<string, unknown> | undefined
globalThis.fetch = (async (_input, init) => {
requestBody = JSON.parse(String(init?.body))
return new Response(
JSON.stringify({
id: 'chatcmpl-grep-schema',
model: 'qwen/qwen3.6-plus',
choices: [
{
message: {
role: 'assistant',
content: 'done',
},
finish_reason: 'stop',
},
],
usage: {
prompt_tokens: 12,
completion_tokens: 4,
total_tokens: 16,
},
}),
{
headers: {
'Content-Type': 'application/json',
},
},
)
}) as FetchType
const client = createOpenAIShimClient({}) as OpenAIShimClient
await client.beta.messages.create({
model: 'qwen/qwen3.6-plus',
system: 'test system',
messages: [{ role: 'user', content: 'Use Grep' }],
tools: [
{
name: 'Grep',
description: 'Search file contents',
input_schema: {
type: 'object',
properties: {
pattern: { type: 'string', description: 'Search pattern' },
path: { type: 'string' },
},
required: ['pattern'],
additionalProperties: false,
},
},
],
max_tokens: 64,
stream: false,
})
const tools = requestBody?.tools as Array<Record<string, unknown>> | undefined
const grepTool = tools?.find(tool => (tool.function as Record<string, unknown>)?.name === 'Grep') as
| { function?: { parameters?: { properties?: Record<string, unknown>; required?: string[] } } }
| undefined
expect(Object.keys(grepTool?.function?.parameters?.properties ?? {})).toContain('pattern')
expect(grepTool?.function?.parameters?.required).toContain('pattern')
})
test('does not infer Gemini mode from OPENAI_BASE_URL path substrings', async () => {
let capturedAuthorization: string | null = null

View File

@@ -1,11 +1,52 @@
import { expect, test } from 'bun:test'
import path from 'path'
import { wrapRipgrepUnavailableError } from './ripgrep.ts'
import { resolveRipgrepConfig, wrapRipgrepUnavailableError } from './ripgrep.js'
const MOCK_BUILTIN_PATH = path.normalize(
process.platform === 'win32'
? `vendor/ripgrep/${process.arch}-win32/rg.exe`
: `vendor/ripgrep/${process.arch}-${process.platform}/rg`,
)
test('ripgrepCommand falls back to system rg when builtin binary is missing', () => {
const config = resolveRipgrepConfig({
userWantsSystemRipgrep: false,
bundledMode: false,
builtinCommand: MOCK_BUILTIN_PATH,
builtinExists: false,
systemExecutablePath: '/usr/bin/rg',
processExecPath: '/fake/bun',
})
expect(config).toMatchObject({
mode: 'system',
command: 'rg',
args: [],
})
})
test('ripgrepCommand keeps builtin mode when bundled binary exists', () => {
const config = resolveRipgrepConfig({
userWantsSystemRipgrep: false,
bundledMode: false,
builtinCommand: MOCK_BUILTIN_PATH,
builtinExists: true,
systemExecutablePath: '/usr/bin/rg',
processExecPath: '/fake/bun',
})
expect(config).toMatchObject({
mode: 'builtin',
command: MOCK_BUILTIN_PATH,
args: [],
})
})
test('wrapRipgrepUnavailableError explains missing packaged fallback', () => {
const error = wrapRipgrepUnavailableError(
{ code: 'ENOENT', message: 'spawn rg ENOENT' },
{ mode: 'builtin', command: 'C:\\fake\\vendor\\ripgrep\\rg.exe' },
{ mode: 'builtin', command: 'C:\\fake\\vendor\\ripgrep\\rg.exe', args: [] },
'win32',
)
@@ -18,7 +59,7 @@ test('wrapRipgrepUnavailableError explains missing packaged fallback', () => {
test('wrapRipgrepUnavailableError explains missing system ripgrep', () => {
const error = wrapRipgrepUnavailableError(
{ code: 'ENOENT', message: 'spawn rg ENOENT' },
{ mode: 'system', command: 'rg' },
{ mode: 'system', command: 'rg', args: [] },
'linux',
)

View File

@@ -1,5 +1,6 @@
import type { ChildProcess, ExecFileException } from 'child_process'
import { execFile, spawn } from 'child_process'
import { existsSync } from 'fs'
import memoize from 'lodash-es/memoize.js'
import { homedir } from 'os'
import * as path from 'path'
@@ -30,40 +31,72 @@ type RipgrepConfig = {
type RipgrepErrorLike = Pick<NodeJS.ErrnoException, 'code' | 'message'>
const getRipgrepConfig = memoize((): RipgrepConfig => {
const userWantsSystemRipgrep = isEnvDefinedFalsy(
process.env.USE_BUILTIN_RIPGREP,
)
function isErrnoException(error: unknown): error is NodeJS.ErrnoException {
return error instanceof Error
}
// Try system ripgrep if user wants it
if (userWantsSystemRipgrep) {
const { cmd: systemPath } = findExecutable('rg', [])
if (systemPath !== 'rg') {
// SECURITY: Use command name 'rg' instead of systemPath to prevent PATH hijacking
// If we used systemPath, a malicious ./rg.exe in current directory could be executed
// Using just 'rg' lets the OS resolve it safely with NoDefaultCurrentDirectoryInExePath protection
return { mode: 'system', command: 'rg', args: [] }
}
type ResolveRipgrepConfigArgs = {
userWantsSystemRipgrep: boolean
bundledMode: boolean
builtinCommand: string
builtinExists: boolean
systemExecutablePath: string
processExecPath?: string
}
export function resolveRipgrepConfig({
userWantsSystemRipgrep,
bundledMode,
builtinCommand,
builtinExists,
systemExecutablePath,
processExecPath = process.execPath,
}: ResolveRipgrepConfigArgs): RipgrepConfig {
if (userWantsSystemRipgrep && systemExecutablePath !== 'rg') {
// SECURITY: Use command name 'rg' instead of systemExecutablePath to prevent PATH hijacking
return { mode: 'system', command: 'rg', args: [] }
}
// In bundled (native) mode, ripgrep is statically compiled into bun-internal
// and dispatches based on argv[0]. We spawn ourselves with argv0='rg'.
if (isInBundledMode()) {
if (bundledMode) {
return {
mode: 'embedded',
command: process.execPath,
command: processExecPath,
args: ['--no-config'],
argv0: 'rg',
}
}
if (builtinExists) {
return { mode: 'builtin', command: builtinCommand, args: [] }
}
if (systemExecutablePath !== 'rg') {
return { mode: 'system', command: 'rg', args: [] }
}
return { mode: 'builtin', command: builtinCommand, args: [] }
}
const getRipgrepConfig = memoize((): RipgrepConfig => {
const userWantsSystemRipgrep = isEnvDefinedFalsy(
process.env.USE_BUILTIN_RIPGREP,
)
const bundledMode = isInBundledMode()
const rgRoot = path.resolve(__dirname, 'vendor', 'ripgrep')
const command =
const builtinCommand =
process.platform === 'win32'
? path.resolve(rgRoot, `${process.arch}-win32`, 'rg.exe')
: path.resolve(rgRoot, `${process.arch}-${process.platform}`, 'rg')
const builtinExists = existsSync(builtinCommand)
const { cmd: systemExecutablePath } = findExecutable('rg', [])
return { mode: 'builtin', command, args: [] }
return resolveRipgrepConfig({
userWantsSystemRipgrep,
bundledMode,
builtinCommand,
builtinExists,
systemExecutablePath,
})
})
export function ripgrepCommand(): {
@@ -324,7 +357,9 @@ async function ripGrepFileCount(
if (settled) return
settled = true
reject(
err.code === 'ENOENT' ? wrapRipgrepUnavailableError(err) : err,
isErrnoException(err) && err.code === 'ENOENT'
? wrapRipgrepUnavailableError(err)
: err,
)
})
})
@@ -388,7 +423,9 @@ export async function ripGrepStream(
if (settled) return
settled = true
reject(
err.code === 'ENOENT' ? wrapRipgrepUnavailableError(err) : err,
isErrnoException(err) && err.code === 'ENOENT'
? wrapRipgrepUnavailableError(err)
: err,
)
})
})
@@ -436,7 +473,9 @@ export async function ripGrep(
const CRITICAL_ERROR_CODES = ['ENOENT', 'EACCES', 'EPERM']
if (CRITICAL_ERROR_CODES.includes(error.code as string)) {
reject(
error.code === 'ENOENT' ? wrapRipgrepUnavailableError(error) : error,
isErrnoException(error) && error.code === 'ENOENT'
? wrapRipgrepUnavailableError(error)
: error,
)
return
}

View File

@@ -0,0 +1,68 @@
import { describe, expect, test } from 'bun:test'
import { sanitizeSchemaForOpenAICompat } from './schemaSanitizer'
describe('sanitizeSchemaForOpenAICompat', () => {
test('preserves Grep-like properties.pattern while keeping it required', () => {
const schema = {
type: 'object',
properties: {
pattern: {
type: 'string',
description: 'The regular expression pattern to search for in file contents',
},
path: { type: 'string' },
glob: { type: 'string' },
},
required: ['pattern'],
}
const sanitized = sanitizeSchemaForOpenAICompat(schema)
const properties = sanitized.properties as Record<string, unknown> | undefined
expect(Object.keys(properties ?? {})).toEqual(['pattern', 'path', 'glob'])
expect(properties?.pattern).toEqual({
type: 'string',
description: 'The regular expression pattern to search for in file contents',
})
expect(sanitized.required).toEqual(['pattern'])
})
test('preserves Glob-like properties.pattern while keeping it required', () => {
const schema = {
type: 'object',
properties: {
pattern: {
type: 'string',
description: 'The glob pattern to match files against',
},
path: { type: 'string' },
},
required: ['pattern'],
}
const sanitized = sanitizeSchemaForOpenAICompat(schema)
const properties = sanitized.properties as Record<string, unknown> | undefined
expect(Object.keys(properties ?? {})).toEqual(['pattern', 'path'])
expect(properties?.pattern).toEqual({
type: 'string',
description: 'The glob pattern to match files against',
})
expect(sanitized.required).toEqual(['pattern'])
})
test('strips JSON Schema validator pattern from string schemas', () => {
const schema = {
type: 'string',
pattern: '^[a-z]+$',
minLength: 1,
}
const sanitized = sanitizeSchemaForOpenAICompat(schema)
expect(sanitized).toEqual({
type: 'string',
})
})
})

View File

@@ -33,6 +33,15 @@ function stripSchemaKeywords(schema: unknown, keywords: Set<string>): unknown {
const result: Record<string, unknown> = {}
for (const [key, value] of Object.entries(schema)) {
if (key === 'properties' && isSchemaRecord(value)) {
const sanitizedProps: Record<string, unknown> = {}
for (const [propName, propSchema] of Object.entries(value)) {
sanitizedProps[propName] = stripSchemaKeywords(propSchema, keywords)
}
result[key] = sanitizedProps
continue
}
if (keywords.has(key)) {
continue
}
@@ -215,10 +224,13 @@ export function sanitizeSchemaForOpenAICompat(
}
}
if (Array.isArray(record.required) && isSchemaRecord(record.properties)) {
const properties = isSchemaRecord(record.properties)
? record.properties
: undefined
if (Array.isArray(record.required) && properties) {
record.required = record.required.filter(
(value): value is string =>
typeof value === 'string' && value in record.properties,
(value): value is string => typeof value === 'string' && value in properties,
)
}