fix: make schema sanitization provider-specific
This commit is contained in:
@@ -1,216 +1 @@
|
||||
/**
 * Type guard for plain-object schema nodes.
 *
 * @param value - Any value taken from a (possibly malformed) JSON Schema.
 * @returns `true` when `value` is a non-null object that is not an array.
 */
function isSchemaRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value)
}
|
||||
|
||||
/**
 * Structural equality for JSON-like values (primitives, arrays, plain objects).
 *
 * Arrays are compared element by element in order; objects are compared by
 * their own enumerable keys, independent of key order.
 *
 * @param a - First value.
 * @param b - Second value.
 * @returns `true` when both values have the same JSON structure and content.
 */
function deepEqualJsonValue(a: unknown, b: unknown): boolean {
  // `Object.is` makes NaN equal to itself; `===` additionally treats 0 and -0
  // as the same number, which is what JSON Schema instance equality expects.
  if (Object.is(a, b) || a === b) return true

  // From here on only two non-null objects can still be equal.
  if (typeof a !== 'object' || typeof b !== 'object') return false
  if (a === null || b === null) return false

  if (Array.isArray(a) || Array.isArray(b)) {
    if (!Array.isArray(a) || !Array.isArray(b)) return false
    const right: unknown[] = b
    return (
      a.length === right.length &&
      a.every((item, index) => deepEqualJsonValue(item, right[index]))
    )
  }

  const left = a as Record<string, unknown>
  const right = b as Record<string, unknown>
  const leftKeys = Object.keys(left)

  return (
    leftKeys.length === Object.keys(right).length &&
    leftKeys.every(
      key =>
        // Own-property check: the `in` operator also matches inherited names
        // such as `__proto__`, which made unrelated objects compare equal.
        Object.prototype.hasOwnProperty.call(right, key) &&
        deepEqualJsonValue(left[key], right[key]),
    )
  )
}
|
||||
|
||||
/**
 * Check a runtime value against a single JSON Schema primitive type name.
 *
 * @param type - A JSON Schema `type` name (`string`, `number`, `integer`,
 *   `boolean`, `object`, `array` or `null`).
 * @param value - The value to test.
 * @returns `true` when `value` is of that type. Unrecognised type names are
 *   treated permissively and always match.
 */
function matchesJsonSchemaType(type: string, value: unknown): boolean {
  switch (type) {
    case 'string':
      return typeof value === 'string'
    case 'number':
      // NaN and +/-Infinity are not representable in JSON.
      return typeof value === 'number' && Number.isFinite(value)
    case 'integer':
      return typeof value === 'number' && Number.isInteger(value)
    case 'boolean':
      return typeof value === 'boolean'
    case 'object':
      return typeof value === 'object' && value !== null && !Array.isArray(value)
    case 'array':
      return Array.isArray(value)
    case 'null':
      return value === null
    default:
      return true
  }
}
|
||||
|
||||
/**
 * Normalise a schema's `type` keyword to a list of type names.
 *
 * @param record - Schema node to inspect.
 * @returns `[type]` for a string `type`, the string members of an array
 *   `type`, or an empty list when `type` is absent or has any other shape.
 */
function getJsonSchemaTypes(record: Record<string, unknown>): string[] {
  const raw = record.type
  if (typeof raw === 'string') return [raw]
  if (!Array.isArray(raw)) return []
  return raw.filter((entry): entry is string => typeof entry === 'string')
}
|
||||
|
||||
/**
 * Lightweight check of whether `value` is acceptable for `schema`.
 *
 * Only `anyOf`, `oneOf`, `allOf`, `const`, `enum` and `type` are evaluated.
 * All keywords present on the node must accept the value: a combinator no
 * longer short-circuits the sibling `type`/`enum`/`const` checks or the other
 * combinators.
 *
 * @param schema - Schema node to validate against.
 * @param value - Candidate instance value (an enum member, `const`, `default`).
 * @returns `false` as soon as any evaluated keyword rejects the value,
 *   `true` otherwise.
 */
function schemaAllowsValue(schema: Record<string, unknown>, value: unknown): boolean {
  // Branches are re-sanitized so that non-object entries degrade to `{}`.
  const branchAllows = (branch: unknown): boolean =>
    schemaAllowsValue(sanitizeSchemaForOpenAICompat(branch), value)

  if (Array.isArray(schema.anyOf) && !schema.anyOf.some(branchAllows)) {
    return false
  }

  // `oneOf` requires exactly one matching branch.
  if (
    Array.isArray(schema.oneOf) &&
    schema.oneOf.filter(branchAllows).length !== 1
  ) {
    return false
  }

  if (Array.isArray(schema.allOf) && !schema.allOf.every(branchAllows)) {
    return false
  }

  if ('const' in schema && !deepEqualJsonValue(schema.const, value)) {
    return false
  }

  if (
    Array.isArray(schema.enum) &&
    !schema.enum.some(item => deepEqualJsonValue(item, value))
  ) {
    return false
  }

  const types = getJsonSchemaTypes(schema)
  return types.length === 0 || types.some(type => matchesJsonSchemaType(type, value))
}
|
||||
|
||||
/**
 * Normalise the `type` keyword of a schema node in place.
 *
 * - A string `type` is kept only when it is a known JSON Schema type name.
 * - An array `type` is reduced to its distinct known names (first occurrence
 *   order); one survivor collapses to a plain string, none removes `type`.
 * - Any other present value (number, object, null, ...) is removed.
 *
 * @param record - Schema node to mutate.
 */
function sanitizeTypeField(record: Record<string, unknown>): void {
  const allowed = new Set([
    'string',
    'number',
    'integer',
    'boolean',
    'object',
    'array',
    'null',
  ])

  const raw = record.type

  if (typeof raw === 'string') {
    if (!allowed.has(raw)) delete record.type
    return
  }

  if (!Array.isArray(raw)) {
    // A `type` of any other shape is invalid; drop it instead of forwarding it.
    if ('type' in record) delete record.type
    return
  }

  // A Set keeps insertion order, so duplicates collapse onto their first occurrence.
  const kept = [
    ...new Set(
      raw.filter(
        (entry): entry is string => typeof entry === 'string' && allowed.has(entry),
      ),
    ),
  ]

  if (kept.length === 0) {
    delete record.type
  } else if (kept.length === 1) {
    record.type = kept[0]
  } else {
    record.type = kept
  }
}
|
||||
|
||||
/**
 * Sanitize loose or invalid JSON Schema into a form OpenAI-compatible
 * providers are more likely to accept. This is intentionally defensive for
 * external MCP servers that may advertise imperfect schemas.
 *
 * The input is not mutated. The returned copy has `$schema` and
 * `propertyNames` removed, `type` normalised, and `properties`, `items` and
 * `anyOf`/`oneOf`/`allOf` sanitized recursively. `required` is limited to
 * declared properties, and `enum`, `const` and `default` values that the rest
 * of the schema rejects are dropped.
 *
 * @param schema - Candidate schema node.
 * @returns A new schema record; `{}` when `schema` is not a plain object.
 */
export function sanitizeSchemaForOpenAICompat(
  schema: unknown,
): Record<string, unknown> {
  if (!isSchemaRecord(schema)) {
    return {}
  }

  const record: Record<string, unknown> = { ...schema }

  delete record.$schema
  delete record.propertyNames

  sanitizeTypeField(record)

  const declaredProperties = record.properties
  if (isSchemaRecord(declaredProperties)) {
    // Object.fromEntries defines own properties, so odd names such as
    // `__proto__` survive as regular keys.
    const sanitizedProps: Record<string, unknown> = Object.fromEntries(
      Object.entries(declaredProperties).map(
        ([name, value]): [string, unknown] => [
          name,
          sanitizeSchemaForOpenAICompat(value),
        ],
      ),
    )
    record.properties = sanitizedProps

    if (Array.isArray(record.required)) {
      record.required = record.required.filter(
        (name): name is string =>
          typeof name === 'string' &&
          // Own-property check: `in` would also accept inherited names like
          // `toString` that are not declared properties.
          Object.prototype.hasOwnProperty.call(sanitizedProps, name),
      )
    }
  }

  if ('items' in record) {
    record.items = Array.isArray(record.items)
      ? record.items.map(item => sanitizeSchemaForOpenAICompat(item))
      : sanitizeSchemaForOpenAICompat(record.items)
  }

  for (const key of ['anyOf', 'oneOf', 'allOf'] as const) {
    const branches = record[key]
    if (Array.isArray(branches)) {
      record[key] = branches.map(item => sanitizeSchemaForOpenAICompat(item))
    }
  }

  // Copy of the record as it stands now, minus one keyword, so that keyword's
  // own value can be validated against the rest of the schema.
  const withoutKeyword = (keyword: string): Record<string, unknown> => {
    const rest = { ...record }
    delete rest[keyword]
    return rest
  }

  const enumValues = record.enum
  if (Array.isArray(enumValues)) {
    const context = withoutKeyword('enum')
    const acceptedValues = enumValues.filter(value =>
      schemaAllowsValue(context, value),
    )
    if (acceptedValues.length > 0) {
      record.enum = acceptedValues
    } else {
      delete record.enum
    }
  }

  if (
    'const' in record &&
    !schemaAllowsValue(withoutKeyword('const'), record.const)
  ) {
    delete record.const
  }

  if (
    'default' in record &&
    !schemaAllowsValue(withoutKeyword('default'), record.default)
  ) {
    delete record.default
  }

  return record
}
|
||||
export { sanitizeSchemaForOpenAICompat } from '../../utils/schemaSanitizer.js'
|
||||
|
||||
@@ -38,9 +38,8 @@ import {
|
||||
resolveCodexApiCredentials,
|
||||
resolveProviderRequest,
|
||||
} from './providerConfig.js'
|
||||
import { stripIncompatibleSchemaKeywords } from '../../utils/schemaSanitizer.js'
|
||||
import { sanitizeSchemaForOpenAICompat } from '../../utils/schemaSanitizer.js'
|
||||
import { redactSecretValueForDisplay } from '../../utils/providerProfile.js'
|
||||
import { sanitizeSchemaForOpenAICompat } from './openaiSchemaSanitizer.js'
|
||||
|
||||
const GITHUB_MODELS_DEFAULT_BASE = 'https://models.github.ai/inference'
|
||||
const GITHUB_API_VERSION = '2022-11-28'
|
||||
|
||||
@@ -3,7 +3,7 @@ import { z } from 'zod/v4'
|
||||
import { getEmptyToolPermissionContext, type Tool, type Tools } from '../Tool.js'
|
||||
import { toolToAPISchema } from './api.js'
|
||||
|
||||
test('toolToAPISchema strips incompatible schema keywords from input_schema', async () => {
|
||||
test('toolToAPISchema preserves provider-specific schema keywords in input_schema', async () => {
|
||||
const schema = await toolToAPISchema(
|
||||
{
|
||||
name: 'WebFetch',
|
||||
@@ -18,6 +18,9 @@ test('toolToAPISchema strips incompatible schema keywords from input_schema', as
|
||||
},
|
||||
metadata: {
|
||||
type: 'object',
|
||||
propertyNames: {
|
||||
pattern: '^[a-z]+$',
|
||||
},
|
||||
properties: {
|
||||
callback: {
|
||||
type: 'string',
|
||||
@@ -42,26 +45,22 @@ test('toolToAPISchema strips incompatible schema keywords from input_schema', as
|
||||
properties: {
|
||||
url: {
|
||||
type: 'string',
|
||||
format: 'uri',
|
||||
description: 'Public HTTP or HTTPS URL',
|
||||
},
|
||||
metadata: {
|
||||
type: 'object',
|
||||
propertyNames: {
|
||||
pattern: '^[a-z]+$',
|
||||
},
|
||||
properties: {
|
||||
callback: {
|
||||
type: 'string',
|
||||
format: 'uri-reference',
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
const inputSchema = (schema as { input_schema: Record<string, unknown> }).input_schema
|
||||
const properties = inputSchema.properties as Record<string, Record<string, unknown>>
|
||||
expect(properties.url?.format).toBeUndefined()
|
||||
expect(
|
||||
(
|
||||
properties.metadata?.properties as Record<string, Record<string, unknown>>
|
||||
)?.callback?.format,
|
||||
).toBeUndefined()
|
||||
})
|
||||
|
||||
@@ -60,7 +60,6 @@ import {
|
||||
import { getPlatform } from './platform.js'
|
||||
import { countFilesRoundedRg } from './ripgrep.js'
|
||||
import { jsonStringify } from './slowOperations.js'
|
||||
import { stripIncompatibleSchemaKeywords } from './schemaSanitizer.js'
|
||||
import type { SystemPrompt } from './systemPromptType.js'
|
||||
import { getToolSchemaCache } from './toolSchemaCache.js'
|
||||
import { windowsPathToPosixPath } from './windowsPaths.js'
|
||||
@@ -166,7 +165,6 @@ export async function toolToAPISchema(
|
||||
if (!isAgentSwarmsEnabled()) {
|
||||
input_schema = filterSwarmFieldsFromSchema(tool.name, input_schema)
|
||||
}
|
||||
input_schema = stripIncompatibleSchemaKeywords(input_schema)
|
||||
|
||||
base = {
|
||||
name: tool.name,
|
||||
|
||||
@@ -1,30 +1,246 @@
|
||||
/**
|
||||
* Anthropic-compatible tool schemas reject several JSON Schema keywords that
|
||||
* Zod commonly emits, especially string `format` validators like `uri`.
|
||||
* Strip those fields recursively before sending tool schemas to providers.
|
||||
*/
|
||||
export function stripIncompatibleSchemaKeywords<T>(
|
||||
schema: T,
|
||||
): T {
|
||||
if (Array.isArray(schema)) {
|
||||
return schema.map(item => stripIncompatibleSchemaKeywords(item)) as T
|
||||
/**
 * JSON Schema keywords removed from tool schemas before they are sent to
 * OpenAI-compatible providers (see `sanitizeSchemaForOpenAICompat`).
 */
const OPENAI_INCOMPATIBLE_SCHEMA_KEYWORDS = new Set([
  '$comment',
  '$schema',
  'default',
  'else',
  'examples',
  'format',
  'if',
  'maxLength',
  'maximum',
  'minLength',
  'minimum',
  'multipleOf',
  'pattern',
  'patternProperties',
  'propertyNames',
  'then',
  'unevaluatedProperties',
])
|
||||
|
||||
/**
 * Type guard for plain-object schema nodes.
 *
 * @param value - Any value taken from a (possibly malformed) JSON Schema.
 * @returns `true` when `value` is a non-null object that is not an array.
 */
function isSchemaRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value)
}
|
||||
|
||||
if (!schema || typeof schema !== 'object') {
|
||||
/** Keywords whose value maps user-chosen names to subschemas. */
const SUBSCHEMA_MAP_KEYWORDS = new Set([
  '$defs',
  'definitions',
  'dependentSchemas',
  'patternProperties',
  'properties',
])

/** Keywords whose value is instance data rather than a schema. */
const INSTANCE_DATA_KEYWORDS = new Set(['const', 'default', 'enum', 'examples'])

/**
 * Recursively copy a schema while dropping the given keywords.
 *
 * Keys are removed only where they act as schema keywords. The names inside
 * `properties`-style maps are user-chosen identifiers, so a tool parameter
 * called `pattern` or `format` is preserved. The values of `enum`, `const`,
 * `default` and `examples` are instance data and are passed through unchanged.
 *
 * @param schema - Schema node, array of nodes, or any other value.
 * @param keywords - Keyword names to remove from every schema node.
 * @returns A stripped copy for arrays and objects; any other value as-is.
 */
function stripSchemaKeywords(schema: unknown, keywords: Set<string>): unknown {
  if (Array.isArray(schema)) {
    return schema.map(item => stripSchemaKeywords(item, keywords))
  }

  if (schema === null || typeof schema !== 'object') {
    return schema
  }

  const result: Record<string, unknown> = {}
  for (const [key, value] of Object.entries(schema)) {
    if (keywords.has(key)) {
      continue
    }

    if (INSTANCE_DATA_KEYWORDS.has(key)) {
      result[key] = value
      continue
    }

    const isNameMap =
      SUBSCHEMA_MAP_KEYWORDS.has(key) &&
      value !== null &&
      typeof value === 'object' &&
      !Array.isArray(value)

    result[key] = isNameMap
      ? // Keep every entry name; only the subschema behind it is stripped.
        Object.fromEntries(
          Object.entries(value).map(([name, subschema]): [string, unknown] => [
            name,
            stripSchemaKeywords(subschema, keywords),
          ]),
        )
      : stripSchemaKeywords(value, keywords)
  }

  return result
}
|
||||
|
||||
/**
 * Structural equality for JSON-like values (primitives, arrays, plain objects).
 *
 * Arrays are compared element by element in order; objects are compared by
 * their own enumerable keys, independent of key order.
 *
 * @param a - First value.
 * @param b - Second value.
 * @returns `true` when both values have the same JSON structure and content.
 */
function deepEqualJsonValue(a: unknown, b: unknown): boolean {
  // `Object.is` makes NaN equal to itself; `===` additionally treats 0 and -0
  // as the same number, which is what JSON Schema instance equality expects.
  if (Object.is(a, b) || a === b) return true

  // From here on only two non-null objects can still be equal.
  if (typeof a !== 'object' || typeof b !== 'object') return false
  if (a === null || b === null) return false

  if (Array.isArray(a) || Array.isArray(b)) {
    if (!Array.isArray(a) || !Array.isArray(b)) return false
    const right: unknown[] = b
    return (
      a.length === right.length &&
      a.every((item, index) => deepEqualJsonValue(item, right[index]))
    )
  }

  const left = a as Record<string, unknown>
  const right = b as Record<string, unknown>
  const leftKeys = Object.keys(left)

  return (
    leftKeys.length === Object.keys(right).length &&
    leftKeys.every(
      key =>
        // Own-property check: the `in` operator also matches inherited names
        // such as `__proto__`, which made unrelated objects compare equal.
        Object.prototype.hasOwnProperty.call(right, key) &&
        deepEqualJsonValue(left[key], right[key]),
    )
  )
}
|
||||
|
||||
/**
 * Check a runtime value against a single JSON Schema primitive type name.
 *
 * @param type - A JSON Schema `type` name (`string`, `number`, `integer`,
 *   `boolean`, `object`, `array` or `null`).
 * @param value - The value to test.
 * @returns `true` when `value` is of that type. Unrecognised type names are
 *   treated permissively and always match.
 */
function matchesJsonSchemaType(type: string, value: unknown): boolean {
  switch (type) {
    case 'string':
      return typeof value === 'string'
    case 'number':
      // NaN and +/-Infinity are not representable in JSON.
      return typeof value === 'number' && Number.isFinite(value)
    case 'integer':
      return typeof value === 'number' && Number.isInteger(value)
    case 'boolean':
      return typeof value === 'boolean'
    case 'object':
      return typeof value === 'object' && value !== null && !Array.isArray(value)
    case 'array':
      return Array.isArray(value)
    case 'null':
      return value === null
    default:
      return true
  }
}
|
||||
|
||||
/**
 * Normalise a schema's `type` keyword to a list of type names.
 *
 * @param record - Schema node to inspect.
 * @returns `[type]` for a string `type`, the string members of an array
 *   `type`, or an empty list when `type` is absent or has any other shape.
 */
function getJsonSchemaTypes(record: Record<string, unknown>): string[] {
  const raw = record.type
  if (typeof raw === 'string') return [raw]
  if (!Array.isArray(raw)) return []
  return raw.filter((entry): entry is string => typeof entry === 'string')
}
|
||||
|
||||
/**
 * Lightweight check of whether `value` is acceptable for `schema`.
 *
 * Only `anyOf`, `oneOf`, `allOf`, `const`, `enum` and `type` are evaluated.
 * All keywords present on the node must accept the value: a combinator no
 * longer short-circuits the sibling `type`/`enum`/`const` checks or the other
 * combinators.
 *
 * @param schema - Schema node to validate against.
 * @param value - Candidate instance value (an enum member or `const`).
 * @returns `false` as soon as any evaluated keyword rejects the value,
 *   `true` otherwise.
 */
function schemaAllowsValue(schema: Record<string, unknown>, value: unknown): boolean {
  // Branches are re-sanitized so that non-object entries degrade to `{}`.
  const branchAllows = (branch: unknown): boolean =>
    schemaAllowsValue(sanitizeSchemaForOpenAICompat(branch), value)

  if (Array.isArray(schema.anyOf) && !schema.anyOf.some(branchAllows)) {
    return false
  }

  // `oneOf` requires exactly one matching branch.
  if (
    Array.isArray(schema.oneOf) &&
    schema.oneOf.filter(branchAllows).length !== 1
  ) {
    return false
  }

  if (Array.isArray(schema.allOf) && !schema.allOf.every(branchAllows)) {
    return false
  }

  if ('const' in schema && !deepEqualJsonValue(schema.const, value)) {
    return false
  }

  if (
    Array.isArray(schema.enum) &&
    !schema.enum.some(item => deepEqualJsonValue(item, value))
  ) {
    return false
  }

  const types = getJsonSchemaTypes(schema)
  return types.length === 0 || types.some(type => matchesJsonSchemaType(type, value))
}
|
||||
|
||||
/**
 * Normalise the `type` keyword of a schema node in place.
 *
 * - A string `type` is kept only when it is a known JSON Schema type name.
 * - An array `type` is reduced to its distinct known names (first occurrence
 *   order); one survivor collapses to a plain string, none removes `type`.
 * - Any other present value (number, object, null, ...) is removed.
 *
 * @param record - Schema node to mutate.
 */
function sanitizeTypeField(record: Record<string, unknown>): void {
  const allowed = new Set([
    'string',
    'number',
    'integer',
    'boolean',
    'object',
    'array',
    'null',
  ])

  const raw = record.type

  if (typeof raw === 'string') {
    if (!allowed.has(raw)) delete record.type
    return
  }

  if (!Array.isArray(raw)) {
    // A `type` of any other shape is invalid; drop it instead of forwarding it.
    if ('type' in record) delete record.type
    return
  }

  // A Set keeps insertion order, so duplicates collapse onto their first occurrence.
  const kept = [
    ...new Set(
      raw.filter(
        (entry): entry is string => typeof entry === 'string' && allowed.has(entry),
      ),
    ),
  ]

  if (kept.length === 0) {
    delete record.type
  } else if (kept.length === 1) {
    record.type = kept[0]
  } else {
    record.type = kept
  }
}
|
||||
|
||||
/**
 * Sanitize JSON Schema into a shape OpenAI-compatible providers and Codex
 * strict-mode tooling are more likely to accept. This strips
 * provider-rejected keywords while keeping enum/const cleanup defensive for
 * imperfect MCP schemas.
 *
 * The input is not mutated. After keyword stripping, `type` is normalised,
 * and `properties`, `items` and `anyOf`/`oneOf`/`allOf` are sanitized
 * recursively. `required` is limited to declared properties, and `enum` and
 * `const` values that the rest of the schema rejects are dropped.
 *
 * @param schema - Candidate schema node.
 * @returns A new schema record; `{}` when `schema` is not a plain object.
 */
export function sanitizeSchemaForOpenAICompat(
  schema: unknown,
): Record<string, unknown> {
  const stripped = stripSchemaKeywords(schema, OPENAI_INCOMPATIBLE_SCHEMA_KEYWORDS)
  if (!isSchemaRecord(stripped)) {
    return {}
  }

  const record: Record<string, unknown> = { ...stripped }

  sanitizeTypeField(record)

  const declaredProperties = record.properties
  if (isSchemaRecord(declaredProperties)) {
    // Object.fromEntries defines own properties, so odd names such as
    // `__proto__` survive as regular keys.
    const sanitizedProps: Record<string, unknown> = Object.fromEntries(
      Object.entries(declaredProperties).map(
        ([name, value]): [string, unknown] => [
          name,
          sanitizeSchemaForOpenAICompat(value),
        ],
      ),
    )
    record.properties = sanitizedProps

    if (Array.isArray(record.required)) {
      record.required = record.required.filter(
        (name): name is string =>
          typeof name === 'string' &&
          // Own-property check: `in` would also accept inherited names like
          // `toString` that are not declared properties.
          Object.prototype.hasOwnProperty.call(sanitizedProps, name),
      )
    }
  }

  if ('items' in record) {
    record.items = Array.isArray(record.items)
      ? record.items.map(item => sanitizeSchemaForOpenAICompat(item))
      : sanitizeSchemaForOpenAICompat(record.items)
  }

  for (const key of ['anyOf', 'oneOf', 'allOf'] as const) {
    const branches = record[key]
    if (Array.isArray(branches)) {
      record[key] = branches.map(item => sanitizeSchemaForOpenAICompat(item))
    }
  }

  // Copy of the record as it stands now, minus one keyword, so that keyword's
  // own value can be validated against the rest of the schema.
  const withoutKeyword = (keyword: string): Record<string, unknown> => {
    const rest = { ...record }
    delete rest[keyword]
    return rest
  }

  const enumValues = record.enum
  if (Array.isArray(enumValues)) {
    const context = withoutKeyword('enum')
    const acceptedValues = enumValues.filter(value =>
      schemaAllowsValue(context, value),
    )
    if (acceptedValues.length > 0) {
      record.enum = acceptedValues
    } else {
      delete record.enum
    }
  }

  if (
    'const' in record &&
    !schemaAllowsValue(withoutKeyword('const'), record.const)
  ) {
    delete record.const
  }

  return record
}
|
||||
|
||||
Reference in New Issue
Block a user