When set, disables strict schema normalization for non-Gemini providers. Useful for OpenAI-compatible endpoints that reject MCP tools with complex optional params (e.g. list[dict]) with "Extra required key ... supplied" errors.
1876 lines
62 KiB
TypeScript
1876 lines
62 KiB
TypeScript
/**
|
|
* OpenAI-compatible API shim for Claude Code.
|
|
*
|
|
* Translates Anthropic SDK calls (anthropic.beta.messages.create) into
|
|
* OpenAI-compatible chat completion requests and streams back events
|
|
* in the Anthropic streaming format so the rest of the codebase is unaware.
|
|
*
|
|
* Supports: OpenAI, Azure OpenAI, Ollama, LM Studio, OpenRouter,
|
|
* Together, Groq, Fireworks, DeepSeek, Mistral, and any OpenAI-compatible API.
|
|
*
|
|
* Environment variables:
|
|
* CLAUDE_CODE_USE_OPENAI=1 — enable this provider
|
|
* OPENAI_API_KEY=sk-... — API key (optional for local models)
|
|
* OPENAI_BASE_URL=http://... — base URL (default: https://api.openai.com/v1)
|
|
* OPENAI_MODEL=gpt-4o — default model override
|
|
* CODEX_API_KEY / ~/.codex/auth.json — Codex auth for codexplan/codexspark
|
|
*
|
|
* GitHub Copilot API (api.githubcopilot.com), OpenAI-compatible:
|
|
* CLAUDE_CODE_USE_GITHUB=1 — enable GitHub inference (no need for USE_OPENAI)
|
|
* GITHUB_TOKEN or GH_TOKEN — Copilot API token (mapped to Bearer auth)
|
|
* OPENAI_MODEL — optional; use github:copilot or openai/gpt-4.1 style IDs
|
|
*/
|
|
|
|
import { APIError } from '@anthropic-ai/sdk'
|
|
import {
|
|
readCodexCredentialsAsync,
|
|
refreshCodexAccessTokenIfNeeded,
|
|
} from '../../utils/codexCredentials.js'
|
|
import { logForDebugging } from '../../utils/debug.js'
|
|
import { isBareMode, isEnvTruthy } from '../../utils/envUtils.js'
|
|
import { resolveGeminiCredential } from '../../utils/geminiAuth.js'
|
|
import { hydrateGeminiAccessTokenFromSecureStorage } from '../../utils/geminiCredentials.js'
|
|
import { hydrateGithubModelsTokenFromSecureStorage } from '../../utils/githubModelsCredentials.js'
|
|
import {
|
|
looksLikeLeakedReasoningPrefix,
|
|
shouldBufferPotentialReasoningPrefix,
|
|
stripLeakedReasoningPreamble,
|
|
} from './reasoningLeakSanitizer.js'
|
|
import {
|
|
codexStreamToAnthropic,
|
|
collectCodexCompletedResponse,
|
|
convertAnthropicMessagesToResponsesInput,
|
|
convertCodexResponseToAnthropicMessage,
|
|
convertToolsToResponsesTools,
|
|
performCodexRequest,
|
|
type AnthropicStreamEvent,
|
|
type AnthropicUsage,
|
|
type ShimCreateParams,
|
|
} from './codexShim.js'
|
|
import { fetchWithProxyRetry } from './fetchWithProxyRetry.js'
|
|
import {
|
|
isLocalProviderUrl,
|
|
resolveRuntimeCodexCredentials,
|
|
resolveProviderRequest,
|
|
getGithubEndpointType,
|
|
} from './providerConfig.js'
|
|
import {
|
|
buildOpenAICompatibilityErrorMessage,
|
|
classifyOpenAIHttpFailure,
|
|
classifyOpenAINetworkFailure,
|
|
} from './openaiErrorClassification.js'
|
|
import { sanitizeSchemaForOpenAICompat } from '../../utils/schemaSanitizer.js'
|
|
import { redactSecretValueForDisplay } from '../../utils/providerProfile.js'
|
|
import {
|
|
normalizeToolArguments,
|
|
hasToolFieldMapping,
|
|
} from './toolArgumentNormalization.js'
|
|
|
|
// Environment-variable names whose values are secrets. Used as the lookup
// source when scrubbing secret values out of diagnostic strings (see
// redactUrlForDiagnostics / redactSecretValueForDisplay).
type SecretValueSource = Partial<{
  OPENAI_API_KEY: string
  CODEX_API_KEY: string
  GEMINI_API_KEY: string
  GOOGLE_API_KEY: string
  GEMINI_ACCESS_TOKEN: string
  MISTRAL_API_KEY: string
}>
|
|
|
|
// GitHub Copilot OpenAI-compatible inference endpoint.
const GITHUB_COPILOT_BASE = 'https://api.githubcopilot.com'
// Retry policy for GitHub 429 (rate-limit) responses: exponential backoff
// starting at 1s, capped at 32s, with at most 3 retries.
const GITHUB_429_MAX_RETRIES = 3
const GITHUB_429_BASE_DELAY_SEC = 1
const GITHUB_429_MAX_DELAY_SEC = 32
// Hostname of the Google Gemini API; used by hasGeminiApiHost to
// auto-detect Gemini mode from OPENAI_BASE_URL.
const GEMINI_API_HOST = 'generativelanguage.googleapis.com'
|
|
|
|
// Headers identifying this client as VS Code Copilot Chat when calling the
// Copilot API. NOTE(review): the version strings are pinned — presumably
// they need occasional bumping as client versions age; verify against the
// Copilot API's accepted versions.
const COPILOT_HEADERS: Record<string, string> = {
  'User-Agent': 'GitHubCopilotChat/0.26.7',
  'Editor-Version': 'vscode/1.99.3',
  'Editor-Plugin-Version': 'copilot-chat/0.26.7',
  'Copilot-Integration-Id': 'vscode-chat',
}
|
|
|
|
// Lowercase substrings that mark a URL query parameter as secret-bearing.
// Matched case-insensitively by substring containment in
// shouldRedactUrlQueryParam, so e.g. "apiKey" and "x-access_token" both hit.
const SENSITIVE_URL_QUERY_PARAM_NAMES = [
  'api_key',
  'key',
  'token',
  'access_token',
  'refresh_token',
  'signature',
  'sig',
  'secret',
  'password',
  'authorization',
]
|
|
|
|
function isGithubModelsMode(): boolean {
|
|
return isEnvTruthy(process.env.CLAUDE_CODE_USE_GITHUB)
|
|
}
|
|
|
|
function isMistralMode(): boolean {
|
|
return isEnvTruthy(process.env.CLAUDE_CODE_USE_MISTRAL)
|
|
}
|
|
|
|
function filterAnthropicHeaders(
|
|
headers: Record<string, string> | undefined,
|
|
): Record<string, string> {
|
|
if (!headers) return {}
|
|
|
|
const filtered: Record<string, string> = {}
|
|
for (const [key, value] of Object.entries(headers)) {
|
|
const lower = key.toLowerCase()
|
|
if (
|
|
lower.startsWith('x-anthropic') ||
|
|
lower.startsWith('anthropic-') ||
|
|
lower.startsWith('x-claude') ||
|
|
lower === 'x-app' ||
|
|
lower === 'x-client-app' ||
|
|
lower === 'authorization' ||
|
|
lower === 'x-api-key' ||
|
|
lower === 'api-key'
|
|
) {
|
|
continue
|
|
}
|
|
filtered[key] = value
|
|
}
|
|
|
|
return filtered
|
|
}
|
|
|
|
function hasGeminiApiHost(baseUrl: string | undefined): boolean {
|
|
if (!baseUrl) return false
|
|
|
|
try {
|
|
return new URL(baseUrl).hostname.toLowerCase() === GEMINI_API_HOST
|
|
} catch {
|
|
return false
|
|
}
|
|
}
|
|
|
|
function formatRetryAfterHint(response: Response): string {
|
|
const ra = response.headers.get('retry-after')
|
|
return ra ? ` (Retry-After: ${ra})` : ''
|
|
}
|
|
|
|
function shouldRedactUrlQueryParam(name: string): boolean {
|
|
const lower = name.toLowerCase()
|
|
return SENSITIVE_URL_QUERY_PARAM_NAMES.some(token => lower.includes(token))
|
|
}
|
|
|
|
function redactUrlForDiagnostics(url: string): string {
|
|
try {
|
|
const parsed = new URL(url)
|
|
if (parsed.username) {
|
|
parsed.username = 'redacted'
|
|
}
|
|
if (parsed.password) {
|
|
parsed.password = 'redacted'
|
|
}
|
|
|
|
for (const key of parsed.searchParams.keys()) {
|
|
if (shouldRedactUrlQueryParam(key)) {
|
|
parsed.searchParams.set(key, 'redacted')
|
|
}
|
|
}
|
|
|
|
const serialized = parsed.toString()
|
|
return redactSecretValueForDisplay(serialized, process.env as SecretValueSource) ?? serialized
|
|
} catch {
|
|
return redactSecretValueForDisplay(url, process.env as SecretValueSource) ?? url
|
|
}
|
|
}
|
|
|
|
function sleepMs(ms: number): Promise<void> {
|
|
return new Promise(resolve => setTimeout(resolve, ms))
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Types — minimal subset of Anthropic SDK types we need to produce
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Message format conversion: Anthropic → OpenAI
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Minimal shape of an OpenAI chat-completions message as produced by this
 * shim. `content` is either a plain string or an array of text/image parts;
 * `tool_calls` carries assistant function calls; `tool_call_id` and `name`
 * are used on role:'tool' messages.
 */
interface OpenAIMessage {
  role: 'system' | 'user' | 'assistant' | 'tool'
  content?: string | Array<{ type: string; text?: string; image_url?: { url: string } }>
  tool_calls?: Array<{
    id: string
    type: 'function'
    function: { name: string; arguments: string }
    // Provider-specific metadata (e.g. Gemini thought_signature under `google`).
    extra_content?: Record<string, unknown>
  }>
  tool_call_id?: string
  name?: string
}
|
|
|
|
/** OpenAI function-tool definition sent in the request's `tools` field. */
interface OpenAITool {
  type: 'function'
  function: {
    name: string
    description: string
    // JSON schema for the tool's arguments (see normalizeSchemaForOpenAI).
    parameters: Record<string, unknown>
    // When set, requests strict schema validation from the provider.
    strict?: boolean
  }
}
|
|
|
|
function convertSystemPrompt(
|
|
system: unknown,
|
|
): string {
|
|
if (!system) return ''
|
|
if (typeof system === 'string') return system
|
|
if (Array.isArray(system)) {
|
|
return system
|
|
.map((block: { type?: string; text?: string }) =>
|
|
block.type === 'text' ? block.text ?? '' : '',
|
|
)
|
|
.join('\n\n')
|
|
}
|
|
return String(system)
|
|
}
|
|
|
|
function convertToolResultContent(
|
|
content: unknown,
|
|
isError?: boolean,
|
|
): string | Array<{ type: string; text?: string; image_url?: { url: string } }> {
|
|
if (typeof content === 'string') {
|
|
return isError ? `Error: ${content}` : content
|
|
}
|
|
if (!Array.isArray(content)) {
|
|
const text = JSON.stringify(content ?? '')
|
|
return isError ? `Error: ${text}` : text
|
|
}
|
|
|
|
const parts: Array<{
|
|
type: string
|
|
text?: string
|
|
image_url?: { url: string }
|
|
}> = []
|
|
for (const block of content) {
|
|
if (block?.type === 'text' && typeof block.text === 'string') {
|
|
parts.push({ type: 'text', text: block.text })
|
|
continue
|
|
}
|
|
|
|
if (block?.type === 'image') {
|
|
const source = block.source
|
|
if (source?.type === 'url' && source.url) {
|
|
parts.push({ type: 'image_url', image_url: { url: source.url } })
|
|
} else if (source?.type === 'base64' && source.media_type && source.data) {
|
|
parts.push({
|
|
type: 'image_url',
|
|
image_url: {
|
|
url: `data:${source.media_type};base64,${source.data}`,
|
|
},
|
|
})
|
|
}
|
|
continue
|
|
}
|
|
|
|
if (typeof block?.text === 'string') {
|
|
parts.push({ type: 'text', text: block.text })
|
|
}
|
|
}
|
|
|
|
if (parts.length === 0) return ''
|
|
if (parts.length === 1 && parts[0].type === 'text') {
|
|
const text = parts[0].text ?? ''
|
|
return isError ? `Error: ${text}` : text
|
|
}
|
|
if (isError && parts[0]?.type === 'text') {
|
|
parts[0] = { ...parts[0], text: `Error: ${parts[0].text ?? ''}` }
|
|
} else if (isError) {
|
|
parts.unshift({ type: 'text', text: 'Error:' })
|
|
}
|
|
|
|
return parts
|
|
}
|
|
|
|
function convertContentBlocks(
|
|
content: unknown,
|
|
): string | Array<{ type: string; text?: string; image_url?: { url: string } }> {
|
|
if (typeof content === 'string') return content
|
|
if (!Array.isArray(content)) return String(content ?? '')
|
|
|
|
const parts: Array<{ type: string; text?: string; image_url?: { url: string } }> = []
|
|
for (const block of content) {
|
|
switch (block.type) {
|
|
case 'text':
|
|
parts.push({ type: 'text', text: block.text ?? '' })
|
|
break
|
|
case 'image': {
|
|
const src = block.source
|
|
if (src?.type === 'base64') {
|
|
parts.push({
|
|
type: 'image_url',
|
|
image_url: {
|
|
url: `data:${src.media_type};base64,${src.data}`,
|
|
},
|
|
})
|
|
} else if (src?.type === 'url') {
|
|
parts.push({ type: 'image_url', image_url: { url: src.url } })
|
|
}
|
|
break
|
|
}
|
|
case 'tool_use':
|
|
// handled separately
|
|
break
|
|
case 'tool_result':
|
|
// handled separately
|
|
break
|
|
case 'thinking':
|
|
case 'redacted_thinking':
|
|
// Strip thinking blocks for OpenAI-compatible providers.
|
|
// These are Anthropic-specific content types that 3P providers
|
|
// don't understand. Serializing them as <thinking> text corrupts
|
|
// multi-turn context: the model sees the tags as part of its
|
|
// previous reply and may mimic or misattribute them.
|
|
break
|
|
default:
|
|
if (block.text) {
|
|
parts.push({ type: 'text', text: block.text })
|
|
}
|
|
}
|
|
}
|
|
|
|
if (parts.length === 0) return ''
|
|
if (parts.length === 1 && parts[0].type === 'text') return parts[0].text ?? ''
|
|
return parts
|
|
}
|
|
|
|
function isGeminiMode(): boolean {
|
|
return (
|
|
isEnvTruthy(process.env.CLAUDE_CODE_USE_GEMINI) ||
|
|
hasGeminiApiHost(process.env.OPENAI_BASE_URL)
|
|
)
|
|
}
|
|
|
|
/**
 * Converts an Anthropic-format conversation (plus optional system prompt)
 * into the OpenAI chat-completions message array.
 *
 * Responsibilities, in order:
 *  - flattens the system prompt into a leading `system` message
 *  - unwraps Claude Code's { role, message: { role, content } } envelope
 *  - splits user messages into `tool` messages (one per tool_result with a
 *    known tool_use ID) plus a regular `user` message for the rest
 *  - converts assistant tool_use blocks into OpenAI tool_calls, recording
 *    each ID so later tool_results can be matched; attaches Gemini
 *    thought_signature metadata when Gemini mode is active
 *  - coalesces consecutive same-role messages so providers that require
 *    strict user/assistant alternation accept the transcript
 *
 * Note: messages whose role is neither 'user' nor 'assistant' fall through
 * the if/else chain and are silently dropped.
 */
function convertMessages(
  messages: Array<{ role: string; message?: { role?: string; content?: unknown }; content?: unknown }>,
  system: unknown,
): OpenAIMessage[] {
  const result: OpenAIMessage[] = []
  // IDs of tool_use blocks seen so far; tool_results without a match are dropped.
  const knownToolCallIds = new Set<string>()

  // System message first
  const sysText = convertSystemPrompt(system)
  if (sysText) {
    result.push({ role: 'system', content: sysText })
  }

  for (const msg of messages) {
    // Claude Code wraps messages in { role, message: { role, content } }
    const inner = msg.message ?? msg
    const role = (inner as { role?: string }).role ?? msg.role
    const content = (inner as { content?: unknown }).content

    if (role === 'user') {
      // Check for tool_result blocks in user messages
      if (Array.isArray(content)) {
        const toolResults = content.filter((b: { type?: string }) => b.type === 'tool_result')
        const otherContent = content.filter((b: { type?: string }) => b.type !== 'tool_result')

        // Emit tool results as tool messages, but ONLY if we have a matching tool_use ID.
        // Mistral/OpenAI strictly require tool messages to follow an assistant message with tool_calls.
        // If the user interrupted (ESC) and a synthetic tool_result was generated without a recorded tool_use,
        // emitting it here would cause a "role must alternate" or "unexpected role" error.
        for (const tr of toolResults) {
          const id = tr.tool_use_id ?? 'unknown'
          if (knownToolCallIds.has(id)) {
            result.push({
              role: 'tool',
              tool_call_id: id,
              content: convertToolResultContent(tr.content, tr.is_error),
            })
          } else {
            logForDebugging(`Dropping orphan tool_result for ID: ${id} to prevent API error`)
          }
        }

        // Emit remaining user content
        if (otherContent.length > 0) {
          result.push({
            role: 'user',
            content: convertContentBlocks(otherContent),
          })
        }
      } else {
        result.push({
          role: 'user',
          content: convertContentBlocks(content),
        })
      }
    } else if (role === 'assistant') {
      // Check for tool_use blocks
      if (Array.isArray(content)) {
        const toolUses = content.filter((b: { type?: string }) => b.type === 'tool_use')
        const thinkingBlock = content.find((b: { type?: string }) => b.type === 'thinking')
        const textContent = content.filter(
          (b: { type?: string }) => b.type !== 'tool_use' && b.type !== 'thinking',
        )

        // Assistant content is always flattened to a string here (array parts
        // are joined by their text), since tool_calls carry the structure.
        const assistantMsg: OpenAIMessage = {
          role: 'assistant',
          content: (() => {
            const c = convertContentBlocks(textContent)
            return typeof c === 'string' ? c : Array.isArray(c) ? c.map((p: { text?: string }) => p.text ?? '').join('') : ''
          })(),
        }

        if (toolUses.length > 0) {
          assistantMsg.tool_calls = toolUses.map(
            (tu: {
              id?: string
              name?: string
              input?: unknown
              extra_content?: Record<string, unknown>
              signature?: string
            }) => {
              const id = tu.id ?? `call_${crypto.randomUUID().replace(/-/g, '')}`
              knownToolCallIds.add(id)
              const toolCall: NonNullable<OpenAIMessage['tool_calls']>[number] = {
                id,
                type: 'function' as const,
                function: {
                  name: tu.name ?? 'unknown',
                  arguments:
                    typeof tu.input === 'string'
                      ? tu.input
                      : JSON.stringify(tu.input ?? {}),
                },
              }

              // Preserve existing extra_content if present
              if (tu.extra_content) {
                toolCall.extra_content = { ...tu.extra_content }
              }

              // Handle Gemini thought_signature
              if (isGeminiMode()) {
                // If the model provided a signature in the tool_use block itself (e.g. from a previous Turn/Step)
                // Use thinkingBlock.signature for ALL tool calls in the same assistant turn if available.
                // The API requires the same signature on every replayed function call part in a parallel set.
                const signature = tu.signature ?? (thinkingBlock as any)?.signature

                // Merge into existing google-specific metadata if present
                const existingGoogle = (toolCall.extra_content?.google as Record<string, unknown>) ?? {}
                toolCall.extra_content = {
                  ...toolCall.extra_content,
                  google: {
                    ...existingGoogle,
                    thought_signature: signature ?? "skip_thought_signature_validator"
                  }
                }
              }

              return toolCall
            },
          )
        }

        result.push(assistantMsg)
      } else {
        result.push({
          role: 'assistant',
          content: (() => {
            const c = convertContentBlocks(content)
            return typeof c === 'string' ? c : Array.isArray(c) ? c.map((p: { text?: string }) => p.text ?? '').join('') : ''
          })(),
        })
      }
    }
  }

  // Coalescing pass: merge consecutive messages of the same role.
  // OpenAI/vLLM/Ollama require strict user↔assistant alternation.
  // Multiple consecutive tool messages are allowed (assistant → tool* → user).
  // Consecutive user or assistant messages must be merged to avoid Jinja
  // template errors like "roles must alternate" (Devstral, Mistral models).
  const coalesced: OpenAIMessage[] = []
  for (const msg of result) {
    const prev = coalesced[coalesced.length - 1]

    if (prev && prev.role === msg.role && msg.role !== 'tool' && msg.role !== 'system') {
      const prevContent = prev.content
      const curContent = msg.content

      if (typeof prevContent === 'string' && typeof curContent === 'string') {
        prev.content = prevContent + (prevContent && curContent ? '\n' : '') + curContent
      } else {
        // Mixed string/array content: promote both sides to part arrays.
        const toArray = (
          c: string | Array<{ type: string; text?: string; image_url?: { url: string } }> | undefined,
        ): Array<{ type: string; text?: string; image_url?: { url: string } }> => {
          if (!c) return []
          if (typeof c === 'string') return c ? [{ type: 'text', text: c }] : []
          return c
        }
        prev.content = [...toArray(prevContent), ...toArray(curContent)]
      }

      // Carry tool_calls from the merged-away message onto the survivor.
      if (msg.tool_calls?.length) {
        prev.tool_calls = [...(prev.tool_calls ?? []), ...msg.tool_calls]
      }
    } else {
      coalesced.push(msg)
    }
  }

  return coalesced
}
|
|
|
|
/**
 * Normalizes a JSON schema for OpenAI-compatible tool definitions.
 *
 * (Docstring updated: it previously claimed `required` was made a superset
 * of `properties` keys; the current code intentionally does the opposite —
 * see the inline comments in the strict branch.)
 *
 * Steps, applied recursively through properties, array items, and
 * anyOf/oneOf/allOf combinators:
 *  - runs sanitizeSchemaForOpenAICompat on the schema first
 *  - filters `required` down to keys actually present in `properties`
 *  - in strict mode, additionally sets `additionalProperties: false`
 *
 * @param schema - JSON schema to normalize (the sanitized record may be
 *   mutated in place).
 * @param strict - strict-mode normalization (default true); pass false for
 *   Gemini-style endpoints.
 */
function normalizeSchemaForOpenAI(
  schema: Record<string, unknown>,
  strict = true,
): Record<string, unknown> {
  const record = sanitizeSchemaForOpenAICompat(schema)

  if (record.type === 'object' && record.properties) {
    const properties = record.properties as Record<string, Record<string, unknown>>
    const existingRequired = Array.isArray(record.required) ? record.required as string[] : []

    // Recurse into each property
    const normalizedProps: Record<string, unknown> = {}
    for (const [key, value] of Object.entries(properties)) {
      normalizedProps[key] = normalizeSchemaForOpenAI(
        value as Record<string, unknown>,
        strict,
      )
    }
    record.properties = normalizedProps

    if (strict) {
      // Keep only the properties that were originally marked required in the schema.
      // Adding every property to required[] (the previous behaviour) caused strict
      // OpenAI-compatible providers (Groq, Azure, etc.) to reject tool calls because
      // the model correctly omits optional arguments — but the provider treats them
      // as missing required fields and returns a 400 / tool_use_failed error.
      record.required = existingRequired.filter(k => k in normalizedProps)
      // additionalProperties: false is still required by strict-mode providers.
      record.additionalProperties = false
    } else {
      // For Gemini: keep only existing required keys that are present in properties
      record.required = existingRequired.filter(k => k in normalizedProps)
    }
  }

  // Recurse into array items
  if ('items' in record) {
    if (Array.isArray(record.items)) {
      record.items = (record.items as unknown[]).map(
        item => normalizeSchemaForOpenAI(item as Record<string, unknown>, strict),
      )
    } else {
      record.items = normalizeSchemaForOpenAI(record.items as Record<string, unknown>, strict)
    }
  }

  // Recurse into combinators
  for (const key of ['anyOf', 'oneOf', 'allOf'] as const) {
    if (key in record && Array.isArray(record[key])) {
      record[key] = (record[key] as unknown[]).map(
        item => normalizeSchemaForOpenAI(item as Record<string, unknown>, strict),
      )
    }
  }

  return record
}
|
|
|
|
function convertTools(
|
|
tools: Array<{ name: string; description?: string; input_schema?: Record<string, unknown> }>,
|
|
): OpenAITool[] {
|
|
const isGemini = isGeminiMode()
|
|
|
|
return tools
|
|
.filter(t => t.name !== 'ToolSearchTool') // Not relevant for OpenAI
|
|
.map(t => {
|
|
const schema = { ...(t.input_schema ?? { type: 'object', properties: {} }) } as Record<string, unknown>
|
|
|
|
// For Codex/OpenAI: promote known Agent sub-fields into required[] only if
|
|
// they actually exist in properties (Gemini rejects required keys absent from properties).
|
|
if (t.name === 'Agent' && schema.properties) {
|
|
const props = schema.properties as Record<string, unknown>
|
|
if (!Array.isArray(schema.required)) schema.required = []
|
|
const req = schema.required as string[]
|
|
for (const key of ['message', 'subagent_type']) {
|
|
if (key in props && !req.includes(key)) req.push(key)
|
|
}
|
|
}
|
|
|
|
return {
|
|
type: 'function' as const,
|
|
function: {
|
|
name: t.name,
|
|
description: t.description ?? '',
|
|
parameters: normalizeSchemaForOpenAI(
|
|
schema,
|
|
!isGemini && !isEnvTruthy(process.env.OPENCLAUDE_DISABLE_STRICT_TOOLS),
|
|
),
|
|
},
|
|
}
|
|
})
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Streaming: OpenAI SSE → Anthropic stream events
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Minimal shape of one OpenAI chat-completions SSE chunk as consumed by
 * the stream translator. Includes the non-standard `reasoning_content`
 * delta field streamed by reasoning models, and optional provider
 * `extra_content` on tool-call deltas.
 */
interface OpenAIStreamChunk {
  id: string
  object: string
  model: string
  choices: Array<{
    index: number
    delta: {
      role?: string
      content?: string | null
      // Chain-of-thought stream from reasoning models; not standard OpenAI.
      reasoning_content?: string | null
      tool_calls?: Array<{
        index: number
        id?: string
        type?: string
        function?: { name?: string; arguments?: string }
        // Provider-specific metadata (e.g. Gemini thought_signature).
        extra_content?: Record<string, unknown>
      }>
    }
    finish_reason: string | null
  }>
  // Present on the final chunk when usage reporting is enabled.
  usage?: {
    prompt_tokens?: number
    completion_tokens?: number
    total_tokens?: number
    prompt_tokens_details?: {
      cached_tokens?: number
    }
  }
}
|
|
|
|
function makeMessageId(): string {
|
|
return `msg_${crypto.randomUUID().replace(/-/g, '')}`
|
|
}
|
|
|
|
function convertChunkUsage(
|
|
usage: OpenAIStreamChunk['usage'] | undefined,
|
|
): Partial<AnthropicUsage> | undefined {
|
|
if (!usage) return undefined
|
|
|
|
const cached = usage.prompt_tokens_details?.cached_tokens ?? 0
|
|
return {
|
|
// Subtract cached tokens: OpenAI includes them in prompt_tokens,
|
|
// but Anthropic convention treats input_tokens as non-cached only.
|
|
input_tokens: (usage.prompt_tokens ?? 0) - cached,
|
|
output_tokens: usage.completion_tokens ?? 0,
|
|
cache_creation_input_tokens: 0,
|
|
cache_read_input_tokens: cached,
|
|
}
|
|
}
|
|
|
|
const JSON_REPAIR_SUFFIXES = [
|
|
'}', '"}', ']}', '"]}', '}}', '"}}', ']}}', '"]}}', '"]}]}', '}]}'
|
|
]
|
|
|
|
function repairPossiblyTruncatedObjectJson(raw: string): string | null {
|
|
try {
|
|
const parsed = JSON.parse(raw)
|
|
return parsed && typeof parsed === 'object' && !Array.isArray(parsed)
|
|
? raw
|
|
: null
|
|
} catch {
|
|
for (const combo of JSON_REPAIR_SUFFIXES) {
|
|
try {
|
|
const repaired = raw + combo
|
|
const parsed = JSON.parse(repaired)
|
|
if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
|
|
return repaired
|
|
}
|
|
} catch {}
|
|
}
|
|
return null
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Async generator that transforms an OpenAI SSE stream into
|
|
* Anthropic-format BetaRawMessageStreamEvent objects.
|
|
*/
|
|
async function* openaiStreamToAnthropic(
|
|
response: Response,
|
|
model: string,
|
|
signal?: AbortSignal,
|
|
): AsyncGenerator<AnthropicStreamEvent> {
|
|
const messageId = makeMessageId()
|
|
let contentBlockIndex = 0
|
|
const activeToolCalls = new Map<
|
|
number,
|
|
{
|
|
id: string
|
|
name: string
|
|
index: number
|
|
jsonBuffer: string
|
|
normalizeAtStop: boolean
|
|
}
|
|
>()
|
|
let hasEmittedContentStart = false
|
|
let hasEmittedThinkingStart = false
|
|
let hasClosedThinking = false
|
|
let activeTextBuffer = ''
|
|
let textBufferMode: 'none' | 'pending' | 'strip' = 'none'
|
|
let lastStopReason: 'tool_use' | 'max_tokens' | 'end_turn' | null = null
|
|
let hasEmittedFinalUsage = false
|
|
let hasProcessedFinishReason = false
|
|
|
|
// Emit message_start
|
|
yield {
|
|
type: 'message_start',
|
|
message: {
|
|
id: messageId,
|
|
type: 'message',
|
|
role: 'assistant',
|
|
content: [],
|
|
model,
|
|
stop_reason: null,
|
|
stop_sequence: null,
|
|
usage: {
|
|
input_tokens: 0,
|
|
output_tokens: 0,
|
|
cache_creation_input_tokens: 0,
|
|
cache_read_input_tokens: 0,
|
|
},
|
|
},
|
|
}
|
|
|
|
const reader = response.body?.getReader()
|
|
if (!reader) return
|
|
|
|
const decoder = new TextDecoder()
|
|
let buffer = ''
|
|
const STREAM_IDLE_TIMEOUT_MS = 120_000 // 2 minutes without data = connection likely dead
|
|
let lastDataTime = Date.now()
|
|
|
|
/**
|
|
* Read from the stream with an idle timeout. If no data arrives within
|
|
* STREAM_IDLE_TIMEOUT_MS, assume the connection is dead and throw so
|
|
* withRetry can reconnect. This prevents indefinite hangs on stale
|
|
* SSE connections from OpenAI/Gemini during long-running sessions.
|
|
* Respects the caller's AbortSignal — clears the idle timer on abort
|
|
* so the rejection reason is AbortError, not a spurious idle timeout.
|
|
*/
|
|
async function readWithTimeout(): Promise<ReadableStreamReadResult<Uint8Array>> {
|
|
return new Promise((resolve, reject) => {
|
|
const timeoutId = setTimeout(() => {
|
|
const elapsed = Math.round((Date.now() - lastDataTime) / 1000)
|
|
reject(new Error(
|
|
`OpenAI/Gemini SSE stream idle for ${elapsed}s (limit: ${STREAM_IDLE_TIMEOUT_MS / 1000}s). Connection likely dropped.`,
|
|
))
|
|
}, STREAM_IDLE_TIMEOUT_MS)
|
|
|
|
// If the caller aborts, clear the timer so the AbortError surfaces
|
|
// cleanly instead of being masked by a spurious idle timeout.
|
|
let abortCleanup: (() => void) | undefined
|
|
if (signal) {
|
|
abortCleanup = () => {
|
|
clearTimeout(timeoutId)
|
|
}
|
|
signal.addEventListener('abort', abortCleanup, { once: true })
|
|
}
|
|
|
|
reader.read().then(
|
|
result => {
|
|
clearTimeout(timeoutId)
|
|
if (signal && abortCleanup) signal.removeEventListener('abort', abortCleanup)
|
|
if (result.value) lastDataTime = Date.now()
|
|
resolve(result)
|
|
},
|
|
err => {
|
|
clearTimeout(timeoutId)
|
|
if (signal && abortCleanup) signal.removeEventListener('abort', abortCleanup)
|
|
reject(err)
|
|
},
|
|
)
|
|
})
|
|
}
|
|
|
|
const closeActiveContentBlock = async function* () {
|
|
if (!hasEmittedContentStart) return
|
|
|
|
if (textBufferMode !== 'none') {
|
|
const sanitized = stripLeakedReasoningPreamble(activeTextBuffer)
|
|
if (sanitized) {
|
|
yield {
|
|
type: 'content_block_delta',
|
|
index: contentBlockIndex,
|
|
delta: { type: 'text_delta', text: sanitized },
|
|
}
|
|
}
|
|
}
|
|
|
|
yield {
|
|
type: 'content_block_stop',
|
|
index: contentBlockIndex,
|
|
}
|
|
contentBlockIndex++
|
|
hasEmittedContentStart = false
|
|
activeTextBuffer = ''
|
|
textBufferMode = 'none'
|
|
}
|
|
|
|
try {
|
|
while (true) {
|
|
const { done, value } = await readWithTimeout()
|
|
if (done) break
|
|
|
|
buffer += decoder.decode(value, { stream: true })
|
|
const lines = buffer.split('\n')
|
|
buffer = lines.pop() ?? ''
|
|
|
|
for (const line of lines) {
|
|
const trimmed = line.trim()
|
|
if (!trimmed || trimmed === 'data: [DONE]') continue
|
|
if (!trimmed.startsWith('data: ')) continue
|
|
|
|
let chunk: OpenAIStreamChunk
|
|
try {
|
|
chunk = JSON.parse(trimmed.slice(6))
|
|
} catch {
|
|
continue
|
|
}
|
|
|
|
const chunkUsage = convertChunkUsage(chunk.usage)
|
|
|
|
for (const choice of chunk.choices ?? []) {
|
|
const delta = choice.delta
|
|
|
|
// Reasoning models (e.g. GLM-5, DeepSeek) may stream chain-of-thought
|
|
// in `reasoning_content` before the actual reply appears in `content`.
|
|
// Emit reasoning as a thinking block and content as a text block.
|
|
if (delta.reasoning_content != null && delta.reasoning_content !== '') {
|
|
if (!hasEmittedThinkingStart) {
|
|
yield {
|
|
type: 'content_block_start',
|
|
index: contentBlockIndex,
|
|
content_block: { type: 'thinking', thinking: '' },
|
|
}
|
|
hasEmittedThinkingStart = true
|
|
}
|
|
yield {
|
|
type: 'content_block_delta',
|
|
index: contentBlockIndex,
|
|
delta: { type: 'thinking_delta', thinking: delta.reasoning_content },
|
|
}
|
|
}
|
|
|
|
// Text content — use != null to distinguish absent field from empty string,
|
|
// some providers send "" as first delta to signal streaming start
|
|
if (delta.content != null && delta.content !== '') {
|
|
// Close thinking block if transitioning from reasoning to content
|
|
if (hasEmittedThinkingStart && !hasClosedThinking) {
|
|
yield { type: 'content_block_stop', index: contentBlockIndex }
|
|
contentBlockIndex++
|
|
hasClosedThinking = true
|
|
}
|
|
activeTextBuffer += delta.content
|
|
if (!hasEmittedContentStart) {
|
|
yield {
|
|
type: 'content_block_start',
|
|
index: contentBlockIndex,
|
|
content_block: { type: 'text', text: '' },
|
|
}
|
|
hasEmittedContentStart = true
|
|
}
|
|
|
|
if (
|
|
textBufferMode === 'strip' ||
|
|
looksLikeLeakedReasoningPrefix(activeTextBuffer)
|
|
) {
|
|
textBufferMode = 'strip'
|
|
continue
|
|
}
|
|
|
|
if (textBufferMode === 'pending') {
|
|
if (shouldBufferPotentialReasoningPrefix(activeTextBuffer)) {
|
|
continue
|
|
}
|
|
yield {
|
|
type: 'content_block_delta',
|
|
index: contentBlockIndex,
|
|
delta: {
|
|
type: 'text_delta',
|
|
text: activeTextBuffer,
|
|
},
|
|
}
|
|
textBufferMode = 'none'
|
|
continue
|
|
}
|
|
|
|
if (shouldBufferPotentialReasoningPrefix(activeTextBuffer)) {
|
|
textBufferMode = 'pending'
|
|
continue
|
|
}
|
|
yield {
|
|
type: 'content_block_delta',
|
|
index: contentBlockIndex,
|
|
delta: { type: 'text_delta', text: delta.content },
|
|
}
|
|
}
|
|
|
|
// Tool calls
|
|
if (delta.tool_calls) {
|
|
for (const tc of delta.tool_calls) {
|
|
if (tc.id && tc.function?.name) {
|
|
// New tool call starting — close any open thinking block first
|
|
if (hasEmittedThinkingStart && !hasClosedThinking) {
|
|
yield { type: 'content_block_stop', index: contentBlockIndex }
|
|
contentBlockIndex++
|
|
hasClosedThinking = true
|
|
}
|
|
if (hasEmittedContentStart) {
|
|
yield* closeActiveContentBlock()
|
|
}
|
|
|
|
const toolBlockIndex = contentBlockIndex
|
|
const initialArguments = tc.function.arguments ?? ''
|
|
const normalizeAtStop = hasToolFieldMapping(tc.function.name)
|
|
activeToolCalls.set(tc.index, {
|
|
id: tc.id,
|
|
name: tc.function.name,
|
|
index: toolBlockIndex,
|
|
jsonBuffer: initialArguments,
|
|
normalizeAtStop,
|
|
})
|
|
|
|
yield {
|
|
type: 'content_block_start',
|
|
index: toolBlockIndex,
|
|
content_block: {
|
|
type: 'tool_use',
|
|
id: tc.id,
|
|
name: tc.function.name,
|
|
input: {},
|
|
...(tc.extra_content ? { extra_content: tc.extra_content } : {}),
|
|
// Extract Gemini signature from extra_content
|
|
...((tc.extra_content?.google as any)?.thought_signature
|
|
? {
|
|
signature: (tc.extra_content.google as any)
|
|
.thought_signature,
|
|
}
|
|
: {}),
|
|
},
|
|
}
|
|
contentBlockIndex++
|
|
|
|
// Emit any initial arguments
|
|
if (tc.function.arguments && !normalizeAtStop) {
|
|
yield {
|
|
type: 'content_block_delta',
|
|
index: toolBlockIndex,
|
|
delta: {
|
|
type: 'input_json_delta',
|
|
partial_json: tc.function.arguments,
|
|
},
|
|
}
|
|
}
|
|
} else if (tc.function?.arguments) {
|
|
// Continuation of existing tool call
|
|
const active = activeToolCalls.get(tc.index)
|
|
if (active) {
|
|
if (tc.function.arguments) {
|
|
active.jsonBuffer += tc.function.arguments
|
|
}
|
|
|
|
if (active.normalizeAtStop) {
|
|
continue
|
|
}
|
|
|
|
yield {
|
|
type: 'content_block_delta',
|
|
index: active.index,
|
|
delta: {
|
|
type: 'input_json_delta',
|
|
partial_json: tc.function.arguments,
|
|
},
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Finish — guard ensures we only process finish_reason once even if
|
|
// multiple chunks arrive with finish_reason set (some providers do this)
|
|
if (choice.finish_reason && !hasProcessedFinishReason) {
|
|
hasProcessedFinishReason = true
|
|
|
|
// Close any open thinking block that wasn't closed by content transition
|
|
if (hasEmittedThinkingStart && !hasClosedThinking) {
|
|
yield { type: 'content_block_stop', index: contentBlockIndex }
|
|
contentBlockIndex++
|
|
hasClosedThinking = true
|
|
}
|
|
// Close any open content blocks
|
|
if (hasEmittedContentStart) {
|
|
yield* closeActiveContentBlock()
|
|
}
|
|
// Close active tool calls
|
|
for (const [, tc] of activeToolCalls) {
|
|
if (tc.normalizeAtStop) {
|
|
let partialJson: string
|
|
if (choice.finish_reason === 'length') {
|
|
// Truncated by max tokens — preserve raw buffer to avoid
|
|
// turning an incomplete tool call into an executable command
|
|
partialJson = tc.jsonBuffer
|
|
} else {
|
|
const repairedStructuredJson = repairPossiblyTruncatedObjectJson(
|
|
tc.jsonBuffer,
|
|
)
|
|
if (repairedStructuredJson) {
|
|
partialJson = repairedStructuredJson
|
|
} else {
|
|
partialJson = JSON.stringify(
|
|
normalizeToolArguments(tc.name, tc.jsonBuffer),
|
|
)
|
|
}
|
|
}
|
|
|
|
yield {
|
|
type: 'content_block_delta',
|
|
index: tc.index,
|
|
delta: {
|
|
type: 'input_json_delta',
|
|
partial_json: partialJson,
|
|
},
|
|
}
|
|
yield { type: 'content_block_stop', index: tc.index }
|
|
continue
|
|
}
|
|
|
|
let suffixToAdd = ''
|
|
if (tc.jsonBuffer) {
|
|
try {
|
|
JSON.parse(tc.jsonBuffer)
|
|
} catch {
|
|
const str = tc.jsonBuffer.trimEnd()
|
|
for (const combo of JSON_REPAIR_SUFFIXES) {
|
|
try {
|
|
JSON.parse(str + combo)
|
|
suffixToAdd = combo
|
|
break
|
|
} catch {}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (suffixToAdd) {
|
|
yield {
|
|
type: 'content_block_delta',
|
|
index: tc.index,
|
|
delta: {
|
|
type: 'input_json_delta',
|
|
partial_json: suffixToAdd,
|
|
},
|
|
}
|
|
}
|
|
|
|
yield { type: 'content_block_stop', index: tc.index }
|
|
}
|
|
|
|
const stopReason =
|
|
choice.finish_reason === 'tool_calls'
|
|
? 'tool_use'
|
|
: choice.finish_reason === 'length'
|
|
? 'max_tokens'
|
|
: 'end_turn'
|
|
if (choice.finish_reason === 'content_filter' || choice.finish_reason === 'safety') {
|
|
// Gemini/Azure content safety filter blocked the response.
|
|
// Emit a visible text block so the user knows why output was truncated.
|
|
if (!hasEmittedContentStart) {
|
|
yield {
|
|
type: 'content_block_start',
|
|
index: contentBlockIndex,
|
|
content_block: { type: 'text', text: '' },
|
|
}
|
|
hasEmittedContentStart = true
|
|
}
|
|
yield {
|
|
type: 'content_block_delta',
|
|
index: contentBlockIndex,
|
|
delta: { type: 'text_delta', text: '\n\n[Content blocked by provider safety filter]' },
|
|
}
|
|
}
|
|
lastStopReason = stopReason
|
|
|
|
yield {
|
|
type: 'message_delta',
|
|
delta: { stop_reason: stopReason, stop_sequence: null },
|
|
...(chunkUsage ? { usage: chunkUsage } : {}),
|
|
}
|
|
if (chunkUsage) {
|
|
hasEmittedFinalUsage = true
|
|
}
|
|
}
|
|
}
|
|
|
|
if (
|
|
!hasEmittedFinalUsage &&
|
|
chunkUsage &&
|
|
(chunk.choices?.length ?? 0) === 0 &&
|
|
lastStopReason !== null
|
|
) {
|
|
yield {
|
|
type: 'message_delta',
|
|
delta: { stop_reason: lastStopReason, stop_sequence: null },
|
|
usage: chunkUsage,
|
|
}
|
|
hasEmittedFinalUsage = true
|
|
}
|
|
}
|
|
}
|
|
} finally {
|
|
reader.releaseLock()
|
|
}
|
|
|
|
yield { type: 'message_stop' }
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// The shim client — duck-types as Anthropic SDK
|
|
// ---------------------------------------------------------------------------
|
|
|
|
class OpenAIShimStream {
|
|
private generator: AsyncGenerator<AnthropicStreamEvent>
|
|
// The controller property is checked by claude.ts to distinguish streams from error messages
|
|
controller = new AbortController()
|
|
|
|
constructor(generator: AsyncGenerator<AnthropicStreamEvent>) {
|
|
this.generator = generator
|
|
}
|
|
|
|
async *[Symbol.asyncIterator]() {
|
|
yield* this.generator
|
|
}
|
|
}
|
|
|
|
/**
 * Implements the `messages.create` surface of the Anthropic SDK on top of
 * OpenAI-compatible HTTP endpoints (chat completions and /responses).
 *
 * Routing: requests with the `codex_responses` transport go through
 * `performCodexRequest`; everything else is sent as an OpenAI chat-completions
 * POST, with an automatic fallback to the /responses endpoint for GitHub
 * Copilot models that reject /chat/completions.
 */
class OpenAIShimMessages {
  // Headers attached to every request (Anthropic-specific headers stripped).
  private defaultHeaders: Record<string, string>
  // Optional reasoning-effort hint forwarded into resolveProviderRequest.
  private reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh'
  // When set, overrides the env-derived model / base URL / API key.
  private providerOverride?: { model: string; baseURL: string; apiKey: string }

  constructor(defaultHeaders: Record<string, string>, reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh', providerOverride?: { model: string; baseURL: string; apiKey: string }) {
    this.defaultHeaders = filterAnthropicHeaders(defaultHeaders)
    this.reasoningEffort = reasoningEffort
    this.providerOverride = providerOverride
  }

  /**
   * Anthropic-SDK-compatible entry point.
   *
   * Returns a promise resolving to either an OpenAIShimStream (when
   * `params.stream` is set) or a fully converted Anthropic-format message.
   * The promise is augmented with a `withResponse()` method, matching the
   * Anthropic SDK, that also exposes the raw HTTP response and request id.
   */
  create(
    params: ShimCreateParams,
    options?: { signal?: AbortSignal; headers?: Record<string, string> },
  ) {
    const self = this

    // Captured by the async body below so withResponse() can return it later.
    let httpResponse: Response | undefined

    const promise = (async () => {
      const request = resolveProviderRequest({ model: self.providerOverride?.model ?? params.model, baseUrl: self.providerOverride?.baseURL, reasoningEffortOverride: self.reasoningEffort })
      const response = await self._doRequest(request, params, options)
      httpResponse = response

      if (params.stream) {
        // The /chat/completions call may have been redirected to /responses
        // (GitHub Copilot fallback) — pick the matching stream decoder.
        const isResponsesStream = response.url?.includes('/responses')
        return new OpenAIShimStream(
          (request.transport === 'codex_responses' || isResponsesStream)
            ? codexStreamToAnthropic(response, request.resolvedModel, options?.signal)
            : openaiStreamToAnthropic(response, request.resolvedModel, options?.signal),
        )
      }

      // Non-streaming codex transport: drain the SSE stream into one response.
      if (request.transport === 'codex_responses') {
        const data = await collectCodexCompletedResponse(response, options?.signal)
        return convertCodexResponseToAnthropicMessage(
          data,
          request.resolvedModel,
        )
      }

      // Non-streaming /responses-shaped payloads (or GitHub Models chat
      // completions) may arrive as either a Responses-API object (has
      // `output` / `incomplete_details`) or a plain chat completion.
      const isResponsesNonStream = response.url?.includes('/responses')
      if (isResponsesNonStream || (request.transport === 'chat_completions' && isGithubModelsMode())) {
        const contentType = response.headers.get('content-type') ?? ''
        if (contentType.includes('application/json')) {
          const parsed = await response.json() as Record<string, unknown>
          if (
            parsed &&
            typeof parsed === 'object' &&
            ('output' in parsed || 'incomplete_details' in parsed)
          ) {
            return convertCodexResponseToAnthropicMessage(
              parsed,
              request.resolvedModel,
            )
          }
          return self._convertNonStreamingResponse(parsed, request.resolvedModel)
        }
      }

      // Plain OpenAI chat-completions JSON body.
      const contentType = response.headers.get('content-type') ?? ''
      if (contentType.includes('application/json')) {
        const data = await response.json()
        return self._convertNonStreamingResponse(data, request.resolvedModel)
      }

      // Anything non-JSON at this point is unexpected — surface a truncated
      // snippet of the body for diagnostics.
      const textBody = await response.text().catch(() => '')
      throw APIError.generate(
        response.status,
        undefined,
        `OpenAI API error ${response.status}: unexpected response: ${textBody.slice(0, 500)}`,
        response.headers as unknown as Headers,
      )
    })()

    // Attach withResponse() onto the promise object itself, mirroring the
    // Anthropic SDK's APIPromise surface.
    ; (promise as unknown as Record<string, unknown>).withResponse =
      async () => {
        const data = await promise
        return {
          data,
          response: httpResponse ?? new Response(),
          request_id:
            httpResponse?.headers.get('x-request-id') ?? makeMessageId(),
        }
      }

    return promise
  }

  /**
   * Dispatches to the right transport:
   *  - GitHub mode + codex transport → performCodexRequest with env/override key
   *  - codex transport (non-GitHub)  → refresh + resolve Codex credentials,
   *    then performCodexRequest
   *  - everything else               → _doOpenAIRequest (chat completions)
   */
  private async _doRequest(
    request: ReturnType<typeof resolveProviderRequest>,
    params: ShimCreateParams,
    options?: { signal?: AbortSignal; headers?: Record<string, string> },
  ): Promise<Response> {
    // NOTE(review): githubEndpointType is computed here but not referenced in
    // this method — possibly leftover from a refactor; confirm before removing.
    const githubEndpointType = getGithubEndpointType(request.baseUrl)
    const isGithubMode = isGithubModelsMode()
    const isGithubWithCodexTransport = isGithubMode && request.transport === 'codex_responses'

    if (isGithubWithCodexTransport) {
      const apiKey = this.providerOverride?.apiKey ?? process.env.OPENAI_API_KEY ?? ''
      if (!apiKey) {
        throw new Error(
          'GitHub Copilot auth is required. Run /onboard-github to sign in.',
        )
      }

      return performCodexRequest({
        request,
        credentials: {
          apiKey,
          source: 'env',
        },
        params,
        defaultHeaders: {
          ...this.defaultHeaders,
          ...filterAnthropicHeaders(options?.headers),
          ...COPILOT_HEADERS,
        },
        signal: options?.signal,
      })
    }

    if (request.transport === 'codex_responses' && !isGithubMode) {
      // Best-effort token refresh: a refresh failure is logged and we fall
      // back to whatever credentials are currently on disk.
      const refreshResult = await refreshCodexAccessTokenIfNeeded().catch(
        async error => {
          logForDebugging(
            `[codex] access token refresh failed before request: ${error instanceof Error ? error.message : String(error)}`,
            { level: 'warn' },
          )
          return {
            refreshed: false,
            credentials: await readCodexCredentialsAsync(),
          }
        },
      )
      const credentials = resolveRuntimeCodexCredentials({
        storedCredentials: refreshResult.credentials,
      })
      if (!credentials.apiKey) {
        // Build an actionable error without leaking the raw model string
        // (it may embed a secret — redact before display).
        const oauthHint = isBareMode() ? '' : ', choose Codex OAuth in /provider'
        const authHint = credentials.authPath
          ? `${oauthHint} or place a Codex auth.json at ${credentials.authPath}`
          : oauthHint
        const safeModel =
          redactSecretValueForDisplay(request.requestedModel, process.env as SecretValueSource) ??
          'the requested model'
        throw new Error(
          `Codex auth is required for ${safeModel}. Set CODEX_API_KEY${authHint}.`,
        )
      }
      if (!credentials.accountId) {
        throw new Error(
          'Codex auth is missing chatgpt_account_id. Re-login with Codex OAuth, the Codex CLI, or set CHATGPT_ACCOUNT_ID/CODEX_ACCOUNT_ID.',
        )
      }

      return performCodexRequest({
        request,
        credentials,
        params,
        defaultHeaders: {
          ...this.defaultHeaders,
          ...filterAnthropicHeaders(options?.headers),
        },
        signal: options?.signal,
      })
    }

    return this._doOpenAIRequest(request, params, options)
  }

  /**
   * Performs an OpenAI chat-completions request: builds the request body,
   * provider-specific headers and URL (including the Azure deployment path),
   * retries 429s for GitHub, and falls back to the /responses endpoint when
   * GitHub Copilot rejects /chat/completions for a model.
   *
   * Returns the raw successful Response; all failure paths throw APIError.
   */
  private async _doOpenAIRequest(
    request: ReturnType<typeof resolveProviderRequest>,
    params: ShimCreateParams,
    options?: { signal?: AbortSignal; headers?: Record<string, string> },
  ): Promise<Response> {
    const openaiMessages = convertMessages(
      params.messages as Array<{
        role: string
        message?: { role?: string; content?: unknown }
        content?: unknown
      }>,
      params.system,
    )

    const body: Record<string, unknown> = {
      model: request.resolvedModel,
      messages: openaiMessages,
      stream: params.stream ?? false,
      store: false,
    }
    // Convert max_tokens to max_completion_tokens for OpenAI API compatibility.
    // Azure OpenAI requires max_completion_tokens and does not accept max_tokens.
    // Ensure max_tokens is a valid positive number before using it.
    const maxTokensValue = typeof params.max_tokens === 'number' && params.max_tokens > 0
      ? params.max_tokens
      : undefined
    const maxCompletionTokensValue = typeof (params as Record<string, unknown>).max_completion_tokens === 'number'
      ? (params as Record<string, unknown>).max_completion_tokens as number
      : undefined

    if (maxTokensValue !== undefined) {
      body.max_completion_tokens = maxTokensValue
    } else if (maxCompletionTokensValue !== undefined) {
      body.max_completion_tokens = maxCompletionTokensValue
    }

    // Request per-stream usage totals, except for local providers which may
    // not support stream_options.
    if (params.stream && !isLocalProviderUrl(request.baseUrl)) {
      body.stream_options = { include_usage: true }
    }

    const isGithub = isGithubModelsMode()
    const isMistral = isMistralMode()
    const isLocal = isLocalProviderUrl(request.baseUrl)

    const githubEndpointType = getGithubEndpointType(request.baseUrl)
    const isGithubCopilot = isGithub && githubEndpointType === 'copilot'
    const isGithubModels = isGithub && (githubEndpointType === 'models' || githubEndpointType === 'custom')

    // These providers take the legacy max_tokens field instead of
    // max_completion_tokens — rename it back.
    if ((isGithub || isMistral || isLocal) && body.max_completion_tokens !== undefined) {
      body.max_tokens = body.max_completion_tokens
      delete body.max_completion_tokens
    }

    // mistral and gemini don't recognize body.store — Gemini returns 400
    // "Invalid JSON payload received. Unknown name 'store': Cannot find field."
    if (isMistral || isGeminiMode()) {
      delete body.store
    }

    if (params.temperature !== undefined) body.temperature = params.temperature
    if (params.top_p !== undefined) body.top_p = params.top_p

    if (params.tools && params.tools.length > 0) {
      const converted = convertTools(
        params.tools as Array<{
          name: string
          description?: string
          input_schema?: Record<string, unknown>
        }>,
      )
      if (converted.length > 0) {
        body.tools = converted
        if (params.tool_choice) {
          // Map Anthropic tool_choice variants onto the OpenAI equivalents
          // (Anthropic 'any' == OpenAI 'required').
          const tc = params.tool_choice as { type?: string; name?: string }
          if (tc.type === 'auto') {
            body.tool_choice = 'auto'
          } else if (tc.type === 'tool' && tc.name) {
            body.tool_choice = {
              type: 'function',
              function: { name: tc.name },
            }
          } else if (tc.type === 'any') {
            body.tool_choice = 'required'
          } else if (tc.type === 'none') {
            body.tool_choice = 'none'
          }
        }
      }
    }

    const headers: Record<string, string> = {
      'Content-Type': 'application/json',
      ...this.defaultHeaders,
      ...filterAnthropicHeaders(options?.headers),
    }

    const isGemini = isGeminiMode()
    const isMiniMax = !!process.env.MINIMAX_API_KEY
    // Precedence: explicit provider override > OPENAI_API_KEY > MiniMax key.
    const apiKey =
      this.providerOverride?.apiKey ??
      process.env.OPENAI_API_KEY ??
      (isMiniMax ? process.env.MINIMAX_API_KEY : '')
    // Detect Azure endpoints by hostname (not raw URL) to prevent bypass via
    // path segments like https://evil.com/cognitiveservices.azure.com/
    let isAzure = false
    try {
      const { hostname } = new URL(request.baseUrl)
      isAzure = hostname.endsWith('.azure.com') &&
        (hostname.includes('cognitiveservices') || hostname.includes('openai') || hostname.includes('services.ai'))
    } catch { /* malformed URL — not Azure */ }

    if (apiKey) {
      if (isAzure) {
        // Azure uses api-key header instead of Bearer token
        headers['api-key'] = apiKey
      } else {
        headers.Authorization = `Bearer ${apiKey}`
      }
    } else if (isGemini) {
      // No static key: fall back to resolved Gemini OAuth/ADC credentials.
      const geminiCredential = await resolveGeminiCredential(process.env)
      if (geminiCredential.kind !== 'none') {
        headers.Authorization = `Bearer ${geminiCredential.credential}`
        if (geminiCredential.kind !== 'api-key' && 'projectId' in geminiCredential && geminiCredential.projectId) {
          headers['x-goog-user-project'] = geminiCredential.projectId
        }
      }
    }

    if (isGithubCopilot) {
      Object.assign(headers, COPILOT_HEADERS)
    } else if (isGithubModels) {
      headers['Accept'] = 'application/vnd.github+json'
      headers['X-GitHub-Api-Version'] = '2022-11-28'
    }

    // Build the chat completions URL
    // Azure Cognitive Services / Azure OpenAI require a deployment-specific path
    // and an api-version query parameter.
    // Standard format: {base}/openai/deployments/{model}/chat/completions?api-version={version}
    // Non-Azure: {base}/chat/completions
    let chatCompletionsUrl: string
    if (isAzure) {
      const apiVersion = process.env.AZURE_OPENAI_API_VERSION ?? '2024-12-01-preview'
      const deployment = request.resolvedModel ?? process.env.OPENAI_MODEL ?? 'gpt-4o'
      // If base URL already contains /deployments/, use it as-is with api-version
      if (/\/deployments\//i.test(request.baseUrl)) {
        const base = request.baseUrl.replace(/\/+$/, '')
        chatCompletionsUrl = `${base}/chat/completions?api-version=${apiVersion}`
      } else {
        // Strip trailing /v1 or /openai/v1 if present, then build Azure path
        const base = request.baseUrl.replace(/\/(openai\/)?v1\/?$/, '').replace(/\/+$/, '')
        chatCompletionsUrl = `${base}/openai/deployments/${deployment}/chat/completions?api-version=${apiVersion}`
      }
    } else {
      chatCompletionsUrl = `${request.baseUrl}/chat/completions`
    }

    const fetchInit = {
      method: 'POST' as const,
      headers,
      body: JSON.stringify(body),
      signal: options?.signal,
    }

    // Only GitHub gets retries (for 429s); everyone else gets one attempt.
    const maxAttempts = isGithub ? GITHUB_429_MAX_RETRIES : 1

    // Classifies a fetch/transport failure, logs it (with URL and secrets
    // redacted), and rethrows as a 503 APIError. Aborts propagate unchanged.
    const throwClassifiedTransportError = (
      error: unknown,
      requestUrl: string,
    ): never => {
      if (options?.signal?.aborted) {
        throw error
      }

      const failure = classifyOpenAINetworkFailure(error, {
        url: requestUrl,
      })
      const redactedUrl = redactUrlForDiagnostics(requestUrl)
      const safeMessage =
        redactSecretValueForDisplay(
          failure.message,
          process.env as SecretValueSource,
        ) || 'Request failed'

      logForDebugging(
        `[OpenAIShim] transport failure category=${failure.category} retryable=${failure.retryable} code=${failure.code ?? 'unknown'} method=POST url=${redactedUrl} model=${request.resolvedModel} message=${safeMessage}`,
        { level: 'warn' },
      )

      throw APIError.generate(
        503,
        undefined,
        buildOpenAICompatibilityErrorMessage(
          `OpenAI API transport error: ${safeMessage}${failure.code ? ` (code=${failure.code})` : ''}`,
          failure,
        ),
        new Headers(),
      )
    }

    // Classifies a non-2xx HTTP response, logs it, and rethrows as an
    // APIError carrying the original status, parsed body, and headers.
    const throwClassifiedHttpError = (
      status: number,
      errorBody: string,
      parsedBody: object | undefined,
      responseHeaders: Headers,
      requestUrl: string,
      rateHint = '',
    ): never => {
      const failure = classifyOpenAIHttpFailure({
        status,
        body: errorBody,
      })
      const redactedUrl = redactUrlForDiagnostics(requestUrl)

      logForDebugging(
        `[OpenAIShim] request failed category=${failure.category} retryable=${failure.retryable} status=${status} method=POST url=${redactedUrl} model=${request.resolvedModel}`,
        { level: 'warn' },
      )

      throw APIError.generate(
        status,
        parsedBody,
        buildOpenAICompatibilityErrorMessage(
          `OpenAI API error ${status}: ${errorBody}${rateHint}`,
          failure,
        ),
        responseHeaders,
      )
    }

    let response: Response | undefined
    for (let attempt = 0; attempt < maxAttempts; attempt++) {
      try {
        response = await fetchWithProxyRetry(chatCompletionsUrl, fetchInit)
      } catch (error) {
        // Recognize aborts across runtimes: signal state, DOMException, or
        // any error object whose name is 'AbortError'.
        const isAbortError =
          fetchInit.signal?.aborted === true ||
          (typeof DOMException !== 'undefined' &&
            error instanceof DOMException &&
            error.name === 'AbortError') ||
          (typeof error === 'object' &&
            error !== null &&
            'name' in error &&
            error.name === 'AbortError')

        if (isAbortError) {
          throw error
        }

        throwClassifiedTransportError(error, chatCompletionsUrl)
      }

      if (response.ok) {
        return response
      }

      // GitHub 429: drain the body, back off exponentially (capped), retry.
      if (
        isGithub &&
        response.status === 429 &&
        attempt < maxAttempts - 1
      ) {
        await response.text().catch(() => {})
        const delaySec = Math.min(
          GITHUB_429_BASE_DELAY_SEC * 2 ** attempt,
          GITHUB_429_MAX_DELAY_SEC,
        )
        await sleepMs(delaySec * 1000)
        continue
      }
      // Read body exactly once here — Response body is a stream that can only
      // be consumed a single time.
      const errorBody = await response.text().catch(() => 'unknown error')
      const rateHint =
        isGithub && response.status === 429 ? formatRetryAfterHint(response) : ''

      // If GitHub Copilot returns error about /chat/completions,
      // try the /responses endpoint (needed for GPT-5+ models)
      if (isGithub && response.status === 400) {
        if (errorBody.includes('/chat/completions') || errorBody.includes('not accessible')) {
          const responsesUrl = `${request.baseUrl}/responses`
          const responsesBody: Record<string, unknown> = {
            model: request.resolvedModel,
            input: convertAnthropicMessagesToResponsesInput(
              params.messages as Array<{
                role?: string
                message?: { role?: string; content?: unknown }
                content?: unknown
              }>,
            ),
            stream: params.stream ?? false,
            store: false,
          }

          // The Responses API rejects an empty input array — substitute a
          // minimal empty user message.
          if (!Array.isArray(responsesBody.input) || responsesBody.input.length === 0) {
            responsesBody.input = [
              {
                type: 'message',
                role: 'user',
                content: [{ type: 'input_text', text: '' }],
              },
            ]
          }

          // System prompt maps to Responses-API `instructions`.
          const systemText = convertSystemPrompt(params.system)
          if (systemText) {
            responsesBody.instructions = systemText
          }

          if (body.max_tokens !== undefined) {
            responsesBody.max_output_tokens = body.max_tokens
          }

          if (params.tools && params.tools.length > 0) {
            const convertedTools = convertToolsToResponsesTools(
              params.tools as Array<{
                name?: string
                description?: string
                input_schema?: Record<string, unknown>
              }>,
            )
            if (convertedTools.length > 0) {
              responsesBody.tools = convertedTools
            }
          }

          let responsesResponse: Response
          try {
            responsesResponse = await fetchWithProxyRetry(responsesUrl, {
              method: 'POST',
              headers,
              body: JSON.stringify(responsesBody),
              signal: options?.signal,
            })
          } catch (error) {
            throwClassifiedTransportError(error, responsesUrl)
          }

          if (responsesResponse.ok) {
            return responsesResponse
          }
          const responsesErrorBody = await responsesResponse.text().catch(() => 'unknown error')
          let responsesErrorResponse: object | undefined
          try { responsesErrorResponse = JSON.parse(responsesErrorBody) } catch { /* raw text */ }
          throwClassifiedHttpError(
            responsesResponse.status,
            responsesErrorBody,
            responsesErrorResponse,
            responsesResponse.headers,
            responsesUrl,
          )
        }
      }

      let errorResponse: object | undefined
      try { errorResponse = JSON.parse(errorBody) } catch { /* raw text */ }
      throwClassifiedHttpError(
        response.status,
        errorBody,
        errorResponse,
        response.headers as unknown as Headers,
        chatCompletionsUrl,
        rateHint,
      )
    }

    // Unreachable in practice (every loop path returns or throws); kept as a
    // defensive guard for the compiler and against future edits.
    throw APIError.generate(
      500, undefined, 'OpenAI shim: request loop exited unexpectedly',
      new Headers(),
    )
  }

  /**
   * Converts a non-streaming OpenAI chat-completion payload into an
   * Anthropic-format message: reasoning_content → thinking block,
   * content (string or text-part array) → text block, tool_calls →
   * tool_use blocks, plus finish_reason and usage mapping.
   */
  private _convertNonStreamingResponse(
    data: {
      id?: string
      model?: string
      choices?: Array<{
        message?: {
          role?: string
          content?:
            | string
            | null
            | Array<{ type?: string; text?: string }>
          reasoning_content?: string | null
          tool_calls?: Array<{
            id: string
            function: { name: string; arguments: string }
            extra_content?: Record<string, unknown>
          }>
        }
        finish_reason?: string
      }>
      usage?: {
        prompt_tokens?: number
        completion_tokens?: number
        prompt_tokens_details?: {
          cached_tokens?: number
        }
      }
    },
    model: string,
  ) {
    const choice = data.choices?.[0]
    const content: Array<Record<string, unknown>> = []

    // Some reasoning models (e.g. GLM-5) put their chain-of-thought in
    // reasoning_content while content stays null. Preserve it as a thinking
    // block, but do not surface it as visible assistant text.
    const reasoningText = choice?.message?.reasoning_content
    if (typeof reasoningText === 'string' && reasoningText) {
      content.push({ type: 'thinking', thinking: reasoningText })
    }
    // Normalize empty-string content to null so no empty text block is emitted.
    const rawContent =
      choice?.message?.content !== '' && choice?.message?.content != null
        ? choice?.message?.content
        : null
    if (typeof rawContent === 'string' && rawContent) {
      content.push({
        type: 'text',
        text: stripLeakedReasoningPreamble(rawContent),
      })
    } else if (Array.isArray(rawContent) && rawContent.length > 0) {
      // Multi-part content: join the text parts into a single text block.
      const parts: string[] = []
      for (const part of rawContent) {
        if (
          part &&
          typeof part === 'object' &&
          part.type === 'text' &&
          typeof part.text === 'string'
        ) {
          parts.push(part.text)
        }
      }
      const joined = parts.join('\n')
      if (joined) {
        content.push({
          type: 'text',
          text: stripLeakedReasoningPreamble(joined),
        })
      }
    }

    if (choice?.message?.tool_calls) {
      for (const tc of choice.message.tool_calls) {
        const input = normalizeToolArguments(
          tc.function.name,
          tc.function.arguments,
        )
        content.push({
          type: 'tool_use',
          id: tc.id,
          name: tc.function.name,
          input,
          ...(tc.extra_content ? { extra_content: tc.extra_content } : {}),
          // Extract Gemini signature from extra_content
          ...((tc.extra_content?.google as any)?.thought_signature
            ? { signature: (tc.extra_content.google as any).thought_signature }
            : {}),
        })
      }
    }

    // finish_reason → Anthropic stop_reason ('content_filter'/'safety' fall
    // through to 'end_turn' but get a visible notice block below).
    const stopReason =
      choice?.finish_reason === 'tool_calls'
        ? 'tool_use'
        : choice?.finish_reason === 'length'
          ? 'max_tokens'
          : 'end_turn'

    if (choice?.finish_reason === 'content_filter' || choice?.finish_reason === 'safety') {
      content.push({
        type: 'text',
        text: '\n\n[Content blocked by provider safety filter]',
      })
    }

    return {
      id: data.id ?? makeMessageId(),
      type: 'message',
      role: 'assistant',
      content,
      model: data.model ?? model,
      stop_reason: stopReason,
      stop_sequence: null,
      usage: {
        input_tokens: data.usage?.prompt_tokens ?? 0,
        output_tokens: data.usage?.completion_tokens ?? 0,
        cache_creation_input_tokens: 0,
        cache_read_input_tokens: data.usage?.prompt_tokens_details?.cached_tokens ?? 0,
      },
    }
  }
}
|
|
|
|
class OpenAIShimBeta {
|
|
messages: OpenAIShimMessages
|
|
reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh'
|
|
|
|
constructor(defaultHeaders: Record<string, string>, reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh', providerOverride?: { model: string; baseURL: string; apiKey: string }) {
|
|
this.messages = new OpenAIShimMessages(defaultHeaders, reasoningEffort, providerOverride)
|
|
this.reasoningEffort = reasoningEffort
|
|
}
|
|
}
|
|
|
|
export function createOpenAIShimClient(options: {
|
|
defaultHeaders?: Record<string, string>
|
|
maxRetries?: number
|
|
timeout?: number
|
|
reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh'
|
|
providerOverride?: { model: string; baseURL: string; apiKey: string }
|
|
}): unknown {
|
|
hydrateGeminiAccessTokenFromSecureStorage()
|
|
hydrateGithubModelsTokenFromSecureStorage()
|
|
|
|
// When Gemini provider is active, map Gemini env vars to OpenAI-compatible ones
|
|
// so the existing providerConfig.ts infrastructure picks them up correctly.
|
|
if (isEnvTruthy(process.env.CLAUDE_CODE_USE_GEMINI)) {
|
|
process.env.OPENAI_BASE_URL ??=
|
|
process.env.GEMINI_BASE_URL ??
|
|
'https://generativelanguage.googleapis.com/v1beta/openai'
|
|
const geminiApiKey =
|
|
process.env.GEMINI_API_KEY ?? process.env.GOOGLE_API_KEY
|
|
if (geminiApiKey && !process.env.OPENAI_API_KEY) {
|
|
process.env.OPENAI_API_KEY = geminiApiKey
|
|
}
|
|
if (process.env.GEMINI_MODEL && !process.env.OPENAI_MODEL) {
|
|
process.env.OPENAI_MODEL = process.env.GEMINI_MODEL
|
|
}
|
|
} else if (isEnvTruthy(process.env.CLAUDE_CODE_USE_MISTRAL)) {
|
|
process.env.OPENAI_BASE_URL =
|
|
process.env.MISTRAL_BASE_URL ?? 'https://api.mistral.ai/v1'
|
|
process.env.OPENAI_API_KEY = process.env.MISTRAL_API_KEY
|
|
if (process.env.MISTRAL_MODEL) {
|
|
process.env.OPENAI_MODEL = process.env.MISTRAL_MODEL
|
|
}
|
|
} else if (isEnvTruthy(process.env.CLAUDE_CODE_USE_GITHUB)) {
|
|
process.env.OPENAI_BASE_URL ??= GITHUB_COPILOT_BASE
|
|
process.env.OPENAI_API_KEY ??=
|
|
process.env.GITHUB_TOKEN ?? process.env.GH_TOKEN ?? ''
|
|
}
|
|
|
|
const beta = new OpenAIShimBeta({
|
|
...(options.defaultHeaders ?? {}),
|
|
}, options.reasoningEffort, options.providerOverride)
|
|
|
|
return {
|
|
beta,
|
|
messages: beta.messages,
|
|
}
|
|
}
|