/**
* OpenAI-compatible API shim for Claude Code.
*
* Translates Anthropic SDK calls (anthropic.beta.messages.create) into
* OpenAI-compatible chat completion requests and streams back events
* in the Anthropic streaming format so the rest of the codebase is unaware.
*
* Supports: OpenAI, Azure OpenAI, Ollama, LM Studio, OpenRouter, Together,
* Groq, Fireworks, DeepSeek, Mistral, Gemini (OpenAI-compatible endpoint),
* GitHub Copilot, and any other OpenAI-compatible API.
*
* Environment variables:
* CLAUDE_CODE_USE_OPENAI=1 — enable this provider
* OPENAI_API_KEY=sk-... — API key (optional for local models)
* OPENAI_BASE_URL=http://... — base URL (default: https://api.openai.com/v1)
* OPENAI_MODEL=gpt-4o — default model override
* CODEX_API_KEY / ~/.codex/auth.json — Codex auth for codexplan/codexspark
* CLAUDE_CODE_USE_GEMINI=1 — route via Gemini's OpenAI-compatible endpoint
* (GEMINI_API_KEY / GOOGLE_API_KEY, GEMINI_BASE_URL, GEMINI_MODEL)
* CLAUDE_CODE_USE_MISTRAL=1 — route via Mistral
* (MISTRAL_API_KEY, MISTRAL_BASE_URL, MISTRAL_MODEL)
*
* GitHub Copilot API (api.githubcopilot.com), OpenAI-compatible:
* CLAUDE_CODE_USE_GITHUB=1 — enable GitHub inference (no need for USE_OPENAI)
* GITHUB_TOKEN or GH_TOKEN — Copilot API token (mapped to Bearer auth)
* OPENAI_MODEL — optional; use github:copilot or openai/gpt-4.1 style IDs
*/
import { APIError } from '@anthropic-ai/sdk'
import { isEnvTruthy } from '../../utils/envUtils.js'
import { resolveGeminiCredential } from '../../utils/geminiAuth.js'
import { hydrateGeminiAccessTokenFromSecureStorage } from '../../utils/geminiCredentials.js'
import { hydrateGithubModelsTokenFromSecureStorage } from '../../utils/githubModelsCredentials.js'
import {
looksLikeLeakedReasoningPrefix,
shouldBufferPotentialReasoningPrefix,
stripLeakedReasoningPreamble,
} from './reasoningLeakSanitizer.js'
import {
codexStreamToAnthropic,
collectCodexCompletedResponse,
convertAnthropicMessagesToResponsesInput,
convertCodexResponseToAnthropicMessage,
convertToolsToResponsesTools,
performCodexRequest,
type AnthropicStreamEvent,
type AnthropicUsage,
type ShimCreateParams,
} from './codexShim.js'
import {
isLocalProviderUrl,
resolveCodexApiCredentials,
resolveProviderRequest,
getGithubEndpointType,
} from './providerConfig.js'
import { sanitizeSchemaForOpenAICompat } from '../../utils/schemaSanitizer.js'
import { redactSecretValueForDisplay } from '../../utils/providerProfile.js'
import {
normalizeToolArguments,
hasToolFieldMapping,
} from './toolArgumentNormalization.js'
type SecretValueSource = Partial<{
OPENAI_API_KEY: string
CODEX_API_KEY: string
GEMINI_API_KEY: string
GOOGLE_API_KEY: string
GEMINI_ACCESS_TOKEN: string
MISTRAL_API_KEY: string
}>
const GITHUB_COPILOT_BASE = 'https://api.githubcopilot.com'
const GITHUB_429_MAX_RETRIES = 3
const GITHUB_429_BASE_DELAY_SEC = 1
const GITHUB_429_MAX_DELAY_SEC = 32
const GEMINI_API_HOST = 'generativelanguage.googleapis.com'
const COPILOT_HEADERS: Record<string, string> = {
'User-Agent': 'GitHubCopilotChat/0.26.7',
'Editor-Version': 'vscode/1.99.3',
'Editor-Plugin-Version': 'copilot-chat/0.26.7',
'Copilot-Integration-Id': 'vscode-chat',
}
function isGithubModelsMode(): boolean {
return isEnvTruthy(process.env.CLAUDE_CODE_USE_GITHUB)
}
function isMistralMode(): boolean {
return isEnvTruthy(process.env.CLAUDE_CODE_USE_MISTRAL)
}
function filterAnthropicHeaders(
headers: Record<string, string> | undefined,
): Record<string, string> {
if (!headers) return {}
const filtered: Record<string, string> = {}
for (const [key, value] of Object.entries(headers)) {
const lower = key.toLowerCase()
if (
lower.startsWith('x-anthropic') ||
lower.startsWith('anthropic-') ||
lower.startsWith('x-claude') ||
lower === 'x-app' ||
lower === 'x-client-app' ||
lower === 'authorization' ||
lower === 'x-api-key' ||
lower === 'api-key'
) {
continue
}
filtered[key] = value
}
return filtered
}
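// A hedged illustration of the filter (values are placeholders, not real
// credentials): Anthropic- and auth-specific headers are dropped by their
// lowercased names; everything else passes through untouched.
//   filterAnthropicHeaders({
//     'anthropic-version': '2023-06-01',
//     'X-Api-Key': 'sk-ant-placeholder',
//     'User-Agent': 'claude-cli/1.0',
//   })
//   → { 'User-Agent': 'claude-cli/1.0' }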
function hasGeminiApiHost(baseUrl: string | undefined): boolean {
if (!baseUrl) return false
try {
return new URL(baseUrl).hostname.toLowerCase() === GEMINI_API_HOST
} catch {
return false
}
}
function formatRetryAfterHint(response: Response): string {
const ra = response.headers.get('retry-after')
return ra ? ` (Retry-After: ${ra})` : ''
}
function sleepMs(ms: number): Promise<void> {
return new Promise(resolve => setTimeout(resolve, ms))
}
// ---------------------------------------------------------------------------
// Message format conversion: Anthropic → OpenAI
// ---------------------------------------------------------------------------
interface OpenAIMessage {
role: 'system' | 'user' | 'assistant' | 'tool'
content?: string | Array<{ type: string; text?: string; image_url?: { url: string } }>
tool_calls?: Array<{
id: string
type: 'function'
function: { name: string; arguments: string }
extra_content?: Record<string, unknown>
}>
tool_call_id?: string
name?: string
}
interface OpenAITool {
type: 'function'
function: {
name: string
description: string
parameters: Record<string, unknown>
strict?: boolean
}
}
function convertSystemPrompt(
system: unknown,
): string {
if (!system) return ''
if (typeof system === 'string') return system
if (Array.isArray(system)) {
return system
.map((block: { type?: string; text?: string }) =>
block.type === 'text' ? block.text ?? '' : '',
)
.join('\n\n')
}
return String(system)
}
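// Illustrative example: Anthropic system prompts may arrive as an array of
// text blocks; they flatten to a single string joined by blank lines.
//   convertSystemPrompt([
//     { type: 'text', text: 'You are a coding assistant.' },
//     { type: 'text', text: 'Prefer concise answers.' },
//   ])
//   → 'You are a coding assistant.\n\nPrefer concise answers.'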
function convertToolResultContent(content: unknown): string {
if (typeof content === 'string') return content
if (!Array.isArray(content)) return JSON.stringify(content ?? '')
const chunks: string[] = []
for (const block of content) {
if (block?.type === 'text' && typeof block.text === 'string') {
chunks.push(block.text)
continue
}
if (block?.type === 'image') {
const source = block.source
if (source?.type === 'url' && source.url) {
chunks.push(`[Image](${source.url})`)
} else if (source?.type === 'base64') {
chunks.push(`[image:${source.media_type ?? 'unknown'}]`)
} else {
chunks.push('[image]')
}
continue
}
if (typeof block?.text === 'string') {
chunks.push(block.text)
}
}
return chunks.join('\n')
}
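// Illustrative example: mixed tool_result content collapses to newline-joined
// plain text, with image blocks reduced to bracketed placeholders.
//   convertToolResultContent([
//     { type: 'text', text: 'ok' },
//     { type: 'image', source: { type: 'base64', media_type: 'image/png', data: '...' } },
//   ])
//   → 'ok\n[image:image/png]'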
function convertContentBlocks(
content: unknown,
): string | Array<{ type: string; text?: string; image_url?: { url: string } }> {
if (typeof content === 'string') return content
if (!Array.isArray(content)) return String(content ?? '')
const parts: Array<{ type: string; text?: string; image_url?: { url: string } }> = []
for (const block of content) {
switch (block.type) {
case 'text':
parts.push({ type: 'text', text: block.text ?? '' })
break
case 'image': {
const src = block.source
if (src?.type === 'base64') {
parts.push({
type: 'image_url',
image_url: {
url: `data:${src.media_type};base64,${src.data}`,
},
})
} else if (src?.type === 'url') {
parts.push({ type: 'image_url', image_url: { url: src.url } })
}
break
}
case 'tool_use':
// handled separately
break
case 'tool_result':
// handled separately
break
case 'thinking':
case 'redacted_thinking':
// Strip thinking blocks for OpenAI-compatible providers.
// These are Anthropic-specific content types that 3P providers
// don't understand. Serializing them as <thinking> text corrupts
// multi-turn context: the model sees the tags as part of its
// previous reply and may mimic or misattribute them.
break
default:
if (block.text) {
parts.push({ type: 'text', text: block.text })
}
}
}
if (parts.length === 0) return ''
if (parts.length === 1 && parts[0].type === 'text') return parts[0].text ?? ''
return parts
}
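// Illustrative examples: a lone text block collapses to a bare string, while
// mixed content becomes OpenAI content parts with base64 images rewritten as
// data URLs.
//   convertContentBlocks([{ type: 'text', text: 'hi' }]) → 'hi'
//   convertContentBlocks([
//     { type: 'text', text: 'look:' },
//     { type: 'image', source: { type: 'base64', media_type: 'image/png', data: 'AAAA' } },
//   ])
//   → [{ type: 'text', text: 'look:' },
//      { type: 'image_url', image_url: { url: 'data:image/png;base64,AAAA' } }]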
function isGeminiMode(): boolean {
return (
isEnvTruthy(process.env.CLAUDE_CODE_USE_GEMINI) ||
hasGeminiApiHost(process.env.OPENAI_BASE_URL)
)
}
function convertMessages(
messages: Array<{ role: string; message?: { role?: string; content?: unknown }; content?: unknown }>,
system: unknown,
): OpenAIMessage[] {
const result: OpenAIMessage[] = []
// System message first
const sysText = convertSystemPrompt(system)
if (sysText) {
result.push({ role: 'system', content: sysText })
}
for (const msg of messages) {
// Claude Code wraps messages in { role, message: { role, content } }
const inner = msg.message ?? msg
const role = (inner as { role?: string }).role ?? msg.role
const content = (inner as { content?: unknown }).content
if (role === 'user') {
// Check for tool_result blocks in user messages
if (Array.isArray(content)) {
const toolResults = content.filter((b: { type?: string }) => b.type === 'tool_result')
const otherContent = content.filter((b: { type?: string }) => b.type !== 'tool_result')
// Emit tool results as tool messages
for (const tr of toolResults) {
const trContent = convertToolResultContent(tr.content)
result.push({
role: 'tool',
tool_call_id: tr.tool_use_id ?? 'unknown',
content: tr.is_error ? `Error: ${trContent}` : trContent,
})
}
// Emit remaining user content
if (otherContent.length > 0) {
result.push({
role: 'user',
content: convertContentBlocks(otherContent),
})
}
} else {
result.push({
role: 'user',
content: convertContentBlocks(content),
})
}
} else if (role === 'assistant') {
// Check for tool_use blocks
if (Array.isArray(content)) {
const toolUses = content.filter((b: { type?: string }) => b.type === 'tool_use')
const thinkingBlock = content.find((b: { type?: string }) => b.type === 'thinking')
const textContent = content.filter(
(b: { type?: string }) => b.type !== 'tool_use' && b.type !== 'thinking',
)
const assistantMsg: OpenAIMessage = {
role: 'assistant',
content: (() => {
const c = convertContentBlocks(textContent)
return typeof c === 'string' ? c : Array.isArray(c) ? c.map((p: { text?: string }) => p.text ?? '').join('') : ''
})(),
}
if (toolUses.length > 0) {
assistantMsg.tool_calls = toolUses.map(
(tu: {
id?: string
name?: string
input?: unknown
extra_content?: Record<string, unknown>
signature?: string
}, index) => {
const toolCall: NonNullable<OpenAIMessage['tool_calls']>[number] = {
id: tu.id ?? `call_${crypto.randomUUID().replace(/-/g, '')}`,
type: 'function' as const,
function: {
name: tu.name ?? 'unknown',
arguments:
typeof tu.input === 'string'
? tu.input
: JSON.stringify(tu.input ?? {}),
},
}
// Preserve existing extra_content if present
if (tu.extra_content) {
toolCall.extra_content = { ...tu.extra_content }
}
// Handle Gemini thought_signature
if (isGeminiMode()) {
// Prefer a signature provided in the tool_use block itself (e.g. from a
// previous turn/step); otherwise fall back to thinkingBlock.signature for
// ALL tool calls in the same assistant turn. The API requires the same
// signature on every replayed function call part in a parallel set.
const signature = tu.signature ?? (thinkingBlock as any)?.signature
// Merge into existing google-specific metadata if present
const existingGoogle = (toolCall.extra_content?.google as Record<string, unknown>) ?? {}
toolCall.extra_content = {
...toolCall.extra_content,
google: {
...existingGoogle,
thought_signature: signature ?? 'skip_thought_signature_validator'
}
}
}
return toolCall
},
)
}
result.push(assistantMsg)
} else {
result.push({
role: 'assistant',
content: (() => {
const c = convertContentBlocks(content)
return typeof c === 'string' ? c : Array.isArray(c) ? c.map((p: { text?: string }) => p.text ?? '').join('') : ''
})(),
})
}
}
}
// Coalescing pass: merge consecutive messages of the same role.
// OpenAI/vLLM/Ollama require strict user↔assistant alternation.
// Multiple consecutive tool messages are allowed (assistant → tool* → user).
// Consecutive user or assistant messages must be merged to avoid Jinja
// template errors like "roles must alternate" (Devstral, Mistral models).
const coalesced: OpenAIMessage[] = []
for (const msg of result) {
const prev = coalesced[coalesced.length - 1]
if (prev && prev.role === msg.role && msg.role !== 'tool' && msg.role !== 'system') {
const prevContent = prev.content
const curContent = msg.content
if (typeof prevContent === 'string' && typeof curContent === 'string') {
prev.content = prevContent + (prevContent && curContent ? '\n' : '') + curContent
} else {
const toArray = (
c: string | Array<{ type: string; text?: string; image_url?: { url: string } }> | undefined,
): Array<{ type: string; text?: string; image_url?: { url: string } }> => {
if (!c) return []
if (typeof c === 'string') return c ? [{ type: 'text', text: c }] : []
return c
}
prev.content = [...toArray(prevContent), ...toArray(curContent)]
}
if (msg.tool_calls?.length) {
prev.tool_calls = [...(prev.tool_calls ?? []), ...msg.tool_calls]
}
} else {
coalesced.push(msg)
}
}
return coalesced
}
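// A sketch of the overall transformation (shapes only; ids are illustrative):
// the system prompt is emitted first, and a user turn carrying a tool_result
// splits into a `tool` message plus the remaining user content.
//   convertMessages([
//     { role: 'user', message: { role: 'user', content: [
//       { type: 'tool_result', tool_use_id: 'call_1', content: 'done' },
//       { type: 'text', text: 'continue' },
//     ] } },
//   ], 'Be brief.')
//   → [
//     { role: 'system', content: 'Be brief.' },
//     { role: 'tool', tool_call_id: 'call_1', content: 'done' },
//     { role: 'user', content: 'continue' },
//   ]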
/**
 * Normalizes a tool schema for OpenAI-compatible endpoints. In strict mode,
 * `required` is filtered down to keys that actually exist in `properties`
 * (optional fields stay optional; the previous behaviour of listing every
 * property as required made strict providers reject tool calls that
 * legitimately omitted arguments) and `additionalProperties: false` is set,
 * as strict mode demands. The normalization recurses through nested
 * properties, array items, and anyOf/oneOf/allOf combinators.
 */
function normalizeSchemaForOpenAI(
schema: Record<string, unknown>,
strict = true,
): Record<string, unknown> {
const record = sanitizeSchemaForOpenAICompat(schema)
if (record.type === 'object' && record.properties) {
const properties = record.properties as Record<string, Record<string, unknown>>
const existingRequired = Array.isArray(record.required) ? record.required as string[] : []
// Recurse into each property
const normalizedProps: Record<string, unknown> = {}
for (const [key, value] of Object.entries(properties)) {
normalizedProps[key] = normalizeSchemaForOpenAI(
value as Record<string, unknown>,
strict,
)
}
record.properties = normalizedProps
if (strict) {
// Keep only the properties that were originally marked required in the schema.
// Adding every property to required[] (the previous behaviour) caused strict
// OpenAI-compatible providers (Groq, Azure, etc.) to reject tool calls because
// the model correctly omits optional arguments — but the provider treats them
// as missing required fields and returns a 400 / tool_use_failed error.
record.required = existingRequired.filter(k => k in normalizedProps)
// additionalProperties: false is still required by strict-mode providers.
record.additionalProperties = false
} else {
// For Gemini: keep only existing required keys that are present in properties
record.required = existingRequired.filter(k => k in normalizedProps)
}
}
// Recurse into array items
if ('items' in record) {
if (Array.isArray(record.items)) {
record.items = (record.items as unknown[]).map(
item => normalizeSchemaForOpenAI(item as Record<string, unknown>, strict),
)
} else {
record.items = normalizeSchemaForOpenAI(record.items as Record<string, unknown>, strict)
}
}
// Recurse into combinators
for (const key of ['anyOf', 'oneOf', 'allOf'] as const) {
if (key in record && Array.isArray(record[key])) {
record[key] = (record[key] as unknown[]).map(
item => normalizeSchemaForOpenAI(item as Record<string, unknown>, strict),
)
}
}
return record
}
function convertTools(
tools: Array<{ name: string; description?: string; input_schema?: Record<string, unknown> }>,
): OpenAITool[] {
const isGemini = isGeminiMode()
return tools
.filter(t => t.name !== 'ToolSearchTool') // Not relevant for OpenAI
.map(t => {
const schema = { ...(t.input_schema ?? { type: 'object', properties: {} }) } as Record<string, unknown>
// For Codex/OpenAI: promote known Agent sub-fields into required[] only if
// they actually exist in properties (Gemini rejects required keys absent from properties).
if (t.name === 'Agent' && schema.properties) {
const props = schema.properties as Record<string, unknown>
if (!Array.isArray(schema.required)) schema.required = []
const req = schema.required as string[]
for (const key of ['message', 'subagent_type']) {
if (key in props && !req.includes(key)) req.push(key)
}
}
return {
type: 'function' as const,
function: {
name: t.name,
description: t.description ?? '',
parameters: normalizeSchemaForOpenAI(schema, !isGemini),
},
}
})
}
// ---------------------------------------------------------------------------
// Streaming: OpenAI SSE → Anthropic stream events
// ---------------------------------------------------------------------------
interface OpenAIStreamChunk {
id: string
object: string
model: string
choices: Array<{
index: number
delta: {
role?: string
content?: string | null
reasoning_content?: string | null
tool_calls?: Array<{
index: number
id?: string
type?: string
function?: { name?: string; arguments?: string }
extra_content?: Record<string, unknown>
}>
}
finish_reason: string | null
}>
usage?: {
prompt_tokens?: number
completion_tokens?: number
total_tokens?: number
prompt_tokens_details?: {
cached_tokens?: number
}
}
}
function makeMessageId(): string {
return `msg_${crypto.randomUUID().replace(/-/g, '')}`
}
function convertChunkUsage(
usage: OpenAIStreamChunk['usage'] | undefined,
): Partial<AnthropicUsage> | undefined {
if (!usage) return undefined
const cached = usage.prompt_tokens_details?.cached_tokens ?? 0
return {
// Subtract cached tokens: OpenAI includes them in prompt_tokens,
// but Anthropic convention treats input_tokens as non-cached only.
input_tokens: (usage.prompt_tokens ?? 0) - cached,
output_tokens: usage.completion_tokens ?? 0,
cache_creation_input_tokens: 0,
cache_read_input_tokens: cached,
}
}
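// Worked example: OpenAI counts cached tokens inside prompt_tokens, so the
// cached share is subtracted to match Anthropic's non-cached input_tokens.
//   convertChunkUsage({
//     prompt_tokens: 120,
//     completion_tokens: 30,
//     prompt_tokens_details: { cached_tokens: 100 },
//   })
//   → { input_tokens: 20, output_tokens: 30,
//       cache_creation_input_tokens: 0, cache_read_input_tokens: 100 }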
const JSON_REPAIR_SUFFIXES = [
'}', '"}', ']}', '"]}', '}}', '"}}', ']}}', '"]}}', '"]}]}', '}]}'
]
function repairPossiblyTruncatedObjectJson(raw: string): string | null {
try {
const parsed = JSON.parse(raw)
return parsed && typeof parsed === 'object' && !Array.isArray(parsed)
? raw
: null
} catch {
for (const combo of JSON_REPAIR_SUFFIXES) {
try {
const repaired = raw + combo
const parsed = JSON.parse(repaired)
if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
return repaired
}
} catch {}
}
return null
}
}
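// Illustrative examples: already-valid object JSON passes through unchanged,
// a truncated object is closed with the first suffix that parses, and
// non-object payloads are rejected with null.
//   repairPossiblyTruncatedObjectJson('{"path":"a.txt"}') → '{"path":"a.txt"}'
//   repairPossiblyTruncatedObjectJson('{"path":"a.txt')  → '{"path":"a.txt"}'  (suffix '"}')
//   repairPossiblyTruncatedObjectJson('[1,2,3]')         → null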
/**
* Async generator that transforms an OpenAI SSE stream into
* Anthropic-format BetaRawMessageStreamEvent objects.
*/
async function* openaiStreamToAnthropic(
response: Response,
model: string,
): AsyncGenerator<AnthropicStreamEvent> {
const messageId = makeMessageId()
let contentBlockIndex = 0
const activeToolCalls = new Map<
number,
{
id: string
name: string
index: number
jsonBuffer: string
normalizeAtStop: boolean
}
>()
let hasEmittedContentStart = false
let hasEmittedThinkingStart = false
let hasClosedThinking = false
let activeTextBuffer = ''
let textBufferMode: 'none' | 'pending' | 'strip' = 'none'
let lastStopReason: 'tool_use' | 'max_tokens' | 'end_turn' | null = null
let hasEmittedFinalUsage = false
let hasProcessedFinishReason = false
// Emit message_start
yield {
type: 'message_start',
message: {
id: messageId,
type: 'message',
role: 'assistant',
content: [],
model,
stop_reason: null,
stop_sequence: null,
usage: {
input_tokens: 0,
output_tokens: 0,
cache_creation_input_tokens: 0,
cache_read_input_tokens: 0,
},
},
}
const reader = response.body?.getReader()
if (!reader) return
const decoder = new TextDecoder()
let buffer = ''
const closeActiveContentBlock = async function* () {
if (!hasEmittedContentStart) return
if (textBufferMode !== 'none') {
const sanitized = stripLeakedReasoningPreamble(activeTextBuffer)
if (sanitized) {
yield {
type: 'content_block_delta',
index: contentBlockIndex,
delta: { type: 'text_delta', text: sanitized },
}
}
}
yield {
type: 'content_block_stop',
index: contentBlockIndex,
}
contentBlockIndex++
hasEmittedContentStart = false
activeTextBuffer = ''
textBufferMode = 'none'
}
try {
while (true) {
const { done, value } = await reader.read()
if (done) break
buffer += decoder.decode(value, { stream: true })
const lines = buffer.split('\n')
buffer = lines.pop() ?? ''
for (const line of lines) {
const trimmed = line.trim()
if (!trimmed || trimmed === 'data: [DONE]') continue
if (!trimmed.startsWith('data: ')) continue
let chunk: OpenAIStreamChunk
try {
chunk = JSON.parse(trimmed.slice(6))
} catch {
continue
}
const chunkUsage = convertChunkUsage(chunk.usage)
for (const choice of chunk.choices ?? []) {
const delta = choice.delta
// Reasoning models (e.g. GLM-5, DeepSeek) may stream chain-of-thought
// in `reasoning_content` before the actual reply appears in `content`.
// Emit reasoning as a thinking block and content as a text block.
if (delta.reasoning_content != null && delta.reasoning_content !== '') {
if (!hasEmittedThinkingStart) {
yield {
type: 'content_block_start',
index: contentBlockIndex,
content_block: { type: 'thinking', thinking: '' },
}
hasEmittedThinkingStart = true
}
yield {
type: 'content_block_delta',
index: contentBlockIndex,
delta: { type: 'thinking_delta', thinking: delta.reasoning_content },
}
}
// Text content — use != null to distinguish an absent field from an empty
// string, since some providers send "" as the first delta to signal that
// streaming has started.
if (delta.content != null && delta.content !== '') {
// Close thinking block if transitioning from reasoning to content
if (hasEmittedThinkingStart && !hasClosedThinking) {
yield { type: 'content_block_stop', index: contentBlockIndex }
contentBlockIndex++
hasClosedThinking = true
}
activeTextBuffer += delta.content
if (!hasEmittedContentStart) {
yield {
type: 'content_block_start',
index: contentBlockIndex,
content_block: { type: 'text', text: '' },
}
hasEmittedContentStart = true
}
if (
textBufferMode === 'strip' ||
looksLikeLeakedReasoningPrefix(activeTextBuffer)
) {
textBufferMode = 'strip'
continue
}
if (textBufferMode === 'pending') {
if (shouldBufferPotentialReasoningPrefix(activeTextBuffer)) {
continue
}
yield {
type: 'content_block_delta',
index: contentBlockIndex,
delta: {
type: 'text_delta',
text: activeTextBuffer,
},
}
textBufferMode = 'none'
continue
}
if (shouldBufferPotentialReasoningPrefix(activeTextBuffer)) {
textBufferMode = 'pending'
continue
}
yield {
type: 'content_block_delta',
index: contentBlockIndex,
delta: { type: 'text_delta', text: delta.content },
}
}
// Tool calls
if (delta.tool_calls) {
for (const tc of delta.tool_calls) {
if (tc.id && tc.function?.name) {
// New tool call starting — close any open thinking block first
if (hasEmittedThinkingStart && !hasClosedThinking) {
yield { type: 'content_block_stop', index: contentBlockIndex }
contentBlockIndex++
hasClosedThinking = true
}
if (hasEmittedContentStart) {
yield* closeActiveContentBlock()
}
const toolBlockIndex = contentBlockIndex
const initialArguments = tc.function.arguments ?? ''
const normalizeAtStop = hasToolFieldMapping(tc.function.name)
activeToolCalls.set(tc.index, {
id: tc.id,
name: tc.function.name,
index: toolBlockIndex,
jsonBuffer: initialArguments,
normalizeAtStop,
})
yield {
type: 'content_block_start',
index: toolBlockIndex,
content_block: {
type: 'tool_use',
id: tc.id,
name: tc.function.name,
input: {},
...(tc.extra_content ? { extra_content: tc.extra_content } : {}),
// Extract Gemini signature from extra_content
...((tc.extra_content?.google as any)?.thought_signature
? {
signature: (tc.extra_content.google as any)
.thought_signature,
}
: {}),
},
}
contentBlockIndex++
// Emit any initial arguments
if (tc.function.arguments && !normalizeAtStop) {
yield {
type: 'content_block_delta',
index: toolBlockIndex,
delta: {
type: 'input_json_delta',
partial_json: tc.function.arguments,
},
}
}
} else if (tc.function?.arguments) {
// Continuation of existing tool call
const active = activeToolCalls.get(tc.index)
if (active) {
if (tc.function.arguments) {
active.jsonBuffer += tc.function.arguments
}
if (active.normalizeAtStop) {
continue
}
yield {
type: 'content_block_delta',
index: active.index,
delta: {
type: 'input_json_delta',
partial_json: tc.function.arguments,
},
}
}
}
}
}
// Finish — guard ensures we only process finish_reason once even if
// multiple chunks arrive with finish_reason set (some providers do this)
if (choice.finish_reason && !hasProcessedFinishReason) {
hasProcessedFinishReason = true
// Close any open thinking block that wasn't closed by content transition
if (hasEmittedThinkingStart && !hasClosedThinking) {
yield { type: 'content_block_stop', index: contentBlockIndex }
contentBlockIndex++
hasClosedThinking = true
}
// Close any open content blocks
if (hasEmittedContentStart) {
yield* closeActiveContentBlock()
}
// Close active tool calls
for (const [, tc] of activeToolCalls) {
if (tc.normalizeAtStop) {
let partialJson: string
if (choice.finish_reason === 'length') {
// Truncated by max tokens — preserve raw buffer to avoid
// turning an incomplete tool call into an executable command
partialJson = tc.jsonBuffer
} else {
const repairedStructuredJson = repairPossiblyTruncatedObjectJson(
tc.jsonBuffer,
)
if (repairedStructuredJson) {
partialJson = repairedStructuredJson
} else {
partialJson = JSON.stringify(
normalizeToolArguments(tc.name, tc.jsonBuffer),
)
}
}
yield {
type: 'content_block_delta',
index: tc.index,
delta: {
type: 'input_json_delta',
partial_json: partialJson,
},
}
yield { type: 'content_block_stop', index: tc.index }
continue
}
let suffixToAdd = ''
if (tc.jsonBuffer) {
try {
JSON.parse(tc.jsonBuffer)
} catch {
const str = tc.jsonBuffer.trimEnd()
for (const combo of JSON_REPAIR_SUFFIXES) {
try {
JSON.parse(str + combo)
suffixToAdd = combo
break
} catch {}
}
}
}
if (suffixToAdd) {
yield {
type: 'content_block_delta',
index: tc.index,
delta: {
type: 'input_json_delta',
partial_json: suffixToAdd,
},
}
}
yield { type: 'content_block_stop', index: tc.index }
}
const stopReason =
choice.finish_reason === 'tool_calls'
? 'tool_use'
: choice.finish_reason === 'length'
? 'max_tokens'
: 'end_turn'
if (choice.finish_reason === 'content_filter' || choice.finish_reason === 'safety') {
// Gemini/Azure content safety filter blocked the response.
// Emit a visible text block so the user knows why output was truncated.
if (!hasEmittedContentStart) {
yield {
type: 'content_block_start',
index: contentBlockIndex,
content_block: { type: 'text', text: '' },
}
hasEmittedContentStart = true
}
yield {
type: 'content_block_delta',
index: contentBlockIndex,
delta: { type: 'text_delta', text: '\n\n[Content blocked by provider safety filter]' },
}
}
lastStopReason = stopReason
yield {
type: 'message_delta',
delta: { stop_reason: stopReason, stop_sequence: null },
...(chunkUsage ? { usage: chunkUsage } : {}),
}
if (chunkUsage) {
hasEmittedFinalUsage = true
}
}
}
if (
!hasEmittedFinalUsage &&
chunkUsage &&
(chunk.choices?.length ?? 0) === 0 &&
lastStopReason !== null
) {
yield {
type: 'message_delta',
delta: { stop_reason: lastStopReason, stop_sequence: null },
usage: chunkUsage,
}
hasEmittedFinalUsage = true
}
}
}
} finally {
reader.releaseLock()
}
yield { type: 'message_stop' }
}
// ---------------------------------------------------------------------------
// The shim client — duck-types as Anthropic SDK
// ---------------------------------------------------------------------------
class OpenAIShimStream {
private generator: AsyncGenerator<AnthropicStreamEvent>
// The controller property is checked by claude.ts to distinguish streams from error messages
controller = new AbortController()
constructor(generator: AsyncGenerator<AnthropicStreamEvent>) {
this.generator = generator
}
async *[Symbol.asyncIterator]() {
yield* this.generator
}
}
class OpenAIShimMessages {
private defaultHeaders: Record<string, string>
private reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh'
private providerOverride?: { model: string; baseURL: string; apiKey: string }
constructor(defaultHeaders: Record<string, string>, reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh', providerOverride?: { model: string; baseURL: string; apiKey: string }) {
this.defaultHeaders = filterAnthropicHeaders(defaultHeaders)
this.reasoningEffort = reasoningEffort
this.providerOverride = providerOverride
}
create(
params: ShimCreateParams,
options?: { signal?: AbortSignal; headers?: Record<string, string> },
) {
const self = this
let httpResponse: Response | undefined
const promise = (async () => {
const request = resolveProviderRequest({ model: self.providerOverride?.model ?? params.model, baseUrl: self.providerOverride?.baseURL, reasoningEffortOverride: self.reasoningEffort })
const response = await self._doRequest(request, params, options)
httpResponse = response
if (params.stream) {
const isResponsesStream = response.url?.includes('/responses')
return new OpenAIShimStream(
(request.transport === 'codex_responses' || isResponsesStream)
? codexStreamToAnthropic(response, request.resolvedModel)
: openaiStreamToAnthropic(response, request.resolvedModel),
)
}
if (request.transport === 'codex_responses') {
const data = await collectCodexCompletedResponse(response)
return convertCodexResponseToAnthropicMessage(
data,
request.resolvedModel,
)
}
const isResponsesNonStream = response.url?.includes('/responses')
if (isResponsesNonStream || (request.transport === 'chat_completions' && isGithubModelsMode())) {
const contentType = response.headers.get('content-type') ?? ''
if (contentType.includes('application/json')) {
const parsed = await response.json() as Record<string, unknown>
if (
parsed &&
typeof parsed === 'object' &&
('output' in parsed || 'incomplete_details' in parsed)
) {
return convertCodexResponseToAnthropicMessage(
parsed,
request.resolvedModel,
)
}
return self._convertNonStreamingResponse(parsed, request.resolvedModel)
}
}
const contentType = response.headers.get('content-type') ?? ''
if (contentType.includes('application/json')) {
const data = await response.json()
return self._convertNonStreamingResponse(data, request.resolvedModel)
}
const textBody = await response.text().catch(() => '')
throw APIError.generate(
response.status,
undefined,
`OpenAI API error ${response.status}: unexpected response: ${textBody.slice(0, 500)}`,
response.headers as unknown as Headers,
)
})()
; (promise as unknown as Record<string, unknown>).withResponse =
async () => {
const data = await promise
return {
data,
response: httpResponse ?? new Response(),
request_id:
httpResponse?.headers.get('x-request-id') ?? makeMessageId(),
}
}
return promise
}
private async _doRequest(
request: ReturnType<typeof resolveProviderRequest>,
params: ShimCreateParams,
options?: { signal?: AbortSignal; headers?: Record<string, string> },
): Promise<Response> {
const githubEndpointType = getGithubEndpointType(request.baseUrl)
const isGithubMode = isGithubModelsMode()
const isGithubWithCodexTransport = isGithubMode && request.transport === 'codex_responses'
const isGithubCopilotEndpoint = isGithubMode && githubEndpointType === 'copilot'
if (isGithubWithCodexTransport) {
const apiKey = this.providerOverride?.apiKey ?? process.env.OPENAI_API_KEY ?? ''
if (!apiKey) {
throw new Error(
'GitHub Copilot auth is required. Run /onboard-github to sign in.',
)
}
return performCodexRequest({
request,
credentials: {
apiKey,
source: 'env',
},
params,
defaultHeaders: {
...this.defaultHeaders,
...filterAnthropicHeaders(options?.headers),
...COPILOT_HEADERS,
},
signal: options?.signal,
})
}
if (request.transport === 'codex_responses' && !isGithubMode) {
const credentials = resolveCodexApiCredentials()
if (!credentials.apiKey) {
const authHint = credentials.authPath
? ` or place a Codex auth.json at ${credentials.authPath}`
: ''
const safeModel =
redactSecretValueForDisplay(request.requestedModel, process.env as SecretValueSource) ??
'the requested model'
throw new Error(
`Codex auth is required for ${safeModel}. Set CODEX_API_KEY${authHint}.`,
)
}
if (!credentials.accountId) {
throw new Error(
'Codex auth is missing chatgpt_account_id. Re-login with the Codex CLI or set CHATGPT_ACCOUNT_ID/CODEX_ACCOUNT_ID.',
)
}
return performCodexRequest({
request,
credentials,
params,
defaultHeaders: {
...this.defaultHeaders,
...filterAnthropicHeaders(options?.headers),
},
signal: options?.signal,
})
}
return this._doOpenAIRequest(request, params, options)
}
private async _doOpenAIRequest(
request: ReturnType<typeof resolveProviderRequest>,
params: ShimCreateParams,
options?: { signal?: AbortSignal; headers?: Record<string, string> },
): Promise<Response> {
const openaiMessages = convertMessages(
params.messages as Array<{
role: string
message?: { role?: string; content?: unknown }
content?: unknown
}>,
params.system,
)
const body: Record<string, unknown> = {
model: request.resolvedModel,
messages: openaiMessages,
stream: params.stream ?? false,
store: false,
}
// Convert max_tokens to max_completion_tokens for OpenAI API compatibility.
// Azure OpenAI requires max_completion_tokens and does not accept max_tokens.
// Ensure max_tokens is a valid positive number before using it.
const maxTokensValue = typeof params.max_tokens === 'number' && params.max_tokens > 0
? params.max_tokens
: undefined
const maxCompletionTokensValue = typeof (params as Record<string, unknown>).max_completion_tokens === 'number'
? (params as Record<string, unknown>).max_completion_tokens as number
: undefined
if (maxTokensValue !== undefined) {
body.max_completion_tokens = maxTokensValue
} else if (maxCompletionTokensValue !== undefined) {
body.max_completion_tokens = maxCompletionTokensValue
}
if (params.stream && !isLocalProviderUrl(request.baseUrl)) {
body.stream_options = { include_usage: true }
}
const isGithub = isGithubModelsMode()
const isMistral = isMistralMode()
const githubEndpointType = getGithubEndpointType(request.baseUrl)
const isGithubCopilot = isGithub && githubEndpointType === 'copilot'
const isGithubModels = isGithub && (githubEndpointType === 'models' || githubEndpointType === 'custom')
if ((isGithub || isMistral) && body.max_completion_tokens !== undefined) {
body.max_tokens = body.max_completion_tokens
delete body.max_completion_tokens
}
// Mistral also doesn't recognize body.store
if (isMistral) {
delete body.store
}
if (params.temperature !== undefined) body.temperature = params.temperature
if (params.top_p !== undefined) body.top_p = params.top_p
if (params.tools && params.tools.length > 0) {
const converted = convertTools(
params.tools as Array<{
name: string
description?: string
input_schema?: Record<string, unknown>
}>,
)
if (converted.length > 0) {
body.tools = converted
if (params.tool_choice) {
const tc = params.tool_choice as { type?: string; name?: string }
if (tc.type === 'auto') {
body.tool_choice = 'auto'
} else if (tc.type === 'tool' && tc.name) {
body.tool_choice = {
type: 'function',
function: { name: tc.name },
}
} else if (tc.type === 'any') {
body.tool_choice = 'required'
} else if (tc.type === 'none') {
body.tool_choice = 'none'
}
}
}
}
const headers: Record<string, string> = {
'Content-Type': 'application/json',
...this.defaultHeaders,
...filterAnthropicHeaders(options?.headers),
}
const isGemini = isEnvTruthy(process.env.CLAUDE_CODE_USE_GEMINI)
const apiKey = this.providerOverride?.apiKey ?? process.env.OPENAI_API_KEY ?? ''
// Detect Azure endpoints by hostname (not raw URL) to prevent bypass via
// path segments like https://evil.com/cognitiveservices.azure.com/
let isAzure = false
try {
const { hostname } = new URL(request.baseUrl)
isAzure = hostname.endsWith('.azure.com') &&
(hostname.includes('cognitiveservices') || hostname.includes('openai') || hostname.includes('services.ai'))
} catch { /* malformed URL — not Azure */ }
if (apiKey) {
if (isAzure) {
// Azure uses api-key header instead of Bearer token
headers['api-key'] = apiKey
} else {
headers.Authorization = `Bearer ${apiKey}`
}
} else if (isGemini) {
const geminiCredential = await resolveGeminiCredential(process.env)
if (geminiCredential.kind !== 'none') {
headers.Authorization = `Bearer ${geminiCredential.credential}`
if (geminiCredential.kind !== 'api-key' && 'projectId' in geminiCredential && geminiCredential.projectId) {
headers['x-goog-user-project'] = geminiCredential.projectId
}
}
}
if (isGithubCopilot) {
Object.assign(headers, COPILOT_HEADERS)
} else if (isGithubModels) {
headers['Accept'] = 'application/vnd.github+json'
headers['X-GitHub-Api-Version'] = '2022-11-28'
}
// Build the chat completions URL
// Azure Cognitive Services / Azure OpenAI require a deployment-specific path
// and an api-version query parameter.
// Standard format: {base}/openai/deployments/{model}/chat/completions?api-version={version}
// Non-Azure: {base}/chat/completions
let chatCompletionsUrl: string
if (isAzure) {
const apiVersion = process.env.AZURE_OPENAI_API_VERSION ?? '2024-12-01-preview'
const deployment = request.resolvedModel ?? process.env.OPENAI_MODEL ?? 'gpt-4o'
// If base URL already contains /deployments/, use it as-is with api-version
if (/\/deployments\//i.test(request.baseUrl)) {
const base = request.baseUrl.replace(/\/+$/, '')
chatCompletionsUrl = `${base}/chat/completions?api-version=${apiVersion}`
} else {
// Strip trailing /v1 or /openai/v1 if present, then build Azure path
const base = request.baseUrl.replace(/\/(openai\/)?v1\/?$/, '').replace(/\/+$/, '')
chatCompletionsUrl = `${base}/openai/deployments/${deployment}/chat/completions?api-version=${apiVersion}`
}
} else {
chatCompletionsUrl = `${request.baseUrl}/chat/completions`
}
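// Illustrative resulting URLs (hostname, deployment, and api-version are
// placeholders):
//   Azure:     https://my-rg.openai.azure.com
//     → https://my-rg.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-12-01-preview
//   Non-Azure: https://api.openai.com/v1
//     → https://api.openai.com/v1/chat/completions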
const fetchInit = {
method: 'POST' as const,
headers,
body: JSON.stringify(body),
signal: options?.signal,
}
const maxAttempts = isGithub ? GITHUB_429_MAX_RETRIES : 1
let response: Response | undefined
for (let attempt = 0; attempt < maxAttempts; attempt++) {
response = await fetch(chatCompletionsUrl, fetchInit)
if (response.ok) {
return response
}
if (
isGithub &&
response.status === 429 &&
attempt < maxAttempts - 1
) {
await response.text().catch(() => {})
const delaySec = Math.min(
GITHUB_429_BASE_DELAY_SEC * 2 ** attempt,
GITHUB_429_MAX_DELAY_SEC,
)
await sleepMs(delaySec * 1000)
continue
}
// Read body exactly once here — Response body is a stream that can only
// be consumed a single time.
const errorBody = await response.text().catch(() => 'unknown error')
const rateHint =
isGithub && response.status === 429 ? formatRetryAfterHint(response) : ''
// If GitHub Copilot returns an error about /chat/completions, retry against
// the /responses endpoint (needed for GPT-5+ models).
if (isGithub && response.status === 400) {
if (errorBody.includes('/chat/completions') || errorBody.includes('not accessible')) {
const responsesUrl = `${request.baseUrl}/responses`
const responsesBody: Record<string, unknown> = {
model: request.resolvedModel,
input: convertAnthropicMessagesToResponsesInput(
params.messages as Array<{
role?: string
message?: { role?: string; content?: unknown }
content?: unknown
}>,
),
stream: params.stream ?? false,
store: false,
}
if (!Array.isArray(responsesBody.input) || responsesBody.input.length === 0) {
responsesBody.input = [
{
type: 'message',
role: 'user',
content: [{ type: 'input_text', text: '' }],
},
]
}
const systemText = convertSystemPrompt(params.system)
if (systemText) {
responsesBody.instructions = systemText
}
if (body.max_tokens !== undefined) {
responsesBody.max_output_tokens = body.max_tokens
}
if (params.tools && params.tools.length > 0) {
const convertedTools = convertToolsToResponsesTools(
params.tools as Array<{
name?: string
description?: string
input_schema?: Record<string, unknown>
}>,
)
if (convertedTools.length > 0) {
responsesBody.tools = convertedTools
}
}
const responsesResponse = await fetch(responsesUrl, {
method: 'POST',
headers,
body: JSON.stringify(responsesBody),
signal: options?.signal,
})
if (responsesResponse.ok) {
return responsesResponse
}
const responsesErrorBody = await responsesResponse.text().catch(() => 'unknown error')
let responsesErrorResponse: object | undefined
try { responsesErrorResponse = JSON.parse(responsesErrorBody) } catch { /* raw text */ }
throw APIError.generate(
responsesResponse.status,
responsesErrorResponse,
`OpenAI API error ${responsesResponse.status}: ${responsesErrorBody}`,
responsesResponse.headers,
)
}
}
let errorResponse: object | undefined
try { errorResponse = JSON.parse(errorBody) } catch { /* raw text */ }
throw APIError.generate(
response.status,
errorResponse,
`OpenAI API error ${response.status}: ${errorBody}${rateHint}`,
response.headers as unknown as Headers,
)
}
throw APIError.generate(
500, undefined, 'OpenAI shim: request loop exited unexpectedly',
new Headers(),
)
}
private _convertNonStreamingResponse(
data: {
id?: string
model?: string
choices?: Array<{
message?: {
role?: string
content?:
| string
| null
| Array<{ type?: string; text?: string }>
reasoning_content?: string | null
tool_calls?: Array<{
id: string
function: { name: string; arguments: string }
extra_content?: Record<string, unknown>
}>
}
finish_reason?: string
}>
usage?: {
prompt_tokens?: number
completion_tokens?: number
prompt_tokens_details?: {
cached_tokens?: number
}
}
},
model: string,
) {
const choice = data.choices?.[0]
const content: Array<Record<string, unknown>> = []
// Some reasoning models (e.g. GLM-5) put their chain-of-thought in
// reasoning_content while content stays null. Preserve it as a thinking
// block, but do not surface it as visible assistant text.
const reasoningText = choice?.message?.reasoning_content
if (typeof reasoningText === 'string' && reasoningText) {
content.push({ type: 'thinking', thinking: reasoningText })
}
const rawContent =
choice?.message?.content !== '' && choice?.message?.content != null
? choice?.message?.content
: null
if (typeof rawContent === 'string' && rawContent) {
content.push({
type: 'text',
text: stripLeakedReasoningPreamble(rawContent),
})
} else if (Array.isArray(rawContent) && rawContent.length > 0) {
const parts: string[] = []
for (const part of rawContent) {
if (
part &&
typeof part === 'object' &&
part.type === 'text' &&
typeof part.text === 'string'
) {
parts.push(part.text)
}
}
const joined = parts.join('\n')
if (joined) {
content.push({
type: 'text',
text: stripLeakedReasoningPreamble(joined),
})
}
}
if (choice?.message?.tool_calls) {
for (const tc of choice.message.tool_calls) {
const input = normalizeToolArguments(
tc.function.name,
tc.function.arguments,
)
content.push({
type: 'tool_use',
id: tc.id,
name: tc.function.name,
input,
...(tc.extra_content ? { extra_content: tc.extra_content } : {}),
// Extract Gemini signature from extra_content
...((tc.extra_content?.google as any)?.thought_signature
? { signature: (tc.extra_content.google as any).thought_signature }
: {}),
})
}
}
const stopReason =
choice?.finish_reason === 'tool_calls'
? 'tool_use'
: choice?.finish_reason === 'length'
? 'max_tokens'
: 'end_turn'
if (choice?.finish_reason === 'content_filter' || choice?.finish_reason === 'safety') {
content.push({
type: 'text',
text: '\n\n[Content blocked by provider safety filter]',
})
}
return {
id: data.id ?? makeMessageId(),
type: 'message',
role: 'assistant',
content,
model: data.model ?? model,
stop_reason: stopReason,
stop_sequence: null,
usage: {
input_tokens: data.usage?.prompt_tokens ?? 0,
output_tokens: data.usage?.completion_tokens ?? 0,
cache_creation_input_tokens: 0,
cache_read_input_tokens: data.usage?.prompt_tokens_details?.cached_tokens ?? 0,
},
}
}
}
class OpenAIShimBeta {
messages: OpenAIShimMessages
reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh'
constructor(defaultHeaders: Record<string, string>, reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh', providerOverride?: { model: string; baseURL: string; apiKey: string }) {
this.messages = new OpenAIShimMessages(defaultHeaders, reasoningEffort, providerOverride)
this.reasoningEffort = reasoningEffort
}
}
export function createOpenAIShimClient(options: {
defaultHeaders?: Record<string, string>
maxRetries?: number
timeout?: number
reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh'
providerOverride?: { model: string; baseURL: string; apiKey: string }
}): unknown {
hydrateGeminiAccessTokenFromSecureStorage()
hydrateGithubModelsTokenFromSecureStorage()
// When Gemini provider is active, map Gemini env vars to OpenAI-compatible ones
// so the existing providerConfig.ts infrastructure picks them up correctly.
if (isEnvTruthy(process.env.CLAUDE_CODE_USE_GEMINI)) {
process.env.OPENAI_BASE_URL ??=
process.env.GEMINI_BASE_URL ??
'https://generativelanguage.googleapis.com/v1beta/openai'
const geminiApiKey =
process.env.GEMINI_API_KEY ?? process.env.GOOGLE_API_KEY
if (geminiApiKey && !process.env.OPENAI_API_KEY) {
process.env.OPENAI_API_KEY = geminiApiKey
}
if (process.env.GEMINI_MODEL && !process.env.OPENAI_MODEL) {
process.env.OPENAI_MODEL = process.env.GEMINI_MODEL
}
} else if (isEnvTruthy(process.env.CLAUDE_CODE_USE_MISTRAL)) {
process.env.OPENAI_BASE_URL =
process.env.MISTRAL_BASE_URL ?? 'https://api.mistral.ai/v1'
// Guard against assigning undefined: process.env coerces it to the literal
// string 'undefined', which would later be sent as 'Bearer undefined'.
process.env.OPENAI_API_KEY = process.env.MISTRAL_API_KEY ?? ''
if (process.env.MISTRAL_MODEL) {
process.env.OPENAI_MODEL = process.env.MISTRAL_MODEL
}
} else if (isEnvTruthy(process.env.CLAUDE_CODE_USE_GITHUB)) {
process.env.OPENAI_BASE_URL ??= GITHUB_COPILOT_BASE
process.env.OPENAI_API_KEY ??=
process.env.GITHUB_TOKEN ?? process.env.GH_TOKEN ?? ''
}
const beta = new OpenAIShimBeta({
...(options.defaultHeaders ?? {}),
}, options.reasoningEffort, options.providerOverride)
return {
beta,
messages: beta.messages,
}
}
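// Hedged usage sketch (env values are illustrative; the returned client
// duck-types only the Anthropic SDK surface the rest of the codebase calls):
//   process.env.CLAUDE_CODE_USE_OPENAI = '1'
//   process.env.OPENAI_BASE_URL = 'http://localhost:11434/v1' // e.g. Ollama
//   process.env.OPENAI_MODEL = 'llama3.1'
//   const client = createOpenAIShimClient({}) as {
//     beta: { messages: OpenAIShimMessages }
//   }
//   const stream = await client.beta.messages.create({
//     model: 'llama3.1',
//     stream: true,
//     max_tokens: 1024,
//     messages: [{ role: 'user', content: 'hello' }],
//   })
//   for await (const event of stream as AsyncIterable<AnthropicStreamEvent>) {
//     // handle Anthropic-format stream events
//   }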