diff --git a/.env.example b/.env.example
index 540d1d07..1684e1bf 100644
--- a/.env.example
+++ b/.env.example
@@ -149,6 +149,23 @@ ANTHROPIC_API_KEY=sk-ant-your-key-here
 # Use a custom OpenAI-compatible endpoint (optional — defaults to api.openai.com)
 # OPENAI_BASE_URL=https://api.openai.com/v1
 
 
+# Fallback context window size (tokens) when the model is not found in the
+# built-in table (default: 128000). Increase this for models with larger
+# context windows (e.g. 200000 for Claude-sized contexts).
+# CLAUDE_CODE_OPENAI_FALLBACK_CONTEXT_WINDOW=128000
+
+# Per-model context window overrides as a JSON object.
+# Takes precedence over the built-in table, so you can register new or
+# custom models without patching source.
+# Example: CLAUDE_CODE_OPENAI_CONTEXT_WINDOWS={"my-corp/llm-v3":262144,"gpt-4o-mini":128000}
+# CLAUDE_CODE_OPENAI_CONTEXT_WINDOWS=
+
+# Per-model maximum output token overrides as a JSON object.
+# Use this alongside CLAUDE_CODE_OPENAI_CONTEXT_WINDOWS when your model
+# supports a different output limit than what the built-in table specifies.
+# Example: CLAUDE_CODE_OPENAI_MAX_OUTPUT_TOKENS={"my-corp/llm-v3":8192}
+# CLAUDE_CODE_OPENAI_MAX_OUTPUT_TOKENS=
+
 # -----------------------------------------------------------------------------
 # Option 3: Google Gemini
diff --git a/src/utils/context.ts b/src/utils/context.ts
index 370ed5df..2da553ee 100644
--- a/src/utils/context.ts
+++ b/src/utils/context.ts
@@ -12,7 +12,12 @@ export const MODEL_CONTEXT_WINDOW_DEFAULT = 200_000
 // Fallback context window for unknown 3P models. Must be large enough that
 // the effective context (this minus output token reservation) stays positive,
 // otherwise auto-compact fires on every message (issue #635).
-export const OPENAI_FALLBACK_CONTEXT_WINDOW = 128_000
+// Override via CLAUDE_CODE_OPENAI_FALLBACK_CONTEXT_WINDOW env var to avoid
+// hardcoding when deploying models not yet in openaiContextWindows.ts.
+export const OPENAI_FALLBACK_CONTEXT_WINDOW = (() => {
+  const v = parseInt(process.env.CLAUDE_CODE_OPENAI_FALLBACK_CONTEXT_WINDOW ?? '', 10)
+  return !isNaN(v) && v > 0 ? v : 128_000
+})()
 
 // Maximum output tokens for compact operations
 export const COMPACT_MAX_OUTPUT_TOKENS = 20_000
diff --git a/src/utils/model/openaiContextWindows.ts b/src/utils/model/openaiContextWindows.ts
index b7b41425..20cf5e4c 100644
--- a/src/utils/model/openaiContextWindows.ts
+++ b/src/utils/model/openaiContextWindows.ts
@@ -413,16 +413,51 @@ const OPENAI_MAX_OUTPUT_TOKENS: Record<string, number> = {
   'moonshot-v1-128k': 32_768,
 }
 
-function lookupByModel<T>(table: Record<string, T>, model: string): T | undefined {
+// External context-window overrides loaded once at startup.
+// Set CLAUDE_CODE_OPENAI_CONTEXT_WINDOWS to a JSON object mapping model name
+// → context-window token count to add or override entries without editing
+// this file. Example:
+// CLAUDE_CODE_OPENAI_CONTEXT_WINDOWS='{"my-corp/llm-v2":200000}'
+const OPENAI_EXTERNAL_CONTEXT_WINDOWS: Record<string, number> = (() => {
+  try {
+    const raw = process.env.CLAUDE_CODE_OPENAI_CONTEXT_WINDOWS
+    if (raw) {
+      const parsed = JSON.parse(raw)
+      if (typeof parsed === 'object' && parsed !== null) return parsed as Record<string, number>
+    }
+  } catch { /* ignore malformed JSON */ }
+  return {}
+})()
+
+// External max-output-token overrides.
+// Set CLAUDE_CODE_OPENAI_MAX_OUTPUT_TOKENS to a JSON object mapping model name
+// → max output token count.
+const OPENAI_EXTERNAL_MAX_OUTPUT_TOKENS: Record<string, number> = (() => {
+  try {
+    const raw = process.env.CLAUDE_CODE_OPENAI_MAX_OUTPUT_TOKENS
+    if (raw) {
+      const parsed = JSON.parse(raw)
+      if (typeof parsed === 'object' && parsed !== null) return parsed as Record<string, number>
+    }
+  } catch { /* ignore malformed JSON */ }
+  return {}
+})()
+
+function lookupByModel<T>(table: Record<string, T>, externalTable: Record<string, T>, model: string): T | undefined {
   // Try provider-qualified key first: "{OPENAI_MODEL}:{model}" so that
   // e.g. "github:copilot:claude-haiku-4.5" can have different limits than
   // a bare "claude-haiku-4.5" served by another provider.
   const providerModel = process.env.OPENAI_MODEL?.trim()
   if (providerModel && providerModel !== model) {
     const qualified = `${providerModel}:${model}`
+    // External table takes precedence over the built-in table.
+    const externalQualified = lookupByKey(externalTable, qualified)
+    if (externalQualified !== undefined) return externalQualified
     const qualifiedResult = lookupByKey(table, qualified)
     if (qualifiedResult !== undefined) return qualifiedResult
   }
 
+  const externalResult = lookupByKey(externalTable, model)
+  if (externalResult !== undefined) return externalResult
   return lookupByKey(table, model)
 }
@@ -446,7 +481,7 @@ function lookupByKey<T>(table: Record<string, T>, model: string): T | undefined
  * "gpt-4o-2024-11-20" resolve to the base "gpt-4o" entry.
  */
 export function getOpenAIContextWindow(model: string): number | undefined {
-  return lookupByModel(OPENAI_CONTEXT_WINDOWS, model)
+  return lookupByModel(OPENAI_CONTEXT_WINDOWS, OPENAI_EXTERNAL_CONTEXT_WINDOWS, model)
 }
 
 /**
@@ -454,5 +489,5 @@ export function getOpenAIMaxOutputTokens(model: string): number | undefined {
  * Returns undefined if the model is not in the table.
  */
 export function getOpenAIMaxOutputTokens(model: string): number | undefined {
-  return lookupByModel(OPENAI_MAX_OUTPUT_TOKENS, model)
+  return lookupByModel(OPENAI_MAX_OUTPUT_TOKENS, OPENAI_EXTERNAL_MAX_OUTPUT_TOKENS, model)
 }