fix: make OpenAI fallback context window configurable + support external model lookup (#861)
* fix: make OpenAI fallback context window configurable and support external lookup table Unknown OpenAI-compatible models fell back to a hardcoded 128k constant, causing auto-compact to fire prematurely on models with larger windows (issue #635 follow-up). Two escape hatches are added without touching the built-in table: - CLAUDE_CODE_OPENAI_FALLBACK_CONTEXT_WINDOW (number): overrides the 128k default for all unknown models. - CLAUDE_CODE_OPENAI_CONTEXT_WINDOWS (JSON object): per-model overrides that take precedence over the built-in OPENAI_CONTEXT_WINDOWS table; supports the same provider-qualified and prefix-matching lookup as the built-in path. - CLAUDE_CODE_OPENAI_MAX_OUTPUT_TOKENS (JSON object): same pattern for output token limits. This lets operators deploy new or private models without patching openaiContextWindows.ts on every model release. * docs: add new OpenAI context window env vars to .env.example Document CLAUDE_CODE_OPENAI_FALLBACK_CONTEXT_WINDOW, CLAUDE_CODE_OPENAI_CONTEXT_WINDOWS, and CLAUDE_CODE_OPENAI_MAX_OUTPUT_TOKENS with usage examples. Addresses reviewer feedback on PR #861. --------- Co-authored-by: opencode <dev@example.com>
This commit is contained in:
17
.env.example
17
.env.example
@@ -149,6 +149,23 @@ ANTHROPIC_API_KEY=sk-ant-your-key-here
|
||||
# Use a custom OpenAI-compatible endpoint (optional — defaults to api.openai.com)
|
||||
# OPENAI_BASE_URL=https://api.openai.com/v1
|
||||
|
||||
# Fallback context window size (tokens) when the model is not found in the
|
||||
# built-in table (default: 128000). Increase this for models with larger
|
||||
# context windows (e.g. 200000 for Claude-sized contexts).
|
||||
# CLAUDE_CODE_OPENAI_FALLBACK_CONTEXT_WINDOW=128000
|
||||
|
||||
# Per-model context window overrides as a JSON object.
|
||||
# Takes precedence over the built-in table, so you can register new or
|
||||
# custom models without patching source.
|
||||
# Example: CLAUDE_CODE_OPENAI_CONTEXT_WINDOWS={"my-corp/llm-v3":262144,"gpt-4o-mini":128000}
|
||||
# CLAUDE_CODE_OPENAI_CONTEXT_WINDOWS=
|
||||
|
||||
# Per-model maximum output token overrides as a JSON object.
|
||||
# Use this alongside CLAUDE_CODE_OPENAI_CONTEXT_WINDOWS when your model
|
||||
# supports a different output limit than what the built-in table specifies.
|
||||
# Example: CLAUDE_CODE_OPENAI_MAX_OUTPUT_TOKENS={"my-corp/llm-v3":8192}
|
||||
# CLAUDE_CODE_OPENAI_MAX_OUTPUT_TOKENS=
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Option 3: Google Gemini
|
||||
|
||||
@@ -12,7 +12,12 @@ export const MODEL_CONTEXT_WINDOW_DEFAULT = 200_000
|
||||
// Fallback context window for unknown 3P models. Must be large enough that
|
||||
// the effective context (this minus output token reservation) stays positive,
|
||||
// otherwise auto-compact fires on every message (issue #635).
|
||||
export const OPENAI_FALLBACK_CONTEXT_WINDOW = 128_000
|
||||
// Override via CLAUDE_CODE_OPENAI_FALLBACK_CONTEXT_WINDOW env var to avoid
|
||||
// hardcoding when deploying models not yet in openaiContextWindows.ts.
|
||||
export const OPENAI_FALLBACK_CONTEXT_WINDOW = (() => {
|
||||
const v = parseInt(process.env.CLAUDE_CODE_OPENAI_FALLBACK_CONTEXT_WINDOW ?? '', 10)
|
||||
return !isNaN(v) && v > 0 ? v : 128_000
|
||||
})()
|
||||
|
||||
// Maximum output tokens for compact operations
// NOTE(review): presumably the response-token cap applied when compacting
// conversation history — confirm against the call sites.
export const COMPACT_MAX_OUTPUT_TOKENS = 20_000
|
||||
|
||||
@@ -413,16 +413,51 @@ const OPENAI_MAX_OUTPUT_TOKENS: Record<string, number> = {
|
||||
'moonshot-v1-128k': 32_768,
|
||||
}
|
||||
|
||||
function lookupByModel<T>(table: Record<string, T>, model: string): T | undefined {
|
||||
// External context-window overrides loaded once at startup.
|
||||
// Set CLAUDE_CODE_OPENAI_CONTEXT_WINDOWS to a JSON object mapping model name
|
||||
// → context-window token count to add or override entries without editing
|
||||
// this file. Example:
|
||||
// CLAUDE_CODE_OPENAI_CONTEXT_WINDOWS='{"my-corp/llm-v2":200000}'
|
||||
const OPENAI_EXTERNAL_CONTEXT_WINDOWS: Record<string, number> = (() => {
|
||||
try {
|
||||
const raw = process.env.CLAUDE_CODE_OPENAI_CONTEXT_WINDOWS
|
||||
if (raw) {
|
||||
const parsed = JSON.parse(raw)
|
||||
if (typeof parsed === 'object' && parsed !== null) return parsed as Record<string, number>
|
||||
}
|
||||
} catch { /* ignore malformed JSON */ }
|
||||
return {}
|
||||
})()
|
||||
|
||||
// External max-output-token overrides.
|
||||
// Set CLAUDE_CODE_OPENAI_MAX_OUTPUT_TOKENS to a JSON object mapping model name
|
||||
// → max output token count.
|
||||
const OPENAI_EXTERNAL_MAX_OUTPUT_TOKENS: Record<string, number> = (() => {
|
||||
try {
|
||||
const raw = process.env.CLAUDE_CODE_OPENAI_MAX_OUTPUT_TOKENS
|
||||
if (raw) {
|
||||
const parsed = JSON.parse(raw)
|
||||
if (typeof parsed === 'object' && parsed !== null) return parsed as Record<string, number>
|
||||
}
|
||||
} catch { /* ignore malformed JSON */ }
|
||||
return {}
|
||||
})()
|
||||
|
||||
function lookupByModel<T>(table: Record<string, T>, externalTable: Record<string, T>, model: string): T | undefined {
|
||||
// Try provider-qualified key first: "{OPENAI_MODEL}:{model}" so that
|
||||
// e.g. "github:copilot:claude-haiku-4.5" can have different limits than
|
||||
// a bare "claude-haiku-4.5" served by another provider.
|
||||
const providerModel = process.env.OPENAI_MODEL?.trim()
|
||||
if (providerModel && providerModel !== model) {
|
||||
const qualified = `${providerModel}:${model}`
|
||||
// External table takes precedence over the built-in table.
|
||||
const externalQualified = lookupByKey(externalTable, qualified)
|
||||
if (externalQualified !== undefined) return externalQualified
|
||||
const qualifiedResult = lookupByKey(table, qualified)
|
||||
if (qualifiedResult !== undefined) return qualifiedResult
|
||||
}
|
||||
const externalResult = lookupByKey(externalTable, model)
|
||||
if (externalResult !== undefined) return externalResult
|
||||
return lookupByKey(table, model)
|
||||
}
|
||||
|
||||
@@ -446,7 +481,7 @@ function lookupByKey<T>(table: Record<string, T>, model: string): T | undefined
|
||||
* "gpt-4o-2024-11-20" resolve to the base "gpt-4o" entry.
|
||||
*/
|
||||
export function getOpenAIContextWindow(model: string): number | undefined {
|
||||
return lookupByModel(OPENAI_CONTEXT_WINDOWS, model)
|
||||
return lookupByModel(OPENAI_CONTEXT_WINDOWS, OPENAI_EXTERNAL_CONTEXT_WINDOWS, model)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -454,5 +489,5 @@ export function getOpenAIContextWindow(model: string): number | undefined {
|
||||
* Returns undefined if the model is not in the table.
|
||||
*/
|
||||
export function getOpenAIMaxOutputTokens(model: string): number | undefined {
|
||||
return lookupByModel(OPENAI_MAX_OUTPUT_TOKENS, model)
|
||||
return lookupByModel(OPENAI_MAX_OUTPUT_TOKENS, OPENAI_EXTERNAL_MAX_OUTPUT_TOKENS, model)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user