orcs-code/src/utils/context.ts
hika, maeng b750e9e97d fix: make OpenAI fallback context window configurable + support external model lookup (#861)
* fix: make OpenAI fallback context window configurable and support external lookup table

Unknown OpenAI-compatible models fell back to a hardcoded 128k constant,
causing auto-compact to fire prematurely on models with larger windows
(issue #635 follow-up). Three escape hatches are added without touching the
built-in table:

- CLAUDE_CODE_OPENAI_FALLBACK_CONTEXT_WINDOW (number): overrides the 128k
  default for all unknown models.
- CLAUDE_CODE_OPENAI_CONTEXT_WINDOWS (JSON object): per-model overrides that
  take precedence over the built-in OPENAI_CONTEXT_WINDOWS table; they support
  the same provider-qualified and prefix-matching lookup as the built-in path.
- CLAUDE_CODE_OPENAI_MAX_OUTPUT_TOKENS (JSON object): same pattern for output
  token limits.

This lets operators deploy new or private models without patching
openaiContextWindows.ts on every model release.
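
For example, an operator deploying a private model could set (model names and
values illustrative):

    CLAUDE_CODE_OPENAI_CONTEXT_WINDOWS='{"acme/private-model": 262144}'
    CLAUDE_CODE_OPENAI_MAX_OUTPUT_TOKENS='{"acme/private-model": 32768}'
    CLAUDE_CODE_OPENAI_FALLBACK_CONTEXT_WINDOW=200000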

* docs: add new OpenAI context window env vars to .env.example

Document CLAUDE_CODE_OPENAI_FALLBACK_CONTEXT_WINDOW,
CLAUDE_CODE_OPENAI_CONTEXT_WINDOWS, and
CLAUDE_CODE_OPENAI_MAX_OUTPUT_TOKENS with usage examples.

Addresses reviewer feedback on PR #861.

---------

Co-authored-by: opencode <dev@example.com>
2026-04-24 00:34:08 +08:00

// biome-ignore-all assist/source/organizeImports: internal-only import markers must not be reordered
import { CONTEXT_1M_BETA_HEADER } from '../constants/betas.js'
import { getGlobalConfig } from './config.js'
import { isEnvTruthy } from './envUtils.js'
import { getCanonicalName } from './model/model.js'
import { getModelCapability } from './model/modelCapabilities.js'
import { getOpenAIContextWindow, getOpenAIMaxOutputTokens } from './model/openaiContextWindows.js'
// Import path assumed for resolveAntModel (called in the ant-internal branches
// below); adjust to wherever the helper actually lives.
import { resolveAntModel } from './model/antModel.js'
// Model context window size (200k tokens for all models right now)
export const MODEL_CONTEXT_WINDOW_DEFAULT = 200_000
// Fallback context window for unknown 3P models. Must be large enough that
// the effective context (this minus output token reservation) stays positive,
// otherwise auto-compact fires on every message (issue #635).
// Override via CLAUDE_CODE_OPENAI_FALLBACK_CONTEXT_WINDOW env var to avoid
// hardcoding when deploying models not yet in openaiContextWindows.ts.
export const OPENAI_FALLBACK_CONTEXT_WINDOW = (() => {
  const v = parseInt(process.env.CLAUDE_CODE_OPENAI_FALLBACK_CONTEXT_WINDOW ?? '', 10)
  return !isNaN(v) && v > 0 ? v : 128_000
})()
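// Example (value illustrative): CLAUDE_CODE_OPENAI_FALLBACK_CONTEXT_WINDOW=200000
// raises the fallback to 200k for every model missing from the lookup tables.
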
// Maximum output tokens for compact operations
export const COMPACT_MAX_OUTPUT_TOKENS = 20_000
// Default max output tokens
const MAX_OUTPUT_TOKENS_DEFAULT = 32_000
const MAX_OUTPUT_TOKENS_UPPER_LIMIT = 64_000
// Capped default for slot-reservation optimization. BQ p99 output = 4,911
// tokens, so 32k/64k defaults over-reserve 8-16× slot capacity. With the cap
// enabled, <1% of requests hit the limit; those get one clean retry at 64k
// (see query.ts max_output_tokens_escalate). Cap is applied in
// claude.ts:getMaxOutputTokensForModel to avoid the growthbook→betas→context
// import cycle.
export const CAPPED_DEFAULT_MAX_TOKENS = 8_000
export const ESCALATED_MAX_TOKENS = 64_000
/**
 * Check if 1M context is disabled via environment variable.
 * Used by C4E admins to disable 1M context for HIPAA compliance.
 */
export function is1mContextDisabled(): boolean {
  return isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_1M_CONTEXT)
}

export function has1mContext(model: string): boolean {
  if (is1mContextDisabled()) {
    return false
  }
  return /\[1m\]/i.test(model)
}
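// Example: has1mContext('claude-sonnet-4-6[1m]') → true (explicit [1m] suffix
// opt-in); has1mContext('claude-sonnet-4-6') → false.
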
// @[MODEL LAUNCH]: Update this pattern if the new model supports 1M context
export function modelSupports1M(model: string): boolean {
  if (is1mContextDisabled()) {
    return false
  }
  const canonical = getCanonicalName(model)
  return canonical.includes('claude-sonnet-4') || canonical.includes('opus-4-6')
}

export function getContextWindowForModel(
  model: string,
  betas?: string[],
): number {
  // Allow override via environment variable (internal-only).
  // This takes precedence over all other context window resolution, including
  // 1M detection, so users can cap the effective context window for local
  // decisions (auto-compact, etc.) while still using a 1M-capable endpoint.
  if (
    process.env.USER_TYPE === 'ant' &&
    process.env.CLAUDE_CODE_MAX_CONTEXT_TOKENS
  ) {
    const override = parseInt(process.env.CLAUDE_CODE_MAX_CONTEXT_TOKENS, 10)
    if (!isNaN(override) && override > 0) {
      return override
    }
  }
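  // Example (internal-only): USER_TYPE=ant with CLAUDE_CODE_MAX_CONTEXT_TOKENS=500000
  // returns 500_000 here, capping local decisions even on a 1M-capable endpoint.
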
  // [1m] suffix — explicit client-side opt-in, respected over all detection
  if (has1mContext(model)) {
    return 1_000_000
  }

  // OpenAI-compatible provider — use known context windows for the model.
  // Unknown models get a conservative fallback (128k unless overridden via
  // CLAUDE_CODE_OPENAI_FALLBACK_CONTEXT_WINDOW). This was previously 8k, but
  // that caused auto-compact to fire on every turn because the effective
  // context (8k minus output reservation) became negative (issue #635).
  const isOpenAIProvider =
    isEnvTruthy(process.env.CLAUDE_CODE_USE_OPENAI) ||
    isEnvTruthy(process.env.CLAUDE_CODE_USE_GEMINI) ||
    isEnvTruthy(process.env.CLAUDE_CODE_USE_GITHUB) ||
    isEnvTruthy(process.env.CLAUDE_CODE_USE_MISTRAL)
  if (isOpenAIProvider) {
    const openaiWindow = getOpenAIContextWindow(model)
    if (openaiWindow !== undefined) {
      return openaiWindow
    }
    console.error(
      `[context] Warning: model "${model}" not in context window table — using fallback of ${OPENAI_FALLBACK_CONTEXT_WINDOW} tokens. ` +
        'Add it to src/utils/model/openaiContextWindows.ts or set CLAUDE_CODE_OPENAI_CONTEXT_WINDOWS for accurate compaction.',
    )
    return OPENAI_FALLBACK_CONTEXT_WINDOW
  }
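  // Example (name/value illustrative): setting
  //   CLAUDE_CODE_OPENAI_CONTEXT_WINDOWS='{"acme/private-model": 262144}'
  // makes getOpenAIContextWindow() above return 262144 for that model,
  // bypassing both the warning and the fallback.
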
  const cap = getModelCapability(model)
  if (cap?.max_input_tokens && cap.max_input_tokens >= 100_000) {
    if (
      cap.max_input_tokens > MODEL_CONTEXT_WINDOW_DEFAULT &&
      is1mContextDisabled()
    ) {
      return MODEL_CONTEXT_WINDOW_DEFAULT
    }
    return cap.max_input_tokens
  }

  if (betas?.includes(CONTEXT_1M_BETA_HEADER) && modelSupports1M(model)) {
    return 1_000_000
  }

  if (getSonnet1mExpTreatmentEnabled(model)) {
    return 1_000_000
  }

  if (process.env.USER_TYPE === 'ant') {
    const antModel = resolveAntModel(model)
    if (antModel?.contextWindow) {
      return antModel.contextWindow
    }
  }

  return MODEL_CONTEXT_WINDOW_DEFAULT
}
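
// Examples (assuming no env overrides and no capability-table entries):
//   getContextWindowForModel('claude-sonnet-4-6[1m]') → 1_000_000 ([1m] opt-in)
//   getContextWindowForModel('claude-opus-4-6') → 200_000 (MODEL_CONTEXT_WINDOW_DEFAULT)
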
export function getSonnet1mExpTreatmentEnabled(model: string): boolean {
  if (is1mContextDisabled()) {
    return false
  }
  // Only applies to sonnet 4.6 without an explicit [1m] suffix
  if (has1mContext(model)) {
    return false
  }
  if (!getCanonicalName(model).includes('sonnet-4-6')) {
    return false
  }
  return getGlobalConfig().clientDataCache?.['coral_reef_sonnet'] === 'true'
}

/**
 * Calculate context window usage percentage from token usage data.
 * Returns used and remaining percentages, or null values if no usage data.
 */
export function calculateContextPercentages(
  currentUsage: {
    input_tokens: number
    cache_creation_input_tokens: number
    cache_read_input_tokens: number
  } | null,
  contextWindowSize: number,
): { used: number | null; remaining: number | null } {
  if (!currentUsage) {
    return { used: null, remaining: null }
  }

  const totalInputTokens =
    currentUsage.input_tokens +
    currentUsage.cache_creation_input_tokens +
    currentUsage.cache_read_input_tokens
  const usedPercentage = Math.round(
    (totalInputTokens / contextWindowSize) * 100,
  )
  const clampedUsed = Math.min(100, Math.max(0, usedPercentage))
  return {
    used: clampedUsed,
    remaining: 100 - clampedUsed,
  }
}
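
// Worked example (numbers illustrative): input_tokens=150_000,
// cache_creation_input_tokens=20_000, cache_read_input_tokens=10_000 against a
// 200k window → totalInputTokens=180_000 → { used: 90, remaining: 10 }.
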
/**
 * Returns the model's default and upper limit for max output tokens.
 */
export function getModelMaxOutputTokens(model: string): {
  default: number
  upperLimit: number
} {
  let defaultTokens: number
  let upperLimit: number

  if (process.env.USER_TYPE === 'ant') {
    const antModel = resolveAntModel(model.toLowerCase())
    if (antModel) {
      defaultTokens = antModel.defaultMaxTokens ?? MAX_OUTPUT_TOKENS_DEFAULT
      upperLimit = antModel.upperMaxTokensLimit ?? MAX_OUTPUT_TOKENS_UPPER_LIMIT
      return { default: defaultTokens, upperLimit }
    }
  }

  // OpenAI-compatible provider — use known output limits to avoid 400 errors
  if (
    isEnvTruthy(process.env.CLAUDE_CODE_USE_OPENAI) ||
    isEnvTruthy(process.env.CLAUDE_CODE_USE_GEMINI) ||
    isEnvTruthy(process.env.CLAUDE_CODE_USE_GITHUB) ||
    isEnvTruthy(process.env.CLAUDE_CODE_USE_MISTRAL)
  ) {
    const openaiMax = getOpenAIMaxOutputTokens(model)
    if (openaiMax !== undefined) {
      return { default: openaiMax, upperLimit: openaiMax }
    }
  }

  const m = getCanonicalName(model)
  if (m.includes('opus-4-6')) {
    defaultTokens = 64_000
    upperLimit = 128_000
  } else if (m.includes('sonnet-4-6')) {
    defaultTokens = 32_000
    upperLimit = 128_000
  } else if (
    m.includes('opus-4-5') ||
    m.includes('sonnet-4') ||
    m.includes('haiku-4')
  ) {
    defaultTokens = 32_000
    upperLimit = 64_000
  } else if (m.includes('opus-4-1') || m.includes('opus-4')) {
    defaultTokens = 32_000
    upperLimit = 32_000
  } else if (m.includes('claude-3-opus')) {
    defaultTokens = 4_096
    upperLimit = 4_096
  } else if (m.includes('claude-3-sonnet')) {
    defaultTokens = 8_192
    upperLimit = 8_192
  } else if (m.includes('claude-3-haiku')) {
    defaultTokens = 4_096
    upperLimit = 4_096
  } else if (m.includes('3-5-sonnet') || m.includes('3-5-haiku')) {
    defaultTokens = 8_192
    upperLimit = 8_192
  } else if (m.includes('3-7-sonnet')) {
    defaultTokens = 32_000
    upperLimit = 64_000
  } else {
    defaultTokens = MAX_OUTPUT_TOKENS_DEFAULT
    upperLimit = MAX_OUTPUT_TOKENS_UPPER_LIMIT
  }

  const cap = getModelCapability(model)
  if (cap?.max_tokens && cap.max_tokens >= 4_096) {
    upperLimit = cap.max_tokens
    defaultTokens = Math.min(defaultTokens, upperLimit)
  }
  return { default: defaultTokens, upperLimit }
}
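
// Example (assuming no ant resolution, no OpenAI-compatible provider env, and
// no capability-table max_tokens override):
//   getModelMaxOutputTokens('claude-sonnet-4-6') → { default: 32_000, upperLimit: 128_000 }
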
/**
 * Returns the max thinking budget tokens for a given model. The max
 * thinking tokens should be strictly less than the max output tokens.
 *
 * @deprecated Newer models use adaptive thinking rather than a strict
 * thinking token budget.
 */
export function getMaxThinkingTokensForModel(model: string): number {
  return getModelMaxOutputTokens(model).upperLimit - 1
}
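
// Example: for claude-opus-4-6 (upperLimit 128_000, assuming no overrides),
// getMaxThinkingTokensForModel returns 127_999.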