feat: add context window guard for OpenAI-compatible models

Without this fix, getContextWindowForModel() returns 200k for all OpenAI
models (the Claude default), causing two problems:
  1. Auto-compact/warnings trigger at wrong thresholds (200k instead of 128k)
  2. getModelMaxOutputTokens() returns 32k causing 400 errors from APIs that
     cap output tokens lower (gpt-4o supports max 16384)

Fix:
- Add openaiContextWindows.ts with known context window sizes and max output
  token limits for 30+ OpenAI-compatible models (OpenAI, DeepSeek, Groq,
  Mistral, Ollama, LM Studio)
- Hook into getContextWindowForModel() so correct input limits are used
- Hook into getModelMaxOutputTokens() so correct output limits are sent,
  preventing 400 "max_tokens is too large" errors

All existing warning, blocking, and auto-compact infrastructure works
automatically once the correct limits are returned.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
gnanam1990
2026-04-01 17:42:04 +05:30
parent a3d8ab0fec
commit 4ca94b2454
2 changed files with 159 additions and 0 deletions

View File

@@ -4,6 +4,7 @@ import { getGlobalConfig } from './config.js'
import { isEnvTruthy } from './envUtils.js'
import { getCanonicalName } from './model/model.js'
import { getModelCapability } from './model/modelCapabilities.js'
import { getOpenAIContextWindow, getOpenAIMaxOutputTokens } from './model/openaiContextWindows.js'
// Default model context window size (200k tokens) — used as the fallback when no
// model-specific window is known (e.g. from openaiContextWindows or modelCapabilities).
export const MODEL_CONTEXT_WINDOW_DEFAULT = 200_000
@@ -71,6 +72,19 @@ export function getContextWindowForModel(
return 1_000_000
}
// OpenAI-compatible provider — use known context windows for the model
if (
process.env.CLAUDE_CODE_USE_OPENAI === '1' ||
process.env.CLAUDE_CODE_USE_OPENAI === 'true' ||
process.env.CLAUDE_CODE_USE_GEMINI === '1' ||
process.env.CLAUDE_CODE_USE_GEMINI === 'true'
) {
const openaiWindow = getOpenAIContextWindow(model)
if (openaiWindow !== undefined) {
return openaiWindow
}
}
const cap = getModelCapability(model)
if (cap?.max_input_tokens && cap.max_input_tokens >= 100_000) {
if (
@@ -162,6 +176,19 @@ export function getModelMaxOutputTokens(model: string): {
}
}
// OpenAI-compatible provider — use known output limits to avoid 400 errors
if (
process.env.CLAUDE_CODE_USE_OPENAI === '1' ||
process.env.CLAUDE_CODE_USE_OPENAI === 'true' ||
process.env.CLAUDE_CODE_USE_GEMINI === '1' ||
process.env.CLAUDE_CODE_USE_GEMINI === 'true'
) {
const openaiMax = getOpenAIMaxOutputTokens(model)
if (openaiMax !== undefined) {
return { default: openaiMax, upperLimit: openaiMax }
}
}
const m = getCanonicalName(model)
if (m.includes('opus-4-6')) {