feat: add context window guard for OpenAI-compatible models
Without this fix, OpenAI models are given incorrect token limits, causing
two problems:
1. getContextWindowForModel() returns 200k (the Claude default), so
auto-compact and warnings trigger at the wrong thresholds (200k instead of 128k)
2. getModelMaxOutputTokens() returns 32k, causing 400 errors from APIs that
cap output tokens lower (e.g. gpt-4o supports at most 16384)
Fix:
- Add openaiContextWindows.ts with known context window sizes and max output
token limits for 30+ models served through OpenAI-compatible APIs (OpenAI,
DeepSeek, Groq, Mistral, Ollama, LM Studio)
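- Hook into getContextWindowForModel() so correct input limits are used
- Hook into getModelMaxOutputTokens() so correct output limits are sent,
preventing 400 "max_tokens is too large" errors
- Both hooks apply only when CLAUDE_CODE_USE_OPENAI or CLAUDE_CODE_USE_GEMINI
is set to '1' or 'true', and fall through to the existing logic when the
model is not in the table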
All existing warning, blocking, and auto-compact infrastructure works
automatically once the correct limits are returned.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -4,6 +4,7 @@ import { getGlobalConfig } from './config.js'
|
||||
import { isEnvTruthy } from './envUtils.js'
|
||||
import { getCanonicalName } from './model/model.js'
|
||||
import { getModelCapability } from './model/modelCapabilities.js'
|
||||
import { getOpenAIContextWindow, getOpenAIMaxOutputTokens } from './model/openaiContextWindows.js'
|
||||
|
||||
// Default model context window size (200k tokens); getContextWindowForModel() returns model-specific sizes where known
|
||||
export const MODEL_CONTEXT_WINDOW_DEFAULT = 200_000
|
||||
@@ -71,6 +72,19 @@ export function getContextWindowForModel(
|
||||
return 1_000_000
|
||||
}
|
||||
|
||||
// OpenAI-compatible provider — use known context windows for the model
|
||||
if (
|
||||
process.env.CLAUDE_CODE_USE_OPENAI === '1' ||
|
||||
process.env.CLAUDE_CODE_USE_OPENAI === 'true' ||
|
||||
process.env.CLAUDE_CODE_USE_GEMINI === '1' ||
|
||||
process.env.CLAUDE_CODE_USE_GEMINI === 'true'
|
||||
) {
|
||||
const openaiWindow = getOpenAIContextWindow(model)
|
||||
if (openaiWindow !== undefined) {
|
||||
return openaiWindow
|
||||
}
|
||||
}
|
||||
|
||||
const cap = getModelCapability(model)
|
||||
if (cap?.max_input_tokens && cap.max_input_tokens >= 100_000) {
|
||||
if (
|
||||
@@ -162,6 +176,19 @@ export function getModelMaxOutputTokens(model: string): {
|
||||
}
|
||||
}
|
||||
|
||||
// OpenAI-compatible provider — use known output limits to avoid 400 errors
|
||||
if (
|
||||
process.env.CLAUDE_CODE_USE_OPENAI === '1' ||
|
||||
process.env.CLAUDE_CODE_USE_OPENAI === 'true' ||
|
||||
process.env.CLAUDE_CODE_USE_GEMINI === '1' ||
|
||||
process.env.CLAUDE_CODE_USE_GEMINI === 'true'
|
||||
) {
|
||||
const openaiMax = getOpenAIMaxOutputTokens(model)
|
||||
if (openaiMax !== undefined) {
|
||||
return { default: openaiMax, upperLimit: openaiMax }
|
||||
}
|
||||
}
|
||||
|
||||
const m = getCanonicalName(model)
|
||||
|
||||
if (m.includes('opus-4-6')) {
|
||||
|
||||
Reference in New Issue
Block a user