feat: add context window guard for OpenAI-compatible models

Without this fix, getContextWindowForModel() returns 200k (the Claude default) for all OpenAI models, causing two problems:

1. Auto-compact and warnings trigger at the wrong thresholds (200k instead of 128k).
2. getModelMaxOutputTokens() returns 32k, causing 400 errors from APIs that cap output tokens lower (gpt-4o supports a maximum of 16384).

Fix:

- Add openaiContextWindows.ts with known context window sizes and max output token limits for 30+ OpenAI-compatible models (OpenAI, DeepSeek, Groq, Mistral, Ollama, LM Studio).
- Hook into getContextWindowForModel() so the correct input limits are used.
- Hook into getModelMaxOutputTokens() so the correct output limits are sent, preventing 400 "max_tokens is too large" errors.

All existing warning, blocking, and auto-compact infrastructure works automatically once the correct limits are returned.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-01 17:42:04 +05:30
parent a3d8ab0fec
commit 4ca94b2454
2 changed files with 159 additions and 0 deletions
--- a/src/utils/context.ts
+++ b/src/utils/context.ts
@@ -4,6 +4,7 @@ import { getGlobalConfig } from './config.js'
 import { isEnvTruthy } from './envUtils.js'
 import { getCanonicalName } from './model/model.js'
 import { getModelCapability } from './model/modelCapabilities.js'
+import { getOpenAIContextWindow, getOpenAIMaxOutputTokens } from './model/openaiContextWindows.js'

 // Model context window size (200k tokens for all models right now)
 export const MODEL_CONTEXT_WINDOW_DEFAULT = 200_000
@@ -71,6 +72,19 @@ export function getContextWindowForModel(
    return 1_000_000
  }

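+  // OpenAI-compatible provider (CLAUDE_CODE_USE_OPENAI or CLAUDE_CODE_USE_GEMINI
+  // set to '1' or 'true') — use the model's known context window; if the model
+  // is not listed, fall through to the generic logic below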
+  if (
+    process.env.CLAUDE_CODE_USE_OPENAI === '1' ||
+    process.env.CLAUDE_CODE_USE_OPENAI === 'true' ||
+    process.env.CLAUDE_CODE_USE_GEMINI === '1' ||
+    process.env.CLAUDE_CODE_USE_GEMINI === 'true'
+  ) {
+    const openaiWindow = getOpenAIContextWindow(model)
+    if (openaiWindow !== undefined) {
+      return openaiWindow
+    }
+  }
+
  const cap = getModelCapability(model)
  if (cap?.max_input_tokens && cap.max_input_tokens >= 100_000) {
    if (
@@ -162,6 +176,19 @@ export function getModelMaxOutputTokens(model: string): {
    }
  }

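+  // OpenAI-compatible provider (CLAUDE_CODE_USE_OPENAI or CLAUDE_CODE_USE_GEMINI
+  // set to '1' or 'true') — use the model's known output limit as both the
+  // default and the upper limit to avoid 400 errors; if the model is not listed,
+  // fall through to the generic logic below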
+  if (
+    process.env.CLAUDE_CODE_USE_OPENAI === '1' ||
+    process.env.CLAUDE_CODE_USE_OPENAI === 'true' ||
+    process.env.CLAUDE_CODE_USE_GEMINI === '1' ||
+    process.env.CLAUDE_CODE_USE_GEMINI === 'true'
+  ) {
+    const openaiMax = getOpenAIMaxOutputTokens(model)
+    if (openaiMax !== undefined) {
+      return { default: openaiMax, upperLimit: openaiMax }
+    }
+  }
+
  const m = getCanonicalName(model)

  if (m.includes('opus-4-6')) {