fix: add GitHub Copilot model context windows and output limits (#576)
Add context_window and max_output_tokens entries for all models available through the GitHub Copilot proxy (Claude, GPT, Gemini, Grok), sourced from https://api.githubcopilot.com/models. Models are namespaced as "github:copilot:<model>" to avoid collisions with the same model names served by other providers (which may have different limits). A new lookupByKey() helper and qualified-key lookup in lookupByModel() ensure the correct limits are selected when OPENAI_MODEL=github:copilot. Without this, Claude models on Copilot would use default context/output limits that may not match the proxy's actual constraints, causing 400 errors like "max_tokens is too large". Related: #515 Co-authored-by: Zartris <14197299+Zartris@users.noreply.github.com>
This commit is contained in:
@@ -12,6 +12,44 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
const OPENAI_CONTEXT_WINDOWS: Record<string, number> = {
|
const OPENAI_CONTEXT_WINDOWS: Record<string, number> = {
|
||||||
|
// GitHub Copilot — values from https://api.githubcopilot.com/models (2026-04-09)
|
||||||
|
// Namespaced so they don't collide with bare model names from other providers.
|
||||||
|
'github:copilot': 128_000,
|
||||||
|
// Claude
|
||||||
|
'github:copilot:claude-sonnet-4': 216_000,
|
||||||
|
'github:copilot:claude-haiku-4': 200_000,
|
||||||
|
'github:copilot:claude-sonnet-4.5': 200_000,
|
||||||
|
'github:copilot:claude-sonnet-4.6': 200_000,
|
||||||
|
'github:copilot:claude-opus-4': 200_000,
|
||||||
|
'github:copilot:claude-opus-4.6': 200_000,
|
||||||
|
// GPT
|
||||||
|
'github:copilot:gpt-3.5-turbo': 16_384,
|
||||||
|
'github:copilot:gpt-4': 32_768,
|
||||||
|
'github:copilot:gpt-4-0125-preview': 128_000,
|
||||||
|
'github:copilot:gpt-4-o-preview': 128_000,
|
||||||
|
'github:copilot:gpt-4.1': 128_000,
|
||||||
|
'github:copilot:gpt-4o': 128_000,
|
||||||
|
'github:copilot:gpt-4o-2024-08-06': 128_000,
|
||||||
|
'github:copilot:gpt-4o-2024-11-20': 128_000,
|
||||||
|
'github:copilot:gpt-4o-mini': 128_000,
|
||||||
|
'github:copilot:gpt-5-mini': 264_000,
|
||||||
|
'github:copilot:gpt-5.1': 264_000,
|
||||||
|
'github:copilot:gpt-5.2': 400_000,
|
||||||
|
'github:copilot:gpt-5.2-codex': 400_000,
|
||||||
|
'github:copilot:gpt-5.3-codex': 400_000,
|
||||||
|
'github:copilot:gpt-5.4': 400_000,
|
||||||
|
'github:copilot:gpt-5.4-mini': 400_000,
|
||||||
|
// Gemini
|
||||||
|
'github:copilot:gemini-2.5-pro': 128_000,
|
||||||
|
'github:copilot:gemini-3-flash-preview': 128_000,
|
||||||
|
'github:copilot:gemini-3.1-pro-preview': 200_000,
|
||||||
|
// Grok
|
||||||
|
'github:copilot:grok-code-fast-1': 256_000,
|
||||||
|
|
||||||
|
// NOTE: bare Claude model names (e.g. 'claude-sonnet-4') are intentionally
|
||||||
|
// omitted. Different OpenAI-compatible providers may impose different context
|
||||||
|
// limits for the same model name, so we cannot safely hardcode values here.
|
||||||
|
|
||||||
// OpenAI
|
// OpenAI
|
||||||
'gpt-5.4': 1_050_000,
|
'gpt-5.4': 1_050_000,
|
||||||
'gpt-5.4-mini': 400_000,
|
'gpt-5.4-mini': 400_000,
|
||||||
@@ -82,6 +120,41 @@ const OPENAI_CONTEXT_WINDOWS: Record<string, number> = {
|
|||||||
* Fixes: 400 error "max_tokens is too large" when default 32k exceeds model limit.
|
* Fixes: 400 error "max_tokens is too large" when default 32k exceeds model limit.
|
||||||
*/
|
*/
|
||||||
const OPENAI_MAX_OUTPUT_TOKENS: Record<string, number> = {
|
const OPENAI_MAX_OUTPUT_TOKENS: Record<string, number> = {
|
||||||
|
// GitHub Copilot — values from https://api.githubcopilot.com/models (2026-04-09)
|
||||||
|
'github:copilot': 16_384,
|
||||||
|
// Claude
|
||||||
|
'github:copilot:claude-sonnet-4': 16_000,
|
||||||
|
'github:copilot:claude-haiku-4': 64_000,
|
||||||
|
'github:copilot:claude-sonnet-4.5': 32_000,
|
||||||
|
'github:copilot:claude-sonnet-4.6': 32_000,
|
||||||
|
'github:copilot:claude-opus-4': 32_000,
|
||||||
|
'github:copilot:claude-opus-4.6': 32_000,
|
||||||
|
// GPT
|
||||||
|
'github:copilot:gpt-3.5-turbo': 4_096,
|
||||||
|
'github:copilot:gpt-4': 4_096,
|
||||||
|
'github:copilot:gpt-4-0125-preview': 4_096,
|
||||||
|
'github:copilot:gpt-4-o-preview': 4_096,
|
||||||
|
'github:copilot:gpt-4.1': 16_384,
|
||||||
|
'github:copilot:gpt-4o': 4_096,
|
||||||
|
'github:copilot:gpt-4o-2024-08-06': 16_384,
|
||||||
|
'github:copilot:gpt-4o-2024-11-20': 16_384,
|
||||||
|
'github:copilot:gpt-4o-mini': 4_096,
|
||||||
|
'github:copilot:gpt-5-mini': 64_000,
|
||||||
|
'github:copilot:gpt-5.1': 64_000,
|
||||||
|
'github:copilot:gpt-5.2': 128_000,
|
||||||
|
'github:copilot:gpt-5.2-codex': 128_000,
|
||||||
|
'github:copilot:gpt-5.3-codex': 128_000,
|
||||||
|
'github:copilot:gpt-5.4': 128_000,
|
||||||
|
'github:copilot:gpt-5.4-mini': 128_000,
|
||||||
|
// Gemini
|
||||||
|
'github:copilot:gemini-2.5-pro': 64_000,
|
||||||
|
'github:copilot:gemini-3-flash-preview': 64_000,
|
||||||
|
'github:copilot:gemini-3.1-pro-preview': 64_000,
|
||||||
|
// Grok
|
||||||
|
'github:copilot:grok-code-fast-1': 64_000,
|
||||||
|
|
||||||
|
// NOTE: bare Claude model names omitted — see context windows comment above.
|
||||||
|
|
||||||
// OpenAI
|
// OpenAI
|
||||||
'gpt-5.4': 128_000,
|
'gpt-5.4': 128_000,
|
||||||
'gpt-5.4-mini': 128_000,
|
'gpt-5.4-mini': 128_000,
|
||||||
@@ -145,6 +218,19 @@ const OPENAI_MAX_OUTPUT_TOKENS: Record<string, number> = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function lookupByModel<T>(table: Record<string, T>, model: string): T | undefined {
|
function lookupByModel<T>(table: Record<string, T>, model: string): T | undefined {
|
||||||
|
// Try provider-qualified key first: "{OPENAI_MODEL}:{model}" so that
|
||||||
|
// e.g. "github:copilot:claude-haiku-4.5" can have different limits than
|
||||||
|
// a bare "claude-haiku-4.5" served by another provider.
|
||||||
|
const providerModel = process.env.OPENAI_MODEL?.trim()
|
||||||
|
if (providerModel && providerModel !== model) {
|
||||||
|
const qualified = `${providerModel}:${model}`
|
||||||
|
const qualifiedResult = lookupByKey(table, qualified)
|
||||||
|
if (qualifiedResult !== undefined) return qualifiedResult
|
||||||
|
}
|
||||||
|
return lookupByKey(table, model)
|
||||||
|
}
|
||||||
|
|
||||||
|
function lookupByKey<T>(table: Record<string, T>, model: string): T | undefined {
|
||||||
if (table[model] !== undefined) return table[model]
|
if (table[model] !== undefined) return table[model]
|
||||||
// Sort keys by length descending so the most specific prefix wins.
|
// Sort keys by length descending so the most specific prefix wins.
|
||||||
// Without this, 'gpt-4-turbo-preview' could match 'gpt-4' (8k) instead
|
// Without this, 'gpt-4-turbo-preview' could match 'gpt-4' (8k) instead
|
||||||
|
|||||||
Reference in New Issue
Block a user