From b65921e8c35abb4d6c62a83581685edceb0eae71 Mon Sep 17 00:00:00 2001 From: Juan Camilo Date: Thu, 2 Apr 2026 15:50:52 +0200 Subject: [PATCH] fix: deterministic prefix matching and correct Llama 3.x context windows Two fixes in openaiContextWindows.ts: 1. Sort lookup keys by length descending in lookupByModel() so the most specific prefix always wins. Without this, 'gpt-4-turbo-preview' could match 'gpt-4' (8k) instead of 'gpt-4-turbo' (128k) depending on V8's object key iteration order. 2. Update Llama 3.1/3.2/3.3 context windows from 8,192 to 128,000. These models support 128k context natively (Meta official specs). The previous 8k value was Ollama's default num_ctx, not the model's actual capability, causing premature auto-compact warnings. --- src/utils/model/openaiContextWindows.ts | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/utils/model/openaiContextWindows.ts b/src/utils/model/openaiContextWindows.ts index 6cb12c37..66db3d35 100644 --- a/src/utils/model/openaiContextWindows.ts +++ b/src/utils/model/openaiContextWindows.ts @@ -50,9 +50,11 @@ const OPENAI_CONTEXT_WINDOWS: Record<string, number> = { 'gemini-2.5-flash': 1_048_576, // Ollama local models - 'llama3.3:70b': 8_192, - 'llama3.1:8b': 8_192, - 'llama3.2:3b': 8_192, + // Llama 3.1+ models support 128k context natively (Meta official specs). + // Ollama defaults to num_ctx=8192 but users can configure higher values. + 'llama3.3:70b': 128_000, + 'llama3.1:8b': 128_000, + 'llama3.2:3b': 128_000, 'qwen2.5-coder:32b': 32_768, 'qwen2.5-coder:7b': 32_768, 'deepseek-coder-v2:16b': 163_840, @@ -122,7 +124,11 @@ const OPENAI_MAX_OUTPUT_TOKENS: Record<string, number> = { function lookupByModel<T>(table: Record<string, T>, model: string): T | undefined { if (table[model] !== undefined) return table[model] - for (const key of Object.keys(table)) { + // Sort keys by length descending so the most specific prefix wins. 
+ // Without this, 'gpt-4-turbo-preview' could match 'gpt-4' (8k) instead + // of 'gpt-4-turbo' (128k) depending on the order keys are declared in the table. + const sortedKeys = Object.keys(table).sort((a, b) => b.length - a.length) + for (const key of sortedKeys) { if (model.startsWith(key)) return table[key] } return undefined