From b65921e8c35abb4d6c62a83581685edceb0eae71 Mon Sep 17 00:00:00 2001
From: Juan Camilo <juancamilo.auriti@gmail.com>
Date: Thu, 2 Apr 2026 15:50:52 +0200
Subject: [PATCH] fix: deterministic prefix matching and correct Llama 3.x
 context windows

Two fixes in openaiContextWindows.ts:

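1. Sort lookup keys by length descending in lookupByModel() so the most
   specific prefix always wins. Without this, 'gpt-4-turbo-preview'
   could match 'gpt-4' (8k) instead of 'gpt-4-turbo' (128k) depending
   on the order in which the keys are declared in the table
   (Object.keys() returns string keys in insertion order, so the
   result silently depended on how the table happened to be written).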

2. Update Llama 3.1/3.2/3.3 context windows from 8,192 to 128,000.
   These models support 128k context natively (Meta official specs).
   The previous 8k value was Ollama's default num_ctx, not the model's
   actual capability, causing premature auto-compact warnings.
---
 src/utils/model/openaiContextWindows.ts | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/src/utils/model/openaiContextWindows.ts b/src/utils/model/openaiContextWindows.ts
index 6cb12c37..66db3d35 100644
--- a/src/utils/model/openaiContextWindows.ts
+++ b/src/utils/model/openaiContextWindows.ts
@@ -50,9 +50,11 @@ const OPENAI_CONTEXT_WINDOWS: Record<string, number> = {
   'gemini-2.5-flash':       1_048_576,
 
   // Ollama local models
-  'llama3.3:70b':               8_192,
-  'llama3.1:8b':                8_192,
-  'llama3.2:3b':                8_192,
+  // Llama 3.1+ models support 128k context natively (Meta official specs).
+  // Ollama defaults to num_ctx=8192 but users can configure higher values.
+  'llama3.3:70b':             128_000,
+  'llama3.1:8b':              128_000,
+  'llama3.2:3b':              128_000,
   'qwen2.5-coder:32b':        32_768,
   'qwen2.5-coder:7b':         32_768,
   'deepseek-coder-v2:16b':    163_840,
@@ -122,7 +124,11 @@ const OPENAI_MAX_OUTPUT_TOKENS: Record<string, number> = {
 
 function lookupByModel<T>(table: Record<string, T>, model: string): T | undefined {
   if (table[model] !== undefined) return table[model]
-  for (const key of Object.keys(table)) {
+  // Sort keys by length descending so the most specific prefix wins.
+  // Without this, 'gpt-4-turbo-preview' could match 'gpt-4' (8k) instead
+  // of 'gpt-4-turbo' (128k) depending on V8's key iteration order.
+  const sortedKeys = Object.keys(table).sort((a, b) => b.length - a.length)
+  for (const key of sortedKeys) {
     if (model.startsWith(key)) return table[key]
   }
   return undefined