fix: prevent infinite auto-compact loop for unknown 3P models (#635) (#636)

- Raise context window fallback from 8k to 128k for unknown OpenAI-compat models.
  The 8k fallback caused effective context (8k minus output reservation) to go
  negative, making auto-compact fire on every single message.
- Add safety floor in getEffectiveContextWindowSize(): effective context is
  always at least reservedTokensForSummary + 13k buffer, ensuring the
  auto-compact threshold stays positive.
- Add missing MiniMax model entries (M2.5, M2.5-highspeed, M2.1, M2.1-highspeed)
  all at 204,800 context / 131,072 max output per MiniMax docs.
- Add tests for MiniMax variants, 128k fallback, and autoCompact floor.
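The failure mode above can be sketched in isolation. This is a minimal illustration, not the repository's actual code: the reservation size of 20,000 tokens is an assumed value chosen only to show the arithmetic (the real `reservedTokensForSummary` may differ), and the function name mirrors the one in the diff below.

```typescript
// Assumed constants for illustration; only the 13k buffer is taken from
// the commit, the 20k reservation is a made-up example value.
const RESERVED_TOKENS_FOR_SUMMARY = 20_000
const AUTOCOMPACT_BUFFER_TOKENS = 13_000

function effectiveContext(contextWindow: number): number {
  const effective = contextWindow - RESERVED_TOKENS_FOR_SUMMARY
  // Safety floor: keep the auto-compact threshold strictly positive even
  // when the fallback context window is smaller than the reservation.
  return Math.max(effective, RESERVED_TOKENS_FOR_SUMMARY + AUTOCOMPACT_BUFFER_TOKENS)
}

// Old behavior, no floor: 8_000 - 20_000 = -12_000, so every message
// exceeds the threshold and auto-compact fires in a loop.
console.log(effectiveContext(8_000))   // 33_000 with the floor applied
console.log(effectiveContext(128_000)) // 108_000, floor not needed
```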

Fixes #635

Co-authored-by: root <root@vm7508.lumadock.com>
Author: Vasanth T
Date: 2026-04-12 23:33:02 +05:30
Committed by: GitHub
Parent: d2a057c6f1
Commit: aeaa658f77
5 changed files with 100 additions and 9 deletions


@@ -45,7 +45,12 @@ export function getEffectiveContextWindowSize(model: string): number {
     }
   }
-  return contextWindow - reservedTokensForSummary
+  // Floor: effective context must be at least the summary reservation plus a
+  // usable buffer. If it goes lower, the auto-compact threshold becomes
+  // negative and fires on every message (issue #635).
+  const autocompactBuffer = 13_000 // must match AUTOCOMPACT_BUFFER_TOKENS
+  const effectiveContext = contextWindow - reservedTokensForSummary
+  return Math.max(effectiveContext, reservedTokensForSummary + autocompactBuffer)
 }
 
 export type AutoCompactTrackingState = {
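The MiniMax entries described in this commit might take a shape like the following. This is a sketch only: the table name, field names, and exact model-ID strings are hypothetical, while the 204,800 / 131,072 limits come from the commit message.

```typescript
// Hypothetical model-limits table; field and key names are illustrative,
// the numeric limits are the ones stated in this commit (per MiniMax docs).
const MINIMAX_LIMITS: Record<string, { contextWindow: number; maxOutput: number }> = {
  "MiniMax-M2.5":           { contextWindow: 204_800, maxOutput: 131_072 },
  "MiniMax-M2.5-highspeed": { contextWindow: 204_800, maxOutput: 131_072 },
  "MiniMax-M2.1":           { contextWindow: 204_800, maxOutput: 131_072 },
  "MiniMax-M2.1-highspeed": { contextWindow: 204_800, maxOutput: 131_072 },
}
```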