From aeaa658f776fb8df95721e8b8962385f8b00f66a Mon Sep 17 00:00:00 2001
From: Vasanth T <148849890+Vasanthdev2004@users.noreply.github.com>
Date: Sun, 12 Apr 2026 23:33:02 +0530
Subject: [PATCH] fix: prevent infinite auto-compact loop for unknown 3P models
 (#635) (#636)

- Raise context window fallback from 8k to 128k for unknown OpenAI-compat models.
  The 8k fallback made the effective context negative (8k minus the 20k summary
  reservation = -12k), so auto-compact fired on every single message.
- Add safety floor in getEffectiveContextWindowSize(): effective context is
  always at least reservedTokensForSummary + 13k buffer, ensuring the
  auto-compact threshold stays positive.
- Add missing MiniMax model entries (M2.7-highspeed, M2.5, M2.5-highspeed, M2.1,
  M2.1-highspeed, plus their lowercase aliases), all at 204,800 context /
  131,072 max output per MiniMax docs.
- Add tests for MiniMax variants, 128k fallback, and autoCompact floor.

Fixes #635

Co-authored-by: root <root@vm7508.lumadock.com>
---
 src/services/compact/autoCompact.test.ts | 46 ++++++++++++++++++++++++
 src/services/compact/autoCompact.ts      |  7 +++-
 src/utils/context.test.ts                | 18 ++++++++--
 src/utils/context.ts                     | 14 +++++---
 src/utils/model/openaiContextWindows.ts  | 24 +++++++++++--
 5 files changed, 100 insertions(+), 9 deletions(-)
 create mode 100644 src/services/compact/autoCompact.test.ts

diff --git a/src/services/compact/autoCompact.test.ts b/src/services/compact/autoCompact.test.ts
new file mode 100644
index 00000000..20248c70
--- /dev/null
+++ b/src/services/compact/autoCompact.test.ts
@@ -0,0 +1,46 @@
+import { describe, expect, test } from 'bun:test'
+import {
+  getEffectiveContextWindowSize,
+  getAutoCompactThreshold,
+} from './autoCompact.ts'
+
+describe('getEffectiveContextWindowSize', () => {
+  test('returns positive value for known models with large context windows', () => {
+    // claude-sonnet-4 has 200k context
+    const effective = getEffectiveContextWindowSize('claude-sonnet-4')
+    expect(effective).toBeGreaterThan(0)
+  })
+
+  test('never returns negative even for unknown 3P models (issue #635)', () => {
+    // Previously, unknown 3P models got 8k context → effective context was
+    // 8k minus 20k summary reservation = -12k, causing infinite auto-compact.
+    // Now the fallback is 128k and there's a floor, so effective is always
+    // at least reservedTokensForSummary + buffer.
+    process.env.CLAUDE_CODE_USE_OPENAI = '1'
+    try {
+      const effective = getEffectiveContextWindowSize('some-unknown-3p-model')
+      expect(effective).toBeGreaterThan(0)
+      // Must be at least summary reservation (20k) + buffer (13k) = 33k
+      expect(effective).toBeGreaterThanOrEqual(33_000)
+    } finally {
+      delete process.env.CLAUDE_CODE_USE_OPENAI
+    }
+  })
+})
+
+describe('getAutoCompactThreshold', () => {
+  test('returns positive threshold for known models', () => {
+    const threshold = getAutoCompactThreshold('claude-sonnet-4')
+    expect(threshold).toBeGreaterThan(0)
+  })
+
+  test('never returns negative threshold even for unknown 3P models (issue #635)', () => {
+    process.env.CLAUDE_CODE_USE_OPENAI = '1'
+    try {
+      const threshold = getAutoCompactThreshold('some-unknown-3p-model')
+      expect(threshold).toBeGreaterThan(0)
+    } finally {
+      delete process.env.CLAUDE_CODE_USE_OPENAI
+    }
+  })
+})
\ No newline at end of file
diff --git a/src/services/compact/autoCompact.ts b/src/services/compact/autoCompact.ts
index de5a6bf3..07e17b9a 100644
--- a/src/services/compact/autoCompact.ts
+++ b/src/services/compact/autoCompact.ts
@@ -45,7 +45,12 @@ export function getEffectiveContextWindowSize(model: string): number {
     }
   }
 
-  return contextWindow - reservedTokensForSummary
+  // Floor: effective context must be at least the summary reservation plus a
+  // usable buffer. If it goes lower, the auto-compact threshold becomes
+  // negative and fires on every message (issue #635).
+  const autocompactBuffer = 13_000 // must match AUTOCOMPACT_BUFFER_TOKENS
+  const effectiveContext = contextWindow - reservedTokensForSummary
+  return Math.max(effectiveContext, reservedTokensForSummary + autocompactBuffer)
 }
 
 export type AutoCompactTrackingState = {
diff --git a/src/utils/context.test.ts b/src/utils/context.test.ts
index 3bb8d643..72c6bd43 100644
--- a/src/utils/context.test.ts
+++ b/src/utils/context.test.ts
@@ -107,9 +107,23 @@ test('MiniMax-M2.7 uses explicit provider-specific context and output caps', ()
   expect(getMaxOutputTokensForModel('MiniMax-M2.7')).toBe(131_072)
 })
 
-test('unknown openai-compatible models still use the conservative fallback window', () => {
+test('unknown openai-compatible models use the 128k fallback window (not 8k, see #635)', () => {
   process.env.CLAUDE_CODE_USE_OPENAI = '1'
   delete process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS
 
-  expect(getContextWindowForModel('some-unknown-3p-model')).toBe(8_000)
+  expect(getContextWindowForModel('some-unknown-3p-model')).toBe(128_000)
+})
+
+test('MiniMax-M2.5 and M2.1 use explicit provider-specific context and output caps', () => {
+  process.env.CLAUDE_CODE_USE_OPENAI = '1'
+  delete process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS
+
+  expect(getContextWindowForModel('MiniMax-M2.5')).toBe(204_800)
+  expect(getContextWindowForModel('MiniMax-M2.5-highspeed')).toBe(204_800)
+  expect(getContextWindowForModel('MiniMax-M2.1')).toBe(204_800)
+  expect(getContextWindowForModel('MiniMax-M2.1-highspeed')).toBe(204_800)
+  expect(getModelMaxOutputTokens('MiniMax-M2.5')).toEqual({
+    default: 131_072,
+    upperLimit: 131_072,
+  })
 })
diff --git a/src/utils/context.ts b/src/utils/context.ts
index 7a5b6c2f..370ed5df 100644
--- a/src/utils/context.ts
+++ b/src/utils/context.ts
@@ -9,6 +9,11 @@ import { getOpenAIContextWindow, getOpenAIMaxOutputTokens } from './model/openai
 // Model context window size (200k tokens for all models right now)
 export const MODEL_CONTEXT_WINDOW_DEFAULT = 200_000
 
+// Fallback context window for unknown 3P models. Must be large enough that
+// the effective context (this minus output token reservation) stays positive,
+// otherwise auto-compact fires on every message (issue #635).
+export const OPENAI_FALLBACK_CONTEXT_WINDOW = 128_000
+
 // Maximum output tokens for compact operations
 export const COMPACT_MAX_OUTPUT_TOKENS = 20_000
 
@@ -73,8 +78,9 @@ export function getContextWindowForModel(
   }
 
   // OpenAI-compatible provider — use known context windows for the model.
-  // Unknown models get a conservative 8k default so auto-compact triggers
-  // before hitting a hard context_window_exceeded error.
+  // Unknown models fall back to OPENAI_FALLBACK_CONTEXT_WINDOW (128k). This was
+  // previously 8k, but that made auto-compact fire on every turn because the
+  // effective context (8k minus output reservation) became negative (issue #635).
   const isOpenAIProvider =
     isEnvTruthy(process.env.CLAUDE_CODE_USE_OPENAI) ||
     isEnvTruthy(process.env.CLAUDE_CODE_USE_GEMINI) ||
@@ -86,10 +92,10 @@ export function getContextWindowForModel(
       return openaiWindow
     }
     console.error(
-      `[context] Warning: model "${model}" not in context window table — using conservative 8k default. ` +
+      `[context] Warning: model "${model}" not in context window table — using conservative 128k default. ` +
       'Add it to src/utils/model/openaiContextWindows.ts for accurate compaction.',
     )
-    return 8_000
+    return OPENAI_FALLBACK_CONTEXT_WINDOW
   }
 
   const cap = getModelCapability(model)
diff --git a/src/utils/model/openaiContextWindows.ts b/src/utils/model/openaiContextWindows.ts
index 6833eacd..886091b3 100644
--- a/src/utils/model/openaiContextWindows.ts
+++ b/src/utils/model/openaiContextWindows.ts
@@ -104,9 +104,19 @@ const OPENAI_CONTEXT_WINDOWS: Record<string, number> = {
   'devstral-latest':          256_000,
   'ministral-3b-latest':      256_000,
 
-  // MiniMax
+  // MiniMax (all M2.x variants share 204,800 context, 131,072 max output)
   'MiniMax-M2.7':             204_800,
+  'MiniMax-M2.7-highspeed':   204_800,
+  'MiniMax-M2.5':             204_800,
+  'MiniMax-M2.5-highspeed':   204_800,
+  'MiniMax-M2.1':             204_800,
+  'MiniMax-M2.1-highspeed':   204_800,
   'minimax-m2.7':             204_800,
+  'minimax-m2.7-highspeed':   204_800,
+  'minimax-m2.5':             204_800,
+  'minimax-m2.5-highspeed':   204_800,
+  'minimax-m2.1':             204_800,
+  'minimax-m2.1-highspeed':   204_800,
 
   // Google (via OpenRouter)
   'google/gemini-2.0-flash':1_048_576,
@@ -223,9 +233,19 @@ const OPENAI_MAX_OUTPUT_TOKENS: Record<string, number> = {
   'mistral-large-latest':     32_768,
   'mistral-small-latest':     32_768,
 
-  // MiniMax
+  // MiniMax (all M2.x variants share 131,072 max output)
   'MiniMax-M2.7':            131_072,
+  'MiniMax-M2.7-highspeed':  131_072,
+  'MiniMax-M2.5':            131_072,
+  'MiniMax-M2.5-highspeed':  131_072,
+  'MiniMax-M2.1':            131_072,
+  'MiniMax-M2.1-highspeed':  131_072,
   'minimax-m2.7':            131_072,
+  'minimax-m2.7-highspeed':  131_072,
+  'minimax-m2.5':            131_072,
+  'minimax-m2.5-highspeed':  131_072,
+  'minimax-m2.1':            131_072,
+  'minimax-m2.1-highspeed':  131_072,
 
   // Google (via OpenRouter)
   'google/gemini-2.0-flash':   8_192,