From aeaa658f776fb8df95721e8b8962385f8b00f66a Mon Sep 17 00:00:00 2001 From: Vasanth T <148849890+Vasanthdev2004@users.noreply.github.com> Date: Sun, 12 Apr 2026 23:33:02 +0530 Subject: [PATCH] fix: prevent infinite auto-compact loop for unknown 3P models (#635) (#636) - Raise context window fallback from 8k to 128k for unknown OpenAI-compat models. The 8k fallback caused effective context (8k minus output reservation) to go negative, making auto-compact fire on every single message. - Add safety floor in getEffectiveContextWindowSize(): effective context is always at least reservedTokensForSummary + 13k buffer, ensuring the auto-compact threshold stays positive. - Add missing MiniMax model entries (M2.5, M2.5-highspeed, M2.1, M2.1-highspeed) all at 204,800 context / 131,072 max output per MiniMax docs. - Add tests for MiniMax variants, 128k fallback, and autoCompact floor. Fixes #635 Co-authored-by: root --- src/services/compact/autoCompact.test.ts | 46 ++++++++++++++++++++++++ src/services/compact/autoCompact.ts | 7 +++- src/utils/context.test.ts | 18 ++++++++-- src/utils/context.ts | 14 +++++--- src/utils/model/openaiContextWindows.ts | 24 +++++++++++-- 5 files changed, 100 insertions(+), 9 deletions(-) create mode 100644 src/services/compact/autoCompact.test.ts diff --git a/src/services/compact/autoCompact.test.ts b/src/services/compact/autoCompact.test.ts new file mode 100644 index 00000000..20248c70 --- /dev/null +++ b/src/services/compact/autoCompact.test.ts @@ -0,0 +1,46 @@ +import { describe, expect, test } from 'bun:test' +import { + getEffectiveContextWindowSize, + getAutoCompactThreshold, +} from './autoCompact.ts' + +describe('getEffectiveContextWindowSize', () => { + test('returns positive value for known models with large context windows', () => { + // claude-sonnet-4 has 200k context + const effective = getEffectiveContextWindowSize('claude-sonnet-4') + expect(effective).toBeGreaterThan(0) + }) + + test('never returns negative even for unknown 3P 
models (issue #635)', () => { + // Previously, unknown 3P models got 8k context → effective context was + // 8k minus 20k summary reservation = -12k, causing infinite auto-compact. + // Now the fallback is 128k and there's a floor, so effective is always + // at least reservedTokensForSummary + buffer. + process.env.CLAUDE_CODE_USE_OPENAI = '1' + try { + const effective = getEffectiveContextWindowSize('some-unknown-3p-model') + expect(effective).toBeGreaterThan(0) + // Must be at least summary reservation (20k) + buffer (13k) = 33k + expect(effective).toBeGreaterThanOrEqual(33_000) + } finally { + delete process.env.CLAUDE_CODE_USE_OPENAI + } + }) +}) + +describe('getAutoCompactThreshold', () => { + test('returns positive threshold for known models', () => { + const threshold = getAutoCompactThreshold('claude-sonnet-4') + expect(threshold).toBeGreaterThan(0) + }) + + test('never returns negative threshold even for unknown 3P models (issue #635)', () => { + process.env.CLAUDE_CODE_USE_OPENAI = '1' + try { + const threshold = getAutoCompactThreshold('some-unknown-3p-model') + expect(threshold).toBeGreaterThan(0) + } finally { + delete process.env.CLAUDE_CODE_USE_OPENAI + } + }) +}) \ No newline at end of file diff --git a/src/services/compact/autoCompact.ts b/src/services/compact/autoCompact.ts index de5a6bf3..07e17b9a 100644 --- a/src/services/compact/autoCompact.ts +++ b/src/services/compact/autoCompact.ts @@ -45,7 +45,12 @@ export function getEffectiveContextWindowSize(model: string): number { } } - return contextWindow - reservedTokensForSummary + // Floor: effective context must be at least the summary reservation plus a + // usable buffer. If it goes lower, the auto-compact threshold becomes + // negative and fires on every message (issue #635). 
+ const autocompactBuffer = 13_000 // must match AUTOCOMPACT_BUFFER_TOKENS + const effectiveContext = contextWindow - reservedTokensForSummary + return Math.max(effectiveContext, reservedTokensForSummary + autocompactBuffer) } export type AutoCompactTrackingState = { diff --git a/src/utils/context.test.ts b/src/utils/context.test.ts index 3bb8d643..72c6bd43 100644 --- a/src/utils/context.test.ts +++ b/src/utils/context.test.ts @@ -107,9 +107,23 @@ test('MiniMax-M2.7 uses explicit provider-specific context and output caps', () expect(getMaxOutputTokensForModel('MiniMax-M2.7')).toBe(131_072) }) -test('unknown openai-compatible models still use the conservative fallback window', () => { +test('unknown openai-compatible models use the 128k fallback window (not 8k, see #635)', () => { process.env.CLAUDE_CODE_USE_OPENAI = '1' delete process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS - expect(getContextWindowForModel('some-unknown-3p-model')).toBe(8_000) + expect(getContextWindowForModel('some-unknown-3p-model')).toBe(128_000) +}) + +test('MiniMax-M2.5 and M2.1 use explicit provider-specific context and output caps', () => { + process.env.CLAUDE_CODE_USE_OPENAI = '1' + delete process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS + + expect(getContextWindowForModel('MiniMax-M2.5')).toBe(204_800) + expect(getContextWindowForModel('MiniMax-M2.5-highspeed')).toBe(204_800) + expect(getContextWindowForModel('MiniMax-M2.1')).toBe(204_800) + expect(getContextWindowForModel('MiniMax-M2.1-highspeed')).toBe(204_800) + expect(getModelMaxOutputTokens('MiniMax-M2.5')).toEqual({ + default: 131_072, + upperLimit: 131_072, + }) }) diff --git a/src/utils/context.ts b/src/utils/context.ts index 7a5b6c2f..370ed5df 100644 --- a/src/utils/context.ts +++ b/src/utils/context.ts @@ -9,6 +9,11 @@ import { getOpenAIContextWindow, getOpenAIMaxOutputTokens } from './model/openai // Model context window size (200k tokens for all models right now) export const MODEL_CONTEXT_WINDOW_DEFAULT = 200_000 +// Fallback context 
window for unknown 3P models. Must be large enough that +// the effective context (this minus output token reservation) stays positive, +// otherwise auto-compact fires on every message (issue #635). +export const OPENAI_FALLBACK_CONTEXT_WINDOW = 128_000 + // Maximum output tokens for compact operations export const COMPACT_MAX_OUTPUT_TOKENS = 20_000 @@ -73,8 +78,9 @@ export function getContextWindowForModel( } // OpenAI-compatible provider — use known context windows for the model. - // Unknown models get a conservative 8k default so auto-compact triggers - // before hitting a hard context_window_exceeded error. + // Unknown models fall back to a 128k default. This was previously 8k, + // but that caused auto-compact to fire on every turn because the effective + // context (8k minus output reservation) became negative (issue #635). const isOpenAIProvider = isEnvTruthy(process.env.CLAUDE_CODE_USE_OPENAI) || isEnvTruthy(process.env.CLAUDE_CODE_USE_GEMINI) || @@ -86,10 +92,10 @@ export function getContextWindowForModel( return openaiWindow } console.error( - `[context] Warning: model "${model}" not in context window table — using conservative 8k default. ` + + `[context] Warning: model "${model}" not in context window table — using conservative 128k default. 
` + 'Add it to src/utils/model/openaiContextWindows.ts for accurate compaction.', ) - return 8_000 + return OPENAI_FALLBACK_CONTEXT_WINDOW } const cap = getModelCapability(model) diff --git a/src/utils/model/openaiContextWindows.ts b/src/utils/model/openaiContextWindows.ts index 6833eacd..886091b3 100644 --- a/src/utils/model/openaiContextWindows.ts +++ b/src/utils/model/openaiContextWindows.ts @@ -104,9 +104,19 @@ const OPENAI_CONTEXT_WINDOWS: Record = { 'devstral-latest': 256_000, 'ministral-3b-latest': 256_000, - // MiniMax + // MiniMax (all M2.x variants share 204,800 context, 131,072 max output) 'MiniMax-M2.7': 204_800, + 'MiniMax-M2.7-highspeed': 204_800, + 'MiniMax-M2.5': 204_800, + 'MiniMax-M2.5-highspeed': 204_800, + 'MiniMax-M2.1': 204_800, + 'MiniMax-M2.1-highspeed': 204_800, 'minimax-m2.7': 204_800, + 'minimax-m2.7-highspeed': 204_800, + 'minimax-m2.5': 204_800, + 'minimax-m2.5-highspeed': 204_800, + 'minimax-m2.1': 204_800, + 'minimax-m2.1-highspeed': 204_800, // Google (via OpenRouter) 'google/gemini-2.0-flash':1_048_576, @@ -223,9 +233,19 @@ const OPENAI_MAX_OUTPUT_TOKENS: Record = { 'mistral-large-latest': 32_768, 'mistral-small-latest': 32_768, - // MiniMax + // MiniMax (all M2.x variants share 131,072 max output) 'MiniMax-M2.7': 131_072, + 'MiniMax-M2.7-highspeed': 131_072, + 'MiniMax-M2.5': 131_072, + 'MiniMax-M2.5-highspeed': 131_072, + 'MiniMax-M2.1': 131_072, + 'MiniMax-M2.1-highspeed': 131_072, 'minimax-m2.7': 131_072, + 'minimax-m2.7-highspeed': 131_072, + 'minimax-m2.5': 131_072, + 'minimax-m2.5-highspeed': 131_072, + 'minimax-m2.1': 131_072, + 'minimax-m2.1-highspeed': 131_072, // Google (via OpenRouter) 'google/gemini-2.0-flash': 8_192,