- Raise context window fallback from 8k to 128k for unknown OpenAI-compat models. The 8k fallback caused the effective context (8k minus output reservation) to go negative, making auto-compact fire on every single message.
- Add safety floor in getEffectiveContextWindowSize(): effective context is always at least reservedTokensForSummary + 13k buffer, ensuring the auto-compact threshold stays positive.
- Add missing MiniMax model entries (M2.5, M2.5-highspeed, M2.1, M2.1-highspeed), all at 204,800 context / 131,072 max output per MiniMax docs.
- Add tests for MiniMax variants, 128k fallback, and autoCompact floor.

Fixes #635

Co-authored-by: root <root@vm7508.lumadock.com>
This commit is contained in:
@@ -107,9 +107,23 @@ test('MiniMax-M2.7 uses explicit provider-specific context and output caps', ()
|
||||
expect(getMaxOutputTokensForModel('MiniMax-M2.7')).toBe(131_072)
|
||||
})
|
||||
|
||||
test('unknown openai-compatible models still use the conservative fallback window', () => {
|
||||
test('unknown openai-compatible models use the 128k fallback window (not 8k, see #635)', () => {
|
||||
process.env.CLAUDE_CODE_USE_OPENAI = '1'
|
||||
delete process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS
|
||||
|
||||
expect(getContextWindowForModel('some-unknown-3p-model')).toBe(8_000)
|
||||
expect(getContextWindowForModel('some-unknown-3p-model')).toBe(128_000)
|
||||
})
|
||||
|
||||
test('MiniMax-M2.5 and M2.1 use explicit provider-specific context and output caps', () => {
|
||||
process.env.CLAUDE_CODE_USE_OPENAI = '1'
|
||||
delete process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS
|
||||
|
||||
expect(getContextWindowForModel('MiniMax-M2.5')).toBe(204_800)
|
||||
expect(getContextWindowForModel('MiniMax-M2.5-highspeed')).toBe(204_800)
|
||||
expect(getContextWindowForModel('MiniMax-M2.1')).toBe(204_800)
|
||||
expect(getContextWindowForModel('MiniMax-M2.1-highspeed')).toBe(204_800)
|
||||
expect(getModelMaxOutputTokens('MiniMax-M2.5')).toEqual({
|
||||
default: 131_072,
|
||||
upperLimit: 131_072,
|
||||
})
|
||||
})
|
||||
|
||||
@@ -9,6 +9,11 @@ import { getOpenAIContextWindow, getOpenAIMaxOutputTokens } from './model/openai
|
||||
// Model context window size (200k tokens for all models right now)
|
||||
export const MODEL_CONTEXT_WINDOW_DEFAULT = 200_000
|
||||
|
||||
// Fallback context window for unknown 3P models. Must be large enough that
|
||||
// the effective context (this minus output token reservation) stays positive,
|
||||
// otherwise auto-compact fires on every message (issue #635).
|
||||
export const OPENAI_FALLBACK_CONTEXT_WINDOW = 128_000
|
||||
|
||||
// Maximum output tokens for compact operations
|
||||
export const COMPACT_MAX_OUTPUT_TOKENS = 20_000
|
||||
|
||||
@@ -73,8 +78,9 @@ export function getContextWindowForModel(
|
||||
}
|
||||
|
||||
// OpenAI-compatible provider — use known context windows for the model.
|
||||
// Unknown models get a conservative 8k default so auto-compact triggers
|
||||
// before hitting a hard context_window_exceeded error.
|
||||
// Unknown models get a 128k fallback default. This was previously 8k,
|
||||
// but that caused auto-compact to fire on every turn because the effective
|
||||
// context (8k minus output reservation) became negative (issue #635).
|
||||
const isOpenAIProvider =
|
||||
isEnvTruthy(process.env.CLAUDE_CODE_USE_OPENAI) ||
|
||||
isEnvTruthy(process.env.CLAUDE_CODE_USE_GEMINI) ||
|
||||
@@ -86,10 +92,10 @@ export function getContextWindowForModel(
|
||||
return openaiWindow
|
||||
}
|
||||
console.error(
|
||||
`[context] Warning: model "${model}" not in context window table — using conservative 8k default. ` +
|
||||
`[context] Warning: model "${model}" not in context window table — using conservative 128k default. ` +
|
||||
'Add it to src/utils/model/openaiContextWindows.ts for accurate compaction.',
|
||||
)
|
||||
return 8_000
|
||||
return OPENAI_FALLBACK_CONTEXT_WINDOW
|
||||
}
|
||||
|
||||
const cap = getModelCapability(model)
|
||||
|
||||
@@ -104,9 +104,19 @@ const OPENAI_CONTEXT_WINDOWS: Record<string, number> = {
|
||||
'devstral-latest': 256_000,
|
||||
'ministral-3b-latest': 256_000,
|
||||
|
||||
// MiniMax
|
||||
// MiniMax (all M2.x variants share 204,800 context, 131,072 max output)
|
||||
'MiniMax-M2.7': 204_800,
|
||||
'MiniMax-M2.7-highspeed': 204_800,
|
||||
'MiniMax-M2.5': 204_800,
|
||||
'MiniMax-M2.5-highspeed': 204_800,
|
||||
'MiniMax-M2.1': 204_800,
|
||||
'MiniMax-M2.1-highspeed': 204_800,
|
||||
'minimax-m2.7': 204_800,
|
||||
'minimax-m2.7-highspeed': 204_800,
|
||||
'minimax-m2.5': 204_800,
|
||||
'minimax-m2.5-highspeed': 204_800,
|
||||
'minimax-m2.1': 204_800,
|
||||
'minimax-m2.1-highspeed': 204_800,
|
||||
|
||||
// Google (via OpenRouter)
|
||||
'google/gemini-2.0-flash':1_048_576,
|
||||
@@ -223,9 +233,19 @@ const OPENAI_MAX_OUTPUT_TOKENS: Record<string, number> = {
|
||||
'mistral-large-latest': 32_768,
|
||||
'mistral-small-latest': 32_768,
|
||||
|
||||
// MiniMax
|
||||
// MiniMax (all M2.x variants share 131,072 max output)
|
||||
'MiniMax-M2.7': 131_072,
|
||||
'MiniMax-M2.7-highspeed': 131_072,
|
||||
'MiniMax-M2.5': 131_072,
|
||||
'MiniMax-M2.5-highspeed': 131_072,
|
||||
'MiniMax-M2.1': 131_072,
|
||||
'MiniMax-M2.1-highspeed': 131_072,
|
||||
'minimax-m2.7': 131_072,
|
||||
'minimax-m2.7-highspeed': 131_072,
|
||||
'minimax-m2.5': 131_072,
|
||||
'minimax-m2.5-highspeed': 131_072,
|
||||
'minimax-m2.1': 131_072,
|
||||
'minimax-m2.1-highspeed': 131_072,
|
||||
|
||||
// Google (via OpenRouter)
|
||||
'google/gemini-2.0-flash': 8_192,
|
||||
|
||||
Reference in New Issue
Block a user