The test "never returns negative even for unknown 3P models (issue #635)" asserted that getEffectiveContextWindowSize() returns >= 33_000 for an unknown 3P model under the OpenAI shim. That specific number assumes reservedTokensForSummary = 20_000 (MAX_OUTPUT_TOKENS_FOR_SUMMARY), which holds only when the tengu_otk_slot_v1 GrowthBook flag is disabled. When the flag is ON — which is the case in CI but not always locally — getMaxOutputTokensForModel() caps the model's default output at CAPPED_DEFAULT_MAX_TOKENS (8_000). Then reservedTokensForSummary = 8_000, floor = 8_000 + 13_000 = 21_000, and the test fails with 21_000 < 33_000. The test reliably passes locally and reliably fails in CI, manifesting as the intermittent PR-check failure. Fix: relax the lower bound to 21_000 (cap-enabled worst case), which is still well above zero — preserving the anti-regression intent of issue #635 (no infinite auto-compact from a negative effective window) without binding the test to GrowthBook flag state. Co-authored-by: OpenClaude <openclaude@gitlawb.com>
55 lines
2.2 KiB
TypeScript
import { describe, expect, test } from 'bun:test'
|
|
import {
|
|
getEffectiveContextWindowSize,
|
|
getAutoCompactThreshold,
|
|
} from './autoCompact.ts'
|
|
|
|
describe('getEffectiveContextWindowSize', () => {
|
|
test('returns positive value for known models with large context windows', () => {
|
|
// claude-sonnet-4 has 200k context
|
|
const effective = getEffectiveContextWindowSize('claude-sonnet-4')
|
|
expect(effective).toBeGreaterThan(0)
|
|
})
|
|
|
|
test('never returns negative even for unknown 3P models (issue #635)', () => {
|
|
// Previously, unknown 3P models got 8k context → effective context was
|
|
// 8k minus 20k summary reservation = -12k, causing infinite auto-compact.
|
|
// Now the fallback is 128k and there's a floor, so effective is always
|
|
// at least reservedTokensForSummary + buffer.
|
|
//
|
|
// The exact floor depends on the max-output-tokens slot-reservation cap
|
|
// (tengu_otk_slot_v1 GrowthBook flag). With cap enabled, the model's
|
|
// default output cap drops to CAPPED_DEFAULT_MAX_TOKENS (8k), so the
|
|
// summary reservation is 8k and the floor is 8k + 13k = 21k. With cap
|
|
// disabled it's 20k + 13k = 33k. Assert the worst case so the test is
|
|
// stable regardless of flag state in CI vs local.
|
|
process.env.CLAUDE_CODE_USE_OPENAI = '1'
|
|
try {
|
|
const effective = getEffectiveContextWindowSize('some-unknown-3p-model')
|
|
expect(effective).toBeGreaterThan(0)
|
|
// 21k = CAPPED_DEFAULT_MAX_TOKENS (8k) + AUTOCOMPACT_BUFFER_TOKENS (13k).
|
|
// Covers the anti-regression intent of issue #635 without assuming
|
|
// the GrowthBook flag state.
|
|
expect(effective).toBeGreaterThanOrEqual(21_000)
|
|
} finally {
|
|
delete process.env.CLAUDE_CODE_USE_OPENAI
|
|
}
|
|
})
|
|
})
|
|
|
|
describe('getAutoCompactThreshold', () => {
|
|
test('returns positive threshold for known models', () => {
|
|
const threshold = getAutoCompactThreshold('claude-sonnet-4')
|
|
expect(threshold).toBeGreaterThan(0)
|
|
})
|
|
|
|
test('never returns negative threshold even for unknown 3P models (issue #635)', () => {
|
|
process.env.CLAUDE_CODE_USE_OPENAI = '1'
|
|
try {
|
|
const threshold = getAutoCompactThreshold('some-unknown-3p-model')
|
|
expect(threshold).toBeGreaterThan(0)
|
|
} finally {
|
|
delete process.env.CLAUDE_CODE_USE_OPENAI
|
|
}
|
|
})
|
|
}) |