This pass rewrites comment-only ANT-ONLY markers to neutral internal-only language across the source tree without changing runtime strings, flags, commands, or protocol identifiers. The goal is to reduce obvious internal prose leakage while keeping the diff mechanically safe and easy to review.

Constraint: Phase B is limited to comments/prose only; runtime strings and user-facing labels remain deferred
Rejected: Broad search-and-replace across strings and command descriptions | too risky for a prose-only pass
Confidence: high
Scope-risk: narrow
Reversibility: clean
Directive: Remaining ANT-ONLY hits are mostly runtime/user-facing strings and should be handled separately from comment cleanup
Tested: bun run build
Tested: bun run smoke
Tested: bun run verify:privacy
Tested: bun run test:provider
Tested: bun run test:provider-recommendation
Not-tested: Full repo typecheck (upstream baseline remains noisy)
Co-authored-by: anandh8x <test@example.com>
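
An illustrative before/after for the rewrite pattern (the "before" marker is a hypothetical example, not a quote from the actual diff):

  before: // ANT-ONLY: allow override via environment variable
  after:  // Allow override via environment variable (internal-only)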
// biome-ignore-all assist/source/organizeImports: internal-only import markers must not be reordered
import { CONTEXT_1M_BETA_HEADER } from '../constants/betas.js'
import { getGlobalConfig } from './config.js'
import { isEnvTruthy } from './envUtils.js'
import { getCanonicalName } from './model/model.js'
import { getModelCapability } from './model/modelCapabilities.js'
import { getOpenAIContextWindow, getOpenAIMaxOutputTokens } from './model/openaiContextWindows.js'
// Assumption: resolveAntModel is called below but no import for it appears in
// this file as shown; the module path here is a guess and may need correcting.
import { resolveAntModel } from './model/antModels.js'

// Model context window size (200k tokens for all models right now)
export const MODEL_CONTEXT_WINDOW_DEFAULT = 200_000

// Maximum output tokens for compact operations
export const COMPACT_MAX_OUTPUT_TOKENS = 20_000

// Default max output tokens
const MAX_OUTPUT_TOKENS_DEFAULT = 32_000
const MAX_OUTPUT_TOKENS_UPPER_LIMIT = 64_000

// Capped default for slot-reservation optimization. BQ p99 output = 4,911
// tokens, so 32k/64k defaults over-reserve 8-16× slot capacity. With the cap
// enabled, <1% of requests hit the limit; those get one clean retry at 64k
// (see query.ts max_output_tokens_escalate). Cap is applied in
// claude.ts:getMaxOutputTokensForModel to avoid the growthbook→betas→context
// import cycle.
export const CAPPED_DEFAULT_MAX_TOKENS = 8_000
export const ESCALATED_MAX_TOKENS = 64_000
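
// Illustrative sketch (not part of this module): how a caller along the lines
// of query.ts's max_output_tokens_escalate could apply the capped default and
// retry once at the escalated limit. `requestCompletion` and its `stopReason`
// field are hypothetical stand-ins for the real query path.
//
//   let res = await requestCompletion({ maxTokens: CAPPED_DEFAULT_MAX_TOKENS })
//   if (res.stopReason === 'max_tokens') {
//     // <1% of requests land here; one clean retry at the escalated limit
//     res = await requestCompletion({ maxTokens: ESCALATED_MAX_TOKENS })
//   }
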
/**
 * Check if 1M context is disabled via environment variable.
 * Used by C4E admins to disable 1M context for HIPAA compliance.
 */
export function is1mContextDisabled(): boolean {
  return isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_1M_CONTEXT)
}

export function has1mContext(model: string): boolean {
  if (is1mContextDisabled()) {
    return false
  }
  return /\[1m\]/i.test(model)
}
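// Example (illustrative): has1mContext('claude-sonnet-4-6[1m]') === true,
// assuming CLAUDE_CODE_DISABLE_1M_CONTEXT is unset; the [1m] tag is matched
// case-insensitively anywhere in the model string, so '[1M]' also qualifies.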

// @[MODEL LAUNCH]: Update this pattern if the new model supports 1M context
export function modelSupports1M(model: string): boolean {
  if (is1mContextDisabled()) {
    return false
  }
  const canonical = getCanonicalName(model)
  return canonical.includes('claude-sonnet-4') || canonical.includes('opus-4-6')
}

export function getContextWindowForModel(
  model: string,
  betas?: string[],
): number {
  // Allow override via environment variable (internal-only)
  // This takes precedence over all other context window resolution, including 1M detection,
  // so users can cap the effective context window for local decisions (auto-compact, etc.)
  // while still using a 1M-capable endpoint.
  if (
    process.env.USER_TYPE === 'ant' &&
    process.env.CLAUDE_CODE_MAX_CONTEXT_TOKENS
  ) {
    const override = parseInt(process.env.CLAUDE_CODE_MAX_CONTEXT_TOKENS, 10)
    if (!isNaN(override) && override > 0) {
      return override
    }
  }

  // [1m] suffix — explicit client-side opt-in, respected over all detection
  if (has1mContext(model)) {
    return 1_000_000
  }

  // OpenAI-compatible provider — use known context windows for the model
  if (
    isEnvTruthy(process.env.CLAUDE_CODE_USE_OPENAI) ||
    isEnvTruthy(process.env.CLAUDE_CODE_USE_GEMINI) ||
    isEnvTruthy(process.env.CLAUDE_CODE_USE_GITHUB)
  ) {
    const openaiWindow = getOpenAIContextWindow(model)
    if (openaiWindow !== undefined) {
      return openaiWindow
    }
  }

  // Capability metadata: trust declared input windows of 100k+, but clamp to
  // the default when 1M context is disabled
  const cap = getModelCapability(model)
  if (cap?.max_input_tokens && cap.max_input_tokens >= 100_000) {
    if (
      cap.max_input_tokens > MODEL_CONTEXT_WINDOW_DEFAULT &&
      is1mContextDisabled()
    ) {
      return MODEL_CONTEXT_WINDOW_DEFAULT
    }
    return cap.max_input_tokens
  }

  // 1M via beta header (caller opt-in) or experiment treatment
  if (betas?.includes(CONTEXT_1M_BETA_HEADER) && modelSupports1M(model)) {
    return 1_000_000
  }
  if (getSonnet1mExpTreatmentEnabled(model)) {
    return 1_000_000
  }
  // Internal-only model metadata, when available
  if (process.env.USER_TYPE === 'ant') {
    const antModel = resolveAntModel(model)
    if (antModel?.contextWindow) {
      return antModel.contextWindow
    }
  }
  return MODEL_CONTEXT_WINDOW_DEFAULT
}
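// Example (illustrative; actual results depend on env, config, and capability data):
//   getContextWindowForModel('claude-sonnet-4-6[1m]') // => 1_000_000 ([1m] opt-in)
//   getContextWindowForModel('claude-3-5-haiku')      // => 200_000, assuming no
//                                                     //    overrides or experiments apply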

export function getSonnet1mExpTreatmentEnabled(model: string): boolean {
  if (is1mContextDisabled()) {
    return false
  }
  // Only applies to sonnet 4.6 without an explicit [1m] suffix
  if (has1mContext(model)) {
    return false
  }
  if (!getCanonicalName(model).includes('sonnet-4-6')) {
    return false
  }
  return getGlobalConfig().clientDataCache?.['coral_reef_sonnet'] === 'true'
}

/**
 * Calculate context window usage percentage from token usage data.
 * Returns used and remaining percentages, or null values if no usage data.
 */
export function calculateContextPercentages(
  currentUsage: {
    input_tokens: number
    cache_creation_input_tokens: number
    cache_read_input_tokens: number
  } | null,
  contextWindowSize: number,
): { used: number | null; remaining: number | null } {
  if (!currentUsage) {
    return { used: null, remaining: null }
  }

  const totalInputTokens =
    currentUsage.input_tokens +
    currentUsage.cache_creation_input_tokens +
    currentUsage.cache_read_input_tokens

  const usedPercentage = Math.round(
    (totalInputTokens / contextWindowSize) * 100,
  )
  const clampedUsed = Math.min(100, Math.max(0, usedPercentage))

  return {
    used: clampedUsed,
    remaining: 100 - clampedUsed,
  }
}
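// Example: 150k total input tokens (fresh + cache writes + cache reads)
// against a 200k window.
//   calculateContextPercentages(
//     { input_tokens: 50_000, cache_creation_input_tokens: 40_000, cache_read_input_tokens: 60_000 },
//     200_000,
//   ) // => { used: 75, remaining: 25 }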

/**
 * Returns the model's default and upper limit for max output tokens.
 */
export function getModelMaxOutputTokens(model: string): {
  default: number
  upperLimit: number
} {
  let defaultTokens: number
  let upperLimit: number

  if (process.env.USER_TYPE === 'ant') {
    const antModel = resolveAntModel(model.toLowerCase())
    if (antModel) {
      defaultTokens = antModel.defaultMaxTokens ?? MAX_OUTPUT_TOKENS_DEFAULT
      upperLimit = antModel.upperMaxTokensLimit ?? MAX_OUTPUT_TOKENS_UPPER_LIMIT
      return { default: defaultTokens, upperLimit }
    }
  }

  // OpenAI-compatible provider — use known output limits to avoid 400 errors
  if (
    isEnvTruthy(process.env.CLAUDE_CODE_USE_OPENAI) ||
    isEnvTruthy(process.env.CLAUDE_CODE_USE_GEMINI) ||
    isEnvTruthy(process.env.CLAUDE_CODE_USE_GITHUB)
  ) {
    const openaiMax = getOpenAIMaxOutputTokens(model)
    if (openaiMax !== undefined) {
      return { default: openaiMax, upperLimit: openaiMax }
    }
  }

  const m = getCanonicalName(model)

  if (m.includes('opus-4-6')) {
    defaultTokens = 64_000
    upperLimit = 128_000
  } else if (m.includes('sonnet-4-6')) {
    defaultTokens = 32_000
    upperLimit = 128_000
  } else if (
    m.includes('opus-4-5') ||
    m.includes('sonnet-4') ||
    m.includes('haiku-4')
  ) {
    defaultTokens = 32_000
    upperLimit = 64_000
  } else if (m.includes('opus-4-1') || m.includes('opus-4')) {
    defaultTokens = 32_000
    upperLimit = 32_000
  } else if (m.includes('claude-3-opus')) {
    defaultTokens = 4_096
    upperLimit = 4_096
  } else if (m.includes('claude-3-sonnet')) {
    defaultTokens = 8_192
    upperLimit = 8_192
  } else if (m.includes('claude-3-haiku')) {
    defaultTokens = 4_096
    upperLimit = 4_096
  } else if (m.includes('3-5-sonnet') || m.includes('3-5-haiku')) {
    defaultTokens = 8_192
    upperLimit = 8_192
  } else if (m.includes('3-7-sonnet')) {
    defaultTokens = 32_000
    upperLimit = 64_000
  } else {
    defaultTokens = MAX_OUTPUT_TOKENS_DEFAULT
    upperLimit = MAX_OUTPUT_TOKENS_UPPER_LIMIT
  }

  // Capability metadata can raise or lower the upper limit; keep the default
  // within whatever limit wins
  const cap = getModelCapability(model)
  if (cap?.max_tokens && cap.max_tokens >= 4_096) {
    upperLimit = cap.max_tokens
    defaultTokens = Math.min(defaultTokens, upperLimit)
  }

  return { default: defaultTokens, upperLimit }
}
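// Example (illustrative, assuming getCanonicalName returns these ids unchanged
// and no capability-metadata override applies):
//   getModelMaxOutputTokens('claude-opus-4-6')  // => { default: 64_000, upperLimit: 128_000 }
//   getModelMaxOutputTokens('claude-3-5-haiku') // => { default: 8_192, upperLimit: 8_192 }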

/**
 * Returns the max thinking budget tokens for a given model. The max
 * thinking tokens should be strictly less than the max output tokens.
 *
 * Deprecated since newer models use adaptive thinking rather than a
 * strict thinking token budget.
 */
export function getMaxThinkingTokensForModel(model: string): number {
  return getModelMaxOutputTokens(model).upperLimit - 1
}