diff --git a/src/services/api/withRetry.test.ts b/src/services/api/withRetry.test.ts new file mode 100644 index 00000000..2bf57555 --- /dev/null +++ b/src/services/api/withRetry.test.ts @@ -0,0 +1,136 @@ +import { describe, expect, test, afterEach } from 'bun:test' +import { getRateLimitResetDelayMs, parseOpenAIDuration } from './withRetry.js' +import { APIError } from '@anthropic-ai/sdk' + +// Helper to build a mock APIError with specific headers +function makeError(headers: Record): APIError { + const headersObj = new Headers(headers) + return { + headers: headersObj, + status: 429, + message: 'rate limit exceeded', + name: 'APIError', + error: {}, + } as unknown as APIError +} + +// Save/restore env vars between tests +const originalEnv = { ...process.env } +afterEach(() => { + for (const key of [ + 'CLAUDE_CODE_USE_OPENAI', + 'CLAUDE_CODE_USE_GEMINI', + 'CLAUDE_CODE_USE_GITHUB', + 'CLAUDE_CODE_USE_BEDROCK', + 'CLAUDE_CODE_USE_VERTEX', + 'CLAUDE_CODE_USE_FOUNDRY', + ]) { + if (originalEnv[key] === undefined) delete process.env[key] + else process.env[key] = originalEnv[key] + } +}) + +// --- parseOpenAIDuration --- +describe('parseOpenAIDuration', () => { + test('parses seconds: "1s" → 1000', () => { + expect(parseOpenAIDuration('1s')).toBe(1000) + }) + + test('parses minutes+seconds: "6m0s" → 360000', () => { + expect(parseOpenAIDuration('6m0s')).toBe(360000) + }) + + test('parses hours+minutes+seconds: "1h30m0s" → 5400000', () => { + expect(parseOpenAIDuration('1h30m0s')).toBe(5400000) + }) + + test('parses milliseconds: "500ms" → 500', () => { + expect(parseOpenAIDuration('500ms')).toBe(500) + }) + + test('parses minutes only: "2m" → 120000', () => { + expect(parseOpenAIDuration('2m')).toBe(120000) + }) + + test('returns null for empty string', () => { + expect(parseOpenAIDuration('')).toBeNull() + }) + + test('returns null for unrecognized format', () => { + expect(parseOpenAIDuration('invalid')).toBeNull() + }) +}) + +// --- getRateLimitResetDelayMs --- +describe('getRateLimitResetDelayMs - Anthropic (firstParty)', () => { + test('reads anthropic-ratelimit-unified-reset Unix timestamp', () => { + const futureUnixSec = Math.floor(Date.now() / 1000) + 60 + const error = makeError({ + 'anthropic-ratelimit-unified-reset': String(futureUnixSec), + }) + const delay = getRateLimitResetDelayMs(error) + expect(delay).not.toBeNull() + expect(delay!).toBeGreaterThan(50_000) + expect(delay!).toBeLessThanOrEqual(60_000) + }) + + test('returns null when header absent', () => { + const error = makeError({}) + expect(getRateLimitResetDelayMs(error)).toBeNull() + }) + + test('returns null when reset is in the past', () => { + const pastUnixSec = Math.floor(Date.now() / 1000) - 10 + const error = makeError({ + 'anthropic-ratelimit-unified-reset': String(pastUnixSec), + }) + expect(getRateLimitResetDelayMs(error)).toBeNull() + }) +}) + +describe('getRateLimitResetDelayMs - OpenAI provider', () => { + test('reads x-ratelimit-reset-requests duration string', () => { + process.env.CLAUDE_CODE_USE_OPENAI = '1' + const error = makeError({ 'x-ratelimit-reset-requests': '30s' }) + const delay = getRateLimitResetDelayMs(error) + expect(delay).toBe(30_000) + }) + + test('reads x-ratelimit-reset-tokens and picks the larger delay', () => { + process.env.CLAUDE_CODE_USE_OPENAI = '1' + const error = makeError({ + 'x-ratelimit-reset-requests': '10s', + 'x-ratelimit-reset-tokens': '1m0s', + }) + // Should use the larger of the two so we don't retry before both reset + const delay = getRateLimitResetDelayMs(error) + expect(delay).toBe(60_000) + }) + + test('returns null when no openai rate limit headers present', () => { + process.env.CLAUDE_CODE_USE_OPENAI = '1' + const error = makeError({}) + expect(getRateLimitResetDelayMs(error)).toBeNull() + }) + + test('works for github provider too', () => { + process.env.CLAUDE_CODE_USE_GITHUB = '1' + const error = makeError({ 'x-ratelimit-reset-requests': '5s' }) + expect(getRateLimitResetDelayMs(error)).toBe(5_000) + }) +}) + +describe('getRateLimitResetDelayMs - providers without reset headers', () => { + test('returns null for bedrock', () => { + process.env.CLAUDE_CODE_USE_BEDROCK = '1' + const error = makeError({ 'anthropic-ratelimit-unified-reset': String(Math.floor(Date.now() / 1000) + 60) }) + // Bedrock doesn't use this header — should still return null + expect(getRateLimitResetDelayMs(error)).toBeNull() + }) + + test('returns null for vertex', () => { + process.env.CLAUDE_CODE_USE_VERTEX = '1' + const error = makeError({}) + expect(getRateLimitResetDelayMs(error)).toBeNull() + }) +}) diff --git a/src/services/api/withRetry.ts b/src/services/api/withRetry.ts index 5ec9ad08..7dc26de9 100644 --- a/src/services/api/withRetry.ts +++ b/src/services/api/withRetry.ts @@ -11,7 +11,7 @@ import { isAwsCredentialsProviderError } from 'src/utils/aws.js' import { logForDebugging } from 'src/utils/debug.js' import { logError } from 'src/utils/log.js' import { createSystemAPIErrorMessage } from 'src/utils/messages.js' -import { getAPIProviderForStatsig } from 'src/utils/model/providers.js' +import { getAPIProvider, getAPIProviderForStatsig } from 'src/utils/model/providers.js' import { clearApiKeyHelperCache, clearAwsCredentialsCache, @@ -811,12 +811,49 @@ function getRetryAfterMs(error: APIError): number | null { return null } -function getRateLimitResetDelayMs(error: APIError): number | null { - const resetHeader = error.headers?.get?.('anthropic-ratelimit-unified-reset') - if (!resetHeader) return null - const resetUnixSec = Number(resetHeader) - if (!Number.isFinite(resetUnixSec)) return null - const delayMs = resetUnixSec * 1000 - Date.now() - if (delayMs <= 0) return null - return Math.min(delayMs, PERSISTENT_RESET_CAP_MS) +/** + * Parse OpenAI-style relative duration strings into milliseconds. + * Formats: "1s", "6m0s", "1h30m0s", "500ms", "2m" + * Returns null for unrecognized formats. + */ +export function parseOpenAIDuration(s: string): number | null { + if (!s) return null + // Try matching hours/minutes/seconds/milliseconds components + const re = /^(?:(\d+)h)?(?:(\d+)m(?!s))?(?:(\d+)s)?(?:(\d+)ms)?$/ + const m = re.exec(s) + if (!m || m[0] === '') return null + const h = parseInt(m[1] ?? '0', 10) + const min = parseInt(m[2] ?? '0', 10) + const sec = parseInt(m[3] ?? '0', 10) + const ms = parseInt(m[4] ?? '0', 10) + const total = h * 3_600_000 + min * 60_000 + sec * 1_000 + ms + return total > 0 ? total : null +} + +export function getRateLimitResetDelayMs(error: APIError): number | null { + const provider = getAPIProvider() + + if (provider === 'firstParty') { + const resetHeader = error.headers?.get?.('anthropic-ratelimit-unified-reset') + if (!resetHeader) return null + const resetUnixSec = Number(resetHeader) + if (!Number.isFinite(resetUnixSec)) return null + const delayMs = resetUnixSec * 1000 - Date.now() + if (delayMs <= 0) return null + return Math.min(delayMs, PERSISTENT_RESET_CAP_MS) + } + + if (provider === 'openai' || provider === 'codex' || provider === 'github') { + const reqHeader = error.headers?.get?.('x-ratelimit-reset-requests') + const tokHeader = error.headers?.get?.('x-ratelimit-reset-tokens') + const reqMs = reqHeader ? parseOpenAIDuration(reqHeader) : null + const tokMs = tokHeader ? parseOpenAIDuration(tokHeader) : null + if (reqMs === null && tokMs === null) return null + // Use the larger delay so we don't retry before both limits reset + const delayMs = Math.max(reqMs ?? 0, tokMs ?? 0) + return Math.min(delayMs, PERSISTENT_RESET_CAP_MS) + } + + // bedrock, vertex, foundry, gemini — no standard reset header + return null }