Merge pull request #174 from gnanam1990/feat/provider-aware-rate-limit
feat: provider-aware rate limit reset delay for OpenAI/GitHub/Codex providers
This commit is contained in:
136
src/services/api/withRetry.test.ts
Normal file
136
src/services/api/withRetry.test.ts
Normal file
@@ -0,0 +1,136 @@
|
||||
import { describe, expect, test, afterEach } from 'bun:test'
|
||||
import { getRateLimitResetDelayMs, parseOpenAIDuration } from './withRetry.js'
|
||||
import { APIError } from '@anthropic-ai/sdk'
|
||||
|
||||
/**
 * Test helper: builds a plain object shaped like a 429 rate-limit error and
 * casts it to `APIError`. Only `headers`, `status`, `message`, `name` and
 * `error` are populated.
 *
 * @param headers - Header name/value pairs to wrap in a `Headers` instance.
 * @returns The stub object, typed as `APIError`.
 */
function makeError(headers: Record<string, string>): APIError {
  const stub = {
    headers: new Headers(headers),
    status: 429,
    message: 'rate limit exceeded',
    name: 'APIError',
    error: {},
  }
  return stub as unknown as APIError
}
|
||||
|
||||
// Snapshot process.env once at module load so every test can be rolled back
// to the values that were present before any test touched the provider flags.
const originalEnv = { ...process.env }

afterEach(() => {
  const providerFlags = [
    'CLAUDE_CODE_USE_OPENAI',
    'CLAUDE_CODE_USE_GEMINI',
    'CLAUDE_CODE_USE_GITHUB',
    'CLAUDE_CODE_USE_BEDROCK',
    'CLAUDE_CODE_USE_VERTEX',
    'CLAUDE_CODE_USE_FOUNDRY',
  ]
  providerFlags.forEach(key => {
    const saved = originalEnv[key]
    if (saved === undefined) {
      // The flag was not set originally, so remove whatever the test set.
      delete process.env[key]
    } else {
      process.env[key] = saved
    }
  })
})
|
||||
|
||||
// --- parseOpenAIDuration ---
describe('parseOpenAIDuration', () => {
  // Inputs that must parse, as [test title, input string, expected milliseconds].
  const accepted = [
    ['parses seconds: "1s" → 1000', '1s', 1000],
    ['parses minutes+seconds: "6m0s" → 360000', '6m0s', 360000],
    ['parses hours+minutes+seconds: "1h30m0s" → 5400000', '1h30m0s', 5400000],
    ['parses milliseconds: "500ms" → 500', '500ms', 500],
    ['parses minutes only: "2m" → 120000', '2m', 120000],
  ] as const

  for (const [title, input, expectedMs] of accepted) {
    test(title, () => {
      expect(parseOpenAIDuration(input)).toBe(expectedMs)
    })
  }

  // Inputs that must be rejected with null.
  test('returns null for empty string', () => {
    const result = parseOpenAIDuration('')
    expect(result).toBeNull()
  })

  test('returns null for unrecognized format', () => {
    const result = parseOpenAIDuration('invalid')
    expect(result).toBeNull()
  })
})
|
||||
|
||||
// --- getRateLimitResetDelayMs ---
|
||||
describe('getRateLimitResetDelayMs - Anthropic (firstParty)', () => {
  // No provider flag is set in these tests; they exercise the
  // `anthropic-ratelimit-unified-reset` header (a Unix timestamp in seconds).
  const nowUnixSec = (): number => Math.floor(Date.now() / 1000)

  test('reads anthropic-ratelimit-unified-reset Unix timestamp', () => {
    const resetAt = nowUnixSec() + 60
    const delay = getRateLimitResetDelayMs(
      makeError({ 'anthropic-ratelimit-unified-reset': String(resetAt) }),
    )
    // Flooring to whole seconds plus elapsed time means the delay is at most
    // 60s; the test accepts anything above 50s.
    expect(delay).not.toBeNull()
    expect(delay!).toBeGreaterThan(50_000)
    expect(delay!).toBeLessThanOrEqual(60_000)
  })

  test('returns null when header absent', () => {
    const withoutHeaders = makeError({})
    expect(getRateLimitResetDelayMs(withoutHeaders)).toBeNull()
  })

  test('returns null when reset is in the past', () => {
    const resetAt = nowUnixSec() - 10
    const stale = makeError({
      'anthropic-ratelimit-unified-reset': String(resetAt),
    })
    expect(getRateLimitResetDelayMs(stale)).toBeNull()
  })
})
|
||||
|
||||
describe('getRateLimitResetDelayMs - OpenAI provider', () => {
  // Each test selects a provider by setting its env flag before building the error.

  test('reads x-ratelimit-reset-requests duration string', () => {
    process.env.CLAUDE_CODE_USE_OPENAI = '1'
    const rateLimited = makeError({ 'x-ratelimit-reset-requests': '30s' })
    expect(getRateLimitResetDelayMs(rateLimited)).toBe(30_000)
  })

  test('reads x-ratelimit-reset-tokens and picks the larger delay', () => {
    process.env.CLAUDE_CODE_USE_OPENAI = '1'
    const headers = {
      'x-ratelimit-reset-requests': '10s',
      'x-ratelimit-reset-tokens': '1m0s',
    }
    const rateLimited = makeError(headers)
    // Should use the larger of the two so we don't retry before both reset
    expect(getRateLimitResetDelayMs(rateLimited)).toBe(60_000)
  })

  test('returns null when no openai rate limit headers present', () => {
    process.env.CLAUDE_CODE_USE_OPENAI = '1'
    const withoutHeaders = makeError({})
    const delay = getRateLimitResetDelayMs(withoutHeaders)
    expect(delay).toBeNull()
  })

  test('works for github provider too', () => {
    process.env.CLAUDE_CODE_USE_GITHUB = '1'
    const rateLimited = makeError({ 'x-ratelimit-reset-requests': '5s' })
    const delay = getRateLimitResetDelayMs(rateLimited)
    expect(delay).toBe(5_000)
  })
})
|
||||
|
||||
describe('getRateLimitResetDelayMs - providers without reset headers', () => {
  test('returns null for bedrock', () => {
    process.env.CLAUDE_CODE_USE_BEDROCK = '1'
    const resetAt = Math.floor(Date.now() / 1000) + 60
    const rateLimited = makeError({
      'anthropic-ratelimit-unified-reset': String(resetAt),
    })
    // Bedrock doesn't use this header — should still return null
    const delay = getRateLimitResetDelayMs(rateLimited)
    expect(delay).toBeNull()
  })

  test('returns null for vertex', () => {
    process.env.CLAUDE_CODE_USE_VERTEX = '1'
    const delay = getRateLimitResetDelayMs(makeError({}))
    expect(delay).toBeNull()
  })
})
|
||||
@@ -11,7 +11,7 @@ import { isAwsCredentialsProviderError } from 'src/utils/aws.js'
|
||||
import { logForDebugging } from 'src/utils/debug.js'
|
||||
import { logError } from 'src/utils/log.js'
|
||||
import { createSystemAPIErrorMessage } from 'src/utils/messages.js'
|
||||
import { getAPIProviderForStatsig } from 'src/utils/model/providers.js'
|
||||
import { getAPIProvider, getAPIProviderForStatsig } from 'src/utils/model/providers.js'
|
||||
import {
|
||||
clearApiKeyHelperCache,
|
||||
clearAwsCredentialsCache,
|
||||
@@ -811,7 +811,29 @@ function getRetryAfterMs(error: APIError): number | null {
|
||||
return null
|
||||
}
|
||||
|
||||
function getRateLimitResetDelayMs(error: APIError): number | null {
|
||||
/**
 * Parse OpenAI-style relative duration strings into milliseconds.
 *
 * The string is an optional sequence of `h`, `m`, `s` and `ms` components, in
 * that order. Each component is a non-negative integer or decimal number.
 * Examples: "1s", "6m0s", "1h30m0s", "500ms", "2m", "0.5s", "1m30.5s".
 *
 * @param s - Raw duration string, e.g. the value of an `x-ratelimit-reset-*` header.
 * @returns The total duration rounded to whole milliseconds, or null when the
 *   string is empty, does not match the format, or adds up to zero.
 */
export function parseOpenAIDuration(s: string): number | null {
  if (!s) return null

  // One optional group per unit. `m(?!s)` keeps the minutes group from
  // swallowing the "m" of a trailing "ms" component (e.g. "500ms").
  const component = String.raw`(\d+(?:\.\d+)?)`
  const pattern = new RegExp(
    `^(?:${component}h)?(?:${component}m(?!s))?(?:${component}s)?(?:${component}ms)?$`,
  )
  const match = pattern.exec(s)
  if (!match) return null

  // Unmatched optional groups are undefined; treat them as zero.
  const valueOf = (group: string | undefined): number =>
    group === undefined ? 0 : Number(group)

  const hours = valueOf(match[1])
  const minutes = valueOf(match[2])
  const seconds = valueOf(match[3])
  const millis = valueOf(match[4])

  // Round so fractional inputs (and float error) still give an integer result.
  const total = Math.round(
    hours * 3_600_000 + minutes * 60_000 + seconds * 1_000 + millis,
  )
  return total > 0 ? total : null
}
|
||||
|
||||
export function getRateLimitResetDelayMs(error: APIError): number | null {
|
||||
const provider = getAPIProvider()
|
||||
|
||||
if (provider === 'firstParty') {
|
||||
const resetHeader = error.headers?.get?.('anthropic-ratelimit-unified-reset')
|
||||
if (!resetHeader) return null
|
||||
const resetUnixSec = Number(resetHeader)
|
||||
@@ -820,3 +842,18 @@ function getRateLimitResetDelayMs(error: APIError): number | null {
|
||||
if (delayMs <= 0) return null
|
||||
return Math.min(delayMs, PERSISTENT_RESET_CAP_MS)
|
||||
}
|
||||
|
||||
if (provider === 'openai' || provider === 'codex' || provider === 'github') {
|
||||
const reqHeader = error.headers?.get?.('x-ratelimit-reset-requests')
|
||||
const tokHeader = error.headers?.get?.('x-ratelimit-reset-tokens')
|
||||
const reqMs = reqHeader ? parseOpenAIDuration(reqHeader) : null
|
||||
const tokMs = tokHeader ? parseOpenAIDuration(tokHeader) : null
|
||||
if (reqMs === null && tokMs === null) return null
|
||||
// Use the larger delay so we don't retry before both limits reset
|
||||
const delayMs = Math.max(reqMs ?? 0, tokMs ?? 0)
|
||||
return Math.min(delayMs, PERSISTENT_RESET_CAP_MS)
|
||||
}
|
||||
|
||||
// bedrock, vertex, foundry, gemini — no standard reset header
|
||||
return null
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user