Merge pull request #174 from gnanam1990/feat/provider-aware-rate-limit

feat: provider-aware rate limit reset delay for OpenAI/GitHub/Codex providers
This commit is contained in:
Kevin Codex
2026-04-03 01:16:58 +08:00
committed by GitHub
2 changed files with 182 additions and 9 deletions

View File

@@ -0,0 +1,136 @@
import { describe, expect, test, afterEach } from 'bun:test'
import { getRateLimitResetDelayMs, parseOpenAIDuration } from './withRetry.js'
import { APIError } from '@anthropic-ai/sdk'
/**
 * Builds a minimal stand-in for an SDK `APIError` describing a 429 response.
 *
 * Only the fields listed below are populated; the object is cast to
 * `APIError` so it can be handed to the function under test.
 *
 * @param headers Plain header name/value pairs, wrapped in a `Headers` instance.
 * @returns An `APIError`-shaped object with status 429 and the given headers.
 */
function makeError(headers: Record<string, string>): APIError {
  const stub = {
    headers: new Headers(headers),
    status: 429,
    message: 'rate limit exceeded',
    name: 'APIError',
    error: {},
  }
  return stub as unknown as APIError
}
// Snapshot of the environment taken at module load; used to undo per-test changes.
const originalEnv = { ...process.env }

// Provider-selection flags that individual tests in this file may set.
const providerEnvFlags = [
  'CLAUDE_CODE_USE_OPENAI',
  'CLAUDE_CODE_USE_GEMINI',
  'CLAUDE_CODE_USE_GITHUB',
  'CLAUDE_CODE_USE_BEDROCK',
  'CLAUDE_CODE_USE_VERTEX',
  'CLAUDE_CODE_USE_FOUNDRY',
] as const

// After every test, put each provider flag back to its snapshot value,
// removing it entirely when it was not set at load time.
afterEach(() => {
  providerEnvFlags.forEach(flag => {
    const saved = originalEnv[flag]
    if (saved === undefined) {
      delete process.env[flag]
    } else {
      process.env[flag] = saved
    }
  })
})
// --- parseOpenAIDuration ---
describe('parseOpenAIDuration', () => {
  // Each row: [test title, input string, expected milliseconds].
  const parsable: Array<[string, string, number]> = [
    ['parses seconds: "1s" → 1000', '1s', 1000],
    ['parses minutes+seconds: "6m0s" → 360000', '6m0s', 360000],
    ['parses hours+minutes+seconds: "1h30m0s" → 5400000', '1h30m0s', 5400000],
    ['parses milliseconds: "500ms" → 500', '500ms', 500],
    ['parses minutes only: "2m" → 120000', '2m', 120000],
  ]
  for (const [title, input, expectedMs] of parsable) {
    test(title, () => {
      expect(parseOpenAIDuration(input)).toBe(expectedMs)
    })
  }

  // Each row: [test title, input string that must be rejected with null].
  const rejected: Array<[string, string]> = [
    ['returns null for empty string', ''],
    ['returns null for unrecognized format', 'invalid'],
  ]
  for (const [title, input] of rejected) {
    test(title, () => {
      expect(parseOpenAIDuration(input)).toBeNull()
    })
  }
})
// --- getRateLimitResetDelayMs ---
describe('getRateLimitResetDelayMs - Anthropic (firstParty)', () => {
  const RESET_HEADER = 'anthropic-ratelimit-unified-reset'

  // Mock error whose reset header holds a Unix timestamp (whole seconds,
  // rounded down) that lies `offsetSec` seconds away from the current time.
  const errorResettingIn = (offsetSec: number): APIError =>
    makeError({
      [RESET_HEADER]: String(Math.floor(Date.now() / 1000) + offsetSec),
    })

  test('reads anthropic-ratelimit-unified-reset Unix timestamp', () => {
    const waitMs = getRateLimitResetDelayMs(errorResettingIn(60))
    expect(waitMs).not.toBeNull()
    // The timestamp is floored to whole seconds, so the delay can be up to
    // one second short of 60s but never more than 60s.
    expect(waitMs as number).toBeGreaterThan(50_000)
    expect(waitMs as number).toBeLessThanOrEqual(60_000)
  })

  test('returns null when header absent', () => {
    expect(getRateLimitResetDelayMs(makeError({}))).toBeNull()
  })

  test('returns null when reset is in the past', () => {
    expect(getRateLimitResetDelayMs(errorResettingIn(-10))).toBeNull()
  })
})
describe('getRateLimitResetDelayMs - OpenAI provider', () => {
  // Switches a provider on for the current test by setting its env flag to '1'.
  const enableProvider = (
    flag: 'CLAUDE_CODE_USE_OPENAI' | 'CLAUDE_CODE_USE_GITHUB',
  ): void => {
    process.env[flag] = '1'
  }

  test('reads x-ratelimit-reset-requests duration string', () => {
    enableProvider('CLAUDE_CODE_USE_OPENAI')
    const rateLimited = makeError({ 'x-ratelimit-reset-requests': '30s' })
    expect(getRateLimitResetDelayMs(rateLimited)).toBe(30_000)
  })

  test('reads x-ratelimit-reset-tokens and picks the larger delay', () => {
    enableProvider('CLAUDE_CODE_USE_OPENAI')
    const rateLimited = makeError({
      'x-ratelimit-reset-requests': '10s',
      'x-ratelimit-reset-tokens': '1m0s',
    })
    // Both limits must have reset before a retry is useful, so the longer
    // of the two durations (1m0s) is the expected result.
    expect(getRateLimitResetDelayMs(rateLimited)).toBe(60_000)
  })

  test('returns null when no openai rate limit headers present', () => {
    enableProvider('CLAUDE_CODE_USE_OPENAI')
    expect(getRateLimitResetDelayMs(makeError({}))).toBeNull()
  })

  test('works for github provider too', () => {
    enableProvider('CLAUDE_CODE_USE_GITHUB')
    const rateLimited = makeError({ 'x-ratelimit-reset-requests': '5s' })
    expect(getRateLimitResetDelayMs(rateLimited)).toBe(5_000)
  })
})
describe('getRateLimitResetDelayMs - providers without reset headers', () => {
  test('returns null for bedrock', () => {
    process.env.CLAUDE_CODE_USE_BEDROCK = '1'
    // Supply an Anthropic-style reset timestamp 60s ahead: with Bedrock
    // selected the header is expected to be ignored and null returned.
    const resetAtUnixSec = Math.floor(Date.now() / 1000) + 60
    const bedrockError = makeError({
      'anthropic-ratelimit-unified-reset': String(resetAtUnixSec),
    })
    expect(getRateLimitResetDelayMs(bedrockError)).toBeNull()
  })

  test('returns null for vertex', () => {
    process.env.CLAUDE_CODE_USE_VERTEX = '1'
    const vertexError = makeError({})
    expect(getRateLimitResetDelayMs(vertexError)).toBeNull()
  })
})

View File

@@ -11,7 +11,7 @@ import { isAwsCredentialsProviderError } from 'src/utils/aws.js'
import { logForDebugging } from 'src/utils/debug.js' import { logForDebugging } from 'src/utils/debug.js'
import { logError } from 'src/utils/log.js' import { logError } from 'src/utils/log.js'
import { createSystemAPIErrorMessage } from 'src/utils/messages.js' import { createSystemAPIErrorMessage } from 'src/utils/messages.js'
import { getAPIProviderForStatsig } from 'src/utils/model/providers.js' import { getAPIProvider, getAPIProviderForStatsig } from 'src/utils/model/providers.js'
import { import {
clearApiKeyHelperCache, clearApiKeyHelperCache,
clearAwsCredentialsCache, clearAwsCredentialsCache,
@@ -811,7 +811,29 @@ function getRetryAfterMs(error: APIError): number | null {
return null return null
} }
function getRateLimitResetDelayMs(error: APIError): number | null { /**
* Parse OpenAI-style relative duration strings into milliseconds.
* Formats: "1s", "6m0s", "1h30m0s", "500ms", "2m"
* Returns null for unrecognized formats.
*/
export function parseOpenAIDuration(s: string): number | null {
  if (!s) return null
  // Components are optional but must appear in h → m → s → ms order.
  // Each component may carry a decimal fraction ("1.5s", "1m26.4s"), as
  // Go-style duration strings do; matching integers only would reject such
  // values and discard the reset hint.
  // The `(?!s)` lookahead keeps the minutes group from consuming the "m" of "ms".
  const re =
    /^(?:(\d+(?:\.\d+)?)h)?(?:(\d+(?:\.\d+)?)m(?!s))?(?:(\d+(?:\.\d+)?)s)?(?:(\d+(?:\.\d+)?)ms)?$/
  const match = re.exec(s)
  // Every group is optional, so guard against a match that consumed nothing.
  if (!match || match[0] === '') return null

  // A component that did not participate in the match counts as zero.
  const component = (index: number): number => Number(match[index] ?? 0)

  // Round to whole milliseconds so fractional inputs still yield an integer
  // delay; integer-only inputs are unaffected.
  const total = Math.round(
    component(1) * 3_600_000 +
      component(2) * 60_000 +
      component(3) * 1_000 +
      component(4),
  )
  // A zero-length duration carries no usable delay, so report it as null.
  return total > 0 ? total : null
}
export function getRateLimitResetDelayMs(error: APIError): number | null {
const provider = getAPIProvider()
if (provider === 'firstParty') {
const resetHeader = error.headers?.get?.('anthropic-ratelimit-unified-reset') const resetHeader = error.headers?.get?.('anthropic-ratelimit-unified-reset')
if (!resetHeader) return null if (!resetHeader) return null
const resetUnixSec = Number(resetHeader) const resetUnixSec = Number(resetHeader)
@@ -819,4 +841,19 @@ function getRateLimitResetDelayMs(error: APIError): number | null {
const delayMs = resetUnixSec * 1000 - Date.now() const delayMs = resetUnixSec * 1000 - Date.now()
if (delayMs <= 0) return null if (delayMs <= 0) return null
return Math.min(delayMs, PERSISTENT_RESET_CAP_MS) return Math.min(delayMs, PERSISTENT_RESET_CAP_MS)
}
if (provider === 'openai' || provider === 'codex' || provider === 'github') {
const reqHeader = error.headers?.get?.('x-ratelimit-reset-requests')
const tokHeader = error.headers?.get?.('x-ratelimit-reset-tokens')
const reqMs = reqHeader ? parseOpenAIDuration(reqHeader) : null
const tokMs = tokHeader ? parseOpenAIDuration(tokHeader) : null
if (reqMs === null && tokMs === null) return null
// Use the larger delay so we don't retry before both limits reset
const delayMs = Math.max(reqMs ?? 0, tokMs ?? 0)
return Math.min(delayMs, PERSISTENT_RESET_CAP_MS)
}
// bedrock, vertex, foundry, gemini — no standard reset header
return null
} }