Merge pull request #174 from gnanam1990/feat/provider-aware-rate-limit
feat: provider-aware rate limit reset delay for OpenAI/GitHub/Codex providers
This commit is contained in:
136
src/services/api/withRetry.test.ts
Normal file
136
src/services/api/withRetry.test.ts
Normal file
@@ -0,0 +1,136 @@
|
|||||||
|
import { describe, expect, test, afterEach } from 'bun:test'
|
||||||
|
import { getRateLimitResetDelayMs, parseOpenAIDuration } from './withRetry.js'
|
||||||
|
import { APIError } from '@anthropic-ai/sdk'
|
||||||
|
|
||||||
|
// Helper to build a mock APIError with specific headers
|
||||||
|
function makeError(headers: Record<string, string>): APIError {
|
||||||
|
const headersObj = new Headers(headers)
|
||||||
|
return {
|
||||||
|
headers: headersObj,
|
||||||
|
status: 429,
|
||||||
|
message: 'rate limit exceeded',
|
||||||
|
name: 'APIError',
|
||||||
|
error: {},
|
||||||
|
} as unknown as APIError
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save/restore env vars between tests
|
||||||
|
const originalEnv = { ...process.env }
|
||||||
|
afterEach(() => {
|
||||||
|
for (const key of [
|
||||||
|
'CLAUDE_CODE_USE_OPENAI',
|
||||||
|
'CLAUDE_CODE_USE_GEMINI',
|
||||||
|
'CLAUDE_CODE_USE_GITHUB',
|
||||||
|
'CLAUDE_CODE_USE_BEDROCK',
|
||||||
|
'CLAUDE_CODE_USE_VERTEX',
|
||||||
|
'CLAUDE_CODE_USE_FOUNDRY',
|
||||||
|
]) {
|
||||||
|
if (originalEnv[key] === undefined) delete process.env[key]
|
||||||
|
else process.env[key] = originalEnv[key]
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
// --- parseOpenAIDuration ---
|
||||||
|
describe('parseOpenAIDuration', () => {
|
||||||
|
test('parses seconds: "1s" → 1000', () => {
|
||||||
|
expect(parseOpenAIDuration('1s')).toBe(1000)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('parses minutes+seconds: "6m0s" → 360000', () => {
|
||||||
|
expect(parseOpenAIDuration('6m0s')).toBe(360000)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('parses hours+minutes+seconds: "1h30m0s" → 5400000', () => {
|
||||||
|
expect(parseOpenAIDuration('1h30m0s')).toBe(5400000)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('parses milliseconds: "500ms" → 500', () => {
|
||||||
|
expect(parseOpenAIDuration('500ms')).toBe(500)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('parses minutes only: "2m" → 120000', () => {
|
||||||
|
expect(parseOpenAIDuration('2m')).toBe(120000)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns null for empty string', () => {
|
||||||
|
expect(parseOpenAIDuration('')).toBeNull()
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns null for unrecognized format', () => {
|
||||||
|
expect(parseOpenAIDuration('invalid')).toBeNull()
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
// --- getRateLimitResetDelayMs ---
|
||||||
|
describe('getRateLimitResetDelayMs - Anthropic (firstParty)', () => {
|
||||||
|
test('reads anthropic-ratelimit-unified-reset Unix timestamp', () => {
|
||||||
|
const futureUnixSec = Math.floor(Date.now() / 1000) + 60
|
||||||
|
const error = makeError({
|
||||||
|
'anthropic-ratelimit-unified-reset': String(futureUnixSec),
|
||||||
|
})
|
||||||
|
const delay = getRateLimitResetDelayMs(error)
|
||||||
|
expect(delay).not.toBeNull()
|
||||||
|
expect(delay!).toBeGreaterThan(50_000)
|
||||||
|
expect(delay!).toBeLessThanOrEqual(60_000)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns null when header absent', () => {
|
||||||
|
const error = makeError({})
|
||||||
|
expect(getRateLimitResetDelayMs(error)).toBeNull()
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns null when reset is in the past', () => {
|
||||||
|
const pastUnixSec = Math.floor(Date.now() / 1000) - 10
|
||||||
|
const error = makeError({
|
||||||
|
'anthropic-ratelimit-unified-reset': String(pastUnixSec),
|
||||||
|
})
|
||||||
|
expect(getRateLimitResetDelayMs(error)).toBeNull()
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('getRateLimitResetDelayMs - OpenAI provider', () => {
|
||||||
|
test('reads x-ratelimit-reset-requests duration string', () => {
|
||||||
|
process.env.CLAUDE_CODE_USE_OPENAI = '1'
|
||||||
|
const error = makeError({ 'x-ratelimit-reset-requests': '30s' })
|
||||||
|
const delay = getRateLimitResetDelayMs(error)
|
||||||
|
expect(delay).toBe(30_000)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('reads x-ratelimit-reset-tokens and picks the larger delay', () => {
|
||||||
|
process.env.CLAUDE_CODE_USE_OPENAI = '1'
|
||||||
|
const error = makeError({
|
||||||
|
'x-ratelimit-reset-requests': '10s',
|
||||||
|
'x-ratelimit-reset-tokens': '1m0s',
|
||||||
|
})
|
||||||
|
// Should use the larger of the two so we don't retry before both reset
|
||||||
|
const delay = getRateLimitResetDelayMs(error)
|
||||||
|
expect(delay).toBe(60_000)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns null when no openai rate limit headers present', () => {
|
||||||
|
process.env.CLAUDE_CODE_USE_OPENAI = '1'
|
||||||
|
const error = makeError({})
|
||||||
|
expect(getRateLimitResetDelayMs(error)).toBeNull()
|
||||||
|
})
|
||||||
|
|
||||||
|
test('works for github provider too', () => {
|
||||||
|
process.env.CLAUDE_CODE_USE_GITHUB = '1'
|
||||||
|
const error = makeError({ 'x-ratelimit-reset-requests': '5s' })
|
||||||
|
expect(getRateLimitResetDelayMs(error)).toBe(5_000)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('getRateLimitResetDelayMs - providers without reset headers', () => {
|
||||||
|
test('returns null for bedrock', () => {
|
||||||
|
process.env.CLAUDE_CODE_USE_BEDROCK = '1'
|
||||||
|
const error = makeError({ 'anthropic-ratelimit-unified-reset': String(Math.floor(Date.now() / 1000) + 60) })
|
||||||
|
// Bedrock doesn't use this header — should still return null
|
||||||
|
expect(getRateLimitResetDelayMs(error)).toBeNull()
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns null for vertex', () => {
|
||||||
|
process.env.CLAUDE_CODE_USE_VERTEX = '1'
|
||||||
|
const error = makeError({})
|
||||||
|
expect(getRateLimitResetDelayMs(error)).toBeNull()
|
||||||
|
})
|
||||||
|
})
|
||||||
@@ -11,7 +11,7 @@ import { isAwsCredentialsProviderError } from 'src/utils/aws.js'
|
|||||||
import { logForDebugging } from 'src/utils/debug.js'
|
import { logForDebugging } from 'src/utils/debug.js'
|
||||||
import { logError } from 'src/utils/log.js'
|
import { logError } from 'src/utils/log.js'
|
||||||
import { createSystemAPIErrorMessage } from 'src/utils/messages.js'
|
import { createSystemAPIErrorMessage } from 'src/utils/messages.js'
|
||||||
import { getAPIProviderForStatsig } from 'src/utils/model/providers.js'
|
import { getAPIProvider, getAPIProviderForStatsig } from 'src/utils/model/providers.js'
|
||||||
import {
|
import {
|
||||||
clearApiKeyHelperCache,
|
clearApiKeyHelperCache,
|
||||||
clearAwsCredentialsCache,
|
clearAwsCredentialsCache,
|
||||||
@@ -811,7 +811,29 @@ function getRetryAfterMs(error: APIError): number | null {
|
|||||||
return null
|
return null
|
||||||
}
|
}
|
||||||
|
|
||||||
function getRateLimitResetDelayMs(error: APIError): number | null {
|
/**
|
||||||
|
* Parse OpenAI-style relative duration strings into milliseconds.
|
||||||
|
* Formats: "1s", "6m0s", "1h30m0s", "500ms", "2m"
|
||||||
|
* Returns null for unrecognized formats.
|
||||||
|
*/
|
||||||
|
export function parseOpenAIDuration(s: string): number | null {
|
||||||
|
if (!s) return null
|
||||||
|
// Try matching hours/minutes/seconds/milliseconds components
|
||||||
|
const re = /^(?:(\d+)h)?(?:(\d+)m(?!s))?(?:(\d+)s)?(?:(\d+)ms)?$/
|
||||||
|
const m = re.exec(s)
|
||||||
|
if (!m || m[0] === '') return null
|
||||||
|
const h = parseInt(m[1] ?? '0', 10)
|
||||||
|
const min = parseInt(m[2] ?? '0', 10)
|
||||||
|
const sec = parseInt(m[3] ?? '0', 10)
|
||||||
|
const ms = parseInt(m[4] ?? '0', 10)
|
||||||
|
const total = h * 3_600_000 + min * 60_000 + sec * 1_000 + ms
|
||||||
|
return total > 0 ? total : null
|
||||||
|
}
|
||||||
|
|
||||||
|
export function getRateLimitResetDelayMs(error: APIError): number | null {
|
||||||
|
const provider = getAPIProvider()
|
||||||
|
|
||||||
|
if (provider === 'firstParty') {
|
||||||
const resetHeader = error.headers?.get?.('anthropic-ratelimit-unified-reset')
|
const resetHeader = error.headers?.get?.('anthropic-ratelimit-unified-reset')
|
||||||
if (!resetHeader) return null
|
if (!resetHeader) return null
|
||||||
const resetUnixSec = Number(resetHeader)
|
const resetUnixSec = Number(resetHeader)
|
||||||
@@ -819,4 +841,19 @@ function getRateLimitResetDelayMs(error: APIError): number | null {
|
|||||||
const delayMs = resetUnixSec * 1000 - Date.now()
|
const delayMs = resetUnixSec * 1000 - Date.now()
|
||||||
if (delayMs <= 0) return null
|
if (delayMs <= 0) return null
|
||||||
return Math.min(delayMs, PERSISTENT_RESET_CAP_MS)
|
return Math.min(delayMs, PERSISTENT_RESET_CAP_MS)
|
||||||
|
}
|
||||||
|
|
||||||
|
if (provider === 'openai' || provider === 'codex' || provider === 'github') {
|
||||||
|
const reqHeader = error.headers?.get?.('x-ratelimit-reset-requests')
|
||||||
|
const tokHeader = error.headers?.get?.('x-ratelimit-reset-tokens')
|
||||||
|
const reqMs = reqHeader ? parseOpenAIDuration(reqHeader) : null
|
||||||
|
const tokMs = tokHeader ? parseOpenAIDuration(tokHeader) : null
|
||||||
|
if (reqMs === null && tokMs === null) return null
|
||||||
|
// Use the larger delay so we don't retry before both limits reset
|
||||||
|
const delayMs = Math.max(reqMs ?? 0, tokMs ?? 0)
|
||||||
|
return Math.min(delayMs, PERSISTENT_RESET_CAP_MS)
|
||||||
|
}
|
||||||
|
|
||||||
|
// bedrock, vertex, foundry, gemini — no standard reset header
|
||||||
|
return null
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user