Merge pull request #174 from gnanam1990/feat/provider-aware-rate-limit

feat: provider-aware rate limit reset delay for OpenAI/GitHub/Codex providers
2026-04-03 01:16:58 +08:00
parent 11d9660a80 8501786852
commit 47c53a18e8
2 changed files with 182 additions and 9 deletions
--- a/src/services/api/withRetry.test.ts
+++ b/src/services/api/withRetry.test.ts
@@ -0,0 +1,136 @@
 import { describe, expect, test, afterEach } from 'bun:test'
 import { getRateLimitResetDelayMs, parseOpenAIDuration } from './withRetry.js'
 import { APIError } from '@anthropic-ai/sdk'
 // Test helper: fabricate an APIError-shaped object carrying the given response headers.
 function makeError(headers: Record<string, string>): APIError {
  // Plain object literal rather than a real APIError instance; it carries a
  // fixed 429 status and wraps the supplied headers in a Headers object.
  const stub = {
    headers: new Headers(headers),
    status: 429,
    message: 'rate limit exceeded',
    name: 'APIError',
    error: {},
  }
  // Double cast because the stub is structurally incomplete as an APIError.
  return stub as unknown as APIError
 }
 // Snapshot the environment once at load time so each test can mutate the
 // provider-selection flags freely and have them put back afterwards.
 const originalEnv = { ...process.env }
 afterEach(() => {
  // Provider flags that tests in this file may set.
  const providerFlags = [
    'CLAUDE_CODE_USE_OPENAI',
    'CLAUDE_CODE_USE_GEMINI',
    'CLAUDE_CODE_USE_GITHUB',
    'CLAUDE_CODE_USE_BEDROCK',
    'CLAUDE_CODE_USE_VERTEX',
    'CLAUDE_CODE_USE_FOUNDRY',
  ]
  providerFlags.forEach(flag => {
    const saved = originalEnv[flag]
    if (saved !== undefined) {
      // Flag existed before the test run: restore its original value.
      process.env[flag] = saved
    } else {
      // Flag was not set originally: remove whatever the test assigned.
      delete process.env[flag]
    }
  })
 })
 // --- parseOpenAIDuration ---
 describe('parseOpenAIDuration', () => {
  // Table of [test title, input string, expected milliseconds] for inputs that must parse.
  const accepted: Array<[string, string, number]> = [
    ['parses seconds: "1s" → 1000', '1s', 1000],
    ['parses minutes+seconds: "6m0s" → 360000', '6m0s', 360000],
    ['parses hours+minutes+seconds: "1h30m0s" → 5400000', '1h30m0s', 5400000],
    ['parses milliseconds: "500ms" → 500', '500ms', 500],
    ['parses minutes only: "2m" → 120000', '2m', 120000],
  ]
  for (const [title, input, expectedMs] of accepted) {
    test(title, () => {
      expect(parseOpenAIDuration(input)).toBe(expectedMs)
    })
  }
  // Table of [test title, input string] for inputs that must be rejected with null.
  const rejected: Array<[string, string]> = [
    ['returns null for empty string', ''],
    ['returns null for unrecognized format', 'invalid'],
  ]
  for (const [title, input] of rejected) {
    test(title, () => {
      expect(parseOpenAIDuration(input)).toBeNull()
    })
  }
 })
 // --- getRateLimitResetDelayMs ---
 describe('getRateLimitResetDelayMs - Anthropic (firstParty)', () => {
  // Header read when no provider flag is set by the test.
  const RESET_HEADER = 'anthropic-ratelimit-unified-reset'
  // Current wall-clock time as whole Unix seconds.
  const nowUnixSec = (): number => Math.floor(Date.now() / 1000)

  test('reads anthropic-ratelimit-unified-reset Unix timestamp', () => {
    // A reset 60s ahead should give a delay of at most 60s; the 50s floor
    // leaves slack for sub-second truncation and test run time.
    const resetAt = nowUnixSec() + 60
    const err = makeError({ [RESET_HEADER]: String(resetAt) })
    const waitMs = getRateLimitResetDelayMs(err)
    expect(waitMs).not.toBeNull()
    expect(waitMs!).toBeGreaterThan(50_000)
    expect(waitMs!).toBeLessThanOrEqual(60_000)
  })
  test('returns null when header absent', () => {
    expect(getRateLimitResetDelayMs(makeError({}))).toBeNull()
  })
  test('returns null when reset is in the past', () => {
    // A reset timestamp 10s behind "now" must not produce a delay.
    const resetAt = nowUnixSec() - 10
    const err = makeError({ [RESET_HEADER]: String(resetAt) })
    expect(getRateLimitResetDelayMs(err)).toBeNull()
  })
 })
 describe('getRateLimitResetDelayMs - OpenAI provider', () => {
  // Each test sets its provider flag on process.env before building the error.
  test('reads x-ratelimit-reset-requests duration string', () => {
    process.env.CLAUDE_CODE_USE_OPENAI = '1'
    const err = makeError({ 'x-ratelimit-reset-requests': '30s' })
    expect(getRateLimitResetDelayMs(err)).toBe(30_000)
  })
  test('reads x-ratelimit-reset-tokens and picks the larger delay', () => {
    process.env.CLAUDE_CODE_USE_OPENAI = '1'
    const bothLimits = {
      'x-ratelimit-reset-requests': '10s',
      'x-ratelimit-reset-tokens': '1m0s',
    }
    const err = makeError(bothLimits)
    // The longer window (1m0s) wins so the retry waits for both limits to reset.
    expect(getRateLimitResetDelayMs(err)).toBe(60_000)
  })
  test('returns null when no openai rate limit headers present', () => {
    process.env.CLAUDE_CODE_USE_OPENAI = '1'
    const err = makeError({})
    expect(getRateLimitResetDelayMs(err)).toBeNull()
  })
  test('works for github provider too', () => {
    process.env.CLAUDE_CODE_USE_GITHUB = '1'
    const err = makeError({ 'x-ratelimit-reset-requests': '5s' })
    const waitMs = getRateLimitResetDelayMs(err)
    expect(waitMs).toBe(5_000)
  })
 })
 describe('getRateLimitResetDelayMs - providers without reset headers', () => {
  test('returns null for bedrock', () => {
    process.env.CLAUDE_CODE_USE_BEDROCK = '1'
    // Supply a valid-looking Anthropic reset header 60s ahead; with the
    // Bedrock flag set it is expected to be ignored.
    const resetAt = Math.floor(Date.now() / 1000) + 60
    const err = makeError({
      'anthropic-ratelimit-unified-reset': String(resetAt),
    })
    expect(getRateLimitResetDelayMs(err)).toBeNull()
  })
  test('returns null for vertex', () => {
    process.env.CLAUDE_CODE_USE_VERTEX = '1'
    expect(getRateLimitResetDelayMs(makeError({}))).toBeNull()
  })
 })
--- a/src/services/api/withRetry.ts
+++ b/src/services/api/withRetry.ts
@@ -11,7 +11,7 @@ import { isAwsCredentialsProviderError } from 'src/utils/aws.js'
 import { logForDebugging } from 'src/utils/debug.js'
 import { logError } from 'src/utils/log.js'
 import { createSystemAPIErrorMessage } from 'src/utils/messages.js'
-import { getAPIProviderForStatsig } from 'src/utils/model/providers.js'
+import { getAPIProvider, getAPIProviderForStatsig } from 'src/utils/model/providers.js'
 import {
  clearApiKeyHelperCache,
  clearAwsCredentialsCache,
@@ -811,7 +811,29 @@ function getRetryAfterMs(error: APIError): number | null {
  return null
 }
-function getRateLimitResetDelayMs(error: APIError): number | null {
+/**
 * Parse OpenAI-style relative duration strings into milliseconds.
 * Formats: "1s", "6m0s", "1h30m0s", "500ms", "2m"
 * Returns null for unrecognized formats.
 */
 export function parseOpenAIDuration(s: string): number | null {
  // Accepts Go-style durations built from optional h / m / s / ms components in
  // that order, e.g. "1s", "6m0s", "1h30m0s", "500ms", "2m". Components may be
  // fractional ("0.5s", "7.66s", "1m30.5s"), since rate-limit reset headers
  // commonly carry sub-second precision.
  // Returns the total in whole milliseconds (rounded up so a caller never
  // waits less than the server asked), or null when the string is empty,
  // malformed, or adds up to zero.
  if (!s) return null
  // One numeric component: digits with an optional decimal fraction.
  const num = String.raw`(\d+(?:\.\d+)?)`
  // `m(?!s)` keeps the minutes group from swallowing the "m" of a trailing "ms".
  const re = new RegExp(
    `^(?:${num}h)?(?:${num}m(?!s))?(?:${num}s)?(?:${num}ms)?$`,
  )
  const m = re.exec(s)
  // The pattern is anchored and `s` is non-empty, so a match always consumed
  // at least one component; no separate empty-match check is needed.
  if (!m) return null
  const h = parseFloat(m[1] ?? '0')
  const min = parseFloat(m[2] ?? '0')
  const sec = parseFloat(m[3] ?? '0')
  const ms = parseFloat(m[4] ?? '0')
  const total = h * 3_600_000 + min * 60_000 + sec * 1_000 + ms
  // Absurdly long digit strings can overflow to Infinity; treat them as unparseable.
  if (!Number.isFinite(total) || total <= 0) return null
  return Math.ceil(total)
 }
 export function getRateLimitResetDelayMs(error: APIError): number | null {
  const provider = getAPIProvider()
  if (provider === 'firstParty') {
    const resetHeader = error.headers?.get?.('anthropic-ratelimit-unified-reset')
    if (!resetHeader) return null
    const resetUnixSec = Number(resetHeader)
@@ -819,4 +841,19 @@ function getRateLimitResetDelayMs(error: APIError): number | null {
    const delayMs = resetUnixSec * 1000 - Date.now()
    if (delayMs <= 0) return null
    return Math.min(delayMs, PERSISTENT_RESET_CAP_MS)
  }
  if (provider === 'openai' || provider === 'codex' || provider === 'github') {
    const reqHeader = error.headers?.get?.('x-ratelimit-reset-requests')
    const tokHeader = error.headers?.get?.('x-ratelimit-reset-tokens')
    const reqMs = reqHeader ? parseOpenAIDuration(reqHeader) : null
    const tokMs = tokHeader ? parseOpenAIDuration(tokHeader) : null
    if (reqMs === null && tokMs === null) return null
    // Use the larger delay so we don't retry before both limits reset
    const delayMs = Math.max(reqMs ?? 0, tokMs ?? 0)
    return Math.min(delayMs, PERSISTENT_RESET_CAP_MS)
  }
  // bedrock, vertex, foundry, gemini — no standard reset header
  return null
 }