Merge pull request #174 from gnanam1990/feat/provider-aware-rate-limit

feat: provider-aware rate limit reset delay for OpenAI/GitHub/Codex providers
2026-04-03 01:16:58 +08:00
parent 11d9660a80 8501786852
commit 47c53a18e8
2 changed files with 182 additions and 9 deletions
--- a/src/services/api/withRetry.test.ts
+++ b/src/services/api/withRetry.test.ts
@@ -0,0 +1,136 @@
+import { describe, expect, test, afterEach } from 'bun:test'
+import { getRateLimitResetDelayMs, parseOpenAIDuration } from './withRetry.js'
+import { APIError } from '@anthropic-ai/sdk'
+
+// Helper to build a mock APIError with specific headers
+function makeError(headers: Record<string, string>): APIError {
+  const headersObj = new Headers(headers)
+  return {
+    headers: headersObj,
+    status: 429,
+    message: 'rate limit exceeded',
+    name: 'APIError',
+    error: {},
+  } as unknown as APIError
+}
+
+// Save/restore env vars between tests
+const originalEnv = { ...process.env }
+afterEach(() => {
+  for (const key of [
+    'CLAUDE_CODE_USE_OPENAI',
+    'CLAUDE_CODE_USE_GEMINI',
+    'CLAUDE_CODE_USE_GITHUB',
+    'CLAUDE_CODE_USE_BEDROCK',
+    'CLAUDE_CODE_USE_VERTEX',
+    'CLAUDE_CODE_USE_FOUNDRY',
+  ]) {
+    if (originalEnv[key] === undefined) delete process.env[key]
+    else process.env[key] = originalEnv[key]
+  }
+})
+
+// --- parseOpenAIDuration ---
+describe('parseOpenAIDuration', () => {
+  test('parses seconds: "1s" → 1000', () => {
+    expect(parseOpenAIDuration('1s')).toBe(1000)
+  })
+
+  test('parses minutes+seconds: "6m0s" → 360000', () => {
+    expect(parseOpenAIDuration('6m0s')).toBe(360000)
+  })
+
+  test('parses hours+minutes+seconds: "1h30m0s" → 5400000', () => {
+    expect(parseOpenAIDuration('1h30m0s')).toBe(5400000)
+  })
+
+  test('parses milliseconds: "500ms" → 500', () => {
+    expect(parseOpenAIDuration('500ms')).toBe(500)
+  })
+
+  test('parses minutes only: "2m" → 120000', () => {
+    expect(parseOpenAIDuration('2m')).toBe(120000)
+  })
+
+  test('returns null for empty string', () => {
+    expect(parseOpenAIDuration('')).toBeNull()
+  })
+
+  test('returns null for unrecognized format', () => {
+    expect(parseOpenAIDuration('invalid')).toBeNull()
+  })
+})
+
+// --- getRateLimitResetDelayMs ---
+describe('getRateLimitResetDelayMs - Anthropic (firstParty)', () => {
+  test('reads anthropic-ratelimit-unified-reset Unix timestamp', () => {
+    const futureUnixSec = Math.floor(Date.now() / 1000) + 60
+    const error = makeError({
+      'anthropic-ratelimit-unified-reset': String(futureUnixSec),
+    })
+    const delay = getRateLimitResetDelayMs(error)
+    expect(delay).not.toBeNull()
+    expect(delay!).toBeGreaterThan(50_000)
+    expect(delay!).toBeLessThanOrEqual(60_000)
+  })
+
+  test('returns null when header absent', () => {
+    const error = makeError({})
+    expect(getRateLimitResetDelayMs(error)).toBeNull()
+  })
+
+  test('returns null when reset is in the past', () => {
+    const pastUnixSec = Math.floor(Date.now() / 1000) - 10
+    const error = makeError({
+      'anthropic-ratelimit-unified-reset': String(pastUnixSec),
+    })
+    expect(getRateLimitResetDelayMs(error)).toBeNull()
+  })
+})
+
+describe('getRateLimitResetDelayMs - OpenAI provider', () => {
+  test('reads x-ratelimit-reset-requests duration string', () => {
+    process.env.CLAUDE_CODE_USE_OPENAI = '1'
+    const error = makeError({ 'x-ratelimit-reset-requests': '30s' })
+    const delay = getRateLimitResetDelayMs(error)
+    expect(delay).toBe(30_000)
+  })
+
+  test('reads x-ratelimit-reset-tokens and picks the larger delay', () => {
+    process.env.CLAUDE_CODE_USE_OPENAI = '1'
+    const error = makeError({
+      'x-ratelimit-reset-requests': '10s',
+      'x-ratelimit-reset-tokens': '1m0s',
+    })
+    // Should use the larger of the two so we don't retry before both reset
+    const delay = getRateLimitResetDelayMs(error)
+    expect(delay).toBe(60_000)
+  })
+
+  test('returns null when no openai rate limit headers present', () => {
+    process.env.CLAUDE_CODE_USE_OPENAI = '1'
+    const error = makeError({})
+    expect(getRateLimitResetDelayMs(error)).toBeNull()
+  })
+
+  test('works for github provider too', () => {
+    process.env.CLAUDE_CODE_USE_GITHUB = '1'
+    const error = makeError({ 'x-ratelimit-reset-requests': '5s' })
+    expect(getRateLimitResetDelayMs(error)).toBe(5_000)
+  })
+})
+
+describe('getRateLimitResetDelayMs - providers without reset headers', () => {
+  test('returns null for bedrock', () => {
+    process.env.CLAUDE_CODE_USE_BEDROCK = '1'
+    const error = makeError({ 'anthropic-ratelimit-unified-reset': String(Math.floor(Date.now() / 1000) + 60) })
+    // Bedrock doesn't use this header — should still return null
+    expect(getRateLimitResetDelayMs(error)).toBeNull()
+  })
+
+  test('returns null for vertex', () => {
+    process.env.CLAUDE_CODE_USE_VERTEX = '1'
+    const error = makeError({})
+    expect(getRateLimitResetDelayMs(error)).toBeNull()
+  })
+})
--- a/src/services/api/withRetry.ts
+++ b/src/services/api/withRetry.ts
@@ -11,7 +11,7 @@ import { isAwsCredentialsProviderError } from 'src/utils/aws.js'
 import { logForDebugging } from 'src/utils/debug.js'
 import { logError } from 'src/utils/log.js'
 import { createSystemAPIErrorMessage } from 'src/utils/messages.js'
-import { getAPIProviderForStatsig } from 'src/utils/model/providers.js'
+import { getAPIProvider, getAPIProviderForStatsig } from 'src/utils/model/providers.js'
 import {
  clearApiKeyHelperCache,
  clearAwsCredentialsCache,
@@ -811,7 +811,29 @@ function getRetryAfterMs(error: APIError): number | null {
  return null
 }

-function getRateLimitResetDelayMs(error: APIError): number | null {
+/**
+ * Parse OpenAI-style relative duration strings into milliseconds.
+ * Formats: "1s", "6m0s", "1h30m0s", "500ms", "2m"
+ * Returns null for unrecognized formats.
+ */
+export function parseOpenAIDuration(s: string): number | null {
+  if (!s) return null
+  // Try matching hours/minutes/seconds/milliseconds components
+  const re = /^(?:(\d+)h)?(?:(\d+)m(?!s))?(?:(\d+)s)?(?:(\d+)ms)?$/
+  const m = re.exec(s)
+  if (!m || m[0] === '') return null
+  const h = parseInt(m[1] ?? '0', 10)
+  const min = parseInt(m[2] ?? '0', 10)
+  const sec = parseInt(m[3] ?? '0', 10)
+  const ms = parseInt(m[4] ?? '0', 10)
+  const total = h * 3_600_000 + min * 60_000 + sec * 1_000 + ms
+  return total > 0 ? total : null
+}
+
+export function getRateLimitResetDelayMs(error: APIError): number | null {
+  const provider = getAPIProvider()
+
+  if (provider === 'firstParty') {
    const resetHeader = error.headers?.get?.('anthropic-ratelimit-unified-reset')
    if (!resetHeader) return null
    const resetUnixSec = Number(resetHeader)
@@ -820,3 +842,18 @@ function getRateLimitResetDelayMs(error: APIError): number | null {
    if (delayMs <= 0) return null
    return Math.min(delayMs, PERSISTENT_RESET_CAP_MS)
  }
+
+  if (provider === 'openai' || provider === 'codex' || provider === 'github') {
+    const reqHeader = error.headers?.get?.('x-ratelimit-reset-requests')
+    const tokHeader = error.headers?.get?.('x-ratelimit-reset-tokens')
+    const reqMs = reqHeader ? parseOpenAIDuration(reqHeader) : null
+    const tokMs = tokHeader ? parseOpenAIDuration(tokHeader) : null
+    if (reqMs === null && tokMs === null) return null
+    // Use the larger delay so we don't retry before both limits reset
+    const delayMs = Math.max(reqMs ?? 0, tokMs ?? 0)
+    return Math.min(delayMs, PERSISTENT_RESET_CAP_MS)
+  }
+
+  // bedrock, vertex, foundry, gemini — no standard reset header
+  return null
+}