fix(retry): prevent retries on quota-exhausted 429 errors (#249)

* fix(retry): prevent retries on quota-exhausted 429 errors

- Detect hard quota exhaustion (a 429 whose message contains 'limit: 0' or 'exceeded your current quota')
- Short-circuit retry loop with CannotRetryError
- Align shouldRetry() with the retry loop so quota-exhausted 429 errors are never reported as retryable

This prevents unnecessary retries and improves error clarity.

* fix(retry): prevent retries on quota-exhausted 429 errors

* fix(retry): prevent retries on quota-exhausted 429 errors
This commit is contained in:
Shivam singh
2026-04-03 18:50:17 +05:30
committed by GitHub
parent 116cc8e6bd
commit 36d1c45954

View File

@@ -103,6 +103,15 @@ function isPersistentRetryEnabled(): boolean {
: false
}
/**
 * Reports whether an error is a 429 whose message indicates exhausted quota.
 *
 * The error counts as quota-exhausted when its `status` is 429 and its
 * `message`, compared case-insensitively, contains either `limit: 0` or
 * `exceeded your current quota`.
 *
 * @param error - Any thrown value. Values that are not objects, or that lack
 *   a numeric 429 `status` and a string `message`, yield `false`.
 * @returns `true` only for a 429 carrying one of the quota-exhaustion phrases.
 */
function isQuotaExhausted(error: unknown): boolean {
  if (typeof error !== 'object' || error === null) return false

  const { status, message } = error as { status?: unknown; message?: unknown }
  if (status !== 429) return false

  // A non-string message cannot be lowercased; report "not exhausted" rather
  // than throwing from inside the caller's error-handling path.
  if (typeof message !== 'string') return false

  const msg = message.toLowerCase()
  return msg.includes('limit: 0') || msg.includes('exceeded your current quota')
}
function isTransientCapacityError(error: unknown): boolean {
return (
is529Error(error) || (error instanceof APIError && error.status === 429)
@@ -257,7 +266,17 @@ export async function* withRetry<T>(
`API error (attempt ${attempt}/${maxRetries + 1}): ${error instanceof APIError ? `${error.status} ${error.message}` : errorMessage(error)}`,
{ level: 'error' },
)
if (isQuotaExhausted(error)) {
throw new CannotRetryError(
new Error(
'API quota exhausted or not enabled.\n' +
'Fix:\n' +
'- Enable billing for your provider\n' +
'- Or switch provider via /provider',
),
retryContext,
);
}
// Fast mode fallback: on 429/529, either wait and retry (short delays)
// or fall back to standard speed (long delays) to avoid cache thrashing.
// Skip in persistent mode: the short-retry path below loops with fast
@@ -765,6 +784,7 @@ function shouldRetry(error: APIError): boolean {
// Retry on rate limits, but not for ClaudeAI Subscription users
// Enterprise users can retry because they typically use PAYG instead of rate limits
if (error.status === 429) {
if (isQuotaExhausted(error)) return false
return !isClaudeAISubscriber() || isEnterpriseSubscriber()
}