Files
orcs-code/src/tools/WebSearchTool/providers/duckduckgo.ts
Kevin Codex 531e3f1059 feat(tools): resilient web search and fetch across all providers (#836)
- Add exponential backoff retry to DuckDuckGo adapter (3 attempts with
  jitter) to handle transient rate-limiting and connection errors.
- Add native fetch() fallback in WebFetch when axios hangs with custom
  DNS lookup in bundled contexts.
- Prevent broken native-path fallback for web search on OpenAI shim
  providers (minimax, moonshot, nvidia-nim, etc.) that do not support
  Anthropic's web_search_20250305 tool.
- Cherry-pick existing fixes:
  - a48bd56: cover codex/minimax/nvidia-nim in getSmallFastModel()
  - 31f0b68: 45s budget + raw-markdown fallback for secondary model
  - 446c1e8: sparse Codex /responses payload parsing
  - ae3f0b2: echo reasoning_content on assistant tool-call messages
- Fix domainCheck.test.ts mock modules to include isFirstPartyAnthropicBaseUrl
  and isGithubNativeAnthropicMode exports.

Co-authored-by: OpenClaude <openclaude@gitlawb.com>
2026-04-23 01:14:00 +08:00

101 lines
3.4 KiB
TypeScript

import type { SearchInput, SearchProvider } from './types.js'
import { applyDomainFilters, type ProviderOutput } from './types.js'
// DuckDuckGo's HTML scraper aggressively blocks datacenter / repeat IPs with
// an "anomaly in the request" response. When that happens we surface an
// actionable error instead of the opaque scraper message so users know how
// to configure a working backend.
const DDG_ANOMALY_HINT =
'DuckDuckGo scraping is rate-limited from this network. ' +
'Configure a search backend with one of: ' +
'FIRECRAWL_API_KEY, TAVILY_API_KEY, EXA_API_KEY, YOU_API_KEY, ' +
'JINA_API_KEY, BING_API_KEY, MOJEEK_API_KEY, LINKUP_API_KEY — ' +
'or use an Anthropic / Vertex / Foundry provider for native web search.'
const MAX_RETRIES = 3
const INITIAL_BACKOFF_MS = 1000
function isAnomalyError(message: string): boolean {
return /anomaly in the request|likely making requests too quickly/i.test(
message,
)
}
function isRetryableDDGError(err: unknown): boolean {
if (!(err instanceof Error)) return false
const msg = err.message.toLowerCase()
return (
msg.includes('anomaly') ||
msg.includes('too quickly') ||
msg.includes('rate limit') ||
msg.includes('timeout') ||
msg.includes('econnreset') ||
msg.includes('etimedout') ||
msg.includes('econnaborted')
)
}
function sleep(ms: number): Promise<void> {
return new Promise(r => setTimeout(r, ms))
}
export const duckduckgoProvider: SearchProvider = {
name: 'duckduckgo',
isConfigured() {
// DDG is the default fallback — always available (duck-duck-scrape is a runtime dep)
return true
},
async search(input: SearchInput, signal?: AbortSignal): Promise<ProviderOutput> {
const start = performance.now()
let search: typeof import('duck-duck-scrape').search
let SafeSearchType: typeof import('duck-duck-scrape').SafeSearchType
try {
;({ search, SafeSearchType } = await import('duck-duck-scrape'))
} catch {
throw new Error('duck-duck-scrape package not installed. Run: npm install duck-duck-scrape')
}
if (signal?.aborted) throw new DOMException('Aborted', 'AbortError')
let lastErr: unknown
for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
if (signal?.aborted) throw new DOMException('Aborted', 'AbortError')
try {
// TODO: duck-duck-scrape doesn't accept AbortSignal — can't cancel in-flight searches
const response = await search(input.query, { safeSearch: SafeSearchType.STRICT })
const hits = applyDomainFilters(
response.results.map(r => ({
title: r.title || r.url,
url: r.url,
description: r.description ?? undefined,
})),
input,
)
return {
hits,
providerName: 'duckduckgo',
durationSeconds: (performance.now() - start) / 1000,
}
} catch (err) {
lastErr = err
const msg = err instanceof Error ? err.message : String(err)
if (isAnomalyError(msg)) {
throw new Error(DDG_ANOMALY_HINT)
}
if (!isRetryableDDGError(err) || attempt === MAX_RETRIES - 1) {
throw err
}
// Exponential backoff with jitter: 1s, 2s, 4s +/- 20%
const baseDelay = INITIAL_BACKOFF_MS * Math.pow(2, attempt)
const jitter = baseDelay * 0.2 * (Math.random() * 2 - 1)
await sleep(baseDelay + jitter)
}
}
throw lastErr
},
}