From e5c9a6f6299874c0840253e5dc6bd97decc1e620 Mon Sep 17 00:00:00 2001 From: Meetpatel006 <136876547+Meetpatel006@users.noreply.github.com> Date: Sat, 4 Apr 2026 06:51:54 +0530 Subject: [PATCH] Enable Free DDG WebSearch For Non-Claude Models (#234) * added duck duck go for websearch tools that allowed free searching * update readme * Replace @phukon/duckduckgo-search with duck-duck-scrape and fix Firecrawl routing priority, and add DDG error handling * refactor: streamline DuckDuckGo search fallback to use Firecrawl directly on rate limit * docs: update README to clarify DuckDuckGo web search fallback and its limitations with TOS --- README.md | 16 ++-- bun.lock | 11 +++ package.json | 1 + src/tools/WebSearchTool/WebSearchTool.ts | 109 ++++++++++++++++++++++- 4 files changed, 131 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index a6206c9f..302a1d4e 100644 --- a/README.md +++ b/README.md @@ -185,9 +185,15 @@ When no routing match is found, the global provider (env vars) is used as fallba ## Web Search and Fetch -`WebFetch` works out of the box. +By default, `WebSearch` now works on non-Anthropic models using DuckDuckGo. This gives GPT-4o, DeepSeek, Gemini, Ollama, and other OpenAI-compatible providers a free web search path out of the box. -`WebSearch` and richer JS-aware fetching work best with a Firecrawl API key: +>**Note:** DuckDuckGo fallback works by scraping search results and may be rate-limited, blocked, or subject to DuckDuckGo's Terms of Service. If you want a more reliable supported option, configure Firecrawl. + +For Anthropic-native backends (Anthropic/Vertex/Foundry) and Codex responses, OpenClaude keeps the native provider web search behavior. + +`WebFetch` works but uses basic HTTP plus HTML-to-markdown conversion. That fails on JavaScript-rendered pages (React, Next.js, Vue SPAs) and sites that block plain HTTP requests. 
+ +Set a [Firecrawl](https://firecrawl.dev) API key if you want Firecrawl-powered search/fetch behavior: ```bash export FIRECRAWL_API_KEY=your-key-here @@ -195,10 +201,10 @@ export FIRECRAWL_API_KEY=your-key-here With Firecrawl enabled: -- `WebSearch` is available across more provider setups -- `WebFetch` can handle JavaScript-rendered pages more reliably +- `WebSearch` can use Firecrawl's search API (while DuckDuckGo remains the default free path for non-Claude models) +- `WebFetch` uses Firecrawl's scrape endpoint instead of raw HTTP, handling JS-rendered pages correctly -Firecrawl is optional. Without it, OpenClaude falls back to the built-in behavior. +Free tier at [firecrawl.dev](https://firecrawl.dev) includes 500 credits. The key is optional. --- diff --git a/bun.lock b/bun.lock index 4fc76134..ab1a66ee 100644 --- a/bun.lock +++ b/bun.lock @@ -37,6 +37,7 @@ "code-excerpt": "4.0.0", "commander": "12.1.0", "diff": "8.0.3", + "duck-duck-scrape": "^2.2.7", "emoji-regex": "10.6.0", "env-paths": "3.0.0", "execa": "9.6.1", @@ -442,6 +443,8 @@ "dom-mutator": ["dom-mutator@0.6.0", "", {}, "sha512-iCt9o0aYfXMUkz/43ZOAUFQYotjGB+GNbYJiJdz4TgXkyToXbbRy5S6FbTp72lRBtfpUMwEc1KmpFEU4CZeoNg=="], + "duck-duck-scrape": ["duck-duck-scrape@2.2.7", "", { "dependencies": { "html-entities": "^2.3.3", "needle": "^3.2.0" } }, "sha512-BEcglwnfx5puJl90KQfX+Q2q5vCguqyMpZcSRPBWk8OY55qWwV93+E+7DbIkrGDW4qkqPfUvtOUdi0lXz6lEMQ=="], + "dunder-proto": ["dunder-proto@1.0.1", "", { "dependencies": { "call-bind-apply-helpers": "^1.0.1", "es-errors": "^1.3.0", "gopd": "^1.2.0" } }, "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A=="], "ecdsa-sig-formatter": ["ecdsa-sig-formatter@1.0.11", "", { "dependencies": { "safe-buffer": "^5.0.1" } }, "sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ=="], @@ -548,6 +551,8 @@ "hono": ["hono@4.12.9", "", {}, 
"sha512-wy3T8Zm2bsEvxKZM5w21VdHDDcwVS1yUFFY6i8UobSsKfFceT7TOwhbhfKsDyx7tYQlmRM5FLpIuYvNFyjctiA=="], + "html-entities": ["html-entities@2.6.0", "", {}, "sha512-kig+rMn/QOVRvr7c86gQ8lWXq+Hkv6CbAH1hLu+RG338StTpE8Z0b44SDVaqVu7HGKf27frdmUYEs9hTUX/cLQ=="], + "http-errors": ["http-errors@2.0.1", "", { "dependencies": { "depd": "~2.0.0", "inherits": "~2.0.4", "setprototypeof": "~1.2.0", "statuses": "~2.0.2", "toidentifier": "~1.0.1" } }, "sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ=="], "https-proxy-agent": ["https-proxy-agent@7.0.6", "", { "dependencies": { "agent-base": "^7.1.2", "debug": "4" } }, "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw=="], @@ -622,6 +627,8 @@ "mz": ["mz@2.7.0", "", { "dependencies": { "any-promise": "^1.0.0", "object-assign": "^4.0.1", "thenify-all": "^1.0.0" } }, "sha512-z81GNO7nnYMEhrGh9LeymoE4+Yr0Wn5McHIZMK5cfQCl+NDX08sCZgUc9/6MHni9IWuFLm1Z3HTCXu2z9fN62Q=="], + "needle": ["needle@3.5.0", "", { "dependencies": { "iconv-lite": "^0.6.3", "sax": "^1.2.4" }, "bin": { "needle": "bin/needle" } }, "sha512-jaQyPKKk2YokHrEg+vFDYxXIHTCBgiZwSHOoVx/8V3GIBS8/VN6NdVRmg8q1ERtPkMvmOvebsgga4sAj5hls/w=="], + "negotiator": ["negotiator@1.0.0", "", {}, "sha512-8Ofs/AUQh8MaEcrlq5xOX0CQ9ypTF5dl78mjlMNfOK08fzpgTHQRQPBxcPlEtIw0yRpws+Zo/3r+5WRby7u3Gg=="], "node-fetch": ["node-fetch@2.7.0", "", { "dependencies": { "whatwg-url": "^5.0.0" }, "peerDependencies": { "encoding": "^0.1.0" }, "optionalPeers": ["encoding"] }, "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A=="], @@ -706,6 +713,8 @@ "safer-buffer": ["safer-buffer@2.1.2", "", {}, "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg=="], + "sax": ["sax@1.6.0", "", {}, "sha512-6R3J5M4AcbtLUdZmRv2SygeVaM7IhrLXu9BmnOGmmACak8fiUtOsYNWUS4uK7upbmHIBbLBeFeI//477BKLBzA=="], + "scheduler": ["scheduler@0.27.0", "", {}, 
"sha512-eNv+WrVbKu1f3vbYJT/xtiF5syA5HPIMtf9IgY/nKg0sWqzAUEvqY/xm7OcZc/qafLx/iO9FgOmeSAp4v5ti/Q=="], "semver": ["semver@7.7.4", "", { "bin": { "semver": "bin/semver.js" } }, "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA=="], @@ -1246,6 +1255,8 @@ "gaxios/is-stream": ["is-stream@2.0.1", "", {}, "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg=="], + "needle/iconv-lite": ["iconv-lite@0.6.3", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw=="], + "npm-run-path/path-key": ["path-key@4.0.0", "", {}, "sha512-haREypq7xkM7ErfgIyA0z+Bj4AGKlMSdlQE2jvJo6huWD1EdkKYV+G/T4nq0YEF2vgTT8kqMFKo1uHn950r4SQ=="], "parse5-htmlparser2-tree-adapter/parse5": ["parse5@6.0.1", "", {}, "sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw=="], diff --git a/package.json b/package.json index e55f6eef..6fa9bbdc 100644 --- a/package.json +++ b/package.json @@ -64,6 +64,7 @@ "@opentelemetry/sdk-trace-base": "2.6.1", "@opentelemetry/sdk-trace-node": "2.6.1", "@opentelemetry/semantic-conventions": "1.40.0", + "duck-duck-scrape": "^2.2.7", "ajv": "8.18.0", "auto-bind": "5.0.1", "axios": "1.14.0", diff --git a/src/tools/WebSearchTool/WebSearchTool.ts b/src/tools/WebSearchTool/WebSearchTool.ts index e9eb373e..fa9b5360 100644 --- a/src/tools/WebSearchTool/WebSearchTool.ts +++ b/src/tools/WebSearchTool/WebSearchTool.ts @@ -4,6 +4,7 @@ import type { } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs' import { getAPIProvider } from 'src/utils/model/providers.js' import type { PermissionResult } from 'src/utils/permissions/PermissionResult.js' + import { z } from 'zod/v4' import { getFeatureValue_CACHED_MAY_BE_STALE } from '../../services/analytics/growthbook.js' import { queryModelWithStreaming } from '../../services/api/claude.js' @@ -101,6 +102,100 @@ 
// ---------------------------------------------------------------------------
// Trailing context of the preceding definition (only partially visible in this
// patch hunk; kept verbatim, not modified):
//
//   function shouldUseFirecrawl(): boolean {
//     ...
//     return true
//   }
// ---------------------------------------------------------------------------

/** Minimal shape of a search hit used by the DuckDuckGo code path. */
type DuckDuckGoHit = { title: string; url: string; snippet: string }

/**
 * Error-message fragments treated as "DuckDuckGo is throttling or blocking us".
 *
 * NOTE(review): duck-duck-scrape is believed to throw
 * "DDG detected an anomaly in the request, you are likely making requests too
 * quickly." when throttled — confirm against the installed version. The
 * pattern is case-insensitive and anchored on whole phrases so that unrelated
 * messages containing e.g. "generate" are not misclassified.
 */
const DDG_RATE_LIMIT_PATTERN =
  /\b429\b|rate[\s-]?limit|too many requests|too quickly|anomaly|captcha|\bblocked\b/i

/** Returns true when `model` contains "claude" (case-insensitive). */
function isClaudeModel(model: string): boolean {
  return /claude/i.test(model)
}

/**
 * Decides whether WebSearch should be served by the free DuckDuckGo scraper.
 *
 * Returns false when Codex responses web search is enabled, when the API
 * provider is `firstParty`, `vertex` or `foundry` (these keep their native web
 * search), or when the main-loop model is a Claude model. Returns true
 * otherwise.
 */
function shouldUseDuckDuckGo(): boolean {
  if (isCodexResponsesWebSearchEnabled()) return false

  const provider = getAPIProvider()
  // Don't override providers/models that have native web search support.
  if (
    provider === 'firstParty' ||
    provider === 'vertex' ||
    provider === 'foundry'
  ) {
    return false
  }

  // Use free DDG search for non-Claude models by default.
  return !isClaudeModel(getMainLoopModel())
}

/**
 * Returns true when `host` is `domain` itself or a sub-domain of it.
 *
 * Matching is done on label boundaries, so `evilexample.com` does NOT match
 * `example.com` (a plain `endsWith` would). Comparison is case-insensitive; a
 * leading `*.` or `.` on `domain` is ignored. An empty domain matches nothing.
 */
function hostMatchesDomain(host: string, domain: string): boolean {
  const normalizedHost = host.trim().toLowerCase()
  const normalizedDomain = domain
    .trim()
    .toLowerCase()
    .replace(/^\*?\.+/, '')
  if (normalizedDomain === '') return false
  return (
    normalizedHost === normalizedDomain ||
    normalizedHost.endsWith(`.${normalizedDomain}`)
  )
}

/**
 * Applies the allow/block domain lists to `hits` in a single pass.
 *
 * - With neither list set, `hits` is returned unchanged (as a copy).
 * - With any list set, hits whose URL cannot be parsed are dropped.
 * - A hit is kept only if it matches no blocked domain and, when an allow list
 *   is given, matches at least one allowed domain.
 */
function filterHitsByDomain<T extends { url: string }>(
  hits: readonly T[],
  allowedDomains?: readonly string[],
  blockedDomains?: readonly string[],
): T[] {
  const allowed = allowedDomains?.length ? allowedDomains : undefined
  const blocked = blockedDomains?.length ? blockedDomains : undefined
  if (!allowed && !blocked) return [...hits]

  return hits.filter(hit => {
    let host: string
    try {
      host = new URL(hit.url).hostname
    } catch {
      // Unparseable URL: cannot be checked against the lists, so drop it.
      return false
    }
    if (blocked?.some(d => hostMatchesDomain(host, d))) return false
    if (allowed && !allowed.some(d => hostMatchesDomain(host, d))) return false
    return true
  })
}

/** Returns true when an error message looks like DDG throttling/blocking. */
function isDuckDuckGoRateLimitError(message: string): boolean {
  return DDG_RATE_LIMIT_PATTERN.test(message)
}

/**
 * Runs a web search through the `duck-duck-scrape` package and converts the
 * result into the tool's `Output` shape.
 *
 * The output's `results` holds an optional markdown summary string (one line
 * per hit that has a snippet) followed by an object listing `{ title, url }`
 * for every hit that survived domain filtering.
 *
 * Failure handling: this function does not throw for DuckDuckGo errors. If the
 * error looks like rate limiting and Firecrawl is enabled, the search is
 * retried through `runFirecrawlSearch` (errors from that call propagate).
 * Otherwise a single "temporarily unavailable" message is returned as the
 * result.
 *
 * @param input - Search query plus optional `allowed_domains` /
 *   `blocked_domains` filters.
 * @returns The search output, including elapsed wall-clock time in seconds.
 */
async function runDuckDuckGoSearch(input: Input): Promise<Output> {
  const startTime = performance.now()
  const elapsedSeconds = (): number => (performance.now() - startTime) / 1000

  try {
    // Loaded lazily so the dependency is only paid for when this path runs.
    const { search } = await import('duck-duck-scrape')

    const response = await search(input.query, {
      // NOTE(review): 0 appears to be SafeSearchType.STRICT in
      // duck-duck-scrape — confirm this is the intended level.
      safeSearch: 0,
    })

    const rawHits: DuckDuckGoHit[] = response.results.map(r => ({
      title: r.title || r.url,
      url: r.url,
      // NOTE(review): the library's `description` may still contain <b> tags
      // around matched terms; strip them so the model sees plain text.
      snippet: (r.description ?? '').replace(/<\/?b>/gi, ''),
    }))

    const hits = filterHitsByDomain(
      rawHits,
      input.allowed_domains,
      input.blocked_domains,
    )

    const snippets = hits
      .filter(h => h.snippet)
      .map(h => `**${h.title}** — ${h.snippet} (${h.url})`)
      .join('\n')

    const results: Output['results'] = []
    if (snippets) results.push(snippets)
    results.push({
      tool_use_id: 'duckduckgo-search',
      content: hits.map(({ title, url }) => ({ title, url })),
    })

    return {
      query: input.query,
      results,
      durationSeconds: elapsedSeconds(),
    }
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error)

    if (isDuckDuckGoRateLimitError(message) && isFirecrawlEnabled()) {
      return runFirecrawlSearch(input)
    }

    return {
      query: input.query,
      results: [
        'Web search temporarily unavailable — try again or add a Firecrawl API key for reliable results.',
      ],
      durationSeconds: elapsedSeconds(),
    }
  }
}

// ---------------------------------------------------------------------------
// Remaining hunks of the patch touch definitions that are only partially
// visible here (`runFirecrawlSearch`, `WebSearchTool = buildTool({ ... })`).
// They are preserved verbatim below and were not modified:
//
//  async function runFirecrawlSearch(input: Input): Promise {
//    const startTime = performance.now()
//    const { FirecrawlClient } = await import('@mendable/firecrawl-js')
// @@ -443,6 +538,10 @@ export const WebSearchTool = buildTool({
//        return true
//      }
//
// +    if (shouldUseDuckDuckGo()) {
// +      return true
// +    }
// +
//      const provider = getAPIProvider()
//      const model = getMainLoopModel()
//
// @@ -502,7 +601,11 @@ export const WebSearchTool = buildTool({
//      }
//    },
//    async prompt() {
// -    if (shouldUseFirecrawl() || isCodexResponsesWebSearchEnabled()) {
// +    if (
// +      shouldUseDuckDuckGo() ||
// +      shouldUseFirecrawl() ||
// +      isCodexResponsesWebSearchEnabled()
// +    ) {
//        return getWebSearchPrompt().replace(
//          /\n\s*-\s*Web search is only available in the US/,
//          '',
// @@ -543,6 +646,10 @@ export const WebSearchTool = buildTool({
//        return { data: await runFirecrawlSearch(input) }
//      }
//
// +    if (shouldUseDuckDuckGo()) {
// +      return { data: await runDuckDuckGoSearch(input) }
// +    }
// +
//      if (isCodexResponsesWebSearchEnabled()) {
//        return {
//          data: await runCodexWebSearch(input, context.abortController.signal),
// ---------------------------------------------------------------------------