feat: add Firecrawl backend for WebSearch and WebFetch tools

WebSearch is currently disabled for all non-Anthropic providers (OpenAI
shim, DeepSeek, Ollama, etc.) because those providers have no native
search backend. This adds Firecrawl as a fallback that activates when
FIRECRAWL_API_KEY is set, unlocking web search for every model
openclaude supports.

WebFetch uses basic HTTP + Turndown for HTML-to-markdown conversion,
which fails silently on JS-rendered SPAs and bot-protected pages.
Firecrawl scrape replaces the fetch layer when FIRECRAWL_API_KEY is set,
returning clean markdown that handles dynamic content correctly.

Changes:
- WebSearchTool: add runFirecrawlSearch() using @mendable/firecrawl-js,
  respects allowed_domains (post-filter) and blocked_domains (-site: operators),
  includes result snippets alongside links. shouldUseFirecrawl() ensures
  firstParty/Vertex/Foundry/Codex providers keep their native backends.
- WebFetchTool: add scrapeWithFirecrawl(), drops into the existing
  applyPromptToMarkdown() pipeline so prompt processing is unchanged.
- Remove "Web search is only available in the US" restriction from
  prompt when Firecrawl is active (it works globally).
This commit is contained in:
Leonardo Grigorio
2026-04-02 12:18:20 -03:00
parent 4c1ba35aa1
commit ac4efae870
4 changed files with 112 additions and 2 deletions

View File

@@ -13,6 +13,7 @@
"@anthropic-ai/vertex-sdk": "0.14.4",
"@commander-js/extra-typings": "12.1.0",
"@growthbook/growthbook": "1.6.5",
"@mendable/firecrawl-js": "^4.18.1",
"@modelcontextprotocol/sdk": "1.29.0",
"@opentelemetry/api": "1.9.1",
"@opentelemetry/api-logs": "0.214.0",
@@ -185,6 +186,8 @@
"@js-sdsl/ordered-map": ["@js-sdsl/ordered-map@4.4.2", "", {}, "sha512-iUKgm52T8HOE/makSxjqoWhe95ZJA1/G1sYsGev2JDKUSS14KAgg1LHb+Ba+IPow0xflbnSkOsZcO08C7w1gYw=="],
"@mendable/firecrawl-js": ["@mendable/firecrawl-js@4.18.1", "", { "dependencies": { "axios": "1.14.0", "firecrawl": "4.16.0", "typescript-event-target": "^1.1.1", "zod": "^3.23.8", "zod-to-json-schema": "^3.23.0" } }, "sha512-NfmJv+xcHoZthj8I3NP/8KAgO8EWcvOcTvCAvszxqs7/6sCs1CRss6Tum6RycZNSwJkr5RzQossN89IlixRfng=="],
"@mixmark-io/domino": ["@mixmark-io/domino@2.2.0", "", {}, "sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw=="],
"@modelcontextprotocol/sdk": ["@modelcontextprotocol/sdk@1.29.0", "", { "dependencies": { "@hono/node-server": "^1.19.9", "ajv": "^8.17.1", "ajv-formats": "^3.0.1", "content-type": "^1.0.5", "cors": "^2.8.5", "cross-spawn": "^7.0.5", "eventsource": "^3.0.2", "eventsource-parser": "^3.0.0", "express": "^5.2.1", "express-rate-limit": "^8.2.1", "hono": "^4.11.4", "jose": "^6.1.3", "json-schema-typed": "^8.0.2", "pkce-challenge": "^5.0.0", "raw-body": "^3.0.0", "zod": "^3.25 || ^4.0", "zod-to-json-schema": "^3.25.1" }, "peerDependencies": { "@cfworker/json-schema": "^4.1.1" }, "optionalPeers": ["@cfworker/json-schema"] }, "sha512-zo37mZA9hJWpULgkRpowewez1y6ML5GsXJPY8FI0tBBCd77HEvza4jDqRKOXgHNn867PVGCyTdzqpz0izu5ZjQ=="],
@@ -495,6 +498,8 @@
"find-up": ["find-up@4.1.0", "", { "dependencies": { "locate-path": "^5.0.0", "path-exists": "^4.0.0" } }, "sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw=="],
"firecrawl": ["firecrawl@4.16.0", "", { "dependencies": { "axios": "^1.13.5", "typescript-event-target": "^1.1.1", "zod": "^3.23.8", "zod-to-json-schema": "^3.23.0" } }, "sha512-7SJ/FWhZBtW2gTCE/BsvU+gbfIpfTq+D9IH82l9MacauLVptaY6EdYAhrK3YSMC9yr5NxvxRcpZKcXG/nqjiiQ=="],
"follow-redirects": ["follow-redirects@1.15.11", "", {}, "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ=="],
"form-data": ["form-data@4.0.5", "", { "dependencies": { "asynckit": "^0.4.0", "combined-stream": "^1.0.8", "es-set-tostringtag": "^2.1.0", "hasown": "^2.0.2", "mime-types": "^2.1.12" } }, "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w=="],
@@ -767,6 +772,8 @@
"typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="],
"typescript-event-target": ["typescript-event-target@1.1.2", "", {}, "sha512-TvkrTUpv7gCPlcnSoEwUVUBwsdheKm+HF5u2tPAKubkIGMfovdSizCTaZRY/NhR8+Ijy8iZZUapbVQAsNrkFrw=="],
"undici": ["undici@7.24.6", "", {}, "sha512-Xi4agocCbRzt0yYMZGMA6ApD7gvtUFaxm4ZmeacWI4cZxaF6C+8I8QfofC20NAePiB/IcvZmzkJ7XPa471AEtA=="],
"undici-types": ["undici-types@7.18.2", "", {}, "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w=="],

View File

@@ -112,7 +112,8 @@
"ws": "8.20.0",
"xss": "1.0.15",
"yaml": "2.8.3",
"zod": "3.25.76"
"zod": "3.25.76",
"@mendable/firecrawl-js": "^4.18.1"
},
"devDependencies": {
"@types/bun": "1.3.11",

View File

@@ -21,6 +21,18 @@ import {
MAX_MARKDOWN_LENGTH,
} from './utils.js'
/**
 * Tells whether the Firecrawl backend is configured for this process.
 *
 * @returns `true` when the `FIRECRAWL_API_KEY` environment variable holds a
 *   non-empty value, `false` when it is unset or empty.
 */
function isFirecrawlEnabled(): boolean {
  const apiKey = process.env.FIRECRAWL_API_KEY
  // Environment values are strings or undefined; both '' and undefined count as "off".
  return apiKey !== undefined && apiKey !== ''
}
/**
 * Scrapes a single page through Firecrawl and returns it as markdown.
 *
 * The SDK is loaded with a dynamic import at call time, and the client is
 * constructed with the key from `FIRECRAWL_API_KEY`.
 *
 * @param url - Address of the page to scrape.
 * @returns The markdown reported by Firecrawl (an empty string when the
 *   response carries no `markdown` field) together with its byte length as
 *   computed by `Buffer.byteLength`.
 */
async function scrapeWithFirecrawl(url: string): Promise<{ markdown: string; bytes: number }> {
  const firecrawl = await import('@mendable/firecrawl-js')
  const client = new firecrawl.FirecrawlClient({ apiKey: process.env.FIRECRAWL_API_KEY! })

  // Only the markdown rendering is requested; other formats are not needed here.
  const scraped = (await client.scrape(url, { formats: ['markdown'] })) as { markdown?: string }

  const markdown = scraped.markdown ?? ''
  const bytes = Buffer.byteLength(markdown)
  return { markdown, bytes }
}
const inputSchema = lazySchema(() =>
z.strictObject({
url: z.string().url().describe('The URL to fetch content from'),
@@ -211,6 +223,27 @@ ${DESCRIPTION}`
) {
const start = Date.now()
if (isFirecrawlEnabled()) {
const { markdown, bytes } = await scrapeWithFirecrawl(url)
const result = await applyPromptToMarkdown(
prompt,
markdown,
abortController.signal,
isNonInteractiveSession,
false,
)
return {
data: {
bytes,
code: 200,
codeText: 'OK',
result,
durationMs: Date.now() - start,
url,
} satisfies Output,
}
}
const response = await getURLMarkdownContent(url, abortController)
// Check if we got a redirect to a different host

View File

@@ -88,6 +88,67 @@ function makeToolSchema(input: Input): BetaWebSearchTool20250305 {
}
}
/**
 * Checks whether a Firecrawl API key has been supplied via the environment.
 *
 * @returns `true` if `FIRECRAWL_API_KEY` is set to a non-empty string,
 *   otherwise `false`.
 */
function isFirecrawlEnabled(): boolean {
  const { FIRECRAWL_API_KEY: configuredKey } = process.env
  // Truthiness covers both the unset (undefined) and the empty-string case.
  return configuredKey ? true : false
}
/**
 * Decides whether web search should be routed through Firecrawl.
 *
 * Firecrawl is a fallback only: it is used when a key is configured, the
 * Codex responses web search is not enabled, and the active API provider is
 * not one of `firstParty`, `vertex` or `foundry` (which keep their native
 * search backend).
 *
 * @returns `true` when Firecrawl should handle the search.
 */
function shouldUseFirecrawl(): boolean {
  // Short-circuit order matters: the key check runs before any provider lookup.
  if (!isFirecrawlEnabled() || isCodexResponsesWebSearchEnabled()) {
    return false
  }

  switch (getAPIProvider()) {
    // Providers that already ship a native search backend.
    case 'firstParty':
    case 'vertex':
    case 'foundry':
      return false
    default:
      return true
  }
}
/**
 * Reports whether `hostname` is `domain` itself or one of its subdomains.
 *
 * A plain suffix comparison is not enough: `notexample.com` ends with
 * `example.com` but is an unrelated site, so a match must be exact or fall on
 * a `.` label boundary. Comparison is case-insensitive and ignores leading
 * dots on `domain`; an empty `domain` never matches.
 */
function firecrawlHostMatchesDomain(hostname: string, domain: string): boolean {
  const host = hostname.toLowerCase()
  const target = domain.trim().toLowerCase().replace(/^\.+/, '')
  if (!target) return false
  return host === target || host.endsWith(`.${target}`)
}

/**
 * Applies the allow-list and block-list to a single result URL.
 *
 * With no lists configured every URL is permitted. A URL that cannot be
 * parsed is rejected when an allow-list is active (it cannot be shown to be
 * on an allowed domain) and kept otherwise.
 */
function isFirecrawlUrlPermitted(
  url: string,
  allowedDomains: readonly string[],
  blockedDomains: readonly string[],
): boolean {
  if (allowedDomains.length === 0 && blockedDomains.length === 0) return true

  let hostname: string
  try {
    hostname = new URL(url).hostname
  } catch {
    return allowedDomains.length === 0
  }

  if (blockedDomains.some(d => firecrawlHostMatchesDomain(hostname, d))) {
    return false
  }
  return (
    allowedDomains.length === 0 ||
    allowedDomains.some(d => firecrawlHostMatchesDomain(hostname, d))
  )
}

/**
 * Runs a web search through Firecrawl and shapes the response like the
 * tool's regular output.
 *
 * - `blocked_domains` are appended to the query as `-site:` operators and are
 *   additionally enforced on the returned results, so the restriction does
 *   not depend on the search backend honouring the operator.
 * - `allowed_domains` are enforced on the returned results; a result passes
 *   only if its hostname equals an allowed domain or is a subdomain of it.
 * - Both the link list and the text snippets are built from the filtered
 *   results, so descriptions from excluded domains are never surfaced.
 *
 * @param input - Search request: `query` plus optional `allowed_domains` and
 *   `blocked_domains`.
 * @returns The original query, an optional snippet block followed by the
 *   link list, and the elapsed time in seconds.
 * @throws Whatever the Firecrawl SDK import or `search` call rejects with;
 *   nothing is caught here.
 */
async function runFirecrawlSearch(input: Input): Promise<Output> {
  const startTime = performance.now()
  const { FirecrawlClient } = await import('@mendable/firecrawl-js')
  const app = new FirecrawlClient({ apiKey: process.env.FIRECRAWL_API_KEY! })

  const allowedDomains = input.allowed_domains ?? []
  const blockedDomains = input.blocked_domains ?? []

  let query = input.query
  if (blockedDomains.length) {
    const exclusions = blockedDomains.map(d => `-site:${d}`).join(' ')
    query = `${query} ${exclusions}`
  }

  const data = await app.search(query, { limit: 10 })

  // Filter once, up front, so links and snippets always agree with each other.
  const permitted = (data.web ?? []).filter((r: { url: string }) =>
    isFirecrawlUrlPermitted(r.url, allowedDomains, blockedDomains),
  )

  const hits = permitted.map((r: { url: string; title?: string }) => ({
    title: r.title ?? r.url,
    url: r.url,
  }))

  const snippets = permitted
    .filter((r: { description?: string }) => r.description)
    .map((r: { url: string; title?: string; description?: string }) =>
      `**${r.title ?? r.url}** — ${r.description} (${r.url})`,
    )
    .join('\n')

  const results: Output['results'] = []
  if (snippets) results.push(snippets)
  // Fixed placeholder id: these hits do not come from a model tool_use block.
  results.push({ tool_use_id: 'firecrawl-search', content: hits })

  return {
    query: input.query,
    results,
    durationSeconds: (performance.now() - startTime) / 1000,
  }
}
function isCodexResponsesWebSearchEnabled(): boolean {
if (getAPIProvider() !== 'openai') {
return false
@@ -378,6 +439,10 @@ export const WebSearchTool = buildTool({
return summary ? `Searching for ${summary}` : 'Searching the web'
},
isEnabled() {
if (shouldUseFirecrawl()) {
return true
}
const provider = getAPIProvider()
const model = getMainLoopModel()
@@ -437,7 +502,7 @@ export const WebSearchTool = buildTool({
}
},
async prompt() {
if (isCodexResponsesWebSearchEnabled()) {
if (shouldUseFirecrawl() || isCodexResponsesWebSearchEnabled()) {
return getWebSearchPrompt().replace(
/\n\s*-\s*Web search is only available in the US/,
'',
@@ -474,6 +539,10 @@ export const WebSearchTool = buildTool({
return { result: true }
},
async call(input, context, _canUseTool, _parentMessage, onProgress) {
if (shouldUseFirecrawl()) {
return { data: await runFirecrawlSearch(input) }
}
if (isCodexResponsesWebSearchEnabled()) {
return {
data: await runCodexWebSearch(input, context.abortController.signal),