feat: add Firecrawl backend for WebSearch and WebFetch tools
WebSearch is currently disabled for all non-Anthropic providers (OpenAI shim, DeepSeek, Ollama, etc.) because those providers have no native search backend. This adds Firecrawl as a fallback that activates when FIRECRAWL_API_KEY is set, unlocking web search for every model openclaude supports. WebFetch uses basic HTTP + Turndown for HTML-to-markdown conversion, which fails silently on JS-rendered SPAs and bot-protected pages. Firecrawl scrape replaces the fetch layer when FIRECRAWL_API_KEY is set, returning clean markdown that handles dynamic content correctly. Changes: - WebSearchTool: add runFirecrawlSearch() using @mendable/firecrawl-js, respects allowed_domains (post-filter) and blocked_domains (-site: operators), includes result snippets alongside links. shouldUseFirecrawl() ensures firstParty/Vertex/Foundry/Codex providers keep their native backends. - WebFetchTool: add scrapeWithFirecrawl(), drops into the existing applyPromptToMarkdown() pipeline so prompt processing is unchanged. - Remove "Web search is only available in the US" restriction from prompt when Firecrawl is active (it works globally).
This commit is contained in:
@@ -21,6 +21,18 @@ import {
|
||||
MAX_MARKDOWN_LENGTH,
|
||||
} from './utils.js'
|
||||
|
||||
function isFirecrawlEnabled(): boolean {
|
||||
return Boolean(process.env.FIRECRAWL_API_KEY)
|
||||
}
|
||||
|
||||
async function scrapeWithFirecrawl(url: string): Promise<{ markdown: string; bytes: number }> {
|
||||
const { FirecrawlClient } = await import('@mendable/firecrawl-js')
|
||||
const app = new FirecrawlClient({ apiKey: process.env.FIRECRAWL_API_KEY! })
|
||||
const result = await app.scrape(url, { formats: ['markdown'] })
|
||||
const markdown = (result as { markdown?: string }).markdown ?? ''
|
||||
return { markdown, bytes: Buffer.byteLength(markdown) }
|
||||
}
|
||||
|
||||
const inputSchema = lazySchema(() =>
|
||||
z.strictObject({
|
||||
url: z.string().url().describe('The URL to fetch content from'),
|
||||
@@ -211,6 +223,27 @@ ${DESCRIPTION}`
|
||||
) {
|
||||
const start = Date.now()
|
||||
|
||||
if (isFirecrawlEnabled()) {
|
||||
const { markdown, bytes } = await scrapeWithFirecrawl(url)
|
||||
const result = await applyPromptToMarkdown(
|
||||
prompt,
|
||||
markdown,
|
||||
abortController.signal,
|
||||
isNonInteractiveSession,
|
||||
false,
|
||||
)
|
||||
return {
|
||||
data: {
|
||||
bytes,
|
||||
code: 200,
|
||||
codeText: 'OK',
|
||||
result,
|
||||
durationMs: Date.now() - start,
|
||||
url,
|
||||
} satisfies Output,
|
||||
}
|
||||
}
|
||||
|
||||
const response = await getURLMarkdownContent(url, abortController)
|
||||
|
||||
// Check if we got a redirect to a different host
|
||||
|
||||
@@ -88,6 +88,67 @@ function makeToolSchema(input: Input): BetaWebSearchTool20250305 {
|
||||
}
|
||||
}
|
||||
|
||||
function isFirecrawlEnabled(): boolean {
|
||||
return Boolean(process.env.FIRECRAWL_API_KEY)
|
||||
}
|
||||
|
||||
function shouldUseFirecrawl(): boolean {
|
||||
if (!isFirecrawlEnabled()) return false
|
||||
// Don't override native search on providers that already have it
|
||||
if (isCodexResponsesWebSearchEnabled()) return false
|
||||
const provider = getAPIProvider()
|
||||
if (provider === 'firstParty' || provider === 'vertex' || provider === 'foundry') return false
|
||||
return true
|
||||
}
|
||||
|
||||
async function runFirecrawlSearch(input: Input): Promise<Output> {
|
||||
const startTime = performance.now()
|
||||
const { FirecrawlClient } = await import('@mendable/firecrawl-js')
|
||||
const app = new FirecrawlClient({ apiKey: process.env.FIRECRAWL_API_KEY! })
|
||||
|
||||
let query = input.query
|
||||
if (input.blocked_domains?.length) {
|
||||
const exclusions = input.blocked_domains.map(d => `-site:${d}`).join(' ')
|
||||
query = `${query} ${exclusions}`
|
||||
}
|
||||
|
||||
const data = await app.search(query, { limit: 10 })
|
||||
|
||||
let hits = (data.web ?? []).map((r: { url: string; title?: string }) => ({
|
||||
title: r.title ?? r.url,
|
||||
url: r.url,
|
||||
}))
|
||||
|
||||
if (input.allowed_domains?.length) {
|
||||
hits = hits.filter(h =>
|
||||
input.allowed_domains!.some(d => {
|
||||
try {
|
||||
return new URL(h.url).hostname.endsWith(d)
|
||||
} catch {
|
||||
return false
|
||||
}
|
||||
}),
|
||||
)
|
||||
}
|
||||
|
||||
const snippets = (data.web ?? [])
|
||||
.filter((r: { description?: string }) => r.description)
|
||||
.map((r: { url: string; title?: string; description?: string }) =>
|
||||
`**${r.title ?? r.url}** — ${r.description} (${r.url})`,
|
||||
)
|
||||
.join('\n')
|
||||
|
||||
const results: Output['results'] = []
|
||||
if (snippets) results.push(snippets)
|
||||
results.push({ tool_use_id: 'firecrawl-search', content: hits })
|
||||
|
||||
return {
|
||||
query: input.query,
|
||||
results,
|
||||
durationSeconds: (performance.now() - startTime) / 1000,
|
||||
}
|
||||
}
|
||||
|
||||
function isCodexResponsesWebSearchEnabled(): boolean {
|
||||
if (getAPIProvider() !== 'openai') {
|
||||
return false
|
||||
@@ -378,6 +439,10 @@ export const WebSearchTool = buildTool({
|
||||
return summary ? `Searching for ${summary}` : 'Searching the web'
|
||||
},
|
||||
isEnabled() {
|
||||
if (shouldUseFirecrawl()) {
|
||||
return true
|
||||
}
|
||||
|
||||
const provider = getAPIProvider()
|
||||
const model = getMainLoopModel()
|
||||
|
||||
@@ -437,7 +502,7 @@ export const WebSearchTool = buildTool({
|
||||
}
|
||||
},
|
||||
async prompt() {
|
||||
if (isCodexResponsesWebSearchEnabled()) {
|
||||
if (shouldUseFirecrawl() || isCodexResponsesWebSearchEnabled()) {
|
||||
return getWebSearchPrompt().replace(
|
||||
/\n\s*-\s*Web search is only available in the US/,
|
||||
'',
|
||||
@@ -474,6 +539,10 @@ export const WebSearchTool = buildTool({
|
||||
return { result: true }
|
||||
},
|
||||
async call(input, context, _canUseTool, _parentMessage, onProgress) {
|
||||
if (shouldUseFirecrawl()) {
|
||||
return { data: await runFirecrawlSearch(input) }
|
||||
}
|
||||
|
||||
if (isCodexResponsesWebSearchEnabled()) {
|
||||
return {
|
||||
data: await runCodexWebSearch(input, context.abortController.signal),
|
||||
|
||||
Reference in New Issue
Block a user