Merge pull request #168 from firecrawl/add-firecrawl

feat: add Firecrawl backend for WebSearch and WebFetch
This commit is contained in:
Kevin Codex
2026-04-03 01:43:25 +08:00
committed by GitHub
5 changed files with 133 additions and 2 deletions

View File

@@ -21,6 +21,18 @@ import {
MAX_MARKDOWN_LENGTH,
} from './utils.js'
/**
 * Reports whether the Firecrawl backend is configured.
 *
 * @returns `true` when the `FIRECRAWL_API_KEY` environment variable is set to
 *   a non-empty string, `false` when it is unset or empty.
 */
function isFirecrawlEnabled(): boolean {
  const apiKey = process.env.FIRECRAWL_API_KEY
  return typeof apiKey === 'string' && apiKey.length > 0
}
/**
 * Scrapes a single page through Firecrawl and returns its markdown rendering.
 *
 * The SDK is imported dynamically inside the function, so it is only loaded
 * when this path actually runs. The API key is read from `FIRECRAWL_API_KEY`;
 * callers are expected to have checked that it is set (note the non-null
 * assertion).
 *
 * @param url - Address of the page to scrape.
 * @returns The markdown text (empty string when the response carries none)
 *   together with its byte length as computed by `Buffer.byteLength`.
 */
async function scrapeWithFirecrawl(url: string): Promise<{ markdown: string; bytes: number }> {
  const firecrawl = await import('@mendable/firecrawl-js')
  const client = new firecrawl.FirecrawlClient({ apiKey: process.env.FIRECRAWL_API_KEY! })
  const scraped = await client.scrape(url, { formats: ['markdown'] })
  // A response without a markdown field is treated as an empty document.
  const { markdown = undefined } = scraped as { markdown?: string }
  const text = markdown ?? ''
  return { markdown: text, bytes: Buffer.byteLength(text) }
}
const inputSchema = lazySchema(() =>
z.strictObject({
url: z.string().url().describe('The URL to fetch content from'),
@@ -211,6 +223,27 @@ ${DESCRIPTION}`
) {
const start = Date.now()
if (isFirecrawlEnabled()) {
const { markdown, bytes } = await scrapeWithFirecrawl(url)
const result = await applyPromptToMarkdown(
prompt,
markdown,
abortController.signal,
isNonInteractiveSession,
false,
)
return {
data: {
bytes,
code: 200,
codeText: 'OK',
result,
durationMs: Date.now() - start,
url,
} satisfies Output,
}
}
const response = await getURLMarkdownContent(url, abortController)
// Check if we got a redirect to a different host

View File

@@ -88,6 +88,67 @@ function makeToolSchema(input: Input): BetaWebSearchTool20250305 {
}
}
/**
 * Tells whether a Firecrawl API key is available in the environment.
 *
 * @returns `false` when `FIRECRAWL_API_KEY` is unset or the empty string,
 *   `true` for any other value.
 */
function isFirecrawlEnabled(): boolean {
  return (process.env.FIRECRAWL_API_KEY ?? '') !== ''
}
/**
 * Decides whether web searches should be routed through Firecrawl.
 *
 * Firecrawl is used only when it is configured AND the active setup has no
 * native search of its own: Codex responses web search takes precedence, and
 * so do the `firstParty`, `vertex` and `foundry` API providers.
 *
 * @returns `true` when Firecrawl should handle the search.
 */
function shouldUseFirecrawl(): boolean {
  // Not configured, or Codex already provides native search: leave it alone.
  if (!isFirecrawlEnabled() || isCodexResponsesWebSearchEnabled()) {
    return false
  }

  // Don't override native search on providers that already have it.
  switch (getAPIProvider()) {
    case 'firstParty':
    case 'vertex':
    case 'foundry':
      return false
    default:
      return true
  }
}
/**
 * Checks whether a result URL belongs to a configured domain.
 *
 * A URL matches when its hostname equals the domain or is a subdomain of it
 * (i.e. ends with `.` + domain). Comparison is case-insensitive, and
 * surrounding whitespace and leading dots on `domain` are ignored.
 *
 * @param url - Absolute URL of a search result.
 * @param domain - Domain from `allowed_domains` / `blocked_domains`.
 * @returns `false` for unparsable URLs and for empty domains.
 */
function firecrawlHostMatchesDomain(url: string, domain: string): boolean {
  const wanted = domain.trim().toLowerCase().replace(/^\.+/, '')
  if (wanted === '') return false

  let hostname: string
  try {
    hostname = new URL(url).hostname.toLowerCase()
  } catch {
    return false
  }

  // Require a label boundary so "example.com" does not match "evilexample.com".
  return hostname === wanted || hostname.endsWith(`.${wanted}`)
}

/**
 * Runs a web search through Firecrawl and adapts the response to `Output`.
 *
 * Domain restrictions are enforced on the returned results before anything is
 * derived from them, so both the link list and the text snippets respect
 * `allowed_domains` and `blocked_domains`. Blocked domains are additionally
 * passed to the search engine as `-site:` operators to avoid wasting result
 * slots on them.
 *
 * @param input - Search request: `query` plus optional `allowed_domains` and
 *   `blocked_domains`.
 * @returns The original query, an optional snippet summary string followed by
 *   the hit list, and the elapsed time in seconds.
 */
async function runFirecrawlSearch(input: Input): Promise<Output> {
  const startTime = performance.now()
  const { FirecrawlClient } = await import('@mendable/firecrawl-js')
  const app = new FirecrawlClient({ apiKey: process.env.FIRECRAWL_API_KEY! })

  let query = input.query
  if (input.blocked_domains?.length) {
    const exclusions = input.blocked_domains.map(d => `-site:${d}`).join(' ')
    query = `${query} ${exclusions}`
  }

  const data = await app.search(query, { limit: 10 })

  const allowedDomains = input.allowed_domains ?? []
  const blockedDomains = input.blocked_domains ?? []

  // Filter once, up front, so hits and snippets are built from the same set.
  const web = (data.web ?? []).filter((r: { url: string }) => {
    // The `-site:` operators are only a hint to the engine; enforce them here.
    if (blockedDomains.some((d: string) => firecrawlHostMatchesDomain(r.url, d))) {
      return false
    }
    if (allowedDomains.length === 0) return true
    return allowedDomains.some((d: string) => firecrawlHostMatchesDomain(r.url, d))
  })

  const hits = web.map((r: { url: string; title?: string }) => ({
    title: r.title ?? r.url,
    url: r.url,
  }))

  const snippets = web
    .filter((r: { description?: string }) => r.description)
    .map((r: { url: string; title?: string; description?: string }) =>
      `**${r.title ?? r.url}** — ${r.description} (${r.url})`,
    )
    .join('\n')

  const results: Output['results'] = []
  if (snippets) results.push(snippets)
  results.push({ tool_use_id: 'firecrawl-search', content: hits })

  return {
    query: input.query,
    results,
    durationSeconds: (performance.now() - startTime) / 1000,
  }
}
function isCodexResponsesWebSearchEnabled(): boolean {
if (getAPIProvider() !== 'openai') {
return false
@@ -378,6 +439,10 @@ export const WebSearchTool = buildTool({
return summary ? `Searching for ${summary}` : 'Searching the web'
},
isEnabled() {
if (shouldUseFirecrawl()) {
return true
}
const provider = getAPIProvider()
const model = getMainLoopModel()
@@ -437,7 +502,7 @@ export const WebSearchTool = buildTool({
}
},
async prompt() {
if (isCodexResponsesWebSearchEnabled()) {
if (shouldUseFirecrawl() || isCodexResponsesWebSearchEnabled()) {
return getWebSearchPrompt().replace(
/\n\s*-\s*Web search is only available in the US/,
'',
@@ -474,6 +539,10 @@ export const WebSearchTool = buildTool({
return { result: true }
},
async call(input, context, _canUseTool, _parentMessage, onProgress) {
if (shouldUseFirecrawl()) {
return { data: await runFirecrawlSearch(input) }
}
if (isCodexResponsesWebSearchEnabled()) {
return {
data: await runCodexWebSearch(input, context.abortController.signal),