Merge pull request #168 from firecrawl/add-firecrawl
feat: add Firecrawl backend for WebSearch and WebFetch
This commit is contained in:
21
README.md
21
README.md
@@ -123,6 +123,27 @@ Best if you want local inference on Apple Silicon with Atomic Chat. See [Advance
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## Web Search and Fetch
|
||||||
|
|
||||||
|
By default, `WebSearch` only works where a native search backend is available: the Anthropic API or the Codex responses endpoint. Users on GPT-4o, DeepSeek, Gemini, Ollama, and other OpenAI-compatible providers get no web search at all.
|
||||||
|
|
||||||
|
`WebFetch` works but uses basic HTTP plus HTML-to-markdown conversion. That fails on JavaScript-rendered pages (React, Next.js, Vue SPAs) and sites that block plain HTTP requests.
|
||||||
|
|
||||||
|
Set a [Firecrawl](https://firecrawl.dev) API key to fix both:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export FIRECRAWL_API_KEY=your-key-here
|
||||||
|
```
|
||||||
|
|
||||||
|
With this set:
|
||||||
|
|
||||||
|
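- `WebSearch` is enabled for providers that have no native search backend and routes through Firecrawl's search API; Anthropic (first-party, Vertex, Foundry) and Codex responses setups keep using their native search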
|
||||||
|
- `WebFetch` uses Firecrawl's scrape endpoint instead of raw HTTP, handling JS-rendered pages correctly
|
||||||
|
|
||||||
|
Free tier at [firecrawl.dev](https://firecrawl.dev) includes 500 credits. The key is optional — if not set, both tools fall back to their original behavior.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## How It Works
|
## How It Works
|
||||||
|
|
||||||
The shim (`src/services/api/openaiShim.ts`) sits between Claude Code and the LLM API:
|
The shim (`src/services/api/openaiShim.ts`) sits between Claude Code and the LLM API:
|
||||||
|
|||||||
7
bun.lock
7
bun.lock
@@ -13,6 +13,7 @@
|
|||||||
"@anthropic-ai/vertex-sdk": "0.14.4",
|
"@anthropic-ai/vertex-sdk": "0.14.4",
|
||||||
"@commander-js/extra-typings": "12.1.0",
|
"@commander-js/extra-typings": "12.1.0",
|
||||||
"@growthbook/growthbook": "1.6.5",
|
"@growthbook/growthbook": "1.6.5",
|
||||||
|
"@mendable/firecrawl-js": "^4.18.1",
|
||||||
"@modelcontextprotocol/sdk": "1.29.0",
|
"@modelcontextprotocol/sdk": "1.29.0",
|
||||||
"@opentelemetry/api": "1.9.1",
|
"@opentelemetry/api": "1.9.1",
|
||||||
"@opentelemetry/api-logs": "0.214.0",
|
"@opentelemetry/api-logs": "0.214.0",
|
||||||
@@ -185,6 +186,8 @@
|
|||||||
|
|
||||||
"@js-sdsl/ordered-map": ["@js-sdsl/ordered-map@4.4.2", "", {}, "sha512-iUKgm52T8HOE/makSxjqoWhe95ZJA1/G1sYsGev2JDKUSS14KAgg1LHb+Ba+IPow0xflbnSkOsZcO08C7w1gYw=="],
|
"@js-sdsl/ordered-map": ["@js-sdsl/ordered-map@4.4.2", "", {}, "sha512-iUKgm52T8HOE/makSxjqoWhe95ZJA1/G1sYsGev2JDKUSS14KAgg1LHb+Ba+IPow0xflbnSkOsZcO08C7w1gYw=="],
|
||||||
|
|
||||||
|
"@mendable/firecrawl-js": ["@mendable/firecrawl-js@4.18.1", "", { "dependencies": { "axios": "1.14.0", "firecrawl": "4.16.0", "typescript-event-target": "^1.1.1", "zod": "^3.23.8", "zod-to-json-schema": "^3.23.0" } }, "sha512-NfmJv+xcHoZthj8I3NP/8KAgO8EWcvOcTvCAvszxqs7/6sCs1CRss6Tum6RycZNSwJkr5RzQossN89IlixRfng=="],
|
||||||
|
|
||||||
"@mixmark-io/domino": ["@mixmark-io/domino@2.2.0", "", {}, "sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw=="],
|
"@mixmark-io/domino": ["@mixmark-io/domino@2.2.0", "", {}, "sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw=="],
|
||||||
|
|
||||||
"@modelcontextprotocol/sdk": ["@modelcontextprotocol/sdk@1.29.0", "", { "dependencies": { "@hono/node-server": "^1.19.9", "ajv": "^8.17.1", "ajv-formats": "^3.0.1", "content-type": "^1.0.5", "cors": "^2.8.5", "cross-spawn": "^7.0.5", "eventsource": "^3.0.2", "eventsource-parser": "^3.0.0", "express": "^5.2.1", "express-rate-limit": "^8.2.1", "hono": "^4.11.4", "jose": "^6.1.3", "json-schema-typed": "^8.0.2", "pkce-challenge": "^5.0.0", "raw-body": "^3.0.0", "zod": "^3.25 || ^4.0", "zod-to-json-schema": "^3.25.1" }, "peerDependencies": { "@cfworker/json-schema": "^4.1.1" }, "optionalPeers": ["@cfworker/json-schema"] }, "sha512-zo37mZA9hJWpULgkRpowewez1y6ML5GsXJPY8FI0tBBCd77HEvza4jDqRKOXgHNn867PVGCyTdzqpz0izu5ZjQ=="],
|
"@modelcontextprotocol/sdk": ["@modelcontextprotocol/sdk@1.29.0", "", { "dependencies": { "@hono/node-server": "^1.19.9", "ajv": "^8.17.1", "ajv-formats": "^3.0.1", "content-type": "^1.0.5", "cors": "^2.8.5", "cross-spawn": "^7.0.5", "eventsource": "^3.0.2", "eventsource-parser": "^3.0.0", "express": "^5.2.1", "express-rate-limit": "^8.2.1", "hono": "^4.11.4", "jose": "^6.1.3", "json-schema-typed": "^8.0.2", "pkce-challenge": "^5.0.0", "raw-body": "^3.0.0", "zod": "^3.25 || ^4.0", "zod-to-json-schema": "^3.25.1" }, "peerDependencies": { "@cfworker/json-schema": "^4.1.1" }, "optionalPeers": ["@cfworker/json-schema"] }, "sha512-zo37mZA9hJWpULgkRpowewez1y6ML5GsXJPY8FI0tBBCd77HEvza4jDqRKOXgHNn867PVGCyTdzqpz0izu5ZjQ=="],
|
||||||
@@ -495,6 +498,8 @@
|
|||||||
|
|
||||||
"find-up": ["find-up@4.1.0", "", { "dependencies": { "locate-path": "^5.0.0", "path-exists": "^4.0.0" } }, "sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw=="],
|
"find-up": ["find-up@4.1.0", "", { "dependencies": { "locate-path": "^5.0.0", "path-exists": "^4.0.0" } }, "sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw=="],
|
||||||
|
|
||||||
|
"firecrawl": ["firecrawl@4.16.0", "", { "dependencies": { "axios": "^1.13.5", "typescript-event-target": "^1.1.1", "zod": "^3.23.8", "zod-to-json-schema": "^3.23.0" } }, "sha512-7SJ/FWhZBtW2gTCE/BsvU+gbfIpfTq+D9IH82l9MacauLVptaY6EdYAhrK3YSMC9yr5NxvxRcpZKcXG/nqjiiQ=="],
|
||||||
|
|
||||||
"follow-redirects": ["follow-redirects@1.15.11", "", {}, "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ=="],
|
"follow-redirects": ["follow-redirects@1.15.11", "", {}, "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ=="],
|
||||||
|
|
||||||
"form-data": ["form-data@4.0.5", "", { "dependencies": { "asynckit": "^0.4.0", "combined-stream": "^1.0.8", "es-set-tostringtag": "^2.1.0", "hasown": "^2.0.2", "mime-types": "^2.1.12" } }, "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w=="],
|
"form-data": ["form-data@4.0.5", "", { "dependencies": { "asynckit": "^0.4.0", "combined-stream": "^1.0.8", "es-set-tostringtag": "^2.1.0", "hasown": "^2.0.2", "mime-types": "^2.1.12" } }, "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w=="],
|
||||||
@@ -767,6 +772,8 @@
|
|||||||
|
|
||||||
"typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="],
|
"typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="],
|
||||||
|
|
||||||
|
"typescript-event-target": ["typescript-event-target@1.1.2", "", {}, "sha512-TvkrTUpv7gCPlcnSoEwUVUBwsdheKm+HF5u2tPAKubkIGMfovdSizCTaZRY/NhR8+Ijy8iZZUapbVQAsNrkFrw=="],
|
||||||
|
|
||||||
"undici": ["undici@7.24.6", "", {}, "sha512-Xi4agocCbRzt0yYMZGMA6ApD7gvtUFaxm4ZmeacWI4cZxaF6C+8I8QfofC20NAePiB/IcvZmzkJ7XPa471AEtA=="],
|
"undici": ["undici@7.24.6", "", {}, "sha512-Xi4agocCbRzt0yYMZGMA6ApD7gvtUFaxm4ZmeacWI4cZxaF6C+8I8QfofC20NAePiB/IcvZmzkJ7XPa471AEtA=="],
|
||||||
|
|
||||||
"undici-types": ["undici-types@7.18.2", "", {}, "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w=="],
|
"undici-types": ["undici-types@7.18.2", "", {}, "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w=="],
|
||||||
|
|||||||
@@ -112,7 +112,8 @@
|
|||||||
"ws": "8.20.0",
|
"ws": "8.20.0",
|
||||||
"xss": "1.0.15",
|
"xss": "1.0.15",
|
||||||
"yaml": "2.8.3",
|
"yaml": "2.8.3",
|
||||||
"zod": "3.25.76"
|
"zod": "3.25.76",
|
||||||
|
"@mendable/firecrawl-js": "^4.18.1"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@types/bun": "1.3.11",
|
"@types/bun": "1.3.11",
|
||||||
|
|||||||
@@ -21,6 +21,18 @@ import {
|
|||||||
MAX_MARKDOWN_LENGTH,
|
MAX_MARKDOWN_LENGTH,
|
||||||
} from './utils.js'
|
} from './utils.js'
|
||||||
|
|
||||||
|
/**
 * Reports whether the Firecrawl backend has been configured.
 *
 * @returns true when the FIRECRAWL_API_KEY environment variable is set to a
 *   non-empty value, false otherwise.
 */
function isFirecrawlEnabled(): boolean {
  const { FIRECRAWL_API_KEY: apiKey } = process.env
  return !!apiKey
}
|
||||||
|
|
||||||
|
/**
 * Fetches a page through Firecrawl's scrape API and returns it as markdown.
 *
 * The SDK is loaded with a dynamic import, so it is only pulled in when this
 * function actually runs. FIRECRAWL_API_KEY is read without a presence check
 * here.
 *
 * @param url - Address of the page to scrape.
 * @returns The markdown found on the scrape result (an empty string when the
 *   result carries none) together with its byte length.
 */
async function scrapeWithFirecrawl(url: string): Promise<{ markdown: string; bytes: number }> {
  const firecrawl = await import('@mendable/firecrawl-js')
  const client = new firecrawl.FirecrawlClient({ apiKey: process.env.FIRECRAWL_API_KEY! })

  // Only the markdown rendition is requested; nothing else is read from the result.
  const scraped = (await client.scrape(url, { formats: ['markdown'] })) as { markdown?: string }
  const markdown = scraped.markdown ?? ''
  const bytes = Buffer.byteLength(markdown)

  return { markdown, bytes }
}
|
||||||
|
|
||||||
const inputSchema = lazySchema(() =>
|
const inputSchema = lazySchema(() =>
|
||||||
z.strictObject({
|
z.strictObject({
|
||||||
url: z.string().url().describe('The URL to fetch content from'),
|
url: z.string().url().describe('The URL to fetch content from'),
|
||||||
@@ -211,6 +223,27 @@ ${DESCRIPTION}`
|
|||||||
) {
|
) {
|
||||||
const start = Date.now()
|
const start = Date.now()
|
||||||
|
|
||||||
|
if (isFirecrawlEnabled()) {
|
||||||
|
const { markdown, bytes } = await scrapeWithFirecrawl(url)
|
||||||
|
const result = await applyPromptToMarkdown(
|
||||||
|
prompt,
|
||||||
|
markdown,
|
||||||
|
abortController.signal,
|
||||||
|
isNonInteractiveSession,
|
||||||
|
false,
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
data: {
|
||||||
|
bytes,
|
||||||
|
code: 200,
|
||||||
|
codeText: 'OK',
|
||||||
|
result,
|
||||||
|
durationMs: Date.now() - start,
|
||||||
|
url,
|
||||||
|
} satisfies Output,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const response = await getURLMarkdownContent(url, abortController)
|
const response = await getURLMarkdownContent(url, abortController)
|
||||||
|
|
||||||
// Check if we got a redirect to a different host
|
// Check if we got a redirect to a different host
|
||||||
|
|||||||
@@ -88,6 +88,67 @@ function makeToolSchema(input: Input): BetaWebSearchTool20250305 {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Tells whether a Firecrawl API key is present in the environment.
 *
 * @returns false when FIRECRAWL_API_KEY is unset or empty, true otherwise.
 */
function isFirecrawlEnabled(): boolean {
  const apiKey = process.env.FIRECRAWL_API_KEY
  return apiKey ? true : false
}
|
||||||
|
|
||||||
|
/**
 * Decides whether web search should be routed through Firecrawl.
 *
 * Firecrawl is used only when an API key is configured and the current setup
 * has no native search of its own: Codex responses web search and the
 * 'firstParty', 'vertex' and 'foundry' providers all keep their native backend.
 *
 * @returns true when Firecrawl should handle the search, false otherwise.
 */
function shouldUseFirecrawl(): boolean {
  // Without a key there is nothing to route to; with Codex responses search
  // available, the native backend is not overridden.
  if (!isFirecrawlEnabled() || isCodexResponsesWebSearchEnabled()) {
    return false
  }

  switch (getAPIProvider()) {
    // Don't override native search on providers that already have it
    case 'firstParty':
    case 'vertex':
    case 'foundry':
      return false
    default:
      return true
  }
}
|
||||||
|
|
||||||
|
/**
 * Reports whether `hostname` is `domain` itself or one of its subdomains.
 *
 * A bare suffix comparison would let `notexample.com` pass for `example.com`,
 * so the match must land on a label boundary. Comparison is case-insensitive
 * and a leading dot on `domain` is ignored.
 */
function hostnameMatchesDomain(hostname: string, domain: string): boolean {
  const host = hostname.toLowerCase()
  const target = domain.trim().toLowerCase().replace(/^\.+/, '')
  if (!target) return false
  return host === target || host.endsWith(`.${target}`)
}

/** Returns the hostname of `url`, or null when the string cannot be parsed as a URL. */
function hostnameOf(url: string): string | null {
  try {
    return new URL(url).hostname
  } catch {
    return null
  }
}

/**
 * Runs a web search through Firecrawl and adapts the response to the tool's
 * output shape.
 *
 * Domain restrictions are applied to the returned hits before anything is
 * emitted, so the snippet text and the link list always describe the same set
 * of pages:
 * - `blocked_domains` are excluded via `-site:` terms in the query and are
 *   additionally dropped from the response;
 * - `allowed_domains` keep only hits whose hostname is the domain itself or a
 *   subdomain of it; hits with an unparseable URL are dropped when an allow
 *   list is present.
 *
 * @param input - Search input; `query`, `allowed_domains` and
 *   `blocked_domains` are read.
 * @returns The original query, an optional snippet block followed by the hit
 *   list (at most 10 results are requested), and the elapsed time in seconds.
 */
async function runFirecrawlSearch(input: Input): Promise<Output> {
  const startTime = performance.now()
  const { FirecrawlClient } = await import('@mendable/firecrawl-js')
  const app = new FirecrawlClient({ apiKey: process.env.FIRECRAWL_API_KEY! })

  const allowedDomains = input.allowed_domains ?? []
  const blockedDomains = input.blocked_domains ?? []

  let query = input.query
  if (blockedDomains.length) {
    const exclusions = blockedDomains.map(d => `-site:${d}`).join(' ')
    query = `${query} ${exclusions}`
  }

  const data = await app.search(query, { limit: 10 })

  // Filter once, up front, so both the snippets and the hit list honour the
  // same domain restrictions.
  const permitted = (data.web ?? []).filter(
    (r: { url: string; title?: string; description?: string }) => {
      const host = hostnameOf(r.url)
      if (allowedDomains.length) {
        if (host === null) return false
        if (!allowedDomains.some(d => hostnameMatchesDomain(host, d))) return false
      }
      // The `-site:` operator is only a request to the search engine; enforce
      // the block list on what actually came back.
      if (host !== null && blockedDomains.some(d => hostnameMatchesDomain(host, d))) {
        return false
      }
      return true
    },
  )

  const hits = permitted.map((r: { url: string; title?: string }) => ({
    title: r.title ?? r.url,
    url: r.url,
  }))

  const snippets = permitted
    .filter((r: { description?: string }) => r.description)
    .map((r: { url: string; title?: string; description?: string }) =>
      `**${r.title ?? r.url}** — ${r.description} (${r.url})`,
    )
    .join('\n')

  const results: Output['results'] = []
  if (snippets) results.push(snippets)
  results.push({ tool_use_id: 'firecrawl-search', content: hits })

  return {
    query: input.query,
    results,
    durationSeconds: (performance.now() - startTime) / 1000,
  }
}
|
||||||
|
|
||||||
function isCodexResponsesWebSearchEnabled(): boolean {
|
function isCodexResponsesWebSearchEnabled(): boolean {
|
||||||
if (getAPIProvider() !== 'openai') {
|
if (getAPIProvider() !== 'openai') {
|
||||||
return false
|
return false
|
||||||
@@ -378,6 +439,10 @@ export const WebSearchTool = buildTool({
|
|||||||
return summary ? `Searching for ${summary}` : 'Searching the web'
|
return summary ? `Searching for ${summary}` : 'Searching the web'
|
||||||
},
|
},
|
||||||
isEnabled() {
|
isEnabled() {
|
||||||
|
if (shouldUseFirecrawl()) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
const provider = getAPIProvider()
|
const provider = getAPIProvider()
|
||||||
const model = getMainLoopModel()
|
const model = getMainLoopModel()
|
||||||
|
|
||||||
@@ -437,7 +502,7 @@ export const WebSearchTool = buildTool({
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
async prompt() {
|
async prompt() {
|
||||||
if (isCodexResponsesWebSearchEnabled()) {
|
if (shouldUseFirecrawl() || isCodexResponsesWebSearchEnabled()) {
|
||||||
return getWebSearchPrompt().replace(
|
return getWebSearchPrompt().replace(
|
||||||
/\n\s*-\s*Web search is only available in the US/,
|
/\n\s*-\s*Web search is only available in the US/,
|
||||||
'',
|
'',
|
||||||
@@ -474,6 +539,10 @@ export const WebSearchTool = buildTool({
|
|||||||
return { result: true }
|
return { result: true }
|
||||||
},
|
},
|
||||||
async call(input, context, _canUseTool, _parentMessage, onProgress) {
|
async call(input, context, _canUseTool, _parentMessage, onProgress) {
|
||||||
|
if (shouldUseFirecrawl()) {
|
||||||
|
return { data: await runFirecrawlSearch(input) }
|
||||||
|
}
|
||||||
|
|
||||||
if (isCodexResponsesWebSearchEnabled()) {
|
if (isCodexResponsesWebSearchEnabled()) {
|
||||||
return {
|
return {
|
||||||
data: await runCodexWebSearch(input, context.abortController.signal),
|
data: await runCodexWebSearch(input, context.abortController.signal),
|
||||||
|
|||||||
Reference in New Issue
Block a user