fix: custom web search — WEB_URL_TEMPLATE not recognized, timeout too short, silent native fallback (#537)

* fix: custom web search — WEB_URL_TEMPLATE not recognized, timeout too short, silent native fallback

1. custom.ts: Add WEB_URL_TEMPLATE to isConfigured() so the custom provider
   is recognized when configured via URL template alone.

2. custom.ts: Bump DEFAULT_TIMEOUT_SECONDS from 15s to 120s.
   Self-hosted search APIs (SearXNG, internal) commonly need 30-90s.

3. WebSearchTool.ts: When an explicit adapter is selected (any provider
   mode other than "auto", e.g. WEB_SEARCH_PROVIDER=custom), do not
   silently fall through to the native Anthropic path on adapter errors
   or 0-hit results.
   - 0 hits: return directly (no fallback)
   - Error: throw the real error (no fallback)
   - Auto mode: existing fallback behavior preserved

* fix: tighten auto-mode adapter fallback — only swallow transient errors

Address review feedback: in auto mode, only fall through to native on
errors treated as transient (network failure, timeout, HTTP 5xx, and any
unrecognized error). Known config and guardrail errors (SSRF, HTTPS, bad
URL, header allowlist, body-size limit, etc.) now surface properly
instead of being silently swallowed.

---------

Co-authored-by: FluxLuFFy <fluxluffy@users.noreply.github.com>
This commit is contained in:
FluxLuFFy
2026-04-09 18:11:58 +05:30
committed by GitHub
parent e30ad17ae0
commit 32fbd0c7b4
2 changed files with 52 additions and 12 deletions

View File

@@ -410,6 +410,29 @@ function makeOutputFromSearchResponse(
// Helper: should we use adapter-based providers? // Helper: should we use adapter-based providers?
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
/**
 * Classifies an adapter failure for auto mode.
 *
 * The decision is based on case-insensitive substrings of the error message:
 * - A message matching a known guardrail / configuration failure yields
 *   `false`, so the caller surfaces the error.
 * - Everything else yields `true`: recognized transient failures (abort,
 *   timeout, network failure, HTTP 5xx), values that are not `Error`
 *   instances, and errors that match no known pattern at all.
 *
 * Guardrail markers are checked first, so they win even when the error is
 * also named `AbortError`.
 *
 * @param err - The value caught from the adapter call.
 * @returns `true` when falling through to the native path is acceptable.
 */
function isTransientError(err: unknown): boolean {
  // Non-Error throwables have no message to classify; allow the fallback.
  if (!(err instanceof Error)) {
    return true
  }

  const haystack = err.message.toLowerCase()
  const mentions = (needle: string): boolean => haystack.includes(needle)

  // Guardrail / config failures that must never be swallowed.
  const guardrailMarkers: readonly string[] = [
    'must use https',
    'private/reserved address',
    'not in the safe allowlist',
    'not a valid url',
    'is not configured',
  ]
  // The size guard needs both words to appear somewhere in the message.
  const hitsSizeGuard = mentions('exceeds') && mentions('bytes')
  if (hitsSizeGuard || guardrailMarkers.some(mentions)) {
    return false
  }

  // Failures positively identified as transient.
  const transientMarkers: readonly string[] = [
    'timed out',
    'fetch failed',
    'econnrefused',
    'enotfound',
    'returned 5', // HTTP 5xx
  ]
  if (err.name === 'AbortError' || transientMarkers.some(mentions)) {
    return true
  }

  // Anything unrecognized is also treated as transient so that auto mode
  // keeps its fall-through behavior.
  return true
}
/** /**
* Returns true when we should use the adapter-based provider system. * Returns true when we should use the adapter-based provider system.
* *
@@ -563,15 +586,32 @@ export const WebSearchTool = buildTool({
// - "auto": tries each provider, falls through on failure // - "auto": tries each provider, falls through on failure
// - specific mode: runs one provider, throws on failure // - specific mode: runs one provider, throws on failure
if (shouldUseAdapterProvider()) { if (shouldUseAdapterProvider()) {
const providerOutput = await runSearch( const mode = getProviderMode()
{ const isExplicitAdapter = mode !== 'auto'
query: input.query, try {
allowed_domains: input.allowed_domains, const providerOutput = await runSearch(
blocked_domains: input.blocked_domains, {
}, query: input.query,
context.abortController.signal, allowed_domains: input.allowed_domains,
) blocked_domains: input.blocked_domains,
return { data: formatProviderOutput(providerOutput, input.query) } },
context.abortController.signal,
)
// Explicit adapter: return even 0 hits (no silent native fallback)
if (isExplicitAdapter || providerOutput.hits.length > 0) {
return { data: formatProviderOutput(providerOutput, input.query) }
}
// Auto mode with 0 hits: fall through to native
} catch (err) {
// Explicit adapter: throw the real error (no silent native fallback)
if (isExplicitAdapter) throw err
// Auto mode: only fall through on transient errors (network, timeout, 5xx).
// Config / guardrail errors (SSRF, HTTPS, bad URL, etc.) must surface.
if (!isTransientError(err)) throw err
console.error(
`[web-search] Adapter failed, falling through to native: ${err}`,
)
}
} }
// --- Codex / OpenAI Responses path --- // --- Codex / OpenAI Responses path ---

View File

@@ -22,7 +22,7 @@
* 3. Built-in allowlist of header names — arbitrary headers require * 3. Built-in allowlist of header names — arbitrary headers require
* WEB_CUSTOM_ALLOW_ARBITRARY_HEADERS=true * WEB_CUSTOM_ALLOW_ARBITRARY_HEADERS=true
* 4. Max body size guard (300 KB for POST) * 4. Max body size guard (300 KB for POST)
* 5. Request timeout (default 15s, configurable via WEB_CUSTOM_TIMEOUT_SEC) * 5. Request timeout (default 120s, configurable via WEB_CUSTOM_TIMEOUT_SEC)
* 6. Audit log on first custom search (one-time warning) * 6. Audit log on first custom search (one-time warning)
*/ */
@@ -117,7 +117,7 @@ const BUILT_IN_PROVIDERS: Record<string, ProviderPreset> = {
const DEFAULT_MAX_BODY_KB = 300 const DEFAULT_MAX_BODY_KB = 300
/** Default request timeout in seconds. */ /** Default request timeout in seconds. */
const DEFAULT_TIMEOUT_SECONDS = 15 const DEFAULT_TIMEOUT_SECONDS = 120
/** Header names that are always allowed (case-insensitive). */ /** Header names that are always allowed (case-insensitive). */
const SAFE_HEADER_NAMES = new Set([ const SAFE_HEADER_NAMES = new Set([
@@ -455,7 +455,7 @@ export const customProvider: SearchProvider = {
name: 'custom', name: 'custom',
isConfigured() { isConfigured() {
return Boolean(process.env.WEB_SEARCH_API || process.env.WEB_PROVIDER) return Boolean(process.env.WEB_SEARCH_API || process.env.WEB_PROVIDER || process.env.WEB_URL_TEMPLATE)
}, },
async search(input: SearchInput, signal?: AbortSignal): Promise<ProviderOutput> { async search(input: SearchInput, signal?: AbortSignal): Promise<ProviderOutput> {