From 32fbd0c7b4168b32dcb13a5b69342e2727269201 Mon Sep 17 00:00:00 2001 From: FluxLuFFy Date: Thu, 9 Apr 2026 18:11:58 +0530 Subject: [PATCH] =?UTF-8?q?fix:=20custom=20web=20search=20=E2=80=94=20WEB?= =?UTF-8?q?=5FURL=5FTEMPLATE=20not=20recognized,=20timeout=20too=20short,?= =?UTF-8?q?=20silent=20native=20fallback=20(#537)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: custom web search — WEB_URL_TEMPLATE not recognized, timeout too short, silent native fallback 1. custom.ts: Add WEB_URL_TEMPLATE to isConfigured() so the custom provider is recognized when configured via URL template alone. 2. custom.ts: Bump DEFAULT_TIMEOUT_SECONDS from 15s to 120s. Self-hosted search APIs (SearXNG, internal) commonly need 30-90s. 3. WebSearchTool.ts: When an explicit adapter is selected via WEB_SEARCH_PROVIDER=custom, do not silently fall through to the native Anthropic path on adapter errors or 0-hit results. - 0 hits: return directly (no fallback) - Error: throw the real error (no fallback) - Auto mode: existing fallback behavior preserved * fix: tighten auto-mode adapter fallback — only swallow transient errors Address review feedback: in auto mode, only fall through to native on transient errors (network failure, timeout, HTTP 5xx). Config and guardrail errors (SSRF, HTTPS, bad URL, header allowlist, etc.) now surface properly instead of being silently swallowed. --------- Co-authored-by: FluxLuFFy --- src/tools/WebSearchTool/WebSearchTool.ts | 58 +++++++++++++++++---- src/tools/WebSearchTool/providers/custom.ts | 6 +-- 2 files changed, 52 insertions(+), 12 deletions(-) diff --git a/src/tools/WebSearchTool/WebSearchTool.ts b/src/tools/WebSearchTool/WebSearchTool.ts index 69c3b3e8..9cb49748 100644 --- a/src/tools/WebSearchTool/WebSearchTool.ts +++ b/src/tools/WebSearchTool/WebSearchTool.ts @@ -410,6 +410,29 @@ function makeOutputFromSearchResponse( // Helper: should we use adapter-based providers? 
// ---------------------------------------------------------------------------

/**
 * Message fragments (lower-case) that identify guardrail / config failures.
 * Each entry is a group of fragments that must ALL appear in the message for
 * the group to match; a single-element group is a plain substring check.
 */
const ADAPTER_GUARDRAIL_ERROR_MARKERS: ReadonlyArray<readonly string[]> = [
  ['must use https'],
  ['private/reserved address'],
  ['not in the safe allowlist'],
  ['exceeds', 'bytes'],
  ['not a valid url'],
  ['is not configured'],
]

/**
 * Extracts a human-readable message from an arbitrary thrown value.
 *
 * Accepts `Error` instances, error-like objects carrying a string `message`,
 * and bare strings. Anything else (or a non-string `message`) yields `''`, so
 * the caller never has to guard against a missing or exotic message.
 */
function adapterErrorText(err: unknown): string {
  if (typeof err === 'string') return err
  if (typeof err === 'object' && err !== null && 'message' in err) {
    const message = (err as { message?: unknown }).message
    return typeof message === 'string' ? message : ''
  }
  return ''
}

/**
 * Decides whether an adapter failure may be swallowed in auto mode.
 *
 * Returns `false` when the failure text matches a known guardrail / config
 * fragment (HTTPS requirement, private/reserved address, header allowlist,
 * size limit, invalid URL, missing configuration); such errors must surface.
 *
 * Returns `true` for everything else. That covers aborts, timeouts, network
 * failures and HTTP 5xx, and also unrecognised errors, which are deliberately
 * treated as transient to preserve auto-mode fallback semantics.
 *
 * The classification is based on the message text only, so it behaves the same
 * for `Error` instances, error-like objects and thrown strings, and it never
 * throws itself (a non-string `message` is treated as empty).
 *
 * @param err - The value caught from the adapter call; may be of any type.
 * @returns `true` if it is safe to fall through, `false` if the error must be rethrown.
 */
function isTransientError(err: unknown): boolean {
  const text = adapterErrorText(err).toLowerCase()
  const mustSurface = ADAPTER_GUARDRAIL_ERROR_MARKERS.some(fragments =>
    fragments.every(fragment => text.includes(fragment)),
  )
  return !mustSurface
}

// /**
//  * Returns true when we should use the adapter-based provider system.
* @@ -563,15 +586,32 @@ export const WebSearchTool = buildTool({ // - "auto": tries each provider, falls through on failure // - specific mode: runs one provider, throws on failure if (shouldUseAdapterProvider()) { - const providerOutput = await runSearch( - { - query: input.query, - allowed_domains: input.allowed_domains, - blocked_domains: input.blocked_domains, - }, - context.abortController.signal, - ) - return { data: formatProviderOutput(providerOutput, input.query) } + const mode = getProviderMode() + const isExplicitAdapter = mode !== 'auto' + try { + const providerOutput = await runSearch( + { + query: input.query, + allowed_domains: input.allowed_domains, + blocked_domains: input.blocked_domains, + }, + context.abortController.signal, + ) + // Explicit adapter: return even 0 hits (no silent native fallback) + if (isExplicitAdapter || providerOutput.hits.length > 0) { + return { data: formatProviderOutput(providerOutput, input.query) } + } + // Auto mode with 0 hits: fall through to native + } catch (err) { + // Explicit adapter: throw the real error (no silent native fallback) + if (isExplicitAdapter) throw err + // Auto mode: only fall through on transient errors (network, timeout, 5xx). + // Config / guardrail errors (SSRF, HTTPS, bad URL, etc.) must surface. + if (!isTransientError(err)) throw err + console.error( + `[web-search] Adapter failed, falling through to native: ${err}`, + ) + } } // --- Codex / OpenAI Responses path --- diff --git a/src/tools/WebSearchTool/providers/custom.ts b/src/tools/WebSearchTool/providers/custom.ts index bc717733..36c920c2 100644 --- a/src/tools/WebSearchTool/providers/custom.ts +++ b/src/tools/WebSearchTool/providers/custom.ts @@ -22,7 +22,7 @@ * 3. Built-in allowlist of header names — arbitrary headers require * WEB_CUSTOM_ALLOW_ARBITRARY_HEADERS=true * 4. Max body size guard (300 KB for POST) - * 5. Request timeout (default 15s, configurable via WEB_CUSTOM_TIMEOUT_SEC) + * 5. 
Request timeout (default 120s, configurable via WEB_CUSTOM_TIMEOUT_SEC) * 6. Audit log on first custom search (one-time warning) */ @@ -117,7 +117,7 @@ const BUILT_IN_PROVIDERS: Record = { const DEFAULT_MAX_BODY_KB = 300 /** Default request timeout in seconds. */ -const DEFAULT_TIMEOUT_SECONDS = 15 +const DEFAULT_TIMEOUT_SECONDS = 120 /** Header names that are always allowed (case-insensitive). */ const SAFE_HEADER_NAMES = new Set([ @@ -455,7 +455,7 @@ export const customProvider: SearchProvider = { name: 'custom', isConfigured() { - return Boolean(process.env.WEB_SEARCH_API || process.env.WEB_PROVIDER) + return Boolean(process.env.WEB_SEARCH_API || process.env.WEB_PROVIDER || process.env.WEB_URL_TEMPLATE) }, async search(input: SearchInput, signal?: AbortSignal): Promise {