From 91e4cfb15b62c04615834fd3c417fe38b4feb914 Mon Sep 17 00:00:00 2001 From: FluxLuFFy Date: Sat, 11 Apr 2026 18:37:20 +0530 Subject: [PATCH] fix: WebSearch providers + MCPTool bugs (#593) * fix: WebSearch providers + MCPTool bugs WebSearchTool: - custom.ts: fix buildAuthHeadersForPreset WEB_AUTH_HEADER opt-out - custom.ts: fix WEB_AUTH_SCHEME empty string handling - custom.ts: fix walkJsonPath null safety for jsonPath parsing - duckduckgo.ts: use SafeSearchType enum instead of raw 0 - mojeek.ts: always send Accept: application/json header - README: fix timeout documentation (15s -> 120s to match code) - custom.test.ts: add tests for auth header behavior MCPTool: - MCPTool.ts: fix outputSchema to accept ContentBlockParam[] (not just string) - MCPTool.ts: fix isResultTruncated for array output (iterates text blocks) * fix: address PR #593 review feedback 1. Export buildAuthHeadersForPreset and add direct tests for: - WEB_AUTH_HEADER="" explicit opt-out behavior - WEB_AUTH_SCHEME="" stripping scheme prefix - Preset defaults (authHeader + authScheme) - No WEB_KEY returns empty headers 2. Add duckduckgo.test.ts verifying SafeSearchType.STRICT === 0, confirming the enum change is semantically identical to the previous raw value. Addresses review by @Vasanthdev2004 at pullrequestreview-4093533095 --------- Co-authored-by: FluxLuFFy Co-authored-by: Fix Bot --- src/tools/MCPTool/MCPTool.ts | 29 +++++- .../WebSearchTool/README_SEARCH_PROVIDERS.md | 4 +- .../WebSearchTool/providers/custom.test.ts | 96 ++++++++++++++++++- src/tools/WebSearchTool/providers/custom.ts | 15 ++- .../providers/duckduckgo.test.ts | 15 +++ .../WebSearchTool/providers/duckduckgo.ts | 5 +- src/tools/WebSearchTool/providers/mojeek.ts | 5 +- 7 files changed, 155 insertions(+), 14 deletions(-) create mode 100644 src/tools/WebSearchTool/providers/duckduckgo.test.ts diff --git a/src/tools/MCPTool/MCPTool.ts b/src/tools/MCPTool/MCPTool.ts index 3896868b..628a5b5a 100644 --- a/src/tools/MCPTool/MCPTool.ts +++ b/src/tools/MCPTool/MCPTool.ts @@ -14,8 +14,21 @@ import { export const inputSchema = lazySchema(() => z.object({}).passthrough()) type InputSchema = ReturnType +// MCP tools can return either a plain string or an array of content blocks +// (text, images, etc.). The outputSchema must reflect both shapes so the model +// knows rich content is possible. export const outputSchema = lazySchema(() => - z.string().describe('MCP tool execution result'), + z.union([ + z.string().describe('MCP tool execution result as text'), + z + .array( + z.object({ + type: z.string(), + text: z.string().optional(), + }), + ) + .describe('MCP tool execution result as content blocks'), + ]), ) type OutputSchema = ReturnType @@ -65,7 +78,19 @@ export const MCPTool = buildTool({ renderToolUseProgressMessage, renderToolResultMessage, isResultTruncated(output: Output): boolean { - return isOutputLineTruncated(output) + if (typeof output === 'string') { + return isOutputLineTruncated(output) + } + // Array of content blocks — check if any text block exceeds the display limit + if (Array.isArray(output)) { + return output.some( + block => + block?.type === 'text' && + typeof block.text === 'string' && + isOutputLineTruncated(block.text), + ) + } + return false }, mapToolResultToToolResultBlockParam(content, toolUseID) { return { diff --git a/src/tools/WebSearchTool/README_SEARCH_PROVIDERS.md b/src/tools/WebSearchTool/README_SEARCH_PROVIDERS.md index 2a992f82..8bad0fb2 100644 --- a/src/tools/WebSearchTool/README_SEARCH_PROVIDERS.md +++ b/src/tools/WebSearchTool/README_SEARCH_PROVIDERS.md @@ -464,7 +464,7 @@ export WEB_JSON_PATH=response.payload.results ## Retry -Failed requests (network errors, 5xx) are retried once after 500ms. Client errors (4xx) are not retried. Custom requests have a default 15s timeout. +Failed requests (network errors, 5xx) are retried once after 500ms. Client errors (4xx) are not retried. Custom requests have a default 120s timeout. ## Custom Provider Security Guardrails @@ -476,7 +476,7 @@ The custom provider enforces the following guardrails by default: | Block private IPs / localhost | ✅ | `WEB_CUSTOM_ALLOW_PRIVATE=true` | | Header allowlist | ✅ | `WEB_CUSTOM_ALLOW_ARBITRARY_HEADERS=true` | | Max POST body | 300 KB | `WEB_CUSTOM_MAX_BODY_KB=` | -| Request timeout | 15s | `WEB_CUSTOM_TIMEOUT_SEC=` | +| Request timeout | 120s | `WEB_CUSTOM_TIMEOUT_SEC=` | | Audit log (one-time warning) | ✅ | — | ### Self-hosted SearXNG example diff --git a/src/tools/WebSearchTool/providers/custom.test.ts b/src/tools/WebSearchTool/providers/custom.test.ts index d5641960..b8b84663 100644 --- a/src/tools/WebSearchTool/providers/custom.test.ts +++ b/src/tools/WebSearchTool/providers/custom.test.ts @@ -1,5 +1,5 @@ -import { describe, expect, test } from 'bun:test' -import { extractHits } from './custom.js' +import { describe, expect, test, beforeEach, afterEach } from 'bun:test' +import { extractHits, customProvider } from './custom.js' // --------------------------------------------------------------------------- // extractHits — flexible response parsing @@ -83,3 +83,95 @@ describe('extractHits', () => { expect(hits).toHaveLength(1) }) }) + +// --------------------------------------------------------------------------- +// buildAuthHeadersForPreset — tested indirectly via env vars +// --------------------------------------------------------------------------- + +describe('buildAuthHeadersForPreset auth header behavior', () => { + const savedEnv: Record = {} + + beforeEach(() => { + for (const k of ['WEB_KEY', 'WEB_AUTH_HEADER', 'WEB_AUTH_SCHEME']) { + savedEnv[k] = process.env[k] + } + }) + + afterEach(() => { + for (const [k, v] of Object.entries(savedEnv)) { + if (v === undefined) delete process.env[k] + else process.env[k] = v + } + }) + + // We test isConfigured() which depends on WEB_SEARCH_API/WEB_PROVIDER/WEB_URL_TEMPLATE + // and the auth behavior through the public search() interface + test('custom provider is configured when WEB_URL_TEMPLATE is set', () => { + process.env.WEB_URL_TEMPLATE = 'https://example.com/search?q={query}' + const { customProvider } = require('./custom.js') + expect(customProvider.isConfigured()).toBe(true) + delete process.env.WEB_URL_TEMPLATE + }) + + test('custom provider is NOT configured when no env vars are set', () => { + delete process.env.WEB_URL_TEMPLATE + delete process.env.WEB_SEARCH_API + delete process.env.WEB_PROVIDER + expect(customProvider.isConfigured()).toBe(false) + }) +}) + +// --------------------------------------------------------------------------- +// buildAuthHeadersForPreset — direct tests for WEB_AUTH_HEADER / WEB_AUTH_SCHEME +// --------------------------------------------------------------------------- + +describe('buildAuthHeadersForPreset direct assertions', () => { + const savedEnv: Record = {} + + beforeEach(() => { + for (const k of ['WEB_KEY', 'WEB_AUTH_HEADER', 'WEB_AUTH_SCHEME']) { + savedEnv[k] = process.env[k] + } + }) + + afterEach(() => { + for (const [k, v] of Object.entries(savedEnv)) { + if (v === undefined) delete process.env[k] + else process.env[k] = v + } + }) + + test('WEB_AUTH_HEADER="" is an explicit opt-out — returns empty headers even with WEB_KEY set', () => { + process.env.WEB_KEY = 'sk-test-123' + process.env.WEB_AUTH_HEADER = '' + const { buildAuthHeadersForPreset } = require('./custom.js') + expect(buildAuthHeadersForPreset({ urlTemplate: '', queryParam: 'q', authHeader: 'Authorization' })).toEqual({}) + }) + + test('WEB_AUTH_SCHEME="" strips the scheme prefix (bare key only)', () => { + process.env.WEB_KEY = 'sk-test-123' + process.env.WEB_AUTH_SCHEME = '' + delete process.env.WEB_AUTH_HEADER + const { buildAuthHeadersForPreset } = require('./custom.js') + const result = buildAuthHeadersForPreset({ urlTemplate: '', queryParam: 'q', authHeader: 'X-Api-Key' }) + // scheme is '' so the header value should be just the key (trimmed) + expect(result).toEqual({ 'X-Api-Key': 'sk-test-123' }) + }) + + test('uses preset authHeader and authScheme when no env overrides', () => { + process.env.WEB_KEY = 'tok-abc' + delete process.env.WEB_AUTH_HEADER + delete process.env.WEB_AUTH_SCHEME + const { buildAuthHeadersForPreset } = require('./custom.js') + const result = buildAuthHeadersForPreset({ urlTemplate: '', queryParam: 'q', authHeader: 'Authorization', authScheme: 'Bearer' }) + expect(result).toEqual({ 'Authorization': 'Bearer tok-abc' }) + }) + + test('returns empty when WEB_KEY is not set', () => { + delete process.env.WEB_KEY + delete process.env.WEB_AUTH_HEADER + delete process.env.WEB_AUTH_SCHEME + const { buildAuthHeadersForPreset } = require('./custom.js') + expect(buildAuthHeadersForPreset({ urlTemplate: '', queryParam: 'q', authHeader: 'Authorization' })).toEqual({}) + }) +}) diff --git a/src/tools/WebSearchTool/providers/custom.ts b/src/tools/WebSearchTool/providers/custom.ts index 36c920c2..7a19a968 100644 --- a/src/tools/WebSearchTool/providers/custom.ts +++ b/src/tools/WebSearchTool/providers/custom.ts @@ -221,12 +221,18 @@ function auditLogCustomSearch(url: string): void { // Auth — preset overrides for built-in providers // --------------------------------------------------------------------------- -function buildAuthHeadersForPreset(preset?: ProviderPreset): Record { +export function buildAuthHeadersForPreset(preset?: ProviderPreset): Record { const apiKey = process.env.WEB_KEY if (!apiKey) return {} - const headerName = process.env.WEB_AUTH_HEADER ?? preset?.authHeader ?? 'Authorization' - const scheme = process.env.WEB_AUTH_SCHEME ?? preset?.authScheme ?? 'Bearer' + // WEB_AUTH_HEADER="" is an explicit opt-out of auth headers entirely + const explicitHeader = process.env.WEB_AUTH_HEADER + if (explicitHeader === '') return {} + + const headerName = explicitHeader ?? preset?.authHeader ?? 'Authorization' + const scheme = process.env.WEB_AUTH_SCHEME !== undefined + ? process.env.WEB_AUTH_SCHEME + : (preset?.authScheme ?? 'Bearer') return { [headerName]: `${scheme} ${apiKey}`.trim() } } @@ -350,7 +356,7 @@ function buildRequest(query: string) { function walkJsonPath(obj: any, path: string): any { let current = obj for (const seg of path.split('.')) { - if (current == null) return undefined + if (current == null || typeof current !== 'object') return undefined current = current[seg] } return current @@ -364,6 +370,7 @@ function extractFromNode(node: any): SearchHit[] { for (const sub of Object.values(node)) all.push(...extractFromNode(sub)) return all } + // node is a primitive (string/number) — not a valid hit structure return [] } diff --git a/src/tools/WebSearchTool/providers/duckduckgo.test.ts b/src/tools/WebSearchTool/providers/duckduckgo.test.ts new file mode 100644 index 00000000..b6bb3ebf --- /dev/null +++ b/src/tools/WebSearchTool/providers/duckduckgo.test.ts @@ -0,0 +1,15 @@ +import { describe, expect, test } from 'bun:test' + +describe('DuckDuckGo SafeSearchType', () => { + test('SafeSearchType.STRICT === 0 (matches previous raw value)', async () => { + const { SafeSearchType } = await import('duck-duck-scrape') + expect(SafeSearchType.STRICT).toBe(0) + }) + + test('SafeSearchType enum values are sane', async () => { + const { SafeSearchType } = await import('duck-duck-scrape') + expect(SafeSearchType.STRICT).toBe(0) + expect(SafeSearchType.MODERATE).toBe(-1) + expect(SafeSearchType.OFF).toBe(-2) + }) +}) diff --git a/src/tools/WebSearchTool/providers/duckduckgo.ts b/src/tools/WebSearchTool/providers/duckduckgo.ts index 2ab8b0f0..73dafd6b 100644 --- a/src/tools/WebSearchTool/providers/duckduckgo.ts +++ b/src/tools/WebSearchTool/providers/duckduckgo.ts @@ -12,14 +12,15 @@ export const duckduckgoProvider: SearchProvider = { async search(input: SearchInput, signal?: AbortSignal): Promise { const start = performance.now() let search: typeof import('duck-duck-scrape').search + let SafeSearchType: typeof import('duck-duck-scrape').SafeSearchType try { - ;({ search } = await import('duck-duck-scrape')) + ;({ search, SafeSearchType } = await import('duck-duck-scrape')) } catch { throw new Error('duck-duck-scrape package not installed. Run: npm install duck-duck-scrape') } if (signal?.aborted) throw new DOMException('Aborted', 'AbortError') // TODO: duck-duck-scrape doesn't accept AbortSignal — can't cancel in-flight searches - const response = await search(input.query, { safeSearch: 0 }) + const response = await search(input.query, { safeSearch: SafeSearchType.STRICT }) const hits = applyDomainFilters( response.results.map(r => ({ diff --git a/src/tools/WebSearchTool/providers/mojeek.ts b/src/tools/WebSearchTool/providers/mojeek.ts index 27511e21..01376539 100644 --- a/src/tools/WebSearchTool/providers/mojeek.ts +++ b/src/tools/WebSearchTool/providers/mojeek.ts @@ -21,9 +21,10 @@ export const mojeekProvider: SearchProvider = { url.searchParams.set('q', input.query) url.searchParams.set('fmt', 'json') - const headers: Record = {} + const headers: Record = { + 'Accept': 'application/json', + } if (process.env.MOJEEK_API_KEY) { - headers['Accept'] = 'application/json' headers['Authorization'] = `Bearer ${process.env.MOJEEK_API_KEY}` }