From a02c44143b257fbee7f38f1b93873cc0ea68a1f9 Mon Sep 17 00:00:00 2001 From: euxaristia <25621994+euxaristia@users.noreply.github.com> Date: Sun, 12 Apr 2026 09:09:46 -0400 Subject: [PATCH] fix(web-search): close SSRF bypasses in custom provider hostname guard (#610) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous `isPrivateHostname` used a list of regexes against `URL.hostname`. Several literal-address forms slipped past it: - IPv4-mapped IPv6 `[::ffff:127.0.0.1]` (WHATWG URL normalizes to `[::ffff:7f00:1]`, which no regex matched) — lets callers reach loopback and other private v4 via an IPv6 literal. - ULA `fc00::/7` (e.g. `[fc00::1]`) — not covered. - Link-local `fe80::/10` (e.g. `[fe80::1]`) — not covered. - IPv4 `169.254.0.0/16` (cloud metadata, including 169.254.169.254), `100.64.0.0/10` (CGNAT), and the full `0.0.0.0/8` — not covered. - The IPv6 regex `/^\[::1?\]$/` also required brackets, but `URL.hostname` returns bracketed form anyway, so this part happened to work. WHATWG `new URL(...)` already normalizes short-form / numeric / hex / octal IPv4 to dotted-quad before we see it, so those cases were in fact handled — the remaining gaps were IPv6 and a few missing v4 ranges. Replace the regex list with: - a dotted-quad IPv4 parser + int range check covering 0/8, 10/8, 100.64/10, 127/8, 169.254/16, 172.16/12, 192.168/16; - a small IPv6 parser (handles `::` compression and embedded v4 suffix) + a byte-range check covering `::`, `::1`, IPv4-mapped (recursing into the v4 classifier), IPv4-compatible, `fc00::/7`, `fe80::/10`, and `fec0::/10`. Export `isPrivateHostname` and add unit tests covering every bypass listed above plus public-address negatives. 
Co-authored-by: Claude Opus 4.6 --- .../WebSearchTool/providers/custom.test.ts | 93 ++++++++++- src/tools/WebSearchTool/providers/custom.ts | 155 ++++++++++++++++-- 2 files changed, 230 insertions(+), 18 deletions(-) diff --git a/src/tools/WebSearchTool/providers/custom.test.ts b/src/tools/WebSearchTool/providers/custom.test.ts index b8b84663..e9ab71dd 100644 --- a/src/tools/WebSearchTool/providers/custom.test.ts +++ b/src/tools/WebSearchTool/providers/custom.test.ts @@ -1,5 +1,5 @@ import { describe, expect, test, beforeEach, afterEach } from 'bun:test' -import { extractHits, customProvider } from './custom.js' +import { extractHits, customProvider, isPrivateHostname } from './custom.js' // --------------------------------------------------------------------------- // extractHits — flexible response parsing @@ -175,3 +175,94 @@ describe('buildAuthHeadersForPreset direct assertions', () => { expect(buildAuthHeadersForPreset({ urlTemplate: '', queryParam: 'q', authHeader: 'Authorization' })).toEqual({}) }) }) + +// --------------------------------------------------------------------------- +// isPrivateHostname — SSRF guard +// --------------------------------------------------------------------------- + +// Helper: route through new URL() the way validateUrl() does, so we exercise +// the same normalized hostname that production code sees. 
+const hostOf = (url: string) => new URL(url).hostname + +describe('isPrivateHostname — IPv4', () => { + test('blocks localhost', () => { + expect(isPrivateHostname('localhost')).toBe(true) + expect(isPrivateHostname('LOCALHOST')).toBe(true) + }) + + test('blocks 127.0.0.0/8 loopback including short/numeric/hex/octal forms (via URL normalization)', () => { + expect(isPrivateHostname(hostOf('http://127.0.0.1/'))).toBe(true) + expect(isPrivateHostname(hostOf('http://127.1/'))).toBe(true) + expect(isPrivateHostname(hostOf('http://2130706433/'))).toBe(true) + expect(isPrivateHostname(hostOf('http://0x7f000001/'))).toBe(true) + expect(isPrivateHostname(hostOf('http://0177.0.0.1/'))).toBe(true) + }) + + test('blocks RFC1918 ranges', () => { + expect(isPrivateHostname('10.0.0.1')).toBe(true) + expect(isPrivateHostname('172.16.0.1')).toBe(true) + expect(isPrivateHostname('172.31.255.255')).toBe(true) + expect(isPrivateHostname('192.168.1.1')).toBe(true) + }) + + test('blocks 169.254.0.0/16 link-local (AWS/GCP metadata)', () => { + expect(isPrivateHostname('169.254.169.254')).toBe(true) + }) + + test('blocks 100.64.0.0/10 CGNAT', () => { + expect(isPrivateHostname('100.64.0.1')).toBe(true) + expect(isPrivateHostname('100.127.255.255')).toBe(true) + }) + + test('blocks 0.0.0.0/8', () => { + expect(isPrivateHostname('0.0.0.0')).toBe(true) + expect(isPrivateHostname('0.1.2.3')).toBe(true) + }) + + test('allows public IPv4', () => { + expect(isPrivateHostname('8.8.8.8')).toBe(false) + expect(isPrivateHostname('172.15.0.1')).toBe(false) // just outside 172.16/12 + expect(isPrivateHostname('172.32.0.1')).toBe(false) + expect(isPrivateHostname('100.63.255.255')).toBe(false) // just outside CGNAT + expect(isPrivateHostname('100.128.0.0')).toBe(false) + }) + + test('allows regular hostnames', () => { + expect(isPrivateHostname('example.com')).toBe(false) + expect(isPrivateHostname('api.search.brave.com')).toBe(false) + }) +}) + +describe('isPrivateHostname — IPv6', () => { + 
test('blocks ::1 loopback and :: unspecified', () => { + expect(isPrivateHostname(hostOf('http://[::1]/'))).toBe(true) + expect(isPrivateHostname(hostOf('http://[::]/'))).toBe(true) + }) + + test('blocks IPv4-mapped IPv6 pointing at private v4 (the previous bypass)', () => { + // WHATWG URL normalizes [::ffff:127.0.0.1] → [::ffff:7f00:1]; must still block. + expect(isPrivateHostname(hostOf('http://[::ffff:127.0.0.1]/'))).toBe(true) + expect(isPrivateHostname(hostOf('http://[::ffff:7f00:1]/'))).toBe(true) + expect(isPrivateHostname(hostOf('http://[::ffff:169.254.169.254]/'))).toBe(true) + expect(isPrivateHostname(hostOf('http://[::ffff:10.0.0.1]/'))).toBe(true) + }) + + test('blocks ULA fc00::/7', () => { + expect(isPrivateHostname(hostOf('http://[fc00::1]/'))).toBe(true) + expect(isPrivateHostname(hostOf('http://[fd12:3456:789a::1]/'))).toBe(true) + }) + + test('blocks link-local fe80::/10', () => { + expect(isPrivateHostname(hostOf('http://[fe80::1]/'))).toBe(true) + expect(isPrivateHostname(hostOf('http://[febf::1]/'))).toBe(true) + }) + + test('allows public IPv6', () => { + expect(isPrivateHostname(hostOf('http://[2001:4860:4860::8888]/'))).toBe(false) + expect(isPrivateHostname(hostOf('http://[2606:4700:4700::1111]/'))).toBe(false) + }) + + test('malformed IPv6 is not classified as private (URL parser rejects it upstream)', () => { + expect(isPrivateHostname('not:an:ipv6')).toBe(false) + }) +}) diff --git a/src/tools/WebSearchTool/providers/custom.ts b/src/tools/WebSearchTool/providers/custom.ts index d1db2940..9b2b5d13 100644 --- a/src/tools/WebSearchTool/providers/custom.ts +++ b/src/tools/WebSearchTool/providers/custom.ts @@ -137,26 +137,147 @@ const SAFE_HEADER_NAMES = new Set([ ]) /** - * Private / reserved IP ranges that should not be reachable from a - * search adapter (SSRF mitigation). + * Private / reserved address check for SSRF mitigation. * - * This is a hostname-level check. 
/**
 * Private / reserved address check for SSRF mitigation.
 *
 * Intended to run on the hostname produced by WHATWG `new URL(...)`, which
 * already normalizes short-form, numeric, hex, and octal IPv4 to dotted-quad
 * (e.g. `127.1`, `2130706433`, `0x7f000001`, `0177.0.0.1` → `127.0.0.1`) and
 * serializes IPv6 in bracketed compressed form
 * (e.g. `[::ffff:127.0.0.1]` → `[::ffff:7f00:1]`).
 *
 * DNS resolution to private IPs is NOT blocked here; this guard only
 * classifies literal addresses and the `localhost` name family.
 */

/**
 * Convert a strict dotted-quad IPv4 string to an unsigned 32-bit integer.
 *
 * Each of the four parts must be a run of decimal digits with a value of at
 * most 255. Parts are always read as decimal, so `0177` means 177 here.
 *
 * @param ip - Candidate dotted-quad string, e.g. `192.168.1.1`.
 * @returns The address as an unsigned 32-bit integer, or `null` if `ip` is
 *   not a well-formed dotted quad.
 */
function ipv4DottedToInt(ip: string): number | null {
  const parts = ip.split('.')
  if (parts.length !== 4) return null
  let n = 0
  for (const part of parts) {
    // Digits only: rejects signs, whitespace, hex prefixes and empty parts.
    if (!/^\d+$/.test(part)) return null
    const octet = Number(part)
    // Also rejects absurdly long digit runs, which convert to Infinity.
    if (octet > 255) return null
    n = n * 256 + octet
  }
  return n >>> 0
}

/**
 * Report whether an IPv4 address (as an unsigned 32-bit integer) falls in one
 * of the blocked ranges: 0/8, 10/8, 100.64/10, 127/8, 169.254/16, 172.16/12,
 * 192.168/16.
 *
 * @param n - IPv4 address as an unsigned 32-bit integer.
 * @returns `true` if the address is in a blocked range.
 */
function isPrivateIPv4Int(n: number): boolean {
  const a = (n >>> 24) & 0xff
  const b = (n >>> 16) & 0xff
  // 0.0.0.0/8 "this network"
  if (a === 0) return true
  // 10.0.0.0/8
  if (a === 10) return true
  // 100.64.0.0/10 CGNAT: second octet 64..127
  if (a === 100 && (b & 0xc0) === 0x40) return true
  // 127.0.0.0/8 loopback
  if (a === 127) return true
  // 169.254.0.0/16 link-local (includes cloud metadata 169.254.169.254)
  if (a === 169 && b === 254) return true
  // 172.16.0.0/12: second octet 16..31
  if (a === 172 && (b & 0xf0) === 0x10) return true
  // 192.168.0.0/16
  if (a === 192 && b === 168) return true
  return false
}

/**
 * Parse an IPv6 address (without brackets, zone id optional) to 16 bytes.
 *
 * Handles `::` compression and an embedded IPv4 suffix, including the case
 * where `::` sits directly before the suffix (`::1.2.3.4`, `fe80::1.2.3.4`).
 *
 * @param input - IPv6 text such as `fe80::1%eth0` or `::ffff:127.0.0.1`.
 * @returns The 16 address bytes, or `null` on malformed input.
 */
function parseIPv6(input: string): Uint8Array | null {
  // Drop an optional zone id ("%eth0"); it does not affect classification.
  let s = input.split('%')[0] ?? ''
  if (s === '') return null

  // Split off a trailing embedded IPv4 if present.
  let trailingV4: [number, number, number, number] | null = null
  const v4m = /^(.*:)(\d+\.\d+\.\d+\.\d+)$/.exec(s)
  if (v4m) {
    const prefix = v4m[1] ?? ''
    const n = ipv4DottedToInt(v4m[2] ?? '')
    if (n === null) return null
    trailingV4 = [(n >>> 24) & 0xff, (n >>> 16) & 0xff, (n >>> 8) & 0xff, n & 0xff]
    // The prefix always ends with the ':' that separates it from the IPv4
    // part. Remove that separator, unless it is the second colon of a '::'
    // compression marker, which must be kept whole: stripping it would turn
    // "::" into ":" and make the address unparseable.
    s = prefix.endsWith('::') ? prefix : prefix.slice(0, -1)
    // A lone ':' before the IPv4 part (":1.2.3.4") is malformed.
    if (s === '') return null
  }

  const halves = s.split('::')
  if (halves.length > 2) return null
  const compressed = halves.length === 2
  const [head = '', tail = ''] = halves
  const left = head === '' ? [] : head.split(':')
  const right = compressed && tail !== '' ? tail.split(':') : []

  // An embedded IPv4 occupies the last two 16-bit groups.
  const groupsNeeded = 8 - (trailingV4 ? 2 : 0)
  if (!compressed && left.length !== groupsNeeded) return null
  if (compressed && left.length + right.length > groupsNeeded) return null

  const fill = compressed ? groupsNeeded - left.length - right.length : 0
  const groups: string[] = [...left, ...Array<string>(fill).fill('0'), ...right]

  const bytes = new Uint8Array(16)
  for (let i = 0; i < groups.length; i++) {
    const group = groups[i] ?? ''
    if (!/^[0-9a-f]{1,4}$/i.test(group)) return null
    const value = parseInt(group, 16)
    bytes[i * 2] = (value >>> 8) & 0xff
    bytes[i * 2 + 1] = value & 0xff
  }
  if (trailingV4) {
    bytes.set(trailingV4, groups.length * 2)
  }
  return bytes
}

/**
 * Report whether a parsed IPv6 address is loopback, unspecified, an
 * IPv4-mapped or IPv4-compatible address whose embedded IPv4 is blocked,
 * ULA (`fc00::/7`), link-local (`fe80::/10`) or site-local (`fec0::/10`).
 *
 * @param bytes - The 16 address bytes as returned by `parseIPv6`.
 * @returns `true` if the address is in a blocked range.
 */
function isPrivateIPv6(bytes: Uint8Array): boolean {
  const at = (i: number): number => bytes[i] ?? 0
  const leadingZeroBytes = (count: number): boolean =>
    bytes.subarray(0, count).every(b => b === 0)
  const embeddedV4 = (): number =>
    ((at(12) << 24) | (at(13) << 16) | (at(14) << 8) | at(15)) >>> 0

  // ::1 loopback
  if (leadingZeroBytes(15) && at(15) === 1) return true
  // :: unspecified
  if (leadingZeroBytes(16)) return true
  // IPv4-mapped ::ffff:a.b.c.d — classify by the embedded IPv4.
  if (leadingZeroBytes(10) && at(10) === 0xff && at(11) === 0xff) {
    return isPrivateIPv4Int(embeddedV4())
  }
  // IPv4-compatible (deprecated) ::a.b.c.d — classify by the embedded IPv4.
  // :: and ::1 were already handled above, so the embedded value is >= 2.
  if (leadingZeroBytes(12)) return isPrivateIPv4Int(embeddedV4())
  // ULA fc00::/7
  if ((at(0) & 0xfe) === 0xfc) return true
  // Link-local fe80::/10
  if (at(0) === 0xfe && (at(1) & 0xc0) === 0x80) return true
  // Site-local (deprecated) fec0::/10
  if (at(0) === 0xfe && (at(1) & 0xc0) === 0xc0) return true
  return false
}

/**
 * Decide whether a hostname names a loopback, private, or otherwise
 * non-public literal address that a search adapter must not reach.
 *
 * Blocks `localhost` (any case, with or without the absolute-FQDN trailing
 * dot), names under `.localhost`, dotted-quad IPv4 in the blocked ranges,
 * and IPv6 literals (bracketed or bare) in the blocked ranges.
 *
 * @param hostname - Hostname, normally `new URL(url).hostname`.
 * @returns `true` if the hostname must be rejected; `false` for public
 *   addresses, ordinary DNS names, and text that parses as neither IPv4
 *   nor IPv6.
 */
export function isPrivateHostname(hostname: string): boolean {
  // A single trailing dot marks an absolute DNS name ("localhost.") and
  // names the same host, so drop it before comparing.
  const host = hostname.toLowerCase().replace(/\.$/, '')
  // RFC 6761 reserves "localhost" and every name beneath it for loopback.
  if (host === 'localhost' || host.endsWith('.localhost')) return true
  // URL.hostname wraps IPv6 literals in brackets; strip for parsing.
  const unwrapped = host.startsWith('[') && host.endsWith(']')
    ? host.slice(1, -1)
    : host
  // IPv4 dotted-quad (WHATWG URL normalizes short/numeric/hex/octal to this).
  const v4 = ipv4DottedToInt(unwrapped)
  if (v4 !== null) return isPrivateIPv4Int(v4)
  // IPv6
  if (unwrapped.includes(':')) {
    const bytes = parseIPv6(unwrapped)
    if (bytes) return isPrivateIPv6(bytes)
  }
  return false
}