fix(web-search): close SSRF bypasses in custom provider hostname guard (#610)

The previous `isPrivateHostname` used a list of regexes against
`URL.hostname`. Several literal-address forms slipped past it:

- IPv4-mapped IPv6 `[::ffff:127.0.0.1]` (WHATWG URL normalizes to
  `[::ffff:7f00:1]`, which no regex matched) — lets callers reach
  loopback and other private v4 via an IPv6 literal.
- ULA `fc00::/7` (e.g. `[fc00::1]`) — not covered.
- Link-local `fe80::/10` (e.g. `[fe80::1]`) — not covered.
- IPv4 `169.254.0.0/16` (cloud metadata, including 169.254.169.254),
  `100.64.0.0/10` (CGNAT), and the full `0.0.0.0/8` — not covered.
- (Not a bypass, noted for completeness.) The IPv6 regex `/^\[::1?\]$/`
  required brackets; because `URL.hostname` returns IPv6 literals in
  bracketed form, `[::]` and `[::1]` were already matched.

WHATWG `new URL(...)` already normalizes short-form / numeric / hex /
octal IPv4 to dotted-quad before we see it, so those cases were in fact
handled — the remaining gaps were IPv6 and a few missing v4 ranges.

Replace the regex list with:
- a dotted-quad IPv4 parser + int range check covering 0/8, 10/8,
  100.64/10, 127/8, 169.254/16, 172.16/12, 192.168/16;
- a small IPv6 parser (handles `::` compression and embedded v4 suffix)
  + a byte-range check covering `::`, `::1`, IPv4-mapped (recursing
  into the v4 classifier), IPv4-compatible, `fc00::/7`, `fe80::/10`,
  and `fec0::/10`.

Export `isPrivateHostname` and add unit tests covering every bypass
listed above plus public-address negatives.

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
euxaristia
2026-04-12 09:09:46 -04:00
committed by GitHub
parent 7817fe88bd
commit a02c44143b
2 changed files with 230 additions and 18 deletions

View File

@@ -1,5 +1,5 @@
import { describe, expect, test, beforeEach, afterEach } from 'bun:test'
import { extractHits, customProvider } from './custom.js'
import { extractHits, customProvider, isPrivateHostname } from './custom.js'
// ---------------------------------------------------------------------------
// extractHits — flexible response parsing
@@ -175,3 +175,94 @@ describe('buildAuthHeadersForPreset direct assertions', () => {
expect(buildAuthHeadersForPreset({ urlTemplate: '', queryParam: 'q', authHeader: 'Authorization' })).toEqual({})
})
})
// ---------------------------------------------------------------------------
// isPrivateHostname — SSRF guard
// ---------------------------------------------------------------------------
/**
 * Test helper: returns the hostname exactly as `new URL()` normalizes it,
 * mirroring what validateUrl() hands to the guard in production.
 */
function hostOf(url: string): string {
  const parsed = new URL(url)
  return parsed.hostname
}
describe('isPrivateHostname — IPv4', () => {
  // Asserts that every hostname in `hosts` gets the same classification.
  const expectEach = (hosts: readonly string[], isPrivate: boolean): void => {
    for (const host of hosts) {
      expect(isPrivateHostname(host)).toBe(isPrivate)
    }
  }

  test('blocks localhost', () => {
    expectEach(['localhost', 'LOCALHOST'], true)
  })

  test('blocks 127.0.0.0/8 loopback including short/numeric/hex/octal forms (via URL normalization)', () => {
    const loopbackUrls = [
      'http://127.0.0.1/', // canonical dotted-quad
      'http://127.1/', // short form
      'http://2130706433/', // single decimal integer
      'http://0x7f000001/', // hex
      'http://0177.0.0.1/', // octal first octet
    ]
    expectEach(loopbackUrls.map(url => hostOf(url)), true)
  })

  test('blocks RFC1918 ranges', () => {
    expectEach(['10.0.0.1', '172.16.0.1', '172.31.255.255', '192.168.1.1'], true)
  })

  test('blocks 169.254.0.0/16 link-local (AWS/GCP metadata)', () => {
    expectEach(['169.254.169.254'], true)
  })

  test('blocks 100.64.0.0/10 CGNAT', () => {
    expectEach(['100.64.0.1', '100.127.255.255'], true)
  })

  test('blocks 0.0.0.0/8', () => {
    expectEach(['0.0.0.0', '0.1.2.3'], true)
  })

  test('allows public IPv4', () => {
    const publicHosts = [
      '8.8.8.8',
      '172.15.0.1', // just outside 172.16/12
      '172.32.0.1',
      '100.63.255.255', // just outside CGNAT
      '100.128.0.0',
    ]
    expectEach(publicHosts, false)
  })

  test('allows regular hostnames', () => {
    expectEach(['example.com', 'api.search.brave.com'], false)
  })
})
describe('isPrivateHostname — IPv6', () => {
  // Normalizes each URL through `new URL()` (via hostOf) and asserts the
  // resulting hostname gets the given classification.
  const expectEachUrl = (urls: readonly string[], isPrivate: boolean): void => {
    for (const url of urls) {
      expect(isPrivateHostname(hostOf(url))).toBe(isPrivate)
    }
  }

  test('blocks ::1 loopback and :: unspecified', () => {
    expectEachUrl(['http://[::1]/', 'http://[::]/'], true)
  })

  test('blocks IPv4-mapped IPv6 pointing at private v4 (the previous bypass)', () => {
    // WHATWG URL normalizes [::ffff:127.0.0.1] → [::ffff:7f00:1]; must still block.
    expectEachUrl(
      [
        'http://[::ffff:127.0.0.1]/',
        'http://[::ffff:7f00:1]/',
        'http://[::ffff:169.254.169.254]/',
        'http://[::ffff:10.0.0.1]/',
      ],
      true,
    )
  })

  test('blocks ULA fc00::/7', () => {
    expectEachUrl(['http://[fc00::1]/', 'http://[fd12:3456:789a::1]/'], true)
  })

  test('blocks link-local fe80::/10', () => {
    expectEachUrl(['http://[fe80::1]/', 'http://[febf::1]/'], true)
  })

  test('allows public IPv6', () => {
    expectEachUrl(['http://[2001:4860:4860::8888]/', 'http://[2606:4700:4700::1111]/'], false)
  })

  test('malformed IPv6 is not classified as private (URL parser rejects it upstream)', () => {
    // Passed raw (not through hostOf) because new URL() would throw on it.
    const malformed = 'not:an:ipv6'
    expect(isPrivateHostname(malformed)).toBe(false)
  })
})

View File

@@ -137,26 +137,147 @@ const SAFE_HEADER_NAMES = new Set([
])
/**
* Private / reserved IP ranges that should not be reachable from a
* search adapter (SSRF mitigation).
* Private / reserved address check for SSRF mitigation.
*
* This is a hostname-level check. DNS resolution to private IPs is
* NOT blocked here (that would require resolving before fetch, which
* Node fetch does not expose). This guard blocks obvious cases.
* Operates on the hostname produced by WHATWG `new URL(...)`, which already
* normalizes short-form, numeric, hex, and octal IPv4 to dotted-quad
* (e.g. `127.1`, `2130706433`, `0x7f000001`, `0177.0.0.1` → `127.0.0.1`),
* and which serializes IPv6 literals in bracketed, compressed hex form, so an
* embedded IPv4 suffix is rewritten as two hex groups
* (e.g. `[::ffff:127.0.0.1]` → `[::ffff:7f00:1]`).
*
* DNS resolution to private IPs is NOT blocked here — resolving before
* fetch is not exposed by Node's fetch. This guard blocks literal-address
* bypasses, which is what the original regex was trying (and failing) to do.
*/
// Legacy hostname denylist: a hostname is rejected when any pattern matches
// the whole string (every pattern is anchored with ^…$).
// The list has no entries for IPv4-mapped IPv6, fc00::/7, fe80::/10,
// 169.254.0.0/16, 100.64.0.0/10, or any 0.x.x.x address other than 0.0.0.0.
const BLOCKED_HOSTNAME_PATTERNS = [
// Literal "localhost", case-insensitive.
/^localhost$/i,
// 127.x.x.x loopback (octet values are not range-checked).
/^127\.\d+\.\d+\.\d+$/,
// 10.x.x.x (RFC 1918).
/^10\.\d+\.\d+\.\d+$/,
// 172.16.x.x – 172.31.x.x (RFC 1918).
/^172\.(1[6-9]|2\d|3[01])\.\d+\.\d+$/,
// 192.168.x.x (RFC 1918).
/^192\.168\.\d+\.\d+$/,
// Exactly 0.0.0.0 — not the rest of 0.0.0.0/8.
/^0\.0\.0\.0$/,
/^\[::1?\]$/i, // [::1] or [::]
/^0x[0-9a-f]+$/i, // hex-encoded IPs
]
function isPrivateHostname(hostname: string): boolean {
return BLOCKED_HOSTNAME_PATTERNS.some(re => re.test(hostname))
/**
 * Convert a strict dotted-quad IPv4 string to its unsigned 32-bit value.
 *
 * Each of the four parts must consist solely of decimal digits and evaluate
 * to 0–255; leading zeros are read as decimal (`0177` → 177).
 *
 * @param ip - Candidate address text, e.g. `127.0.0.1`.
 * @returns The address as an unsigned 32-bit integer, or `null` when `ip`
 *   is not a well-formed dotted quad.
 */
function ipv4DottedToInt(ip: string): number | null {
  const octetTexts = ip.split('.')
  if (octetTexts.length !== 4) return null

  // Non-digit parts become NaN so the range check below rejects them.
  const octets = octetTexts.map(text => (/^\d+$/.test(text) ? Number(text) : Number.NaN))
  const allInRange = octets.every(value => Number.isInteger(value) && value >= 0 && value <= 255)
  if (!allInRange) return null

  // Multiplication (not <<) keeps the running total positive past 2^31.
  const packed = octets.reduce((total, value) => total * 256 + value, 0)
  return packed >>> 0
}
/**
 * Report whether an IPv4 address falls in a range that must not be reachable
 * from the search adapter (SSRF mitigation).
 *
 * Blocked ranges: 0.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 127.0.0.0/8,
 * 169.254.0.0/16, 172.16.0.0/12, 192.168.0.0/16, 198.18.0.0/15,
 * 224.0.0.0/4 and 240.0.0.0/4 (which includes 255.255.255.255).
 *
 * @param n - The address as a 32-bit integer, most significant octet first.
 * @returns `true` when the address is private, loopback, link-local, or
 *   otherwise reserved; `false` for publicly routable unicast addresses.
 */
function isPrivateIPv4Int(n: number): boolean {
  // Only the two leading octets are needed to classify every range below.
  const a = (n >>> 24) & 0xff
  const b = (n >>> 16) & 0xff
  // 0.0.0.0/8 "this network"
  if (a === 0) return true
  // 10.0.0.0/8
  if (a === 10) return true
  // 100.64.0.0/10 CGNAT
  if (a === 100 && (b & 0xc0) === 0x40) return true
  // 127.0.0.0/8 loopback
  if (a === 127) return true
  // 169.254.0.0/16 link-local (cloud metadata endpoints live here)
  if (a === 169 && b === 254) return true
  // 172.16.0.0/12
  if (a === 172 && (b & 0xf0) === 0x10) return true
  // 192.168.0.0/16
  if (a === 192 && b === 168) return true
  // 198.18.0.0/15 benchmarking networks — never publicly routed
  if (a === 198 && (b & 0xfe) === 18) return true
  // 224.0.0.0/4 multicast and 240.0.0.0/4 reserved, incl. limited broadcast
  if (a >= 224) return true
  return false
}
/**
 * Parse an IPv6 address literal into its 16 raw bytes.
 *
 * Expects the address without surrounding brackets. Anything from the first
 * `%` onward (a zone id) is ignored. Supports `::` zero-compression and a
 * trailing dotted-quad IPv4 suffix, including a suffix that directly follows
 * `::` (e.g. `::1.2.3.4`, `64:ff9b::10.0.0.1`, `::ffff:127.0.0.1`).
 *
 * @param input - IPv6 text such as `fe80::1%eth0` or `::ffff:127.0.0.1`.
 * @returns The 16 address bytes, most significant first, or `null` when the
 *   input is malformed.
 */
function parseIPv6(input: string): Uint8Array | null {
  let s = input.split('%')[0] ?? ''
  if (s === '') return null

  // Split off trailing embedded IPv4 if present.
  let trailingV4: [number, number, number, number] | null = null
  const v4m = s.match(/^(.*:)(\d+\.\d+\.\d+\.\d+)$/)
  if (v4m) {
    const n = ipv4DottedToInt(v4m[2]!)
    if (n === null) return null
    trailingV4 = [(n >>> 24) & 0xff, (n >>> 16) & 0xff, (n >>> 8) & 0xff, n & 0xff]
    const prefix = v4m[1]!
    // Drop the single ':' that separates the last hex group from the IPv4
    // part, but keep a trailing '::' whole: cutting it in half would erase
    // the compression marker and make "::1.2.3.4" / "1::1.2.3.4" unparseable.
    s = prefix.endsWith('::') ? prefix : prefix.slice(0, -1)
    // Nothing but a lone ':' preceded the IPv4 part (":1.2.3.4") — malformed.
    if (s === '') return null
  }

  const halves = s.split('::')
  if (halves.length > 2) return null // at most one '::' is allowed
  const hasCompression = halves.length === 2
  const left = halves[0] ? halves[0].split(':') : []
  const right = hasCompression && halves[1] ? halves[1].split(':') : []

  // An embedded IPv4 suffix occupies the final two 16-bit groups.
  const groupsNeeded = 8 - (trailingV4 ? 2 : 0)
  if (!hasCompression && left.length !== groupsNeeded) return null
  if (hasCompression && left.length + right.length > groupsNeeded) return null
  const fill = hasCompression ? groupsNeeded - left.length - right.length : 0
  const groups = [...left, ...Array(fill).fill('0'), ...right]

  const bytes = new Uint8Array(16)
  for (let i = 0; i < groups.length; i++) {
    const g = groups[i]!
    // Also rejects the empty groups produced by stray leading/trailing ':'.
    if (!/^[0-9a-f]{1,4}$/i.test(g)) return null
    const v = parseInt(g, 16)
    bytes[i * 2] = (v >>> 8) & 0xff
    bytes[i * 2 + 1] = v & 0xff
  }
  if (trailingV4) {
    const off = groups.length * 2
    bytes[off] = trailingV4[0]
    bytes[off + 1] = trailingV4[1]
    bytes[off + 2] = trailingV4[2]
    bytes[off + 3] = trailingV4[3]
  }
  return bytes
}
/**
 * Report whether a parsed IPv6 address is loopback, unspecified, unique-local,
 * link-local, site-local, or wraps an IPv4 address that `isPrivateIPv4Int`
 * classifies as private.
 *
 * @param bytes - Address bytes, most significant first (as produced by `parseIPv6`).
 * @returns `true` when the address must be treated as private.
 */
function isPrivateIPv6(bytes: Uint8Array): boolean {
  // True when the first `count` bytes are all zero.
  const hasZeroPrefix = (count: number): boolean => {
    for (let idx = 0; idx < count; idx++) {
      if (bytes[idx] !== 0) return false
    }
    return true
  }
  // The low 32 bits, read as an unsigned IPv4 address.
  const low32 = (): number =>
    ((bytes[12]! << 24) | (bytes[13]! << 16) | (bytes[14]! << 8) | bytes[15]!) >>> 0

  // ::1 loopback
  if (hasZeroPrefix(15) && bytes[15] === 1) return true

  // :: unspecified
  if (bytes.every(b => b === 0)) return true

  // IPv4-mapped ::ffff:a.b.c.d — the verdict is that of the embedded address.
  if (hasZeroPrefix(10) && bytes[10] === 0xff && bytes[11] === 0xff) {
    return isPrivateIPv4Int(low32())
  }

  // IPv4-compatible (deprecated) ::a.b.c.d — same delegation; :: and ::1
  // share this shape but were already decided above.
  if (hasZeroPrefix(12)) {
    const embedded = low32()
    if (embedded !== 0 && embedded !== 1) return isPrivateIPv4Int(embedded)
  }

  // ULA fc00::/7
  if ((bytes[0]! & 0xfe) === 0xfc) return true

  // Link-local fe80::/10 and site-local (deprecated) fec0::/10 together are
  // exactly the addresses with first byte 0xfe and the top bit of the second
  // byte set.
  return bytes[0] === 0xfe && (bytes[1]! & 0x80) !== 0
}
/**
 * Report whether `hostname` literally names a loopback, private, link-local,
 * or otherwise reserved destination (SSRF guard for custom providers).
 *
 * Intended input is `URL.hostname`. IPv6 literals are accepted with or
 * without surrounding brackets. `localhost`, any name under `.localhost`,
 * and either of those written with a trailing root dot are treated as
 * private. Other DNS names are not resolved and yield `false`.
 *
 * @param hostname - Host portion of the URL being validated.
 * @returns `true` when requests to this host must be refused.
 */
export function isPrivateHostname(hostname: string): boolean {
  // A trailing dot only marks the name as fully qualified; "localhost."
  // reaches the same host as "localhost", so compare without it.
  const host = hostname.endsWith('.') ? hostname.slice(0, -1) : hostname

  // "localhost" and everything beneath it are loopback names.
  const lowered = host.toLowerCase()
  if (lowered === 'localhost' || lowered.endsWith('.localhost')) return true

  // URL.hostname wraps IPv6 literals in brackets; strip for parsing.
  const unwrapped = host.startsWith('[') && host.endsWith(']') ? host.slice(1, -1) : host

  // IPv4 dotted-quad (WHATWG URL normalizes short/numeric/hex/octal to this).
  const v4 = ipv4DottedToInt(unwrapped)
  if (v4 !== null) return isPrivateIPv4Int(v4)

  // IPv6 — only attempted when a ':' is present, so plain DNS names fall
  // through to the `false` below.
  if (unwrapped.includes(':')) {
    const bytes = parseIPv6(unwrapped)
    if (bytes !== null) return isPrivateIPv6(bytes)
  }
  return false
}
/**