fix(web-search): close SSRF bypasses in custom provider hostname guard (#610)

The previous `isPrivateHostname` used a list of regexes against
`URL.hostname`. Several literal-address forms slipped past it:

- IPv4-mapped IPv6 `[::ffff:127.0.0.1]` (WHATWG URL normalizes to
  `[::ffff:7f00:1]`, which no regex matched) — lets callers reach
  loopback and other private v4 via an IPv6 literal.
- ULA `fc00::/7` (e.g. `[fc00::1]`) — not covered.
- Link-local `fe80::/10` (e.g. `[fe80::1]`) — not covered.
- IPv4 `169.254.0.0/16` (cloud metadata, including 169.254.169.254),
  `100.64.0.0/10` (CGNAT), and the full `0.0.0.0/8` — not covered.
- The IPv6 regex `/^\[::1?\]$/` also required brackets, but `URL.hostname`
  returns bracketed form anyway, so this part happened to work.

WHATWG `new URL(...)` already normalizes short-form / numeric / hex /
octal IPv4 to dotted-quad before we see it, so those cases were in fact
handled — the remaining gaps were IPv6 and a few missing v4 ranges.

Replace the regex list with:
- a dotted-quad IPv4 parser + int range check covering 0/8, 10/8,
  100.64/10, 127/8, 169.254/16, 172.16/12, 192.168/16;
- a small IPv6 parser (handles `::` compression and embedded v4 suffix)
  + a byte-range check covering `::`, `::1`, IPv4-mapped (recursing
  into the v4 classifier), IPv4-compatible, `fc00::/7`, `fe80::/10`,
  and `fec0::/10`.

Export `isPrivateHostname` and add unit tests covering every bypass
listed above plus public-address negatives.

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
euxaristia
2026-04-12 09:09:46 -04:00
committed by GitHub
parent 7817fe88bd
commit a02c44143b
2 changed files with 230 additions and 18 deletions

View File

@@ -137,26 +137,147 @@ const SAFE_HEADER_NAMES = new Set([
])
/**
* Private / reserved IP ranges that should not be reachable from a
* search adapter (SSRF mitigation).
* Private / reserved address check for SSRF mitigation.
*
* This is a hostname-level check. DNS resolution to private IPs is
* NOT blocked here (that would require resolving before fetch, which
* Node fetch does not expose). This guard blocks obvious cases.
* Operates on the hostname produced by WHATWG `new URL(...)`, which already
* normalizes short-form, numeric, hex, and octal IPv4 to dotted-quad
* (e.g. `127.1`, `2130706433`, `0x7f000001`, `0177.0.0.1` → `127.0.0.1`),
* and which re-serializes IPv6 literals in bracketed, compressed, all-hex form
* (e.g. `[::ffff:127.0.0.1]` → `[::ffff:7f00:1]`), so a dotted-quad suffix
* never survives inside an IPv6 hostname.
*
* DNS resolution to private IPs is NOT blocked here — resolving before
* fetch is not exposed by Node's fetch. This guard blocks literal-address
* bypasses, which is what the original regex was trying (and failing) to do.
*/
// Hostname patterns treated as private / loopback. Every pattern is anchored
// with ^ and $, so it must match the entire hostname string. The dotted-quad
// patterns use `\d+` per octet and therefore do not range-check octets.
const BLOCKED_HOSTNAME_PATTERNS = [
// The literal name "localhost" (case-insensitive).
/^localhost$/i,
// Dotted quads whose first octet is 127.
/^127\.\d+\.\d+\.\d+$/,
// Dotted quads whose first octet is 10.
/^10\.\d+\.\d+\.\d+$/,
// Dotted quads starting 172.16 through 172.31.
/^172\.(1[6-9]|2\d|3[01])\.\d+\.\d+$/,
// Dotted quads starting 192.168.
/^192\.168\.\d+\.\d+$/,
// Exactly 0.0.0.0 (no other address with a leading 0 octet).
/^0\.0\.0\.0$/,
// Bracketed IPv6 literals only; no other IPv6 form is matched by this list.
/^\[::1?\]$/i, // [::1] or [::]
// A bare `0x` followed by hex digits, with no dots.
/^0x[0-9a-f]+$/i, // hex-encoded IPs
]
function isPrivateHostname(hostname: string): boolean {
return BLOCKED_HOSTNAME_PATTERNS.some(re => re.test(hostname))
/**
 * Convert a dotted-quad IPv4 string into its unsigned 32-bit integer value.
 *
 * Exactly four dot-separated fields are required, each consisting solely of
 * decimal digits and evaluating to 0–255. Leading zeros are read as decimal
 * (`010` is ten, not eight).
 *
 * @param ip - Candidate dotted-quad string.
 * @returns The address as an unsigned 32-bit number, or `null` when `ip` is
 *   not a well-formed dotted quad.
 */
function ipv4DottedToInt(ip: string): number | null {
  const fields = ip.split('.')
  if (fields.length !== 4) return null

  let packed = 0
  for (let idx = 0; idx < fields.length; idx++) {
    const text = fields[idx]
    // Digits only: rejects empty fields, signs, hex prefixes and whitespace.
    if (text === undefined || !/^\d+$/.test(text)) return null
    const octet = Number(text)
    const fitsInByte = Number.isInteger(octet) && octet >= 0 && octet <= 255
    if (!fitsInByte) return null
    // Multiply instead of shifting so the accumulator never goes negative.
    packed = packed * 256 + octet
  }
  return packed >>> 0
}
/**
 * Report whether an IPv4 address, given as a 32-bit integer, lies in one of
 * the blocked ranges: 0.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 127.0.0.0/8,
 * 169.254.0.0/16, 172.16.0.0/12 or 192.168.0.0/16.
 *
 * Only the two most significant octets are inspected.
 *
 * @param n - IPv4 address as an integer (most significant octet first).
 * @returns `true` when the address falls inside a blocked range.
 */
function isPrivateIPv4Int(n: number): boolean {
  const first = (n >>> 24) & 0xff
  const second = (n >>> 16) & 0xff

  switch (first) {
    case 0: // 0.0.0.0/8 "this network"
    case 10: // 10.0.0.0/8
    case 127: // 127.0.0.0/8 loopback
      return true
    case 100: // 100.64.0.0/10 CGNAT → second octet 64..127
      return second >= 64 && second <= 127
    case 169: // 169.254.0.0/16 link-local
      return second === 254
    case 172: // 172.16.0.0/12 → second octet 16..31
      return second >= 16 && second <= 31
    case 192: // 192.168.0.0/16
      return second === 168
    default:
      return false
  }
}
/**
 * Parse a textual IPv6 address into its 16 bytes (most significant first).
 *
 * The input must not be wrapped in brackets. Anything from the first `%`
 * onward (a zone id) is ignored. At most one `::` zero-run is allowed, and
 * the last 32 bits may be written as a dotted-quad IPv4 suffix, including
 * directly after `::` (e.g. `::ffff:127.0.0.1`, `::1.2.3.4`,
 * `64:ff9b::10.0.0.1`).
 *
 * @param input - IPv6 literal without brackets.
 * @returns The 16 address bytes, or `null` when the input is malformed.
 */
function parseIPv6(input: string): Uint8Array | null {
  let hexPart = input.split('%')[0] ?? ''
  if (hexPart === '') return null

  // Split off a trailing embedded IPv4; it stands in for the last two groups.
  let trailingV4: [number, number, number, number] | null = null
  const v4m = /^(.*:)(\d+\.\d+\.\d+\.\d+)$/.exec(hexPart)
  if (v4m) {
    const prefix = v4m[1] ?? ''
    const n = ipv4DottedToInt(v4m[2] ?? '')
    if (n === null) return null
    trailingV4 = [(n >>> 24) & 0xff, (n >>> 16) & 0xff, (n >>> 8) & 0xff, n & 0xff]
    if (prefix.endsWith('::')) {
      // The colon before the v4 is part of the `::` marker. Keep it intact,
      // otherwise `::1.2.3.4` / `1::1.2.3.4` lose their compression marker
      // and are rejected as malformed.
      hexPart = prefix
    } else {
      // Plain separator colon between the last hex group and the v4 suffix.
      hexPart = prefix.slice(0, -1)
      // A lone leading colon (":1.2.3.4") is not a valid address.
      if (hexPart === '') return null
    }
  }

  const halves = hexPart.split('::')
  if (halves.length > 2) return null // more than one `::`
  const compressed = halves.length === 2
  const [head = '', tail = ''] = halves
  const left = head === '' ? [] : head.split(':')
  const right = tail === '' ? [] : tail.split(':')

  const groupsNeeded = trailingV4 ? 6 : 8
  const present = left.length + right.length
  if (compressed ? present > groupsNeeded : present !== groupsNeeded) return null

  // Expand `::` into however many zero groups are missing.
  const fill = compressed ? groupsNeeded - present : 0
  const groups = [...left, ...Array<string>(fill).fill('0'), ...right]

  const bytes = new Uint8Array(16)
  let offset = 0
  for (const group of groups) {
    // Also rejects empty groups produced by stray or doubled colons.
    if (!/^[0-9a-f]{1,4}$/i.test(group)) return null
    const value = parseInt(group, 16)
    bytes[offset++] = (value >>> 8) & 0xff
    bytes[offset++] = value & 0xff
  }
  if (trailingV4) bytes.set(trailingV4, offset)
  return bytes
}
/**
 * Classify a parsed IPv6 address as private / reserved for SSRF purposes.
 *
 * Blocks `::1`, `::`, `fc00::/7`, `fe80::/10` and `fec0::/10`. IPv4-mapped
 * (`::ffff:a.b.c.d`) and IPv4-compatible (`::a.b.c.d`) addresses are judged
 * by handing their embedded IPv4 to `isPrivateIPv4Int`.
 *
 * @param bytes - Address bytes, most significant first (16 expected).
 * @returns `true` when the address is in a blocked range.
 */
function isPrivateIPv6(bytes: Uint8Array): boolean {
  // Number of leading zero bytes; every prefix test below derives from it.
  let zeroRun = 0
  while (zeroRun < bytes.length && bytes[zeroRun] === 0) zeroRun++

  // ::1 loopback
  if (zeroRun >= 15 && bytes[15] === 1) return true
  // :: unspecified
  if (zeroRun === bytes.length) return true

  // Low 32 bits read as an IPv4 address (used by the two embedded-v4 forms).
  const embeddedV4 =
    (((bytes[12] ?? 0) << 24) |
      ((bytes[13] ?? 0) << 16) |
      ((bytes[14] ?? 0) << 8) |
      (bytes[15] ?? 0)) >>>
    0

  // IPv4-mapped ::ffff:a.b.c.d
  if (zeroRun >= 10 && bytes[10] === 0xff && bytes[11] === 0xff) {
    return isPrivateIPv4Int(embeddedV4)
  }

  // IPv4-compatible (deprecated) ::a.b.c.d — private iff the embedded v4 is.
  if (zeroRun >= 12) {
    if (embeddedV4 === 0 || embeddedV4 === 1) return false
    return isPrivateIPv4Int(embeddedV4)
  }

  const first = bytes[0] ?? 0
  const second = bytes[1] ?? 0
  // ULA fc00::/7
  if ((first & 0xfe) === 0xfc) return true
  if (first !== 0xfe) return false
  // Link-local fe80::/10 or site-local (deprecated) fec0::/10
  const topBits = second & 0xc0
  return topBits === 0x80 || topBits === 0xc0
}
/**
 * Decide whether a URL hostname is a loopback / private / reserved target
 * that a search adapter must not be allowed to reach (SSRF guard).
 *
 * Handles `localhost` (including the fully-qualified `localhost.` spelling
 * and `*.localhost` subdomains), dotted-quad IPv4 literals, and IPv6 literals
 * with or without surrounding brackets. Other DNS names are not resolved and
 * yield `false`.
 *
 * @param hostname - Hostname as produced by `new URL(...).hostname`.
 * @returns `true` when the hostname names a blocked address.
 */
export function isPrivateHostname(hostname: string): boolean {
  // `URL.hostname` keeps a root-label dot on domain names ("localhost."),
  // which would otherwise slip past the checks below. Strip one trailing dot.
  const host = hostname.endsWith('.') ? hostname.slice(0, -1) : hostname

  // `localhost` and anything beneath it are reserved for loopback (RFC 6761).
  if (/^(?:.+\.)?localhost$/i.test(host)) return true

  // URL.hostname wraps IPv6 literals in brackets; strip for parsing.
  const unwrapped = host.startsWith('[') && host.endsWith(']') ? host.slice(1, -1) : host

  // IPv4 dotted-quad (WHATWG URL normalizes short/numeric/hex/octal to this).
  const v4 = ipv4DottedToInt(unwrapped)
  if (v4 !== null) return isPrivateIPv4Int(v4)

  // IPv6
  if (unwrapped.includes(':')) {
    const bytes = parseIPv6(unwrapped)
    if (bytes) return isPrivateIPv6(bytes)
  }
  return false
}
/**