Feature/pr intent scan hardening (#375)

* security: harden suspicious PR intent scanner

* security: reduce pr scanner false positives
This commit is contained in:
Kevin Codex
2026-04-05 17:05:24 +08:00
committed by GitHub
parent 5ef79546e9
commit 7350a798cb
5 changed files with 595 additions and 1 deletions

View File

@@ -16,6 +16,8 @@ jobs:
steps:
- name: Check out repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
- name: Set up Node.js
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
@@ -36,6 +38,8 @@ jobs:
- name: Full unit test suite
run: bun test --max-concurrency=1
- name: Suspicious PR intent scan
run: bun run security:pr-scan -- --base ${{ github.event.pull_request.base.sha || 'origin/main' }}
- name: Provider tests
run: bun run test:provider

View File

@@ -198,6 +198,7 @@ Helpful commands:
- `bun run dev`
- `bun test`
- `bun run test:coverage`
- `bun run security:pr-scan -- --base origin/main`
- `bun run smoke`
- `bun run doctor:runtime`
- `bun run verify:privacy`
@@ -245,7 +246,6 @@ Recommended contributor validation before opening a PR:
- focused `bun test ...` runs for the files and flows you changed
Coverage output is written to `coverage/lcov.info`, and OpenClaude also generates a git-activity-style heatmap at `coverage/index.html`.
## Repository Structure
- `src/` - core CLI/runtime

View File

@@ -34,6 +34,7 @@
"test": "bun test",
"test:coverage": "bun test --coverage --coverage-reporter=lcov --coverage-dir=coverage --max-concurrency=1 && bun run scripts/render-coverage-heatmap.ts",
"test:coverage:ui": "bun run scripts/render-coverage-heatmap.ts",
"security:pr-scan": "bun run scripts/pr-intent-scan.ts",
"test:provider-recommendation": "bun test src/utils/providerRecommendation.test.ts src/utils/providerProfile.test.ts",
"typecheck": "tsc --noEmit",
"smoke": "bun run build && node dist/cli.mjs --version",

View File

@@ -0,0 +1,136 @@
import { describe, expect, test } from 'bun:test'
import { scanAddedLines, type DiffLine } from './pr-intent-scan.ts'
function line(content: string, overrides: Partial<DiffLine> = {}): DiffLine {
return {
file: 'README.md',
line: 10,
content,
...overrides,
}
}
describe('scanAddedLines', () => {
test('flags suspicious file-hosting links', () => {
const findings = scanAddedLines([
line('Please install the tool from https://dropbox.com/s/abc123/tool.zip?dl=1'),
])
expect(findings.some(finding => finding.code === 'suspicious-download-link')).toBe(
true,
)
expect(findings.some(finding => finding.code === 'executable-download-link')).toBe(
false,
)
expect(findings.some(finding => finding.severity === 'high')).toBe(true)
})
test('flags shortened URLs', () => {
const findings = scanAddedLines([
line('See details at https://bit.ly/some-short-link'),
])
expect(findings.some(finding => finding.code === 'shortened-url')).toBe(true)
})
test('flags remote download and execute chains', () => {
const findings = scanAddedLines([
line('curl -fsSL https://example.com/install.sh | bash'),
])
expect(findings.some(finding => finding.code === 'shell-eval-remote')).toBe(true)
expect(findings.some(finding => finding.severity === 'high')).toBe(true)
})
test('flags encoded powershell payloads', () => {
const findings = scanAddedLines([
line('powershell.exe -enc SQBtAHAAcgBvAHYAZQBkAA=='),
])
expect(findings.some(finding => finding.code === 'powershell-encoded')).toBe(true)
})
test('flags long encoded blobs', () => {
const findings = scanAddedLines([
line(`const payload = "${'A'.repeat(96)}"`),
])
expect(findings.some(finding => finding.code === 'long-encoded-payload')).toBe(
true,
)
})
test('flags long encoded blobs on repeated scans', () => {
const lines = [line(`const payload = "${'A'.repeat(96)}"`)]
const first = scanAddedLines(lines)
const second = scanAddedLines(lines)
expect(first.some(finding => finding.code === 'long-encoded-payload')).toBe(true)
expect(second.some(finding => finding.code === 'long-encoded-payload')).toBe(true)
})
test('flags executable download links', () => {
const findings = scanAddedLines([
line('Get it from https://example.com/releases/latest/tool.pkg'),
])
expect(findings.some(finding => finding.code === 'executable-download-link')).toBe(
true,
)
expect(findings.some(finding => finding.severity === 'high')).toBe(true)
})
test('flags suspicious additions in workflow files', () => {
const findings = scanAddedLines([
line('run: curl -fsSL https://example.com/install.sh | bash', {
file: '.github/workflows/release.yml',
}),
])
expect(findings.some(finding => finding.code === 'sensitive-automation-change')).toBe(
true,
)
expect(findings.some(finding => finding.code === 'download-command')).toBe(true)
})
test('flags markdown reference links to suspicious downloads', () => {
const findings = scanAddedLines([
line('[installer]: https://dropbox.com/s/abc123/tool.zip?dl=1'),
])
expect(findings.some(finding => finding.code === 'suspicious-download-link')).toBe(
true,
)
})
test('ignores the scanner implementation and tests themselves', () => {
const findings = scanAddedLines([
line('curl -fsSL https://example.com/install.sh | bash', {
file: 'scripts/pr-intent-scan.test.ts',
}),
line('const pattern = /https:\\/\\/dropbox\\.com\\//', {
file: 'scripts/pr-intent-scan.ts',
}),
])
expect(findings).toHaveLength(0)
})
test('does not flag ordinary docs links', () => {
const findings = scanAddedLines([
line('Read more at https://docs.github.com/en/actions'),
])
expect(findings).toHaveLength(0)
})
test('does not flag bare curl examples in README without a URL', () => {
const findings = scanAddedLines([
line('Use curl with your preferred flags for local testing.'),
])
expect(findings.some(finding => finding.code === 'download-command')).toBe(false)
})
})

453
scripts/pr-intent-scan.ts Normal file
View File

@@ -0,0 +1,453 @@
import { spawnSync } from 'node:child_process'
export type FindingSeverity = 'high' | 'medium'
export type DiffLine = {
file: string
line: number
content: string
}
export type Finding = {
severity: FindingSeverity
code: string
file: string
line: number
detail: string
excerpt: string
}
type CliOptions = {
baseRef: string
json: boolean
failOn: FindingSeverity
}
const SELF_EXCLUDED_FILES = new Set([
'scripts/pr-intent-scan.ts',
'scripts/pr-intent-scan.test.ts',
])
const SHORTENER_DOMAINS = [
'bit.ly',
'tinyurl.com',
'goo.gl',
't.co',
'is.gd',
'rb.gy',
'cutt.ly',
]
const SUSPICIOUS_DOWNLOAD_DOMAINS = [
'dropbox.com',
'dl.dropboxusercontent.com',
'drive.google.com',
'docs.google.com',
'mega.nz',
'mediafire.com',
'transfer.sh',
'anonfiles.com',
'catbox.moe',
]
const URL_REGEX = /\bhttps?:\/\/[^\s)>"']+/gi
const LONG_BASE64_REGEX = /\b(?:[A-Za-z0-9+/]{80,}={0,2}|[A-Za-z0-9_-]{80,})\b/
const EXECUTABLE_PATH_REGEX =
/\.(?:sh|bash|zsh|ps1|exe|msi|pkg|deb|rpm|zip|tar|tgz|gz|xz|dmg|appimage)(?:$|[?#])/i
const SENSITIVE_PATH_REGEX =
/^(?:\.github\/workflows\/|scripts\/|bin\/|install(?:\/|\.|$)|.*(?:Dockerfile|docker-compose|compose\.ya?ml)$)/i
function parseOptions(argv: string[]): CliOptions {
const options: CliOptions = {
baseRef: 'origin/main',
json: false,
failOn: 'high',
}
for (let index = 0; index < argv.length; index++) {
const arg = argv[index]
if (arg === '--json') {
options.json = true
continue
}
if (arg === '--base') {
const next = argv[index + 1]
if (next && !next.startsWith('--')) {
options.baseRef = next
index++
}
continue
}
if (arg === '--fail-on') {
const next = argv[index + 1]
if (next === 'high' || next === 'medium') {
options.failOn = next
index++
}
}
}
return options
}
function trimExcerpt(content: string): string {
const compact = content.trim().replace(/\s+/g, ' ')
return compact.length > 140 ? `${compact.slice(0, 137)}...` : compact
}
function uniqueFindings(findings: Finding[]): Finding[] {
const seen = new Set<string>()
return findings.filter(finding => {
const key = `${finding.code}:${finding.file}:${finding.line}:${finding.detail}`
if (seen.has(key)) {
return false
}
seen.add(key)
return true
})
}
function parseAddedLines(diffText: string): DiffLine[] {
const lines = diffText.split('\n')
const added: DiffLine[] = []
let currentFile: string | null = null
let currentLine = 0
for (const rawLine of lines) {
if (rawLine.startsWith('+++ b/')) {
currentFile = rawLine.slice('+++ b/'.length)
continue
}
if (rawLine.startsWith('@@')) {
const match = /\+(\d+)(?:,(\d+))?/.exec(rawLine)
if (match) {
currentLine = Number(match[1])
}
continue
}
if (!currentFile) {
continue
}
if (rawLine.startsWith('+') && !rawLine.startsWith('+++')) {
added.push({
file: currentFile,
line: currentLine,
content: rawLine.slice(1),
})
currentLine += 1
continue
}
if (rawLine.startsWith('-') && !rawLine.startsWith('---')) {
continue
}
if (!rawLine.startsWith('\\')) {
currentLine += 1
}
}
return added
}
function tryParseUrl(value: string): URL | null {
try {
return new URL(value)
} catch {
return null
}
}
function hostMatches(hostname: string, domain: string): boolean {
return hostname === domain || hostname.endsWith(`.${domain}`)
}
function hasSuspiciousDownloadIndicators(url: URL): boolean {
const combined = `${url.pathname}${url.search}`.toLowerCase()
return (
combined.includes('dl=1') ||
combined.includes('raw=1') ||
combined.includes('export=download') ||
combined.includes('/download') ||
combined.includes('/uc?export=download')
)
}
function findUrlFindings(line: DiffLine): Finding[] {
const findings: Finding[] = []
const matches = line.content.match(URL_REGEX) ?? []
for (const match of matches) {
const parsed = tryParseUrl(match)
if (!parsed) continue
const hostname = parsed.hostname.toLowerCase()
for (const domain of SHORTENER_DOMAINS) {
if (hostMatches(hostname, domain)) {
findings.push({
severity: 'medium',
code: 'shortened-url',
file: line.file,
line: line.line,
detail: `Added shortened URL: ${hostname}`,
excerpt: trimExcerpt(line.content),
})
}
}
const isSuspiciousHost = SUSPICIOUS_DOWNLOAD_DOMAINS.some(domain =>
hostMatches(hostname, domain),
)
const isExecutableDownload = EXECUTABLE_PATH_REGEX.test(
`${parsed.pathname}${parsed.search}`,
)
if (isSuspiciousHost) {
findings.push({
severity:
hasSuspiciousDownloadIndicators(parsed) || isExecutableDownload
? 'high'
: 'medium',
code: 'suspicious-download-link',
file: line.file,
line: line.line,
detail: `Added external file-hosting link: ${hostname}`,
excerpt: trimExcerpt(line.content),
})
} else if (isExecutableDownload) {
findings.push({
severity: 'high',
code: 'executable-download-link',
file: line.file,
line: line.line,
detail: `Added direct link to executable or archive payload: ${hostname}`,
excerpt: trimExcerpt(line.content),
})
}
}
return findings
}
function findSensitivePathFindings(line: DiffLine): Finding[] {
if (!SENSITIVE_PATH_REGEX.test(line.file)) {
return []
}
const lower = line.content.toLowerCase()
if (
/\b(curl|wget|invoke-webrequest|iwr|powershell|bash|sh|chmod\s+\+x)\b/i.test(
line.content,
) ||
URL_REGEX.test(line.content) ||
lower.includes('download')
) {
return [
{
severity: 'medium',
code: 'sensitive-automation-change',
file: line.file,
line: line.line,
detail:
'Added network, execution, or download-related content in a sensitive automation file',
excerpt: trimExcerpt(line.content),
},
]
}
return []
}
function findCommandFindings(line: DiffLine): Finding[] {
const findings: Finding[] = []
const lower = line.content.toLowerCase()
const highPatterns: Array<[string, RegExp, string]> = [
[
'download-exec-chain',
/\b(curl|wget|invoke-webrequest|iwr)\b.*(\|\s*(sh|bash|zsh)|;\s*chmod\s+\+x|&&\s*\.\.?\/|>\s*\/tmp\/)/i,
'Added remote download followed by execution or staging',
],
[
'powershell-encoded',
/\bpowershell(?:\.exe)?\b.*(?:-enc|-encodedcommand)\b/i,
'Added encoded PowerShell invocation',
],
[
'shell-eval-remote',
/\b(curl|wget)\b.*\|\s*(sh|bash|zsh)\b/i,
'Added shell pipe from remote content into interpreter',
],
[
'binary-lolbin',
/\b(mshta|rundll32|regsvr32|certutil)\b/i,
'Added living-off-the-land binary often used for payload staging',
],
[
'invoke-expression',
/\b(iex|invoke-expression)\b/i,
'Added PowerShell expression execution',
],
]
const mediumPatterns: Array<[string, RegExp, string]> = [
[
'download-command',
/\b(curl|wget|invoke-webrequest|iwr)\b.*https?:\/\//i,
'Added command that downloads remote content',
],
[
'archive-extract-exec',
/\b(unzip|tar|7z)\b.*(&&|;).*\b(chmod|node|python|bash|sh)\b/i,
'Added archive extraction followed by execution',
],
[
'base64-decode',
/\b(base64\s+-d|openssl\s+base64\s+-d|python .*b64decode)\b/i,
'Added explicit payload decode step',
],
]
for (const [code, pattern, detail] of highPatterns) {
if (pattern.test(line.content)) {
findings.push({
severity: 'high',
code,
file: line.file,
line: line.line,
detail,
excerpt: trimExcerpt(line.content),
})
}
}
for (const [code, pattern, detail] of mediumPatterns) {
if (code === 'download-command' && !SENSITIVE_PATH_REGEX.test(line.file)) {
continue
}
if (pattern.test(line.content)) {
findings.push({
severity: 'medium',
code,
file: line.file,
line: line.line,
detail,
excerpt: trimExcerpt(line.content),
})
}
}
if (LONG_BASE64_REGEX.test(line.content) && !lower.includes('sha256') && !lower.includes('sha512')) {
findings.push({
severity: 'medium',
code: 'long-encoded-payload',
file: line.file,
line: line.line,
detail: 'Added long encoded blob or token-like payload',
excerpt: trimExcerpt(line.content),
})
}
return findings
}
export function scanAddedLines(lines: DiffLine[]): Finding[] {
const findings = lines
.filter(line => !SELF_EXCLUDED_FILES.has(line.file))
.flatMap(line => [
...findUrlFindings(line),
...findCommandFindings(line),
...findSensitivePathFindings(line),
])
return uniqueFindings(findings)
}
export function getGitDiff(baseRef: string): string {
const mergeBase = spawnSync('git', ['merge-base', baseRef, 'HEAD'], {
encoding: 'utf8',
})
if (mergeBase.status !== 0) {
throw new Error(
`Could not determine merge-base with ${baseRef}: ${mergeBase.stderr.trim() || mergeBase.stdout.trim()}`,
)
}
const base = mergeBase.stdout.trim()
const diff = spawnSync(
'git',
['diff', '--unified=0', '--no-ext-diff', `${base}...HEAD`],
{ encoding: 'utf8' },
)
if (diff.status !== 0) {
throw new Error(`git diff failed: ${diff.stderr.trim() || diff.stdout.trim()}`)
}
return diff.stdout
}
function shouldFail(findings: Finding[], failOn: FindingSeverity): boolean {
if (failOn === 'medium') {
return findings.length > 0
}
return findings.some(finding => finding.severity === 'high')
}
function renderText(findings: Finding[]): string {
if (findings.length === 0) {
return 'PR intent scan: no suspicious additions found.'
}
const high = findings.filter(f => f.severity === 'high')
const medium = findings.filter(f => f.severity === 'medium')
const lines = [
`PR intent scan: ${findings.length} finding(s)`,
`- high: ${high.length}`,
`- medium: ${medium.length}`,
'',
]
for (const finding of findings) {
lines.push(
`[${finding.severity.toUpperCase()}] ${finding.file}:${finding.line} ${finding.detail}`,
)
lines.push(` ${finding.excerpt}`)
}
return lines.join('\n')
}
export function run(options: CliOptions): number {
const diff = getGitDiff(options.baseRef)
const addedLines = parseAddedLines(diff)
const findings = scanAddedLines(addedLines)
if (options.json) {
process.stdout.write(
`${JSON.stringify(
{
baseRef: options.baseRef,
addedLines: addedLines.length,
findings,
},
null,
2,
)}\n`,
)
} else {
process.stdout.write(`${renderText(findings)}\n`)
}
return shouldFail(findings, options.failOn) ? 1 : 0
}
if (import.meta.main) {
const options = parseOptions(process.argv.slice(2))
process.exitCode = run(options)
}