Files
brain/tools/static-content-scanner.mjs
T

170 lines
5.8 KiB
JavaScript

#!/usr/bin/env node
/**
* Static content scanner (router-gate v4 Stream C, spec §5.2).
*
* Pure multi-language suspicious-pattern scan for test/boot files. No I/O:
* consumers (enforce-router-gate.mjs) read files via fs and call scanContent().
* Owns TEST_RUNNERS — shared with framework-boot-scanner.mjs (F7).
*/
// §5.2 test-runner detection.
// One regex per ecosystem command line. Every pattern is unanchored and
// flag-free (no `g`/`y`), so `.test()` is stateless and the array can be shared
// between callers safely.
export const TEST_RUNNERS = [
  // PHP (Composer scripts, Laravel artisan, vendored binaries)
  /\bcomposer\s+(?:test|phpunit|pest)\b/,
  /\bphp\s+artisan\s+test\b/,
  /\bvendor\/bin\/(?:phpunit|pest)\b/,

  // Ruby
  /\bbundle\s+exec\s+(?:rspec|rake|test)\b/,

  // Go
  /\bgo\s+(?:test|run)\b/,

  // JVM (Maven, Gradle)
  /\bmvn\s+(?:test|package)\b/,
  /\bgradle\s+(?:test|build)\b/,

  // Rust
  /\bcargo\s+(?:test|run)\b/,

  // .NET
  /\bdotnet\s+(?:test|run)\b/,

  // Node
  /\bnpx\s+vitest\b/,
  /\bnpm\s+(?:test|run\s+test)\b/,
];
/**
 * Report whether a shell command line invokes one of the known test runners.
 * @param {unknown} command - command line to inspect; non-strings never match.
 * @returns {boolean} true when at least one TEST_RUNNERS pattern matches.
 */
export function isTestRunner(command) {
  if (typeof command === 'string') {
    for (const runnerPattern of TEST_RUNNERS) {
      if (runnerPattern.test(command)) return true;
    }
  }
  return false;
}
// Lower-cased file extension (leading dot included) -> pattern-table key.
// The values must match the keys of the per-language pattern tables below.
const EXT_LANG = {
  '.php': 'php',
  '.rb': 'ruby',
  '.go': 'go',
  '.java': 'java',
  '.kt': 'java', // JVM treated as java patterns
  '.rs': 'rust',
  '.cs': 'dotnet',
};
/**
 * Map a file path to a pattern-table language key using its final extension.
 * The comparison is case-insensitive; everything from the last `.` onward is
 * treated as the extension.
 * @param {unknown} filePath - path to classify; non-strings yield null.
 * @returns {string|null} language key from EXT_LANG, or null when unknown.
 */
export function detectLanguage(filePath) {
  if (typeof filePath === 'string') {
    const normalized = filePath.toLowerCase();
    const extStart = normalized.lastIndexOf('.');
    if (extStart >= 0) {
      const language = EXT_LANG[normalized.substring(extStart)];
      if (language) return language;
    }
  }
  return null;
}
// Always-suspicious: code execution + dynamic dispatch.
// Each entry is { name, re } (single regex) or { name, all: [re,...] } (co-occurrence,
// ALL must match somewhere in source — independent anchored tests, no proximity regex,
// avoids catastrophic backtracking).
// No pattern carries the `g`/`y` flag, so `.test()` stays stateless across scans.
const ALWAYS_PATTERNS = {
  // PHP function names are case-insensitive (`EXEC(...)` and `Eval(...)` are the
  // same calls), so every identifier pattern here carries the `i` flag.
  php: [
    { name: 'exec', re: /\bexec\s*\(/i },
    { name: 'system', re: /\bsystem\s*\(/i },
    { name: 'passthru', re: /\bpassthru\s*\(/i },
    { name: 'shell_exec', re: /\bshell_exec\s*\(/i },
    { name: 'popen', re: /\bpopen\s*\(/i },
    { name: 'proc_open', re: /\bproc_open\s*\(/i },
    { name: 'eval', re: /\beval\s*\(/i },
    { name: 'assert', re: /\bassert\s*\(/i },
    { name: 'pcntl_exec', re: /\bpcntl_exec\s*\(/i },
    { name: 'pcntl_fork', re: /\bpcntl_fork\s*\(/i },
    // Only backticks on the right-hand side of `=` are reported.
    { name: 'backtick', re: /=\s*`[^`]*`/ },
  ],
  // Ruby identifiers are case-sensitive; patterns stay case-sensitive.
  ruby: [
    { name: 'Kernel.eval', re: /\bKernel\.eval\b/ },
    { name: 'eval', re: /\beval\s*\(/ },
    { name: 'instance_eval', re: /\binstance_eval\b/ },
    { name: 'class_eval', re: /\bclass_eval\b/ },
    { name: 'system', re: /\bsystem\s*\(/ },
    { name: 'exec', re: /\bexec\s*\(/ },
    { name: 'IO.popen', re: /\bIO\.popen\b/ },
    // No trailing \b: also matches popen2 / popen2e / popen3.
    { name: 'Open3.popen', re: /\bOpen3\.popen/ },
    { name: 'backtick', re: /=\s*`[^`]*`/ },
    { name: 'send', re: /\b(?:public_)?send\s*\(/ },
  ],
  go: [
    { name: 'exec.Command', re: /\bexec\.Command(?:Context)?\b/ },
    { name: 'syscall.Exec', re: /\bsyscall\.(?:Exec|ForkExec)\b/ },
    { name: 'reflect.Call', all: [/\breflect\.ValueOf\b/, /\.Call\s*\(/] },
  ],
  java: [
    { name: 'Runtime.exec', re: /\bRuntime(?:\.getRuntime\(\))?\.exec\s*\(/ },
    { name: 'ProcessBuilder', re: /\bProcessBuilder\b/ },
    { name: 'Method.invoke', all: [/\bget(?:Declared)?Method\s*\(/, /\.invoke\s*\(/] },
  ],
  rust: [
    { name: 'process.Command', re: /\b(?:std::)?process::Command\b|\bCommand::new\b/ },
  ],
  dotnet: [
    { name: 'Process.Start', re: /\bProcess\.Start\b/ },
    { name: 'ProcessStartInfo', re: /\bProcessStartInfo\b/ },
    { name: 'Assembly.Load', all: [/\bAssembly\.Load\b/, /\.Invoke\s*\(/] },
  ],
};
// Protected-sensitive: file/dir deletion + write — only matter when the target is
// a protected path (consumer decides). Reported with category 'protected_sensitive'.
// Same entry shape as the always-suspicious table: { name, re }.
const PROTECTED_SENSITIVE_PATTERNS = {
  // PHP function names are case-insensitive, hence the `i` flag.
  php: [
    { name: 'file_put_contents', re: /\bfile_put_contents\s*\(/i },
    { name: 'unlink', re: /\bunlink\s*\(/i },
    { name: 'rmdir', re: /\brmdir\s*\(/i },
  ],
  ruby: [
    { name: 'File.delete', re: /\bFile\.delete\b/ },
    // `_` is a word character, so a bare `rm\b` never matched rm_rf / rm_r / rm_f.
    // Enumerate the FileUtils removal methods and their aliases explicitly.
    {
      name: 'FileUtils.rm',
      re: /\bFileUtils\.(?:rm(?:_rf?|_f|dir|tree)?|remove(?:_dir|_file|_entry(?:_secure)?)?|safe_unlink)\b/,
    },
    { name: 'Dir.delete', re: /\bDir\.delete\b/ },
  ],
  go: [
    { name: 'os.Remove', re: /\bos\.Remove(?:All)?\s*\(/ },
  ],
  java: [
    { name: 'Files.delete', re: /\bFiles\.delete(?:IfExists)?\s*\(/ },
  ],
  rust: [
    // Covers remove_file, remove_dir and remove_dir_all.
    { name: 'fs.remove', re: /\b(?:std::)?fs::remove_(?:file|dir(?:_all)?)\b/ },
  ],
  dotnet: [
    { name: 'File.Delete', re: /\bFile\.Delete\b/ },
    { name: 'Directory.Delete', re: /\bDirectory\.Delete\b/ },
  ],
};
/**
 * Evaluate one pattern entry against the source text.
 * @param {string} source - text to test.
 * @param {{re?: RegExp, all?: RegExp[]}} p - single-regex entry (`re`) or
 *   co-occurrence entry (`all`: every regex must match somewhere in source).
 * @returns {boolean} whether the entry matches.
 */
function matchPattern(source, p) {
  const required = p.all;
  if (!required) return p.re.test(source);
  // Co-occurrence: bail out on the first regex that fails.
  for (const re of required) {
    if (!re.test(source)) return false;
  }
  return true;
}
/**
 * Scan source text of a known language for suspicious patterns.
 *
 * Always-suspicious findings come first, followed by protected-sensitive ones
 * (when enabled); within each category the pattern-table order is kept. Each
 * pattern is reported at most once per scan.
 *
 * @param {string} source - file contents. Non-strings produce no findings.
 * @param {string} lang - 'php'|'ruby'|'go'|'java'|'rust'|'dotnet'. Any other
 *   value (including names inherited from Object.prototype such as
 *   'constructor') produces no findings.
 * @param {{includeProtectedSensitive?: boolean}} [opts]
 * @returns {Array<{name: string, category: 'always'|'protected_sensitive', lang: string}>}
 */
export function scanContent(source, lang, opts = {}) {
  // `opts || {}` tolerates an explicit null, which default parameters do not.
  const { includeProtectedSensitive = true } = opts || {};
  const findings = [];
  if (typeof source !== 'string' || !lang) return findings;

  // Own-property lookup only: a plain `table[lang] || []` would resolve inherited
  // members (e.g. lang === 'constructor' or '__proto__') to a non-iterable value
  // and make the for...of below throw.
  const patternsFor = (table) =>
    (Object.prototype.hasOwnProperty.call(table, lang) ? table[lang] : []);

  for (const p of patternsFor(ALWAYS_PATTERNS)) {
    if (matchPattern(source, p)) findings.push({ name: p.name, category: 'always', lang });
  }
  if (includeProtectedSensitive) {
    for (const p of patternsFor(PROTECTED_SENSITIVE_PATTERNS)) {
      if (matchPattern(source, p)) {
        findings.push({ name: p.name, category: 'protected_sensitive', lang });
      }
    }
  }
  return findings;
}
/**
 * Convenience wrapper: derive the language from the path, load the contents
 * through the injected reader, and scan them.
 *
 * Returns an empty list when the extension is not recognised or when the
 * reader throws; the reader is not called for unrecognised extensions.
 * Errors raised by scanContent itself are not caught.
 *
 * @param {string} filePath
 * @param {(path: string) => string} readFile - injected (e.g. fs.readFileSync utf8).
 * @param {object} [opts] - forwarded to scanContent.
 * @returns {Array<object>} whatever scanContent returns, or [] as described above.
 */
export function scanFileWith(filePath, readFile, opts = {}) {
  const language = detectLanguage(filePath);
  let readable = Boolean(language);
  let contents;
  if (readable) {
    try {
      contents = readFile(filePath);
    } catch {
      // Best-effort: an unreadable file is reported as having no findings.
      readable = false;
    }
  }
  return readable ? scanContent(contents, language, opts) : [];
}