Files
brain/tools/bash-tokenizer.mjs

80 lines
3.1 KiB
JavaScript

#!/usr/bin/env node
/**
* Bash tokenizer — a wrapper around shell-quote (router-gate v4 §5.1).
* Returns segments (split on control operators) plus a sub-shell flag.
* ParseError / unbalanced quotes → {ok:false} → the calling hook fails CLOSED.
*/
import { parse } from 'shell-quote';
// Operators that end a segment: when tokenizeBash meets one of these it closes
// the current token list and records the operator on that segment.
// NOTE(review): shell-quote may also emit operators such as '|&' or ';;', which
// are not listed here and would therefore stay inside a segment as plain
// tokens — confirm this is intended.
const CONTROL_OPS = new Set([';', '&&', '||', '|', '&']);
/**
 * Reports whether a command string ends while a quote is still open.
 *
 * Quoting rules applied:
 *  - inside single quotes every character is literal, including the backslash,
 *    so only the next `'` closes the quote;
 *  - outside quotes and inside double quotes a backslash escapes the following
 *    character, so an escaped quote character neither opens nor closes a quote.
 *
 * A trailing backslash on its own is not reported as unbalanced.
 *
 * @param {string} s - Raw command text.
 * @returns {boolean} `true` when a single or double quote is left unterminated.
 */
function hasUnbalancedQuotes(s) {
  let openQuote = null; // the quote character currently open, or null
  let escaped = false;
  for (const ch of s) {
    if (escaped) {
      escaped = false;
      continue;
    }
    if (openQuote === "'") {
      // No escapes exist inside single quotes; only a closing quote matters.
      if (ch === "'") openQuote = null;
      continue;
    }
    if (ch === '\\') {
      escaped = true;
      continue;
    }
    if (openQuote === '"') {
      if (ch === '"') openQuote = null;
      continue;
    }
    if (ch === "'" || ch === '"') openQuote = ch;
  }
  return openQuote !== null;
}
/**
 * Scans raw command text for constructs that run nested shell code or feed
 * inline input: backticks, `$(`, `<(`, `>(` and heredoc markers.
 *
 * The scan is purely textual; quoting is not taken into account.
 *
 * @param {string} raw - Raw command text.
 * @returns {{found: boolean, kinds: string[]}} The labels of every construct
 *   that matched, in a fixed order, and whether there was at least one.
 */
export function detectSubshell(raw) {
  const probes = [
    ['backtick', /`/],
    ['cmd-subst', /\$\(/],
    ['process-subst-in', /<\(/],
    ['process-subst-out', />\(/],
    ['heredoc', /<<-?\s*[\w'"]/],
  ];
  const kinds = probes
    .filter(([, pattern]) => pattern.test(raw))
    .map(([label]) => label);
  return { found: kinds.length > 0, kinds };
}
/**
 * Tokenizes a shell command with shell-quote's `parse` and groups the tokens
 * into segments separated by the operators in CONTROL_OPS.
 *
 * @param {string} command - Raw command line.
 * @returns {{ok: false, error: string} |
 *   {ok: true, raw: string, hasSubshell: boolean, subshellKinds: string[],
 *    segments: Array<{tokens: string[], op: (string|null)}>}}
 *   `error` is 'empty' for a non-string or blank command and 'parse_error'
 *   for unbalanced quotes or a parser exception. Each segment carries the
 *   control operator that ended it; a trailing segment has `op: null`.
 */
export function tokenizeBash(command) {
  const isBlank = typeof command !== 'string' || command.trim() === '';
  if (isBlank) {
    return { ok: false, error: 'empty' };
  }
  if (hasUnbalancedQuotes(command)) {
    return { ok: false, error: 'parse_error' };
  }

  let entries;
  try {
    entries = parse(command);
  } catch {
    return { ok: false, error: 'parse_error' };
  }

  const { found, kinds } = detectSubshell(command);
  const segments = [];
  let pending = [];
  for (const entry of entries) {
    if (typeof entry === 'string') {
      pending.push(entry);
    } else if (entry && typeof entry === 'object' && 'op' in entry) {
      const { op } = entry;
      if (op === 'glob') {
        // Globs are kept as their literal pattern text.
        pending.push(entry.pattern);
      } else if (CONTROL_OPS.has(op)) {
        segments.push({ tokens: pending, op });
        pending = [];
      } else {
        // Redirects and any other operator stay in the segment as a token.
        pending.push(op);
      }
    }
    // Anything else (e.g. a {comment} object) is dropped.
  }
  if (pending.length) {
    segments.push({ tokens: pending, op: null });
  }

  return {
    ok: true,
    raw: command,
    hasSubshell: found,
    subshellKinds: kinds,
    segments,
  };
}
// ── mutating detection (for chain rule §5.1 C13) ──

// Commands treated as mutating whenever they appear as the first token.
const MUTATING_CMDS = new Set([
  'rm', 'mv', 'cp', 'chmod', 'chown', 'chgrp', 'dd', 'truncate', 'tee',
  'mkdir', 'rmdir', 'ln', 'touch', 'sed', 'curl', 'wget', 'nc', 'ncat',
  'netcat', 'socat', 'kill', 'killall',
]);

// `git` subcommands treated as mutating (checked against the second token).
const GIT_MUTATING_SUB = new Set([
  'commit', 'push', 'merge', 'rebase', 'reset', 'checkout', 'switch',
  'branch', 'stash', 'cherry-pick', 'revert', 'pull', 'clean', 'add',
  'rm', 'mv', 'tag', 'apply', 'am',
]);

// Package-manager subcommands treated as mutating (checked against the second token).
const PKG_MUTATING_SUB = new Set(['install', 'update', 'require', 'remove', 'add', 'i']);

/**
 * Decides whether one segment's token list looks like a mutating command.
 *
 * Only the first token is read as the command name and only the second as its
 * subcommand. A segment also counts as mutating when any token is exactly
 * `>` or `>>`.
 *
 * @param {string[]} tokens - Tokens of a single segment.
 * @returns {boolean} `true` when the segment is considered mutating; `false`
 *   otherwise, including for a non-array or empty input.
 */
export function isMutatingSegment(tokens) {
  if (!Array.isArray(tokens) || tokens.length === 0) {
    return false;
  }

  const [name, sub] = tokens;
  switch (name) {
    case 'git':
      if (GIT_MUTATING_SUB.has(sub)) return true;
      break;
    case 'composer':
    case 'npm':
    case 'yarn':
    case 'pnpm':
      if (PKG_MUTATING_SUB.has(sub)) return true;
      break;
    default:
      if (MUTATING_CMDS.has(name)) return true;
  }

  // Output redirection anywhere in the segment.
  return tokens.includes('>') || tokens.includes('>>');
}