Files
brain/tools/enforce-safe-baseline-metering.mjs
T

228 lines
9.7 KiB
JavaScript

#!/usr/bin/env node
/**
* enforce-safe-baseline-metering — PreToolUse wrapper around the pure
* safe-baseline-metering module (router-gate v4 §3.1.2 Direction 1).
*
* Catches skill-substitution laundering: many Read/Grep/Glob/LS/TodoWrite/
* AskUserQuestion calls used as an analysis channel INSTEAD of invoking the
* recommended Skill, then a mutating tool (Edit/Write/Bash/…) lands without any
* skill ever matching. Safe-baseline tools themselves stay allowed (legit
* continuation reading); only a mutating tool past the hard threshold is blocked.
*
* Stream H tail — adds the wrapper. Pure metering + threshold logic live in
* safe-baseline-metering.mjs; this file is just the hook entry composition.
*
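* Convention (mirrors enforce-decomposition-detector.mjs): the testable units are
* the pure `decide()` / `processEvent()` / `runLiveDecision()` compositions plus
* the injectable `runMain()` head. The live `main()` is wired: it derives the
* task boundary from prompt keywords, detects skill-match from the transcript,
* persists a per-session counter ledger, and blocks when the decision is
* `hard_block`. Any exception inside `runMain()` fails quiet (allow), so the hook
* cannot lock out or crash the session.
* NOTE(review): an earlier revision of this header described `main()` as a
* deferred no-op and settings.json registration as deferred — confirm whether
* registration is still pending.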
*/
import {
incrementCounter,
evaluateThresholds,
DEFAULT_THRESHOLDS,
newCounterState,
shouldInheritTaskId,
deriveTaskId,
} from './safe-baseline-metering.mjs';
import { readFileSync, writeFileSync, appendFileSync, mkdirSync } from 'node:fs';
import { join } from 'node:path';
import { homedir } from 'node:os';
import {
readStdin,
parseEventJson,
readTranscript,
lastUserPromptText,
lastTurnEntries,
exitDecision,
logSafeBaselineAction,
} from './enforce-hook-helpers.mjs';
/**
 * Pure decision step: bump the per-task counter for `toolName`, then run the
 * threshold evaluation against the updated state.
 *
 * @param {object} args
 * @param {object} args.state - current per-task counter state (newCounterState shape)
 * @param {string} args.toolName - the tool about to run
 * @param {boolean} [args.skillMatched] - whether a recommended Skill matched in this task
 * @param {object} [args.thresholds] - override for DEFAULT_THRESHOLDS
 * @returns {{state:object, action:'allow'|'soft_flag'|'hard_block', reason?:string}}
 *   the updated state together with the evaluator's action and reason
 */
export function decide({ state, toolName, skillMatched = false, thresholds = DEFAULT_THRESHOLDS }) {
  const updatedState = incrementCounter(state, toolName);
  const { action, reason } = evaluateThresholds(updatedState, toolName, skillMatched, thresholds);
  return { state: updatedState, action, reason };
}
/**
 * Task-boundary head: work out whether this event belongs to the prior task or
 * opens a new one, then meter it through `decide()`.
 *
 * The continuation predicate is `shouldInheritTaskId` from the pure module
 * (reset markers and keyword overlap are judged there). When there is no prior
 * ledger, no prior state, or the predicate is falsy, a fresh counter state is
 * created from the prompt; otherwise the prior counters are carried forward.
 *
 * @param {object} args
 * @param {object} args.event - PreToolUse event ({ tool_name })
 * @param {object|null} args.priorLedger - { state, lastKeywords } from the last event, or null
 * @param {string[]} args.currentKeywords - keywords distilled from the current prompt
 * @param {string} args.promptText - the current user prompt (for reset-marker detection)
 * @param {boolean} [args.skillMatched] - whether a recommended Skill matched in this task
 * @param {object} [args.thresholds] - override for DEFAULT_THRESHOLDS
 * @returns {{action:string, reason?:string, ledger:{state:object, lastKeywords:string[]}}}
 */
export function processEvent({
  event,
  priorLedger,
  currentKeywords = [],
  promptText = '',
  skillMatched = false,
  thresholds = DEFAULT_THRESHOLDS,
}) {
  const toolName = event && event.tool_name;

  const continuesPriorTask =
    priorLedger &&
    priorLedger.state &&
    shouldInheritTaskId(priorLedger.lastKeywords || [], currentKeywords, promptText);

  let startingState;
  if (continuesPriorTask) {
    startingState = priorLedger.state;
  } else {
    // Fresh task: counters start from a new state keyed by the prompt.
    startingState = newCounterState({
      taskId: deriveTaskId(promptText),
      startedAtIso: '',
      firstPromptExcerpt: promptText,
    });
  }

  const { action, reason, state } = decide({
    state: startingState,
    toolName,
    skillMatched,
    thresholds,
  });

  return {
    action,
    reason,
    ledger: { state, lastKeywords: currentKeywords },
  };
}
// ── 1b live-wiring: pure helpers (safe-baseline-live-wiring-design.md v4) ──
// Common RU imperatives + RU/EN stopwords that would otherwise create spurious
// keyword overlap between unrelated tasks (G2). extractKeywords() drops tokens
// shorter than 4 chars before consulting this set, so the 3-char entries below
// can never match; only the >=4-char words take effect.
const STOPWORDS = new Set([
  'сделай', 'сделать', 'проверь', 'проверить', 'посмотри', 'добавь', 'добавить',
  'напиши', 'написать', 'нужно', 'надо', 'давай', 'можешь', 'потом', 'после',
  'перед', 'через', 'очень', 'если', 'чтобы', 'этот', 'эта', 'это', 'эти',
  'или', 'тоже', 'также', 'когда', 'пока', 'весь', 'всё', 'все', 'теперь',
  'здесь', 'там', 'нет', 'есть', 'будет', 'было', 'твой', 'мой', 'самый',
  'then', 'this', 'that', 'with', 'from', 'your', 'please', 'just', 'make',
  'check', 'look', 'need', 'want', 'also', 'into', 'more', 'very', 'should',
  'will', 'have', 'does', 'done', 'them', 'they', 'here', 'there',
]);

/**
 * Deterministic keyword extraction (H1): NFC-normalize, lowercase, split on
 * anything that is not a Unicode letter or number, drop tokens shorter than
 * 4 chars and stopwords, then return the unique tokens sorted.
 *
 * @param {string} promptText - raw user prompt
 * @returns {string[]} sorted unique keywords; `[]` when `promptText` is not a string
 */
export function extractKeywords(promptText) {
  if (typeof promptText !== 'string') return [];
  const tokens = promptText
    // Compose canonically-equivalent sequences first: the split pattern treats
    // combining marks as separators, so a decomposed "й" (и + U+0306) would
    // otherwise cut the word in two and dodge the stopword set.
    .normalize('NFC')
    .toLowerCase()
    .split(/[^\p{L}\p{N}]+/u)
    .filter((t) => t.length >= 4 && !STOPWORDS.has(t));
  return [...new Set(tokens)].sort();
}
// Tool names whose invocation counts as "a skill matched" for this layer.
const SKILL_MATCH_TOOLS = new Set(['Skill', 'EnterPlanMode']);

/**
 * C2/V2-5: reports whether any entry of the turn carries a `tool_use` content
 * block naming Skill or EnterPlanMode.
 *
 * @param {Array<object>} turnEntries - transcript entries for the turn
 * @returns {boolean} false for non-array input or when no such block exists
 */
export function detectSkillMatch(turnEntries) {
  if (!Array.isArray(turnEntries)) return false;

  const isSkillToolUse = (block) =>
    Boolean(block) && block.type === 'tool_use' && SKILL_MATCH_TOOLS.has(block.name);

  return turnEntries.some((entry) => {
    const content = entry && entry.message && entry.message.content;
    // Entries whose content is not a block array (e.g. plain text) are skipped.
    return Array.isArray(content) && content.some(isSkillToolUse);
  });
}
/**
 * V2-1 stickiness contract: the pure pipeline neither persists nor task-scopes
 * skill-match, so this wrapper owns it.
 *
 * Steps: evaluate the same inherit predicate `processEvent` uses; carry the
 * prior sticky flag only when the task is inherited; OR in this turn's match;
 * run `processEvent` with the effective flag; then stamp that flag onto the
 * returned ledger state (`skill_match_within_task`) so it is persisted.
 *
 * @param {object} args
 * @param {object} args.event - PreToolUse event
 * @param {object|null} args.priorLedger - previously persisted ledger, or null
 * @param {string} args.promptText - current user prompt
 * @param {string[]} args.currentKeywords - keywords of the current prompt
 * @param {boolean} args.skillMatchedThisTurn - skill/plan tool seen in this turn
 * @param {object} [args.thresholds] - forwarded to `processEvent`
 * @returns {object} the `processEvent` result, with the sticky flag written into `ledger.state`
 */
export function runLiveDecision({ event, priorLedger, promptText, currentKeywords, skillMatchedThisTurn, thresholds }) {
  const sameTask = Boolean(
    priorLedger &&
      priorLedger.state &&
      shouldInheritTaskId(priorLedger.lastKeywords || [], currentKeywords, promptText),
  );

  // A sticky match from an earlier turn only counts while the task is inherited.
  const carriedMatch = sameTask && Boolean(priorLedger.state.skill_match_within_task);
  const skillMatched = carriedMatch || Boolean(skillMatchedThisTurn);

  const outcome = processEvent({
    event,
    priorLedger,
    currentKeywords,
    promptText,
    skillMatched,
    thresholds,
  });

  outcome.ledger.state.skill_match_within_task = skillMatched;
  return outcome;
}
// ── live I/O composition ──
// Appended to every hard-block message so the user sees the way out.
const ESCAPE_MSG = 'invoke the recommended Skill, or EnterPlanMode, to proceed (skill/plan invocations are never blocked by this layer).';

/**
 * Resolve the directory holding the ledger and flag files.
 *
 * @param {string} [override] - explicit directory; used as-is when truthy
 * @returns {string} `override`, or `<home>/.claude/runtime` when none is given
 */
function ledgerDir(override) {
  if (override) return override;
  return join(homedir(), '.claude', 'runtime');
}
/**
 * Read and parse the persisted ledger for a session.
 *
 * @param {string} dir - runtime directory
 * @param {string} [sess] - session id; falsy values map to the 'unknown' file
 * @returns {*} the parsed JSON value, or null when the file is missing,
 *   unreadable, or not valid JSON
 */
function loadLedger(dir, sess) {
  try {
    const ledgerPath = join(dir, `safe-baseline-ledger-${sess || 'unknown'}.json`);
    const raw = readFileSync(ledgerPath, 'utf8');
    return JSON.parse(raw);
  } catch {
    // Any failure is treated as "no prior ledger".
    return null;
  }
}
/**
 * Persist the ledger for a session as JSON, creating the directory if needed.
 * Best-effort: every failure is swallowed so the hook keeps running.
 *
 * @param {string} dir - runtime directory (created recursively)
 * @param {string} [sess] - session id; falsy values map to the 'unknown' file
 * @param {object} ledger - value to serialize
 * @returns {void}
 */
function saveLedger(dir, sess, ledger) {
  try {
    mkdirSync(dir, { recursive: true });
    const ledgerPath = join(dir, `safe-baseline-ledger-${sess || 'unknown'}.json`);
    const payload = JSON.stringify(ledger);
    writeFileSync(ledgerPath, payload);
  } catch {
    /* fail-quiet */
  }
}
/**
 * Append one JSON line to the session's flag log, creating the directory if
 * needed. The record is `entry` with a leading `ts` ISO timestamp (a `ts` key
 * inside `entry` takes precedence). Failures are ignored.
 *
 * @param {string} dir - runtime directory (created recursively)
 * @param {string} [sess] - session id; falsy values map to the 'unknown' file
 * @param {object} entry - fields to record
 * @returns {void}
 */
function logFlag(dir, sess, entry) {
  try {
    mkdirSync(dir, { recursive: true });
    const flagsPath = join(dir, `safe-baseline-flags-${sess || 'unknown'}.jsonl`);
    const record = { ts: new Date().toISOString(), ...entry };
    appendFileSync(flagsPath, `${JSON.stringify(record)}\n`);
  } catch {
    /* ignore */
  }
}
/**
 * Testable live head: builds the decision inputs from the event and transcript,
 * runs the decision, persists the ledger, and logs non-allow outcomes.
 * Fail-quiet: any exception yields `{ block: false }`.
 *
 * @param {object} [args]
 * @param {object} args.event - PreToolUse event (session_id, tool_name, transcript_path)
 * @param {string} [args.runtimeDir] - override for the ledger directory
 * @param {*} [args.transcript] - pre-loaded transcript; when falsy it is read
 *   from `event.transcript_path`
 * @returns {Promise<{block:boolean, message?:string}>} `block: true` with a
 *   message only when the decision is `hard_block`
 */
export async function runMain({ event, runtimeDir, transcript: injectedTranscript } = {}) {
  try {
    const { session_id: sess, tool_name: tool } = event;
    const dir = ledgerDir(runtimeDir);

    const transcript = injectedTranscript || readTranscript(event.transcript_path);
    const promptText = lastUserPromptText(transcript) || '';
    const currentKeywords = extractKeywords(promptText);
    // The tool being invoked right now counts as a match too, so Skill and
    // EnterPlanMode calls are never the ones that get blocked.
    const skillMatchedThisTurn =
      detectSkillMatch(lastTurnEntries(transcript)) || SKILL_MATCH_TOOLS.has(tool);

    const priorLedger = loadLedger(dir, sess);
    const { action, reason, ledger } = runLiveDecision({
      event,
      priorLedger,
      promptText,
      currentKeywords,
      skillMatchedThisTurn,
    });
    saveLedger(dir, sess, ledger);

    if (action === 'allow') return { block: false };

    logSafeBaselineAction(sess, { tool, action }, { baseDir: dir });
    if (action === 'hard_block') {
      return { block: true, message: `[safe-baseline] ${reason}\n${ESCAPE_MSG}` };
    }
    if (action === 'soft_flag') logFlag(dir, sess, { tool, reason });
    return { block: false };
  } catch {
    // fail-quiet — never crash the session
    return { block: false };
  }
}
/**
 * Hook entry point: read the event JSON from stdin, run the live head, and
 * hand the resulting decision to `exitDecision`.
 */
async function main() {
  const rawInput = await readStdin();
  const decision = await runMain({ event: parseEventJson(rawInput) });
  exitDecision(decision);
}
// Run only when this file is the invoked script (backslashes are normalized so
// Windows-style paths match); a rejected main() exits 0 so the hook never fails.
const invokedScriptPath = (process.argv[1] || '').replace(/\\/g, '/');
if (invokedScriptPath.endsWith('/enforce-safe-baseline-metering.mjs')) {
  main().catch(() => process.exit(0));
}