brain/tools/enforce-safe-baseline-metering.mjs

#!/usr/bin/env node
/**
 * enforce-safe-baseline-metering — PreToolUse wrapper around the pure
 * safe-baseline-metering module (router-gate v4 §3.1.2 Direction 1).
 *
 * Catches skill-substitution laundering: many Read/Grep/Glob/LS/TodoWrite/
 * AskUserQuestion calls used as an analysis channel INSTEAD of invoking the
 * recommended Skill, then a mutating tool (Edit/Write/Bash/…) lands without any
 * skill ever matching. Safe-baseline tools themselves stay allowed (legit
 * continuation reading); only a mutating tool past the hard threshold is blocked.
 *
 * Stream H tail — adds the wrapper. Pure metering + threshold logic live in
 * safe-baseline-metering.mjs; this file is just the hook entry composition.
 *
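 * Convention (mirrors enforce-decomposition-detector.mjs): the testable units are
 * the pure compositions (`decide()`, `processEvent()`, `runLiveDecision()`) and
 * the injectable `runMain()`. The live `main()` is wired: task-boundary inference,
 * skill-match detection from the transcript, and per-task counter persistence
 * all run through `runMain()` (see the "1b live-wiring" section below), which
 * returns `{ block: true }` when the decision is `hard_block`. Any internal error
 * is fail-quiet and yields `{ block: false }` (no self-lockout, same posture as
 * the sibling Stream H wrappers).
 * NOTE(review): an earlier revision stated that Settings.json registration is
 * deferred — confirm whether that still holds.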
 */
import {
  incrementCounter,
  evaluateThresholds,
  DEFAULT_THRESHOLDS,
  newCounterState,
  shouldInheritTaskId,
  deriveTaskId,
} from './safe-baseline-metering.mjs';
import { readFileSync, writeFileSync, appendFileSync, mkdirSync } from 'node:fs';
import { join } from 'node:path';
import { homedir } from 'node:os';
import {
  readStdin,
  parseEventJson,
  readTranscript,
  lastUserPromptText,
  lastTurnEntries,
  exitDecision,
  logSafeBaselineAction,
} from './enforce-hook-helpers.mjs';

/**
 * Pure decision step: bump the per-task counter for `toolName` and judge the
 * updated counters against the thresholds.
 *
 * @param {object} args
 * @param {object} args.state           - per-task counter state before this call (newCounterState shape)
 * @param {string} args.toolName        - name of the tool about to run
 * @param {boolean} [args.skillMatched] - whether a recommended Skill matched in this task
 * @param {object} [args.thresholds]    - replacement for DEFAULT_THRESHOLDS
 * @returns {{state:object, action:'allow'|'soft_flag'|'hard_block', reason?:string}}
 *   the counter state returned by incrementCounter plus the threshold verdict
 */
export function decide({ state, toolName, skillMatched = false, thresholds = DEFAULT_THRESHOLDS }) {
  const updatedState = incrementCounter(state, toolName);
  // Thresholds are evaluated on the post-increment state, so the current call counts.
  const { action, reason } = evaluateThresholds(updatedState, toolName, skillMatched, thresholds);
  return { state: updatedState, action, reason };
}

/**
 * Task-boundary head: work out whether this event belongs to the task recorded
 * in the prior ledger or opens a new one, then meter it through `decide()`.
 *
 * Continuation is delegated to `shouldInheritTaskId` in the pure module. As
 * documented for that module:
 *   - no prior ledger                           → fresh task
 *   - reset marker in promptText                → fresh task
 *   - keyword overlap with prior task < 2       → fresh task
 *   - otherwise                                 → inherit prior counters
 *
 * @param {object} args
 * @param {object} args.event             - PreToolUse event ({ tool_name })
 * @param {object|null} args.priorLedger  - { state, lastKeywords } from the last event, or null
 * @param {string[]} [args.currentKeywords] - keywords distilled from the current prompt
 * @param {string} [args.promptText]      - current user prompt (used for reset-marker detection)
 * @param {boolean} [args.skillMatched]   - whether a recommended Skill matched in this task
 * @param {object} [args.thresholds]      - replacement for DEFAULT_THRESHOLDS
 * @returns {{action:string, reason?:string, ledger:{state:object, lastKeywords:string[]}}}
 */
export function processEvent({
  event,
  priorLedger,
  currentKeywords = [],
  promptText = '',
  skillMatched = false,
  thresholds = DEFAULT_THRESHOLDS,
}) {
  const toolName = event && event.tool_name;

  // Only consult the continuation predicate when there is a prior state to inherit.
  const continuesPriorTask =
    Boolean(priorLedger?.state) &&
    shouldInheritTaskId(priorLedger.lastKeywords || [], currentKeywords, promptText);

  let startingState;
  if (continuesPriorTask) {
    startingState = priorLedger.state;
  } else {
    // Fresh task: counters start over, keyed by an id derived from the prompt.
    startingState = newCounterState({
      taskId: deriveTaskId(promptText),
      startedAtIso: '',
      firstPromptExcerpt: promptText,
    });
  }

  const { state, action, reason } = decide({
    state: startingState,
    toolName,
    skillMatched,
    thresholds,
  });

  return {
    action,
    reason,
    ledger: { state, lastKeywords: currentKeywords },
  };
}

// ── 1b live-wiring: pure helpers (safe-baseline-live-wiring-design.md v4) ──

// Common RU imperatives + RU/EN stopwords that would otherwise create spurious
// keyword overlap between unrelated tasks (G2). extractKeywords drops tokens
// shorter than 4 chars before consulting this set, so the short entries below
// are redundant but harmless.
const STOPWORDS = new Set([
  'сделай', 'сделать', 'проверь', 'проверить', 'посмотри', 'добавь', 'добавить',
  'напиши', 'написать', 'нужно', 'надо', 'давай', 'можешь', 'потом', 'после',
  'перед', 'через', 'очень', 'если', 'чтобы', 'этот', 'эта', 'это', 'эти',
  'или', 'тоже', 'также', 'когда', 'пока', 'весь', 'всё', 'все', 'теперь',
  'здесь', 'там', 'нет', 'есть', 'будет', 'было', 'твой', 'мой', 'самый',
  'then', 'this', 'that', 'with', 'from', 'your', 'please', 'just', 'make',
  'check', 'look', 'need', 'want', 'also', 'into', 'more', 'very', 'should',
  'will', 'have', 'does', 'done', 'them', 'they', 'here', 'there',
]);

/**
 * Deterministic keyword extraction (H1): NFC-normalize, lowercase, split on
 * anything that is not a letter or digit, drop tokens shorter than 4 chars and
 * stopwords, then return the unique tokens sorted.
 *
 * @param {string} promptText - raw user prompt; any non-string yields `[]`
 * @returns {string[]} unique keywords in default (code-unit) sort order
 */
export function extractKeywords(promptText) {
  if (typeof promptText !== 'string') return [];
  const tokens = promptText
    // Compose canonically decomposed text first: combining marks are not \p{L},
    // so an NFD "й" (и + U+0306) would otherwise cut the word in two and the
    // fragments would neither match stopwords nor the same word typed in NFC.
    .normalize('NFC')
    .toLowerCase()
    .split(/[^\p{L}\p{N}]+/u)
    .filter((t) => t.length >= 4 && !STOPWORDS.has(t));
  return [...new Set(tokens)].sort();
}

// Tool names whose invocation counts as "a skill matched" for this layer.
const SKILL_MATCH_TOOLS = new Set(['Skill', 'EnterPlanMode']);

/**
 * C2/V2-5: report whether any entry of the turn carries a `tool_use` content
 * block naming Skill or EnterPlanMode.
 *
 * @param {Array<object>} turnEntries - transcript entries; each is inspected at `message.content`
 * @returns {boolean} true when a matching `tool_use` block exists; false for
 *   non-array input or when no entry has such a block
 */
export function detectSkillMatch(turnEntries) {
  if (!Array.isArray(turnEntries)) return false;

  const isSkillToolUse = (block) =>
    Boolean(block) && block.type === 'tool_use' && SKILL_MATCH_TOOLS.has(block.name);

  return turnEntries.some((entry) => {
    const content = entry?.message?.content;
    // Entries whose content is not a block array (e.g. a plain string) cannot match.
    return Array.isArray(content) && content.some(isSkillToolUse);
  });
}

/**
 * V2-1 stickiness contract. The pure pipeline neither persists nor task-scopes
 * the skill-match flag, so this wrapper owns it:
 *   1. evaluate the same inherit predicate processEvent uses;
 *   2. carry the prior sticky flag over only when the task is inherited;
 *   3. OR in this turn's match;
 *   4. run processEvent with the effective flag;
 *   5. write the effective flag onto the resulting ledger state so it persists.
 *
 * @param {object} args
 * @param {object} args.event                  - PreToolUse event
 * @param {object|null} args.priorLedger       - previously persisted { state, lastKeywords }, or null
 * @param {string} args.promptText             - current user prompt
 * @param {string[]} args.currentKeywords      - keywords of the current prompt
 * @param {boolean} args.skillMatchedThisTurn  - whether a skill/plan invocation was seen this turn
 * @param {object} [args.thresholds]           - forwarded to processEvent (its default applies when undefined)
 * @returns {{action:string, reason?:string, ledger:{state:object, lastKeywords:string[]}}}
 *   processEvent's result, with `ledger.state.skill_match_within_task` set
 */
export function runLiveDecision({ event, priorLedger, promptText, currentKeywords, skillMatchedThisTurn, thresholds }) {
  const continuesPriorTask = Boolean(
    priorLedger?.state &&
      shouldInheritTaskId(priorLedger.lastKeywords || [], currentKeywords, promptText),
  );

  // A sticky match from an earlier task must not leak into a fresh one.
  const inheritedSticky =
    continuesPriorTask && Boolean(priorLedger.state.skill_match_within_task);
  const effectiveSkillMatched = inheritedSticky || Boolean(skillMatchedThisTurn);

  const outcome = processEvent({
    event,
    priorLedger,
    currentKeywords,
    promptText,
    skillMatched: effectiveSkillMatched,
    thresholds,
  });

  outcome.ledger.state.skill_match_within_task = effectiveSkillMatched;
  return outcome;
}

// ── live I/O composition ──

// Remediation hint that runMain appends (on its own line) to every hard-block message.
const ESCAPE_MSG = 'invoke the recommended Skill, or EnterPlanMode, to proceed (skill/plan invocations are never blocked by this layer).';

/**
 * Resolve the directory that holds the ledger and flag files.
 *
 * @param {string} [override] - explicit directory; any falsy value selects the default
 * @returns {string} `override` when truthy, otherwise `<home>/.claude/runtime`
 */
function ledgerDir(override) {
  if (override) return override;
  return join(homedir(), '.claude', 'runtime');
}
/**
 * Read and parse the persisted ledger for a session.
 *
 * @param {string} dir  - directory containing the ledger files
 * @param {string} [sess] - session id; a falsy value maps to the `unknown` ledger
 * @returns {*} the parsed JSON content, or `null` when the file is missing,
 *   unreadable, or not valid JSON (every failure is swallowed)
 */
function loadLedger(dir, sess) {
  try {
    // Path construction stays inside the try so a bad `dir` also yields null.
    const sessionKey = sess || 'unknown';
    const ledgerFile = join(dir, `safe-baseline-ledger-${sessionKey}.json`);
    const raw = readFileSync(ledgerFile, 'utf8');
    return JSON.parse(raw);
  } catch {
    return null;
  }
}
/**
 * Persist the ledger for a session as JSON, creating the directory if needed.
 * Best-effort: any failure (mkdir, serialization, write) is swallowed.
 *
 * @param {string} dir    - target directory (created recursively)
 * @param {string} [sess] - session id; a falsy value maps to the `unknown` ledger
 * @param {object} ledger - value to serialize with JSON.stringify
 * @returns {void}
 */
function saveLedger(dir, sess, ledger) {
  try {
    mkdirSync(dir, { recursive: true });
    const target = join(dir, `safe-baseline-ledger-${sess || 'unknown'}.json`);
    const payload = JSON.stringify(ledger);
    writeFileSync(target, payload);
  } catch {
    /* fail-quiet */
  }
}
/**
 * Append one JSON line to the session's flag log, creating the directory if
 * needed. Best-effort: any failure is swallowed.
 *
 * @param {string} dir    - target directory (created recursively)
 * @param {string} [sess] - session id; a falsy value maps to the `unknown` log
 * @param {object} entry  - fields to record; spread after `ts`, so an `entry.ts` wins
 * @returns {void}
 */
function logFlag(dir, sess, entry) {
  try {
    mkdirSync(dir, { recursive: true });
    const flagsFile = join(dir, `safe-baseline-flags-${sess || 'unknown'}.jsonl`);
    const record = { ts: new Date().toISOString(), ...entry };
    appendFileSync(flagsFile, `${JSON.stringify(record)}\n`);
  } catch {
    /* ignore */
  }
}

/**
 * Testable live head: derives the prompt, keywords and skill-match signal from
 * the transcript, runs the live decision against the persisted ledger, saves
 * the updated ledger, and reports whether the tool call should be blocked.
 *
 * Fail-quiet: any error while computing the decision yields `{ block: false }`.
 * Audit logging is isolated so that a logging failure cannot discard a decision
 * that has already been made.
 *
 * @param {object} [args]
 * @param {object} args.event         - PreToolUse event; reads session_id, transcript_path, tool_name
 * @param {string} [args.runtimeDir]  - overrides the ledger/flag directory
 * @param {*} [args.transcript]       - injected transcript; when falsy it is read from event.transcript_path
 * @returns {Promise<{block:boolean, message?:string}>}
 */
export async function runMain({ event, runtimeDir, transcript: injectedTranscript } = {}) {
  try {
    const sess = event.session_id;
    const dir = ledgerDir(runtimeDir);
    const transcript = injectedTranscript || readTranscript(event.transcript_path);
    const promptText = lastUserPromptText(transcript) || '';
    const currentKeywords = extractKeywords(promptText);
    // The tool being invoked right now counts too: a Skill/EnterPlanMode call
    // is not yet in the transcript when its own PreToolUse hook fires.
    const skillMatchedThisTurn = detectSkillMatch(lastTurnEntries(transcript)) ||
      SKILL_MATCH_TOOLS.has(event.tool_name);
    const priorLedger = loadLedger(dir, sess);

    const res = runLiveDecision({ event, priorLedger, promptText, currentKeywords, skillMatchedThisTurn });
    saveLedger(dir, sess, res.ledger);

    if (res.action !== 'allow') {
      // Best-effort audit trail. Without this guard a throw here would reach the
      // outer catch and turn a computed hard_block into an allow.
      try {
        logSafeBaselineAction(sess, { tool: event.tool_name, action: res.action }, { baseDir: dir });
      } catch { /* logging must not change the decision */ }
    }
    if (res.action === 'soft_flag') logFlag(dir, sess, { tool: event.tool_name, reason: res.reason });
    if (res.action === 'hard_block') return { block: true, message: `[safe-baseline] ${res.reason}\n${ESCAPE_MSG}` };
    return { block: false };
  } catch {
    return { block: false }; // fail-quiet — never crash the session
  }
}

/**
 * Hook entry point: read the event JSON from stdin, run the live head, and hand
 * the resulting decision to exitDecision.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const rawInput = await readStdin();
  const decision = await runMain({ event: parseEventJson(rawInput) });
  exitDecision(decision);
}

// Run main() only when this file is the invoked script (not when imported).
// Backslashes are normalized so the suffix check also matches Windows paths.
const invokedScript = (process.argv[1] || '').replace(/\\/g, '/');
if (invokedScript.endsWith('/enforce-safe-baseline-metering.mjs')) {
  // Any unexpected rejection exits 0.
  main().catch(() => process.exit(0));
}