// brain/tools/enforce-safe-baseline-metering.test.mjs

// tools/enforce-safe-baseline-metering.test.mjs
// Stream H tail — wrapper tests around the pure safe-baseline-metering module
// (router-gate v4 §3.1.2 Direction 1). Mirrors the enforce-decomposition-detector
// convention: implement + test a pure `decide()` composition; live main() wiring
// (transcript task-boundary + skill detection + state persistence) is now live
// (1b — safe-baseline-live-wiring-design.md v4).
import { describe, it, expect, afterEach } from 'vitest';
import { decide, processEvent, extractKeywords, detectSkillMatch, runLiveDecision, runMain } from './enforce-safe-baseline-metering.mjs';
import { newCounterState } from './safe-baseline-metering.mjs';
import { mkdtempSync, writeFileSync, existsSync, readFileSync, rmSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';

/**
 * Returns whatever newCounterState() produces for a fixed placeholder task
 * (id 't', a constant start timestamp, prompt excerpt 'p').
 * Each call asks newCounterState() again, so callers never share one seed object.
 */
function freshState() {
  const placeholderTask = {
    taskId: 't',
    startedAtIso: '2026-05-29T00:00:00Z',
    firstPromptExcerpt: 'p',
  };
  return newCounterState(placeholderTask);
}
/**
 * Builds a fresh state whose `counts` are the fresh counts overlaid with `patch`.
 * The object returned by freshState() is copied, not modified.
 * @param {object} patch - counter name → value overrides.
 * @returns {object} a new state object with the merged `counts`.
 */
function withCounts(patch) {
  const base = freshState();
  const counts = { ...base.counts, ...patch };
  return { ...base, counts };
}

// decide(): each case feeds a counter state, a tool name and the skill flag, then
// asserts the returned action and (where relevant) the returned counters or reason.
describe('enforce-safe-baseline-metering decide()', () => {
  // Single call site so every case hands decide() the same argument shape.
  const run = (state, toolName, skillMatched) => decide({ state, toolName, skillMatched });

  it('allows a metered Read below warn threshold and increments its counter', () => {
    const { action, state } = run(freshState(), 'Read', false);
    expect(action).toBe('allow');
    expect(state.counts.Read).toBe(1);
  });

  it('soft_flags a metered Read once it reaches the warn threshold (29→30)', () => {
    const { action, state } = run(withCounts({ Read: 29 }), 'Read', false);
    expect(action).toBe('soft_flag');
    expect(state.counts.Read).toBe(30);
  });

  it('hard_blocks a mutating tool when a metered counter is at its hard limit, no skill', () => {
    const { action, reason } = run(withCounts({ Read: 60 }), 'Edit', false);
    expect(action).toBe('hard_block');
    // The reason is expected to name the counter that tripped the limit.
    expect(reason).toContain('Read=60');
  });

  it('allows the mutating tool when a skill was matched, even past the hard limit', () => {
    const { action } = run(withCounts({ Read: 60 }), 'Edit', true);
    expect(action).toBe('allow');
  });

  it('allows (and does not count) a tool that is neither metered nor mutating', () => {
    const { action, state } = run(freshState(), 'WebFetch', false);
    expect(action).toBe('allow');
    expect(state.counts.Read).toBe(0);
  });

  it('does not mutate the caller-provided state object (immutability)', () => {
    const callerState = freshState();
    run(callerState, 'Read', false);
    // The input object must still show its original Read count after the call.
    expect(callerState.counts.Read).toBe(0);
  });

  it('maps TodoWrite to TodoWrite_writes and soft_flags at its warn threshold (4→5)', () => {
    const { action, state } = run(withCounts({ TodoWrite_writes: 4 }), 'TodoWrite', false);
    expect(state.counts.TodoWrite_writes).toBe(5);
    expect(action).toBe('soft_flag');
  });

  it('keeps a metered Grep allowed once past its hard threshold (continuation reading)', () => {
    const { action, state } = run(withCounts({ Grep: 30 }), 'Grep', false);
    expect(action).toBe('allow');
    expect(state.counts.Grep).toBe(31);
  });

  it('hard_blocks a mutating Bash when TodoWrite_writes is at its hard limit', () => {
    const { action, reason } = run(withCounts({ TodoWrite_writes: 15 }), 'Bash', false);
    expect(action).toBe('hard_block');
    expect(reason).toContain('TodoWrite_writes=15');
  });
});

// processEvent(): covers the task-boundary decision made before metering —
// no prior ledger, keyword overlap, reset marker in the prompt, and the skill flag.
describe('enforce-safe-baseline-metering processEvent() — task-boundary head', () => {
  // Fresh keyword array per use so cases never share (or leak edits to) one list.
  const routerTopic = () => ['router', 'gate', 'safe'];

  // Prior ledger for the router/gate/safe topic with only the Read counter set.
  const priorWithRead = (readCount) => ({
    state: {
      ...newCounterState({ taskId: 't', startedAtIso: '2026-05-29T00:00:00Z', firstPromptExcerpt: 'p' }),
      counts: { Read: readCount, Grep: 0, Glob: 0, LS: 0, TodoWrite_writes: 0, AskUserQuestion: 0 },
    },
    lastKeywords: routerTopic(),
  });

  it('starts a fresh task when there is no prior ledger', () => {
    const { action, ledger } = processEvent({
      event: { tool_name: 'Read' },
      priorLedger: null,
      currentKeywords: routerTopic(),
      promptText: 'почини safe-baseline',
      skillMatched: false,
    });
    expect(action).toBe('allow');
    expect(ledger.state.counts.Read).toBe(1);
    expect(ledger.lastKeywords).toEqual(['router', 'gate', 'safe']);
  });

  it('continues the prior task when keywords overlap >=2 and no reset marker', () => {
    const { action, ledger } = processEvent({
      event: { tool_name: 'Read' },
      priorLedger: priorWithRead(29),
      currentKeywords: ['router', 'gate', 'extra'],
      promptText: 'дальше по safe-baseline',
      skillMatched: false,
    });
    // Counter carried over from the prior ledger: 29 + this Read = 30.
    expect(ledger.state.counts.Read).toBe(30);
    expect(action).toBe('soft_flag');
  });

  it('resets to a fresh task on a reset marker even if keywords overlap', () => {
    const { ledger } = processEvent({
      event: { tool_name: 'Read' },
      priorLedger: priorWithRead(29),
      currentKeywords: routerTopic(),
      promptText: 'новая задача — посмотри другое',
      skillMatched: false,
    });
    // Counter restarted: only the current Read is counted.
    expect(ledger.state.counts.Read).toBe(1);
  });

  it('starts a fresh task when keyword overlap is below 2', () => {
    const { ledger } = processEvent({
      event: { tool_name: 'Read' },
      priorLedger: priorWithRead(29),
      currentKeywords: ['totally', 'different', 'topic'],
      promptText: 'другая тема',
      skillMatched: false,
    });
    expect(ledger.state.counts.Read).toBe(1);
  });

  it('allows a mutating tool past the hard limit when a skill matched', () => {
    const { action } = processEvent({
      event: { tool_name: 'Edit' },
      priorLedger: priorWithRead(60),
      currentKeywords: routerTopic(),
      promptText: 'продолжаем',
      skillMatched: true,
    });
    expect(action).toBe('allow');
  });
});

// ── 1b live-wiring: new pure helpers ──

// extractKeywords(): normalisation of a prompt into a keyword list.
describe('extractKeywords (H1)', () => {
  it('lowercases, drops <4-char tokens, returns unique sorted', () => {
    const keywords = extractKeywords('Router GATE safe baseline router');
    expect(keywords).toEqual(['baseline', 'gate', 'router', 'safe']);
  });

  it('drops common RU imperatives so unrelated tasks do not falsely overlap', () => {
    // Both prompts share only the imperative verbs; the topic words differ.
    const billingKeywords = extractKeywords('сделай проверь биллинг тариф');
    const routingKeywords = extractKeywords('сделай проверь регион маршрут');
    const shared = billingKeywords.filter((word) => routingKeywords.includes(word));
    expect(shared).toEqual([]);
  });

  it('returns [] for empty/non-string', () => {
    for (const degenerate of ['', null]) {
      expect(extractKeywords(degenerate)).toEqual([]);
    }
  });
});

/**
 * Builds a transcript-style entry: an assistant message whose content is a
 * single `tool_use` block.
 * @param {string} name - value stored as the tool_use block's `name`.
 * @param {object} [input={}] - value stored as the tool_use block's `input`.
 * @returns {{message: {role: string, content: object[]}}}
 */
function asstToolUse(name, input = {}) {
  const toolUseBlock = { type: 'tool_use', name, input };
  const message = { role: 'assistant', content: [toolUseBlock] };
  return { message };
}

// detectSkillMatch(): given a turn's transcript entries, only Skill / EnterPlanMode
// tool_use blocks count as a match in these cases.
describe('detectSkillMatch (C2/V2-5)', () => {
  it('true when the turn has a Skill tool_use', () => {
    const turn = [asstToolUse('Skill', { skill: 'superpowers:brainstorming' })];
    expect(detectSkillMatch(turn)).toBe(true);
  });

  it('true when the turn has an EnterPlanMode tool_use', () => {
    const turn = [asstToolUse('EnterPlanMode')];
    expect(detectSkillMatch(turn)).toBe(true);
  });

  it('false for Read tool_use or plain text mention of a plan path (no self-grant)', () => {
    // Reading a plan file is not a match...
    const readsPlanFile = [asstToolUse('Read', { file_path: 'docs/superpowers/plans/x.md' })];
    expect(detectSkillMatch(readsPlanFile)).toBe(false);

    // ...and neither is mentioning its path in plain assistant text.
    const mentionsPlanPath = [
      { message: { role: 'assistant', content: [{ type: 'text', text: 'docs/superpowers/plans/x.md' }] } },
    ];
    expect(detectSkillMatch(mentionsPlanPath)).toBe(false);
  });

  it('false for empty/non-array', () => {
    for (const degenerate of [[], null]) {
      expect(detectSkillMatch(degenerate)).toBe(false);
    }
  });
});

/**
 * Builds a ledger fixture: `{ state, lastKeywords }`.
 * `state` is a newCounterState() result with its `counts` replaced by an
 * all-zero counter set overlaid with `counts`, and with
 * `skill_match_within_task` set to `skill`.
 * @param {object} counts - counter overrides applied on top of the zeroed counters.
 * @param {boolean} skill - value stored as `state.skill_match_within_task`.
 * @param {string[]} keywords - value stored (by reference) as `lastKeywords`.
 */
function ledgerWith(counts, skill, keywords) {
  const base = newCounterState({ taskId: 't', startedAtIso: '2026-05-30T00:00:00Z', firstPromptExcerpt: 'p' });
  const zeroedCounts = { Read: 0, Grep: 0, Glob: 0, LS: 0, TodoWrite_writes: 0, AskUserQuestion: 0 };
  const state = {
    ...base,
    counts: { ...zeroedCounts, ...counts },
    skill_match_within_task: skill,
  };
  return { state, lastKeywords: keywords };
}

// runLiveDecision(): the skill-match flag must persist within a task ("sticky")
// and must not carry over into an unrelated task.
describe('runLiveDecision — stickiness contract (V2-1)', () => {
  // Fresh keyword array per use so cases never share one list instance.
  const routerTopic = () => ['router', 'gate', 'safe', 'baseline'];

  it('persists skillMatchedThisTurn into the ledger (stickiness not lost)', () => {
    const { ledger } = runLiveDecision({
      event: { tool_name: 'Read' },
      priorLedger: null,
      promptText: 'router gate safe baseline',
      currentKeywords: routerTopic(),
      skillMatchedThisTurn: true,
    });
    expect(ledger.state.skill_match_within_task).toBe(true);
  });

  it('a skill earlier in a task keeps later mutating ops allowed past the hard limit (no false block)', () => {
    // Prior ledger already carries the skill flag; this turn itself has no skill.
    const priorLedger = ledgerWith({ Read: 60 }, true, routerTopic());
    const { action } = runLiveDecision({
      event: { tool_name: 'Edit' },
      priorLedger,
      promptText: 'продолжаем router gate safe baseline',
      currentKeywords: routerTopic(),
      skillMatchedThisTurn: false,
    });
    expect(action).toBe('allow');
  });

  it('skill match in task A does NOT exempt an unrelated task B (no cross-task leak)', () => {
    const priorLedger = ledgerWith({ Read: 60 }, true, routerTopic());
    const { ledger } = runLiveDecision({
      event: { tool_name: 'Edit' },
      priorLedger,
      promptText: 'регион маршрут лиды поставщик',
      currentKeywords: ['регион', 'маршрут', 'лиды', 'поставщик'],
      skillMatchedThisTurn: false,
    });
    // Both the skill flag and the Read counter must come back reset.
    expect(ledger.state.skill_match_within_task).toBe(false);
    expect(ledger.state.counts.Read).toBe(0);
  });

  it('hard-blocks a mutating tool past the limit in a no-skill task', () => {
    const priorLedger = ledgerWith({ Read: 60 }, false, routerTopic());
    const { action } = runLiveDecision({
      event: { tool_name: 'Edit' },
      priorLedger,
      promptText: 'router gate safe baseline',
      currentKeywords: routerTopic(),
      skillMatchedThisTurn: false,
    });
    expect(action).toBe('hard_block');
  });
});

// runMain(): end-to-end cases that run against a real temp directory holding the
// transcript file and the per-session ledger / action-log files.
describe('runMain — live integration', () => {
  // Temp directories created by the current test; emptied by afterEach.
  const sandboxes = [];

  // Delete every sandbox once its test finishes so runs do not leave `sbm-*`
  // directories behind in the OS temp dir. `force` keeps cleanup from throwing
  // if a directory is already gone.
  afterEach(() => {
    for (const dir of sandboxes.splice(0)) {
      rmSync(dir, { recursive: true, force: true });
    }
  });

  /** Creates a tracked temp dir and returns it with the transcript path inside it. */
  function makeSandbox() {
    const dir = mkdtempSync(join(tmpdir(), 'sbm-'));
    sandboxes.push(dir);
    return { dir, tpath: join(dir, 't.jsonl') };
  }

  /** Writes `entries` to `path` as JSON Lines (one JSON document per line). */
  function fixtureTranscript(path, entries) {
    writeFileSync(path, entries.map((e) => JSON.stringify(e)).join('\n'));
  }

  /** Transcript entry for a user message whose content is `text`. */
  function userPrompt(text) {
    return { type: 'user', message: { role: 'user', content: text } };
  }

  /**
   * Seeds `safe-baseline-ledger-<sessionId>.json` in `dir` with Read=60, no skill
   * match, and the router/gate/safe/baseline keywords.
   */
  function seedReadSixtyLedger(dir, sessionId) {
    writeFileSync(join(dir, `safe-baseline-ledger-${sessionId}.json`), JSON.stringify({
      state: { schema_version: 1, task_id: 't', counts: { Read: 60, Grep: 0, Glob: 0, LS: 0, TodoWrite_writes: 0, AskUserQuestion: 0 }, skill_match_within_task: false },
      lastKeywords: ['router', 'gate', 'safe', 'baseline'],
    }));
  }

  it('blocks an Edit when Read past hard with no skill, and names the escape', async () => {
    const { dir, tpath } = makeSandbox();
    seedReadSixtyLedger(dir, 'S');
    fixtureTranscript(tpath, [userPrompt('router gate safe baseline')]);
    const res = await runMain({ event: { tool_name: 'Edit', session_id: 'S', transcript_path: tpath }, runtimeDir: dir });
    expect(res.block).toBe(true);
    // The block message must point the user at one of the escape hatches.
    expect(res.message).toMatch(/EnterPlanMode|Skill/);
  });

  it('allows a fresh task and persists the ledger', async () => {
    // No ledger is seeded here: the session starts from nothing.
    const { dir, tpath } = makeSandbox();
    fixtureTranscript(tpath, [userPrompt('регион маршрут лиды')]);
    const res = await runMain({ event: { tool_name: 'Read', session_id: 'S2', transcript_path: tpath }, runtimeDir: dir });
    expect(res.block).toBe(false);
    expect(existsSync(join(dir, 'safe-baseline-ledger-S2.json'))).toBe(true);
  });

  // NOTE: despite the title, the event under test is the Skill call itself
  // (tool_name 'Skill'), not an Edit; the assertion is that it is not blocked.
  it('allows an Edit (escape) when the current event is a Skill invocation', async () => {
    const { dir, tpath } = makeSandbox();
    seedReadSixtyLedger(dir, 'S3');
    fixtureTranscript(tpath, [userPrompt('router gate safe baseline')]);
    const res = await runMain({ event: { tool_name: 'Skill', session_id: 'S3', transcript_path: tpath }, runtimeDir: dir });
    expect(res.block).toBe(false);
  });

  it('logs the action to safe-baseline-actions when a mutating tool is hard-blocked', async () => {
    const { dir, tpath } = makeSandbox();
    seedReadSixtyLedger(dir, 'S4');
    fixtureTranscript(tpath, [userPrompt('router gate safe baseline')]);
    const res = await runMain({ event: { tool_name: 'Edit', session_id: 'S4', transcript_path: tpath }, runtimeDir: dir });
    expect(res.block).toBe(true);
    // The action log is JSON Lines; parse each line and look for the block record.
    const log = readFileSync(join(dir, 'safe-baseline-actions-S4.jsonl'), 'utf-8')
      .trim()
      .split('\n')
      .map((line) => JSON.parse(line));
    expect(log.some((r) => r.action === 'hard_block' && r.tool === 'Edit')).toBe(true);
  });
});