397777089e
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
299 lines
14 KiB
JavaScript
299 lines
14 KiB
JavaScript
// tools/enforce-safe-baseline-metering.test.mjs
// Stream H tail — wrapper tests around the pure safe-baseline-metering module
// (router-gate v4 §3.1.2 Direction 1). Mirrors the enforce-decomposition-detector
// convention: implement + test a pure `decide()` composition. The main() wiring
// (transcript task-boundary + skill detection + state persistence) is now live
// (1b — safe-baseline-live-wiring-design.md v4).
|
|
import { describe, it, expect } from 'vitest';
import { decide, processEvent, extractKeywords, detectSkillMatch, runLiveDecision, runMain } from './enforce-safe-baseline-metering.mjs';
import { newCounterState } from './safe-baseline-metering.mjs';
import { mkdtempSync, writeFileSync, existsSync, readFileSync, rmSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
|
|
|
|
/**
 * Builds a brand-new counter state for a fixed, throwaway task.
 * Every call goes through `newCounterState`, so callers never share a state object.
 */
function freshState() {
  const seed = {
    taskId: 't',
    startedAtIso: '2026-05-29T00:00:00Z',
    firstPromptExcerpt: 'p',
  };
  return newCounterState(seed);
}
|
|
/**
 * Returns a fresh state whose `counts` are the fresh counts overlaid with `patch`.
 * @param {object} patch - counter name → value overrides (e.g. `{ Read: 29 }`).
 */
function withCounts(patch) {
  const base = freshState();
  // Patch keys win over the fresh values; keys absent from the patch are kept.
  const mergedCounts = { ...base.counts, ...patch };
  return { ...base, counts: mergedCounts };
}
|
|
|
|
// Unit tests for the pure decide() composition: one tool call in, one
// { action, state, reason } decision out.
describe('enforce-safe-baseline-metering decide()', () => {
  // Thin call wrapper so each case reads as (state, tool, skill?) → decision.
  const decideFor = (state, toolName, skillMatched = false) =>
    decide({ state, toolName, skillMatched });

  it('allows a metered Read below warn threshold and increments its counter', () => {
    const { action, state } = decideFor(freshState(), 'Read');
    expect(action).toBe('allow');
    expect(state.counts.Read).toBe(1);
  });

  it('soft_flags a metered Read once it reaches the warn threshold (29→30)', () => {
    const { action, state } = decideFor(withCounts({ Read: 29 }), 'Read');
    expect(action).toBe('soft_flag');
    expect(state.counts.Read).toBe(30);
  });

  it('hard_blocks a mutating tool when a metered counter is at its hard limit, no skill', () => {
    const { action, reason } = decideFor(withCounts({ Read: 60 }), 'Edit');
    expect(action).toBe('hard_block');
    // The reason must name the offending counter and its value.
    expect(reason).toContain('Read=60');
  });

  it('allows the mutating tool when a skill was matched, even past the hard limit', () => {
    const { action } = decideFor(withCounts({ Read: 60 }), 'Edit', true);
    expect(action).toBe('allow');
  });

  it('allows (and does not count) a tool that is neither metered nor mutating', () => {
    const { action, state } = decideFor(freshState(), 'WebFetch');
    expect(action).toBe('allow');
    expect(state.counts.Read).toBe(0);
  });

  it('does not mutate the caller-provided state object (immutability)', () => {
    const original = freshState();
    decideFor(original, 'Read');
    // The input object must still show the untouched counter.
    expect(original.counts.Read).toBe(0);
  });

  it('maps TodoWrite to TodoWrite_writes and soft_flags at its warn threshold (4→5)', () => {
    const { action, state } = decideFor(withCounts({ TodoWrite_writes: 4 }), 'TodoWrite');
    expect(state.counts.TodoWrite_writes).toBe(5);
    expect(action).toBe('soft_flag');
  });

  it('keeps a metered Grep allowed once past its hard threshold (continuation reading)', () => {
    const { action, state } = decideFor(withCounts({ Grep: 30 }), 'Grep');
    expect(action).toBe('allow');
    expect(state.counts.Grep).toBe(31);
  });

  it('hard_blocks a mutating Bash when TodoWrite_writes is at its hard limit', () => {
    const { action, reason } = decideFor(withCounts({ TodoWrite_writes: 15 }), 'Bash');
    expect(action).toBe('hard_block');
    expect(reason).toContain('TodoWrite_writes=15');
  });
});
|
|
|
|
// Tests for processEvent(): decides whether the incoming event continues the
// prior task (ledger carried over) or starts a fresh one (counters restart).
describe('enforce-safe-baseline-metering processEvent() — task-boundary head', () => {
  // A new array on every call so no case shares a keyword list with another.
  const routerKeywords = () => ['router', 'gate', 'safe'];

  // Prior ledger for the 'router/gate/safe' task with the Read counter preset
  // to `readCount` and every other counter at zero.
  const priorLedgerAtRead = (readCount) => {
    const baseState = newCounterState({
      taskId: 't',
      startedAtIso: '2026-05-29T00:00:00Z',
      firstPromptExcerpt: 'p',
    });
    const counts = { Read: readCount, Grep: 0, Glob: 0, LS: 0, TodoWrite_writes: 0, AskUserQuestion: 0 };
    return {
      state: { ...baseState, counts },
      lastKeywords: routerKeywords(),
    };
  };

  it('starts a fresh task when there is no prior ledger', () => {
    const { action, ledger } = processEvent({
      event: { tool_name: 'Read' },
      priorLedger: null,
      currentKeywords: routerKeywords(),
      promptText: 'почини safe-baseline',
      skillMatched: false,
    });
    expect(action).toBe('allow');
    expect(ledger.state.counts.Read).toBe(1);
    expect(ledger.lastKeywords).toEqual(['router', 'gate', 'safe']);
  });

  it('continues the prior task when keywords overlap >=2 and no reset marker', () => {
    const { action, ledger } = processEvent({
      event: { tool_name: 'Read' },
      priorLedger: priorLedgerAtRead(29),
      // Two of three keywords match the prior task.
      currentKeywords: ['router', 'gate', 'extra'],
      promptText: 'дальше по safe-baseline',
      skillMatched: false,
    });
    expect(ledger.state.counts.Read).toBe(30);
    expect(action).toBe('soft_flag');
  });

  it('resets to a fresh task on a reset marker even if keywords overlap', () => {
    const { ledger } = processEvent({
      event: { tool_name: 'Read' },
      priorLedger: priorLedgerAtRead(29),
      currentKeywords: routerKeywords(),
      promptText: 'новая задача — посмотри другое',
      skillMatched: false,
    });
    // Counter restarted: this Read is the first of the new task.
    expect(ledger.state.counts.Read).toBe(1);
  });

  it('starts a fresh task when keyword overlap is below 2', () => {
    const { ledger } = processEvent({
      event: { tool_name: 'Read' },
      priorLedger: priorLedgerAtRead(29),
      currentKeywords: ['totally', 'different', 'topic'],
      promptText: 'другая тема',
      skillMatched: false,
    });
    expect(ledger.state.counts.Read).toBe(1);
  });

  it('allows a mutating tool past the hard limit when a skill matched', () => {
    const { action } = processEvent({
      event: { tool_name: 'Edit' },
      priorLedger: priorLedgerAtRead(60),
      currentKeywords: routerKeywords(),
      promptText: 'продолжаем',
      skillMatched: true,
    });
    expect(action).toBe('allow');
  });
});
|
|
|
|
// ── 1b live-wiring: new pure helpers ──
|
|
|
|
// Tests for extractKeywords(): prompt text → keyword list used for task-overlap checks.
describe('extractKeywords (H1)', () => {
  it('lowercases, drops <4-char tokens, returns unique sorted', () => {
    const keywords = extractKeywords('Router GATE safe baseline router');
    expect(keywords).toEqual(['baseline', 'gate', 'router', 'safe']);
  });

  it('drops common RU imperatives so unrelated tasks do not falsely overlap', () => {
    // Both prompts share only the imperatives; their topic words differ.
    const first = extractKeywords('сделай проверь биллинг тариф');
    const second = new Set(extractKeywords('сделай проверь регион маршрут'));
    const shared = first.filter((keyword) => second.has(keyword));
    expect(shared).toEqual([]);
  });

  it('returns [] for empty/non-string', () => {
    for (const input of ['', null]) {
      expect(extractKeywords(input)).toEqual([]);
    }
  });
});
|
|
|
|
/**
 * Builds a minimal assistant transcript entry whose only content item is a
 * `tool_use` call.
 * @param {string} name - tool name recorded on the call.
 * @param {object} [input={}] - tool input recorded on the call.
 * @returns {{ message: { role: string, content: object[] } }}
 */
function asstToolUse(name, input = {}) {
  const toolCall = { type: 'tool_use', name, input };
  const message = { role: 'assistant', content: [toolCall] };
  return { message };
}
|
|
|
|
// Tests for detectSkillMatch(): only an actual Skill / EnterPlanMode tool_use
// in the turn counts as a match.
describe('detectSkillMatch (C2/V2-5)', () => {
  it('true when the turn has a Skill tool_use', () => {
    const turn = [asstToolUse('Skill', { skill: 'superpowers:brainstorming' })];
    expect(detectSkillMatch(turn)).toBe(true);
  });

  it('true when the turn has an EnterPlanMode tool_use', () => {
    const turn = [asstToolUse('EnterPlanMode')];
    expect(detectSkillMatch(turn)).toBe(true);
  });

  it('false for Read tool_use or plain text mention of a plan path (no self-grant)', () => {
    const planPath = 'docs/superpowers/plans/x.md';
    // Reading a plan file is not the same as invoking a skill.
    const readTurn = [asstToolUse('Read', { file_path: planPath })];
    expect(detectSkillMatch(readTurn)).toBe(false);
    // Mentioning the path in plain assistant text does not count either.
    const textTurn = [{ message: { role: 'assistant', content: [{ type: 'text', text: planPath }] } }];
    expect(detectSkillMatch(textTurn)).toBe(false);
  });

  it('false for empty/non-array', () => {
    for (const input of [[], null]) {
      expect(detectSkillMatch(input)).toBe(false);
    }
  });
});
|
|
|
|
/**
 * Builds a prior-ledger fixture.
 * @param {object} counts - counter overrides layered over an all-zero counter set.
 * @param {boolean} skill - value stored as `state.skill_match_within_task`.
 * @param {string[]} keywords - value stored as `lastKeywords`.
 */
function ledgerWith(counts, skill, keywords) {
  const baseState = newCounterState({
    taskId: 't',
    startedAtIso: '2026-05-30T00:00:00Z',
    firstPromptExcerpt: 'p',
  });
  const zeroCounts = { Read: 0, Grep: 0, Glob: 0, LS: 0, TodoWrite_writes: 0, AskUserQuestion: 0 };
  const state = {
    ...baseState,
    counts: { ...zeroCounts, ...counts },
    skill_match_within_task: skill,
  };
  return { state, lastKeywords: keywords };
}
|
|
|
|
// Tests for runLiveDecision(): a skill match is remembered in the ledger for
// the rest of the task, and is dropped when an unrelated task starts.
describe('runLiveDecision — stickiness contract (V2-1)', () => {
  // A new array on every call so no case shares a keyword list with another.
  const taskKeywords = () => ['router', 'gate', 'safe', 'baseline'];

  it('persists skillMatchedThisTurn into the ledger (stickiness not lost)', () => {
    const { ledger } = runLiveDecision({
      event: { tool_name: 'Read' },
      priorLedger: null,
      promptText: 'router gate safe baseline',
      currentKeywords: taskKeywords(),
      skillMatchedThisTurn: true,
    });
    expect(ledger.state.skill_match_within_task).toBe(true);
  });

  it('a skill earlier in a task keeps later mutating ops allowed past the hard limit (no false block)', () => {
    // The prior ledger already records a skill match; this turn has none.
    const priorLedger = ledgerWith({ Read: 60 }, true, taskKeywords());
    const { action } = runLiveDecision({
      event: { tool_name: 'Edit' },
      priorLedger,
      promptText: 'продолжаем router gate safe baseline',
      currentKeywords: taskKeywords(),
      skillMatchedThisTurn: false,
    });
    expect(action).toBe('allow');
  });

  it('skill match in task A does NOT exempt an unrelated task B (no cross-task leak)', () => {
    const priorLedger = ledgerWith({ Read: 60 }, true, taskKeywords());
    const { ledger } = runLiveDecision({
      event: { tool_name: 'Edit' },
      priorLedger,
      promptText: 'регион маршрут лиды поставщик',
      currentKeywords: ['регион', 'маршрут', 'лиды', 'поставщик'],
      skillMatchedThisTurn: false,
    });
    // Neither the skill flag nor the Read counter carries over from task A.
    expect(ledger.state.skill_match_within_task).toBe(false);
    expect(ledger.state.counts.Read).toBe(0);
  });

  it('hard-blocks a mutating tool past the limit in a no-skill task', () => {
    const priorLedger = ledgerWith({ Read: 60 }, false, taskKeywords());
    const { action } = runLiveDecision({
      event: { tool_name: 'Edit' },
      priorLedger,
      promptText: 'router gate safe baseline',
      currentKeywords: taskKeywords(),
      skillMatchedThisTurn: false,
    });
    expect(action).toBe('hard_block');
  });
});
|
|
|
|
// Integration tests for runMain(): real files in a throwaway runtime directory
// (ledger JSON, transcript JSONL, actions log). Each test gets its own
// directory, which is removed afterwards even if an assertion fails.
describe('runMain — live integration', () => {
  /** Writes `entries` to `path` as JSONL: one JSON document per line. */
  function fixtureTranscript(path, entries) {
    writeFileSync(path, entries.map((e) => JSON.stringify(e)).join('\n'));
  }

  /** Writes a transcript holding a single user prompt with the given text. */
  function writeUserPrompt(path, text) {
    fixtureTranscript(path, [{ type: 'user', message: { role: 'user', content: text } }]);
  }

  /**
   * Seeds `safe-baseline-ledger-<sessionId>.json` in `dir` with a no-skill task
   * whose Read counter is 60, the value the other suites in this file use as
   * "at the hard limit".
   */
  function seedLedgerAtReadLimit(dir, sessionId) {
    const ledger = {
      state: {
        schema_version: 1,
        task_id: 't',
        counts: { Read: 60, Grep: 0, Glob: 0, LS: 0, TodoWrite_writes: 0, AskUserQuestion: 0 },
        skill_match_within_task: false,
      },
      lastKeywords: ['router', 'gate', 'safe', 'baseline'],
    };
    writeFileSync(join(dir, `safe-baseline-ledger-${sessionId}.json`), JSON.stringify(ledger));
  }

  /**
   * Creates a temp runtime directory, runs `body(dir, transcriptPath)`, and
   * always deletes the directory afterwards so test runs do not pile up
   * `sbm-*` folders in the OS temp dir.
   */
  async function withRuntimeDir(body) {
    const dir = mkdtempSync(join(tmpdir(), 'sbm-'));
    try {
      return await body(dir, join(dir, 't.jsonl'));
    } finally {
      // force: tolerate the directory already being gone.
      rmSync(dir, { recursive: true, force: true });
    }
  }

  it('blocks an Edit when Read past hard with no skill, and names the escape', async () => {
    await withRuntimeDir(async (dir, tpath) => {
      seedLedgerAtReadLimit(dir, 'S');
      writeUserPrompt(tpath, 'router gate safe baseline');
      const res = await runMain({ event: { tool_name: 'Edit', session_id: 'S', transcript_path: tpath }, runtimeDir: dir });
      expect(res.block).toBe(true);
      // The block message must point at a way out.
      expect(res.message).toMatch(/EnterPlanMode|Skill/);
    });
  });

  it('allows a fresh task and persists the ledger', async () => {
    await withRuntimeDir(async (dir, tpath) => {
      // No ledger is seeded: this session starts from nothing.
      writeUserPrompt(tpath, 'регион маршрут лиды');
      const res = await runMain({ event: { tool_name: 'Read', session_id: 'S2', transcript_path: tpath }, runtimeDir: dir });
      expect(res.block).toBe(false);
      expect(existsSync(join(dir, 'safe-baseline-ledger-S2.json'))).toBe(true);
    });
  });

  it('allows an Edit (escape) when the current event is a Skill invocation', async () => {
    await withRuntimeDir(async (dir, tpath) => {
      seedLedgerAtReadLimit(dir, 'S3');
      writeUserPrompt(tpath, 'router gate safe baseline');
      // The event under test is the Skill call itself, not a following Edit.
      const res = await runMain({ event: { tool_name: 'Skill', session_id: 'S3', transcript_path: tpath }, runtimeDir: dir });
      expect(res.block).toBe(false);
    });
  });

  it('logs the action to safe-baseline-actions when a mutating tool is hard-blocked', async () => {
    await withRuntimeDir(async (dir, tpath) => {
      seedLedgerAtReadLimit(dir, 'S4');
      writeUserPrompt(tpath, 'router gate safe baseline');
      const res = await runMain({ event: { tool_name: 'Edit', session_id: 'S4', transcript_path: tpath }, runtimeDir: dir });
      expect(res.block).toBe(true);
      // The actions log is JSONL: parse each line and look for the blocked Edit.
      const log = readFileSync(join(dir, 'safe-baseline-actions-S4.jsonl'), 'utf-8')
        .trim()
        .split('\n')
        .map((line) => JSON.parse(line));
      expect(log.some((r) => r.action === 'hard_block' && r.tool === 'Edit')).toBe(true);
    });
  });
});
|