// File-viewer metadata captured together with this file (not part of the module):
//   Files › brain/tools/safe-baseline-metering.test.mjs
//   173 lines · 6.7 KiB · JavaScript

// tools/safe-baseline-metering.test.mjs
import { describe, it, expect } from 'vitest';
import {
RESET_MARKERS, isResetMarker, DEFAULT_THRESHOLDS, deriveTaskId,
keywordOverlapCount, shouldInheritTaskId, newCounterState,
incrementCounter, evaluateThresholds, isMutatingForBaseline,
} from './safe-baseline-metering.mjs';
// isResetMarker: a prompt containing the reset phrase is flagged regardless
// of letter case; a prompt without it is not.
describe('isResetMarker', () => {
  it('detects "новая задача" case-insensitive', () => {
    // The phrase is upper-cased and sits in the middle of the sentence.
    const promptWithMarker = 'Окей, НОВАЯ ЗАДАЧА теперь';
    const detected = isResetMarker(promptWithMarker);
    expect(detected).toBe(true);
  });

  it('returns false for unrelated prompt', () => {
    const unrelatedPrompt = 'почини баг в парсере';
    const detected = isResetMarker(unrelatedPrompt);
    expect(detected).toBe(false);
  });
});
// deriveTaskId: equal input yields an equal id made of 16 lowercase hex
// characters; different inputs yield different ids.
describe('deriveTaskId', () => {
  it('is deterministic 16-hex for same input', () => {
    const prompt = 'fix foo';
    const firstId = deriveTaskId(prompt);
    const secondId = deriveTaskId(prompt);

    expect(firstId).toBe(secondId);
    expect(firstId).toMatch(/^[0-9a-f]{16}$/);
  });

  it('differs for different input', () => {
    const idForA = deriveTaskId('a');
    const idForB = deriveTaskId('b');

    expect(idForA).not.toBe(idForB);
  });
});
// keywordOverlapCount counts keywords shared by two lists. shouldInheritTaskId
// is expected to return true only when at least two keywords are shared and
// the third argument carries no reset marker.
describe('keywordOverlapCount / shouldInheritTaskId', () => {
  it('counts intersection', () => {
    const left = ['foo', 'bar', 'baz'];
    const right = ['foo', 'bar', 'qux'];
    // 'foo' and 'bar' appear in both lists.
    expect(keywordOverlapCount(left, right)).toBe(2);
  });

  it('inherits when overlap>=2 and no reset marker', () => {
    const left = ['foo', 'bar'];
    const right = ['foo', 'bar'];
    const inherits = shouldInheritTaskId(left, right, 'edge case');
    expect(inherits).toBe(true);
  });

  it('does NOT inherit on reset marker', () => {
    const left = ['foo', 'bar'];
    const right = ['foo', 'bar'];
    // Same overlap as the previous case; only the text differs.
    const inherits = shouldInheritTaskId(left, right, 'новая задача');
    expect(inherits).toBe(false);
  });

  it('does NOT inherit when overlap<2', () => {
    const left = ['foo'];
    const right = ['foo', 'x'];
    const inherits = shouldInheritTaskId(left, right, 'continue');
    expect(inherits).toBe(false);
  });
});
// incrementCounter: bumps the counter that belongs to a metered tool and
// returns the result without touching the state it was given.
describe('incrementCounter', () => {
  // One fixture is shared by every test below. That is only sound because
  // incrementCounter is expected not to mutate its input (asserted in the
  // last test of this suite).
  const start = newCounterState({
    taskId: 't',
    startedAtIso: '2026-05-29T00:00:00Z',
    firstPromptExcerpt: 'p',
  });

  it('increments Read counter', () => {
    expect(incrementCounter(start, 'Read').counts.Read).toBe(1);
  });

  it('maps TodoWrite to TodoWrite_writes', () => {
    // The tool name and the counter key differ for TodoWrite.
    expect(incrementCounter(start, 'TodoWrite').counts.TodoWrite_writes).toBe(1);
  });

  it('returns state unchanged for non-metered tool (Edit)', () => {
    const result = incrementCounter(start, 'Edit');
    expect(result.counts.Read).toBe(0);
    // Checking Read alone would still pass if Edit bumped some other counter
    // or altered another field, so compare the whole state with the fixture.
    expect(result).toEqual(start);
  });

  it('is immutable — original state not mutated', () => {
    incrementCounter(start, 'Read');
    expect(start.counts.Read).toBe(0);
  });
});
/**
 * Builds a fresh counter state and overlays the given counter values on it.
 *
 * Shared by all evaluateThresholds suites below so the fixture is defined in
 * exactly one place. A new state is created on every call, so tests never
 * share an object.
 *
 * @param {Record<string, number>} patches - counter key → value overrides.
 * @returns {object} a new state whose `counts` are the fresh counts with
 *   `patches` applied on top.
 */
function stateWith(patches) {
  const base = newCounterState({
    taskId: 't',
    startedAtIso: '2026-05-29T00:00:00Z',
    firstPromptExcerpt: 'p',
  });
  return { ...base, counts: { ...base.counts, ...patches } };
}

// Calls below use evaluateThresholds(state, toolName, skillMatched) and
// inspect the `action` (and, for blocks, the `reason`) of the returned value.

describe('evaluateThresholds — warn/soft_flag', () => {
  it('Read=30 → soft_flag', () => {
    const s = stateWith({ Read: 30 });
    expect(evaluateThresholds(s, 'Read', false).action).toBe('soft_flag');
  });

  it('Read=29 → allow', () => {
    // One below the value that is flagged in the previous test.
    const s = stateWith({ Read: 29 });
    expect(evaluateThresholds(s, 'Read', false).action).toBe('allow');
  });

  it('Grep=15 → soft_flag', () => {
    const s = stateWith({ Grep: 15 });
    expect(evaluateThresholds(s, 'Grep', false).action).toBe('soft_flag');
  });
});

describe('evaluateThresholds — hard threshold, safe-baseline tool stays allowed', () => {
  it('Read=60 → allow (continuation reading)', () => {
    const s = stateWith({ Read: 60 });
    expect(evaluateThresholds(s, 'Read', false).action).toBe('allow');
  });

  it('Glob=20 → allow', () => {
    const s = stateWith({ Glob: 20 });
    expect(evaluateThresholds(s, 'Glob', false).action).toBe('allow');
  });
});

describe('evaluateThresholds — mutating hard-block', () => {
  it('Read=60, Edit → hard_block with reason containing Read=60', () => {
    const s = stateWith({ Read: 60 });
    const r = evaluateThresholds(s, 'Edit', false);
    expect(r.action).toBe('hard_block');
    // The reason must name the counter that triggered the block.
    expect(r.reason).toContain('Read=60');
  });

  it('Read=60, Bash → hard_block', () => {
    const s = stateWith({ Read: 60 });
    expect(evaluateThresholds(s, 'Bash', false).action).toBe('hard_block');
  });

  it('Read=59, Edit → allow (no counter at hard threshold)', () => {
    const s = stateWith({ Read: 59 });
    expect(evaluateThresholds(s, 'Edit', false).action).toBe('allow');
  });

  it('Read=60, Edit, skill=true → allow (skill match overrides)', () => {
    const s = stateWith({ Read: 60 });
    expect(evaluateThresholds(s, 'Edit', true).action).toBe('allow');
  });

  it('TodoWrite_writes=15, Write → hard_block with reason containing TodoWrite_writes=15', () => {
    const s = stateWith({ TodoWrite_writes: 15 });
    const r = evaluateThresholds(s, 'Write', false);
    expect(r.action).toBe('hard_block');
    expect(r.reason).toContain('TodoWrite_writes=15');
  });
});

describe('evaluateThresholds — skillMatched short-circuit', () => {
  it('Read=100, skill=true → allow regardless', () => {
    const big = stateWith({ Read: 100 });
    expect(evaluateThresholds(big, 'Read', true).action).toBe('allow');
  });
});

describe('evaluateThresholds — LS and AskUserQuestion warn thresholds', () => {
  it('LS=10, skill=false → soft_flag', () => {
    const s = stateWith({ LS: 10 });
    expect(evaluateThresholds(s, 'LS', false).action).toBe('soft_flag');
  });

  it('AskUserQuestion=2, skill=false → soft_flag', () => {
    const s = stateWith({ AskUserQuestion: 2 });
    expect(evaluateThresholds(s, 'AskUserQuestion', false).action).toBe('soft_flag');
  });
});
// A keyword repeated inside the first list must be counted only once.
describe('keywordOverlapCount — deduplication', () => {
  it('dedupes duplicates in a: ["router","router","gate"] ∩ ["router","gate"] = 2', () => {
    const withDuplicates = ['router', 'router', 'gate'];
    const unique = ['router', 'gate'];
    const overlap = keywordOverlapCount(withDuplicates, unique);
    expect(overlap).toBe(2);
  });
});
// DEFAULT_THRESHOLDS must be frozen at the top level and in the nested
// per-tool entries sampled here (Read and AskUserQuestion).
describe('DEFAULT_THRESHOLDS — deep freeze', () => {
  it('outer object is frozen', () => {
    const outerFrozen = Object.isFrozen(DEFAULT_THRESHOLDS);
    expect(outerFrozen).toBe(true);
  });

  it('nested Read threshold object is frozen', () => {
    const { Read: readThresholds } = DEFAULT_THRESHOLDS;
    expect(Object.isFrozen(readThresholds)).toBe(true);
  });

  it('nested AskUserQuestion threshold object is frozen', () => {
    const { AskUserQuestion: askThresholds } = DEFAULT_THRESHOLDS;
    expect(Object.isFrozen(askThresholds)).toBe(true);
  });
});