397777089e
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
173 lines
6.7 KiB
JavaScript
173 lines
6.7 KiB
JavaScript
// tools/safe-baseline-metering.test.mjs
|
|
import { describe, it, expect } from 'vitest';
|
|
import {
|
|
RESET_MARKERS, isResetMarker, DEFAULT_THRESHOLDS, deriveTaskId,
|
|
keywordOverlapCount, shouldInheritTaskId, newCounterState,
|
|
incrementCounter, evaluateThresholds, isMutatingForBaseline,
|
|
} from './safe-baseline-metering.mjs';
|
|
|
|
// isResetMarker: checks whether a prompt contains the "новая задача" reset phrase.
describe('isResetMarker', () => {
  it('detects "новая задача" case-insensitive', () => {
    // The phrase sits mid-sentence and is written in upper case.
    const prompt = 'Окей, НОВАЯ ЗАДАЧА теперь';
    const detected = isResetMarker(prompt);
    expect(detected).toBe(true);
  });

  it('returns false for unrelated prompt', () => {
    const prompt = 'почини баг в парсере';
    const detected = isResetMarker(prompt);
    expect(detected).toBe(false);
  });
});
|
|
|
|
// deriveTaskId: the same prompt must always yield the same 16-character hex id.
describe('deriveTaskId', () => {
  it('is deterministic 16-hex for same input', () => {
    const prompt = 'fix foo';
    const firstId = deriveTaskId(prompt);
    const secondId = deriveTaskId(prompt);

    expect(firstId).toBe(secondId);
    // Exactly sixteen lowercase hexadecimal characters, nothing else.
    expect(firstId).toMatch(/^[0-9a-f]{16}$/);
  });

  it('differs for different input', () => {
    const idForA = deriveTaskId('a');
    const idForB = deriveTaskId('b');
    expect(idForA).not.toBe(idForB);
  });
});
|
|
|
|
// keywordOverlapCount counts shared keywords; shouldInheritTaskId is exercised
// with different overlap sizes and with/without a reset marker in the prompt.
describe('keywordOverlapCount / shouldInheritTaskId', () => {
  it('counts intersection', () => {
    const previous = ['foo', 'bar', 'baz'];
    const current = ['foo', 'bar', 'qux'];
    const overlap = keywordOverlapCount(previous, current);
    expect(overlap).toBe(2);
  });

  it('inherits when overlap>=2 and no reset marker', () => {
    const previous = ['foo', 'bar'];
    const current = ['foo', 'bar'];
    const inherits = shouldInheritTaskId(previous, current, 'edge case');
    expect(inherits).toBe(true);
  });

  it('does NOT inherit on reset marker', () => {
    const previous = ['foo', 'bar'];
    const current = ['foo', 'bar'];
    // Same keywords as the inheriting case; only the prompt differs.
    const inherits = shouldInheritTaskId(previous, current, 'новая задача');
    expect(inherits).toBe(false);
  });

  it('does NOT inherit when overlap<2', () => {
    const previous = ['foo'];
    const current = ['foo', 'x'];
    const inherits = shouldInheritTaskId(previous, current, 'continue');
    expect(inherits).toBe(false);
  });
});
|
|
|
|
// incrementCounter: bumps the counter for a metered tool and must not mutate
// the state object it is given.
describe('incrementCounter', () => {
  // Build a fresh state for every test so that an accidental in-place mutation
  // in one test cannot leak into (and mis-attribute a failure to) another.
  const freshState = () =>
    newCounterState({ taskId: 't', startedAtIso: '2026-05-29T00:00:00Z', firstPromptExcerpt: 'p' });

  it('increments Read counter', () => {
    expect(incrementCounter(freshState(), 'Read').counts.Read).toBe(1);
  });

  it('maps TodoWrite to TodoWrite_writes', () => {
    expect(incrementCounter(freshState(), 'TodoWrite').counts.TodoWrite_writes).toBe(1);
  });

  it('returns state unchanged for non-metered tool (Edit)', () => {
    const start = freshState();
    // Snapshot taken before the call, so the comparison stays meaningful even
    // if the function were to return (or mutate) the very same object.
    const countsBefore = { ...start.counts };

    const result = incrementCounter(start, 'Edit');

    expect(result.counts.Read).toBe(0);
    // Every counter, not just Read, must be untouched by a non-metered tool.
    expect(result.counts).toEqual(countsBefore);
  });

  it('is immutable — original state not mutated', () => {
    const start = freshState();
    const countsBefore = { ...start.counts };

    incrementCounter(start, 'Read');

    expect(start.counts.Read).toBe(0);
    expect(start.counts).toEqual(countsBefore);
  });
});
|
|
|
|
// evaluateThresholds: action reported for the Read and Grep counters at the
// values exercised below, with no skill matched.
describe('evaluateThresholds — warn/soft_flag', () => {
  // Fresh counter state with the given counters overridden.
  const withCounts = (overrides) => {
    const base = newCounterState({ taskId: 't', startedAtIso: '2026-05-29T00:00:00Z', firstPromptExcerpt: 'p' });
    return { ...base, counts: { ...base.counts, ...overrides } };
  };

  // Action reported for `tool` when no skill matched.
  const actionFor = (overrides, tool) => evaluateThresholds(withCounts(overrides), tool, false).action;

  it('Read=30 → soft_flag', () => {
    expect(actionFor({ Read: 30 }, 'Read')).toBe('soft_flag');
  });

  it('Read=29 → allow', () => {
    expect(actionFor({ Read: 29 }, 'Read')).toBe('allow');
  });

  it('Grep=15 → soft_flag', () => {
    expect(actionFor({ Grep: 15 }, 'Grep')).toBe('soft_flag');
  });
});
|
|
|
|
// evaluateThresholds: with high Read/Glob counters, calling that same tool
// again is still expected to be allowed.
describe('evaluateThresholds — hard threshold, safe-baseline tool stays allowed', () => {
  // Fresh counter state whose counters are patched with `counterPatch`.
  const buildState = (counterPatch) => {
    const fresh = newCounterState({ taskId: 't', startedAtIso: '2026-05-29T00:00:00Z', firstPromptExcerpt: 'p' });
    const counts = { ...fresh.counts, ...counterPatch };
    return { ...fresh, counts };
  };

  it('Read=60 → allow (continuation reading)', () => {
    const verdict = evaluateThresholds(buildState({ Read: 60 }), 'Read', false);
    expect(verdict.action).toBe('allow');
  });

  it('Glob=20 → allow', () => {
    const verdict = evaluateThresholds(buildState({ Glob: 20 }), 'Glob', false);
    expect(verdict.action).toBe('allow');
  });
});
|
|
|
|
// evaluateThresholds: Edit/Bash/Write calls evaluated against states with high
// Read or TodoWrite_writes counters, with and without a skill match.
describe('evaluateThresholds — mutating hard-block', () => {
  // Evaluate `tool` against a fresh state whose counters are overridden by `counts`.
  const evaluate = (counts, tool, skillMatched) => {
    const fresh = newCounterState({ taskId: 't', startedAtIso: '2026-05-29T00:00:00Z', firstPromptExcerpt: 'p' });
    const state = { ...fresh, counts: { ...fresh.counts, ...counts } };
    return evaluateThresholds(state, tool, skillMatched);
  };

  it('Read=60, Edit → hard_block with reason containing Read=60', () => {
    const { action, reason } = evaluate({ Read: 60 }, 'Edit', false);
    expect(action).toBe('hard_block');
    expect(reason).toContain('Read=60');
  });

  it('Read=60, Bash → hard_block', () => {
    const outcome = evaluate({ Read: 60 }, 'Bash', false);
    expect(outcome.action).toBe('hard_block');
  });

  it('Read=59, Edit → allow (no counter at hard threshold)', () => {
    const outcome = evaluate({ Read: 59 }, 'Edit', false);
    expect(outcome.action).toBe('allow');
  });

  it('Read=60, Edit, skill=true → allow (skill match overrides)', () => {
    const outcome = evaluate({ Read: 60 }, 'Edit', true);
    expect(outcome.action).toBe('allow');
  });

  it('TodoWrite_writes=15, Write → hard_block with reason containing TodoWrite_writes=15', () => {
    const { action, reason } = evaluate({ TodoWrite_writes: 15 }, 'Write', false);
    expect(action).toBe('hard_block');
    expect(reason).toContain('TodoWrite_writes=15');
  });
});
|
|
|
|
// evaluateThresholds: a matched skill is expected to yield "allow" even with a
// very high Read counter.
describe('evaluateThresholds — skillMatched short-circuit', () => {
  it('Read=100, skill=true → allow regardless', () => {
    const base = newCounterState({ taskId: 't', startedAtIso: '2026-05-29T00:00:00Z', firstPromptExcerpt: 'p' });
    const inflatedCounts = { ...base.counts, Read: 100 };
    const inflatedState = { ...base, counts: inflatedCounts };

    const { action } = evaluateThresholds(inflatedState, 'Read', true);

    expect(action).toBe('allow');
  });
});
|
|
|
|
// evaluateThresholds: LS=10 and AskUserQuestion=2 are each expected to produce
// a soft flag when no skill matched.
describe('evaluateThresholds — LS and AskUserQuestion warn thresholds', () => {
  const seed = { taskId: 't', startedAtIso: '2026-05-29T00:00:00Z', firstPromptExcerpt: 'p' };

  // Fresh counter state with the supplied counters layered on top.
  function patchedState(counterPatch) {
    const initial = newCounterState(seed);
    const mergedCounts = { ...initial.counts, ...counterPatch };
    return { ...initial, counts: mergedCounts };
  }

  it('LS=10, skill=false → soft_flag', () => {
    const { action } = evaluateThresholds(patchedState({ LS: 10 }), 'LS', false);
    expect(action).toBe('soft_flag');
  });

  it('AskUserQuestion=2, skill=false → soft_flag', () => {
    const { action } = evaluateThresholds(patchedState({ AskUserQuestion: 2 }), 'AskUserQuestion', false);
    expect(action).toBe('soft_flag');
  });
});
|
|
|
|
// keywordOverlapCount: a keyword repeated in the first list is counted once.
describe('keywordOverlapCount — deduplication', () => {
  it('dedupes duplicates in a: ["router","router","gate"] ∩ ["router","gate"] = 2', () => {
    const withDuplicates = ['router', 'router', 'gate'];
    const unique = ['router', 'gate'];
    const overlap = keywordOverlapCount(withDuplicates, unique);
    expect(overlap).toBe(2);
  });
});
|
|
|
|
// DEFAULT_THRESHOLDS: the top-level object and the sampled nested entries must
// all be frozen.
describe('DEFAULT_THRESHOLDS — deep freeze', () => {
  it('outer object is frozen', () => {
    const outerFrozen = Object.isFrozen(DEFAULT_THRESHOLDS);
    expect(outerFrozen).toBe(true);
  });

  // One test per sampled nested entry; titles are generated from the key name.
  for (const key of ['Read', 'AskUserQuestion']) {
    it(`nested ${key} threshold object is frozen`, () => {
      const nestedFrozen = Object.isFrozen(DEFAULT_THRESHOLDS[key]);
      expect(nestedFrozen).toBe(true);
    });
  }
});
|