0ea3b5d70d
Brain-retro #5 candidate C, hole 9: enforce-rationalization-audit.mjs only logged rationalization phrases (e.g., 'just this once', 'пока без') and never blocked on them. The vocabulary was also sparse. Changes:
- Expanded the vocabulary by 5 phrases: 'давай разок', 'только сейчас', 'один раз без правил', 'на этот раз без', 'я знаю что не надо но'.
- Made decide() accept priorFlagCount; it now blocks on the 3rd flag in a session.
- main() reads rationalization-flags-<session>.jsonl to compute the prior flag count before calling decide().
137 lines
5.7 KiB
JavaScript
137 lines
5.7 KiB
JavaScript
import { describe, it, expect } from 'vitest';
|
|
import { findRationalizationPhrases, detectProdEditWithoutTest, audit, decide } from './enforce-rationalization-audit.mjs';
|
|
|
|
// Phrase-matcher checks: each case feeds one sentence to
// findRationalizationPhrases and inspects the list of phrases it reports.
describe('findRationalizationPhrases', () => {
  it('detects "just this once" in mixed case', () => {
    // The input is mixed-case; the expected hit is the lower-case phrase.
    const hits = findRationalizationPhrases('Hmm, Just This Once we will skip');
    expect(hits).toContain('just this once');
  });

  it('detects "пока без" Russian', () => {
    const hits = findRationalizationPhrases('сделаем пока без тестов');
    expect(hits).toContain('пока без');
  });

  it('detects multiple phrases in one text', () => {
    // Only the number of hits is pinned here, not which phrases matched.
    const sample = 'временно делаем потом разберусь';
    expect(findRationalizationPhrases(sample).length).toBeGreaterThanOrEqual(2);
  });

  it('returns empty array on clean text', () => {
    const hits = findRationalizationPhrases('coverage: skill:tdd');
    expect(hits).toEqual([]);
  });
});
|
|
|
|
// Checks for detectProdEditWithoutTest: given the tool uses of one turn, the
// tests expect it to return the edited paths that had no accompanying test edit.
describe('detectProdEditWithoutTest', () => {
  // Builds an Edit tool-use record targeting the given path.
  const editOf = (file_path) => ({ name: 'Edit', input: { file_path } });

  it('flags prod edit without any test edit in turn', () => {
    const flagged = detectProdEditWithoutTest([editOf('tools/foo.mjs')]);
    expect(flagged).toEqual(['tools/foo.mjs']);
  });

  it('does NOT flag when test also edited', () => {
    // The test file is edited first, then the production file.
    const toolUses = [editOf('tools/foo.test.mjs'), editOf('tools/foo.mjs')];
    const flagged = detectProdEditWithoutTest(toolUses);
    expect(flagged).toEqual([]);
  });

  it('does NOT flag for non-prod paths', () => {
    const flagged = detectProdEditWithoutTest([editOf('docs/x.md')]);
    expect(flagged).toEqual([]);
  });
});
|
|
|
|
// Checks for audit(): each case builds a two-entry transcript (a user message
// followed by one assistant message) and inspects the flags audit returns.
describe('audit', () => {
  // Wraps assistant content parts in a transcript preceded by a user "go" message.
  const transcriptWith = (assistantContent) => [
    { message: { role: 'user', content: 'go' } },
    { message: { role: 'assistant', content: assistantContent } },
  ];

  // Builds a tool_use content part.
  const toolUse = (id, name, input) => ({ type: 'tool_use', id, name, input });

  // Returns the first flag of the requested kind, or undefined when none exists.
  const flagOfKind = (flags, kind) => flags.find((flag) => flag.kind === kind);

  it('flags rationalization phrases in assistant text', () => {
    const entries = transcriptWith([{ type: 'text', text: 'just this once без скила' }]);
    const flags = audit(entries);
    expect(flagOfKind(flags, 'rationalization-phrase')).toBeTruthy();
  });

  it('flags prod-edit-without-test', () => {
    const entries = transcriptWith([
      toolUse('t1', 'Edit', { file_path: 'tools/foo.mjs' }),
    ]);
    const flags = audit(entries);
    expect(flagOfKind(flags, 'prod-edit-without-test')).toBeTruthy();
  });

  it('flags weak commit messages (<12 chars)', () => {
    const entries = transcriptWith([
      toolUse('t1', 'Bash', { command: 'git commit -m "fix"' }),
    ]);
    const flags = audit(entries);
    expect(flagOfKind(flags, 'weak-commit-message')).toBeTruthy();
  });

  it('returns no flags for clean turn', () => {
    // Clean turn: neutral text, and the production edit is paired with a test edit.
    const entries = transcriptWith([
      { type: 'text', text: 'coverage: skill:tdd\nworking properly' },
      toolUse('t1', 'Edit', { file_path: 'tools/foo.test.mjs' }),
      toolUse('t2', 'Edit', { file_path: 'tools/foo.mjs' }),
    ]);
    expect(audit(entries)).toEqual([]);
  });
});
|
|
|
|
// Checks for the newly added vocabulary: every phrase below must be reported
// by findRationalizationPhrases when it appears inside a sentence.
describe('vocab — new phrases', () => {
  // One row per phrase: test title, sample sentence, and the expected hit.
  const cases = [
    { title: 'detects "давай разок"', sample: 'давай разок без тестов', phrase: 'давай разок' },
    { title: 'detects "только сейчас"', sample: 'только сейчас пропустим', phrase: 'только сейчас' },
    { title: 'detects "один раз без правил"', sample: 'один раз без правил сделаем', phrase: 'один раз без правил' },
    { title: 'detects "на этот раз без"', sample: 'на этот раз без скила', phrase: 'на этот раз без' },
    { title: 'detects "я знаю что не надо но"', sample: 'я знаю что не надо но пропустим', phrase: 'я знаю что не надо но' },
  ];

  // Register the cases in table order so the run order is unchanged.
  for (const { title, sample, phrase } of cases) {
    it(title, () => {
      expect(findRationalizationPhrases(sample)).toContain(phrase);
    });
  }
});
|
|
|
|
// Checks for decide(): a detected phrase is tolerated while priorFlagCount is
// 0 or 1, blocks from priorFlagCount=2 onward, and never blocks when the text
// is clean or when override is set.
describe('decide — escalation on 3rd flag', () => {
  const sessionId = 'test-session';
  const textWithPhrase = 'just this once';

  // Calls decide with the shared phrase text and session id; `extra` supplies
  // the remaining arguments and may replace assistantText.
  const decideWith = (extra) => decide({ assistantText: textWithPhrase, sessionId, ...extra });

  it('does NOT block when priorFlagCount=0', () => {
    const { block, detected } = decideWith({ priorFlagCount: 0 });
    expect(block).toBe(false);
    // The phrase is still reported even though nothing is blocked.
    expect(detected.length).toBeGreaterThan(0);
  });

  it('does NOT block when priorFlagCount=1', () => {
    const { block } = decideWith({ priorFlagCount: 1 });
    expect(block).toBe(false);
  });

  it('blocks when priorFlagCount=2 (3rd occurrence)', () => {
    const { block, message } = decideWith({ priorFlagCount: 2 });
    expect(block).toBe(true);
    expect(message).toMatch(/rationali/i);
  });

  it('blocks when priorFlagCount=5 (subsequent occurrences)', () => {
    const { block } = decideWith({ priorFlagCount: 5 });
    expect(block).toBe(true);
  });

  it('does NOT block clean text even with priorFlagCount=10', () => {
    const { block, detected } = decideWith({ assistantText: 'coverage: skill:tdd', priorFlagCount: 10 });
    expect(block).toBe(false);
    expect(detected).toEqual([]);
  });

  it('override=true suppresses block even on 3rd flag', () => {
    const { block } = decideWith({ override: true, priorFlagCount: 2 });
    expect(block).toBe(false);
  });
});
|