fix(enforce): hole 9 — rationalization-audit blocks on 3rd flag + expanded vocab
Brain-retro #5 candidate C, hole 9: enforce-rationalization-audit.mjs only logged rationalization phrases (e.g., 'just this once', 'пока без') and never blocked. The vocabulary was also sparse.

Changes:
- Expanded the vocabulary by 5 phrases: 'давай разок', 'только сейчас', 'один раз без правил', 'на этот раз без', 'я знаю что не надо но'.
- decide() now accepts priorFlagCount and blocks on the 3rd flag within a session (priorFlagCount >= 2 and a phrase detected in the current text).
- main() reads rationalization-flags-<session>.jsonl to compute the prior flag count before calling decide().
This commit is contained in:
@@ -22,6 +22,7 @@ import {
|
||||
lastAssistantText,
|
||||
turnToolUses,
|
||||
appendRationalizationFlag,
|
||||
readRationalizationFlags,
|
||||
exitDecision,
|
||||
isProductionCodePath,
|
||||
} from './enforce-hook-helpers.mjs';
|
||||
@@ -39,6 +40,12 @@ const RATIONALIZATION_PHRASES = [
|
||||
'rationalize',
|
||||
'без церемоний',
|
||||
'без скила сейчас',
|
||||
// expanded vocabulary
|
||||
'давай разок',
|
||||
'только сейчас',
|
||||
'один раз без правил',
|
||||
'на этот раз без',
|
||||
'я знаю что не надо но',
|
||||
];
|
||||
|
||||
export function findRationalizationPhrases(text) {
|
||||
@@ -87,14 +94,38 @@ export function audit(transcriptEntries) {
|
||||
return flags;
|
||||
}
|
||||
|
||||
/**
 * Formats a number as an English ordinal ("1st", "2nd", "3rd", "4th", "11th", "21st").
 *
 * @param {number} n - The number to format.
 * @returns {string} The number followed by its ordinal suffix.
 */
function formatOrdinal(n) {
  const lastTwoDigits = n % 100;
  // 11, 12 and 13 are irregular: they take "th" despite ending in 1, 2, 3.
  if (lastTwoDigits >= 11 && lastTwoDigits <= 13) return `${n}th`;
  switch (n % 10) {
    case 1:
      return `${n}st`;
    case 2:
      return `${n}nd`;
    case 3:
      return `${n}rd`;
    default:
      return `${n}th`;
  }
}

/**
 * Pure decision seam — injectable priorFlagCount for testability.
 *
 * Scans the assistant text with findRationalizationPhrases() and decides
 * whether the turn should be blocked. Blocks on the 3rd flag of the same
 * session, i.e. when priorFlagCount >= 2 and the current text contains at
 * least one detected phrase.
 *
 * @param {object} params
 * @param {string} [params.assistantText] - Text to scan; a falsy value is scanned as ''.
 * @param {string} [params.sessionId] - Accepted but not used by the decision itself.
 * @param {boolean} [params.override=false] - When true, never block; detections are still returned.
 * @param {number} [params.priorFlagCount=0] - Number of flags counted for the session before this call.
 * @returns {{ block: boolean, detected: Array, message?: string }} `detected` is whatever
 *   findRationalizationPhrases() returned (presumably the matched phrases — confirm against
 *   that helper); `message` is present only when `block` is true.
 */
export function decide({ assistantText, sessionId: _sessionId, override = false, priorFlagCount = 0 }) {
  const detected = findRationalizationPhrases(assistantText || '');

  // An explicit override always wins, but the caller still gets the detections.
  if (override) return { block: false, detected };

  if (priorFlagCount >= 2 && detected.length > 0) {
    // The current detection is flag number priorFlagCount + 1 for this session.
    const ordinal = formatOrdinal(priorFlagCount + 1);
    return {
      block: true,
      message: `Rationalization detected (phrase: "${detected[0]}"). This is the ${ordinal} flag in this session — blocking to prevent pattern escalation.`,
      detected,
    };
  }

  return { block: false, detected };
}
|
||||
|
||||
async function main() {
|
||||
try {
|
||||
const raw = await readStdin();
|
||||
const event = parseEventJson(raw);
|
||||
const transcript = readTranscript(event.transcript_path);
|
||||
const flags = audit(transcript);
|
||||
|
||||
// Count prior flags before appending new ones
|
||||
const priorFlagCount = readRationalizationFlags(event.session_id).length;
|
||||
for (const f of flags) appendRationalizationFlag(event.session_id, f.kind, f.evidence);
|
||||
exitDecision({ block: false });
|
||||
|
||||
// Check if we should block based on rationalization phrases specifically
|
||||
const text = lastAssistantText(transcript);
|
||||
const decision = decide({ assistantText: text, sessionId: event.session_id, priorFlagCount });
|
||||
exitDecision(decision.block ? { block: true, message: decision.message } : { block: false });
|
||||
} catch {
|
||||
exitDecision({ block: false });
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { findRationalizationPhrases, detectProdEditWithoutTest, audit } from './enforce-rationalization-audit.mjs';
|
||||
import { findRationalizationPhrases, detectProdEditWithoutTest, audit, decide } from './enforce-rationalization-audit.mjs';
|
||||
|
||||
describe('findRationalizationPhrases', () => {
|
||||
it('detects "just this once" in mixed case', () => {
|
||||
@@ -78,3 +78,59 @@ describe('audit', () => {
|
||||
expect(audit(entries)).toEqual([]);
|
||||
});
|
||||
});
|
||||
|
||||
// Table-driven check that every phrase added to the vocabulary is detected
// when it appears inside a longer sentence.
describe('vocab — new phrases', () => {
  // [phrase expected in the result, sample sentence containing it]
  const samples = [
    ['давай разок', 'давай разок без тестов'],
    ['только сейчас', 'только сейчас пропустим'],
    ['один раз без правил', 'один раз без правил сделаем'],
    ['на этот раз без', 'на этот раз без скила'],
    ['я знаю что не надо но', 'я знаю что не надо но пропустим'],
  ];

  for (const [phrase, sentence] of samples) {
    it(`detects "${phrase}"`, () => {
      const found = findRationalizationPhrases(sentence);
      expect(found).toContain(phrase);
    });
  }
});
|
||||
|
||||
// Exercises decide() across prior-flag counts: no block below the threshold,
// block from the 3rd flag onward, and no block for clean text or with override.
describe('decide — escalation on 3rd flag', () => {
  const flaggedText = 'just this once';

  // Calls decide() with the shared session id and flagged text unless overridden.
  const decideWith = (overrides) =>
    decide({ assistantText: flaggedText, sessionId: 'test-session', ...overrides });

  it('does NOT block when priorFlagCount=0', () => {
    const outcome = decideWith({ priorFlagCount: 0 });
    expect(outcome.block).toBe(false);
    expect(outcome.detected.length).toBeGreaterThan(0);
  });

  it('does NOT block when priorFlagCount=1', () => {
    const outcome = decideWith({ priorFlagCount: 1 });
    expect(outcome.block).toBe(false);
  });

  it('blocks when priorFlagCount=2 (3rd occurrence)', () => {
    const outcome = decideWith({ priorFlagCount: 2 });
    expect(outcome.block).toBe(true);
    expect(outcome.message).toMatch(/rationali/i);
  });

  it('blocks when priorFlagCount=5 (subsequent occurrences)', () => {
    const outcome = decideWith({ priorFlagCount: 5 });
    expect(outcome.block).toBe(true);
  });

  it('does NOT block clean text even with priorFlagCount=10', () => {
    const outcome = decideWith({ assistantText: 'coverage: skill:tdd', priorFlagCount: 10 });
    expect(outcome.block).toBe(false);
    expect(outcome.detected).toEqual([]);
  });

  it('override=true suppresses block even on 3rd flag', () => {
    const outcome = decideWith({ override: true, priorFlagCount: 2 });
    expect(outcome.block).toBe(false);
  });
});
|
||||
|
||||
Reference in New Issue
Block a user