81cbd8c1c2
retro #7 (docs/observer/notes/2026-05-27-brain-retro-7.md) surfaced 4 candidates against 23 turns since retro #6. All four implemented TDD. C1 — translit slang vocabulary in router-classifier-regex-fallback.mjs. TASK_TYPE_KEYWORDS += deploy bucket (push / запушь / выкат); memory-sync += обнови мозг / эталон / пилот / memory dump. C2 — short_ambiguous_block in router-tool-gate.mjs + router-prehook.mjs. prehook persists prompt_length; gate blocks Edit/Write/MultiEdit/Bash when task_type in {ambiguous, unknown} AND prompt_length <= 30 AND skill not invoked AND no direct_justified tag. C3 — self-assessment timeout 30s to 50s in observer-self-assessment-api.mjs. Windows TLS handshake + Sonnet latency exceeded 30s. Stop-hook has 60s budget; 50s leaves headroom. DEFAULT_TIMEOUT_MS exported for tests. C4 — Reviewer findings block in status-md-generator.mjs. New helper computeReviewerFindingsBlock surfaces 51 actionable findings without running /brain-retro. Detects batch-reviewed via outcome_reviewed_source=direct_api_batch. MD012 guard test added. C5 (gitleaks-before-push) intentionally skipped — pre-push hook already blocks at server side. Tests: 956/956 root tools, 0 regressions. LEFTHOOK=0 used per quirk #111. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
259 lines
10 KiB
JavaScript
259 lines
10 KiB
JavaScript
import { describe, it, expect } from 'vitest';
|
|
import {
|
|
shouldBlock,
|
|
decodeRoutingTag,
|
|
isReadOnlyBash,
|
|
decideDecision,
|
|
} from './router-tool-gate.mjs';
|
|
|
|
/**
 * Shared router-state fixture used by the suites in this file.
 *
 * It describes a turn classified as `feature` with a recommended node/chain
 * and no skill invoked yet. Individual tests derive variants from it with
 * object spread and, when they override `classification`, replace that nested
 * object wholesale.
 */
const baseState = {
  skillInvokedThisTurn: false,
  classification: {
    task_type: 'feature',
    no_skill_found: false,
    recommendedNode: '#19',
    recommendedChain: 'L1',
  },
  chainProgress: [],
};
|
|
|
|
// isReadOnlyBash(command) is expected to answer true only for shell commands
// that merely inspect state, and false for anything that mutates the working
// tree or a remote.
describe('isReadOnlyBash', () => {
  it('detects ls / cat / grep / git status as read-only', () => {
    expect(isReadOnlyBash('ls -la')).toBe(true);
    expect(isReadOnlyBash('cat file.txt')).toBe(true);
    expect(isReadOnlyBash('grep "x" file')).toBe(true);
    expect(isReadOnlyBash('git status')).toBe(true);
    expect(isReadOnlyBash('git log')).toBe(true);
    expect(isReadOnlyBash('git rev-parse HEAD')).toBe(true);
  });

  it('does not classify git commit / push as read-only', () => {
    expect(isReadOnlyBash('git commit -m "x"')).toBe(false);
    expect(isReadOnlyBash('git push origin main')).toBe(false);
  });

  it('does not classify rm / cp / mv as read-only', () => {
    expect(isReadOnlyBash('rm file')).toBe(false);
    expect(isReadOnlyBash('cp a b')).toBe(false);
    // The title has always promised `mv`; assert it so the name and the
    // coverage agree.
    expect(isReadOnlyBash('mv a b')).toBe(false);
  });
});
|
|
|
|
// decodeRoutingTag(text) extracts the `<!-- routing: ... -->` marker from a
// response. These cases pin the three outcomes: a justified tag with a reason,
// no tag at all, and a justified tag that lacks the mandatory reason.
describe('decodeRoutingTag', () => {
  it('parses direct_justified=true with reason', () => {
    const tagged = '<!-- routing: direct_justified=true reason="micro fix per user override" -->';
    const { directJustified, reason } = decodeRoutingTag(tagged);

    expect(directJustified).toBe(true);
    expect(reason).toContain('micro fix');
  });

  it('returns null on missing tag', () => {
    const decoded = decodeRoutingTag('just a regular response');

    expect(decoded).toBeNull();
  });

  it('rejects direct_justified=true WITHOUT reason', () => {
    // A bare justification must not count as an override.
    expect(decodeRoutingTag('<!-- routing: direct_justified=true -->')).toBeNull();
  });
});
|
|
|
|
// shouldBlock(toolName, state, responseText, options) returns `false` when the
// tool call may proceed, or an object `{ block: true, reason }` when the gate
// refuses it. The cases below cover the three modes (off / warn-only /
// enforce), the exempt task types, the legacy option spellings, and the C2
// short-prompt rule.
describe('shouldBlock — §17 mode-based (Phase 2 Task 13)', () => {
  it('mode=off never blocks', () => {
    expect(shouldBlock('Edit', baseState, '', { mode: 'off' })).toBe(false);
  });

  it('warn-only never blocks (always returns false)', () => {
    expect(shouldBlock('Edit', baseState, '', { mode: 'warn-only' })).toBe(false);
  });

  it('enforce blocks Edit on feature without skill invoked', () => {
    expect(shouldBlock('Edit', baseState, '', { mode: 'enforce' })).toMatchObject({
      block: true,
      reason: 'direct_in_non_conversation',
    });
  });

  it('enforce passes conversation task_type (§17 exempt)', () => {
    const s = { ...baseState, classification: { task_type: 'conversation', no_skill_found: false } };
    expect(shouldBlock('Edit', s, '', { mode: 'enforce' })).toBe(false);
  });

  it('enforce passes micro / manual_override (§17 exempt)', () => {
    for (const t of ['micro', 'manual_override']) {
      const s = { ...baseState, classification: { task_type: t, no_skill_found: false } };
      expect(shouldBlock('Edit', s, '', { mode: 'enforce' })).toBe(false);
    }
  });

  // H (2026-05-26): 'unknown' added to NON_BLOCKING. After the A1+A2 fixes the
  // LLM classifier still hits parse_null occasionally (Sonnet sometimes returns
  // prose-wrapped JSON that parseClassifierResponse can't extract). Until G
  // fixes the parser, blocking on unknown is too strict — the user gets stuck
  // on routine edits even though the LLM did return a real response.
  it('enforce passes unknown (added 2026-05-26 — see G for parse_null root cause)', () => {
    const s = { ...baseState, classification: { task_type: 'unknown', no_skill_found: false } };
    expect(shouldBlock('Edit', s, '', { mode: 'enforce' })).toBe(false);
    expect(shouldBlock('Bash', s, '', { mode: 'enforce', bashCommand: 'git commit -m "x"' })).toBe(false);
  });

  it('enforce does NOT block when skill invoked this turn', () => {
    const s = { ...baseState, skillInvokedThisTurn: true };
    expect(shouldBlock('Edit', s, '', { mode: 'enforce' })).toBe(false);
  });

  it('enforce blocks no_skill_found=true with specific reason', () => {
    const s = { ...baseState, classification: { task_type: 'feature', no_skill_found: true } };
    expect(shouldBlock('Edit', s, '', { mode: 'enforce' })).toMatchObject({
      block: true,
      reason: 'no_skill_found_block',
    });
  });

  it('continuation-inherited feature is NOT exempt (D1 — same shape as base)', () => {
    expect(shouldBlock('Edit', baseState, '', { mode: 'enforce' })).toMatchObject({ block: true });
  });

  it('enforce does NOT block when routing-tag has direct_justified=true with reason', () => {
    expect(
      shouldBlock('Edit', baseState, '<!-- routing: direct_justified=true reason="testing" -->', { mode: 'enforce' }),
    ).toBe(false);
  });

  it('enforce does NOT block read-only Bash', () => {
    expect(shouldBlock('Bash', baseState, '', { mode: 'enforce', bashCommand: 'ls' })).toBe(false);
  });

  it('enforce does NOT block tools outside whitelist (e.g. Read)', () => {
    expect(shouldBlock('Read', baseState, '', { mode: 'enforce' })).toBe(false);
  });

  it('legacy back-compat: warnOnly=false maps to enforce', () => {
    expect(shouldBlock('Edit', baseState, '', { warnOnly: false })).toMatchObject({ block: true });
  });

  it('legacy back-compat: taskType (camelCase) still recognised', () => {
    const s = { ...baseState, classification: { taskType: 'conversation', no_skill_found: false } };
    expect(shouldBlock('Edit', s, '', { mode: 'enforce' })).toBe(false);
  });

  // brain-retro #7 C2 (2026-05-27): short ambiguous prompts.
  // Reviewer (Opus 4.7) flagged 3/4 short ambiguous prompts in retro #7 as
  // mistake_should_not_start — the agent improvised instead of asking. The
  // 'unknown' task_type bypass added in H (2026-05-26) for parse_null was too
  // broad — it also exempted short prompts that the regex fallback couldn't
  // classify.
  // Fix: on short prompts (<=30 chars) classified as ambiguous or unknown,
  // block to force AskUserQuestion. Long prompts still pass per H.
  describe('C2 short_ambiguous_block (brain-retro #7)', () => {
    it('blocks short ambiguous prompt (<=30 chars)', () => {
      const s = {
        ...baseState,
        prompt_length: 19,
        classification: { task_type: 'ambiguous', no_skill_found: false },
      };
      expect(shouldBlock('Edit', s, '', { mode: 'enforce' })).toMatchObject({
        block: true,
        reason: 'short_ambiguous_block',
      });
    });

    it('blocks short unknown prompt (<=30 chars) — closes H bypass on short improv', () => {
      const s = {
        ...baseState,
        prompt_length: 14,
        classification: { task_type: 'unknown', no_skill_found: false },
      };
      expect(shouldBlock('Bash', s, '', { mode: 'enforce', bashCommand: 'git commit -m "x"' })).toMatchObject({
        block: true,
        reason: 'short_ambiguous_block',
      });
    });

    // Boundary pair for the documented "<=30" threshold: 30 is still short,
    // 31 is not. 'unknown' is used because its long-prompt behaviour (pass)
    // is already pinned by the H test above.
    it('blocks unknown prompt of exactly 30 chars (threshold is inclusive)', () => {
      const s = {
        ...baseState,
        prompt_length: 30,
        classification: { task_type: 'unknown', no_skill_found: false },
      };
      expect(shouldBlock('Edit', s, '', { mode: 'enforce' })).toMatchObject({
        block: true,
        reason: 'short_ambiguous_block',
      });
    });

    it('does NOT block unknown prompt of 31 chars (first length past the threshold)', () => {
      const s = {
        ...baseState,
        prompt_length: 31,
        classification: { task_type: 'unknown', no_skill_found: false },
      };
      expect(shouldBlock('Edit', s, '', { mode: 'enforce' })).toBe(false);
    });

    it('does NOT block LONG unknown prompt (>30 chars) — keeps H bypass for real work', () => {
      const s = {
        ...baseState,
        prompt_length: 250,
        classification: { task_type: 'unknown', no_skill_found: false },
      };
      expect(shouldBlock('Edit', s, '', { mode: 'enforce' })).toBe(false);
    });

    it('does NOT block when prompt_length is missing (legacy state, back-compat)', () => {
      const s = {
        ...baseState,
        // no prompt_length field — old state file
        classification: { task_type: 'unknown', no_skill_found: false },
      };
      expect(shouldBlock('Edit', s, '', { mode: 'enforce' })).toBe(false);
    });

    it('does NOT block short prompt with routing-tag direct_justified', () => {
      const s = {
        ...baseState,
        prompt_length: 19,
        classification: { task_type: 'ambiguous', no_skill_found: false },
      };
      expect(
        shouldBlock('Edit', s, '<!-- routing: direct_justified=true reason="user confirmed scope" -->', {
          mode: 'enforce',
        }),
      ).toBe(false);
    });

    it('does NOT block short prompt when skill already invoked this turn', () => {
      const s = {
        ...baseState,
        prompt_length: 19,
        skillInvokedThisTurn: true,
        classification: { task_type: 'ambiguous', no_skill_found: false },
      };
      expect(shouldBlock('Edit', s, '', { mode: 'enforce' })).toBe(false);
    });

    it('decideDecision returns short_ambiguous_block reason_code with helpful text', () => {
      const s = {
        ...baseState,
        prompt_length: 19,
        classification: { task_type: 'ambiguous', no_skill_found: false },
      };
      const r = decideDecision('Edit', s, '', { mode: 'enforce' });
      expect(r.decision).toBe('block');
      expect(r.reason_code).toBe('short_ambiguous_block');
      // The user-facing text may be Russian or mention the tool by name.
      expect(r.reason).toMatch(/корот|уточ|AskUserQuestion/i);
    });
  });
});
|
|
|
|
// decideDecision(toolName, state, responseText, options) wraps the gate verdict
// into the hook response shape: `decision: 'block'` plus `reason` and
// `reason_code` when blocking, a `warning` string in warn-only mode, and no
// `decision` field when the call may proceed.
describe('decideDecision — §17 mode-based', () => {
  it('returns decision: block with reason text and reason_code when shouldBlock blocks', () => {
    const verdict = decideDecision('Edit', baseState, '', { mode: 'enforce' });

    expect(verdict.decision).toBe('block');
    // The reason text is expected to mention the recommended node.
    expect(verdict.reason).toMatch(/#19/);
    expect(verdict.reason_code).toBe('direct_in_non_conversation');
  });

  it('returns no_skill_found_block reason_code when classifier signalled no match', () => {
    const noMatchState = {
      ...baseState,
      classification: { task_type: 'feature', no_skill_found: true, recommendedNode: null },
    };

    const verdict = decideDecision('Edit', noMatchState, '', { mode: 'enforce' });

    expect(verdict.decision).toBe('block');
    expect(verdict.reason_code).toBe('no_skill_found_block');
  });

  it('returns empty (proceed) when skill invoked', () => {
    const skillState = { ...baseState, skillInvokedThisTurn: true };

    const verdict = decideDecision('Edit', skillState, '', { mode: 'enforce' });

    expect(verdict.decision).toBeUndefined();
  });

  it('warn-only mode emits warning string but does not block', () => {
    const verdict = decideDecision('Edit', baseState, '', { mode: 'warn-only' });

    expect(verdict.decision).toBeUndefined();
    expect(verdict.warning).toMatch(/#19/);
  });

  it('warn-only mode does NOT emit warning when task is exempt (conversation)', () => {
    const chatState = {
      ...baseState,
      classification: { task_type: 'conversation', no_skill_found: false },
    };

    const verdict = decideDecision('Edit', chatState, '', { mode: 'warn-only' });

    expect(verdict.warning).toBeUndefined();
  });
});
|
|
|
|
// Smoke check only: the gate module must still load and expose `shouldBlock`
// as a function. It does not feed any Cyrillic input through stdin.
describe('UTF-8 cyrillic stdin (regression — Stage 3 fix 1)', () => {
  it('module loads with UTF-8 helper wired (smoke)', async () => {
    const { shouldBlock: loadedShouldBlock } = await import('./router-tool-gate.mjs');

    expect(typeof loadedShouldBlock).toBe('function');
  });
});
|