Files
portal/tools/router-tool-gate.test.mjs
T
Дмитрий 7b4da1477e fix(classifier,gate): G parser-quirks + H unknown-not-blocking + A1/A2/B3/C1
Brain-retro #6 follow-up #2 (consolidated). Eight independent fixes:

A1 — task_cost wiring (cost tracking)
  - router-prehook.mjs: capture classifier LLM usage via onUsage callback,
    persist to state.task_cost.classifier_input_tokens / output_tokens.
  - observer-transcript-parser.mjs: merge router-state.task_cost on top of
    extractTokenUsage(turn). State-file values win for classifier/
    self_assessment/reviewer fields.
  - New buildCostFromClassifierUsage() exported from router-prehook.
  - Verified live: state file now shows real input_tokens=190 /
    output_tokens=598 / cache_read=10075 (was 0 before).

A2 — self-assessment coverage
  - observer-self-assessment-api.mjs: DEFAULT_TIMEOUT_MS 10s -> 30s.
  - .claude/settings.json: Stop-hook timeout 15s -> 60s.
  - Same Windows TLS handshake issue. Was 85% no_self_assessment in retro #6.

B3 — brain-retro SKILL.md reconciliation
  - Step 5b: batch=default for N>=20, subagent for N<20.

C1 — dead-code cleanup
  - Removed recommendNode import + getClassificationMap + getDormancy from
    observer-transcript-parser.mjs.

G — parseClassifierResponse Pass 3 (fixLLMJsonQuirks)
  - Root cause: real Sonnet output sometimes contains raw newlines inside
    string values (multi-line reason_for_choice) and trailing commas, which
    strict JSON.parse rejects. Result was llm_error_type=parse_null on
    every other call, falling back to regex with task_type=unknown.
  - Fix: after Pass 1 (clean) and Pass 2 (brace-extract) fail, try Pass 3
    that escapes raw newline/tab inside string values and strips trailing
    commas before final JSON.parse attempt. Pure char-walk, no JSON5 dep.

H — 'unknown' added to NON_BLOCKING_TASK_TYPES in router-tool-gate.mjs
  - Until G fully proves itself, blocking Bash/Edit on unknown is too strict.
    With G in place, parse_null should be rare; H gives a safety net.

Tests added: +9 across 5 test files. Regression: 913 vitest tests in tools/.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-26 19:25:16 +03:00

175 lines
6.9 KiB
JavaScript

import { describe, it, expect } from 'vitest';
import {
shouldBlock,
decodeRoutingTag,
isReadOnlyBash,
decideDecision,
} from './router-tool-gate.mjs';
/**
 * Baseline router state used as the starting point for the suites below:
 * a `feature` classification with a recommended node and chain, no skill
 * invoked this turn, and an empty chain progress list. Individual tests
 * derive variants from it with object spread instead of mutating it.
 */
const baseState = {
  skillInvokedThisTurn: false,
  classification: {
    task_type: 'feature',
    no_skill_found: false,
    recommendedNode: '#19',
    recommendedChain: 'L1',
  },
  chainProgress: [],
};
// Pins which shell command strings isReadOnlyBash reports as read-only.
describe('isReadOnlyBash', () => {
  it('detects ls / cat / grep / git status as read-only', () => {
    const readOnlyCommands = [
      'ls -la',
      'cat file.txt',
      'grep "x" file',
      'git status',
      'git log',
      'git rev-parse HEAD',
    ];
    for (const command of readOnlyCommands) {
      expect(isReadOnlyBash(command)).toBe(true);
    }
  });

  it('does not classify git commit / push as read-only', () => {
    const mutatingGitCommands = ['git commit -m "x"', 'git push origin main'];
    for (const command of mutatingGitCommands) {
      expect(isReadOnlyBash(command)).toBe(false);
    }
  });

  it('does not classify rm / cp / mv as read-only', () => {
    // NOTE(review): the title mentions mv, but only rm and cp are asserted here.
    const fileMutatingCommands = ['rm file', 'cp a b'];
    for (const command of fileMutatingCommands) {
      expect(isReadOnlyBash(command)).toBe(false);
    }
  });
});
// Covers parsing of the `<!-- routing: ... -->` tag embedded in response text.
describe('decodeRoutingTag', () => {
  it('parses direct_justified=true with reason', () => {
    const responseText = '<!-- routing: direct_justified=true reason="micro fix per user override" -->';
    const decoded = decodeRoutingTag(responseText);
    expect(decoded.directJustified).toBe(true);
    expect(decoded.reason).toContain('micro fix');
  });

  it('returns null on missing tag', () => {
    const decoded = decodeRoutingTag('just a regular response');
    expect(decoded).toBeNull();
  });

  it('rejects direct_justified=true WITHOUT reason', () => {
    // A justification flag with no reason attribute must not be accepted.
    const responseText = '<!-- routing: direct_justified=true -->';
    expect(decodeRoutingTag(responseText)).toBeNull();
  });
});
// Exercises shouldBlock across the off / warn-only / enforce modes, the exempt
// task types, and the legacy option and field spellings.
describe('shouldBlock — §17 mode-based (Phase 2 Task 13)', () => {
  // Factories return fresh objects on every call so tests never share input.
  const enforce = (extra = {}) => ({ mode: 'enforce', ...extra });
  const withClassification = (classification) => ({ ...baseState, classification });

  it('mode=off never blocks', () => {
    const verdict = shouldBlock('Edit', baseState, '', { mode: 'off' });
    expect(verdict).toBe(false);
  });

  it('warn-only never blocks (always returns false)', () => {
    const verdict = shouldBlock('Edit', baseState, '', { mode: 'warn-only' });
    expect(verdict).toBe(false);
  });

  it('enforce blocks Edit on feature without skill invoked', () => {
    const verdict = shouldBlock('Edit', baseState, '', enforce());
    expect(verdict).toMatchObject({
      block: true,
      reason: 'direct_in_non_conversation',
    });
  });

  it('enforce passes conversation task_type (§17 exempt)', () => {
    const state = withClassification({ task_type: 'conversation', no_skill_found: false });
    expect(shouldBlock('Edit', state, '', enforce())).toBe(false);
  });

  it('enforce passes micro / manual_override (§17 exempt)', () => {
    const exemptTaskTypes = ['micro', 'manual_override'];
    for (const taskType of exemptTaskTypes) {
      const state = withClassification({ task_type: taskType, no_skill_found: false });
      expect(shouldBlock('Edit', state, '', enforce())).toBe(false);
    }
  });

  // H (2026-05-26): 'unknown' was added to the non-blocking task types. Per the
  // original note, the LLM classifier still occasionally ends in parse_null
  // (Sonnet sometimes returns prose-wrapped JSON that parseClassifierResponse
  // cannot extract). Until G fixes the parser, blocking on unknown is too
  // strict: the user gets stuck on routine edits despite a real LLM response.
  it('enforce passes unknown (added 2026-05-26 — see G for parse_null root cause)', () => {
    const state = withClassification({ task_type: 'unknown', no_skill_found: false });
    const editVerdict = shouldBlock('Edit', state, '', enforce());
    expect(editVerdict).toBe(false);
    const bashVerdict = shouldBlock('Bash', state, '', enforce({ bashCommand: 'git commit -m "x"' }));
    expect(bashVerdict).toBe(false);
  });

  it('enforce does NOT block when skill invoked this turn', () => {
    const state = { ...baseState, skillInvokedThisTurn: true };
    expect(shouldBlock('Edit', state, '', enforce())).toBe(false);
  });

  it('enforce blocks no_skill_found=true with specific reason', () => {
    const state = withClassification({ task_type: 'feature', no_skill_found: true });
    const verdict = shouldBlock('Edit', state, '', enforce());
    expect(verdict).toMatchObject({
      block: true,
      reason: 'no_skill_found_block',
    });
  });

  it('continuation-inherited feature is NOT exempt (D1 — same shape as base)', () => {
    // Uses baseState unchanged: the inherited state has the same shape.
    const verdict = shouldBlock('Edit', baseState, '', enforce());
    expect(verdict).toMatchObject({ block: true });
  });

  it('enforce does NOT block when routing-tag has direct_justified=true with reason', () => {
    const responseText = '<!-- routing: direct_justified=true reason="testing" -->';
    expect(shouldBlock('Edit', baseState, responseText, enforce())).toBe(false);
  });

  it('enforce does NOT block read-only Bash', () => {
    const verdict = shouldBlock('Bash', baseState, '', enforce({ bashCommand: 'ls' }));
    expect(verdict).toBe(false);
  });

  it('enforce does NOT block tools outside whitelist (e.g. Read)', () => {
    const verdict = shouldBlock('Read', baseState, '', enforce());
    expect(verdict).toBe(false);
  });

  it('legacy back-compat: warnOnly=false maps to enforce', () => {
    // No `mode` key here on purpose: only the legacy flag is supplied.
    const verdict = shouldBlock('Edit', baseState, '', { warnOnly: false });
    expect(verdict).toMatchObject({ block: true });
  });

  it('legacy back-compat: taskType (camelCase) still recognised', () => {
    const state = withClassification({ taskType: 'conversation', no_skill_found: false });
    expect(shouldBlock('Edit', state, '', enforce())).toBe(false);
  });
});
// Checks the object decideDecision returns: `decision`, `reason`,
// `reason_code` when blocking, and `warning` in warn-only mode.
describe('decideDecision — §17 mode-based', () => {
  it('returns decision: block with reason text and reason_code when shouldBlock blocks', () => {
    const outcome = decideDecision('Edit', baseState, '', { mode: 'enforce' });
    const { decision, reason, reason_code: reasonCode } = outcome;
    expect(decision).toBe('block');
    // The reason text is expected to mention the recommended node from baseState.
    expect(reason).toMatch(/#19/);
    expect(reasonCode).toBe('direct_in_non_conversation');
  });

  it('returns no_skill_found_block reason_code when classifier signalled no match', () => {
    const noMatchState = {
      ...baseState,
      classification: { task_type: 'feature', no_skill_found: true, recommendedNode: null },
    };
    const outcome = decideDecision('Edit', noMatchState, '', { mode: 'enforce' });
    expect(outcome.decision).toBe('block');
    expect(outcome.reason_code).toBe('no_skill_found_block');
  });

  it('returns empty (proceed) when skill invoked', () => {
    const invokedState = { ...baseState, skillInvokedThisTurn: true };
    const outcome = decideDecision('Edit', invokedState, '', { mode: 'enforce' });
    expect(outcome.decision).toBeUndefined();
  });

  it('warn-only mode emits warning string but does not block', () => {
    const outcome = decideDecision('Edit', baseState, '', { mode: 'warn-only' });
    expect(outcome.decision).toBeUndefined();
    expect(outcome.warning).toMatch(/#19/);
  });

  it('warn-only mode does NOT emit warning when task is exempt (conversation)', () => {
    const conversationState = {
      ...baseState,
      classification: { task_type: 'conversation', no_skill_found: false },
    };
    const outcome = decideDecision('Edit', conversationState, '', { mode: 'warn-only' });
    expect(outcome.warning).toBeUndefined();
  });
});
// Smoke check only: confirms the gate module can be dynamically imported and
// exposes shouldBlock as a function. It does not feed Cyrillic input via stdin.
describe('UTF-8 cyrillic stdin (regression — Stage 3 fix 1)', () => {
  it('module loads with UTF-8 helper wired (smoke)', async () => {
    const gateModule = await import('./router-tool-gate.mjs');
    const exportedKind = typeof gateModule.shouldBlock;
    expect(exportedKind).toBe('function');
  });
});