Files
portal/tools/enforce-classifier-match.test.mjs
T
Дмитрий d1d5308013 feat(brain-governance): classifier threshold 0.7→0.8 + chain-recommendation enforcer + registry test bump
Three brain-governance hardening changes from retro #8 follow-up:

1. enforce-classifier-match: confidence threshold raised 0.7→0.8 (was producing false-positives on borderline LLM recommendations like #3 GitHub MCP for local debug, #36 adr-kit for status readouts). 2 new vitest tests cover boundary values 0.7 and 0.75 (now allowed).

2. enforce-chain-recommendation (NEW): PreToolUse hook blocking mutating tool calls when router gave recommended_chain length >= 2 and controller is not expanding it. Allows pass when: any chain node already invoked, inline 'chain-override: <reason>' present, or global override-phrase in user prompt. 20 vitest tests cover empty chain, single-node bypass, override variants, alias resolution, mixed numeric/string ids.

3. registry-load.test.mjs: bump expected counts 85→86 nodes / 77→78 active (collateral fix after parallel session added #86 graphifyy in 27289c05).

Full vitest tools-sweep: 1022/1022 GREEN.

Reviewer APPROVE on spec compliance + code quality (non-blocking observations: test count mis-report in implementer's claim 33→20 actual, hardcoded 'superpowers:' alias prefix, no direct test for extractCalledSkillIds — deferred).

Hook activation in .claude/settings.json deferred — controller will register separately based on owner's choice (block / warn-only / defer).
2026-05-28 05:33:22 +03:00

192 lines
6.2 KiB
JavaScript

import { describe, it, expect } from 'vitest';
import { decide } from './enforce-classifier-match.mjs';
describe('enforce-classifier-match / decide', () => {
it('allows pure conversation (no mutating tools)', () => {
expect(decide({
toolUses: [{ name: 'Read' }],
recommendation: 'superpowers:writing-plans',
confidence: 0.9,
}).block).toBe(false);
});
it('allows when no recommendation', () => {
expect(decide({
toolUses: [{ name: 'Edit', input: {} }],
recommendation: null,
confidence: null,
}).block).toBe(false);
});
it('allows when confidence below threshold', () => {
expect(decide({
toolUses: [{ name: 'Edit', input: {} }],
recommendation: 'superpowers:writing-plans',
confidence: 0.5,
}).block).toBe(false);
});
// Raised 2026-05-27 (retro #8 follow-up): borderline 0.7 confidence was the
// source of false-positive blocks (#3 GitHub MCP for local debug, #36
// adr-kit for status readouts). Threshold raised 0.7 → 0.8 so 0.7 and 0.75
// no longer block.
it('allows when confidence exactly 0.7 (raised threshold)', () => {
expect(decide({
toolUses: [{ name: 'Edit', input: {} }],
recommendation: 'superpowers:writing-plans',
confidence: 0.7,
}).block).toBe(false);
});
it('allows when confidence 0.75 (still under raised threshold)', () => {
expect(decide({
toolUses: [{ name: 'Edit', input: {} }],
recommendation: 'superpowers:writing-plans',
confidence: 0.75,
}).block).toBe(false);
});
it('blocks when recommendation high-confidence + no matching tool', () => {
const r = decide({
toolUses: [{ name: 'Edit', input: { file_path: 'x.mjs' } }],
recommendation: 'superpowers:writing-plans',
confidence: 0.9,
});
expect(r.block).toBe(true);
expect(r.message).toMatch(/writing-plans/);
});
it('allows when Skill tool invoked with matching name', () => {
const r = decide({
toolUses: [
{ name: 'Skill', input: { skill: 'superpowers:writing-plans' } },
{ name: 'Edit', input: { file_path: 'x.mjs' } },
],
recommendation: 'superpowers:writing-plans',
confidence: 0.9,
});
expect(r.block).toBe(false);
});
it('matches normalized name without superpowers: prefix', () => {
const r = decide({
toolUses: [
{ name: 'Skill', input: { skill: 'writing-plans' } },
{ name: 'Edit', input: {} },
],
recommendation: 'superpowers:writing-plans',
confidence: 0.9,
});
expect(r.block).toBe(false);
});
it('matches Task subagent', () => {
const r = decide({
toolUses: [
{ name: 'Task', input: { subagent_type: 'rls-reviewer' } },
{ name: 'Edit', input: {} },
],
recommendation: 'rls-reviewer',
confidence: 0.85,
});
expect(r.block).toBe(false);
});
it('blocks (not allows) when only "override:" in assistant text — self-override removed (hole 1)', () => {
const r = decide({
toolUses: [{ name: 'Edit', input: {} }],
recommendation: 'foo:bar',
confidence: 0.9,
assistantText: 'override: simpler direct edit, foo:bar overkill here\n',
override: null,
});
expect(r.block).toBe(true);
});
it('blocks when assistant text has "override: reason" but user prompt has no override phrase (hole 1)', () => {
const r = decide({
toolUses: [{ name: 'Edit', input: {} }],
recommendation: 'superpowers:writing-plans',
confidence: 0.9,
assistantText: 'override: just doing it quick',
override: null,
});
expect(r.block).toBe(true);
});
it('allows when override phrase present', () => {
const r = decide({
toolUses: [{ name: 'Edit', input: {} }],
recommendation: 'foo:bar',
confidence: 0.9,
override: { phrase: 'direct ok', suppresses: ['classifier-mismatch'] },
});
expect(r.block).toBe(false);
});
it('blocks when Task subagent is spawned without matching recommendation (hole 2)', () => {
const r = decide({
toolUses: [{ name: 'Task', input: { subagent_type: 'general-purpose', prompt: 'do stuff' } }],
recommendation: 'superpowers:writing-plans',
confidence: 0.9,
assistantText: '',
override: null,
});
expect(r.block).toBe(true);
});
it('does NOT block when Task subagent matches recommendation (regression — Task should count as match when right type)', () => {
const r = decide({
toolUses: [{ name: 'Task', input: { subagent_type: 'writing-plans', prompt: '...' } }],
recommendation: 'writing-plans',
confidence: 0.9,
assistantText: '',
override: null,
});
expect(r.block).toBe(false);
});
it('does not match meta-planning to planning recommendation (hole 5)', () => {
const r = decide({
toolUses: [{ name: 'Skill', input: { skill: 'meta-planning' } }, { name: 'Edit', input: {} }],
recommendation: 'planning',
confidence: 0.9,
assistantText: '',
override: null,
});
expect(r.block).toBe(true);
});
it('matches superpowers:writing-plans to writing-plans recommendation (regression — keep working)', () => {
expect(decide({
toolUses: [{ name: 'Skill', input: { skill: 'superpowers:writing-plans' } }, { name: 'Edit', input: {} }],
recommendation: 'writing-plans',
confidence: 0.9,
assistantText: '',
override: null,
}).block).toBe(false);
});
it('matches exact-name skill regression — keep working', () => {
expect(decide({
toolUses: [{ name: 'Skill', input: { skill: 'brainstorming' } }, { name: 'Edit', input: {} }],
recommendation: 'brainstorming',
confidence: 0.9,
assistantText: '',
override: null,
}).block).toBe(false);
});
// hole 4: triggers_matched fallback — decide() contract test
it('blocks when recommendation comes from triggers_matched fallback (hole 4, null confidence)', () => {
const r = decide({
toolUses: [{ name: 'Edit', input: {} }],
recommendation: 'superpowers:writing-plans', // would-be from triggers_matched[0]
confidence: null, // no LLM, but triggers present
assistantText: '',
override: null,
});
expect(r.block).toBe(true);
});
});