feat(classifier-match): lower threshold 0.8→0.6 + inline router-skip override

Two changes:
1. CONFIDENCE_THRESHOLD 0.8 → 0.6 — catches borderline recommendations
   that previously slipped through. Driver: brain-retro #10 shows 0%
   single-node-skill follow-through, suggesting the hook needs to fire more often.
2. Inline escape hatch — 'router-skip: <reason 50+ chars>' in assistant text.
   Per-tool scope (does not affect other tools in the same turn). Replaces
   the documented 'override: <reason>' hint, which was a self-bypass
   loophole; the high-friction 50+ char justification discourages reflexive use.

Per Level 2 of plan docs/superpowers/plans/2026-05-28-router-discipline-level-1-2.md.

Legacy tests flipped (2 tests):
- 'allows when confidence exactly 0.7 (raised threshold)' →
  'BLOCKS when confidence exactly 0.7 (above new threshold 0.6)'
- 'allows when confidence 0.75 (still under raised threshold)' →
  'BLOCKS when confidence 0.75 (above new threshold 0.6)'
These tests previously asserted block:false at 0.7/0.75 under the old 0.8
threshold; with 0.6 threshold they now correctly assert block:true.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Дмитрий
2026-05-28 17:52:43 +03:00
parent 2b23a1f210
commit 726c2121b5
3 changed files with 106 additions and 24 deletions
+3 -3
View File
@@ -1,6 +1,6 @@
# Brain Status (auto-generated)
Last updated: 2026-05-28T14:34:46.541Z
Last updated: 2026-05-28T14:41:36.098Z
| Контролёр | Состояние | Детали |
|---|---|---|
@@ -109,7 +109,7 @@ Episodes since last run: 542 / threshold: 10
| Фраза | За всё время | За сегодня |
|---|---|---|
| `recovery` | 832 | 559 ⚠️ |
| `recovery` | 845 | 572 ⚠️ |
| `ремонт инфраструктуры` | 185 | 26 ⚠️ |
| `без скилов` | 171 | 113 ⚠️ |
| `срочно` | 93 | 11 ⚠️ |
@@ -123,7 +123,7 @@ Episodes since last run: 542 / threshold: 10
| PID | Имя | CPU-время | Возраст |
|---|---|---|---|
| 9756 | Code | 1.15ч | NaNч |
| 9756 | Code | 1.17ч | 0.0ч |
⚠️ Проверь, не «осиротевшие» ли это процессы от завершённых Claude-сессий.
+18 -13
View File
@@ -1,15 +1,18 @@
#!/usr/bin/env node
#!/usr/bin/env node
/**
* Rule #8 — Classifier-mismatch enforce.
*
* Stop hook. Reads classifier output from router-state. If classifier recommended
* a node with confidence >= threshold AND the turn DIDN'T invoke a matching
* a node with confidence >= 0.6 AND the turn DIDN'T invoke a matching
* skill/task — block.
*
* Override: "без скилов" / "direct ok" / explicit "override: <reason>" line in
* assistant text.
* Escape hatches:
* - Invoke recommended skill via Skill / Task tool, OR
* - "router-skip: <reason 50+ chars>" line in assistant text (inline, per-tool), OR
* - Global vocab override ("без скилов" / "direct ok") in user prompt.
*
* Spec: docs/superpowers/specs/2026-05-25-enforce-hard-rules-design.md
* docs/superpowers/plans/2026-05-28-router-discipline-level-1-2.md
*/
import {
@@ -26,11 +29,11 @@ import {
} from './enforce-hook-helpers.mjs';
const RULE_KEY = 'classifier-mismatch';
// Raised 2026-05-27 (retro #8 follow-up): 0.7 produced false-positives on
// borderline LLM classifications (e.g. recommending #3 GitHub MCP for local
// adr-judge debug, #36 adr-kit for status readouts). 0.8 only blocks when
// the classifier is genuinely confident.
const CONFIDENCE_THRESHOLD = 0.8;
// Lowered 2026-05-28 (Task 4, brain-retro #10): 0.8 was too high — 0%
// single-node-skill follow-through. 0.6 catches more borderline cases.
// Inline router-skip escape hatch (50+ chars) mitigates friction.
const CONFIDENCE_THRESHOLD = 0.6;
// Inline escape hatch: a line of the form "router-skip: <justification>" whose
// justification spans at least 50 characters, measured from its first to its
// last non-whitespace character.
// Only spaces/tabs may separate the prefix from the justification, and the
// justification must start and end on a non-whitespace character. This stops
// the regex from backtracking to count padding toward the minimum length
// (e.g. "router-skip: " + 49 chars, or the prefix followed by 50 spaces) and
// from skipping over a newline to accept whatever long line happens to follow.
const ROUTER_SKIP_RE = /^router-skip:[ \t]*(\S.{48,}\S)[ \t]*$/m;
const MUTATING_TOOLS = new Set(['Edit', 'Write', 'MultiEdit', 'NotebookEdit', 'Bash', 'Task', 'Agent']);
@@ -76,8 +79,10 @@ export function decide({ toolUses, recommendation, confidence, assistantText, ov
const matched = toolUses.some((u) => nodeMatches(recommendation, u));
if (matched) return { block: false };
// NOTE: prior "override: <reason>" self-bypass removed (retro #5 hole 1) - assistant
// cannot grant itself an override. User must use a vocabulary phrase.
// Inline override: "router-skip: <50+ chars justification>" in assistant text.
if (typeof assistantText === 'string' && ROUTER_SKIP_RE.test(assistantText)) {
return { block: false };
}
return {
block: true,
@@ -85,7 +90,7 @@ export function decide({ toolUses, recommendation, confidence, assistantText, ov
`[enforce-classifier-match] Classifier recommended "${recommendation}" (confidence=${confidence ?? 'n/a'}) but turn did not invoke that skill/node.`,
`Either:`,
` - Invoke ${recommendation} via Skill / Task tool, OR`,
` - Add an explicit "override: <reason>" line in your response, OR`,
` - Add an explicit "router-skip: <reason 50+ chars>" line in your response, OR`,
` - Include "без скилов" / "direct ok" in the next user prompt.`,
].join('\n'),
};
@@ -106,7 +111,7 @@ async function main() {
const confidence = cls && typeof cls.confidence === 'number' ? cls.confidence : null;
// Hole 4 fix: fall back to triggers_matched[0] when classifier silent.
// Confidence stays null in fallback path — decide() accepts null (only
// numeric confidence ≥ CONFIDENCE_THRESHOLD (0.8) blocks the rule).
// numeric confidence ≥ CONFIDENCE_THRESHOLD (0.6) blocks the rule).
if (!recommendation) {
const triggers = (cls && cls.triggers_matched) || [];
if (Array.isArray(triggers) && triggers.length > 0 && typeof triggers[0] === 'string' && triggers[0].length > 0) {
+85 -8
View File
@@ -1,3 +1,4 @@
// Task 4: threshold 0.8→0.6 + inline router-skip override
import { describe, it, expect } from 'vitest';
import { decide } from './enforce-classifier-match.mjs';
@@ -26,24 +27,22 @@ describe('enforce-classifier-match / decide', () => {
}).block).toBe(false);
});
// Raised 2026-05-27 (retro #8 follow-up): borderline 0.7 confidence was the
// source of false-positive blocks (#3 GitHub MCP for local debug, #36
// adr-kit for status readouts). Threshold raised 0.7 → 0.8 so 0.7 and 0.75
// no longer block.
it('allows when confidence exactly 0.7 (raised threshold)', () => {
// Task 4 (2026-05-28): threshold lowered 0.8 → 0.6 (brain-retro #10: 0% follow-through).
// Flipped from the old 0.8-threshold contract: 0.7 and 0.75 NOW BLOCK (above 0.6).
it('BLOCKS when confidence exactly 0.7 (above new threshold 0.6)', () => {
expect(decide({
toolUses: [{ name: 'Edit', input: {} }],
recommendation: 'superpowers:writing-plans',
confidence: 0.7,
}).block).toBe(false);
}).block).toBe(true);
});
it('allows when confidence 0.75 (still under raised threshold)', () => {
it('BLOCKS when confidence 0.75 (above new threshold 0.6)', () => {
expect(decide({
toolUses: [{ name: 'Edit', input: {} }],
recommendation: 'superpowers:writing-plans',
confidence: 0.75,
}).block).toBe(false);
}).block).toBe(true);
});
it('blocks when recommendation high-confidence + no matching tool', () => {
@@ -189,3 +188,81 @@ describe('enforce-classifier-match / decide', () => {
expect(r.block).toBe(true);
});
});
// Suite for the inline "router-skip:" escape hatch (Task 4).
// Every case sends one Edit tool use with recommendation '#19' at confidence
// 0.85 and varies only the assistant text passed to decide().
describe('inline router-skip override (Task 4)', () => {
  const recommendation = '#19';
  const editTool = { name: 'Edit', input: { file_path: 'x.txt' } };

  // Calls decide() with the shared fixture and returns its `block` flag.
  const blockFlagFor = (assistantText) => {
    const { block } = decide({
      toolUses: [editTool],
      recommendation,
      confidence: 0.85,
      assistantText,
      override: null,
    });
    return block;
  };

  it('does NOT block when assistant text contains "router-skip: <50+ chars>"', () => {
    const longJustification =
      'router-skip: deliberately choosing direct because router recommendation #19 is irrelevant for this trivial typo fix in docs';
    expect(blockFlagFor(longJustification)).toBe(false);
  });

  it('DOES block when "router-skip:" justification < 50 chars', () => {
    expect(blockFlagFor('router-skip: too short')).toBe(true);
  });

  it('DOES block when no "router-skip:" present at all', () => {
    expect(blockFlagFor('just normal text, no skip')).toBe(true);
  });
});
// Suite pinning the lowered confidence threshold (Task 4: 0.8 → 0.6).
// Every case sends one Edit tool use with recommendation '#19' and an empty
// assistant text, so only the confidence value varies between cases.
describe('lowered confidence threshold (Task 4: 0.8 → 0.6)', () => {
  const recommendation = '#19';
  const editTool = { name: 'Edit', input: { file_path: 'x.txt' } };

  // Calls decide() with the shared fixture at the given confidence and
  // returns its `block` flag.
  const blockFlagAt = (confidence) => {
    const { block } = decide({
      toolUses: [editTool],
      recommendation,
      confidence,
      assistantText: '',
      override: null,
    });
    return block;
  };

  it('blocks at confidence 0.65 (above new threshold 0.6)', () => {
    expect(blockFlagAt(0.65)).toBe(true);
  });

  it('does NOT block at confidence 0.55 (below new threshold 0.6)', () => {
    expect(blockFlagAt(0.55)).toBe(false);
  });

  it('still blocks at confidence 0.85 without router-skip (above threshold, no escape)', () => {
    expect(blockFlagAt(0.85)).toBe(true);
  });
});