From bec69aa5651b2b115695d0cc8cefaabf2424bf15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=94=D0=BC=D0=B8=D1=82=D1=80=D0=B8=D0=B9?= Date: Sun, 24 May 2026 13:25:05 +0300 Subject: [PATCH] fix(brain): derive routerStep from observable signals (was hardcoded constant) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause: primary_rationale.step было жёстко прописано как литерал `1` в обоих episode-builder'ах (observer-transcript-parser.mjs:813, observer-stop-hook.mjs:153). Поэтому routerStepReached видел { '1': N } и suspicious=true для ВСЕХ данных — показатель измерял константу, а не дисциплину роутера. Фикс: новая чистая функция deriveRouterStep(primary_rationale) — берёт максимум наблюдаемой стадии router-procedure.md из реальных признаков (task_classification ≠ 'other' → 2; triggers_matched → 3; chain_ref → 4; node_chosen ≠ 'direct' → 5). routerStepReached теперь вызывает её при чтении, игнорируя хранимое pr.step. Это делает метрику честной для ВСЕХ существующих эпизодов (включая исторические 136 за май) — без миграции данных. Boost для baseline'а CHECKPOINT B этапа 3: на боевых данных (131 schema-v2+ эпизод) distribution теперь = { 1: 55, 2: 46, 3: 12, 5: 18 }, suspicious=false. Видна реальная картина: ~42% эпизодов остановились на hard-floor, только ~14% реально дошли до исполнения навыка. Follow-up: episode-builder'ы продолжают писать step:1 (теперь это безвредно — метрика игнорирует). Отдельно можно прибрать запись в builder'ах для self-describing эпизодов. Test changes: - tools/discipline-metrics.test.mjs: +describe('deriveRouterStep') (8 cases), describe для routerStepReached переписан под наблюдаемые сигналы как источник шага. - tools/brain-retro-analyzer.test.mjs: 'returns routerStepReached distribution' обновлён — эпизоды конструируются с сигналами (triggers vs bare), не хранимым step. Full tools/ vitest run: 520/520 GREEN. 4 pre-existing empty test files (ruflo-*, subagent-prompt-prefix) — не моя регрессия.
Co-Authored-By: Claude Opus 4.7 --- tools/brain-retro-analyzer.test.mjs | 8 ++- tools/discipline-metrics.mjs | 38 +++++++++++-- tools/discipline-metrics.test.mjs | 85 ++++++++++++++++++++++------- 3 files changed, 104 insertions(+), 27 deletions(-) diff --git a/tools/brain-retro-analyzer.test.mjs b/tools/brain-retro-analyzer.test.mjs index c4438a13..48f19f9c 100644 --- a/tools/brain-retro-analyzer.test.mjs +++ b/tools/brain-retro-analyzer.test.mjs @@ -334,10 +334,12 @@ describe('analyze — discipline metrics (stage 2)', () => { expect(res.disciplineByClassification.feature.viaSkill).toBe(1); }); - it('returns routerStepReached distribution', () => { + it('returns routerStepReached distribution (derived from signals)', () => { const eps = [ - ep({ primary_rationale: { step: 1, task_classification: 'feature', node_chosen: 'direct', triggers_matched: [], boundaries_applied: [], candidates_considered: [], hard_floor: { invoked: false, rules: [] } } }), - ep({ timestamps: { started_at: '2026-05-19T10:01:00Z', ended_at: '2026-05-19T10:02:00Z' }, primary_rationale: { step: 3, task_classification: 'feature', node_chosen: 'direct', triggers_matched: [], boundaries_applied: [], candidates_considered: [], hard_floor: { invoked: false, rules: [] } } }), + // bare/direct → derived step 1 + ep({ primary_rationale: { step: 1, task_classification: 'other', node_chosen: 'direct', triggers_matched: [], chain_ref: [], boundaries_applied: [], candidates_considered: [], hard_floor: { invoked: false, rules: [] } } }), + // triggers matched → derived step 3 + ep({ timestamps: { started_at: '2026-05-19T10:01:00Z', ended_at: '2026-05-19T10:02:00Z' }, primary_rationale: { step: 1, task_classification: 'other', node_chosen: 'direct', triggers_matched: [{ node: '#19' }], chain_ref: [], boundaries_applied: [], candidates_considered: [], hard_floor: { invoked: false, rules: [] } } }), ]; const res = analyze(eps, { classificationMap: map, dormancy }); 
expect(res.routerStep.distribution['1']).toBe(1); diff --git a/tools/discipline-metrics.mjs b/tools/discipline-metrics.mjs index a0c8b7f0..1a60f08f 100644 --- a/tools/discipline-metrics.mjs +++ b/tools/discipline-metrics.mjs @@ -42,9 +42,38 @@ export function disciplinePercentByClassification(episodes, classificationMap) { } /** - * Распределение по шагу роутера (primary_rationale.step). - * suspicious=true если total >= 5 && >90% эпизодов застряли на step=1 - * (sentinel-bug парсера — Pravila §16.4 sanity-check). + * Вывести шаг router-procedure.md, которого реально достиг эпизод, из + * НАБЛЮДАЕМЫХ признаков primary_rationale (хранимое поле `step` исторически — + * жёсткая константа 1 в обоих episode-builder'ах, поэтому ему не доверяем). + * + * Стадии (берётся максимум достигнутой): + * 1 — hard-floor checkpoint (всегда пройден), + * 2 — классификация дала реальный класс (task_classification ≠ 'other'), + * 3 — подобраны триггеры (triggers_matched непуст), + * 4 — найдена каноническая цепочка (chain_ref непуст), + * 5 — выбран и исполнен узел (node_chosen ≠ 'direct'). + * + * @param {object|undefined} pr primary_rationale + * @returns {1|2|3|4|5} + */ +export function deriveRouterStep(pr) { + if (!pr || typeof pr !== 'object') return 1; + let step = 1; + if (pr.task_classification && pr.task_classification !== 'other') step = 2; + if (Array.isArray(pr.triggers_matched) && pr.triggers_matched.length > 0) step = Math.max(step, 3); + const chain = pr.chain_ref; + const hasChain = Array.isArray(chain) ? chain.length > 0 : Boolean(chain); + if (hasChain) step = Math.max(step, 4); + if (pr.node_chosen && pr.node_chosen !== 'direct') step = Math.max(step, 5); + return step; +} + +/** + * Распределение по шагу роутера, ВЫВЕДЕННОМУ из наблюдаемых признаков + * (deriveRouterStep) — а не из хранимого pr.step (он был константой 1). 
+ * suspicious=true если total >= 5 && >90% эпизодов выводятся в step 1 + * (Pravila §16.4 sanity-check — теперь это реальный сигнал «дисциплина + * проваливается / признаки не пишутся», а не гарантированный артефакт). * * @param {object[]} episodes * @returns {{ distribution: { [step: string]: number }, total: number, suspicious: boolean }} @@ -53,8 +82,7 @@ export function routerStepReached(episodes) { const distribution = {}; let total = 0; for (const e of valid(episodes)) { - const step = (e.primary_rationale || {}).step; - const key = step === null || step === undefined ? 'null' : String(step); + const key = String(deriveRouterStep(e.primary_rationale)); distribution[key] = (distribution[key] || 0) + 1; total += 1; } diff --git a/tools/discipline-metrics.test.mjs b/tools/discipline-metrics.test.mjs index 295ea2b1..853085bf 100644 --- a/tools/discipline-metrics.test.mjs +++ b/tools/discipline-metrics.test.mjs @@ -2,6 +2,7 @@ import { describe, it, expect } from 'vitest'; import { disciplinePercentByClassification, routerStepReached, + deriveRouterStep, boundariesAppliedRate, } from './discipline-metrics.mjs'; @@ -60,46 +61,92 @@ describe('disciplinePercentByClassification', () => { }); }); -describe('routerStepReached', () => { - it('counts episodes by step', () => { +describe('deriveRouterStep', () => { + // Маппинг наблюдаемых признаков primary_rationale → шаг router-procedure.md + // (1 hard-floor → 2 классификация → 3 триггеры → 4 цепочка → 5 исполнение узла). + // Берётся МАКСИМУМ достигнутой стадии. Хранимое pr.step игнорируется. 
+ it('returns 1 for a bare direct episode (hard-floor only, no signals)', () => { + expect(deriveRouterStep({ task_classification: 'other', triggers_matched: [], chain_ref: [], node_chosen: 'direct' })).toBe(1); + }); + + it('returns 2 when a real task_classification was produced', () => { + expect(deriveRouterStep({ task_classification: 'feature', triggers_matched: [], chain_ref: [], node_chosen: 'direct' })).toBe(2); + }); + + it("treats 'other' classification as not reaching step 2", () => { + expect(deriveRouterStep({ task_classification: 'other', triggers_matched: [], chain_ref: null, node_chosen: 'direct' })).toBe(1); + }); + + it('returns 3 when triggers matched', () => { + expect(deriveRouterStep({ task_classification: 'other', triggers_matched: [{ keyword: 'x' }], chain_ref: [], node_chosen: 'direct' })).toBe(3); + }); + + it('returns 4 when a chain was referenced (array or non-empty string)', () => { + expect(deriveRouterStep({ task_classification: 'other', triggers_matched: [], chain_ref: ['routing-off-phase L1'], node_chosen: 'direct' })).toBe(4); + expect(deriveRouterStep({ task_classification: 'other', triggers_matched: [], chain_ref: 'L1', node_chosen: 'direct' })).toBe(4); + }); + + it('returns 5 when a node was actually chosen (execution)', () => { + expect(deriveRouterStep({ task_classification: 'other', triggers_matched: [], chain_ref: [], node_chosen: '#19' })).toBe(5); + }); + + it('takes the furthest stage reached (max), not the first', () => { + expect(deriveRouterStep({ task_classification: 'feature', triggers_matched: [{ k: 1 }], chain_ref: [], node_chosen: '#19' })).toBe(5); + }); + + it('handles a missing/empty primary_rationale → 1', () => { + expect(deriveRouterStep(undefined)).toBe(1); + expect(deriveRouterStep({})).toBe(1); + }); +}); + +describe('routerStepReached (derived from observable signals)', () => { + // Признаковые шаблоны (хранимый step специально проставлен 1/99 — должен игнорироваться). 
+ const at = { + 1: { task_classification: 'other', triggers_matched: [], chain_ref: [], node_chosen: 'direct' }, + 3: { task_classification: 'other', triggers_matched: [{ k: 1 }], chain_ref: [], node_chosen: 'direct' }, + 5: { task_classification: 'feature', triggers_matched: [], chain_ref: [], node_chosen: '#19' }, + }; + + it('counts episodes by derived step, ignoring any stored pr.step value', () => { const eps = [ - ep({ primary_rationale: { step: 1, task_classification: 'feature', node_chosen: 'direct', triggers_matched: [], boundaries_applied: [] } }), - ep({ primary_rationale: { step: 1, task_classification: 'feature', node_chosen: 'direct', triggers_matched: [], boundaries_applied: [] } }), - ep({ primary_rationale: { step: 3, task_classification: 'feature', node_chosen: 'direct', triggers_matched: [], boundaries_applied: [] } }), - ep({ primary_rationale: { step: null, task_classification: 'feature', node_chosen: 'direct', triggers_matched: [], boundaries_applied: [] } }), + ep({ primary_rationale: { ...at[1], step: 1 } }), + ep({ primary_rationale: { ...at[1], step: 99 } }), + ep({ primary_rationale: { ...at[3], step: 1 } }), + ep({ primary_rationale: { ...at[5], step: 1 } }), ]; const res = routerStepReached(eps); expect(res.distribution['1']).toBe(2); expect(res.distribution['3']).toBe(1); - expect(res.distribution['null']).toBe(1); + expect(res.distribution['5']).toBe(1); + expect(res.total).toBe(4); }); - it('flags suspicious=true when >90% эпизодов остановились на step=1', () => { + it('flags suspicious=true when >90% эпизодов выводятся в step 1', () => { const eps = Array.from({ length: 11 }, (_, i) => - ep({ primary_rationale: { step: i === 10 ? 3 : 1, task_classification: 'feature', node_chosen: 'direct', triggers_matched: [], boundaries_applied: [] } }) + ep({ primary_rationale: i === 10 ? 
{ ...at[3], step: 1 } : { ...at[1], step: 1 } }) ); - const res = routerStepReached(eps); - expect(res.suspicious).toBe(true); + expect(routerStepReached(eps).suspicious).toBe(true); }); it('suspicious=false when distribution более равномерное', () => { const eps = [ - ep({ primary_rationale: { step: 1, task_classification: 'feature', node_chosen: 'direct', triggers_matched: [], boundaries_applied: [] } }), - ep({ primary_rationale: { step: 2, task_classification: 'feature', node_chosen: 'direct', triggers_matched: [], boundaries_applied: [] } }), - ep({ primary_rationale: { step: 3, task_classification: 'feature', node_chosen: 'direct', triggers_matched: [], boundaries_applied: [] } }), + ep({ primary_rationale: { ...at[1], step: 1 } }), + ep({ primary_rationale: { ...at[3], step: 1 } }), + ep({ primary_rationale: { ...at[5], step: 1 } }), ]; - const res = routerStepReached(eps); - expect(res.suspicious).toBe(false); + expect(routerStepReached(eps).suspicious).toBe(false); }); it('ignores v1 episodes and observer_error markers', () => { const eps = [ - { schema_version: 1, primary_rationale: { step: 5 } }, + { schema_version: 1, primary_rationale: { ...at[5] } }, { observer_error: true }, - ep({ primary_rationale: { step: 2, task_classification: 'feature', node_chosen: 'direct', triggers_matched: [], boundaries_applied: [] } }), + ep({ primary_rationale: { ...at[3], step: 1 } }), ]; const res = routerStepReached(eps); - expect(res.distribution).toEqual({ '2': 1 }); + expect(res.distribution).toEqual({ '3': 1 }); + expect(res.total).toBe(1); }); });