fix(brain): derive routerStep from observable signals (was hardcoded constant)
Root cause: primary_rationale.step было жёстко прописано как литерал `1` в обоих
episode-builder'ах (observer-transcript-parser.mjs:813, observer-stop-hook.mjs:153).
Поэтому routerStepReached видел { '1': N } и suspicious=true для ВСЕХ данных —
показатель измерял константу, а не дисциплину роутера.
Фикс: новая чистая функция deriveRouterStep(primary_rationale) — берёт максимум
наблюдаемой стадии router-procedure.md из реальных признаков
(task_classification ≠ 'other' → 2; triggers_matched → 3; chain_ref → 4;
node_chosen ≠ 'direct' → 5). routerStepReached теперь вызывает её при чтении,
игнорируя хранимое pr.step. Это делает метрику честной для ВСЕХ существующих
эпизодов (включая исторические 136 за май) — без миграции данных.
Boost для baseline'а CHECKPOINT B этапа 3: на боевых данных
(131 schema-v2+ эпизод) distribution теперь = { 1: 55, 2: 46, 3: 12, 5: 18 },
suspicious=false. Видно реальную картину: ~42% эпизодов остановились на hard-floor,
только ~14% реально дошли до исполнения навыка.
Follow-up: episode-builder'ы продолжают писать step:1 (теперь это безвредно —
метрика игнорирует). Отдельно можно прибрать запись в builder'ах для
self-describing эпизодов.
Test changes:
- tools/discipline-metrics.test.mjs: +describe('deriveRouterStep') (9 cases),
routerStepReached describe переписан под сигналы-источник.
- tools/brain-retro-analyzer.test.mjs: 'returns routerStepReached distribution'
обновлён — эпизоды конструируются с сигналами (triggers vs bare),
не хранимым step.
Full tools/ vitest run: 520/520 GREEN. 4 pre-existing empty test files
(ruflo-*, subagent-prompt-prefix) — не моя регрессия.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -334,10 +334,12 @@ describe('analyze — discipline metrics (stage 2)', () => {
|
||||
expect(res.disciplineByClassification.feature.viaSkill).toBe(1);
|
||||
});
|
||||
|
||||
it('returns routerStepReached distribution', () => {
|
||||
it('returns routerStepReached distribution (derived from signals)', () => {
|
||||
const eps = [
|
||||
ep({ primary_rationale: { step: 1, task_classification: 'feature', node_chosen: 'direct', triggers_matched: [], boundaries_applied: [], candidates_considered: [], hard_floor: { invoked: false, rules: [] } } }),
|
||||
ep({ timestamps: { started_at: '2026-05-19T10:01:00Z', ended_at: '2026-05-19T10:02:00Z' }, primary_rationale: { step: 3, task_classification: 'feature', node_chosen: 'direct', triggers_matched: [], boundaries_applied: [], candidates_considered: [], hard_floor: { invoked: false, rules: [] } } }),
|
||||
// bare/direct → derived step 1
|
||||
ep({ primary_rationale: { step: 1, task_classification: 'other', node_chosen: 'direct', triggers_matched: [], chain_ref: [], boundaries_applied: [], candidates_considered: [], hard_floor: { invoked: false, rules: [] } } }),
|
||||
// triggers matched → derived step 3
|
||||
ep({ timestamps: { started_at: '2026-05-19T10:01:00Z', ended_at: '2026-05-19T10:02:00Z' }, primary_rationale: { step: 1, task_classification: 'other', node_chosen: 'direct', triggers_matched: [{ node: '#19' }], chain_ref: [], boundaries_applied: [], candidates_considered: [], hard_floor: { invoked: false, rules: [] } } }),
|
||||
];
|
||||
const res = analyze(eps, { classificationMap: map, dormancy });
|
||||
expect(res.routerStep.distribution['1']).toBe(1);
|
||||
|
||||
@@ -42,9 +42,38 @@ export function disciplinePercentByClassification(episodes, classificationMap) {
|
||||
}
|
||||
|
||||
/**
 * Derive the router-procedure.md step an episode actually reached from the
 * observable fields of its primary_rationale.
 *
 * The stored `step` field is deliberately never read here: historically both
 * episode builders wrote it as the hard-coded constant 1, so it carries no
 * information about how far the router really got.
 *
 * Stages (the highest stage that has a signal wins):
 *   1 - hard-floor checkpoint (baseline; returned when no other signal exists),
 *   2 - task_classification is set and is not 'other',
 *   3 - triggers_matched is a non-empty array,
 *   4 - chain_ref is a non-empty array, or any other truthy non-array value,
 *   5 - node_chosen is set and is not 'direct'.
 *
 * @param {object|undefined} pr primary_rationale of an episode
 * @returns {1|2|3|4|5} highest stage for which a signal is present
 */
export function deriveRouterStep(pr) {
  // Missing or non-object rationale: only the baseline stage can be claimed.
  if (!pr || typeof pr !== 'object') return 1;

  let step = 1;

  if (pr.task_classification && pr.task_classification !== 'other') step = 2;

  if (Array.isArray(pr.triggers_matched) && pr.triggers_matched.length > 0) {
    step = Math.max(step, 3);
  }

  // chain_ref may be an array or a scalar (e.g. a string): an array counts
  // only when non-empty, anything else counts when truthy.
  const chain = pr.chain_ref;
  const hasChain = Array.isArray(chain) ? chain.length > 0 : Boolean(chain);
  if (hasChain) step = Math.max(step, 4);

  if (pr.node_chosen && pr.node_chosen !== 'direct') step = Math.max(step, 5);

  return step;
}
|
||||
|
||||
/**
|
||||
* Распределение по шагу роутера, ВЫВЕДЕННОМУ из наблюдаемых признаков
|
||||
* (deriveRouterStep) — а не из хранимого pr.step (он был константой 1).
|
||||
* suspicious=true если total >= 5 && >90% эпизодов выводятся в step 1
|
||||
* (Pravila §16.4 sanity-check — теперь это реальный сигнал «дисциплина
|
||||
* проваливается / признаки не пишутся», а не гарантированный артефакт).
|
||||
*
|
||||
* @param {object[]} episodes
|
||||
* @returns {{ distribution: { [step: string]: number }, total: number, suspicious: boolean }}
|
||||
@@ -53,8 +82,7 @@ export function routerStepReached(episodes) {
|
||||
const distribution = {};
|
||||
let total = 0;
|
||||
for (const e of valid(episodes)) {
|
||||
const step = (e.primary_rationale || {}).step;
|
||||
const key = step === null || step === undefined ? 'null' : String(step);
|
||||
const key = String(deriveRouterStep(e.primary_rationale));
|
||||
distribution[key] = (distribution[key] || 0) + 1;
|
||||
total += 1;
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ import { describe, it, expect } from 'vitest';
|
||||
import {
|
||||
disciplinePercentByClassification,
|
||||
routerStepReached,
|
||||
deriveRouterStep,
|
||||
boundariesAppliedRate,
|
||||
} from './discipline-metrics.mjs';
|
||||
|
||||
@@ -60,46 +61,92 @@ describe('disciplinePercentByClassification', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe('routerStepReached', () => {
|
||||
it('counts episodes by step', () => {
|
||||
describe('deriveRouterStep', () => {
  // Maps the observable primary_rationale signals to a router-procedure.md step
  // (1 hard-floor -> 2 classification -> 3 triggers -> 4 chain -> 5 node execution).
  // The MAXIMUM stage reached is taken. The stored pr.step is ignored.
  it('returns 1 for a bare direct episode (hard-floor only, no signals)', () => {
    expect(deriveRouterStep({ task_classification: 'other', triggers_matched: [], chain_ref: [], node_chosen: 'direct' })).toBe(1);
  });

  it('returns 2 when a real task_classification was produced', () => {
    expect(deriveRouterStep({ task_classification: 'feature', triggers_matched: [], chain_ref: [], node_chosen: 'direct' })).toBe(2);
  });

  it("treats 'other' classification as not reaching step 2", () => {
    expect(deriveRouterStep({ task_classification: 'other', triggers_matched: [], chain_ref: null, node_chosen: 'direct' })).toBe(1);
  });

  it('returns 3 when triggers matched', () => {
    expect(deriveRouterStep({ task_classification: 'other', triggers_matched: [{ keyword: 'x' }], chain_ref: [], node_chosen: 'direct' })).toBe(3);
  });

  it('returns 4 when a chain was referenced (array or non-empty string)', () => {
    expect(deriveRouterStep({ task_classification: 'other', triggers_matched: [], chain_ref: ['routing-off-phase L1'], node_chosen: 'direct' })).toBe(4);
    expect(deriveRouterStep({ task_classification: 'other', triggers_matched: [], chain_ref: 'L1', node_chosen: 'direct' })).toBe(4);
  });

  it('returns 5 when a node was actually chosen (execution)', () => {
    expect(deriveRouterStep({ task_classification: 'other', triggers_matched: [], chain_ref: [], node_chosen: '#19' })).toBe(5);
  });

  it('takes the furthest stage reached (max), not the first', () => {
    expect(deriveRouterStep({ task_classification: 'feature', triggers_matched: [{ k: 1 }], chain_ref: [], node_chosen: '#19' })).toBe(5);
  });

  it('handles a missing/empty primary_rationale → 1', () => {
    expect(deriveRouterStep(undefined)).toBe(1);
    expect(deriveRouterStep({})).toBe(1);
  });
});
|
||||
|
||||
describe('routerStepReached (derived from observable signals)', () => {
|
||||
// Признаковые шаблоны (хранимый step специально проставлен 1/99 — должен игнорироваться).
|
||||
const at = {
|
||||
1: { task_classification: 'other', triggers_matched: [], chain_ref: [], node_chosen: 'direct' },
|
||||
3: { task_classification: 'other', triggers_matched: [{ k: 1 }], chain_ref: [], node_chosen: 'direct' },
|
||||
5: { task_classification: 'feature', triggers_matched: [], chain_ref: [], node_chosen: '#19' },
|
||||
};
|
||||
|
||||
it('counts episodes by derived step, ignoring any stored pr.step value', () => {
|
||||
const eps = [
|
||||
ep({ primary_rationale: { step: 1, task_classification: 'feature', node_chosen: 'direct', triggers_matched: [], boundaries_applied: [] } }),
|
||||
ep({ primary_rationale: { step: 1, task_classification: 'feature', node_chosen: 'direct', triggers_matched: [], boundaries_applied: [] } }),
|
||||
ep({ primary_rationale: { step: 3, task_classification: 'feature', node_chosen: 'direct', triggers_matched: [], boundaries_applied: [] } }),
|
||||
ep({ primary_rationale: { step: null, task_classification: 'feature', node_chosen: 'direct', triggers_matched: [], boundaries_applied: [] } }),
|
||||
ep({ primary_rationale: { ...at[1], step: 1 } }),
|
||||
ep({ primary_rationale: { ...at[1], step: 99 } }),
|
||||
ep({ primary_rationale: { ...at[3], step: 1 } }),
|
||||
ep({ primary_rationale: { ...at[5], step: 1 } }),
|
||||
];
|
||||
const res = routerStepReached(eps);
|
||||
expect(res.distribution['1']).toBe(2);
|
||||
expect(res.distribution['3']).toBe(1);
|
||||
expect(res.distribution['null']).toBe(1);
|
||||
expect(res.distribution['5']).toBe(1);
|
||||
expect(res.total).toBe(4);
|
||||
});
|
||||
|
||||
it('flags suspicious=true when >90% эпизодов остановились на step=1', () => {
|
||||
it('flags suspicious=true when >90% эпизодов выводятся в step 1', () => {
|
||||
const eps = Array.from({ length: 11 }, (_, i) =>
|
||||
ep({ primary_rationale: { step: i === 10 ? 3 : 1, task_classification: 'feature', node_chosen: 'direct', triggers_matched: [], boundaries_applied: [] } })
|
||||
ep({ primary_rationale: i === 10 ? { ...at[3], step: 1 } : { ...at[1], step: 1 } })
|
||||
);
|
||||
const res = routerStepReached(eps);
|
||||
expect(res.suspicious).toBe(true);
|
||||
expect(routerStepReached(eps).suspicious).toBe(true);
|
||||
});
|
||||
|
||||
it('suspicious=false when distribution более равномерное', () => {
|
||||
const eps = [
|
||||
ep({ primary_rationale: { step: 1, task_classification: 'feature', node_chosen: 'direct', triggers_matched: [], boundaries_applied: [] } }),
|
||||
ep({ primary_rationale: { step: 2, task_classification: 'feature', node_chosen: 'direct', triggers_matched: [], boundaries_applied: [] } }),
|
||||
ep({ primary_rationale: { step: 3, task_classification: 'feature', node_chosen: 'direct', triggers_matched: [], boundaries_applied: [] } }),
|
||||
ep({ primary_rationale: { ...at[1], step: 1 } }),
|
||||
ep({ primary_rationale: { ...at[3], step: 1 } }),
|
||||
ep({ primary_rationale: { ...at[5], step: 1 } }),
|
||||
];
|
||||
const res = routerStepReached(eps);
|
||||
expect(res.suspicious).toBe(false);
|
||||
expect(routerStepReached(eps).suspicious).toBe(false);
|
||||
});
|
||||
|
||||
it('ignores v1 episodes and observer_error markers', () => {
|
||||
const eps = [
|
||||
{ schema_version: 1, primary_rationale: { step: 5 } },
|
||||
{ schema_version: 1, primary_rationale: { ...at[5] } },
|
||||
{ observer_error: true },
|
||||
ep({ primary_rationale: { step: 2, task_classification: 'feature', node_chosen: 'direct', triggers_matched: [], boundaries_applied: [] } }),
|
||||
ep({ primary_rationale: { ...at[3], step: 1 } }),
|
||||
];
|
||||
const res = routerStepReached(eps);
|
||||
expect(res.distribution).toEqual({ '2': 1 });
|
||||
expect(res.distribution).toEqual({ '3': 1 });
|
||||
expect(res.total).toBe(1);
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user