import { describe, it, expect, beforeEach, afterEach } from 'vitest'; import fs from 'fs'; import path from 'path'; import { fileURLToPath } from 'url'; import { dedupeEpisodes, inferOutcome, groupEpisodesToTasks, findCausalChains, buildFactorMatrix, analyze, buildClassCanonCoverage, buildRouterVsOpus, buildChainIgnoreBreakdown, analyzeChainHookEffectiveness, buildChainHookEffectiveness, CHAIN_OUTCOME_BUCKETS, buildRouterGateHookEffectiveness, buildSelfFabricationSignals, } from './brain-retro-analyzer.mjs'; // Stream H Task 8 — sanity check that Tables 16/17 builders are importable. describe('Stream H Task 8 import sanity', () => { it('buildRouterGateHookEffectiveness + buildSelfFabricationSignals exist', () => { expect(typeof buildRouterGateHookEffectiveness).toBe('function'); expect(typeof buildSelfFabricationSignals).toBe('function'); }); }); const __dirname = path.dirname(fileURLToPath(import.meta.url)); // Minimal v2 episode for tests. const ep = (overrides = {}) => ({ schema_version: 2, task_id: 's1', task_ref: 's1', timestamps: { started_at: '2026-05-19T10:00:00Z', ended_at: '2026-05-19T10:05:00Z' }, path_type: 'regulated', outcome: 'unknown', prompt_signal: 'neutral', decision_provenance: { kind: 'autonomous', claude_would_have_chosen: null }, environment: { economy_level: 0, model: 'claude-opus-4-7', post_compaction: false, session_turn: 1, parallel_session: false }, task_size: { tool_calls: 5, files_touched: 1, files: ['/a.js'] }, primary_rationale: { step: 1, node_chosen: 'direct', triggers_matched: [], candidates_considered: [], boundaries_applied: [], hard_floor: { invoked: false, rules: [] }, task_classification: 'feature' }, events: [], ...overrides, }); describe('dedupeEpisodes', () => { it('keeps the last of two episodes with the same task_id + started_at', () => { const a = ep({ outcome: 'unknown' }); const b = ep({ outcome: 'partial' }); // same task_id + started_at — routing-gate double-write const out = dedupeEpisodes([a, b]); expect(out).toHaveLength(1); expect(out[0].outcome).toBe('partial'); }); it('keeps all observer_error markers', () => { const out = dedupeEpisodes([ep(), { observer_error: true, task_id: 'e' }, { observer_error: true, task_id: 'e2' }]); expect(out.filter((e) => e.observer_error)).toHaveLength(2); }); }); describe('inferOutcome', () => { it('infers rework when the next episode opens with a correction', () => { expect(inferOutcome(ep(), ep({ prompt_signal: 'correction' }))).toBe('rework'); }); it('infers success when the next episode opens with approval', () => { expect(inferOutcome(ep(), ep({ prompt_signal: 'approval' }))).toBe('success'); }); it('infers partial when the episode has an interrupt event', () => { expect(inferOutcome(ep({ events: [{ kind: 'interrupt' }] }), ep())).toBe('partial'); }); it('infers unknown when there is no next episode', () => { expect(inferOutcome(ep(), null)).toBe('unknown'); }); it('infers blocked ONLY when an unrecovered_error event is present (turn ended on error)', () => { const blocked = ep({ events: [{ kind: 'error' }, { kind: 'error' }, { kind: 'unrecovered_error' }] }); expect(inferOutcome(blocked, ep({ prompt_signal: 'approval' }))).toBe('blocked'); }); it('does NOT infer blocked from raw error/retry count (TDD failing-test-first is not a block)', () => { // A turn with N errors + N retries that ends on a successful tool_result — // e.g., TDD red→green, or git command that legitimately fails then recovers — // must NOT count as blocked. The parser emits unrecovered_error iff the LAST // tool_result was is_error, which is absent here. const recovered = ep({ events: [{ kind: 'error' }, { kind: 'error' }, { kind: 'retry' }] }); expect(inferOutcome(recovered, ep({ prompt_signal: 'approval' }))).toBe('success'); }); it('does not infer blocked when every error was retried', () => { const recovered = ep({ events: [{ kind: 'error' }, { kind: 'retry' }] }); expect(inferOutcome(recovered, ep({ prompt_signal: 'approval' }))).toBe('success'); }); }); describe('groupEpisodesToTasks', () => { it('starts a new task after a success and on a new_task prompt', () => { const eps = [ ep({ timestamps: { started_at: '2026-05-19T10:00:00Z', ended_at: '2026-05-19T10:01:00Z' }, prompt_signal: 'new_task' }), ep({ timestamps: { started_at: '2026-05-19T10:02:00Z', ended_at: '2026-05-19T10:03:00Z' }, prompt_signal: 'approval' }), ep({ timestamps: { started_at: '2026-05-19T10:04:00Z', ended_at: '2026-05-19T10:05:00Z' }, prompt_signal: 'new_task' }), ]; const tasks = groupEpisodesToTasks(eps); expect(tasks.length).toBeGreaterThanOrEqual(2); }); }); describe('findCausalChains', () => { it('links an errored episode to a later episode that shares a file', () => { const a = ep({ timestamps: { started_at: '2026-05-19T10:00:00Z', ended_at: '2026-05-19T10:01:00Z' }, events: [{ kind: 'error', message: 'x' }], task_size: { tool_calls: 1, files_touched: 1, files: ['/shared.js'] } }); const b = ep({ timestamps: { started_at: '2026-05-19T10:02:00Z', ended_at: '2026-05-19T10:03:00Z' }, task_size: { tool_calls: 1, files_touched: 1, files: ['/shared.js'] } }); const chains = findCausalChains([a, b]); expect(chains).toHaveLength(1); expect(chains[0].sharedFiles).toEqual(['/shared.js']); }); it('returns no chain when no files are shared', () => { const a = ep({ events: [{ kind: 'error', message: 'x' }], task_size: { tool_calls: 1, files_touched: 1, files: ['/a.js'] } }); const b = ep({ timestamps: { started_at: '2026-05-19T10:02:00Z', ended_at: '2026-05-19T10:03:00Z' }, task_size: { tool_calls: 1, files_touched: 1, files: ['/b.js'] } }); expect(findCausalChains([a, b])).toHaveLength(0); }); it('excludes hot/normative files (CLAUDE.md) from the shared-file signal', () => { const a = ep({ events: [{ kind: 'error', message: 'x' }], task_size: { tool_calls: 1, files_touched: 1, files: ['c:\\моя\\проекты\\портал crm\\Документация\\CLAUDE.md'] }, }); const b = ep({ timestamps: { started_at: '2026-05-19T10:02:00Z', ended_at: '2026-05-19T10:03:00Z' }, task_size: { tool_calls: 1, files_touched: 1, files: ['c:\\моя\\проекты\\портал crm\\Документация\\CLAUDE.md'] }, }); expect(findCausalChains([a, b])).toHaveLength(0); }); it('excludes memory store .md files from the shared-file signal', () => { const a = ep({ events: [{ kind: 'error', message: 'x' }], task_size: { tool_calls: 1, files_touched: 1, files: ['C:\\Users\\Administrator\\.claude\\projects\\proj\\memory\\reference_github.md'] }, }); const b = ep({ timestamps: { started_at: '2026-05-19T10:02:00Z', ended_at: '2026-05-19T10:03:00Z' }, task_size: { tool_calls: 1, files_touched: 1, files: ['C:\\Users\\Administrator\\.claude\\projects\\proj\\memory\\reference_github.md'] }, }); expect(findCausalChains([a, b])).toHaveLength(0); }); it('excludes episodes JSONL + STATUS.md + MEMORY.md from chains', () => { const mk = (path, evts = []) => ep({ timestamps: { started_at: '2026-05-19T10:00:00Z', ended_at: '2026-05-19T10:01:00Z' }, events: evts, task_size: { tool_calls: 1, files_touched: 1, files: [path] }, }); const later = (path) => ep({ timestamps: { started_at: '2026-05-19T10:02:00Z', ended_at: '2026-05-19T10:03:00Z' }, task_size: { tool_calls: 1, files_touched: 1, files: [path] }, }); const errored = [{ kind: 'error', message: 'x' }]; expect(findCausalChains([mk('/docs/observer/episodes-2026-05.jsonl', errored), later('/docs/observer/episodes-2026-05.jsonl')])).toHaveLength(0); expect(findCausalChains([mk('/docs/observer/STATUS.md', errored), later('/docs/observer/STATUS.md')])).toHaveLength(0); expect(findCausalChains([mk('/some/dir/MEMORY.md', errored), later('/some/dir/MEMORY.md')])).toHaveLength(0); }); it('still links chains via genuinely-shared source files', () => { const a = ep({ events: [{ kind: 'error', message: 'x' }], task_size: { tool_calls: 1, files_touched: 2, files: ['c:\\path\\CLAUDE.md', '/src/app.ts'] }, }); const b = ep({ timestamps: { started_at: '2026-05-19T10:02:00Z', ended_at: '2026-05-19T10:03:00Z' }, task_size: { tool_calls: 1, files_touched: 2, files: ['c:\\path\\CLAUDE.md', '/src/app.ts'] }, }); const chains = findCausalChains([a, b]); expect(chains).toHaveLength(1); expect(chains[0].sharedFiles).toEqual(['/src/app.ts']); }); }); describe('buildFactorMatrix', () => { it('tabulates outcome distribution per factor value', () => { const eps = [ { ...ep(), _inferredOutcome: 'rework', decision_provenance: { kind: 'user_directed_method' } }, { ...ep(), _inferredOutcome: 'success', decision_provenance: { kind: 'autonomous' } }, ]; const m = buildFactorMatrix(eps); expect(m.decision_provenance.user_directed_method.rework).toBe(1); expect(m.decision_provenance.autonomous.success).toBe(1); }); it('counts the 3rd kind user_chose_from_options on the provenance axis', () => { const eps = [ { ...ep(), _inferredOutcome: 'success', decision_provenance: { kind: 'autonomous' } }, { ...ep(), _inferredOutcome: 'rework', decision_provenance: { kind: 'user_directed_method' } }, { ...ep(), _inferredOutcome: 'success', decision_provenance: { kind: 'user_chose_from_options' } }, { ...ep(), _inferredOutcome: 'rework', decision_provenance: { kind: 'user_chose_from_options' } }, ]; const m = buildFactorMatrix(eps); expect(m.decision_provenance).toHaveProperty('autonomous'); expect(m.decision_provenance).toHaveProperty('user_directed_method'); expect(m.decision_provenance).toHaveProperty('user_chose_from_options'); expect(m.decision_provenance.user_chose_from_options.success).toBe(1); expect(m.decision_provenance.user_chose_from_options.rework).toBe(1); }); it('includes session_segment_turn (bucketed, turns-since-last-compaction) and parallel_session factors', () => { const eps = [ { ...ep(), _inferredOutcome: 'success', environment: { session_turn: 3, parallel_session: false } }, { ...ep(), _inferredOutcome: 'rework', environment: { session_turn: 120, parallel_session: true } }, ]; const m = buildFactorMatrix(eps); expect(m.session_segment_turn.early.success).toBe(1); expect(m.session_segment_turn.late.rework).toBe(1); expect(m.parallel_session.false.success).toBe(1); expect(m.parallel_session.true.rework).toBe(1); }); }); describe('analyze', () => { it('returns episodeCount, tasks, causalChains and factorMatrix', () => { const result = analyze([ep(), ep({ timestamps: { started_at: '2026-05-19T11:00:00Z', ended_at: '2026-05-19T11:01:00Z' }, prompt_signal: 'correction' })]); expect(result.episodeCount).toBe(2); expect(result.factorMatrix).toBeDefined(); expect(Array.isArray(result.tasks)).toBe(true); expect(Array.isArray(result.causalChains)).toBe(true); }); it('skips v1 episodes (no schema_version 2) from the analysis', () => { const v1 = { task_id: 's-old', timestamps: { started_at: '2026-05-19T09:00:00Z' }, outcome: 'success' }; const result = analyze([ v1, ep(), ep({ timestamps: { started_at: '2026-05-19T11:00:00Z', ended_at: '2026-05-19T11:01:00Z' } }), ]); expect(result.episodeCount).toBe(2); expect(result.v1SkippedCount).toBe(1); }); }); describe('buildFactorMatrix — session_segment_turn axis rename (Task 14)', () => { it('matrix has session_segment_turn axis, NOT legacy session_turn', () => { const result = analyze([ { schema_version: 2, task_id: 's', task_ref: 's', timestamps: { started_at: '2026-05-20T00:00:00Z' }, events: [], environment: { economy_level: null, model: 'opus', post_compaction: false, session_turn: 5, parallel_session: false }, task_size: { tool_calls: 0 }, primary_rationale: { node_chosen: 'direct', task_classification: 'other' }, decision_provenance: { kind: 'autonomous' } }, ]); expect(result.factorMatrix).toHaveProperty('session_segment_turn'); expect(result.factorMatrix).not.toHaveProperty('session_turn'); }); }); describe('buildFactorMatrix — chain_ref axis (multi-chain)', () => { it('counts a multi-chain episode in each chain and null for direct', () => { const m = buildFactorMatrix([ { _inferredOutcome: 'success', primary_rationale: { node_chosen: 'discovery-interview', chain_ref: ['L1', 'L2'] } }, { _inferredOutcome: 'unknown', primary_rationale: { node_chosen: 'direct', chain_ref: null } }, ]); expect(m.chain_ref.L1).toEqual({ success: 1 }); expect(m.chain_ref.L2).toEqual({ success: 1 }); expect(m.chain_ref.null).toEqual({ unknown: 1 }); }); it('chain_ref axis present via analyze()', () => { const result = analyze([ep({ primary_rationale: { node_chosen: 'billing-audit', chain_ref: ['L13'], task_classification: 'other' } })]); expect(result.factorMatrix).toHaveProperty('chain_ref'); }); }); describe('inferOutcome — neutral → soft_success (Task 16)', () => { it('returns soft_success when next prompt is neutral', () => { const a = { events: [] }; const b = { prompt_signal: 'neutral' }; expect(inferOutcome(a, b)).toBe('soft_success'); }); it('returns unknown when no next episode', () => { expect(inferOutcome({ events: [] }, null)).toBe('unknown'); }); it('rework still wins over neutral on correction', () => { expect(inferOutcome({ events: [] }, { prompt_signal: 'correction' })).toBe('rework'); }); it('explicit success still wins over neutral on approval', () => { expect(inferOutcome({ events: [] }, { prompt_signal: 'approval' })).toBe('success'); }); }); describe('analyze() — missedActivations integration', () => { it('includes missedActivations in the result', () => { const eps = [ { schema_version: 2, task_id: 't1', timestamps: { started_at: '2026-05-21T00:00:00Z' }, primary_rationale: { node_chosen: 'direct', task_classification: 'refactor' }, events: [], }, ]; const map = { refactor: ['#11'], other: [] }; const dormancy = { '#11': false }; const result = analyze(eps, { classificationMap: map, dormancy }); expect(result.missedActivations).toBeDefined(); expect(result.missedActivations.totalMissed).toBe(1); expect(result.missedActivations.byNode).toEqual({ '#11': 1 }); }); it('returns missedActivations.totalMissed=0 when no map/dormancy provided', () => { const eps = [{ schema_version: 2, task_id: 't1', timestamps: { started_at: 'x' }, primary_rationale: { node_chosen: 'direct', task_classification: 'refactor' }, events: [] }]; const result = analyze(eps); expect(result.missedActivations.totalMissed).toBe(0); }); }); describe('analyze: schema_version filter', () => { it('accepts both v2 and v3 episodes', () => { const v2 = { schema_version: 2, task_id: 's1', timestamps: { started_at: '2026-05-23T10:00:00Z' }, prompt_signal: 'new_task', primary_rationale: { node_chosen: 'direct', task_classification: 'feature' }, environment: {}, task_size: { tool_calls: 1 }, decision_provenance: { kind: 'autonomous' }, events: [] }; const v3 = { ...v2, schema_version: 3, task_id: 's2', timestamps: { started_at: '2026-05-23T11:00:00Z' }, primary_rationale: { ...v2.primary_rationale, recommended_node: '#19' } }; const result = analyze([v2, v3]); expect(result.episodeCount).toBe(2); }); it('factorMatrix has recommended_node_for_direct axis', () => { const v3 = { schema_version: 3, task_id: 's1', timestamps: { started_at: '2026-05-23T10:00:00Z' }, prompt_signal: 'new_task', primary_rationale: { node_chosen: 'direct', task_classification: 'feature', recommended_node: '#19' }, environment: {}, task_size: { tool_calls: 1 }, decision_provenance: { kind: 'autonomous' }, events: [] }; const result = analyze([v3]); expect(result.factorMatrix.recommended_node_for_direct).toBeDefined(); expect(result.factorMatrix.recommended_node_for_direct['#19']).toBeDefined(); }); it('v2 episode bucket=none in recommended_node_for_direct', () => { const v2 = { schema_version: 2, task_id: 's1', timestamps: { started_at: '2026-05-23T10:00:00Z' }, prompt_signal: 'new_task', primary_rationale: { node_chosen: 'direct', task_classification: 'feature' }, environment: {}, task_size: { tool_calls: 1 }, decision_provenance: { kind: 'autonomous' }, events: [] }; const result = analyze([v2]); expect(result.factorMatrix.recommended_node_for_direct.none).toBeDefined(); }); }); describe('analyze — discipline metrics (stage 2)', () => { const map = { feature: ['#19'], bugfix: ['#18'] }; const dormancy = { '#19': false, '#18': false }; it('returns disciplinePercentByClassification', () => { const eps = [ ep({ primary_rationale: { task_classification: 'feature', node_chosen: 'direct', triggers_matched: [], boundaries_applied: [], step: 1, candidates_considered: [], hard_floor: { invoked: false, rules: [] } } }), ep({ timestamps: { started_at: '2026-05-19T10:01:00Z', ended_at: '2026-05-19T10:02:00Z' }, primary_rationale: { task_classification: 'feature', node_chosen: '#19', triggers_matched: [{node:'#19'}], boundaries_applied: [], step: 3, candidates_considered: [], hard_floor: { invoked: false, rules: [] } } }), ]; const res = analyze(eps, { classificationMap: map, dormancy }); expect(res.disciplineByClassification.feature.episodes).toBe(2); expect(res.disciplineByClassification.feature.withTriggerMatch).toBe(1); expect(res.disciplineByClassification.feature.viaSkill).toBe(1); }); it('returns routerStepReached distribution (derived from signals)', () => { const eps = [ // bare/direct → derived step 1 ep({ primary_rationale: { step: 1, task_classification: 'other', node_chosen: 'direct', triggers_matched: [], chain_ref: [], boundaries_applied: [], candidates_considered: [], hard_floor: { invoked: false, rules: [] } } }), // triggers matched → derived step 3 ep({ timestamps: { started_at: '2026-05-19T10:01:00Z', ended_at: '2026-05-19T10:02:00Z' }, primary_rationale: { step: 1, task_classification: 'other', node_chosen: 'direct', triggers_matched: [{ node: '#19' }], chain_ref: [], boundaries_applied: [], candidates_considered: [], hard_floor: { invoked: false, rules: [] } } }), ]; const res = analyze(eps, { classificationMap: map, dormancy }); expect(res.routerStep.distribution['1']).toBe(1); expect(res.routerStep.distribution['3']).toBe(1); }); it('returns boundariesAppliedRate', () => { const eps = [ ep({ primary_rationale: { boundaries_applied: [{ adr: 'X' }], task_classification: 'feature', node_chosen: 'direct', triggers_matched: [], step: 1, candidates_considered: [], hard_floor: { invoked: false, rules: [] } } }), ep({ timestamps: { started_at: '2026-05-19T10:01:00Z', ended_at: '2026-05-19T10:02:00Z' }, primary_rationale: { boundaries_applied: [], task_classification: 'feature', node_chosen: 'direct', triggers_matched: [], step: 1, candidates_considered: [], hard_floor: { invoked: false, rules: [] } } }), ]; const res = analyze(eps, { classificationMap: map, dormancy }); expect(res.boundariesRate.total).toBe(2); expect(res.boundariesRate.withBoundaries).toBe(1); expect(res.boundariesRate.rate).toBeCloseTo(0.5); }); }); describe('analyze — v4 aggregations (Phase 3 Task 20)', () => { it('aggregates inheritanceCount across v4 episodes', () => { const eps = [ ep({ schema_version: 4, inheritance: { inherited_from_task_id: 'x' } }), ep({ schema_version: 4, timestamps: { started_at: '2026-05-19T10:01:00Z', ended_at: '2026-05-19T10:02:00Z' }, inheritance: { inherited_from_task_id: 'y' } }), ep({ schema_version: 4, timestamps: { started_at: '2026-05-19T10:02:00Z', ended_at: '2026-05-19T10:03:00Z' } }), ]; expect(analyze(eps).inheritanceCount).toBe(2); }); it('aggregates reviewQuality distribution from review.node_quality', () => { const eps = [ ep({ schema_version: 4, review: { node_quality: 'correct' } }), ep({ schema_version: 4, timestamps: { started_at: '2026-05-19T10:01:00Z', ended_at: '2026-05-19T10:02:00Z' }, review: { node_quality: 'correct' } }), ep({ schema_version: 4, timestamps: { started_at: '2026-05-19T10:02:00Z', ended_at: '2026-05-19T10:03:00Z' }, review: { node_quality: 'wrong_node' } }), ]; const res = analyze(eps); expect(res.reviewQuality.correct).toBe(2); expect(res.reviewQuality.wrong_node).toBe(1); expect(res.reviewerCoverage.reviewed).toBe(3); }); it('counts review pending for v4 episodes without a review block', () => { const eps = [ep({ schema_version: 4 })]; expect(analyze(eps).reviewerCoverage.pending).toBe(1); }); it('counts reviewer_error escalations under reviewerCoverage.errored', () => { const eps = [ep({ schema_version: 4, review: { reviewer_error: 'malformed episode' } })]; expect(analyze(eps).reviewerCoverage.errored).toBe(1); }); it('aggregates degradedCount on degraded_mode=true', () => { const eps = [ ep({ schema_version: 4, degraded_mode: true }), ep({ schema_version: 4, timestamps: { started_at: '2026-05-19T10:01:00Z', ended_at: '2026-05-19T10:02:00Z' }, degraded_mode: false }), ]; expect(analyze(eps).degradedCount).toBe(1); }); it('sums task_cost tokens into costTotals', () => { const eps = [ ep({ schema_version: 4, task_cost: { classifier_input_tokens: 100, classifier_output_tokens: 30 } }), ep({ schema_version: 4, timestamps: { started_at: '2026-05-19T10:01:00Z', ended_at: '2026-05-19T10:02:00Z' }, task_cost: { classifier_input_tokens: 200, reviewer_input_tokens: 500 } }), ]; const ct = analyze(eps).costTotals; expect(ct.classifier_input_tokens).toBe(300); expect(ct.classifier_output_tokens).toBe(30); expect(ct.reviewer_input_tokens).toBe(500); }); }); describe('buildFactorMatrix — Pass 5 v4-signal axes', () => { it('factor matrix exposes v4-signal axes', () => { const eps = [ { _inferredOutcome: 'success', v4_signals: { rationalization_flag_count: 0, judge_verdict: 'YES', safe_baseline_action: 'allow', judge_calls: 0 }, task_cost: { judge_spend_usd: 0 } }, { _inferredOutcome: 'rework', v4_signals: { rationalization_flag_count: 3, judge_verdict: 'block', safe_baseline_action: 'hard_block', judge_calls: 12 }, task_cost: { judge_spend_usd: 0.03 } }, ]; const m = buildFactorMatrix(eps); expect(m.rationalization_flag_count['0']).toEqual({ success: 1 }); expect(m.rationalization_flag_count['2+']).toEqual({ rework: 1 }); expect(m.judge_verdict.YES).toEqual({ success: 1 }); expect(m.judge_verdict.block).toEqual({ rework: 1 }); expect(m.safe_baseline_action.hard_block).toEqual({ rework: 1 }); expect(m.judge_calls_bucket['0']).toEqual({ success: 1 }); expect(m.judge_calls_bucket['10+']).toEqual({ rework: 1 }); }); }); describe('buildFactorMatrix — Pass 1 cheap axes (project-brain-factor-analysis-4passes)', () => { // Each new axis: smoke + null-safety on missing fields. it('prompt_signal axis: raw discrete values + null fallback', () => { const m = buildFactorMatrix([ { ...ep(), _inferredOutcome: 'success', prompt_signal: 'new_task' }, { ...ep(), _inferredOutcome: 'rework', prompt_signal: 'correction' }, { ...ep(), _inferredOutcome: 'unknown', prompt_signal: undefined }, ]); expect(m.prompt_signal.new_task.success).toBe(1); expect(m.prompt_signal.correction.rework).toBe(1); expect(m.prompt_signal.null.unknown).toBe(1); }); it('classifier_source axis: reads classifier_output.source verbatim', () => { const m = buildFactorMatrix([ { ...ep(), _inferredOutcome: 'success', classifier_output: { source: 'llm' } }, { ...ep(), _inferredOutcome: 'success', classifier_output: { source: 'regex' } }, { ...ep(), _inferredOutcome: 'success', classifier_output: { source: 'prefilter_inherited' } }, { ...ep(), _inferredOutcome: 'unknown', classifier_output: null }, ]); expect(m.classifier_source.llm.success).toBe(1); expect(m.classifier_source.regex.success).toBe(1); expect(m.classifier_source.prefilter_inherited.success).toBe(1); expect(m.classifier_source.null.unknown).toBe(1); }); it('degraded_mode axis: true/false buckets, false default', () => { const m = buildFactorMatrix([ { ...ep(), _inferredOutcome: 'success', degraded_mode: false }, { ...ep(), _inferredOutcome: 'rework', degraded_mode: true }, { ...ep(), _inferredOutcome: 'unknown' /* missing */ }, ]); expect(m.degraded_mode.true.rework).toBe(1); expect(m.degraded_mode.false.success).toBe(1); expect(m.degraded_mode.false.unknown).toBe(1); }); it('path_type axis: regulated / improvised / null', () => { const m = buildFactorMatrix([ { ...ep(), _inferredOutcome: 'success', path_type: 'regulated' }, { ...ep(), _inferredOutcome: 'rework', path_type: 'improvised' }, { ...ep(), _inferredOutcome: 'unknown', path_type: undefined }, ]); expect(m.path_type.regulated.success).toBe(1); expect(m.path_type.improvised.rework).toBe(1); expect(m.path_type.null.unknown).toBe(1); }); it('retry_count axis: 0 / 1-2 / 3+ buckets from events[].kind=retry', () => { const m = buildFactorMatrix([ { ...ep(), _inferredOutcome: 'success', events: [] }, { ...ep(), _inferredOutcome: 'rework', events: [{ kind: 'retry' }] }, { ...ep(), _inferredOutcome: 'rework', events: [{ kind: 'retry' }, { kind: 'retry' }] }, { ...ep(), _inferredOutcome: 'blocked', events: [{ kind: 'retry' }, { kind: 'retry' }, { kind: 'retry' }, { kind: 'retry' }] }, ]); expect(m.retry_count['0'].success).toBe(1); expect(m.retry_count['1-2'].rework).toBe(2); expect(m.retry_count['3+'].blocked).toBe(1); }); it('error_count axis: 0 / 1 / 2+ buckets from events[].kind=error', () => { const m = buildFactorMatrix([ { ...ep(), _inferredOutcome: 'success', events: [] }, { ...ep(), _inferredOutcome: 'rework', events: [{ kind: 'error' }] }, { ...ep(), _inferredOutcome: 'blocked', events: [{ kind: 'error' }, { kind: 'error' }, { kind: 'error' }] }, ]); expect(m.error_count['0'].success).toBe(1); expect(m.error_count['1'].rework).toBe(1); expect(m.error_count['2+'].blocked).toBe(1); }); it('hard_floor_invoked axis: true/false from primary_rationale.hard_floor.invoked', () => { const m = buildFactorMatrix([ { ...ep(), _inferredOutcome: 'success', primary_rationale: { hard_floor: { invoked: true } } }, { ...ep(), _inferredOutcome: 'success', primary_rationale: { hard_floor: { invoked: false } } }, { ...ep(), _inferredOutcome: 'unknown', primary_rationale: {} }, ]); expect(m.hard_floor_invoked.true.success).toBe(1); expect(m.hard_floor_invoked.false.success).toBe(1); expect(m.hard_floor_invoked.false.unknown).toBe(1); }); it('iterations_bucket axis: 0 / 1-3 / 4-10 / 11+ from task_cost.iterations', () => { const m = buildFactorMatrix([ { ...ep(), _inferredOutcome: 'success', task_cost: { iterations: 0 } }, { ...ep(), _inferredOutcome: 'success', task_cost: { iterations: 2 } }, { ...ep(), _inferredOutcome: 'rework', task_cost: { iterations: 7 } }, { ...ep(), _inferredOutcome: 'blocked', task_cost: { iterations: 51 } }, { ...ep(), _inferredOutcome: 'unknown', task_cost: {} }, ]); expect(m.iterations_bucket['0'].success).toBe(1); expect(m.iterations_bucket['1-3'].success).toBe(1); expect(m.iterations_bucket['4-10'].rework).toBe(1); expect(m.iterations_bucket['11+'].blocked).toBe(1); // Missing iterations counts as 0 — task_cost block may be absent on early episodes. expect(m.iterations_bucket['0'].unknown).toBe(1); }); it('all 8 Pass 1 axes are present via analyze() on a minimal v2 episode', () => { const result = analyze([ep()]); for (const axis of ['prompt_signal', 'classifier_source', 'degraded_mode', 'path_type', 'retry_count', 'error_count', 'hard_floor_invoked', 'iterations_bucket']) { expect(result.factorMatrix, `axis ${axis} missing`).toHaveProperty(axis); } }); }); describe('buildFactorMatrix — Pass 3 dynamics axes (project-brain-factor-analysis-4passes)', () => { it('prompt_length_bucket axis: short / medium / long / huge / null', () => { const m = buildFactorMatrix([ { ...ep(), _inferredOutcome: 'success', task_meta: { prompt_length_chars: 42 } }, { ...ep(), _inferredOutcome: 'success', task_meta: { prompt_length_chars: 300 } }, { ...ep(), _inferredOutcome: 'rework', task_meta: { prompt_length_chars: 1200 } }, { ...ep(), _inferredOutcome: 'blocked', task_meta: { prompt_length_chars: 5000 } }, { ...ep(), _inferredOutcome: 'unknown', task_meta: undefined }, ]); expect(m.prompt_length_bucket.short.success).toBe(1); expect(m.prompt_length_bucket.medium.success).toBe(1); expect(m.prompt_length_bucket.long.rework).toBe(1); expect(m.prompt_length_bucket.huge.blocked).toBe(1); expect(m.prompt_length_bucket.null.unknown).toBe(1); }); it('time_of_day_bucket axis derived from timestamps.started_at UTC hour', () => { const at = (iso) => ({ ...ep(), _inferredOutcome: 'success', timestamps: { started_at: iso } }); const m = buildFactorMatrix([ at('2026-05-25T03:00:00Z'), // night (0-5) at('2026-05-25T09:00:00Z'), // morning (6-11) at('2026-05-25T14:00:00Z'), // afternoon (12-17) at('2026-05-25T20:00:00Z'), // evening (18-23) ]); expect(m.time_of_day_bucket.night.success).toBe(1); expect(m.time_of_day_bucket.morning.success).toBe(1); expect(m.time_of_day_bucket.afternoon.success).toBe(1); expect(m.time_of_day_bucket.evening.success).toBe(1); }); it('day_of_week axis: Mon..Sun derived from started_at UTC', () => { // 2026-05-25 is a Monday (UTC). const m = buildFactorMatrix([ { ...ep(), _inferredOutcome: 'success', timestamps: { started_at: '2026-05-25T10:00:00Z' } }, // Mon { ...ep(), _inferredOutcome: 'success', timestamps: { started_at: '2026-05-27T10:00:00Z' } }, // Wed { ...ep(), _inferredOutcome: 'unknown', timestamps: { started_at: null } }, ]); expect(m.day_of_week.Mon.success).toBe(1); expect(m.day_of_week.Wed.success).toBe(1); expect(m.day_of_week.null.unknown).toBe(1); }); it('inter_prompt_gap_bucket axis: gap between current and previous episode of same session', () => { const eps = [ { schema_version: 2, task_id: 's1', timestamps: { started_at: '2026-05-25T10:00:00Z', ended_at: '2026-05-25T10:05:00Z' }, prompt_signal: 'new_task', primary_rationale: { node_chosen: 'direct', task_classification: 'feature' }, environment: {}, task_size: { tool_calls: 1 }, decision_provenance: { kind: 'autonomous' }, events: [] }, // 2-minute gap → bucket "1-10m" { schema_version: 2, task_id: 's1', timestamps: { started_at: '2026-05-25T10:07:00Z', ended_at: '2026-05-25T10:10:00Z' }, prompt_signal: 'correction', primary_rationale: { node_chosen: 'direct', task_classification: 'feature' }, environment: {}, task_size: { tool_calls: 1 }, decision_provenance: { kind: 'autonomous' }, events: [] }, // 80-minute gap → bucket "60m+" { schema_version: 2, task_id: 's1', timestamps: { started_at: '2026-05-25T11:30:00Z', ended_at: '2026-05-25T11:35:00Z' }, prompt_signal: 'approval', primary_rationale: { node_chosen: 'direct', task_classification: 'feature' }, environment: {}, task_size: { tool_calls: 1 }, decision_provenance: { kind: 'autonomous' }, events: [] }, ]; const result = analyze(eps); expect(result.factorMatrix.inter_prompt_gap_bucket).toBeDefined(); // First episode has no previous → bucket 'null'. expect(result.factorMatrix.inter_prompt_gap_bucket.null).toBeDefined(); expect(result.factorMatrix.inter_prompt_gap_bucket['1-10m']).toBeDefined(); expect(result.factorMatrix.inter_prompt_gap_bucket['60m+']).toBeDefined(); }); it('mcp_server_used axis: any / none (presence of any mcp_servers_used entry)', () => { const m = buildFactorMatrix([ { ...ep(), _inferredOutcome: 'success', task_meta: { mcp_servers_used: ['github'] } }, { ...ep(), _inferredOutcome: 'success', task_meta: { mcp_servers_used: [] } }, { ...ep(), _inferredOutcome: 'unknown' /* missing */ }, ]); expect(m.mcp_server_used.any.success).toBe(1); expect(m.mcp_server_used.none.success).toBe(1); expect(m.mcp_server_used.none.unknown).toBe(1); }); it('file_type_main axis: dominant path category from file_type_distribution', () => { const m = buildFactorMatrix([ { ...ep(), _inferredOutcome: 'success', task_meta: { file_type_distribution: { src: 3, test: 1, other: 0, config: 0, spec: 0, norm: 0, data: 0 } } }, { ...ep(), _inferredOutcome: 'rework', task_meta: { file_type_distribution: { src: 0, test: 4, other: 0, config: 0, spec: 0, norm: 0, data: 0 } } }, { ...ep(), _inferredOutcome: 'success', task_meta: { file_type_distribution: { src: 2, test: 2, other: 0, config: 0, spec: 0, norm: 0, data: 0 } } }, // tie → mixed { ...ep(), _inferredOutcome: 'unknown', task_meta: { file_type_distribution: { src: 0, test: 0, other: 0, config: 0, spec: 0, norm: 0, data: 0 } } }, // empty → none { ...ep(), _inferredOutcome: 'unknown' /* missing */ }, ]); expect(m.file_type_main.src.success).toBe(1); expect(m.file_type_main.test.rework).toBe(1); expect(m.file_type_main.mixed.success).toBe(1); expect(m.file_type_main.none.unknown).toBe(2); // empty + missing }); it('skill_invocations_bucket axis: 0 / 1 / 2+ from events tool_summary.Skill', () => { const m = buildFactorMatrix([ { ...ep(), _inferredOutcome: 'success', events: [] }, { ...ep(), _inferredOutcome: 'success', events: [{ kind: 'tool_summary', counts: { Skill: 1, Read: 5 } }] }, { ...ep(), _inferredOutcome: 'success', events: [{ kind: 'tool_summary', counts: { Skill: 3 } }] }, ]); expect(m.skill_invocations_bucket['0'].success).toBe(1); expect(m.skill_invocations_bucket['1'].success).toBe(1); expect(m.skill_invocations_bucket['2+'].success).toBe(1); }); it('subagent_spawns_bucket axis: 0 / 1 / 2+ from events tool_summary.Agent (or Task)', () => { const m = buildFactorMatrix([ { ...ep(), _inferredOutcome: 'success', events: [] }, { ...ep(), _inferredOutcome: 'success', events: [{ kind: 'tool_summary', counts: { Agent: 1 } }] }, { ...ep(), _inferredOutcome: 'rework', events: [{ kind: 'tool_summary', counts: { Agent: 4 } }] }, ]); expect(m.subagent_spawns_bucket['0'].success).toBe(1); expect(m.subagent_spawns_bucket['1'].success).toBe(1); expect(m.subagent_spawns_bucket['2+'].rework).toBe(1); }); it('all 8 Pass 3 axes are present via analyze() on a minimal v2 episode', () => { const result = analyze([ep()]); for (const axis of ['prompt_length_bucket', 'time_of_day_bucket', 'day_of_week', 'inter_prompt_gap_bucket', 'mcp_server_used', 'file_type_main', 'skill_invocations_bucket', 'subagent_spawns_bucket']) { expect(result.factorMatrix, `axis ${axis} missing`).toHaveProperty(axis); } }); }); describe('buildFactorMatrix — Pass 2 classifier-metric axes', () => { it('latency_bucket axis: fast / medium / slow / very_slow / null', () => { const m = buildFactorMatrix([ { ...ep(), _inferredOutcome: 'success', classifier_output: { latency_ms: 250 } }, { ...ep(), _inferredOutcome: 'success', classifier_output: { latency_ms: 1500 } }, { ...ep(), _inferredOutcome: 'rework', classifier_output: { latency_ms: 5000 } }, { ...ep(), _inferredOutcome: 'blocked', classifier_output: { latency_ms: 15000 } }, { ...ep(), _inferredOutcome: 'unknown', classifier_output: null }, ]); expect(m.latency_bucket.fast.success).toBe(1); expect(m.latency_bucket.medium.success).toBe(1); expect(m.latency_bucket.slow.rework).toBe(1); expect(m.latency_bucket.very_slow.blocked).toBe(1); expect(m.latency_bucket.null.unknown).toBe(1); }); it('error_type axis: reads classifier_output.llm_error verbatim with null default', () => { const m = buildFactorMatrix([ { ...ep(), _inferredOutcome: 'rework', classifier_output: { llm_error: 'timeout' } }, { ...ep(), _inferredOutcome: 'rework', classifier_output: { llm_error: 'econnreset' } }, { ...ep(), _inferredOutcome: 'success', classifier_output: { llm_error: null } }, { ...ep(), _inferredOutcome: 'success', classifier_output: null }, ]); expect(m.error_type.timeout.rework).toBe(1); expect(m.error_type.econnreset.rework).toBe(1); expect(m.error_type.null.success).toBe(2); }); }); describe('analyze — Pass 4 similar_past_outcome_majority axis (project-brain-factor-analysis-4passes)', () => { // Build a 4-dim embedding base64 manually to avoid loading @xenova in tests. const encode = (arr) => { const f = new Float32Array(arr); const buf = Buffer.from(f.buffer, f.byteOffset, f.byteLength); return buf.toString('base64'); }; it('attaches similar_past_outcome_majority axis to factor matrix', () => { // All four episodes share the same task_id (= sessionId in real episodes — // task_id IS the session id; one Claude Code session can contain N turns). // bySessionSorted groups by task_id, so inferOutcome only finds a "next" // episode within the same session group. const SID = 'session-A'; const eps = [ { schema_version: 4, task_id: SID, timestamps: { started_at: '2026-05-20T10:00:00Z', ended_at: '2026-05-20T10:01:00Z' }, prompt_signal: 'new_task', primary_rationale: { node_chosen: 'direct', task_classification: 'feature' }, environment: {}, task_size: { tool_calls: 1 }, decision_provenance: { kind: 'autonomous' }, prompt_embedding_base64: encode([1, 0, 0, 0]), events: [] }, { schema_version: 4, task_id: SID, timestamps: { started_at: '2026-05-20T10:02:00Z', ended_at: '2026-05-20T10:03:00Z' }, prompt_signal: 'approval', primary_rationale: { node_chosen: 'direct', task_classification: 'feature' }, environment: {}, task_size: { tool_calls: 1 }, decision_provenance: { kind: 'autonomous' }, prompt_embedding_base64: encode([0.95, 0.05, 0, 0]), events: [] }, { schema_version: 4, task_id: SID, timestamps: { started_at: '2026-05-20T10:04:00Z', ended_at: '2026-05-20T10:05:00Z' }, prompt_signal: 'approval', primary_rationale: { node_chosen: 'direct', task_classification: 'feature' }, environment: {}, task_size: { tool_calls: 1 }, decision_provenance: { kind: 'autonomous' }, prompt_embedding_base64: encode([0.9, 0.1, 0, 0]), events: [] }, { schema_version: 4, task_id: SID, timestamps: { started_at: '2026-05-20T10:06:00Z', ended_at: '2026-05-20T10:07:00Z' }, prompt_signal: 'new_task', primary_rationale: { node_chosen: 'direct', task_classification: 'feature' }, environment: {}, task_size: { tool_calls: 1 }, decision_provenance: { kind: 'autonomous' }, prompt_embedding_base64: encode([0.98, 0.02, 0, 0]), events: [] }, ]; const result = analyze(eps); expect(result.factorMatrix.similar_past_outcome_majority).toBeDefined(); // 3 of 4 episodes have resolved success outcome → indexed. Each gets a // nearest-neighbour lookup that returns success peers. expect(result.factorMatrix.similar_past_outcome_majority.success).toBeDefined(); }); it('bucket no_neighbors when no episode has embeddings', () => { const eps = [ { schema_version: 4, task_id: 'a', timestamps: { started_at: '2026-05-20T10:00:00Z', ended_at: '2026-05-20T10:01:00Z' }, prompt_signal: 'new_task', primary_rationale: { node_chosen: 'direct', task_classification: 'feature' }, environment: {}, task_size: { tool_calls: 1 }, decision_provenance: { kind: 'autonomous' }, prompt_embedding_base64: null, events: [] }, ]; const result = analyze(eps); expect(result.factorMatrix.similar_past_outcome_majority.no_neighbors).toBeDefined(); }); }); // ──────────────────────────────────────────────────────────────── // NEW CUTS: buildClassCanonCoverage, buildRouterVsOpus, buildChainIgnoreBreakdown // ──────────────────────────────────────────────────────────────── // Shared classMap fixture (embedded — no external file dependency) const testClassMap = { monitoring: ['#34', '#35'], bugfix: ['#18', '#34'], feature: ['#19'], release: ['#37'], planning: ['#19', '#41', '#42'], other: [], }; // Helper: episode for the new cuts (minimal — no embeddings needed) const epC = (overrides = {}) => ({ schema_version: 2, task_id: 's1', timestamps: { started_at: '2026-05-19T10:00:00Z', ended_at: '2026-05-19T10:05:00Z' }, primary_rationale: { node_chosen: 'direct', task_classification: 'other', recommended_node: null, recommended_chain: null, }, outcome_reviewed: 'unknown', ...overrides, }); describe('buildClassCanonCoverage', () => { it('returns [] for empty input', () => { expect(buildClassCanonCoverage([], testClassMap)).toEqual([]); }); it('single monitoring episode with recommended_node=#34, node_chosen=direct, rework', () => { const eps = [epC({ primary_rationale: { node_chosen: 'direct', task_classification: 'monitoring', recommended_node: '#34', recommended_chain: null }, outcome_reviewed: 'rework', })]; const rows = buildClassCanonCoverage(eps, testClassMap); expect(rows).toHaveLength(1); const row = rows[0]; expect(row.classification).toBe('monitoring'); expect(row.count).toBe(1); expect(row.canonicalNodes).toEqual(['#34', '#35']); expect(row.routerRecommended).toBe(1); // has recommended_node expect(row.claudeTook).toBe(0); // node_chosen === 'direct' expect(row.recWithinCanon).toBe(1); // '#34' is in canonical expect(row.rework).toBe(1); }); it('classification not in map gets canonicalNodes=[]', () => { const eps = [epC({ primary_rationale: { node_chosen: 'direct', task_classification: 'other', recommended_node: null, recommended_chain: null }, outcome_reviewed: 'success' })]; const rows = buildClassCanonCoverage(eps, {}); expect(rows[0].canonicalNodes).toEqual([]); }); it('recommended_chain with numeric ids normalized to #N for canon check', () => { const eps = [epC({ primary_rationale: { node_chosen: 'direct', task_classification: 'monitoring', recommended_node: null, recommended_chain: [19, 34] }, outcome_reviewed: 'success', })]; const rows = buildClassCanonCoverage(eps, testClassMap); // chain [19,34] → normalized ['#19','#34']. '#34' is in monitoring canonical → recWithinCanon=1 expect(rows[0].routerRecommended).toBe(1); expect(rows[0].recWithinCanon).toBe(1); }); it('mixed: 3 release episodes sorted desc, counting correctly', () => { // 3 release, 2 feature (release > feature by count) const eps = [ epC({ primary_rationale: { node_chosen: 'direct', task_classification: 'release', recommended_node: '#37', recommended_chain: null }, outcome_reviewed: 'rework', timestamps: { started_at: '2026-05-19T10:00:00Z' } }), epC({ primary_rationale: { node_chosen: 'direct', task_classification: 'release', recommended_node: '#99', recommended_chain: null }, outcome_reviewed: 'success', timestamps: { started_at: '2026-05-19T10:01:00Z' } }), epC({ primary_rationale: { node_chosen: '#37', task_classification: 'release', recommended_node: '#37', recommended_chain: null }, outcome_reviewed: 'success', timestamps: { started_at: '2026-05-19T10:02:00Z' } }), epC({ primary_rationale: { node_chosen: 'direct', task_classification: 'feature', recommended_node: '#19', recommended_chain: null }, outcome_reviewed: 'success', timestamps: { started_at: '2026-05-19T10:03:00Z' } }), epC({ primary_rationale: { node_chosen: 'direct', task_classification: 'feature', recommended_node: null, recommended_chain: null }, outcome_reviewed: 'success', timestamps: { started_at: '2026-05-19T10:04:00Z' } }), ]; const rows = buildClassCanonCoverage(eps, testClassMap); // Sorted by count desc: release=3, feature=2 expect(rows[0].classification).toBe('release'); expect(rows[0].count).toBe(3); expect(rows[0].routerRecommended).toBe(3); // all 3 have recommended_node expect(rows[0].claudeTook).toBe(1); // one has node_chosen='#37' expect(rows[0].recWithinCanon).toBe(2); // '#37' in release canonical for ep1 and ep3; '#99' not in canonical for ep2 expect(rows[0].rework).toBe(1); expect(rows[1].classification).toBe('feature'); expect(rows[1].count).toBe(2); expect(rows[1].routerRecommended).toBe(1); // only 1 has recommended_node expect(rows[1].claudeTook).toBe(0); }); }); describe('buildRouterVsOpus', () => { const epR = (overrides = {}) => ({ schema_version: 4, task_id: 'session-abc-12345', timestamps: { started_at: '2026-05-19T10:00:00Z' }, primary_rationale: { node_chosen: 'direct', task_classification: 'other', recommended_node: null, recommended_chain: null, }, outcome_reviewed: 'unknown', review: { node_quality: 'correct', chain_quality: 'n/a', alternative_better: null, error_root_cause: 'n/a', reasoning: 'ok', }, ...overrides, }); it('one episode in each of A/B/C → 1/1/1', () => { const eps = [ // A: router gave recommendation, has review epR({ primary_rationale: { node_chosen: 'direct', task_classification: 'feature', recommended_node: '#19', recommended_chain: null }, review: { node_quality: 'wrong_node', chain_quality: 'n/a', alternative_better: '#37', error_root_cause: 'wrong_skill', reasoning: 'x' }, outcome_reviewed: 'rework' }), // B: router silent, alternative_better set epR({ primary_rationale: { node_chosen: 'direct', task_classification: 'planning', recommended_node: null, recommended_chain: null }, review: { node_quality: 'correct', chain_quality: 'n/a', alternative_better: '#41', error_root_cause: 'n/a', reasoning: 'should have used planning' }, outcome_reviewed: 'soft_success', timestamps: { started_at: '2026-05-19T10:01:00Z' } }), // C: router gave, node_quality=correct, no alternative epR({ primary_rationale: { node_chosen: 'direct', task_classification: 'release', recommended_node: '#37', recommended_chain: null }, review: { node_quality: 'correct', chain_quality: 'n/a', alternative_better: null, error_root_cause: 'n/a', reasoning: 'direct was fine' }, outcome_reviewed: 'success', timestamps: { started_at: '2026-05-19T10:02:00Z' } }), ]; const result = buildRouterVsOpus(eps); expect(result.sectionA).toHaveLength(1); expect(result.sectionB).toHaveLength(1); expect(result.sectionC).toHaveLength(1); }); it('episode without review is excluded from all three sections', () => { const eps = [ epR({ review: undefined, primary_rationale: { node_chosen: 'direct', task_classification: 'other', recommended_node: '#19', recommended_chain: null } }), ]; const result = buildRouterVsOpus(eps); expect(result.sectionA).toHaveLength(0); expect(result.sectionB).toHaveLength(0); expect(result.sectionC).toHaveLength(0); }); it('A: episode with recommended_chain array of strings goes into A with routerRecommendation = the array', () => { const eps = [ epR({ primary_rationale: { node_chosen: 'direct', task_classification: 'planning', recommended_node: null, recommended_chain: ['#19', '#41'] }, review: { node_quality: 'wrong_node', chain_quality: 'missing_step', alternative_better: '#19', error_root_cause: 'wrong_chain_order', reasoning: 'chain needed' }, outcome_reviewed: 'rework' }), ]; const result = buildRouterVsOpus(eps); expect(result.sectionA).toHaveLength(1); expect(Array.isArray(result.sectionA[0].routerRecommendation)).toBe(true); expect(result.sectionA[0].routerRecommendation).toEqual(['#19', '#41']); }); it('B: router silent AND alternative_better truthy → in B; router silent AND alternative_better=null → not in B', () => { const eps = [ epR({ primary_rationale: { node_chosen: 'direct', task_classification: 'other', recommended_node: null, recommended_chain: null }, review: { node_quality: 'correct', chain_quality: 'n/a', alternative_better: '#60', error_root_cause: 'n/a', reasoning: 'should use docs' }, outcome_reviewed: 'soft_success' }), epR({ primary_rationale: { node_chosen: 'direct', task_classification: 'other', recommended_node: null, recommended_chain: null }, review: { node_quality: 'correct', chain_quality: 'n/a', alternative_better: null, error_root_cause: 'n/a', reasoning: 'fine' }, outcome_reviewed: 'success', timestamps: { started_at: '2026-05-19T10:01:00Z' } }), ]; const result = buildRouterVsOpus(eps); expect(result.sectionB).toHaveLength(1); expect(result.sectionB[0].opusSuggests).toBe('#60'); }); it('C: router gave + node_quality=correct + no alternative → in C; same but alternative_better truthy → NOT in C', () => { const inC = epR({ primary_rationale: { node_chosen: 'direct', task_classification: 'release', recommended_node: '#37', recommended_chain: null }, review: { node_quality: 'correct', chain_quality: 'n/a', alternative_better: null, error_root_cause: 'n/a', reasoning: 'fine' }, outcome_reviewed: 'success' }); const notInC = epR({ primary_rationale: { node_chosen: 'direct', task_classification: 'release', recommended_node: '#37', recommended_chain: null }, review: { node_quality: 'correct', chain_quality: 'n/a', alternative_better: '#41', error_root_cause: 'n/a', reasoning: 'actually #41 better' }, outcome_reviewed: 'rework', timestamps: { started_at: '2026-05-19T10:01:00Z' } }); const result = buildRouterVsOpus([inC, notInC]); expect(result.sectionC).toHaveLength(1); // The one NOT in C (has alternative_better) should be in A instead expect(result.sectionA).toHaveLength(1); }); it('sectionA item has all expected shape fields', () => { const eps = [ // Must be wrong_node or have alternative to end up in A (not C) epR({ primary_rationale: { node_chosen: 'direct', task_classification: 'feature', recommended_node: '#19', recommended_chain: null }, review: { node_quality: 'wrong_node', chain_quality: 'n/a', alternative_better: '#37', error_root_cause: 'wrong_skill', reasoning: 'should be #37' }, outcome_reviewed: 'rework' }), ]; const result = buildRouterVsOpus(eps); const item = result.sectionA[0]; expect(item).toHaveProperty('time'); expect(item).toHaveProperty('taskId'); expect(item).toHaveProperty('classification'); expect(item).toHaveProperty('routerRecommendation'); expect(item).toHaveProperty('claudeChose'); expect(item).toHaveProperty('opusNodeQuality'); expect(item).toHaveProperty('opusChainQuality'); expect(item).toHaveProperty('outcomeReviewed'); expect(item).toHaveProperty('opusAlternative'); expect(item).toHaveProperty('opusRootCause'); expect(item.taskId).toHaveLength(8); // first 8 chars of task_id }); }); describe('buildChainIgnoreBreakdown', () => { it('returns all zeros for empty input', () => { const result = buildChainIgnoreBreakdown([]); expect(result.totalChainRecommendations).toBe(0); expect(result.ignoredChainCount).toBe(0); expect(result.ignoredChainRework).toBe(0); expect(result.totalNodeOnlyRecommendations).toBe(0); expect(result.ignoredNodeOnlyCount).toBe(0); expect(result.ignoredNodeOnlyRework).toBe(0); expect(result.breakdownByChainLength['1']).toEqual({ count: 0, ignored: 0, rework: 0 }); expect(result.breakdownByChainLength['2']).toEqual({ count: 0, ignored: 0, rework: 0 }); expect(result.breakdownByChainLength['3+']).toEqual({ count: 0, ignored: 0, rework: 0 }); }); it('chain-len-4 ep with node_chosen=direct and outcome=rework → ignoredChainCount=1, rework=1, 3+ bucket', () => { const eps = [epC({ primary_rationale: { node_chosen: 'direct', task_classification: 'planning', recommended_node: null, recommended_chain: ['#19','#41','#42','#37'] }, outcome_reviewed: 'rework', })]; const result = buildChainIgnoreBreakdown(eps); expect(result.totalChainRecommendations).toBe(1); expect(result.ignoredChainCount).toBe(1); expect(result.ignoredChainRework).toBe(1); expect(result.breakdownByChainLength['3+']).toEqual({ count: 1, ignored: 1, rework: 1 }); }); it('node-only rec ep with node_chosen=direct → ignoredNodeOnlyCount=1', () => { const eps = [epC({ primary_rationale: { node_chosen: 'direct', task_classification: 'monitoring', recommended_node: '#34', recommended_chain: null }, outcome_reviewed: 'success', })]; const result = buildChainIgnoreBreakdown(eps); expect(result.totalNodeOnlyRecommendations).toBe(1); expect(result.ignoredNodeOnlyCount).toBe(1); expect(result.ignoredNodeOnlyRework).toBe(0); expect(result.totalChainRecommendations).toBe(0); }); it('chains of length 1, 2, 5 bucketed correctly into 1/2/3+', () => { const eps = [ epC({ primary_rationale: { node_chosen: 'direct', task_classification: 'other', recommended_node: null, recommended_chain: ['#19'] }, outcome_reviewed: 'success', timestamps: { started_at: '2026-05-19T10:00:00Z' } }), epC({ primary_rationale: { node_chosen: 'direct', task_classification: 'other', recommended_node: null, recommended_chain: ['#19','#34'] }, outcome_reviewed: 'success', timestamps: { started_at: '2026-05-19T10:01:00Z' } }), epC({ primary_rationale: { node_chosen: 'direct', task_classification: 'other', recommended_node: null, recommended_chain: ['#19','#34','#37','#41','#42'] }, outcome_reviewed: 'rework', timestamps: { started_at: '2026-05-19T10:02:00Z' } }), ]; const result = buildChainIgnoreBreakdown(eps); expect(result.totalChainRecommendations).toBe(3); expect(result.breakdownByChainLength['1']).toEqual({ count: 1, ignored: 1, rework: 0 }); expect(result.breakdownByChainLength['2']).toEqual({ count: 1, ignored: 1, rework: 0 }); expect(result.breakdownByChainLength['3+']).toEqual({ count: 1, ignored: 1, rework: 1 }); }); it('chain-rec ep where node_chosen != direct → in totalChainRecommendations but NOT in ignoredChainCount', () => { const eps = [epC({ primary_rationale: { node_chosen: '#19', task_classification: 'feature', recommended_node: null, recommended_chain: ['#19', '#34'] }, outcome_reviewed: 'success', })]; const result = buildChainIgnoreBreakdown(eps); expect(result.totalChainRecommendations).toBe(1); expect(result.ignoredChainCount).toBe(0); expect(result.breakdownByChainLength['2']).toEqual({ count: 1, ignored: 0, rework: 0 }); }); }); describe('analyze — classCanonCoverage / routerVsOpus / chainIgnoreBreakdown integrated', () => { it('analyze() result includes classCanonCoverage, routerVsOpus, chainIgnoreBreakdown keys', () => { const eps = [ ep({ schema_version: 4, primary_rationale: { node_chosen: 'direct', task_classification: 'feature', recommended_node: '#19', recommended_chain: null, triggers_matched: [], boundaries_applied: [], step: 1, candidates_considered: [], hard_floor: { invoked: false, rules: [] } }, review: { node_quality: 'correct', chain_quality: 'n/a', alternative_better: null, error_root_cause: 'n/a', reasoning: 'ok' }, outcome_reviewed: 'success' }), ]; const result = analyze(eps); expect(result.classCanonCoverage).toBeDefined(); expect(result.routerVsOpus).toBeDefined(); expect(result.chainIgnoreBreakdown).toBeDefined(); expect(Array.isArray(result.classCanonCoverage)).toBe(true); expect(result.routerVsOpus).toHaveProperty('sectionA'); expect(result.routerVsOpus).toHaveProperty('sectionB'); expect(result.routerVsOpus).toHaveProperty('sectionC'); expect(result.chainIgnoreBreakdown).toHaveProperty('totalChainRecommendations'); }); }); describe('analyze() archive-fallback regression', () => { it('does NOT use archived classification map when classificationMap is empty — canonicalNodes must be [] for bugfix', () => { // The archived observer-classification-map.json contains bugfix: ["#18", "#34"]. // If the archive-fallback is active, classCanonCoverage[0].canonicalNodes will be non-empty. // After removing the fallback, classificationMap: {} → canonicalNodes must be []. const episodes = [ { schema_version: 2, primary_rationale: { task_classification: 'bugfix', node_chosen: 'direct' }, events: [], }, ]; const result = analyze(episodes, { classificationMap: {} }); const bugfixRow = result.classCanonCoverage.find(r => r.classification === 'bugfix'); expect(bugfixRow).toBeDefined(); // With empty classificationMap, no archive should be consulted → canonicalNodes empty expect(bugfixRow.canonicalNodes).toEqual([]); }); }); describe('analyzeChainHookEffectiveness', () => { const tmpLedger = path.join(__dirname, '..', '.scratch', `hook-outcomes-test-${Date.now()}.jsonl`); beforeEach(() => { fs.mkdirSync(path.dirname(tmpLedger), { recursive: true }); fs.writeFileSync(tmpLedger, [ JSON.stringify({ ts: '2026-05-28T10:00:00Z', rule: 'chain-recommendation', outcome: 'blocked', session_id: 's1' }), JSON.stringify({ ts: '2026-05-28T10:01:00Z', rule: 'chain-recommendation', outcome: 'passed-with-skill', session_id: 's1' }), JSON.stringify({ ts: '2026-05-28T10:02:00Z', rule: 'chain-recommendation', outcome: 'passed-inline-override', session_id: 's1' }), JSON.stringify({ ts: '2026-05-28T10:03:00Z', rule: 'chain-recommendation', outcome: 'passed-global-override', session_id: 's1' }), JSON.stringify({ ts: '2026-05-28T10:04:00Z', rule: 'chain-recommendation', outcome: 'passed-short-chain', session_id: 's1' }), JSON.stringify({ ts: '2026-05-28T10:05:00Z', rule: 'chain-recommendation', outcome: 'passed-no-mutating', session_id: 's1' }), JSON.stringify({ ts: '2026-05-28T10:06:00Z', rule: 'graph-first', outcome: 'blocked', session_id: 's1' }), // OTHER RULE — must be ignored JSON.stringify({ ts: '2026-05-27T10:00:00Z', rule: 'chain-recommendation', outcome: 'blocked', session_id: 's0' }), // BEFORE period '', ].join('\n')); }); afterEach(() => { try { fs.unlinkSync(tmpLedger); } catch {} }); it('counts outcomes within [periodStart, periodEnd]', () => { const result = analyzeChainHookEffectiveness({ ledgerPath: tmpLedger, periodStart: '2026-05-28T00:00:00Z', periodEnd: '2026-05-28T23:59:59Z', }); expect(result.total).toBe(6); expect(result.buckets.blocked).toBe(1); expect(result.buckets['passed-with-skill']).toBe(1); expect(result.buckets['passed-inline-override']).toBe(1); expect(result.buckets['passed-global-override']).toBe(1); expect(result.buckets['passed-short-chain']).toBe(1); expect(result.buckets['passed-no-mutating']).toBe(1); }); it('returns zero-counts when ledger missing', () => { const result = analyzeChainHookEffectiveness({ ledgerPath: '/nonexistent/ledger.jsonl', periodStart: '2026-05-28T00:00:00Z', periodEnd: '2026-05-28T23:59:59Z', }); expect(result.total).toBe(0); }); }); describe('buildChainHookEffectiveness markdown', () => { it('renders markdown table with totals and percentages', () => { const md = buildChainHookEffectiveness({ total: 10, buckets: { blocked: 5, 'passed-with-skill': 2, 'passed-inline-override': 1, 'passed-global-override': 1, 'passed-short-chain': 0, 'passed-no-mutating': 1, }, }); expect(md).toContain('Chain-hook effectiveness'); expect(md).toContain('blocked'); expect(md).toContain('50%'); // 5/10 expect(md).toContain('passed-with-skill'); expect(md).toContain('20%'); // 2/10 }); it('returns "(нет данных)" when total=0', () => { const md = buildChainHookEffectiveness({ total: 0, buckets: {} }); expect(md).toMatch(/нет данных|no data/i); }); }); describe('analyze() integration — chainHookEffectiveness', () => { it('exposes chainHookEffectiveness in result', () => { const result = analyze([], { hookOutcomesLedgerPath: '/nonexistent.jsonl' }); expect(result.chainHookEffectiveness).toBeDefined(); expect(result.chainHookEffectiveness.total).toBe(0); expect(result.chainHookEffectiveness.buckets).toBeDefined(); }); }); describe('CHAIN_OUTCOME_BUCKETS export', () => { it('exports 6 canonical bucket names in priority order', () => { expect(Array.isArray(CHAIN_OUTCOME_BUCKETS)).toBe(true); expect(CHAIN_OUTCOME_BUCKETS).toEqual([ 'blocked', 'passed-with-skill', 'passed-inline-override', 'passed-global-override', 'passed-short-chain', 'passed-no-mutating', ]); }); }); // Stream H Task 8 — Tables 16 & 17 builders. describe('buildRouterGateHookEffectiveness (Stream H Task 8 — Table 16)', () => { it('counts hook fires per rule, blocks vs warns', () => { const eps = [ { hook_fired: { rule: 'path-deny', outcome: 'block' } }, { hook_fired: { rule: 'path-deny', outcome: 'block' } }, { hook_fired: { rule: 'git-conditional', outcome: 'block' } }, { hook_fired: { rule: 'git-conditional', outcome: 'allow-after-approval' } }, ]; const r = buildRouterGateHookEffectiveness(eps); expect(r.rules['path-deny'].fires).toBe(2); expect(r.rules['path-deny'].blocks).toBe(2); expect(r.rules['git-conditional'].fires).toBe(2); expect(r.rules['git-conditional'].blocks).toBe(1); }); it('returns empty rules object for empty input', () => { expect(buildRouterGateHookEffectiveness([]).rules).toEqual({}); expect(buildRouterGateHookEffectiveness(null).rules).toEqual({}); }); it('ignores episodes without hook_fired', () => { const r = buildRouterGateHookEffectiveness([{ task_id: 'x' }, { hook_fired: null }]); expect(r.rules).toEqual({}); }); }); describe('buildSelfFabricationSignals (Stream H Task 8 — Table 17)', () => { it('flags episodes where controller claim mismatches tool_use record', () => { const eps = [ { controller_claim: 'committed fix', tool_uses: [] }, { controller_claim: 'committed fix', tool_uses: ['Bash:git commit'] }, { controller_claim: 'tests pass', tool_uses: [] }, ]; const r = buildSelfFabricationSignals(eps); expect(r.fabrications.length).toBe(2); expect(r.legit.length).toBe(1); }); it('handles missing controller_claim (no fabrication)', () => { const r = buildSelfFabricationSignals([{ tool_uses: ['Edit:x'] }, { task_id: 'y' }]); expect(r.fabrications.length).toBe(0); expect(r.legit.length).toBe(0); }); it('handles missing tool_uses as fabrication when claim present', () => { const r = buildSelfFabricationSignals([{ controller_claim: 'X' }]); expect(r.fabrications.length).toBe(1); }); }); describe('analyze() integration — Stream H Tables 16/17', () => { it('exposes routerGateHookEffectiveness in result', () => { const result = analyze([]); expect(result.routerGateHookEffectiveness).toBeDefined(); expect(result.routerGateHookEffectiveness.rules).toEqual({}); }); it('exposes selfFabricationSignals in result', () => { const result = analyze([]); expect(result.selfFabricationSignals).toBeDefined(); expect(result.selfFabricationSignals.fabrications).toEqual([]); }); });