diff --git a/tools/brain-retro-analyzer.mjs b/tools/brain-retro-analyzer.mjs
index 457933cb..31de2e86 100644
--- a/tools/brain-retro-analyzer.mjs
+++ b/tools/brain-retro-analyzer.mjs
@@ -161,6 +161,32 @@ function sessionTurnBucket(turn) {
   return n < SESSION_TURN_EARLY ? 'early' : n <= SESSION_TURN_LATE ? 'mid' : 'late';
 }
 
+// Pass 1 cheap-axis helpers (project-brain-factor-analysis-4passes).
+function countEventKind(events, kind) { // number of entries in `events` whose .kind === kind
+  if (!Array.isArray(events)) return 0; // a missing or non-array `events` counts as zero
+  let c = 0;
+  for (const ev of events) if (ev && ev.kind === kind) c++; // falsy entries are skipped rather than dereferenced
+  return c;
+}
+
+function retryBucket(events) { // bucket label for the count of 'retry' events
+  const n = countEventKind(events, 'retry');
+  return n === 0 ? '0' : n <= 2 ? '1-2' : '3+'; // 0 -> '0', 1..2 -> '1-2', anything higher -> '3+'
+}
+
+function errorBucket(events) { // bucket label for the count of 'error' events
+  const n = countEventKind(events, 'error');
+  return n === 0 ? '0' : n === 1 ? '1' : '2+'; // 0 -> '0', exactly 1 -> '1', anything higher -> '2+'
+}
+
+function iterationsBucket(iterations) { // bucket label for an iteration count; the input is coerced with Number()
+  const n = Number(iterations);
+  if (!Number.isFinite(n) || n <= 0) return '0'; // NaN (e.g. undefined input), +/-Infinity, zero and negatives all map to '0'
+  if (n <= 3) return '1-3';
+  if (n <= 10) return '4-10';
+  return '11+';
+}
+
 const FACTOR_FNS = {
   decision_provenance: (e) => (e.decision_provenance || {}).kind || 'unknown',
   economy_level: (e) => String((e.environment || {}).economy_level ?? 'null'),
@@ -172,6 +198,15 @@ const FACTOR_FNS = {
   node_chosen: (e) => (e.primary_rationale || {}).node_chosen || 'direct',
   task_classification: (e) => (e.primary_rationale || {}).task_classification || 'other',
   recommended_node_for_direct: (e) => (e.primary_rationale || {}).recommended_node || 'none',
+  // Pass 1 — 8 cheap axes (data already in v4 episode, just expose):
+  prompt_signal: (e) => e.prompt_signal || 'null', // any falsy value (missing, '') becomes the string 'null'
+  classifier_source: (e) => (e.classifier_output || {}).source || 'null', // tolerates a null/undefined classifier_output
+  degraded_mode: (e) => String(e.degraded_mode ?? false), // only null/undefined default to 'false'; other values are stringified as-is
+  path_type: (e) => e.path_type || 'null', // any falsy value becomes the string 'null'
+  retry_count: (e) => retryBucket(e.events),
+  error_count: (e) => errorBucket(e.events),
+  hard_floor_invoked: (e) => String(((e.primary_rationale || {}).hard_floor || {}).invoked ?? false), // a missing primary_rationale, hard_floor or invoked yields 'false'
+  iterations_bucket: (e) => iterationsBucket((e.task_cost || {}).iterations), // a missing task_cost or iterations lands in bucket '0'
 };
 
 /** Factor matrix: rows = factor values, columns = outcome distribution (spec §6). */
diff --git a/tools/brain-retro-analyzer.test.mjs b/tools/brain-retro-analyzer.test.mjs
index ffd63b6c..9a5f3c97 100644
--- a/tools/brain-retro-analyzer.test.mjs
+++ b/tools/brain-retro-analyzer.test.mjs
@@ -409,3 +409,110 @@ describe('analyze — v4 aggregations (Phase 3 Task 20)', () => {
     expect(ct.reviewer_input_tokens).toBe(500);
   });
 });
+
+describe('buildFactorMatrix — Pass 1 cheap axes (project-brain-factor-analysis-4passes)', () => {
+  // Each new axis: smoke + null-safety on missing fields.
+  it('prompt_signal axis: raw discrete values + null fallback', () => {
+    const m = buildFactorMatrix([
+      { ...ep(), _inferredOutcome: 'success', prompt_signal: 'new_task' },
+      { ...ep(), _inferredOutcome: 'rework', prompt_signal: 'correction' },
+      { ...ep(), _inferredOutcome: 'unknown', prompt_signal: undefined }, // missing value -> 'null' row
+    ]);
+    expect(m.prompt_signal.new_task.success).toBe(1);
+    expect(m.prompt_signal.correction.rework).toBe(1);
+    expect(m.prompt_signal.null.unknown).toBe(1);
+  });
+
+  it('classifier_source axis: reads classifier_output.source verbatim', () => {
+    const m = buildFactorMatrix([
+      { ...ep(), _inferredOutcome: 'success', classifier_output: { source: 'llm' } },
+      { ...ep(), _inferredOutcome: 'success', classifier_output: { source: 'regex' } },
+      { ...ep(), _inferredOutcome: 'success', classifier_output: { source: 'prefilter_inherited' } },
+      { ...ep(), _inferredOutcome: 'unknown', classifier_output: null }, // a null block must fall back to 'null', not throw
+    ]);
+    expect(m.classifier_source.llm.success).toBe(1);
+    expect(m.classifier_source.regex.success).toBe(1);
+    expect(m.classifier_source.prefilter_inherited.success).toBe(1);
+    expect(m.classifier_source.null.unknown).toBe(1);
+  });
+
+  it('degraded_mode axis: true/false buckets, false default', () => {
+    const m = buildFactorMatrix([
+      { ...ep(), _inferredOutcome: 'success', degraded_mode: false },
+      { ...ep(), _inferredOutcome: 'rework', degraded_mode: true },
+      { ...ep(), _inferredOutcome: 'unknown' /* degraded_mode not set here; the assertion below expects the 'false' row */ },
+    ]);
+    expect(m.degraded_mode.true.rework).toBe(1);
+    expect(m.degraded_mode.false.success).toBe(1);
+    expect(m.degraded_mode.false.unknown).toBe(1);
+  });
+
+  it('path_type axis: regulated / improvised / null', () => {
+    const m = buildFactorMatrix([
+      { ...ep(), _inferredOutcome: 'success', path_type: 'regulated' },
+      { ...ep(), _inferredOutcome: 'rework', path_type: 'improvised' },
+      { ...ep(), _inferredOutcome: 'unknown', path_type: undefined }, // missing value -> 'null' row
+    ]);
+    expect(m.path_type.regulated.success).toBe(1);
+    expect(m.path_type.improvised.rework).toBe(1);
+    expect(m.path_type.null.unknown).toBe(1);
+  });
+
+  it('retry_count axis: 0 / 1-2 / 3+ buckets from events[].kind=retry', () => {
+    const m = buildFactorMatrix([
+      { ...ep(), _inferredOutcome: 'success', events: [] },
+      { ...ep(), _inferredOutcome: 'rework', events: [{ kind: 'retry' }] },
+      { ...ep(), _inferredOutcome: 'rework', events: [{ kind: 'retry' }, { kind: 'retry' }] },
+      { ...ep(), _inferredOutcome: 'blocked', events: [{ kind: 'retry' }, { kind: 'retry' }, { kind: 'retry' }, { kind: 'retry' }] },
+    ]);
+    expect(m.retry_count['0'].success).toBe(1);
+    expect(m.retry_count['1-2'].rework).toBe(2); // the 1-retry and 2-retry episodes share this bucket
+    expect(m.retry_count['3+'].blocked).toBe(1);
+  });
+
+  it('error_count axis: 0 / 1 / 2+ buckets from events[].kind=error', () => {
+    const m = buildFactorMatrix([
+      { ...ep(), _inferredOutcome: 'success', events: [] },
+      { ...ep(), _inferredOutcome: 'rework', events: [{ kind: 'error' }] },
+      { ...ep(), _inferredOutcome: 'blocked', events: [{ kind: 'error' }, { kind: 'error' }, { kind: 'error' }] },
+    ]);
+    expect(m.error_count['0'].success).toBe(1);
+    expect(m.error_count['1'].rework).toBe(1);
+    expect(m.error_count['2+'].blocked).toBe(1);
+  });
+
+  it('hard_floor_invoked axis: true/false from primary_rationale.hard_floor.invoked', () => {
+    const m = buildFactorMatrix([
+      { ...ep(), _inferredOutcome: 'success', primary_rationale: { hard_floor: { invoked: true } } },
+      { ...ep(), _inferredOutcome: 'success', primary_rationale: { hard_floor: { invoked: false } } },
+      { ...ep(), _inferredOutcome: 'unknown', primary_rationale: {} }, // hard_floor absent -> 'false' row
+    ]);
+    expect(m.hard_floor_invoked.true.success).toBe(1);
+    expect(m.hard_floor_invoked.false.success).toBe(1);
+    expect(m.hard_floor_invoked.false.unknown).toBe(1);
+  });
+
+  it('iterations_bucket axis: 0 / 1-3 / 4-10 / 11+ from task_cost.iterations', () => {
+    const m = buildFactorMatrix([
+      { ...ep(), _inferredOutcome: 'success', task_cost: { iterations: 0 } },
+      { ...ep(), _inferredOutcome: 'success', task_cost: { iterations: 2 } },
+      { ...ep(), _inferredOutcome: 'rework', task_cost: { iterations: 7 } },
+      { ...ep(), _inferredOutcome: 'blocked', task_cost: { iterations: 51 } },
+      { ...ep(), _inferredOutcome: 'unknown', task_cost: {} }, // iterations absent
+    ]);
+    expect(m.iterations_bucket['0'].success).toBe(1);
+    expect(m.iterations_bucket['1-3'].success).toBe(1);
+    expect(m.iterations_bucket['4-10'].rework).toBe(1);
+    expect(m.iterations_bucket['11+'].blocked).toBe(1);
+    // Missing iterations counts as 0 — task_cost block may be absent on early episodes.
+    expect(m.iterations_bucket['0'].unknown).toBe(1);
+  });
+
+  it('all 8 Pass 1 axes are present via analyze() on a minimal v2 episode', () => {
+    const result = analyze([ep()]); // NOTE(review): ep() is defined outside this hunk — presumably the file's shared episode fixture; confirm
+    for (const axis of ['prompt_signal', 'classifier_source', 'degraded_mode', 'path_type',
+      'retry_count', 'error_count', 'hard_floor_invoked', 'iterations_bucket']) {
+      expect(result.factorMatrix, `axis ${axis} missing`).toHaveProperty(axis);
+    }
+  });
+});