dc6d2dd358
buildFactorMatrix already buckets decision_provenance.kind dynamically (brain-retro-analyzer.mjs:112) — no production change needed. Test pins that user_chose_from_options is counted on the provenance axis. 12/12 brain-retro tests GREEN. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
122 lines
5.8 KiB
JavaScript
122 lines
5.8 KiB
JavaScript
import { describe, it, expect } from 'vitest';
|
|
import {
|
|
dedupeEpisodes,
|
|
inferOutcome,
|
|
groupEpisodesToTasks,
|
|
findCausalChains,
|
|
buildFactorMatrix,
|
|
analyze,
|
|
} from './brain-retro-analyzer.mjs';
|
|
|
|
/**
 * Builds a minimal schema-v2 episode fixture for the tests in this file.
 *
 * Each call evaluates a fresh object literal, so fixtures never share nested
 * objects with one another. `overrides` is spread last and shallowly: a key
 * such as `timestamps` or `task_size` replaces the whole default nested
 * object rather than being merged into it.
 *
 * @param {object} [overrides={}] - Top-level fields to replace or add.
 * @returns {object} The episode fixture.
 */
const ep = (overrides = {}) => ({
  schema_version: 2,
  task_id: 's1',
  task_ref: 's1',
  timestamps: { started_at: '2026-05-19T10:00:00Z', ended_at: '2026-05-19T10:05:00Z' },
  path_type: 'regulated',
  outcome: 'unknown',
  prompt_signal: 'neutral',
  decision_provenance: { kind: 'autonomous', claude_would_have_chosen: null },
  environment: {
    economy_level: 0,
    model: 'claude-opus-4-7',
    post_compaction: false,
    session_turn: 1,
    parallel_session: false,
  },
  task_size: { tool_calls: 5, files_touched: 1, files: ['/a.js'] },
  primary_rationale: {
    step: 1,
    node_chosen: 'direct',
    triggers_matched: [],
    candidates_considered: [],
    boundaries_applied: [],
    hard_floor: { invoked: false, rules: [] },
    task_classification: 'feature',
  },
  events: [],
  // Caller-supplied fields win over every default above.
  ...overrides,
});
|
|
|
|
// dedupeEpisodes: duplicate handling and observer_error pass-through.
describe('dedupeEpisodes', () => {
  it('keeps the last of two episodes with the same task_id + started_at', () => {
    // Both fixtures share the default task_id and started_at
    // (routing-gate double-write); only the outcome differs.
    const first = ep({ outcome: 'unknown' });
    const second = ep({ outcome: 'partial' });

    const out = dedupeEpisodes([first, second]);

    expect(out).toHaveLength(1);
    // The later entry is the one expected to survive.
    expect(out[0].outcome).toBe('partial');
  });

  it('keeps all observer_error markers', () => {
    const input = [
      ep(),
      { observer_error: true, task_id: 'e' },
      { observer_error: true, task_id: 'e2' },
    ];

    const out = dedupeEpisodes(input);

    expect(out.filter((e) => e.observer_error)).toHaveLength(2);
  });
});
|
|
|
|
// inferOutcome(episode, nextEpisode): each case pins the expected label for
// one combination of the episode's events and the next episode's prompt_signal.
describe('inferOutcome', () => {
  it('infers rework when the next episode opens with a correction', () => {
    expect(inferOutcome(ep(), ep({ prompt_signal: 'correction' }))).toBe('rework');
  });

  it('infers success when the next episode opens with approval', () => {
    expect(inferOutcome(ep(), ep({ prompt_signal: 'approval' }))).toBe('success');
  });

  it('infers partial when the episode has an interrupt event', () => {
    // The following episode is a neutral default, so the interrupt event on
    // the first episode is the only distinguishing input.
    expect(inferOutcome(ep({ events: [{ kind: 'interrupt' }] }), ep())).toBe('partial');
  });

  it('infers unknown when there is no next episode', () => {
    expect(inferOutcome(ep(), null)).toBe('unknown');
  });
});
|
|
|
|
// groupEpisodesToTasks: task boundaries across three consecutive episodes.
describe('groupEpisodesToTasks', () => {
  it('starts a new task after a success and on a new_task prompt', () => {
    // Sequence: new_task -> approval -> new_task, two minutes apart.
    const eps = [
      ep({
        timestamps: { started_at: '2026-05-19T10:00:00Z', ended_at: '2026-05-19T10:01:00Z' },
        prompt_signal: 'new_task',
      }),
      ep({
        timestamps: { started_at: '2026-05-19T10:02:00Z', ended_at: '2026-05-19T10:03:00Z' },
        prompt_signal: 'approval',
      }),
      ep({
        timestamps: { started_at: '2026-05-19T10:04:00Z', ended_at: '2026-05-19T10:05:00Z' },
        prompt_signal: 'new_task',
      }),
    ];

    const tasks = groupEpisodesToTasks(eps);

    // NOTE(review): this is only a lower bound; the exact number of tasks
    // produced for this sequence is not pinned by the test.
    expect(tasks.length).toBeGreaterThanOrEqual(2);
  });
});
|
|
|
|
// findCausalChains: an errored episode followed by a later episode, with and
// without a file in common.
describe('findCausalChains', () => {
  it('links an errored episode to a later episode that shares a file', () => {
    const sharedSize = { tool_calls: 1, files_touched: 1, files: ['/shared.js'] };
    const errored = ep({
      timestamps: { started_at: '2026-05-19T10:00:00Z', ended_at: '2026-05-19T10:01:00Z' },
      events: [{ kind: 'error', message: 'x' }],
      task_size: { ...sharedSize, files: [...sharedSize.files] },
    });
    const later = ep({
      timestamps: { started_at: '2026-05-19T10:02:00Z', ended_at: '2026-05-19T10:03:00Z' },
      task_size: { ...sharedSize, files: [...sharedSize.files] },
    });

    const chains = findCausalChains([errored, later]);

    expect(chains).toHaveLength(1);
    expect(chains[0].sharedFiles).toEqual(['/shared.js']);
  });

  it('returns no chain when no files are shared', () => {
    // The errored episode keeps the default timestamps; only the file lists differ.
    const errored = ep({
      events: [{ kind: 'error', message: 'x' }],
      task_size: { tool_calls: 1, files_touched: 1, files: ['/a.js'] },
    });
    const later = ep({
      timestamps: { started_at: '2026-05-19T10:02:00Z', ended_at: '2026-05-19T10:03:00Z' },
      task_size: { tool_calls: 1, files_touched: 1, files: ['/b.js'] },
    });

    expect(findCausalChains([errored, later])).toHaveLength(0);
  });
});
|
|
|
|
// buildFactorMatrix: the assertions read the result as
// matrix[factor][factorValue][outcome] -> count, with the outcome taken from
// each episode's `_inferredOutcome` field.
describe('buildFactorMatrix', () => {
  it('tabulates outcome distribution per factor value', () => {
    const eps = [
      ep({ _inferredOutcome: 'rework', decision_provenance: { kind: 'user_directed_method' } }),
      ep({ _inferredOutcome: 'success', decision_provenance: { kind: 'autonomous' } }),
    ];

    const m = buildFactorMatrix(eps);

    expect(m.decision_provenance.user_directed_method.rework).toBe(1);
    expect(m.decision_provenance.autonomous.success).toBe(1);
  });

  it('counts the 3rd kind user_chose_from_options on the provenance axis', () => {
    // One episode per established kind plus two for user_chose_from_options
    // (one success, one rework) so both of its outcome cells are exercised.
    const eps = [
      ep({ _inferredOutcome: 'success', decision_provenance: { kind: 'autonomous' } }),
      ep({ _inferredOutcome: 'rework', decision_provenance: { kind: 'user_directed_method' } }),
      ep({ _inferredOutcome: 'success', decision_provenance: { kind: 'user_chose_from_options' } }),
      ep({ _inferredOutcome: 'rework', decision_provenance: { kind: 'user_chose_from_options' } }),
    ];

    const m = buildFactorMatrix(eps);

    expect(m.decision_provenance).toHaveProperty('autonomous');
    expect(m.decision_provenance).toHaveProperty('user_directed_method');
    expect(m.decision_provenance).toHaveProperty('user_chose_from_options');
    expect(m.decision_provenance.user_chose_from_options.success).toBe(1);
    expect(m.decision_provenance.user_chose_from_options.rework).toBe(1);
  });
});
|
|
|
|
// analyze: smoke test on the shape of the result for a two-episode input.
describe('analyze', () => {
  it('returns episodeCount, tasks, causalChains and factorMatrix', () => {
    // The second episode starts an hour after the default one and opens
    // with a correction.
    const followUp = ep({
      timestamps: { started_at: '2026-05-19T11:00:00Z', ended_at: '2026-05-19T11:01:00Z' },
      prompt_signal: 'correction',
    });

    const result = analyze([ep(), followUp]);

    expect(result.episodeCount).toBe(2);
    expect(result.factorMatrix).toBeDefined();
    expect(Array.isArray(result.tasks)).toBe(true);
    expect(Array.isArray(result.causalChains)).toBe(true);
  });
});
|