// brain/tools/brain-dashboard-core.test.mjs — JavaScript, 195 lines, 8.9 KiB

import { describe, it, expect } from 'vitest';
import { parseEpisodes, normalizeEpisode, attributeNodes, filterEpisodes, groupBySession, aggregate, inferConflicts } from '../docs/observer/dashboard-core.js';
/**
 * Legacy-shaped episode fixture: it carries no `schema_version` and none of
 * the `environment`, `decision_provenance` or `task_size` sections.
 */
const v1 = {
  task_id: 'a',
  timestamps: {
    started_at: '2026-05-19T05:18:16.342Z',
    ended_at: '2026-05-19T06:05:55.439Z',
  },
  path_type: 'improvised',
  outcome: 'success',
  primary_rationale: {
    node_chosen: 'direct',
    hard_floor: { invoked: false, rules: [] },
    task_classification: 'refactor',
  },
  // One tool_summary event holding per-tool call counts.
  events: [
    {
      kind: 'tool_summary',
      counts: { TodoWrite: 2, AskUserQuestion: 5 },
    },
  ],
};
/**
 * Schema-version-2 episode fixture with every optional section populated
 * (prompt signal, decision provenance, environment, task size) and a mix of
 * tool_summary, error and retry events.
 */
const v2 = {
  schema_version: 2,
  task_id: 'b',
  task_ref: 'b',
  timestamps: {
    started_at: '2026-05-19T08:06:30.059Z',
    ended_at: '2026-05-19T08:10:43.437Z',
  },
  path_type: 'improvised',
  outcome: 'unknown',
  prompt_signal: 'new_task',
  decision_provenance: {
    kind: 'autonomous',
    claude_would_have_chosen: null,
  },
  environment: {
    economy_level: 5,
    model: 'claude-opus-4-7',
    post_compaction: true,
    session_turn: 82,
    parallel_session: true,
  },
  task_size: {
    tool_calls: 12,
    files_touched: 1,
    files: ['x'],
  },
  primary_rationale: {
    node_chosen: 'direct',
    hard_floor: { invoked: false, rules: [] },
    task_classification: 'bugfix',
  },
  // Exactly one error and one retry event, alongside a tool summary.
  events: [
    { kind: 'tool_summary', counts: { Edit: 5 } },
    { kind: 'error', message: 'e' },
    { kind: 'retry' },
  ],
};
// parseEpisodes receives JSONL text and returns `{ episodes, skipped }`; these
// cases pin how many input lines land in each bucket.
describe('parseEpisodes', () => {
  // Joins rows into JSONL. Strings pass through verbatim so malformed or blank
  // lines can be injected; everything else is JSON-serialised.
  const jsonl = (...rows) =>
    rows.map((row) => (typeof row === 'string' ? row : JSON.stringify(row))).join('\n');

  it('parses valid JSONL lines', () => {
    const { episodes, skipped } = parseEpisodes(jsonl(v1, v2));

    expect(episodes).toHaveLength(2);
    expect(skipped).toBe(0);
  });

  it('skips broken lines and counts them', () => {
    // The empty line is expected to be ignored without counting as skipped.
    const { episodes, skipped } = parseEpisodes(jsonl(v1, '{ broken', '', v2));

    expect(episodes).toHaveLength(2);
    expect(skipped).toBe(1);
  });

  it('skips observer_error marker lines', () => {
    const marker = { observer_error: 'hook failed' };
    const { episodes, skipped } = parseEpisodes(jsonl(marker, v1));

    expect(episodes).toHaveLength(1);
    expect(skipped).toBe(1);
  });
});
// normalizeEpisode turns a raw episode record into the camelCase shape the
// assertions below read (schemaVersion, durationMs, tools, skills, ...).
describe('normalizeEpisode', () => {
  // Normalises the v1 fixture with its `events` array swapped out.
  const v1WithEvents = (events) => normalizeEpisode({ ...v1, events });

  it('normalizes a v1 episode — v2-only fields are null', () => {
    const normalized = normalizeEpisode(v1);
    const { started_at: startedAt, ended_at: endedAt } = v1.timestamps;
    const expectedDurationMs = Date.parse(endedAt) - Date.parse(startedAt);

    expect(normalized.schemaVersion).toBe(1);
    expect(normalized.outcome).toBe('success');
    expect(normalized.environment).toBeNull();
    expect(normalized.decisionProvenance).toBeNull();
    expect(normalized.taskSize).toBeNull();
    expect(normalized.durationMs).toBe(expectedDurationMs);
    expect(normalized.tools).toEqual({ TodoWrite: 2, AskUserQuestion: 5 });
  });

  it('normalizes a v2 episode with all fields', () => {
    const normalized = normalizeEpisode(v2);

    expect(normalized.schemaVersion).toBe(2);
    expect(normalized.environment.economy_level).toBe(5);
    expect(normalized.errorCount).toBe(1);
    expect(normalized.retryCount).toBe(1);
    expect(normalized.taskClassification).toBe('bugfix');
  });

  it('merges tool_summary counts across multiple events', () => {
    // `Read` appears in both summaries, so its counts must add up to 5.
    const firstSummary = { kind: 'tool_summary', counts: { Read: 2 } };
    const secondSummary = { kind: 'tool_summary', counts: { Read: 3, Bash: 1 } };

    const { tools } = v1WithEvents([firstSummary, secondSummary]);

    expect(tools).toEqual({ Read: 5, Bash: 1 });
  });

  it('collects skill_invoked skills in order', () => {
    const invokedInOrder = ['superpowers:writing-plans', 'superpowers:test-driven-development'];
    const events = [
      { kind: 'skill_invoked', skill: 'superpowers:writing-plans' },
      { kind: 'skill_invoked', skill: 'superpowers:test-driven-development' },
    ];

    const { skills } = v1WithEvents(events);

    expect(skills).toEqual(invokedInOrder);
  });
});
// attributeNodes maps a normalised episode onto graph node ids and reports
// `nodeIds`, `attributed` and `signals`.
describe('attributeNodes', () => {
  // Normalised v1 fixture with the given top-level fields overridden.
  const episodeWith = (overrides) => normalizeEpisode({ ...v1, ...overrides });

  it('maps node_chosen skill id to a graph node', () => {
    const rationale = { node_chosen: 'superpowers:systematic-debugging', hard_floor: {} };
    const { nodeIds } = attributeNodes(episodeWith({ primary_rationale: rationale }));

    expect(nodeIds).toContain('sk_debug');
  });

  it('ignores node_chosen === "direct"', () => {
    const rationale = { node_chosen: 'direct', hard_floor: {} };
    const { nodeIds } = attributeNodes(episodeWith({ primary_rationale: rationale }));

    expect(nodeIds).toEqual([]);
  });

  it('maps skill_invoked events to graph nodes', () => {
    const events = [{ kind: 'skill_invoked', skill: 'superpowers:writing-plans' }];
    const { nodeIds } = attributeNodes(episodeWith({ events }));

    expect(nodeIds).toContain('sk_wplans');
  });

  it('maps mcp__<server>__ tool names to MCP graph nodes', () => {
    const counts = {
      'mcp__github__get_issue': 2,
      'mcp__laravel-boost__database-query': 1,
      Read: 4,
    };
    const { nodeIds } = attributeNodes(episodeWith({ events: [{ kind: 'tool_summary', counts }] }));

    for (const expectedId of ['mcp_gh', 'mcp_boost']) {
      expect(nodeIds).toContain(expectedId);
    }
  });

  it('counts signals vs attributed — builtin tools are not signals', () => {
    const overrides = {
      events: [{ kind: 'tool_summary', counts: { Read: 1, 'mcp__github__x': 1 } }],
      primary_rationale: { node_chosen: 'superpowers:test-driven-development', hard_floor: {} },
    };
    const result = attributeNodes(episodeWith(overrides));

    // Attributed: the tdd skill plus the github MCP server.
    expect(result.attributed).toBe(2);
    // Signals: only the tdd skill and the MCP tool; the builtin `Read` is excluded.
    expect(result.signals).toBe(2);
  });
});
// filterEpisodes(list, filter) narrows a list of normalised episodes; the
// fixture holds one error-free refactor and one bugfix with a single error.
describe('filterEpisodes', () => {
  const episodes = [
    {
      ...v1,
      primary_rationale: { node_chosen: 'direct', hard_floor: {}, task_classification: 'refactor' },
      events: [],
    },
    {
      ...v2,
      primary_rationale: { node_chosen: 'direct', hard_floor: {}, task_classification: 'bugfix' },
      events: [{ kind: 'error', message: 'e' }],
    },
  ].map((raw) => normalizeEpisode(raw));

  // Runs the filter against the shared fixture list.
  const matching = (criteria) => filterEpisodes(episodes, criteria);

  it('returns all with an empty filter', () => {
    const everything = matching({});
    expect(everything).toHaveLength(2);
  });

  it('filters by task classification', () => {
    const bugfixes = matching({ classification: 'bugfix' });
    expect(bugfixes).toHaveLength(1);
  });

  it('filters to episodes with errors only', () => {
    const errored = matching({ withErrors: true });
    expect(errored).toHaveLength(1);
  });
});
// groupBySession buckets normalised episodes by `taskRef`; each group exposes
// `taskRef` and `episodes`.
describe('groupBySession', () => {
  it('groups episodes by taskRef, newest first within and across groups', () => {
    const a1 = normalizeEpisode({ ...v2, task_ref: 'S', timestamps: { started_at: '2026-05-19T08:00:00Z', ended_at: '2026-05-19T08:01:00Z' } });
    const a2 = normalizeEpisode({ ...v2, task_ref: 'S', timestamps: { started_at: '2026-05-19T09:00:00Z', ended_at: '2026-05-19T09:01:00Z' } });
    const b1 = normalizeEpisode({ ...v2, task_ref: 'T', timestamps: { started_at: '2026-05-19T07:00:00Z', ended_at: '2026-05-19T07:01:00Z' } });

    // Feed the episodes oldest-first. With the older session ('T') leading the
    // input, an implementation that merely preserves insertion order cannot
    // satisfy the cross-group expectation, and the within-group expectation
    // likewise requires a real sort.
    const groups = groupBySession([b1, a1, a2]);

    // Across groups: 'S' (latest activity 09:00) must precede 'T' (07:00).
    expect(groups[0].taskRef).toBe('S');
    expect(groups.map((g) => g.taskRef)).toEqual(['S', 'T']);

    // Within a group: newest episode first.
    const s = groups.find((g) => g.taskRef === 'S');
    expect(s.episodes[0].startedAt).toBe('2026-05-19T09:00:00Z');
    expect(s.episodes.map((e) => e.startedAt)).toEqual(['2026-05-19T09:00:00Z', '2026-05-19T08:00:00Z']);
  });
});
// aggregate(list) summarises normalised episodes: nodeHeat, redirectRate,
// pathType / outcome tallies, and total error / retry counts.
describe('aggregate', () => {
  // Normalised v2 fixture with the given top-level fields overridden.
  const episode = (overrides) => normalizeEpisode({ ...v2, ...overrides });

  it('counts node heat from attributed nodes', () => {
    // Two independent episodes that each invoke the same skill once.
    const planningEpisode = () =>
      episode({ events: [{ kind: 'skill_invoked', skill: 'superpowers:writing-plans' }] });

    const { nodeHeat } = aggregate([planningEpisode(), planningEpisode()]);

    expect(nodeHeat.sk_wplans).toBe(2);
  });

  it('computes redirect rate', () => {
    // One user-directed decision out of two episodes.
    const provenances = [
      { kind: 'user_directed_method', claude_would_have_chosen: 'x' },
      { kind: 'autonomous', claude_would_have_chosen: null },
    ];
    const episodes = provenances.map((provenance) => episode({ decision_provenance: provenance }));

    const { redirectRate } = aggregate(episodes);

    expect(redirectRate).toBe(0.5);
  });

  it('tallies path_type and outcome distributions', () => {
    const combos = [
      ['improvised', 'unknown'],
      ['regulated', 'success'],
    ];
    const episodes = combos.map(([pathKind, result]) => episode({ path_type: pathKind, outcome: result }));

    const summary = aggregate(episodes);

    expect(summary.pathType).toEqual({ improvised: 1, regulated: 1 });
    expect(summary.outcome).toEqual({ unknown: 1, success: 1 });
  });

  it('reports total error and retry counts', () => {
    const events = [{ kind: 'error', message: 'e' }, { kind: 'retry' }];

    const summary = aggregate([episode({ events })]);

    expect(summary.totalErrors).toBe(1);
    expect(summary.totalRetries).toBe(1);
  });
});
// inferConflicts(episodes, edges) reports `design`, `friction` and
// `correlation` findings; the single dashed edge below links the
// writing-plans and debugging nodes.
describe('inferConflicts', () => {
  const conflictEdges = [
    { from: 'sk_wplans', to: 'sk_debug', dashes: true, label: '⚫', title: 't' },
  ];

  // Normalised v2 episode with one error event followed by one skill_invoked
  // event per given skill id, in order.
  const erroredEpisode = (...skillIds) =>
    normalizeEpisode({
      ...v2,
      events: [
        { kind: 'error', message: 'e' },
        ...skillIds.map((skill) => ({ kind: 'skill_invoked', skill })),
      ],
    });

  it('returns design conflicts from dashed edges', () => {
    const { design } = inferConflicts([], conflictEdges);

    expect(design).toHaveLength(1);
  });

  it('reports friction — episodes with errors attributed to nodes', () => {
    const episodes = [erroredEpisode('superpowers:writing-plans')];

    const { friction } = inferConflicts(episodes, conflictEdges);

    expect(friction.sk_wplans).toBe(1);
  });

  it('reports correlation when an errored episode spans a conflict-edge pair', () => {
    const episodes = [
      erroredEpisode('superpowers:writing-plans', 'superpowers:systematic-debugging'),
    ];

    const { correlation } = inferConflicts(episodes, conflictEdges);

    expect(correlation).toHaveLength(1);
    expect(correlation[0].pair).toEqual(['sk_wplans', 'sk_debug']);
  });
});