397777089e
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
195 lines
8.9 KiB
JavaScript
195 lines
8.9 KiB
JavaScript
import { describe, it, expect } from 'vitest';
|
|
import { parseEpisodes, normalizeEpisode, attributeNodes, filterEpisodes, groupBySession, aggregate, inferConflicts } from '../docs/observer/dashboard-core.js';
|
|
|
|
// Episode fixture in the older (v1) shape: it has no `schema_version`,
// `environment`, `decision_provenance` or `task_size` keys, so it exercises the
// code paths where those v2-only fields are absent.
const v1 = {
  task_id: 'a',
  timestamps: { started_at: '2026-05-19T05:18:16.342Z', ended_at: '2026-05-19T06:05:55.439Z' },
  path_type: 'improvised',
  outcome: 'success',
  primary_rationale: { node_chosen: 'direct', hard_floor: { invoked: false, rules: [] }, task_classification: 'refactor' },
  events: [{ kind: 'tool_summary', counts: { TodoWrite: 2, AskUserQuestion: 5 } }],
};
|
|
// Episode fixture in the v2 shape (`schema_version: 2`). On top of the v1 keys
// it carries task_ref, prompt_signal, decision_provenance, environment and
// task_size, and its events include one `error` and one `retry` entry.
const v2 = {
  schema_version: 2,
  task_id: 'b',
  task_ref: 'b',
  timestamps: { started_at: '2026-05-19T08:06:30.059Z', ended_at: '2026-05-19T08:10:43.437Z' },
  path_type: 'improvised',
  outcome: 'unknown',
  prompt_signal: 'new_task',
  decision_provenance: { kind: 'autonomous', claude_would_have_chosen: null },
  environment: { economy_level: 5, model: 'claude-opus-4-7', post_compaction: true, session_turn: 82, parallel_session: true },
  task_size: { tool_calls: 12, files_touched: 1, files: ['x'] },
  primary_rationale: { node_chosen: 'direct', hard_floor: { invoked: false, rules: [] }, task_classification: 'bugfix' },
  events: [{ kind: 'tool_summary', counts: { Edit: 5 } }, { kind: 'error', message: 'e' }, { kind: 'retry' }],
};
|
|
|
|
// parseEpisodes(text): JSONL text in; the result exposes `episodes` and
// `skipped`, which are the two properties these tests read.
describe('parseEpisodes', () => {
  it('parses valid JSONL lines', () => {
    const text = [JSON.stringify(v1), JSON.stringify(v2)].join('\n');
    const r = parseEpisodes(text);
    expect(r.episodes).toHaveLength(2);
    expect(r.skipped).toBe(0);
  });

  it('skips broken lines and counts them', () => {
    // Four lines: valid, malformed JSON, empty, valid. The expected count of 1
    // means the empty line must not be tallied as skipped — only '{ broken' is.
    const text = [JSON.stringify(v1), '{ broken', '', JSON.stringify(v2)].join('\n');
    const r = parseEpisodes(text);
    expect(r.episodes).toHaveLength(2);
    expect(r.skipped).toBe(1);
  });

  it('skips observer_error marker lines', () => {
    // A well-formed JSON line carrying only `observer_error` is expected to be
    // dropped and counted as skipped rather than returned as an episode.
    const text = [JSON.stringify({ observer_error: 'hook failed' }), JSON.stringify(v1)].join('\n');
    const r = parseEpisodes(text);
    expect(r.episodes).toHaveLength(1);
    expect(r.skipped).toBe(1);
  });
});
|
|
|
|
// normalizeEpisode(raw): raw episode record in, normalized object out. The
// tests below pin the camelCase fields they read from it (schemaVersion,
// durationMs, tools, skills, errorCount, retryCount, taskClassification, ...).
describe('normalizeEpisode', () => {
  it('normalizes a v1 episode — v2-only fields are null', () => {
    const e = normalizeEpisode(v1);
    // The v1 fixture has no schema_version key; 1 is the expected fallback.
    expect(e.schemaVersion).toBe(1);
    expect(e.outcome).toBe('success');
    expect(e.environment).toBeNull();
    expect(e.decisionProvenance).toBeNull();
    expect(e.taskSize).toBeNull();
    // Derived from the fixture's own timestamps so the expectation cannot
    // drift if the fixture is edited.
    expect(e.durationMs).toBe(Date.parse(v1.timestamps.ended_at) - Date.parse(v1.timestamps.started_at));
    expect(e.tools).toEqual({ TodoWrite: 2, AskUserQuestion: 5 });
  });

  it('normalizes a v2 episode with all fields', () => {
    const e = normalizeEpisode(v2);
    expect(e.schemaVersion).toBe(2);
    expect(e.environment.economy_level).toBe(5);
    // The v2 fixture's events contain exactly one `error` and one `retry`.
    expect(e.errorCount).toBe(1);
    expect(e.retryCount).toBe(1);
    expect(e.taskClassification).toBe('bugfix');
  });

  it('merges tool_summary counts across multiple events', () => {
    // `Read` appears in both summaries (2 + 3) and must be summed, not replaced.
    const e = normalizeEpisode({
      ...v1,
      events: [{ kind: 'tool_summary', counts: { Read: 2 } }, { kind: 'tool_summary', counts: { Read: 3, Bash: 1 } }],
    });
    expect(e.tools).toEqual({ Read: 5, Bash: 1 });
  });

  it('collects skill_invoked skills in order', () => {
    // toEqual on an array is order-sensitive, so this also pins event order.
    const e = normalizeEpisode({
      ...v1,
      events: [{ kind: 'skill_invoked', skill: 'superpowers:writing-plans' }, { kind: 'skill_invoked', skill: 'superpowers:test-driven-development' }],
    });
    expect(e.skills).toEqual(['superpowers:writing-plans', 'superpowers:test-driven-development']);
  });
});
|
|
|
|
// attributeNodes(episode): the tests read `nodeIds`, `attributed` and `signals`
// from its result and pin how skill ids and MCP tool names map to graph nodes.
describe('attributeNodes', () => {
  // Normalized episode built from the v1 fixture plus top-level overrides.
  // The spread is shallow: passing `primary_rationale` or `events` replaces the
  // fixture's whole value for that key.
  const ep = (over) => normalizeEpisode({ ...v1, ...over });

  it('maps node_chosen skill id to a graph node', () => {
    const r = attributeNodes(ep({ primary_rationale: { node_chosen: 'superpowers:systematic-debugging', hard_floor: {} } }));
    expect(r.nodeIds).toContain('sk_debug');
  });

  it('ignores node_chosen === "direct"', () => {
    const r = attributeNodes(ep({ primary_rationale: { node_chosen: 'direct', hard_floor: {} } }));
    expect(r.nodeIds).toEqual([]);
  });

  it('maps skill_invoked events to graph nodes', () => {
    const r = attributeNodes(ep({ events: [{ kind: 'skill_invoked', skill: 'superpowers:writing-plans' }] }));
    expect(r.nodeIds).toContain('sk_wplans');
  });

  it('maps mcp__<server>__ tool names to MCP graph nodes', () => {
    // `Read` is included alongside the two mcp__ tools; only the MCP node ids
    // are asserted here.
    const r = attributeNodes(ep({ events: [{ kind: 'tool_summary', counts: { 'mcp__github__get_issue': 2, 'mcp__laravel-boost__database-query': 1, Read: 4 } }] }));
    expect(r.nodeIds).toContain('mcp_gh');
    expect(r.nodeIds).toContain('mcp_boost');
  });

  it('counts signals vs attributed — builtin tools are not signals', () => {
    const r = attributeNodes(ep({
      events: [{ kind: 'tool_summary', counts: { Read: 1, 'mcp__github__x': 1 } }],
      primary_rationale: { node_chosen: 'superpowers:test-driven-development', hard_floor: {} },
    }));
    expect(r.attributed).toBe(2); // tdd skill + github mcp
    expect(r.signals).toBe(2); // only the tdd skill and the mcp tool count as signals
  });
});
|
|
|
|
// filterEpisodes(list, filter): returns the subset of `list` matching `filter`.
describe('filterEpisodes', () => {
  // Two normalized episodes that differ on both filter axes used below:
  //   [0] v1-based: classification 'refactor', no events (so no errors)
  //   [1] v2-based: classification 'bugfix', one `error` event
  const list = [
    normalizeEpisode({ ...v1, primary_rationale: { node_chosen: 'direct', hard_floor: {}, task_classification: 'refactor' }, events: [] }),
    normalizeEpisode({ ...v2, primary_rationale: { node_chosen: 'direct', hard_floor: {}, task_classification: 'bugfix' }, events: [{ kind: 'error', message: 'e' }] }),
  ];

  it('returns all with an empty filter', () => {
    expect(filterEpisodes(list, {})).toHaveLength(2);
  });

  it('filters by task classification', () => {
    expect(filterEpisodes(list, { classification: 'bugfix' })).toHaveLength(1);
  });

  it('filters to episodes with errors only', () => {
    expect(filterEpisodes(list, { withErrors: true })).toHaveLength(1);
  });
});
|
|
|
|
// groupBySession(episodes): returns an array of groups; each group read below
// exposes `taskRef` and `episodes`.
describe('groupBySession', () => {
  it('groups episodes by taskRef, newest first within and across groups', () => {
    // Session S has two episodes (08:00 and 09:00); session T has one (07:00).
    const a1 = normalizeEpisode({ ...v2, task_ref: 'S', timestamps: { started_at: '2026-05-19T08:00:00Z', ended_at: '2026-05-19T08:01:00Z' } });
    const a2 = normalizeEpisode({ ...v2, task_ref: 'S', timestamps: { started_at: '2026-05-19T09:00:00Z', ended_at: '2026-05-19T09:01:00Z' } });
    const b1 = normalizeEpisode({ ...v2, task_ref: 'T', timestamps: { started_at: '2026-05-19T07:00:00Z', ended_at: '2026-05-19T07:01:00Z' } });

    // Oldest-first input: the older group (T) and the older S episode come
    // first, so plain insertion order cannot satisfy the ordering assertions.
    const groups = groupBySession([b1, a1, a2]);
    expect(groups).toHaveLength(2);

    const s = groups.find((g) => g.taskRef === 'S');
    expect(s.episodes).toHaveLength(2);
    // Within a group: newest episode first.
    expect(s.episodes[0].startedAt).toBe('2026-05-19T09:00:00Z');
    // Across groups: the group holding the newest episode first.
    expect(groups[0].taskRef).toBe('S');
  });
});
|
|
|
|
// aggregate(list): summary over normalized episodes. The tests read nodeHeat,
// redirectRate, pathType, outcome, totalErrors and totalRetries from it.
describe('aggregate', () => {
  // Normalized episode built from the v2 fixture plus top-level overrides
  // (shallow spread: an overridden key replaces the fixture's whole value).
  const mk = (over) => normalizeEpisode({ ...v2, ...over });

  it('counts node heat from attributed nodes', () => {
    // The same skill is invoked in two separate episodes, so its node counts 2.
    const list = [
      mk({ events: [{ kind: 'skill_invoked', skill: 'superpowers:writing-plans' }] }),
      mk({ events: [{ kind: 'skill_invoked', skill: 'superpowers:writing-plans' }] }),
    ];
    expect(aggregate(list).nodeHeat.sk_wplans).toBe(2);
  });

  it('computes redirect rate', () => {
    // One user-directed episode out of two gives the expected 0.5.
    const list = [
      mk({ decision_provenance: { kind: 'user_directed_method', claude_would_have_chosen: 'x' } }),
      mk({ decision_provenance: { kind: 'autonomous', claude_would_have_chosen: null } }),
    ];
    expect(aggregate(list).redirectRate).toBe(0.5);
  });

  it('tallies path_type and outcome distributions', () => {
    const list = [mk({ path_type: 'improvised', outcome: 'unknown' }), mk({ path_type: 'regulated', outcome: 'success' })];
    const a = aggregate(list);
    expect(a.pathType).toEqual({ improvised: 1, regulated: 1 });
    expect(a.outcome).toEqual({ unknown: 1, success: 1 });
  });

  it('reports total error and retry counts', () => {
    const list = [mk({ events: [{ kind: 'error', message: 'e' }, { kind: 'retry' }] })];
    const a = aggregate(list);
    expect(a.totalErrors).toBe(1);
    expect(a.totalRetries).toBe(1);
  });
});
|
|
|
|
// inferConflicts(episodes, edges): the tests read `design`, `friction` and
// `correlation` from its result.
describe('inferConflicts', () => {
  // A single dashed edge between the writing-plans and debugging skill nodes.
  const conflictEdges = [{ from: 'sk_wplans', to: 'sk_debug', dashes: true, label: '⚫', title: 't' }];

  it('returns design conflicts from dashed edges', () => {
    // No episodes at all: the design conflict must come from the edge list alone.
    const r = inferConflicts([], conflictEdges);
    expect(r.design).toHaveLength(1);
  });

  it('reports friction — episodes with errors attributed to nodes', () => {
    const ep = normalizeEpisode({
      ...v2,
      events: [{ kind: 'error', message: 'e' }, { kind: 'skill_invoked', skill: 'superpowers:writing-plans' }],
    });
    const r = inferConflicts([ep], conflictEdges);
    expect(r.friction.sk_wplans).toBe(1);
  });

  it('reports correlation when an errored episode spans a conflict-edge pair', () => {
    // One errored episode invokes both skills joined by the dashed edge.
    const ep = normalizeEpisode({
      ...v2,
      events: [
        { kind: 'error', message: 'e' },
        { kind: 'skill_invoked', skill: 'superpowers:writing-plans' },
        { kind: 'skill_invoked', skill: 'superpowers:systematic-debugging' },
      ],
    });
    const r = inferConflicts([ep], conflictEdges);
    expect(r.correlation).toHaveLength(1);
    // The pair is expected in the edge's from/to order.
    expect(r.correlation[0].pair).toEqual(['sk_wplans', 'sk_debug']);
  });
});
|