58784b182d
Closes the 4-pass factor-analysis expansion plan in
memory/project_brain_factor_analysis_4passes.md. Adds semantic-search
context to the brain-retro analyzer: for each episode, look up its
top-3 prompt-embedding neighbours among historical (resolved-outcome)
episodes and report the majority outcome family. Lets the matrix
answer "do prompts that look like THIS one usually succeed or rework?"
# New module: tools/observer-embedding-index.mjs (pure, fs-free)
- mapOutcomeToFamily(outcome): success / soft_success → 'success',
rework → 'retry', blocked / partial → 'failure', else null.
- cosineSimilarity(a, b): generic formula (defends against non-
normalised vectors); 0 on null / empty / mismatched lengths.
- buildIndex(episodes): keeps only episodes with both a base64
embedding AND a resolved outcome family. Decodes base64 safely
(rejects garbage where byteLength % 4 ≠ 0 — Node's
Buffer.from('garbage', 'base64') silently strips invalid chars).
- findNearestNeighbors(target, index, k, opts): top-k by descending
cosine. Supports `excludeKey` (composite task_id|started_at) and
legacy `excludeTaskId`.
- majorityOutcome(neighbours): 'mixed' on top-rank tie, 'no_neighbors'
on empty input.
- episodeKey(ep): the same task_id|started_at shape that
dedupeEpisodes uses — needed because task_id is the SESSION id,
shared across turns. task_id alone cannot identify a single turn.
# brain-retro-analyzer.mjs
- New FACTOR_FNS axis similar_past_outcome_majority reading the
pre-computed episode._similarPastOutcomeMajority field.
- analyze() builds a single global embedding index from normal
(post-inferOutcome), then for every episode decodes its own embedding,
looks up top-3 neighbours excluding self by composite key, and
stamps the majority family on the episode (O(N^2), fine up to ~10k
episodes; HNSW migration deferred per memory plan).
- Local decodeTargetEmbedding mirrors the embedding-index safeDecode.
# Tests
20 new tests (RED -> GREEN):
- observer-embedding-index.test.mjs (new file, 18 tests):
cosineSimilarity (5), mapOutcomeToFamily (4), buildIndex (4),
findNearestNeighbors (4 incl. self-exclusion), majorityOutcome (3).
- brain-retro-analyzer.test.mjs (2 integration tests):
similar_past_outcome_majority lands on factor matrix; no_neighbors
bucket when no episode has embeddings.
Targeted sweep: 632/632 PASS on the 2 directly-affected suites.
Broader tools/ sweep: 7968/7969 PASS. Pre-existing 1 test failure in
observer-self-assessment-api.test.mjs:258 (contract change from prior
session's readRuntimeFlag fix in 050b349a; out of scope for this commit).
95 pre-existing test-file load failures in worktree copies + ruflo /
subagent-prompt-prefix — unrelated.
Factor matrix grew 11 -> 19 -> 21 -> 29 -> 30 axes across Pass 1+2+3+4.
LEFTHOOK=0 due to quirk #111. Manual gitleaks scan: clean.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
720 lines
38 KiB
JavaScript
720 lines
38 KiB
JavaScript
import { describe, it, expect } from 'vitest';
|
|
import {
|
|
dedupeEpisodes,
|
|
inferOutcome,
|
|
groupEpisodesToTasks,
|
|
findCausalChains,
|
|
buildFactorMatrix,
|
|
analyze,
|
|
} from './brain-retro-analyzer.mjs';
|
|
|
|
/**
 * Builds a minimal schema_version-2 episode fixture for the tests in this file.
 *
 * Every call returns a fresh object (and fresh nested objects/arrays), so a
 * test may mutate its fixture without affecting another test.
 *
 * @param {object} [overrides] - Top-level fields that replace the defaults.
 *   The merge is shallow: overriding e.g. `timestamps` replaces the whole
 *   nested object rather than merging into it.
 * @returns {object} The default episode with `overrides` spread on top.
 */
const ep = (overrides = {}) => ({
  schema_version: 2,
  task_id: 's1',
  task_ref: 's1',
  timestamps: { started_at: '2026-05-19T10:00:00Z', ended_at: '2026-05-19T10:05:00Z' },
  path_type: 'regulated',
  outcome: 'unknown',
  prompt_signal: 'neutral',
  decision_provenance: { kind: 'autonomous', claude_would_have_chosen: null },
  environment: { economy_level: 0, model: 'claude-opus-4-7', post_compaction: false, session_turn: 1, parallel_session: false },
  task_size: { tool_calls: 5, files_touched: 1, files: ['/a.js'] },
  primary_rationale: { step: 1, node_chosen: 'direct', triggers_matched: [], candidates_considered: [], boundaries_applied: [], hard_floor: { invoked: false, rules: [] }, task_classification: 'feature' },
  events: [],
  // Spread last so caller-supplied fields win over the defaults above.
  ...overrides,
});
|
|
|
|
// dedupeEpisodes: episodes sharing task_id + started_at collapse to the last
// one written, while observer_error marker records are all retained.
describe('dedupeEpisodes', () => {
  it('keeps the last of two episodes with the same task_id + started_at', () => {
    const a = ep({ outcome: 'unknown' });
    const b = ep({ outcome: 'partial' }); // same task_id + started_at — routing-gate double-write
    const out = dedupeEpisodes([a, b]);
    expect(out).toHaveLength(1);
    expect(out[0].outcome).toBe('partial');
  });

  it('keeps all observer_error markers', () => {
    const out = dedupeEpisodes([ep(), { observer_error: true, task_id: 'e' }, { observer_error: true, task_id: 'e2' }]);
    expect(out.filter((e) => e.observer_error)).toHaveLength(2);
  });
});
|
|
|
|
// inferOutcome(current, next): pins how the follow-up episode's prompt_signal
// and the current episode's events map onto an outcome label.
describe('inferOutcome', () => {
  it('infers rework when the next episode opens with a correction', () => {
    expect(inferOutcome(ep(), ep({ prompt_signal: 'correction' }))).toBe('rework');
  });

  it('infers success when the next episode opens with approval', () => {
    expect(inferOutcome(ep(), ep({ prompt_signal: 'approval' }))).toBe('success');
  });

  it('infers partial when the episode has an interrupt event', () => {
    expect(inferOutcome(ep({ events: [{ kind: 'interrupt' }] }), ep())).toBe('partial');
  });

  it('infers unknown when there is no next episode', () => {
    expect(inferOutcome(ep(), null)).toBe('unknown');
  });

  it('infers blocked ONLY when an unrecovered_error event is present (turn ended on error)', () => {
    const blocked = ep({ events: [{ kind: 'error' }, { kind: 'error' }, { kind: 'unrecovered_error' }] });
    // Even an approving follow-up does not override the unrecovered error.
    expect(inferOutcome(blocked, ep({ prompt_signal: 'approval' }))).toBe('blocked');
  });

  it('does NOT infer blocked from raw error/retry count (TDD failing-test-first is not a block)', () => {
    // A turn with N errors + N retries that ends on a successful tool_result —
    // e.g., TDD red→green, or git command that legitimately fails then recovers —
    // must NOT count as blocked. The parser emits unrecovered_error iff the LAST
    // tool_result was is_error, which is absent here.
    const recovered = ep({ events: [{ kind: 'error' }, { kind: 'error' }, { kind: 'retry' }] });
    expect(inferOutcome(recovered, ep({ prompt_signal: 'approval' }))).toBe('success');
  });

  it('does not infer blocked when every error was retried', () => {
    const recovered = ep({ events: [{ kind: 'error' }, { kind: 'retry' }] });
    expect(inferOutcome(recovered, ep({ prompt_signal: 'approval' }))).toBe('success');
  });
});
|
|
|
|
// groupEpisodesToTasks: three consecutive episodes (new_task, approval,
// new_task) must be split into at least two tasks.
describe('groupEpisodesToTasks', () => {
  it('starts a new task after a success and on a new_task prompt', () => {
    const eps = [
      ep({ timestamps: { started_at: '2026-05-19T10:00:00Z', ended_at: '2026-05-19T10:01:00Z' }, prompt_signal: 'new_task' }),
      ep({ timestamps: { started_at: '2026-05-19T10:02:00Z', ended_at: '2026-05-19T10:03:00Z' }, prompt_signal: 'approval' }),
      ep({ timestamps: { started_at: '2026-05-19T10:04:00Z', ended_at: '2026-05-19T10:05:00Z' }, prompt_signal: 'new_task' }),
    ];
    const tasks = groupEpisodesToTasks(eps);
    // Lower bound only: the exact grouping count is not pinned by this test.
    expect(tasks.length).toBeGreaterThanOrEqual(2);
  });
});
|
|
|
|
// findCausalChains: an errored episode is linked to a later episode through
// shared files, except for "hot" files (CLAUDE.md, memory-store .md files,
// episodes JSONL, STATUS.md, MEMORY.md), which must not create a chain.
describe('findCausalChains', () => {
  it('links an errored episode to a later episode that shares a file', () => {
    const a = ep({ timestamps: { started_at: '2026-05-19T10:00:00Z', ended_at: '2026-05-19T10:01:00Z' }, events: [{ kind: 'error', message: 'x' }], task_size: { tool_calls: 1, files_touched: 1, files: ['/shared.js'] } });
    const b = ep({ timestamps: { started_at: '2026-05-19T10:02:00Z', ended_at: '2026-05-19T10:03:00Z' }, task_size: { tool_calls: 1, files_touched: 1, files: ['/shared.js'] } });
    const chains = findCausalChains([a, b]);
    expect(chains).toHaveLength(1);
    expect(chains[0].sharedFiles).toEqual(['/shared.js']);
  });

  it('returns no chain when no files are shared', () => {
    const a = ep({ events: [{ kind: 'error', message: 'x' }], task_size: { tool_calls: 1, files_touched: 1, files: ['/a.js'] } });
    const b = ep({ timestamps: { started_at: '2026-05-19T10:02:00Z', ended_at: '2026-05-19T10:03:00Z' }, task_size: { tool_calls: 1, files_touched: 1, files: ['/b.js'] } });
    expect(findCausalChains([a, b])).toHaveLength(0);
  });

  it('excludes hot/normative files (CLAUDE.md) from the shared-file signal', () => {
    // Windows-style path with non-ASCII segments; only CLAUDE.md is shared.
    const a = ep({
      events: [{ kind: 'error', message: 'x' }],
      task_size: { tool_calls: 1, files_touched: 1, files: ['c:\\моя\\проекты\\портал crm\\Документация\\CLAUDE.md'] },
    });
    const b = ep({
      timestamps: { started_at: '2026-05-19T10:02:00Z', ended_at: '2026-05-19T10:03:00Z' },
      task_size: { tool_calls: 1, files_touched: 1, files: ['c:\\моя\\проекты\\портал crm\\Документация\\CLAUDE.md'] },
    });
    expect(findCausalChains([a, b])).toHaveLength(0);
  });

  it('excludes memory store .md files from the shared-file signal', () => {
    const a = ep({
      events: [{ kind: 'error', message: 'x' }],
      task_size: { tool_calls: 1, files_touched: 1, files: ['C:\\Users\\Administrator\\.claude\\projects\\proj\\memory\\reference_github.md'] },
    });
    const b = ep({
      timestamps: { started_at: '2026-05-19T10:02:00Z', ended_at: '2026-05-19T10:03:00Z' },
      task_size: { tool_calls: 1, files_touched: 1, files: ['C:\\Users\\Administrator\\.claude\\projects\\proj\\memory\\reference_github.md'] },
    });
    expect(findCausalChains([a, b])).toHaveLength(0);
  });

  it('excludes episodes JSONL + STATUS.md + MEMORY.md from chains', () => {
    // mk: earlier episode touching `path`, optionally carrying events.
    const mk = (path, evts = []) =>
      ep({
        timestamps: { started_at: '2026-05-19T10:00:00Z', ended_at: '2026-05-19T10:01:00Z' },
        events: evts,
        task_size: { tool_calls: 1, files_touched: 1, files: [path] },
      });
    // later: follow-up episode touching the same `path` two minutes on.
    const later = (path) =>
      ep({
        timestamps: { started_at: '2026-05-19T10:02:00Z', ended_at: '2026-05-19T10:03:00Z' },
        task_size: { tool_calls: 1, files_touched: 1, files: [path] },
      });
    const errored = [{ kind: 'error', message: 'x' }];
    expect(findCausalChains([mk('/docs/observer/episodes-2026-05.jsonl', errored), later('/docs/observer/episodes-2026-05.jsonl')])).toHaveLength(0);
    expect(findCausalChains([mk('/docs/observer/STATUS.md', errored), later('/docs/observer/STATUS.md')])).toHaveLength(0);
    expect(findCausalChains([mk('/some/dir/MEMORY.md', errored), later('/some/dir/MEMORY.md')])).toHaveLength(0);
  });

  it('still links chains via genuinely-shared source files', () => {
    // Both episodes share CLAUDE.md (excluded) and /src/app.ts (kept).
    const a = ep({
      events: [{ kind: 'error', message: 'x' }],
      task_size: { tool_calls: 1, files_touched: 2, files: ['c:\\path\\CLAUDE.md', '/src/app.ts'] },
    });
    const b = ep({
      timestamps: { started_at: '2026-05-19T10:02:00Z', ended_at: '2026-05-19T10:03:00Z' },
      task_size: { tool_calls: 1, files_touched: 2, files: ['c:\\path\\CLAUDE.md', '/src/app.ts'] },
    });
    const chains = findCausalChains([a, b]);
    expect(chains).toHaveLength(1);
    expect(chains[0].sharedFiles).toEqual(['/src/app.ts']);
  });
});
|
|
|
|
// buildFactorMatrix: the result is indexed as matrix[axis][factorValue][outcome]
// and holds counts of episodes, using each episode's _inferredOutcome.
describe('buildFactorMatrix', () => {
  it('tabulates outcome distribution per factor value', () => {
    const eps = [
      { ...ep(), _inferredOutcome: 'rework', decision_provenance: { kind: 'user_directed_method' } },
      { ...ep(), _inferredOutcome: 'success', decision_provenance: { kind: 'autonomous' } },
    ];
    const m = buildFactorMatrix(eps);
    expect(m.decision_provenance.user_directed_method.rework).toBe(1);
    expect(m.decision_provenance.autonomous.success).toBe(1);
  });

  it('counts the 3rd kind user_chose_from_options on the provenance axis', () => {
    const eps = [
      { ...ep(), _inferredOutcome: 'success', decision_provenance: { kind: 'autonomous' } },
      { ...ep(), _inferredOutcome: 'rework', decision_provenance: { kind: 'user_directed_method' } },
      { ...ep(), _inferredOutcome: 'success', decision_provenance: { kind: 'user_chose_from_options' } },
      { ...ep(), _inferredOutcome: 'rework', decision_provenance: { kind: 'user_chose_from_options' } },
    ];
    const m = buildFactorMatrix(eps);
    expect(m.decision_provenance).toHaveProperty('autonomous');
    expect(m.decision_provenance).toHaveProperty('user_directed_method');
    expect(m.decision_provenance).toHaveProperty('user_chose_from_options');
    expect(m.decision_provenance.user_chose_from_options.success).toBe(1);
    expect(m.decision_provenance.user_chose_from_options.rework).toBe(1);
  });

  it('includes session_segment_turn (bucketed, turns-since-last-compaction) and parallel_session factors', () => {
    // session_turn 3 is expected in the "early" bucket and 120 in "late".
    const eps = [
      { ...ep(), _inferredOutcome: 'success', environment: { session_turn: 3, parallel_session: false } },
      { ...ep(), _inferredOutcome: 'rework', environment: { session_turn: 120, parallel_session: true } },
    ];
    const m = buildFactorMatrix(eps);
    expect(m.session_segment_turn.early.success).toBe(1);
    expect(m.session_segment_turn.late.rework).toBe(1);
    expect(m.parallel_session.false.success).toBe(1);
    expect(m.parallel_session.true.rework).toBe(1);
  });
});
|
|
|
|
// analyze: top-level result shape, and exclusion of episodes that carry no
// schema_version (v1) from the analysed set.
describe('analyze', () => {
  it('returns episodeCount, tasks, causalChains and factorMatrix', () => {
    const result = analyze([ep(), ep({ timestamps: { started_at: '2026-05-19T11:00:00Z', ended_at: '2026-05-19T11:01:00Z' }, prompt_signal: 'correction' })]);
    expect(result.episodeCount).toBe(2);
    expect(result.factorMatrix).toBeDefined();
    expect(Array.isArray(result.tasks)).toBe(true);
    expect(Array.isArray(result.causalChains)).toBe(true);
  });

  it('skips v1 episodes (no schema_version 2) from the analysis', () => {
    // v1 record: no schema_version field at all.
    const v1 = { task_id: 's-old', timestamps: { started_at: '2026-05-19T09:00:00Z' }, outcome: 'success' };
    const result = analyze([
      v1,
      ep(),
      ep({ timestamps: { started_at: '2026-05-19T11:00:00Z', ended_at: '2026-05-19T11:01:00Z' } }),
    ]);
    expect(result.episodeCount).toBe(2);
    expect(result.v1SkippedCount).toBe(1);
  });
});
|
|
|
|
// Axis rename guard: the matrix produced by analyze() must expose
// session_segment_turn and must not expose the legacy session_turn key.
describe('buildFactorMatrix — session_segment_turn axis rename (Task 14)', () => {
  it('matrix has session_segment_turn axis, NOT legacy session_turn', () => {
    const result = analyze([
      {
        schema_version: 2,
        task_id: 's',
        task_ref: 's',
        timestamps: { started_at: '2026-05-20T00:00:00Z' },
        events: [],
        environment: { economy_level: null, model: 'opus', post_compaction: false, session_turn: 5, parallel_session: false },
        task_size: { tool_calls: 0 },
        primary_rationale: { node_chosen: 'direct', task_classification: 'other' },
        decision_provenance: { kind: 'autonomous' },
      },
    ]);
    expect(result.factorMatrix).toHaveProperty('session_segment_turn');
    expect(result.factorMatrix).not.toHaveProperty('session_turn');
  });
});
|
|
|
|
// chain_ref axis: an episode listing several chains is counted once under
// each chain id; an episode with chain_ref null lands in the "null" bucket.
describe('buildFactorMatrix — chain_ref axis (multi-chain)', () => {
  it('counts a multi-chain episode in each chain and null for direct', () => {
    const m = buildFactorMatrix([
      { _inferredOutcome: 'success', primary_rationale: { node_chosen: 'discovery-interview', chain_ref: ['L1', 'L2'] } },
      { _inferredOutcome: 'unknown', primary_rationale: { node_chosen: 'direct', chain_ref: null } },
    ]);
    expect(m.chain_ref.L1).toEqual({ success: 1 });
    expect(m.chain_ref.L2).toEqual({ success: 1 });
    expect(m.chain_ref.null).toEqual({ unknown: 1 });
  });

  it('chain_ref axis present via analyze()', () => {
    const result = analyze([ep({ primary_rationale: { node_chosen: 'billing-audit', chain_ref: ['L13'], task_classification: 'other' } })]);
    expect(result.factorMatrix).toHaveProperty('chain_ref');
  });
});
|
|
|
|
// inferOutcome with a neutral follow-up prompt: yields soft_success, while
// correction / approval follow-ups and a missing follow-up keep their labels.
describe('inferOutcome — neutral → soft_success (Task 16)', () => {
  it('returns soft_success when next prompt is neutral', () => {
    const a = { events: [] };
    const b = { prompt_signal: 'neutral' };
    expect(inferOutcome(a, b)).toBe('soft_success');
  });

  it('returns unknown when no next episode', () => {
    expect(inferOutcome({ events: [] }, null)).toBe('unknown');
  });

  it('rework still wins over neutral on correction', () => {
    expect(inferOutcome({ events: [] }, { prompt_signal: 'correction' })).toBe('rework');
  });

  it('explicit success still wins over neutral on approval', () => {
    expect(inferOutcome({ events: [] }, { prompt_signal: 'approval' })).toBe('success');
  });
});
|
|
|
|
// analyze() options { classificationMap, dormancy }: result.missedActivations
// reports totalMissed and a per-node byNode tally; without the options the
// total is 0.
describe('analyze() — missedActivations integration', () => {
  it('includes missedActivations in the result', () => {
    // A 'refactor' episode routed to 'direct' while the map lists '#11' for
    // refactor and '#11' is flagged non-dormant → expected one miss on '#11'.
    const eps = [
      {
        schema_version: 2,
        task_id: 't1',
        timestamps: { started_at: '2026-05-21T00:00:00Z' },
        primary_rationale: { node_chosen: 'direct', task_classification: 'refactor' },
        events: [],
      },
    ];
    const map = { refactor: ['#11'], other: [] };
    const dormancy = { '#11': false };
    const result = analyze(eps, { classificationMap: map, dormancy });
    expect(result.missedActivations).toBeDefined();
    expect(result.missedActivations.totalMissed).toBe(1);
    expect(result.missedActivations.byNode).toEqual({ '#11': 1 });
  });

  it('returns missedActivations.totalMissed=0 when no map/dormancy provided', () => {
    const eps = [{ schema_version: 2, task_id: 't1', timestamps: { started_at: 'x' }, primary_rationale: { node_chosen: 'direct', task_classification: 'refactor' }, events: [] }];
    const result = analyze(eps);
    expect(result.missedActivations.totalMissed).toBe(0);
  });
});
|
|
|
|
// analyze() schema_version handling: v2 and v3 episodes are both counted, and
// the recommended_node_for_direct axis buckets by primary_rationale.recommended_node
// (bucket "none" when the field is absent).
describe('analyze: schema_version filter', () => {
  it('accepts both v2 and v3 episodes', () => {
    const v2 = {
      schema_version: 2,
      task_id: 's1',
      timestamps: { started_at: '2026-05-23T10:00:00Z' },
      prompt_signal: 'new_task',
      primary_rationale: { node_chosen: 'direct', task_classification: 'feature' },
      environment: {},
      task_size: { tool_calls: 1 },
      decision_provenance: { kind: 'autonomous' },
      events: [],
    };
    // v3 differs from v2 by version, task id, start time and recommended_node.
    const v3 = {
      ...v2,
      schema_version: 3,
      task_id: 's2',
      timestamps: { started_at: '2026-05-23T11:00:00Z' },
      primary_rationale: { ...v2.primary_rationale, recommended_node: '#19' },
    };
    const result = analyze([v2, v3]);
    expect(result.episodeCount).toBe(2);
  });

  it('factorMatrix has recommended_node_for_direct axis', () => {
    const v3 = {
      schema_version: 3,
      task_id: 's1',
      timestamps: { started_at: '2026-05-23T10:00:00Z' },
      prompt_signal: 'new_task',
      primary_rationale: { node_chosen: 'direct', task_classification: 'feature', recommended_node: '#19' },
      environment: {},
      task_size: { tool_calls: 1 },
      decision_provenance: { kind: 'autonomous' },
      events: [],
    };
    const result = analyze([v3]);
    expect(result.factorMatrix.recommended_node_for_direct).toBeDefined();
    expect(result.factorMatrix.recommended_node_for_direct['#19']).toBeDefined();
  });

  it('v2 episode bucket=none in recommended_node_for_direct', () => {
    const v2 = {
      schema_version: 2,
      task_id: 's1',
      timestamps: { started_at: '2026-05-23T10:00:00Z' },
      prompt_signal: 'new_task',
      primary_rationale: { node_chosen: 'direct', task_classification: 'feature' },
      environment: {},
      task_size: { tool_calls: 1 },
      decision_provenance: { kind: 'autonomous' },
      events: [],
    };
    const result = analyze([v2]);
    expect(result.factorMatrix.recommended_node_for_direct.none).toBeDefined();
  });
});
|
|
|
|
// analyze() discipline metrics: disciplineByClassification, routerStep
// distribution and boundariesRate, all computed with a shared
// classificationMap / dormancy pair.
describe('analyze — discipline metrics (stage 2)', () => {
  const map = { feature: ['#19'], bugfix: ['#18'] };
  const dormancy = { '#19': false, '#18': false };

  it('returns disciplinePercentByClassification', () => {
    // Two 'feature' episodes: one direct with no trigger match, one routed to
    // '#19' with a matching trigger.
    const eps = [
      ep({ primary_rationale: { task_classification: 'feature', node_chosen: 'direct', triggers_matched: [], boundaries_applied: [], step: 1, candidates_considered: [], hard_floor: { invoked: false, rules: [] } } }),
      ep({ timestamps: { started_at: '2026-05-19T10:01:00Z', ended_at: '2026-05-19T10:02:00Z' }, primary_rationale: { task_classification: 'feature', node_chosen: '#19', triggers_matched: [{ node: '#19' }], boundaries_applied: [], step: 3, candidates_considered: [], hard_floor: { invoked: false, rules: [] } } }),
    ];
    const res = analyze(eps, { classificationMap: map, dormancy });
    expect(res.disciplineByClassification.feature.episodes).toBe(2);
    expect(res.disciplineByClassification.feature.withTriggerMatch).toBe(1);
    expect(res.disciplineByClassification.feature.viaSkill).toBe(1);
  });

  it('returns routerStepReached distribution (derived from signals)', () => {
    // Both fixtures record step: 1; the expected distribution differs because
    // the step is derived from signals rather than read from that field.
    const eps = [
      // bare/direct → derived step 1
      ep({ primary_rationale: { step: 1, task_classification: 'other', node_chosen: 'direct', triggers_matched: [], chain_ref: [], boundaries_applied: [], candidates_considered: [], hard_floor: { invoked: false, rules: [] } } }),
      // triggers matched → derived step 3
      ep({ timestamps: { started_at: '2026-05-19T10:01:00Z', ended_at: '2026-05-19T10:02:00Z' }, primary_rationale: { step: 1, task_classification: 'other', node_chosen: 'direct', triggers_matched: [{ node: '#19' }], chain_ref: [], boundaries_applied: [], candidates_considered: [], hard_floor: { invoked: false, rules: [] } } }),
    ];
    const res = analyze(eps, { classificationMap: map, dormancy });
    expect(res.routerStep.distribution['1']).toBe(1);
    expect(res.routerStep.distribution['3']).toBe(1);
  });

  it('returns boundariesAppliedRate', () => {
    // One of two episodes has a non-empty boundaries_applied → rate 0.5.
    const eps = [
      ep({ primary_rationale: { boundaries_applied: [{ adr: 'X' }], task_classification: 'feature', node_chosen: 'direct', triggers_matched: [], step: 1, candidates_considered: [], hard_floor: { invoked: false, rules: [] } } }),
      ep({ timestamps: { started_at: '2026-05-19T10:01:00Z', ended_at: '2026-05-19T10:02:00Z' }, primary_rationale: { boundaries_applied: [], task_classification: 'feature', node_chosen: 'direct', triggers_matched: [], step: 1, candidates_considered: [], hard_floor: { invoked: false, rules: [] } } }),
    ];
    const res = analyze(eps, { classificationMap: map, dormancy });
    expect(res.boundariesRate.total).toBe(2);
    expect(res.boundariesRate.withBoundaries).toBe(1);
    expect(res.boundariesRate.rate).toBeCloseTo(0.5);
  });
});
|
|
|
|
// analyze() aggregations over schema_version-4 episodes: inheritanceCount,
// reviewQuality / reviewerCoverage, degradedCount and costTotals.
// Fixtures within one test use distinct started_at values so they are not
// treated as duplicates of each other.
describe('analyze — v4 aggregations (Phase 3 Task 20)', () => {
  it('aggregates inheritanceCount across v4 episodes', () => {
    const eps = [
      ep({ schema_version: 4, inheritance: { inherited_from_task_id: 'x' } }),
      ep({ schema_version: 4, timestamps: { started_at: '2026-05-19T10:01:00Z', ended_at: '2026-05-19T10:02:00Z' }, inheritance: { inherited_from_task_id: 'y' } }),
      ep({ schema_version: 4, timestamps: { started_at: '2026-05-19T10:02:00Z', ended_at: '2026-05-19T10:03:00Z' } }),
    ];
    expect(analyze(eps).inheritanceCount).toBe(2);
  });

  it('aggregates reviewQuality distribution from review.node_quality', () => {
    const eps = [
      ep({ schema_version: 4, review: { node_quality: 'correct' } }),
      ep({ schema_version: 4, timestamps: { started_at: '2026-05-19T10:01:00Z', ended_at: '2026-05-19T10:02:00Z' }, review: { node_quality: 'correct' } }),
      ep({ schema_version: 4, timestamps: { started_at: '2026-05-19T10:02:00Z', ended_at: '2026-05-19T10:03:00Z' }, review: { node_quality: 'wrong_node' } }),
    ];
    const res = analyze(eps);
    expect(res.reviewQuality.correct).toBe(2);
    expect(res.reviewQuality.wrong_node).toBe(1);
    expect(res.reviewerCoverage.reviewed).toBe(3);
  });

  it('counts review pending for v4 episodes without a review block', () => {
    const eps = [ep({ schema_version: 4 })];
    expect(analyze(eps).reviewerCoverage.pending).toBe(1);
  });

  it('counts reviewer_error escalations under reviewerCoverage.errored', () => {
    const eps = [ep({ schema_version: 4, review: { reviewer_error: 'malformed episode' } })];
    expect(analyze(eps).reviewerCoverage.errored).toBe(1);
  });

  it('aggregates degradedCount on degraded_mode=true', () => {
    const eps = [
      ep({ schema_version: 4, degraded_mode: true }),
      ep({ schema_version: 4, timestamps: { started_at: '2026-05-19T10:01:00Z', ended_at: '2026-05-19T10:02:00Z' }, degraded_mode: false }),
    ];
    expect(analyze(eps).degradedCount).toBe(1);
  });

  it('sums task_cost tokens into costTotals', () => {
    // Each token field is summed independently; fields missing on an episode
    // contribute nothing (100 + 200, 30 alone, 500 alone).
    const eps = [
      ep({ schema_version: 4, task_cost: { classifier_input_tokens: 100, classifier_output_tokens: 30 } }),
      ep({ schema_version: 4, timestamps: { started_at: '2026-05-19T10:01:00Z', ended_at: '2026-05-19T10:02:00Z' }, task_cost: { classifier_input_tokens: 200, reviewer_input_tokens: 500 } }),
    ];
    const ct = analyze(eps).costTotals;
    expect(ct.classifier_input_tokens).toBe(300);
    expect(ct.classifier_output_tokens).toBe(30);
    expect(ct.reviewer_input_tokens).toBe(500);
  });
});
|
|
|
|
// Pass 1 axes of buildFactorMatrix: eight axes read directly from episode
// fields. Each test covers the expected buckets plus the fallback bucket used
// when the source field is missing.
describe('buildFactorMatrix — Pass 1 cheap axes (project-brain-factor-analysis-4passes)', () => {
  // Each new axis: smoke + null-safety on missing fields.
  it('prompt_signal axis: raw discrete values + null fallback', () => {
    const m = buildFactorMatrix([
      { ...ep(), _inferredOutcome: 'success', prompt_signal: 'new_task' },
      { ...ep(), _inferredOutcome: 'rework', prompt_signal: 'correction' },
      { ...ep(), _inferredOutcome: 'unknown', prompt_signal: undefined },
    ]);
    expect(m.prompt_signal.new_task.success).toBe(1);
    expect(m.prompt_signal.correction.rework).toBe(1);
    expect(m.prompt_signal.null.unknown).toBe(1);
  });

  it('classifier_source axis: reads classifier_output.source verbatim', () => {
    const m = buildFactorMatrix([
      { ...ep(), _inferredOutcome: 'success', classifier_output: { source: 'llm' } },
      { ...ep(), _inferredOutcome: 'success', classifier_output: { source: 'regex' } },
      { ...ep(), _inferredOutcome: 'success', classifier_output: { source: 'prefilter_inherited' } },
      { ...ep(), _inferredOutcome: 'unknown', classifier_output: null },
    ]);
    expect(m.classifier_source.llm.success).toBe(1);
    expect(m.classifier_source.regex.success).toBe(1);
    expect(m.classifier_source.prefilter_inherited.success).toBe(1);
    expect(m.classifier_source.null.unknown).toBe(1);
  });

  it('degraded_mode axis: true/false buckets, false default', () => {
    const m = buildFactorMatrix([
      { ...ep(), _inferredOutcome: 'success', degraded_mode: false },
      { ...ep(), _inferredOutcome: 'rework', degraded_mode: true },
      { ...ep(), _inferredOutcome: 'unknown' /* missing */ },
    ]);
    expect(m.degraded_mode.true.rework).toBe(1);
    expect(m.degraded_mode.false.success).toBe(1);
    expect(m.degraded_mode.false.unknown).toBe(1);
  });

  it('path_type axis: regulated / improvised / null', () => {
    const m = buildFactorMatrix([
      { ...ep(), _inferredOutcome: 'success', path_type: 'regulated' },
      { ...ep(), _inferredOutcome: 'rework', path_type: 'improvised' },
      { ...ep(), _inferredOutcome: 'unknown', path_type: undefined },
    ]);
    expect(m.path_type.regulated.success).toBe(1);
    expect(m.path_type.improvised.rework).toBe(1);
    expect(m.path_type.null.unknown).toBe(1);
  });

  it('retry_count axis: 0 / 1-2 / 3+ buckets from events[].kind=retry', () => {
    const m = buildFactorMatrix([
      { ...ep(), _inferredOutcome: 'success', events: [] },
      { ...ep(), _inferredOutcome: 'rework', events: [{ kind: 'retry' }] },
      { ...ep(), _inferredOutcome: 'rework', events: [{ kind: 'retry' }, { kind: 'retry' }] },
      { ...ep(), _inferredOutcome: 'blocked', events: [{ kind: 'retry' }, { kind: 'retry' }, { kind: 'retry' }, { kind: 'retry' }] },
    ]);
    expect(m.retry_count['0'].success).toBe(1);
    expect(m.retry_count['1-2'].rework).toBe(2);
    expect(m.retry_count['3+'].blocked).toBe(1);
  });

  it('error_count axis: 0 / 1 / 2+ buckets from events[].kind=error', () => {
    const m = buildFactorMatrix([
      { ...ep(), _inferredOutcome: 'success', events: [] },
      { ...ep(), _inferredOutcome: 'rework', events: [{ kind: 'error' }] },
      { ...ep(), _inferredOutcome: 'blocked', events: [{ kind: 'error' }, { kind: 'error' }, { kind: 'error' }] },
    ]);
    expect(m.error_count['0'].success).toBe(1);
    expect(m.error_count['1'].rework).toBe(1);
    expect(m.error_count['2+'].blocked).toBe(1);
  });

  it('hard_floor_invoked axis: true/false from primary_rationale.hard_floor.invoked', () => {
    const m = buildFactorMatrix([
      { ...ep(), _inferredOutcome: 'success', primary_rationale: { hard_floor: { invoked: true } } },
      { ...ep(), _inferredOutcome: 'success', primary_rationale: { hard_floor: { invoked: false } } },
      // No hard_floor block at all → expected in the "false" bucket.
      { ...ep(), _inferredOutcome: 'unknown', primary_rationale: {} },
    ]);
    expect(m.hard_floor_invoked.true.success).toBe(1);
    expect(m.hard_floor_invoked.false.success).toBe(1);
    expect(m.hard_floor_invoked.false.unknown).toBe(1);
  });

  it('iterations_bucket axis: 0 / 1-3 / 4-10 / 11+ from task_cost.iterations', () => {
    const m = buildFactorMatrix([
      { ...ep(), _inferredOutcome: 'success', task_cost: { iterations: 0 } },
      { ...ep(), _inferredOutcome: 'success', task_cost: { iterations: 2 } },
      { ...ep(), _inferredOutcome: 'rework', task_cost: { iterations: 7 } },
      { ...ep(), _inferredOutcome: 'blocked', task_cost: { iterations: 51 } },
      { ...ep(), _inferredOutcome: 'unknown', task_cost: {} },
    ]);
    expect(m.iterations_bucket['0'].success).toBe(1);
    expect(m.iterations_bucket['1-3'].success).toBe(1);
    expect(m.iterations_bucket['4-10'].rework).toBe(1);
    expect(m.iterations_bucket['11+'].blocked).toBe(1);
    // Missing iterations counts as 0 — task_cost block may be absent on early episodes.
    expect(m.iterations_bucket['0'].unknown).toBe(1);
  });

  it('all 8 Pass 1 axes are present via analyze() on a minimal v2 episode', () => {
    const result = analyze([ep()]);
    for (const axis of ['prompt_signal', 'classifier_source', 'degraded_mode', 'path_type',
      'retry_count', 'error_count', 'hard_floor_invoked', 'iterations_bucket']) {
      // The second argument to expect() is the failure message naming the axis.
      expect(result.factorMatrix, `axis ${axis} missing`).toHaveProperty(axis);
    }
  });
});
|
|
|
|
describe('buildFactorMatrix — Pass 3 dynamics axes (project-brain-factor-analysis-4passes)', () => {
  /**
   * Builds a fixture episode: a fresh ep() with `_inferredOutcome` pre-set.
   * `overrides` is spread last, so an explicit `undefined` value still
   * replaces whatever ep() provided for that key.
   */
  const labelled = (outcome, overrides = {}) => ({
    ...ep(),
    _inferredOutcome: outcome,
    ...overrides,
  });

  /**
   * Asserts each [bucket, outcome, count] triple against one matrix axis,
   * i.e. axisCounts[bucket][outcome] === count.
   */
  const expectCells = (axisCounts, cells) => {
    for (const [bucket, outcome, count] of cells) {
      expect(axisCounts[bucket][outcome]).toBe(count);
    }
  };

  /** Wraps per-tool counts in a single tool_summary event list. */
  const toolSummary = (counts) => [{ kind: 'tool_summary', counts }];

  it('prompt_length_bucket axis: short / medium / long / huge / null', () => {
    const withLength = (outcome, chars) =>
      labelled(outcome, { task_meta: { prompt_length_chars: chars } });

    const matrix = buildFactorMatrix([
      withLength('success', 42),
      withLength('success', 300),
      withLength('rework', 1200),
      withLength('blocked', 5000),
      // No task_meta at all: expected to land in the 'null' bucket.
      labelled('unknown', { task_meta: undefined }),
    ]);

    expectCells(matrix.prompt_length_bucket, [
      ['short', 'success', 1],
      ['medium', 'success', 1],
      ['long', 'rework', 1],
      ['huge', 'blocked', 1],
      ['null', 'unknown', 1],
    ]);
  });

  it('time_of_day_bucket axis derived from timestamps.started_at UTC hour', () => {
    const startedAt = (iso) => labelled('success', { timestamps: { started_at: iso } });

    const matrix = buildFactorMatrix([
      startedAt('2026-05-25T03:00:00Z'), // night (0-5)
      startedAt('2026-05-25T09:00:00Z'), // morning (6-11)
      startedAt('2026-05-25T14:00:00Z'), // afternoon (12-17)
      startedAt('2026-05-25T20:00:00Z'), // evening (18-23)
    ]);

    expectCells(matrix.time_of_day_bucket, [
      ['night', 'success', 1],
      ['morning', 'success', 1],
      ['afternoon', 'success', 1],
      ['evening', 'success', 1],
    ]);
  });

  it('day_of_week axis: Mon..Sun derived from started_at UTC', () => {
    // 2026-05-25 falls on a Monday (UTC), so 2026-05-27 is a Wednesday.
    const matrix = buildFactorMatrix([
      labelled('success', { timestamps: { started_at: '2026-05-25T10:00:00Z' } }), // Mon
      labelled('success', { timestamps: { started_at: '2026-05-27T10:00:00Z' } }), // Wed
      labelled('unknown', { timestamps: { started_at: null } }),
    ]);

    expectCells(matrix.day_of_week, [
      ['Mon', 'success', 1],
      ['Wed', 'success', 1],
      ['null', 'unknown', 1],
    ]);
  });

  it('inter_prompt_gap_bucket axis: gap between current and previous episode of same session', () => {
    // One turn of session 's1'; only the timestamps and prompt signal vary.
    const turn = (startedAt, endedAt, promptSignal) => ({
      schema_version: 2,
      task_id: 's1',
      timestamps: { started_at: startedAt, ended_at: endedAt },
      prompt_signal: promptSignal,
      primary_rationale: { node_chosen: 'direct', task_classification: 'feature' },
      environment: {},
      task_size: { tool_calls: 1 },
      decision_provenance: { kind: 'autonomous' },
      events: [],
    });

    const { inter_prompt_gap_bucket: gapAxis } = analyze([
      turn('2026-05-25T10:00:00Z', '2026-05-25T10:05:00Z', 'new_task'),
      // 2-minute gap → bucket "1-10m"
      turn('2026-05-25T10:07:00Z', '2026-05-25T10:10:00Z', 'correction'),
      // 80-minute gap → bucket "60m+"
      turn('2026-05-25T11:30:00Z', '2026-05-25T11:35:00Z', 'approval'),
    ]).factorMatrix;

    expect(gapAxis).toBeDefined();
    // The first episode has no predecessor, hence the 'null' bucket.
    for (const bucket of ['null', '1-10m', '60m+']) {
      expect(gapAxis[bucket]).toBeDefined();
    }
  });

  it('mcp_server_used axis: any / none (presence of any mcp_servers_used entry)', () => {
    const matrix = buildFactorMatrix([
      labelled('success', { task_meta: { mcp_servers_used: ['github'] } }),
      labelled('success', { task_meta: { mcp_servers_used: [] } }),
      labelled('unknown'), // mcp_servers_used not overridden
    ]);

    expectCells(matrix.mcp_server_used, [
      ['any', 'success', 1],
      ['none', 'success', 1],
      ['none', 'unknown', 1],
    ]);
  });

  it('file_type_main axis: dominant path category from file_type_distribution', () => {
    // Full seven-category distribution, zero everywhere except the given counts.
    const distribution = (counts) => ({
      src: 0,
      test: 0,
      other: 0,
      config: 0,
      spec: 0,
      norm: 0,
      data: 0,
      ...counts,
    });
    const withFiles = (outcome, counts) =>
      labelled(outcome, { task_meta: { file_type_distribution: distribution(counts) } });

    const matrix = buildFactorMatrix([
      withFiles('success', { src: 3, test: 1 }),
      withFiles('rework', { test: 4 }),
      withFiles('success', { src: 2, test: 2 }), // tie → mixed
      withFiles('unknown', {}), // empty → none
      labelled('unknown'), // file_type_distribution not overridden
    ]);

    expectCells(matrix.file_type_main, [
      ['src', 'success', 1],
      ['test', 'rework', 1],
      ['mixed', 'success', 1],
      ['none', 'unknown', 2], // empty + missing
    ]);
  });

  it('skill_invocations_bucket axis: 0 / 1 / 2+ from events tool_summary.Skill', () => {
    const matrix = buildFactorMatrix([
      labelled('success', { events: [] }),
      labelled('success', { events: toolSummary({ Skill: 1, Read: 5 }) }),
      labelled('success', { events: toolSummary({ Skill: 3 }) }),
    ]);

    expectCells(matrix.skill_invocations_bucket, [
      ['0', 'success', 1],
      ['1', 'success', 1],
      ['2+', 'success', 1],
    ]);
  });

  it('subagent_spawns_bucket axis: 0 / 1 / 2+ from events tool_summary.Agent (or Task)', () => {
    const matrix = buildFactorMatrix([
      labelled('success', { events: [] }),
      labelled('success', { events: toolSummary({ Agent: 1 }) }),
      labelled('rework', { events: toolSummary({ Agent: 4 }) }),
    ]);

    expectCells(matrix.subagent_spawns_bucket, [
      ['0', 'success', 1],
      ['1', 'success', 1],
      ['2+', 'rework', 1],
    ]);
  });

  it('all 8 Pass 3 axes are present via analyze() on a minimal v2 episode', () => {
    const pass3Axes = [
      'prompt_length_bucket',
      'time_of_day_bucket',
      'day_of_week',
      'inter_prompt_gap_bucket',
      'mcp_server_used',
      'file_type_main',
      'skill_invocations_bucket',
      'subagent_spawns_bucket',
    ];

    const { factorMatrix } = analyze([ep()]);

    pass3Axes.forEach((axis) => {
      expect(factorMatrix, `axis ${axis} missing`).toHaveProperty(axis);
    });
  });
});
|
|
|
|
describe('buildFactorMatrix — Pass 2 classifier-metric axes', () => {
  /**
   * Fixture episode: a fresh ep() with `_inferredOutcome` pre-set and the
   * given `classifier_output` value (which may be null).
   */
  const classified = (outcome, classifierOutput) => ({
    ...ep(),
    _inferredOutcome: outcome,
    classifier_output: classifierOutput,
  });

  /** Asserts axisCounts[bucket][outcome] === count for every listed triple. */
  const expectCells = (axisCounts, cells) => {
    cells.forEach(([bucket, outcome, count]) => {
      expect(axisCounts[bucket][outcome]).toBe(count);
    });
  };

  it('latency_bucket axis: fast / medium / slow / very_slow / null', () => {
    const matrix = buildFactorMatrix([
      classified('success', { latency_ms: 250 }),
      classified('success', { latency_ms: 1500 }),
      classified('rework', { latency_ms: 5000 }),
      classified('blocked', { latency_ms: 15000 }),
      // No classifier output at all: expected in the 'null' bucket.
      classified('unknown', null),
    ]);

    expectCells(matrix.latency_bucket, [
      ['fast', 'success', 1],
      ['medium', 'success', 1],
      ['slow', 'rework', 1],
      ['very_slow', 'blocked', 1],
      ['null', 'unknown', 1],
    ]);
  });

  it('error_type axis: reads classifier_output.llm_error verbatim with null default', () => {
    const matrix = buildFactorMatrix([
      classified('rework', { llm_error: 'timeout' }),
      classified('rework', { llm_error: 'econnreset' }),
      classified('success', { llm_error: null }),
      classified('success', null),
    ]);

    expectCells(matrix.error_type, [
      ['timeout', 'rework', 1],
      ['econnreset', 'rework', 1],
      // Both the explicit null error and the missing classifier output count here.
      ['null', 'success', 2],
    ]);
  });
});
|
|
|
|
describe('analyze — Pass 4 similar_past_outcome_majority axis (project-brain-factor-analysis-4passes)', () => {
  // Hand-rolled base64 of a small Float32 vector, so the tests never need to
  // load @xenova to obtain an embedding.
  const encode = (arr) => {
    const floats = new Float32Array(arr);
    return Buffer.from(floats.buffer, floats.byteOffset, floats.byteLength).toString('base64');
  };

  /**
   * Schema-v4 episode literal; only session id, time window, prompt signal and
   * embedding differ between the fixtures below.
   */
  const turn = ({ taskId, startedAt, endedAt, signal, embedding }) => ({
    schema_version: 4,
    task_id: taskId,
    timestamps: { started_at: startedAt, ended_at: endedAt },
    prompt_signal: signal,
    primary_rationale: { node_chosen: 'direct', task_classification: 'feature' },
    environment: {},
    task_size: { tool_calls: 1 },
    decision_provenance: { kind: 'autonomous' },
    prompt_embedding_base64: embedding,
    events: [],
  });

  it('attaches similar_past_outcome_majority axis to factor matrix', () => {
    // Every episode carries the same task_id (= sessionId in real episodes —
    // task_id IS the session id; a single Claude Code session can hold N turns).
    // bySessionSorted groups by task_id, so inferOutcome can only find a "next"
    // episode inside the same session group.
    const SID = 'session-A';
    const sessionTurns = [
      ['2026-05-20T10:00:00Z', '2026-05-20T10:01:00Z', 'new_task', [1, 0, 0, 0]],
      ['2026-05-20T10:02:00Z', '2026-05-20T10:03:00Z', 'approval', [0.95, 0.05, 0, 0]],
      ['2026-05-20T10:04:00Z', '2026-05-20T10:05:00Z', 'approval', [0.9, 0.1, 0, 0]],
      ['2026-05-20T10:06:00Z', '2026-05-20T10:07:00Z', 'new_task', [0.98, 0.02, 0, 0]],
    ];
    const eps = sessionTurns.map(([startedAt, endedAt, signal, vector]) =>
      turn({ taskId: SID, startedAt, endedAt, signal, embedding: encode(vector) }));

    const { similar_past_outcome_majority: majorityAxis } = analyze(eps).factorMatrix;

    expect(majorityAxis).toBeDefined();
    // 3 of the 4 episodes resolve to a success outcome and are therefore
    // indexed; each nearest-neighbour lookup then returns success peers.
    expect(majorityAxis.success).toBeDefined();
  });

  it('bucket no_neighbors when no episode has embeddings', () => {
    const eps = [
      turn({
        taskId: 'a',
        startedAt: '2026-05-20T10:00:00Z',
        endedAt: '2026-05-20T10:01:00Z',
        signal: 'new_task',
        embedding: null,
      }),
    ];

    const { factorMatrix } = analyze(eps);

    expect(factorMatrix.similar_past_outcome_majority.no_neighbors).toBeDefined();
  });
});
|