Files
brain/tools/brain-retro-analyzer.mjs
T

811 lines
31 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env node
/**
* Brain-retro analyzer (brain governance, observer factor-analysis spec §6).
* Pure, deterministic Layer-4 aggregation over observer episodes for the
* /brain-retro skill. Read-only — never writes JSONL. No LLM.
*
* Security Guidance #40: pure parsing — no exec/execSync.
*/
import { Buffer } from 'buffer';
import { readFileSync, existsSync } from 'fs';
import { join as pathJoin } from 'path';
import { homedir } from 'os';
import { detectMissedActivations } from './missed-activations.mjs';
import {
disciplinePercentByClassification,
routerStepReached,
boundariesAppliedRate,
} from './discipline-metrics.mjs';
import { loadRegistry } from './registry-load.mjs';
import { buildClassificationMap, buildDormancyMap } from './registry-to-classification-map.mjs';
import {
buildIndex as buildEmbeddingIndex,
findNearestNeighbors,
majorityOutcome,
} from './observer-embedding-index.mjs';
/**
 * Outcome labels counted for `chain-recommendation` ledger entries.
 * The array order is also the row order of the rendered Cut 11 table.
 */
export const CHAIN_OUTCOME_BUCKETS = [
  'blocked',
  'passed-with-skill',
  'passed-inline-override',
  'passed-global-override',
  'passed-short-chain',
  'passed-no-mutating',
];

/**
 * Tally chain-recommendation hook outcomes from a JSONL ledger.
 *
 * Best-effort by design: an unreadable ledger yields an all-zero result, and
 * blank lines, unparsable lines, entries for other rules, entries with an
 * unparsable `ts`, entries outside the period and entries with an unknown
 * outcome are skipped silently.
 *
 * @param {object} [opts]
 * @param {string} [opts.ledgerPath] - JSONL ledger; defaults to
 *   `~/.claude/runtime/hook-outcomes.jsonl`.
 * @param {string} [opts.periodStart] - inclusive lower bound (Date.parse-able).
 * @param {string} [opts.periodEnd] - inclusive upper bound (Date.parse-able).
 * @returns {{total: number, buckets: Record<string, number>}}
 */
export function analyzeChainHookEffectiveness({ ledgerPath, periodStart, periodEnd } = {}) {
  const buckets = {};
  for (const name of CHAIN_OUTCOME_BUCKETS) buckets[name] = 0;

  const source = ledgerPath || pathJoin(homedir(), '.claude', 'runtime', 'hook-outcomes.jsonl');
  let contents;
  try {
    contents = readFileSync(source, 'utf-8');
  } catch {
    // Missing or unreadable ledger is reported as "no data", not as a failure.
    return { total: 0, buckets };
  }

  const lowerBound = periodStart ? Date.parse(periodStart) : -Infinity;
  const upperBound = periodEnd ? Date.parse(periodEnd) : Infinity;

  let total = 0;
  for (const row of contents.split('\n')) {
    if (row.trim() === '') continue;
    let record;
    try {
      record = JSON.parse(row);
    } catch {
      continue;
    }
    if (!record || record.rule !== 'chain-recommendation') continue;
    const when = Date.parse(record.ts || '');
    if (Number.isNaN(when) || when < lowerBound || when > upperBound) continue;
    if (!CHAIN_OUTCOME_BUCKETS.includes(record.outcome)) continue;
    buckets[record.outcome] += 1;
    total += 1;
  }
  return { total, buckets };
}
/**
 * Render the Cut 11 chain-hook effectiveness table as Markdown.
 *
 * @param {{total: number, buckets: Record<string, number>}} stats - shape
 *   returned by analyzeChainHookEffectiveness.
 * @returns {string} a placeholder line when `total` is falsy; otherwise a
 *   heading plus one table row per CHAIN_OUTCOME_BUCKETS entry and a TOTAL
 *   row. Always newline-terminated.
 */
export function buildChainHookEffectiveness({ total, buckets }) {
  if (!total) return '_(нет данных за период — хук не срабатывал или ledger пуст)_\n';

  const rows = CHAIN_OUTCOME_BUCKETS.map((name) => {
    const count = (buckets && buckets[name]) || 0;
    // `total` is known to be truthy here, so the division is safe.
    const share = Math.round((count / total) * 100);
    return `| ${name} | ${count} | ${share}% |`;
  });

  const table = [
    '### Cut 11: Chain-hook effectiveness',
    '',
    '| Outcome | Count | % |',
    '|---|---:|---:|',
    ...rows,
    `| **TOTAL** | **${total}** | **100%** |`,
  ];
  return table.join('\n') + '\n';
}
// Tool-call thresholds used by sizeBucket(): fewer than SIZE_SMALL calls is
// 'small', up to and including SIZE_LARGE is 'medium', anything above is 'large'.
const SIZE_SMALL = 20;
const SIZE_LARGE = 60;
/**
 * Collapse the routing-gate double-write. A turn that was blocked and then
 * re-stopped produces two episodes sharing task_id + started_at; only the
 * last one written (the most complete) survives, in the position of the
 * first. observer_error markers are never deduplicated and are appended
 * after the normal episodes. Falsy entries are dropped.
 *
 * @param {Array<object>} episodes
 * @returns {Array<object>} deduplicated normal episodes followed by error markers.
 */
export function dedupeEpisodes(episodes) {
  const latestByKey = new Map();
  const errorMarkers = [];
  for (const episode of episodes) {
    if (!episode) continue;
    if (episode.observer_error) {
      errorMarkers.push(episode);
      continue;
    }
    const startedAt = (episode.timestamps || {}).started_at;
    // Map.set on an existing key keeps the original slot but swaps the value.
    latestByKey.set(`${episode.task_id}|${startedAt}`, episode);
  }
  return Array.from(latestByKey.values()).concat(errorMarkers);
}
/**
 * Infer the outcome of an episode from its own events plus the prompt signal
 * of the episode that followed it.
 *
 * @param {object|null|undefined} episode - episode whose `events` are inspected.
 * @param {object|null|undefined} nextEpisode - next episode of the same
 *   session, if any; only its `prompt_signal` is read.
 * @returns {'partial'|'blocked'|'rework'|'success'|'soft_success'|'unknown'}
 */
export function inferOutcome(episode, nextEpisode) {
  const events = episode && Array.isArray(episode.events) ? episode.events : [];
  // Tolerate null/undefined entries in `events`, matching countEventKind, so a
  // single malformed event cannot crash the whole aggregation.
  const hasEvent = (kind) => events.some((ev) => ev != null && ev.kind === kind);

  if (hasEvent('interrupt')) {
    return 'partial';
  }
  // A turn is `blocked` only when it ENDED on an unrecovered tool failure —
  // emitted by the parser as a single `unrecovered_error` event when the
  // LAST tool_result of the turn was is_error=true. Raw error/retry counts
  // do NOT imply blocked: a TDD red→green cycle or a grep that returns
  // nothing both surface as `error` events but are intentional and
  // recovered — counting them as blocked over-reports failures (A-1 fix).
  if (hasEvent('unrecovered_error')) {
    return 'blocked';
  }
  // 'failure' (work wrong AND never corrected) is a judgment, not
  // deterministically recoverable from a transcript — deferred to the phase-2
  // agent-judge. Until then a wrong-then-corrected turn surfaces as 'rework'.
  if (!nextEpisode) return 'unknown';
  if (nextEpisode.prompt_signal === 'correction') return 'rework';
  if (nextEpisode.prompt_signal === 'approval' || nextEpisode.prompt_signal === 'new_task') return 'success';
  // Task 16: a neutral next prompt counts as silent success. If the operator
  // moved on to the next instruction without correction markers, that is
  // "no objection". Slightly weaker signal than explicit approval, so it is
  // labelled `soft_success`.
  if (nextEpisode.prompt_signal === 'neutral') return 'soft_success';
  return 'unknown';
}
/**
 * Bucket episodes by session (task_id, or 'unknown' when absent) and order
 * each bucket by the string form of `timestamps.started_at`.
 * observer_error markers are left out.
 *
 * @param {Array<object>} episodes
 * @returns {Map<string, Array<object>>} session id → episodes in start order.
 */
function bySessionSorted(episodes) {
  const sessions = new Map();
  for (const episode of episodes) {
    if (episode.observer_error) continue;
    const sessionId = episode.task_id || 'unknown';
    const bucket = sessions.get(sessionId);
    if (bucket) {
      bucket.push(episode);
    } else {
      sessions.set(sessionId, [episode]);
    }
  }
  const startKey = (episode) => String((episode.timestamps || {}).started_at);
  sessions.forEach((bucket) => {
    bucket.sort((left, right) => startKey(left).localeCompare(startKey(right)));
  });
  return sessions;
}
/**
 * Group episodes into tasks, session by session. A new task opens on the
 * first episode of a session, after an episode whose inferred outcome is
 * 'success', or when the episode's own prompt_signal is 'new_task'.
 *
 * @param {Array<object>} episodes
 * @returns {Array<{task_ref: string, episodes: Array<object>}>} tasks whose
 *   `task_ref` is `<session id>#<1-based position in the returned list>`.
 */
export function groupEpisodesToTasks(episodes) {
  const tasks = [];
  for (const [sessionId, turns] of bySessionSorted(episodes)) {
    let openTask = null;
    for (let idx = 0; idx < turns.length; idx += 1) {
      const turn = turns[idx];
      const startsTask =
        idx === 0 ||
        inferOutcome(turns[idx - 1], turn) === 'success' ||
        turn.prompt_signal === 'new_task';
      if (startsTask) {
        // The counter is global across sessions, not per session.
        openTask = { task_ref: `${sessionId}#${tasks.length + 1}`, episodes: [] };
        tasks.push(openTask);
      }
      openTask.episodes.push(turn);
    }
  }
  return tasks;
}
// Hot/normative files are touched by almost every turn (memory store,
// CLAUDE.md, STATUS.md, episodes JSONL). Two turns sharing one of them says
// nothing about causality, so they are excluded from the shared-file signal
// (A-3 fix).
const HOT_FILE_PATTERNS = [
  /(?:^|[\\/])CLAUDE\.md$/i,
  /(?:^|[\\/])MEMORY\.md$/i,
  /(?:^|[\\/])STATUS\.md$/i,
  /[\\/]episodes-\d{4}-\d{2}\.jsonl$/i,
  /[\\/]memory[\\/][^\\/]+\.md$/i,
];

/**
 * Tell whether a path names one of the hot/normative files.
 *
 * @param {*} path - any value; falsy values are treated as the empty string.
 * @returns {boolean} true when some HOT_FILE_PATTERNS entry matches.
 */
export function isHotFile(path) {
  const candidate = String(path || '');
  for (const pattern of HOT_FILE_PATTERNS) {
    if (pattern.test(candidate)) return true;
  }
  return false;
}
/**
 * Find causal-chain candidates: an episode carrying an `error` event, linked
 * to the first later episode (by started_at order) that shares at least one
 * non-hot file with it. Each errored episode yields at most one link.
 *
 * @param {Array<object>} episodes
 * @returns {Array<{from: string, to: string, sharedFiles: string[]}>} links
 *   whose endpoints are `task_id|started_at` keys.
 */
export function findCausalChains(episodes) {
  const startOf = (episode) => (episode.timestamps || {}).started_at;
  const refOf = (episode) => `${episode.task_id}|${startOf(episode)}`;
  const coldFilesOf = (episode) =>
    (((episode.task_size || {}).files) || []).filter((file) => !isHotFile(file));

  const ordered = episodes.filter((episode) => !episode.observer_error);
  ordered.sort((left, right) => String(startOf(left)).localeCompare(String(startOf(right))));

  const chains = [];
  for (const [idx, origin] of ordered.entries()) {
    // The last episode has no successor, so it can never start a chain.
    if (idx >= ordered.length - 1) break;

    const errored = Array.isArray(origin.events) && origin.events.some((ev) => ev.kind === 'error');
    if (!errored) continue;

    const originFiles = new Set(coldFilesOf(origin));
    if (originFiles.size === 0) continue;

    for (const later of ordered.slice(idx + 1)) {
      const sharedFiles = coldFilesOf(later).filter((file) => originFiles.has(file));
      if (sharedFiles.length === 0) continue;
      chains.push({ from: refOf(origin), to: refOf(later), sharedFiles });
      break;
    }
  }
  return chains;
}
/**
 * Bucket a task by its tool-call count.
 *
 * @param {*} toolCalls - coerced with Number(); non-numeric values count as 0.
 * @returns {'small'|'medium'|'large'}
 */
function sizeBucket(toolCalls) {
  const calls = Number(toolCalls) || 0;
  if (calls < SIZE_SMALL) return 'small';
  if (calls > SIZE_LARGE) return 'large';
  return 'medium';
}
// Session-turn thresholds: below EARLY is 'early', up to and including LATE
// is 'mid', anything above is 'late'.
const SESSION_TURN_EARLY = 10;
const SESSION_TURN_LATE = 40;

/**
 * Bucket the position of a turn inside its session.
 *
 * @param {*} turn - turn number; numeric strings are accepted.
 * @returns {'early'|'mid'|'late'|'null'} 'null' when the turn is missing or
 *   not a finite number.
 */
function sessionTurnBucket(turn) {
  // Reject null / undefined / blank BEFORE coercion: Number(null) and
  // Number('') are both 0, which would falsely bucket a missing turn as
  // 'early'.
  if (turn == null || (typeof turn === 'string' && turn.trim() === '')) return 'null';
  const n = Number(turn);
  if (!Number.isFinite(n)) return 'null';
  if (n < SESSION_TURN_EARLY) return 'early';
  return n <= SESSION_TURN_LATE ? 'mid' : 'late';
}
// Pass 1 cheap-axis helpers (project-brain-factor-analysis-4passes).

/**
 * Count the events of a given kind.
 *
 * @param {*} events - event list; anything that is not an array counts as empty.
 * @param {string} kind - value compared against each event's `kind`.
 * @returns {number} number of truthy events whose `kind` matches.
 */
function countEventKind(events, kind) {
  if (!Array.isArray(events)) return 0;
  return events.reduce(
    (matches, event) => (event && event.kind === kind ? matches + 1 : matches),
    0,
  );
}
/**
 * Bucket an episode by its number of `retry` events.
 *
 * @param {*} events - episode event list.
 * @returns {'0'|'1-2'|'3+'}
 */
function retryBucket(events) {
  const retries = countEventKind(events, 'retry');
  if (retries === 0) return '0';
  if (retries <= 2) return '1-2';
  return '3+';
}
/**
 * Bucket an episode by its number of `error` events.
 *
 * @param {*} events - episode event list.
 * @returns {'0'|'1'|'2+'}
 */
function errorBucket(events) {
  const errors = countEventKind(events, 'error');
  if (errors === 0) return '0';
  if (errors === 1) return '1';
  return '2+';
}
/**
 * Bucket an iteration count.
 *
 * @param {*} iterations - coerced with Number().
 * @returns {'0'|'1-3'|'4-10'|'11+'} '0' for non-finite or non-positive input.
 */
function iterationsBucket(iterations) {
  const count = Number(iterations);
  const isPositive = Number.isFinite(count) && count > 0;
  if (!isPositive) return '0';
  const tiers = [
    [3, '1-3'],
    [10, '4-10'],
  ];
  const tier = tiers.find(([ceiling]) => count <= ceiling);
  return tier ? tier[1] : '11+';
}
// Pass 2 — classifier latency bucket. <500ms = fast (cache hit territory),
// 500-2000 = medium (cold call), 2000-10000 = slow (network jitter / overflow),
// >=10000 = very_slow (retries fired). Null on non-LLM paths.

/**
 * Bucket classifier latency in milliseconds.
 *
 * @param {*} latency - latency in ms; numeric strings are accepted.
 * @returns {'fast'|'medium'|'slow'|'very_slow'|'null'} 'null' when latency
 *   is missing, negative, or not a finite number.
 */
function latencyBucket(latency) {
  // Reject null / undefined / empty BEFORE coercion: Number(null) is 0, which
  // would report the null latency of non-LLM paths as 'fast'.
  if (latency == null || latency === '') return 'null';
  const ms = Number(latency);
  if (!Number.isFinite(ms) || ms < 0) return 'null';
  if (ms < 500) return 'fast';
  if (ms < 2000) return 'medium';
  if (ms < 10000) return 'slow';
  return 'very_slow';
}
// Pass 3 helpers (project-brain-factor-analysis-4passes).

/**
 * Bucket a prompt by its length in characters.
 *
 * @param {*} n - character count, coerced with Number().
 * @returns {'short'|'medium'|'long'|'huge'|'null'} 'null' for non-finite or
 *   non-positive input.
 */
function promptLengthBucket(n) {
  const chars = Number(n);
  const usable = Number.isFinite(chars) && chars > 0;
  if (!usable) return 'null';
  const tiers = [
    [100, 'short'],
    [1000, 'medium'],
    [2500, 'long'],
  ];
  const tier = tiers.find(([limit]) => chars < limit);
  return tier ? tier[1] : 'huge';
}
/**
 * Bucket a timestamp into a six-hour UTC slice of the day.
 *
 * @param {*} iso - timestamp accepted by the Date constructor.
 * @returns {'night'|'morning'|'afternoon'|'evening'|'null'} 'null' when the
 *   timestamp is missing or unparsable.
 */
function timeOfDayBucket(iso) {
  // Missing values must be rejected up front: `new Date(null)` is the epoch
  // (1970-01-01), not an invalid date, and would be bucketed as 'night'.
  if (iso === null || iso === undefined || iso === '') return 'null';
  // getUTCHours() yields NaN for an invalid date.
  const hour = new Date(iso).getUTCHours();
  if (Number.isNaN(hour)) return 'null';
  const slices = ['night', 'morning', 'afternoon', 'evening'];
  return slices[Math.floor(hour / 6)];
}
// Indexed by Date#getUTCDay(): 0 is Sunday.
const WEEKDAY_NAMES = ['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat'];

/**
 * Short UTC weekday name for a timestamp.
 *
 * @param {*} iso - timestamp accepted by the Date constructor.
 * @returns {string} one of WEEKDAY_NAMES, or 'null' when the timestamp is
 *   missing or unparsable.
 */
function dayOfWeekLabel(iso) {
  if (iso === null || iso === undefined || iso === '') return 'null';
  // getUTCDay() yields NaN for an invalid date.
  const dayIndex = new Date(iso).getUTCDay();
  return Number.isNaN(dayIndex) ? 'null' : WEEKDAY_NAMES[dayIndex];
}
/**
 * Bucket the gap between two prompts, given in minutes.
 *
 * @param {*} min - gap in minutes, coerced with Number().
 * @returns {'<1m'|'1-10m'|'10-60m'|'60m+'|'null'} 'null' for non-finite or
 *   negative input.
 */
function interPromptGapBucket(min) {
  const minutes = Number(min);
  const usable = Number.isFinite(minutes) && minutes >= 0;
  if (!usable) return 'null';
  const tiers = [
    [1, '<1m'],
    [10, '1-10m'],
    [60, '10-60m'],
  ];
  const tier = tiers.find(([limit]) => minutes < limit);
  return tier ? tier[1] : '60m+';
}
/**
 * Pick the dominant file type of a `{ type: count }` distribution.
 *
 * Counts are coerced with Number() once and compared numerically everywhere,
 * so numeric strings (e.g. '10' vs '3') are ranked by value rather than
 * lexicographically, and a string/number tie is still detected as a tie.
 *
 * @param {*} dist - distribution object mapping a type label to its count.
 * @returns {string} the type with the strictly highest positive count,
 *   'mixed' when several types share the highest count, or 'none' when the
 *   input is not an object or has no positive count.
 */
function fileTypeMain(dist) {
  if (!dist || typeof dist !== 'object') return 'none';
  let best = 0;
  let winner = 'none';
  let tied = false;
  for (const [type, raw] of Object.entries(dist)) {
    const count = Number(raw);
    // Also skips NaN, which fails every comparison.
    if (!(count > 0)) continue;
    if (count > best) {
      best = count;
      winner = type;
      tied = false;
    } else if (count === best) {
      tied = true;
    }
  }
  return tied ? 'mixed' : winner;
}
/**
 * Read the call count of one tool from an episode's tool summary.
 * Only the FIRST `tool_summary` event that carries `counts` is consulted.
 *
 * @param {*} events - episode event list; non-arrays count as empty.
 * @param {string} toolName - key looked up in the summary's `counts`.
 * @returns {number} the count, or 0 when absent or not numeric.
 */
function eventToolCount(events, toolName) {
  if (!Array.isArray(events)) return 0;
  const summary = events.find((event) => event && event.kind === 'tool_summary' && event.counts);
  if (!summary) return 0;
  return Number(summary.counts[toolName]) || 0;
}
/**
 * Bucket a count as exactly zero, exactly one, or anything else.
 *
 * @param {*} n - coerced with Number(); non-numeric values count as 0.
 * @returns {'0'|'1'|'2+'}
 */
function countBucket012(n) {
  const count = Number(n) || 0;
  if (count === 0) return '0';
  if (count === 1) return '1';
  return '2+';
}
/**
 * Bucket the number of judge calls.
 *
 * @param {*} n - coerced with Number(); non-numeric values count as 0.
 * @returns {'0'|'1-9'|'10+'}
 */
function judgeCallsBucket(n) {
  const calls = Number(n) || 0;
  if (calls === 0) return '0';
  return calls < 10 ? '1-9' : '10+';
}
/**
 * Single-value factor extractors: factor name → function mapping an episode
 * to the row label it contributes to in the factor matrix. Every extractor
 * falls back to a fixed label ('null', 'unknown', 'direct', 'other', 'none',
 * 'false', ...) when the field it reads is absent, so buildFactorMatrix never
 * sees an undefined row key.
 *
 * Note the deliberate mix of `||` and `??`: flag-like fields use `??` so an
 * explicit `false` / `0` is preserved, label fields use `||` so empty values
 * collapse to the fallback.
 */
const FACTOR_FNS = {
decision_provenance: (e) => (e.decision_provenance || {}).kind || 'unknown',
economy_level: (e) => String((e.environment || {}).economy_level ?? 'null'),
model: (e) => (e.environment || {}).model || 'null',
post_compaction: (e) => String((e.environment || {}).post_compaction ?? false),
session_segment_turn: (e) => sessionTurnBucket((e.environment || {}).session_turn),
parallel_session: (e) => String((e.environment || {}).parallel_session ?? false),
task_size: (e) => sizeBucket((e.task_size || {}).tool_calls),
node_chosen: (e) => (e.primary_rationale || {}).node_chosen || 'direct',
task_classification: (e) => (e.primary_rationale || {}).task_classification || 'other',
recommended_node_for_direct: (e) => (e.primary_rationale || {}).recommended_node || 'none',
// Pass 1 — 8 cheap axes (data already in v4 episode, just expose):
prompt_signal: (e) => e.prompt_signal || 'null',
classifier_source: (e) => (e.classifier_output || {}).source || 'null',
degraded_mode: (e) => String(e.degraded_mode ?? false),
path_type: (e) => e.path_type || 'null',
retry_count: (e) => retryBucket(e.events),
error_count: (e) => errorBucket(e.events),
hard_floor_invoked: (e) => String(((e.primary_rationale || {}).hard_floor || {}).invoked ?? false),
iterations_bucket: (e) => iterationsBucket((e.task_cost || {}).iterations),
// Pass 2 — classifier-metric axes (project-brain-factor-analysis-4passes):
latency_bucket: (e) => latencyBucket((e.classifier_output || {}).latency_ms),
error_type: (e) => (e.classifier_output || {}).llm_error || 'null',
// Pass 3 — dynamics axes (project-brain-factor-analysis-4passes):
prompt_length_bucket: (e) => promptLengthBucket((e.task_meta || {}).prompt_length_chars),
time_of_day_bucket: (e) => timeOfDayBucket((e.timestamps || {}).started_at),
day_of_week: (e) => dayOfWeekLabel((e.timestamps || {}).started_at),
// Reads the gap stamped on the episode by analyze(); absent → bucket 'null'.
inter_prompt_gap_bucket: (e) => interPromptGapBucket(e._interPromptGapMin),
mcp_server_used: (e) => (((e.task_meta || {}).mcp_servers_used || []).length > 0 ? 'any' : 'none'),
file_type_main: (e) => fileTypeMain((e.task_meta || {}).file_type_distribution),
skill_invocations_bucket: (e) => countBucket012(eventToolCount(e.events, 'Skill')),
// Sub-agent spawns are the sum of the 'Agent' and 'Task' tool counts.
subagent_spawns_bucket: (e) => countBucket012(
eventToolCount(e.events, 'Agent') + eventToolCount(e.events, 'Task'),
),
// Pass 4 — semantic NN axis (project-brain-factor-analysis-4passes).
// Reads the pre-computed family label stamped on the episode by analyze()
// (cross-episode pass via observer-embedding-index). Episodes without an
// embedding or with no resolved neighbours bucket as 'no_neighbors'.
similar_past_outcome_majority: (e) => e._similarPastOutcomeMajority || 'no_neighbors',
// Pass 5 — router-gate v4 signal axes (brain-data-catalog section F → factors).
rationalization_flag_count: (e) => countBucket012((e.v4_signals || {}).rationalization_flag_count),
judge_verdict: (e) => (e.v4_signals || {}).judge_verdict || 'null',
safe_baseline_action: (e) => (e.v4_signals || {}).safe_baseline_action || 'null',
judge_calls_bucket: (e) => judgeCallsBucket((e.v4_signals || {}).judge_calls),
};
// Pass 4 — decode prompt_embedding_base64 to Float32Array. Mirrors
// observer-embedding-index safeDecode but kept private here to avoid
// circular surface; analyzer only needs the target-embedding decode path.

/**
 * Decode a base64 string into a Float32Array view over the decoded bytes.
 *
 * @param {*} b64 - base64 text of packed 32-bit floats.
 * @returns {Float32Array|null} the vector, or null when the input is not a
 *   non-empty string, decodes to zero bytes or to a length that is not a
 *   multiple of 4, contains a non-finite component, or the view cannot be
 *   constructed.
 */
function decodeTargetEmbedding(b64) {
  if (typeof b64 !== 'string' || b64.length === 0) return null;
  try {
    const bytes = Buffer.from(b64, 'base64');
    const { byteLength } = bytes;
    if (byteLength === 0 || byteLength % 4 !== 0) return null;
    // A view, not a copy: it shares memory with the decoded Buffer.
    const floats = new Float32Array(bytes.buffer, bytes.byteOffset, byteLength / 4);
    return floats.every((component) => Number.isFinite(component)) ? floats : null;
  } catch {
    return null;
  }
}
/**
 * Build the factor matrix (spec §6): for every factor, rows are the factor's
 * values and columns are counts per inferred outcome.
 *
 * @param {Iterable<object>} episodesWithOutcome - episodes already stamped
 *   with `_inferredOutcome` (missing → counted under 'unknown').
 * @returns {Record<string, Record<string, Record<string, number>>>}
 *   factor → value → outcome → count. Includes every FACTOR_FNS key plus
 *   `chain_ref`.
 */
export function buildFactorMatrix(episodesWithOutcome) {
  const outcomeOf = (episode) => episode._inferredOutcome || 'unknown';
  const tally = (table, key, outcome) => {
    const row = table[key] || {};
    table[key] = row;
    row[outcome] = (row[outcome] || 0) + 1;
  };

  const matrix = {};
  for (const factor of Object.keys(FACTOR_FNS)) {
    const extract = FACTOR_FNS[factor];
    const table = {};
    matrix[factor] = table;
    for (const episode of episodesWithOutcome) {
      tally(table, extract(episode), outcomeOf(episode));
    }
  }

  // chain_ref is multi-value, so it cannot go through the single-value loop
  // above: an episode on several chains counts once per chain, and an episode
  // with no chain list counts under the literal key "null".
  const chainTable = {};
  matrix.chain_ref = chainTable;
  for (const episode of episodesWithOutcome) {
    const refs = (episode.primary_rationale || {}).chain_ref;
    const keys = Array.isArray(refs) && refs.length > 0 ? refs : ['null'];
    for (const key of keys) {
      tally(chainTable, key, outcomeOf(episode));
    }
  }
  return matrix;
}
// ────────────────────────────────────────────────────────────────
// New cut helpers — normalize recommended id to '#N' form for canon
// comparison regardless of whether the source stored 19 or '#19'.
// ────────────────────────────────────────────────────────────────

/**
 * Normalize a node id to its '#N' spelling.
 *
 * @param {*} id - node id such as 19, '19' or '#19'.
 * @returns {string|null} trimmed id with a leading '#', or null for
 *   null/undefined input.
 */
function normalizeNodeId(id) {
  if (id === null || id === undefined) return null;
  const text = String(id).trim();
  if (text.startsWith('#')) return text;
  return `#${text}`;
}
/**
 * Tell whether the router recommended anything for an episode: a node, or a
 * non-empty chain. `primary_rationale` takes precedence over
 * `classifier_output` for each field.
 *
 * @param {object} ep - episode.
 * @returns {boolean}
 */
function hasRecommendation(ep) {
  const rationale = ep.primary_rationale || {};
  const classifier = ep.classifier_output || {};
  if (rationale.recommended_node || classifier.recommended_node) return true;
  const chain = rationale.recommended_chain || classifier.recommended_chain;
  return Array.isArray(chain) && chain.length > 0;
}
/**
 * Recommended node of an episode, preferring `primary_rationale` over
 * `classifier_output`.
 *
 * @param {object} ep - episode.
 * @returns {*} the first truthy recommended_node, or null when there is none.
 */
function getRecommendedNode(ep) {
  const fromRationale = (ep.primary_rationale || {}).recommended_node;
  if (fromRationale) return fromRationale;
  const fromClassifier = (ep.classifier_output || {}).recommended_node;
  return fromClassifier || null;
}
/**
 * Recommended chain of an episode, preferring `primary_rationale` over
 * `classifier_output`. The first truthy value wins even when it is an empty
 * array, so an empty rationale chain hides a classifier chain.
 *
 * @param {object} ep - episode.
 * @returns {Array} the chosen chain, or [] when the chosen value is not an array.
 */
function getRecommendedChain(ep) {
  const fromRationale = (ep.primary_rationale || {}).recommended_chain;
  const fromClassifier = (ep.classifier_output || {}).recommended_chain;
  const candidate = fromRationale || fromClassifier;
  if (Array.isArray(candidate)) return candidate;
  return [];
}
/**
 * Cut 8 — Class × canon coverage.
 *
 * Produces one row per task_classification seen in the episodes (missing
 * classification → 'other'), ordered by episode count, highest first.
 *
 * @param {Array<object>} episodes
 * @param {Record<string, string[]>} [classificationMap] - classification →
 *   canonical node ids (e.g. '#34').
 * @returns {Array<{classification: string, count: number,
 *   canonicalNodes: string[], routerRecommended: number, claudeTook: number,
 *   recWithinCanon: number, rework: number}>}
 */
export function buildClassCanonCoverage(episodes, classificationMap) {
  const canonByClass = classificationMap || {};
  const rows = new Map();

  for (const ep of episodes) {
    const rationale = ep.primary_rationale || {};
    const classification = rationale.task_classification || 'other';

    let row = rows.get(classification);
    if (!row) {
      const canon = canonByClass[classification];
      row = {
        classification,
        count: 0,
        canonicalNodes: canon ? [...canon] : [],
        routerRecommended: 0,
        claudeTook: 0,
        recWithinCanon: 0,
        rework: 0,
      };
      rows.set(classification, row);
    }
    row.count += 1;

    // Every id the router recommended (single node first, then the chain),
    // normalized to '#N'. A non-empty list is exactly "router recommended".
    const recommendedIds = [];
    const recNode = getRecommendedNode(ep);
    if (recNode) recommendedIds.push(normalizeNodeId(recNode));
    for (const chainId of getRecommendedChain(ep)) {
      recommendedIds.push(normalizeNodeId(chainId));
    }
    if (recommendedIds.length > 0) {
      row.routerRecommended += 1;
      const canonSet = new Set(row.canonicalNodes.map(normalizeNodeId));
      const withinCanon = recommendedIds.some((id) => id && canonSet.has(id));
      if (withinCanon) row.recWithinCanon += 1;
    }

    const nodeChosen = rationale.node_chosen;
    if (nodeChosen && nodeChosen !== 'direct') row.claudeTook += 1;
    if (ep.outcome_reviewed === 'rework') row.rework += 1;
  }

  return Array.from(rows.values()).sort((left, right) => right.count - left.count);
}
/**
 * Cut 9 — Router vs Opus three-section breakdown.
 *
 * Only episodes with an object `review` that has no `reviewer_error` are
 * considered:
 *   - sectionC: router recommended something and the review rated the node
 *     'correct' with no better alternative.
 *   - sectionA: router recommended something and the review did not fully
 *     agree (any other node_quality, or an alternative was named).
 *   - sectionB: router recommended nothing and the review named an alternative.
 * Reviewed episodes with no recommendation and no alternative go nowhere.
 *
 * @param {Array<object>} episodes
 * @returns {{sectionA: Array<object>, sectionB: Array<object>, sectionC: Array<object>}}
 */
export function buildRouterVsOpus(episodes) {
  const sectionA = [];
  const sectionB = [];
  const sectionC = [];

  for (const ep of episodes) {
    const review = ep.review;
    const usable = review && typeof review === 'object' && !review.reviewer_error;
    if (!usable) continue;

    const rationale = ep.primary_rationale || {};
    const routerSpoke = hasRecommendation(ep);
    const recNode = getRecommendedNode(ep);
    const recChain = getRecommendedChain(ep);
    // Fields shared by all three sections, in their output order.
    const common = {
      time: (ep.timestamps || {}).started_at || null,
      taskId: String(ep.task_id || '').slice(0, 8),
      classification: rationale.task_classification || 'other',
    };
    const outcomeReviewed = ep.outcome_reviewed || 'unknown';

    if (!routerSpoke) {
      if (review.alternative_better) {
        // Section B: router silent, Opus identified a better node.
        sectionB.push({
          ...common,
          opusSuggests: review.alternative_better,
          outcomeReviewed,
          opusReasoning: String(review.reasoning || '').slice(0, 200),
        });
      }
      continue;
    }

    // A chain, when present, is reported in preference to the single node.
    const routerRecommendation = recChain.length > 0 ? recChain : recNode;
    const agreed = review.node_quality === 'correct' && !review.alternative_better;
    if (agreed) {
      // Section C: router gave a recommendation and Opus found nothing better.
      sectionC.push({ ...common, routerRecommendation, outcomeReviewed });
      continue;
    }
    // Section A: router gave a recommendation but Opus disagreed or hedged
    // (wrong_node / disputable / has alternative).
    sectionA.push({
      ...common,
      routerRecommendation,
      claudeChose: rationale.node_chosen || 'direct',
      opusNodeQuality: review.node_quality || 'n/a',
      opusChainQuality: review.chain_quality || 'n/a',
      outcomeReviewed,
      opusAlternative: review.alternative_better || null,
      opusRootCause: review.error_root_cause || 'n/a',
    });
  }

  return { sectionA, sectionB, sectionC };
}
/**
 * Cut 10 — Chain-ignore breakdown.
 *
 * Separates chain recommendations from node-only recommendations and counts,
 * for each, how many were ignored (node_chosen absent or 'direct') and how
 * many of the ignored ones were reviewed as 'rework'. Chain recommendations
 * are additionally bucketed by chain length ('1', '2', '3+').
 *
 * @param {Array<object>} episodes
 * @returns {object} counters; see the literal below for the exact keys.
 */
export function buildChainIgnoreBreakdown(episodes) {
  const emptyBucket = () => ({ count: 0, ignored: 0, rework: 0 });
  const result = {
    totalChainRecommendations: 0,
    ignoredChainCount: 0,
    ignoredChainRework: 0,
    totalNodeOnlyRecommendations: 0,
    ignoredNodeOnlyCount: 0,
    ignoredNodeOnlyRework: 0,
    breakdownByChainLength: {
      '1': emptyBucket(),
      '2': emptyBucket(),
      '3+': emptyBucket(),
    },
  };

  for (const ep of episodes) {
    const recNode = getRecommendedNode(ep);
    const chainLength = getRecommendedChain(ep).length;
    const chosen = (ep.primary_rationale || {}).node_chosen || 'direct';
    const ignored = chosen === 'direct';
    const reworked = ep.outcome_reviewed === 'rework';

    if (chainLength > 0) {
      const bucketKey = chainLength >= 3 ? '3+' : String(chainLength);
      const bucket = result.breakdownByChainLength[bucketKey];
      result.totalChainRecommendations += 1;
      bucket.count += 1;
      if (!ignored) continue;
      result.ignoredChainCount += 1;
      bucket.ignored += 1;
      // Rework is only attributed to recommendations that were ignored.
      if (reworked) {
        result.ignoredChainRework += 1;
        bucket.rework += 1;
      }
      continue;
    }

    if (!recNode) continue;
    result.totalNodeOnlyRecommendations += 1;
    if (ignored) {
      result.ignoredNodeOnlyCount += 1;
      if (reworked) result.ignoredNodeOnlyRework += 1;
    }
  }
  return result;
}
/**
 * Stream H Task 8 — Table 16: per-rule router-gate hook effectiveness.
 *
 * Groups `episode.hook_fired` records by their `rule` name and counts, per
 * rule, how often the hook fired and how often it ended with
 * `outcome === 'block'`. Episodes whose `hook_fired` is missing, not an
 * object, or has a non-string `rule` are skipped.
 *
 * @param {*} episodes - episode list; a non-array yields an empty result.
 * @returns {{rules: Record<string, {fires: number, blocks: number}>}}
 */
export function buildRouterGateHookEffectiveness(episodes) {
  const rules = {};
  if (!Array.isArray(episodes)) return { rules };

  for (const episode of episodes) {
    const hook = episode && episode.hook_fired;
    const usable = hook && typeof hook === 'object' && typeof hook.rule === 'string';
    if (!usable) continue;

    const { rule, outcome } = hook;
    const tally = rules[rule] || { fires: 0, blocks: 0 };
    rules[rule] = tally;
    tally.fires += 1;
    if (outcome === 'block') tally.blocks += 1;
  }
  return { rules };
}
/**
 * Stream H Task 8 — Table 17: self-fabrication signal detection.
 *
 * Splits the episodes that carry a non-empty string `controller_claim`:
 *   - fabrications: `tool_uses` is missing, not an array, or empty — the
 *     controller said it acted but no recorded tool_use backs it up.
 *   - legit: at least one tool_use is recorded.
 * Episodes without a claim are not counted at all.
 *
 * @param {*} episodes - episode list; a non-array yields two empty lists.
 * @returns {{fabrications: Array<object>, legit: Array<object>}}
 */
export function buildSelfFabricationSignals(episodes) {
  const fabrications = [];
  const legit = [];
  if (Array.isArray(episodes)) {
    for (const episode of episodes) {
      const claim = episode ? episode.controller_claim : undefined;
      if (typeof claim !== 'string' || claim === '') continue;
      const backed = Array.isArray(episode.tool_uses) && episode.tool_uses.length > 0;
      (backed ? legit : fabrications).push(episode);
    }
  }
  return { fabrications, legit };
}
/**
 * Full deterministic aggregation: dedup → infer outcomes → group → chains →
 * matrix → missed activations, plus the v4 review/cost roll-ups and cuts 8-11.
 *
 * Side effect: the episode objects passed in are annotated in place with
 * `_inferredOutcome`, `_interPromptGapMin` (when computable) and
 * `_similarPastOutcomeMajority`; the factor matrix reads those stamps.
 *
 * @param {Array<object>} episodes - raw episodes, including observer_error markers.
 * @param {object} [options]
 * @param {object} [options.classificationMap] - classification → canonical node ids.
 * @param {object} [options.dormancy] - forwarded to detectMissedActivations.
 * @param {string} [options.hookOutcomesLedgerPath] - ledger for the chain-hook cut.
 * @param {string} [options.periodStart] - lower bound for the chain-hook cut.
 * @param {string} [options.periodEnd] - upper bound for the chain-hook cut.
 * @returns {object} the aggregate report; see the returned literal for its keys.
 */
export function analyze(episodes, options = {}) {
const deduped = dedupeEpisodes(episodes);
const allNormal = deduped.filter((e) => !e.observer_error);
// v1 episodes lack environment / prompt_signal / decision_provenance — they
// pollute the factor matrix and break outcome inference. Only episodes with
// schema_version >= 2 are analyzed; the rest are counted in v1SkippedCount.
const normal = allNormal.filter((e) => e.schema_version >= 2);
const v1SkippedCount = allNormal.length - normal.length;
for (const eps of bySessionSorted(normal).values()) {
eps.forEach((episode, i) => {
// The outcome of a turn depends on the NEXT turn of the same session.
episode._inferredOutcome = inferOutcome(episode, eps[i + 1]);
// Pass 3 — inter-prompt gap (project-brain-factor-analysis-4passes).
// Cross-episode signal: minutes between this episode's start and the
// previous (same-session) episode's end. First episode of a session
// has no prev → stays undefined → bucket 'null'.
if (i > 0) {
const prevEnded = (eps[i - 1].timestamps || {}).ended_at;
const curStarted = (episode.timestamps || {}).started_at;
const ms = new Date(curStarted) - new Date(prevEnded);
// Unparsable timestamps give NaN and overlapping turns give a negative
// gap; both leave the stamp unset.
if (Number.isFinite(ms) && ms >= 0) episode._interPromptGapMin = ms / 60000;
}
});
}
// Pass 4 — semantic NN lookup (project-brain-factor-analysis-4passes).
// Build a single global index from episodes with resolved outcomes +
// embeddings, then for EACH episode (resolved or not) find its top-3
// nearest neighbours and stamp the majority family on _similarPastOutcomeMajority.
// O(N²) is fine: typical session has ~50-500 episodes, k=3, embedding=384-dim.
// Future: switch to HNSW / faiss when episode count crosses ~10k.
const embeddingIndex = buildEmbeddingIndex(normal);
for (const episode of normal) {
const target = decodeTargetEmbedding(episode.prompt_embedding_base64);
if (!target) {
episode._similarPastOutcomeMajority = 'no_neighbors';
continue;
}
// task_id is the SESSION id (shared across turns), not a turn id —
// exclude self by (task_id|started_at), the same dedupe key buildIndex uses.
const excludeKey = `${episode.task_id || ''}|${(episode.timestamps || {}).started_at || ''}`;
const neighbours = findNearestNeighbors(target, embeddingIndex, 3, { excludeKey });
episode._similarPastOutcomeMajority = majorityOutcome(neighbours);
}
const classificationMap = options.classificationMap || {};
const dormancy = options.dormancy || {};
const disciplineByClassification = disciplinePercentByClassification(normal, classificationMap);
const routerStep = routerStepReached(normal);
const boundariesRate = boundariesAppliedRate(normal);
// Phase 3 Task 20 — v4 aggregation: inheritance count + reviewer outcome
// distribution + cost totals. Reads schema_version >=4 fields gracefully.
let inheritanceCount = 0;
const reviewQuality = { correct: 0, wrong_node: 0, overkill: 0, underkill: 0, disputable: 0 };
const reviewerCoverage = { reviewed: 0, pending: 0, errored: 0 };
let degradedCount = 0;
const costTotals = {
classifier_input_tokens: 0,
classifier_output_tokens: 0,
self_assessment_input_tokens: 0,
self_assessment_output_tokens: 0,
reviewer_input_tokens: 0,
reviewer_output_tokens: 0,
};
for (const e of normal) {
if (e?.inheritance?.inherited_from_task_id) inheritanceCount += 1;
if (e?.degraded_mode === true) degradedCount += 1;
const r = e?.review;
if (r && typeof r === 'object') {
if (r.reviewer_error) reviewerCoverage.errored += 1;
else if (typeof r.node_quality === 'string') {
reviewerCoverage.reviewed += 1;
// node_quality values outside the known set still count as reviewed but
// are not added to any reviewQuality bucket.
if (reviewQuality[r.node_quality] !== undefined) reviewQuality[r.node_quality] += 1;
}
} else if (e?.schema_version >= 4) {
// Only v4+ episodes are expected to carry a review, so only those count
// as pending when it is missing.
reviewerCoverage.pending += 1;
}
const tc = e?.task_cost;
if (tc && typeof tc === 'object') {
for (const k of Object.keys(costTotals)) {
const v = tc[k];
if (typeof v === 'number' && Number.isFinite(v)) costTotals[k] += v;
}
}
}
// Cuts 8/9/10 — use classificationMap derived from nodes.yaml (registry-to-classification-map.mjs).
// Archive-fallback REMOVED 2026-05-28 — was stale source of #37/deploy noise.
const canonMapForCuts = classificationMap || {};
return {
episodeCount: normal.length,
v1SkippedCount,
observerErrorCount: deduped.length - allNormal.length,
tasks: groupEpisodesToTasks(normal),
causalChains: findCausalChains(normal),
factorMatrix: buildFactorMatrix(normal),
missedActivations: detectMissedActivations(normal, classificationMap, dormancy),
disciplineByClassification,
routerStep,
boundariesRate,
inheritanceCount,
reviewQuality,
reviewerCoverage,
degradedCount,
costTotals,
classCanonCoverage: buildClassCanonCoverage(normal, canonMapForCuts),
routerVsOpus: buildRouterVsOpus(normal),
chainIgnoreBreakdown: buildChainIgnoreBreakdown(normal),
chainHookEffectiveness: analyzeChainHookEffectiveness({
ledgerPath: options && options.hookOutcomesLedgerPath,
periodStart: options && options.periodStart,
periodEnd: options && options.periodEnd,
}),
routerGateHookEffectiveness: buildRouterGateHookEffectiveness(normal),
selfFabricationSignals: buildSelfFabricationSignals(normal),
};
}
/**
 * Read episodes from JSONL files. Paths that do not exist are skipped, as are
 * blank lines and lines that fail to parse as JSON.
 *
 * @param {Iterable<string>} files - JSONL file paths.
 * @returns {Array<*>} parsed values in file order, then line order.
 */
function loadEpisodes(files) {
  const episodes = [];
  for (const file of files) {
    if (!existsSync(file)) continue;
    const rows = readFileSync(file, 'utf-8')
      .split('\n')
      .map((row) => row.trim())
      .filter((row) => row !== '');
    for (const row of rows) {
      try {
        episodes.push(JSON.parse(row));
      } catch {
        // Deliberately tolerant: a broken line must not abort the whole load.
      }
    }
  }
  return episodes;
}
// CLI entry point: runs only when this file is the script being executed
// (path separators are normalized so the check also works on Windows).
// Usage: brain-retro-analyzer.mjs <episodes.jsonl>...
if (process.argv[1] && process.argv[1].replace(/\\/g, '/').endsWith('/brain-retro-analyzer.mjs')) {
  const [, , ...episodeFiles] = process.argv;
  const registry = loadRegistry({ useCache: false });
  const analysisOptions = {
    classificationMap: buildClassificationMap(registry),
    dormancy: buildDormancyMap(registry),
  };
  const report = analyze(loadEpisodes(episodeFiles), analysisOptions);
  console.log(JSON.stringify(report, null, 2));
  process.exit(0);
}