#!/usr/bin/env node
/**
 * Brain-retro analyzer (brain governance, observer factor-analysis spec §6).
 * Pure, deterministic Layer-4 aggregation over observer episodes for the
 * /brain-retro skill. Read-only — never writes JSONL. No LLM.
 *
 * Security Guidance #40: pure parsing — no exec/execSync.
 */
import { Buffer } from 'buffer';
import { readFileSync, existsSync } from 'fs';
import { join as pathJoin } from 'path';
import { homedir } from 'os';
import { detectMissedActivations } from './missed-activations.mjs';
import {
  disciplinePercentByClassification,
  routerStepReached,
  boundariesAppliedRate,
} from './discipline-metrics.mjs';
import { loadRegistry } from './registry-load.mjs';
import { buildClassificationMap, buildDormancyMap } from './registry-to-classification-map.mjs';
import {
  buildIndex as buildEmbeddingIndex,
  findNearestNeighbors,
  majorityOutcome,
} from './observer-embedding-index.mjs';

/** Outcome values counted for `chain-recommendation` ledger entries, in report order. */
export const CHAIN_OUTCOME_BUCKETS = [
  'blocked',
  'passed-with-skill',
  'passed-inline-override',
  'passed-global-override',
  'passed-short-chain',
  'passed-no-mutating',
];

/**
 * Count `chain-recommendation` entries of the hook-outcomes ledger per outcome bucket.
 *
 * @param {object} [params]
 * @param {string} [params.ledgerPath] JSONL ledger path; defaults to
 *   `~/.claude/runtime/hook-outcomes.jsonl`.
 * @param {string} [params.periodStart] Inclusive lower bound, parsed with `Date.parse`.
 * @param {string} [params.periodEnd] Inclusive upper bound, parsed with `Date.parse`.
 * @returns {{total: number, buckets: Record<string, number>}} Zero counts when the
 *   ledger cannot be read.
 */
export function analyzeChainHookEffectiveness({ ledgerPath, periodStart, periodEnd } = {}) {
  const lp = ledgerPath || pathJoin(homedir(), '.claude', 'runtime', 'hook-outcomes.jsonl');
  const buckets = Object.fromEntries(CHAIN_OUTCOME_BUCKETS.map((b) => [b, 0]));
  let total = 0;
  let raw;
  try {
    raw = readFileSync(lp, 'utf-8');
  } catch {
    // Missing/unreadable ledger is an expected state (hook never fired).
    return { total: 0, buckets };
  }
  // An unparsable bound becomes NaN; NaN comparisons are false, so it filters nothing.
  const startMs = periodStart ? Date.parse(periodStart) : -Infinity;
  const endMs = periodEnd ? Date.parse(periodEnd) : Infinity;
  for (const line of raw.split('\n')) {
    if (!line.trim()) continue;
    let entry;
    try {
      entry = JSON.parse(line);
    } catch {
      continue; // skip broken line
    }
    if (!entry || entry.rule !== 'chain-recommendation') continue;
    const ts = Date.parse(entry.ts || '');
    if (Number.isNaN(ts) || ts < startMs || ts > endMs) continue;
    // Entries with an outcome outside the known bucket list are not counted.
    if (!CHAIN_OUTCOME_BUCKETS.includes(entry.outcome)) continue;
    buckets[entry.outcome] += 1;
    total += 1;
  }
  return { total, buckets };
}

/**
 * Render the "Cut 11" markdown table from the result of analyzeChainHookEffectiveness.
 *
 * @param {{total: number, buckets: Record<string, number>}} stats
 * @returns {string} Markdown table, or a one-line placeholder when `total` is 0.
 */
export function buildChainHookEffectiveness({ total, buckets }) {
  if (!total) return '_(нет данных за период — хук не срабатывал или ledger пуст)_\n';
  const lines = [
    '### Cut 11: Chain-hook effectiveness',
    '',
    '| Outcome | Count | % |',
    '|---|---:|---:|',
  ];
  for (const b of CHAIN_OUTCOME_BUCKETS) {
    const c = (buckets && buckets[b]) || 0;
    const pct = total ? Math.round((c / total) * 100) : 0;
    lines.push(`| ${b} | ${c} | ${pct}% |`);
  }
  lines.push(`| **TOTAL** | **${total}** | **100%** |`);
  return lines.join('\n') + '\n';
}

// Tool-call thresholds used by sizeBucket: < SMALL → small, <= LARGE → medium, else large.
const SIZE_SMALL = 20;
const SIZE_LARGE = 60;

/**
 * Deduplicate the routing-gate double-write: a turn that was blocked then
 * re-stopped yields two episodes with the same task_id + started_at. Keep the
 * last (most complete). observer_error markers are all kept.
 *
 * @param {Array<object>} episodes Raw episodes; null entries are dropped and a
 *   non-array input is treated as empty.
 * @returns {Array<object>} Deduplicated normal episodes followed by all error markers.
 */
export function dedupeEpisodes(episodes) {
  const list = Array.isArray(episodes) ? episodes : [];
  const errors = list.filter((e) => e && e.observer_error);
  const normal = list.filter((e) => e && !e.observer_error);
  const byKey = new Map();
  for (const e of normal) {
    // Later writes overwrite earlier ones under the same key.
    byKey.set(`${e.task_id}|${(e.timestamps || {}).started_at}`, e);
  }
  return [...byKey.values(), ...errors];
}

/**
 * Infer the true outcome of an episode from its events + the next episode's prompt.
 *
 * @param {object} episode Episode whose outcome is inferred.
 * @param {object} [nextEpisode] The following episode of the same session, if any.
 * @returns {'partial'|'blocked'|'rework'|'success'|'soft_success'|'unknown'}
 */
export function inferOutcome(episode, nextEpisode) {
  const events = episode && Array.isArray(episode.events) ? episode.events : [];
  // Null-safe like countEventKind: a null entry in `events` must not throw.
  const hasKind = (kind) => events.some((e) => e && e.kind === kind);
  if (hasKind('interrupt')) {
    return 'partial';
  }
  // A turn is `blocked` only when it ENDED on an unrecovered tool failure —
  // emitted by the parser as a single `unrecovered_error` event when the
  // LAST tool_result of the turn was is_error=true. Raw error/retry counts
  // do NOT imply blocked: a TDD red→green cycle or a grep that returns
  // nothing both surface as `error` events but are intentional and
  // recovered — counting them as blocked over-reports failures (A-1 fix).
  if (hasKind('unrecovered_error')) {
    return 'blocked';
  }
  // 'failure' (work wrong AND never corrected) is a judgment, not
  // deterministically recoverable from a transcript — deferred to the phase-2
  // agent-judge. Until then a wrong-then-corrected turn surfaces as 'rework'.
  if (!nextEpisode) return 'unknown';
  if (nextEpisode.prompt_signal === 'correction') return 'rework';
  if (nextEpisode.prompt_signal === 'approval' || nextEpisode.prompt_signal === 'new_task') {
    return 'success';
  }
  // Task 16: neutral next-prompt = silent success. If the operator continued
  // with the next instruction without correction markers, that is "no
  // objection". Slightly weaker signal than explicit approval — labelled
  // `soft_success`.
  if (nextEpisode.prompt_signal === 'neutral') return 'soft_success';
  return 'unknown';
}

/**
 * Group non-error episodes by `task_id` (falling back to 'unknown') and sort each
 * group by `timestamps.started_at` compared as strings.
 *
 * @param {Array<object>} episodes
 * @returns {Map<string, Array<object>>}
 */
function bySessionSorted(episodes) {
  const map = new Map();
  for (const e of episodes) {
    if (!e || e.observer_error) continue;
    const sid = e.task_id || 'unknown';
    if (!map.has(sid)) map.set(sid, []);
    map.get(sid).push(e);
  }
  for (const eps of map.values()) {
    eps.sort((a, b) =>
      String((a.timestamps || {}).started_at).localeCompare(String((b.timestamps || {}).started_at))
    );
  }
  return map;
}

/**
 * Group episodes into tasks: a new task starts after a success or on a new_task prompt.
 *
 * @param {Array<object>} episodes
 * @returns {Array<{task_ref: string, episodes: Array<object>}>} `task_ref` is
 *   `<session id>#<1-based index across all tasks>`.
 */
export function groupEpisodesToTasks(episodes) {
  const tasks = [];
  for (const [sid, eps] of bySessionSorted(episodes)) {
    let current = null;
    eps.forEach((episode, i) => {
      const prev = eps[i - 1];
      const prevOutcome = prev ? inferOutcome(prev, episode) : null;
      const isNewTask =
        i === 0 || prevOutcome === 'success' || episode.prompt_signal === 'new_task';
      if (isNewTask) {
        current = { task_ref: `${sid}#${tasks.length + 1}`, episodes: [] };
        tasks.push(current);
      }
      current.episodes.push(episode);
    });
  }
  return tasks;
}
// Hot/normative files — touched by almost every turn (memory store, CLAUDE.md,
// STATUS.md, episodes JSONL). Sharing one of these is not evidence of a causal
// chain; it just means both turns brushed the same hot file. Excluded from the
// shared-file signal (A-3 fix).
const HOT_FILE_PATTERNS = [
  /(?:^|[\\/])CLAUDE\.md$/i,
  /(?:^|[\\/])MEMORY\.md$/i,
  /(?:^|[\\/])STATUS\.md$/i,
  /[\\/]episodes-\d{4}-\d{2}\.jsonl$/i,
  /[\\/]memory[\\/][^\\/]+\.md$/i,
];

/**
 * Whether `path` matches one of HOT_FILE_PATTERNS.
 *
 * @param {unknown} path Coerced to string; null/undefined/empty never match.
 * @returns {boolean}
 */
export function isHotFile(path) {
  const s = String(path || '');
  return HOT_FILE_PATTERNS.some((re) => re.test(s));
}

/**
 * Causal-chain candidates: an errored episode → a later episode sharing a file.
 *
 * Episodes are ordered by `timestamps.started_at` (string comparison). For each
 * episode with at least one `error` event, the FIRST later episode that touches
 * one of its non-hot files produces a chain entry.
 *
 * @param {Array<object>} episodes
 * @returns {Array<{from: string, to: string, sharedFiles: string[]}>} `from`/`to`
 *   are `task_id|started_at` keys.
 */
export function findCausalChains(episodes) {
  const startedAt = (e) => String((e.timestamps || {}).started_at);
  const nonHotFiles = (e) => (((e.task_size || {}).files) || []).filter((f) => !isHotFile(f));
  const sorted = episodes
    .filter((e) => e && !e.observer_error)
    .sort((a, b) => startedAt(a).localeCompare(startedAt(b))); // filter() already copied
  const chains = [];
  for (let i = 0; i < sorted.length - 1; i++) {
    const a = sorted[i];
    const hasError = Array.isArray(a.events) && a.events.some((e) => e && e.kind === 'error');
    if (!hasError) continue;
    const filesA = new Set(nonHotFiles(a));
    if (filesA.size === 0) continue;
    for (let j = i + 1; j < sorted.length; j++) {
      const b = sorted[j];
      const shared = nonHotFiles(b).filter((f) => filesA.has(f));
      if (shared.length > 0) {
        chains.push({
          from: `${a.task_id}|${(a.timestamps || {}).started_at}`,
          to: `${b.task_id}|${(b.timestamps || {}).started_at}`,
          sharedFiles: shared,
        });
        break; // only the nearest follower is reported per errored episode
      }
    }
  }
  return chains;
}

/** Bucket a tool-call count: missing/non-numeric counts as 0 → 'small'. */
function sizeBucket(toolCalls) {
  const n = Number(toolCalls) || 0;
  return n < SIZE_SMALL ? 'small' : n <= SIZE_LARGE ? 'medium' : 'large';
}

const SESSION_TURN_EARLY = 10;
const SESSION_TURN_LATE = 40;

/**
 * Bucket a session turn number into 'early' / 'mid' / 'late'.
 * Missing values bucket as 'null'. null and '' are rejected BEFORE numeric
 * coercion because `Number(null)` and `Number('')` are 0, which would falsely
 * report a missing turn as 'early'.
 */
function sessionTurnBucket(turn) {
  if (turn == null || turn === '') return 'null';
  const n = Number(turn);
  if (!Number.isFinite(n)) return 'null';
  return n < SESSION_TURN_EARLY ? 'early' : n <= SESSION_TURN_LATE ? 'mid' : 'late';
}
// Pass 1 cheap-axis helpers (project-brain-factor-analysis-4passes).

/** Count events whose `kind` equals `kind`; non-array input and null entries count as 0. */
function countEventKind(events, kind) {
  if (!Array.isArray(events)) return 0;
  let c = 0;
  for (const ev of events) if (ev && ev.kind === kind) c++;
  return c;
}

/** Bucket the number of `retry` events: '0' | '1-2' | '3+'. */
function retryBucket(events) {
  const n = countEventKind(events, 'retry');
  return n === 0 ? '0' : n <= 2 ? '1-2' : '3+';
}

/** Bucket the number of `error` events: '0' | '1' | '2+'. */
function errorBucket(events) {
  const n = countEventKind(events, 'error');
  return n === 0 ? '0' : n === 1 ? '1' : '2+';
}

/** Bucket an iteration count: '0' (also for missing/non-positive) | '1-3' | '4-10' | '11+'. */
function iterationsBucket(iterations) {
  const n = Number(iterations);
  if (!Number.isFinite(n) || n <= 0) return '0';
  if (n <= 3) return '1-3';
  if (n <= 10) return '4-10';
  return '11+';
}

// Pass 2 — classifier latency bucket. <500ms = fast (cache hit territory),
// 500-2000 = medium (cold call), 2000-10000 = slow (network jitter / overflow),
// >10000 = very_slow (retries fired). Null on non-LLM paths.
function latencyBucket(latency) {
  // Reject null / '' BEFORE coercion: Number(null) and Number('') are 0, which
  // would falsely bucket a missing latency (non-LLM path) as 'fast'.
  if (latency == null || latency === '') return 'null';
  const n = Number(latency);
  if (!Number.isFinite(n) || n < 0) return 'null';
  if (n < 500) return 'fast';
  if (n < 2000) return 'medium';
  if (n < 10000) return 'slow';
  return 'very_slow';
}

// Pass 3 helpers (project-brain-factor-analysis-4passes).

/** Bucket a prompt length in characters; missing or non-positive → 'null'. */
function promptLengthBucket(n) {
  const v = Number(n);
  if (!Number.isFinite(v) || v <= 0) return 'null';
  if (v < 100) return 'short';
  if (v < 1000) return 'medium';
  if (v < 2500) return 'long';
  return 'huge';
}

/** Bucket a timestamp by its UTC hour: night (<6) | morning (<12) | afternoon (<18) | evening. */
function timeOfDayBucket(iso) {
  // Reject null / undefined / empty BEFORE Date construction: `new Date(null)`
  // is the epoch (1970-01-01), not NaN — would falsely bucket missing
  // timestamps as 'night'.
  if (iso == null || iso === '') return 'null';
  const d = new Date(iso);
  if (Number.isNaN(d.getTime())) return 'null';
  const h = d.getUTCHours();
  if (h < 6) return 'night';
  if (h < 12) return 'morning';
  if (h < 18) return 'afternoon';
  return 'evening';
}

const WEEKDAY_NAMES = ['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat'];

/** Three-letter UTC weekday of a timestamp, or 'null' when missing/unparsable. */
function dayOfWeekLabel(iso) {
  if (iso == null || iso === '') return 'null';
  const d = new Date(iso);
  if (Number.isNaN(d.getTime())) return 'null';
  return WEEKDAY_NAMES[d.getUTCDay()];
}

/** Bucket a gap in minutes: '<1m' | '1-10m' | '10-60m' | '60m+'; missing/negative → 'null'. */
function interPromptGapBucket(min) {
  // Same null-coercion guard as latencyBucket: Number(null) is 0, not NaN.
  if (min == null || min === '') return 'null';
  const v = Number(min);
  if (!Number.isFinite(v) || v < 0) return 'null';
  if (v < 1) return '<1m';
  if (v < 10) return '1-10m';
  if (v < 60) return '10-60m';
  return '60m+';
}

/**
 * Dominant key of a `{ type: count }` distribution.
 * Returns 'none' when there is no positive count and 'mixed' on a tie for first place.
 * Counts are coerced to numbers up front so that string counts compare
 * numerically ('10' vs '9') rather than lexicographically.
 */
function fileTypeMain(dist) {
  if (!dist || typeof dist !== 'object') return 'none';
  const entries = Object.entries(dist)
    .map(([type, n]) => [type, Number(n)])
    .filter(([, n]) => n > 0);
  if (entries.length === 0) return 'none';
  let maxN = 0;
  for (const [, n] of entries) if (n > maxN) maxN = n;
  const winners = entries.filter(([, n]) => n === maxN);
  if (winners.length > 1) return 'mixed';
  return winners[0][0];
}

/**
 * Count for `toolName` taken from the FIRST `tool_summary` event that carries
 * `counts`; 0 when there is none or the tool is absent.
 */
function eventToolCount(events, toolName) {
  if (!Array.isArray(events)) return 0;
  for (const ev of events) {
    if (ev && ev.kind === 'tool_summary' && ev.counts) {
      return Number(ev.counts[toolName]) || 0;
    }
  }
  return 0;
}

/** Bucket a count: '0' | '1' | '2+' (missing/non-numeric counts as 0). */
function countBucket012(n) {
  const v = Number(n) || 0;
  return v === 0 ? '0' : v === 1 ? '1' : '2+';
}

/** Bucket a judge-call count: '0' | '1-9' | '10+' (missing/non-numeric counts as 0). */
function judgeCallsBucket(n) {
  const v = Number(n) || 0;
  if (v === 0) return '0';
  if (v < 10) return '1-9';
  return '10+';
}

/**
 * Single-value factor extractors: factor name → function mapping an episode to
 * the row label used in the factor matrix.
 */
const FACTOR_FNS = {
  decision_provenance: (e) => (e.decision_provenance || {}).kind || 'unknown',
  economy_level: (e) => String((e.environment || {}).economy_level ?? 'null'),
  model: (e) => (e.environment || {}).model || 'null',
  post_compaction: (e) => String((e.environment || {}).post_compaction ?? false),
  session_segment_turn: (e) => sessionTurnBucket((e.environment || {}).session_turn),
  parallel_session: (e) => String((e.environment || {}).parallel_session ?? false),
  task_size: (e) => sizeBucket((e.task_size || {}).tool_calls),
  node_chosen: (e) => (e.primary_rationale || {}).node_chosen || 'direct',
  task_classification: (e) => (e.primary_rationale || {}).task_classification || 'other',
  recommended_node_for_direct: (e) => (e.primary_rationale || {}).recommended_node || 'none',
  // Pass 1 — 8 cheap axes (data already in v4 episode, just expose):
  prompt_signal: (e) => e.prompt_signal || 'null',
  classifier_source: (e) => (e.classifier_output || {}).source || 'null',
  degraded_mode: (e) => String(e.degraded_mode ?? false),
  path_type: (e) => e.path_type || 'null',
  retry_count: (e) => retryBucket(e.events),
  error_count: (e) => errorBucket(e.events),
  hard_floor_invoked: (e) =>
    String(((e.primary_rationale || {}).hard_floor || {}).invoked ?? false),
  iterations_bucket: (e) => iterationsBucket((e.task_cost || {}).iterations),
  // Pass 2 — classifier-metric axes (project-brain-factor-analysis-4passes):
  latency_bucket: (e) => latencyBucket((e.classifier_output || {}).latency_ms),
  error_type: (e) => (e.classifier_output || {}).llm_error || 'null',
  // Pass 3 — dynamics axes (project-brain-factor-analysis-4passes):
  prompt_length_bucket: (e) => promptLengthBucket((e.task_meta || {}).prompt_length_chars),
  time_of_day_bucket: (e) => timeOfDayBucket((e.timestamps || {}).started_at),
  day_of_week: (e) => dayOfWeekLabel((e.timestamps || {}).started_at),
  inter_prompt_gap_bucket: (e) => interPromptGapBucket(e._interPromptGapMin),
  mcp_server_used: (e) =>
    (((e.task_meta || {}).mcp_servers_used || []).length > 0 ? 'any' : 'none'),
  file_type_main: (e) => fileTypeMain((e.task_meta || {}).file_type_distribution),
  skill_invocations_bucket: (e) => countBucket012(eventToolCount(e.events, 'Skill')),
  subagent_spawns_bucket: (e) =>
    countBucket012(eventToolCount(e.events, 'Agent') + eventToolCount(e.events, 'Task')),
  // Pass 4 — semantic NN axis (project-brain-factor-analysis-4passes).
  // Reads the pre-computed family label stamped on the episode by analyze()
  // (cross-episode pass via observer-embedding-index). Episodes without an
  // embedding or with no resolved neighbours bucket as 'no_neighbors'.
  similar_past_outcome_majority: (e) => e._similarPastOutcomeMajority || 'no_neighbors',
  // Pass 5 — router-gate v4 signal axes (brain-data-catalog section F → factors).
  rationalization_flag_count: (e) =>
    countBucket012((e.v4_signals || {}).rationalization_flag_count),
  judge_verdict: (e) => (e.v4_signals || {}).judge_verdict || 'null',
  safe_baseline_action: (e) => (e.v4_signals || {}).safe_baseline_action || 'null',
  judge_calls_bucket: (e) => judgeCallsBucket((e.v4_signals || {}).judge_calls),
};

// Pass 4 — decode prompt_embedding_base64 to Float32Array. Mirrors
// observer-embedding-index safeDecode but kept private here to avoid
// circular surface; analyzer only needs the target-embedding decode path.
/**
 * @param {unknown} b64 Base64 of raw 32-bit floats (platform byte order).
 * @returns {Float32Array|null} null for non-strings, empty input, a byte length
 *   that is not a multiple of 4, or any non-finite component.
 */
function decodeTargetEmbedding(b64) {
  if (!b64 || typeof b64 !== 'string') return null;
  try {
    const buf = Buffer.from(b64, 'base64');
    if (buf.byteLength === 0 || buf.byteLength % 4 !== 0) return null;
    // Copy the bytes into a fresh ArrayBuffer instead of viewing buf.buffer at
    // buf.byteOffset: a Buffer may be a slice of a shared pool, and a
    // Float32Array view requires a 4-byte-aligned offset. A misaligned view
    // throws RangeError, which the catch below would silently report as
    // "no embedding".
    const bytes = buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
    const v = new Float32Array(bytes);
    for (let i = 0; i < v.length; i++) if (!Number.isFinite(v[i])) return null;
    return v;
  } catch {
    return null;
  }
}
/**
 * Increment `counts[key]`, always as an OWN property.
 * Keys come from episode data, so names such as '__proto__' or 'constructor'
 * must not resolve to (or be written through) inherited Object members.
 */
function bumpOwnCount(counts, key) {
  const k = String(key);
  const current = Object.prototype.hasOwnProperty.call(counts, k) ? counts[k] : 0;
  Object.defineProperty(counts, k, {
    value: current + 1,
    writable: true,
    enumerable: true,
    configurable: true,
  });
}

/** Return the own object stored at `table[key]`, creating an empty one when absent. */
function ownRow(table, key) {
  const k = String(key);
  if (!Object.prototype.hasOwnProperty.call(table, k)) {
    Object.defineProperty(table, k, {
      value: {},
      writable: true,
      enumerable: true,
      configurable: true,
    });
  }
  return table[k];
}

/**
 * Factor matrix: rows = factor values, columns = outcome distribution (spec §6).
 *
 * @param {Array<object>} episodesWithOutcome Episodes carrying `_inferredOutcome`
 *   (a missing outcome counts as 'unknown').
 * @returns {Record<string, Record<string, Record<string, number>>>}
 *   factor name → factor value → outcome → episode count.
 */
export function buildFactorMatrix(episodesWithOutcome) {
  const matrix = {};
  for (const [fname, fn] of Object.entries(FACTOR_FNS)) {
    matrix[fname] = {};
    for (const e of episodesWithOutcome) {
      bumpOwnCount(ownRow(matrix[fname], fn(e)), e._inferredOutcome || 'unknown');
    }
  }
  // chain_ref is multi-value: a multi-chain episode counts once per chain;
  // null/absent → key "null". Handled outside FACTOR_FNS (single-value loop).
  matrix.chain_ref = {};
  for (const e of episodesWithOutcome) {
    const cr = (e.primary_rationale || {}).chain_ref;
    const outcome = e._inferredOutcome || 'unknown';
    const keys = Array.isArray(cr) && cr.length ? cr : ['null'];
    for (const k of keys) {
      bumpOwnCount(ownRow(matrix.chain_ref, k), outcome);
    }
  }
  return matrix;
}

// ────────────────────────────────────────────────────────────────
// New cut helpers — normalize recommended id to '#N' form for canon
// comparison regardless of whether the source stored 19 or '#19'.
// ────────────────────────────────────────────────────────────────
function normalizeNodeId(id) {
  if (id == null) return null;
  const s = String(id).trim();
  return s.startsWith('#') ? s : `#${s}`;
}

/** True when the episode carries a recommended node or a non-empty recommended chain. */
function hasRecommendation(ep) {
  const pr = ep.primary_rationale || {};
  const co = ep.classifier_output || {};
  const recNode = pr.recommended_node || co.recommended_node;
  const recChain = pr.recommended_chain || co.recommended_chain;
  return !!(recNode || (Array.isArray(recChain) && recChain.length > 0));
}

/** Recommended node: primary_rationale first, then classifier_output, else null. */
function getRecommendedNode(ep) {
  const pr = ep.primary_rationale || {};
  const co = ep.classifier_output || {};
  return pr.recommended_node || co.recommended_node || null;
}

/** Recommended chain: primary_rationale first, then classifier_output; [] when not an array. */
function getRecommendedChain(ep) {
  const pr = ep.primary_rationale || {};
  const co = ep.classifier_output || {};
  const chain = pr.recommended_chain || co.recommended_chain;
  return Array.isArray(chain) ? chain : [];
}

/**
 * Cut 8 — Class × canon coverage.
 * Returns one row per task_classification appearing in the episodes, sorted by count desc.
 * classificationMap shape: { [classification]: string[] } — canonical node IDs (e.g. '#34').
 *
 * @param {Array<object>} episodes
 * @param {Record<string, string[]>} [classificationMap]
 * @returns {Array<{classification: string, count: number, canonicalNodes: string[],
 *   routerRecommended: number, claudeTook: number, recWithinCanon: number, rework: number}>}
 */
export function buildClassCanonCoverage(episodes, classificationMap) {
  const map = classificationMap || {};
  const byClass = new Map();
  // Normalized canon set per classification, built once per row rather than per episode.
  const canonSets = new Map();
  for (const ep of episodes) {
    const classification = (ep.primary_rationale || {}).task_classification || 'other';
    let row = byClass.get(classification);
    if (!row) {
      // Own-property lookup only: a classification named e.g. 'constructor' must
      // not pick up Object.prototype members (spreading a function would throw).
      const canonical = Object.prototype.hasOwnProperty.call(map, classification)
        ? map[classification]
        : null;
      row = {
        classification,
        count: 0,
        canonicalNodes: canonical ? [...canonical] : [],
        routerRecommended: 0,
        claudeTook: 0,
        recWithinCanon: 0,
        rework: 0,
      };
      byClass.set(classification, row);
      canonSets.set(classification, new Set(row.canonicalNodes.map(normalizeNodeId)));
    }
    row.count += 1;

    const recNode = getRecommendedNode(ep);
    const recChain = getRecommendedChain(ep);
    if (recNode || recChain.length > 0) {
      row.routerRecommended += 1;
      // Check if any recommended id falls within canonical set
      const canonSet = canonSets.get(classification);
      const allRecIds = [];
      if (recNode) allRecIds.push(normalizeNodeId(recNode));
      for (const id of recChain) allRecIds.push(normalizeNodeId(id));
      if (allRecIds.some((id) => id && canonSet.has(id))) {
        row.recWithinCanon += 1;
      }
    }

    const nodeChosen = (ep.primary_rationale || {}).node_chosen;
    if (nodeChosen && nodeChosen !== 'direct') {
      row.claudeTook += 1;
    }
    if (ep.outcome_reviewed === 'rework') {
      row.rework += 1;
    }
  }
  return [...byClass.values()].sort((a, b) => b.count - a.count);
}

/**
 * Cut 9 — Router vs Opus three-section breakdown.
 * Returns { sectionA, sectionB, sectionC } — each an array of structured items.
 * Episodes lacking `review` (or whose review has `reviewer_error`) are excluded
 * from all sections.
 *
 * @param {Array<object>} episodes
 * @returns {{sectionA: Array<object>, sectionB: Array<object>, sectionC: Array<object>}}
 */
export function buildRouterVsOpus(episodes) {
  const sectionA = [];
  const sectionB = [];
  const sectionC = [];
  for (const ep of episodes) {
    const rev = ep.review;
    if (!rev || typeof rev !== 'object' || rev.reviewer_error) continue;
    const pr = ep.primary_rationale || {};
    const hasRec = hasRecommendation(ep);
    const recNode = getRecommendedNode(ep);
    const recChain = getRecommendedChain(ep);
    // A chain recommendation takes precedence over a single node in the report.
    const routerRecommendation = recChain.length > 0 ? recChain : recNode;
    const time = (ep.timestamps || {}).started_at || null;
    const taskId = String(ep.task_id || '').slice(0, 8);
    const classification = pr.task_classification || 'other';
    const nodeChosen = pr.node_chosen || 'direct';
    const outcomeReviewed = ep.outcome_reviewed || 'unknown';

    if (hasRec) {
      const isCorrectNoAlt = rev.node_quality === 'correct' && !rev.alternative_better;
      if (isCorrectNoAlt) {
        // Section C: router gave + Opus agreed it was fine (correct, no better alternative)
        sectionC.push({ time, taskId, classification, routerRecommendation, outcomeReviewed });
      } else {
        // Section A: router gave + some disagreement or uncertainty
        // (wrong_node / disputable / has alternative)
        sectionA.push({
          time,
          taskId,
          classification,
          routerRecommendation,
          claudeChose: nodeChosen,
          opusNodeQuality: rev.node_quality || 'n/a',
          opusChainQuality: rev.chain_quality || 'n/a',
          outcomeReviewed,
          opusAlternative: rev.alternative_better || null,
          opusRootCause: rev.error_root_cause || 'n/a',
        });
      }
    } else if (rev.alternative_better) {
      // Section B: router silent, Opus identified a better node
      sectionB.push({
        time,
        taskId,
        classification,
        opusSuggests: rev.alternative_better,
        outcomeReviewed,
        opusReasoning: String(rev.reasoning || '').slice(0, 200),
      });
    }
  }
  return { sectionA, sectionB, sectionC };
}

/**
 * Cut 10 — Chain-ignore breakdown.
 * Distinguishes chain recommendations from node-only recommendations and reports
 * ignore rates + rework rates, bucketed by chain length.
 *
 * A recommendation counts as ignored when `node_chosen` is 'direct' (or absent).
 * The `rework` counters only count episodes that were BOTH ignored and reviewed
 * as 'rework'.
 *
 * @param {Array<object>} episodes Null entries are skipped; a non-array input
 *   yields all-zero counters.
 * @returns {object} Totals plus `breakdownByChainLength` keyed '1' | '2' | '3+'.
 */
export function buildChainIgnoreBreakdown(episodes) {
  const result = {
    totalChainRecommendations: 0,
    ignoredChainCount: 0,
    ignoredChainRework: 0,
    totalNodeOnlyRecommendations: 0,
    ignoredNodeOnlyCount: 0,
    ignoredNodeOnlyRework: 0,
    breakdownByChainLength: {
      '1': { count: 0, ignored: 0, rework: 0 },
      '2': { count: 0, ignored: 0, rework: 0 },
      '3+': { count: 0, ignored: 0, rework: 0 },
    },
  };
  if (!Array.isArray(episodes)) return result;
  for (const ep of episodes) {
    if (!ep) continue;
    const pr = ep.primary_rationale || {};
    const recNode = getRecommendedNode(ep);
    const recChain = getRecommendedChain(ep);
    const hasChain = recChain.length > 0;
    const hasNodeOnly = !hasChain && !!recNode;
    const nodeChosen = pr.node_chosen || 'direct';
    const isIgnored = nodeChosen === 'direct';
    const isRework = ep.outcome_reviewed === 'rework';

    if (hasChain) {
      result.totalChainRecommendations += 1;
      const lenBucket = recChain.length === 1 ? '1' : recChain.length === 2 ? '2' : '3+';
      const bucket = result.breakdownByChainLength[lenBucket];
      bucket.count += 1;
      if (isIgnored) {
        result.ignoredChainCount += 1;
        bucket.ignored += 1;
        if (isRework) {
          result.ignoredChainRework += 1;
          bucket.rework += 1;
        }
      }
    } else if (hasNodeOnly) {
      result.totalNodeOnlyRecommendations += 1;
      if (isIgnored) {
        result.ignoredNodeOnlyCount += 1;
        if (isRework) result.ignoredNodeOnlyRework += 1;
      }
    }
  }
  return result;
}
/**
 * Stream H Task 8 — Table 16: per-rule router-gate hook effectiveness.
 *
 * Aggregates episode.hook_fired records by `rule` name, counting total fires
 * and how many ended with `outcome === 'block'`. Episodes without `hook_fired`
 * are ignored.
 *
 * Rule names come from episode data, so counters are accumulated in a Map and
 * materialized with Object.fromEntries: a rule named '__proto__' or
 * 'constructor' becomes an ordinary own key instead of resolving to (and
 * mutating) Object.prototype / Object.
 *
 * @param {Array<object>} episodes
 * @returns {{rules: Record<string, {fires: number, blocks: number}>}}
 */
export function buildRouterGateHookEffectiveness(episodes) {
  if (!Array.isArray(episodes)) return { rules: {} };
  const counters = new Map();
  for (const ep of episodes) {
    const hf = ep && ep.hook_fired;
    if (!hf || typeof hf !== 'object' || typeof hf.rule !== 'string') continue;
    let slot = counters.get(hf.rule);
    if (!slot) {
      slot = { fires: 0, blocks: 0 };
      counters.set(hf.rule, slot);
    }
    slot.fires += 1;
    if (hf.outcome === 'block') slot.blocks += 1;
  }
  return { rules: Object.fromEntries(counters) };
}

/**
 * Stream H Task 8 — Table 17: self-fabrication signal detection.
 *
 * An episode is classified as a fabrication when `controller_claim` is a
 * non-empty string but `tool_uses` is missing or empty (controller said it
 * acted but no recorded tool_use proves it). Episodes with `controller_claim`
 * AND at least one tool_use are classified as legit.
 *
 * Episodes without `controller_claim` are not counted (nothing was claimed).
 *
 * @param {Array<object>} episodes
 * @returns {{fabrications: Array<object>, legit: Array<object>}}
 */
export function buildSelfFabricationSignals(episodes) {
  const fabrications = [];
  const legit = [];
  if (!Array.isArray(episodes)) return { fabrications, legit };
  for (const ep of episodes) {
    if (!ep || typeof ep.controller_claim !== 'string' || !ep.controller_claim) continue;
    const uses = Array.isArray(ep.tool_uses) ? ep.tool_uses : [];
    if (uses.length === 0) fabrications.push(ep);
    else legit.push(ep);
  }
  return { fabrications, legit };
}

/**
 * Full deterministic aggregation: dedup → infer outcomes → group → chains →
 * matrix → missed activations.
 *
 * Side effect: stamps `_inferredOutcome`, `_interPromptGapMin` and
 * `_similarPastOutcomeMajority` onto the analyzed episode objects.
 *
 * @param {Array<object>} episodes Raw observer episodes.
 * @param {object} [options]
 * @param {object} [options.classificationMap] classification → canonical node ids.
 * @param {object} [options.dormancy] Passed through to detectMissedActivations.
 * @param {string} [options.hookOutcomesLedgerPath] Ledger path for the chain-hook cut.
 * @param {string} [options.periodStart] Lower bound for the chain-hook cut.
 * @param {string} [options.periodEnd] Upper bound for the chain-hook cut.
 * @returns {object} Aggregated report.
 */
export function analyze(episodes, options = {}) {
  // The parameter default only covers `undefined`; tolerate an explicit null too.
  const opts = options || {};
  const deduped = dedupeEpisodes(episodes);
  const allNormal = deduped.filter((e) => !e.observer_error);
  // v1 episodes lack environment / prompt_signal / decision_provenance — they
  // pollute the factor matrix and break outcome inference. Analyze v2 only.
  const normal = allNormal.filter((e) => e.schema_version >= 2);
  const v1SkippedCount = allNormal.length - normal.length;

  for (const eps of bySessionSorted(normal).values()) {
    eps.forEach((episode, i) => {
      episode._inferredOutcome = inferOutcome(episode, eps[i + 1]);
      // Pass 3 — inter-prompt gap (project-brain-factor-analysis-4passes).
      // Cross-episode signal: minutes between this episode's start and the
      // previous (same-session) episode's end. First episode of a session
      // has no prev → stays undefined → bucket 'null'.
      if (i > 0) {
        const prevEnded = (eps[i - 1].timestamps || {}).ended_at;
        const curStarted = (episode.timestamps || {}).started_at;
        // `new Date(null)` is the epoch, not an invalid date: a null timestamp
        // would otherwise yield a bogus multi-decade gap instead of "no gap".
        if (prevEnded != null && curStarted != null) {
          const ms = new Date(curStarted) - new Date(prevEnded);
          if (Number.isFinite(ms) && ms >= 0) episode._interPromptGapMin = ms / 60000;
        }
      }
    });
  }

  // Pass 4 — semantic NN lookup (project-brain-factor-analysis-4passes).
  // Build a single global index from episodes with resolved outcomes +
  // embeddings, then for EACH episode (resolved or not) find its top-3
  // nearest neighbours and stamp the majority family on _similarPastOutcomeMajority.
  // O(N²) is fine: typical session has ~50-500 episodes, k=3, embedding=384-dim.
  // Future: switch to HNSW / faiss when episode count crosses ~10k.
  const embeddingIndex = buildEmbeddingIndex(normal);
  for (const episode of normal) {
    const target = decodeTargetEmbedding(episode.prompt_embedding_base64);
    if (!target) {
      episode._similarPastOutcomeMajority = 'no_neighbors';
      continue;
    }
    // task_id is the SESSION id (shared across turns), not a turn id —
    // exclude self by (task_id|started_at), the same dedupe key buildIndex uses.
    const excludeKey = `${episode.task_id || ''}|${(episode.timestamps || {}).started_at || ''}`;
    const neighbours = findNearestNeighbors(target, embeddingIndex, 3, { excludeKey });
    episode._similarPastOutcomeMajority = majorityOutcome(neighbours);
  }

  const classificationMap = opts.classificationMap || {};
  const dormancy = opts.dormancy || {};
  const disciplineByClassification = disciplinePercentByClassification(normal, classificationMap);
  const routerStep = routerStepReached(normal);
  const boundariesRate = boundariesAppliedRate(normal);

  // Phase 3 Task 20 — v4 aggregation: inheritance count + reviewer outcome
  // distribution + cost totals. Reads schema_version >=4 fields gracefully.
  let inheritanceCount = 0;
  const reviewQuality = { correct: 0, wrong_node: 0, overkill: 0, underkill: 0, disputable: 0 };
  const reviewerCoverage = { reviewed: 0, pending: 0, errored: 0 };
  let degradedCount = 0;
  const costTotals = {
    classifier_input_tokens: 0,
    classifier_output_tokens: 0,
    self_assessment_input_tokens: 0,
    self_assessment_output_tokens: 0,
    reviewer_input_tokens: 0,
    reviewer_output_tokens: 0,
  };
  for (const e of normal) {
    if (e?.inheritance?.inherited_from_task_id) inheritanceCount += 1;
    if (e?.degraded_mode === true) degradedCount += 1;
    const r = e?.review;
    if (r && typeof r === 'object') {
      if (r.reviewer_error) reviewerCoverage.errored += 1;
      else if (typeof r.node_quality === 'string') {
        reviewerCoverage.reviewed += 1;
        // Only the known quality labels are tallied; others still count as reviewed.
        if (Object.prototype.hasOwnProperty.call(reviewQuality, r.node_quality)) {
          reviewQuality[r.node_quality] += 1;
        }
      }
    } else if (e?.schema_version >= 4) {
      reviewerCoverage.pending += 1;
    }
    const tc = e?.task_cost;
    if (tc && typeof tc === 'object') {
      for (const k of Object.keys(costTotals)) {
        const v = tc[k];
        if (typeof v === 'number' && Number.isFinite(v)) costTotals[k] += v;
      }
    }
  }

  // Cuts 8/9/10 — use classificationMap derived from nodes.yaml (registry-to-classification-map.mjs).
  // Archive-fallback REMOVED 2026-05-28 — was stale source of #37/deploy noise.
  const canonMapForCuts = classificationMap;

  return {
    episodeCount: normal.length,
    v1SkippedCount,
    observerErrorCount: deduped.length - allNormal.length,
    tasks: groupEpisodesToTasks(normal),
    causalChains: findCausalChains(normal),
    factorMatrix: buildFactorMatrix(normal),
    missedActivations: detectMissedActivations(normal, classificationMap, dormancy),
    disciplineByClassification,
    routerStep,
    boundariesRate,
    inheritanceCount,
    reviewQuality,
    reviewerCoverage,
    degradedCount,
    costTotals,
    classCanonCoverage: buildClassCanonCoverage(normal, canonMapForCuts),
    routerVsOpus: buildRouterVsOpus(normal),
    chainIgnoreBreakdown: buildChainIgnoreBreakdown(normal),
    chainHookEffectiveness: analyzeChainHookEffectiveness({
      ledgerPath: opts.hookOutcomesLedgerPath,
      periodStart: opts.periodStart,
      periodEnd: opts.periodEnd,
    }),
    routerGateHookEffectiveness: buildRouterGateHookEffectiveness(normal),
    selfFabricationSignals: buildSelfFabricationSignals(normal),
  };
}

/**
 * Read JSONL episode files. Missing files and unparsable lines are skipped.
 *
 * @param {Iterable<string>} files Paths to JSONL files.
 * @returns {Array<unknown>} Parsed values in file/line order.
 */
function loadEpisodes(files) {
  const eps = [];
  for (const f of files) {
    if (!existsSync(f)) continue;
    for (const line of readFileSync(f, 'utf-8').split('\n')) {
      const t = line.trim();
      if (!t) continue;
      try {
        eps.push(JSON.parse(t));
      } catch {
        // skip broken line
      }
    }
  }
  return eps;
}

// CLI entry point: runs only when this file is the executed script. Backslashes
// are normalized so the suffix check also matches Windows-style paths.
if (process.argv[1] && process.argv[1].replace(/\\/g, '/').endsWith('/brain-retro-analyzer.mjs')) {
  const registry = loadRegistry({ useCache: false });
  const classificationMap = buildClassificationMap(registry);
  const dormancy = buildDormancyMap(registry);
  const result = analyze(loadEpisodes(process.argv.slice(2)), { classificationMap, dormancy });
  console.log(JSON.stringify(result, null, 2));
  // NOTE(review): process.exit() can cut off stdout writes that are still
  // pending on asynchronous streams; confirm whether the forced exit is needed
  // or `process.exitCode = 0` would do.
  process.exit(0);
}