#!/usr/bin/env node
/**
 * Transcript parser for the brain governance observer.
 * Deterministically extracts episode fields from a Claude Code session
 * transcript (JSONL). No LLM — pure parsing.
 *
 * Scope: the last turn (from the last real user prompt to end of file) —
 * one episode == one prompt→response cycle.
 *
 * Reasoning fields (triggers_matched / candidates_considered /
 * boundaries_applied) are NOT recoverable from a transcript and stay [];
 * their capture is a separate design question (ADR-011 follow-up).
 *
 * Security Guidance #40: pure parsing — no exec/execSync.
 * Per ADR-011 §6 + spec v1.1 §5.2.1.
 */
import { detectChoiceProvenance, detectAskUserQuestionChoice } from './observer-choice-detector.mjs';

const SUPERPOWERS_PREFIX = 'superpowers:';

/**
 * Split raw JSONL text into parsed entries.
 *
 * Blank lines are ignored. Lines that fail JSON.parse are counted in `broken`
 * (never thrown) so the caller can report a parse gap.
 *
 * @param {string} text - Raw transcript contents; falsy input is treated as ''.
 * @returns {{entries: Array, broken: number, total: number}} Parsed entries,
 *   the number of unparseable lines, and the number of non-blank lines seen.
 */
function parseLines(text) {
  const candidates = String(text || '')
    .split('\n')
    .map((raw) => raw.trim())
    .filter((raw) => raw.length > 0);

  // quirk #101 root fix: the transcript file accumulates duplicated
  // context-rebuild snapshots, re-printing an entry with the SAME `uuid`.
  // Only the first occurrence per uuid is kept, otherwise session_turn /
  // task_size / events double-count and session_turn stops being monotonic
  // across episodes parsed at different file-growth states. Entries without a
  // uuid (synthetic test fixtures) are passed through untouched.
  const knownUuids = new Set();
  const entries = [];
  let broken = 0;

  for (const raw of candidates) {
    let parsed;
    try {
      parsed = JSON.parse(raw);
    } catch {
      // Broken line: counted for parse_gap, never rethrown.
      broken += 1;
      continue;
    }

    const uuid = parsed && parsed.uuid;
    if (uuid) {
      if (knownUuids.has(uuid)) continue;
      knownUuids.add(uuid);
    }
    entries.push(parsed);
  }

  return { entries, broken, total: candidates.length };
}

// Synthetic user-role messages — NOT genuine prompts, must not be turn boundaries.
// Skill invocation content, local slash-command output/invocation, interrupt markers
// are recorded with role:'user' but carry no UserPromptSubmit hook context.
// NOTE(review): in the reviewed source three of these markers were empty
// strings. An empty marker makes `startsWith` succeed for every input, which
// classifies every user message as synthetic. The three tag markers below are
// a best-effort reconstruction of the local slash-command output/invocation
// markers — confirm them against the upstream file before relying on them.
const SYNTHETIC_PROMPT_MARKERS = [
  'Base directory for this skill:',
  '<local-command-stdout>',
  '<command-name>',
  '<command-message>',
  '[Request interrupted by user]',
];

const INTERRUPT_MARKER = '[Request interrupted by user]';

/**
 * Return the content blocks of an entry, or [] when the entry has no
 * array-form `message.content`.
 */
function contentBlocks(entry) {
  const content = entry && entry.message && entry.message.content;
  return Array.isArray(content) ? content : [];
}

/** Join the `text` of every text block with single spaces. */
function joinTextBlocks(blocks) {
  return blocks
    .filter((b) => b && b.type === 'text')
    .map((b) => b.text || '')
    .join(' ');
}

/**
 * True when the text starts (after leading whitespace) with one of the
 * synthetic-message markers. Empty markers are skipped: they would match
 * every string.
 */
function isSyntheticPrompt(text) {
  const t = String(text || '').trimStart();
  return SYNTHETIC_PROMPT_MARKERS.some((m) => m.length > 0 && t.startsWith(m));
}

// A genuine user prompt (turn boundary) — not a tool_result carrier nor a
// synthetic skill/command/interrupt message.
function isRealUserPrompt(entry) {
  const msg = entry && entry.message;
  if (!msg || msg.role !== 'user') return false;
  const c = msg.content;
  if (typeof c === 'string') {
    return c.trim().length > 0 && !isSyntheticPrompt(c);
  }
  if (Array.isArray(c)) {
    const hasToolResult = c.some((b) => b && b.type === 'tool_result');
    const hasText = c.some((b) => b && b.type === 'text');
    if (!hasText || hasToolResult) return false;
    return !isSyntheticPrompt(joinTextBlocks(c));
  }
  return false;
}

/**
 * Index of the last real user prompt, scanning from the end.
 * Falls back to 0 when no real prompt exists.
 */
function findTurnStart(entries) {
  for (let i = entries.length - 1; i >= 0; i--) {
    if (isRealUserPrompt(entries[i])) return i;
  }
  return 0;
}

/** Text of an entry's message: the string content, or its joined text blocks. */
function promptText(entry) {
  const c = entry && entry.message && entry.message.content;
  if (typeof c === 'string') return c;
  if (Array.isArray(c)) return joinTextBlocks(c);
  return '';
}

/**
 * Keyword-based task classification of a prompt.
 * Checks run in order (refactor, bugfix, feature, docs, question); the first
 * matching category wins, otherwise 'other'.
 *
 * @param {string} text - Prompt text; falsy input is treated as ''.
 * @returns {'refactor'|'bugfix'|'feature'|'docs'|'question'|'other'}
 */
export function classifyTask(text) {
  const t = String(text || '').toLowerCase();
  if (/рефактор|refactor/.test(t)) return 'refactor';
  if (/баг|bug|почини|исправ|fix\b|сломан|broken/.test(t)) return 'bugfix';
  if (/фич|feature|добав|implement|реализ|создай|create|новый|new /.test(t)) return 'feature';
  if (/докум|readme|\bdocs?\b/.test(t)) return 'docs';
  if (/\?|как |что |почему|зачем|why|how |what /.test(t)) return 'question';
  return 'other';
}

/**
 * Tally tool usage over a list of entries.
 *
 * @returns {{skills: string[], counts: Object<string, number>, errorCount: number}}
 *   `skills` — the `input.skill` of each `Skill` tool_use ('unknown' if absent);
 *   `counts` — tool_use count per tool name;
 *   `errorCount` — number of tool_result blocks with `is_error === true`.
 */
function collectToolUse(entries) {
  const skills = [];
  const counts = {};
  let errorCount = 0;
  for (const e of entries) {
    for (const block of contentBlocks(e)) {
      if (!block || typeof block !== 'object') continue;
      if (block.type === 'tool_use') {
        const name = block.name || 'unknown';
        counts[name] = (counts[name] || 0) + 1;
        if (name === 'Skill') {
          skills.push((block.input && block.input.skill) || 'unknown');
        }
      } else if (block.type === 'tool_result' && block.is_error === true) {
        errorCount += 1;
      }
    }
  }
  return { skills, counts, errorCount };
}

const FILE_TOOLS = new Set(['Read', 'Edit', 'Write', 'MultiEdit', 'NotebookEdit']);

/**
 * Deterministic environment factors for the turn that starts at turnStartIdx.
 * economy_level is scanned from the stringified turn, parallel_session from
 * tool_result text only; model / post_compaction / session_turn come from
 * structural fields.
 *
 * @param {Array} allEntries - All parsed transcript entries.
 * @param {number} turnStartIdx - Index of the entry that opens the turn.
 * @returns {{economy_level: (number|null), model: (string|null),
 *   post_compaction: boolean, session_turn: number, parallel_session: boolean}}
 */
export function extractEnvironment(allEntries, turnStartIdx) {
  const turn = allEntries.slice(turnStartIdx);
  const rawTurn = JSON.stringify(turn);

  const econ = rawTurn.match(/=== ECONOMY MODE:\s*(\d+)\s*%/);
  const economy_level = econ ? Number(econ[1]) : null;

  // First entry in the turn that carries a model name.
  let model = null;
  for (const e of turn) {
    if (e && e.message && e.message.model) {
      model = e.message.model;
      break;
    }
  }

  // The transcript file accumulates duplicated context-rebuild snapshots
  // (repeated isCompactSummary entries — see feedback_environment quirk #101).
  // Counting prompts from i=0 inflates session_turn with those dupes. Count
  // from the LAST compaction before the turn: session_turn = real prompts
  // since it, which is monotonic ("turns since last compaction").
  let lastCompactIdx = -1;
  for (let i = 0; i < turnStartIdx && i < allEntries.length; i++) {
    if (allEntries[i] && allEntries[i].isCompactSummary === true) lastCompactIdx = i;
  }
  const post_compaction = lastCompactIdx >= 0;

  let session_turn = 0;
  for (let i = lastCompactIdx + 1; i <= turnStartIdx && i < allEntries.length; i++) {
    if (isRealUserPrompt(allEntries[i])) session_turn += 1;
  }

  // Only strong collision evidence — a bare mention of "parallel sessions" is
  // not a signal (best-effort per spec R2; prefer false-negative over false-positive).
  // Scope NARROWED to tool_result content (real command output / Bash stderr): prose
  // mentions in user prompts / assistant text — including analysis text that
  // references collision phrases — must not trigger. Fixes live FP (episode line 20).
  const parallel_session = /чужой staged|foreign git index|index\.lock|another git process/i.test(
    collectToolResultText(turn)
  );

  return { economy_level, model, post_compaction, session_turn, parallel_session };
}

/**
 * Collect text content from tool_result blocks in the turn — the only surface
 * trusted for parallel_session collision evidence (see extractEnvironment).
 * Supports both string content and the structured array form
 * (`content: [{ type: 'text', text }]`).
 */
function collectToolResultText(turn) {
  const parts = [];
  for (const e of turn) {
    for (const b of contentBlocks(e)) {
      if (!b || b.type !== 'tool_result') continue;
      const c = b.content;
      if (typeof c === 'string') {
        parts.push(c);
      } else if (Array.isArray(c)) {
        for (const sub of c) {
          if (sub && typeof sub.text === 'string') parts.push(sub.text);
        }
      }
    }
  }
  return parts.join('\n');
}

/**
 * Task size: total tool calls + unique file paths touched (per spec §3,
 * gap-resolution 2). A path is taken from `input.file_path` or
 * `input.notebook_path` of tool_use blocks whose tool is in FILE_TOOLS.
 *
 * @param {Array} turn - Entries of the turn.
 * @returns {{tool_calls: number, files_touched: number, files: string[]}}
 */
export function extractTaskSize(turn) {
  let tool_calls = 0;
  const files = new Set();
  for (const e of turn) {
    for (const b of contentBlocks(e)) {
      if (b && b.type === 'tool_use') {
        tool_calls += 1;
        if (FILE_TOOLS.has(b.name) && b.input) {
          const p = b.input.file_path || b.input.notebook_path;
          if (p) files.add(String(p));
        }
      }
    }
  }
  return { tool_calls, files_touched: files.size, files: [...files] };
}

// Correction phrases. JavaScript's `\b` only recognises ASCII word characters,
// so next to Cyrillic letters it never matches in ordinary text (e.g. a bare
// "стоп"). The Cyrillic alternatives therefore use Unicode-aware lookarounds
// (requires the `u` flag); the ASCII alternatives keep `\b`.
const CORRECTION_RE =
  /не то(?![\p{L}\p{N}_])|не так(?![\p{L}\p{N}_])|переделай|отбой|(?<![\p{L}\p{N}_])стоп(?![\p{L}\p{N}_])|почему ты|неверно|не верно|это не |не работает|не правильн|сломал|опять|снова не|всё ещё|все ещё|все еще|верни как|откат|\brevert\b|\bundo\b|still not|doesn'?t work|does not work|\bwrong\b/u;

const APPROVAL_RE = /^(ок|окей|ok|спасибо|супер|отлично|готово|дальше|идеально)([,\s]|$)/;

/**
 * Classify the opening user-prompt sentiment (per spec §6 / gap-resolution 1).
 * Order of precedence: correction phrases, then a leading approval word, then
 * a classifiable task longer than 15 characters, else 'neutral'.
 *
 * @param {string} text - Prompt text; falsy input is treated as ''.
 * @returns {'correction'|'approval'|'new_task'|'neutral'}
 */
export function classifyPromptSignal(text) {
  const t = String(text || '').toLowerCase().trim();
  if (CORRECTION_RE.test(t)) return 'correction';
  if (APPROVAL_RE.test(t)) return 'approval';
  if (classifyTask(t) !== 'other' && t.length > 15) return 'new_task';
  return 'neutral';
}

const TIME_BURN_THRESHOLD_MS = 900000; // 15 min — turn wall-clock above this = time_burn
const PARSE_GAP_RATIO = 0.1; // >10% unparseable lines = parse_gap

/** Heuristic retry count: an errored tool whose name is used again later in the turn. */
function detectRetries(turn) {
  // Map (not a plain object) so transcript-supplied ids can never collide
  // with Object.prototype keys.
  const nameById = new Map();
  const uses = [];
  const errorSites = [];

  turn.forEach((entry, idx) => {
    for (const b of contentBlocks(entry)) {
      if (b && b.type === 'tool_use') {
        nameById.set(String(b.id), b.name);
        uses.push({ name: b.name, idx });
      } else if (b && b.type === 'tool_result' && b.is_error === true) {
        errorSites.push({ toolUseId: String(b.tool_use_id), idx });
      }
    }
  });

  let retries = 0;
  for (const site of errorSites) {
    // Resolved after the full scan so an id declared anywhere in the turn is known.
    const name = nameById.get(site.toolUseId) || null;
    if (name && uses.some((u) => u.name === name && u.idx > site.idx)) retries += 1;
  }
  return retries;
}

/**
 * True for a user-role entry whose text contains the interrupt marker.
 * Handles both string content and array-form text blocks.
 */
function isInterruptMessage(entry) {
  const msg = entry && entry.message;
  if (!msg || msg.role !== 'user') return false;
  if (typeof msg.content === 'string') return msg.content.includes(INTERRUPT_MARKER);
  return contentBlocks(entry).some(
    (b) => b && b.type === 'text' && String(b.text || '').includes(INTERRUPT_MARKER)
  );
}

/**
 * Process events for the turn: hook_fired (summary), interrupt, retry,
 * time_burn, parse_gap. broken/total/durationMs are computed by the caller.
 *
 * @param {Array} turn - Entries of the turn.
 * @param {number} broken - Count of unparseable transcript lines.
 * @param {number} total - Count of non-blank transcript lines.
 * @param {number} durationMs - Turn wall-clock duration in milliseconds.
 * @returns {Array<object>} Events in the order listed above.
 */
export function extractProcessEvents(turn, broken, total, durationMs) {
  const events = [];

  const hookCounts = {};
  let hookErrors = 0;
  for (const e of turn) {
    const att = e && e.attachment;
    if (att && (att.type === 'hook_success' || att.type === 'hook_error')) {
      const name = att.hookName || 'unknown';
      hookCounts[name] = (hookCounts[name] || 0) + 1;
      if (att.type === 'hook_error') hookErrors += 1;
    }
  }
  if (Object.keys(hookCounts).length > 0) {
    events.push({ kind: 'hook_fired', counts: hookCounts, errors: hookErrors });
  }

  for (const e of turn) {
    if (isInterruptMessage(e)) events.push({ kind: 'interrupt' });
  }

  const retries = detectRetries(turn);
  for (let i = 0; i < retries; i++) events.push({ kind: 'retry' });

  if (durationMs > TIME_BURN_THRESHOLD_MS) {
    events.push({ kind: 'time_burn', duration_ms: durationMs });
  }
  if (total > 0 && broken / total > PARSE_GAP_RATIO) {
    events.push({ kind: 'parse_gap', broken, total });
  }
  return events;
}

// NOTE(review): the routing-tag pattern was empty in the reviewed source
// (`//`), which is not a valid regex literal. parseRoutingTag needs three
// capture groups (kind, node, claude_would_have_chosen); the comment-style
// format below is a placeholder reconstruction — replace it with the exact
// tag format from spec §4.2.
const ROUTING_TAG_RE =
  /<!--\s*routing:\s*kind=(\S+)\s+node=(\S+)\s+claude_would_have_chosen=(\S+)\s*-->/;

/**
 * Find the routing tag Claude prints when a method was user-directed (spec §4.2).
 *
 * @param {Array} turn - Entries of the turn.
 * @returns {{kind: string, node: string, claude_would_have_chosen: string}|null}
 *   Fields of the first matching tag in any text block, or null when none matches.
 */
export function parseRoutingTag(turn) {
  for (const e of turn) {
    for (const b of contentBlocks(e)) {
      if (b && b.type === 'text' && typeof b.text === 'string') {
        const m = b.text.match(ROUTING_TAG_RE);
        if (m) return { kind: m[1], node: m[2], claude_would_have_chosen: m[3] };
      }
    }
  }
  return null;
}

/** Text of the last real user prompt — used by the Stop-hook routing-gate (Task 5). */
export function extractLastUserPromptText(transcriptText) {
  const { entries } = parseLines(transcriptText);
  const start = findTurnStart(entries);
  return promptText(entries[start]);
}

/**
 * Content of the last assistant message strictly before the turn start —
 * the message that may have offered options to the user (spec §11.5).
 * Returns the content (array or string) or null when there is none.
 */
function extractLastAssistantContent(entries, turnStartIdx) {
  for (let i = turnStartIdx - 1; i >= 0; i--) {
    const e = entries[i];
    if (e && e.message && e.message.role === 'assistant') {
      const content = e.message.content;
      if (Array.isArray(content)) return content;
      if (typeof content === 'string') return content;
    }
  }
  return null;
}

/**
 * Parse a transcript JSONL string into an observer episode (schema v2).
 * @param {string} transcriptText - Raw JSONL transcript contents.
 * @param {string|null} fallbackSessionId - Used when the transcript has no sessionId.
 * @returns {object} v2 episode.
 */
export function parseTranscript(transcriptText, fallbackSessionId = null) {
  const { entries, broken, total } = parseLines(transcriptText);

  const withSession = entries.find((e) => e && e.sessionId);
  const sessionId =
    (withSession && withSession.sessionId) || fallbackSessionId || `unknown-${Date.now()}`;

  const start = findTurnStart(entries);
  const turn = entries.slice(start);

  // Turn wall-clock: first to last timestamp present in the turn. With no
  // timestamps at all, both ends fall back to "now" and the duration is 0.
  const stamps = turn.map((e) => e && e.timestamp).filter(Boolean);
  const started_at = stamps[0] || new Date().toISOString();
  const ended_at = stamps[stamps.length - 1] || started_at;
  const durationMs = new Date(ended_at) - new Date(started_at);

  const { skills, counts, errorCount } = collectToolUse(turn);

  const events = [];
  for (const skill of skills) events.push({ kind: 'skill_invoked', skill });
  if (Object.keys(counts).length > 0) events.push({ kind: 'tool_summary', counts });
  for (let i = 0; i < errorCount; i++) {
    events.push({ kind: 'error', message: 'tool_result reported is_error' });
  }
  events.push(...extractProcessEvents(turn, broken, total, durationMs));

  const usedSuperpowers = skills.some((s) => String(s).startsWith(SUPERPOWERS_PREFIX));
  const prompt = promptText(entries[start]);

  // A detected user choice takes precedence over the routing tag.
  const lastAsstContent = extractLastAssistantContent(entries, start);
  const choice =
    detectChoiceProvenance(prompt, lastAsstContent) || detectAskUserQuestionChoice(turn);

  let decision_provenance;
  if (choice) {
    decision_provenance = choice;
  } else {
    const tag = parseRoutingTag(turn);
    decision_provenance =
      tag && tag.kind === 'user_directed_method'
        ? { kind: 'user_directed_method', claude_would_have_chosen: tag.claude_would_have_chosen }
        : { kind: 'autonomous', claude_would_have_chosen: null };
  }

  return {
    schema_version: 2,
    task_id: sessionId,
    task_ref: sessionId,
    timestamps: { started_at, ended_at },
    path_type: usedSuperpowers ? 'regulated' : 'improvised',
    outcome: 'unknown',
    prompt_signal: classifyPromptSignal(prompt),
    decision_provenance,
    environment: extractEnvironment(entries, start),
    task_size: extractTaskSize(turn),
    primary_rationale: {
      step: 1,
      node_chosen: skills.length > 0 ? skills[0] : 'direct',
      triggers_matched: [],
      candidates_considered: [],
      boundaries_applied: [],
      hard_floor: usedSuperpowers
        ? { invoked: true, rules: ['Pravila §12'] }
        : { invoked: false, rules: [] },
      task_classification: classifyTask(prompt),
    },
    events,
  };
}