Files
portal/tools/observer-transcript-parser.mjs
T
Дмитрий 0663479bb8 feat(observer): heuristic reasoning capture in primary_rationale
Closes brain-retro 2026-05-20 #6 — extractTriggers/Candidates/Boundaries
scan assistant.text for Pravila §N / ADR-N / PSR_v1 RX / routing-off-phase
LN / hard-floor + numbered/bulleted lists (≥2). Populates previously-
always-empty primary_rationale arrays.

Conservative-broad: false positives accepted (mention ≠ application);
/brain-retro determines applied validity. Phase 2 agent-judge out of scope.

19 new tests, 282/282 GREEN.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-20 13:47:37 +03:00

613 lines
23 KiB
JavaScript

#!/usr/bin/env node
/**
* Transcript parser for the brain governance observer.
* Deterministically extracts episode fields from a Claude Code session
* transcript (JSONL). No LLM — pure parsing.
*
* Scope: the last turn (from the last real user prompt to end of file) —
* one episode == one prompt→response cycle.
*
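* Reasoning fields (triggers_matched / candidates_considered /
* boundaries_applied) are filled heuristically from the assistant text of the
* turn (see extractTriggers / extractCandidates / extractBoundaries). The
* heuristics are deliberately broad: a mention is not proof of application, so
* false positives are expected. Judged (non-heuristic) capture remains a
* separate design question (ADR-011 follow-up).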
*
* Security Guidance #40: pure parsing — no exec/execSync.
* Per ADR-011 §6 + spec v1.1 §5.2.1.
*/
import { detectChoiceProvenance, detectAskUserQuestionChoice } from './observer-choice-detector.mjs';
// Skill-name prefix checked by parseTranscript: a turn that invokes at least one
// skill whose name starts with it gets path_type 'regulated' and hard_floor.invoked = true.
const SUPERPOWERS_PREFIX = 'superpowers:';
/**
 * Parse raw JSONL text into entries, tolerating malformed lines.
 *
 * quirk #101 root fix: Claude Code's transcript file accumulates duplicated
 * context-rebuild snapshots — the same entry is re-printed with the SAME
 * `uuid`. Without dedup, session_turn / task_size / events double-count and
 * session_turn becomes non-monotonic across episodes parsed at different
 * file-growth states. Only the first occurrence per uuid is kept; entries
 * without a uuid (synthetic test fixtures) pass through unchanged.
 *
 * @param {string} text - Raw transcript contents; nullish is treated as empty.
 * @returns {{entries: Array, broken: number, total: number}} Parsed entries,
 *   the number of unparseable lines, and the number of non-blank lines seen
 *   (duplicates and broken lines included).
 */
function parseLines(text) {
  const rows = String(text || '')
    .split('\n')
    .map((row) => row.trim())
    .filter((row) => row !== '');
  const knownUuids = new Set();
  const entries = [];
  let broken = 0;
  rows.forEach((row) => {
    let parsed;
    try {
      parsed = JSON.parse(row);
    } catch {
      // Broken line — counted for parse_gap, never thrown.
      broken += 1;
      return;
    }
    const uuid = parsed ? parsed.uuid : undefined;
    if (uuid) {
      if (knownUuids.has(uuid)) return;
      knownUuids.add(uuid);
    }
    entries.push(parsed);
  });
  return { entries, broken, total: rows.length };
}
// Prefixes of synthetic user-role messages. These are NOT genuine prompts and
// must not become turn boundaries: skill invocation content, local
// slash-command output/invocation and interrupt markers are all recorded with
// role:'user' but carry no UserPromptSubmit hook context.
const SYNTHETIC_PROMPT_MARKERS = [
  'Base directory for this skill:',
  '<local-command-stdout>',
  '<local-command-caveat>',
  '<command-name>',
  '[Request interrupted by user]',
];

/**
 * True when the text, ignoring leading whitespace, starts with one of the
 * synthetic markers above.
 * @param {string} text - Message text; nullish is treated as empty.
 * @returns {boolean}
 */
function isSyntheticPrompt(text) {
  const head = String(text || '').trimStart();
  for (const marker of SYNTHETIC_PROMPT_MARKERS) {
    if (head.startsWith(marker)) return true;
  }
  return false;
}
/**
 * Decide whether an entry is a genuine user prompt, i.e. a turn boundary.
 *
 * Rejected: non-user roles, blank string content, array content that carries
 * any tool_result block or no text block at all, and anything that
 * isSyntheticPrompt flags (skill / local-command / interrupt messages).
 *
 * @param {object} entry - Parsed transcript entry.
 * @returns {boolean}
 */
function isRealUserPrompt(entry) {
  const message = entry && entry.message;
  if (!message || message.role !== 'user') return false;

  const body = message.content;
  if (typeof body === 'string') {
    if (body.trim().length === 0) return false;
    return !isSyntheticPrompt(body);
  }
  if (!Array.isArray(body)) return false;

  const textParts = [];
  for (const block of body) {
    if (!block) continue;
    // A tool_result carrier is never a prompt, whatever else it contains.
    if (block.type === 'tool_result') return false;
    if (block.type === 'text') textParts.push(block.text || '');
  }
  if (textParts.length === 0) return false;
  return !isSyntheticPrompt(textParts.join(' '));
}
/**
 * Index of the last genuine user prompt in the transcript.
 * @param {Array} entries - Parsed transcript entries.
 * @returns {number} Index of the last real prompt, or 0 when there is none.
 */
function findTurnStart(entries) {
  let idx = entries.length;
  while (idx-- > 0) {
    if (isRealUserPrompt(entries[idx])) return idx;
  }
  return 0;
}
/**
 * Plain text of an entry's message content.
 * @param {object} entry - Parsed transcript entry.
 * @returns {string} String content as-is; for array content the text blocks
 *   joined with a single space; '' for anything else.
 */
function promptText(entry) {
  const body = entry && entry.message && entry.message.content;
  if (typeof body === 'string') return body;
  if (!Array.isArray(body)) return '';
  const pieces = [];
  for (const block of body) {
    if (block && block.type === 'text') pieces.push(block.text || '');
  }
  return pieces.join(' ');
}
/**
 * Keyword-based task classification of a prompt (Russian + English keywords).
 *
 * The text is lower-cased and checked against the rules below in order; the
 * first rule that matches wins, so more specific categories are listed first.
 *
 * @param {string} text - Prompt text; nullish is treated as empty.
 * @returns {string} One of 'memory-sync', 'regulatory-bump', 'planning',
 *   'release', 'refactor', 'bugfix', 'feature', 'docs', 'analysis', 'cleanup',
 *   'monitoring', 'question', or 'other' when nothing matches.
 */
export function classifyTask(text) {
  const t = String(text || '').toLowerCase();
  if (/обнови эталон|sync memory|обнови (?:память|memory|memory\.md)/.test(t)) return 'memory-sync';
  if (/обнови claude|правк[аи] pravila|update pravila|обнови psr|обнови tooling|нормативка/.test(t)) return 'regulatory-bump';
  if (/план|plan\b|спроектируй|design\b|brainstorm|обсудим/.test(t)) return 'planning';
  if (/\bpush\b|\bmerge\b|\bdeploy\b|\bcommit\b|\brelease\b|релиз|тегни/.test(t)) return 'release';
  if (/рефактор|refactor/.test(t)) return 'refactor';
  if (/баг|bug|почини|исправ|fix\b|сломан|broken/.test(t)) return 'bugfix';
  if (/фич|feature|добав|implement|реализ|создай|create|новый|new /.test(t)) return 'feature';
  if (/докум|readme|\bdocs?\b/.test(t)) return 'docs';
  if (/проанализ|анализ|оцени|review|examine|разбор|посмотри что/.test(t)) return 'analysis';
  if (/убери|удали|почисть|cleanup|очисти|drop\s/.test(t)) return 'cleanup';
  // `\b` is defined over ASCII word characters only, so `статус\b` could never
  // match a standalone Cyrillic word (no boundary exists between 'с' and a
  // space / end of input). A Unicode-aware negative lookahead expresses the
  // intended "end of word" instead.
  if (/^\s*статус(?![\p{L}\p{N}_])|\bstatus\b|проверь состоян|health/u.test(t)) return 'monitoring';
  if (/\?|как |что |почему|зачем|why|how |what /.test(t)) return 'question';
  return 'other';
}
/**
 * Tally tool usage over a list of entries.
 * @param {Array} entries - Transcript entries (typically one turn).
 * @returns {{skills: string[], counts: Object<string, number>, errorCount: number}}
 *   `skills`: the `input.skill` of every `Skill` tool_use, in order ('unknown'
 *   when absent); `counts`: tool_use blocks per tool name ('unknown' when the
 *   name is missing); `errorCount`: tool_result blocks with `is_error === true`.
 */
function collectToolUse(entries) {
  const skills = [];
  const counts = {};
  let errorCount = 0;

  const visit = (block) => {
    if (!block || typeof block !== 'object') return;
    switch (block.type) {
      case 'tool_use': {
        const toolName = block.name || 'unknown';
        counts[toolName] = (counts[toolName] || 0) + 1;
        if (toolName === 'Skill') {
          skills.push((block.input && block.input.skill) || 'unknown');
        }
        break;
      }
      case 'tool_result':
        if (block.is_error === true) errorCount += 1;
        break;
      default:
        break;
    }
  };

  for (const entry of entries) {
    const message = entry && entry.message;
    if (!message || !Array.isArray(message.content)) continue;
    for (const block of message.content) visit(block);
  }
  return { skills, counts, errorCount };
}
// Tool names whose tool_use input is inspected for a path (`file_path` or
// `notebook_path`) when extractTaskSize counts the unique files touched in a turn.
const FILE_TOOLS = new Set(['Read', 'Edit', 'Write', 'MultiEdit', 'NotebookEdit']);
/**
 * Deterministic environment factors for the turn that starts at turnStartIdx.
 *
 * - economy_level: percentage from an `=== ECONOMY MODE: N %` marker found
 *   anywhere in the JSON-stringified turn, else null.
 * - model: first truthy `message.model` in the turn, else null.
 * - post_compaction: true when an `isCompactSummary === true` entry precedes
 *   the turn.
 * - session_turn: real user prompts after the last such compaction entry, up
 *   to and including the turn start.
 * - parallel_session: collision evidence found in tool_result text only.
 *
 * @param {Array} allEntries - All parsed transcript entries.
 * @param {number} turnStartIdx - Index of the entry that opens the turn.
 * @returns {{economy_level: (number|null), model: (string|null),
 *   post_compaction: boolean, session_turn: number, parallel_session: boolean}}
 */
export function extractEnvironment(allEntries, turnStartIdx) {
  const turn = allEntries.slice(turnStartIdx);

  const economyHit = /=== ECONOMY MODE:\s*(\d+)\s*%/.exec(JSON.stringify(turn));
  const economy_level = economyHit === null ? null : Number(economyHit[1]);

  const modelCarrier = turn.find((e) => e && e.message && e.message.model);
  const model = modelCarrier ? modelCarrier.message.model : null;

  // The transcript file accumulates duplicated context-rebuild snapshots
  // (repeated isCompactSummary entries — see feedback_environment quirk #101).
  // Counting prompts from index 0 would inflate session_turn with those dupes,
  // so counting starts after the LAST compaction before the turn:
  // session_turn = "turns since last compaction", which is monotonic.
  const scanEnd = Math.min(turnStartIdx, allEntries.length);
  let lastCompactIdx = -1;
  for (let i = 0; i < scanEnd; i++) {
    const candidate = allEntries[i];
    if (candidate && candidate.isCompactSummary === true) lastCompactIdx = i;
  }
  const post_compaction = lastCompactIdx >= 0;

  let session_turn = 0;
  let cursor = lastCompactIdx + 1;
  while (cursor <= turnStartIdx && cursor < allEntries.length) {
    if (isRealUserPrompt(allEntries[cursor])) session_turn += 1;
    cursor += 1;
  }

  // Only strong collision evidence counts — a bare mention of "parallel
  // sessions" is not a signal (best-effort per spec R2; prefer false-negative
  // over false-positive). The scan is limited to tool_result content (real
  // command output / Bash stderr): prose in user prompts or assistant text,
  // including analysis that quotes collision phrases, must not trigger.
  // Fixes live FP (episode line 20).
  const collisionRe = /чужой staged|foreign git index|index\.lock|another git process/i;
  const parallel_session = collisionRe.test(collectToolResultText(turn));

  return { economy_level, model, post_compaction, session_turn, parallel_session };
}
/**
 * Gather the text carried by tool_result blocks in the turn — the only surface
 * trusted for parallel_session collision evidence (see extractEnvironment).
 * Handles both string content and the structured array form
 * (`content: [{ type: 'text', text }]`).
 *
 * @param {Array} turn - Entries of the turn.
 * @returns {string} All collected pieces joined with '\n' ('' when none).
 */
function collectToolResultText(turn) {
  const chunks = [];
  const take = (payload) => {
    if (typeof payload === 'string') {
      chunks.push(payload);
      return;
    }
    if (!Array.isArray(payload)) return;
    for (const part of payload) {
      if (part && typeof part.text === 'string') chunks.push(part.text);
    }
  };
  for (const entry of turn) {
    const message = entry && entry.message;
    if (!message || !Array.isArray(message.content)) continue;
    for (const block of message.content) {
      if (block && block.type === 'tool_result') take(block.content);
    }
  }
  return chunks.join('\n');
}
/**
 * Task size: total tool calls + unique file paths touched (per spec §3, gap-resolution 2).
 *
 * @param {Array} turn - Entries of the turn.
 * @returns {{tool_calls: number, files_touched: number, files: string[]}}
 *   `tool_calls` counts every tool_use block; `files` lists the distinct
 *   `file_path` / `notebook_path` inputs of FILE_TOOLS calls in first-seen order.
 */
export function extractTaskSize(turn) {
  const touched = new Set();
  let tool_calls = 0;
  for (const entry of turn) {
    const message = entry && entry.message;
    if (!message || !Array.isArray(message.content)) continue;
    for (const block of message.content) {
      if (!block || block.type !== 'tool_use') continue;
      tool_calls += 1;
      if (!FILE_TOOLS.has(block.name) || !block.input) continue;
      const path = block.input.file_path || block.input.notebook_path;
      if (path) touched.add(String(path));
    }
  }
  return { tool_calls, files_touched: touched.size, files: Array.from(touched) };
}
/**
 * Sum token usage over every message in the turn that carries a `usage` object.
 *
 * DESIGN: the result is zero-filled (NOT null) when no `usage` data was
 * captured, so consumers cannot currently tell "actually 0 tokens" from "no
 * usage data". Accepted trade-off because (a) every assistant message in real
 * Claude Code transcripts has `usage` (verified B1 brain-retro 2026-05-20:
 * 6265/6265 messages with usage, 0 partial-stream), and (b) `task_cost` is not
 * yet read by analyzer/STATUS.md, so the semantic gap is a future-only
 * concern. Re-evaluate when factor matrix adds cost.
 *
 * Captured: the 4 base token fields, `iterations` (extended-thinking
 * detector) and `server_tool_use.{web_search,web_fetch}_requests`. Other usage
 * fields (cache_creation object, inference_geo, service_tier, speed) are out
 * of scope for the current analyzer.
 *
 * Defensive: entries whose `usage` is missing or not an object are skipped
 * (handles malformed transcript edge cases like `"usage": 42`).
 *
 * @param {Array} turn - Entries of the turn; nullish is treated as empty.
 * @returns {object} Totals keyed input_tokens, output_tokens,
 *   cache_read_input_tokens, cache_creation_input_tokens, web_search_requests,
 *   web_fetch_requests, iterations.
 */
export function extractTokenUsage(turn) {
  const totals = {
    input_tokens: 0,
    output_tokens: 0,
    cache_read_input_tokens: 0,
    cache_creation_input_tokens: 0,
    web_search_requests: 0,
    web_fetch_requests: 0,
    iterations: 0,
  };
  const directFields = [
    'input_tokens',
    'output_tokens',
    'cache_read_input_tokens',
    'cache_creation_input_tokens',
    'iterations',
  ];
  for (const entry of turn || []) {
    const usage = entry && entry.message && entry.message.usage;
    if (!usage || typeof usage !== 'object') continue;
    for (const field of directFields) {
      totals[field] += usage[field] || 0;
    }
    const serverTools = usage.server_tool_use;
    if (serverTools) {
      totals.web_search_requests += serverTools.web_search_requests || 0;
      totals.web_fetch_requests += serverTools.web_fetch_requests || 0;
    }
  }
  return totals;
}
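/*
* NOTE: this description belongs to extractAskUserQuestionEvents, which is
* defined further down in this file — not to the helper that follows here.
* For each AskUserQuestion toolUseResult in the turn, emit one event per question.
* answer_kind: 'option' (answer equals an option label after trimming),
* 'custom' (free-text), 'no_answer' (missing/empty).
*/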
/**
 * Collect concatenated text from all assistant text blocks in the turn.
 * @param {Array} turn - Entries of the turn; nullish is treated as empty.
 * @returns {string} Text blocks of role 'assistant' joined with '\n'.
 */
function assistantTextOfTurn(turn) {
  const parts = [];
  for (const e of turn || []) {
    if (!e || !e.message || e.message.role !== 'assistant') continue;
    const content = Array.isArray(e.message.content) ? e.message.content : [];
    for (const b of content) {
      if (b && b.type === 'text' && typeof b.text === 'string') parts.push(b.text);
    }
  }
  return parts.join('\n');
}

const TRIGGER_PATTERNS = [
  /\bPravila\s+§\d+(?:\.\d+)?/g,
  /\bADR-\d+/g,
  /\bPSR_v1\s+R\d+(?:\.\d+)?/g,
  /\brouting-off-phase\s+L\d+/g,
  /\bL\d+\s+chain/g,
  /\bhard-(?:floor|rule)\b/gi,
];

// Non-global helpers for normalization (no lastIndex state to worry about).
const CHAIN_SHORTHAND_RE = /^(L\d+) chain$/;
const HARD_RULE_RE = /^hard-(?:floor|rule)$/i;

/**
 * Canonical form of one raw match, so that equivalent mentions dedup:
 * - whitespace runs (including newlines from wrapped text) collapse to one space;
 * - the "L<n> chain" shorthand becomes "routing-off-phase L<n>";
 * - hard-floor / hard-rule, the only case-insensitive pattern, is lower-cased.
 */
function normalizeRuleMention(raw) {
  const compact = raw.replace(/\s+/g, ' ');
  const chain = CHAIN_SHORTHAND_RE.exec(compact);
  if (chain) return `routing-off-phase ${chain[1]}`;
  return HARD_RULE_RE.test(compact) ? compact.toLowerCase() : compact;
}

/** Unique normalized matches of `patterns` in `text`, in pattern order then text order. */
function collectRuleMentions(text, patterns) {
  const found = new Set();
  for (const re of patterns) {
    // String.prototype.match with a /g regex starts from index 0 on every call.
    for (const hit of text.match(re) || []) found.add(normalizeRuleMention(hit));
  }
  return [...found];
}

/**
 * Heuristic triggers from assistant text. Conservative-broad — false positives OK
 * (a mention is not proof that the rule was applied).
 * @param {Array} turn - Entries of the turn.
 * @returns {string[]} Unique, normalized rule mentions.
 */
export function extractTriggers(turn) {
  return collectRuleMentions(assistantTextOfTurn(turn), TRIGGER_PATTERNS);
}

const CANDIDATE_NUMBERED_RE = /^\s*\d+[.\)]\s+([^\n]+)$/gm;
const CANDIDATE_BULLET_RE = /^\s*[-*]\s+([^\n]+)$/gm;

/**
 * Heuristic candidates: ≥2 numbered (preferred) or bulleted items.
 * @param {Array} turn - Entries of the turn.
 * @returns {string[]} Trimmed item texts: all numbered items when there are at
 *   least two, otherwise all bulleted items when there are at least two,
 *   otherwise [].
 */
export function extractCandidates(turn) {
  const text = assistantTextOfTurn(turn);
  const numbered = [...text.matchAll(CANDIDATE_NUMBERED_RE)].map((m) => m[1].trim());
  if (numbered.length >= 2) return numbered;
  const bulleted = [...text.matchAll(CANDIDATE_BULLET_RE)].map((m) => m[1].trim());
  if (bulleted.length >= 2) return bulleted;
  return [];
}

const BOUNDARY_PATTERNS = [
  /\bADR-\d+(?:\s+§\d+(?:\.\d+)?)?/g,
  /\bPSR_v1\s+R\d+(?:\.\d+)?/g,
  /\bPravila\s+§\d+(?:\.\d+)?/g,
  /\brouting-off-phase\s+L\d+/g,
  /\bL\d+\s+chain/g,
];

/**
 * Heuristic boundaries — overlaps with triggers, dedup per-array only.
 * @param {Array} turn - Entries of the turn.
 * @returns {string[]} Unique, normalized rule mentions.
 */
export function extractBoundaries(turn) {
  return collectRuleMentions(assistantTextOfTurn(turn), BOUNDARY_PATTERNS);
}
/**
 * For each AskUserQuestion toolUseResult in the turn, emit one event per question.
 *
 * answer_kind is 'option' when the answer equals one of the question's option
 * labels (both trimmed), 'custom' for any other non-empty string answer, and
 * 'no_answer' when the answer is missing, empty or not a string.
 *
 * Malformed data never throws: a question entry that is null / not an object,
 * or whose `options` is not an array, is treated as having no options, and a
 * question that cannot be looked up yields 'no_answer'. One event is still
 * emitted per entry so the event count matches `question_count`.
 *
 * @param {Array} turn - Entries of the turn; nullish is treated as empty.
 * @returns {Array<{kind: 'ask_user_question', question_count: number, answer_kind: string}>}
 */
export function extractAskUserQuestionEvents(turn) {
  const events = [];
  for (const e of turn || []) {
    const tur = e && e.toolUseResult;
    if (!tur || !Array.isArray(tur.questions) || !tur.answers) continue;
    const qCount = tur.questions.length;
    for (const q of tur.questions) {
      const options = q && Array.isArray(q.options) ? q.options : [];
      const labels = options.map((o) => o && o.label).filter((l) => typeof l === 'string');
      const answer = q ? tur.answers[q.question] : undefined;
      let answer_kind;
      if (typeof answer !== 'string' || answer.length === 0) answer_kind = 'no_answer';
      else if (labels.some((l) => l.trim() === answer.trim())) answer_kind = 'option';
      else answer_kind = 'custom';
      events.push({ kind: 'ask_user_question', question_count: qCount, answer_kind });
    }
  }
  return events;
}
/**
 * Classify the opening user-prompt sentiment (per spec §6 / gap-resolution 1).
 *
 * Checked in order on the lower-cased, trimmed text:
 *   1. 'correction' — the prompt contains a complaint / rollback phrase;
 *   2. 'approval'   — the prompt starts with an acknowledgement word;
 *   3. 'new_task'   — classifyTask recognizes a category and the text is
 *                     longer than 15 characters;
 *   4. 'neutral'    — anything else.
 *
 * Word limits around Cyrillic phrases use Unicode-aware lookarounds because
 * `\b` only knows ASCII word characters: `не то\b`, `не так\b` and `\bстоп\b`
 * could never match the standalone Russian phrases they were written for.
 *
 * @param {string} text - Prompt text; nullish is treated as empty.
 * @returns {'correction'|'approval'|'new_task'|'neutral'}
 */
export function classifyPromptSignal(text) {
  const t = String(text || '').toLowerCase().trim();
  if (
    /не то(?![\p{L}\p{N}_])|не так(?![\p{L}\p{N}_])|переделай|отбой|(?<![\p{L}\p{N}_])стоп(?![\p{L}\p{N}_])|почему ты|неверно|не верно|это не |не работает|не правильн|сломал|опять|снова не|всё ещё|все ещё|все еще|верни как|откат|\brevert\b|\bundo\b|still not|doesn'?t work|does not work|\bwrong\b/u.test(
      t
    )
  ) {
    return 'correction';
  }
  if (/^(ок|окей|ok|спасибо|супер|отлично|готово|дальше|идеально)([,\s]|$)/.test(t)) {
    return 'approval';
  }
  if (classifyTask(t) !== 'other' && t.length > 15) return 'new_task';
  return 'neutral';
}
// Thresholds used by extractProcessEvents when deriving the time_burn and
// parse_gap events; both comparisons are strict (value must exceed the threshold).
const TIME_BURN_THRESHOLD_MS = 900000; // 15 min — turn wall-clock above this = time_burn
const PARSE_GAP_RATIO = 0.1; // >10% unparseable lines = parse_gap
/**
 * Heuristic retry count: an errored tool whose name is used again later in the turn.
 *
 * An errored tool_result is attributed to a tool name through its
 * `tool_use_id`; it counts as retried when a tool_use with the same name
 * appears in a strictly later entry of the turn.
 *
 * @param {Array} turn - Entries of the turn.
 * @returns {number} Number of errored tool results that were followed by a retry.
 */
function detectRetries(turn) {
  const nameById = {};
  const lastUseAt = new Map(); // tool name -> index of the last entry using it
  const failures = [];

  turn.forEach((entry, position) => {
    const blocks = entry && entry.message && Array.isArray(entry.message.content) ? entry.message.content : [];
    for (const block of blocks) {
      if (!block) continue;
      if (block.type === 'tool_use') {
        nameById[block.id] = block.name;
        lastUseAt.set(block.name, position);
      } else if (block.type === 'tool_result' && block.is_error === true) {
        failures.push({ id: block.tool_use_id, position });
      }
    }
  });

  // Names are resolved only after the full scan so every tool_use id is known.
  let retries = 0;
  for (const failure of failures) {
    const toolName = nameById[failure.id] || null;
    if (!toolName) continue;
    if (lastUseAt.has(toolName) && lastUseAt.get(toolName) > failure.position) retries += 1;
  }
  return retries;
}
/**
 * Process events for the turn, emitted in this order: hook_fired (one summary
 * event), interrupt, retry, time_burn, parse_gap, unrecovered_error.
 *
 * @param {Array} turn - Entries of the turn.
 * @param {number} broken - Unparseable transcript lines (computed by the caller).
 * @param {number} total - Non-blank transcript lines (computed by the caller).
 * @param {number} durationMs - Turn wall-clock duration (computed by the caller).
 * @returns {Array<object>} Event objects, each with a `kind` field.
 */
export function extractProcessEvents(turn, broken, total, durationMs) {
  const blocksOf = (entry) =>
    entry && entry.message && Array.isArray(entry.message.content) ? entry.message.content : [];
  const events = [];

  // Hook attachments are folded into a single summary event.
  const hookCounts = {};
  let hookErrors = 0;
  for (const entry of turn) {
    const attachment = entry && entry.attachment;
    if (!attachment) continue;
    const failed = attachment.type === 'hook_error';
    if (!failed && attachment.type !== 'hook_success') continue;
    const hook = attachment.hookName || 'unknown';
    hookCounts[hook] = (hookCounts[hook] || 0) + 1;
    if (failed) hookErrors += 1;
  }
  if (Object.keys(hookCounts).length > 0) {
    events.push({ kind: 'hook_fired', counts: hookCounts, errors: hookErrors });
  }

  // One interrupt event per user entry that carries the interrupt marker.
  for (const entry of turn) {
    if (!(entry && entry.message && entry.message.role === 'user')) continue;
    const interrupted = blocksOf(entry).some(
      (block) => block && block.type === 'text' && String(block.text || '').includes('[Request interrupted by user]')
    );
    if (interrupted) events.push({ kind: 'interrupt' });
  }

  let pendingRetries = detectRetries(turn);
  while (pendingRetries-- > 0) events.push({ kind: 'retry' });

  if (durationMs > TIME_BURN_THRESHOLD_MS) {
    events.push({ kind: 'time_burn', duration_ms: durationMs });
  }
  if (total > 0 && broken / total > PARSE_GAP_RATIO) {
    events.push({ kind: 'parse_gap', broken, total });
  }

  // unrecovered_error: emitted iff the LAST tool_result in the turn was
  // is_error=true. Distinguishes "turn ended on failure" from "errors that
  // were retried away" (e.g., TDD red→green, expected-fail commands). The
  // analyzer uses this event to flag `blocked` instead of raw error/retry
  // count — see brain-retro-analyzer.inferOutcome (A-1 fix).
  let finalResult = null;
  for (let i = turn.length - 1; i >= 0 && finalResult === null; i--) {
    const blocks = blocksOf(turn[i]);
    for (let j = blocks.length - 1; j >= 0; j--) {
      const block = blocks[j];
      if (block && block.type === 'tool_result') {
        finalResult = block;
        break;
      }
    }
  }
  if (finalResult !== null && finalResult.is_error === true) {
    events.push({ kind: 'unrecovered_error' });
  }
  return events;
}
// HTML-comment routing tag: captures provenance, node and counterfactual.
const ROUTING_TAG_RE =
  /<!--\s*routing:\s*provenance=([\w_]+)\s+node=(\S+)\s+counterfactual=(\S+)\s*-->/;

/**
 * Find the routing tag Claude prints when a method was user-directed (spec §4.2).
 * Only text blocks of array-form message content are searched; the first tag
 * found in entry order wins.
 *
 * @param {Array} turn - Entries of the turn.
 * @returns {{kind: string, node: string, claude_would_have_chosen: string}|null}
 *   The three captured tag fields, or null when no text block carries a tag.
 */
export function parseRoutingTag(turn) {
  for (const entry of turn) {
    const message = entry && entry.message;
    if (!message || !Array.isArray(message.content)) continue;
    for (const block of message.content) {
      if (!block || block.type !== 'text' || typeof block.text !== 'string') continue;
      const hit = ROUTING_TAG_RE.exec(block.text);
      if (hit === null) continue;
      const [, kind, node, claude_would_have_chosen] = hit;
      return { kind, node, claude_would_have_chosen };
    }
  }
  return null;
}
/**
 * Text of the last real user prompt — used by the Stop-hook routing-gate (Task 5).
 * @param {string} transcriptText - Raw JSONL transcript contents.
 * @returns {string} promptText of the entry chosen by findTurnStart.
 */
export function extractLastUserPromptText(transcriptText) {
  const parsed = parseLines(transcriptText);
  const promptEntry = parsed.entries[findTurnStart(parsed.entries)];
  return promptText(promptEntry);
}
/**
 * Content of the last assistant message strictly before the turn start —
 * the message that may have offered options to the user (spec §11.5).
 * Assistant entries whose content is neither an array nor a string are
 * skipped and the search continues backwards.
 *
 * @param {Array} entries - All parsed transcript entries.
 * @param {number} turnStartIdx - Index of the entry that opens the turn.
 * @returns {Array|string|null} The content found, or null when there is none.
 */
function extractLastAssistantContent(entries, turnStartIdx) {
  let cursor = turnStartIdx;
  while (--cursor >= 0) {
    const message = entries[cursor] && entries[cursor].message;
    if (!message || message.role !== 'assistant') continue;
    const body = message.content;
    if (Array.isArray(body) || typeof body === 'string') return body;
  }
  return null;
}
/**
 * Parse a transcript JSONL string into an observer episode (schema v2).
 *
 * The episode covers the last turn only: from the last real user prompt to
 * the end of the transcript. `outcome` is always emitted as 'unknown' here.
 *
 * @param {string} transcriptText - Raw JSONL transcript contents.
 * @param {string|null} fallbackSessionId - Used when the transcript has no sessionId.
 * @returns {object} v2 episode.
 */
export function parseTranscript(transcriptText, fallbackSessionId = null) {
  const { entries, broken, total } = parseLines(transcriptText);

  // Session id: first entry that carries one, then the fallback, then a
  // time-based placeholder.
  const sessionCarrier = entries.find((e) => e && e.sessionId);
  const sessionId =
    (sessionCarrier && sessionCarrier.sessionId) || fallbackSessionId || `unknown-${Date.now()}`;

  const start = findTurnStart(entries);
  const turn = entries.slice(start);

  // Turn boundaries come from the first / last truthy timestamp in the turn;
  // without any timestamp both fall back to "now".
  const stamps = [];
  for (const entry of turn) {
    const stamp = entry && entry.timestamp;
    if (stamp) stamps.push(stamp);
  }
  const started_at = stamps.length > 0 ? stamps[0] : new Date().toISOString();
  const ended_at = stamps.length > 0 ? stamps[stamps.length - 1] : started_at;
  const durationMs = new Date(ended_at) - new Date(started_at);

  const { skills, counts, errorCount } = collectToolUse(turn);
  const events = skills.map((skill) => ({ kind: 'skill_invoked', skill }));
  if (Object.keys(counts).length > 0) {
    events.push({ kind: 'tool_summary', counts });
  }
  let remainingErrors = errorCount;
  while (remainingErrors-- > 0) {
    events.push({ kind: 'error', message: 'tool_result reported is_error' });
  }
  for (const event of extractProcessEvents(turn, broken, total, durationMs)) events.push(event);
  for (const event of extractAskUserQuestionEvents(turn)) events.push(event);

  const usedSuperpowers = skills.some((s) => String(s).startsWith(SUPERPOWERS_PREFIX));
  const prompt = promptText(entries[start]);

  // Provenance: a detected user choice wins; otherwise fall back to the
  // routing tag, and finally to 'autonomous'.
  const lastAsstContent = extractLastAssistantContent(entries, start);
  let decision_provenance =
    detectChoiceProvenance(prompt, lastAsstContent) || detectAskUserQuestionChoice(turn);
  if (!decision_provenance) {
    const tag = parseRoutingTag(turn);
    if (tag && tag.kind === 'user_directed_method') {
      decision_provenance = {
        kind: 'user_directed_method',
        claude_would_have_chosen: tag.claude_would_have_chosen,
      };
    } else {
      decision_provenance = { kind: 'autonomous', claude_would_have_chosen: null };
    }
  }

  const hard_floor = usedSuperpowers
    ? { invoked: true, rules: ['Pravila §12'] }
    : { invoked: false, rules: [] };

  return {
    schema_version: 2,
    task_id: sessionId,
    task_ref: sessionId,
    timestamps: { started_at, ended_at },
    path_type: usedSuperpowers ? 'regulated' : 'improvised',
    outcome: 'unknown',
    prompt_signal: classifyPromptSignal(prompt),
    decision_provenance,
    environment: extractEnvironment(entries, start),
    task_size: extractTaskSize(turn),
    task_cost: extractTokenUsage(turn),
    primary_rationale: {
      step: 1,
      node_chosen: skills.length > 0 ? skills[0] : 'direct',
      triggers_matched: extractTriggers(turn),
      candidates_considered: extractCandidates(turn),
      boundaries_applied: extractBoundaries(turn),
      hard_floor,
      task_classification: classifyTask(prompt),
    },
    events,
  };
}