Files
portal/tools/observer-transcript-parser.mjs
T
Дмитрий 5d3e29669b feat(observer): parallel_session +OR pre-flight git fetch heuristic (Task 13 PIVOT)
Closes brain-retro 2026-05-20 #13 PIVOT — additive to F1 (parallel
session sessions session). F1 narrowed parallel_session to tool_result-only
to fix live FP. This Task adds OR-clause: Bash command containing
'git fetch && git log HEAD..origin/...' (Pravila §15.2 pre-flight)
is a strong signal that the operator expects parallel sessions.

Does NOT overwrite F1 — both signals coexist via OR.

4 new vitest tests, 319/319 GREEN.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-20 13:47:41 +03:00

709 lines
27 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env node
/**
* Transcript parser for the brain governance observer.
* Deterministically extracts episode fields from a Claude Code session
* transcript (JSONL). No LLM — pure parsing.
*
* Scope: the last turn (from the last real user prompt to end of file) —
* one episode == one prompt→response cycle.
*
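* Reasoning fields (triggers_matched / candidates_considered /
* boundaries_applied) are not recorded verbatim in a transcript. They are
* filled best-effort: heuristics over the assistant text of the turn, merged
* with the optional reasoning tag (see parseReasoningTag). They may be []
* when neither source yields anything (ADR-011 follow-up).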
*
* Security Guidance #40: pure parsing — no exec/execSync.
* Per ADR-011 §6 + spec v1.1 §5.2.1.
*/
import { detectChoiceProvenance, detectAskUserQuestionChoice } from './observer-choice-detector.mjs';
// Skill-name prefix checked by parseTranscript: if any invoked skill starts
// with it, the episode is reported with path_type 'regulated'.
const SUPERPOWERS_PREFIX = 'superpowers:';
/**
 * Split raw JSONL text into parsed entries.
 *
 * - Blank lines are ignored and do not count towards `total`.
 * - A line that fails JSON.parse increments `broken` (used for parse_gap) and
 *   is skipped; this function never throws on bad input.
 * - quirk #101 root fix: Claude Code re-prints context-rebuild snapshots with
 *   the SAME `uuid`. Only the first entry per uuid is kept, otherwise
 *   session_turn / task_size / events double-count and session_turn becomes
 *   non-monotonic between parses taken at different file-growth states.
 *   Entries without a uuid (synthetic test fixtures) pass through unchanged.
 *
 * @param {string} text - Raw transcript contents (falsy values are treated as '').
 * @returns {{ entries: Array, broken: number, total: number }}
 */
function parseLines(text) {
  const knownUuids = new Set();
  const result = { entries: [], broken: 0, total: 0 };
  const rawLines = String(text || '').split('\n');

  for (let i = 0; i < rawLines.length; i += 1) {
    const candidate = rawLines[i].trim();
    if (candidate === '') continue;
    result.total += 1;

    let parsed;
    try {
      parsed = JSON.parse(candidate);
    } catch {
      // Broken line — counted for parse_gap, never thrown.
      result.broken += 1;
      continue;
    }

    const uuid = parsed && parsed.uuid;
    if (uuid) {
      if (knownUuids.has(uuid)) continue; // duplicate snapshot of an entry already kept
      knownUuids.add(uuid);
    }
    result.entries.push(parsed);
  }

  return result;
}
// Prefixes of synthetic user-role messages. Skill invocation content, local
// slash-command output/invocation and interrupt markers are stored with
// role:'user' but are NOT genuine prompts (no UserPromptSubmit hook context),
// so they must never be treated as turn boundaries.
const SYNTHETIC_PROMPT_MARKERS = [
  'Base directory for this skill:',
  '<local-command-stdout>',
  '<local-command-caveat>',
  '<command-name>',
  '[Request interrupted by user]',
];

/**
 * True when `text`, ignoring leading whitespace, begins with one of the
 * synthetic markers. A marker appearing later in the text does not count.
 */
function isSyntheticPrompt(text) {
  const body = String(text || '').trimStart();
  for (const marker of SYNTHETIC_PROMPT_MARKERS) {
    if (body.startsWith(marker)) return true;
  }
  return false;
}
/**
 * Decide whether a transcript entry is a genuine user prompt (a turn boundary).
 *
 * An entry qualifies only when its message has role 'user' and carries
 * non-empty, non-synthetic text:
 *  - string content: must be non-blank and must not start with a synthetic marker;
 *  - array content: must contain no tool_result block (those are tool-output
 *    carriers), and the joined text of its text blocks must be non-blank and
 *    non-synthetic.
 *
 * The non-blank requirement applies to both forms, so a message whose text
 * blocks are all empty is not treated as a prompt (previously only the string
 * form rejected blank content).
 *
 * @param {object} entry - Parsed transcript entry (may be null/undefined).
 * @returns {boolean}
 */
function isRealUserPrompt(entry) {
  const msg = entry && entry.message;
  if (!msg || msg.role !== 'user') return false;
  const c = msg.content;
  if (typeof c === 'string') {
    return c.trim().length > 0 && !isSyntheticPrompt(c);
  }
  if (Array.isArray(c)) {
    // A tool_result anywhere in the message marks it as a tool-output carrier.
    if (c.some((b) => b && b.type === 'tool_result')) return false;
    const text = c
      .filter((b) => b && b.type === 'text')
      .map((b) => b.text || '')
      .join(' ');
    // Blank text also covers "no text block at all": the joined string is ''.
    return text.trim().length > 0 && !isSyntheticPrompt(text);
  }
  return false;
}
/**
 * Index of the last genuine user prompt in `entries`, scanning from the end.
 * Falls back to 0 when no entry qualifies, so the whole transcript is treated
 * as one turn.
 */
function findTurnStart(entries) {
  let idx = entries.length;
  while (idx-- > 0) {
    if (isRealUserPrompt(entries[idx])) return idx;
  }
  return 0;
}
/**
 * Remove every <system-reminder>…</system-reminder> span from `text`.
 * Matching is non-greedy and may span lines; an unterminated opening tag is
 * left untouched. Falsy input yields ''.
 */
function stripSystemReminders(text) {
  const reminderSpan = /<system-reminder>[\s\S]*?<\/system-reminder>/g;
  const source = String(text || '');
  return source.replace(reminderSpan, '');
}
/**
 * Text of an entry's message with system-reminder spans stripped.
 * String content is used as-is; for array content the `text` of every text
 * block is joined with single spaces. Any other shape yields ''.
 */
function promptText(entry) {
  const content = entry && entry.message && entry.message.content;
  if (typeof content === 'string') return stripSystemReminders(content);
  if (!Array.isArray(content)) return '';

  const pieces = [];
  for (const block of content) {
    if (block && block.type === 'text') pieces.push(block.text || '');
  }
  return stripSystemReminders(pieces.join(' '));
}
/**
 * Classify a prompt into a coarse task category using ordered keyword rules.
 *
 * The text is lower-cased and the rules are tried top to bottom; the first
 * rule that matches wins, so earlier categories shadow later ones.
 *
 * Cyrillic keywords: JavaScript's `\b` only knows ASCII word characters, so
 * `статус\b` cannot match "статус" followed by a space or end of input (both
 * sides are "non-word"). The 'monitoring' rule therefore uses an explicit
 * "no letter/digit/underscore follows" lookahead for that keyword.
 *
 * @param {string} text - Prompt text; falsy values are treated as ''.
 * @returns {string} 'memory-sync' | 'regulatory-bump' | 'planning' | 'release' |
 *   'refactor' | 'bugfix' | 'feature' | 'docs' | 'analysis' | 'cleanup' |
 *   'monitoring' | 'question' | 'other'.
 */
export function classifyTask(text) {
  const t = String(text || '').toLowerCase();
  if (/обнови эталон|sync memory|обнови (?:память|memory|memory\.md)/.test(t)) return 'memory-sync';
  if (/обнови claude|правк[аи] pravila|update pravila|обнови psr|обнови tooling|нормативка/.test(t)) return 'regulatory-bump';
  if (/план|plan\b|спроектируй|design\b|brainstorm|обсудим/.test(t)) return 'planning';
  if (/\bpush\b|\bmerge\b|\bdeploy\b|\bcommit\b|\brelease\b|релиз|тегни/.test(t)) return 'release';
  if (/рефактор|refactor/.test(t)) return 'refactor';
  if (/баг|bug|почини|исправ|fix\b|сломан|broken/.test(t)) return 'bugfix';
  if (/фич|feature|добав|implement|реализ|создай|create|новый|new /.test(t)) return 'feature';
  if (/докум|readme|\bdocs?\b/.test(t)) return 'docs';
  if (/проанализ|анализ|оцени|review|examine|разбор|посмотри что/.test(t)) return 'analysis';
  if (/убери|удали|почисть|cleanup|очисти|drop\s/.test(t)) return 'cleanup';
  // "статус" must open the prompt and be a whole word (Unicode-safe boundary, see above).
  if (/^\s*статус(?![a-zа-яё0-9_])|\bstatus\b|проверь состоян|health/.test(t)) return 'monitoring';
  if (/\?|как |что |почему|зачем|why|how |what /.test(t)) return 'question';
  return 'other';
}
/**
 * Summarise tool activity over a list of entries.
 *
 * @param {Array} entries - Transcript entries (normally one turn).
 * @returns {{ skills: string[], counts: Object<string, number>, errors: Array<{tool: string, summary: string}> }}
 *   skills — `input.skill` of every Skill tool_use ('unknown' when absent);
 *   counts — number of tool_use blocks per tool name;
 *   errors — one record per tool_result with is_error === true, attributed to
 *            the originating tool and carrying the first 80 chars of its text.
 */
function collectToolUse(entries) {
  const blocksOf = (entry) =>
    entry && entry.message && Array.isArray(entry.message.content) ? entry.message.content : [];

  // Pass 1: id → tool name. Done up front because a tool_result may live in a
  // different message than the tool_use it answers.
  const toolNameById = {};
  for (const entry of entries) {
    for (const block of blocksOf(entry)) {
      if (block && block.type === 'tool_use') toolNameById[block.id] = block.name || 'unknown';
    }
  }

  // Pass 2: per-tool counts, skill names and error attribution.
  const skills = [];
  const counts = {};
  const errors = [];
  for (const entry of entries) {
    for (const block of blocksOf(entry)) {
      if (!block || typeof block !== 'object') continue;

      if (block.type === 'tool_use') {
        const toolName = block.name || 'unknown';
        counts[toolName] = (counts[toolName] || 0) + 1;
        if (toolName === 'Skill') {
          skills.push((block.input && block.input.skill) || 'unknown');
        }
        continue;
      }

      if (block.type === 'tool_result' && block.is_error === true) {
        const payload = block.content;
        let text = '';
        if (typeof payload === 'string') {
          text = payload;
        } else if (Array.isArray(payload)) {
          text = payload.map((part) => (part && typeof part.text === 'string' ? part.text : '')).join(' ');
        }
        errors.push({ tool: toolNameById[block.tool_use_id] || 'unknown', summary: text.slice(0, 80) });
      }
    }
  }

  return { skills, counts, errors };
}
// Tool names whose input is read for a file path (file_path / notebook_path)
// when extractTaskSize counts the unique files touched in a turn.
const FILE_TOOLS = new Set(['Read', 'Edit', 'Write', 'MultiEdit', 'NotebookEdit']);
/**
 * Deterministic environment factors for the turn that starts at turnStartIdx.
 *
 * - economy_level: percentage from an "=== ECONOMY MODE: N%" banner found
 *   anywhere in the JSON-stringified turn, else null.
 * - model: `message.model` of the first turn entry that has one, else null.
 * - post_compaction: whether an isCompactSummary entry precedes the turn.
 * - session_turn: genuine user prompts since the last compaction, up to and
 *   including the turn start.
 * - parallel_session: collision evidence in tool_result text OR a pre-flight
 *   `git fetch && git log HEAD..origin/…` Bash command in the turn.
 *
 * @param {Array} allEntries - All (deduplicated) transcript entries.
 * @param {number} turnStartIdx - Index of the turn's opening entry.
 * @returns {{ economy_level: (number|null), model: (string|null), post_compaction: boolean, session_turn: number, parallel_session: boolean }}
 */
export function extractEnvironment(allEntries, turnStartIdx) {
  const turn = allEntries.slice(turnStartIdx);

  const economyMatch = /=== ECONOMY MODE:\s*(\d+)\s*%/.exec(JSON.stringify(turn));
  const economy_level = economyMatch ? Number(economyMatch[1]) : null;

  const modelCarrier = turn.find((e) => e && e.message && e.message.model);
  const model = modelCarrier ? modelCarrier.message.model : null;

  // The transcript file accumulates duplicated context-rebuild snapshots
  // (repeated isCompactSummary entries — see feedback_environment quirk #101).
  // Counting prompts from index 0 would inflate session_turn with those dupes,
  // so counting starts after the LAST compaction before the turn. The result
  // is monotonic: "turns since last compaction".
  let lastCompactIdx = -1;
  for (let i = 0; i < allEntries.length && i < turnStartIdx; i += 1) {
    const entry = allEntries[i];
    if (entry && entry.isCompactSummary === true) lastCompactIdx = i;
  }
  const post_compaction = lastCompactIdx >= 0;

  let session_turn = 0;
  for (let i = lastCompactIdx + 1; i < allEntries.length && i <= turnStartIdx; i += 1) {
    if (isRealUserPrompt(allEntries[i])) session_turn += 1;
  }

  // Only strong collision evidence counts — a bare mention of "parallel
  // sessions" is not a signal (best-effort per spec R2; a false negative is
  // preferred over a false positive). The scan is limited to tool_result
  // content (real command output / Bash stderr): prose in user prompts or
  // assistant text, including analysis that quotes collision phrases, must not
  // trigger. Fixes live FP (episode line 20).
  const collisionEvidence =
    /чужой staged|foreign git index|index\.lock|another git process/i.test(collectToolResultText(turn));
  const parallel_session = collisionEvidence || hasPreFlightFetch(turn);

  return { economy_level, model, post_compaction, session_turn, parallel_session };
}
/**
 * Pravila §15.2 pre-flight signal (Task 13 PIVOT): true when a Bash command in
 * the turn contains `git fetch ... && git log HEAD..origin/<branch> ...`.
 * That sequence is the hard-rule pre-flight sync performed before editing
 * regulatory documents while parallel sessions may be running, so it is a
 * strong signal that the operator expects parallel sessions. It is additive
 * to the F1 collision detector (parallel_session): combined with OR, never
 * overwriting it.
 *
 * The two commands must be joined by a single `&&` with no `|`, `&` or `;`
 * between them.
 *
 * @param {Array} turn - Entries of the turn (null/undefined is treated as empty).
 * @returns {boolean}
 */
function hasPreFlightFetch(turn) {
  const preFlight = /git\s+fetch[^|&;]*&&[^|&;]*git\s+log\s+HEAD\.\.origin\//i;
  for (const entry of turn || []) {
    const blocks = entry && entry.message && Array.isArray(entry.message.content) ? entry.message.content : [];
    const found = blocks.some(
      (block) =>
        Boolean(block) &&
        block.type === 'tool_use' &&
        block.name === 'Bash' &&
        Boolean(block.input) &&
        preFlight.test(String(block.input.command || ''))
    );
    if (found) return true;
  }
  return false;
}
/**
 * Gather the text of every tool_result block in the turn, newline-joined.
 * This is the only surface trusted for parallel_session collision evidence
 * (see extractEnvironment).
 *
 * Both content shapes are supported: a plain string, and the structured array
 * form (`content: [{ type: 'text', text }]`), from which every element with a
 * string `text` contributes.
 *
 * @param {Array} turn - Entries of the turn.
 * @returns {string}
 */
function collectToolResultText(turn) {
  const chunks = [];
  for (const entry of turn) {
    const blocks = entry && entry.message && Array.isArray(entry.message.content) ? entry.message.content : [];
    for (const block of blocks) {
      if (!block || block.type !== 'tool_result') continue;
      const payload = block.content;
      if (typeof payload === 'string') {
        chunks.push(payload);
        continue;
      }
      if (!Array.isArray(payload)) continue;
      const texts = payload.filter((part) => part && typeof part.text === 'string').map((part) => part.text);
      chunks.push(...texts);
    }
  }
  return chunks.join('\n');
}
/**
 * Task size: total tool calls + unique file paths touched (per spec §3,
 * gap-resolution 2).
 *
 * Every tool_use block counts as one tool call. For tools listed in
 * FILE_TOOLS the `file_path` (or `notebook_path`) input is collected into a
 * set, so a file touched several times is counted once.
 *
 * A null/undefined turn is treated as empty, matching the other exported
 * extractors in this module (previously this function threw a TypeError).
 *
 * @param {Array} turn - Entries of the turn.
 * @returns {{ tool_calls: number, files_touched: number, files: string[] }}
 */
export function extractTaskSize(turn) {
  let tool_calls = 0;
  const files = new Set();
  for (const e of turn || []) {
    const content = e && e.message && Array.isArray(e.message.content) ? e.message.content : [];
    for (const b of content) {
      if (!b || b.type !== 'tool_use') continue;
      tool_calls += 1;
      if (FILE_TOOLS.has(b.name) && b.input) {
        const p = b.input.file_path || b.input.notebook_path;
        if (p) files.add(String(p));
      }
    }
  }
  return { tool_calls, files_touched: files.size, files: [...files] };
}
/**
 * Token-usage aggregation across all messages in the turn that carry `usage`.
 *
 * DESIGN: returns a zero-filled object (NOT null) when no `usage` data was
 * captured. Consumers cannot currently distinguish "actually 0 tokens" from
 * "no usage data" — accepted trade-off because (a) every assistant message
 * in real Claude Code transcripts has `usage` (verified B1 brain-retro
 * 2026-05-20: 6265/6265 messages with usage, 0 partial-stream), and
 * (b) `task_cost` is not yet read by analyzer/STATUS.md, so the semantic
 * gap is a future-only concern. Re-evaluate when factor matrix adds cost.
 *
 * Captures: 4 base token fields + `iterations` (extended-thinking detector)
 * + `server_tool_use.{web_search,web_fetch}_requests` counts.
 * Other usage fields (cache_creation object, inference_geo, service_tier,
 * speed) — out-of-scope for current analyzer.
 *
 * Defensive:
 *  - entries whose `usage` is not an object are skipped (e.g. `"usage": 42`);
 *  - only finite numbers are summed. A malformed field such as
 *    `"input_tokens": "7"` contributes 0 instead of turning the running
 *    total into a string via `+=`.
 *
 * @param {Array} turn - Entries of the turn (null/undefined is treated as empty).
 * @returns {{ input_tokens: number, output_tokens: number, cache_read_input_tokens: number, cache_creation_input_tokens: number, web_search_requests: number, web_fetch_requests: number, iterations: number }}
 */
export function extractTokenUsage(turn) {
  // Anything that is not a finite number counts as 0.
  const num = (value) => (typeof value === 'number' && Number.isFinite(value) ? value : 0);

  const totals = {
    input_tokens: 0,
    output_tokens: 0,
    cache_read_input_tokens: 0,
    cache_creation_input_tokens: 0,
    web_search_requests: 0,
    web_fetch_requests: 0,
    iterations: 0,
  };

  for (const e of turn || []) {
    const u = e && e.message && e.message.usage;
    if (!u || typeof u !== 'object') continue;
    totals.input_tokens += num(u.input_tokens);
    totals.output_tokens += num(u.output_tokens);
    totals.cache_read_input_tokens += num(u.cache_read_input_tokens);
    totals.cache_creation_input_tokens += num(u.cache_creation_input_tokens);
    totals.iterations += num(u.iterations);
    const serverTools = u.server_tool_use;
    if (serverTools && typeof serverTools === 'object') {
      totals.web_search_requests += num(serverTools.web_search_requests);
      totals.web_fetch_requests += num(serverTools.web_fetch_requests);
    }
  }

  return totals;
}
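/**
* NOTE: this comment documents extractAskUserQuestionEvents, which is defined
* further down in this file (not the helper that immediately follows).
* For each AskUserQuestion toolUseResult in the turn, one event is emitted per question.
* answer_kind: 'option' (answer equals an option label after trimming),
* 'custom' (free-text), 'no_answer' (missing/empty).
*/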
/**
 * Newline-joined text of every assistant text block in the turn.
 * Entries with another role, non-array content, or text blocks whose `text`
 * is not a string contribute nothing. A null/undefined turn yields ''.
 */
function assistantTextOfTurn(turn) {
  const texts = [];
  for (const entry of turn || []) {
    const message = entry && entry.message;
    if (!message || message.role !== 'assistant' || !Array.isArray(message.content)) continue;
    for (const block of message.content) {
      if (block && block.type === 'text' && typeof block.text === 'string') texts.push(block.text);
    }
  }
  return texts.join('\n');
}
// Reference shapes that count as a "trigger" when they appear in assistant text.
const TRIGGER_PATTERNS = [
  /\bPravila\s+§\d+(?:\.\d+)?/g,
  /\bADR-\d+/g,
  /\bPSR_v1\s+R\d+(?:\.\d+)?/g,
  /\brouting-off-phase\s+L\d+/g,
  /\bL\d+\s+chain/g,
  /\bhard-(?:floor|rule)\b/gi,
];

/**
 * Heuristic triggers from assistant text. Conservative-broad — false positives OK.
 *
 * Matches are collected pattern by pattern, in text order, and de-duplicated
 * while keeping first-seen order. A bare "L<n> chain" hit is normalised to
 * "routing-off-phase L<n>" so both spellings collapse into one value.
 *
 * @param {Array} turn - Entries of the turn.
 * @returns {string[]}
 */
export function extractTriggers(turn) {
  const text = assistantTextOfTurn(turn);
  const hits = TRIGGER_PATTERNS.flatMap((pattern) => text.match(pattern) || []);
  const normalised = hits.map((hit) =>
    /^L\d+\s+chain$/.test(hit) ? `routing-off-phase ${hit.split(/\s+/)[0]}` : hit
  );
  return [...new Set(normalised)];
}
// List-item shapes: "1. item" / "1) item" and "- item" / "* item", one per line.
const CANDIDATE_NUMBERED_RE = /^\s*\d+[.\)]\s+([^\n]+)$/gm;
const CANDIDATE_BULLET_RE = /^\s*[-*]\s+([^\n]+)$/gm;

/**
 * Heuristic candidates: ≥2 numbered (preferred) or bulleted items.
 *
 * Numbered items win whenever at least two exist; bullets are only consulted
 * otherwise. Fewer than two items of either kind yields []. Each item is the
 * trimmed text after the list marker.
 *
 * @param {Array} turn - Entries of the turn.
 * @returns {string[]}
 */
export function extractCandidates(turn) {
  const text = assistantTextOfTurn(turn);
  for (const pattern of [CANDIDATE_NUMBERED_RE, CANDIDATE_BULLET_RE]) {
    const items = Array.from(text.matchAll(pattern), (match) => match[1].trim());
    if (items.length >= 2) return items;
  }
  return [];
}
// Reference shapes that count as an applied "boundary" in assistant text.
const BOUNDARY_PATTERNS = [
  /\bADR-\d+(?:\s+§\d+(?:\.\d+)?)?/g,
  /\bPSR_v1\s+R\d+(?:\.\d+)?/g,
  /\bPravila\s+§\d+(?:\.\d+)?/g,
  /\brouting-off-phase\s+L\d+/g,
  /\bL\d+\s+chain/g,
];

/**
 * Heuristic boundaries — overlaps with triggers, dedup per-array only.
 *
 * Matches are gathered pattern by pattern and de-duplicated in first-seen
 * order; a bare "L<n> chain" hit is normalised to "routing-off-phase L<n>".
 *
 * @param {Array} turn - Entries of the turn.
 * @returns {string[]}
 */
export function extractBoundaries(turn) {
  const text = assistantTextOfTurn(turn);
  const unique = new Set();
  BOUNDARY_PATTERNS.forEach((pattern) => {
    (text.match(pattern) || []).forEach((hit) => {
      const isChainRef = /^L\d+\s+chain$/.test(hit);
      unique.add(isChainRef ? `routing-off-phase ${hit.split(/\s+/)[0]}` : hit);
    });
  });
  return Array.from(unique);
}
/**
 * Emit one `ask_user_question` event per question found in the
 * `toolUseResult` of the turn's entries.
 *
 * An entry is considered only when its toolUseResult has a `questions` array
 * and a truthy `answers` map (keyed by question text). For each question:
 *  - 'no_answer' — the answer is missing, not a string, or empty;
 *  - 'option'    — the trimmed answer equals a trimmed option label;
 *  - 'custom'    — any other (free-text) answer.
 * `question_count` is the number of questions in that toolUseResult.
 *
 * @param {Array} turn - Entries of the turn (null/undefined is treated as empty).
 * @returns {Array<{kind: string, question_count: number, answer_kind: string}>}
 */
export function extractAskUserQuestionEvents(turn) {
  const kindOf = (question, answers) => {
    // Labels are resolved first so malformed `options` fail the same way regardless of the answer.
    const labels = (question.options || [])
      .map((option) => option && option.label)
      .filter((label) => typeof label === 'string');
    const answer = answers[question.question];
    if (typeof answer !== 'string' || answer.length === 0) return 'no_answer';
    const wanted = answer.trim();
    return labels.some((label) => label.trim() === wanted) ? 'option' : 'custom';
  };

  const events = [];
  for (const entry of turn || []) {
    const result = entry && entry.toolUseResult;
    if (!result || !Array.isArray(result.questions) || !result.answers) continue;
    const question_count = result.questions.length;
    for (const question of result.questions) {
      events.push({ kind: 'ask_user_question', question_count, answer_kind: kindOf(question, result.answers) });
    }
  }
  return events;
}
/**
 * Classify the opening user-prompt sentiment (per spec §6 / gap-resolution 1).
 *
 * Rules are tried in order on the lower-cased, trimmed text:
 *  1. 'correction' — any correction/rollback phrase anywhere in the text;
 *  2. 'approval'   — the prompt starts with an approval word followed by a
 *                    comma, whitespace or end of input;
 *  3. 'new_task'   — the prompt starts with a "next/now" word, or classifyTask
 *                    recognises a category and the text is longer than 15 chars;
 *  4. 'neutral'    — everything else.
 *
 * Cyrillic word boundaries: JavaScript's `\b` only knows ASCII word
 * characters, so `не то\b`, `не так\b` and `\bстоп\b` could never match those
 * phrases as whole words. They use explicit lookarounds meaning "no
 * Latin/Cyrillic letter, digit or underscore adjacent" instead — the same idea
 * as the lookahead already used by the 'new_task' rule.
 *
 * @param {string} text - Prompt text; falsy values are treated as ''.
 * @returns {'correction'|'approval'|'new_task'|'neutral'}
 */
export function classifyPromptSignal(text) {
  const t = String(text || '').toLowerCase().trim();
  if (
    /не совсем|другое|другая|не сходится|wrong direction|не то(?![a-zа-яё0-9_])|не так(?![a-zа-яё0-9_])|переделай|отбой|(?<![a-zа-яё0-9_])стоп(?![a-zа-яё0-9_])|почему ты|неверно|не верно|это не |не работает|не правильн|сломал|опять|снова не|всё ещё|все ещё|все еще|верни как|откат|\brevert\b|\bundo\b|still not|doesn'?t work|does not work|\bwrong\b/.test(
      t
    )
  ) {
    return 'correction';
  }
  if (/^(ок|окей|ok|спасибо|супер|отлично|готово|дальше|идеально|класс|хорошо|принято|well done|\bnice\b)([,\s]|$)/.test(t)) {
    return 'approval';
  }
  if (/^(?:теперь|далее|следующее)(?=\s|[,.!?:;]|$)|^next\b|^now\b/.test(t)) return 'new_task';
  if (classifyTask(t) !== 'other' && t.length > 15) return 'new_task';
  return 'neutral';
}
// Thresholds read by extractProcessEvents when deciding whether to emit the
// time_burn and parse_gap events.
const TIME_BURN_THRESHOLD_MS = 900000; // 15 min — turn wall-clock above this = time_burn
const PARSE_GAP_RATIO = 0.1; // >10% unparseable lines = parse_gap
/**
 * Heuristic retry count: an errored tool whose name is used again later in the turn.
 *
 * Each tool_result with is_error === true is attributed to a tool name via its
 * tool_use_id. It counts as one retry when a tool_use with the same name sits
 * in an entry with a strictly higher index than the entry holding the error.
 * Errors that cannot be attributed to a named tool are ignored.
 *
 * @param {Array} turn - Entries of the turn.
 * @returns {number}
 */
function detectRetries(turn) {
  const blocksOf = (entry) =>
    entry && entry.message && Array.isArray(entry.message.content) ? entry.message.content : [];

  // Pass 1: resolve ids to names and remember the latest entry index per tool name.
  const nameById = {};
  const lastUseIdx = new Map();
  for (let idx = 0; idx < turn.length; idx += 1) {
    for (const block of blocksOf(turn[idx])) {
      if (!block || block.type !== 'tool_use') continue;
      nameById[block.id] = block.name;
      lastUseIdx.set(block.name, idx); // idx only grows, so this ends up holding the maximum
    }
  }

  // Pass 2: an error is retried when its tool is used again in a later entry.
  let retries = 0;
  for (let idx = 0; idx < turn.length; idx += 1) {
    for (const block of blocksOf(turn[idx])) {
      if (!block || block.type !== 'tool_result' || block.is_error !== true) continue;
      const toolName = nameById[block.tool_use_id] || null;
      if (toolName && lastUseIdx.has(toolName) && lastUseIdx.get(toolName) > idx) retries += 1;
    }
  }
  return retries;
}
/**
 * Process events for the turn, emitted in this order: hook_fired (one summary
 * event), interrupt (one per interrupted user message), retry (one per
 * detected retry), time_burn, parse_gap, unrecovered_error.
 *
 * @param {Array} turn - Entries of the turn.
 * @param {number} broken - Unparseable transcript lines (computed by the caller).
 * @param {number} total - Non-blank transcript lines (computed by the caller).
 * @param {number} durationMs - Wall-clock duration of the turn (computed by the caller).
 * @returns {Array<object>}
 */
export function extractProcessEvents(turn, broken, total, durationMs) {
  const blocksOf = (entry) =>
    entry && entry.message && Array.isArray(entry.message.content) ? entry.message.content : [];
  const events = [];

  // hook_fired: per-hook counts plus the number of hook_error attachments.
  const hookCounts = {};
  let hookErrors = 0;
  for (const entry of turn) {
    const attachment = entry && entry.attachment;
    if (!attachment) continue;
    const failed = attachment.type === 'hook_error';
    if (!failed && attachment.type !== 'hook_success') continue;
    const hookName = attachment.hookName || 'unknown';
    hookCounts[hookName] = (hookCounts[hookName] || 0) + 1;
    if (failed) hookErrors += 1;
  }
  if (Object.keys(hookCounts).length > 0) {
    events.push({ kind: 'hook_fired', counts: hookCounts, errors: hookErrors });
  }

  // interrupt: a user-role message with a text block containing the interrupt marker.
  for (const entry of turn) {
    const fromUser = entry && entry.message && entry.message.role === 'user';
    if (!fromUser) continue;
    const interrupted = blocksOf(entry).some(
      (block) => block && block.type === 'text' && String(block.text || '').includes('[Request interrupted by user]')
    );
    if (interrupted) events.push({ kind: 'interrupt' });
  }

  // retry: one event per retry counted by the heuristic.
  let pendingRetries = detectRetries(turn);
  while (pendingRetries > 0) {
    events.push({ kind: 'retry' });
    pendingRetries -= 1;
  }

  if (durationMs > TIME_BURN_THRESHOLD_MS) {
    events.push({ kind: 'time_burn', duration_ms: durationMs });
  }
  if (total > 0 && broken / total > PARSE_GAP_RATIO) {
    events.push({ kind: 'parse_gap', broken, total });
  }

  // unrecovered_error: emitted iff the LAST tool_result in the turn has
  // is_error=true. This separates "turn ended on failure" from "errors that
  // were retried away" (e.g. TDD red→green, expected-fail commands). The
  // analyzer uses this event to flag `blocked` instead of raw error/retry
  // counts — see brain-retro-analyzer.inferOutcome (A-1 fix).
  let finalResult = null;
  for (let i = turn.length - 1; i >= 0 && finalResult === null; i -= 1) {
    const blocks = blocksOf(turn[i]);
    for (let j = blocks.length - 1; j >= 0; j -= 1) {
      if (blocks[j] && blocks[j].type === 'tool_result') {
        finalResult = blocks[j];
        break;
      }
    }
  }
  if (finalResult !== null && finalResult.is_error === true) {
    events.push({ kind: 'unrecovered_error' });
  }

  return events;
}
// Shape: <!-- routing: provenance=<word> node=<token> counterfactual=<token> -->
const ROUTING_TAG_RE =
  /<!--\s*routing:\s*provenance=([\w_]+)\s+node=(\S+)\s+counterfactual=(\S+)\s*-->/;

/**
 * Find the routing tag Claude prints when a method was user-directed (spec §4.2).
 *
 * Text blocks of the turn are scanned in order and the first tag found wins.
 * A null/undefined turn is treated as empty, matching parseReasoningTag
 * (previously this function threw a TypeError).
 *
 * @param {Array} turn - Entries of the turn.
 * @returns {{ kind: string, node: string, claude_would_have_chosen: string } | null}
 *   The parsed tag, or null when the turn contains none.
 */
export function parseRoutingTag(turn) {
  for (const e of turn || []) {
    const content = e && e.message && Array.isArray(e.message.content) ? e.message.content : [];
    for (const b of content) {
      if (!b || b.type !== 'text' || typeof b.text !== 'string') continue;
      const m = b.text.match(ROUTING_TAG_RE);
      if (m) return { kind: m[1], node: m[2], claude_would_have_chosen: m[3] };
    }
  }
  return null;
}
/**
 * Per-Agent-tool_use event (Task 12) — surfaces subagent dispatches in the
 * episode `events[]`.
 *
 * One `subagent_invoked` event is produced for every tool_use block named
 * 'Agent', carrying subagent_type ('unknown' when absent), model (null unless
 * given in the input) and the first 80 chars of a string description ('' otherwise).
 *
 * This is not the full subagent trace (that lives in
 * ~/.claude/projects/.../subagents/) — only what the parent Claude dispatched.
 *
 * @param {Array} turn - Entries of the turn (null/undefined is treated as empty).
 * @returns {Array<{kind: string, subagent_type: string, model: (string|null), description: string}>}
 */
export function extractAgentInvocations(turn) {
  const invocations = [];
  for (const entry of turn || []) {
    const blocks = entry && entry.message && Array.isArray(entry.message.content) ? entry.message.content : [];
    const agentCalls = blocks.filter((block) => block && block.type === 'tool_use' && block.name === 'Agent');
    for (const call of agentCalls) {
      const { subagent_type, model, description } = call.input || {};
      invocations.push({
        kind: 'subagent_invoked',
        subagent_type: subagent_type || 'unknown',
        model: model || null,
        description: typeof description === 'string' ? description.slice(0, 80) : '',
      });
    }
  }
  return invocations;
}
// Shape: <!-- reasoning: triggers="a; b" candidates="…" boundaries="…" -->
const REASONING_TAG_RE =
  /<!--\s*reasoning:\s*triggers="([^"]*)"\s+candidates="([^"]*)"\s+boundaries="([^"]*)"\s*-->/;

/**
 * Opt-in reasoning tag (Task 11). Claude may emit at most one such comment
 * per turn to declare triggers / candidates / boundaries explicitly.
 *
 * Each attribute value is a semicolon-separated list; items are trimmed and
 * empty items dropped. The first tag found in the turn's text blocks is
 * returned. The caller (parseTranscript) merges these lists into the
 * heuristic-derived arrays via Set-dedupe.
 *
 * @param {Array} turn - Entries of the turn (null/undefined is treated as empty).
 * @returns {{ triggers: string[], candidates: string[], boundaries: string[] } | null}
 */
export function parseReasoningTag(turn) {
  const toList = (raw) =>
    raw
      .split(';')
      .map((item) => item.trim())
      .filter(Boolean);

  for (const entry of turn || []) {
    const blocks = entry && entry.message && Array.isArray(entry.message.content) ? entry.message.content : [];
    for (const block of blocks) {
      if (!block || block.type !== 'text' || typeof block.text !== 'string') continue;
      const hit = REASONING_TAG_RE.exec(block.text);
      if (hit === null) continue;
      const [, triggers, candidates, boundaries] = hit;
      return { triggers: toList(triggers), candidates: toList(candidates), boundaries: toList(boundaries) };
    }
  }
  return null;
}
/**
 * Text of the last real user prompt — used by the Stop-hook routing-gate (Task 5).
 *
 * The transcript is parsed, the turn start located with findTurnStart, and the
 * text of that entry returned with system-reminder spans removed. Because
 * findTurnStart falls back to index 0, a transcript without any genuine
 * prompt yields the text of its first entry ('' for an empty transcript).
 *
 * @param {string} transcriptText - Raw JSONL transcript contents.
 * @returns {string}
 */
export function extractLastUserPromptText(transcriptText) {
  const parsed = parseLines(transcriptText);
  const turnStartEntry = parsed.entries[findTurnStart(parsed.entries)];
  return promptText(turnStartEntry);
}
/**
 * Content of the last assistant message strictly before the turn start —
 * the message that may have offered options to the user (spec §11.5).
 *
 * Entries are scanned backwards from turnStartIdx - 1. Assistant messages
 * whose content is neither an array nor a string are skipped.
 *
 * @param {Array} entries - All transcript entries.
 * @param {number} turnStartIdx - Index of the turn's opening entry.
 * @returns {Array|string|null} The content as stored, or null when none is found.
 */
function extractLastAssistantContent(entries, turnStartIdx) {
  let cursor = turnStartIdx;
  while (--cursor >= 0) {
    const message = entries[cursor] && entries[cursor].message;
    if (!message || message.role !== 'assistant') continue;
    const { content } = message;
    if (Array.isArray(content) || typeof content === 'string') return content;
  }
  return null;
}
/**
 * Parse a transcript JSONL string into an observer episode (schema v2).
 *
 * The episode covers the last turn only: from the last genuine user prompt to
 * the end of the transcript.
 *
 * @param {string} transcriptText - Raw JSONL transcript contents.
 * @param {string|null} fallbackSessionId - Used when the transcript has no sessionId.
 * @returns {object} v2 episode.
 */
export function parseTranscript(transcriptText, fallbackSessionId = null) {
  const { entries, broken, total } = parseLines(transcriptText);

  // Session id: first entry that carries one, then the fallback, then a synthetic id.
  const sessionCarrier = entries.find((e) => e && e.sessionId);
  const sessionId =
    (sessionCarrier && sessionCarrier.sessionId) || fallbackSessionId || `unknown-${Date.now()}`;

  const start = findTurnStart(entries);
  const turn = entries.slice(start);

  // Turn boundaries come from the first and last timestamps present in the turn.
  const stamps = turn.map((e) => e && e.timestamp).filter(Boolean);
  const started_at = stamps[0] || new Date().toISOString();
  const ended_at = stamps[stamps.length - 1] || started_at;
  const durationMs = new Date(ended_at) - new Date(started_at);

  const { skills, counts, errors } = collectToolUse(turn);
  const events = [
    ...skills.map((skill) => ({ kind: 'skill_invoked', skill })),
    ...(Object.keys(counts).length > 0 ? [{ kind: 'tool_summary', counts }] : []),
    ...errors.map((err) => ({ kind: 'error', tool: err.tool, summary: err.summary })),
    ...extractProcessEvents(turn, broken, total, durationMs),
    ...extractAskUserQuestionEvents(turn),
    ...extractAgentInvocations(turn),
  ];

  const usedSuperpowers = skills.some((s) => String(s).startsWith(SUPERPOWERS_PREFIX));
  const prompt = promptText(entries[start]);

  // Provenance: a detected user choice wins; otherwise the routing tag decides
  // between a user-directed method and an autonomous decision.
  const lastAsstContent = extractLastAssistantContent(entries, start);
  let decision_provenance =
    detectChoiceProvenance(prompt, lastAsstContent) || detectAskUserQuestionChoice(turn);
  if (!decision_provenance) {
    const routingTag = parseRoutingTag(turn);
    if (routingTag && routingTag.kind === 'user_directed_method') {
      decision_provenance = {
        kind: 'user_directed_method',
        claude_would_have_chosen: routingTag.claude_would_have_chosen,
      };
    } else {
      decision_provenance = { kind: 'autonomous', claude_would_have_chosen: null };
    }
  }

  // Reasoning fields: heuristics first, then the values declared in the opt-in
  // reasoning tag, de-duplicated while keeping first-seen order.
  const reasoningTag = parseReasoningTag(turn);
  const mergeUnique = (heuristic, declared) => [...new Set([...heuristic, ...declared])];
  const primary_rationale = {
    step: 1,
    node_chosen: skills.length > 0 ? skills[0] : 'direct',
    triggers_matched: mergeUnique(extractTriggers(turn), reasoningTag ? reasoningTag.triggers : []),
    candidates_considered: mergeUnique(extractCandidates(turn), reasoningTag ? reasoningTag.candidates : []),
    boundaries_applied: mergeUnique(extractBoundaries(turn), reasoningTag ? reasoningTag.boundaries : []),
    hard_floor: usedSuperpowers
      ? { invoked: true, rules: ['Pravila §12'] }
      : { invoked: false, rules: [] },
    task_classification: classifyTask(prompt),
  };

  return {
    schema_version: 2,
    task_id: sessionId,
    task_ref: sessionId,
    timestamps: { started_at, ended_at },
    path_type: usedSuperpowers ? 'regulated' : 'improvised',
    outcome: 'unknown',
    prompt_signal: classifyPromptSignal(prompt),
    decision_provenance,
    environment: extractEnvironment(entries, start),
    task_size: extractTaskSize(turn),
    task_cost: extractTokenUsage(turn),
    primary_rationale,
    events,
  };
}