0e3938f845
detectChoiceProvenance runs BEFORE parseRoutingTag; if last assistant turn offered options and user prompt references one, decision_provenance becomes user_chose_from_options. Otherwise falls back to existing routing-tag / autonomous logic. 3 new parser tests GREEN; all existing tests still GREEN (43/43). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
366 lines
12 KiB
JavaScript
366 lines
12 KiB
JavaScript
#!/usr/bin/env node
|
|
/**
 * Transcript parser for the brain governance observer.
 * Deterministically extracts episode fields from a Claude Code session
 * transcript (JSONL). No LLM — pure parsing.
 *
 * Scope: the last turn (from the last real user prompt to end of file) —
 * one episode == one prompt→response cycle.
 *
 * Reasoning fields (triggers_matched / candidates_considered /
 * boundaries_applied) are NOT recoverable from a transcript and stay [];
 * their capture is a separate design question (ADR-011 follow-up).
 *
 * Security Guidance #40: pure parsing — no exec/execSync.
 * Per ADR-011 §6 + spec v1.1 §5.2.1.
 */
|
|
|
|
import { detectChoiceProvenance } from './observer-choice-detector.mjs';
|
|
|
|
// Skill-name prefix: parseTranscript treats a turn as having used "superpowers"
// when any invoked skill name starts with this string.
const SUPERPOWERS_PREFIX = 'superpowers:';
|
|
|
|
/**
 * Split raw JSONL text into parsed entries, tolerating malformed lines.
 *
 * Lines are separated on '\n' and trimmed (so a trailing '\r' is harmless).
 * Blank lines are skipped and not counted. A non-blank line that fails
 * JSON.parse is counted in `broken` and otherwise ignored — it never throws.
 *
 * @param {string} text - Raw JSONL contents; any falsy value is treated as ''.
 * @returns {{entries: Array, broken: number, total: number}} Parsed values in
 *   file order, the number of unparseable lines, and the number of non-blank
 *   lines seen.
 */
function parseLines(text) {
  const entries = [];
  let broken = 0;
  let total = 0;
  for (const line of String(text || '').split('\n')) {
    const trimmed = line.trim();
    if (!trimmed) continue;
    total += 1;
    try {
      entries.push(JSON.parse(trimmed));
    } catch {
      broken += 1; // broken line — counted for parse_gap, never thrown
    }
  }
  return { entries, broken, total };
}
|
|
|
|
/**
 * Decide whether a transcript entry is a genuine user prompt (a turn
 * boundary) rather than a tool_result carrier message.
 *
 * An entry qualifies when its message has role 'user' and either
 *  - the content is a string that is non-empty after trimming, or
 *  - the content is an array that holds no tool_result block and at least one
 *    text block whose text is a string that is non-empty after trimming.
 *
 * The non-blank requirement on text blocks mirrors the string branch, so an
 * empty text block is not mistaken for a prompt.
 *
 * @param {object} entry - One parsed transcript entry (may be null/undefined).
 * @returns {boolean} True when the entry is a real user prompt.
 */
function isRealUserPrompt(entry) {
  const msg = entry && entry.message;
  if (!msg || msg.role !== 'user') return false;

  const content = msg.content;
  if (typeof content === 'string') return content.trim().length > 0;
  if (!Array.isArray(content)) return false;

  // A message carrying tool results is machine-generated, whatever else it holds.
  if (content.some((b) => b && b.type === 'tool_result')) return false;

  return content.some(
    (b) => Boolean(b) && b.type === 'text' && typeof b.text === 'string' && b.text.trim().length > 0,
  );
}
|
|
|
|
/**
 * Locate the start of the last turn: the index of the last entry that
 * isRealUserPrompt accepts, scanning from the end of the transcript.
 *
 * @param {Array} entries - Parsed transcript entries in file order.
 * @returns {number} Index of the last real user prompt, or 0 when there is
 *   none (including for an empty array), so the whole transcript is the turn.
 */
function findTurnStart(entries) {
  for (let i = entries.length - 1; i >= 0; i--) {
    if (isRealUserPrompt(entries[i])) return i;
  }
  return 0;
}
|
|
|
|
/**
 * Extract the plain text of a transcript entry's message.
 *
 * String content is returned as-is. For array content, the `text` of every
 * block with type 'text' is taken (a missing/falsy text becomes '') and the
 * pieces are joined with a single space. The message role is not checked.
 *
 * @param {object} entry - Parsed transcript entry (may be null/undefined).
 * @returns {string} The message text, or '' when there is no usable content.
 */
function promptText(entry) {
  const content = entry && entry.message && entry.message.content;
  if (typeof content === 'string') return content;
  if (!Array.isArray(content)) return '';
  return content
    .filter((b) => b && b.type === 'text')
    .map((b) => b.text || '')
    .join(' ');
}
|
|
|
|
/**
 * Classify a prompt into a coarse task category using keyword heuristics
 * (Russian and English stems) on the lower-cased text.
 *
 * The checks run in a fixed order and the first match wins:
 * refactor → bugfix → feature → docs → question → other.
 *
 * @param {string} text - Prompt text; any falsy value is treated as ''.
 * @returns {'refactor'|'bugfix'|'feature'|'docs'|'question'|'other'} Category.
 */
export function classifyTask(text) {
  const t = String(text || '').toLowerCase();
  if (/рефактор|refactor/.test(t)) return 'refactor';
  if (/баг|bug|почини|исправ|fix\b|сломан|broken/.test(t)) return 'bugfix';
  if (/фич|feature|добав|implement|реализ|создай|create|новый|new /.test(t)) return 'feature';
  if (/докум|readme|\bdocs?\b/.test(t)) return 'docs';
  if (/\?|как |что |почему|зачем|why|how |what /.test(t)) return 'question';
  return 'other';
}
|
|
|
|
/**
 * Summarise tool activity across a list of transcript entries.
 *
 * Only entries whose message content is an array are inspected. For every
 * tool_use block the per-tool counter is incremented (a missing name counts
 * as 'unknown'); for the tool named 'Skill' the requested skill
 * (`input.skill`, or 'unknown') is recorded. Every tool_result block with
 * `is_error === true` increments the error count.
 *
 * Counting is done in a Map and converted to a plain object at the end, so a
 * tool whose name collides with an Object.prototype member (e.g. 'toString')
 * is counted correctly instead of picking up the inherited value.
 *
 * @param {Array} entries - Transcript entries to scan.
 * @returns {{skills: string[], counts: Object<string, number>, errorCount: number}}
 *   Skills in invocation order, tool_use counts keyed by tool name, and the
 *   number of errored tool results.
 */
function collectToolUse(entries) {
  const skills = [];
  const tally = new Map();
  let errorCount = 0;

  for (const e of entries) {
    const content = e && e.message && Array.isArray(e.message.content) ? e.message.content : [];
    for (const block of content) {
      if (!block || typeof block !== 'object') continue;

      if (block.type === 'tool_use') {
        const name = block.name || 'unknown';
        // Keys are stringified to match plain-object property semantics.
        const key = String(name);
        tally.set(key, (tally.get(key) || 0) + 1);
        if (name === 'Skill') {
          skills.push((block.input && block.input.skill) || 'unknown');
        }
      } else if (block.type === 'tool_result' && block.is_error === true) {
        errorCount += 1;
      }
    }
  }

  return { skills, counts: Object.fromEntries(tally), errorCount };
}
|
|
|
|
// Tool names whose input carries a file path; extractTaskSize reads
// input.file_path / input.notebook_path only for tool_use blocks with these names.
const FILE_TOOLS = new Set(['Read', 'Edit', 'Write', 'MultiEdit', 'NotebookEdit']);
|
|
|
|
/**
 * Deterministic environment factors for the turn that starts at turnStartIdx.
 *
 * - economy_level: the number captured from the first "=== ECONOMY MODE: N %"
 *   marker found in the JSON-stringified turn, or null when absent.
 * - model: `message.model` of the first turn entry that has one, else null.
 * - post_compaction: true when any entry BEFORE the turn start has
 *   `isCompactSummary === true`.
 * - session_turn: number of real user prompts from the start of the
 *   transcript up to and including turnStartIdx.
 * - parallel_session: true when the stringified turn mentions one of the
 *   parallel-session phrases (case-insensitive).
 *
 * @param {Array} allEntries - Every parsed transcript entry, in file order.
 * @param {number} turnStartIdx - Index in allEntries where the turn begins.
 * @returns {{economy_level: (number|null), model: (string|null),
 *   post_compaction: boolean, session_turn: number, parallel_session: boolean}}
 */
export function extractEnvironment(allEntries, turnStartIdx) {
  const turn = allEntries.slice(turnStartIdx);
  // Text markers are searched in the serialized turn so they are found no
  // matter which field of which entry carries them.
  const rawTurn = JSON.stringify(turn);

  const econ = rawTurn.match(/=== ECONOMY MODE:\s*(\d+)\s*%/);
  const economy_level = econ ? Number(econ[1]) : null;

  let model = null;
  for (const e of turn) {
    if (e && e.message && e.message.model) {
      model = e.message.model;
      break;
    }
  }

  let post_compaction = false;
  for (let i = 0; i < turnStartIdx && i < allEntries.length; i++) {
    if (allEntries[i] && allEntries[i].isCompactSummary === true) {
      post_compaction = true;
      break;
    }
  }

  let session_turn = 0;
  for (let i = 0; i <= turnStartIdx && i < allEntries.length; i++) {
    if (isRealUserPrompt(allEntries[i])) session_turn += 1;
  }

  const parallel_session = /параллельн|parallel session|чужой staged|foreign git index/i.test(rawTurn);

  return { economy_level, model, post_compaction, session_turn, parallel_session };
}
|
|
|
|
/**
 * Task size: total tool calls + unique file paths touched (per spec §3, gap-resolution 2).
 *
 * Every tool_use block in an array-valued message content counts as one tool
 * call. For tools listed in FILE_TOOLS, the path from `input.file_path` (or,
 * failing that, `input.notebook_path`) is added to a set of touched files.
 *
 * @param {Array} turn - Transcript entries of the turn.
 * @returns {{tool_calls: number, files_touched: number, files: string[]}}
 *   Tool-call total, number of distinct paths, and the paths in first-seen order.
 */
export function extractTaskSize(turn) {
  let tool_calls = 0;
  const files = new Set();

  for (const e of turn) {
    const content = e && e.message && Array.isArray(e.message.content) ? e.message.content : [];
    for (const b of content) {
      if (!b || b.type !== 'tool_use') continue;
      tool_calls += 1;
      if (FILE_TOOLS.has(b.name) && b.input) {
        const p = b.input.file_path || b.input.notebook_path;
        if (p) files.add(String(p));
      }
    }
  }

  return { tool_calls, files_touched: files.size, files: [...files] };
}
|
|
|
|
/**
 * Classify the opening user-prompt sentiment (per spec §6 / gap-resolution 1).
 *
 * Checks run in order on the lower-cased, trimmed text; the first match wins:
 *  1. 'correction' — the text contains a push-back phrase.
 *  2. 'approval'   — the text STARTS with an acknowledgement word followed by
 *                    a comma, whitespace, or end of text.
 *  3. 'new_task'   — classifyTask recognises a task and the text is longer
 *                    than 15 characters.
 *  4. 'neutral'    — anything else.
 *
 * Word edges around the Cyrillic phrases use Unicode-aware lookarounds:
 * JavaScript's `\b` only knows ASCII word characters, so it never matches
 * between a Cyrillic letter and a space, punctuation, or the end of the text.
 *
 * @param {string} text - Prompt text; any falsy value is treated as ''.
 * @returns {'correction'|'approval'|'new_task'|'neutral'} Signal label.
 */
export function classifyPromptSignal(text) {
  const t = String(text || '').toLowerCase().trim();
  if (
    /не то(?![\p{L}\p{N}_])|не так(?![\p{L}\p{N}_])|переделай|отбой|(?<![\p{L}\p{N}_])стоп(?![\p{L}\p{N}_])|почему ты|неверно|не верно|это не /u.test(
      t,
    )
  ) {
    return 'correction';
  }
  if (/^(ок|окей|ok|спасибо|супер|отлично|готово|дальше|идеально)([,\s]|$)/.test(t)) {
    return 'approval';
  }
  if (classifyTask(t) !== 'other' && t.length > 15) return 'new_task';
  return 'neutral';
}
|
|
|
|
// Threshold for the `time_burn` process event: extractProcessEvents emits it
// when the caller-supplied durationMs is strictly greater than this value.
const TIME_BURN_THRESHOLD_MS = 900000; // 15 min — turn wall-clock above this = time_burn
|
|
// Threshold for the `parse_gap` process event: extractProcessEvents emits it
// when broken / total is strictly greater than this ratio (and total > 0).
const PARSE_GAP_RATIO = 0.1; // >10% unparseable lines = parse_gap
|
|
|
|
/**
 * Heuristic retry count: an errored tool whose name is used again later in the turn.
 *
 * Each tool_result with `is_error === true` is mapped back to its tool name
 * through `tool_use_id`. It counts as one retry when some tool_use with the
 * same name sits in an entry with a strictly greater index than the entry
 * holding the error. Errors whose id cannot be resolved to a name are ignored.
 *
 * Names are resolved only after the whole turn has been scanned, so the
 * lookup does not depend on where the tool_use appears relative to its result.
 *
 * @param {Array} turn - Transcript entries of the turn.
 * @returns {number} Number of errored tool results followed by a same-name tool use.
 */
function detectRetries(turn) {
  const nameById = new Map(); // tool_use id -> tool name
  const uses = []; // { name, idx } for every tool_use
  const failures = []; // { id, idx } for every errored tool_result

  turn.forEach((entry, idx) => {
    const content = entry && entry.message && Array.isArray(entry.message.content) ? entry.message.content : [];
    for (const b of content) {
      if (!b) continue;
      if (b.type === 'tool_use') {
        nameById.set(b.id, b.name);
        uses.push({ name: b.name, idx });
      } else if (b.type === 'tool_result' && b.is_error === true) {
        failures.push({ id: b.tool_use_id, idx });
      }
    }
  });

  let retries = 0;
  for (const failure of failures) {
    const name = nameById.get(failure.id) || null;
    if (name && uses.some((u) => u.name === name && u.idx > failure.idx)) retries += 1;
  }
  return retries;
}
|
|
|
|
/**
 * Process events for the turn: hook_fired (summary), interrupt, retry,
 * time_burn, parse_gap. broken/total/durationMs are computed by the caller.
 *
 * Events are appended in this order:
 *  - one `hook_fired` summary when any entry has an attachment of type
 *    'hook_success' or 'hook_error' (per-hook counts plus the error total);
 *  - one `interrupt` per user message with a text block containing
 *    '[Request interrupted by user]';
 *  - one `retry` per retry reported by detectRetries;
 *  - `time_burn` when durationMs exceeds TIME_BURN_THRESHOLD_MS;
 *  - `parse_gap` when broken / total exceeds PARSE_GAP_RATIO.
 *
 * Hook counts are tallied in a Map and converted to a plain object, so a hook
 * whose name collides with an Object.prototype member is counted correctly.
 *
 * @param {Array} turn - Transcript entries of the turn.
 * @param {number} broken - Number of unparseable transcript lines.
 * @param {number} total - Number of non-blank transcript lines.
 * @param {number} durationMs - Wall-clock duration of the turn in milliseconds.
 * @returns {Array<object>} Event objects, each with a `kind` field.
 */
export function extractProcessEvents(turn, broken, total, durationMs) {
  const events = [];

  const hookTally = new Map();
  let hookErrors = 0;
  for (const e of turn) {
    const att = e && e.attachment;
    if (att && (att.type === 'hook_success' || att.type === 'hook_error')) {
      // Keys are stringified to match plain-object property semantics.
      const name = String(att.hookName || 'unknown');
      hookTally.set(name, (hookTally.get(name) || 0) + 1);
      if (att.type === 'hook_error') hookErrors += 1;
    }
  }
  if (hookTally.size > 0) {
    events.push({ kind: 'hook_fired', counts: Object.fromEntries(hookTally), errors: hookErrors });
  }

  for (const e of turn) {
    const content = e && e.message && Array.isArray(e.message.content) ? e.message.content : [];
    const isUser = e && e.message && e.message.role === 'user';
    if (
      isUser &&
      content.some((b) => b && b.type === 'text' && String(b.text || '').includes('[Request interrupted by user]'))
    ) {
      events.push({ kind: 'interrupt' });
    }
  }

  const retries = detectRetries(turn);
  for (let i = 0; i < retries; i++) events.push({ kind: 'retry' });

  if (durationMs > TIME_BURN_THRESHOLD_MS) {
    events.push({ kind: 'time_burn', duration_ms: durationMs });
  }

  if (total > 0 && broken / total > PARSE_GAP_RATIO) {
    events.push({ kind: 'parse_gap', broken, total });
  }

  return events;
}
|
|
|
|
// HTML-comment routing tag:
//   <!-- routing: provenance=<kind> node=<node> counterfactual=<alternative> -->
// Captures: 1 = provenance kind, 2 = node, 3 = counterfactual.
const ROUTING_TAG_RE =
  /<!--\s*routing:\s*provenance=([\w_]+)\s+node=(\S+)\s+counterfactual=(\S+)\s*-->/;

/**
 * Find the routing tag Claude prints when a method was user-directed (spec §4.2).
 *
 * Entries are scanned in order and the first tag found wins. Both message
 * content shapes are searched: a plain string, and an array of blocks (only
 * blocks of type 'text' with a string `text`).
 *
 * @param {Array} turn - Transcript entries of the turn.
 * @returns {{kind: string, node: string, claude_would_have_chosen: string}|null}
 *   The parsed tag fields, or null when the turn contains no routing tag.
 */
export function parseRoutingTag(turn) {
  for (const e of turn) {
    const raw = e && e.message ? e.message.content : null;

    // Normalise both content shapes to a list of candidate strings.
    let texts = [];
    if (typeof raw === 'string') {
      texts = [raw];
    } else if (Array.isArray(raw)) {
      texts = raw
        .filter((b) => b && b.type === 'text' && typeof b.text === 'string')
        .map((b) => b.text);
    }

    for (const text of texts) {
      const m = text.match(ROUTING_TAG_RE);
      if (m) return { kind: m[1], node: m[2], claude_would_have_chosen: m[3] };
    }
  }
  return null;
}
|
|
|
|
/**
 * Text of the last real user prompt — used by the Stop-hook routing-gate (Task 5).
 *
 * Parses the JSONL text, locates the start of the last turn with
 * findTurnStart, and returns promptText of that entry.
 *
 * @param {string} transcriptText - Raw JSONL transcript contents.
 * @returns {string} Text of the entry at the turn start; '' when the
 *   transcript has no entries or that entry has no text content.
 */
export function extractLastUserPromptText(transcriptText) {
  const { entries } = parseLines(transcriptText);
  const start = findTurnStart(entries);
  return promptText(entries[start]);
}
|
|
|
|
/**
 * Content of the last assistant message strictly before the turn start —
 * the message that may have offered options to the user (spec §11.5).
 *
 * Scans backwards from turnStartIdx - 1 and returns the content of the first
 * assistant message whose content is an array or a string. Assistant
 * messages with any other content shape are skipped.
 *
 * @param {Array} entries - Every parsed transcript entry, in file order.
 * @param {number} turnStartIdx - Index where the current turn begins.
 * @returns {Array|string|null} The content as stored in the transcript, or
 *   null when no such assistant message precedes the turn.
 */
function extractLastAssistantContent(entries, turnStartIdx) {
  for (let i = turnStartIdx - 1; i >= 0; i--) {
    const e = entries[i];
    if (!e || !e.message || e.message.role !== 'assistant') continue;
    const content = e.message.content;
    if (Array.isArray(content) || typeof content === 'string') return content;
  }
  return null;
}
|
|
|
|
/**
 * Parse a transcript JSONL string into an observer episode (schema v2).
 *
 * The episode describes the last turn only (from the last real user prompt
 * to the end of the transcript):
 *  - session id: the first entry with a `sessionId`, else fallbackSessionId,
 *    else a generated `unknown-<epoch ms>` value;
 *  - timestamps: first/last `timestamp` found in the turn (the current time
 *    when the turn has none);
 *  - events: skill_invoked, tool_summary, error, then the process events;
 *  - decision_provenance: the result of detectChoiceProvenance when it returns
 *    a truthy value; otherwise a routing tag of kind 'user_directed_method'
 *    if present; otherwise 'autonomous'.
 *
 * @param {string} transcriptText - Raw JSONL transcript contents.
 * @param {string|null} fallbackSessionId - Used when the transcript has no sessionId.
 * @returns {object} v2 episode.
 */
export function parseTranscript(transcriptText, fallbackSessionId = null) {
  const { entries, broken, total } = parseLines(transcriptText);

  const withSession = entries.find((e) => e && e.sessionId);
  const sessionId =
    (withSession && withSession.sessionId) || fallbackSessionId || `unknown-${Date.now()}`;

  const start = findTurnStart(entries);
  const turn = entries.slice(start);

  const stamps = turn.map((e) => e && e.timestamp).filter(Boolean);
  const started_at = stamps[0] || new Date().toISOString();
  const ended_at = stamps[stamps.length - 1] || started_at;
  const durationMs = new Date(ended_at) - new Date(started_at);

  const { skills, counts, errorCount } = collectToolUse(turn);

  const events = [];
  for (const skill of skills) events.push({ kind: 'skill_invoked', skill });
  if (Object.keys(counts).length > 0) events.push({ kind: 'tool_summary', counts });
  for (let i = 0; i < errorCount; i++) {
    events.push({ kind: 'error', message: 'tool_result reported is_error' });
  }
  events.push(...extractProcessEvents(turn, broken, total, durationMs));

  const usedSuperpowers = skills.some((s) => String(s).startsWith(SUPERPOWERS_PREFIX));
  const prompt = promptText(entries[start]);

  // Choice detection runs BEFORE the routing tag: the previous assistant
  // message is checked together with the current prompt first.
  const lastAsstContent = extractLastAssistantContent(entries, start);
  const choice = detectChoiceProvenance(prompt, lastAsstContent);
  let decision_provenance;
  if (choice) {
    decision_provenance = choice;
  } else {
    const tag = parseRoutingTag(turn);
    decision_provenance =
      tag && tag.kind === 'user_directed_method'
        ? { kind: 'user_directed_method', claude_would_have_chosen: tag.claude_would_have_chosen }
        : { kind: 'autonomous', claude_would_have_chosen: null };
  }

  return {
    schema_version: 2,
    task_id: sessionId,
    task_ref: sessionId,
    timestamps: { started_at, ended_at },
    path_type: usedSuperpowers ? 'regulated' : 'improvised',
    outcome: 'unknown',
    prompt_signal: classifyPromptSignal(prompt),
    decision_provenance,
    environment: extractEnvironment(entries, start),
    task_size: extractTaskSize(turn),
    primary_rationale: {
      step: 1,
      node_chosen: skills.length > 0 ? skills[0] : 'direct',
      // Reasoning fields are not recoverable from a transcript and stay empty.
      triggers_matched: [],
      candidates_considered: [],
      boundaries_applied: [],
      hard_floor: usedSuperpowers
        ? { invoked: true, rules: ['Pravila §12'] }
        : { invoked: false, rules: [] },
      task_classification: classifyTask(prompt),
    },
    events,
  };
}
|