Files
portal/tools/observer-transcript-parser.mjs
T

224 lines
7.6 KiB
JavaScript
Raw Normal View History

#!/usr/bin/env node
/**
* Transcript parser for the brain governance observer.
* Deterministically extracts episode fields from a Claude Code session
* transcript (JSONL). No LLM — pure parsing.
*
* Scope: the last turn (from the last real user prompt to end of file) —
* one episode == one prompt→response cycle.
*
* Reasoning fields (triggers_matched / candidates_considered /
* boundaries_applied) are NOT recoverable from a transcript and stay [];
* their capture is a separate design question (ADR-011 follow-up).
*
* Security Guidance #40: pure parsing — no exec/execSync.
* Per ADR-011 §6 + spec v1.1 §5.2.1.
*/
// Skill-name prefix: parseTranscript marks a turn as 'regulated' when any
// invoked skill name starts with this string.
const SUPERPOWERS_PREFIX = 'superpowers:';
/**
 * Split raw JSONL text into parsed entries, tolerating malformed lines.
 * Blank (whitespace-only) lines are skipped and not counted.
 * @param {string} text - Raw JSONL; falsy values are treated as empty text.
 * @returns {{entries: Array, broken: number, total: number}} Parsed values,
 *   the number of lines that failed JSON.parse, and the number of non-blank lines.
 */
function parseLines(text) {
  const candidates = String(text || '')
    .split('\n')
    .map((raw) => raw.trim())
    .filter((raw) => raw !== '');

  const parsed = [];
  let failures = 0;
  candidates.forEach((raw) => {
    try {
      parsed.push(JSON.parse(raw));
    } catch {
      // Malformed line: tallied for the caller, never rethrown.
      failures += 1;
    }
  });

  return { entries: parsed, broken: failures, total: candidates.length };
}
/**
 * Decide whether a transcript entry is a genuine user prompt (a turn
 * boundary) rather than a tool_result carrier message.
 *
 * - String content counts when it is non-blank.
 * - Array content counts when it carries no tool_result block and at least
 *   one text block with non-blank text. Blank text blocks are rejected so the
 *   array form follows the same "non-empty" rule as the string form.
 *
 * @param {object} entry - Parsed transcript entry (may be null/undefined).
 * @returns {boolean} True when the entry opens a new turn.
 */
function isRealUserPrompt(entry) {
  const msg = entry && entry.message;
  if (!msg || msg.role !== 'user') return false;

  const { content } = msg;
  if (typeof content === 'string') return content.trim().length > 0;
  if (!Array.isArray(content)) return false;

  // A message that returns tool output is never a prompt, even with text alongside.
  if (content.some((block) => block && block.type === 'tool_result')) return false;

  return content.some(
    (block) =>
      block &&
      block.type === 'text' &&
      typeof block.text === 'string' &&
      block.text.trim().length > 0,
  );
}
/**
 * Locate the index of the last real user prompt in the transcript.
 * @param {Array} entries - Parsed transcript entries.
 * @returns {number} Index of the last entry accepted by isRealUserPrompt,
 *   or 0 when no entry qualifies (including an empty array).
 */
function findTurnStart(entries) {
  let idx = entries.length - 1;
  // Walk backwards until a prompt is found or the array is exhausted.
  while (idx >= 0 && !isRealUserPrompt(entries[idx])) {
    idx -= 1;
  }
  return idx >= 0 ? idx : 0;
}
/**
 * Extract the plain text of a message entry.
 * @param {object} entry - Parsed transcript entry (may be null/undefined).
 * @returns {string} String content as-is; for array content, the `text` of
 *   every text block joined with single spaces; otherwise ''.
 */
function promptText(entry) {
  const content = entry && entry.message && entry.message.content;
  if (typeof content === 'string') return content;
  if (!Array.isArray(content)) return '';

  const pieces = [];
  for (const block of content) {
    if (block && block.type === 'text') {
      pieces.push(block.text || '');
    }
  }
  return pieces.join(' ');
}
/**
 * Classify a prompt into a coarse task category by keyword matching
 * (Russian and English markers) on the lower-cased text.
 * Rules are tried in order and the first match wins.
 * @param {string} text - Prompt text; falsy values are treated as ''.
 * @returns {'refactor'|'bugfix'|'feature'|'docs'|'question'|'other'}
 */
export function classifyTask(text) {
  const normalized = String(text || '').toLowerCase();

  // Order matters: earlier categories take precedence over later ones.
  const rules = [
    ['refactor', /рефактор|refactor/],
    ['bugfix', /баг|bug|почини|исправ|fix\b|сломан|broken/],
    ['feature', /фич|feature|добав|implement|реализ|создай|create|новый|new /],
    ['docs', /докум|readme|\bdocs?\b/],
    ['question', /\?|как |что |почему|зачем|why|how |what /],
  ];

  const hit = rules.find(([, pattern]) => pattern.test(normalized));
  return hit ? hit[0] : 'other';
}
/**
 * Scan message content blocks for tool activity.
 *
 * Tool names come from the transcript, so they are tallied in a Map rather
 * than by indexing a plain object: indexing `{}` would read inherited members
 * for names such as "constructor" or "toString" and produce a garbage count.
 *
 * @param {Array} entries - Transcript entries; those without an array
 *   `message.content` are ignored.
 * @returns {{skills: string[], counts: Object<string, number>, errorCount: number}}
 *   skills: the `input.skill` of every `Skill` tool_use, in order ('unknown' if absent);
 *   counts: number of tool_use blocks per tool name ('unknown' if the name is absent);
 *   errorCount: number of tool_result blocks with `is_error === true`.
 */
function collectToolUse(entries) {
  const skills = [];
  const tally = new Map();
  let errorCount = 0;

  for (const entry of entries) {
    const blocks =
      entry && entry.message && Array.isArray(entry.message.content) ? entry.message.content : [];
    for (const block of blocks) {
      if (!block || typeof block !== 'object') continue;
      if (block.type === 'tool_use') {
        const name = block.name || 'unknown';
        const key = String(name);
        tally.set(key, (tally.get(key) || 0) + 1);
        if (name === 'Skill') {
          skills.push((block.input && block.input.skill) || 'unknown');
        }
      } else if (block.type === 'tool_result' && block.is_error === true) {
        errorCount += 1;
      }
    }
  }

  // Object.fromEntries defines own data properties, so every name survives as a key.
  return { skills, counts: Object.fromEntries(tally), errorCount };
}
// Tool names whose input path (file_path / notebook_path) extractTaskSize
// counts as a touched file.
const FILE_TOOLS = new Set(['Read', 'Edit', 'Write', 'MultiEdit', 'NotebookEdit']);
/**
 * Deterministic environment factors for the turn beginning at turnStartIdx.
 *
 * - economy_level: the number captured from an "=== ECONOMY MODE: N %" marker
 *   in the JSON-serialized turn, or null when absent.
 * - model: the first truthy `message.model` within the turn, or null.
 * - post_compaction: true when any entry before turnStartIdx has
 *   `isCompactSummary === true`.
 * - session_turn: number of real user prompts at indices 0..turnStartIdx.
 * - parallel_session: true when the serialized turn contains one of the
 *   parallel-session marker phrases (case-insensitive).
 *
 * @param {Array} allEntries - Every parsed transcript entry.
 * @param {number} turnStartIdx - Index of the entry that opens the turn.
 * @returns {{economy_level: (number|null), model: (string|null),
 *   post_compaction: boolean, session_turn: number, parallel_session: boolean}}
 */
export function extractEnvironment(allEntries, turnStartIdx) {
  const turn = allEntries.slice(turnStartIdx);
  const serialized = JSON.stringify(turn);

  const economyMatch = /=== ECONOMY MODE:\s*(\d+)\s*%/.exec(serialized);

  const modelCarrier = turn.find((entry) => entry && entry.message && entry.message.model);

  const compactedEarlier = allEntries.some(
    (entry, i) => i < turnStartIdx && Boolean(entry) && entry.isCompactSummary === true,
  );

  const promptsSoFar = allEntries.reduce(
    (seen, entry, i) => (i <= turnStartIdx && isRealUserPrompt(entry) ? seen + 1 : seen),
    0,
  );

  return {
    economy_level: economyMatch === null ? null : Number(economyMatch[1]),
    model: modelCarrier ? modelCarrier.message.model : null,
    post_compaction: compactedEarlier,
    session_turn: promptsSoFar,
    parallel_session: /параллельн|parallel session|чужой staged|foreign git index/i.test(serialized),
  };
}
/**
 * Task size: total tool calls + unique file paths touched (per spec §3, gap-resolution 2).
 * @param {Array} turn - Entries of the turn being measured.
 * @returns {{tool_calls: number, files_touched: number, files: string[]}}
 *   tool_calls counts every tool_use block; files lists the distinct
 *   file_path / notebook_path values of FILE_TOOLS calls, in first-seen order.
 */
export function extractTaskSize(turn) {
  const touched = new Set();
  let toolCallTotal = 0;

  for (const entry of turn) {
    const hasBlocks = entry && entry.message && Array.isArray(entry.message.content);
    if (!hasBlocks) continue;

    for (const block of entry.message.content) {
      if (!block || block.type !== 'tool_use') continue;
      toolCallTotal += 1;

      // Only file-oriented tools with an input payload contribute a path.
      if (!FILE_TOOLS.has(block.name) || !block.input) continue;
      const path = block.input.file_path || block.input.notebook_path;
      if (path) touched.add(String(path));
    }
  }

  return { tool_calls: toolCallTotal, files_touched: touched.size, files: Array.from(touched) };
}
/**
 * Classify the opening user-prompt sentiment (per spec §6 / gap-resolution 1).
 *
 * Checks run in order: correction markers, then an approval word at the very
 * start of the prompt, then "new task" (classifyTask recognises a category
 * and the prompt is longer than 15 characters), otherwise neutral.
 *
 * JavaScript's `\b` only treats [A-Za-z0-9_] as word characters, so it cannot
 * delimit Cyrillic words. The Cyrillic markers therefore use Unicode-aware
 * lookarounds (`u` flag) as their word boundaries.
 *
 * @param {string} text - Prompt text; falsy values are treated as ''.
 * @returns {'correction'|'approval'|'new_task'|'neutral'}
 */
export function classifyPromptSignal(text) {
  const t = String(text || '').toLowerCase().trim();

  // (?<!W) / (?!W) act as word boundaries that also work for Cyrillic letters.
  const correction =
    /(?<![\p{L}\p{N}_])не то(?![\p{L}\p{N}_])|(?<![\p{L}\p{N}_])не так(?![\p{L}\p{N}_])|переделай|отбой|(?<![\p{L}\p{N}_])стоп(?![\p{L}\p{N}_])|почему ты|неверно|не верно|это не /u;
  if (correction.test(t)) {
    return 'correction';
  }

  if (/^(ок|окей|ok|спасибо|супер|отлично|готово|дальше|идеально)([,\s]|$)/.test(t)) {
    return 'approval';
  }

  if (classifyTask(t) !== 'other' && t.length > 15) return 'new_task';
  return 'neutral';
}
/**
 * Parse a transcript JSONL string into observer episode fields.
 *
 * Only the last turn is analysed: from the index returned by findTurnStart
 * to the end of the transcript.
 *
 * @param {string} transcriptText - Raw JSONL transcript contents.
 * @param {string|null} fallbackSessionId - Used when the transcript has no sessionId.
 * @returns {object} Episode with 5 mandatory fields + events.
 */
export function parseTranscript(transcriptText, fallbackSessionId = null) {
  const allEntries = parseLines(transcriptText).entries;

  // Session id: first entry carrying one, then the fallback, then a generated placeholder.
  const sessionCarrier = allEntries.find((entry) => entry && entry.sessionId);
  let taskId;
  if (sessionCarrier && sessionCarrier.sessionId) {
    taskId = sessionCarrier.sessionId;
  } else if (fallbackSessionId) {
    taskId = fallbackSessionId;
  } else {
    taskId = `unknown-${Date.now()}`;
  }

  const turnStart = findTurnStart(allEntries);
  const lastTurn = allEntries.slice(turnStart);

  // Timestamps: first and last truthy `timestamp` within the turn.
  const seenStamps = [];
  for (const entry of lastTurn) {
    const stamp = entry && entry.timestamp;
    if (stamp) seenStamps.push(stamp);
  }
  const started_at = seenStamps.length > 0 ? seenStamps[0] : new Date().toISOString();
  const ended_at = seenStamps.length > 0 ? seenStamps[seenStamps.length - 1] : started_at;

  const toolUse = collectToolUse(lastTurn);
  const invokedSkills = toolUse.skills;

  const events = [
    ...invokedSkills.map((skill) => ({ kind: 'skill_invoked', skill })),
    ...(Object.keys(toolUse.counts).length > 0
      ? [{ kind: 'tool_summary', counts: toolUse.counts }]
      : []),
    ...Array.from({ length: toolUse.errorCount }, () => ({
      kind: 'error',
      message: 'tool_result reported is_error',
    })),
  ];

  const usedSuperpowers = invokedSkills.some((name) =>
    String(name).startsWith(SUPERPOWERS_PREFIX),
  );

  let pathType = 'improvised';
  let hardFloor = { invoked: false, rules: [] };
  if (usedSuperpowers) {
    pathType = 'regulated';
    hardFloor = { invoked: true, rules: ['Pravila §12'] };
  }

  return {
    task_id: taskId,
    timestamps: { started_at, ended_at },
    path_type: pathType,
    outcome: 'success',
    primary_rationale: {
      step: 1,
      node_chosen: invokedSkills.length > 0 ? invokedSkills[0] : 'direct',
      // Reasoning fields are not derived from the transcript here; left empty.
      triggers_matched: [],
      candidates_considered: [],
      boundaries_applied: [],
      hard_floor: hardFloor,
      task_classification: classifyTask(promptText(allEntries[turnStart])),
    },
    events,
  };
}