portal/tools/observer-transcript-parser.mjs

#!/usr/bin/env node
/**
 * Transcript parser for the brain governance observer.
 * Deterministically extracts episode fields from a Claude Code session
 * transcript (JSONL). No LLM — pure parsing.
 *
 * Scope: the last turn (from the last real user prompt to end of file) —
 * one episode == one prompt→response cycle.
 *
 * Reasoning fields (triggers_matched / candidates_considered /
 * boundaries_applied) are NOT recoverable from a transcript and stay [];
 * their capture is a separate design question (ADR-011 follow-up).
 *
 * Security Guidance #40: pure parsing — no exec/execSync.
 * Per ADR-011 §6 + spec v1.1 §5.2.1.
 */

// Skill-name prefix checked by parseTranscript: when any Skill invoked in the
// turn starts with this string, the episode is reported as 'regulated' and its
// hard_floor is marked as invoked.
const SUPERPOWERS_PREFIX = 'superpowers:';

/**
 * Split JSONL text into parsed values, one per non-blank line.
 * Lines that are not valid JSON are dropped silently; this never throws.
 * @param {string} text - Raw JSONL contents (falsy input is treated as empty).
 * @returns {Array<*>} Parsed values in file order.
 */
function parseLines(text) {
  const raw = String(text || '');
  return raw.split('\n').flatMap((rawLine) => {
    const candidate = rawLine.trim();
    if (candidate === '') return [];
    try {
      // Wrapped in an array so a parsed JSON array stays a single entry.
      return [JSON.parse(candidate)];
    } catch {
      // Malformed line: contribute nothing rather than fail the whole parse.
      return [];
    }
  });
}

/**
 * Tell whether an entry is a genuine user prompt (a turn boundary) rather
 * than a user-role message that merely carries tool_result blocks.
 * @param {*} entry - One parsed transcript entry.
 * @returns {boolean} True for a non-blank string prompt, or for array content
 *   that has at least one text block and no tool_result block.
 */
function isRealUserPrompt(entry) {
  const message = entry && entry.message;
  if (!message) return false;
  if (message.role !== 'user') return false;

  const { content } = message;
  if (typeof content === 'string') return content.trim() !== '';
  if (!Array.isArray(content)) return false;

  let sawText = false;
  for (const block of content) {
    if (!block) continue;
    // Any tool_result disqualifies the message, whatever else it contains.
    if (block.type === 'tool_result') return false;
    if (block.type === 'text') sawText = true;
  }
  return sawText;
}

/**
 * Locate the index where the last turn begins by scanning backwards for the
 * most recent real user prompt.
 * @param {Array<*>} entries - Parsed transcript entries.
 * @returns {number} Index of the last real user prompt, or 0 when none exists.
 */
function findTurnStart(entries) {
  let cursor = entries.length - 1;
  while (cursor >= 0 && !isRealUserPrompt(entries[cursor])) {
    cursor -= 1;
  }
  // Ran off the front: no prompt found, so the turn spans the whole transcript.
  return cursor < 0 ? 0 : cursor;
}

/**
 * Extract the textual part of an entry's message content.
 * @param {*} entry - One parsed transcript entry.
 * @returns {string} The content itself when it is a string; the `text` of all
 *   text blocks joined by single spaces when it is an array; '' otherwise.
 */
function promptText(entry) {
  const content = entry && entry.message && entry.message.content;
  if (typeof content === 'string') return content;
  if (!Array.isArray(content)) return '';

  const pieces = [];
  for (const block of content) {
    if (block && block.type === 'text') {
      // A text block without text still contributes an empty piece.
      pieces.push(block.text || '');
    }
  }
  return pieces.join(' ');
}

/**
 * Classify a prompt into a coarse task category using keyword heuristics
 * (Russian and English). Rules are checked in order and the first match wins:
 * refactor, bugfix, feature, docs, question, then 'other'.
 *
 * The English keywords `new `, `how ` and `what ` are anchored with a leading
 * word boundary so they no longer fire inside longer words ("renew ",
 * "show ", "somewhat "). `fix\b` is deliberately left open on the left so that
 * "hotfix" and "bugfix" still count. Cyrillic keywords remain plain substring
 * matches because `\b` in JavaScript regexes only understands ASCII word
 * characters.
 *
 * @param {string} text - Prompt text (falsy input is treated as empty).
 * @returns {'refactor'|'bugfix'|'feature'|'docs'|'question'|'other'} Category.
 */
export function classifyTask(text) {
  const t = String(text || '').toLowerCase();
  if (/рефактор|refactor/.test(t)) return 'refactor';
  if (/баг|bug|почини|исправ|fix\b|сломан|broken/.test(t)) return 'bugfix';
  if (/фич|feature|добав|implement|реализ|создай|create|новый|\bnew /.test(t)) return 'feature';
  if (/докум|readme|\bdocs?\b/.test(t)) return 'docs';
  if (/\?|как |что |почему|зачем|why|\bhow |\bwhat /.test(t)) return 'question';
  return 'other';
}

/**
 * Walk the content blocks of the given entries and summarise tool activity.
 *
 * Tool names come from the transcript, so they are tallied in a Map rather
 * than by indexing a plain object: a name such as "constructor" or "toString"
 * would otherwise read an inherited Object.prototype member as the previous
 * count, and "__proto__" would be dropped. The Map is converted back to a
 * plain object on return, so the result shape is unchanged.
 *
 * @param {Array<*>} entries - Parsed transcript entries; entries whose
 *   message content is not an array are skipped.
 * @returns {{skills: string[], counts: Object<string, number>, errorCount: number}}
 *   skills: the `input.skill` of every `Skill` tool_use, in order ('unknown'
 *   when missing); counts: tool_use occurrences per tool name ('unknown' when
 *   the name is missing); errorCount: tool_result blocks with is_error === true.
 */
function collectToolUse(entries) {
  const skills = [];
  const tally = new Map();
  let errorCount = 0;
  for (const e of entries) {
    const content = e && e.message && Array.isArray(e.message.content) ? e.message.content : [];
    for (const block of content) {
      if (!block || typeof block !== 'object') continue;
      if (block.type === 'tool_use') {
        const name = block.name || 'unknown';
        // Object keys are strings, so normalise before tallying.
        const key = String(name);
        tally.set(key, (tally.get(key) || 0) + 1);
        if (name === 'Skill') {
          skills.push((block.input && block.input.skill) || 'unknown');
        }
      } else if (block.type === 'tool_result' && block.is_error === true) {
        errorCount += 1;
      }
    }
  }
  // Object.fromEntries defines own data properties, so even "__proto__" is kept.
  return { skills, counts: Object.fromEntries(tally), errorCount };
}

/**
 * Parse a transcript JSONL string into observer episode fields.
 *
 * Only the last turn is inspected (from the last real user prompt to the end
 * of the transcript). When the transcript contains no real user prompt the
 * whole transcript is treated as the turn and the task is classified from an
 * empty prompt, rather than from whatever entry happens to come first.
 *
 * Fallbacks: the session id is the first `sessionId` found in any entry, then
 * `fallbackSessionId`, then a synthetic `unknown-<ms>` id; missing timestamps
 * fall back to the current time.
 *
 * @param {string} transcriptText - Raw JSONL transcript contents.
 * @param {string|null} fallbackSessionId - Used when the transcript has no sessionId.
 * @returns {object} Episode with 5 mandatory fields (task_id, timestamps,
 *   path_type, outcome, primary_rationale) + events.
 */
export function parseTranscript(transcriptText, fallbackSessionId = null) {
  const entries = parseLines(transcriptText);

  const withSession = entries.find((e) => e && e.sessionId);
  const sessionId =
    (withSession && withSession.sessionId) || fallbackSessionId || `unknown-${Date.now()}`;

  const start = findTurnStart(entries);
  const turn = entries.slice(start);

  // findTurnStart returns 0 both for "prompt at index 0" and for "no prompt
  // found", so confirm entries[start] really is a prompt before classifying it.
  const promptEntry = isRealUserPrompt(entries[start]) ? entries[start] : null;

  const stamps = turn.map((e) => e && e.timestamp).filter(Boolean);
  const started_at = stamps[0] || new Date().toISOString();
  const ended_at = stamps[stamps.length - 1] || started_at;

  const { skills, counts, errorCount } = collectToolUse(turn);

  // Event order: one per invoked skill, then a single tool summary (only when
  // any tool was used), then one generic error event per failed tool_result.
  const events = [];
  for (const skill of skills) events.push({ kind: 'skill_invoked', skill });
  if (Object.keys(counts).length > 0) events.push({ kind: 'tool_summary', counts });
  for (let i = 0; i < errorCount; i++) {
    events.push({ kind: 'error', message: 'tool_result reported is_error' });
  }

  const usedSuperpowers = skills.some((s) => String(s).startsWith(SUPERPOWERS_PREFIX));

  return {
    task_id: sessionId,
    timestamps: { started_at, ended_at },
    path_type: usedSuperpowers ? 'regulated' : 'improvised',
    outcome: 'success',
    primary_rationale: {
      step: 1,
      node_chosen: skills.length > 0 ? skills[0] : 'direct',
      // Reasoning fields are not recoverable from a transcript; always empty.
      triggers_matched: [],
      candidates_considered: [],
      boundaries_applied: [],
      hard_floor: usedSuperpowers
        ? { invoked: true, rules: ['Pravila §12'] }
        : { invoked: false, rules: [] },
      task_classification: classifyTask(promptText(promptEntry)),
    },
    events,
  };
}