brain/tools/observer-stop-hook.mjs

#!/usr/bin/env node
/**
 * Stop-event hook for brain governance observer (B3).
 * Reads JSON context from stdin (Claude Code Stop-event hook contract).
 * When the context provides `transcript_path`, the episode is derived from
 * the real session transcript via parseTranscript; otherwise it falls back
 * to best-effort defaults. Builds an episode with 5 mandatory fields
 * including primary_rationale (7 sub-fields per spec v1.1 §5.2.1),
 * sanitizes via PII filter, appends to docs/observer/episodes-YYYY-MM.jsonl.
 *
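 * Never blocks the Stop-event because of an internal error — always exits 0.
 * The only intentional block is the routing gate's `decision: block` response.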
 *
 * Security Guidance #40: NO exec/execSync — pure fs + sanitize.
 * Per Pravila §16.2 + ADR-011 + spec v1.1 §5.2.1.
 */

import { appendFileSync, existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
import { join } from 'path';
import { sanitize, sanitizeWithCount } from './observer-pii-filter.mjs';
import { parseTranscript, extractLastUserPromptText } from './observer-transcript-parser.mjs';
import { detectMethodDirected, loadKnownNodes } from './observer-routing-detector.mjs';
import { callSelfAssessmentApi, readRuntimeFlag } from './observer-self-assessment-api.mjs';
import { shouldEmbed as _shouldEmbed, encodeBase64 as _encodeBase64, embed as _embed } from './router-embedding.mjs';

// Top-level fields appendEpisode requires on every full (non-error) episode.
// A field counts as missing only when its value is `undefined`.
const REQUIRED_FIELDS = ['task_id', 'timestamps', 'path_type', 'outcome', 'primary_rationale'];
// Additional fields appendEpisode requires on a full episode; a missing one is
// reported as "schema v2 field missing".
const V2_FIELDS = [
  'schema_version',
  'decision_provenance',
  'environment',
  'task_size',
  'task_ref',
  // C-7: prompt_signal + events are always produced by parser and buildEpisodeFromContext,
  // but were previously unvalidated → a ctx-fallback path that dropped them would silently
  // write a malformed episode. Strict validation closes that gap.
  'prompt_signal',
  'events',
];
// Fields required on a minimal observer_error marker (an episode whose
// `observer_error` property is strictly `true`).
const OBSERVER_ERROR_FIELDS = ['schema_version', 'error_message', 'timestamps', 'task_id'];

// Sub-fields validateRationale requires on `episode.primary_rationale`.
const RATIONALE_FIELDS = [
  'step',
  'node_chosen',
  'triggers_matched',
  'candidates_considered',
  'boundaries_applied',
  'hard_floor',
  'task_classification',
];

/**
 * Update the monthly PII counter JSON with counts from a single episode write.
 *
 * Reads `<baseDir>/<stateDir>/.pii-counters.json`, adds every positive count to
 * the bucket stored under `month`, and rewrites the file. The counter is
 * informational, so this function never throws: an unreadable, malformed or
 * non-object counter file (or month bucket) is replaced by a fresh object, and
 * write failures are ignored.
 *
 * @param {object} counts - Counter name -> number of hits for this write.
 * @param {string} baseDir - Repository root.
 * @param {string} month - Bucket key (YYYY-MM).
 * @param {string} [stateDir='docs/observer'] - Directory, relative to baseDir, holding the counter file.
 */
function bumpPiiCounter(counts, baseDir, month, stateDir = 'docs/observer') {
  const isPlainObject = (value) => value !== null && typeof value === 'object' && !Array.isArray(value);
  const counterPath = join(baseDir, stateDir, '.pii-counters.json');
  let store = {};
  if (existsSync(counterPath)) {
    try {
      const parsed = JSON.parse(readFileSync(counterPath, 'utf-8'));
      // Valid JSON is not necessarily an object (`null`, `[]`, `"x"`); indexing
      // into such a value would throw, so only accept plain objects.
      if (isPlainObject(parsed)) store = parsed;
    } catch { store = {}; }
  }
  // Same reasoning for the month bucket: assigning a property on a primitive
  // throws in strict (module) code.
  const bucket = isPlainObject(store[month]) ? store[month] : {};
  store[month] = bucket;
  for (const [k, n] of Object.entries(counts ?? {})) {
    if (n > 0) bucket[k] = (bucket[k] || 0) + n;
  }
  try { writeFileSync(counterPath, JSON.stringify(store, null, 2) + '\n', 'utf-8'); }
  catch { /* counter is informational — never fail the Stop-event */ }
}

/**
 * Ensure a primary_rationale object defines every field in RATIONALE_FIELDS.
 * A field counts as present when its value is anything other than `undefined`.
 * @param {object} rationale - The episode's primary_rationale.
 * @throws {Error} Naming the first missing field, in RATIONALE_FIELDS order.
 */
function validateRationale(rationale) {
  const absent = RATIONALE_FIELDS.find((name) => rationale[name] === undefined);
  if (absent !== undefined) {
    throw new Error(`primary_rationale field missing: ${absent}`);
  }
}

/**
 * Append a single episode to the monthly JSONL file `episodes-<month>.jsonl`.
 *
 * Two shapes are accepted:
 *  - an observer_error marker (`observer_error === true`), which must define
 *    every field in OBSERVER_ERROR_FIELDS;
 *  - a full episode, which must define every field in REQUIRED_FIELDS and
 *    V2_FIELDS, carry schema_version 2, 3 or 4, and pass validateRationale.
 * The target directory is created (recursively) before validation runs. The
 * episode is passed through sanitizeWithCount, the sanitized object is written
 * as one JSON line, and the returned counts are handed to bumpPiiCounter.
 *
 * @param {object} episode - The episode object.
 * @param {string} baseDir - Repository root (default: process.cwd()).
 * @param {string} month   - YYYY-MM string for the file name (default: current UTC month).
 * @param {string} stateDir - Directory relative to baseDir (default: 'docs/observer').
 * @throws {Error} When a required field is missing or schema_version is unsupported.
 */
export function appendEpisode(episode, baseDir = process.cwd(), month = currentMonth(), stateDir = 'docs/observer') {
  const targetDir = join(baseDir, stateDir);
  if (!existsSync(targetDir)) {
    mkdirSync(targetDir, { recursive: true });
  }
  const targetFile = join(targetDir, `episodes-${month}.jsonl`);

  // Throws describe(name) for the first listed field whose value is undefined.
  const requireFields = (names, describe) => {
    names.forEach((name) => {
      if (episode[name] === undefined) {
        throw new Error(describe(name));
      }
    });
  };

  const isErrorMarker = Boolean(episode) && episode.observer_error === true;
  if (isErrorMarker) {
    requireFields(OBSERVER_ERROR_FIELDS, (name) => `observer_error marker field missing: ${name}`);
  } else {
    requireFields(REQUIRED_FIELDS, (name) => `required field missing: ${name}`);
    requireFields(V2_FIELDS, (name) => `schema v2 field missing: ${name}`);
    const version = episode.schema_version;
    if (version !== 2 && version !== 3 && version !== 4) {
      throw new Error(`schema_version must be 2, 3 or 4 (got ${version})`);
    }
    validateRationale(episode.primary_rationale);
  }

  // Both shapes share the same write path: sanitize, append one line, bump counters.
  const result = sanitizeWithCount(episode);
  appendFileSync(targetFile, `${JSON.stringify(result.sanitized)}\n`, 'utf-8');
  bumpPiiCounter(result.counts, baseDir, month, stateDir);
}

/**
 * Build an episode from a Claude Code Stop-event context.
 *
 * Preferred path: when `transcriptText` is truthy the work is delegated to
 * parseTranscript(transcriptText, <session id from ctx>, options) and its
 * result is returned as-is.
 * Fallback path: a schema_version 4 / schema_minor 1 episode assembled from
 * `ctx`, with a default for every value ctx does not provide. An explicit
 * ctx.primary_rationale is used verbatim. Fallbacks use `||`, so any falsy
 * ctx value is treated as absent.
 *
 * @param {object} ctx - Raw context from stdin (may be partial).
 * @param {string|null} transcriptText - Raw transcript JSONL, if readable.
 * @param {object} options - Passed through to parseTranscript; unused on the fallback path.
 * @returns {object} The episode.
 */
export function buildEpisodeFromContext(ctx = {}, transcriptText = null, options = {}) {
  const declaredId = ctx.session_id || ctx.sessionId || ctx.task_id;
  if (transcriptText) {
    return parseTranscript(transcriptText, declaredId, options);
  }

  const taskId = declaredId || `unknown-${Date.now()}`;
  const stamp = new Date().toISOString();

  const timestamps = {
    started_at: ctx.started || ctx.started_at || stamp,
    ended_at: ctx.ended || ctx.ended_at || stamp,
  };
  const decisionProvenance = ctx.decision_provenance || { kind: 'autonomous', claude_would_have_chosen: null };
  const environment = ctx.environment || {
    economy_level: null,
    model: null,
    post_compaction: false,
    session_turn: 0,
    parallel_session: false,
  };
  const taskSize = ctx.task_size || { tool_calls: 0, files_touched: 0, files: [] };
  const primaryRationale = ctx.primary_rationale || {
    step: 1,
    node_chosen: ctx.node_chosen || ctx.skill_id || 'unknown',
    triggers_matched: [],
    candidates_considered: [],
    boundaries_applied: [],
    hard_floor: ctx.hard_floor || { invoked: false, rules: [] },
    task_classification: ctx.task_classification || 'other',
  };

  // Key order is kept stable because the object is serialised to JSONL.
  return {
    schema_version: 4,
    schema_minor: 1,
    task_id: taskId,
    task_ref: taskId,
    timestamps,
    path_type: ctx.path_type || 'regulated',
    outcome: ctx.result || ctx.outcome || 'unknown',
    prompt_signal: ctx.prompt_signal || 'neutral',
    decision_provenance: decisionProvenance,
    environment,
    task_size: taskSize,
    primary_rationale: primaryRationale,
    events: ctx.events || [],
  };
}

/**
 * Build an execution_trace block (spec §5, Phase 3 Task 16).
 * Pure — computes whether the recommended chain was fully executed.
 *
 * Each step of `recommended_chain` is matched against at most one entry of
 * `invoked` (multiset matching), so a node invoked several times cannot stand
 * in for a different chain step that never ran. A single chain_gaps entry is
 * emitted when fewer chain steps were matched than the chain prescribes.
 * An empty `recommended_chain` produces no gap (no chain prescribed).
 *
 * @param {object} [input]
 * @param {Array} [input.recommended_chain=[]] - Nodes the chain prescribes; non-arrays are treated as [].
 * @param {Array} [input.invoked=[]] - Nodes actually invoked; non-arrays are treated as [].
 * @returns {{recommended_chain: Array, invoked: Array, chain_gaps: Array<{executed_steps: number, expected_steps: number}>}}
 */
export function buildExecutionTrace({ recommended_chain = [], invoked = [] } = {}) {
  const chain = Array.isArray(recommended_chain) ? recommended_chain : [];
  const inv = Array.isArray(invoked) ? invoked : [];
  const chain_gaps = [];
  if (chain.length > 0) {
    // Count how often each node was invoked, then let every chain step consume
    // one occurrence. This also makes the check O(chain + invoked).
    const available = new Map();
    for (const node of inv) {
      available.set(node, (available.get(node) ?? 0) + 1);
    }
    let executed = 0;
    for (const node of chain) {
      const left = available.get(node) ?? 0;
      if (left > 0) {
        available.set(node, left - 1);
        executed += 1;
      }
    }
    if (executed < chain.length) {
      chain_gaps.push({ executed_steps: executed, expected_steps: chain.length });
    }
  }
  return { recommended_chain: chain, invoked: inv, chain_gaps };
}

/**
 * Build an episode and enrich it with router state (inheritance — closes B5).
 *
 * The base comes from buildEpisodeFromContext(ctx, transcriptText). Its
 * schema_minor is then overwritten with 3 (Task 20 bump: cost totals +
 * reviewer distribution surface), and when `state.inheritance` is truthy a
 * shallow copy of it is attached as `inheritance`. The base object itself is
 * mutated and returned.
 *
 * @param {object} [input]
 * @param {object|null} [input.state=null] - Router state blob (router-state-<session>.json dump).
 * @param {string|null} [input.transcriptText=null] - Raw transcript JSONL, if available.
 * @param {object} [input.ctx={}] - Stop-event context.
 * @returns {object} The enriched episode.
 */
export function buildEpisode({ state = null, transcriptText = null, ctx = {} } = {}) {
  const episode = buildEpisodeFromContext(ctx, transcriptText);
  const inherited = state?.inheritance;
  return Object.assign(
    episode,
    { schema_minor: 3 },
    inherited ? { inheritance: { ...inherited } } : null,
  );
}

/**
 * Resolve the user prompt for downstream consumers (self-assessment API,
 * embedding).
 *
 * Background (bug fix 2026-05-26): the Stop-event stdin contract is
 * { session_id, transcript_path, stop_hook_active, hook_event_name } and never
 * carries `prompt`; the real text lives in the transcript file. Reading only
 * `ctx.prompt` therefore left self-assessment with an empty prompt and skipped
 * embedding.
 *
 * Resolution order:
 *  1. `ctx.prompt` when it is a non-empty string (test convenience);
 *  2. the last user message extracted from a non-empty transcript string;
 *  3. null when neither source has content.
 *
 * @param {object|null|undefined} ctx - Stop-event context.
 * @param {string|null|undefined} transcriptText - Raw transcript text.
 * @returns {string|null} The resolved prompt, or null.
 */
export function derivePrompt(ctx, transcriptText) {
  const explicit = ctx?.prompt;
  if (typeof explicit === 'string' && explicit !== '') {
    return explicit;
  }
  const hasTranscript = typeof transcriptText === 'string' && transcriptText !== '';
  if (!hasTranscript) {
    return null;
  }
  return extractLastUserPromptText(transcriptText) || null;
}

/**
 * Build a self_assessment block (spec §4.5, Phase 3 Task 17). Pure.
 *
 * Expects { apiResult: string|null } where apiResult is the raw text returned
 * by the self-assessment API call. The text may be wrapped in a Markdown code
 * fence (``` or ```json), which is stripped before JSON parsing.
 *
 * Null/undefined apiResult (call skipped or timed out), unparseable text, and
 * any parsed value that is not a plain JSON object (primitives, null, arrays)
 * all yield `self_assessment_pending: true`, so /brain-retro can backfill the
 * assessment later.
 *
 * Schema of the returned block:
 *   summary: string | null (non-string values become null)
 *   confidence_in_choice: number 0.0-1.0 (non-numbers and out-of-range values become null)
 *   what_could_be_better: value from the response, or null when absent
 *   lesson_learned: value from the response, or null when absent
 *   self_assessment_pending: bool
 *   parse_error?: string (only on malformed apiResult)
 *
 * @param {object} [input]
 * @param {string|null} [input.apiResult] - Raw API response text.
 * @returns {object} The self_assessment block.
 */
export function buildSelfAssessment({ apiResult } = {}) {
  if (apiResult == null) return { self_assessment_pending: true };
  const stripped = String(apiResult).trim()
    .replace(/^```(?:json)?\s*\n?/, '')
    .replace(/\n?```$/, '')
    .trim();
  let parsed;
  try { parsed = JSON.parse(stripped); }
  catch (err) { return { self_assessment_pending: true, parse_error: err.message }; }
  // Arrays are `typeof 'object'` too; without the explicit check a response
  // like `[]` would be recorded as a finished assessment with all-null fields.
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    return { self_assessment_pending: true, parse_error: 'apiResult is not an object' };
  }
  const rawConfidence = parsed.confidence_in_choice;
  const conf = typeof rawConfidence === 'number' && rawConfidence >= 0 && rawConfidence <= 1
    ? rawConfidence
    : null;
  return {
    summary: typeof parsed.summary === 'string' ? parsed.summary : null,
    confidence_in_choice: conf,
    what_could_be_better: parsed.what_could_be_better ?? null,
    lesson_learned: parsed.lesson_learned ?? null,
    self_assessment_pending: false,
  };
}

/**
 * Step 3.6 embedding async wiring (Phase 4 follow-up).
 *
 * Mirrors the Step 3.5 self-assessment pattern (commit c1ec61fa). When
 * `embedMode` is 'on', `shouldEmbedFn` accepts the episode's task
 * classification, and `ctx.prompt` is non-empty, the prompt is embedded and
 * the encoded vector is stored on the episode as `prompt_embedding_base64`.
 * Fail-quiet: on timeout, an empty/null vector, or an error from embedFn or
 * encodeBase64Fn, the field is left unset and
 * `environment.embedding_unavailable = true` is recorded instead.
 *
 * The timeout timer is always cleared once the race settles, so a fast
 * embedding does not leave a pending timer holding the event loop open for
 * up to `timeoutMs`.
 *
 * Pure-API style: injectable embedFn / shouldEmbedFn / encodeBase64Fn for tests
 * (the defaults are the router-embedding.mjs implementations).
 *
 * @param {object} ep — episode object to mutate
 * @param {object} ctx — Stop-hook context (uses ctx.prompt)
 * @param {object} opts
 * @param {string} [opts.embedMode]      — runtime flag value ('on' to compute)
 * @param {Function} [opts.shouldEmbedFn] — taskType -> bool
 * @param {Function} [opts.embedFn]       — async(prompt) -> vector | null
 * @param {Function} [opts.encodeBase64Fn]— vector -> base64 string
 * @param {number} [opts.timeoutMs]       — race timeout (default 2000)
 * @returns {Promise<void>}
 */
export async function computeEmbeddingForEpisode(ep, ctx = {}, opts = {}) {
  const {
    embedMode = 'off',
    shouldEmbedFn = _shouldEmbed,
    embedFn = _embed,
    encodeBase64Fn = _encodeBase64,
    timeoutMs = 2000,
  } = opts;

  if (embedMode !== 'on') return;
  const taskType = ep?.primary_rationale?.task_classification;
  if (!shouldEmbedFn(taskType)) return;
  if (!ctx || !ctx.prompt) return;

  let timer;
  try {
    // Start the embedding first so the timeout window opens only after any
    // synchronous work embedFn performs.
    const embedding = embedFn(ctx.prompt);
    const timeout = new Promise((resolve) => {
      timer = setTimeout(() => resolve(null), timeoutMs);
    });
    const vec = await Promise.race([embedding, timeout]);
    if (vec && vec.length > 0) {
      ep.prompt_embedding_base64 = encodeBase64Fn(vec);
    } else {
      ep.environment ??= {};
      ep.environment.embedding_unavailable = true;
    }
  } catch (_e) {
    ep.environment ??= {};
    ep.environment.embedding_unavailable = true;
  } finally {
    // No-op when the timer was never created or has already fired.
    clearTimeout(timer);
  }
}

/**
 * Build a minimal observer_error marker — written instead of a silent skip
 * when the Stop-hook fails internally (spec §3 / §5.2).
 *
 * This runs inside the hook's error handler, so it must not throw itself:
 * a null or non-object `ctx` is tolerated and simply yields a generated
 * `unknown-<epoch ms>` task id.
 *
 * @param {object|null} [ctx={}] - Stop-event context; session_id, sessionId or task_id supplies the task id.
 * @param {*} err - The failure; its `message` is used when present, otherwise the value is stringified.
 * @returns {object} Marker with schema_version 4, observer_error true, error_message,
 *   identical started_at/ended_at timestamps, and task_id.
 */
export function buildObserverError(ctx = {}, err) {
  const now = new Date().toISOString();
  // Optional chaining covers an explicit `null` ctx, which the default
  // parameter does not replace.
  const taskId = ctx?.session_id || ctx?.sessionId || ctx?.task_id || `unknown-${Date.now()}`;
  return {
    schema_version: 4,
    observer_error: true,
    error_message: String((err && err.message) || err),
    timestamps: { started_at: now, ended_at: now },
    task_id: taskId,
  };
}

/**
 * Routing-gate decision (spec §5.1, 3a). Pure — the CLI calls this.
 *
 * Blocks the Stop-event (decision: block) when the user dictated a method
 * but the turn carries no routing tag. Checks run in this order:
 *  1. stopHookActive truthy → never block (the gate fires at most once per
 *     turn, so there is no infinite loop);
 *  2. provenance kind `user_chose_from_options` → never block: a collaborative
 *     choice from Claude-offered options is not an externally directed method
 *     and needs no routing tag (spec §11.4);
 *  3. detectMethodDirected(promptText, knownNodes) reports not directed → no block;
 *  4. provenance kind `user_directed_method` → the tag is already present, no block;
 *  5. otherwise block, with a reason telling the model which routing line to add.
 *
 * @param {object} episode - Episode whose decision_provenance.kind is inspected.
 * @param {string} promptText - Last user prompt text.
 * @param {*} knownNodes - Passed through to detectMethodDirected.
 * @param {boolean} stopHookActive - Whether the Stop hook is already active for this turn.
 * @returns {{block: boolean, reason: string|null}}
 */
export function routingGateDecision(episode, promptText, knownNodes, stopHookActive) {
  const allow = () => ({ block: false, reason: null });

  if (stopHookActive) return allow();

  const provenanceKind = episode?.decision_provenance?.kind;
  if (provenanceKind === 'user_chose_from_options') return allow();

  const detection = detectMethodDirected(promptText, knownNodes);
  if (!detection.directed || provenanceKind === 'user_directed_method') return allow();

  const { node } = detection;
  const reason = [
    `[observer routing-gate] Похоже, метод навязан пользователем (узел "${node}"), `,
    `но routing-тег в этом ходе отсутствует. Добавь в свой ответ ровно одну строку:\n`,
    `<!-- routing: provenance=user_directed_method node=${node} `,
    `counterfactual=<узел, который ты выбрал бы автономно> -->`,
  ].join('');
  return { block: true, reason };
}

/**
 * Current month in UTC, formatted as `YYYY-MM` (month zero-padded to two digits).
 * Used as the default suffix of the monthly episodes file.
 * @returns {string}
 */
function currentMonth() {
  const now = new Date();
  const year = now.getUTCFullYear();
  const monthNumber = now.getUTCMonth() + 1; // getUTCMonth() is zero-based
  return [year, String(monthNumber).padStart(2, '0')].join('-');
}

// CLI entry point: read JSON context from stdin (Claude Code Stop-event hook contract).
// The guard runs the hook only when this file is the executed script; backslashes
// are normalised first so the suffix check also matches Windows-style paths.
if (process.argv[1] && process.argv[1].replace(/\\/g, '/').endsWith('/observer-stop-hook.mjs')) {
  // Resolve the observer state directory via brain-config. Any failure (module
  // unavailable, bad config) falls back to docs/observer.
  const resolveStateDirOrDefault = async () => {
    try {
      const { loadConfig, resolveStateDir } = await import('./brain-config.mjs');
      const { stateDir } = resolveStateDir(loadConfig(process.cwd()).state_dir);
      return stateDir;
    } catch {
      return 'docs/observer';
    }
  };

  const chunks = [];
  process.stdin.on('data', (c) => chunks.push(c));
  process.stdin.on('end', async () => {
    let ctx = {};
    try {
      const raw = Buffer.concat(chunks).toString('utf-8');
      if (raw.trim()) {
        const parsed = JSON.parse(raw);
        // Valid JSON is not necessarily an object: `null` would make the
        // property reads below throw outside any try block (unhandled
        // rejection → non-zero exit). Anything but a plain object carries no
        // usable context, so keep the empty default.
        if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
          ctx = parsed;
        }
      }
    } catch (_e) {
      // best-effort: build a minimal episode even if stdin is malformed
    }
    // Claude Code's Stop-event supplies transcript_path — the real source of
    // session data. Read it best-effort; fall back to ctx-only on any error.
    let transcriptText = null;
    const tp = ctx.transcript_path || ctx.transcriptPath;
    if (tp) {
      try {
        if (existsSync(tp)) transcriptText = readFileSync(tp, 'utf-8');
      } catch (_e) {
        transcriptText = null;
      }
    }
    try {
      // Greenfield #3-observer: derive project-normative stems from config so the
      // file-type classifier recognises a consumer project's own normative docs.
      // Fail-safe (design §6): config empty → [] (universal-only); brain-config/cross-ref
      // unavailable → parser default (Liderra 3, backward-compat).
      let stopOpts = {};
      try {
        const { loadConfig } = await import('./brain-config.mjs');
        const { docStem } = await import('./cross-ref-checker.mjs');
        const nf = loadConfig(process.cwd()).normative_files;
        stopOpts = { normativeStems: (Array.isArray(nf) ? nf : []).map(docStem).filter(Boolean) };
      } catch { /* fallback → parser default */ }
      const ep = buildEpisodeFromContext(ctx, transcriptText, stopOpts);

      // Bug fix 2026-05-26: resolve the real user prompt before calling
      // downstream consumers. ctx.prompt is never set by Stop-event stdin —
      // the prompt lives in the transcript. derivePrompt unifies the fallback.
      const userPrompt = derivePrompt(ctx, transcriptText);

      // Step 3.5: self-assessment API call (fail-quiet).
      // Only runs when the runtime flag is 'on' and ROUTER_LLM_KEY is set.
      const saMode = readRuntimeFlag('self-assessment-mode');
      const saApiKey = process.env.ROUTER_LLM_KEY || null;
      if (saMode === 'on' && saApiKey) {
        const rat = ep.primary_rationale ?? {};
        const apiResult = await callSelfAssessmentApi({
          prompt: userPrompt,
          recommendedNode: rat.recommended_node || null,
          actualNode: rat.node_chosen || null,
          chainExecuted: rat.chain_executed || [],
          apiKey: saApiKey,
        });
        ep.self_assessment = buildSelfAssessment({ apiResult });
      }

      // Step 3.6: embedding async wiring (fail-quiet, 2s timeout).
      // Trivial task types skipped via shouldEmbed. Mirrors Step 3.5 pattern.
      const embMode = readRuntimeFlag('embedding-mode');
      await computeEmbeddingForEpisode(ep, { ...ctx, prompt: userPrompt }, { embedMode: embMode });

      // Always write the episode first — exit-0-safe (spec §5.1 step 1).
      const stateDir = await resolveStateDirOrDefault();
      appendEpisode(ep, process.cwd(), currentMonth(), stateDir);
      // Then the routing-gate (spec §5.1 steps 2-4).
      if (transcriptText) {
        const promptText = extractLastUserPromptText(transcriptText);
        const gate = routingGateDecision(ep, promptText, loadKnownNodes(), ctx.stop_hook_active === true);
        if (gate.block) {
          // Exit only once the decision has been flushed: writes to a piped
          // stdout can be asynchronous, and process.exit() would drop a
          // pending write together with the block decision.
          process.stdout.write(
            JSON.stringify({ decision: 'block', reason: gate.reason }),
            () => process.exit(0),
          );
          return;
        }
      }
      process.exit(0);
    } catch (err) {
      // Visible failure (spec §5.2): write an observer_error marker, never a silent skip.
      try {
        const stateDir = await resolveStateDirOrDefault();
        appendEpisode(buildObserverError(ctx, err), process.cwd(), currentMonth(), stateDir);
      } catch (_e2) {
        // last-resort: even the marker failed — do not crash the Stop-event
      }
      // `err` may be a non-Error value (thrown string, null); never let the
      // log line itself throw.
      console.error(`[observer-stop-hook] error: ${(err && err.message) || err}`);
      process.exit(0); // never block the Stop-event on an internal error
    }
  });
}