#!/usr/bin/env node
/**
 * Stop-event hook for brain governance observer (B3).
 * Reads JSON context from stdin (Claude Code Stop-event hook contract).
 * When the context provides `transcript_path`, the episode is derived from
 * the real session transcript via parseTranscript; otherwise it falls back
 * to best-effort defaults. Builds an episode with 5 mandatory fields
 * including primary_rationale (7 sub-fields per spec v1.1 §5.2.1),
 * sanitizes via PII filter, appends to docs/observer/episodes-YYYY-MM.jsonl.
 *
 * Never blocks the Stop-event — exits 0 on any error.
 *
 * Security Guidance #40: NO exec/execSync — pure fs + sanitize.
 * Per Pravila §16.2 + ADR-011 + spec v1.1 §5.2.1.
 */

import { appendFileSync, existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
import { join } from 'path';
import { sanitize, sanitizeWithCount } from './observer-pii-filter.mjs';
import { parseTranscript, extractLastUserPromptText } from './observer-transcript-parser.mjs';
import { detectMethodDirected, loadKnownNodes } from './observer-routing-detector.mjs';
import { callSelfAssessmentApi, readRuntimeFlag } from './observer-self-assessment-api.mjs';
import {
  shouldEmbed as _shouldEmbed,
  encodeBase64 as _encodeBase64,
  embed as _embed,
} from './router-embedding.mjs';

const REQUIRED_FIELDS = ['task_id', 'timestamps', 'path_type', 'outcome', 'primary_rationale'];

const V2_FIELDS = [
  'schema_version',
  'decision_provenance',
  'environment',
  'task_size',
  'task_ref',
  // C-7: prompt_signal + events are always produced by parser and buildEpisodeFromContext,
  // but were previously unvalidated → a ctx-fallback path that dropped them would silently
  // write a malformed episode. Strict validation closes that gap.
  'prompt_signal',
  'events',
];

const OBSERVER_ERROR_FIELDS = ['schema_version', 'error_message', 'timestamps', 'task_id'];

const RATIONALE_FIELDS = [
  'step',
  'node_chosen',
  'triggers_matched',
  'candidates_considered',
  'boundaries_applied',
  'hard_floor',
  'task_classification',
];

/** True for non-null, non-array objects (the only shape usable as a key/value record). */
function isPlainObject(value) {
  return value !== null && typeof value === 'object' && !Array.isArray(value);
}

/**
 * Update the monthly PII counter JSON with counts from a single episode write.
 *
 * The counter is informational, so this function never throws: an unreadable,
 * unparsable or wrongly-shaped counter file is treated as empty, and a failed
 * write is ignored.
 *
 * @param {object} counts - Map of counter name -> number of hits for this write.
 * @param {string} baseDir - Repository root.
 * @param {string} month - YYYY-MM bucket to increment.
 * @param {string} [stateDir] - Directory (relative to baseDir) holding the counter file.
 */
function bumpPiiCounter(counts, baseDir, month, stateDir = 'docs/observer') {
  const counterPath = join(baseDir, stateDir, '.pii-counters.json');

  let store = {};
  if (existsSync(counterPath)) {
    try {
      const parsed = JSON.parse(readFileSync(counterPath, 'utf-8'));
      // Valid JSON is not necessarily an object (`null`, `[]`, `42`); indexing
      // such a value below would throw and fail the caller after the episode
      // line has already been appended.
      if (isPlainObject(parsed)) store = parsed;
    } catch {
      store = {};
    }
  }

  const bucket = isPlainObject(store[month]) ? store[month] : {};
  store[month] = bucket;

  for (const [key, hits] of Object.entries(counts ?? {})) {
    if (hits > 0) {
      const previous = Number.isFinite(bucket[key]) ? bucket[key] : 0;
      bucket[key] = previous + hits;
    }
  }

  try {
    writeFileSync(counterPath, JSON.stringify(store, null, 2) + '\n', 'utf-8');
  } catch {
    /* counter is informational — never fail the Stop-event */
  }
}

/**
 * Ensure every mandatory primary_rationale sub-field is present.
 * @param {object} rationale - The episode's primary_rationale.
 * @throws {TypeError} When rationale is null/undefined.
 * @throws {Error} When a sub-field listed in RATIONALE_FIELDS is undefined.
 */
function validateRationale(rationale) {
  if (rationale == null) {
    throw new TypeError('primary_rationale must be an object');
  }
  for (const f of RATIONALE_FIELDS) {
    if (rationale[f] === undefined) {
      throw new Error(`primary_rationale field missing: ${f}`);
    }
  }
}

/** Sanitize a record, append it as one JSONL line, then bump the PII counters. */
function writeSanitizedLine(record, file, baseDir, month, stateDir) {
  const { sanitized, counts } = sanitizeWithCount(record);
  appendFileSync(file, JSON.stringify(sanitized) + '\n', 'utf-8');
  bumpPiiCounter(counts, baseDir, month, stateDir);
}

/**
 * Append a single episode to the monthly JSONL file.
 * Validates either a full schema-v2 episode or a minimal observer_error marker.
 * @param {object} episode - The episode object.
 * @param {string} baseDir - Repository root (default: process.cwd()).
 * @param {string} month - YYYY-MM string for the file name (default: current UTC month).
 * @param {string} stateDir - Directory relative to baseDir (default: 'docs/observer').
 * @throws {TypeError} When a non-marker episode is null/undefined.
 * @throws {Error} When a mandatory field is missing or schema_version is unsupported.
 */
export function appendEpisode(
  episode,
  baseDir = process.cwd(),
  month = currentMonth(),
  stateDir = 'docs/observer',
) {
  const dir = join(baseDir, stateDir);
  // Recursive mkdir is a no-op when the directory already exists.
  mkdirSync(dir, { recursive: true });
  const file = join(dir, `episodes-${month}.jsonl`);

  if (episode && episode.observer_error === true) {
    for (const f of OBSERVER_ERROR_FIELDS) {
      if (episode[f] === undefined) {
        throw new Error(`observer_error marker field missing: ${f}`);
      }
    }
    writeSanitizedLine(episode, file, baseDir, month, stateDir);
    return;
  }

  if (episode == null) {
    throw new TypeError('episode must be an object');
  }
  for (const f of REQUIRED_FIELDS) {
    if (episode[f] === undefined) {
      throw new Error(`required field missing: ${f}`);
    }
  }
  for (const f of V2_FIELDS) {
    if (episode[f] === undefined) {
      throw new Error(`schema v2 field missing: ${f}`);
    }
  }
  if (![2, 3, 4].includes(episode.schema_version)) {
    throw new Error(`schema_version must be 2, 3 or 4 (got ${episode.schema_version})`);
  }
  validateRationale(episode.primary_rationale);

  writeSanitizedLine(episode, file, baseDir, month, stateDir);
}

/**
 * Build a well-formed episode from a Claude Code Stop-event context.
 * Preferred path: when `transcriptText` is supplied, the episode is derived
 * from the real session transcript via parseTranscript. Fallback path:
 * defaults from `ctx` (an explicit ctx.primary_rationale is preserved verbatim).
 * @param {object} ctx - Raw context from stdin (may be partial or null).
 * @param {string|null} transcriptText - Raw transcript JSONL, if readable.
 * @param {object} options - Passed through to parseTranscript (transcript path only).
 * @returns {object} The episode.
 */
export function buildEpisodeFromContext(ctx = {}, transcriptText = null, options = {}) {
  // Default parameters only cover `undefined`; tolerate an explicit null too.
  const source = ctx ?? {};
  const sessionId = source.session_id || source.sessionId || source.task_id;

  if (transcriptText) {
    return parseTranscript(transcriptText, sessionId, options);
  }

  const sid = sessionId || `unknown-${Date.now()}`;
  const now = new Date().toISOString();
  return {
    schema_version: 4,
    schema_minor: 1,
    task_id: sid,
    task_ref: sid,
    timestamps: {
      started_at: source.started || source.started_at || now,
      ended_at: source.ended || source.ended_at || now,
    },
    path_type: source.path_type || 'regulated',
    outcome: source.result || source.outcome || 'unknown',
    prompt_signal: source.prompt_signal || 'neutral',
    decision_provenance: source.decision_provenance || {
      kind: 'autonomous',
      claude_would_have_chosen: null,
    },
    environment: source.environment || {
      economy_level: null,
      model: null,
      post_compaction: false,
      session_turn: 0,
      parallel_session: false,
    },
    task_size: source.task_size || { tool_calls: 0, files_touched: 0, files: [] },
    primary_rationale: source.primary_rationale || {
      step: 1,
      node_chosen: source.node_chosen || source.skill_id || 'unknown',
      triggers_matched: [],
      candidates_considered: [],
      boundaries_applied: [],
      hard_floor: source.hard_floor || { invoked: false, rules: [] },
      task_classification: source.task_classification || 'other',
    },
    events: source.events || [],
  };
}

/**
 * Build an execution_trace block (spec §5, Phase 3 Task 16).
 * Pure — computes whether the recommended chain was fully executed.
 *
 * chain_gaps is emitted when fewer recommended nodes appear in `invoked` than
 * the chain prescribes (incomplete chain). Empty `recommended_chain` produces
 * no gap (no chain prescribed).
 *
 * Each invocation can satisfy at most one chain step, so invoking the same
 * node repeatedly does not mask a different step that never ran.
 *
 * @param {object} [args]
 * @param {Array} [args.recommended_chain] - Nodes the router prescribed (non-arrays → []).
 * @param {Array} [args.invoked] - Nodes actually invoked (non-arrays → []).
 * @returns {{recommended_chain: Array, invoked: Array, chain_gaps: Array}}
 */
export function buildExecutionTrace({ recommended_chain = [], invoked = [] } = {}) {
  const chain = Array.isArray(recommended_chain) ? recommended_chain : [];
  const inv = Array.isArray(invoked) ? invoked : [];
  const chain_gaps = [];

  if (chain.length > 0) {
    // Multiset of invocations still available to be matched against a step.
    const available = new Map();
    for (const node of inv) {
      available.set(node, (available.get(node) ?? 0) + 1);
    }

    let executed = 0;
    for (const node of chain) {
      const left = available.get(node) ?? 0;
      if (left > 0) {
        executed += 1;
        available.set(node, left - 1);
      }
    }

    if (executed < chain.length) {
      chain_gaps.push({ executed_steps: executed, expected_steps: chain.length });
    }
  }

  return { recommended_chain: chain, invoked: inv, chain_gaps };
}

/**
 * Build an episode merging a parsed/fallback base with router state
 * enrichments (inheritance — closes B5). Accepts the same inputs as
 * buildEpisodeFromContext plus a `state` blob (the router-state JSON dump
 * read by the Stop-hook CLI). schema_minor is set to 3 (Task 20 bump).
 *
 * @param {object} [args]
 * @param {object|null} [args.state] - Router state; only `inheritance` is copied.
 * @param {string|null} [args.transcriptText] - Raw transcript JSONL, if readable.
 * @param {object} [args.ctx] - Stop-event context.
 * @param {object} [args.options] - Passed through to buildEpisodeFromContext.
 * @returns {object} The enriched episode.
 */
export function buildEpisode({ state = null, transcriptText = null, ctx = {}, options = {} } = {}) {
  const base = buildEpisodeFromContext(ctx, transcriptText, options);
  base.schema_minor = 3; // Task 20 bump (cost totals + reviewer distribution surface)
  if (state?.inheritance) {
    // Shallow copy so later edits to the episode do not touch the state blob.
    base.inheritance = { ...state.inheritance };
  }
  return base;
}

/**
 * Resolve the user prompt for downstream consumers (self-assessment API,
 * embedding). Bug fix 2026-05-26: Claude Code's Stop-event stdin contract is
 * { session_id, transcript_path, stop_hook_active, hook_event_name } — it
 * never includes `prompt`. The real text lives in the transcript file. Prior
 * code blindly read `ctx.prompt`, so self-assessment always received the
 * "(пусто)" (empty) placeholder and embedding was silently skipped. This
 * helper prefers `ctx.prompt` (test convenience) and falls back to extracting
 * the last user message from the transcript.
 *
 * @param {object} ctx - Stop-event context.
 * @param {string|null} transcriptText - Raw transcript JSONL, if readable.
 * @returns {string|null} The prompt, or null when neither source has content.
 */
export function derivePrompt(ctx, transcriptText) {
  if (ctx && typeof ctx.prompt === 'string' && ctx.prompt.length > 0) {
    return ctx.prompt;
  }
  if (typeof transcriptText === 'string' && transcriptText.length > 0) {
    const text = extractLastUserPromptText(transcriptText);
    return text || null;
  }
  return null;
}

/**
 * Build a self_assessment block (spec §4.5, Phase 3 Task 17). Pure.
 *
 * Expects { apiResult: string|null } where apiResult is the raw text returned
 * by the Opus self-assessment API call (4 fields). Null = call skipped or
 * timed out → marks self_assessment_pending so /brain-retro can backfill it
 * retroactively.
 *
 * Schema:
 *   summary: string
 *   confidence_in_choice: number 0.0-1.0 (out-of-range or non-number → null)
 *   what_could_be_better: string | null
 *   lesson_learned: string | null
 *   self_assessment_pending: bool
 *   parse_error?: string (only on malformed apiResult)
 */
export function buildSelfAssessment({ apiResult } = {}) {
  if (apiResult == null) return { self_assessment_pending: true };

  // Strip an optional Markdown code fence (``` or ```json) around the payload.
  const stripped = String(apiResult)
    .trim()
    .replace(/^```(?:json)?\s*\n?/, '')
    .replace(/\n?```$/, '')
    .trim();

  let parsed;
  try {
    parsed = JSON.parse(stripped);
  } catch (err) {
    return { self_assessment_pending: true, parse_error: err.message };
  }

  // Arrays are `typeof 'object'` but carry none of the expected fields.
  if (!isPlainObject(parsed)) {
    return { self_assessment_pending: true, parse_error: 'apiResult is not an object' };
  }

  const rawConfidence = parsed.confidence_in_choice;
  const confidence =
    typeof rawConfidence === 'number' && rawConfidence >= 0 && rawConfidence <= 1
      ? rawConfidence
      : null;

  return {
    summary: typeof parsed.summary === 'string' ? parsed.summary : null,
    confidence_in_choice: confidence,
    what_could_be_better: parsed.what_could_be_better ?? null,
    lesson_learned: parsed.lesson_learned ?? null,
    self_assessment_pending: false,
  };
}

/**
 * Step 3.6 embedding async wiring (Phase 4 follow-up).
 *
 * Mirrors the Step 3.5 self-assessment pattern (commit c1ec61fa). When the
 * embedding-mode runtime flag is 'on' and the task is non-trivial (per
 * shouldEmbed), computes a sentence embedding via embedFn and stores it on
 * the episode as `prompt_embedding_base64`. Fail-quiet: on timeout / empty
 * result / runtime error the field is left unset and
 * `environment.embedding_unavailable = true` is set instead.
 *
 * Pure-API style: injectable embedFn / shouldEmbedFn / encodeBase64Fn for tests
 * (the defaults are the real router-embedding.mjs implementations).
 *
 * @param {object} ep — episode object to mutate
 * @param {object} ctx — Stop-hook context (uses ctx.prompt)
 * @param {object} opts
 * @param {string} [opts.embedMode] — runtime flag value ('on' to compute)
 * @param {Function} [opts.shouldEmbedFn] — taskType -> bool
 * @param {Function} [opts.embedFn] — async(prompt) -> vector | null
 * @param {Function} [opts.encodeBase64Fn] — vector -> base64 string
 * @param {number} [opts.timeoutMs] — race timeout (default 2000)
 * @returns {Promise<void>}
 */
export async function computeEmbeddingForEpisode(ep, ctx = {}, opts = {}) {
  const {
    embedMode = 'off',
    shouldEmbedFn = _shouldEmbed,
    embedFn = _embed,
    encodeBase64Fn = _encodeBase64,
    timeoutMs = 2000,
  } = opts;

  if (embedMode !== 'on') return;
  const taskType = ep?.primary_rationale?.task_classification;
  if (!shouldEmbedFn(taskType)) return;
  if (!ctx || !ctx.prompt) return;

  const markUnavailable = () => {
    ep.environment ??= {};
    ep.environment.embedding_unavailable = true;
  };

  let timer;
  const timeout = new Promise((resolve) => {
    timer = setTimeout(() => resolve(null), timeoutMs);
  });

  try {
    const vec = await Promise.race([embedFn(ctx.prompt), timeout]);
    if (vec && vec.length > 0) {
      ep.prompt_embedding_base64 = encodeBase64Fn(vec);
    } else {
      markUnavailable();
    }
  } catch {
    markUnavailable();
  } finally {
    // Without this the pending timer keeps the event loop alive for up to
    // timeoutMs after the embedding has already settled.
    clearTimeout(timer);
  }
}

/**
 * Build a minimal observer_error marker — written instead of a silent skip
 * when the Stop-hook fails internally (spec §3 / §5.2).
 * @param {object} ctx - Stop-event context (may be partial or null).
 * @param {*} err - The failure; `err.message` is used when present, else String(err).
 * @returns {object} Marker satisfying OBSERVER_ERROR_FIELDS.
 */
export function buildObserverError(ctx = {}, err) {
  const source = ctx ?? {};
  const now = new Date().toISOString();
  return {
    schema_version: 4,
    observer_error: true,
    error_message: String((err && err.message) || err),
    timestamps: { started_at: now, ended_at: now },
    task_id: source.session_id || source.sessionId || source.task_id || `unknown-${Date.now()}`,
  };
}

/**
 * Routing-gate decision (spec §5.1, 3a). Pure — the CLI calls this.
 * Blocks the Stop-event (decision: block) when the user dictated a method
 * but the turn carries no routing tag. Skipped when stop_hook_active is true
 * (the gate fires at most once per turn — no infinite loop).
 * @param {object} episode - Episode whose decision_provenance.kind is inspected.
 * @param {string} promptText - Last user prompt text.
 * @param {*} knownNodes - Passed through to detectMethodDirected.
 * @param {boolean} stopHookActive - True when the hook is already re-running.
 * @returns {{block: boolean, reason: string|null}}
 */
export function routingGateDecision(episode, promptText, knownNodes, stopHookActive) {
  const pass = { block: false, reason: null };
  if (stopHookActive) return pass;

  const provenanceKind = episode?.decision_provenance?.kind;

  // user_chose_from_options is collaborative-choice from Claude-offered options —
  // not an externally directed method; no routing tag required (spec §11.4).
  if (provenanceKind === 'user_chose_from_options') return pass;

  const det = detectMethodDirected(promptText, knownNodes);
  if (!det.directed) return pass;

  // The turn already declares the method as user-directed — nothing to enforce.
  if (provenanceKind === 'user_directed_method') return pass;

  return {
    block: true,
    // NOTE(review): the message announces "exactly one line" to add, but the
    // final template literal is empty — the tag text looks lost; confirm
    // against the spec before changing it.
    reason:
      `[observer routing-gate] Похоже, метод навязан пользователем (узел "${det.node}"), ` +
      `но routing-тег в этом ходе отсутствует. Добавь в свой ответ ровно одну строку:\n` +
      ``,
  };
}

/** Current UTC month as a YYYY-MM string. */
function currentMonth() {
  const d = new Date();
  return `${d.getUTCFullYear()}-${String(d.getUTCMonth() + 1).padStart(2, '0')}`;
}

/**
 * Parse the Stop-event stdin payload. Best-effort: empty, malformed or
 * non-object JSON (e.g. `null`) yields {} so later property reads cannot throw.
 */
function parseHookContext(chunks) {
  try {
    const raw = Buffer.concat(chunks).toString('utf-8');
    if (!raw.trim()) return {};
    const parsed = JSON.parse(raw);
    return isPlainObject(parsed) ? parsed : {};
  } catch {
    // best-effort: build a minimal episode even if stdin is malformed
    return {};
  }
}

/** Read the transcript file best-effort; null when absent or unreadable. */
function readTranscriptText(transcriptPath) {
  if (!transcriptPath) return null;
  try {
    return existsSync(transcriptPath) ? readFileSync(transcriptPath, 'utf-8') : null;
  } catch {
    return null;
  }
}

/**
 * Greenfield #3-observer: derive project-normative stems from config so the
 * file-type classifier recognises a consumer project's own normative docs.
 * Fail-safe (design §6): config empty → [] (universal-only); brain-config/cross-ref
 * unavailable → {} so the parser uses its default (Лидерра 3, backward-compat).
 */
async function loadStopOptions() {
  try {
    const { loadConfig } = await import('./brain-config.mjs');
    const { docStem } = await import('./cross-ref-checker.mjs');
    const nf = loadConfig(process.cwd()).normative_files;
    return { normativeStems: (Array.isArray(nf) ? nf : []).map(docStem).filter(Boolean) };
  } catch {
    return {};
  }
}

/** Resolve the observer state directory; falls back to docs/observer when brain-config is unavailable. */
async function resolveObserverStateDir() {
  let stateDir = 'docs/observer';
  try {
    const { loadConfig, resolveStateDir } = await import('./brain-config.mjs');
    ({ stateDir } = resolveStateDir(loadConfig(process.cwd()).state_dir));
  } catch {
    /* brain-config unavailable → fallback docs/observer */
  }
  return stateDir;
}

/**
 * Write the hook decision to stdout and exit 0 once the write has completed.
 * Exiting straight after write() can truncate the payload where stdout
 * writes are asynchronous (e.g. pipes on Windows).
 */
function exitAfterStdout(payload) {
  process.stdout.write(payload, () => process.exit(0));
}

// CLI entry point: read JSON context from stdin (Claude Code Stop-event hook contract)
if (process.argv[1] && process.argv[1].replace(/\\/g, '/').endsWith('/observer-stop-hook.mjs')) {
  const chunks = [];
  process.stdin.on('data', (c) => chunks.push(c));
  process.stdin.on('end', async () => {
    const ctx = parseHookContext(chunks);

    // Claude Code's Stop-event supplies transcript_path — the real source of
    // session data. Read it best-effort; fall back to ctx-only on any error.
    const transcriptText = readTranscriptText(ctx.transcript_path || ctx.transcriptPath);

    try {
      const stopOpts = await loadStopOptions();
      const ep = buildEpisodeFromContext(ctx, transcriptText, stopOpts);

      // Bug fix 2026-05-26: resolve the real user prompt before calling
      // downstream consumers. ctx.prompt is never set by Stop-event stdin —
      // the prompt lives in the transcript. derivePrompt unifies the fallback.
      const userPrompt = derivePrompt(ctx, transcriptText);

      // Step 3.5: self-assessment API call (fail-quiet).
      // Only runs when the runtime flag is 'on' and ROUTER_LLM_KEY is set.
      const saMode = readRuntimeFlag('self-assessment-mode');
      const saApiKey = process.env.ROUTER_LLM_KEY || null;
      if (saMode === 'on' && saApiKey) {
        const rat = ep.primary_rationale ?? {};
        const apiResult = await callSelfAssessmentApi({
          prompt: userPrompt,
          recommendedNode: rat.recommended_node || null,
          actualNode: rat.node_chosen || null,
          chainExecuted: rat.chain_executed || [],
          apiKey: saApiKey,
        });
        ep.self_assessment = buildSelfAssessment({ apiResult });
      }

      // Step 3.6: embedding async wiring (fail-quiet, 2s timeout).
      // Trivial task types skipped via shouldEmbed. Mirrors Step 3.5 pattern.
      const embMode = readRuntimeFlag('embedding-mode');
      await computeEmbeddingForEpisode(ep, { ...ctx, prompt: userPrompt }, { embedMode: embMode });

      // Always write the episode first — exit-0-safe (spec §5.1 step 1).
      const stateDir = await resolveObserverStateDir();
      appendEpisode(ep, process.cwd(), currentMonth(), stateDir);

      // Then the routing-gate (spec §5.1 steps 2-4).
      if (transcriptText) {
        const promptText = extractLastUserPromptText(transcriptText);
        const gate = routingGateDecision(
          ep,
          promptText,
          loadKnownNodes(),
          ctx.stop_hook_active === true,
        );
        if (gate.block) {
          exitAfterStdout(JSON.stringify({ decision: 'block', reason: gate.reason }));
          return;
        }
      }
      process.exit(0);
    } catch (err) {
      // Visible failure (spec §5.2): write an observer_error marker, never a silent skip.
      try {
        const stateDir = await resolveObserverStateDir();
        appendEpisode(buildObserverError(ctx, err), process.cwd(), currentMonth(), stateDir);
      } catch {
        // last-resort: even the marker failed — do not crash the Stop-event
      }
      console.error(`[observer-stop-hook] error: ${err?.message ?? err}`);
      process.exit(0); // never block the Stop-event on an internal error
    }
  });
}