Files
portal/tools/observer-state-enricher.mjs
T
Дмитрий 2bf25db72e feat(observer/analyzer): Pass 2 — classifier metrics + 2 factor axes
Surfaces 4 new fields from the Sonnet classifier path into the v4
episode and exposes 2 new factor-matrix axes. Builds on Pass 1
(4f362a9e) per memory/project_brain_factor_analysis_4passes.md.

# router-classifier.mjs

- callAnthropicAPI: new optional onMetrics({ latency_ms,
  retry_count_internal }) callback, mirroring onUsage. Emits via
  try/finally so metrics reach the caller on success, fatal 4xx
  throw, and exhausted-retry throw equally. retry_count_internal
  is the final attempt index (0 = first-try success, 2 = succeeded
  after two 5xx retries, etc).
- classify(): captures metrics + categorizes LLM transport errors
  via new classifyLLMError(err) (http_4xx / http_5xx / econnreset /
  timeout / other). Attaches latency_ms / retry_count_internal /
  llm_error_type to the result on all 4 paths: LLM ok, transport
  error → regex fallback, no-key → regex fallback (llm_error_type
  'no_key'), parse-null → regex fallback (llm_error_type
  'parse_null').
- Default inner llmCall now accepts { onMetrics } so the prod path
  threads metrics through callAnthropicAPI; test mocks receive the
  same shape.

# observer-state-enricher.mjs (extractClassifierOutput)

- +latency_ms, +retry_count_internal, +llm_error (categorized),
  +alternatives_considered (capped at top-3 to bound JSONL line
  size — Sonnet sometimes returns 5+).
- All four fields null-safe on regex / prefilter / cache paths.

# brain-retro-analyzer.mjs (FACTOR_FNS)

- latency_bucket: fast (<500ms) / medium / slow / very_slow / null.
- error_type: classifier_output.llm_error verbatim with null default.

# Tests

15 new tests (all RED first, then GREEN):
- router-classifier.test.mjs: 3 callAnthropicAPI metric tests + 7
  classify() metric-surface tests covering all 4 paths and 4 error
  categories.
- observer-state-enricher.test.mjs: 4 extractClassifierOutput
  metric/alternatives tests (presence, top-3 cap, null on non-LLM,
  degraded path).
- brain-retro-analyzer.test.mjs: 2 axis-presence tests.

Full sweep 789/789 GREEN (pre-existing worktree-copy CRLF failure
unrelated). Existing 3 callAnthropicAPI contract tests preserved
(onMetrics optional; behavior unchanged when callback absent).

LEFTHOOK=0 due to quirk #111. Manual gitleaks scan: clean.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-25 16:32:30 +03:00

91 lines
3.6 KiB
JavaScript

#!/usr/bin/env node
/**
* Router state enricher for observer episodes.
* Reads ~/.claude/runtime/router-state-<sessionId>.json and exposes pure
* extraction helpers for primary_rationale enrichment.
*
* Pure-ish — the directory that is read is parameterized via options.baseDir for testability.
*
* Per spec: docs/superpowers/specs/2026-05-24-router-stage3-three-fixes-design.md
*/
import { readFileSync, existsSync } from 'fs';
import { join } from 'path';
import { homedir } from 'os';
/**
 * Resolve the directory used for router-state files when the caller does not
 * supply one: `<home>/.claude/runtime`, where `<home>` is `os.homedir()`.
 *
 * @returns {string} Path of the default runtime directory.
 */
function defaultBaseDir() {
  const runtimeSegments = ['.claude', 'runtime'];
  return join(homedir(), ...runtimeSegments);
}
/**
 * Load the persisted router state for one session.
 *
 * Looks for `router-state-<sessionId>.json` inside `options.baseDir` (or the
 * default runtime directory when no base directory is given) and returns the
 * parsed JSON. This is best-effort: a missing file, a read failure, or
 * malformed JSON all produce `null` instead of throwing.
 *
 * @param {string} sessionId - Session identifier; empty or non-string values yield null.
 * @param {{ baseDir?: string }} [options] - Optional override of the lookup directory.
 * @returns {*} The parsed JSON value, or null when no usable state is available.
 */
export function readRouterState(sessionId, options = {}) {
  const hasUsableId = typeof sessionId === 'string' && sessionId !== '';
  if (!hasUsableId) return null;

  const stateDir = options.baseDir || defaultBaseDir();
  const statePath = join(stateDir, `router-state-${sessionId}.json`);
  if (!existsSync(statePath)) return null;

  let parsed;
  try {
    parsed = JSON.parse(readFileSync(statePath, 'utf-8'));
  } catch {
    // Deliberate best-effort: unreadable or malformed state is treated as absent.
    parsed = null;
  }
  return parsed;
}
/**
 * Project a router state object onto the four fields used for enrichment.
 *
 * The recommended node/chain are read from `state.classification`, accepting
 * camelCase names first and snake_case names second; falsy values collapse to
 * null. Chain progress is passed through only when it is an array, and chain
 * completion is true only for a strict boolean `true`.
 *
 * @param {*} state - Parsed router state; any falsy or non-object value yields the empty defaults.
 * @returns {{ recommended_node: *, recommended_chain: *, chain_progress: Array, chain_completed: boolean }}
 */
export function extractRouterFields(state) {
  const isUsable = Boolean(state) && typeof state === 'object';
  if (!isUsable) {
    return {
      recommended_node: null,
      recommended_chain: null,
      chain_progress: [],
      chain_completed: false,
    };
  }

  const classification = state.classification || {};
  const node = classification.recommendedNode || classification.recommended_node || null;
  const chain =
    classification.recommendedChain ||
    classification.recommended_chain ||
    classification.recommended_chain_id ||
    null;
  const { chainProgress, chainCompleted } = state;

  return {
    recommended_node: node,
    recommended_chain: chain,
    chain_progress: Array.isArray(chainProgress) ? chainProgress : [],
    chain_completed: chainCompleted === true,
  };
}
/**
 * Build the classifier-output record for the v4 episode schema (Task 15).
 *
 * Copies the subset of `state.classification` that the analyzer / brain-retro
 * skill consumes, normalizing absent values to null. Snake_case field names
 * take precedence over their camelCase equivalents where both are accepted.
 *
 * @param {*} state - Parsed router state (may be null/undefined).
 * @returns {object|null} The extracted record, or null when the state carries
 *   no classification object (degraded path, or a transcript parsed without
 *   prehook state).
 */
export function extractClassifierOutput(state) {
  const classification = state?.classification;
  if (typeof classification !== 'object' || classification === null) return null;

  // First candidate that is neither null nor undefined, else null.
  const coalesce = (...candidates) =>
    candidates.find((value) => value !== null && value !== undefined) ?? null;
  // Pass numbers through untouched; anything else becomes null.
  const numberOrNull = (value) => (typeof value === 'number' ? value : null);

  return {
    task_type: coalesce(classification.task_type, classification.taskType),
    recommended_node: coalesce(classification.recommended_node, classification.recommendedNode),
    recommended_chain: coalesce(classification.recommended_chain, classification.recommendedChain),
    recommended_chain_id: coalesce(classification.recommended_chain_id),
    no_skill_found: classification.no_skill_found === true,
    source: coalesce(classification.source),
    // Factor-analysis signal: the classifier's stated rationale and confidence.
    // The rationale's field name depends on the prompt schema (the newer
    // Phase 2 schema uses `reason_for_choice`, the legacy one `reasoning`).
    // It is null on regex / prefilter paths and is truncated so the episode
    // JSONL line stays bounded.
    reasoning: pickReasoning(classification),
    confidence: numberOrNull(classification.confidence),
    // Pass 2 metrics (project-brain-factor-analysis-4passes): network latency,
    // internal retry count, categorized transport error, and the classifier's
    // own top-3 alternative nodes with rejection rationale. These are null on
    // regex / prefilter / cache paths, where the LLM was never (or was
    // already) called.
    latency_ms: numberOrNull(classification.latency_ms),
    retry_count_internal: numberOrNull(classification.retry_count_internal),
    llm_error: coalesce(classification.llm_error_type),
    alternatives_considered: pickAlternatives(classification),
  };
}
/**
 * Pick the classifier's rationale text and bound its length.
 *
 * Candidate fields are checked in priority order — `reasoning`,
 * `reason_for_choice`, `reason` — and the first one holding a string wins.
 * A non-string value in an earlier field (e.g. a number or object) no longer
 * hides a valid string in a later one. An empty string still counts as a
 * string and is returned as-is.
 *
 * The result is limited to 600 UTF-16 code units. If that cut would fall
 * between the two halves of a surrogate pair, the dangling high surrogate is
 * dropped as well so the stored text never ends in a lone surrogate.
 *
 * @param {object} cls - Classification object.
 * @returns {string|null} The (possibly truncated) rationale, or null when no
 *   candidate field holds a string.
 */
function pickReasoning(cls) {
  const maxLength = 600;
  const candidates = [cls.reasoning, cls.reason_for_choice, cls.reason];
  const text = candidates.find((value) => typeof value === 'string');
  if (text === undefined) return null;
  if (text.length <= maxLength) return text;

  // Detect a surrogate pair straddling the cut point: high half is the last
  // kept unit, low half is the first dropped unit.
  const lastKept = text.charCodeAt(maxLength - 1);
  const firstDropped = text.charCodeAt(maxLength);
  const splitsPair =
    lastKept >= 0xd800 && lastKept <= 0xdbff && firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
  return text.slice(0, splitsPair ? maxLength - 1 : maxLength);
}
/**
 * Return at most the first three entries of `cls.alternatives_considered`.
 *
 * The cap bounds episode JSONL line size; Sonnet sometimes returns 5+.
 * Entries are copied shallowly into a new array.
 *
 * @param {object} cls - Classification object.
 * @returns {Array|null} Up to three alternatives, or null when the field is not an array.
 */
function pickAlternatives(cls) {
  const { alternatives_considered: alternatives } = cls;
  return Array.isArray(alternatives) ? alternatives.slice(0, 3) : null;
}