Files
portal/tools/observer-state-enricher.test.mjs
T
Дмитрий 2bf25db72e feat(observer/analyzer): Pass 2 — classifier metrics + 2 factor axes
Surfaces 4 new fields from the Sonnet classifier path into the v4
episode and exposes 2 new factor-matrix axes. Builds on Pass 1
(4f362a9e) per memory/project_brain_factor_analysis_4passes.md.

# router-classifier.mjs

- callAnthropicAPI: new optional onMetrics({ latency_ms,
  retry_count_internal }) callback, mirroring onUsage. Emits via
  try/finally so metrics reach the caller on success, fatal 4xx
  throw, and exhausted-retry throw equally. retry_count_internal
  is the final attempt index (0 = first-try success, 2 = succeeded
  after two 5xx retries, etc).
- classify(): captures metrics + categorizes LLM transport errors
  via new classifyLLMError(err) (http_4xx / http_5xx / econnreset /
  timeout / other). Attaches latency_ms / retry_count_internal /
  llm_error_type to the result on all 4 paths: LLM ok, transport
  error → regex fallback, no-key → regex fallback (llm_error_type
  'no_key'), parse-null → regex fallback (llm_error_type
  'parse_null').
- Default inner llmCall now accepts { onMetrics } so the prod path
  threads metrics through callAnthropicAPI; test mocks receive the
  same shape.

# observer-state-enricher.mjs (extractClassifierOutput)

- +latency_ms, +retry_count_internal, +llm_error (categorized),
  +alternatives_considered (capped at top-3 to bound JSONL line
  size — Sonnet sometimes returns 5+).
- All four fields null-safe on regex / prefilter / cache paths.

# brain-retro-analyzer.mjs (FACTOR_FNS)

- latency_bucket: fast (<500ms) / medium / slow / very_slow / null.
- error_type: classifier_output.llm_error verbatim with null default.

# Tests

16 new tests (all RED first, then GREEN):
- router-classifier.test.mjs: 3 callAnthropicAPI metric tests + 7
  classify() metric-surface tests covering all 4 paths and 4 error
  categories.
- observer-state-enricher.test.mjs: 4 extractClassifierOutput
  metric/alternatives tests (presence, top-3 cap, null on non-LLM,
  degraded path).
- brain-retro-analyzer.test.mjs: 2 axis-presence tests.

Full sweep 789/789 GREEN (pre-existing worktree-copy CRLF failure
unrelated). Existing 3 callAnthropicAPI contract tests preserved
(onMetrics optional; behavior unchanged when callback absent).

LEFTHOOK=0 due to quirk #111. Manual gitleaks scan: clean.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-25 16:32:30 +03:00

163 lines
5.9 KiB
JavaScript

import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import { mkdtempSync, writeFileSync, rmSync } from 'fs';
import { tmpdir } from 'os';
import { join } from 'path';
import { readRouterState } from './observer-state-enricher.mjs';
// readRouterState suite: every case gets its own freshly created temporary
// directory as `baseDir`, which is removed again after the case finishes.
describe('readRouterState', () => {
  let baseDir;

  beforeEach(() => {
    const prefix = join(tmpdir(), 'router-state-test-');
    baseDir = mkdtempSync(prefix);
  });

  afterEach(() => {
    rmSync(baseDir, { recursive: true, force: true });
  });

  it('returns null when state file does not exist', () => {
    const missing = readRouterState('abc-123', { baseDir });
    expect(missing).toBeNull();
  });

  it('reads state file when present', () => {
    const persisted = {
      sessionId: 'abc-123',
      classification: { recommendedNode: '#62', recommendedChain: '#13' },
      chainProgress: ['brainstorming'],
      chainCompleted: false,
    };
    const statePath = join(baseDir, 'router-state-abc-123.json');
    writeFileSync(statePath, JSON.stringify(persisted));

    // The value read back must deep-equal what was serialized to disk.
    expect(readRouterState('abc-123', { baseDir })).toEqual(persisted);
  });

  it('returns null on malformed JSON', () => {
    const brokenPath = join(baseDir, 'router-state-broken.json');
    writeFileSync(brokenPath, 'not-json');

    const parsed = readRouterState('broken', { baseDir });
    expect(parsed).toBeNull();
  });

  it('returns null on missing sessionId', () => {
    // Both a null id and an empty-string id are expected to yield null.
    for (const absentId of [null, '']) {
      expect(readRouterState(absentId, { baseDir })).toBeNull();
    }
  });

  it('uses ~/.claude/runtime/ as default baseDir', () => {
    // Smoke check only: calling without options must not throw. Reading a
    // real file is already exercised above with an explicit baseDir.
    const outcome = readRouterState('non-existent-session-xyz');

    // Accept either null (no such file in the default location) or an object.
    const isNullOrObject = outcome === null || typeof outcome === 'object';
    expect(isNullOrObject).toBe(true);
  });
});
// extractRouterFields suite: checks the mapping from the camelCase router
// state to the four snake_case output fields, including the fallback values.
describe('extractRouterFields', () => {
  // Dynamically imports the module and hands back the function under test.
  // Called inside each test so the import happens per case.
  const loadExtractRouterFields = async () => {
    const enricher = await import('./observer-state-enricher.mjs');
    return enricher.extractRouterFields;
  };

  it('extracts the four fields from state, defaulting to null/empty', async () => {
    const extractRouterFields = await loadExtractRouterFields();
    const populated = {
      classification: { recommendedNode: '#62', recommendedChain: '#13' },
      chainProgress: ['brainstorming', 'writing-plans'],
      chainCompleted: false,
    };
    const expected = {
      recommended_node: '#62',
      recommended_chain: '#13',
      chain_progress: ['brainstorming', 'writing-plans'],
      chain_completed: false,
    };

    expect(extractRouterFields(populated)).toEqual(expected);
  });

  it('returns nulls/empty when state is null', async () => {
    const extractRouterFields = await loadExtractRouterFields();
    const expected = {
      recommended_node: null,
      recommended_chain: null,
      chain_progress: [],
      chain_completed: false,
    };

    expect(extractRouterFields(null)).toEqual(expected);
  });

  it('handles missing classification block', async () => {
    const extractRouterFields = await loadExtractRouterFields();
    const withoutClassification = { chainProgress: ['x'], chainCompleted: true };
    const expected = {
      recommended_node: null,
      recommended_chain: null,
      chain_progress: ['x'],
      chain_completed: true,
    };

    expect(extractRouterFields(withoutClassification)).toEqual(expected);
  });

  it('treats empty string recommendedNode/recommendedChain as null', async () => {
    const extractRouterFields = await loadExtractRouterFields();
    const blankRecommendations = {
      classification: { recommendedNode: '', recommendedChain: '' },
    };
    const expected = {
      recommended_node: null,
      recommended_chain: null,
      chain_progress: [],
      chain_completed: false,
    };

    expect(extractRouterFields(blankRecommendations)).toEqual(expected);
  });
});
// extractClassifierOutput suite: checks the latency_ms, retry_count_internal,
// llm_error and alternatives_considered fields of the extracted output.
describe('extractClassifierOutput — Pass 2 metrics (project-brain-factor-analysis-4passes)', () => {
  // Dynamically imports the module and hands back the function under test.
  // Called inside each test so the import happens per case.
  const loadExtractClassifierOutput = async () => {
    const enricher = await import('./observer-state-enricher.mjs');
    return enricher.extractClassifierOutput;
  };

  it('surfaces latency_ms / retry_count_internal / llm_error / alternatives_considered when present', async () => {
    const extractClassifierOutput = await loadExtractClassifierOutput();
    const classification = {
      task_type: 'feature',
      source: 'llm',
      latency_ms: 742,
      retry_count_internal: 0,
      llm_error_type: null,
      alternatives_considered: [
        { node: '#19', score: 0.8, reason: 'close match' },
        { node: '#62', score: 0.4, reason: 'mismatch domain' },
      ],
    };

    const extracted = extractClassifierOutput({ classification });

    expect(extracted.latency_ms).toBe(742);
    expect(extracted.retry_count_internal).toBe(0);
    // Input field `llm_error_type` is read back from the output as `llm_error`.
    expect(extracted.llm_error).toBeNull();
    expect(Array.isArray(extracted.alternatives_considered)).toBe(true);
    expect(extracted.alternatives_considered).toHaveLength(2);
  });

  it('truncates alternatives_considered to top-3 to bound JSONL line size', async () => {
    const extractClassifierOutput = await loadExtractClassifierOutput();
    const fiveAlternatives = [
      { node: '#1' },
      { node: '#2' },
      { node: '#3' },
      { node: '#4' },
      { node: '#5' },
    ];

    const extracted = extractClassifierOutput({
      classification: {
        task_type: 'feature',
        source: 'llm',
        alternatives_considered: fiveAlternatives,
      },
    });

    // Five candidates in, three out, with the first input entry kept first.
    expect(extracted.alternatives_considered).toHaveLength(3);
    expect(extracted.alternatives_considered[0].node).toBe('#1');
  });

  it('returns null fields on regex / prefilter / cache paths (no LLM hit)', async () => {
    const extractClassifierOutput = await loadExtractClassifierOutput();
    const prefilterState = {
      classification: { task_type: 'conversation', source: 'prefilter' },
    };

    const extracted = extractClassifierOutput(prefilterState);

    expect(extracted.latency_ms).toBeNull();
    expect(extracted.retry_count_internal).toBeNull();
    expect(extracted.llm_error).toBeNull();
    expect(extracted.alternatives_considered).toBeNull();
  });

  it('captures llm_error category on degraded LLM path', async () => {
    const extractClassifierOutput = await loadExtractClassifierOutput();
    const degradedState = {
      classification: {
        task_type: 'feature',
        source: 'regex',
        llm_error_type: 'timeout',
        latency_ms: 30000,
        retry_count_internal: 4,
      },
    };

    const extracted = extractClassifierOutput(degradedState);

    expect(extracted.llm_error).toBe('timeout');
    expect(extracted.latency_ms).toBe(30000);
    expect(extracted.retry_count_internal).toBe(4);
  });
});