feat(observer/analyzer): Pass 2 — classifier metrics + 2 factor axes
Surfaces 4 new fields from the Sonnet classifier path into the v4
episode and exposes 2 new factor-matrix axes. Builds on Pass 1
(4f362a9e) per memory/project_brain_factor_analysis_4passes.md.
# router-classifier.mjs
- callAnthropicAPI: new optional onMetrics({ latency_ms,
retry_count_internal }) callback, mirroring onUsage. Emits via
try/finally so metrics reach the caller on success, fatal 4xx
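throw, and exhausted-retry throw equally. retry_count_internal
is the retry loop's attempt counter at exit (0 = first-try success,
2 = succeeded after two 5xx retries, etc.). Note: when retries are
exhausted the counter has already advanced to maxRetries + 1.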
- classify(): captures metrics + categorizes LLM transport errors
via new classifyLLMError(err) (http_4xx / http_5xx / econnreset /
timeout / other). Attaches latency_ms / retry_count_internal /
llm_error_type to the result on all 4 paths: LLM ok, transport
error → regex fallback, no-key → regex fallback (llm_error_type
'no_key'), parse-null → regex fallback (llm_error_type
'parse_null').
- Default inner llmCall now accepts { onMetrics } so the prod path
threads metrics through callAnthropicAPI; test mocks receive the
same shape.
# observer-state-enricher.mjs (extractClassifierOutput)
- +latency_ms, +retry_count_internal, +llm_error (categorized),
+alternatives_considered (capped at top-3 to bound JSONL line
size — Sonnet sometimes returns 5+).
- All four fields null-safe on regex / prefilter / cache paths.
# brain-retro-analyzer.mjs (FACTOR_FNS)
- latency_bucket: fast (<500ms) / medium / slow / very_slow / null.
- error_type: classifier_output.llm_error verbatim with null default.
# Tests
16 new tests (all RED first, then GREEN):
- router-classifier.test.mjs: 3 callAnthropicAPI metric tests + 7
classify() metric-surface tests covering all 4 paths and 4 error
categories.
- observer-state-enricher.test.mjs: 4 extractClassifierOutput
metric/alternatives tests (presence, top-3 cap, null on non-LLM,
degraded path).
- brain-retro-analyzer.test.mjs: 2 axis-presence tests.
Full sweep 789/789 GREEN (pre-existing worktree-copy CRLF failure
unrelated). Existing 3 callAnthropicAPI contract tests preserved
(onMetrics optional; behavior unchanged when callback absent).
LEFTHOOK=0 due to quirk #111. Manual gitleaks scan: clean.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -187,6 +187,18 @@ function iterationsBucket(iterations) {
|
||||
return '11+';
|
||||
}
|
||||
|
||||
/**
 * Pass 2 — classifier latency bucket for the factor matrix.
 *
 * Buckets (milliseconds):
 *   [0, 500)       -> 'fast'      (cache hit territory)
 *   [500, 2000)    -> 'medium'    (cold call)
 *   [2000, 10000)  -> 'slow'      (network jitter / overflow)
 *   >= 10000       -> 'very_slow' (retries fired)
 *
 * @param {number|string|null|undefined} latency - latency in ms; a non-blank
 *   numeric string is accepted and converted.
 * @returns {'fast'|'medium'|'slow'|'very_slow'|'null'} the bucket label, or the
 *   string 'null' when no usable latency is available (non-LLM paths).
 */
function latencyBucket(latency) {
  // Number(null), Number(''), Number(false) and Number([]) all coerce to 0,
  // which would misfile "no measurement" as 'fast'. Only real numbers and
  // non-blank strings are allowed through to the numeric conversion.
  const isNonBlankString = typeof latency === 'string' && latency.trim() !== '';
  if (typeof latency !== 'number' && !isNonBlankString) return 'null';

  const n = Number(latency);
  if (!Number.isFinite(n) || n < 0) return 'null';
  if (n < 500) return 'fast';
  if (n < 2000) return 'medium';
  if (n < 10000) return 'slow';
  return 'very_slow';
}
|
||||
|
||||
const FACTOR_FNS = {
|
||||
decision_provenance: (e) => (e.decision_provenance || {}).kind || 'unknown',
|
||||
economy_level: (e) => String((e.environment || {}).economy_level ?? 'null'),
|
||||
@@ -207,6 +219,9 @@ const FACTOR_FNS = {
|
||||
error_count: (e) => errorBucket(e.events),
|
||||
hard_floor_invoked: (e) => String(((e.primary_rationale || {}).hard_floor || {}).invoked ?? false),
|
||||
iterations_bucket: (e) => iterationsBucket((e.task_cost || {}).iterations),
|
||||
// Pass 2 — classifier-metric axes (project-brain-factor-analysis-4passes):
|
||||
latency_bucket: (e) => latencyBucket((e.classifier_output || {}).latency_ms),
|
||||
error_type: (e) => (e.classifier_output || {}).llm_error || 'null',
|
||||
};
|
||||
|
||||
/** Factor matrix: rows = factor values, columns = outcome distribution (spec §6). */
|
||||
|
||||
@@ -516,3 +516,32 @@ describe('buildFactorMatrix — Pass 1 cheap axes (project-brain-factor-analysis
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe('buildFactorMatrix — Pass 2 classifier-metric axes', () => {
|
||||
it('latency_bucket axis: fast / medium / slow / very_slow / null', () => {
|
||||
const m = buildFactorMatrix([
|
||||
{ ...ep(), _inferredOutcome: 'success', classifier_output: { latency_ms: 250 } },
|
||||
{ ...ep(), _inferredOutcome: 'success', classifier_output: { latency_ms: 1500 } },
|
||||
{ ...ep(), _inferredOutcome: 'rework', classifier_output: { latency_ms: 5000 } },
|
||||
{ ...ep(), _inferredOutcome: 'blocked', classifier_output: { latency_ms: 15000 } },
|
||||
{ ...ep(), _inferredOutcome: 'unknown', classifier_output: null },
|
||||
]);
|
||||
expect(m.latency_bucket.fast.success).toBe(1);
|
||||
expect(m.latency_bucket.medium.success).toBe(1);
|
||||
expect(m.latency_bucket.slow.rework).toBe(1);
|
||||
expect(m.latency_bucket.very_slow.blocked).toBe(1);
|
||||
expect(m.latency_bucket.null.unknown).toBe(1);
|
||||
});
|
||||
|
||||
it('error_type axis: reads classifier_output.llm_error verbatim with null default', () => {
|
||||
const m = buildFactorMatrix([
|
||||
{ ...ep(), _inferredOutcome: 'rework', classifier_output: { llm_error: 'timeout' } },
|
||||
{ ...ep(), _inferredOutcome: 'rework', classifier_output: { llm_error: 'econnreset' } },
|
||||
{ ...ep(), _inferredOutcome: 'success', classifier_output: { llm_error: null } },
|
||||
{ ...ep(), _inferredOutcome: 'success', classifier_output: null },
|
||||
]);
|
||||
expect(m.error_type.timeout.rework).toBe(1);
|
||||
expect(m.error_type.econnreset.rework).toBe(1);
|
||||
expect(m.error_type.null.success).toBe(2);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -65,6 +65,14 @@ export function extractClassifierOutput(state) {
|
||||
// keep episode JSONL line size bounded.
|
||||
reasoning: pickReasoning(cls),
|
||||
confidence: typeof cls.confidence === 'number' ? cls.confidence : null,
|
||||
// Pass 2 metrics (project-brain-factor-analysis-4passes): network latency,
|
||||
// internal retry count, categorized transport error, and the classifier's
|
||||
// own top-3 alternative nodes with rejection rationale. null on regex /
|
||||
// prefilter / cache paths where the LLM was never (or was already) called.
|
||||
latency_ms: typeof cls.latency_ms === 'number' ? cls.latency_ms : null,
|
||||
retry_count_internal: typeof cls.retry_count_internal === 'number' ? cls.retry_count_internal : null,
|
||||
llm_error: cls.llm_error_type ?? null,
|
||||
alternatives_considered: pickAlternatives(cls),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -73,3 +81,10 @@ function pickReasoning(cls) {
|
||||
if (typeof v !== 'string') return null;
|
||||
return v.slice(0, 600);
|
||||
}
|
||||
|
||||
/**
 * Pulls the classifier's alternative candidates out of a classification record.
 *
 * @param {object} cls - classification record; only `alternatives_considered` is read.
 * @returns {Array|null} a new array holding at most the first three entries,
 *   or null when `alternatives_considered` is not an array.
 */
function pickAlternatives(cls) {
  const { alternatives_considered: candidates } = cls;
  // Cap at the first 3 entries to bound episode JSONL line size; Sonnet sometimes returns 5+.
  return Array.isArray(candidates) ? candidates.slice(0, 3) : null;
}
|
||||
|
||||
@@ -96,3 +96,67 @@ describe('extractRouterFields', () => {
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('extractClassifierOutput — Pass 2 metrics (project-brain-factor-analysis-4passes)', () => {
|
||||
it('surfaces latency_ms / retry_count_internal / llm_error / alternatives_considered when present', async () => {
|
||||
const { extractClassifierOutput } = await import('./observer-state-enricher.mjs');
|
||||
const state = {
|
||||
classification: {
|
||||
task_type: 'feature',
|
||||
source: 'llm',
|
||||
latency_ms: 742,
|
||||
retry_count_internal: 0,
|
||||
llm_error_type: null,
|
||||
alternatives_considered: [
|
||||
{ node: '#19', score: 0.8, reason: 'close match' },
|
||||
{ node: '#62', score: 0.4, reason: 'mismatch domain' },
|
||||
],
|
||||
},
|
||||
};
|
||||
const out = extractClassifierOutput(state);
|
||||
expect(out.latency_ms).toBe(742);
|
||||
expect(out.retry_count_internal).toBe(0);
|
||||
expect(out.llm_error).toBeNull();
|
||||
expect(Array.isArray(out.alternatives_considered)).toBe(true);
|
||||
expect(out.alternatives_considered).toHaveLength(2);
|
||||
});
|
||||
|
||||
it('truncates alternatives_considered to top-3 to bound JSONL line size', async () => {
|
||||
const { extractClassifierOutput } = await import('./observer-state-enricher.mjs');
|
||||
const out = extractClassifierOutput({
|
||||
classification: {
|
||||
task_type: 'feature',
|
||||
source: 'llm',
|
||||
alternatives_considered: [
|
||||
{ node: '#1' }, { node: '#2' }, { node: '#3' }, { node: '#4' }, { node: '#5' },
|
||||
],
|
||||
},
|
||||
});
|
||||
expect(out.alternatives_considered).toHaveLength(3);
|
||||
expect(out.alternatives_considered[0].node).toBe('#1');
|
||||
});
|
||||
|
||||
it('returns null fields on regex / prefilter / cache paths (no LLM hit)', async () => {
|
||||
const { extractClassifierOutput } = await import('./observer-state-enricher.mjs');
|
||||
const out = extractClassifierOutput({
|
||||
classification: { task_type: 'conversation', source: 'prefilter' },
|
||||
});
|
||||
expect(out.latency_ms).toBeNull();
|
||||
expect(out.retry_count_internal).toBeNull();
|
||||
expect(out.llm_error).toBeNull();
|
||||
expect(out.alternatives_considered).toBeNull();
|
||||
});
|
||||
|
||||
it('captures llm_error category on degraded LLM path', async () => {
|
||||
const { extractClassifierOutput } = await import('./observer-state-enricher.mjs');
|
||||
const out = extractClassifierOutput({
|
||||
classification: {
|
||||
task_type: 'feature', source: 'regex',
|
||||
llm_error_type: 'timeout', latency_ms: 30000, retry_count_internal: 4,
|
||||
},
|
||||
});
|
||||
expect(out.llm_error).toBe('timeout');
|
||||
expect(out.latency_ms).toBe(30000);
|
||||
expect(out.retry_count_internal).toBe(4);
|
||||
});
|
||||
});
|
||||
|
||||
+84
-34
@@ -407,6 +407,7 @@ export async function callAnthropicAPI(promptOrMessages, {
|
||||
perAttemptTimeoutMs = 30_000,
|
||||
sleepImpl = (ms) => new Promise((res) => setTimeout(res, ms)),
|
||||
onUsage,
|
||||
onMetrics,
|
||||
}) {
|
||||
const url = `${String(baseUrl).replace(/\/+$/, '')}/v1/messages`;
|
||||
let body;
|
||||
@@ -432,40 +433,65 @@ export async function callAnthropicAPI(promptOrMessages, {
|
||||
'content-type': 'application/json',
|
||||
};
|
||||
|
||||
// Pass 2 metric capture (project-brain-factor-analysis-4passes).
|
||||
const started = Date.now();
|
||||
let attempt = 0;
|
||||
const emitMetrics = () => {
|
||||
if (!onMetrics) return;
|
||||
try { onMetrics({ latency_ms: Date.now() - started, retry_count_internal: attempt }); } catch { /* swallow */ }
|
||||
};
|
||||
|
||||
let lastError;
|
||||
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
||||
const ctrl = new AbortController();
|
||||
const timer = setTimeout(() => ctrl.abort(new Error(`per-attempt timeout ${perAttemptTimeoutMs}ms`)), perAttemptTimeoutMs);
|
||||
try {
|
||||
const r = await fetchImpl(url, { method: 'POST', headers, body, signal: ctrl.signal });
|
||||
if (r.ok) {
|
||||
const data = await r.json();
|
||||
if (onUsage && data.usage) {
|
||||
try { onUsage(data.usage); } catch { /* swallow callback errors */ }
|
||||
try {
|
||||
for (attempt = 0; attempt <= maxRetries; attempt++) {
|
||||
const ctrl = new AbortController();
|
||||
const timer = setTimeout(() => ctrl.abort(new Error(`per-attempt timeout ${perAttemptTimeoutMs}ms`)), perAttemptTimeoutMs);
|
||||
try {
|
||||
const r = await fetchImpl(url, { method: 'POST', headers, body, signal: ctrl.signal });
|
||||
if (r.ok) {
|
||||
const data = await r.json();
|
||||
if (onUsage && data.usage) {
|
||||
try { onUsage(data.usage); } catch { /* swallow callback errors */ }
|
||||
}
|
||||
return data.content?.[0]?.text || '';
|
||||
}
|
||||
return data.content?.[0]?.text || '';
|
||||
// Retry on 5xx and 429; fail fast on 4xx (auth/quota/bad request — retry won't help).
|
||||
if (r.status >= 500 || r.status === 429) {
|
||||
lastError = new Error(`Router LLM ${r.status}: ${await r.text()}`);
|
||||
} else {
|
||||
const fatal = new Error(`Router LLM ${r.status}: ${await r.text()}`);
|
||||
fatal.fatal = true;
|
||||
throw fatal;
|
||||
}
|
||||
} catch (err) {
|
||||
// Re-throw fatal errors (4xx) instead of retrying them.
|
||||
if (err && err.fatal) { clearTimeout(timer); throw err; }
|
||||
// Network-level failure (fetch failed / ECONNRESET / TLS / per-attempt timeout). Retry-eligible.
|
||||
lastError = err;
|
||||
} finally {
|
||||
clearTimeout(timer);
|
||||
}
|
||||
// Retry on 5xx and 429; fail fast on 4xx (auth/quota/bad request — retry won't help).
|
||||
if (r.status >= 500 || r.status === 429) {
|
||||
lastError = new Error(`Router LLM ${r.status}: ${await r.text()}`);
|
||||
} else {
|
||||
const fatal = new Error(`Router LLM ${r.status}: ${await r.text()}`);
|
||||
fatal.fatal = true;
|
||||
throw fatal;
|
||||
if (attempt < maxRetries) {
|
||||
await sleepImpl(retryBaseDelayMs * 2 ** attempt);
|
||||
}
|
||||
} catch (err) {
|
||||
// Re-throw fatal errors (4xx) instead of retrying them.
|
||||
if (err && err.fatal) { clearTimeout(timer); throw err; }
|
||||
// Network-level failure (fetch failed / ECONNRESET / TLS / per-attempt timeout). Retry-eligible.
|
||||
lastError = err;
|
||||
} finally {
|
||||
clearTimeout(timer);
|
||||
}
|
||||
if (attempt < maxRetries) {
|
||||
await sleepImpl(retryBaseDelayMs * 2 ** attempt);
|
||||
}
|
||||
throw lastError;
|
||||
} finally {
|
||||
emitMetrics();
|
||||
}
|
||||
throw lastError;
|
||||
}
|
||||
|
||||
/**
 * Pass 2 — categorize an LLM transport failure for the factor-analysis
 * error_type axis.
 *
 * Signals inspected, in priority order:
 *   1. `err.fatal` plus a 4xx number in the message      -> 'http_4xx'
 *   2. a leading "Router LLM <status>:" with 5xx or 429   -> 'http_5xx'
 *   3. network keywords / error codes                     -> 'econnreset'
 *   4. AbortError / TimeoutError / timeout keywords       -> 'timeout'
 *   5. any other 5xx-looking number (or 429) in message   -> 'http_5xx'
 *
 * Network and timeout checks read `err.code`, `err.cause.code` and
 * `err.cause.message` as well as `err.message`, because Node's fetch reports
 * network failures as `TypeError: fetch failed` with the real reason on
 * `err.cause`. They run before the loose numeric fallback so that byte counts
 * or port numbers in a network error are not mistaken for an HTTP status.
 *
 * @param {unknown} err - the thrown value (Error, string, or anything else).
 * @returns {'http_4xx'|'http_5xx'|'econnreset'|'timeout'|'other'} the category;
 *   429 is reported as 'http_5xx'.
 */
export function classifyLLMError(err) {
  if (!err) return 'other';
  const msg = String(err.message || err);

  if (err.fatal && /\b4\d\d\b/.test(msg)) return 'http_4xx';

  // Exact status taken from the "Router LLM <status>: <body>" message prefix.
  const prefixed = /^Router LLM (\d{3})\b/.exec(msg);
  if (prefixed) {
    const status = Number(prefixed[1]);
    if (status >= 500 || status === 429) return 'http_5xx';
  }

  // Fold the cause chain's code/message into the text searched for keywords.
  const cause = err.cause;
  const detail = [msg, err.code, cause?.code, cause?.message]
    .filter((part) => typeof part === 'string')
    .join(' ');

  if (/ECONNRESET|ECONNREFUSED|ENOTFOUND|EAI_AGAIN|socket hang up/i.test(detail)) return 'econnreset';

  const timeoutNames = ['AbortError', 'TimeoutError'];
  if (
    timeoutNames.includes(err.name) ||
    timeoutNames.includes(cause?.name) ||
    /\btimeout\b|ETIMEDOUT/i.test(detail)
  ) {
    return 'timeout';
  }

  // Loose fallback for status-like numbers in messages without the prefix.
  if (/\b5\d\d\b/.test(msg) || /429\b/.test(msg)) return 'http_5xx';
  return 'other';
}
|
||||
|
||||
function hashPrompt(s) {
|
||||
@@ -507,7 +533,9 @@ export async function classify(prompt, registry, options = {}) {
|
||||
}
|
||||
|
||||
// Layer 2 — Sonnet 4.6 with prompt caching (ephemeral 5m TTL on system block).
|
||||
const llmCall = options.llmCall || (async () => {
|
||||
// llmCall receives { onMetrics } so callAnthropicAPI can report latency / retries
|
||||
// (Pass 2 factor-analysis extension); tests pass synthetic metrics directly.
|
||||
const llmCall = options.llmCall || (async ({ onMetrics } = {}) => {
|
||||
const apiKey = process.env.ROUTER_LLM_KEY;
|
||||
if (!apiKey) return null;
|
||||
const structured = buildClassifierPromptStructured(prompt, registry, {
|
||||
@@ -518,26 +546,48 @@ export async function classify(prompt, registry, options = {}) {
|
||||
baseUrl: process.env.ROUTER_LLM_BASE_URL || undefined,
|
||||
model: options.model || CLASSIFIER_MODEL,
|
||||
onUsage: options.onUsage,
|
||||
onMetrics,
|
||||
});
|
||||
return parseClassifierResponse(text);
|
||||
});
|
||||
|
||||
let metrics = null;
|
||||
const captureMetrics = (m) => { metrics = m; };
|
||||
let llmResult;
|
||||
try {
|
||||
llmResult = await llmCall();
|
||||
llmResult = await llmCall({ onMetrics: captureMetrics });
|
||||
} catch (err) {
|
||||
// Layer 3 — regex fallback on LLM transport error.
|
||||
const r = classifyByRegex(prompt, registry);
|
||||
return { ...r, llmError: err.message, degraded: true };
|
||||
return {
|
||||
...r,
|
||||
llmError: err.message,
|
||||
llm_error_type: classifyLLMError(err),
|
||||
latency_ms: metrics?.latency_ms ?? null,
|
||||
retry_count_internal: metrics?.retry_count_internal ?? null,
|
||||
degraded: true,
|
||||
};
|
||||
}
|
||||
|
||||
if (!llmResult) {
|
||||
// Layer 3 — regex fallback on no key / unparseable.
|
||||
// Layer 3 — regex fallback on no key (metrics null) / unparseable response
|
||||
// (metrics set, classify as parse_null so the analyzer error_type axis
|
||||
// distinguishes "API never called" from "API returned garbage").
|
||||
const r = classifyByRegex(prompt, registry);
|
||||
return r;
|
||||
return {
|
||||
...r,
|
||||
llm_error_type: metrics ? 'parse_null' : 'no_key',
|
||||
latency_ms: metrics?.latency_ms ?? null,
|
||||
retry_count_internal: metrics?.retry_count_internal ?? null,
|
||||
};
|
||||
}
|
||||
|
||||
const finalResult = { ...llmResult, source: 'llm' };
|
||||
const finalResult = {
|
||||
...llmResult,
|
||||
source: 'llm',
|
||||
latency_ms: metrics?.latency_ms ?? null,
|
||||
retry_count_internal: metrics?.retry_count_internal ?? null,
|
||||
};
|
||||
if (cache) cache.set(key, finalResult);
|
||||
return finalResult;
|
||||
}
|
||||
|
||||
@@ -341,3 +341,106 @@ describe('classify — isolation from Claude Code auth', () => {
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe('callAnthropicAPI — Pass 2 metrics (project-brain-factor-analysis-4passes)', () => {
|
||||
it('emits onMetrics({latency_ms, retry_count_internal}) on success', async () => {
|
||||
const fetchImpl = async () => ({ ok: true, json: async () => ({ content: [{ text: '{"task_type":"question"}' }] }) });
|
||||
let captured = null;
|
||||
await callAnthropicAPI('hi', { apiKey: 'k', fetchImpl, onMetrics: (m) => { captured = m; } });
|
||||
expect(captured).not.toBeNull();
|
||||
expect(typeof captured.latency_ms).toBe('number');
|
||||
expect(captured.latency_ms).toBeGreaterThanOrEqual(0);
|
||||
expect(captured.retry_count_internal).toBe(0);
|
||||
});
|
||||
|
||||
it('emits onMetrics with retry_count_internal>0 after 5xx retries', async () => {
|
||||
let calls = 0;
|
||||
const fetchImpl = async () => {
|
||||
calls += 1;
|
||||
if (calls < 3) return { ok: false, status: 503, text: async () => 'unavailable' };
|
||||
return { ok: true, json: async () => ({ content: [{ text: '{"task_type":"question"}' }] }) };
|
||||
};
|
||||
let captured = null;
|
||||
const sleepImpl = () => Promise.resolve(); // skip backoff in tests
|
||||
await callAnthropicAPI('hi', { apiKey: 'k', fetchImpl, sleepImpl, onMetrics: (m) => { captured = m; } });
|
||||
expect(captured.retry_count_internal).toBe(2);
|
||||
});
|
||||
|
||||
it('emits onMetrics even on fatal 4xx (so latency / retry count reach the classifier state)', async () => {
|
||||
const fetchImpl = async () => ({ ok: false, status: 401, text: async () => 'invalid key' });
|
||||
let captured = null;
|
||||
await expect(callAnthropicAPI('hi', { apiKey: 'k', fetchImpl, onMetrics: (m) => { captured = m; } })).rejects.toThrow(/401/);
|
||||
expect(captured).not.toBeNull();
|
||||
expect(typeof captured.latency_ms).toBe('number');
|
||||
expect(captured.retry_count_internal).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe('classify — Pass 2 metrics surface to result', () => {
|
||||
const fakeRegistry = { nodes: [{ id: '#19', status: 'active', triggers: [] }], chains: {} };
|
||||
|
||||
it('attaches latency_ms / retry_count_internal on LLM success', async () => {
|
||||
const llmCall = async ({ onMetrics } = {}) => {
|
||||
if (onMetrics) onMetrics({ latency_ms: 432, retry_count_internal: 1 });
|
||||
return { task_type: 'feature', recommended_node: '#19', recommended_chain: null, recommended_chain_id: null, alternatives_considered: [] };
|
||||
};
|
||||
const r = await classify('новая фича: добавь endpoint X', fakeRegistry, { llmCall });
|
||||
expect(r.source).toBe('llm');
|
||||
expect(r.latency_ms).toBe(432);
|
||||
expect(r.retry_count_internal).toBe(1);
|
||||
});
|
||||
|
||||
it('passes through alternatives_considered from Sonnet (truncated to top-3 by enricher, not by classify)', async () => {
|
||||
const llmCall = async () => ({
|
||||
task_type: 'feature', recommended_node: '#19', recommended_chain: null, recommended_chain_id: null,
|
||||
alternatives_considered: [{ node: '#19', score: 0.8 }, { node: '#62', score: 0.4 }],
|
||||
});
|
||||
const r = await classify('новая фича X', fakeRegistry, { llmCall });
|
||||
expect(r.alternatives_considered).toBeDefined();
|
||||
expect(r.alternatives_considered).toHaveLength(2);
|
||||
});
|
||||
|
||||
it('sets llm_error_type=econnreset / latency / retry_count on transport error', async () => {
|
||||
const llmCall = async ({ onMetrics } = {}) => {
|
||||
if (onMetrics) onMetrics({ latency_ms: 1234, retry_count_internal: 4 });
|
||||
const e = new Error('fetch failed: ECONNRESET'); throw e;
|
||||
};
|
||||
const r = await classify('что-то непонятное вообще', fakeRegistry, { llmCall });
|
||||
expect(r.source).toBe('regex');
|
||||
expect(r.llm_error_type).toBe('econnreset');
|
||||
expect(r.latency_ms).toBe(1234);
|
||||
expect(r.retry_count_internal).toBe(4);
|
||||
});
|
||||
|
||||
it('sets llm_error_type=timeout on AbortError or per-attempt timeout', async () => {
|
||||
const llmCall = async () => {
|
||||
const e = new Error('per-attempt timeout 30000ms'); throw e;
|
||||
};
|
||||
const r = await classify('что-то непонятное вообще', fakeRegistry, { llmCall });
|
||||
expect(r.llm_error_type).toBe('timeout');
|
||||
});
|
||||
|
||||
it('sets llm_error_type=http_4xx on fatal upstream 4xx', async () => {
|
||||
const llmCall = async () => { const e = new Error('Router LLM 401: invalid key'); e.fatal = true; throw e; };
|
||||
const r = await classify('что-то непонятное вообще', fakeRegistry, { llmCall });
|
||||
expect(r.llm_error_type).toBe('http_4xx');
|
||||
});
|
||||
|
||||
it('sets llm_error_type=http_5xx on exhausted retries', async () => {
|
||||
const llmCall = async () => { const e = new Error('Router LLM 503: bad gateway'); throw e; };
|
||||
const r = await classify('что-то непонятное вообще', fakeRegistry, { llmCall });
|
||||
expect(r.llm_error_type).toBe('http_5xx');
|
||||
});
|
||||
|
||||
it('sets llm_error_type=parse_null when llmCall returns null (LLM produced unparseable response)', async () => {
|
||||
// Mocked llmCall returns null without throwing — simulates upstream parse failure
|
||||
// after a successful HTTP exchange. onMetrics still fires from the mocked path.
|
||||
const llmCall = async ({ onMetrics } = {}) => {
|
||||
if (onMetrics) onMetrics({ latency_ms: 800, retry_count_internal: 0 });
|
||||
return null;
|
||||
};
|
||||
const r = await classify('что-то непонятное вообще', fakeRegistry, { llmCall });
|
||||
expect(r.llm_error_type).toBe('parse_null');
|
||||
expect(r.latency_ms).toBe(800);
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user