Files
portal/tools/router-classifier.test.mjs
T
Дмитрий 808461295a feat(router): Sonnet classifier + памятка + regex-fallback module (phase 2 task 10)
Phase 2 Task 10 of LLM-first router overhaul. Spec §4.2 — Layer 2 Sonnet 4.6
classifier with 4-pattern памятка enrichment, JSON output per spec, fallback
chain Sonnet → regex → degraded. Phase 1 regex Layer 1 extracted to its own
module so it can be called only as a fallback.

- tools/router-classifier-regex-fallback.mjs (NEW): self-contained regex
  fallback. Extracts TASK_TYPE_KEYWORDS, HARD_KEYWORD_STEMS, detectTaskType,
  keywordMatches, detectRecommendedNode, computeConfidence, classifyByRegex
  verbatim from the prior classifier. Self-contained (own MICRO_KEYWORDS,
  detectMicro, lower) — no circular imports.
- tools/router-classifier.mjs (REWRITE):
  + import { CLASSIFIER_MODEL } from router-config.mjs
  + re-export { classifyByRegex } from regex-fallback (back-compat surface)
  + buildClassifierPrompt(prompt, registry, { enrichment=true }) — spec §4.2
    format with 4-pattern памятка (brainstorming / discovery-interview /
    writing-plans / systematic-debugging) togglable via enrichment flag.
  + parseClassifierResponse(text) — strict task_type required, ```json fence
    aware, accepts null recommended_chain_id.
  + classify() rewritten: prefilter → cache → Sonnet (CLASSIFIER_MODEL) →
    regex fallback (transport error OR no key/unparseable).
  + callAnthropicAPI default model = CLASSIFIER_MODEL; max_tokens 300 → 1500
    (full classifier output with alternatives & памятка needs the budget).
  - removed: shouldEscalate, TASK_TYPE_KEYWORDS, detectTaskType,
    keywordMatches, detectRecommendedNode, HARD_KEYWORD_STEMS, computeConfidence
    (all live in regex-fallback now).
  Kept legacy: buildLLMPrompt / parseLLMResponse (back-compat surface).
- tools/router-accuracy-runner.mjs: import classifyByRegex from regex-fallback
  module (G11 from plan). Runner functionality unchanged.
- tools/router-classifier.test.mjs: +8 tests for buildClassifierPrompt (4) and
  parseClassifierResponse (4); removed obsolete shouldEscalate block (3);
  rewrote classify integration block (4 tests) to reflect new flow
  (prefilter-first, LLM-always-on-fallthrough, regex on error).

Tests: tools/router-classifier.test.mjs 44/44 PASS. Full tools/ suite:
557 tests passed, 0 failed (4 pre-existing empty test files report
"no test suite found" — unrelated: ruflo-recall-hook, subagent-prompt-prefix,
plus 2 others — not touched in this commit).
accuracy-runner smoke: type=85%/node=55%/micro=100% on the 20-prompt set,
unchanged from pre-Task-10 baseline (regex path semantics preserved).
2026-05-25 14:28:25 +03:00

344 lines
14 KiB
JavaScript

import { describe, it, expect, beforeEach } from 'vitest';
import { classifyByRegex, prefilter } from './router-classifier.mjs';
// Layer 1 prefilter: each case feeds a short prompt plus an optional previous
// state and checks either the returned classification fields or a `null`
// result (which the test titles describe as forwarding to Layer 2).
describe('prefilter — Phase 2 Task 9 (spec §4.1, 7 checks)', () => {
  it('manual override has priority over continuation (delai cherez TDD)', () => {
    const outcome = prefilter('делай через TDD', { prevState: null });

    expect(outcome.task_type).toBe('manual_override');
    expect(outcome.source).toBe('prefilter');
    expect(outcome.requested_node).toContain('test-driven-development');
  });

  it('continuation inherits classification within 30 min', () => {
    // Previous state stamped "now", so it is well inside the window.
    const freshState = {
      classification: { task_type: 'feature', recommendedNode: '#19' },
      timestamp: new Date().toISOString(),
      task_id: 'prev-abc',
    };

    const outcome = prefilter('делай', { prevState: freshState });

    expect(outcome.source).toBe('prefilter_inherited');
    expect(outcome.task_type).toBe('feature');
    expect(outcome.inheritance?.inherited_from_task_id).toBe('prev-abc');
  });

  it('continuation falls through to short-conversation when prev state > 30 min', () => {
    // 31 minutes in the past: one minute beyond the window named in the title.
    const staleAgeMs = 31 * 60 * 1000;
    const staleTimestamp = new Date(Date.now() - staleAgeMs).toISOString();
    const staleState = { classification: { task_type: 'feature' }, timestamp: staleTimestamp };

    const outcome = prefilter('делай', { prevState: staleState });

    expect(outcome.task_type).toBe('conversation');
  });

  it('acknowledgment is plain conversation (spasibo)', () => {
    const outcome = prefilter('спасибо', {});
    expect(outcome.task_type).toBe('conversation');
  });

  it('cancellation flags previous task rejected (net)', () => {
    const context = { prevState: { task_id: 'abc' } };
    const outcome = prefilter('нет', context);
    expect(outcome.previous_rejected).toBe(true);
  });

  it('anchor protection saves "делай аудит" from short-conversation → null fall through', () => {
    const outcome = prefilter('делай аудит', {});
    expect(outcome).toBeNull();
  });

  it('micro keyword fires (poprav\' typo v stroke)', () => {
    const outcome = prefilter('поправь typo в строке', {});
    expect(outcome.task_type).toBe('micro');
  });

  it('content prompt with anchor returns null (forwards to Layer 2)', () => {
    const outcome = prefilter('добавь endpoint для экспорта сделок', {});
    expect(outcome).toBeNull();
  });
});
/** Builds a keyword trigger entry; weight defaults to 1.0. */
const keywordTrigger = (keyword, weight = 1.0) => ({ keyword, weight });

/** Builds a classification trigger entry; weight defaults to 1.0. */
const classificationTrigger = (classification, weight = 1.0) => ({ classification, weight });

/** Builds a registry node with status 'active' and the given triggers. */
const activeNode = (id, name, triggers) => ({ id, name, status: 'active', triggers });

// Minimal four-node registry fixture shared by the tests in this file.
const fakeRegistry = {
  nodes: [
    activeNode('#19', 'Superpowers', [
      classificationTrigger('feature'),
      classificationTrigger('planning'),
    ]),
    activeNode('#62', 'billing-audit', [
      keywordTrigger('списание'),
      keywordTrigger('биллинг'),
      classificationTrigger('bugfix', 0.5),
    ]),
    activeNode('#74', 'marketing', [
      keywordTrigger('email-рассылка'),
      keywordTrigger('кампания'),
      classificationTrigger('marketing'),
    ]),
    activeNode('#11', 'pint', [
      classificationTrigger('refactor'),
      classificationTrigger('cleanup'),
    ]),
  ],
};
// Regex classifier: checks the `taskType` and `micro` fields returned for
// Russian and English prompts, always against the shared fakeRegistry fixture.
describe('classifyByRegex — task type', () => {
  // Every case classifies against the same fixture registry.
  const classifyPrompt = (prompt) => classifyByRegex(prompt, fakeRegistry);

  it('detects feature from RU keyword «фича»', () => {
    const { taskType } = classifyPrompt('давай сделаем новую фичу для биллинга');
    expect(taskType).toBe('feature');
  });

  it('detects planning from RU «план»', () => {
    const { taskType } = classifyPrompt('напиши план рефакторинга модуля X');
    expect(taskType).toBe('planning');
  });

  it('detects bugfix from EN «bug»', () => {
    const { taskType } = classifyPrompt('there is a bug in the auth flow');
    expect(taskType).toBe('bugfix');
  });

  it('detects micro for typo', () => {
    const { micro } = classifyPrompt('опечатка в файле X');
    expect(micro).toBe(true);
  });

  it('detects micro for rename', () => {
    const { micro } = classifyPrompt('переименуй функцию foo в bar');
    expect(micro).toBe(true);
  });

  it('returns taskType=unknown when no signal', () => {
    const verdict = classifyPrompt('просто привет');
    expect(verdict.taskType).toBe('unknown');
    expect(verdict.micro).toBe(false);
  });
});
// Regex classifier: checks which fixture node id ends up in `recommendedNode`
// (or null when nothing matches).
describe('classifyByRegex — domain node match', () => {
  // Shortcut: classify against the fixture and return only the recommended node.
  const recommendedNodeFor = (prompt) => classifyByRegex(prompt, fakeRegistry).recommendedNode;

  it('picks #62 billing-audit on «списание»', () => {
    const nodeId = recommendedNodeFor('почини двойное списание лида');
    expect(nodeId).toBe('#62');
  });

  it('picks #74 marketing on «email-рассылка»', () => {
    const nodeId = recommendedNodeFor('составь email-рассылку для тарифа Бизнес');
    expect(nodeId).toBe('#74');
  });

  it('falls back to classification trigger when no keyword match', () => {
    const nodeId = recommendedNodeFor('рефакторинг кода');
    // 'рефакторинг' → classification: refactor → #11 pint
    expect(nodeId).toBe('#11');
  });

  it('returns null when no node matched', () => {
    const nodeId = recommendedNodeFor('просто вопрос');
    expect(nodeId).toBeNull();
  });

  it('case-insensitive keyword match', () => {
    const nodeId = recommendedNodeFor('СПИСАНИЕ дублируется');
    expect(nodeId).toBe('#62');
  });
});
// Regex classifier: the result carries source === 'regex'.
describe('classifyByRegex — source tag', () => {
  it('always marks source: regex', () => {
    const { source } = classifyByRegex('test', fakeRegistry);
    expect(source).toBe('regex');
  });
});
// Regex classifier: bounds on the `confidence` score for a prompt containing a
// fixture keyword versus a prompt with no recognizable signal.
describe('classifyByRegex — confidence', () => {
  it('returns confidence>=0.8 for clean keyword match', () => {
    const { confidence } = classifyByRegex('списание дублируется', fakeRegistry);
    expect(confidence).toBeGreaterThanOrEqual(0.8);
  });

  it('returns confidence<0.5 when ambiguous (no clean match)', () => {
    const { confidence } = classifyByRegex('что-то непонятное', fakeRegistry);
    expect(confidence).toBeLessThan(0.5);
  });
});
import { buildLLMPrompt, parseLLMResponse, classify, callAnthropicAPI, buildClassifierPrompt, parseClassifierResponse } from './router-classifier.mjs';
// Classifier prompt builder: checks the enrichment toggle, verbatim embedding
// of the user prompt, and that only the active node's fields appear.
describe('buildClassifierPrompt — Phase 2 Task 10 (spec §4.2)', () => {
  // Fresh empty registry per call so no test shares a mutable object.
  const emptyRegistry = () => ({ nodes: [], chains: {} });

  it('includes 4 памятка patterns when enrichment=true', () => {
    const promptText = buildClassifierPrompt('добавь фичу', emptyRegistry(), { enrichment: true });
    const patternHeadings = ['ПАТТЕРН 1', 'ПАТТЕРН 2', 'ПАТТЕРН 3', 'ПАТТЕРН 4'];
    for (const heading of patternHeadings) {
      expect(promptText).toContain(heading);
    }
  });

  it('omits памятка when enrichment=false', () => {
    const promptText = buildClassifierPrompt('x', emptyRegistry(), { enrichment: false });
    expect(promptText).not.toContain('ПАТТЕРН 1');
  });

  it('embeds user prompt verbatim', () => {
    const promptText = buildClassifierPrompt('почини двойное списание', emptyRegistry());
    expect(promptText).toContain('почини двойное списание');
  });

  it('lists only active nodes with capabilities in YAML-ish block', () => {
    const visibleNode = {
      id: '#62',
      name: 'billing-audit',
      slug: 'billing-audit',
      status: 'active',
      capabilities: 'audits money invariants',
      triggers: [{ keyword: 'списание', weight: 1 }],
    };
    // status 'historic' — the assertions below expect it to be left out.
    const hiddenNode = {
      id: '#999',
      name: 'gone',
      slug: 'gone',
      status: 'historic',
      capabilities: 'should be hidden',
      triggers: [],
    };
    const registry = { nodes: [visibleNode, hiddenNode], chains: {} };

    const promptText = buildClassifierPrompt('test', registry);

    for (const present of [/#62/, /billing-audit/, /audits money invariants/]) {
      expect(promptText).toMatch(present);
    }
    for (const absent of [/#999/, /should be hidden/]) {
      expect(promptText).not.toMatch(absent);
    }
  });
});
// Classifier response parser: valid JSON is parsed, a ```json fence is
// tolerated, and malformed input or a missing task_type yields null.
describe('parseClassifierResponse — Phase 2 Task 10 (spec §4.2)', () => {
  it('accepts null recommended_chain_id', () => {
    const rawReply = '{"task_type":"feature","recommended_node":"x","recommended_chain":["x"],"recommended_chain_id":null,"alternatives_considered":[],"no_skill_found":false}';
    const parsed = parseClassifierResponse(rawReply);
    expect(parsed.recommended_chain_id).toBeNull();
    expect(parsed.task_type).toBe('feature');
  });

  it('returns null on malformed JSON', () => {
    const parsed = parseClassifierResponse('nope');
    expect(parsed).toBeNull();
  });

  it('returns null when task_type missing', () => {
    const parsed = parseClassifierResponse('{"recommended_node":"x"}');
    expect(parsed).toBeNull();
  });

  it('strips ```json fence wrapper', () => {
    const fencedReply = '```json\n{"task_type":"bugfix","recommended_node":"#62","recommended_chain":[],"recommended_chain_id":null,"alternatives_considered":[],"no_skill_found":false}\n```';
    const parsed = parseClassifierResponse(fencedReply);
    expect(parsed.task_type).toBe('bugfix');
  });
});
// Legacy prompt builder: active fixture nodes and the user prompt appear in
// the output; a node with status 'historic' does not.
describe('buildLLMPrompt', () => {
  it('serializes active nodes with id+name+top-3 triggers', () => {
    const promptText = buildLLMPrompt('почини списание', fakeRegistry);
    const expectedFragments = [/#62/, /billing-audit/, /списание/, /почини списание/];
    for (const fragment of expectedFragments) {
      expect(promptText).toMatch(fragment);
    }
  });

  it('excludes inactive nodes', () => {
    const retiredNode = { id: '#999', name: 'gone', status: 'historic', triggers: [] };
    // Copy the fixture's nodes so the shared registry is not mutated.
    const registry = { nodes: [...fakeRegistry.nodes, retiredNode] };
    const promptText = buildLLMPrompt('test', registry);
    expect(promptText).not.toMatch(/#999/);
  });
});
// Legacy response parser: bare and ```json-fenced objects are parsed;
// free-form text yields null.
describe('parseLLMResponse', () => {
  it('parses JSON object', () => {
    const rawReply = '{"taskType":"bugfix","micro":false,"recommendedNode":"#62","confidence":0.9,"recommendedChain":null,"reasoning":"keyword списание"}';
    const { taskType, recommendedNode, confidence } = parseLLMResponse(rawReply);
    expect(taskType).toBe('bugfix');
    expect(recommendedNode).toBe('#62');
    expect(confidence).toBe(0.9);
  });

  it('parses JSON wrapped in ```json``` block', () => {
    const fencedReply = '```json\n{"taskType":"feature","micro":false,"recommendedNode":"#19","confidence":0.8}\n```';
    const parsed = parseLLMResponse(fencedReply);
    expect(parsed.taskType).toBe('feature');
  });

  it('returns null on unparseable response', () => {
    const parsed = parseLLMResponse('I cannot help with this');
    expect(parsed).toBeNull();
  });
});
// classify() end-to-end with an injected `llmCall`: regex fallback when the
// call throws, the LLM path, cache reuse, and the prefilter short-circuit.
describe('classify — full integration (with mock LLM)', () => {
  // Canned classifier answer; a new object on every call, like a real reply.
  const featureReply = () => ({
    task_type: 'feature',
    recommended_node: '#19',
    recommended_chain: ['#19'],
    recommended_chain_id: 'L1',
    alternatives_considered: [],
    no_skill_found: false,
  });

  it('falls back to regex on LLM transport error (long prompt, prefilter null)', async () => {
    const failingLlm = () => {
      throw new Error('proxyapi 503');
    };

    const verdict = await classify('почини двойное списание лида срочно', fakeRegistry, {
      llmCall: failingLlm,
    });

    expect(verdict.source).toBe('regex');
    expect(verdict.recommendedNode).toBe('#62');
    expect(verdict.degraded).toBe(true);
    expect(verdict.llmError).toContain('proxyapi 503');
  });

  it('escalates to LLM when prefilter returns null', async () => {
    const verdict = await classify('добавь endpoint экспорта сделок', fakeRegistry, {
      llmCall: async () => featureReply(),
    });

    expect(verdict.source).toBe('llm');
    expect(verdict.task_type).toBe('feature');
  });

  it('uses cache on second call with same long prompt', async () => {
    let invocations = 0;
    const countingLlm = async () => {
      invocations++;
      return featureReply();
    };
    const sharedCache = new Map();
    const options = { llmCall: countingLlm, cache: sharedCache };

    // Same prompt twice with the same cache: the second call must not reach the LLM.
    await classify('добавь endpoint для нового lookup сервиса', fakeRegistry, options);
    await classify('добавь endpoint для нового lookup сервиса', fakeRegistry, options);

    expect(invocations).toBe(1);
  });

  it('returns prefilter result without invoking LLM (short conversation)', async () => {
    let llmWasInvoked = false;
    const spyLlm = async () => {
      llmWasInvoked = true;
      return null;
    };

    const verdict = await classify('спасибо', fakeRegistry, { llmCall: spyLlm });

    expect(verdict.task_type).toBe('conversation');
    expect(verdict.source).toBe('prefilter');
    expect(llmWasInvoked).toBe(false);
  });
});
// callAnthropicAPI with an injected `fetchImpl`: checks the request URL, the
// authorization header, the returned text, and rejection on a non-ok response.
describe('callAnthropicAPI — ProxyAPI wiring', () => {
  // Builds a stand-in for a successful fetch Response whose JSON body holds one text part.
  const okResponse = (text) => ({ ok: true, json: async () => ({ content: [{ text }] }) });

  it('posts to ProxyAPI base by default with Bearer auth', async () => {
    let request;
    const recordingFetch = async (url, opts) => {
      request = { url, opts };
      return okResponse('{"taskType":"question"}');
    };

    const reply = await callAnthropicAPI('hi', { apiKey: 'sk-test', fetchImpl: recordingFetch });

    expect(request.url).toBe('https://api.proxyapi.ru/anthropic/v1/messages');
    expect(request.opts.headers.authorization).toBe('Bearer sk-test');
    expect(reply).toContain('question');
  });

  it('honors a custom baseUrl and strips trailing slash', async () => {
    let requestedUrl;
    const recordingFetch = async (url) => {
      requestedUrl = url;
      return okResponse('x');
    };

    await callAnthropicAPI('hi', {
      apiKey: 'k',
      baseUrl: 'https://example.test/',
      fetchImpl: recordingFetch,
    });

    expect(requestedUrl).toBe('https://example.test/v1/messages');
  });

  it('throws on non-ok response', async () => {
    const rejectingFetch = async () => ({
      ok: false,
      status: 401,
      text: async () => 'Invalid API Key',
    });

    const pending = callAnthropicAPI('hi', { apiKey: 'bad', fetchImpl: rejectingFetch });

    await expect(pending).rejects.toThrow(/401/);
  });
});
// classify() without an injected llmCall: with ROUTER_LLM_KEY unset the result
// must come from the regex path, even when ANTHROPIC_API_KEY is present.
// Each test snapshots the env vars it touches and restores them in `finally`.
describe('classify — isolation from Claude Code auth', () => {
  it('skips LLM and falls back to regex when ROUTER_LLM_KEY is absent', async () => {
    const previousRouterKey = process.env.ROUTER_LLM_KEY;
    delete process.env.ROUTER_LLM_KEY;

    try {
      const verdict = await classify('что-то совсем непонятное', fakeRegistry);
      expect(verdict.source).toBe('regex');
    } finally {
      // Put the key back only if there was one to begin with.
      if (previousRouterKey !== undefined) {
        process.env.ROUTER_LLM_KEY = previousRouterKey;
      }
    }
  });

  it('does NOT read ANTHROPIC_API_KEY (would hijack the main session)', async () => {
    const previousRouterKey = process.env.ROUTER_LLM_KEY;
    const previousAnthropicKey = process.env.ANTHROPIC_API_KEY;
    delete process.env.ROUTER_LLM_KEY;
    process.env.ANTHROPIC_API_KEY = 'sk-should-not-be-used';

    try {
      const verdict = await classify('что-то совсем непонятное', fakeRegistry);
      // No ROUTER_LLM_KEY → must stay on regex even though ANTHROPIC_API_KEY is set.
      expect(verdict.source).toBe('regex');
    } finally {
      if (previousRouterKey !== undefined) {
        process.env.ROUTER_LLM_KEY = previousRouterKey;
      }
      // The decoy key was set by this test, so remove it unless a real value existed.
      if (previousAnthropicKey === undefined) {
        delete process.env.ANTHROPIC_API_KEY;
      } else {
        process.env.ANTHROPIC_API_KEY = previousAnthropicKey;
      }
    }
  });
});