808461295a
Phase 2 Task 10 of LLM-first router overhaul. Spec §4.2 — Layer 2 Sonnet 4.6
classifier with 4-pattern памятка enrichment, JSON output per spec, fallback
chain Sonnet → regex → degraded. Phase 1 regex Layer 1 extracted to its own
module so it can be called only as a fallback.
- tools/router-classifier-regex-fallback.mjs (NEW): self-contained regex
fallback. Extracts TASK_TYPE_KEYWORDS, HARD_KEYWORD_STEMS, detectTaskType,
keywordMatches, detectRecommendedNode, computeConfidence, classifyByRegex
verbatim from the prior classifier. Self-contained (own MICRO_KEYWORDS,
detectMicro, lower) — no circular imports.
- tools/router-classifier.mjs (REWRITE):
+ import { CLASSIFIER_MODEL } from router-config.mjs
+ re-export { classifyByRegex } from regex-fallback (back-compat surface)
+ buildClassifierPrompt(prompt, registry, { enrichment=true }) — spec §4.2
format with 4-pattern памятка (brainstorming / discovery-interview /
writing-plans / systematic-debugging) togglable via enrichment flag.
+ parseClassifierResponse(text) — strict task_type required, ```json fence
aware, accepts null recommended_chain_id.
+ classify() rewritten: prefilter → cache → Sonnet (CLASSIFIER_MODEL) →
regex fallback (transport error OR no key/unparseable).
+ callAnthropicAPI default model = CLASSIFIER_MODEL; max_tokens 300 → 1500
(full classifier output with alternatives & памятка needs the budget).
- removed: shouldEscalate, TASK_TYPE_KEYWORDS, detectTaskType,
keywordMatches, detectRecommendedNode, HARD_KEYWORD_STEMS, computeConfidence
(all live in regex-fallback now).
Kept legacy: buildLLMPrompt / parseLLMResponse (back-compat surface).
- tools/router-accuracy-runner.mjs: import classifyByRegex from regex-fallback
module (G11 from plan). Runner functionality unchanged.
- tools/router-classifier.test.mjs: +8 tests for buildClassifierPrompt (4) and
parseClassifierResponse (4); removed obsolete shouldEscalate block (3);
rewrote classify integration block (4 tests) to reflect new flow
(prefilter-first, LLM-always-on-fallthrough, regex on error).
Tests: tools/router-classifier.test.mjs 44/44 PASS. Full tools/ suite:
557 tests passed, 0 failed (4 pre-existing empty test files report
"no test suite found" — unrelated: ruflo-recall-hook, subagent-prompt-prefix,
plus 2 others — not touched in this commit).
accuracy-runner smoke: type=85%/node=55%/micro=100% on the 20-prompt set,
unchanged from pre-Task-10 baseline (regex path semantics preserved).
130 lines
5.4 KiB
JavaScript
130 lines
5.4 KiB
JavaScript
#!/usr/bin/env node
|
|
/**
|
|
* Router classifier — REGEX FALLBACK module (Phase 2 Task 10).
|
|
*
|
|
* Extracted from router-classifier.mjs as a self-contained fallback for when
|
|
* both Sonnet 4.6 and Haiku 4.5 LLM endpoints are unreachable. Pure: no
|
|
* fs/exec/net. Caller passes registry.
|
|
*
|
|
* Routing in router-classifier.mjs:
|
|
* prefilter() → Sonnet 4.6 (LLM) → Haiku 4.5 (LLM) → classifyByRegex (here) → degraded
|
|
*
|
|
* This module is also imported by tools/router-accuracy-runner.mjs which runs
|
|
* offline regex-only accuracy checks against a curated prompt set.
|
|
*/
|
|
|
|
// Порядок ключей значим: detectTaskType возвращает первое совпадение.
|
|
// Специфичные домены (marketing/security) идут ДО общего analysis, чтобы
|
|
// «проверь пдн» ушло в security, а «проверь индекс» — в analysis.
|
|
export const TASK_TYPE_KEYWORDS = {
|
|
feature: ['фич', 'feature', 'новый функционал', 'add feature'],
|
|
planning: ['план', 'plan', 'спека', 'spec', 'roadmap', 'распиши', 'спланируй'],
|
|
bugfix: ['баг', 'bug', 'дебаг', 'debug', 'почини', 'fix', 'ошибк', 'не работает',
|
|
'поправь', 'исправь', 'упал', 'падает', 'сломал'],
|
|
refactor: ['рефактор', 'refactor', 'почисти код', 'упрости'],
|
|
cleanup: ['уберём', 'удали', 'remove', 'cleanup', 'dead code'],
|
|
marketing: ['маркетинг', 'marketing', 'кампани', 'лендинг', 'рассылк', 'реклам', 'постинг'],
|
|
security: ['безопасност', 'security', 'уязвимост', 'vulnerability',
|
|
'пдн', '152-фз', 'stride', 'угроз', 'выход в интернет', 'go-live'],
|
|
analysis: ['проанализируй', 'analysis', 'разбер', 'investigate',
|
|
'проверь', 'выясни', 'посмотри почему', 'медленн'],
|
|
monitoring: ['мониторинг', 'monitor', 'трейс', 'observability'],
|
|
'memory-sync': ['запомни', 'обнови память', 'memory', 'CLAUDE.md', 'MEMORY.md'],
|
|
question: ['что такое', 'как работает', 'почему', 'объясни', 'расскажи'],
|
|
};
|
|
|
|
// Lowercase substrings that mark a prompt as a "micro" task; detectMicro
// reports true as soon as the lowercased prompt contains any one of them.
const MICRO_KEYWORDS = [
  // typo fixes
  'опечатк',
  'typo',
  // renames
  'переименуй',
  'rename',
  // dead-code removal
  'удали мёртв',
  'dead code',
  // formatting
  'формат',
  'format',
  // single-constant edits
  'константу',
  'one constant',
  // value tweaks
  'увеличь',
  'уменьши',
  'поменяй значени',
  'измени константу',
  // one-liners / version bumps
  'одну строку',
  'bump',
];
|
|
|
|
// Hard keyword stems: when the lowercased prompt contains one of these and a
// node was recommended, computeConfidence reports a high-confidence regex
// match. Used on the last-resort degraded path and kept separate from the
// Layer 1 prefilter SKILL_ALIAS_MAP.
export const HARD_KEYWORD_STEMS = [
  'списан',
  'биллинг',
  'маркетинг',
  'email-рассылк',
  '152-фз',
  'go-live',
  'фич',
  'план',
  'баг',
];
|
|
|
|
/**
 * Coerce a value to a lowercase string.
 *
 * @param {*} s - Value to normalise; any falsy value is treated as ''.
 * @returns {string} Lowercased string form of `s`.
 */
function lower(s) {
  const text = s ? String(s) : '';
  return text.toLowerCase();
}
|
|
|
|
/**
 * Map a prompt to the first task type whose keyword list matches it.
 *
 * Task types are tried in the key order of TASK_TYPE_KEYWORDS, and matching is
 * a case-insensitive substring test.
 *
 * @param {string} prompt - Raw user prompt.
 * @returns {string} The matching task type key, or 'unknown' when none match.
 */
function detectTaskType(prompt) {
  const text = lower(prompt);
  const hit = Object.entries(TASK_TYPE_KEYWORDS).find(([, keywords]) =>
    // Normalise the keyword as well: the prompt is lowercased, so a mixed-case
    // keyword (e.g. 'CLAUDE.md') would otherwise never match.
    keywords.some((keyword) => text.includes(lower(keyword))),
  );
  return hit ? hit[0] : 'unknown';
}
|
|
|
|
/**
 * Report whether a prompt looks like a "micro" task.
 *
 * @param {string} prompt - Raw user prompt.
 * @returns {boolean} True when the lowercased prompt contains at least one
 *   entry of MICRO_KEYWORDS as a substring.
 */
function detectMicro(prompt) {
  const text = lower(prompt);
  for (const keyword of MICRO_KEYWORDS) {
    if (text.includes(keyword)) {
      return true;
    }
  }
  return false;
}
|
|
|
|
/**
 * Substring match of a keyword against a prompt, with a one-character stem
 * fallback for longer keywords.
 *
 * @param {string} promptLower - Prompt text, already lowercased by the caller.
 * @param {string} keywordLower - Keyword, already lowercased by the caller.
 * @returns {boolean} True when the prompt contains the keyword verbatim, or —
 *   for keywords of 6+ characters — the keyword with its last character
 *   dropped.
 */
function keywordMatches(promptLower, keywordLower) {
  const exactHit = promptLower.includes(keywordLower);
  // Only keywords of at least 6 characters get the "drop the last character"
  // fallback; shorter keywords must match verbatim.
  const stemAllowed = keywordLower.length >= 6;
  return exactHit || (stemAllowed && promptLower.includes(keywordLower.slice(0, -1)));
}
|
|
|
|
/**
 * Yield every [node, trigger] pair that belongs to an active registry node.
 * Tolerates a missing registry, a missing `nodes`/`triggers` list, and null
 * node or trigger entries.
 */
function* activeNodeTriggers(registry) {
  for (const node of registry?.nodes || []) {
    if (node?.status !== 'active') continue;
    for (const trigger of node.triggers || []) {
      if (trigger) yield [node, trigger];
    }
  }
}

/**
 * Pick the registry node that best fits a prompt.
 *
 * Pass 1 scores keyword triggers; pass 2 runs only when pass 1 finds nothing
 * and falls back to classification triggers that equal the detected task type.
 * Only nodes with status 'active' are considered. A missing registry yields
 * null instead of throwing, so this last-resort path cannot crash the caller.
 *
 * @param {string} prompt - Raw user prompt.
 * @param {{nodes?: Array<object>}|null|undefined} registry - Node registry
 *   supplied by the caller.
 * @returns {*} The `id` of the best-matching node, or null when nothing matches.
 */
function detectRecommendedNode(prompt, registry) {
  const text = lower(prompt);

  // Pass 1 — a keyword (domain) hit takes priority over the classification type.
  let bestKeyword = { id: null, score: 0 };
  for (const [node, trigger] of activeNodeTriggers(registry)) {
    if (!trigger.keyword) continue;
    const keyword = lower(trigger.keyword);
    if (!keywordMatches(text, keyword)) continue;
    // Trigger weight (default 1.0) plus a small bonus proportional to keyword
    // length, so that at equal weight the longer keyword wins.
    const score = (trigger.weight ?? 1.0) + keyword.length / 1000;
    if (score > bestKeyword.score) bestKeyword = { id: node.id, score };
  }
  if (bestKeyword.id) return bestKeyword.id;

  // Pass 2 — fall back to the classification trigger with the highest weight.
  const taskType = detectTaskType(prompt);
  let bestClassification = { id: null, weight: 0 };
  for (const [node, trigger] of activeNodeTriggers(registry)) {
    if (!trigger.classification) continue;
    const weight = trigger.weight ?? 1.0;
    if (trigger.classification === taskType && weight > bestClassification.weight) {
      bestClassification = { id: node.id, weight };
    }
  }
  return bestClassification.id;
}
|
|
|
|
/**
 * Derive a confidence score for a regex classification.
 *
 * Without a recommended node (strictly `null`): 0.1 when the task type is
 * 'unknown', otherwise 0.4. With a node: 0.9 when the lowercased prompt
 * contains any HARD_KEYWORD_STEMS entry, else 0.5 for an 'unknown' task type
 * and 0.7 for a known one.
 *
 * @param {string} taskType - Detected task type, or 'unknown'.
 * @param {*} recommendedNode - Recommended node id, or null when none.
 * @param {string} prompt - Raw user prompt.
 * @returns {number} One of 0.1, 0.4, 0.5, 0.7 or 0.9.
 */
function computeConfidence(taskType, recommendedNode, prompt) {
  const typeKnown = taskType !== 'unknown';

  if (recommendedNode === null) {
    return typeKnown ? 0.4 : 0.1;
  }

  const text = lower(prompt);
  for (const stem of HARD_KEYWORD_STEMS) {
    if (text.includes(stem)) return 0.9;
  }

  return typeKnown ? 0.7 : 0.5;
}
|
|
|
|
/**
 * Classify a prompt with the regex/keyword heuristics only.
 *
 * @param {string} prompt - Raw user prompt.
 * @param {object} registry - Node registry, forwarded to detectRecommendedNode.
 * @returns {{taskType: string, micro: boolean, recommendedNode: *,
 *   confidence: number, source: string}} Classification result; `source` is
 *   always 'regex'.
 */
export function classifyByRegex(prompt, registry) {
  const result = {
    taskType: detectTaskType(prompt),
    micro: detectMicro(prompt),
    recommendedNode: detectRecommendedNode(prompt, registry),
  };
  // Confidence depends on the task type and node computed above.
  result.confidence = computeConfidence(result.taskType, result.recommendedNode, prompt);
  result.source = 'regex';
  return result;
}
|