a28618fd16
Phase 2 Task 10 of LLM-first router overhaul. Spec §4.2 — Layer 2 Sonnet 4.6
classifier with 4-pattern памятка enrichment, JSON output per spec, fallback
chain Sonnet → regex → degraded. Phase 1 regex Layer 1 extracted to its own
module so it can be called only as a fallback.
- tools/router-classifier-regex-fallback.mjs (NEW): self-contained regex
fallback. Extracts TASK_TYPE_KEYWORDS, HARD_KEYWORD_STEMS, detectTaskType,
keywordMatches, detectRecommendedNode, computeConfidence, classifyByRegex
verbatim from the prior classifier. Self-contained (own MICRO_KEYWORDS,
detectMicro, lower) — no circular imports.
- tools/router-classifier.mjs (REWRITE):
+ import { CLASSIFIER_MODEL } from router-config.mjs
+ re-export { classifyByRegex } from regex-fallback (back-compat surface)
+ buildClassifierPrompt(prompt, registry, { enrichment=true }) — spec §4.2
format with 4-pattern памятка (brainstorming / discovery-interview /
writing-plans / systematic-debugging) togglable via enrichment flag.
+ parseClassifierResponse(text) — strict task_type required, ```json fence
aware, accepts null recommended_chain_id.
+ classify() rewritten: prefilter → cache → Sonnet (CLASSIFIER_MODEL) →
regex fallback (transport error OR no key/unparseable).
+ callAnthropicAPI default model = CLASSIFIER_MODEL; max_tokens 300 → 1500
(full classifier output with alternatives & памятка needs the budget).
- removed: shouldEscalate, TASK_TYPE_KEYWORDS, detectTaskType,
keywordMatches, detectRecommendedNode, HARD_KEYWORD_STEMS, computeConfidence
(all live in regex-fallback now).
Kept legacy: buildLLMPrompt / parseLLMResponse (back-compat surface).
- tools/router-accuracy-runner.mjs: import classifyByRegex from regex-fallback
module (G11 from plan). Runner functionality unchanged.
- tools/router-classifier.test.mjs: +8 tests for buildClassifierPrompt (4) and
parseClassifierResponse (4); removed obsolete shouldEscalate block (3);
rewrote classify integration block (4 tests) to reflect new flow
(prefilter-first, LLM-always-on-fallthrough, regex on error).
Tests: tools/router-classifier.test.mjs 44/44 PASS. Full tools/ suite:
557 tests passed, 0 failed (4 pre-existing empty test files report
"no test suite found" — unrelated: ruflo-recall-hook, subagent-prompt-prefix,
plus 2 others — not touched in this commit).
accuracy-runner smoke: type=85%/node=55%/micro=100% on the 20-prompt set,
unchanged from pre-Task-10 baseline (regex path semantics preserved).
443 lines
16 KiB
JavaScript
443 lines
16 KiB
JavaScript
#!/usr/bin/env node
|
||
/**
|
||
* Router classifier — Phase 2 (LLM-first router overhaul).
|
||
*
|
||
* Architecture (spec §3, §4.1, §4.2):
|
||
* Layer 1: prefilter() — pure regex, 7 checks (manual override / continuation /
|
||
* acknowledgment / cancellation / short conv + anchor / micro / null).
|
||
* Layer 2: Sonnet 4.6 classifier via ProxyAPI. Memory pamyatka (4 patterns)
|
||
* injected when prompt-enrichment-mode=on. Output schema per §4.2.
|
||
* Layer 3 (fallback): regex fallback in router-classifier-regex-fallback.mjs.
|
||
* Layer 4 (degraded): { task_type: 'unknown', source: 'fallback', degraded: true }
|
||
* with explicit chat marker.
|
||
*
|
||
* Pure (Layer 1): no fs/exec/net. callers pass registry + optional prevState.
|
||
* Layer 2: HTTP via callAnthropicAPI (ProxyAPI, header reseller-isolation).
|
||
*
|
||
* Legacy exports buildLLMPrompt / parseLLMResponse retained for backward
|
||
* compatibility with older accuracy-runner snapshots and tests; not on the
|
||
* Phase 2 hot path. The Phase 1 regex Layer 1 (classifyByRegex, TASK_TYPE_KEYWORDS,
|
||
* HARD_KEYWORD_STEMS) moved verbatim to router-classifier-regex-fallback.mjs;
|
||
* re-exported here for callers that still reach for it through this module.
|
||
*/
|
||
|
||
import { CLASSIFIER_MODEL, INHERITANCE_MAX_AGE_MIN } from './router-config.mjs';
|
||
import { classifyByRegex } from './router-classifier-regex-fallback.mjs';
|
||
|
||
export { classifyByRegex };
|
||
|
||
// Lower-case substrings (RU + EN, some are stems) that mark a prompt as a
// "micro" edit. Matching is plain substring containment, see detectMicro().
const MICRO_KEYWORDS = [
  // typo fixes
  'опечатк',
  'typo',
  // renames
  'переименуй',
  'rename',
  // dead-code removal
  'удали мёртв',
  'dead code',
  // formatting
  'формат',
  'format',
  // single-constant tweaks
  'константу',
  'one constant',
  'увеличь',
  'уменьши',
  'поменяй значени',
  'измени константу',
  // one-liners / version bumps
  'одну строку',
  'bump',
];
|
||
|
||
/** Coerce any value to a lower-case string; every falsy value becomes ''. */
function lower(s) {
  const text = s ? String(s) : '';
  return text.toLowerCase();
}
|
||
|
||
/**
 * Report whether the prompt looks like a micro edit.
 *
 * @param {*} prompt — user prompt; lower-cased through lower() before matching.
 * @returns {boolean} true when any MICRO_KEYWORDS entry occurs as a substring.
 */
function detectMicro(prompt) {
  const haystack = lower(prompt);
  for (const keyword of MICRO_KEYWORDS) {
    if (haystack.includes(keyword)) return true;
  }
  return false;
}
|
||
|
||
// ─── Prefilter constants (spec §4.1, Phase 2 Task 9) ────────────────────────
|
||
|
||
// Whole-prompt replies (already trimmed + lower-cased) that mean "go on with
// the previous task".
const CONTINUATION_PATTERNS = [
  'да',
  'делай',
  'давай',
  'продолжай',
  'дальше',
  'ага',
  'валяй',
  'поехали',
  'утверждаю',
  'одобряю',
  'ок делай',
  'хорошо делай',
  'согласен делай',
];

// Whole-prompt replies that merely acknowledge the previous answer.
const ACKNOWLEDGMENT_PATTERNS = [
  'спасибо',
  'понял',
  'ок',
  'хорошо',
  'отлично',
  'верно',
  'круто',
  'годится',
  'молодец',
  'норм',
];

// Whole-prompt replies that cancel / reject the previous task.
const CANCELLATION_PATTERNS = [
  'стоп',
  'нет',
  'отмени',
  'отбой',
  'не надо',
  'забей',
  'хватит',
  'достаточно',
];

// "<verb> <via|with|skill> <node>" — capture group 3 is the requested node name.
const MANUAL_OVERRIDE_RE = /^(делай|сделай|используй|применя[йи]|запусти|вызови)\s+(через|с\s+помощью|skill|skill[оа]м)\s+([\w\-:]+)/i;

// Domain nouns: a short prompt containing one of these is not small talk.
const ANCHOR_NOUNS = [
  'аудит',
  'баг',
  'план',
  'спека',
  'фича',
  'тест',
  'миграция',
  'endpoint',
  'файл',
  'функция',
  'класс',
  'компонент',
  'view',
  'модель',
  'биллинг',
  'маркетинг',
  'безопасность',
  'пдн',
  'регион',
  'портал',
  'проект',
  'сделка',
  'лид',
  'админка',
  'база',
  'схема',
  'воронка',
  'хук',
];

// Imperative verbs that count as an anchor only in longer prompts
// (see containsAnchor()).
const ANCHOR_IMPERATIVES = [
  'проанализируй',
  'проверь',
  'исправь',
  'почини',
  'создай',
  'добавь',
  'удали',
  'переименуй',
  'улучши',
  'расширь',
];

// Short alias (or canonical name) → canonical skill name.
const SKILL_ALIAS_MAP = {
  // test-driven-development
  tdd: 'test-driven-development',
  'test-driven-development': 'test-driven-development',
  // brainstorming
  brainstorming: 'brainstorming',
  brainstorm: 'brainstorming',
  // systematic-debugging
  debugging: 'systematic-debugging',
  'systematic-debugging': 'systematic-debugging',
  debug: 'systematic-debugging',
  // writing-plans
  'writing-plans': 'writing-plans',
  plan: 'writing-plans',
  plans: 'writing-plans',
  // verification-before-completion
  'verification-before-completion': 'verification-before-completion',
  verify: 'verification-before-completion',
  // dispatching-parallel-agents
  parallel: 'dispatching-parallel-agents',
  'dispatching-parallel-agents': 'dispatching-parallel-agents',
  // using-git-worktrees
  worktree: 'using-git-worktrees',
  'using-git-worktrees': 'using-git-worktrees',
  // requesting-code-review
  review: 'requesting-code-review',
  'requesting-code-review': 'requesting-code-review',
};
|
||
|
||
/**
 * Decide whether a prompt carries a task "anchor".
 *
 * An anchor is any ANCHOR_NOUNS substring, or — only when the prompt is longer
 * than 30 characters — any ANCHOR_IMPERATIVES substring.
 *
 * @param {string} prompt — raw prompt (its own `.length` is used for the 30-char gate).
 * @returns {boolean}
 */
function containsAnchor(prompt) {
  const text = lower(prompt);
  const hasNoun = ANCHOR_NOUNS.some((noun) => text.includes(noun));
  if (hasNoun) return true;
  return prompt.length > 30 && ANCHOR_IMPERATIVES.some((verb) => text.includes(verb));
}
|
||
|
||
/**
 * Resolve a node name typed by the user to a known skill/node identifier.
 *
 * Resolution order:
 *   1. SKILL_ALIAS_MAP (own keys only),
 *   2. exact, case-insensitive slug match in `registry.nodes`,
 *   3. fuzzy match: slug contains the name, the name contains the slug, or the
 *      node's display name contains the name (first hit wins),
 *   4. `unknown_<extracted>` marker.
 *
 * @param {string} extracted — node name captured from the prompt.
 * @param {object} [registry] — optional `{ nodes: [{ slug, name }] }`.
 * @returns {string|null} resolved identifier, `unknown_…` marker, or null when
 *   `extracted` is empty.
 */
function resolveNodeAlias(extracted, registry) {
  if (!extracted) return null;
  const wanted = String(extracted).toLowerCase();

  // Own-property check: a plain `SKILL_ALIAS_MAP[wanted]` lookup would also hit
  // inherited members ("constructor", "toString", …) and return a function.
  if (Object.hasOwn(SKILL_ALIAS_MAP, wanted)) return SKILL_ALIAS_MAP[wanted];

  const nodes = Array.isArray(registry?.nodes) ? registry.nodes : [];
  const slugOf = (node) => String(node?.slug || '').toLowerCase();
  const nameOf = (node) => String(node?.name || '').toLowerCase();

  // `wanted` is lower-cased, so compare against the lower-cased slug; otherwise
  // a mixed-case slug could lose to an earlier fuzzy candidate.
  const exact = nodes.find((node) => slugOf(node) !== '' && slugOf(node) === wanted);
  if (exact) return exact.slug;

  const fuzzy = nodes.find((node) => {
    const slug = slugOf(node);
    const name = nameOf(node);
    const slugHit = slug !== '' && (slug.includes(wanted) || wanted.includes(slug));
    const nameHit = name !== '' && name.includes(wanted);
    return slugHit || nameHit;
  });
  if (fuzzy) return fuzzy.slug;

  return `unknown_${extracted}`;
}
|
||
|
||
/**
 * Prefilter — Layer 1 (spec §4.1). Performs no I/O; reads the clock only for the
 * continuation age check.
 *
 * Checks, in order (first hit wins):
 *   1. manual override ("делай через <skill>") → task_type 'manual_override';
 *   2. continuation word + recent prevState → inherits the previous task_type;
 *   3. acknowledgment word → 'conversation';
 *   4. cancellation word → 'conversation' with `previous_rejected`;
 *   5. short prompt (< 15 chars) without an anchor → 'conversation';
 *   6. micro-edit keyword → 'micro';
 *   7. otherwise null.
 *
 * The prompt is trimmed once and the trimmed form feeds every check, so leading
 * or trailing whitespace cannot defeat the `^`-anchored override regex or the
 * length threshold.
 *
 * @param {*} prompt — user prompt (coerced with String()).
 * @param {object} [context]
 * @param {object} [context.prevState] — previous `{ classification, timestamp, task_id }`.
 * @param {object} [context.registry] — node registry used to resolve override targets.
 * @returns {object|null} object on a positive match, or null when fall-through
 *   to Layer 2 is required.
 */
export function prefilter(prompt, { prevState, registry } = {}) {
  if (!prompt) return null;

  const text = String(prompt).trim();
  const normalized = text.toLowerCase();

  // 1. Manual override.
  const override = text.match(MANUAL_OVERRIDE_RE);
  if (override) {
    return {
      task_type: 'manual_override',
      node: 'direct',
      source: 'prefilter',
      requested_node: resolveNodeAlias(override[3], registry),
    };
  }

  // 2. Continuation — only while the previous classification is fresh enough.
  //    An unparseable timestamp yields NaN, which fails the comparison and
  //    falls through to the later checks.
  if (CONTINUATION_PATTERNS.includes(normalized) && prevState?.classification && prevState.timestamp) {
    const ageMin = (Date.now() - new Date(prevState.timestamp).getTime()) / 60000;
    if (ageMin <= INHERITANCE_MAX_AGE_MIN) {
      return {
        task_type: prevState.classification.task_type,
        node: 'direct',
        source: 'prefilter_inherited',
        recommendedNode: prevState.classification.recommendedNode ?? null,
        inheritance: {
          inherited_from_task_id: prevState.task_id ?? null,
          inheritance_age_minutes: Math.round(ageMin),
        },
      };
    }
  }

  // 3. Acknowledgment.
  if (ACKNOWLEDGMENT_PATTERNS.includes(normalized)) {
    return { task_type: 'conversation', node: 'direct', source: 'prefilter' };
  }

  // 4. Cancellation.
  if (CANCELLATION_PATTERNS.includes(normalized)) {
    return {
      task_type: 'conversation',
      node: 'direct',
      source: 'prefilter',
      previous_rejected: !!prevState?.task_id,
    };
  }

  // 5. Short message with nothing task-like in it.
  if (text.length < 15 && !containsAnchor(text)) {
    return { task_type: 'conversation', node: 'direct', source: 'prefilter' };
  }

  // 6. Micro edit.
  if (detectMicro(text)) {
    return { task_type: 'micro', node: 'direct', source: 'prefilter' };
  }

  // 7. Needs Layer 2.
  return null;
}
|
||
|
||
// ─── Layer 2: Sonnet 4.6 classifier (spec §4.2) ─────────────────────────────
|
||
|
||
// Four-pattern memo injected into the classifier prompt when enrichment is on.
// The text is sent to the model verbatim, one array entry per line.
const PAMYATKA = [
  '=== ПАМЯТКА (4 паттерна, закрывает 1.1) ===',
  '',
  'ПАТТЕРН 1 (brainstorming): обязательно рассмотри минимум 3 alternative_considered.',
  'Один кандидат без альтернатив — плохо.',
  '',
  'ПАТТЕРН 2 (discovery-interview): если запрос можно интерпретировать двумя+',
  'способами — НЕ угадывай. Верни no_skill_found=true с',
  'no_skill_found_suggestion: "ambiguous — clarify A vs B vs C".',
  '',
  'ПАТТЕРН 3 (writing-plans): различай single-step и multi-step.',
  '- Один глагол + объект ("поправь typo") → chain 1 элемент.',
  '- "и"/"потом"/"затем" или подразумевается несколько этапов → chain ≥2 в порядке.',
  '',
  'ПАТТЕРН 4 (systematic-debugging): для task_type=bugfix — проверь, чётко ли',
  'описаны system/expected/actual. Если хотя бы одного нет — рекомендуй',
  'superpowers:systematic-debugging (он сам потребует прояснить).',
].join('\n');
|
||
|
||
/**
 * Escape a value for embedding inside a double-quoted scalar in the prompt.
 *
 * Backslashes are escaped first so that a value ending in `\` cannot swallow
 * the closing quote, then double quotes are escaped, and finally every line
 * break (`\r\n`, `\n` or `\r`) is flattened to a single space.
 *
 * @param {*} s — value to escape; falsy values yield ''.
 * @returns {string}
 */
function escapeYamlStr(s) {
  return String(s || '')
    .replace(/\\/g, '\\\\')
    .replace(/"/g, '\\"')
    .replace(/\r\n|\n|\r/g, ' ');
}
|
||
|
||
/**
 * Render the active registry nodes as the node list of the classifier prompt.
 *
 * Each node becomes a `- skill_id / name / [capabilities] / triggers` entry;
 * at most the first five triggers are listed. Trigger keywords and
 * classifications are escaped the same way as `capabilities`, because all of
 * them are placed inside double quotes.
 *
 * @param {object} [registry] — `{ nodes: [{ id, name, status, triggers, capabilities }] }`;
 *   a missing registry or node list renders as ''.
 * @returns {string} entries joined by newlines.
 */
function buildNodesBlock(registry) {
  const activeNodes = (registry?.nodes || []).filter((n) => n.status === 'active');

  const renderTrigger = (t) => {
    if (t.keyword) return `"${escapeYamlStr(t.keyword)}"`;
    if (t.classification) return `"cls:${escapeYamlStr(t.classification)}"`;
    return null; // trigger with neither field is skipped
  };

  return activeNodes.map((n) => {
    const triggers = (n.triggers || [])
      .slice(0, 5)
      .map(renderTrigger)
      .filter(Boolean)
      .join(', ');
    const cap = n.capabilities ? `\n capabilities: "${escapeYamlStr(n.capabilities)}"` : '';
    return `- skill_id: ${n.id}\n name: ${n.name}${cap}\n triggers: [${triggers}]`;
  }).join('\n');
}
|
||
|
||
/**
 * Render the registry chains as the chain list of the classifier prompt,
 * one `- <id>: <name> [step → step …]` line per chain.
 *
 * @param {object} [registry] — `{ chains: { [id]: { name, sequence } } }`;
 *   a missing registry or chain map renders as ''.
 * @returns {string} lines joined by newlines.
 */
function buildChainsBlock(registry) {
  const chains = registry?.chains || {};
  return Object.entries(chains)
    .map(([id, chain]) => `- ${id}: ${chain.name} [${(chain.sequence || []).join(' → ')}]`)
    .join('\n');
}
|
||
|
||
/**
 * Assemble the Layer 2 classifier prompt (spec §4.2).
 *
 * The result is a single string with a `<system>` section (output rules,
 * optional memo, node registry, chain registry) followed by a `<user>` section
 * holding the raw prompt.
 *
 * NOTE(review): the template text below does not itself name the `task_type`
 * key that parseClassifierResponse() requires — confirm the model learns the
 * output schema from somewhere.
 *
 * @param {string} userPrompt — raw user prompt
 * @param {object} registry — { nodes, chains }
 * @param {object} [options]
 * @param {boolean} [options.enrichment=true] — include the 4-pattern memo (PAMYATKA)
 * @returns {string} the full prompt text
 */
export function buildClassifierPrompt(userPrompt, registry, { enrichment = true } = {}) {
  // With enrichment the memo is framed by blank lines; without it a single
  // newline keeps the surrounding layout.
  let memoSection = '\n';
  if (enrichment) {
    memoSection = `\n\n${PAMYATKA}\n`;
  }
  const nodeRegistry = buildNodesBlock(registry);
  const chainRegistry = buildChainsBlock(registry);

  const systemPart = `<system>
Ты классификатор задач для CRM-проекта «Лидерра» (Laravel 13 + Vue 3 + Vuetify 3).

ОБЯЗАТЕЛЬНЫЕ выходные правила:
1. Верни ровно один из: skill ИЛИ chain ИЛИ no_skill_found.
2. "direct" НЕ разрешён. Conversation/micro обрабатываются ДО тебя.
3. Верни топ-3 alternatives_considered со score (0-1) и причиной отклонения.
4. reason_for_choice — конкретно, со ссылкой на capability.
5. recommended_chain — массив из 1-5 skill IDs.
6. Если ни один узел не подходит — no_skill_found=true + suggestion.
${memoSection}
=== РЕЕСТР УЗЛОВ ===
${nodeRegistry}

=== РЕЕСТР ЦЕПОЧЕК (справочно) ===
${chainRegistry}

Output — ONLY JSON object, no prose, no code fences.
</system>`;

  const userPart = `<user>
Prompt: ${userPrompt}
</user>`;

  return `${systemPart}\n\n${userPart}`;
}
|
||
|
||
/**
 * Parse the Layer 2 classifier response (spec §4.2).
 *
 * Accepts:
 *   - a raw JSON object,
 *   - JSON wrapped in a ```json ... ``` fence,
 *   - JSON wrapped in a plain ``` fence,
 *   - any of the above surrounded by stray prose: when the strict parse throws,
 *     the span from the first `{` to the last `}` is tried once.
 *
 * If the strict parse succeeds, its result is final — the lenient retry never
 * overrides a reply that was valid JSON but had the wrong shape.
 *
 * `recommended_chain_id` may be null (custom chain not in L1-L16).
 *
 * @param {*} text — raw model output.
 * @returns {object|null} the parsed object, or null on parse failure or when
 *   the required string field `task_type` is missing.
 */
export function parseClassifierResponse(text) {
  if (!text) return null;

  const body = String(text).trim();
  const unfenced = body.replace(/^```(?:json)?\s*\n?/, '').replace(/\n?```$/, '').trim();

  const validate = (value) => (
    value !== null && typeof value === 'object' && typeof value.task_type === 'string'
      ? value
      : null
  );

  try {
    return validate(JSON.parse(unfenced));
  } catch {
    // Not clean JSON — fall through to the lenient extraction below.
  }

  const open = body.indexOf('{');
  const close = body.lastIndexOf('}');
  if (open === -1 || close <= open) return null;

  try {
    return validate(JSON.parse(body.slice(open, close + 1)));
  } catch {
    return null;
  }
}
|
||
|
||
// ─── Legacy LLM prompt/parser (kept for backward compat) ────────────────────
|
||
|
||
// Instruction header of the legacy (pre-Phase-2) LLM prompt, one array entry
// per line; consumed only by buildLLMPrompt().
const LEGACY_LLM_SYSTEM_PROMPT = [
  'You are a router classifier for an AI coding assistant. Given a user prompt and a registry of available skills/tools (nodes), choose:',
  '- taskType: one of {feature, planning, bugfix, refactor, cleanup, marketing, security, analysis, monitoring, memory-sync, question, unknown}',
  '- micro: true if the task is a tiny edit (≤2 files, ≤20 lines, e.g. typo / rename / single constant)',
  '- recommendedNode: id of the single best-matching active node, or null if nothing matches',
  '- confidence: 0.0-1.0',
  '- recommendedChain: id of the chain (L1-L16) if the task fits a known chain, else null',
  '- reasoning: 1-2 sentences why',
  '',
  'Reply with ONLY a JSON object, no prose. Example:',
  `{"taskType":"bugfix","micro":false,"recommendedNode":"#62","confidence":0.9,"recommendedChain":null,"reasoning":"keyword 'списание' matches #62 billing-audit"}`,
].join('\n');
|
||
|
||
/**
 * Legacy prompt builder, kept for backward compatibility.
 *
 * Produces LEGACY_LLM_SYSTEM_PROMPT followed by three markdown sections:
 * active nodes (id, name and up to three triggers each), chains, and the user
 * prompt.
 *
 * @param {string} prompt — raw user prompt.
 * @param {object} registry — `{ nodes, chains }`.
 * @returns {string} the full legacy prompt text.
 */
export function buildLLMPrompt(prompt, registry) {
  const describeNode = (node) => {
    const triggerList = (node.triggers || [])
      .slice(0, 3)
      .map((trigger) => trigger.keyword || `cls:${trigger.classification}`)
      .filter(Boolean)
      .join(', ');
    return `- ${node.id} ${node.name} [${triggerList}]`;
  };

  const describeChain = ([id, chain]) => `- ${id}: ${chain.name} [${(chain.sequence || []).join(' → ')}]`;

  const nodeLines = (registry.nodes || [])
    .filter((node) => node.status === 'active')
    .map(describeNode)
    .join('\n');

  const chainLines = Object.entries(registry.chains || {})
    .map(describeChain)
    .join('\n');

  return `${LEGACY_LLM_SYSTEM_PROMPT}

## Available nodes
${nodeLines}

## Available chains
${chainLines}

## User prompt
${prompt}

Reply with JSON object only.`;
}
|
||
|
||
/**
 * Legacy response parser, kept for backward compatibility.
 *
 * Strips an optional leading ``` / ```json fence and trailing ``` fence, then
 * parses the remainder as JSON.
 *
 * @param {*} text — raw model output.
 * @returns {object|null} the parsed value, or null when the input is empty,
 *   is not valid JSON, or lacks a string `taskType`.
 */
export function parseLLMResponse(text) {
  if (!text) return null;

  const withoutFences = String(text)
    .trim()
    .replace(/^```(?:json)?\s*\n?/, '')
    .replace(/\n?```$/, '')
    .trim();

  try {
    const payload = JSON.parse(withoutFences);
    // The property access stays inside the try: a JSON `null` payload throws
    // here and is reported as null like any other failure.
    return typeof payload.taskType === 'string' ? payload : null;
  } catch {
    return null;
  }
}
|
||
|
||
// ─── HTTP transport (ProxyAPI, header reseller-isolation) ───────────────────
|
||
|
||
const DEFAULT_LLM_BASE_URL = 'https://api.proxyapi.ru/anthropic';
|
||
|
||
export async function callAnthropicAPI(prompt, {
|
||
apiKey,
|
||
baseUrl = DEFAULT_LLM_BASE_URL,
|
||
model = CLASSIFIER_MODEL,
|
||
fetchImpl = fetch,
|
||
}) {
|
||
const url = `${String(baseUrl).replace(/\/+$/, '')}/v1/messages`;
|
||
const r = await fetchImpl(url, {
|
||
method: 'POST',
|
||
headers: {
|
||
'authorization': `Bearer ${apiKey}`,
|
||
'x-api-key': apiKey,
|
||
'anthropic-version': '2023-06-01',
|
||
'content-type': 'application/json',
|
||
},
|
||
body: JSON.stringify({
|
||
model,
|
||
max_tokens: 1500,
|
||
messages: [{ role: 'user', content: prompt }],
|
||
}),
|
||
});
|
||
if (!r.ok) {
|
||
throw new Error(`Router LLM ${r.status}: ${await r.text()}`);
|
||
}
|
||
const data = await r.json();
|
||
return data.content?.[0]?.text || '';
|
||
}
|
||
|
||
/**
 * 32-bit rolling hash (h = h * 31 + charCode, wrapped to int32) of a prompt,
 * returned as a decimal string for use as a cache key.
 *
 * The input is coerced to a string first: null/undefined hash like '', and
 * other non-string values hash like their String() form instead of throwing
 * or collapsing to "0".
 *
 * @param {*} s — value to hash.
 * @returns {string} signed 32-bit hash in decimal.
 */
function hashPrompt(s) {
  const text = String(s ?? '');
  let h = 0;
  for (let i = 0; i < text.length; i++) {
    h = ((h << 5) - h) + text.charCodeAt(i); // (h << 5) - h === h * 31
    h |= 0; // wrap to signed 32-bit
  }
  return String(h);
}
|
||
|
||
/**
 * classify — full Layer 1 + Layer 2 pipeline (spec §4.1, §4.2).
 *
 * Flow:
 *   1. prefilter(prompt, { prevState, registry }). If non-null → return it.
 *   2. Cache check keyed by hashPrompt(prompt); a hit is returned with
 *      `source: 'cache'`.
 *   3. LLM call (default: callAnthropicAPI with CLASSIFIER_MODEL, key taken
 *      from ROUTER_LLM_KEY, optional ROUTER_LLM_BASE_URL).
 *   4. LLM call throws → classifyByRegex() result plus `llmError` and
 *      `degraded: true`.
 *   5. LLM call returns a falsy value (no key / unparseable) → classifyByRegex()
 *      result as-is.
 *   6. Otherwise the LLM result is tagged `source: 'llm'`, cached, and returned.
 *
 * The cache key is computed from the string form of the prompt, so a
 * null/undefined prompt (which prefilter lets through) does not crash here.
 *
 * @param {*} prompt — user prompt.
 * @param {object} registry — `{ nodes, chains }`.
 * @param {object} [options]
 * @param {object} [options.prevState] — passed to prefilter for continuation/cancellation context.
 * @param {Map} [options.cache] — Map of hash(prompt) → result.
 * @param {Function} [options.llmCall] — async function() → parsed-result-or-null; used by tests to mock.
 * @param {boolean} [options.enrichment=true] — controls the memo in the classifier prompt.
 * @param {string} [options.model] — classifier model id override.
 * @returns {Promise<object>} classification result.
 */
export async function classify(prompt, registry, options = {}) {
  // Layer 1 — prefilter.
  const pre = prefilter(prompt, { prevState: options.prevState, registry });
  if (pre !== null) return pre;

  // Cache.
  const cache = options.cache;
  const key = hashPrompt(String(prompt ?? ''));
  if (cache && cache.has(key)) {
    return { ...cache.get(key), source: 'cache' };
  }

  // Layer 2 — LLM classifier.
  const llmCall = options.llmCall || (async () => {
    const apiKey = process.env.ROUTER_LLM_KEY;
    if (!apiKey) return null;
    const classifierPrompt = buildClassifierPrompt(prompt, registry, {
      enrichment: options.enrichment ?? true,
    });
    const text = await callAnthropicAPI(classifierPrompt, {
      apiKey,
      baseUrl: process.env.ROUTER_LLM_BASE_URL || undefined,
      model: options.model || CLASSIFIER_MODEL,
    });
    return parseClassifierResponse(text);
  });

  let llmResult;
  try {
    llmResult = await llmCall();
  } catch (err) {
    // Layer 3 — regex fallback on LLM transport error. The thrown value is not
    // guaranteed to be an Error, so read its message defensively.
    const fallback = classifyByRegex(prompt, registry);
    return { ...fallback, llmError: err?.message ?? String(err), degraded: true };
  }

  if (!llmResult) {
    // Layer 3 — regex fallback on no key / unparseable.
    return classifyByRegex(prompt, registry);
  }

  const finalResult = { ...llmResult, source: 'llm' };
  if (cache) cache.set(key, finalResult);
  return finalResult;
}
|