portal/tools/router-classifier.mjs

#!/usr/bin/env node
/**
 * Router classifier — Phase 2 (LLM-first router overhaul).
 *
 * Architecture (spec §3, §4.1, §4.2):
 *   Layer 1: prefilter() — pure regex, 7 checks (manual override / continuation /
 *            acknowledgment / cancellation / short conv + anchor / micro / null).
 *   Layer 2: Sonnet 4.6 classifier via ProxyAPI. Memory pamyatka (4 patterns)
 *            injected when prompt-enrichment-mode=on. Output schema per §4.2.
 *   Layer 3 (fallback): regex fallback in router-classifier-regex-fallback.mjs.
 *   Layer 4 (degraded): { task_type: 'unknown', source: 'fallback', degraded: true }
 *            with explicit chat marker.
 *
 * Pure (Layer 1): no fs/exec/net; the only ambient input is the clock
 * (Date.now() for the inheritance-age check). Callers pass registry + optional prevState.
 * Layer 2: HTTP via callAnthropicAPI (ProxyAPI, header reseller-isolation).
 *
 * Legacy exports buildLLMPrompt / parseLLMResponse retained for backward
 * compatibility with older accuracy-runner snapshots and tests; not on the
 * Phase 2 hot path. The Phase 1 regex Layer 1 (classifyByRegex, TASK_TYPE_KEYWORDS,
 * HARD_KEYWORD_STEMS) moved verbatim to router-classifier-regex-fallback.mjs;
 * classifyByRegex is re-exported here for callers that still reach for it
 * through this module.
 */

import { CLASSIFIER_MODEL, INHERITANCE_MAX_AGE_MIN } from './router-config.mjs';
import { classifyByRegex } from './router-classifier-regex-fallback.mjs';
import { Agent } from 'undici';

// Keep-alive dispatcher for ProxyAPI — skips TLS handshake on subsequent calls,
// reduces tail latency 100-300ms per request. Only attached to the default
// fetchImpl; tests passing their own fetchImpl are unaffected.
// Created once at module load and shared by every defaultFetch() call.
const KEEPALIVE_DISPATCHER = new Agent({
  // Idle-socket lifetime in ms (per undici Agent/Client options).
  keepAliveTimeout: 30_000,
  // Upper bound in ms applied when the server sends its own keep-alive hint
  // (per undici docs — NOTE(review): confirm against the installed undici version).
  keepAliveMaxTimeout: 60_000,
  // Connection cap per origin (undici Pool option).
  connections: 4,
});

/**
 * Default transport: global fetch with the shared keep-alive dispatcher attached.
 *
 * @param {string} url - request URL
 * @param {object} [opts] - fetch init options; a `dispatcher` key in opts is overridden
 * @returns {Promise<Response>} whatever fetch resolves to
 */
async function defaultFetch(url, opts) {
  const init = { ...opts, dispatcher: KEEPALIVE_DISPATCHER };
  return fetch(url, init);
}

export { classifyByRegex };

// Lowercase fragments (Russian stems and English words) that mark a prompt as
// a "micro" edit. detectMicro() tests them with String.includes() against the
// lowercased prompt, so each entry matches anywhere inside a word — e.g.
// 'format' also matches inside "information".
const MICRO_KEYWORDS = [
  'опечатк', 'typo',
  'переименуй', 'rename',
  'удали мёртв', 'dead code',
  'формат', 'format',
  'константу', 'one constant',
  'увеличь', 'уменьши', 'поменяй значени', 'измени константу',
  'одну строку', 'bump',
];

/**
 * Coerce a value to a lowercase string; any falsy value becomes ''.
 *
 * @param {*} s - value to normalise
 * @returns {string}
 */
function lower(s) {
  const text = s ? String(s) : '';
  return text.toLowerCase();
}

/**
 * Report whether the prompt mentions any micro-edit keyword.
 * Matching is a case-insensitive substring search, not a whole-word match.
 *
 * @param {*} prompt - raw user prompt
 * @returns {boolean}
 */
function detectMicro(prompt) {
  const haystack = lower(prompt);
  for (const keyword of MICRO_KEYWORDS) {
    if (haystack.includes(keyword)) return true;
  }
  return false;
}

// ─── Prefilter constants (spec §4.1, Phase 2 Task 9) ────────────────────────

// Go-ahead replies. prefilter() compares the trimmed, lowercased prompt for
// exact equality against this list (Array.includes), so only a message that
// consists solely of one of these phrases counts as a continuation.
const CONTINUATION_PATTERNS = [
  'да', 'делай', 'давай', 'продолжай', 'дальше', 'ага', 'валяй',
  'поехали', 'утверждаю', 'одобряю', 'ок делай', 'хорошо делай', 'согласен делай',
];

// Acknowledgments ("thanks", "got it", ...). Same exact-equality matching;
// a hit is classified as conversation.
const ACKNOWLEDGMENT_PATTERNS = [
  'спасибо', 'понял', 'ок', 'хорошо', 'отлично', 'верно',
  'круто', 'годится', 'молодец', 'норм',
];

// Cancellations ("stop", "no", "never mind", ...). Same exact-equality
// matching; a hit is classified as conversation with a previous_rejected flag.
const CANCELLATION_PATTERNS = [
  'стоп', 'нет', 'отмени', 'отбой', 'не надо',
  'забей', 'хватит', 'достаточно',
];

// Manual override: an imperative verb at the very start of the prompt, then a
// connector ("через" / "с помощью" / "skill" / "skillом" / "skillам"), then the
// requested identifier. Capture group 3 is the identifier: word characters,
// '-' and ':' only. Case-insensitive.
const MANUAL_OVERRIDE_RE = /^(делай|сделай|используй|применя[йи]|запусти|вызови)\s+(через|с\s+помощью|skill|skill[оа]м)\s+([\w\-:]+)/i;

// Domain nouns; containsAnchor() treats a prompt containing any of them
// (as a substring of the lowercased prompt) as task-bearing.
const ANCHOR_NOUNS = [
  'аудит', 'баг', 'план', 'спека', 'фича', 'тест', 'миграция', 'endpoint', 'файл', 'функция',
  'класс', 'компонент', 'view', 'модель', 'биллинг', 'маркетинг', 'безопасность', 'пдн', 'регион',
  'портал', 'проект', 'сделка', 'лид', 'админка', 'база', 'схема', 'воронка', 'хук',
];

// Imperative verbs; containsAnchor() only consults these for prompts longer
// than 30 characters.
const ANCHOR_IMPERATIVES = [
  'проанализируй', 'проверь', 'исправь', 'почини', 'создай', 'добавь',
  'удали', 'переименуй', 'улучши', 'расширь',
];

// Lowercase alias → canonical skill slug. resolveNodeAlias() consults this map
// first, before looking at the registry. Canonical slugs map to themselves so
// that a full name resolves the same way as a short alias.
const SKILL_ALIAS_MAP = {
  tdd: 'test-driven-development',
  'test-driven-development': 'test-driven-development',
  brainstorming: 'brainstorming',
  brainstorm: 'brainstorming',
  debugging: 'systematic-debugging',
  'systematic-debugging': 'systematic-debugging',
  debug: 'systematic-debugging',
  'writing-plans': 'writing-plans',
  plan: 'writing-plans',
  plans: 'writing-plans',
  'verification-before-completion': 'verification-before-completion',
  verify: 'verification-before-completion',
  parallel: 'dispatching-parallel-agents',
  'dispatching-parallel-agents': 'dispatching-parallel-agents',
  worktree: 'using-git-worktrees',
  'using-git-worktrees': 'using-git-worktrees',
  review: 'requesting-code-review',
  'requesting-code-review': 'requesting-code-review',
};

/**
 * Decide whether a prompt carries a task "anchor".
 *
 * A prompt is anchored when it contains any ANCHOR_NOUNS entry, or when it is
 * longer than 30 characters and contains any ANCHOR_IMPERATIVES entry. Both
 * lookups are case-insensitive substring searches.
 *
 * @param {string} prompt - raw user prompt
 * @returns {boolean}
 */
function containsAnchor(prompt) {
  const text = lower(prompt);
  const mentionsNoun = ANCHOR_NOUNS.some((noun) => text.includes(noun));
  if (mentionsNoun) return true;

  const isLongEnough = prompt.length > 30;
  return isLongEnough && ANCHOR_IMPERATIVES.some((verb) => text.includes(verb));
}

/**
 * Resolve the identifier captured by the manual-override regex to a skill slug.
 *
 * Resolution order:
 *   1. own key of SKILL_ALIAS_MAP (case-insensitive);
 *   2. registry node whose slug equals the lowercased identifier;
 *   3. first registry node whose slug contains / is contained in the
 *      identifier, or whose name contains it (case-insensitive);
 *   4. otherwise the marker string `unknown_<identifier>`.
 *
 * @param {string} extracted - identifier as typed by the user
 * @param {{ nodes?: Array<{ slug?: string, name?: string }> }} [registry]
 * @returns {string|null} resolved slug, `unknown_…` marker, or null for empty input
 */
function resolveNodeAlias(extracted, registry) {
  if (!extracted) return null;
  const norm = String(extracted).toLowerCase();

  // Own-property check: a plain `SKILL_ALIAS_MAP[norm]` lookup would also hit
  // inherited keys such as "constructor" or "__proto__" (both are accepted by
  // the override regex) and return a function/object instead of a slug.
  if (Object.hasOwn(SKILL_ALIAS_MAP, norm)) return SKILL_ALIAS_MAP[norm];

  if (registry?.nodes) {
    const exact = registry.nodes.find((n) => n.slug === norm);
    if (exact) return exact.slug;
    const fuzzy = registry.nodes.find((n) => {
      const slug = String(n.slug || '').toLowerCase();
      const name = String(n.name || '').toLowerCase();
      return (slug && (slug.includes(norm) || norm.includes(slug))) || (name && name.includes(norm));
    });
    if (fuzzy) return fuzzy.slug;
  }
  // Keep the user's original spelling in the marker so it can be echoed back.
  return `unknown_${extracted}`;
}

/**
 * Prefilter — Layer 1 check chain (spec §4.1). No fs/exec/net; reads the
 * clock (Date.now()) only for the continuation age check.
 *
 * Checks run in order on the whitespace-trimmed prompt; the first hit wins:
 *   1. manual override   — MANUAL_OVERRIDE_RE matches at the start;
 *   2. continuation      — exact CONTINUATION_PATTERNS entry and prevState is
 *                          no older than INHERITANCE_MAX_AGE_MIN minutes;
 *   3. acknowledgment    — exact ACKNOWLEDGMENT_PATTERNS entry;
 *   4. cancellation      — exact CANCELLATION_PATTERNS entry;
 *   5. short conversation — fewer than 15 characters and no anchor;
 *   6. micro             — contains a MICRO_KEYWORDS fragment.
 *
 * @param {*} prompt - raw user prompt (coerced to string)
 * @param {object} [context]
 * @param {object} [context.prevState] - previous turn: { classification, timestamp, task_id }
 * @param {object} [context.registry] - node registry used to resolve a manual override target
 * @returns {object|null} classification object on a positive match, or null
 *   when the prompt is empty or must fall through to Layer 2.
 */
export function prefilter(prompt, { prevState, registry } = {}) {
  if (!prompt) return null;
  // Trim once and use the trimmed text for every check: the override regex is
  // anchored with `^` and the short-conversation rule counts characters, so
  // surrounding whitespace must not influence either of them.
  const text = String(prompt).trim();
  const p = text.toLowerCase();

  const m = text.match(MANUAL_OVERRIDE_RE);
  if (m) {
    return {
      task_type: 'manual_override',
      node: 'direct',
      source: 'prefilter',
      requested_node: resolveNodeAlias(m[3], registry),
    };
  }

  if (CONTINUATION_PATTERNS.includes(p) && prevState?.classification && prevState.timestamp) {
    const ageMs = Date.now() - new Date(prevState.timestamp).getTime();
    const ageMin = ageMs / 60000;
    // An unparseable timestamp yields NaN, which fails this comparison and
    // lets the prompt fall through to the later checks.
    if (ageMin <= INHERITANCE_MAX_AGE_MIN) {
      return {
        task_type: prevState.classification.task_type,
        node: 'direct',
        source: 'prefilter_inherited',
        recommendedNode: prevState.classification.recommendedNode ?? null,
        inheritance: {
          inherited_from_task_id: prevState.task_id ?? null,
          inheritance_age_minutes: Math.round(ageMin),
        },
      };
    }
  }

  if (ACKNOWLEDGMENT_PATTERNS.includes(p)) {
    return { task_type: 'conversation', node: 'direct', source: 'prefilter' };
  }

  if (CANCELLATION_PATTERNS.includes(p)) {
    return {
      task_type: 'conversation',
      node: 'direct',
      source: 'prefilter',
      // True only when there was a previous task that this reply can reject.
      previous_rejected: !!prevState?.task_id,
    };
  }

  if (text.length < 15 && !containsAnchor(text)) {
    return { task_type: 'conversation', node: 'direct', source: 'prefilter' };
  }

  if (detectMicro(text)) {
    return { task_type: 'micro', node: 'direct', source: 'prefilter' };
  }

  return null;
}

// ─── Layer 2: Sonnet 4.6 classifier (spec §4.2) ─────────────────────────────

// "Pamyatka" (memo): Russian-language guidance block that
// buildClassifierPromptStructured() inserts into the system prompt when
// enrichment is on. It is a runtime string sent to the model verbatim.
// Contents, in short:
//   Pattern 1 (brainstorming)         — consider at least 3 alternatives.
//   Pattern 2 (discovery-interview)   — ambiguous request → no_skill_found=true
//                                       with a "clarify A vs B vs C" suggestion.
//   Pattern 3 (writing-plans)         — single-step → chain of 1; multi-step → ordered chain of ≥2.
//   Pattern 4 (systematic-debugging)  — bugfix without clear system/expected/actual
//                                       → recommend superpowers:systematic-debugging.
const PAMYATKA = `=== ПАМЯТКА (4 паттерна, закрывает 1.1) ===

ПАТТЕРН 1 (brainstorming): обязательно рассмотри минимум 3 alternative_considered.
Один кандидат без альтернатив — плохо.

ПАТТЕРН 2 (discovery-interview): если запрос можно интерпретировать двумя+
способами — НЕ угадывай. Верни no_skill_found=true с
no_skill_found_suggestion: "ambiguous — clarify A vs B vs C".

ПАТТЕРН 3 (writing-plans): различай single-step и multi-step.
- Один глагол + объект ("поправь typo") → chain 1 элемент.
- "и"/"потом"/"затем" или подразумевается несколько этапов → chain ≥2 в порядке.

ПАТТЕРН 4 (systematic-debugging): для task_type=bugfix — проверь, чётко ли
описаны system/expected/actual. Если хотя бы одного нет — рекомендуй
superpowers:systematic-debugging (он сам потребует прояснить).`;

/**
 * Make a value safe to embed inside a double-quoted, single-line scalar.
 *
 * Backslashes are escaped first so that a backslash already present in the
 * input cannot combine with the escape added in front of a quote (`\"` in the
 * input would otherwise become `\\"`, which closes the scalar early). Line
 * breaks of any flavour (LF, CRLF, lone CR) collapse to a single space.
 *
 * @param {*} s - value to escape; falsy values become ''
 * @returns {string}
 */
function escapeYamlStr(s) {
  return String(s || '')
    .replace(/\\/g, '\\\\')
    .replace(/"/g, '\\"')
    .replace(/\r\n|\r|\n/g, ' ');
}

/**
 * Render the active registry nodes as a YAML-like list for the classifier prompt.
 *
 * Each active node yields `skill_id`, `name`, an optional quoted
 * `capabilities` line and a `triggers` list built from at most the first five
 * triggers (keyword triggers as "kw", classification triggers as "cls:x";
 * triggers with neither field are dropped).
 *
 * @param {{ nodes?: Array<object> }} registry
 * @returns {string} entries joined by newlines; '' when no node is active
 */
function buildNodesBlock(registry) {
  const renderTrigger = (trigger) => {
    if (trigger.keyword) return `"${trigger.keyword}"`;
    if (trigger.classification) return `"cls:${trigger.classification}"`;
    return null;
  };

  const entries = [];
  for (const node of registry.nodes || []) {
    if (node.status !== 'active') continue;

    const triggerList = (node.triggers || [])
      .slice(0, 5)
      .map((trigger) => renderTrigger(trigger))
      .filter(Boolean)
      .join(', ');

    let capabilityLine = '';
    if (node.capabilities) {
      capabilityLine = `\n  capabilities: "${escapeYamlStr(node.capabilities)}"`;
    }

    entries.push(`- skill_id: ${node.id}\n  name: ${node.name}${capabilityLine}\n  triggers: [${triggerList}]`);
  }
  return entries.join('\n');
}

/**
 * Render registry chains as one `- <id>: <name> [step → step]` line each.
 *
 * @param {{ chains?: Record<string, { name: string, sequence?: string[] }> }} registry
 * @returns {string} lines joined by newlines; '' when there are no chains
 */
function buildChainsBlock(registry) {
  const rows = [];
  for (const [chainId, chain] of Object.entries(registry.chains || {})) {
    const steps = (chain.sequence || []).join(' → ');
    rows.push(`- ${chainId}: ${chain.name} [${steps}]`);
  }
  return rows.join('\n');
}

/**
 * Build the classifier prompt (spec §4.2) as one string.
 *
 * Thin wrapper over buildClassifierPromptStructured(): the system and user
 * parts are wrapped in <system>…</system> and <user>…</user> tags and joined
 * with a blank line. Kept for callers that expect a single string (snapshot
 * tests, accuracy-runner historical mode); classify() uses the structured
 * form directly.
 *
 * @param {string} userPrompt - raw user prompt
 * @param {object} registry - { nodes, chains }
 * @param {object} [options]
 * @param {boolean} [options.enrichment=true] - inject the pamyatka (4 patterns)
 * @returns {string}
 */
export function buildClassifierPrompt(userPrompt, registry, { enrichment = true } = {}) {
  const parts = buildClassifierPromptStructured(userPrompt, registry, { enrichment });
  const systemSection = `<system>\n${parts.system}\n</system>`;
  const userSection = `<user>\n${parts.user}\n</user>`;
  return `${systemSection}\n\n${userSection}`;
}

/**
 * Build the classifier prompt as separate { system, user } parts.
 *
 * `system` holds everything that does not depend on the user prompt: the
 * instruction, the optional pamyatka, the active-node registry and the chain
 * registry. callAnthropicAPI() marks that part with ephemeral cache_control.
 * `user` holds only the per-call prompt.
 *
 * Cache-eligibility: Sonnet requires ≥1024 tokens in the cached block.
 * Active node registry (~85 nodes × ~100 tokens) easily clears this.
 *
 * @param {string} userPrompt - raw user prompt
 * @param {object} registry - { nodes, chains }
 * @param {object} [options]
 * @param {boolean} [options.enrichment=true] - inject the pamyatka (4 patterns)
 * @returns {{ system: string, user: string }}
 */
export function buildClassifierPromptStructured(userPrompt, registry, { enrichment = true } = {}) {
  // With enrichment off the slot collapses to a single newline so the
  // surrounding layout of the system prompt stays the same.
  let memoSection = '\n';
  if (enrichment) {
    memoSection = `\n\n${PAMYATKA}\n`;
  }
  const nodeLines = buildNodesBlock(registry);
  const chainLines = buildChainsBlock(registry);

  return {
    system: `Ты классификатор задач для CRM-проекта «Лидерра» (Laravel 13 + Vue 3 + Vuetify 3).

ОБЯЗАТЕЛЬНЫЕ выходные правила:
1. Верни ровно один из: skill ИЛИ chain ИЛИ no_skill_found.
2. "direct" НЕ разрешён. Conversation/micro обрабатываются ДО тебя.
3. Верни топ-3 alternatives_considered со score (0-1) и причиной отклонения.
4. reason_for_choice — конкретно, со ссылкой на capability.
5. recommended_chain — массив из 1-5 skill IDs.
6. Если ни один узел не подходит — no_skill_found=true + suggestion.
${memoSection}
=== РЕЕСТР УЗЛОВ ===
${nodeLines}

=== РЕЕСТР ЦЕПОЧЕК (справочно) ===
${chainLines}

Output — ONLY JSON object, no prose, no code fences.`,
    user: `Prompt: ${userPrompt}`,
  };
}

/**
 * Parse the classifier response (spec §4.2).
 *
 * Accepted shapes:
 *   - a bare JSON object;
 *   - a JSON object inside a ```json … ``` or plain ``` … ``` fence;
 *   - a JSON object embedded in prose ("Here is the result: { … }").
 *
 * @param {*} text - raw model output
 * @returns {object|null} the parsed object, or null when nothing parses or the
 *   parsed value has no string `task_type`. Other fields are not validated.
 */
export function parseClassifierResponse(text) {
  if (!text) return null;

  const body = String(text)
    .trim()
    .replace(/^```(?:json)?\s*\n?/, '')
    .replace(/\n?```$/, '')
    .trim();

  // Returns the decoded value only when it carries a string task_type; both
  // invalid JSON and a non-object result (e.g. `null`) end up as null.
  const decode = (candidate) => {
    try {
      const value = JSON.parse(candidate);
      return typeof value.task_type === 'string' ? value : null;
    } catch {
      return null;
    }
  };

  // First try: the whole (fence-stripped) text is the JSON document.
  const whole = decode(body);
  if (whole) return whole;

  // Second try: take everything from the first `{` to the last `}`. The
  // classifier emits one top-level object, so the outer braces bracket it.
  const open = body.indexOf('{');
  const close = body.lastIndexOf('}');
  if (open === -1 || close <= open) return null;
  return decode(body.slice(open, close + 1));
}

// ─── Legacy LLM prompt/parser (kept for backward compat) ────────────────────

// System prompt of the legacy classifier; buildLLMPrompt() places it at the
// top of the prompt. It asks for a camelCase JSON object (taskType, micro,
// recommendedNode, confidence, recommendedChain, reasoning) — a different
// schema from the snake_case one parseClassifierResponse() expects.
// Runtime string: sent to the model verbatim.
const LEGACY_LLM_SYSTEM_PROMPT = `You are a router classifier for an AI coding assistant. Given a user prompt and a registry of available skills/tools (nodes), choose:
- taskType: one of {feature, planning, bugfix, refactor, cleanup, marketing, security, analysis, monitoring, memory-sync, question, unknown}
- micro: true if the task is a tiny edit (≤2 files, ≤20 lines, e.g. typo / rename / single constant)
- recommendedNode: id of the single best-matching active node, or null if nothing matches
- confidence: 0.0-1.0
- recommendedChain: id of the chain (L1-L16) if the task fits a known chain, else null
- reasoning: 1-2 sentences why

Reply with ONLY a JSON object, no prose. Example:
{"taskType":"bugfix","micro":false,"recommendedNode":"#62","confidence":0.9,"recommendedChain":null,"reasoning":"keyword 'списание' matches #62 billing-audit"}`;

/**
 * Build the legacy single-string classifier prompt.
 *
 * Layout: legacy system prompt, "## Available nodes" (active nodes only, up
 * to three triggers each), "## Available chains", "## User prompt", and a
 * closing reminder to answer with JSON only.
 *
 * @param {string} prompt - raw user prompt
 * @param {{ nodes?: Array<object>, chains?: object }} registry
 * @returns {string}
 */
export function buildLLMPrompt(prompt, registry) {
  const describeTrigger = (trigger) => trigger.keyword || `cls:${trigger.classification}`;

  const nodeLines = [];
  for (const node of registry.nodes || []) {
    if (node.status !== 'active') continue;
    // describeTrigger always yields a non-empty string, so filter(Boolean)
    // removes nothing; it is kept so the output stays identical to what
    // existing snapshots were recorded against.
    const triggers = (node.triggers || [])
      .slice(0, 3)
      .map((trigger) => describeTrigger(trigger))
      .filter(Boolean)
      .join(', ');
    nodeLines.push(`- ${node.id} ${node.name} [${triggers}]`);
  }

  const chainLines = [];
  for (const [chainId, chain] of Object.entries(registry.chains || {})) {
    chainLines.push(`- ${chainId}: ${chain.name} [${(chain.sequence || []).join(' → ')}]`);
  }

  return `${LEGACY_LLM_SYSTEM_PROMPT}

## Available nodes
${nodeLines.join('\n')}

## Available chains
${chainLines.join('\n')}

## User prompt
${prompt}

Reply with JSON object only.`;
}

/**
 * Parse a legacy classifier response.
 *
 * Strips an optional ``` / ```json fence and parses the rest as JSON. Unlike
 * parseClassifierResponse() it does not look for JSON embedded in prose.
 *
 * @param {*} text - raw model output
 * @returns {object|null} parsed object, or null when the input is empty,
 *   is not valid JSON, or has no string `taskType`.
 */
export function parseLLMResponse(text) {
  if (!text) return null;

  const payload = String(text)
    .trim()
    .replace(/^```(?:json)?\s*\n?/, '')
    .replace(/\n?```$/, '')
    .trim();

  try {
    const decoded = JSON.parse(payload);
    // Property access stays inside the try: a decoded `null` throws here and
    // is reported as a parse failure.
    return typeof decoded.taskType === 'string' ? decoded : null;
  } catch {
    return null;
  }
}

// ─── HTTP transport (ProxyAPI, header reseller-isolation) ───────────────────

// Default base URL for callAnthropicAPI(), which strips trailing slashes and
// appends `/v1/messages`. classify() passes ROUTER_LLM_BASE_URL from the
// environment instead when that variable is set.
const DEFAULT_LLM_BASE_URL = 'https://api.proxyapi.ru/anthropic';

/**
 * POST to ProxyAPI `/v1/messages` with retry and a per-attempt timeout.
 *
 * First argument is overloaded:
 *   - string → legacy single-message body (no prompt caching).
 *   - { system, user } → split body with ephemeral cache_control on the
 *     `system` block, so the provider can cache it between calls.
 *
 * Retry policy: HTTP 5xx, HTTP 429 and thrown errors (network failure,
 * per-attempt timeout, unreadable body) are retried up to `maxRetries` times
 * with exponential backoff (`retryBaseDelayMs * 2 ** attempt`). Any other
 * non-OK status fails immediately with an error carrying `fatal = true`.
 *
 * @param {string|{system: string, user: string}} promptOrMessages
 * @param {object} options
 * @param {string} options.apiKey - sent as both `authorization: Bearer` and `x-api-key`
 * @param {string} [options.baseUrl] - base URL; trailing slashes are stripped
 * @param {string} [options.model] - model id
 * @param {Function} [options.fetchImpl] - fetch-compatible function
 * @param {number} [options.maxRetries=4] - retries after the first attempt
 * @param {number} [options.retryBaseDelayMs=1000] - backoff base delay
 * @param {number} [options.perAttemptTimeoutMs=30000] - abort deadline per attempt
 * @param {Function} [options.sleepImpl] - `(ms) => Promise`, injectable for tests
 * @param {Function} [options.onUsage] - receives the response `usage` object
 *   (input_tokens / output_tokens / cache_creation_input_tokens /
 *   cache_read_input_tokens) when present
 * @param {Function} [options.onMetrics] - always called once on exit with
 *   `{ latency_ms, retry_count_internal }`; `retry_count_internal` is the
 *   number of retries performed (0 when the first attempt settled the call,
 *   `maxRetries` when every attempt failed)
 * @returns {Promise<string>} text of the first content block, or '' if absent
 * @throws {Error} fatal HTTP error, or the last retryable error once retries are exhausted
 */
export async function callAnthropicAPI(promptOrMessages, {
  apiKey,
  baseUrl = DEFAULT_LLM_BASE_URL,
  model = CLASSIFIER_MODEL,
  fetchImpl = defaultFetch,
  maxRetries = 4,
  retryBaseDelayMs = 1000,
  perAttemptTimeoutMs = 30_000,
  sleepImpl = (ms) => new Promise((res) => setTimeout(res, ms)),
  onUsage,
  onMetrics,
}) {
  const url = `${String(baseUrl).replace(/\/+$/, '')}/v1/messages`;
  let body;
  if (typeof promptOrMessages === 'string') {
    body = JSON.stringify({
      model,
      max_tokens: 1500,
      messages: [{ role: 'user', content: promptOrMessages }],
    });
  } else {
    const { system, user } = promptOrMessages;
    body = JSON.stringify({
      model,
      max_tokens: 1500,
      system: [{ type: 'text', text: system, cache_control: { type: 'ephemeral' } }],
      messages: [{ role: 'user', content: user }],
    });
  }
  const headers = {
    'authorization': `Bearer ${apiKey}`,
    'x-api-key': apiKey,
    'anthropic-version': '2023-06-01',
    'content-type': 'application/json',
  };

  // Pass 2 metric capture (project-brain-factor-analysis-4passes).
  const started = Date.now();
  // Tracked separately from the loop counter: after the final failed attempt
  // the loop counter is maxRetries + 1, which would over-report retries by one.
  let retriesUsed = 0;
  const emitMetrics = () => {
    if (!onMetrics) return;
    // Observer callbacks are best-effort: a throwing callback must not mask
    // the real outcome of the call.
    try { onMetrics({ latency_ms: Date.now() - started, retry_count_internal: retriesUsed }); } catch { /* swallow */ }
  };

  let lastError;
  try {
    for (let attempt = 0; attempt <= maxRetries; attempt++) {
      retriesUsed = attempt;
      const ctrl = new AbortController();
      const timer = setTimeout(() => ctrl.abort(new Error(`per-attempt timeout ${perAttemptTimeoutMs}ms`)), perAttemptTimeoutMs);
      try {
        const r = await fetchImpl(url, { method: 'POST', headers, body, signal: ctrl.signal });
        if (r.ok) {
          const data = await r.json();
          if (onUsage && data.usage) {
            try { onUsage(data.usage); } catch { /* swallow callback errors */ }
          }
          return data.content?.[0]?.text || '';
        }
        const httpError = new Error(`Router LLM ${r.status}: ${await r.text()}`);
        // Retry on 5xx and 429; fail fast on other 4xx (auth/quota/bad request — retry won't help).
        if (r.status >= 500 || r.status === 429) {
          lastError = httpError;
        } else {
          httpError.fatal = true;
          throw httpError;
        }
      } catch (err) {
        // Re-throw fatal errors (4xx) instead of retrying them.
        if (err && err.fatal) throw err;
        // Network-level failure (fetch failed / ECONNRESET / TLS / per-attempt timeout). Retry-eligible.
        lastError = err;
      } finally {
        // Runs on every path out of the attempt, including the fatal re-throw.
        clearTimeout(timer);
      }
      if (attempt < maxRetries) {
        await sleepImpl(retryBaseDelayMs * 2 ** attempt);
      }
    }
    // lastError is only unset when the loop never ran (negative maxRetries).
    throw lastError ?? new Error('Router LLM: no attempt was made (maxRetries < 0)');
  } finally {
    emitMetrics();
  }
}

/**
 * Pass 2 — categorize an LLM transport failure for the factor-analysis
 * error_type axis.
 *
 * HTTP categories are read from the error's own message (callAnthropicAPI
 * formats it as "Router LLM <status>: …"). Network categories also look at
 * `err.cause`: fetch reports low-level failures as a generic
 * "fetch failed" TypeError and attaches the underlying error (with its
 * `code` / `name` / `message`) as the cause.
 *
 * @param {*} err - thrown value
 * @returns {'http_4xx'|'http_5xx'|'econnreset'|'timeout'|'other'}
 *   Note: 429 is reported as 'http_5xx' (the retryable-HTTP bucket).
 */
export function classifyLLMError(err) {
  if (!err) return 'other';
  const msg = String(err.message || err);
  if (err.fatal && /\b4\d\d\b/.test(msg)) return 'http_4xx';
  if (/\b5\d\d\b/.test(msg) || /429\b/.test(msg)) return 'http_5xx';

  // Flatten whatever the cause exposes into one searchable string.
  const cause = err.cause;
  let causeText = '';
  if (cause) {
    causeText = typeof cause === 'object'
      ? [cause.code, cause.name, cause.message].filter(Boolean).join(' ')
      : String(cause);
  }

  const connectionRe = /ECONNRESET|ECONNREFUSED|ENOTFOUND|EAI_AGAIN|socket hang up/i;
  if (connectionRe.test(msg) || connectionRe.test(causeText)) return 'econnreset';

  const isTimeout = err.name === 'AbortError'
    || /\btimeout\b/i.test(msg)
    || /ETIMEDOUT/.test(msg)
    // Cause names/codes embed the word without word boundaries
    // (ConnectTimeoutError, UND_ERR_CONNECT_TIMEOUT), hence the looser match.
    || /timeout|ETIMEDOUT|AbortError/i.test(causeText);
  if (isTimeout) return 'timeout';

  return 'other';
}

/**
 * 32-bit rolling hash (h = 31·h + charCode, wrapped to a signed int32) of the
 * prompt, returned as a decimal string. Used by classify() as the cache key.
 *
 * The input is coerced to a string first: classify() reaches this function
 * for any prompt the prefilter did not handle, including null/undefined, and
 * reading `.length` on those would throw. null and undefined hash like ''.
 *
 * @param {*} s - prompt
 * @returns {string} signed 32-bit hash as a decimal string
 */
function hashPrompt(s) {
  const text = String(s ?? '');
  let h = 0;
  for (let i = 0; i < text.length; i++) {
    h = ((h << 5) - h) + text.charCodeAt(i);
    h |= 0; // wrap to signed 32-bit
  }
  return String(h);
}

/**
 * classify — full Layer 1 + Layer 2 pipeline (spec §4.1, §4.2).
 *
 * Flow:
 *   1. prefilter(prompt, { prevState, registry }). A non-null result is returned as is.
 *   2. Cache lookup by hashPrompt(prompt); a hit is returned with source 'cache'.
 *   3. LLM call (options.llmCall, or the built-in ProxyAPI call).
 *   4. LLM call throws → regex fallback, marked degraded, with llmError / llm_error_type.
 *   5. LLM call returns a falsy value → regex fallback with llm_error_type
 *      'no_key' (no metrics were reported) or 'parse_null' (metrics were reported).
 *   6. Otherwise the LLM result is returned with source 'llm' and stored in the cache.
 *
 * Steps 4-6 attach latency_ms and retry_count_internal (null when unknown).
 *
 * @param {string} prompt - raw user prompt
 * @param {object} registry - { nodes, chains }
 * @param {object} [options]
 * @param {object} [options.prevState] - passed to prefilter for continuation/cancellation context
 * @param {Map} [options.cache] - hashPrompt(prompt) → result
 * @param {Function} [options.llmCall] - `({ onMetrics }) => parsed-result-or-null`; used by tests to mock
 * @param {boolean} [options.enrichment=true] - controls the pamyatka in the classifier prompt
 * @param {string} [options.model] - classifier model id override
 * @param {Function} [options.onUsage] - forwarded to callAnthropicAPI
 * @returns {Promise<object>} classification result
 */
export async function classify(prompt, registry, options = {}) {
  // Layer 1 — prefilter.
  const prefiltered = prefilter(prompt, { prevState: options.prevState, registry });
  if (prefiltered !== null) {
    return prefiltered;
  }

  // Cache.
  const { cache } = options;
  const cacheKey = hashPrompt(prompt);
  if (cache && cache.has(cacheKey)) {
    return { ...cache.get(cacheKey), source: 'cache' };
  }

  // Layer 2 — built-in call: structured prompt so the system block can be
  // cached by the provider. Returns null without touching the network when
  // ROUTER_LLM_KEY is not set.
  const callViaProxy = async ({ onMetrics } = {}) => {
    const apiKey = process.env.ROUTER_LLM_KEY;
    if (!apiKey) return null;
    const enrichment = options.enrichment ?? true;
    const structured = buildClassifierPromptStructured(prompt, registry, { enrichment });
    const text = await callAnthropicAPI(structured, {
      apiKey,
      baseUrl: process.env.ROUTER_LLM_BASE_URL || undefined,
      model: options.model || CLASSIFIER_MODEL,
      onUsage: options.onUsage,
      onMetrics,
    });
    return parseClassifierResponse(text);
  };
  const llmCall = options.llmCall || callViaProxy;

  // Filled in by the onMetrics callback; stays null if the call never reports.
  let metrics = null;
  const metricFields = () => ({
    latency_ms: metrics?.latency_ms ?? null,
    retry_count_internal: metrics?.retry_count_internal ?? null,
  });

  let llmResult;
  try {
    llmResult = await llmCall({
      onMetrics: (reported) => { metrics = reported; },
    });
  } catch (err) {
    // Layer 3 — regex fallback on LLM transport error.
    return {
      ...classifyByRegex(prompt, registry),
      llmError: err.message,
      llm_error_type: classifyLLMError(err),
      ...metricFields(),
      degraded: true,
    };
  }

  if (!llmResult) {
    // Layer 3 — regex fallback. Metrics tell the two causes apart: none were
    // reported when the API was never called (no key); some were reported
    // when the API answered but the answer could not be parsed.
    return {
      ...classifyByRegex(prompt, registry),
      llm_error_type: metrics ? 'parse_null' : 'no_key',
      ...metricFields(),
    };
  }

  const finalResult = { ...llmResult, source: 'llm', ...metricFields() };
  if (cache) {
    cache.set(cacheKey, finalResult);
  }
  return finalResult;
}