/**
 * Router embedding layer (Phase 2 Task 12, spec §4.3).
 *
 * Computes 384-dim sentence embeddings via Xenova/all-MiniLM-L6-v2 for
 * NON-trivial classified episodes. Trivial task types (conversation / micro /
 * manual_override) are skipped — semantic search on a bare "yes" or "thanks"
 * is wasted compute.
 *
 * Storage: base64-encoded Float32Array (2048 chars per 384-dim vector).
 * Stored on the episode as `prompt_embedding_base64` (Phase 3 parser writes).
 *
 * Fallback: model load or inference failure → embed() returns null. Caller
 * marks `environment.embedding_unavailable = true` on the episode (parser).
 *
 * Lazy load: @xenova/transformers is heavy (native ONNX runtime, ~50 MB). The
 * pipeline is created on the first embed() call and cached; the dedicated
 * `tools/router-embedding-warmup.mjs` hook fires this on SessionStart so the
 * first real prompt doesn't pay the cold-start cost.
 */

import { Buffer } from 'buffer';

/** Task types whose prompts are never embedded. */
const EMBED_EXEMPT_TASK_TYPES = new Set(['conversation', 'micro', 'manual_override']);

/** Model identifier handed to the feature-extraction pipeline. */
const EMBEDDING_MODEL = 'Xenova/all-MiniLM-L6-v2';

/**
 * Decide whether an episode of the given task type should get an embedding.
 *
 * @param {unknown} taskType - Classified task type of the episode.
 * @returns {boolean} `false` for missing, empty, or non-string values and for
 *   the exempt task types; `true` for every other string.
 */
export function shouldEmbed(taskType) {
  if (!taskType || typeof taskType !== 'string') return false;
  return !EMBED_EXEMPT_TASK_TYPES.has(taskType);
}

/**
 * Serialize a typed array to base64 using its raw bytes.
 *
 * Only the bytes covered by the view are encoded (byteOffset / byteLength are
 * honored), so a subarray encodes just its own elements.
 *
 * @param {Float32Array} arr - Vector to serialize.
 * @returns {string} Base64 text of the view's bytes.
 */
export function encodeBase64(arr) {
  return Buffer.from(arr.buffer, arr.byteOffset, arr.byteLength).toString('base64');
}

/**
 * Deserialize base64 text produced by encodeBase64() back into floats.
 *
 * The decoded bytes are copied into a dedicated Float32Array rather than
 * viewed in place: a small Buffer may be a slice of a larger shared
 * allocation, so an in-place view would depend on the slice's byteOffset
 * being 4-byte aligned and would keep the whole backing allocation alive.
 *
 * Trailing bytes that do not fill a complete 32-bit float are ignored.
 *
 * @param {string} b64 - Base64 text.
 * @returns {Float32Array} Freshly allocated vector (byteOffset 0).
 */
export function decodeBase64(b64) {
  const bytes = Buffer.from(b64, 'base64');
  const count = Math.floor(bytes.byteLength / Float32Array.BYTES_PER_ELEMENT);
  const floats = new Float32Array(count);
  new Uint8Array(floats.buffer).set(
    bytes.subarray(0, count * Float32Array.BYTES_PER_ELEMENT),
  );
  return floats;
}

/** Cached promise for the lazily created pipeline; null until first use. */
let _pipelinePromise = null;

/**
 * Return the shared feature-extraction pipeline, creating it on first use.
 *
 * Concurrent callers share one in-flight load. If the load rejects, the cache
 * is cleared so the next call starts a fresh attempt.
 *
 * @returns {Promise<Function>} Promise resolving to the pipeline callable.
 */
async function getPipeline() {
  if (_pipelinePromise) return _pipelinePromise;

  const pending = (async () => {
    // Dynamic import keeps the heavy dependency off the module-load path.
    const mod = await import('@xenova/transformers');
    return mod.pipeline('feature-extraction', EMBEDDING_MODEL);
  })();
  _pipelinePromise = pending;

  // Reset promise on error so a transient failure doesn't poison subsequent
  // calls. The identity check avoids clearing a newer attempt.
  pending.catch(() => {
    if (_pipelinePromise === pending) _pipelinePromise = null;
  });

  return pending;
}

/**
 * Compute embedding for a prompt. Returns Float32Array(384) on success, null
 * on any failure (model load error, runtime exception). Caller must handle null.
 *
 * @param {string} prompt - Text to embed.
 * @returns {Promise<Float32Array|null>} Mean-pooled, normalized embedding, or
 *   null when the pipeline could not be loaded or inference threw.
 */
export async function embed(prompt) {
  try {
    const pipe = await getPipeline();
    const out = await pipe(prompt, { pooling: 'mean', normalize: true });
    // Copy so the caller owns the result independently of the pipeline output.
    return new Float32Array(out.data);
  } catch {
    // Deliberate best-effort: any failure is reported to the caller as null.
    return null;
  }
}