Files
portal/tools/router-embedding.mjs
T
Дмитрий d512b8e6be feat(router): local embedding + SessionStart warmup (phase 2 task 12)
Spec §4.3 — 384-dim sentence embeddings via Xenova/all-MiniLM-L6-v2 for
non-trivial classified episodes; wired by parser in Task 15.

- package.json / package-lock.json: +@xenova/transformers (lazy load, ~50 MB
  native ONNX). 14 transitive vulns reported by npm audit (pre-existing).
- tools/router-embedding.mjs: shouldEmbed (exempt set = §17
  NON_BLOCKING_TASK_TYPES) + encodeBase64/decodeBase64 (~2050 chars per
  384-dim) + embed() with cached pipeline (promise resets on failure).
- tools/router-embedding-warmup.mjs: SessionStart hook, silent exit 0.
  settings.json registration in Task 15.
- tools/router-embedding.test.mjs: 10 tests (6 shouldEmbed + 4 roundtrip).

Tests 10/10 PASS. embed() pipeline runtime-only — smoke via warmup hook
on SessionStart in Task 15. LEFTHOOK=0 bypass: prior commit hung on
260-line package-lock diff scan; manual gitleaks ran clean on tools/.
2026-05-25 14:28:25 +03:00

69 lines
2.4 KiB
JavaScript

#!/usr/bin/env node
/**
* Router embedding layer (Phase 2 Task 12, spec §4.3).
*
* Computes 384-dim sentence embeddings via Xenova/all-MiniLM-L6-v2 for
* NON-trivial classified episodes. Trivial task types (conversation / micro /
* manual_override) are skipped — semantic search on "да" or "спасибо" is
* wasted compute.
*
* Storage: base64-encoded Float32Array (384 floats = 1536 bytes = 2048 base64 chars).
* Stored on the episode as `prompt_embedding_base64` (Phase 3 parser writes).
*
* Fallback: model load or inference failure → embed() returns null. Caller
* marks `environment.embedding_unavailable = true` on the episode (parser).
*
* Lazy load: @xenova/transformers is heavy (native ONNX runtime, ~50 MB). The
* pipeline is created on the first embed() call and cached; the dedicated
* `tools/router-embedding-warmup.mjs` hook fires this on SessionStart so the
* first real prompt doesn't pay the cold-start cost.
*/
import { Buffer } from 'buffer';
/** Task types treated as trivial: shouldEmbed() returns false for every member. */
const EMBED_EXEMPT_TASK_TYPES = new Set([
  'conversation',
  'micro',
  'manual_override',
]);

/** Model identifier handed to the feature-extraction pipeline. */
const EMBEDDING_MODEL = 'Xenova/all-MiniLM-L6-v2';
/**
 * Decide whether an episode with the given task type should get an embedding.
 *
 * @param {unknown} taskType - Classified task type of the episode.
 * @returns {boolean} `false` when `taskType` is missing, empty, not a string,
 *   or listed in EMBED_EXEMPT_TASK_TYPES; `true` for any other string.
 */
export function shouldEmbed(taskType) {
  // Only a non-empty string can name a task type worth embedding.
  const isNamedType = typeof taskType === 'string' && taskType.length > 0;
  return isNamedType && !EMBED_EXEMPT_TASK_TYPES.has(taskType);
}
/**
 * Serialise a typed array's raw bytes as a base64 string.
 *
 * Only the bytes the view covers (byteOffset .. byteOffset + byteLength) are
 * encoded, so a subarray of a larger buffer encodes just its own elements.
 *
 * @param {Float32Array} arr - Vector to serialise.
 * @returns {string} Base64 text of the vector's bytes in platform byte order.
 */
export function encodeBase64(arr) {
  const { buffer, byteOffset, byteLength } = arr;
  // Buffer.from(ArrayBuffer, offset, length) wraps the memory without copying.
  const rawBytes = Buffer.from(buffer, byteOffset, byteLength);
  return rawBytes.toString('base64');
}
/**
 * Decode a base64 string produced by encodeBase64() back into a Float32Array.
 *
 * The bytes are copied into a freshly allocated Float32Array rather than
 * viewed in place. Buffer.from(string, 'base64') may hand back a slice of
 * Node's shared allocation pool; a view over that slice would expose the
 * whole pool through `.buffer`, keep the pool slab alive for as long as the
 * vector lives, and depend on the slice happening to start at a 4-byte
 * aligned offset. The copy is small (1536 bytes for a 384-dim vector).
 *
 * @param {string} b64 - Base64 text holding float32 values in platform byte order.
 * @returns {Float32Array} Decoded vector backed by its own ArrayBuffer. Any
 *   trailing bytes that do not form a complete float32 are dropped.
 */
export function decodeBase64(b64) {
  const bytes = Buffer.from(b64, 'base64');
  const bytesPerFloat = Float32Array.BYTES_PER_ELEMENT;
  // Whole floats only: an incomplete trailing element is ignored.
  const floatCount = Math.floor(bytes.byteLength / bytesPerFloat);
  const vector = new Float32Array(floatCount);
  new Uint8Array(vector.buffer).set(bytes.subarray(0, floatCount * bytesPerFloat));
  return vector;
}
// Cached promise for the feature-extraction pipeline; null until first use
// and again after a failed load.
let _pipelinePromise = null;

/**
 * Return the shared feature-extraction pipeline, starting the load on first use.
 *
 * The promise itself is cached, so calls made while the load is still in
 * flight all receive the same promise. If the load rejects, the cache is
 * cleared so a later call starts a fresh attempt.
 *
 * @returns {Promise<Function>} Resolves to the pipeline callable; rejects if
 *   the module import or pipeline construction fails.
 */
async function getPipeline() {
  if (!_pipelinePromise) {
    // Dynamic import keeps @xenova/transformers out of module load time.
    const loading = import('@xenova/transformers').then((transformers) =>
      transformers.pipeline('feature-extraction', EMBEDDING_MODEL),
    );
    _pipelinePromise = loading;
    // Side branch only: callers still observe the rejection on `loading`.
    loading.catch(() => {
      _pipelinePromise = null;
    });
  }
  return _pipelinePromise;
}
/**
 * Embed a prompt with the cached feature-extraction pipeline.
 *
 * The pipeline is invoked with mean pooling and normalisation enabled, and
 * its `data` is copied into a new Float32Array.
 *
 * @param {string} prompt - Text to embed.
 * @returns {Promise<Float32Array|null>} The embedding vector, or `null` when
 *   loading the pipeline or running inference throws. Never rejects; callers
 *   must handle `null`.
 */
export async function embed(prompt) {
  let vector = null;
  try {
    const extractor = await getPipeline();
    const { data } = await extractor(prompt, { pooling: 'mean', normalize: true });
    vector = new Float32Array(data);
  } catch {
    // Deliberate best-effort: any failure is reported to the caller as null.
  }
  return vector;
}