397777089e
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
69 lines
2.4 KiB
JavaScript
69 lines
2.4 KiB
JavaScript
#!/usr/bin/env node
|
|
/**
 * Router embedding layer (Phase 2 Task 12, spec §4.3).
 *
 * Computes 384-dim sentence embeddings via Xenova/all-MiniLM-L6-v2 for
 * NON-trivial classified episodes. Trivial task types (conversation / micro /
 * manual_override) are skipped — semantic search on prompts such as "да"
 * ("yes") or "спасибо" ("thanks") is wasted compute.
 *
 * Storage: base64-encoded Float32Array (2048 chars per 384-dim vector).
 * Stored on the episode as `prompt_embedding_base64` (written by the Phase 3
 * parser).
 *
 * Fallback: on model load or inference failure, embed() returns null. The
 * caller (the parser) then marks `environment.embedding_unavailable = true`
 * on the episode.
 *
 * Lazy load: @xenova/transformers is heavy (native ONNX runtime, ~50 MB). The
 * pipeline is created on the first embed() call and cached; the dedicated
 * `tools/router-embedding-warmup.mjs` hook triggers this on SessionStart so
 * the first real prompt doesn't pay the cold-start cost.
 */
|
|
|
|
import { Buffer } from 'buffer';
|
|
|
|
// Task types for which shouldEmbed() reports that no embedding is needed.
const EMBED_EXEMPT_TASK_TYPES = new Set(['conversation', 'micro', 'manual_override']);

// Model identifier passed to the feature-extraction pipeline.
const EMBEDDING_MODEL = 'Xenova/all-MiniLM-L6-v2';

/**
 * Decide whether an episode with the given task type should be embedded.
 *
 * @param {*} taskType - Classified task type; anything other than a non-empty
 *   string is treated as "do not embed".
 * @returns {boolean} true only for a non-empty string that is not one of the
 *   exempt task types (matching is exact and case-sensitive).
 */
export function shouldEmbed(taskType) {
  const isUsableLabel = typeof taskType === 'string' && taskType.length > 0;
  return isUsableLabel && !EMBED_EXEMPT_TASK_TYPES.has(taskType);
}
|
|
|
|
/**
 * Serialise a typed array's raw bytes as a base64 string.
 *
 * Only the bytes covered by the view (byteOffset .. byteOffset + byteLength)
 * are encoded, so subarray views are handled correctly.
 *
 * @param {{buffer: ArrayBuffer, byteOffset: number, byteLength: number}} arr -
 *   Typed array (e.g. a Float32Array) to encode.
 * @returns {string} Base64 text of the view's bytes.
 */
export function encodeBase64(arr) {
  const { buffer, byteOffset, byteLength } = arr;
  const rawBytes = Buffer.from(buffer, byteOffset, byteLength);
  return rawBytes.toString('base64');
}
|
|
|
|
/**
 * Decode a base64 string of raw float32 bytes back into a Float32Array.
 *
 * The bytes are copied into a freshly allocated Float32Array rather than
 * viewed in place. Buffer.from(string) may return a slice of Node's shared
 * internal pool, so an in-place view would depend on that slice happening to
 * be 4-byte aligned and would expose the whole pool slab through `.buffer`.
 *
 * @param {string} b64 - Base64 text holding float32 values in platform byte
 *   order.
 * @returns {Float32Array} Standalone array (byteOffset 0, own ArrayBuffer).
 *   Trailing bytes that do not fill a whole float are dropped.
 */
export function decodeBase64(b64) {
  const bytes = Buffer.from(b64, 'base64');
  const floatCount = Math.floor(bytes.byteLength / Float32Array.BYTES_PER_ELEMENT);
  const floats = new Float32Array(floatCount);
  // Byte-wise copy keeps the exact bit patterns and needs no alignment.
  new Uint8Array(floats.buffer).set(bytes.subarray(0, floats.byteLength));
  return floats;
}
|
|
|
|
// Cached in-flight or settled pipeline promise; null means "not loaded yet".
let _pipelinePromise = null;

/**
 * Lazily create, and then reuse, the feature-extraction pipeline.
 *
 * The first call dynamically imports @xenova/transformers and starts building
 * the pipeline; later calls share that same promise. If loading fails, the
 * cache is cleared so a subsequent call can retry.
 *
 * @returns {Promise<Function>} Resolves to the pipeline; rejects if the import
 *   or pipeline construction fails.
 */
async function getPipeline() {
  if (!_pipelinePromise) {
    const loading = import('@xenova/transformers').then(
      (mod) => mod.pipeline('feature-extraction', EMBEDDING_MODEL),
    );
    // A transient failure must not poison later calls: drop the cached promise.
    loading.catch(() => {
      _pipelinePromise = null;
    });
    _pipelinePromise = loading;
  }
  return _pipelinePromise;
}
|
|
|
|
/**
 * Compute the embedding for a prompt.
 *
 * Runs the cached pipeline with mean pooling and normalisation and copies the
 * output's `data` into a new Float32Array (384 values for the model named in
 * the file header). This function never throws: a model load error or any
 * runtime exception results in null, which the caller must handle.
 *
 * @param {string} prompt - Text to embed.
 * @returns {Promise<Float32Array|null>} The embedding, or null on any failure.
 */
export async function embed(prompt) {
  let vector = null;
  try {
    const extractor = await getPipeline();
    const { data } = await extractor(prompt, { pooling: 'mean', normalize: true });
    vector = new Float32Array(data);
  } catch {
    // Deliberate best-effort: failures are reported to the caller as null.
  }
  return vector;
}
|