58784b182d
Closes the 4-pass factor-analysis expansion plan in
memory/project_brain_factor_analysis_4passes.md. Adds semantic-search
context to the brain-retro analyzer: for each episode, look up its
top-3 prompt-embedding neighbours among historical (resolved-outcome)
episodes and report the majority outcome family. Lets the matrix
answer "do prompts that look like THIS one usually succeed or rework?"
# New module: tools/observer-embedding-index.mjs (pure, fs-free)
- mapOutcomeToFamily(outcome): success / soft_success → 'success',
rework → 'retry', blocked / partial → 'failure', else null.
- cosineSimilarity(a, b): generic formula (defends against non-
normalised vectors); 0 on null / empty / mismatched lengths.
- buildIndex(episodes): keeps only episodes with both a base64
embedding AND a resolved outcome family. Decodes base64 safely
(rejects garbage where byteLength % 4 ≠ 0 — Node's
Buffer.from('garbage', 'base64') silently strips invalid chars).
- findNearestNeighbors(target, index, k, opts): top-k by descending
cosine. Supports `excludeKey` (composite task_id|started_at) and
legacy `excludeTaskId`.
- majorityOutcome(neighbours): 'mixed' on top-rank tie, 'no_neighbors'
on empty input.
- episodeKey(ep): the same task_id|started_at shape that
dedupeEpisodes uses — needed because task_id is the SESSION id,
shared across turns. task_id alone cannot identify a single turn.
# brain-retro-analyzer.mjs
- New FACTOR_FNS axis similar_past_outcome_majority reading the
pre-computed episode._similarPastOutcomeMajority field.
- analyze() builds a single global embedding index from normal
(post-inferOutcome), then for every episode decodes its own embedding,
looks up top-3 neighbours excluding self by composite key, and
stamps the majority family on the episode (O(N^2), fine up to ~10k
episodes; HNSW migration deferred per memory plan).
- Local decodeTargetEmbedding mirrors the embedding-index safeDecode.
# Tests
20 new tests (RED -> GREEN):
- observer-embedding-index.test.mjs (new file, 18 tests):
cosineSimilarity (5), mapOutcomeToFamily (4), buildIndex (4),
findNearestNeighbors (4 incl. self-exclusion), majorityOutcome (3).
- brain-retro-analyzer.test.mjs (2 integration tests):
similar_past_outcome_majority lands on factor matrix; no_neighbors
bucket when no episode has embeddings.
Targeted sweep: 632/632 PASS on the 2 directly-affected suites.
Broader tools/ sweep: 7968/7969 PASS. Pre-existing 1 test failure in
observer-self-assessment-api.test.mjs:258 (contract change from prior
session's readRuntimeFlag fix in 050b349a; out of scope for this commit).
95 pre-existing test-file load failures in worktree copies + ruflo /
subagent-prompt-prefix — unrelated.
Factor matrix grew 11 -> 19 -> 21 -> 29 -> 30 axes across Pass 1+2+3+4.
LEFTHOOK=0 due to quirk #111. Manual gitleaks scan: clean.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
142 lines
5.0 KiB
JavaScript
142 lines
5.0 KiB
JavaScript
/**
|
|
* Observer episode embedding index (Pass 4 of project-brain-factor-analysis-4passes).
|
|
*
|
|
* Pure module: given a list of episodes carrying `prompt_embedding_base64` and a
|
|
* resolved `_inferredOutcome`, build an in-memory index, find top-k cosine
|
|
* neighbours for a target embedding, and report the majority outcome family
|
|
* (success / retry / failure / mixed / no_neighbors).
|
|
*
|
|
* Embeddings produced by router-embedding.mjs are mean-pooled AND L2-normalized,
|
|
* so cosine similarity collapses to a plain dot product. We still defend the
|
|
* generic formula (denominator) here for robustness against legacy / hand-crafted
|
|
* test vectors.
|
|
*
|
|
* Security Guidance #40: pure parsing — no exec/execSync.
|
|
*/
|
|
import { Buffer } from 'buffer';
|
|
import { decodeBase64 } from './router-embedding.mjs';
|
|
|
|
/**
 * Lookup table from a raw episode outcome to its outcome family.
 *
 * The table has a null prototype so that names inherited from
 * Object.prototype ('constructor', 'toString', '__proto__', ...) can never be
 * mistaken for a known outcome.
 */
const OUTCOME_TO_FAMILY = Object.freeze({
  __proto__: null,
  success: 'success',
  soft_success: 'success',
  rework: 'retry',
  blocked: 'failure',
  partial: 'failure',
});

/**
 * Map a raw outcome label to its family.
 *
 * @param {unknown} outcome - Raw outcome label.
 * @returns {'success' | 'retry' | 'failure' | null} The family, or null when
 *   `outcome` is not a non-empty string or is not a known outcome.
 */
export function mapOutcomeToFamily(outcome) {
  if (typeof outcome !== 'string' || outcome === '') return null;
  return OUTCOME_TO_FAMILY[outcome] || null;
}
|
|
|
|
/**
 * Cosine similarity of two equal-length numeric vectors, using the full
 * formula dot(a, b) / (|a| * |b|) rather than assuming unit-length inputs.
 *
 * @param {ArrayLike<number> | null | undefined} a - First vector.
 * @param {ArrayLike<number> | null | undefined} b - Second vector.
 * @returns {number} The similarity, or 0 when either vector is missing or
 *   empty, the lengths differ, or either vector has zero magnitude.
 */
export function cosineSimilarity(a, b) {
  const comparable = Boolean(a) && Boolean(b) && a.length !== 0 && a.length === b.length;
  if (!comparable) return 0;

  let dotProduct = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;
  let pos = 0;
  while (pos < a.length) {
    dotProduct += a[pos] * b[pos];
    sumSquaresA += a[pos] * a[pos];
    sumSquaresB += b[pos] * b[pos];
    pos += 1;
  }

  // A zero-magnitude vector has no direction; avoid dividing by zero.
  if (sumSquaresA === 0 || sumSquaresB === 0) return 0;
  return dotProduct / (Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB));
}
|
|
|
|
/**
 * Decode a base64 string into an embedding vector, returning null instead of
 * throwing on any unusable input.
 *
 * @param {unknown} b64 - Candidate base64 payload.
 * @returns {Float32Array | null} The vector returned by `decodeBase64`, or
 *   null when the input is not a non-empty string, its decoded byte length is
 *   zero or not a multiple of 4, the decoded value is not a non-empty
 *   Float32Array, any element is non-finite, or decoding throws.
 */
function safeDecode(b64) {
  if (typeof b64 !== 'string' || b64 === '') return null;

  try {
    // Buffer.from(..., 'base64') does not throw on malformed text; it drops
    // invalid characters and truncates. So the decoded size is checked
    // explicitly: it must be a positive multiple of 4 bytes (Float32 width).
    const { byteLength } = Buffer.from(b64, 'base64');
    const holdsWholeFloats = byteLength > 0 && byteLength % 4 === 0;
    if (!holdsWholeFloats) return null;

    const vector = decodeBase64(b64);
    if (!(vector instanceof Float32Array)) return null;
    if (vector.length === 0) return null;

    // Reject NaN / +-Infinity components.
    const hasNonFinite = vector.some((component) => !Number.isFinite(component));
    return hasNonFinite ? null : vector;
  } catch {
    return null;
  }
}
|
|
|
|
/**
 * Compose the per-turn key `task_id|started_at` for an episode.
 *
 * Per this module's notes, `task_id` is the session id and is shared by every
 * turn of a session, so it is paired with `timestamps.started_at` to identify
 * a single turn. The notes state this matches the key shape used by
 * dedupeEpisodes in the analyzer.
 *
 * @param {object | null | undefined} ep - Episode record.
 * @returns {string} '' for a missing episode; otherwise the two parts joined
 *   by '|', with each missing or falsy part rendered as an empty string.
 */
export function episodeKey(ep) {
  if (!ep) return '';
  const sessionPart = ep.task_id || '';
  const timestamps = ep.timestamps || {};
  const startedPart = timestamps.started_at || '';
  return [sessionPart, startedPart].join('|');
}
|
|
|
|
/**
 * Build the in-memory neighbour index.
 *
 * An episode contributes an entry only when `_inferredOutcome` maps to an
 * outcome family and `prompt_embedding_base64` decodes to a usable vector.
 * Episodes missing either are skipped without error.
 *
 * @param {Iterable<object> | null | undefined} episodes - Episode records.
 * @returns {Array<{task_id: *, started_at: *, key: string, family: string, embedding: Float32Array}>}
 *   One entry per usable episode, in input order.
 */
export function buildIndex(episodes) {
  const entries = [];
  for (const episode of episodes || []) {
    if (!episode) continue;

    const family = mapOutcomeToFamily(episode._inferredOutcome);
    if (!family) continue;

    const embedding = safeDecode(episode.prompt_embedding_base64);
    if (!embedding) continue;

    const timestamps = episode.timestamps || {};
    entries.push({
      task_id: episode.task_id || null,
      started_at: timestamps.started_at || null,
      key: episodeKey(episode),
      family,
      embedding,
    });
  }
  return entries;
}
|
|
|
|
/**
 * Return the top-k index entries by cosine similarity to `target`, most
 * similar first.
 *
 * Self-exclusion is by composite key (`excludeKey`, the task_id|started_at
 * string stored on each entry as `key`). The legacy `excludeTaskId` option is
 * honoured only when `excludeKey` is not supplied.
 *
 * @param {Float32Array} target - Query embedding.
 * @param {Array<object>} index - Entries as produced by buildIndex.
 * @param {number} [k] - Maximum number of neighbours. Omitted means no limit;
 *   zero, negative or NaN yields [].
 * @param {{excludeKey?: string, excludeTaskId?: string} | null} [options]
 * @returns {Array<object>} Shallow copies of the matching entries, each with
 *   an added `similarity` field; [] for empty or invalid inputs.
 */
export function findNearestNeighbors(target, index, k, options = {}) {
  if (!(target instanceof Float32Array) || target.length === 0) return [];
  if (!Array.isArray(index) || index.length === 0) return [];

  // slice(0, k) treats a negative k as "all but the last |k|", which is never
  // what a top-k caller means. Validate up front, before any scoring work.
  const limit = k === undefined ? Infinity : Math.trunc(Number(k));
  if (!(limit > 0)) return [];

  // Tolerate an explicit null options bag (the default only covers undefined).
  const { excludeKey, excludeTaskId } = options || {};

  const scored = [];
  for (const entry of index) {
    if (excludeKey) {
      if (entry.key === excludeKey) continue;
    } else if (excludeTaskId && entry.task_id === excludeTaskId) {
      continue;
    }
    scored.push({ ...entry, similarity: cosineSimilarity(target, entry.embedding) });
  }

  scored.sort((left, right) => right.similarity - left.similarity);
  return scored.slice(0, limit);
}
|
|
|
|
/**
 * Return the dominant outcome family across `neighbors`.
 *
 * The three real families are success / retry / failure; 'mixed' and
 * 'no_neighbors' are synthetic results.
 *
 * @param {Array<{family?: string} | null> | null | undefined} neighbors
 * @returns {string} The single most frequent family; 'mixed' when two or more
 *   families tie for the top count; 'no_neighbors' when the input is not a
 *   non-empty array or no neighbour carries a family.
 */
export function majorityOutcome(neighbors) {
  if (!Array.isArray(neighbors) || neighbors.length === 0) return 'no_neighbors';

  // Null-prototype tally: with a plain {} a family named like an
  // Object.prototype member ('toString', 'constructor', ...) would read the
  // inherited function as its starting count and corrupt the result.
  const counts = Object.create(null);
  for (const neighbor of neighbors) {
    const family = neighbor && neighbor.family;
    if (!family) continue;
    counts[family] = (counts[family] || 0) + 1;
  }

  const tallies = Object.entries(counts);
  if (tallies.length === 0) return 'no_neighbors';

  const topCount = tallies.reduce((best, [, count]) => Math.max(best, count), 0);
  const winners = tallies.filter(([, count]) => count === topCount);
  return winners.length === 1 ? winners[0][0] : 'mixed';
}
|