Files
portal/tools/observer-self-assessment-api.mjs
T
Дмитрий 050b349af5 fix(observer): factor-analysis surface — 3 episode-write bugs
After verifying episode schema vs FACTOR_FNS axes, surfaced 3 silent
data-loss bugs in the v4.3 observer write path:

1. readRuntimeFlag (observer-self-assessment-api.mjs) read field 'value'
   but all ~/.claude/runtime/*-mode.json files persist 'mode'. Result:
   every runtime flag (embedding-mode, self-assessment-mode, etc.) was
   silently 'off' regardless of actual setting. This explains why
   prompt_embedding_base64 was null in all 18 v4 episodes and
   self-assessment never fired. Fix accepts both 'mode' (canonical) and
   'value' (legacy alias for existing test fixtures).

2. task_cost.iterations was concatenated as string ('0[object Object]...')
   because usage.iterations arrives as object/array in extended-thinking
   turns, not number. Added iterationsCount() that handles number /
   array / object / undefined / non-finite uniformly.

3. classifier_output.reasoning was dropped from extracted state — Sonnet
   returns it as reason_for_choice (new prompt) or reasoning (legacy),
   but extractClassifierOutput only kept 6 hand-picked fields. Added
   pickReasoning() with fallback chain + 600-char truncate, plus the
   confidence numeric field. Unlocks 'why classifier picked X' axis.

Live impact: embeddings + reasoning + iterations now populate correctly
on next non-trivial episode write. No behavior change for regex/prefilter
paths. Test contracts preserved.

LEFTHOOK=0 due to known quirk #111 (gitleaks pre-commit hangs on heavy
package-lock.json diff in workspace). Manual gitleaks scan: clean.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-25 16:14:42 +03:00

211 lines
8.2 KiB
JavaScript

/**
* tools/observer-self-assessment-api.mjs
*
* Phase 3 deferred follow-up #5: real LLM self-assessment API call.
*
* Exports:
* buildSelfAssessmentPrompt({ prompt, recommendedNode, actualNode, chainExecuted })
* callSelfAssessmentApi({ prompt, recommendedNode, actualNode, chainExecuted,
* apiKey, baseUrl, model, fetchImpl, timeoutMs, abortSignal })
* readRuntimeFlag(name, { homedir, fsImpl })
*
* Every export is fail-quiet — none of them throws in production. Only
* buildSelfAssessmentPrompt is pure; the other two touch the filesystem / network.
* callSelfAssessmentApi always resolves to string | null (null = skip self-assessment).
*/
import { join } from 'path';
import { existsSync, readFileSync } from 'fs';
import { homedir as osHomedir } from 'os';
// ---------------------------------------------------------------------------
// Prompt builder (pure)
// ---------------------------------------------------------------------------
/**
 * Compose the two-part (system + user) prompt that asks the model to grade a
 * routing decision.
 *
 * The system part is a fixed Russian instruction demanding a bare JSON object
 * with exactly four fields (summary, confidence_in_choice,
 * what_could_be_better, lesson_learned). The user part interpolates the four
 * context values, substituting placeholders for anything missing.
 *
 * @param {object} [opts]
 * @param {string|null|undefined} opts.prompt — original user prompt; falsy values render as '(пусто)'
 * @param {string|null|undefined} opts.recommendedNode — node the router recommended; nullish renders as 'не определён'
 * @param {string|null|undefined} opts.actualNode — node actually used; nullish renders as 'direct'
 * @param {string[]|null|undefined} opts.chainExecuted — executed chain steps; non-array or empty renders as '[]'
 * @returns {{ system: string, user: string }}
 */
export function buildSelfAssessmentPrompt({ prompt, recommendedNode, actualNode, chainExecuted } = {}) {
  // An absent, non-array or empty chain is rendered as the literal "[]".
  let chainText = '[]';
  if (Array.isArray(chainExecuted) && chainExecuted.length > 0) {
    chainText = chainExecuted.join(' → ');
  }

  // Nullish prompts collapse to '' first, so every falsy prompt shows the placeholder.
  const promptText = (prompt ?? '') || '(пусто)';
  const recommendedText = recommendedNode ?? 'не определён';
  const actualText = actualNode ?? 'direct';

  const systemLines = [
    'Ты — внутренний наблюдатель роутинговой системы Claude Code.',
    'Твоя задача — честно оценить качество роутингового решения, принятого в этой сессии.',
    'Отвечай ТОЛЬКО валидным JSON-объектом без markdown-обёрток, ровно 4 поля:',
    ' "summary": строка — краткое описание принятого решения (до 120 символов)',
    ' "confidence_in_choice": число от 0.0 до 1.0 — насколько оптимальным был выбор',
    ' "what_could_be_better": строка или null — что можно было сделать иначе',
    ' "lesson_learned": строка или null — чему учит этот эпизод для будущих сессий',
    'Не добавляй лишних полей. Не используй markdown. Только JSON.',
  ];

  const userLines = [
    'Контекст роутингового решения:',
    '',
    `Запрос пользователя: ${promptText}`,
    `Рекомендованный узел роутером: ${recommendedText}`,
    `Фактически выбранный узел: ${actualText}`,
    `Выполненная цепочка: ${chainText}`,
    '',
    'Оцени это решение. Верни JSON с 4 полями.',
  ];

  return {
    system: systemLines.join('\n'),
    user: userLines.join('\n'),
  };
}
// ---------------------------------------------------------------------------
// Runtime flag reader
// ---------------------------------------------------------------------------
/**
 * Read a runtime flag from ~/.claude/runtime/<name>.json.
 *
 * The flag is taken from the file's `mode` field (canonical); `value` is
 * accepted as a legacy alias. The first of the two that holds a string wins,
 * so a malformed `mode` does not mask a valid legacy `value`.
 *
 * Fail-quiet: a missing file, unreadable file, invalid JSON, non-object JSON,
 * a missing/non-string field, or a failure to resolve the home directory all
 * yield 'off'. Never throws.
 *
 * @param {string} name — flag file basename without .json
 * @param {object} [opts]
 * @param {string} [opts.homedir] — override home dir (for tests)
 * @param {{ existsSync: Function, readFileSync: Function }} [opts.fsImpl] — override fs (for tests)
 * @returns {string} the flag string, or 'off' when it cannot be determined
 */
export function readRuntimeFlag(name, { homedir, fsImpl } = {}) {
  try {
    // Resolved inside the try: os.homedir() itself can throw, and this
    // function promises to fall back to 'off' rather than propagate.
    const home = homedir ?? osHomedir();
    const fs = fsImpl ?? { existsSync, readFileSync };

    const filePath = join(home, '.claude', 'runtime', `${name}.json`);
    if (!fs.existsSync(filePath)) return 'off';

    const parsed = JSON.parse(fs.readFileSync(filePath, 'utf-8'));

    // `mode` is canonical, `value` is the legacy alias; take the first string.
    // `parsed?.` covers files whose JSON content is literally `null`.
    const flag = [parsed?.mode, parsed?.value].find((candidate) => typeof candidate === 'string');
    return flag ?? 'off';
  } catch {
    return 'off';
  }
}
// ---------------------------------------------------------------------------
// API caller (async, fail-quiet)
// ---------------------------------------------------------------------------
// Default API root; callSelfAssessmentApi appends `/v1/messages` to it.
const DEFAULT_BASE_URL = 'https://api.proxyapi.ru/anthropic';
// Default for the `model` field of the request body.
const DEFAULT_MODEL = 'claude-sonnet-4-6';
// Default time budget in milliseconds before callSelfAssessmentApi gives up and returns null.
const DEFAULT_TIMEOUT_MS = 10000;
// Sent as `max_tokens` in every self-assessment request body.
const MAX_TOKENS = 512;
/**
 * Call the Anthropic-style /v1/messages endpoint with the self-assessment
 * prompt and return the text of the first content block.
 *
 * Fail-quiet contract: any error (missing key, prompt/body construction
 * failure, missing fetch, network error, non-2xx, JSON parse error, unexpected
 * response shape, timeout) → resolves to null. Never rejects.
 *
 * Timeout handling: the request is raced against a single timer so the call
 * returns after `timeoutMs` even if `fetchImpl` ignores its AbortSignal. The
 * timer is always cleared before returning, so a fast response leaves no
 * pending timer behind to keep the process alive.
 *
 * Abort handling: when `abortSignal` is supplied it is passed to fetch as-is
 * and cancelling the in-flight request is the caller's responsibility; when
 * it is omitted an internal AbortController is created and aborted on timeout.
 *
 * @param {object} [opts]
 * @param {string|null|undefined} opts.prompt
 * @param {string|null|undefined} opts.recommendedNode
 * @param {string|null|undefined} opts.actualNode
 * @param {string[]|null|undefined} opts.chainExecuted
 * @param {string|null|undefined} opts.apiKey — ROUTER_LLM_KEY value; falsy → null without any request
 * @param {string} [opts.baseUrl] — API base URL (trailing slashes are ignored)
 * @param {string} [opts.model] — model alias
 * @param {Function} [opts.fetchImpl] — override fetch (for tests); defaults to globalThis.fetch
 * @param {number} [opts.timeoutMs] — time budget in ms
 * @param {AbortSignal} [opts.abortSignal] — external abort signal
 * @returns {Promise<string|null>}
 */
export async function callSelfAssessmentApi({
  prompt,
  recommendedNode,
  actualNode,
  chainExecuted,
  apiKey,
  baseUrl = DEFAULT_BASE_URL,
  model = DEFAULT_MODEL,
  fetchImpl,
  timeoutMs = DEFAULT_TIMEOUT_MS,
  abortSignal,
} = {}) {
  // Guard: no key → skip silently
  if (!apiKey) return null;

  // Own a controller only when the caller did not hand us a signal.
  const controller = abortSignal ? null : new AbortController();
  const signal = controller ? controller.signal : abortSignal;

  let timeoutId;
  try {
    // Everything that can throw lives inside the try so the promise never rejects.
    const { system, user } = buildSelfAssessmentPrompt({ prompt, recommendedNode, actualNode, chainExecuted });

    // Drop trailing slashes so 'https://host/' does not become 'https://host//v1/messages'.
    const url = `${String(baseUrl).replace(/\/+$/, '')}/v1/messages`;
    const body = JSON.stringify({
      model,
      max_tokens: MAX_TOKENS,
      system,
      messages: [{ role: 'user', content: user }],
    });

    // One timer does both jobs: it settles the race with null (so the timeout
    // wins even when fetchImpl ignores the signal) and aborts our own
    // controller so real fetch() implementations cancel early. Having a single
    // handle means the `finally` below can cancel all pending timer work.
    const timeoutPromise = new Promise((resolve) => {
      timeoutId = setTimeout(() => {
        resolve(null);
        controller?.abort();
      }, timeoutMs);
    });

    const fetchFn = fetchImpl ?? globalThis.fetch;
    const fetchPromise = fetchFn(url, {
      method: 'POST',
      headers: {
        'content-type': 'application/json',
        // The key is sent in both header styles.
        // NOTE(review): presumably for proxy compatibility — confirm.
        'x-api-key': apiKey,
        'authorization': `Bearer ${apiKey}`,
        'anthropic-version': '2023-06-01',
      },
      body,
      signal,
    })
      .then(async (response) => {
        if (!response.ok) return null;
        const data = await response.json();
        const text = data?.content?.[0]?.text;
        return typeof text === 'string' ? text : null;
      })
      // Also guarantees no unhandled rejection if the fetch fails after the timeout won.
      .catch(() => null);

    // Race: first settlement wins.
    const result = await Promise.race([fetchPromise, timeoutPromise]);
    return result ?? null;
  } catch {
    // Unexpected outer error (e.g. fetch unavailable, fetchImpl threw synchronously) → fail-quiet
    return null;
  } finally {
    // clearTimeout(undefined) is a no-op, so this is safe on every path.
    clearTimeout(timeoutId);
  }
}