Files
brain/tools/observer-self-assessment-api.mjs
T

222 lines
9.1 KiB
JavaScript

/**
* tools/observer-self-assessment-api.mjs
*
* Phase 3 deferred follow-up #5: real LLM self-assessment API call.
*
* Exports:
* buildSelfAssessmentPrompt({ prompt, recommendedNode, actualNode, chainExecuted })
* callSelfAssessmentApi({ prompt, recommendedNode, actualNode, chainExecuted,
* apiKey, baseUrl, model, fetchImpl, timeoutMs, abortSignal })
* readRuntimeFlag(name, { homedir, fsImpl })
*
* All functions are pure / fail-quiet — they never throw in production.
* callSelfAssessmentApi always returns string | null (null = skip self-assessment).
*/
import { join } from 'path';
import { existsSync, readFileSync } from 'fs';
import { homedir as osHomedir } from 'os';
// ---------------------------------------------------------------------------
// Prompt builder (pure)
// ---------------------------------------------------------------------------
/**
 * Assemble the two-part prompt (system + user) used to ask the model to
 * grade a routing decision.
 *
 * The system part is a fixed Russian instruction demanding a bare JSON object
 * with exactly four fields (summary, confidence_in_choice,
 * what_could_be_better, lesson_learned). The user part interpolates the four
 * context values, substituting placeholders for missing ones:
 *   - prompt          → '(пусто)' when null/undefined/empty
 *   - recommendedNode → 'не определён' when null/undefined
 *   - actualNode      → 'direct' when null/undefined
 *   - chainExecuted   → '[]' unless it is a non-empty array (steps joined with ' → ')
 *
 * @param {object} [opts]
 * @param {string|null|undefined} opts.prompt — the user's original prompt text
 * @param {string|null|undefined} opts.recommendedNode — node recommended by the router
 * @param {string|null|undefined} opts.actualNode — node actually chosen / 'direct'
 * @param {string[]|null|undefined} opts.chainExecuted — chain steps that were executed
 * @returns {{ system: string, user: string }}
 */
export function buildSelfAssessmentPrompt({ prompt, recommendedNode, actualNode, chainExecuted } = {}) {
  // Anything that is not a non-empty array is rendered as the literal '[]'.
  let chainText = '[]';
  if (Array.isArray(chainExecuted) && chainExecuted.length !== 0) {
    chainText = chainExecuted.join(' → ');
  }

  // A nullish or otherwise falsy prompt is shown as the "(empty)" placeholder.
  const promptText = prompt || '(пусто)';
  const recommendedText = recommendedNode ?? 'не определён';
  const actualText = actualNode ?? 'direct';

  const systemLines = [
    'Ты — внутренний наблюдатель роутинговой системы Claude Code.',
    'Твоя задача — честно оценить качество роутингового решения, принятого в этой сессии.',
    'Отвечай ТОЛЬКО валидным JSON-объектом без markdown-обёрток, ровно 4 поля:',
    ' "summary": строка — краткое описание принятого решения (до 120 символов)',
    ' "confidence_in_choice": число от 0.0 до 1.0 — насколько оптимальным был выбор',
    ' "what_could_be_better": строка или null — что можно было сделать иначе',
    ' "lesson_learned": строка или null — чему учит этот эпизод для будущих сессий',
    'Не добавляй лишних полей. Не используй markdown. Только JSON.',
  ];

  const userLines = [
    'Контекст роутингового решения:',
    '',
    `Запрос пользователя: ${promptText}`,
    `Рекомендованный узел роутером: ${recommendedText}`,
    `Фактически выбранный узел: ${actualText}`,
    `Выполненная цепочка: ${chainText}`,
    '',
    'Оцени это решение. Верни JSON с 4 полями.',
  ];

  return {
    system: systemLines.join('\n'),
    user: userLines.join('\n'),
  };
}
// ---------------------------------------------------------------------------
// Runtime flag reader
// ---------------------------------------------------------------------------
/**
 * Read a runtime flag from <home>/.claude/runtime/<name>.json.
 *
 * The flag is taken from the file's `mode` field, falling back to the legacy
 * `value` field when `mode` is null/undefined. The function is fail-quiet:
 * a missing file, unreadable file, invalid JSON or a non-string flag all
 * yield 'off'.
 *
 * @param {string} name — flag file basename without .json
 * @param {object} [opts]
 * @param {string} [opts.homedir] — override home dir (for tests)
 * @param {{ existsSync: Function, readFileSync: Function }} [opts.fsImpl] — override fs (for tests)
 * @returns {string} the flag string, or 'off' on any problem
 */
export function readRuntimeFlag(name, { homedir, fsImpl } = {}) {
  const OFF = 'off';
  const baseDir = homedir ?? osHomedir();
  const io = fsImpl ?? { existsSync, readFileSync };

  try {
    const flagPath = join(baseDir, '.claude', 'runtime', `${name}.json`);
    if (!io.existsSync(flagPath)) {
      return OFF;
    }

    // `mode` is the canonical key in runtime flag files; `value` is kept as a
    // legacy alias so older fixtures continue to work.
    const { mode, value } = JSON.parse(io.readFileSync(flagPath, 'utf-8'));
    const flag = mode ?? value;
    return typeof flag === 'string' ? flag : OFF;
  } catch {
    // Any read/parse failure is treated as "flag is off".
    return OFF;
  }
}
// ---------------------------------------------------------------------------
// API caller (async, fail-quiet)
// ---------------------------------------------------------------------------
// Fallback API base URL: used by callSelfAssessmentApi when the caller passes
// no baseUrl and ROUTER_LLM_BASE_URL is unset/empty.
const DEFAULT_BASE_URL = 'https://api.proxyapi.ru/anthropic';
// Default value for the `model` option of callSelfAssessmentApi.
const DEFAULT_MODEL = 'claude-sonnet-4-6';
// A2 (2026-05-26): raised 10000 → 30000. On Windows, the first ProxyAPI fetch
// triggers a TLS handshake that often takes 20+ seconds; the 10 s limit killed
// every first call → 85% no_self_assessment in brain-retro #6. The outer
// Stop-hook timeout in .claude/settings.json was raised to 60 s in parallel.
// brain-retro #7 C3 (2026-05-27): bumped 30s → 50s. Self_assessment coverage
// fell to 22% (18/23 episodes self_assessment_pending) because Windows TLS
// handshake (20-25s first call, see memory feedback_windows_tls_handshake.md)
// + Sonnet latency (3-10s) routinely exceeded the 30s budget. Stop-hook has
// a 60s budget; self-assessment can safely use 50s within it.
// Exported for tests + observability.
export const DEFAULT_TIMEOUT_MS = 50000;
// Sent as `max_tokens` in the /v1/messages request body.
const MAX_TOKENS = 512;
/**
 * Call the Anthropic-style `/v1/messages` endpoint with the self-assessment
 * prompt and return the text of the first content block.
 *
 * Fail-quiet contract: any error (missing key, prompt/body construction
 * failure, network error, non-2xx, JSON parse error, unexpected response
 * shape, timeout, abort) → resolves to null. Never throws / rejects.
 *
 * Timeout and cancellation:
 *   - The request is raced against a single timer of `timeoutMs`. When the
 *     timer fires, the request's AbortSignal is aborted and null is returned,
 *     even if `fetchImpl` ignores the signal.
 *   - The timer is always cleared before returning, so a finished call leaves
 *     no pending timer behind to keep the Node event loop alive.
 *   - The signal handed to fetch belongs to an internal AbortController; an
 *     abort on the caller's `abortSignal` is forwarded to it. This lets the
 *     timeout cancel the request even when the caller supplied a signal.
 *
 * @param {object} [opts]
 * @param {string|null|undefined} opts.prompt — the user's original prompt text
 * @param {string|null|undefined} opts.recommendedNode — node recommended by the router
 * @param {string|null|undefined} opts.actualNode — node actually chosen
 * @param {string[]|null|undefined} opts.chainExecuted — chain steps that were executed
 * @param {string|null|undefined} opts.apiKey — ROUTER_LLM_KEY value; falsy → null without any request
 * @param {string} [opts.baseUrl] — API base URL (trailing slashes are ignored);
 *   defaults to ROUTER_LLM_BASE_URL, then DEFAULT_BASE_URL
 * @param {string} [opts.model] — model alias
 * @param {Function} [opts.fetchImpl] — override fetch (for tests); defaults to globalThis.fetch
 * @param {number} [opts.timeoutMs] — timeout in ms
 * @param {AbortSignal} [opts.abortSignal] — external abort signal
 * @returns {Promise<string|null>}
 */
export async function callSelfAssessmentApi({
  prompt,
  recommendedNode,
  actualNode,
  chainExecuted,
  apiKey,
  // Operator switch 2026-06-12: the env override lives in the default value
  // (mirrors callAnthropicAPI).
  baseUrl = process.env.ROUTER_LLM_BASE_URL || DEFAULT_BASE_URL,
  model = DEFAULT_MODEL,
  fetchImpl,
  timeoutMs = DEFAULT_TIMEOUT_MS,
  abortSignal,
} = {}) {
  // Guard: no key → skip silently.
  if (!apiKey) return null;

  // Always own a controller so the timeout can cancel the request regardless
  // of whether the caller supplied a signal of their own.
  const controller = new AbortController();
  let timeoutId;
  let detachCallerAbort;

  try {
    // Forward the caller's abort (past or future) to our controller.
    if (abortSignal) {
      if (abortSignal.aborted) {
        controller.abort();
      } else if (typeof abortSignal.addEventListener === 'function') {
        const forwardAbort = () => controller.abort();
        abortSignal.addEventListener('abort', forwardAbort, { once: true });
        detachCallerAbort = () => abortSignal.removeEventListener?.('abort', forwardAbort);
      }
    }

    const fetchFn = fetchImpl ?? globalThis.fetch;
    const { system, user } = buildSelfAssessmentPrompt({ prompt, recommendedNode, actualNode, chainExecuted });
    // Strip trailing slashes so a base URL such as ".../anthropic/" does not
    // produce ".../anthropic//v1/messages".
    const url = `${String(baseUrl).replace(/\/+$/, '')}/v1/messages`;
    const body = JSON.stringify({
      model,
      max_tokens: MAX_TOKENS,
      system,
      messages: [{ role: 'user', content: user }],
    });

    // One timer does both jobs: cancel the request (for fetch implementations
    // that honour the signal) and settle the race (for those that do not).
    const timeoutPromise = new Promise((resolve) => {
      timeoutId = setTimeout(() => {
        controller.abort();
        resolve(null);
      }, timeoutMs);
    });

    const requestText = async () => {
      const response = await fetchFn(url, {
        method: 'POST',
        headers: {
          'content-type': 'application/json',
          // Both auth header styles are sent, as in the original request.
          'x-api-key': apiKey,
          'authorization': `Bearer ${apiKey}`,
          'anthropic-version': '2023-06-01',
        },
        body,
        signal: controller.signal,
      });
      if (!response.ok) return null;
      const data = await response.json();
      const text = data?.content?.[0]?.text;
      return typeof text === 'string' ? text : null;
    };

    // The catch is attached up front so a rejection that arrives after the
    // timeout has already won never surfaces as an unhandled rejection.
    const fetchPromise = requestText().catch(() => null);

    // Race: first settlement wins.
    const result = await Promise.race([fetchPromise, timeoutPromise]);
    return result ?? null;
  } catch {
    // Unexpected error while preparing or issuing the request → fail-quiet.
    return null;
  } finally {
    // Leave nothing behind: no live timer, no listener on the caller's signal.
    clearTimeout(timeoutId);
    detachCallerAbort?.();
  }
}