/**
 * tools/observer-self-assessment-api.mjs
 *
 * Phase 3 deferred follow-up #5: real LLM self-assessment API call.
 *
 * Exports:
 *   buildSelfAssessmentPrompt({ prompt, recommendedNode, actualNode, chainExecuted })
 *   callSelfAssessmentApi({ prompt, recommendedNode, actualNode, chainExecuted,
 *                           apiKey, baseUrl, model, fetchImpl, timeoutMs, abortSignal })
 *   readRuntimeFlag(name, { homedir, fsImpl })
 *   DEFAULT_TIMEOUT_MS
 *
 * All functions are pure / fail-quiet — they never throw in production.
 * callSelfAssessmentApi always returns string | null (null = skip self-assessment).
 */

import { join } from 'path';
import { existsSync, readFileSync } from 'fs';
import { homedir as osHomedir } from 'os';

// ---------------------------------------------------------------------------
// Prompt builder (pure)
// ---------------------------------------------------------------------------

/**
 * Build the self-assessment prompt for Sonnet.
 *
 * System: Russian instruction asking Claude to evaluate its own routing choice
 * and return a JSON object with 4 fields.
 *
 * User: interpolates the 4 context fields.
 *
 * @param {object} opts
 * @param {string|null|undefined} opts.prompt — the user's original prompt text
 * @param {string|null|undefined} opts.recommendedNode — node recommended by router
 * @param {string|null|undefined} opts.actualNode — node actually chosen / 'direct'
 * @param {string[]|null|undefined} opts.chainExecuted — list of chain steps executed
 * @returns {{ system: string, user: string }}
 */
export function buildSelfAssessmentPrompt({ prompt, recommendedNode, actualNode, chainExecuted } = {}) {
  // Nullish inputs fall back to placeholders so the prompt is always well-formed.
  const safePrompt = prompt ?? '';
  const safeRecommended = recommendedNode ?? 'не определён';
  const safeActual = actualNode ?? 'direct';
  // Anything that is not a non-empty array is rendered as the literal "[]".
  const safeChain = Array.isArray(chainExecuted) && chainExecuted.length > 0
    ? chainExecuted.join(' → ')
    : '[]';

  const system = [
    'Ты — внутренний наблюдатель роутинговой системы Claude Code.',
    'Твоя задача — честно оценить качество роутингового решения, принятого в этой сессии.',
    'Отвечай ТОЛЬКО валидным JSON-объектом без markdown-обёрток, ровно 4 поля:',
    ' "summary": строка — краткое описание принятого решения (до 120 символов)',
    ' "confidence_in_choice": число от 0.0 до 1.0 — насколько оптимальным был выбор',
    ' "what_could_be_better": строка или null — что можно было сделать иначе',
    ' "lesson_learned": строка или null — чему учит этот эпизод для будущих сессий',
    'Не добавляй лишних полей. Не используй markdown. Только JSON.',
  ].join('\n');

  const user = [
    'Контекст роутингового решения:',
    '',
    `Запрос пользователя: ${safePrompt || '(пусто)'}`,
    `Рекомендованный узел роутером: ${safeRecommended}`,
    `Фактически выбранный узел: ${safeActual}`,
    `Выполненная цепочка: ${safeChain}`,
    '',
    'Оцени это решение. Верни JSON с 4 полями.',
  ].join('\n');

  return { system, user };
}

// ---------------------------------------------------------------------------
// Runtime flag reader
// ---------------------------------------------------------------------------

/**
 * Read a runtime flag from ~/.claude/runtime/{name}.json.
 * Returns the string stored under `mode` (or, failing that, the legacy `value`
 * key), or 'off' when the file is missing, unreadable, not valid JSON, or the
 * field is not a string.
 *
 * @param {string} name — flag file basename without .json
 * @param {object} opts
 * @param {string} [opts.homedir] — override home dir (for tests)
 * @param {{ existsSync: Function, readFileSync: Function }} [opts.fsImpl] — override fs (for tests)
 * @returns {string}
 */
export function readRuntimeFlag(name, { homedir, fsImpl } = {}) {
  const home = homedir ?? osHomedir();
  const fs = fsImpl ?? { existsSync, readFileSync };
  try {
    const filePath = join(home, '.claude', 'runtime', `${name}.json`);
    if (!fs.existsSync(filePath)) return 'off';
    const raw = fs.readFileSync(filePath, 'utf-8');
    const parsed = JSON.parse(raw);
    // Runtime flag files use `mode` (canonical, see all ~/.claude/runtime/*-mode.json);
    // `value` retained as legacy alias to keep existing test fixtures working.
    const val = parsed.mode ?? parsed.value;
    if (typeof val !== 'string') return 'off';
    return val;
  } catch {
    // Any fs / parse failure (including a JSON `null` document) means "off".
    return 'off';
  }
}

// ---------------------------------------------------------------------------
// API caller (async, fail-quiet)
// ---------------------------------------------------------------------------

const DEFAULT_BASE_URL = 'https://api.proxyapi.ru/anthropic';
const DEFAULT_MODEL = 'claude-sonnet-4-6';

// A2 (2026-05-26): raised 10000 → 30000. On Windows, the first ProxyAPI fetch
// triggers a TLS handshake that often takes 20+ seconds; 10s killed every
// first call → 85% no_self_assessment in brain-retro #6. The stop-hook outer
// timeout in .claude/settings.json was raised to 60s in parallel.
// brain-retro #7 C3 (2026-05-27): bumped 30s → 50s. Self_assessment coverage
// fell to 22% (18/23 episodes self_assessment_pending) because Windows TLS
// handshake (20-25s first call, see memory feedback_windows_tls_handshake.md)
// + Sonnet latency (3-10s) routinely exceeded the 30s budget. Stop-hook has
// a 60s budget; self-assessment can safely use 50s within it.
// Exported for tests + observability.
export const DEFAULT_TIMEOUT_MS = 50000;
const MAX_TOKENS = 512;

/**
 * POST one /v1/messages request and extract the text of the first content block.
 * Resolves to null on non-2xx, an unexpected payload shape, or any thrown /
 * rejected error (including a synchronous throw from fetchFn).
 *
 * @param {Function} fetchFn — fetch-compatible function
 * @param {string} url — full endpoint URL
 * @param {object} init — fetch init (method, headers, body, signal)
 * @returns {Promise<string|null>}
 */
function requestAssessmentText(fetchFn, url, init) {
  return (async () => {
    const response = await fetchFn(url, init);
    if (!response.ok) return null;
    const data = await response.json();
    const text = data?.content?.[0]?.text;
    return typeof text === 'string' ? text : null;
  })().catch(() => null);
}

/**
 * Call the Anthropic /v1/messages endpoint with the self-assessment prompt.
 * Returns the text content from the first content block, or null on any failure.
 *
 * Fail-quiet contract: any error (missing key, network error, non-2xx, JSON
 * parse error, timeout, caller abort) → return null. Never throws.
 *
 * Cancellation: the request is always issued with an internal AbortSignal.
 * That signal is aborted when `timeoutMs` elapses OR when the caller's
 * `abortSignal` aborts, so a timed-out request is cancelled even when the
 * caller supplied its own signal. The single timeout timer is cleared before
 * returning, so a fast response never leaves a pending timer behind to keep
 * the process alive.
 *
 * @param {object} opts
 * @param {string|null|undefined} opts.prompt
 * @param {string|null|undefined} opts.recommendedNode
 * @param {string|null|undefined} opts.actualNode
 * @param {string[]|null|undefined} opts.chainExecuted
 * @param {string|null|undefined} opts.apiKey — ROUTER_LLM_KEY value
 * @param {string} [opts.baseUrl] — API base URL (trailing slashes are ignored)
 * @param {string} [opts.model] — model alias
 * @param {Function} [opts.fetchImpl] — override fetch (for tests)
 * @param {number} [opts.timeoutMs] — abort timeout in ms
 * @param {AbortSignal} [opts.abortSignal] — external abort signal
 * @returns {Promise<string|null>}
 */
export async function callSelfAssessmentApi({
  prompt,
  recommendedNode,
  actualNode,
  chainExecuted,
  apiKey,
  // Operator change 2026-06-12: env switch in the default (mirrors callAnthropicAPI).
  baseUrl = process.env.ROUTER_LLM_BASE_URL || DEFAULT_BASE_URL,
  model = DEFAULT_MODEL,
  fetchImpl,
  timeoutMs = DEFAULT_TIMEOUT_MS,
  abortSignal,
} = {}) {
  // Guard: no key → skip silently
  if (!apiKey) return null;

  // One controller drives the request; both the timeout and the caller's
  // signal funnel into it.
  const controller = new AbortController();
  const forwardAbort = () => controller.abort();
  let timeoutId;

  try {
    if (abortSignal) {
      if (abortSignal.aborted) {
        controller.abort();
      } else {
        abortSignal.addEventListener?.('abort', forwardAbort, { once: true });
      }
    }

    const fetchFn = fetchImpl ?? globalThis.fetch;
    const { system, user } = buildSelfAssessmentPrompt({
      prompt,
      recommendedNode,
      actualNode,
      chainExecuted,
    });

    // Tolerate a base URL configured with a trailing slash.
    const url = `${String(baseUrl).replace(/\/+$/, '')}/v1/messages`;
    const body = JSON.stringify({
      model,
      max_tokens: MAX_TOKENS,
      system,
      messages: [{ role: 'user', content: user }],
    });

    // We always race the fetch against the timeout so that even when the
    // fetchImpl ignores the AbortSignal (e.g. in tests) the timeout still wins.
    // The same timer also aborts the controller so real fetch() cancels early.
    const timeoutPromise = new Promise((resolve) => {
      timeoutId = setTimeout(() => {
        controller.abort();
        resolve(null);
      }, timeoutMs);
    });

    const fetchPromise = requestAssessmentText(fetchFn, url, {
      method: 'POST',
      headers: {
        'content-type': 'application/json',
        'x-api-key': apiKey,
        'authorization': `Bearer ${apiKey}`,
        'anthropic-version': '2023-06-01',
      },
      body,
      signal: controller.signal,
    });

    // Race: first settlement wins.
    const result = await Promise.race([fetchPromise, timeoutPromise]);
    return result ?? null;
  } catch {
    // Unexpected outer error → fail-quiet
    return null;
  } finally {
    // Release the timer and the listener so nothing outlives the call.
    clearTimeout(timeoutId);
    abortSignal?.removeEventListener?.('abort', forwardAbort);
  }
}