397777089e
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
222 lines
9.1 KiB
JavaScript
222 lines
9.1 KiB
JavaScript
/**
 * tools/observer-self-assessment-api.mjs
 *
 * Phase 3 deferred follow-up #5: real LLM self-assessment API call.
 *
 * Exports:
 *   buildSelfAssessmentPrompt({ prompt, recommendedNode, actualNode, chainExecuted })
 *   callSelfAssessmentApi({ prompt, recommendedNode, actualNode, chainExecuted,
 *                           apiKey, baseUrl, model, fetchImpl, timeoutMs, abortSignal })
 *   readRuntimeFlag(name, { homedir, fsImpl })
 *   DEFAULT_TIMEOUT_MS
 *
 * buildSelfAssessmentPrompt is pure; readRuntimeFlag and callSelfAssessmentApi
 * perform I/O but are fail-quiet — they never throw in production.
 * callSelfAssessmentApi always returns string | null (null = skip self-assessment).
 */

import { join } from 'path';
import { existsSync, readFileSync } from 'fs';
import { homedir as osHomedir } from 'os';

// ---------------------------------------------------------------------------
// Prompt builder (pure)
// ---------------------------------------------------------------------------

/**
 * Build the system/user message pair for the routing self-assessment request.
 *
 * The system message is a fixed Russian instruction that asks the model to
 * judge the routing decision and answer with a bare JSON object of exactly
 * four fields (summary, confidence_in_choice, what_could_be_better,
 * lesson_learned). The user message interpolates the four context values.
 *
 * Fallbacks applied to missing context:
 *   - prompt          → '(пусто)' when null/undefined or otherwise falsy
 *   - recommendedNode → 'не определён' when null/undefined
 *   - actualNode      → 'direct' when null/undefined
 *   - chainExecuted   → '[]' unless it is a non-empty array
 *
 * @param {object} [opts]
 * @param {string|null|undefined} opts.prompt — the user's original prompt text
 * @param {string|null|undefined} opts.recommendedNode — node recommended by the router
 * @param {string|null|undefined} opts.actualNode — node actually chosen, or 'direct'
 * @param {string[]|null|undefined} opts.chainExecuted — chain steps that were executed
 * @returns {{ system: string, user: string }}
 */
export function buildSelfAssessmentPrompt({ prompt, recommendedNode, actualNode, chainExecuted } = {}) {
  const safePrompt = prompt ?? '';
  const safeRecommended = recommendedNode ?? 'не определён';
  const safeActual = actualNode ?? 'direct';
  // Steps are rendered as "a → b → c"; anything that is not a non-empty array
  // is shown as the literal "[]".
  const safeChain = Array.isArray(chainExecuted) && chainExecuted.length > 0
    ? chainExecuted.join(' → ')
    : '[]';

  const system = [
    'Ты — внутренний наблюдатель роутинговой системы Claude Code.',
    'Твоя задача — честно оценить качество роутингового решения, принятого в этой сессии.',
    'Отвечай ТОЛЬКО валидным JSON-объектом без markdown-обёрток, ровно 4 поля:',
    ' "summary": строка — краткое описание принятого решения (до 120 символов)',
    ' "confidence_in_choice": число от 0.0 до 1.0 — насколько оптимальным был выбор',
    ' "what_could_be_better": строка или null — что можно было сделать иначе',
    ' "lesson_learned": строка или null — чему учит этот эпизод для будущих сессий',
    'Не добавляй лишних полей. Не используй markdown. Только JSON.',
  ].join('\n');

  const user = [
    'Контекст роутингового решения:',
    '',
    `Запрос пользователя: ${safePrompt || '(пусто)'}`,
    `Рекомендованный узел роутером: ${safeRecommended}`,
    `Фактически выбранный узел: ${safeActual}`,
    `Выполненная цепочка: ${safeChain}`,
    '',
    'Оцени это решение. Верни JSON с 4 полями.',
  ].join('\n');

  return { system, user };
}

// ---------------------------------------------------------------------------
// Runtime flag reader
// ---------------------------------------------------------------------------

/**
 * Read a runtime flag from <home>/.claude/runtime/<name>.json.
 *
 * The file is parsed as JSON and the flag is taken from its `mode` field,
 * falling back to the legacy `value` field when `mode` is null/undefined.
 * Returns 'off' whenever the flag cannot be determined: the file does not
 * exist, reading or parsing fails, or the resolved field is not a string.
 * Never throws.
 *
 * @param {string} name — flag file basename without .json
 * @param {object} [opts]
 * @param {string} [opts.homedir] — override home dir (for tests); defaults to os.homedir()
 * @param {{ existsSync: Function, readFileSync: Function }} [opts.fsImpl] — override fs (for tests)
 * @returns {string} the flag value, or 'off'
 */
export function readRuntimeFlag(name, { homedir, fsImpl } = {}) {
  const home = homedir ?? osHomedir();
  const fs = fsImpl ?? { existsSync, readFileSync };

  try {
    const filePath = join(home, '.claude', 'runtime', `${name}.json`);
    if (!fs.existsSync(filePath)) return 'off';

    const parsed = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
    // `mode` is the canonical key; `value` is kept as a legacy alias so
    // existing test fixtures keep working. A non-object payload (e.g. `null`)
    // throws on property access and lands in the catch below.
    const val = parsed.mode ?? parsed.value;
    return typeof val === 'string' ? val : 'off';
  } catch {
    // Fail-quiet: unreadable file, invalid JSON, or a broken fsImpl all mean 'off'.
    return 'off';
  }
}

// ---------------------------------------------------------------------------
// API caller (async, fail-quiet)
// ---------------------------------------------------------------------------

// Base URL used when neither the caller nor ROUTER_LLM_BASE_URL supplies one.
const DEFAULT_BASE_URL = 'https://api.proxyapi.ru/anthropic';
// Model alias used when the caller does not pass one.
const DEFAULT_MODEL = 'claude-sonnet-4-6';

// Timeout history:
// A2 (2026-05-26): raised 10000 → 30000. On Windows the first ProxyAPI fetch
// triggers a TLS handshake that often takes 20+ seconds; the 10s limit killed
// every first call → 85% no_self_assessment in brain-retro #6. The Stop-hook
// outer timeout in .claude/settings.json was raised to 60s at the same time.
// brain-retro #7 C3 (2026-05-27): bumped 30s → 50s. Self_assessment coverage
// fell to 22% (18/23 episodes self_assessment_pending) because Windows TLS
// handshake (20-25s first call, see memory feedback_windows_tls_handshake.md)
// + Sonnet latency (3-10s) routinely exceeded the 30s budget. Stop-hook has
// a 60s budget; self-assessment can safely use 50s within it.
// Exported for tests + observability.
export const DEFAULT_TIMEOUT_MS = 50000;

// max_tokens sent with every self-assessment request.
const MAX_TOKENS = 512;

/**
 * Call the Anthropic-compatible `/v1/messages` endpoint with the
 * self-assessment prompt and return the text of the first content block.
 *
 * Fail-quiet contract: any failure (missing key, already-aborted signal,
 * network error, non-2xx status, JSON parse error, non-string text, timeout,
 * caller abort) resolves to null. Never throws or rejects.
 *
 * Cancellation: the request is always issued with an internal AbortSignal.
 * That signal is aborted when `timeoutMs` elapses or when the caller's
 * `abortSignal` fires, and in both cases the call resolves to null right away
 * even if `fetchImpl` ignores signals. The single timer and the abort listener
 * are released in `finally`, so a completed call leaves nothing behind that
 * could keep the process alive.
 *
 * @param {object} [opts]
 * @param {string|null|undefined} opts.prompt — the user's original prompt text
 * @param {string|null|undefined} opts.recommendedNode — node recommended by the router
 * @param {string|null|undefined} opts.actualNode — node actually chosen
 * @param {string[]|null|undefined} opts.chainExecuted — chain steps that were executed
 * @param {string|null|undefined} opts.apiKey — ROUTER_LLM_KEY value; falsy → null
 * @param {string} [opts.baseUrl] — API base URL (trailing slashes are ignored);
 *   defaults to ROUTER_LLM_BASE_URL from the environment, then DEFAULT_BASE_URL
 * @param {string} [opts.model] — model alias
 * @param {Function} [opts.fetchImpl] — override fetch (for tests)
 * @param {number} [opts.timeoutMs] — abort timeout in ms
 * @param {AbortSignal} [opts.abortSignal] — external abort signal
 * @returns {Promise<string|null>}
 */
export async function callSelfAssessmentApi({
  prompt,
  recommendedNode,
  actualNode,
  chainExecuted,
  apiKey,
  // Operator switch 2026-06-12: the env override lives in the default value
  // (mirrors callAnthropicAPI).
  baseUrl = process.env.ROUTER_LLM_BASE_URL || DEFAULT_BASE_URL,
  model = DEFAULT_MODEL,
  fetchImpl,
  timeoutMs = DEFAULT_TIMEOUT_MS,
  abortSignal,
} = {}) {
  // Guard: no key, or the caller already cancelled → skip silently.
  if (!apiKey || abortSignal?.aborted) return null;

  const controller = new AbortController();
  let timeoutId;
  let giveUp;

  try {
    const fetchFn = fetchImpl ?? globalThis.fetch;
    const { system, user } = buildSelfAssessmentPrompt({ prompt, recommendedNode, actualNode, chainExecuted });

    // Tolerate a base URL configured with a trailing slash.
    const url = `${String(baseUrl).replace(/\/+$/, '')}/v1/messages`;
    const body = JSON.stringify({
      model,
      max_tokens: MAX_TOKENS,
      system,
      messages: [{ role: 'user', content: user }],
    });

    // `cutoff` resolves to null on timeout or caller abort. The fetch is always
    // raced against it so the deadline still wins when fetchImpl ignores the
    // AbortSignal (e.g. in tests).
    const cutoff = new Promise((resolve) => {
      giveUp = () => {
        controller.abort(); // lets real fetch() implementations cancel the request
        resolve(null);
      };
    });
    timeoutId = setTimeout(giveUp, timeoutMs);
    abortSignal?.addEventListener?.('abort', giveUp, { once: true });

    // Wrapped in an async function so that a synchronous throw from fetchFn
    // (including fetchFn being undefined) is handled like any other rejection.
    const request = (async () => {
      const response = await fetchFn(url, {
        method: 'POST',
        headers: {
          'content-type': 'application/json',
          'x-api-key': apiKey,
          'authorization': `Bearer ${apiKey}`,
          'anthropic-version': '2023-06-01',
        },
        body,
        signal: controller.signal,
      });
      if (!response.ok) return null;
      const data = await response.json();
      const text = data?.content?.[0]?.text;
      return typeof text === 'string' ? text : null;
    })().catch(() => null);

    // Race: first settlement wins.
    return await Promise.race([request, cutoff]);
  } catch {
    // Unexpected outer error → fail-quiet.
    return null;
  } finally {
    // Release the timer and listener so nothing outlives the call.
    clearTimeout(timeoutId);
    abortSignal?.removeEventListener?.('abort', giveUp);
  }
}
