7b4da1477e
Brain-retro #6 follow-up #2 (consolidated). Eight independent fixes: A1 — task_cost wiring (cost tracking) - router-prehook.mjs: capture classifier LLM usage via onUsage callback, persist to state.task_cost.classifier_input_tokens / output_tokens. - observer-transcript-parser.mjs: merge router-state.task_cost on top of extractTokenUsage(turn). State-file values win for classifier/ self_assessment/reviewer fields. - New buildCostFromClassifierUsage() exported from router-prehook. - Verified live: state file now shows real input_tokens=190 / output_tokens=598 / cache_read=10075 (was 0 before). A2 — self-assessment coverage - observer-self-assessment-api.mjs: DEFAULT_TIMEOUT_MS 10s -> 30s. - .claude/settings.json: Stop-hook timeout 15s -> 60s. - Same Windows TLS handshake issue. Was 85% no_self_assessment in retro #6. B3 — brain-retro SKILL.md reconciliation - Step 5b: batch=default for N>=20, subagent for N<20. C1 — dead-code cleanup - Removed recommendNode import + getClassificationMap + getDormancy from observer-transcript-parser.mjs. G — parseClassifierResponse Pass 3 (fixLLMJsonQuirks) - Root cause: real Sonnet output sometimes contains raw newlines inside string values (multi-line reason_for_choice) and trailing commas, which strict JSON.parse rejects. Result was llm_error_type=parse_null on every other call, falling back to regex with task_type=unknown. - Fix: after Pass 1 (clean) and Pass 2 (brace-extract) fail, try Pass 3 that escapes raw newline/tab inside string values and strips trailing commas before final JSON.parse attempt. Pure char-walk, no JSON5 dep. H — 'unknown' added to NON_BLOCKING_TASK_TYPES in router-tool-gate.mjs - Until G fully proves itself, blocking Bash/Edit on unknown is too strict. With G in place, parse_null should be rare; H gives a safety net. Tests added: +9 across 5 test files. Regression: 913 vitest tests in tools/. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
215 lines
8.6 KiB
JavaScript
215 lines
8.6 KiB
JavaScript
/**
 * tools/observer-self-assessment-api.mjs
 *
 * Phase 3 deferred follow-up #5: real LLM self-assessment API call.
 *
 * Exports:
 *   buildSelfAssessmentPrompt({ prompt, recommendedNode, actualNode, chainExecuted })
 *   callSelfAssessmentApi({ prompt, recommendedNode, actualNode, chainExecuted,
 *                           apiKey, baseUrl, model, fetchImpl, timeoutMs, abortSignal })
 *   readRuntimeFlag(name, { homedir, fsImpl })
 *
 * All functions are pure / fail-quiet — they never throw in production.
 * callSelfAssessmentApi always returns string | null (null = skip self-assessment).
 */
|
|
|
|
import { join } from 'path';
|
|
import { existsSync, readFileSync } from 'fs';
|
|
import { homedir as osHomedir } from 'os';
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Prompt builder (pure)
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Assemble the two-part (system + user) prompt used for routing self-assessment.
 *
 * The system part is a fixed Russian instruction that asks the model to judge
 * the routing decision and answer with a bare JSON object of exactly four
 * fields (summary, confidence_in_choice, what_could_be_better, lesson_learned).
 * The user part lists the four context values supplied by the caller.
 *
 * Fallbacks applied to missing context:
 *   - prompt: any falsy value is rendered as '(пусто)'
 *   - recommendedNode: null/undefined is rendered as 'не определён'
 *   - actualNode: null/undefined is rendered as 'direct'
 *   - chainExecuted: anything but a non-empty array is rendered as '[]';
 *     a non-empty array is joined with ' → '
 *
 * @param {object} [opts]
 * @param {string|null|undefined} opts.prompt — original user prompt text
 * @param {string|null|undefined} opts.recommendedNode — node the router recommended
 * @param {string|null|undefined} opts.actualNode — node that was actually used
 * @param {string[]|null|undefined} opts.chainExecuted — executed chain steps, in order
 * @returns {{ system: string, user: string }}
 */
export function buildSelfAssessmentPrompt({ prompt, recommendedNode, actualNode, chainExecuted } = {}) {
  // Resolve display values for the context section.
  const promptText = prompt || '(пусто)';
  const recommendedText = recommendedNode ?? 'не определён';
  const actualText = actualNode ?? 'direct';

  let chainText = '[]';
  if (Array.isArray(chainExecuted) && chainExecuted.length > 0) {
    chainText = chainExecuted.join(' → ');
  }

  // System instruction: role, output contract, field list, closing reminder.
  const preamble = [
    'Ты — внутренний наблюдатель роутинговой системы Claude Code.',
    'Твоя задача — честно оценить качество роутингового решения, принятого в этой сессии.',
    'Отвечай ТОЛЬКО валидным JSON-объектом без markdown-обёрток, ровно 4 поля:',
  ];
  const fieldDescriptions = [
    ' "summary": строка — краткое описание принятого решения (до 120 символов)',
    ' "confidence_in_choice": число от 0.0 до 1.0 — насколько оптимальным был выбор',
    ' "what_could_be_better": строка или null — что можно было сделать иначе',
    ' "lesson_learned": строка или null — чему учит этот эпизод для будущих сессий',
  ];
  const closing = 'Не добавляй лишних полей. Не используй markdown. Только JSON.';
  const system = [...preamble, ...fieldDescriptions, closing].join('\n');

  // User message: heading, blank line, the four context lines, blank line, request.
  const contextLines = [
    `Запрос пользователя: ${promptText}`,
    `Рекомендованный узел роутером: ${recommendedText}`,
    `Фактически выбранный узел: ${actualText}`,
    `Выполненная цепочка: ${chainText}`,
  ];
  const user = [
    'Контекст роутингового решения:',
    '',
    ...contextLines,
    '',
    'Оцени это решение. Верни JSON с 4 полями.',
  ].join('\n');

  return { system, user };
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Runtime flag reader
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Look up a runtime flag stored in <home>/.claude/runtime/<name>.json.
 *
 * The parsed file's `mode` field is used when it is not null/undefined;
 * otherwise the legacy `value` field is consulted. The result is returned
 * only if it is a string.
 *
 * Deliberately best-effort: a missing file, a read error, invalid JSON or a
 * non-string field all yield 'off' instead of an exception.
 *
 * @param {string} name — flag file basename without the .json extension
 * @param {object} [opts]
 * @param {string} [opts.homedir] — home directory override (for tests); defaults to os.homedir()
 * @param {{ existsSync: Function, readFileSync: Function }} [opts.fsImpl] — fs override (for tests)
 * @returns {string} the flag string, or 'off' when it cannot be determined
 */
export function readRuntimeFlag(name, { homedir, fsImpl } = {}) {
  const baseDir = homedir ?? osHomedir();
  const io = fsImpl ?? { existsSync, readFileSync };

  try {
    const flagPath = join(baseDir, '.claude', 'runtime', `${name}.json`);
    if (io.existsSync(flagPath)) {
      const contents = JSON.parse(io.readFileSync(flagPath, 'utf-8'));
      // `mode` is the canonical key; `value` is kept as a legacy alias.
      const setting = contents.mode ?? contents.value;
      if (typeof setting === 'string') return setting;
    }
  } catch {
    // Any fs/parse failure falls through to the 'off' default below.
  }

  return 'off';
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// API caller (async, fail-quiet)
|
|
// ---------------------------------------------------------------------------
|
|
|
|
const DEFAULT_BASE_URL = 'https://api.proxyapi.ru/anthropic';
const DEFAULT_MODEL = 'claude-sonnet-4-6';
// A2 (2026-05-26): raised 10000 → 30000. On Windows the first ProxyAPI fetch
// triggers a TLS handshake that often takes 20+ seconds; the 10 s limit killed
// every first call → 85% no_self_assessment in brain-retro #6. The Stop-hook
// outer timeout in .claude/settings.json was raised to 60 s alongside this.
const DEFAULT_TIMEOUT_MS = 30000;
const MAX_TOKENS = 512;

/**
 * Call the Anthropic-style `/v1/messages` endpoint with the self-assessment
 * prompt and return the text of the first content block.
 *
 * Fail-quiet contract: every failure — missing key, prompt/body construction
 * error, missing or throwing fetch, network error, non-2xx status, response
 * JSON parse error, unexpected response shape, timeout — resolves to null.
 * The returned promise never rejects.
 *
 * Timeout handling: the request is raced against a single timer so the call
 * returns after `timeoutMs` even if `fetchImpl` ignores the abort signal.
 * When no `abortSignal` is supplied, that same timer also aborts an internal
 * AbortController so a real fetch() is cancelled. The timer is always cleared
 * before returning, so no pending timer outlives the call.
 *
 * @param {object} [opts]
 * @param {string|null|undefined} opts.prompt — user's original prompt text
 * @param {string|null|undefined} opts.recommendedNode — node recommended by the router
 * @param {string|null|undefined} opts.actualNode — node actually chosen
 * @param {string[]|null|undefined} opts.chainExecuted — executed chain steps
 * @param {string|null|undefined} opts.apiKey — API key (ROUTER_LLM_KEY value); falsy → null
 * @param {string} [opts.baseUrl] — API base URL; '/v1/messages' is appended
 * @param {string} [opts.model] — model alias
 * @param {Function} [opts.fetchImpl] — fetch override (for tests); defaults to globalThis.fetch
 * @param {number} [opts.timeoutMs] — overall time limit in ms
 * @param {AbortSignal} [opts.abortSignal] — external abort signal, passed to fetch as-is
 * @returns {Promise<string|null>} response text, or null to skip self-assessment
 */
export async function callSelfAssessmentApi({
  prompt,
  recommendedNode,
  actualNode,
  chainExecuted,
  apiKey,
  baseUrl = DEFAULT_BASE_URL,
  model = DEFAULT_MODEL,
  fetchImpl,
  timeoutMs = DEFAULT_TIMEOUT_MS,
  abortSignal,
} = {}) {
  // Guard: no key → skip silently.
  if (!apiKey) return null;

  let controller;
  let timeoutId;

  try {
    const fetchFn = fetchImpl ?? globalThis.fetch;

    // Request construction lives inside the try so that an unexpected throw
    // here still honours the fail-quiet contract.
    const { system, user } = buildSelfAssessmentPrompt({ prompt, recommendedNode, actualNode, chainExecuted });
    const url = `${baseUrl}/v1/messages`;
    const body = JSON.stringify({
      model,
      max_tokens: MAX_TOKENS,
      system,
      messages: [{ role: 'user', content: user }],
    });

    // Use the caller's signal when provided; otherwise own a controller so
    // the timeout below can cancel the in-flight request.
    let signal = abortSignal;
    if (!signal) {
      controller = new AbortController();
      signal = controller.signal;
    }

    // One timer does both jobs (abort + resolve the race with null). Keeping
    // a single handle means `finally` can cancel it completely; a second,
    // untracked timer would keep the event loop alive for up to `timeoutMs`
    // after a fast response.
    const timeoutPromise = new Promise((resolve) => {
      timeoutId = setTimeout(() => {
        controller?.abort();
        resolve(null);
      }, timeoutMs);
    });

    const fetchPromise = fetchFn(url, {
      method: 'POST',
      headers: {
        'content-type': 'application/json',
        'x-api-key': apiKey,
        'authorization': `Bearer ${apiKey}`,
        'anthropic-version': '2023-06-01',
      },
      body,
      signal,
    })
      .then(async (response) => {
        if (!response.ok) return null;
        const data = await response.json();
        const text = data?.content?.[0]?.text;
        return typeof text === 'string' ? text : null;
      })
      // Network errors, aborts and JSON parse errors all map to null.
      .catch(() => null);

    // Race: first settlement wins.
    const result = await Promise.race([fetchPromise, timeoutPromise]);
    return result ?? null;
  } catch {
    // Unexpected setup error (e.g. no fetch available) → fail-quiet.
    return null;
  } finally {
    // clearTimeout(undefined) is a no-op, so this is safe on every path.
    clearTimeout(timeoutId);
  }
}
|