feat(observer): wire real LLM self-assessment API call — phase 3 deferred #5
- NEW tools/observer-self-assessment-api.mjs
buildSelfAssessmentPrompt({ prompt, recommendedNode, actualNode, chainExecuted })
pure, handles nulls/undefined, returns { system, user } strings
callSelfAssessmentApi(opts) async, fail-quiet — returns string|null
AbortController + timeout race (works even when fetchImpl ignores signal)
guards: !apiKey -> return null immediately (no fetch call)
guards: !response.ok, fetch throw, JSON parse error -> return null
passes x-api-key + authorization headers per ProxyAPI two-header pattern
readRuntimeFlag(name, { homedir, fsImpl }) reads ~/.claude/runtime/<name>.json
returns value field string or 'off' on missing/malformed
- NEW tools/observer-self-assessment-api.test.mjs: 14 tests, 0 failed
1. buildSelfAssessmentPrompt all 4 fields interpolated
2. buildSelfAssessmentPrompt null/undefined inputs (2 tests)
3. callSelfAssessmentApi returns null when apiKey falsy (2 tests)
4. returns content[0].text on 200 ok (fake fetchImpl)
5. returns null on non-2xx (response.ok=false)
6. returns null on fetch throw
7. returns null on timeout (never-resolving fake fetchImpl, timeoutMs=30ms)
8. sends correct headers+body shape (spy fetchImpl)
9. readRuntimeFlag reads {"value":"on"}, returns 'off' on missing/malformed (4 tests)
- EDIT tools/observer-stop-hook.mjs
import { callSelfAssessmentApi, readRuntimeFlag } added
stdin 'end' handler made async
step 3.5 inserted between buildEpisodeFromContext and appendEpisode:
reads self-assessment-mode runtime flag; if 'on' and ROUTER_LLM_KEY set,
calls callSelfAssessmentApi and attaches ep.self_assessment via buildSelfAssessment()
fail-quiet: on any error apiResult=null -> self_assessment_pending: true
Regression: 628/628 tests passed (35 test files), 0 failed
gitleaks: 0 leaks on all 3 files
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,207 @@
|
||||
/**
|
||||
* tools/observer-self-assessment-api.mjs
|
||||
*
|
||||
* Phase 3 deferred follow-up #5: real LLM self-assessment API call.
|
||||
*
|
||||
* Exports:
|
||||
* buildSelfAssessmentPrompt({ prompt, recommendedNode, actualNode, chainExecuted })
|
||||
* callSelfAssessmentApi({ prompt, recommendedNode, actualNode, chainExecuted,
|
||||
* apiKey, baseUrl, model, fetchImpl, timeoutMs, abortSignal })
|
||||
* readRuntimeFlag(name, { homedir, fsImpl })
|
||||
*
|
||||
* All functions are pure / fail-quiet — they never throw in production.
|
||||
* callSelfAssessmentApi always returns string | null (null = skip self-assessment).
|
||||
*/
|
||||
|
||||
import { join } from 'path';
|
||||
import { existsSync, readFileSync } from 'fs';
|
||||
import { homedir as osHomedir } from 'os';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Prompt builder (pure)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Normalise one context field for display inside the prompt.
 *
 * Non-blank strings are returned unchanged, finite numbers and bigints are
 * stringified, and everything else (null, undefined, blank strings, objects,
 * booleans, …) collapses to `fallback` so the prompt never contains
 * "undefined", "[object Object]" or an empty slot.
 *
 * @param {unknown} value — raw field value
 * @param {string} fallback — text to use when the value is unusable
 * @returns {string}
 */
function displayTextOrFallback(value, fallback) {
  if (typeof value === 'string') {
    return value.trim() === '' ? fallback : value;
  }
  if ((typeof value === 'number' && Number.isFinite(value)) || typeof value === 'bigint') {
    return String(value);
  }
  return fallback;
}

/**
 * Build the self-assessment prompt pair sent to the model.
 *
 * System: Russian instruction asking the model to evaluate the routing choice
 *         and answer with a bare JSON object of exactly 4 fields
 *         (summary, confidence_in_choice, what_could_be_better, lesson_learned).
 *
 * User:   the 4 context fields, one per line. Every field goes through the
 *         same normalisation, so missing, blank or non-text values are shown
 *         as a placeholder instead of an empty slot.
 *
 * Pure: no I/O, no mutation of the arguments.
 *
 * @param {object} [opts]
 * @param {string|null|undefined} opts.prompt — the user's original prompt text
 * @param {string|null|undefined} opts.recommendedNode — node recommended by router
 * @param {string|null|undefined} opts.actualNode — node actually chosen / 'direct'
 * @param {string[]|null|undefined} opts.chainExecuted — list of chain steps executed
 * @returns {{ system: string, user: string }}
 */
export function buildSelfAssessmentPrompt({ prompt, recommendedNode, actualNode, chainExecuted } = {}) {
  const safePrompt = displayTextOrFallback(prompt, '(пусто)');
  const safeRecommended = displayTextOrFallback(recommendedNode, 'не определён');
  const safeActual = displayTextOrFallback(actualNode, 'direct');

  // Drop unusable chain entries first so the joined chain never contains
  // dangling separators such as " → #7".
  const steps = Array.isArray(chainExecuted)
    ? chainExecuted.map((step) => displayTextOrFallback(step, '')).filter((step) => step !== '')
    : [];
  const safeChain = steps.length > 0 ? steps.join(' → ') : '[]';

  const system = [
    'Ты — внутренний наблюдатель роутинговой системы Claude Code.',
    'Твоя задача — честно оценить качество роутингового решения, принятого в этой сессии.',
    'Отвечай ТОЛЬКО валидным JSON-объектом без markdown-обёрток, ровно 4 поля:',
    ' "summary": строка — краткое описание принятого решения (до 120 символов)',
    ' "confidence_in_choice": число от 0.0 до 1.0 — насколько оптимальным был выбор',
    ' "what_could_be_better": строка или null — что можно было сделать иначе',
    ' "lesson_learned": строка или null — чему учит этот эпизод для будущих сессий',
    'Не добавляй лишних полей. Не используй markdown. Только JSON.',
  ].join('\n');

  const user = [
    'Контекст роутингового решения:',
    '',
    `Запрос пользователя: ${safePrompt}`,
    `Рекомендованный узел роутером: ${safeRecommended}`,
    `Фактически выбранный узел: ${safeActual}`,
    `Выполненная цепочка: ${safeChain}`,
    '',
    'Оцени это решение. Верни JSON с 4 полями.',
  ].join('\n');

  return { system, user };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Runtime flag reader
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Look up a runtime flag stored as JSON in ~/.claude/runtime/<name>.json.
 *
 * The flag is the file's top-level "value" property. Anything that prevents
 * reading a string from there — file absent, unreadable, invalid JSON,
 * "value" missing or not a string — yields 'off'. Errors raised while
 * locating, reading or parsing the file are swallowed.
 *
 * @param {string} name — flag file basename without .json
 * @param {object} [opts]
 * @param {string} [opts.homedir] — override home dir (for tests)
 * @param {{ existsSync: Function, readFileSync: Function }} [opts.fsImpl] — override fs (for tests)
 * @returns {string} the stored flag value, or 'off'
 */
export function readRuntimeFlag(name, { homedir, fsImpl } = {}) {
  const baseDir = homedir ?? osHomedir();
  const io = fsImpl ?? { existsSync, readFileSync };

  try {
    const flagFile = join(baseDir, '.claude', 'runtime', `${name}.json`);
    if (io.existsSync(flagFile)) {
      // Destructuring throws on a JSON `null` document; the catch below
      // turns that into 'off' just like any other malformed file.
      const { value } = JSON.parse(io.readFileSync(flagFile, 'utf-8'));
      if (typeof value === 'string') {
        return value;
      }
    }
  } catch {
    // fall through to the default below
  }
  return 'off';
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// API caller (async, fail-quiet)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const DEFAULT_BASE_URL = 'https://api.proxyapi.ru/anthropic';
const DEFAULT_MODEL = 'claude-sonnet-4-6';
const DEFAULT_TIMEOUT_MS = 10000;
const MAX_TOKENS = 512;

/**
 * POST the self-assessment prompt to `<baseUrl>/v1/messages` and return the
 * text of the first content block.
 *
 * Fail-quiet contract: every failure mode — missing key, already-aborted
 * caller signal, prompt/body construction error, network error, non-2xx
 * status, unparsable JSON, unexpected response shape, timeout, external
 * abort — resolves to null. The returned promise never rejects.
 *
 * Cancellation: the request always runs on an internal AbortController.
 * Both the timeout and the caller's `abortSignal` (when given) abort that
 * controller AND settle the result immediately, so the call returns on time
 * even when `fetchImpl` ignores the signal. The timer and the abort listener
 * are always released before returning, so a finished call leaves nothing
 * behind that could keep the process alive.
 *
 * @param {object} [opts]
 * @param {string|null|undefined} opts.prompt
 * @param {string|null|undefined} opts.recommendedNode
 * @param {string|null|undefined} opts.actualNode
 * @param {string[]|null|undefined} opts.chainExecuted
 * @param {string|null|undefined} opts.apiKey — ROUTER_LLM_KEY value; falsy → null without calling fetch
 * @param {string} [opts.baseUrl] — API base URL; trailing slashes are ignored
 * @param {string} [opts.model] — model alias
 * @param {Function} [opts.fetchImpl] — override fetch (for tests); defaults to globalThis.fetch
 * @param {number} [opts.timeoutMs] — timeout in ms; non-finite or negative values fall back to the default
 * @param {AbortSignal} [opts.abortSignal] — external abort signal
 * @returns {Promise<string|null>}
 */
export async function callSelfAssessmentApi({
  prompt,
  recommendedNode,
  actualNode,
  chainExecuted,
  apiKey,
  baseUrl = DEFAULT_BASE_URL,
  model = DEFAULT_MODEL,
  fetchImpl,
  timeoutMs = DEFAULT_TIMEOUT_MS,
  abortSignal,
} = {}) {
  // Guard: no key → skip silently, before any network activity.
  if (!apiKey) return null;
  // A caller that has already cancelled gets the same fail-quiet answer.
  if (abortSignal?.aborted) return null;

  const effectiveTimeoutMs = Number.isFinite(timeoutMs) && timeoutMs >= 0
    ? timeoutMs
    : DEFAULT_TIMEOUT_MS;

  const controller = new AbortController();
  let timeoutId;
  let giveUp;

  try {
    const fetchFn = fetchImpl ?? globalThis.fetch;
    const { system, user } = buildSelfAssessmentPrompt({ prompt, recommendedNode, actualNode, chainExecuted });

    // Strip trailing slashes so a base URL like ".../anthropic/" does not produce "//v1/messages".
    const url = `${String(baseUrl).replace(/\/+$/, '')}/v1/messages`;
    const body = JSON.stringify({
      model,
      max_tokens: MAX_TOKENS,
      system,
      messages: [{ role: 'user', content: user }],
    });

    // Settles to null on timeout or external abort. We race against it
    // instead of relying on fetch honouring the signal, because injected
    // fetch implementations may ignore it.
    const cancelled = new Promise((resolve) => {
      giveUp = () => {
        controller.abort();
        resolve(null);
      };
    });
    timeoutId = setTimeout(giveUp, effectiveTimeoutMs);
    if (abortSignal) {
      abortSignal.addEventListener('abort', giveUp, { once: true });
    }

    // The async IIFE turns synchronous throws from fetchFn into rejections,
    // and the trailing catch maps every failure to null.
    const request = (async () => {
      const response = await fetchFn(url, {
        method: 'POST',
        headers: {
          'content-type': 'application/json',
          'x-api-key': apiKey,
          'authorization': `Bearer ${apiKey}`,
          'anthropic-version': '2023-06-01',
        },
        body,
        signal: controller.signal,
      });
      if (!response.ok) return null;
      const data = await response.json();
      const text = data?.content?.[0]?.text;
      return typeof text === 'string' ? text : null;
    })().catch(() => null);

    // Race: first settlement wins.
    const result = await Promise.race([request, cancelled]);
    return result ?? null;
  } catch {
    // Unexpected outer error → fail-quiet
    return null;
  } finally {
    if (timeoutId !== undefined) clearTimeout(timeoutId);
    if (giveUp) abortSignal?.removeEventListener?.('abort', giveUp);
  }
}
|
||||
@@ -0,0 +1,260 @@
|
||||
/**
|
||||
* Tests for tools/observer-self-assessment-api.mjs
|
||||
* Phase 3 deferred follow-up #5: real LLM self-assessment API call.
|
||||
* TDD — these tests are written BEFORE the implementation exists.
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import {
|
||||
buildSelfAssessmentPrompt,
|
||||
callSelfAssessmentApi,
|
||||
readRuntimeFlag,
|
||||
} from './observer-self-assessment-api.mjs';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 1. buildSelfAssessmentPrompt — all 4 fields interpolated
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('buildSelfAssessmentPrompt — all fields interpolated', () => {
  it('returns system+user strings with all 4 fields present in user string', () => {
    // Every field uses a value that appears nowhere else in the fixture, so
    // each assertion can only be satisfied by its own field.
    const { system, user } = buildSelfAssessmentPrompt({
      prompt: 'напиши тест для биллинга',
      recommendedNode: '#62',
      actualNode: '#19',
      chainExecuted: ['#3', '#41'],
    });

    expect(typeof system).toBe('string');
    expect(system.length).toBeGreaterThan(0);
    expect(typeof user).toBe('string');

    expect(user).toContain('напиши тест для биллинга');
    expect(user).toContain('#62'); // recommendedNode
    expect(user).toContain('#19'); // actualNode
    expect(user).toContain('#3 → #41'); // chainExecuted, serialised in order
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 2. buildSelfAssessmentPrompt — handles missing/null inputs gracefully
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('buildSelfAssessmentPrompt — null/undefined inputs', () => {
  it('returns valid strings when all inputs are undefined/null', () => {
    const { system, user } = buildSelfAssessmentPrompt({});

    expect(typeof system).toBe('string');
    expect(typeof user).toBe('string');
    // Should contain fallback placeholders, not throw
    expect(user).not.toContain('undefined');
    expect(user).not.toContain('[object Object]');
  });

  it('handles null recommendedNode and empty chainExecuted', () => {
    const { user } = buildSelfAssessmentPrompt({
      prompt: 'test',
      recommendedNode: null,
      actualNode: 'direct',
      chainExecuted: [],
    });

    expect(user).toContain('test');
    // The null node and the empty chain must be rendered as their
    // placeholders rather than as the literal "null" or an empty slot.
    expect(user).not.toContain('null');
    expect(user).toContain('не определён');
    expect(user).toContain('direct');
    expect(user).toContain('[]');
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 3. callSelfAssessmentApi — returns null when apiKey is missing/empty
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('callSelfAssessmentApi — missing apiKey', () => {
  // Routing context shared by both cases; only the key handling differs.
  const routing = { prompt: 'x', recommendedNode: '#1', actualNode: '#1', chainExecuted: [] };

  it('returns null immediately when apiKey is falsy (no fetch call)', async () => {
    let fetchCalls = 0;
    const countingFetch = async () => {
      fetchCalls += 1;
    };

    const outcome = await callSelfAssessmentApi({
      ...routing,
      apiKey: '',
      fetchImpl: countingFetch,
    });

    expect(outcome).toBeNull();
    expect(fetchCalls).toBe(0);
  });

  it('returns null when apiKey is undefined', async () => {
    const outcome = await callSelfAssessmentApi({ ...routing, apiKey: undefined });

    expect(outcome).toBeNull();
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 4. callSelfAssessmentApi — returns text on 200 + content[0].text
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('callSelfAssessmentApi — successful 200 response', () => {
  it('returns content[0].text on ok response', async () => {
    const assessmentJson = '{"summary":"chose correctly","confidence_in_choice":0.9,"what_could_be_better":null,"lesson_learned":null}';

    // Minimal Response look-alike: only the members the caller reads.
    const okResponse = {
      ok: true,
      json: () => Promise.resolve({ content: [{ type: 'text', text: assessmentJson }] }),
    };
    const stubFetch = () => Promise.resolve(okResponse);

    const outcome = await callSelfAssessmentApi({
      prompt: 'do something',
      recommendedNode: '#19',
      actualNode: '#19',
      chainExecuted: ['#19'],
      apiKey: 'test-key',
      baseUrl: 'https://api.example.com/anthropic',
      model: 'claude-sonnet-4-6',
      fetchImpl: stubFetch,
      timeoutMs: 5000,
    });

    expect(outcome).toBe(assessmentJson);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 5. callSelfAssessmentApi — returns null on non-2xx (r.ok=false)
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('callSelfAssessmentApi — non-2xx response', () => {
  it('returns null when response.ok is false', async () => {
    // Rate-limit style reply: the error payload must never be surfaced.
    const rateLimited = {
      ok: false,
      status: 429,
      json: () => Promise.resolve({ error: { message: 'rate limited' } }),
    };
    const stubFetch = () => Promise.resolve(rateLimited);

    const outcome = await callSelfAssessmentApi({
      prompt: 'x',
      recommendedNode: '#1',
      actualNode: '#1',
      chainExecuted: [],
      apiKey: 'test-key',
      fetchImpl: stubFetch,
      timeoutMs: 5000,
    });

    expect(outcome).toBeNull();
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 6. callSelfAssessmentApi — returns null on fetch throw
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('callSelfAssessmentApi — fetch throws', () => {
  it('returns null (fail-quiet) when fetch throws a network error', async () => {
    // A rejected promise is what an async fetch produces when it throws.
    const failingFetch = () => Promise.reject(new Error('network error'));

    const outcome = await callSelfAssessmentApi({
      prompt: 'x',
      recommendedNode: '#1',
      actualNode: '#1',
      chainExecuted: [],
      apiKey: 'test-key',
      fetchImpl: failingFetch,
      timeoutMs: 5000,
    });

    expect(outcome).toBeNull();
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 7. callSelfAssessmentApi — returns null on timeout
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('callSelfAssessmentApi — timeout', () => {
  it('returns null when fetch never resolves within timeoutMs', async () => {
    // Simulates a hung connection: the returned promise is never settled.
    const neverSettles = () => new Promise(() => {});

    const startedAt = Date.now();
    const outcome = await callSelfAssessmentApi({
      prompt: 'x',
      recommendedNode: '#1',
      actualNode: '#1',
      chainExecuted: [],
      apiKey: 'test-key',
      fetchImpl: neverSettles,
      timeoutMs: 30, // short on purpose so the test stays fast
    });
    const tookMs = Date.now() - startedAt;

    expect(outcome).toBeNull();
    // Generous upper bound: proves the call returned because of the
    // timeout instead of hanging.
    expect(tookMs).toBeLessThan(500);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 8. callSelfAssessmentApi — sends correct headers and body
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('callSelfAssessmentApi — request format', () => {
  it('sends correct headers and body shape (spy fetchImpl)', async () => {
    let capturedUrl;
    let capturedOpts;
    const fakeFetch = async (url, opts) => {
      capturedUrl = url;
      capturedOpts = opts;
      return {
        ok: true,
        json: async () => ({ content: [{ type: 'text', text: 'ok' }] }),
      };
    };

    await callSelfAssessmentApi({
      prompt: 'test prompt',
      recommendedNode: '#62',
      actualNode: '#62',
      chainExecuted: ['#62'],
      apiKey: 'my-secret-key',
      baseUrl: 'https://api.proxyapi.ru/anthropic',
      model: 'claude-sonnet-4-6',
      fetchImpl: fakeFetch,
      timeoutMs: 5000,
    });

    expect(capturedUrl).toBe('https://api.proxyapi.ru/anthropic/v1/messages');
    expect(capturedOpts.method).toBe('POST');

    // Two-header pattern: BOTH credentials headers must be present and
    // carry the key — checking "either one" would let a regression through.
    const { headers } = capturedOpts;
    expect(headers['x-api-key']).toBe('my-secret-key');
    expect(headers['authorization']).toBe('Bearer my-secret-key');
    expect(headers['content-type']).toBe('application/json');

    const body = JSON.parse(capturedOpts.body);
    expect(body.model).toBe('claude-sonnet-4-6');
    expect(typeof body.system).toBe('string');
    expect(Array.isArray(body.messages)).toBe(true);
    expect(body.messages).toHaveLength(1);
    expect(body.messages[0].role).toBe('user');
    expect(body.messages[0].content).toContain('test prompt');
    expect(body.max_tokens).toBeGreaterThan(0);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 9. readRuntimeFlag — reads value from file; returns 'off' on missing/malformed
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('readRuntimeFlag', () => {
  const FLAG = 'self-assessment-mode';

  // Fake fs whose file always "exists" and always has the given contents.
  const fsWithContents = (contents) => ({
    existsSync: () => true,
    readFileSync: () => contents,
  });

  it('returns the value from {"value":"on"} when file exists', () => {
    const pathAwareFs = {
      existsSync: (candidate) => candidate.endsWith('self-assessment-mode.json'),
      readFileSync: (_candidate, _encoding) => '{"value":"on"}',
    };

    const flag = readRuntimeFlag(FLAG, { homedir: '/fake/home', fsImpl: pathAwareFs });

    expect(flag).toBe('on');
  });

  it('returns "off" when file does not exist', () => {
    const absentFs = {
      existsSync: () => false,
      readFileSync: () => {
        throw new Error('no file');
      },
    };

    const flag = readRuntimeFlag(FLAG, { homedir: '/fake', fsImpl: absentFs });

    expect(flag).toBe('off');
  });

  it('returns "off" on malformed JSON', () => {
    const flag = readRuntimeFlag(FLAG, { homedir: '/fake', fsImpl: fsWithContents('NOT JSON') });

    expect(flag).toBe('off');
  });

  it('returns "off" when value field is missing', () => {
    // Valid JSON, but the flag lives under the wrong key.
    const flag = readRuntimeFlag(FLAG, { homedir: '/fake', fsImpl: fsWithContents('{"mode":"on"}') });

    expect(flag).toBe('off');
  });
});
|
||||
@@ -19,6 +19,7 @@ import { join } from 'path';
|
||||
import { sanitize, sanitizeWithCount } from './observer-pii-filter.mjs';
|
||||
import { parseTranscript, extractLastUserPromptText } from './observer-transcript-parser.mjs';
|
||||
import { detectMethodDirected, loadKnownNodes } from './observer-routing-detector.mjs';
|
||||
import { callSelfAssessmentApi, readRuntimeFlag } from './observer-self-assessment-api.mjs';
|
||||
|
||||
const REQUIRED_FIELDS = ['task_id', 'timestamps', 'path_type', 'outcome', 'primary_rationale'];
|
||||
const V2_FIELDS = [
|
||||
@@ -294,7 +295,7 @@ function currentMonth() {
|
||||
if (process.argv[1] && process.argv[1].replace(/\\/g, '/').endsWith('/observer-stop-hook.mjs')) {
|
||||
const chunks = [];
|
||||
process.stdin.on('data', (c) => chunks.push(c));
|
||||
process.stdin.on('end', () => {
|
||||
process.stdin.on('end', async () => {
|
||||
let ctx = {};
|
||||
try {
|
||||
const raw = Buffer.concat(chunks).toString('utf-8');
|
||||
@@ -315,6 +316,23 @@ if (process.argv[1] && process.argv[1].replace(/\\/g, '/').endsWith('/observer-s
|
||||
}
|
||||
try {
|
||||
const ep = buildEpisodeFromContext(ctx, transcriptText);
|
||||
|
||||
// Step 3.5: self-assessment API call (fail-quiet).
|
||||
// Only runs when the runtime flag is 'on' and ROUTER_LLM_KEY is set.
|
||||
const saMode = readRuntimeFlag('self-assessment-mode');
|
||||
const saApiKey = process.env.ROUTER_LLM_KEY || null;
|
||||
if (saMode === 'on' && saApiKey) {
|
||||
const rat = ep.primary_rationale ?? {};
|
||||
const apiResult = await callSelfAssessmentApi({
|
||||
prompt: ctx.prompt || null,
|
||||
recommendedNode: rat.recommended_node || null,
|
||||
actualNode: rat.node_chosen || null,
|
||||
chainExecuted: rat.chain_executed || [],
|
||||
apiKey: saApiKey,
|
||||
});
|
||||
ep.self_assessment = buildSelfAssessment({ apiResult });
|
||||
}
|
||||
|
||||
// Always write the episode first — exit-0-safe (spec §5.1 step 1).
|
||||
appendEpisode(ep);
|
||||
// Then the routing-gate (spec §5.1 steps 2-4).
|
||||
|
||||
Reference in New Issue
Block a user