/**
 * Tests for tools/observer-self-assessment-api.mjs
 * Phase 3 deferred follow-up #5: real LLM self-assessment API call.
 * TDD — these tests are written BEFORE the implementation exists.
 */
import { describe, it, expect } from 'vitest';
import {
  buildSelfAssessmentPrompt,
  callSelfAssessmentApi,
  readRuntimeFlag,
} from './observer-self-assessment-api.mjs';

// ---------------------------------------------------------------------------
// 1. buildSelfAssessmentPrompt — all 4 fields interpolated
// ---------------------------------------------------------------------------

describe('buildSelfAssessmentPrompt — all fields interpolated', () => {
  it('returns system+user strings with all 4 fields present in user string', () => {
    const { system, user } = buildSelfAssessmentPrompt({
      prompt: 'напиши тест для биллинга',
      recommendedNode: '#62',
      actualNode: '#19',
      // '#33' occurs ONLY in the chain, so asserting on it proves that
      // chainExecuted itself is serialised into the prompt. Re-checking '#62'
      // or '#19' would pass even if the chain were dropped, because those ids
      // are already present via recommendedNode / actualNode.
      chainExecuted: ['#19', '#33', '#62'],
    });

    expect(typeof system).toBe('string');
    expect(system.length).toBeGreaterThan(0);
    expect(typeof user).toBe('string');

    expect(user).toContain('напиши тест для биллинга'); // prompt
    expect(user).toContain('#62'); // recommendedNode
    expect(user).toContain('#19'); // actualNode
    expect(user).toContain('#33'); // chainExecuted (chain-only id)
  });
});

// ---------------------------------------------------------------------------
// 2.
// buildSelfAssessmentPrompt — handles missing/null inputs gracefully
// ---------------------------------------------------------------------------

describe('buildSelfAssessmentPrompt — null/undefined inputs', () => {
  it('returns valid strings when all inputs are undefined/null', () => {
    const { system, user } = buildSelfAssessmentPrompt({});

    expect(typeof system).toBe('string');
    expect(typeof user).toBe('string');
    // Missing fields must be rendered as fallback placeholders: the builder
    // must neither throw nor leak raw JS stringifications into the prompt.
    expect(user).not.toContain('undefined');
    expect(user).not.toContain('[object Object]');
  });

  it('handles null recommendedNode and empty chainExecuted', () => {
    const { user } = buildSelfAssessmentPrompt({
      prompt: 'test',
      recommendedNode: null,
      actualNode: 'direct',
      chainExecuted: [],
    });

    expect(user).toContain('test');
  });
});

// ---------------------------------------------------------------------------
// 3. callSelfAssessmentApi — returns null when apiKey is missing/empty
// ---------------------------------------------------------------------------

describe('callSelfAssessmentApi — missing apiKey', () => {
  it('returns null immediately when apiKey is falsy (no fetch call)', async () => {
    let fetchCalled = false;
    const fakeFetch = async () => {
      fetchCalled = true;
    };

    const result = await callSelfAssessmentApi({
      prompt: 'x',
      recommendedNode: '#1',
      actualNode: '#1',
      chainExecuted: [],
      apiKey: '',
      fetchImpl: fakeFetch,
    });

    expect(result).toBeNull();
    expect(fetchCalled).toBe(false);
  });

  it('returns null when apiKey is undefined', async () => {
    // Inject a spy here as well: without it a regression could fall through to
    // a real fetch implementation (network access from a unit test), and the
    // "no request is made" half of the contract would go unchecked.
    let fetchCalled = false;
    const fakeFetch = async () => {
      fetchCalled = true;
    };

    const result = await callSelfAssessmentApi({
      prompt: 'x',
      recommendedNode: '#1',
      actualNode: '#1',
      chainExecuted: [],
      apiKey: undefined,
      fetchImpl: fakeFetch,
    });

    expect(result).toBeNull();
    expect(fetchCalled).toBe(false);
  });
});

// ---------------------------------------------------------------------------
// 4.
// callSelfAssessmentApi — returns text on 200 + content[0].text
// ---------------------------------------------------------------------------

describe('callSelfAssessmentApi — successful 200 response', () => {
  it('returns content[0].text on ok response', async () => {
    const responseText =
      '{"summary":"chose correctly","confidence_in_choice":0.9,"what_could_be_better":null,"lesson_learned":null}';

    // Stubbed transport: an OK response whose JSON body carries one text part.
    const okPayload = { content: [{ type: 'text', text: responseText }] };
    const stubFetch = () =>
      Promise.resolve({
        ok: true,
        json: () => Promise.resolve(okPayload),
      });

    const request = {
      prompt: 'do something',
      recommendedNode: '#19',
      actualNode: '#19',
      chainExecuted: ['#19'],
      apiKey: 'test-key',
      baseUrl: 'https://api.example.com/anthropic',
      model: 'claude-sonnet-4-6',
      fetchImpl: stubFetch,
      timeoutMs: 5000,
    };

    const answer = await callSelfAssessmentApi(request);

    expect(answer).toBe(responseText);
  });
});

// ---------------------------------------------------------------------------
// 5. callSelfAssessmentApi — returns null on non-2xx (r.ok=false)
// ---------------------------------------------------------------------------

describe('callSelfAssessmentApi — non-2xx response', () => {
  it('returns null when response.ok is false', async () => {
    // Stubbed transport: a 429 response with an error body.
    const errorPayload = { error: { message: 'rate limited' } };
    const stubFetch = () =>
      Promise.resolve({
        ok: false,
        status: 429,
        json: () => Promise.resolve(errorPayload),
      });

    const request = {
      prompt: 'x',
      recommendedNode: '#1',
      actualNode: '#1',
      chainExecuted: [],
      apiKey: 'test-key',
      fetchImpl: stubFetch,
      timeoutMs: 5000,
    };

    const answer = await callSelfAssessmentApi(request);

    expect(answer).toBeNull();
  });
});

// ---------------------------------------------------------------------------
// 6.
// callSelfAssessmentApi — returns null on fetch throw
// ---------------------------------------------------------------------------

describe('callSelfAssessmentApi — fetch throws', () => {
  it('returns null (fail-quiet) when fetch throws a network error', async () => {
    const fakeFetch = async () => {
      throw new Error('network error');
    };

    const result = await callSelfAssessmentApi({
      prompt: 'x',
      recommendedNode: '#1',
      actualNode: '#1',
      chainExecuted: [],
      apiKey: 'test-key',
      fetchImpl: fakeFetch,
      timeoutMs: 5000,
    });

    expect(result).toBeNull();
  });
});

// ---------------------------------------------------------------------------
// 7. callSelfAssessmentApi — returns null on timeout
// ---------------------------------------------------------------------------

// brain-retro #7 C3 (2026-05-27): self_assessment coverage dropped to 22%.
// Root cause: Windows TLS handshake (20-25s first call, see memory
// feedback_windows_tls_handshake.md) + Sonnet latency (3-10s) routinely exceeds
// the 30s timeout, returning null → episode marked self_assessment_pending.
// Stop-hook has 60s budget; self-assessment can safely use 50s within it.
describe('callSelfAssessmentApi — C3 default timeout bumped 30s → 50s (2026-05-27)', () => {
  it('default timeoutMs is >= 50000 ms (TLS handshake + Sonnet on Windows)', async () => {
    // Waiting out a 50 s default is not viable in a unit test, so the check is
    // split in two: (1) a caller-supplied timeoutMs is honoured, and (2) the
    // default itself is asserted through the exported constant.

    // This fetch never settles on its own; only an abort signal releases it.
    const fakeFetch = (_url, opts) =>
      new Promise((resolve) => {
        opts.signal?.addEventListener('abort', () => resolve(null));
      });

    // Path 1: caller-supplied timeoutMs is honored (fast path).
    const mod = await import('./observer-self-assessment-api.mjs');
    const r = await mod.callSelfAssessmentApi({
      prompt: 'x',
      recommendedNode: '#1',
      actualNode: '#1',
      chainExecuted: [],
      apiKey: 'test-key',
      fetchImpl: fakeFetch,
      timeoutMs: 1,
    });
    expect(r).toBeNull();

    // Path 2: exported constant — required for C3 (gives explicit budget invariant).
    expect(typeof mod.DEFAULT_TIMEOUT_MS).toBe('number');
    expect(mod.DEFAULT_TIMEOUT_MS).toBeGreaterThanOrEqual(50000);
  });
});

describe('callSelfAssessmentApi — A2 default timeout bumped 10s → 30s (2026-05-26)', () => {
  it('default timeoutMs is >= 30000 ms (TLS handshake budget on Windows)', async () => {
    // A real-time probe (a fetch that takes ~25 s: longer than the old 10 s
    // default, shorter than 30 s) would be far too slow for a unit test.
    // Instead: assert the exported constant directly, then run a quick 50 ms
    // call with no explicit timeoutMs as a smoke check that the default does
    // not abort a fast request.
    const mod = await import('./observer-self-assessment-api.mjs');
    expect(mod.DEFAULT_TIMEOUT_MS).toBeGreaterThanOrEqual(30000);

    let abortedEarly = false;
    const fakeFetch = (_url, opts) =>
      new Promise((resolve) => {
        // Resolve after 50 ms — far below any sane default timeout.
        const timer = setTimeout(
          () => resolve({ ok: true, json: () => Promise.resolve({ content: [{ text: '{}' }] }) }),
          50,
        );
        opts.signal?.addEventListener('abort', () => {
          abortedEarly = true;
          clearTimeout(timer); // do not leave a stray timer behind on abort
          resolve(null);
        });
      });

    const result = await mod.callSelfAssessmentApi({
      prompt: 'x',
      recommendedNode: '#1',
      actualNode: '#1',
      chainExecuted: [],
      apiKey: 'test-key',
      fetchImpl: fakeFetch,
      // no explicit timeoutMs — use default
    });

    // 50ms fetch should NOT be aborted by default timeout (>= 30000ms means lots of headroom)
    expect(abortedEarly).toBe(false);
    // The text of the first content part comes back as a string.
    expect(typeof result).toBe('string');
  });
});

describe('callSelfAssessmentApi — timeout', () => {
  it('returns null when fetch never resolves within timeoutMs', async () => {
    // fakeFetch returns a promise that never settles.
    const fakeFetch = () => new Promise(() => { /* never */ });

    const start = Date.now();
    const result = await callSelfAssessmentApi({
      prompt: 'x',
      recommendedNode: '#1',
      actualNode: '#1',
      chainExecuted: [],
      apiKey: 'test-key',
      fetchImpl: fakeFetch,
      timeoutMs: 30, // 30 ms timeout — very fast for test
    });
    const elapsed = Date.now() - start;

    expect(result).toBeNull();
    // Should resolve around the timeout, not hang indefinitely
    expect(elapsed).toBeLessThan(500);
  });
});

// ---------------------------------------------------------------------------
// 8.
// callSelfAssessmentApi — sends correct headers and body
// ---------------------------------------------------------------------------

describe('callSelfAssessmentApi — request format', () => {
  it('sends correct headers and body shape (spy fetchImpl)', async () => {
    let capturedUrl;
    let capturedOpts;
    // Spy transport: records the request and answers with a minimal OK body.
    const fakeFetch = async (url, opts) => {
      capturedUrl = url;
      capturedOpts = opts;
      return {
        ok: true,
        json: async () => ({ content: [{ type: 'text', text: 'ok' }] }),
      };
    };

    await callSelfAssessmentApi({
      prompt: 'test prompt',
      recommendedNode: '#62',
      actualNode: '#62',
      chainExecuted: ['#62'],
      apiKey: 'my-secret-key',
      baseUrl: 'https://api.proxyapi.ru/anthropic',
      model: 'claude-sonnet-4-6',
      fetchImpl: fakeFetch,
      timeoutMs: 5000,
    });

    expect(capturedUrl).toContain('/v1/messages');

    // Either auth header style is accepted.
    const headers = capturedOpts.headers;
    expect(headers['authorization'] || headers['x-api-key']).toBeTruthy();

    const body = JSON.parse(capturedOpts.body);
    expect(body.model).toBe('claude-sonnet-4-6');
    expect(Array.isArray(body.messages)).toBe(true);
    expect(body.messages[0].role).toBe('user');
    expect(body.max_tokens).toBeGreaterThan(0);
  });

  // LLM provider switch 2026-06-12: the ROUTER_LLM_BASE_URL env var switches
  // the provider here as well (mirrors the callAnthropicAPI fix — this module
  // had its own hard-coded default).
  it('ROUTER_LLM_BASE_URL env переключает дефолтный base URL (явный baseUrl не передан)', async () => {
    // Remember any pre-existing value so the test leaves the environment
    // exactly as it found it, instead of unconditionally deleting the variable.
    const previousBaseUrl = process.env.ROUTER_LLM_BASE_URL;
    process.env.ROUTER_LLM_BASE_URL = 'https://api.aitunnel.ru';
    try {
      let capturedUrl;
      const fakeFetch = async (url) => {
        capturedUrl = url;
        return {
          ok: true,
          json: async () => ({ content: [{ type: 'text', text: 'ok' }] }),
        };
      };

      await callSelfAssessmentApi({
        prompt: 'p',
        apiKey: 'k',
        fetchImpl: fakeFetch,
        timeoutMs: 5000,
      });

      expect(capturedUrl).toBe('https://api.aitunnel.ru/v1/messages');
    } finally {
      if (previousBaseUrl === undefined) {
        delete process.env.ROUTER_LLM_BASE_URL;
      } else {
        process.env.ROUTER_LLM_BASE_URL = previousBaseUrl;
      }
    }
  });
});

// ---------------------------------------------------------------------------
// 9.
// readRuntimeFlag — reads value from file; returns 'off' on missing/malformed
// ---------------------------------------------------------------------------

describe('readRuntimeFlag', () => {
  const FLAG_NAME = 'self-assessment-mode';

  // Fake fs whose flag file always exists and holds the given raw text.
  const fileContaining = (text) => ({
    existsSync: () => true,
    readFileSync: () => text,
  });

  // Reads the flag through the injected fake fs and home directory.
  const readWith = (fsImpl, homedir = '/fake') =>
    readRuntimeFlag(FLAG_NAME, { homedir, fsImpl });

  it('returns the value from {"value":"on"} when file exists', () => {
    // Only paths ending in the expected flag file name are reported as existing.
    const fsStub = {
      existsSync: (p) => p.endsWith('self-assessment-mode.json'),
      readFileSync: (_p, _enc) => '{"value":"on"}',
    };

    expect(readWith(fsStub, '/fake/home')).toBe('on');
  });

  it('returns "off" when file does not exist', () => {
    const fsStub = {
      existsSync: () => false,
      readFileSync: () => {
        throw new Error('no file');
      },
    };

    expect(readWith(fsStub)).toBe('off');
  });

  it('returns "off" on malformed JSON', () => {
    expect(readWith(fileContaining('NOT JSON'))).toBe('off');
  });

  it('reads "mode" field when "value" is absent (post-050b349a fix)', () => {
    // Since 050b349a the runtime files use {mode: "on"} as the canonical shape;
    // the legacy "value" key remains an accepted fallback. Here only "mode" is
    // present, and it alone must yield 'on'.
    expect(readWith(fileContaining('{"mode":"on"}'))).toBe('on');
  });

  it('returns "off" when neither "mode" nor "value" present', () => {
    expect(readWith(fileContaining('{"other":"thing"}'))).toBe('off');
  });
});