Files
portal/tools/observer-self-assessment-api.test.mjs
T
Дмитрий b437597286 feat(observer): wire real LLM self-assessment API call — phase 3 deferred #5
- NEW tools/observer-self-assessment-api.mjs
  buildSelfAssessmentPrompt({ prompt, recommendedNode, actualNode, chainExecuted })
  pure, handles nulls/undefined, returns { system, user } strings
  callSelfAssessmentApi(opts) async, fail-quiet — returns string|null
  AbortController + timeout race (works even when fetchImpl ignores signal)
  guards: !apiKey -> return null immediately (no fetch call)
  guards: !response.ok, fetch throw, JSON parse error -> return null
  passes x-api-key + authorization headers per ProxyAPI two-header pattern
  readRuntimeFlag(name, { homedir, fsImpl }) reads ~/.claude/runtime/<name>.json
  returns value field string or 'off' on missing/malformed

- NEW tools/observer-self-assessment-api.test.mjs: 14 tests, 0 failed
  1. buildSelfAssessmentPrompt all 4 fields interpolated
  2. buildSelfAssessmentPrompt null/undefined inputs (2 tests)
  3. callSelfAssessmentApi returns null when apiKey falsy (2 tests)
  4. returns content[0].text on 200 ok (fake fetchImpl)
  5. returns null on non-2xx (response.ok=false)
  6. returns null on fetch throw
  7. returns null on timeout (never-resolving fake fetchImpl, timeoutMs=30ms)
  8. sends correct headers+body shape (spy fetchImpl)
  9. readRuntimeFlag reads {"value":"on"}, returns 'off' on missing/malformed (4 tests)

- EDIT tools/observer-stop-hook.mjs
  import { callSelfAssessmentApi, readRuntimeFlag } added
  stdin 'end' handler made async
  step 3.5 inserted between buildEpisodeFromContext and appendEpisode:
  reads self-assessment-mode runtime flag; if 'on' and ROUTER_LLM_KEY set,
  calls callSelfAssessmentApi and attaches ep.self_assessment via buildSelfAssessment()
  fail-quiet: on any error apiResult=null -> self_assessment_pending: true

Regression: 628/628 tests passed (35 test files), 0 failed
gitleaks: 0 leaks on all 3 files

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-25 14:28:26 +03:00

261 lines
9.5 KiB
JavaScript

/**
* Tests for tools/observer-self-assessment-api.mjs
* Phase 3 deferred follow-up #5: real LLM self-assessment API call.
* TDD — these tests are written BEFORE the implementation exists.
*/
import { describe, it, expect } from 'vitest';
import {
buildSelfAssessmentPrompt,
callSelfAssessmentApi,
readRuntimeFlag,
} from './observer-self-assessment-api.mjs';
// ---------------------------------------------------------------------------
// 1. buildSelfAssessmentPrompt — all 4 fields interpolated
// ---------------------------------------------------------------------------
describe('buildSelfAssessmentPrompt — all fields interpolated', () => {
  it('returns system+user strings with all 4 fields present in user string', () => {
    // Every field carries a value that appears nowhere else in the fixture, so
    // each assertion below can only be satisfied by its own field. (Previously
    // chainExecuted reused the recommended/actual node ids, which meant the
    // chain was never verified independently.)
    const input = {
      prompt: 'напиши тест для биллинга',
      recommendedNode: '#62',
      actualNode: '#19',
      chainExecuted: ['#41', '#77'],
    };

    const { system, user } = buildSelfAssessmentPrompt(input);

    expect(typeof system).toBe('string');
    expect(system.length).toBeGreaterThan(0);
    expect(typeof user).toBe('string');

    expect(user).toContain(input.prompt);
    expect(user).toContain(input.recommendedNode);
    expect(user).toContain(input.actualNode);
    // The serialisation format of the chain is not pinned here; only that every
    // executed node id shows up somewhere in the user message.
    for (const node of input.chainExecuted) {
      expect(user).toContain(node);
    }
  });
});
// ---------------------------------------------------------------------------
// 2. buildSelfAssessmentPrompt — handles missing/null inputs gracefully
// ---------------------------------------------------------------------------
describe('buildSelfAssessmentPrompt — null/undefined inputs', () => {
  it('returns valid strings when all inputs are undefined/null', () => {
    const built = buildSelfAssessmentPrompt({});

    expect(typeof built.system).toBe('string');
    expect(typeof built.user).toBe('string');

    // Missing fields must not leak into the text as stringified
    // `undefined` values or as a default-stringified object.
    for (const leaked of ['undefined', '[object Object]']) {
      expect(built.user).not.toContain(leaked);
    }
  });

  it('handles null recommendedNode and empty chainExecuted', () => {
    const sparseInput = {
      prompt: 'test',
      recommendedNode: null,
      actualNode: 'direct',
      chainExecuted: [],
    };

    const built = buildSelfAssessmentPrompt(sparseInput);

    // The prompt text itself must still make it into the user message.
    expect(built.user).toContain('test');
  });
});
// ---------------------------------------------------------------------------
// 3. callSelfAssessmentApi — returns null when apiKey is missing/empty
// ---------------------------------------------------------------------------
describe('callSelfAssessmentApi — missing apiKey', () => {
  /**
   * Builds a fetch stand-in that only records whether it was invoked.
   * Both cases inject it so that a broken guard can never fall through to a
   * real fetch implementation from inside the test run.
   */
  const makeFetchSpy = () => {
    const spy = { called: false };
    spy.fetchImpl = async () => { spy.called = true; };
    return spy;
  };

  it('returns null immediately when apiKey is falsy (no fetch call)', async () => {
    const spy = makeFetchSpy();
    const result = await callSelfAssessmentApi({
      prompt: 'x',
      recommendedNode: '#1',
      actualNode: '#1',
      chainExecuted: [],
      apiKey: '',
      fetchImpl: spy.fetchImpl,
    });
    expect(result).toBeNull();
    expect(spy.called).toBe(false);
  });

  it('returns null when apiKey is undefined', async () => {
    // Previously this case passed no fetchImpl at all, so it could not detect
    // (or contain) an accidental request; it now uses the same spy as above.
    const spy = makeFetchSpy();
    const result = await callSelfAssessmentApi({
      prompt: 'x',
      recommendedNode: '#1',
      actualNode: '#1',
      chainExecuted: [],
      apiKey: undefined,
      fetchImpl: spy.fetchImpl,
    });
    expect(result).toBeNull();
    expect(spy.called).toBe(false);
  });
});
// ---------------------------------------------------------------------------
// 4. callSelfAssessmentApi — returns text on 200 + content[0].text
// ---------------------------------------------------------------------------
describe('callSelfAssessmentApi — successful 200 response', () => {
  it('returns content[0].text on ok response', async () => {
    // The payload is itself a JSON document; the test expects it to be handed
    // back verbatim as a string.
    const expectedText = '{"summary":"chose correctly","confidence_in_choice":0.9,"what_could_be_better":null,"lesson_learned":null}';

    // Minimal response stand-in: an `ok` flag and a `json()` body reader.
    const stubFetch = async () => {
      const payload = { content: [{ type: 'text', text: expectedText }] };
      return {
        ok: true,
        json: async () => payload,
      };
    };

    const callOptions = {
      prompt: 'do something',
      recommendedNode: '#19',
      actualNode: '#19',
      chainExecuted: ['#19'],
      apiKey: 'test-key',
      baseUrl: 'https://api.example.com/anthropic',
      model: 'claude-sonnet-4-6',
      fetchImpl: stubFetch,
      timeoutMs: 5000,
    };

    const received = await callSelfAssessmentApi(callOptions);

    expect(received).toBe(expectedText);
  });
});
// ---------------------------------------------------------------------------
// 5. callSelfAssessmentApi — returns null on non-2xx (r.ok=false)
// ---------------------------------------------------------------------------
describe('callSelfAssessmentApi — non-2xx response', () => {
  it('returns null when response.ok is false', async () => {
    // Stand-in for a rate-limited reply: `ok` is false and the body carries
    // an error object instead of content.
    const rateLimitedFetch = async () => {
      const errorBody = { error: { message: 'rate limited' } };
      return {
        ok: false,
        status: 429,
        json: async () => errorBody,
      };
    };

    const outcome = await callSelfAssessmentApi({
      prompt: 'x',
      recommendedNode: '#1',
      actualNode: '#1',
      chainExecuted: [],
      apiKey: 'test-key',
      fetchImpl: rateLimitedFetch,
      timeoutMs: 5000,
    });

    expect(outcome).toBeNull();
  });
});
// ---------------------------------------------------------------------------
// 6. callSelfAssessmentApi — returns null on fetch throw
// ---------------------------------------------------------------------------
describe('callSelfAssessmentApi — fetch throws', () => {
  it('returns null (fail-quiet) when fetch throws a network error', async () => {
    // An async function that throws yields a rejected promise, which is how a
    // transport failure surfaces from fetch.
    const rejectingFetch = async () => {
      throw new Error('network error');
    };

    const outcome = await callSelfAssessmentApi({
      prompt: 'x',
      recommendedNode: '#1',
      actualNode: '#1',
      chainExecuted: [],
      apiKey: 'test-key',
      fetchImpl: rejectingFetch,
      timeoutMs: 5000,
    });

    expect(outcome).toBeNull();
  });
});
// ---------------------------------------------------------------------------
// 7. callSelfAssessmentApi — returns null on timeout
// ---------------------------------------------------------------------------
describe('callSelfAssessmentApi — timeout', () => {
  it('returns null when fetch never resolves within timeoutMs', async () => {
    // The stub hands back a promise that stays pending forever and takes no
    // notice of any abort signal passed in its options.
    const hangingFetch = (_url, _opts) => new Promise(() => {});

    const startedAt = Date.now();
    const outcome = await callSelfAssessmentApi({
      prompt: 'x',
      recommendedNode: '#1',
      actualNode: '#1',
      chainExecuted: [],
      apiKey: 'test-key',
      fetchImpl: hangingFetch,
      timeoutMs: 30, // short on purpose so the test stays fast
    });
    const elapsedMs = Date.now() - startedAt;

    expect(outcome).toBeNull();
    // Generous upper bound: the call must come back near the 30 ms timeout
    // rather than hang.
    expect(elapsedMs).toBeLessThan(500);
  });
});
// ---------------------------------------------------------------------------
// 8. callSelfAssessmentApi — sends correct headers and body
// ---------------------------------------------------------------------------
describe('callSelfAssessmentApi — request format', () => {
  it('sends correct headers and body shape (spy fetchImpl)', async () => {
    // Spy fetch: remembers the URL and options it was called with and replies
    // with a minimal successful response.
    let capturedUrl;
    let capturedOpts;
    const spyFetch = async (url, opts) => {
      capturedUrl = url;
      capturedOpts = opts;
      return {
        ok: true,
        json: async () => ({ content: [{ type: 'text', text: 'ok' }] }),
      };
    };

    await callSelfAssessmentApi({
      prompt: 'test prompt',
      recommendedNode: '#62',
      actualNode: '#62',
      chainExecuted: ['#62'],
      apiKey: 'my-secret-key',
      baseUrl: 'https://api.proxyapi.ru/anthropic',
      model: 'claude-sonnet-4-6',
      fetchImpl: spyFetch,
      timeoutMs: 5000,
    });

    expect(capturedUrl).toContain('/v1/messages');

    // HTTP header names are case-insensitive, so normalise the keys before
    // looking them up instead of depending on the caller's casing.
    const headers = Object.fromEntries(
      Object.entries(capturedOpts.headers).map(([name, value]) => [name.toLowerCase(), value]),
    );
    // Both credentials headers are required. The earlier
    // `authorization || x-api-key` check passed with only one of them present.
    expect(headers['x-api-key']).toBeTruthy();
    expect(headers['authorization']).toBeTruthy();

    const body = JSON.parse(capturedOpts.body);
    expect(body.model).toBe('claude-sonnet-4-6');
    expect(Array.isArray(body.messages)).toBe(true);
    expect(body.messages[0].role).toBe('user');
    expect(body.max_tokens).toBeGreaterThan(0);
  });
});
// ---------------------------------------------------------------------------
// 9. readRuntimeFlag — reads value from file; returns 'off' on missing/malformed
// ---------------------------------------------------------------------------
describe('readRuntimeFlag', () => {
  const FLAG_NAME = 'self-assessment-mode';

  it('returns the value from {"value":"on"} when file exists', () => {
    // In-memory fs stand-in: only a path ending in the flag's file name
    // "exists", and any read returns the flag document.
    const stubFs = {
      existsSync: (p) => p.endsWith('self-assessment-mode.json'),
      readFileSync: (_p, _enc) => '{"value":"on"}',
    };
    const homeDir = '/fake/home';

    const flag = readRuntimeFlag(FLAG_NAME, { homedir: homeDir, fsImpl: stubFs });

    expect(flag).toBe('on');
  });

  it('returns "off" when file does not exist', () => {
    // Nothing exists, and an attempted read would throw.
    const stubFs = {
      existsSync: () => false,
      readFileSync: () => {
        throw new Error('no file');
      },
    };

    const flag = readRuntimeFlag(FLAG_NAME, { homedir: '/fake', fsImpl: stubFs });

    expect(flag).toBe('off');
  });

  it('returns "off" on malformed JSON', () => {
    const stubFs = {
      existsSync: () => true,
      readFileSync: () => 'NOT JSON',
    };

    const flag = readRuntimeFlag(FLAG_NAME, { homedir: '/fake', fsImpl: stubFs });

    expect(flag).toBe('off');
  });

  it('returns "off" when value field is missing', () => {
    const stubFs = {
      existsSync: () => true,
      // Valid JSON, but it has a "mode" key instead of "value".
      readFileSync: () => '{"mode":"on"}',
    };

    const flag = readRuntimeFlag(FLAG_NAME, { homedir: '/fake', fsImpl: stubFs });

    expect(flag).toBe('off');
  });
});