// Captured file-viewer header (not part of the module): brain/tools/judge-evaluator.test.mjs — 51 lines, 2.2 KiB, JavaScript

// tools/judge-evaluator.test.mjs
import { describe, it, expect } from 'vitest';
import { evaluatePostfactum } from './judge-evaluator.mjs';
// Suite for evaluatePostfactum: each case passes a list of earlier verdicts plus
// the problems that surfaced afterwards, then checks the reported miss counters
// and the flag.
describe('evaluatePostfactum (D31, §9.4): прошлые GO vs что вылезло ниже', () => {
  // Fixture factory for a verdict record (keys in the order verdict_id, decision, source).
  const makeVerdict = (verdict_id, decision = 'GO', source = 'judge') => ({
    verdict_id,
    decision,
    source,
  });

  // Title: "no surfaced problems -> no misses, no flag".
  it('нет всплывших проблем → промахов нет, флага нет', () => {
    const input = {
      verdicts: [makeVerdict('v1')],
      surfaced: [],
    };

    const report = evaluatePostfactum(input);

    expect(report.misses).toBe(0);
    expect(report.flag).toBe(false);
  });

  // Title: "a problem surfaced downstream and a GO verdict let it through -> miss".
  // One of two GO verdicts is referenced by a surfaced problem; the expected rate
  // of 0.5 is above the supplied threshold of 0.4, and the flag is expected.
  it('проблема вылезла ниже и её пропустил GO-вердикт → промах', () => {
    const pastVerdicts = [makeVerdict('v1'), makeVerdict('v2')];
    const surfacedProblems = [{ missed_by: 'v1', where: 'gate3' }];

    const report = evaluatePostfactum({
      verdicts: pastVerdicts,
      surfaced: surfacedProblems,
      threshold: 0.4,
    });

    expect(report.misses).toBe(1);
    expect(report.totalGo).toBe(2);
    expect(report.missRate).toBeCloseTo(0.5);
    expect(report.flag).toBe(true);
  });

  // Title: "the problem points at a NO-GO verdict (the judge caught it) -> not a
  // miss for that verdict".
  it('проблема указывает на NO-GO вердикт (судья поймал) → не промах того вердикта', () => {
    const input = {
      verdicts: [makeVerdict('v1', 'NO-GO')],
      surfaced: [{ missed_by: 'v1', where: 'owner' }],
    };

    const report = evaluatePostfactum(input);

    expect(report.misses).toBe(0);
  });

  // Title: "breakdown by source (judge vs router)".
  // A threshold is supplied, but only the per-source miss counters are asserted.
  it('разбивка по источнику (судья vs роутер)', () => {
    const judgeVerdict = makeVerdict('j1', 'GO', 'judge');
    const routerVerdict = makeVerdict('r1', 'GO', 'router');

    const report = evaluatePostfactum({
      verdicts: [judgeVerdict, routerVerdict],
      surfaced: [{ missed_by: 'j1' }, { missed_by: 'r1' }],
      threshold: 0.9,
    });

    expect(report.bySource.judge.misses).toBe(1);
    expect(report.bySource.router.misses).toBe(1);
  });

  // Title: "below the threshold -> no flag".
  // Ten GO verdicts (v0..v9) with a single surfaced problem: the expected rate of
  // 0.1 is under the supplied threshold of 0.2.
  it('ниже порога → флага нет', () => {
    const verdicts = Array.from({ length: 10 }, (_unused, index) => makeVerdict(`v${index}`));
    const surfaced = [{ missed_by: 'v0' }];

    const report = evaluatePostfactum({ verdicts, surfaced, threshold: 0.2 });

    expect(report.missRate).toBeCloseTo(0.1);
    expect(report.flag).toBe(false);
  });
});