feat(brain): CREATE reviewer fallback handler + verify subagent (phase 3 task 18)

Phase 3 Task 18 (G16 closure). Spec §4.6 — direct Opus API fallback for the
brain-retro reviewer when the Claude Code subagent
.claude/agents/reviewer-agent.md crashes / times out.

- tools/brain-retro-opus-reviewer.mjs (NEW — G16: file did not exist):
  + buildReviewPrompt(episode) — adaptive prompt:
    v4 → full (alternatives_considered + self_assessment + chain_gaps cues)
    v3 → omits alternatives_considered
    v2 → omits both alternatives + self_assessment
  + parseReview(text) — strips ```json fence, requires the 7 review
    fields (node_quality / chain_quality / gap_assessment /
    agent_self_assessment_accuracy / error_root_cause / outcome_reviewed /
    reasoning) + alternative_better (nullable). Passes through
    reviewer_error escalations from the subagent verbatim.
  + reviewViaDirectApi(episode, options) — async wrapper around
    callAnthropicAPI with REVIEWER_MODEL. Returns parsed review or null.
- tools/brain-retro-opus-reviewer.test.mjs (NEW): 9 tests (4 prompt +
  5 parse: complete / fence / malformed / missing field / reviewer_error
  escalation).
- Reviewer subagent verified: .claude/agents/reviewer-agent.md exists
  with frontmatter spec §4.6 (tools: Read/Grep/Glob/Skill; model: opus;
  8-dim review contract). No edits were made to the agent file: step 1 of
  Task 18 is a verification, not a rewrite, and the agent already conforms.
This commit is contained in:
Дмитрий
2026-05-25 12:24:00 +03:00
parent f9ce56813b
commit 1e74b2c95e
2 changed files with 192 additions and 0 deletions
+127
View File
@@ -0,0 +1,127 @@
#!/usr/bin/env node
/**
* brain-retro reviewer — direct Opus API fallback handler (Phase 3 Task 18).
*
* Spec §4.6: the primary reviewer is a Claude Code subagent
* (`.claude/agents/reviewer-agent.md`) spawned via Task() from /brain-retro.
* THIS module is the FALLBACK handler invoked by the controller when the
* subagent crashes / times out: direct Opus API call with the same adaptive
* review prompt (but no cross-episode reading, no skill invocations).
*
* Pure layer: buildReviewPrompt + parseReview (this file's tests). Network
* layer: reviewViaDirectApi (zero-cost wrapper around router-classifier's
* callAnthropicAPI; the controller decides when to call it).
*
* G16 — file did not exist before Phase 3 Task 18; created here.
*/
import { REVIEWER_MODEL } from './router-config.mjs';
// Keys that parseReview looks up on a parsed review object; if any of them is
// undefined the review is rejected. `alternative_better` is not in this list.
const REQUIRED_REVIEW_FIELDS = [
  // Verdicts on the routing decision itself.
  'node_quality', 'chain_quality', 'gap_assessment',
  // Verdicts on the agent's own reporting and on failure causes.
  'agent_self_assessment_accuracy', 'error_root_cause',
  // Final outcome plus the reviewer's free-text justification.
  'outcome_reviewed', 'reasoning',
];
/**
 * Build the adaptive review prompt for a given episode. Pure.
 *
 * The prompt is made of sections separated by one blank line:
 * instructions, the list of the 8 review fields, optional notes that depend
 * on the episode's schema version, the episode serialised as JSON, and a
 * closing reminder.
 *
 * Adaptive prompt template (spec §4.6):
 *  - v4 → full prompt including alternatives_considered, self_assessment,
 *         chain_gaps cues.
 *  - v3 → omits alternatives_considered.
 *  - v2 → omits both alternatives_considered and self_assessment.
 *  - v1 → skipped upstream (caller filters them out).
 *
 * @param {object} episode - Episode record; `schema_version` selects the notes.
 *   A missing or non-numeric version is treated as 0 (no notes).
 * @returns {string} The prompt text.
 */
export function buildReviewPrompt(episode) {
  const version = Number(episode?.schema_version) || 0;

  const cues = [
    'node_quality: correct | wrong_node | overkill | underkill | disputable',
    'chain_quality: correct | missing_step | extra_step | wrong_order | n/a',
    'gap_assessment: acceptable | mistake_should_complete | mistake_should_not_start | n/a',
    'agent_self_assessment_accuracy: accurate | over_confident | under_confident | no_self_assessment',
    'error_root_cause: wrong_skill | wrong_tool | wrong_chain_order | external_failure | n/a',
    'alternative_better: <node_id> | null',
    'outcome_reviewed: success | soft_success | rework | blocked',
    'reasoning: 1-3 sentences',
  ];

  const adaptiveNotes = [];
  if (version >= 3) {
    adaptiveNotes.push('Episode is v3+: primary_rationale carries triggers/candidates/boundaries.');
  }
  // NOTE(review): the version table above says v3 only drops
  // alternatives_considered, yet the self_assessment and chain_gaps notes are
  // emitted for v4 only — confirm against spec §4.6 before changing the gate.
  if (version >= 4) {
    adaptiveNotes.push('Episode is v4: classifier_output.alternatives_considered tells you what the classifier weighed.');
    adaptiveNotes.push('self_assessment (if present and not pending) is the agent\'s post-hoc judgement — compare honesty.');
    adaptiveNotes.push('execution_trace.chain_gaps shows whether the recommended chain ran in full.');
  }

  const bullet = (line) => ' - ' + line;

  // Sections are assembled explicitly (rather than filtering falsy lines out
  // of one flat list) so that the blank separators survive and the notes
  // section disappears entirely when there is nothing to say.
  const sections = [
    [
      'You are the independent reviewer of routing decisions for the Лидерра brain-governance experiment.',
      'Return ONLY a JSON object with the 8 fields below. No prose, no code fences.',
    ],
    ['Fields:', ...cues.map(bullet)],
  ];
  if (adaptiveNotes.length > 0) {
    sections.push(['Notes for this schema version:', ...adaptiveNotes.map(bullet)]);
  }
  // JSON.stringify yields undefined for values it cannot serialise (e.g. an
  // undefined episode); print an explicit null instead of a dangling header.
  sections.push(['Episode (JSON):', JSON.stringify(episode, null, 2) ?? 'null']);
  sections.push(['Output JSON only.']);

  return sections.map((lines) => lines.join('\n')).join('\n\n');
}
/**
 * Parse the reviewer's raw text response into a review object. Pure.
 *
 * A leading ``` / ```json fence and a trailing ``` fence are stripped before
 * the text is handed to JSON.parse.
 *
 * @param {string} text - Raw response text.
 * @returns {object|null} The parsed object, or null when:
 *   - the text is empty or not valid JSON,
 *   - the JSON value is not a plain object (primitives, null, arrays),
 *   - any of the 8 review dimensions is missing: the keys listed in
 *     REQUIRED_REVIEW_FIELDS plus `alternative_better`, which may be null
 *     but must be present.
 *   An object carrying a string `reviewer_error` is returned verbatim
 *   without the field check (reviewer escalation).
 */
export function parseReview(text) {
  if (!text) return null;

  const stripped = String(text)
    .trim()
    .replace(/^```(?:json)?\s*\n?/, '')
    .replace(/\n?```$/, '')
    .trim();

  let parsed;
  try {
    parsed = JSON.parse(stripped);
  } catch {
    return null;
  }
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    return null;
  }

  // Reviewer-agent escalation: pass through verbatim.
  if (typeof parsed.reviewer_error === 'string') return parsed;

  // The eighth dimension is nullable, so test for the key rather than a value.
  if (!('alternative_better' in parsed)) return null;

  for (const field of REQUIRED_REVIEW_FIELDS) {
    if (parsed[field] === undefined) return null;
  }
  return parsed;
}
/**
 * Direct Opus API call. Wraps callAnthropicAPI from router-classifier with
 * the reviewer model. The caller (controller inside /brain-retro) is
 * responsible for the decision (subagent first, this on failure).
 *
 * @param {object} episode - Episode to review; passed to buildReviewPrompt.
 * @param {object} [options]
 * @param {string} [options.apiKey] - Falls back to process.env.ROUTER_LLM_KEY.
 * @param {string} [options.baseUrl] - Falls back to
 *   process.env.ROUTER_LLM_BASE_URL.
 * @param {string} [options.model] - Falls back to REVIEWER_MODEL.
 * @returns {Promise<object|null>} The parsed review object, or null when no
 *   API key is available, when the API call throws, or when the response
 *   cannot be parsed.
 */
export async function reviewViaDirectApi(episode, options = {}) {
  // Check the key first so the no-key path does no work at all: the network
  // module is not loaded, and therefore cannot fail, when we bail out anyway.
  const apiKey = options.apiKey ?? process.env.ROUTER_LLM_KEY;
  if (!apiKey) return null;

  // Loaded lazily so importing this module for its pure helpers stays cheap.
  const { callAnthropicAPI } = await import('./router-classifier.mjs');
  const prompt = buildReviewPrompt(episode);

  try {
    const text = await callAnthropicAPI(prompt, {
      apiKey,
      baseUrl: options.baseUrl ?? process.env.ROUTER_LLM_BASE_URL,
      model: options.model ?? REVIEWER_MODEL,
    });
    return parseReview(text);
  } catch {
    // Deliberate best-effort: this is already the fallback path, so any
    // failure of the call is reported to the controller as "no review".
    return null;
  }
}
+65
View File
@@ -0,0 +1,65 @@
// tools/brain-retro-opus-reviewer.test.mjs — TDD for Phase 3 Task 18 (G16, spec §4.6)
import { describe, it, expect } from 'vitest';
import { buildReviewPrompt, parseReview } from './brain-retro-opus-reviewer.mjs';
describe('buildReviewPrompt — adaptive v2/v3/v4 (spec §4.6)', () => {
  // The field list always names `agent_self_assessment_accuracy` and
  // `no_self_assessment`, so a plain substring check for "self_assessment"
  // matches every prompt. Match the bare token (not embedded in a longer
  // identifier) to detect the adaptive note itself.
  const BARE_SELF_ASSESSMENT = /(?<![A-Za-z_])self_assessment(?![A-Za-z_])/;

  // Everything before the embedded episode JSON, so that keys present in the
  // episode itself cannot satisfy an assertion about the instructions.
  const instructionsOf = (prompt) => prompt.split('Episode (JSON):')[0];

  it('v4 includes alternatives_considered + self_assessment + chain_gaps cues', () => {
    const ep = {
      schema_version: 4,
      schema_minor: 2,
      task_id: 't',
      primary_rationale: { task_classification: 'feature', node_chosen: 'direct' },
      classifier_output: { recommended_node: '#19', alternatives_considered: [{ node: 'x', match_score: 0.5 }] },
      self_assessment: { summary: 'ok', confidence_in_choice: 0.8 },
      execution_trace: { chain_gaps: [] },
    };
    const instructions = instructionsOf(buildReviewPrompt(ep));
    expect(instructions).toContain('alternatives_considered');
    expect(instructions).toMatch(BARE_SELF_ASSESSMENT);
    expect(instructions).toContain('chain_gaps');
  });

  it('v3 omits alternatives_considered cue', () => {
    expect(buildReviewPrompt({ schema_version: 3 })).not.toContain('alternatives_considered');
  });

  it('v2 omits alternatives + self_assessment cues', () => {
    const p = buildReviewPrompt({ schema_version: 2 });
    expect(p).not.toContain('alternatives_considered');
    expect(p).not.toMatch(BARE_SELF_ASSESSMENT);
  });

  it('includes the episode JSON verbatim for the reviewer to read', () => {
    const ep = { schema_version: 4, task_id: 'task-xyz-1' };
    expect(buildReviewPrompt(ep)).toContain('task-xyz-1');
  });
});
// Covers parseReview: accepted shapes (bare JSON, fenced JSON, escalation
// object) and rejected ones (non-JSON text, incomplete review).
describe('parseReview — 8-dim review schema (spec §4.6)', () => {
  it('parses a complete 8-dim review JSON', () => {
    const raw = '{"node_quality":"correct","chain_quality":"n/a","gap_assessment":"n/a","agent_self_assessment_accuracy":"accurate","error_root_cause":"n/a","alternative_better":null,"outcome_reviewed":"success","reasoning":"x"}';
    const review = parseReview(raw);

    expect(review.node_quality).toBe('correct');
    expect(review.outcome_reviewed).toBe('success');
    expect(review.alternative_better).toBeNull();
    expect(review.reasoning).toBe('x');
  });

  it('strips ```json fence', () => {
    const fenced = '```json\n{"node_quality":"wrong_node","chain_quality":"missing_step","gap_assessment":"acceptable","agent_self_assessment_accuracy":"over_confident","error_root_cause":"wrong_skill","alternative_better":"#19","outcome_reviewed":"rework","reasoning":"y"}\n```';
    const review = parseReview(fenced);

    expect(review.node_quality).toBe('wrong_node');
    expect(review.alternative_better).toBe('#19');
  });

  it('returns null on malformed JSON', () => {
    const outcome = parseReview('not json');
    expect(outcome).toBeNull();
  });

  it('returns null when required field missing', () => {
    const outcome = parseReview('{"node_quality":"correct"}');
    expect(outcome).toBeNull();
  });

  it('returns reviewer_error passthrough when reviewer escalates', () => {
    const escalation = parseReview('{"reviewer_error":"malformed episode"}');
    expect(escalation?.reviewer_error).toBe('malformed episode');
  });
});