Files
brain/tools/brain-retro-batch-reviewer.mjs
T

91 lines
3.2 KiB
JavaScript

#!/usr/bin/env node
/**
* Brain-retro batch reviewer (one-off, not part of canonical procedure).
*
* Reads docs/observer/episodes-YYYY-MM.jsonl, keeps the episodes that started
* at or after the cutoff and have no outcome_reviewed yet, takes the first N
* of them (the limit, default 30), calls reviewViaDirectApi on each
* (Opus 4.7 via ProxyAPI), and writes review.* fields + outcome_reviewed
* + outcome_reviewed_source = "direct_api_batch" back into the JSONL file
* (in-place line replacement; existing forward-only fields are preserved).
*
* Usage:
* node tools/brain-retro-batch-reviewer.mjs <jsonl-path> <cutoff-iso> [limit] [concurrency]
*
* Example:
* node tools/brain-retro-batch-reviewer.mjs docs/observer/episodes-2026-05.jsonl 2026-05-24T13:18:00Z 30 5
*/
import { readFileSync, writeFileSync } from 'fs';
import { reviewViaDirectApi } from './brain-retro-opus-reviewer.mjs';
// ---- CLI arguments ---------------------------------------------------------
const [, , filePath, cutoff, limitStr = '30', concStr = '5'] = process.argv;
if (!filePath || !cutoff) {
  console.error('usage: <jsonl-path> <cutoff-iso> [limit=30] [concurrency=5]');
  process.exit(1);
}

const limit = Number.parseInt(limitStr, 10);
const concurrency = Number.parseInt(concStr, 10);
// Reject unusable numbers up front instead of misbehaving later:
//  - a NaN limit makes slice(0, NaN) empty, so nothing would be processed;
//  - a negative limit makes slice(0, -k) drop episodes from the END;
//  - a concurrency of 0 or less keeps the batching loop (`i += concurrency`)
//    from ever advancing, and a NaN concurrency processes nothing.
if (!Number.isInteger(limit) || limit < 0 || !Number.isInteger(concurrency) || concurrency < 1) {
  console.error(
    `[batch-reviewer] invalid limit "${limitStr}" or concurrency "${concStr}": ` +
      'expected integers with limit >= 0 and concurrency >= 1',
  );
  console.error('usage: <jsonl-path> <cutoff-iso> [limit=30] [concurrency=5]');
  process.exit(1);
}

// Cutoff as epoch milliseconds; NaN when the argument is not a parseable date.
const cutoffMs = Date.parse(cutoff);

/**
 * Tell whether an episode started strictly before the cutoff.
 *
 * Timestamps are compared as instants rather than as strings, because a
 * lexicographic comparison breaks as soon as the two sides differ in
 * precision: "2026-05-24T13:18:00.123Z" sorts BEFORE "2026-05-24T13:18:00Z"
 * ('.' < 'Z') although it is the later instant. When either side cannot be
 * parsed as a date, the plain `<` comparison is used as a fallback.
 *
 * @param {*} startedAt - The episode's `timestamps.started_at` value.
 * @returns {boolean} True when the episode is older than the cutoff.
 */
function startedBeforeCutoff(startedAt) {
  const startedMs = Date.parse(startedAt);
  if (Number.isNaN(startedMs) || Number.isNaN(cutoffMs)) {
    return startedAt < cutoff;
  }
  return startedMs < cutoffMs;
}

// ---- Load the JSONL file and pick the episodes to review -------------------
// `raw` and `lines` are kept at module level: `lines` is patched in place by
// the review step and joined back into the file at the end of the script.
const raw = readFileSync(filePath, 'utf-8');
const lines = raw.split('\n');

const targets = []; // { idx, episode } — idx is the 0-based index into `lines`
lines.forEach((line, idx) => {
  if (!line.trim()) return; // blank line (e.g. the trailing newline)

  let episode;
  try {
    episode = JSON.parse(line);
  } catch {
    return; // not valid JSON: leave the line untouched
  }
  // A line holding a bare JSON scalar (`null`, `42`, ...) is not an episode.
  if (episode === null || typeof episode !== 'object') return;

  if (episode.observer_error) return;
  if (!episode.timestamps?.started_at) return;
  if (startedBeforeCutoff(episode.timestamps.started_at)) return;
  if (episode.outcome_reviewed) return; // already has an outcome
  targets.push({ idx, episode });
});

const total = targets.length;
const slice = targets.slice(0, limit);
console.error(`[batch-reviewer] total in period unreviewed: ${total}, processing first ${slice.length} with concurrency ${concurrency}`);

// Run-wide counters and start time, shared with the review/progress code.
let done = 0;
let errors = 0;
const startTs = Date.now();
/**
 * Review one target episode and, on success, replace its line in `lines`
 * with the re-serialized episode carrying the review.
 *
 * On success the stored episode gains `review`, `outcome_reviewed` (taken
 * from the review, or null when the review has none) and
 * `outcome_reviewed_source = 'direct_api_batch'`, and `done` is incremented.
 * Any failure — an empty response, a response flagged with `reviewer_error`,
 * or a thrown error — increments `errors`, logs the reason and leaves the
 * line untouched. The returned promise never rejects.
 *
 * @param {{ idx: number, episode: object }} target - Index of the line in
 *   `lines` plus the parsed episode from that line.
 * @returns {Promise<void>}
 */
async function reviewOne({ idx, episode }) {
  try {
    const review = await reviewViaDirectApi(episode);

    if (!review || review.reviewer_error) {
      // Surface WHY the reviewer failed; the detail is computed before the
      // counter is touched so a failure here cannot be counted twice.
      const reviewerError = review ? review.reviewer_error : undefined;
      const detail = review
        ? `reviewer_error: ${typeof reviewerError === 'string' ? reviewerError : JSON.stringify(reviewerError)}`
        : `empty response: ${String(review)}`;
      errors++;
      console.error(`[batch-reviewer] ${idx}: null/error from API (${detail})`);
      return;
    }

    // Build a new object instead of mutating the caller's episode; existing
    // keys keep their position, new keys are appended.
    const reviewed = {
      ...episode,
      review,
      outcome_reviewed: review.outcome_reviewed ?? null,
      outcome_reviewed_source: 'direct_api_batch',
    };
    lines[idx] = JSON.stringify(reviewed);
    done++;
  } catch (e) {
    errors++;
    // Non-Error throws (strings, plain objects) have no usable `.message`.
    const reason = e instanceof Error ? e.message : String(e);
    console.error(`[batch-reviewer] ${idx}: ${reason}`);
  }
}
/**
 * Work through `slice` in consecutive windows of `concurrency` targets.
 *
 * Every target of a window is handed to `reviewOne` at once; the next window
 * only starts after all reviews of the current one have settled. A progress
 * line (processed count and elapsed seconds) is written to stderr after each
 * window.
 *
 * @returns {Promise<void>}
 */
async function runBatched() {
  let offset = 0;
  while (offset < slice.length) {
    const window = slice.slice(offset, offset + concurrency);
    const pending = window.map(reviewOne);
    await Promise.all(pending);

    const seconds = ((Date.now() - startTs) / 1000).toFixed(1);
    console.error(`[batch-reviewer] progress ${done + errors}/${slice.length} (${seconds}s)`);

    offset += concurrency;
  }
}
await runBatched();

// Write the results back. Only the lines this run mutated are re-serialized;
// every other line is kept verbatim (no re-serialization that could alter
// ordering/escaping).
//
// The reviews can take a long time, so the file is re-read first: blindly
// writing the snapshot taken at startup would erase anything appended or
// changed on disk in the meantime. When the file did change, the reviewed
// lines are merged onto its current contents instead.
let current;
try {
  current = readFileSync(filePath, 'utf-8');
} catch (e) {
  // Cannot re-read (file removed?): fall back to writing the startup snapshot
  // so the reviews are not lost.
  console.error(`[batch-reviewer] could not re-read ${filePath} (${e instanceof Error ? e.message : String(e)}); writing from the startup snapshot`);
  current = raw;
}

let output;
if (current === raw) {
  output = lines.join('\n');
} else {
  const originalLines = raw.split('\n');
  const currentLines = current.split('\n');
  let conflicts = 0;
  lines.forEach((line, i) => {
    if (line === originalLines[i]) return; // this run did not touch line i
    if (currentLines[i] === originalLines[i]) {
      currentLines[i] = line; // still the line we reviewed: apply the review
    } else {
      conflicts++; // someone else changed this line: keep their version
    }
  });
  console.error(`[batch-reviewer] ${filePath} changed during the run; merged reviews onto the current contents, ${conflicts} conflicting line(s) skipped`);
  output = currentLines.join('\n');
}
writeFileSync(filePath, output, 'utf-8');

const elapsed = ((Date.now() - startTs) / 1000).toFixed(1);
console.error(`[batch-reviewer] done: ${done} reviewed, ${errors} errors, ${elapsed}s wall-clock`);
process.exit(0);