portal/tools/llm-judge-per-tool.mjs

// tools/llm-judge-per-tool.mjs
/**
 * llm-judge-per-tool — PreToolUse judge on mutating tools (v4.1 §4.7).
 * Asks a single Sonnet judge: "is this tool call consistent with the declared
 * user task and recommended skill scope?" NO → block. Doubt (null) → block.
 * No key / budget exhausted → degraded allow + flag (fail-open on LLM layer).
 */

import { JUDGE_MODELS } from './llm-judge.mjs';

/**
 * Render a value as text capped at `n` UTF-16 code units, appending '…' when cut.
 *
 * Strings are used as-is; anything else is serialized with JSON.stringify
 * (null/undefined become '{}'). If serialization throws (cycles, BigInt) or
 * yields no string (functions, symbols), String(value) is used instead so the
 * caller always gets a string back.
 *
 * @param {*} s value to summarize
 * @param {number} n maximum number of UTF-16 code units kept before the ellipsis
 * @returns {string}
 */
function truncate(s, n) {
  let str;
  if (typeof s === 'string') {
    str = s;
  } else {
    try {
      str = JSON.stringify(s ?? {});
    } catch {
      str = undefined;
    }
    // JSON.stringify returns undefined for functions/symbols and throws on
    // cycles/BigInt; fall back to a plain string rendering in both cases.
    if (typeof str !== 'string') str = String(s);
  }
  // Written as a negated `>` so a non-numeric `n` (NaN/undefined) means "no cut".
  if (!(str.length > n)) return str;

  // Never cut between the two halves of a surrogate pair: a lone high
  // surrogate is malformed text once it reaches the prompt or the log.
  let end = n;
  const lastKept = str.charCodeAt(end - 1);
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) end -= 1;
  return str.slice(0, end) + '…';
}

/**
 * Compose the judge prompt for one mutating tool call.
 *
 * The prompt lists the declared task summary, the recommended skill and chain,
 * the tool name and a 500-unit summary of the tool input, followed by the
 * YES/NO consistency question. Missing task fields fall back to '(unknown)',
 * 'none' and an empty chain.
 *
 * @param {{toolName: string, toolInput: *, declaredTask?: object}} args
 * @returns {string} newline-joined prompt text
 */
export function buildPerToolQuestion({ toolName, toolInput, declaredTask }) {
  const task = declaredTask || {};
  const summary = task.task_summary || '(unknown)';
  const skill = task.recommended_node ?? 'none';
  const chain = JSON.stringify(task.recommended_chain ?? []);
  const inputSummary = truncate(toolInput, 500);

  const lines = [];
  lines.push(`User task: ${summary}`);
  lines.push(`Recommended skill: ${skill}`);
  lines.push(`Recommended chain: ${chain}`);
  lines.push(`Tool being called: ${toolName}`);
  lines.push(`Tool input summary: ${inputSummary}`);
  lines.push('');
  lines.push('Is this tool call consistent with the declared user task and recommended skill scope?');
  lines.push('Answer YES (consistent) or NO (inconsistent / off-scope / suspicious). Сомнения → NO.');
  return lines.join('\n');
}

/**
 * Pure decision for a single mutating tool call. `llmJudgeCallImpl` is the
 * injected single-judge caller (test mock or, in main(), the real llmJudgeCall
 * bound to ROUTER_LLM_KEY).
 *
 * Outcomes, in evaluation order:
 *  - no injected caller AND no API key  → degraded allow, reason 'no_api_key';
 *  - `budgetState` given and `spent + 1 > limit` → degraded allow, reason
 *    'budget_exhausted';
 *  - judge verdict is exactly 'YES' → allow;
 *  - any other verdict (including null) → block (doubt→block).
 *
 * `apiKey` is only used to decide whether a judge is available; it is not
 * forwarded to `llmJudgeCallImpl`.
 *
 * @param {object} args
 * @param {string} args.toolName
 * @param {*} args.toolInput
 * @param {object} [args.declaredTask]
 * @param {string} [args.apiKey] defaults to process.env.ROUTER_LLM_KEY
 * @param {{spent:number, limit:number}} [args.budgetState]
 * @param {Function} [args.llmJudgeCallImpl] called with `{ model, question, content }`
 * @returns {Promise<{block:boolean, reason?:string, degraded?:boolean, verdict?:string|null}>}
 * @throws {TypeError} (as a rejection) when a key is available and the budget
 *   allows a call, but `llmJudgeCallImpl` is not a function.
 */
export async function judgePerTool({
  toolName,
  toolInput,
  declaredTask,
  apiKey = process.env.ROUTER_LLM_KEY,
  budgetState,
  llmJudgeCallImpl,
}) {
  if (!llmJudgeCallImpl && !apiKey) {
    return { block: false, degraded: true, reason: 'no_api_key' };
  }
  if (budgetState && budgetState.spent + 1 > budgetState.limit) {
    return { block: false, degraded: true, reason: 'budget_exhausted' };
  }
  // A key without a caller cannot be judged; fail with an explicit message
  // instead of an opaque "is not a function" after the prompt has been built.
  if (typeof llmJudgeCallImpl !== 'function') {
    throw new TypeError('judgePerTool: llmJudgeCallImpl must be a function when an API key is available');
  }
  const question = buildPerToolQuestion({ toolName, toolInput, declaredTask });
  const verdict = await llmJudgeCallImpl({
    model: JUDGE_MODELS.single[0],
    question,
    content: '', // question already carries the (truncated) input
  });
  if (verdict === 'YES') return { block: false, verdict };
  return {
    block: true,
    verdict,
    reason: 'v4.1 per-tool LLM-judge: tool call classified off-scope vs declared user task (doubt→block).',
  };
}

import { readFileSync, appendFileSync, mkdirSync } from 'node:fs';
import { join } from 'node:path';
import { homedir } from 'node:os';
import { readStdin, parseEventJson, exitDecision } from './enforce-hook-helpers.mjs';
import { llmJudgeCall, readJudgeBudget, bumpJudgeBudget, JUDGE_SESSION_BUDGET } from './llm-judge.mjs';

// Calibration 1 (2026-05-31) — `Skill` removed from judge scope (SCOPE fix, NOT
// a discipline drop). Invoking a Skill mutates no state; it is the prescribed
// §17 entry into work. Judging the skill-invocation itself and blocking on
// doubt directly contradicts §17 (which mandates skills). The real mutations a
// skill leads to (Edit/Write/MultiEdit/Bash/PowerShell/commit/push/Task) remain
// fully judged below — doubt→block on those is unchanged.
/** Tool names whose calls are sent to the per-tool judge. */
export const MUTATING_TOOLS = new Set([
  'Edit',
  'Write',
  'MultiEdit',
  'NotebookEdit',
  'Bash',
  'PowerShell',
  'Task',
  'Workflow',
]);

/**
 * Resolve the directory holding per-session runtime files.
 * A truthy `override` is returned unchanged; otherwise ~/.claude/runtime.
 *
 * @param {string} [override]
 * @returns {string}
 */
function runtimeDir(override) {
  if (override) {
    return override;
  }
  const home = homedir();
  return join(home, '.claude', 'runtime');
}

/**
 * Calibration 4 (soft, 2026-05-31): pick the task text the judge compares against.
 *
 * The classifier's distilled summary is lossy and can be "(unknown)" even for
 * a clear request, which left the judge nothing to compare against and made
 * it block real edits (doubt→block). When the summary is missing, blank or
 * "(unknown)" and a non-blank user prompt is available, the trimmed prompt
 * replaces the summary and the result is tagged
 * `task_source: 'user_prompt_fallback'`.
 *
 * This is NOT calibration 2 (a blind ALLOW on unknown): the judge still runs
 * and still blocks on doubt, only with better evidence. When neither a usable
 * summary nor a prompt exists, the task is returned unchanged, so it stays
 * "(unknown)" and doubt→block is preserved.
 *
 * @param {object} [declaredTask] classifier output; nullish is treated as {}
 * @param {*} lastUserPrompt used only when it is a string
 * @returns {object} the original task object, or a shallow copy with the fallback applied
 */
export function resolveEffectiveTask(declaredTask, lastUserPrompt) {
  const task = declaredTask || {};
  const { task_summary: currentSummary } = task;

  const hasUsableSummary =
    Boolean(currentSummary) &&
    currentSummary !== '(unknown)' &&
    String(currentSummary).trim() !== '';

  let fallbackPrompt = '';
  if (typeof lastUserPrompt === 'string') {
    fallbackPrompt = lastUserPrompt.trim();
  }

  if (hasUsableSummary || !fallbackPrompt) {
    return task;
  }
  return { ...task, task_summary: fallbackPrompt, task_source: 'user_prompt_fallback' };
}

/**
 * Load the declared task for a session from
 * `router-state-<sessionId>.json` in the runtime directory.
 *
 * The summary is taken from `task_summary`, then
 * `task_classification.task_summary`, then '(unknown)'. Any read or parse
 * failure yields the stub `{ '(unknown)', null, [] }` instead of throwing.
 *
 * @param {{sessionId?: string, runtimeDirOverride?: string}} args
 * @returns {{task_summary: *, recommended_node: *, recommended_chain: *}}
 */
export function readDeclaredTask({ sessionId, runtimeDirOverride }) {
  const stateFile = join(
    runtimeDir(runtimeDirOverride),
    `router-state-${sessionId || 'unknown'}.json`,
  );
  try {
    const raw = readFileSync(stateFile, 'utf8');
    const state = JSON.parse(raw);
    const summary = state.task_summary ?? state.task_classification?.task_summary ?? '(unknown)';
    const node = state.recommended_node ?? null;
    const chain = state.recommended_chain ?? [];
    return { task_summary: summary, recommended_node: node, recommended_chain: chain };
  } catch {
    // Missing file, unreadable file, invalid JSON or a non-object payload.
    return { task_summary: '(unknown)', recommended_node: null, recommended_chain: [] };
  }
}

/**
 * Append one JSON line to `llm-judge-per-tool-<sessionId>.jsonl` in the
 * runtime directory, creating the directory if needed. Each record carries an
 * ISO timestamp and the session id (null when absent) followed by `entry`.
 * Best-effort: every failure is swallowed so logging never affects the decision.
 *
 * @param {{sessionId?: string, runtimeDirOverride?: string, entry: object}} args
 * @returns {void}
 */
function logPerTool({ sessionId, runtimeDirOverride, entry }) {
  try {
    const logDir = runtimeDir(runtimeDirOverride);
    mkdirSync(logDir, { recursive: true });
    const logFile = join(logDir, `llm-judge-per-tool-${sessionId || 'unknown'}.jsonl`);
    const record = {
      ts: new Date().toISOString(),
      session_id: sessionId || null,
      ...entry,
    };
    appendFileSync(logFile, `${JSON.stringify(record)}\n`);
  } catch {
    /* ignore */
  }
}

/**
 * CLI entry point for the PreToolUse hook.
 *
 * Reads the hook event from stdin, lets non-mutating tools through without
 * judging, and otherwise loads the declared task and the session's judge
 * budget, asks `judgePerTool` for a decision, bumps the budget when a real
 * judge call was made, logs the outcome and reports it via `exitDecision`.
 *
 * The real judge caller is injected only when ROUTER_LLM_KEY is set, so that
 * `judgePerTool` can take its documented "no key → degraded allow + flag"
 * path; with an always-present caller that path is unreachable.
 * NOTE(review): this assumes llmJudgeCall has no key source other than
 * ROUTER_LLM_KEY — confirm against llm-judge.mjs.
 *
 * NOTE(review): resolveEffectiveTask (calibration 4) is not applied here;
 * where the last user prompt would come from is not visible in this file.
 *
 * Any unexpected error is fail-quiet: the call is allowed, and a best-effort
 * log entry records the failure.
 *
 * @returns {Promise<void>}
 */
async function main() {
  let sessionId;
  try {
    const event = parseEventJson(await readStdin());
    const toolName = event.tool_name;
    if (!MUTATING_TOOLS.has(toolName)) { exitDecision({ block: false }); return; }

    sessionId = event.session_id;
    const declaredTask = readDeclaredTask({ sessionId });
    const spent = readJudgeBudget({ sessionId });
    const apiKey = process.env.ROUTER_LLM_KEY;

    const result = await judgePerTool({
      toolName,
      toolInput: event.tool_input || {},
      declaredTask,
      apiKey,
      budgetState: { spent, limit: JUDGE_SESSION_BUDGET },
      // Without a key there is no judge to call; leaving the caller undefined
      // makes judgePerTool return the degraded 'no_api_key' allow.
      llmJudgeCallImpl: apiKey ? (opts) => llmJudgeCall(opts) : undefined,
    });

    // Degraded results made no judge call, so they do not consume budget.
    if (!result.degraded) bumpJudgeBudget({ sessionId, by: 1 });

    logPerTool({
      sessionId,
      entry: {
        tool_name: toolName,
        tool_input_summary: truncate(event.tool_input, 200),
        declared_task: declaredTask.task_summary,
        verdict: result.verdict ?? null,
        action_taken: result.block ? 'block' : (result.degraded ? 'degraded_allow' : 'allow'),
        reason: result.reason || null,
      },
    });

    exitDecision({ block: result.block, message: result.reason });
  } catch (err) {
    // Fail-quiet: still allow, but leave a trace (logPerTool never throws).
    logPerTool({
      sessionId,
      entry: {
        action_taken: 'error_allow',
        reason: err instanceof Error ? err.message : String(err),
      },
    });
    exitDecision({ block: false });
  }
}

// Run main() only when this file is the script Node was started with
// (backslashes are normalized so Windows paths match as well).
const invokedScript = process.argv[1];
const isCli = invokedScript
  && invokedScript.replace(/\\/g, '/').endsWith('/llm-judge-per-tool.mjs');
if (isCli) {
  main();
}