Files
portal/tools/safe-baseline-metering.mjs
T
Дмитрий 8e2b8bee6b fix(router-gate): stream A safe-baseline — dedupe overlap, deep-freeze, dead-var, +tests
Fix 1 (correctness): keywordOverlapCount dedupes `a` into a Set so duplicate
keywords like ['router','router','gate'] ∩ ['router','gate'] yields 2 not 3.
Fix 2 (consistency): deep-freeze all nested threshold objects in DEFAULT_THRESHOLDS
matching the tools/cost-pricing.mjs pattern.
Fix 3 (cleanup): move isMutatingForBaseline check to top of evaluateThresholds
so key/th vars are only computed in the metered-tool branch.
Fix 4 (coverage): add LS=10 and AskUserQuestion=2 soft_flag tests.
Fix 5 (docs): JSDoc on METERED_TOOLS noting TodoWrite → TodoWrite_writes mapping.
Tests: 23 → 29 (+6), all GREEN.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-29 20:01:00 +03:00

124 lines
4.6 KiB
JavaScript

// tools/safe-baseline-metering.mjs
/**
* Safe-baseline metering — router-gate v4 spec §3.1.2 (Direction 1).
* Pure: счётчики Read/Grep/Glob/LS/TodoWrite/AskUserQuestion per task.
* Закрывает skill-substitution laundering (много Read/анализ вместо invoke skill).
*/
import crypto from 'node:crypto';
/**
 * Lower-case phrases that mark a prompt as an explicit task reset.
 * `isResetMarker` looks for each one as a substring of the lower-cased prompt.
 */
export const RESET_MARKERS = [
  'новая задача', // "new task"
  'сброс контекста', // "context reset"
  'забудь предыдущее', // "forget the previous"
  'забудь контекст', // "forget the context"
  'начнём заново', // "let's start over"
  'с чистого листа', // "from a clean slate"
];
/**
 * Names of the tools that are counted per task.
 *
 * NOTE: the tool name 'TodoWrite' is stored under the counter key
 * 'TodoWrite_writes' (see `counterKey`). Code that looks values up in
 * `state.counts` must use 'TodoWrite_writes', not 'TodoWrite'.
 */
export const METERED_TOOLS = [
  'Read',
  'Grep',
  'Glob',
  'LS',
  'TodoWrite',
  'AskUserQuestion',
];
/**
 * Default per-task limits, keyed by counter key (note 'TodoWrite_writes',
 * not 'TodoWrite'). Each entry is `{ warn, hard }`; `evaluateThresholds`
 * compares a counter against both values.
 *
 * The outer object and every nested entry are frozen, so the defaults
 * cannot be mutated by consumers.
 */
export const DEFAULT_THRESHOLDS = Object.freeze(
  Object.fromEntries(
    [
      // [counter key, warn, hard]
      ['Read', 30, 60],
      ['Grep', 15, 30],
      ['Glob', 10, 20],
      ['LS', 10, 20],
      ['TodoWrite_writes', 5, 15],
      ['AskUserQuestion', 2, 30],
    ].map(([counter, warn, hard]) => [counter, Object.freeze({ warn, hard })]),
  ),
);
// Tool names that count as "mutating" for the safe-baseline check.
const MUTATING = new Set([
  'Edit',
  'Write',
  'MultiEdit',
  'NotebookEdit',
  'Bash',
  'Skill',
  'Task',
]);

/**
 * Tell whether a tool is in the mutating set.
 *
 * @param {string} toolName - Tool name, matched exactly (case-sensitive).
 * @returns {boolean} `true` when `toolName` is one of the mutating tools.
 */
export function isMutatingForBaseline(toolName) {
  const listed = MUTATING.has(toolName);
  return listed;
}
/**
 * Check whether a prompt contains any of the `RESET_MARKERS` phrases.
 *
 * The prompt is coerced to a string (falsy values become '') and lower-cased
 * before the substring search, so matching is case-insensitive.
 *
 * @param {*} prompt - User prompt text.
 * @returns {boolean} `true` when at least one marker occurs in the prompt.
 */
export function isResetMarker(prompt) {
  const normalized = String(prompt || '').toLowerCase();
  const occursInPrompt = (marker) => normalized.includes(marker);
  return RESET_MARKERS.some(occursInPrompt);
}
/**
 * Derive a task id from the first prompt of a task.
 *
 * @param {*} firstPrompt - First prompt text; falsy values are hashed as ''.
 * @returns {string} The first 16 hex characters of the SHA-256 digest of the
 *   stringified prompt.
 */
export function deriveTaskId(firstPrompt) {
  const text = String(firstPrompt || '');
  const hasher = crypto.createHash('sha256');
  hasher.update(text);
  const hexDigest = hasher.digest('hex');
  return hexDigest.slice(0, 16);
}
/**
 * Count the distinct keywords that appear in both lists.
 *
 * Each keyword is stringified and lower-cased, and both lists are
 * de-duplicated, so repeated keywords are counted once. A missing list
 * (null/undefined) is treated as empty.
 *
 * @param {Array<*>|null|undefined} a - First keyword list.
 * @param {Array<*>|null|undefined} b - Second keyword list.
 * @returns {number} Number of distinct normalized keywords common to both.
 */
export function keywordOverlapCount(a, b) {
  const toKeywordSet = (list) => new Set((list || []).map((word) => String(word).toLowerCase()));
  const right = toKeywordSet(b);
  const left = toKeywordSet(a);
  return [...left].filter((word) => right.has(word)).length;
}
/**
 * Decide whether the current prompt continues the previous task.
 *
 * @param {Array<*>|null|undefined} prevKeywords - Keywords of the previous task.
 * @param {Array<*>|null|undefined} currentKeywords - Keywords of the current prompt.
 * @param {*} prompt - Current prompt text, checked for reset markers.
 * @returns {boolean} `false` when the prompt contains a reset marker;
 *   otherwise `true` only if the two keyword lists share at least 2 keywords.
 */
export function shouldInheritTaskId(prevKeywords, currentKeywords, prompt) {
  // The reset-marker check comes first and short-circuits the overlap count.
  return !isResetMarker(prompt) && keywordOverlapCount(prevKeywords, currentKeywords) >= 2;
}
/**
 * Build a fresh counter state for a task, with every counter at zero.
 *
 * @param {object} params
 * @param {*} params.taskId - Stored as `task_id`.
 * @param {*} params.startedAtIso - Stored as `task_started_at`.
 * @param {*} params.firstPromptExcerpt - Stringified (falsy becomes '') and
 *   cut to at most 200 characters; stored as `task_first_prompt_excerpt`.
 * @returns {object} New state object with `schema_version: 1`.
 */
export function newCounterState({ taskId, startedAtIso, firstPromptExcerpt }) {
  const excerpt = String(firstPromptExcerpt || '').slice(0, 200);
  const zeroedCounts = {
    Read: 0,
    Grep: 0,
    Glob: 0,
    LS: 0,
    TodoWrite_writes: 0,
    AskUserQuestion: 0,
  };

  return {
    schema_version: 1,
    task_id: taskId,
    task_started_at: startedAtIso,
    task_first_prompt_excerpt: excerpt,
    counts: zeroedCounts,
    skill_match_within_task: false,
    warnings_issued: [],
    hard_blocks_issued: [],
  };
}
/**
 * Map a tool name to the key used in `state.counts`.
 * 'TodoWrite' is stored as 'TodoWrite_writes'; every other name maps to itself.
 *
 * @param {string} toolName
 * @returns {string} Counter key for the tool.
 */
function counterKey(toolName) {
  return toolName === 'TodoWrite' ? 'TodoWrite_writes' : toolName;
}

/**
 * Return a copy of `state` with the counter for `toolName` increased by one.
 *
 * The input state is never mutated. When the tool has no counter in
 * `state.counts`, the same `state` object is returned unchanged.
 *
 * @param {{counts: Object<string, number>}} state - Current counter state.
 * @param {string} toolName - Tool that was just used.
 * @returns {object} Updated state, or the original `state` if not metered.
 */
export function incrementCounter(state, toolName) {
  const key = counterKey(toolName);
  // Own-property check on purpose: `key in counts` also matches inherited
  // names such as 'toString' or 'constructor', which would be treated as
  // metered and write a garbage value into `counts`.
  if (!Object.hasOwn(state.counts, key)) return state; // not metered
  return {
    ...state,
    counts: { ...state.counts, [key]: state.counts[key] + 1 },
  };
}
/**
 * Decide what to do with a tool call given the task's current counters.
 *
 * Evaluation order:
 *  1. `skillMatched` truthy: always `{ action: 'allow' }`.
 *  2. Mutating tool (per `isMutatingForBaseline`): `hard_block` if any counter
 *     in `state.counts` has reached its `hard` limit, otherwise `allow`.
 *  3. Any other tool with a thresholds entry: still allowed at or above `hard`
 *     (returned with `tool`), `soft_flag` at or above `warn`.
 *  4. Everything else: `{ action: 'allow' }`.
 *
 * @param {{counts: Object<string, number>}} state - Current counter state.
 * @param {string} toolName - Tool about to be used.
 * @param {*} skillMatched - Truthy when a skill match exists for the task.
 * @param {Object<string, {warn: number, hard: number}>} [thresholds=DEFAULT_THRESHOLDS]
 *   Limits keyed by counter key.
 * @returns {{action: string, tool?: string, reason?: string}} Decision object.
 */
export function evaluateThresholds(state, toolName, skillMatched, thresholds = DEFAULT_THRESHOLDS) {
  if (skillMatched) {
    return { action: 'allow' };
  }

  if (isMutatingForBaseline(toolName)) {
    // First counter (in key order) that is at or above its hard limit, if any.
    const exhausted = Object.keys(state.counts).find((name) => {
      const limits = thresholds[name];
      return limits && state.counts[name] >= limits.hard;
    });
    if (exhausted === undefined) {
      return { action: 'allow' };
    }
    return {
      action: 'hard_block',
      reason: `Превышен лимит safe-baseline tools (${exhausted}=${state.counts[exhausted]}) без Skill match. Паттерн skill-substitution. Вызови recommended skill ИЛИ перезапусти задачу с явным skill invocation.`,
    };
  }

  // Non-mutating tool: look up its own counter and limits.
  const counter = counterKey(toolName);
  const limits = thresholds[counter];
  if (!limits) {
    return { action: 'allow' };
  }

  const used = state.counts[counter];
  if (used >= limits.hard) {
    // Read/Grep/etc. stay allowed past the hard limit (legit continuation).
    return { action: 'allow', tool: toolName };
  }
  if (used >= limits.warn) {
    return {
      action: 'soft_flag',
      tool: toolName,
      reason: `Сделано ${used} ${toolName} в задаче без invoke skill. Invoke recommended skill ИЛИ продолжить direct с явным "direct ok".`,
    };
  }
  return { action: 'allow' };
}