Files
brain/tools/mcp-tool-classifier.mjs
T

259 lines
12 KiB
JavaScript

#!/usr/bin/env node
/**
* MCP tool classifier (router-gate v4 Stream C, spec §5.3 + v4.1 G1/G12).
*
* Classifies an MCP / built-in tool call against a path-deny / URL-whitelist /
* SQL-statement overlay. Pure — path normalization & protected-path check are
* injected (Stream A); LLM-judge for WebSearch query is flagged for the consumer
* (Stream D). Unknown tools -> default 'block' (fail-CLOSE).
*/
// §5.3 + v4.1 G1/G12 classification map.
//
// Keys are tool names; a key may contain `*` wildcards (glob). Each value is an
// entry object whose `category` is one of:
//   - 'read_only'      -> the call is allowed,
//   - 'hard_blacklist' -> the call is blocked,
//   - 'conditional'    -> the call is decided by scanning one argument
//                         (`args_key_to_scan`) against the entry's SQL / URL rules,
//                         or by flagging it for an LLM judge.
// The literal `default` key holds the fallback decision for unmatched tools and is
// never matched as a tool name. Only the top-level object is frozen.
export const DEFAULT_MCP_CLASSIFICATION = Object.freeze({
  'mcp__redis__get': { category: 'read_only' },
  'mcp__redis__list': { category: 'read_only' },
  'mcp__redis__set': { category: 'hard_blacklist' },
  'mcp__redis__delete': { category: 'hard_blacklist' },
  'mcp__github__get_me': { category: 'read_only' },
  'mcp__github__list_*': { category: 'read_only' },
  'mcp__github__search_*': { category: 'read_only' },
  'mcp__github__pull_request_read': { category: 'read_only' },
  'mcp__github__issue_read': { category: 'read_only' },
  'mcp__laravel-boost__database-query': {
    category: 'conditional',
    args_key_to_scan: 'query',
    // v4.1 G12 — full-statement scan (mutating verb anywhere, not just prefix).
    query_full_statement_scan: {
      read_only_only_patterns: [
        '^\\s*(?:SELECT|EXPLAIN|SHOW|DESCRIBE|DESC|WITH\\s+\\w+\\s+AS\\s*\\(\\s*SELECT)\\b',
      ],
      blocked_anywhere_patterns: [
        '\\b(?:UPDATE|INSERT|DELETE|DROP|TRUNCATE|ALTER|CREATE|GRANT|REVOKE|COMMIT|ROLLBACK|MERGE|REPLACE|LOAD)\\b',
        ';\\s*(?:UPDATE|INSERT|DELETE|DROP|TRUNCATE|ALTER|CREATE|GRANT|REVOKE)\\b',
      ],
      comment_strip: true,
    },
  },
  'mcp__laravel-boost__*': { category: 'read_only', exception: 'database-query handled above' },
  'mcp__github__create_*': { category: 'hard_blacklist' },
  'mcp__github__update_*': { category: 'hard_blacklist' },
  'mcp__github__merge_*': { category: 'hard_blacklist' },
  'mcp__github__delete_*': { category: 'hard_blacklist' },
  'mcp__github__push_files': { category: 'hard_blacklist' },
  'mcp__github__create_or_update_file': { category: 'hard_blacklist', path_args: ['path'] },
  'mcp__github__add_*comment*': { category: 'hard_blacklist' },
  'mcp__github__add_reply*': { category: 'hard_blacklist' },
  'mcp__github__star_repository': { category: 'hard_blacklist' },
  'mcp__github__unstar_repository': { category: 'hard_blacklist' },
  'mcp__github__manage_*subscription': { category: 'hard_blacklist' },
  'mcp__github__mark_*read': { category: 'hard_blacklist' },
  'mcp__github__dismiss_*': { category: 'hard_blacklist' },
  'mcp__github__discussion_comment_write': { category: 'hard_blacklist' },
  'mcp__github__sub_issue_write': { category: 'hard_blacklist' },
  'mcp__github__actions_run_trigger': { category: 'hard_blacklist' },
  'mcp__playwright__browser_snapshot': { category: 'read_only' },
  'mcp__playwright__browser_take_screenshot': { category: 'read_only' },
  'mcp__playwright__browser_network_requests': { category: 'read_only' },
  'mcp__playwright__browser_console_messages': { category: 'read_only' },
  'mcp__playwright__browser_navigate': {
    category: 'conditional',
    args_key_to_scan: 'url',
    // Host token MUST be followed by an optional NUMERIC port and then a
    // path/query/fragment delimiter or end of string. This rejects two spoofs:
    //   - subdomain suffix: liderra.ru.evil.com / localhost.evil.com
    //   - userinfo: https://localhost:80@evil.com/ — "localhost:80" is parsed as
    //     user:password and the real host is evil.com. Accepting a bare ':' as the
    //     boundary let this through, hence the `(?::\d*)?` port group.
    // (The v4.0 design §5.3 regex omitted this boundary; corrected here,
    // spec to be synced in Stream H.)
    url_whitelist_patterns: ['^https?://(?:localhost|127\\.0\\.0\\.1|liderra\\.ru)(?::\\d*)?(?:[/?#]|$)'],
    url_blocked_patterns: ['^https?://(?!(?:localhost|127\\.0\\.0\\.1|liderra\\.ru)(?::\\d*)?(?:[/?#]|$))'],
  },
  'mcp__playwright__browser_click': { category: 'hard_blacklist' },
  'mcp__playwright__browser_fill_form': { category: 'hard_blacklist' },
  'mcp__playwright__browser_type': { category: 'hard_blacklist' },
  'mcp__playwright__browser_press_key': { category: 'hard_blacklist' },
  'mcp__playwright__browser_drag': { category: 'hard_blacklist' },
  'mcp__playwright__browser_drop': { category: 'hard_blacklist' },
  'mcp__playwright__browser_evaluate': { category: 'hard_blacklist' },
  'mcp__playwright__browser_file_upload': { category: 'hard_blacklist' },
  'mcp__playwright__browser_handle_dialog': { category: 'hard_blacklist' },
  'mcp__playwright__browser_hover': { category: 'hard_blacklist' },
  'mcp__playwright__browser_resize': { category: 'hard_blacklist' },
  'mcp__playwright__browser_run_code_unsafe': { category: 'hard_blacklist' },
  'mcp__playwright__browser_select_option': { category: 'hard_blacklist' },
  'mcp__plugin_brand-voice_*__authenticate': { category: 'hard_blacklist' },
  'mcp__plugin_brand-voice_*__complete_authentication': { category: 'hard_blacklist' },
  'mcp__plugin_*_*__authenticate': { category: 'hard_blacklist' },
  'mcp__plugin_*_*__complete_authentication': { category: 'hard_blacklist' },
  'mcp__openapi__deals-store': { category: 'hard_blacklist' },
  'mcp__openapi__deals-update': { category: 'hard_blacklist' },
  'mcp__openapi__deals-bulk-*': { category: 'hard_blacklist' },
  'mcp__openapi__deals-export': { category: 'hard_blacklist' },
  'mcp__plugin_context7_context7__*': { category: 'read_only' },
  'mcp__universal-icons__*': { category: 'read_only' },
  // Off-phase research-tooling (Perplexity Pack #87/#88/#89): read_only posture per
  // ADR-019 (owner decision 2026-06-14). Web research reads external sources and does
  // not mutate project state; egress arg scan (enforce-mcp-classification) still runs.
  'mcp__perplexity__*': { category: 'read_only' },
  'mcp__exa__*': { category: 'read_only' },
  'mcp__firecrawl__*': { category: 'read_only' },
  // v4.1 G1 — WebSearch / WebFetch.
  'WebSearch': {
    category: 'conditional',
    args_key_to_scan: 'query',
    llm_judge_required: true,
    rationale: 'search query observable in engine logs; potential exfil channel',
  },
  'WebFetch': {
    category: 'conditional',
    args_key_to_scan: 'url',
    url_whitelist_patterns: [
      '^https?://docs\\.anthropic\\.com/',
      '^https?://github\\.com/(?:liderra|anthropics|deck|deck-platform)/',
      '^https?://liderra\\.ru/',
      '^https?://(?:www\\.)?npmjs\\.com/package/',
      '^https?://stackoverflow\\.com/questions/',
    ],
    url_blocked_patterns: [
      '^data:',
      '^javascript:',
      // Blocked patterns are evaluated BEFORE the whitelist, so every host form the
      // whitelist accepts must also be exempted here. `(?:www\.)?npmjs\.com` keeps
      // the whitelisted www.npmjs.com package pages reachable.
      '^https?://(?!docs\\.anthropic\\.com|github\\.com|liderra\\.ru|(?:www\\.)?npmjs\\.com|stackoverflow\\.com)',
    ],
    fetched_content_scan: true,
  },
  'default': 'block',
});
/**
 * Convert a glob key (`*` wildcards) to an anchored, case-sensitive regex.
 *
 * Every regex metacharacter in the key — including `?` — is escaped so it matches
 * literally; only `*` is special and expands to `.*`. The generated pattern has no
 * nested quantifiers.
 *
 * @param {string} key glob-style classification key.
 * @returns {RegExp} regex matching the whole tool name (`^…$`).
 */
function globKeyToRegex(key) {
  // `*` is deliberately left out of the escape class: it is translated afterwards.
  const escaped = key.replace(/[.+?^${}()|[\]\\]/g, '\\$&').replace(/\*/g, '.*');
  return new RegExp(`^${escaped}$`);
}
/**
 * Look up the classification entry that governs a tool name.
 *
 * Resolution order:
 *   1. An own key equal to the tool name, when its value is an object.
 *   2. Otherwise, among glob keys (keys containing `*`) that match the name, the
 *      one with the most literal (non-`*`) characters; ties keep the first key
 *      in enumeration order.
 * The literal "default" key is never treated as a tool.
 *
 * NOTE(review): rule 2 ranks purely by literal length, so a longer `read_only`
 * glob outranks a shorter `hard_blacklist` glob matching the same name — confirm
 * this is the intended precedence for overlapping globs.
 *
 * @param {string} toolName tool name to resolve.
 * @param {object} [classification] classification map to search.
 * @returns {object|null} the entry, or null if nothing matches.
 */
export function matchClassificationKey(toolName, classification = DEFAULT_MCP_CLASSIFICATION) {
  const isEntry = (value) => Boolean(value) && typeof value === 'object';

  if (typeof toolName !== 'string' || !classification || toolName === 'default') {
    return null;
  }

  // Exact key first; a non-object value (e.g. a bare string) falls through to globs.
  if (Object.prototype.hasOwnProperty.call(classification, toolName)) {
    const exact = classification[toolName];
    if (isEntry(exact)) return exact;
  }

  // Glob keys that match the name, in enumeration order.
  const matchingGlobs = Object.keys(classification).filter(
    (key) =>
      key !== 'default' &&
      key !== toolName &&
      key.includes('*') &&
      globKeyToRegex(key).test(toolName),
  );

  let winner = null;
  let winnerLiteralLength = -1;
  for (const glob of matchingGlobs) {
    const literalLength = glob.replace(/\*/g, '').length; // specificity = literal chars
    if (literalLength > winnerLiteralLength) {
      winnerLiteralLength = literalLength;
      winner = classification[glob];
    }
  }

  return isEntry(winner) ? winner : null;
}
/**
 * Fallback path normalizer used when the caller injects none: backslashes become
 * forward slashes and the result is lower-cased.
 *
 * @param {*} target candidate path.
 * @returns {string} normalized path, or '' when `target` is not a string.
 */
function defaultNormalize(target) {
  return typeof target === 'string'
    ? target.split('\\').join('/').toLowerCase()
    : '';
}
/**
 * Replace SQL comments with a single space so the statement scan sees only code.
 *
 * The stripper is deliberately conservative (fail-closed): whenever text might be
 * executed by some SQL dialect it is LEFT IN PLACE so a later blocked-verb scan
 * can still see it. Concretely:
 *   - Block comments and line comments are removed in ONE left-to-right pass, so
 *     a `/*` sitting inside a `--` line comment cannot open a block comment that
 *     swallows the following lines.
 *   - MySQL / MariaDB executable comments (`/*! … *​/`, `/*M! … *​/`) are kept:
 *     the server runs their content.
 *   - `--` starts a comment only when followed by whitespace or end of input
 *     (MySQL rule), and the comment stops at `\r` or `\n`.
 *   - Stripping stops at the first quote-like character (`'`, `"`, backtick, `$`):
 *     from there on a comment marker may be string-literal content, and this
 *     function does not parse dialect-specific string escaping.
 *
 * @param {*} sql statement text (coerced with String()).
 * @returns {string} the text with recognised leading-region comments blanked.
 */
function stripSqlComments(sql) {
  // Group 1 = a strippable comment; the second alternative = a quote-like char.
  const tokenPattern = /(\/\*(?!M?!)[\s\S]*?\*\/|--(?=\s|$)[^\r\n]*)|['"`$]/g;
  let quoteSeen = false;
  return String(sql).replace(tokenPattern, (token, comment) => {
    if (quoteSeen) return token; // past the first quote: leave everything visible
    if (comment === undefined) {
      quoteSeen = true; // quote-like character: stop stripping from here on
      return token;
    }
    return ' ';
  });
}
/**
 * Report whether `text` matches at least one of the given regex sources.
 * Each source is compiled case-insensitively; a missing list matches nothing.
 *
 * @param {string[]|undefined|null} patterns regex source strings.
 * @param {string} text text to test.
 * @returns {boolean} true as soon as one pattern matches.
 */
function testAny(patterns, text) {
  const sources = patterns || [];
  const matchesText = (source) => {
    const compiled = new RegExp(source, 'i');
    return compiled.test(text);
  };
  return sources.some(matchesText);
}
/**
 * Turn one MCP / built-in tool call into a gate decision.
 *
 * The tool name is resolved to a classification entry; a missing entry or an
 * unrecognised category blocks the call. `read_only` allows, `hard_blacklist`
 * blocks, and `conditional` entries are evaluated in this fixed order:
 *   1. `path_args`                 — block if any string argument is a protected path;
 *   2. `query_full_statement_scan` — SQL scan (blocked verbs, then read-only prefix);
 *   2b. `query_prefix_scan`        — legacy v4.0 SQL scan;
 *   3. URL whitelist / blocklist   — blocklist first, then whitelist, else block;
 *   4. `llm_judge_required`        — ask, flagged for the consumer (Stream D);
 *   otherwise ask.
 * Steps 2–3 only run when the scanned argument is a string.
 *
 * @param {string} toolName
 * @param {object} toolInput
 * @param {{classification?: object, normalize?: Function, isProtectedPath?: Function}} [deps]
 * @returns {{decision: 'allow'|'block'|'ask', category?: string, reason?: string,
 * needsLlmJudge?: boolean, needsContentScan?: boolean, scanArg?: string}}
 */
export function classifyMcpTool(toolName, toolInput = {}, deps = {}) {
  const table = deps.classification || DEFAULT_MCP_CLASSIFICATION;
  const toCanonicalPath = typeof deps.normalize === 'function' ? deps.normalize : defaultNormalize;
  const guardsPath = typeof deps.isProtectedPath === 'function' ? deps.isProtectedPath : () => false;

  const rule = matchClassificationKey(toolName, table);
  if (!rule) {
    // Unknown tool -> fail-CLOSE.
    return {
      decision: 'block',
      category: 'default',
      reason: `MCP tool ${toolName} not in gate-config classification. Add to mcp_tool_classification.`,
    };
  }

  const { category } = rule;
  switch (category) {
    case 'read_only':
      return { decision: 'allow', category };
    case 'hard_blacklist':
      return { decision: 'block', category, reason: `MCP tool ${toolName} classified hard-blacklist.` };
    case 'conditional':
      break; // evaluated below
    default:
      // Unknown category string -> fail-CLOSE.
      return { decision: 'block', category: category || 'unknown', reason: `MCP tool ${toolName} unknown category.` };
  }

  // 1. path_args — normalize + protected check.
  const pathArgNames = Array.isArray(rule.path_args) ? rule.path_args : [];
  for (const argName of pathArgNames) {
    const rawPath = toolInput && toolInput[argName];
    if (typeof rawPath !== 'string') continue;
    if (guardsPath(toCanonicalPath(rawPath))) {
      return { decision: 'block', category, reason: `MCP tool ${toolName} targets protected path "${rawPath}".` };
    }
  }

  const scanKey = rule.args_key_to_scan;
  const scanValue = scanKey && toolInput ? toolInput[scanKey] : undefined;
  const hasText = typeof scanValue === 'string';

  // 2. SQL full-statement scan (G12).
  if (rule.query_full_statement_scan && hasText) {
    const fullScan = rule.query_full_statement_scan;
    const statement = fullScan.comment_strip ? stripSqlComments(scanValue) : scanValue;
    if (testAny(fullScan.blocked_anywhere_patterns, statement)) {
      return { decision: 'block', category, reason: `database-query contains a mutating verb (full-statement scan).` };
    }
    if (testAny(fullScan.read_only_only_patterns, statement)) {
      return { decision: 'allow', category };
    }
    return {
      decision: 'ask',
      category,
      reason: `database-query did not match read-only nor blocked patterns — needs approval.`,
      scanArg: scanValue,
    };
  }

  // 2b. SQL prefix scan (legacy v4.0 style) — runs on the raw argument.
  if (rule.query_prefix_scan && hasText) {
    const prefixScan = rule.query_prefix_scan;
    if (testAny(prefixScan.blocked_patterns, scanValue)) {
      return { decision: 'block', category };
    }
    if (testAny(prefixScan.read_only_patterns, scanValue)) {
      return { decision: 'allow', category };
    }
    return { decision: 'ask', category, scanArg: scanValue };
  }

  // 3. URL whitelist / blocklist (WebFetch / browser_navigate).
  if (hasText && (rule.url_whitelist_patterns || rule.url_blocked_patterns)) {
    if (testAny(rule.url_blocked_patterns, scanValue)) {
      return { decision: 'block', category, reason: `MCP tool ${toolName} URL "${scanValue}" is blocked.` };
    }
    if (testAny(rule.url_whitelist_patterns, scanValue)) {
      return { decision: 'allow', category, needsContentScan: !!rule.fetched_content_scan };
    }
    // Neither list matched: anything not whitelisted is refused.
    return { decision: 'block', category, reason: `MCP tool ${toolName} URL "${scanValue}" not in whitelist.` };
  }

  // 4. LLM-judge required (WebSearch) — flag for the consumer (Stream D).
  if (rule.llm_judge_required) {
    return {
      decision: 'ask',
      category,
      needsLlmJudge: true,
      scanArg: hasText ? scanValue : undefined,
    };
  }

  // Conditional with no resolvable signal -> ask.
  return { decision: 'ask', category, reason: `MCP tool ${toolName} conditional — needs approval.` };
}