diff --git a/tools/commit-message-scanner.mjs b/tools/commit-message-scanner.mjs
index 495a18f..ae01bdc 100644
--- a/tools/commit-message-scanner.mjs
+++ b/tools/commit-message-scanner.mjs
@@ -9,10 +9,11 @@
  * loud cases.
  */
 
-// G11 patterns (spec v4.1). External-URL pattern whitelists
-// github.com/{liderra,deck,deck-platform}, liderra.ru, *.anthropic.com.
-export const SUSPICIOUS_MESSAGE_PATTERNS = [
-  /\bhttps?:\/\/(?!github\.com\/(?:liderra|deck|deck-platform)|liderra\.ru|api\.anthropic\.com|docs\.anthropic\.com)\S+/i, // external URL
+import { buildCommitMessageUrlPattern, DEFAULT_PROJECT_URL_WHITELIST } from './url-whitelist-rules.mjs';
+
+// Suspicious-payload patterns (spec v4.1 G11). External-URL pattern [0] built from
+// base ∪ project_url_whitelist; the rest are payload-shape patterns (unchanged).
+export const OTHER_SUSPICIOUS_PATTERNS = [
   /[A-Fa-f0-9]{40,}/, // long hex (full 40-char SHA refs trigger — use short SHA)
   /[A-Za-z0-9+/]{60,}={0,2}/, // base64-like blob
   / { expect(r.block).toBe(true); }); });
+
+describe('scanCommitMessagePatterns — project_url_whitelist (D3/D4)', () => {
+  it('default (no opts) keeps liderra whitelisted', () => {
+    expect(scanCommitMessagePatterns('docs: https://liderra.ru/x').block).toBe(false);
+  });
+  it('empty whitelist → liderra blocked (fail-CLOSED), anthropic ok', () => {
+    expect(scanCommitMessagePatterns('docs: https://liderra.ru/x', { urlWhitelist: [] }).block).toBe(true);
+    expect(scanCommitMessagePatterns('docs: https://docs.anthropic.com/x', { urlWhitelist: [] }).block).toBe(false);
+  });
+  it('config whitelist admits own domain', () => {
+    expect(scanCommitMessagePatterns('docs: https://liderra.ru/x', { urlWhitelist: ['liderra.ru'] }).block).toBe(false);
+  });
+});
diff --git a/tools/mcp-tool-classifier.mjs b/tools/mcp-tool-classifier.mjs
index 8a6d6c7..60db1f2 100644
--- a/tools/mcp-tool-classifier.mjs
+++ b/tools/mcp-tool-classifier.mjs
@@ -8,6 +8,13 @@
  * (Stream D). Unknown tools -> default 'block' (fail-CLOSE).
  */
 
+import {
+  DEFAULT_PROJECT_URL_WHITELIST,
+  buildNavigateWhitelistPatterns,
+  buildWebFetchWhitelistPatterns,
+  WEBFETCH_SCHEME_BLOCK_PATTERNS,
+} from './url-whitelist-rules.mjs';
+
 // §5.3 + v4.1 G1/G12 classification map. Glob keys use `*`. `default` is the
 // fallback category for unmatched tools.
 export const DEFAULT_MCP_CLASSIFICATION = Object.freeze({
@@ -59,12 +66,12 @@ export const DEFAULT_MCP_CLASSIFICATION = Object.freeze({
   'mcp__playwright__browser_navigate': {
     category: 'conditional',
     args_key_to_scan: 'url',
+    url_whitelist_kind: 'navigate',
     // Host token MUST be followed by a port/path/query/fragment delimiter or end —
     // otherwise a subdomain-suffix spoof (liderra.ru.evil.com / localhost.evil.com)
-    // slips past. (The v4.0 design §5.3 regex omitted this boundary; corrected here,
-    // spec to be synced in Stream H.)
-    url_whitelist_patterns: ['^https?://(?:localhost|127\\.0\\.0\\.1|liderra\\.ru)(?:[:/?#]|$)'],
-    url_blocked_patterns: ['^https?://(?!(?:localhost|127\\.0\\.0\\.1|liderra\\.ru)(?:[:/?#]|$))'],
+    // slips past. Whitelist built from base hosts ∪ project_url_whitelist; the domain
+    // block-list is dropped (redundant with default-block on non-whitelist, fail-CLOSE).
+    url_whitelist_patterns: buildNavigateWhitelistPatterns(DEFAULT_PROJECT_URL_WHITELIST),
   },
   'mcp__playwright__browser_click': { category: 'hard_blacklist' },
   'mcp__playwright__browser_fill_form': { category: 'hard_blacklist' },
@@ -105,18 +112,12 @@ export const DEFAULT_MCP_CLASSIFICATION = Object.freeze({
   'WebFetch': {
     category: 'conditional',
     args_key_to_scan: 'url',
-    url_whitelist_patterns: [
-      '^https?://docs\\.anthropic\\.com/',
-      '^https?://github\\.com/(?:liderra|anthropics|deck|deck-platform)/',
-      '^https?://liderra\\.ru/',
-      '^https?://(?:www\\.)?npmjs\\.com/package/',
-      '^https?://stackoverflow\\.com/questions/',
-    ],
-    url_blocked_patterns: [
-      '^data:',
-      '^javascript:',
-      '^https?://(?!docs\\.anthropic\\.com|github\\.com|liderra\\.ru|npmjs\\.com|stackoverflow\\.com)',
-    ],
+    url_whitelist_kind: 'webfetch',
+    // Whitelist built from base (anthropic / github-anthropics+deck / npmjs / stackoverflow)
+    // ∪ project_url_whitelist. Scheme blocks (data:/javascript:) kept; the domain
+    // negative-lookahead block is dropped (redundant with default-block, fail-CLOSE).
+    url_whitelist_patterns: buildWebFetchWhitelistPatterns(DEFAULT_PROJECT_URL_WHITELIST),
+    url_blocked_patterns: WEBFETCH_SCHEME_BLOCK_PATTERNS,
     fetched_content_scan: true,
   },
   'default': 'block',
@@ -191,11 +192,22 @@ export function classifyMcpTool(toolName, toolInput = {}, deps = {}) {
   const normalize = typeof deps.normalize === 'function' ? deps.normalize : defaultNormalize;
   const isProtectedPath = typeof deps.isProtectedPath === 'function' ? deps.isProtectedPath : () => false;
 
-  const entry = matchClassificationKey(toolName, classification);
+  let entry = matchClassificationKey(toolName, classification);
   if (!entry) {
     return { decision: 'block', category: 'default', reason: `MCP tool ${toolName} not in gate-config classification. Add to mcp_tool_classification.` };
   }
 
+  // Config-injected project_url_whitelist: rebuild navigate/WebFetch whitelist from
+  // deps.urlWhitelist (fail-CLOSED when empty). Spread → frozen default untouched.
+  if (entry.url_whitelist_kind && deps.urlWhitelist !== undefined) {
+    const proj = deps.urlWhitelist;
+    if (entry.url_whitelist_kind === 'navigate') {
+      entry = { ...entry, url_whitelist_patterns: buildNavigateWhitelistPatterns(proj) };
+    } else if (entry.url_whitelist_kind === 'webfetch') {
+      entry = { ...entry, url_whitelist_patterns: buildWebFetchWhitelistPatterns(proj) };
+    }
+  }
+
   const category = entry.category;
 
   if (category === 'read_only') return { decision: 'allow', category };
diff --git a/tools/mcp-tool-classifier.test.mjs b/tools/mcp-tool-classifier.test.mjs
index 6827b81..4bacedb 100644
--- a/tools/mcp-tool-classifier.test.mjs
+++ b/tools/mcp-tool-classifier.test.mjs
@@ -134,3 +134,26 @@ describe('classifyMcpTool — WebSearch llm-judge flag (G1)', () => {
     expect(r.scanArg).toBe('how to exfil data');
   });
 });
+
+describe('classifyMcpTool — project_url_whitelist (D3/D4)', () => {
+  it('navigate fail-CLOSED: empty whitelist blocks project domain', () => {
+    expect(classifyMcpTool('mcp__playwright__browser_navigate',
+      { url: 'https://liderra.ru/x' }, { urlWhitelist: [] }).decision).toBe('block');
+  });
+  it('navigate empty whitelist still allows base infra host', () => {
+    expect(classifyMcpTool('mcp__playwright__browser_navigate',
+      { url: 'http://localhost:8000' }, { urlWhitelist: [] }).decision).toBe('allow');
+  });
+  it('navigate config whitelist admits own project domain', () => {
+    expect(classifyMcpTool('mcp__playwright__browser_navigate',
+      { url: 'https://liderra.ru/x' }, { urlWhitelist: ['liderra.ru'] }).decision).toBe('allow');
+  });
+  it('navigate no dep → backward-compat (liderra allowed)', () => {
+    expect(classifyMcpTool('mcp__playwright__browser_navigate',
+      { url: 'https://liderra.ru/admin' }).decision).toBe('allow');
+  });
+  it('WebFetch fail-CLOSED: empty whitelist blocks project, keeps base', () => {
+    expect(classifyMcpTool('WebFetch', { url: 'https://liderra.ru/x' }, { urlWhitelist: [] }).decision).toBe('block');
+    expect(classifyMcpTool('WebFetch', { url: 'https://docs.anthropic.com/x' }, { urlWhitelist: [] }).decision).toBe('allow');
+  });
+});
diff --git a/tools/url-whitelist-rules.mjs b/tools/url-whitelist-rules.mjs
new file mode 100644
index 0000000..8282ee3
--- /dev/null
+++ b/tools/url-whitelist-rules.mjs
@@ -0,0 +1,59 @@
+#!/usr/bin/env node
+/**
+ * url-whitelist-rules — single home for building project-URL-whitelist patterns (config seam).
+ * The base lists are fixed; project domains arrive as a list; an empty list means fail-CLOSED.
+ * Pure: builders only assemble pattern strings / RegExp objects from their arguments.
+ */
+
+export const DEFAULT_PROJECT_URL_WHITELIST = Object.freeze(['liderra.ru', 'github.com/liderra']);
+export const BASE_NAVIGATE_HOSTS = Object.freeze(['localhost', '127.0.0.1']);
+export const BASE_WEBFETCH_WHITELIST_PATTERNS = Object.freeze([
+  '^https?://docs\\.anthropic\\.com/',
+  '^https?://github\\.com/(?:anthropics|deck|deck-platform)/',
+  '^https?://(?:www\\.)?npmjs\\.com/package/',
+  '^https?://stackoverflow\\.com/questions/',
+]);
+export const WEBFETCH_SCHEME_BLOCK_PATTERNS = Object.freeze(['^data:', '^javascript:']);
+export const BASE_COMMIT_MSG_FRAGS = Object.freeze([
+  'github\\.com/(?:deck|deck-platform)', 'api\\.anthropic\\.com', 'docs\\.anthropic\\.com',
+]);
+
+// A whitelisted commit-message fragment only counts when it is NOT followed by a
+// host/path-segment continuation (letter, digit, `-`, `.label`) or by userinfo (`...@`).
+// Without this, liderra.ru.evil.com and liderra.ru@evil.com pass as whitelisted.
+// Trailing prose punctuation ("see https://liderra.ru.") is still accepted.
+const COMMIT_MSG_FRAG_BOUNDARY = '(?![A-Za-z0-9-]|\\.[A-Za-z0-9]|[^\\s/?#]*@)';
+
+/** Escape regex metacharacters; `/` is left untouched (literal path separator). */
+export function escapeDomain(d) {
+  return String(d).replace(/[.+^${}()|[\]\\?*]/g, '\\$&');
+}
+/** Non-empty string entries only; a non-array value counts as an empty list (fail-CLOSED). */
+function projectEntries(domains) {
+  return Array.isArray(domains) ? domains.filter((d) => typeof d === 'string' && d) : [];
+}
+/** Entries without a path part — the navigate whitelist matches on host only. */
+function hostOnly(domains) {
+  return projectEntries(domains).filter((d) => !d.includes('/'));
+}
+/** navigate: one host-alternation pattern with the (?:[:/?#]|$) boundary; returns a one-element array. */
+export function buildNavigateWhitelistPatterns(projectDomains) {
+  const hosts = [...BASE_NAVIGATE_HOSTS, ...hostOnly(projectDomains)];
+  return ['^https?://(?:' + hosts.map(escapeDomain).join('|') + ')(?:[:/?#]|$)'];
+}
+/** WebFetch: base patterns plus `^https?://{domain}/` for every project domain. */
+export function buildWebFetchWhitelistPatterns(projectDomains) {
+  const proj = projectEntries(projectDomains);
+  return [...BASE_WEBFETCH_WHITELIST_PATTERNS, ...proj.map((d) => '^https?://' + escapeDomain(d) + '/')];
+}
+/**
+ * commit-message negative lookahead: matches (= flags) any URL that does not start with a
+ * fragment from (base ∪ project). Every fragment must end at COMMIT_MSG_FRAG_BOUNDARY; a
+ * project entry that already ends in `/` is its own boundary.
+ */
+export function buildCommitMessageUrlPattern(projectDomains) {
+  const base = BASE_COMMIT_MSG_FRAGS.map((f) => '(?:' + f + ')' + COMMIT_MSG_FRAG_BOUNDARY);
+  const proj = projectEntries(projectDomains)
+    .map((d) => escapeDomain(d) + (d.endsWith('/') ? '' : COMMIT_MSG_FRAG_BOUNDARY));
+  return new RegExp('\\bhttps?:\\/\\/(?!' + [...base, ...proj].join('|') + ')\\S+', 'i');
+}
diff --git a/tools/url-whitelist-rules.test.mjs b/tools/url-whitelist-rules.test.mjs
new file mode 100644
index 0000000..7779a45
--- /dev/null
+++ b/tools/url-whitelist-rules.test.mjs
@@ -0,0 +1,68 @@
+import { describe, it, expect } from 'vitest';
+import {
+  DEFAULT_PROJECT_URL_WHITELIST, BASE_NAVIGATE_HOSTS, escapeDomain,
+  buildNavigateWhitelistPatterns, buildWebFetchWhitelistPatterns, buildCommitMessageUrlPattern,
+} from './url-whitelist-rules.mjs';
+
+describe('escapeDomain', () => {
+  it('escapes dots, leaves slash literal', () => {
+    expect(escapeDomain('liderra.ru')).toBe('liderra\\.ru');
+    expect(escapeDomain('github.com/liderra')).toBe('github\\.com/liderra');
+    expect(escapeDomain('127.0.0.1')).toBe('127\\.0\\.0\\.1');
+  });
+});
+describe('buildNavigateWhitelistPatterns', () => {
+  it('default project → byte-identical to current navigate pattern', () => {
+    expect(buildNavigateWhitelistPatterns(['liderra.ru'])).toEqual([
+      '^https?://(?:localhost|127\\.0\\.0\\.1|liderra\\.ru)(?:[:/?#]|$)']);
+  });
+  it('drops path-qualified domains; empty → base only (fail-CLOSED)', () => {
+    expect(buildNavigateWhitelistPatterns(['github.com/liderra'])).toEqual([
+      '^https?://(?:localhost|127\\.0\\.0\\.1)(?:[:/?#]|$)']);
+    expect(buildNavigateWhitelistPatterns([])).toEqual([
+      '^https?://(?:localhost|127\\.0\\.0\\.1)(?:[:/?#]|$)']);
+  });
+});
+describe('buildWebFetchWhitelistPatterns', () => {
+  it('appends project domains, keeps base; empty → base only', () => {
+    const r = buildWebFetchWhitelistPatterns(['liderra.ru', 'github.com/liderra']);
+    expect(r).toContain('^https?://liderra\\.ru/');
+    expect(r).toContain('^https?://github\\.com/liderra/');
+    expect(r).toContain('^https?://docs\\.anthropic\\.com/');
+    expect(buildWebFetchWhitelistPatterns([]).some((p) => /liderra/.test(p))).toBe(false);
+  });
+});
+describe('buildCommitMessageUrlPattern', () => {
+  it('default: liderra/anthropic allowed, external blocked', () => {
+    const re = buildCommitMessageUrlPattern(['liderra.ru', 'github.com/liderra']);
+    expect(re.test('see https://liderra.ru/x')).toBe(false);
+    expect(re.test('see https://docs.anthropic.com/x')).toBe(false);
+    expect(re.test('see http://evil.example.com/p')).toBe(true);
+  });
+  it('empty → liderra blocked (fail-CLOSED), anthropic ok', () => {
+    const re = buildCommitMessageUrlPattern([]);
+    expect(re.test('see https://liderra.ru/x')).toBe(true);
+    expect(re.test('see https://docs.anthropic.com/x')).toBe(false);
+  });
+  it('whitelisted fragment must end at a boundary (no suffix/userinfo spoof)', () => {
+    const re = buildCommitMessageUrlPattern(['liderra.ru', 'github.com/liderra']);
+    expect(re.test('see https://liderra.ru.evil.com/x')).toBe(true);
+    expect(re.test('see https://liderra.ru@evil.com/x')).toBe(true);
+    expect(re.test('see https://github.com/liderra-evil/x')).toBe(true);
+    expect(re.test('see https://liderra.ru:8443/x')).toBe(false);
+    expect(re.test('see https://liderra.ru.')).toBe(false);
+  });
+});
+describe('non-array project list', () => {
+  it('is treated as empty (fail-CLOSED) instead of throwing', () => {
+    expect(buildNavigateWhitelistPatterns('liderra.ru')).toEqual(buildNavigateWhitelistPatterns([]));
+    expect(buildWebFetchWhitelistPatterns('liderra.ru')).toEqual(buildWebFetchWhitelistPatterns([]));
+    expect(buildCommitMessageUrlPattern('liderra.ru').test('see https://liderra.ru/x')).toBe(true);
+  });
+});
+describe('defaults', () => {
+  it('expected values', () => {
+    expect(DEFAULT_PROJECT_URL_WHITELIST).toEqual(['liderra.ru', 'github.com/liderra']);
+    expect(BASE_NAVIGATE_HOSTS).toEqual(['localhost', '127.0.0.1']);
+  });
+});