397777089e
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
112 lines
4.9 KiB
JavaScript
112 lines
4.9 KiB
JavaScript
/**
 * PII filter for brain governance observer (B2).
 * Used by Stop-hook before JSONL write — per Pravila §16.2 + ADR-011 + spec §5.4.
 *
 * Patterns covered:
 *   RU_PHONE       — +7XXXXXXXXXX OR bare 7XXXXXXXXXX (11 digits starting with 7,
 *                    word-boundary on left). Real-leak regression (gitleaks
 *                    2026-05-23): bare format slipped past `\+7\d{10}`.
 *   EMAIL          — any user@domain.tld
 *   JWT            — eyJ<base64>.<base64>.<base64> (must run BEFORE OPENAI/Bearer
 *                    fallbacks to avoid partial matches)
 *   AWS_KEY        — AKIA[A-Z0-9]{16} (Access Key ID prefix)
 *   YC_STATIC      — AQVN[\w-]{15,} (Yandex Cloud IAM static key)
 *   YC_SESSION     — t1.<base64> (Yandex IAM session token)
 *   YC_OAUTH       — y0_<base64> (Yandex OAuth token)
 *   SENTRY_TOKEN   — sntrys?_<12+ alphanum>
 *   OPENAI_TOKEN   — sk-<20+ alphanum>
 *   GENERIC_BEARER — Bearer <20+ token chars>
 *   IPV4           — dotted-quad (over-redacts 4-segment build numbers — accepted
 *                    tradeoff; under-redaction is the worse failure)
 *   WIN_USER_PATH  — C:\Users\<name> → C:\Users\***
 *   POSIX_HOME     — /home/<name> → /home/***
 *
 * Security Guidance #40: pure regex — no exec/execSync.
 */

// Detection patterns. Each one carries the `g` flag so that a single
// String.prototype.replace call rewrites every occurrence in the text.
const RU_PHONE = /(?:\+7|\b7)\d{10}/g;
const EMAIL = /[\w.+-]+@[\w-]+\.[\w.-]+/g;
const JWT = /\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\b/g;
const AWS_KEY = /\bAKIA[A-Z0-9]{16}\b/g;
const YC_STATIC = /\bAQVN[A-Za-z0-9_-]{15,}\b/g;
const YC_SESSION = /\bt1\.[A-Za-z0-9_-]{40,}\b/g;
const YC_OAUTH = /\by0_[A-Za-z0-9_-]{40,}\b/g;
const SENTRY_TOKEN = /sntrys?_[A-Za-z0-9]{12,}/g;
// NOTE(review): the character class excludes '-' and '_', so a key whose body
// is hyphenated right after `sk-` (e.g. an `sk-proj-…` style key) is not matched
// here — confirm whether that format needs coverage.
const OPENAI_TOKEN = /sk-[A-Za-z0-9]{20,}/g;
const GENERIC_BEARER = /Bearer\s+[A-Za-z0-9._-]{20,}/g;
const IPV4 = /\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/g;
// The capture group keeps the path prefix; only the user-name segment is masked.
const WIN_USER_PATH = /([Cc]:[\\/]Users[\\/])[^\\/\s"'<>:|?*]+/g;
const POSIX_HOME = /(\/home\/)[^/\s"'<>:?*]+/g;

// Single replacement pipeline (order = safety: specific patterns first, otherwise
// GENERIC_BEARER / OPENAI_TOKEN would swallow partial matches). sanitize and count
// run through the SAME pipeline → count == number of actual redactions made by
// sanitize (an overlap is consumed by the first pattern, so under-counting a leak
// is impossible; sharp-edges SE-2).
//
// Each entry: `name` — counter key, `re` — pattern, `repl` — replacer callback
// with the String.prototype.replace signature (match, ...groups).
const PIPELINE = [
  { name: 'RU_PHONE', re: RU_PHONE, repl: () => '+7XXXXXXXXXX' },
  { name: 'EMAIL', re: EMAIL, repl: () => '***@***' },
  { name: 'JWT', re: JWT, repl: () => '[REDACTED:jwt]' },
  { name: 'AWS_KEY', re: AWS_KEY, repl: () => '[REDACTED:aws]' },
  { name: 'YC_STATIC', re: YC_STATIC, repl: () => '[REDACTED:yandex]' },
  { name: 'YC_SESSION', re: YC_SESSION, repl: () => '[REDACTED:yandex]' },
  { name: 'YC_OAUTH', re: YC_OAUTH, repl: () => '[REDACTED:yandex]' },
  { name: 'SENTRY_TOKEN', re: SENTRY_TOKEN, repl: () => '[REDACTED:sentry]' },
  { name: 'OPENAI_TOKEN', re: OPENAI_TOKEN, repl: () => '[REDACTED:openai]' },
  { name: 'GENERIC_BEARER', re: GENERIC_BEARER, repl: () => '[REDACTED:bearer]' },
  { name: 'IPV4', re: IPV4, repl: () => '[REDACTED:ipv4]' },
  { name: 'WIN_USER_PATH', re: WIN_USER_PATH, repl: (_m, p1) => `${p1}***` },
  { name: 'POSIX_HOME', re: POSIX_HOME, repl: (_m, p1) => `${p1}***` },
];

/**
 * Apply every PIPELINE replacement, in pipeline order, to a single string.
 *
 * @param {*} s - Value to scrub.
 * @returns {*} The redacted string, or `s` itself when it is not a string.
 */
function sanitizeString(s) {
  if (typeof s !== 'string') return s;
  // Each stage sees the output of the previous one, so an earlier pattern
  // consumes overlapping text before a later pattern can touch it.
  return PIPELINE.reduce((text, { re, repl }) => text.replace(re, repl), s);
}

/**
 * Tally, per pattern name, how many replacements the pipeline performs on `s`.
 *
 * The replacements are really applied to a working copy (and then discarded)
 * so that each stage sees the output of the previous one; the tallies then
 * line up with running the same pipeline for redaction.
 *
 * @param {*} s - Value to inspect; anything that is not a string is ignored.
 * @param {Object<string, number>} counts - Mutated in place: counts[name] is
 *   incremented once per match (a missing entry is treated as 0).
 * @returns {void}
 */
function countString(s, counts) {
  if (typeof s !== 'string') return;
  let remaining = s;
  for (const { name, re, repl } of PIPELINE) {
    remaining = remaining.replace(re, (...matchArgs) => {
      counts[name] = (counts[name] || 0) + 1;
      return repl(...matchArgs);
    });
  }
}

/**
 * Recursively visit `input` and run countString on every string found.
 *
 * Strings are counted directly; arrays are walked element by element; other
 * objects are walked through their own enumerable property values (keys are
 * not inspected). null, undefined and all other primitives are ignored.
 *
 * @param {*} input - Arbitrarily nested value.
 * @param {Object<string, number>} counts - Accumulator passed through to countString.
 * @returns {void}
 */
function walkAndCount(input, counts) {
  if (typeof input === 'string') {
    countString(input, counts);
    return;
  }
  if (input === null || typeof input !== 'object') return;
  const children = Array.isArray(input) ? input : Object.values(input);
  for (const child of children) walkAndCount(child, counts);
}

/**
 * Sanitize input AND count matches per pattern type.
 *
 * The counters are seeded with 0 for every entry of PIPELINE, so a pattern
 * that never matched still shows up in the result.
 *
 * @param {*} input - String, array, object or any nesting of those.
 * @returns {{ sanitized: *, counts: Object<string, number> }}
 *   `sanitized` is the result of sanitize(input); `counts` maps each pattern
 *   name to the number of matches tallied by walkAndCount.
 */
export function sanitizeWithCount(input) {
  const counts = Object.fromEntries(PIPELINE.map(({ name }) => [name, 0]));
  walkAndCount(input, counts);
  return { sanitized: sanitize(input), counts };
}

/**
 * Return a redacted deep copy of `input`.
 *
 * - string: passed through sanitizeString.
 * - array: mapped element by element into a new array.
 * - other object: rebuilt as a plain object from its own enumerable
 *   string-keyed properties, with every value sanitized. Keys are copied
 *   verbatim and are NOT scanned for patterns.
 * - null, undefined and other primitives: returned unchanged.
 *
 * @param {*} input - Arbitrarily nested value.
 * @returns {*} Sanitized counterpart of `input`.
 */
export function sanitize(input) {
  if (typeof input === 'string') return sanitizeString(input);
  if (Array.isArray(input)) return input.map((item) => sanitize(item));
  if (input !== null && typeof input === 'object') {
    // Object.fromEntries defines own data properties, so an own "__proto__"
    // key (as JSON.parse can produce) survives as data. Plain assignment onto
    // `{}` would hit the prototype setter instead and drop the key.
    return Object.fromEntries(
      Object.entries(input).map(([key, value]) => [key, sanitize(value)]),
    );
  }
  return input;
}