// Vitest suite for the `sanitize` and `sanitizeWithCount` exports of
// ./observer-pii-filter.mjs. Each case feeds a fixture containing one kind of
// sensitive value and checks the masked output (or the per-pattern counters).
import { describe, it, expect } from 'vitest';
import { sanitize, sanitizeWithCount } from './observer-pii-filter.mjs';

describe('observer-pii-filter sanitize', () => {
  it('masks Russian phone numbers', () => {
    const raw = 'Контакт: +79991234567 — позвонить';
    const masked = sanitize(raw);
    expect(masked).toBe('Контакт: +7XXXXXXXXXX — позвонить');
  });

  it('masks bare Russian phone numbers without + prefix (regression: episodes-2026-05 leak)', () => {
    // Regression fixture: gitleaks reported '79135191264' in observer JSONL
    // free-text on 2026-05-23; the number had no leading '+'.
    const raw = 'Утечка телефона: 79135191264 в логе';
    const masked = sanitize(raw);
    expect(masked).toBe('Утечка телефона: +7XXXXXXXXXX в логе');
  });

  it('does not match 11-digit sequences embedded in longer numeric strings', () => {
    // Guard against false positives: inside a long ID / hash the '7' sits
    // between other digits, so there is no word boundary around it.
    const raw = 'id 1796133619135191264999 not a phone';
    const masked = sanitize(raw);
    expect(masked).toBe('id 1796133619135191264999 not a phone');
  });

  it('masks bare phone inside JSON-like context (quotes, braces)', () => {
    const raw = '{"phone": "79135191264"}';
    const masked = sanitize(raw);
    expect(masked).toBe('{"phone": "+7XXXXXXXXXX"}');
  });

  it('masks email addresses', () => {
    const raw = 'Mail: kpd9363@gmail.com';
    const masked = sanitize(raw);
    expect(masked).toBe('Mail: ***@***');
  });

  it('masks Sentry-style tokens', () => {
    const raw = 'token sntrys_abc123def456ghi789';
    // sanitize is deliberately invoked once per assertion, as in the
    // original test, so the number of calls stays the same.
    expect(sanitize(raw)).toContain('[REDACTED');
    expect(sanitize(raw)).not.toContain('sntrys_abc123def456ghi789');
  });

  it('is idempotent on already-sanitized strings', () => {
    const alreadyMasked = 'Контакт: +7XXXXXXXXXX, ***@***';
    const secondPass = sanitize(alreadyMasked);
    expect(secondPass).toBe(alreadyMasked);
  });

  it('handles empty string', () => {
    const masked = sanitize('');
    expect(masked).toBe('');
  });

  it('handles object input by sanitizing string fields recursively', () => {
    const record = { task_id: 'x', note: 'call +79991234567' };
    const cleaned = sanitize(record);
    expect(cleaned.note).toBe('call +7XXXXXXXXXX');
    expect(cleaned.task_id).toBe('x');
  });

  it('masks JWT tokens (three base64 segments separated by .)', () => {
    const token =
      'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4ifQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c';
    const masked = sanitize(`Authorization header: Bearer-like ${token} end`);
    expect(masked).toContain('[REDACTED:jwt]');
    expect(masked).not.toContain(token);
  });

  it('masks AWS access key IDs (AKIA-prefixed)', () => {
    const masked = sanitize('aws key AKIAIOSFODNN7EXAMPLE in code');
    expect(masked).toContain('[REDACTED:aws]');
    expect(masked).not.toContain('AKIAIOSFODNN7EXAMPLE');
  });

  it('masks Yandex Cloud IAM static keys (AQVN-prefixed)', () => {
    const staticKey = 'AQVN-static-key-abc123xyz456789defABC';
    const masked = sanitize(`yc key ${staticKey} configured`);
    expect(masked).toContain('[REDACTED:yandex]');
    expect(masked).not.toContain(staticKey);
  });

  it('masks Yandex Cloud IAM session tokens (t1. prefix)', () => {
    const sessionToken =
      't1.9euelZqOk5KZj4-WzpaclpqRy42TmO3rnpqaks-Tj4_HmJOZk4_LipKczpvl9PdABCDEF';
    const masked = sanitize(`token: ${sessionToken} end`);
    expect(masked).toContain('[REDACTED:yandex]');
    expect(masked).not.toContain(sessionToken);
  });

  it('masks Yandex OAuth tokens (y0_ prefix)', () => {
    const oauthToken = 'y0_AgAAAABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdef';
    const masked = sanitize(`bearer ${oauthToken} for api`);
    expect(masked).toContain('[REDACTED:yandex]');
    expect(masked).not.toContain(oauthToken);
  });

  it('masks IPv4 addresses', () => {
    const masked = sanitize('connect to 192.168.1.42 and 10.0.0.1');
    expect(masked).toContain('[REDACTED:ipv4]');
    expect(masked).not.toContain('192.168.1.42');
    expect(masked).not.toContain('10.0.0.1');
  });

  it('redacts the OS username segment in Windows user paths', () => {
    const masked = sanitize('C:\\Users\\Administrator\\.claude\\projects\\foo\\bar.md');
    expect(masked).toContain('C:\\Users\\***\\');
    expect(masked).not.toContain('Administrator');
  });

  it('redacts the user in POSIX /home/ paths', () => {
    const masked = sanitize('/home/dmitry/work/script.sh');
    expect(masked).toContain('/home/***/');
    expect(masked).not.toContain('/home/dmitry');
  });

  it('is idempotent on the expanded patterns', () => {
    // Input consists solely of placeholders the filter itself emits; a second
    // pass is expected to leave them untouched.
    const alreadyMasked =
      'token [REDACTED:jwt] / aws [REDACTED:aws] / yc [REDACTED:yandex] / ip [REDACTED:ipv4] / path C:\\Users\\***\\foo';
    const secondPass = sanitize(alreadyMasked);
    expect(secondPass).toBe(alreadyMasked);
  });
});

describe('sanitizeWithCount (Task 3)', () => {
  it('counts matches per pattern type alongside sanitizing', () => {
    const raw = 'Phone +71234567890 and email user@example.com';
    const result = sanitizeWithCount(raw);
    expect(result.sanitized).toContain('+7XXXXXXXXXX');
    expect(result.sanitized).toContain('***@***');
    expect(result.counts.RU_PHONE).toBe(1);
    expect(result.counts.EMAIL).toBe(1);
  });

  it('counts bare RU phone (no + prefix) as RU_PHONE pattern', () => {
    const result = sanitizeWithCount('phone 79135191264 in free text');
    expect(result.counts.RU_PHONE).toBe(1);
  });

  it('returns zero for absent patterns', () => {
    const tally = sanitizeWithCount('plain text with no PII').counts;
    expect(tally.RU_PHONE).toBe(0);
    expect(tally.EMAIL).toBe(0);
    expect(tally.JWT).toBe(0);
  });

  it('counts recursively over objects', () => {
    const nested = { msg: 'call +71112223344', meta: { contact: 'a@b.co' } };
    const tally = sanitizeWithCount(nested).counts;
    expect(tally.RU_PHONE).toBe(1);
    expect(tally.EMAIL).toBe(1);
  });

  it('returns counts object with all 13 pattern keys pre-initialised to 0', () => {
    const tally = sanitizeWithCount('').counts;
    const expectedKeys = [
      'AWS_KEY',
      'EMAIL',
      'GENERIC_BEARER',
      'IPV4',
      'JWT',
      'OPENAI_TOKEN',
      'POSIX_HOME',
      'RU_PHONE',
      'SENTRY_TOKEN',
      'WIN_USER_PATH',
      'YC_OAUTH',
      'YC_SESSION',
      'YC_STATIC',
    ];
    // Both sides are sorted so the comparison ignores key insertion order.
    const actualKeys = Object.keys(tally).sort();
    expect(actualKeys).toEqual(expectedKeys.sort());
  });

  it('handles arrays', () => {
    const result = sanitizeWithCount(['+71111111111', 'no pii']);
    expect(result.counts.RU_PHONE).toBe(1);
  });

  it('counts an overlapping Bearer+OpenAI span once (not twice)', () => {
    const tally = sanitizeWithCount('auth: Bearer sk-AAAAAAAAAAAAAAAAAAAA done').counts;
    // Either pattern may claim the span; only the combined total is pinned.
    expect(tally.OPENAI_TOKEN + tally.GENERIC_BEARER).toBe(1);
  });

  it('counts two distinct adjacent leaks separately', () => {
    const tally = sanitizeWithCount('a@b.co +71234567890').counts;
    expect(tally.EMAIL).toBe(1);
    expect(tally.RU_PHONE).toBe(1);
  });
});