Files
portal/tools/observer-pii-filter.test.mjs
T
Дмитрий dbe2252421 feat(observer): real PII counter — STATUS.md stops lying
Closes brain-retro 2026-05-20 #3 SIMPLIFIED — sanitizeWithCount in
pii-filter (counts matches per pattern) + persistent monthly counter
docs/observer/.pii-counters.json (bumped by Stop-hook on each episode
write) + status-md-generator reads real count (no more piiMatches: 0
hardcode).

PII patterns themselves NOT changed (F7 of parallel session already
extended to 13 patterns).

Counter is informational — write failure never blocks Stop-event.

5+1+1=7 new vitest tests, 256/256 GREEN.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-20 13:47:36 +03:00

131 lines
5.0 KiB
JavaScript

import { describe, it, expect } from 'vitest';
import { sanitize, sanitizeWithCount } from './observer-pii-filter.mjs';
/**
 * Spec for `sanitize`.
 *
 * Each case passes a value containing one category of sensitive data and
 * asserts on the masked result; the idempotence cases confirm that text which
 * is already masked comes back unchanged.
 */
describe('observer-pii-filter sanitize', () => {
  // --- Contact details ----------------------------------------------------

  it('masks Russian phone numbers', () => {
    const masked = sanitize('Контакт: +79991234567 — позвонить');
    expect(masked).toBe('Контакт: +7XXXXXXXXXX — позвонить');
  });

  it('masks email addresses', () => {
    const masked = sanitize('Mail: kpd9363@gmail.com');
    expect(masked).toBe('Mail: ***@***');
  });

  // --- Secrets and tokens -------------------------------------------------

  it('masks Sentry-style tokens', () => {
    const rawToken = 'sntrys_abc123def456ghi789';
    // Deliberately sanitized twice (once per assertion): repeated calls on the
    // same input must both yield a masked result.
    expect(sanitize(`token ${rawToken}`)).toContain('[REDACTED');
    expect(sanitize(`token ${rawToken}`)).not.toContain(rawToken);
  });

  // --- Degenerate and structured inputs -----------------------------------

  it('is idempotent on already-sanitized strings', () => {
    const alreadyMasked = 'Контакт: +7XXXXXXXXXX, ***@***';
    expect(sanitize(alreadyMasked)).toBe(alreadyMasked);
  });

  it('handles empty string', () => {
    expect(sanitize('')).toBe('');
  });

  it('handles object input by sanitizing string fields recursively', () => {
    const { note, task_id: taskId } = sanitize({
      task_id: 'x',
      note: 'call +79991234567',
    });
    expect(note).toBe('call +7XXXXXXXXXX');
    // Fields without PII pass through untouched.
    expect(taskId).toBe('x');
  });

  // --- Expanded pattern set -----------------------------------------------

  it('masks JWT tokens (three base64 segments separated by .)', () => {
    const token = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4ifQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c';
    const masked = sanitize(`Authorization header: Bearer-like ${token} end`);
    expect(masked).toContain('[REDACTED:jwt]');
    expect(masked).not.toContain(token);
  });

  it('masks AWS access key IDs (AKIA-prefixed)', () => {
    const accessKeyId = 'AKIAIOSFODNN7EXAMPLE';
    const masked = sanitize(`aws key ${accessKeyId} in code`);
    expect(masked).toContain('[REDACTED:aws]');
    expect(masked).not.toContain(accessKeyId);
  });

  it('masks Yandex Cloud IAM static keys (AQVN-prefixed)', () => {
    const staticKey = 'AQVN-static-key-abc123xyz456789defABC';
    const masked = sanitize(`yc key ${staticKey} configured`);
    expect(masked).toContain('[REDACTED:yandex]');
    expect(masked).not.toContain(staticKey);
  });

  it('masks Yandex Cloud IAM session tokens (t1. prefix)', () => {
    const sessionToken = 't1.9euelZqOk5KZj4-WzpaclpqRy42TmO3rnpqaks-Tj4_HmJOZk4_LipKczpvl9PdABCDEF';
    const masked = sanitize(`token: ${sessionToken} end`);
    expect(masked).toContain('[REDACTED:yandex]');
    expect(masked).not.toContain(sessionToken);
  });

  it('masks Yandex OAuth tokens (y0_ prefix)', () => {
    const oauthToken = 'y0_AgAAAABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdef';
    const masked = sanitize(`bearer ${oauthToken} for api`);
    expect(masked).toContain('[REDACTED:yandex]');
    expect(masked).not.toContain(oauthToken);
  });

  it('masks IPv4 addresses', () => {
    const addresses = ['192.168.1.42', '10.0.0.1'];
    const masked = sanitize(`connect to ${addresses[0]} and ${addresses[1]}`);
    expect(masked).toContain('[REDACTED:ipv4]');
    // Both addresses in the input must be gone, not only the first one.
    for (const address of addresses) {
      expect(masked).not.toContain(address);
    }
  });

  // --- Filesystem paths ---------------------------------------------------

  it('redacts the OS username segment in Windows user paths', () => {
    const masked = sanitize('C:\\Users\\Administrator\\.claude\\projects\\foo\\bar.md');
    expect(masked).toContain('C:\\Users\\***\\');
    expect(masked).not.toContain('Administrator');
  });

  it('redacts the user in POSIX /home/ paths', () => {
    const masked = sanitize('/home/dmitry/work/script.sh');
    expect(masked).toContain('/home/***/');
    expect(masked).not.toContain('/home/dmitry');
  });

  it('is idempotent on the expanded patterns', () => {
    // One already-masked fragment per placeholder form, joined into one line.
    const alreadyMasked = [
      'token [REDACTED:jwt]',
      'aws [REDACTED:aws]',
      'yc [REDACTED:yandex]',
      'ip [REDACTED:ipv4]',
      'path C:\\Users\\***\\foo',
    ].join(' / ');
    expect(sanitize(alreadyMasked)).toBe(alreadyMasked);
  });
});
/**
 * Spec for `sanitizeWithCount`.
 *
 * The function is called with strings, nested objects and arrays; the cases
 * destructure `{ sanitized, counts }` from its result and assert on the masked
 * text and on the per-pattern match counters.
 */
describe('sanitizeWithCount (Task 3)', () => {
  it('counts matches per pattern type alongside sanitizing', () => {
    const input = 'Phone +71234567890 and email user@example.com';
    const { sanitized, counts } = sanitizeWithCount(input);

    expect(sanitized).toContain('+7XXXXXXXXXX');
    expect(sanitized).toContain('***@***');
    expect(counts.RU_PHONE).toBe(1);
    expect(counts.EMAIL).toBe(1);
  });

  it('returns zero for absent patterns', () => {
    const { counts } = sanitizeWithCount('plain text with no PII');

    expect(counts.RU_PHONE).toBe(0);
    expect(counts.EMAIL).toBe(0);
    expect(counts.JWT).toBe(0);
  });

  it('counts recursively over objects', () => {
    const input = { msg: 'call +71112223344', meta: { contact: 'a@b.co' } };
    const { counts } = sanitizeWithCount(input);

    // The phone sits at the top level, the email one level down.
    expect(counts.RU_PHONE).toBe(1);
    expect(counts.EMAIL).toBe(1);
  });

  it('returns counts object with all 13 pattern keys pre-initialised to 0', () => {
    const { counts } = sanitizeWithCount('');
    const keys = Object.keys(counts).sort();

    expect(keys).toEqual([
      'AWS_KEY', 'EMAIL', 'GENERIC_BEARER', 'IPV4', 'JWT',
      'OPENAI_TOKEN', 'POSIX_HOME', 'RU_PHONE', 'SENTRY_TOKEN',
      'WIN_USER_PATH', 'YC_OAUTH', 'YC_SESSION', 'YC_STATIC',
    ].sort());

    // The title promises zeroed counters, so verify the values as well as the
    // key names; otherwise counters initialised to undefined/null would pass.
    for (const key of keys) {
      expect(counts[key]).toBe(0);
    }
  });

  it('handles arrays', () => {
    const { counts } = sanitizeWithCount(['+71111111111', 'no pii']);

    expect(counts.RU_PHONE).toBe(1);
  });
});