2476dd3c1b
PII filter previously covered only RU phone, email, Sentry, OpenAI token, and generic Bearer. Several common surface leaks were uncovered: - JWT tokens (eyJ<base64>.<base64>.<base64>) — auth/session tokens. - AWS access key IDs (AKIA<16 alphanum>) — IAM static creds. - Yandex Cloud IAM static keys (AQVN<base64>), session tokens (t1.<base64>), OAuth tokens (y0_<base64>) — primary cloud-provider for this project. - IPv4 addresses (dotted-quad) — over-redacts 4-segment build numbers as an accepted tradeoff (under-redaction is the worse failure). - Windows user-paths (C:\Users\<name>) → C:\Users\***. Otherwise the OS username `Administrator` leaks via task_size.files in every episode. - POSIX /home/<name>/ → /home/***/. Same rationale for Linux dev hosts. Pattern order: highly-specific token patterns (JWT/AWS/YC) run BEFORE OPENAI_TOKEN/GENERIC_BEARER fallbacks; otherwise partial overlaps would strip the wrong segments. Tests: 9 new (each new pattern + idempotency over the expanded redaction markers). 27/27 PII tests green. .gitleaks.toml: added the test fixture to the path allowlist — the file contains synthetic JWT/AWS/Yandex tokens (the filter is supposed to redact them), not real secrets. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
95 lines
3.5 KiB
JavaScript
95 lines
3.5 KiB
JavaScript
import { describe, it, expect } from 'vitest';
|
|
import { sanitize } from './observer-pii-filter.mjs';
|
|
|
|
// Behavioural spec for `sanitize` (imported from ./observer-pii-filter.mjs).
// Each case passes in text containing one kind of sensitive value and checks
// the mask or redaction marker that comes back.
//
// Keep every fixture synthetic: a PII-filter test must not itself carry a real
// secret or a real person's contact details.
describe('observer-pii-filter sanitize', () => {
  // --- Original coverage: phone, email, Sentry token, basic contracts ---

  it('masks Russian phone numbers', () => {
    const input = 'Контакт: +79991234567 — позвонить';
    // Surrounding text must survive untouched; only the digits are masked.
    expect(sanitize(input)).toBe('Контакт: +7XXXXXXXXXX — позвонить');
  });

  it('masks email addresses', () => {
    // example.com is reserved for documentation (RFC 2606), so this fixture
    // cannot collide with a real mailbox.
    const input = 'Mail: user@example.com';
    expect(sanitize(input)).toBe('Mail: ***@***');
  });

  it('masks Sentry-style tokens', () => {
    const token = 'sntrys_abc123def456ghi789';
    // Sanitize once and run both assertions against the same result.
    const out = sanitize(`token ${token}`);
    // Only the marker prefix is asserted; the exact label is not pinned here.
    expect(out).toContain('[REDACTED');
    expect(out).not.toContain(token);
  });

  it('is idempotent on already-sanitized strings', () => {
    // Masks produced by an earlier pass must not be altered by a second pass.
    const sanitized = 'Контакт: +7XXXXXXXXXX, ***@***';
    expect(sanitize(sanitized)).toBe(sanitized);
  });

  it('handles empty string', () => {
    expect(sanitize('')).toBe('');
  });

  it('handles object input by sanitizing string fields recursively', () => {
    const input = { task_id: 'x', note: 'call +79991234567' };
    const out = sanitize(input);
    expect(out.note).toBe('call +7XXXXXXXXXX');
    // Fields without sensitive content pass through unchanged.
    expect(out.task_id).toBe('x');
  });

  // --- Expanded coverage: JWT, AWS, Yandex Cloud, IPv4, user paths ---

  it('masks JWT tokens (three base64 segments separated by .)', () => {
    const jwt = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4ifQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c';
    // The token is embedded mid-sentence to check it is found inside free text.
    const out = sanitize(`Authorization header: Bearer-like ${jwt} end`);
    expect(out).toContain('[REDACTED:jwt]');
    expect(out).not.toContain(jwt);
  });

  it('masks AWS access key IDs (AKIA-prefixed)', () => {
    const out = sanitize('aws key AKIAIOSFODNN7EXAMPLE in code');
    expect(out).toContain('[REDACTED:aws]');
    expect(out).not.toContain('AKIAIOSFODNN7EXAMPLE');
  });

  it('masks Yandex Cloud IAM static keys (AQVN-prefixed)', () => {
    const k = 'AQVN-static-key-abc123xyz456789defABC';
    const out = sanitize(`yc key ${k} configured`);
    expect(out).toContain('[REDACTED:yandex]');
    expect(out).not.toContain(k);
  });

  it('masks Yandex Cloud IAM session tokens (t1. prefix)', () => {
    const t = 't1.9euelZqOk5KZj4-WzpaclpqRy42TmO3rnpqaks-Tj4_HmJOZk4_LipKczpvl9PdABCDEF';
    const out = sanitize(`token: ${t} end`);
    expect(out).toContain('[REDACTED:yandex]');
    expect(out).not.toContain(t);
  });

  it('masks Yandex OAuth tokens (y0_ prefix)', () => {
    const t = 'y0_AgAAAABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdef';
    const out = sanitize(`bearer ${t} for api`);
    expect(out).toContain('[REDACTED:yandex]');
    expect(out).not.toContain(t);
  });

  it('masks IPv4 addresses', () => {
    // Two addresses in one string: both must disappear from the output.
    const out = sanitize('connect to 192.168.1.42 and 10.0.0.1');
    expect(out).toContain('[REDACTED:ipv4]');
    expect(out).not.toContain('192.168.1.42');
    expect(out).not.toContain('10.0.0.1');
  });

  it('redacts the OS username segment in Windows user paths', () => {
    const out = sanitize('C:\\Users\\Administrator\\.claude\\projects\\foo\\bar.md');
    // Only the username segment is replaced; the C:\Users\ prefix is kept.
    expect(out).toContain('C:\\Users\\***\\');
    expect(out).not.toContain('Administrator');
  });

  it('redacts the user in POSIX /home/ paths', () => {
    const out = sanitize('/home/dmitry/work/script.sh');
    expect(out).toContain('/home/***/');
    expect(out).not.toContain('/home/dmitry');
  });

  it('is idempotent on the expanded patterns', () => {
    // NOTE(review): the POSIX marker `/home/***/` is not part of this fixture;
    // consider adding it once confirmed against the filter's /home/ pattern.
    const masked = 'token [REDACTED:jwt] / aws [REDACTED:aws] / yc [REDACTED:yandex] / ip [REDACTED:ipv4] / path C:\\Users\\***\\foo';
    expect(sanitize(masked)).toBe(masked);
  });
});
|