Files
brain/tools/path-normalization.mjs
T

135 lines
5.9 KiB
JavaScript
Raw Normal View History

// tools/path-normalization.mjs
/**
* Path normalization — router-gate v4 spec §3.1.1.
* + glob-matcher util (used by skill-scope-verifier, tdd-real-test-verifier).
* Pure-core; I/O (realpath) injected via opts for testability.
*/
import path from 'node:path';
import os from 'node:os';
import fs from 'node:fs';
// Names of the only environment variables that expandEnvVars will substitute.
const ENV_WHITELIST = [
  'HOME',
  'USERPROFILE',
  'APPDATA',
  'XDG_CONFIG_HOME',
  'XDG_DATA_HOME',
  'XDG_CACHE_HOME',
];
/**
 * Expand a leading `~` into the given home directory.
 *
 * Only a bare `~`, or a `~` immediately followed by `/` or `\`, is expanded;
 * any other input (for example `~user/x`) is returned unchanged.
 *
 * @param {string} target - Raw path.
 * @param {string} homedir - Text that replaces the leading `~`.
 * @returns {string} `target` with its leading `~` replaced, or `target` as-is.
 */
export function expandHome(target, homedir) {
  if (target === '~') return homedir;
  const hasTildePrefix = ['~/', '~\\'].some((prefix) => target.startsWith(prefix));
  // Keep the separator that followed the tilde: only the `~` itself is dropped.
  return hasTildePrefix ? homedir + target.slice(1) : target;
}
/**
 * Expand references to whitelisted environment variables inside a path.
 *
 * For every name in ENV_WHITELIST whose value in `env` is not `undefined`,
 * these spellings are replaced by that value:
 *   - `%NAME%`     case-sensitive
 *   - `${NAME}`    case-sensitive
 *   - `$env:NAME`  PowerShell syntax, case-insensitive (prefix and name)
 *   - `$NAME`      case-sensitive, only when not followed by [A-Za-z0-9_]
 *
 * The whole string is scanned once, left to right, so text that came from a
 * substituted value is never scanned again: a value such as `/h/$APPDATA` is
 * inserted literally instead of being expanded a second time.
 *
 * @param {string} target - Raw path that may contain variable references.
 * @param {object} env - Environment map (name -> value).
 * @returns {string} `target` with every recognised reference substituted.
 */
export function expandEnvVars(target, env) {
  const defined = ENV_WHITELIST.filter((name) => env[name] !== undefined);
  if (defined.length === 0) return target;

  // Whitelisted names are plain identifiers, so they are safe to embed in a RegExp.
  const exact = defined.join('|');
  // Only the PowerShell form is case-insensitive. Spelling that out with
  // character classes (instead of the `i` flag) keeps the other forms case-sensitive.
  const anyCase = defined
    .map((name) => name.replace(/[A-Za-z]/g, (ch) => `[${ch.toUpperCase()}${ch.toLowerCase()}]`))
    .join('|');

  const reference = new RegExp(
    [
      `%(${exact})%`,
      `\\$\\{(${exact})\\}`,
      `\\$[Ee][Nn][Vv]:(${anyCase})(?![A-Za-z0-9_])`,
      `\\$(${exact})(?![A-Za-z0-9_])`,
    ].join('|'),
    'g',
  );

  // A function replacer inserts the value literally (no $& / $' / $` interpretation).
  return target.replace(reference, (_match, percent, braced, powershell, bare) => {
    const name =
      percent ||
      braced ||
      bare ||
      defined.find((candidate) => candidate.toLowerCase() === powershell.toLowerCase());
    return env[name];
  });
}
/**
 * Lower-case a path when the platform is `win32`; on every other platform
 * the path is returned unchanged.
 *
 * @param {string} p - Path to fold.
 * @param {string} platform - Platform identifier in `process.platform` form.
 * @returns {string} The folded (or untouched) path.
 */
export function caseFold(p, platform) {
  if (platform !== 'win32') return p;
  return p.toLowerCase();
}
// Compile a glob pattern into a RegExp anchored with ^ and $.
//
// Supported syntax:
//   *    any run of characters inside one path segment (never crosses `/`)
//   ?    exactly one character other than `/`
//   **   any run of characters, `/` included
//   `**` + `/` at the start of a segment: zero or more whole directories,
//        so `src/**/test.js` matches `src/test.js` and `src/a/b/test.js`
//        but not `src/contest.js`
// Regex metacharacters are escaped; every other character matches itself.
//
// NOTE: `pattern` must use forward slashes. For cross-platform path matching use `globMatch` instead.
export function globToRegExp(pattern) {
  let re = '';
  for (let i = 0; i < pattern.length; i++) {
    const c = pattern[i];
    if (c === '*') {
      if (pattern[i + 1] !== '*') {
        re += '[^/]*'; // * — within segment
        continue;
      }
      // `**` — decide how to treat it before consuming the second star.
      const startsSegment = i === 0 || pattern[i - 1] === '/';
      i++; // consume the second `*`
      if (pattern[i + 1] === '/') {
        i++; // consume the slash that follows `**`
        // At a segment boundary the prefix must be empty or end in `/`,
        // otherwise the tail could match in the middle of a file name.
        // A `**` glued to preceding text keeps the legacy "anything" meaning.
        re += startsSegment ? '(?:.*/)?' : '.*';
      } else {
        re += '.*'; // ** — across separators
      }
    } else if (c === '?') {
      re += '[^/]';
    } else if ('.+^${}()|[]\\'.includes(c)) {
      re += '\\' + c;
    } else {
      re += c;
    }
  }
  return new RegExp(`^${re}$`);
}
/**
 * Test a path against a glob pattern, treating `\` and `/` as the same separator.
 *
 * Every backslash in both arguments is rewritten to a forward slash before the
 * pattern is compiled with globToRegExp and applied to the path.
 *
 * @param {string} pathStr - Path to test.
 * @param {string} pattern - Glob pattern.
 * @returns {boolean} True when the whole path matches the pattern.
 */
export function globMatch(pathStr, pattern) {
  const toForwardSlashes = (text) => text.replace(/\\/g, '/');
  const matcher = globToRegExp(toForwardSlashes(pattern));
  return matcher.test(toForwardSlashes(pathStr));
}
/**
 * Normalize a path: expand `~`, expand whitelisted env vars, resolve to an
 * absolute path, realpath it, case-fold, then emit forward slashes in NFC form.
 *
 * @param {string} target - Raw path (may contain ~ or $VAR).
 * @param {object} [opts]
 * @param {string} [opts.homedir] - Home directory override (default: os.homedir()).
 * @param {object} [opts.env] - Environment map override (default: process.env).
 * @param {string} [opts.platform] - Platform string override (default: process.platform).
 * @param {Function} [opts.realpath] - Injectable realpath (default: fs.realpathSync), for test determinism.
 * @param {Function} [opts.resolve] - Injectable path.resolve (default: path.resolve), for cross-platform test determinism.
 * @returns {string} Normalized path: forward slashes only, NFC, lower-cased on win32.
 * @throws Re-throws any realpath error that carries a `code` other than 'ENOENT'.
 */
export function pathNormalize(target, opts = {}) {
  const {
    homedir = os.homedir(),
    env = process.env,
    platform = process.platform,
    realpath = fs.realpathSync,
    resolve = path.resolve,
  } = opts;

  let expanded = expandEnvVars(expandHome(target, homedir), env);

  // Cygwin / git-bash write drive paths as `/c/Users/...`. On win32, path.resolve
  // reads that as drive-relative and prepends the cwd drive, which produced
  // doubled-drive display paths such as `c:/c/users/...` in gate error messages.
  // Rewrite the prefix to the native `c:/Users/...` form before resolving.
  //
  // The rewrite is applied only on win32 and only when the supplied homedir is
  // itself drive-rooted (`<letter>:`), so POSIX-style test fixtures that pass
  // `/h` or `/home/u` keep their `/A/B`-style paths untouched.
  const homeIsDriveRooted = () => /^[a-zA-Z]:/.test(String(homedir || ''));
  if (platform === 'win32' && homeIsDriveRooted()) {
    expanded = expanded.replace(/^\/([a-zA-Z])\//, (_, drive) => `${drive}:/`);
  }

  const absolute = resolve(expanded);

  // Start from the resolved path and upgrade to the real path when it exists.
  let canonical = absolute;
  try {
    canonical = realpath(absolute);
  } catch (err) {
    // Errors with a code other than ENOENT are real FS errors: surface them
    // (the caller handles fail-close). ENOENT, and errors without a code,
    // fall back to the best-effort resolved path for unknown-state files.
    // NOTE(review): on this fallback no ancestor of the path is realpath'd, so
    // a symlinked parent directory stays unresolved — confirm callers accept that.
    const code = err && err.code;
    if (code && code !== 'ENOENT') throw err;
  }

  // Separators are normalized to forward slashes on every platform because the
  // DEFAULT_PROTECTED_PATTERNS regexes are forward-slash only; win32 resolve and
  // realpath return backslashes, which those patterns would otherwise miss.
  // P10-b (router-mentor): NFC normalization — decomposed Unicode (NFD) must not
  // slip past regex guards that compare against the composed form of the path.
  return caseFold(canonical, platform).replace(/\\/g, '/').normalize('NFC');
}
/**
 * Total (never-throwing) variant of pathNormalize — M7 Phase 0 (rule 7a, SE-I/L6).
 * Intended for escape-survivability binding keys: if normalization threw, the
 * escape check would become unreachable and the bug would brick the session
 * past the owner's emergency exit. Returns the pathNormalize result on success
 * and '' on any error (the caller treats an empty string as "unknown").
 * Content guards (read-path-deny / mcp-classification) KEEP calling the strict
 * pathNormalize — there an FS error must surface and lead to fail-CLOSE rather
 * than being swallowed.
 *
 * @param {string} target - Raw path, forwarded to pathNormalize.
 * @param {object} [opts] - Options, forwarded to pathNormalize unchanged.
 * @returns {string} Normalized path, or '' when pathNormalize threw.
 */
export function pathNormalizeSafe(target, opts = {}) {
  let normalized = '';
  try {
    normalized = pathNormalize(target, opts);
  } catch {
    // Deliberately swallowed: '' signals "unknown" to the caller.
  }
  return normalized;
}