Files
brain/tools/path-normalization.mjs

135 lines
5.9 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// tools/path-normalization.mjs
/**
* Path normalization — router-gate v4 spec §3.1.1.
* + glob-matcher util (used by skill-scope-verifier, tdd-real-test-verifier).
* Pure-core; I/O (realpath) injected via opts for testability.
*/
import path from 'node:path';
import os from 'node:os';
import fs from 'node:fs';
// Allow-list of environment variable names that expandEnvVars is permitted to
// substitute. References to any other variable are left in the path verbatim.
const ENV_WHITELIST = [
  'HOME',
  'USERPROFILE',
  'APPDATA',
  'XDG_CONFIG_HOME',
  'XDG_DATA_HOME',
  'XDG_CACHE_HOME',
];
/**
 * Expand a leading `~` into the supplied home directory.
 *
 * Only a bare `~`, or a `~` immediately followed by `/` or `\`, is expanded;
 * forms such as `~user/...` and a `~` in the middle of the path are returned
 * unchanged.
 *
 * @param {string} target - Raw path, possibly starting with `~`.
 * @param {string} homedir - Home directory to substitute for the tilde.
 * @returns {string} The path with the leading tilde replaced, or `target` as-is.
 */
export function expandHome(target, homedir) {
  if (target !== '~') {
    const hasTildeDirPrefix = target.startsWith('~/') || target.startsWith('~\\');
    // Drop only the tilde itself; the separator that followed it is kept.
    return hasTildeDirPrefix ? homedir + target.slice(1) : target;
  }
  return homedir;
}
/**
 * Substitute references to whitelisted environment variables inside a path.
 *
 * For each name in ENV_WHITELIST that is defined in `env`, the following
 * spellings are replaced, in this order:
 *   1. `%NAME%`      (cmd.exe style, exact case)
 *   2. `${NAME}`     (braced POSIX style, exact case)
 *   3. `$env:NAME`   (PowerShell style, case-insensitive)
 *   4. `$NAME`       (bare POSIX style, exact case)
 * Forms 3 and 4 only match when the name is not followed by another
 * identifier character, so `$HOMEDIR` is not treated as `$HOME`.
 *
 * @param {string} target - Path that may contain variable references.
 * @param {object} env - Map of variable name to value (e.g. process.env).
 * @returns {string} The path with every recognised reference expanded.
 */
export function expandEnvVars(target, env) {
  // Plain-text substitution: neither the needle nor the value is interpreted as a pattern.
  const swapLiteral = (text, needle, value) => text.split(needle).join(value);

  return ENV_WHITELIST.reduce((expanded, name) => {
    const value = env[name];
    if (value === undefined) {
      return expanded;
    }
    // A function replacer inserts the value literally, so `$&`, `$'` and
    // `` $` `` inside it are not read as replacement patterns.
    const insertValue = () => value;
    // PowerShell is case-insensitive (`$env:USERPROFILE` ≡ `$env:userprofile`), hence the `i` flag.
    const powershellRef = new RegExp(`\\$env:${name}(?![A-Za-z0-9_])`, 'gi');
    // Bare `$VAR` — only when not followed by another identifier character.
    const bareRef = new RegExp(`\\$${name}(?![A-Za-z0-9_])`, 'g');

    let result = swapLiteral(expanded, `%${name}%`, value);
    result = swapLiteral(result, `\${${name}}`, value);
    result = result.replace(powershellRef, insertValue);
    return result.replace(bareRef, insertValue);
  }, target);
}
/**
 * Lower-case a path on win32; return it untouched on every other platform.
 *
 * @param {string} p - Path to fold.
 * @param {string} platform - Platform identifier, compared against `'win32'`.
 * @returns {string} The lower-cased path on win32, otherwise `p` unchanged.
 */
export function caseFold(p, platform) {
  if (platform !== 'win32') {
    return p;
  }
  return p.toLowerCase();
}
// NOTE: `pattern` must use forward slashes. For cross-platform path matching use `globMatch` instead.
//
// Compile a glob pattern into a fully anchored RegExp.
//
// Supported syntax:
//   `**/` at the start of a segment — zero or more whole directories
//   `**`  anywhere else             — any characters, `/` included
//   `*`                             — any run of characters within one segment
//   `?`                             — exactly one character other than `/`
// Every other character matches literally; regex metacharacters are escaped,
// so bracket classes such as `[abc]` are NOT glob syntax here.
//
// A segment-leading `**/` is compiled to `(?:.*/)?` rather than a bare `.*`.
// With a bare `.*` the segment after it could match as a suffix of a longer
// name: `**/foo.js` accepted `barfoo.js`, `src/**/t.js` accepted `src/xt.js`.
//
// @param {string} pattern - Glob pattern using `/` as the separator.
// @returns {RegExp} Anchored (`^…$`) regular expression equivalent to the glob.
export function globToRegExp(pattern) {
  const REGEX_META = '.+^${}()|[]\\';
  let re = '';
  for (let i = 0; i < pattern.length; i++) {
    const c = pattern[i];
    if (c === '*') {
      if (pattern[i + 1] === '*') {
        const atSegmentStart = i === 0 || pattern[i - 1] === '/';
        i++; // consume the second `*`
        if (pattern[i + 1] === '/') {
          i++; // consume the slash that closes `**/`
          // Whole-directory wildcard: either nothing, or a prefix ending in `/`.
          // When `**` is glued to preceding text (e.g. `a**/b`) keep the
          // historical loose behaviour.
          re += atSegmentStart ? '(?:.*/)?' : '.*';
        } else {
          re += '.*'; // ** — across separators
        }
      } else {
        re += '[^/]*'; // * — within segment
      }
    } else if (c === '?') {
      re += '[^/]';
    } else if (REGEX_META.includes(c)) {
      re += '\\' + c;
    } else {
      re += c;
    }
  }
  return new RegExp(`^${re}$`);
}
/**
 * Test a path against a glob pattern, tolerating either separator style.
 *
 * Backslashes in both the path and the pattern are converted to forward
 * slashes before the pattern is compiled with globToRegExp.
 *
 * @param {string} pathStr - Path to test.
 * @param {string} pattern - Glob pattern.
 * @returns {boolean} True when the whole path matches the pattern.
 */
export function globMatch(pathStr, pattern) {
  const toForwardSlashes = (text) => text.split('\\').join('/');
  const matcher = globToRegExp(toForwardSlashes(pattern));
  return matcher.test(toForwardSlashes(pathStr));
}
/**
 * Normalize a path: expand `~`, expand whitelisted env vars, resolve,
 * realpath, case-fold (win32 only), convert separators to `/`, and apply
 * Unicode NFC normalization.
 *
 * @param {string} target - Raw path (may contain ~ or $VAR).
 * @param {object} [opts]
 * @param {string} [opts.homedir] - Override home directory (default: os.homedir()).
 * @param {object} [opts.env] - Override environment map (default: process.env).
 * @param {string} [opts.platform] - Override platform string (default: process.platform).
 * @param {Function} [opts.realpath] - Injectable realpath (default: fs.realpathSync) — used for test determinism.
 * @param {Function} [opts.resolve] - Injectable path.resolve (default: path.resolve) — injectable for cross-platform test determinism.
 * @returns {string} The normalized, forward-slash, NFC-composed path.
 * @throws Rethrows any realpath error whose `code` is set and is not `'ENOENT'`.
 */
export function pathNormalize(target, opts = {}) {
  // Defaults apply only to options the caller left undefined.
  const homedir = opts.homedir === undefined ? os.homedir() : opts.homedir;
  const env = opts.env === undefined ? process.env : opts.env;
  const platform = opts.platform === undefined ? process.platform : opts.platform;
  const realpath = opts.realpath === undefined ? fs.realpathSync : opts.realpath;
  const resolve = opts.resolve === undefined ? path.resolve : opts.resolve;

  let candidate = expandEnvVars(expandHome(target, homedir), env);

  // Stream H Task 9 cosmetic: a Cygwin/git-bash drive prefix (`/c/Users/...`)
  // is rewritten to the native form (`c:/Users/...`) BEFORE resolving.
  // Otherwise path.resolve on win32 treats `/c/...` as drive-relative and
  // prepends the cwd's drive letter, yielding doubled-drive display paths such
  // as `c:/c/users/...` in gate error messages. Detected during Smoke 5 Real
  // Fix Re-test 2026-05-30 (step 4).
  //
  // Guard: applied only on win32 AND only when the supplied homedir itself is
  // drive-rooted (starts with `<letter>:`). This keeps POSIX-style test
  // fixtures that pass `/h` or `/home/u` from having /A/B-style paths rewritten.
  const wantsDriveRewrite = platform === 'win32' && /^[a-zA-Z]:/.test(String(homedir || ''));
  if (wantsDriveRewrite) {
    candidate = candidate.replace(/^\/([a-zA-Z])\//, (_match, driveLetter) => `${driveLetter}:/`);
  }

  const resolved = resolve(candidate);

  // Start from the resolved path; it remains the result when realpath fails
  // with ENOENT (best-effort answer for files whose state is unknown).
  let real = resolved;
  try {
    real = realpath(resolved);
  } catch (err) {
    // Surface real FS errors; fail-close is the caller's responsibility.
    // Errors that carry no `code` fall through and are treated like ENOENT.
    if (err && err.code && err.code !== 'ENOENT') throw err;
  }

  // Smoke 5 integration fix (2026-05-30): ALL separators become forward
  // slashes regardless of platform. DEFAULT_PROTECTED_PATTERNS regexes are
  // forward-slash only; without this, win32 path.resolve + realpath return
  // backslashes and the patterns miss.
  // P10-b (router-mentor): NFC normalization — decomposed Unicode (NFD) must
  // not bypass regex guards that compare against the composed form of a path.
  const folded = caseFold(real, platform);
  return folded.split('\\').join('/').normalize('NFC');
}
/**
 * Total (never-throwing) variant of pathNormalize — M7 Phase 0 (rule 7a, SE-I/L6).
 * Intended for escape-survivability binding keys: if normalization threw, the
 * escape check would become unreachable and the bug would brick the session,
 * bypassing the owner's emergency exit. Returns the pathNormalize result on
 * success and '' on any error (the caller treats an empty string as unknown).
 * Content guards (read-path-deny/mcp-classification) KEEP calling the strict
 * pathNormalize — there an FS error must surface and lead to fail-CLOSE rather
 * than being swallowed.
 *
 * @param {string} target - Raw path, passed through to pathNormalize.
 * @param {object} [opts] - Options, passed through to pathNormalize.
 * @returns {string} The normalized path, or '' when normalization threw.
 */
export function pathNormalizeSafe(target, opts = {}) {
  let normalized = '';
  try {
    normalized = pathNormalize(target, opts);
  } catch {
    // Deliberately swallowed: this variant must never throw.
    normalized = '';
  }
  return normalized;
}