Files
portal/tools/graphify-safe-update.mjs
T

206 lines
8.3 KiB
JavaScript
Raw Normal View History

#!/usr/bin/env node
/**
* tools/graphify-safe-update.mjs
*
* Safe post-commit graphify update wrapper.
*
* Per ADR-017 § "Стратегия обновлений" (Update strategy) — a direct
* `graphify update .` run over a wide scope blew the graph up from 6305 to
* 41586 nodes (38 MB of bloat). This wrapper:
*
* 1. Reads the files touched by the last commit via `git diff-tree` on HEAD
* (not `git diff HEAD~1`, which would also pick up uncommitted working-tree
* changes).
* 2. Filters to ONLY allowed scopes: docs/, .claude/, app/ — NOT tools/,
* vendor/, node_modules/, bin/, .git/, etc.
* 3. For CODE files in app/ (PHP/Vue/TS/JS) → invokes graphify AST extract
* on EXPLICIT file list (bypasses detect_incremental + manifest-prune
* risk) + build_merge into existing graph.json.
* 4. For DOC/MD files (docs/ + .claude/ md) → writes `needs_update` flag,
* NO automatic LLM extraction (subagent dispatch too expensive for hook).
* 5. Skips silently if graphify-out/ is missing (e.g., a worktree without a graph).
*
* Non-blocking by design — always exits 0 so the commit is never broken. Log
* goes to stdout (collected by lefthook).
*
* Security Guidance #40: purely deterministic — git read + Python exec, no LLM.
*
* @author graphify-formalization 2026-05-27
* @see docs/adr/ADR-017-knowledge-graph-tooling.md § "Стратегия обновлений"
*/
import { execFileSync } from 'node:child_process';
import { existsSync, writeFileSync, readFileSync } from 'node:fs';
import { join, resolve, extname } from 'node:path';
/** Repo-relative path prefixes; only changes under these are considered at all. */
const ALLOWED_SCOPES = [
  'docs/',
  '.claude/',
  'app/',
];

/** Lowercase extensions (dot included) that classify a changed file as code. */
const CODE_EXTS = new Set([
  '.php',
  '.ts',
  '.js',
  '.vue',
  '.mjs',
  '.cjs',
  '.py',
  '.go',
]);

/** Directory markers that disqualify a path even when it sits inside an allowed scope. */
const SCAN_EXCLUDE_DIRS = [
  'node_modules/',
  'vendor/',
  '__pycache__/',
  '.git/',
];
/**
 * Pure: keep only the git-diff paths that sit inside an allowed scope AND are
 * not inside an excluded directory.
 *
 * Exclusions are matched on path-segment boundaries: a marker such as
 * 'vendor/' disqualifies 'app/vendor/x.php' but NOT 'app/subvendor/x.php'.
 * (A plain substring test would wrongly drop any directory whose name merely
 * ends with an excluded name.) A marker that itself starts with '/' is
 * matched as a plain substring, since it already carries its own boundary.
 *
 * @param {string[]} paths git-diff output lines (relative repo paths, '/'-separated)
 * @param {string[]} allowed prefixes that the path must start with
 * @param {string[]} excluded directory markers (e.g. 'vendor/') that disqualify a path even if it matches a scope
 * @returns {string[]} the surviving paths, in their original order
 */
export function filterInScope(paths, allowed, excluded) {
  const hitsExcludedDir = (p) =>
    excluded.some((ex) => {
      // Marker already anchored with a leading slash: substring match is exact enough.
      if (ex.startsWith('/')) return p.includes(ex);
      // Otherwise the marker must begin a path segment: at the very start of
      // the path or immediately after a '/'.
      return p.startsWith(ex) || p.includes(`/${ex}`);
    });

  return paths.filter(
    (p) => allowed.some((scope) => p.startsWith(scope)) && !hitsExcludedDir(p),
  );
}
/**
 * Pure: split a file list into two buckets by file extension — paths whose
 * extension is in `codeExts` versus all remaining paths. The extension is
 * lowercased before the lookup, so 'A.PHP' and 'a.php' land in the same
 * bucket. Relative order within each bucket follows the input order.
 *
 * @param {string[]} paths
 * @param {Set<string>} codeExts lowercase extensions including the dot (e.g. '.php')
 * @returns {{ codeFiles: string[], docFiles: string[] }}
 */
export function partitionByExtension(paths, codeExts) {
  const hasCodeExtension = (filePath) => codeExts.has(extname(filePath).toLowerCase());

  return paths.reduce(
    (buckets, filePath) => {
      const target = hasCodeExtension(filePath) ? buckets.codeFiles : buckets.docFiles;
      target.push(filePath);
      return buckets;
    },
    { codeFiles: [], docFiles: [] },
  );
}
// ────────────────────────── CLI entry ──────────────────────────
// Run the CLI only when this file is the script Node was launched with; a
// plain import (e.g. from tests) must stay free of side effects. The module
// URL is converted to a filesystem path and compared with the resolved
// argv[1], which is intended to work for both Windows (file:///C:/…) and
// POSIX (file:///home/…) URLs.
import { fileURLToPath } from 'node:url';
const entryScript = process.argv[1];
const isMain = entryScript && fileURLToPath(import.meta.url) === resolve(entryScript);
if (isMain) {
  await runCli();
}
/**
 * CLI entry point: incrementally refreshes graphify-out/graph.json from the
 * files touched by the HEAD commit.
 *
 * Flow:
 *   1. Exit quietly when graphify-out/graph.json does not exist.
 *   2. List the files touched by HEAD via `git diff-tree`.
 *   3. Keep only paths inside ALLOWED_SCOPES and outside SCAN_EXCLUDE_DIRS.
 *   4. Split them into code files and everything else.
 *   5. Non-code files: write a `needs_update` flag plus a log entry.
 *   6-8. Code files that still exist on disk: run the graphify AST
 *        extract + merge in a Python subprocess.
 *
 * Every handled failure path logs and terminates with exit code 0, so the
 * hook never blocks a commit.
 *
 * @returns {Promise<void>} in practice never resolves normally — the function
 *   always ends via process.exit(0).
 */
async function runCli() {
  const REPO_ROOT = resolve(process.cwd());
  const GRAPH_DIR = join(REPO_ROOT, 'graphify-out');
  const logInfo = (msg) => process.stdout.write(`[graphify-safe-update] ${msg}\n`);
  // Terminates the process with code 0; the reason is printed only when
  // GRAPHIFY_SAFE_DEBUG is set.
  const silentExit = (reason) => {
    if (process.env.GRAPHIFY_SAFE_DEBUG) logInfo(`silent exit: ${reason}`);
    process.exit(0);
  };

  // 1. Graph must exist
  if (!existsSync(join(GRAPH_DIR, 'graph.json'))) {
    silentExit('graphify-out/graph.json missing');
  }

  // 2. Get files changed in the LAST commit only (HEAD vs HEAD~1) — NOT
  //    `git diff HEAD~1` (1-arg) which diffs prev-commit-vs-working-tree and
  //    pollutes scope with unstaged WIP from parallel sessions. Use
  //    diff-tree of HEAD to get exactly the files touched by HEAD.
  //    `-z` makes git emit NUL-terminated, verbatim path names. Without it,
  //    paths with non-ASCII or special characters are C-quoted
  //    ("docs/\321\204.md"), which would fail both the scope-prefix test and
  //    the existsSync check further down.
  let changedRaw;
  try {
    changedRaw = execFileSync(
      'git',
      ['diff-tree', '--no-commit-id', '--name-only', '-r', '-z', 'HEAD'],
      {
        encoding: 'utf-8',
        stdio: ['ignore', 'pipe', 'pipe'],
      },
    );
  } catch (e) {
    silentExit(`git diff-tree failed: ${e.message?.slice(0, 80)}`);
  }
  const allChanged = changedRaw.split('\0').filter(Boolean);
  if (allChanged.length === 0) silentExit('no changed files');

  // 3. Filter to allowed scopes + exclude system dirs
  const inScope = filterInScope(allChanged, ALLOWED_SCOPES, SCAN_EXCLUDE_DIRS);
  if (inScope.length === 0) {
    silentExit(`${allChanged.length} files changed, 0 in allowed scope (docs/.claude/app)`);
  }

  // 4. Partition by code vs doc/md
  const { codeFiles, docFiles } = partitionByExtension(inScope, CODE_EXTS);

  // 5. Doc/MD files → just write needs_update flag, no LLM
  if (docFiles.length > 0) {
    const flagPath = join(GRAPH_DIR, 'needs_update');
    const logPath = join(GRAPH_DIR, 'needs_update.log');
    try {
      const ts = new Date().toISOString();
      const entry = `${ts} ${docFiles.length} doc files changed:\n${docFiles.map((f) => ` ${f}`).join('\n')}\n\n`;
      writeFileSync(flagPath, `Run /graphify --update to refresh semantic graph (${docFiles.length} doc files changed).\nLast: ${ts}\n`);
      // Newest entry goes first: prepend to whatever the log already holds.
      let existingLog = '';
      if (existsSync(logPath)) existingLog = readFileSync(logPath, 'utf-8');
      writeFileSync(logPath, entry + existingLog);
      logInfo(`${docFiles.length} doc/md changed → wrote needs_update flag (manual /graphify --update needed)`);
    } catch (e) {
      logInfo(`needs_update flag write failed: ${e.message?.slice(0, 80)} (non-blocking)`);
    }
  }

  // 6. Code files → AST extract on explicit list + merge into graph
  if (codeFiles.length === 0) silentExit('no code files (only docs)');
  // Files deleted by the commit are listed by diff-tree but no longer exist.
  const liveCodeFiles = codeFiles.filter((f) => existsSync(join(REPO_ROOT, f)));
  if (liveCodeFiles.length === 0) silentExit('all code files deleted (no AST to extract)');

  // 7. Locate graphify Python interpreter: prefer the path recorded in
  //    graphify-out/.graphify_python, fall back to asking `uv`.
  const pyPathFile = join(GRAPH_DIR, '.graphify_python');
  let pyPath;
  if (existsSync(pyPathFile)) {
    try {
      // Strip a leading UTF-8 BOM (written as an explicit escape so that it
      // cannot be lost by editors or copy/paste) and surrounding whitespace.
      pyPath = readFileSync(pyPathFile, 'utf-8').replace(/^\uFEFF/, '').trim();
    } catch (e) {
      // Unreadable file must not crash the hook; fall through to the uv lookup.
      logInfo(`.graphify_python unreadable: ${e.message?.slice(0, 80)} (falling back to uv)`);
    }
  }
  if (!pyPath || !existsSync(pyPath)) {
    try {
      pyPath = execFileSync('uv', ['tool', 'run', '--from', 'graphifyy', 'python', '-c', 'import sys; print(sys.executable)'], {
        encoding: 'utf-8',
        stdio: ['ignore', 'pipe', 'pipe'],
      }).trim();
    } catch (e) {
      logInfo(`graphify python not resolvable: ${e.message?.slice(0, 80)} — manual /graphify --update needed`);
      process.exit(0);
    }
  }

  // 8. Run AST extract + merge via a Python snippet.
  //    The repo root and file list travel as JSON on stdin instead of being
  //    spliced into the Python source: a file name containing quotes or
  //    backslashes would otherwise corrupt the embedded string literal (or
  //    inject code). The snippet decodes stdin explicitly as UTF-8 so that
  //    the result does not depend on the platform's default stdin encoding.
  const payload = JSON.stringify({ repoRoot: REPO_ROOT, files: liveCodeFiles });
  const pyCode = `
import sys, json
from pathlib import Path
from graphify.extract import extract
from graphify.build import build_merge
from graphify.export import to_json
payload = json.loads(sys.stdin.buffer.read().decode('utf-8'))
repo_root = Path(payload['repoRoot'])
files = [repo_root / f for f in payload['files']]
ast = extract(files, cache_root=repo_root)
ast_nodes = len(ast.get('nodes', []))
ast_edges = len(ast.get('edges', []))
print(f'[graphify-safe-update] AST: {ast_nodes} nodes, {ast_edges} edges from {len(files)} code files')
# dedup=False for incremental runs: the default dedup=True aggressively
# fuzzy-dedups the whole merged graph (483 exact + 447 fuzzy on the 6305-node
# graph -> 5356), and to_json then refuses to overwrite with the smaller
# graph. dedup=False -> plain union by ID.
graph_path = repo_root / 'graphify-out' / 'graph.json'
G = build_merge([ast], graph_path=str(graph_path), dedup=False)
to_json(G, dict(), str(graph_path))
print(f'[graphify-safe-update] merged: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges in graph.json')
`;
  try {
    const out = execFileSync(pyPath, ['-c', pyCode], {
      encoding: 'utf-8',
      input: payload,
      stdio: ['pipe', 'pipe', 'pipe'],
      cwd: REPO_ROOT,
    });
    process.stdout.write(out);
  } catch (e) {
    logInfo(`AST extract/merge failed (non-blocking): ${e.message?.slice(0, 200)}`);
    if (e.stderr) process.stderr.write(e.stderr.toString());
  }
  process.exit(0);
}