#!/usr/bin/env node
/**
 * tools/graphify-safe-update.mjs
 *
 * Safe post-commit graphify update wrapper.
 *
 * Per ADR-017 § "Стратегия обновлений" (update strategy): a direct
 * `graphify update .` run from a wide scope blew the graph up from 6305 to
 * 41586 nodes (38 MB of bloat). This wrapper:
 *
 * 1. Reads the files touched by the last commit via
 *    `git diff-tree --no-commit-id --name-only -r -z HEAD`.
 * 2. Filters to ONLY the allowed scopes: docs/, .claude/, app/ — NOT tools/,
 *    vendor/, node_modules/, bin/, .git/, etc.
 * 3. For CODE files (by extension, see CODE_EXTS) → invokes the graphify AST
 *    extract on an EXPLICIT file list (bypasses the detect_incremental +
 *    manifest-prune risk) and build_merge into the existing graph.json.
 * 4. For every other in-scope file (docs, markdown, …) → writes a
 *    `needs_update` flag; NO automatic LLM extraction (a subagent dispatch is
 *    too expensive for a hook).
 * 5. Exits silently if graphify-out/graph.json is missing (e.g. a worktree
 *    without a graph).
 *
 * Non-blocking by design — always exits 0 so the commit is never broken. The
 * log goes to stdout (lefthook collects it).
 *
 * Security Guidance #40: pure deterministic — git read + Python exec, no LLM.
 *
 * @author graphify-formalization 2026-05-27
 * @see docs/adr/ADR-017-knowledge-graph-tooling.md § "Стратегия обновлений"
 */

import { execFileSync } from 'node:child_process';
import { existsSync, writeFileSync, readFileSync } from 'node:fs';
import { join, resolve, extname } from 'node:path';
import { fileURLToPath } from 'node:url';

const ALLOWED_SCOPES = ['docs/', '.claude/', 'app/'];
const CODE_EXTS = new Set(['.php', '.ts', '.js', '.vue', '.mjs', '.cjs', '.py', '.go']);
const SCAN_EXCLUDE_DIRS = ['node_modules/', 'vendor/', '__pycache__/', '.git/'];

/**
 * Pure: filter a list of git-diff file paths down to those within allowed
 * scopes AND not inside excluded directories.
 *
 * An excluded entry matches only on a directory boundary — at the very start
 * of the path or right after a `/` — so `vendor/` rejects `app/vendor/x.php`
 * but not `app/myvendor/x.php`.
 *
 * @param {string[]} paths     git-diff output entries (relative repo paths, `/`-separated)
 * @param {string[]} allowed   prefixes that the path must start with
 * @param {string[]} excluded  directory fragments (e.g. 'vendor/') that disqualify a path
 *                             even if it matches a scope
 * @returns {string[]} the paths that are in scope, in their original order
 */
export function filterInScope(paths, allowed, excluded) {
  return paths.filter((p) => {
    if (!allowed.some((scope) => p.startsWith(scope))) return false;
    const isExcluded = excluded.some((dir) => p.startsWith(dir) || p.includes(`/${dir}`));
    return !isExcluded;
  });
}

/**
 * Pure: partition file list by extension into code-files vs everything-else.
 * Case-insensitive on extension.
 *
 * @param {string[]} paths
 * @param {Set<string>} codeExts lowercase extensions including the dot (e.g. '.php')
 * @returns {{ codeFiles: string[], docFiles: string[] }}
 */
export function partitionByExtension(paths, codeExts) {
  const codeFiles = [];
  const docFiles = [];
  for (const p of paths) {
    const ext = extname(p).toLowerCase();
    if (codeExts.has(ext)) {
      codeFiles.push(p);
    } else {
      docFiles.push(p);
    }
  }
  return { codeFiles, docFiles };
}

// ────────────────────────── CLI entry ──────────────────────────
// Skip side-effects when imported as a module (tests). Works on Windows
// (file:///C:/…) and POSIX (file:///home/…) by comparing the file path derived
// from this module's URL to the resolved argv[1].
const isMain = process.argv[1] && fileURLToPath(import.meta.url) === resolve(process.argv[1]);
if (isMain) {
  await runCli();
}

/**
 * CLI body: inspects the last commit and either flags the graph as stale
 * (non-code files) or merges a fresh AST extract of the changed code files
 * into graphify-out/graph.json. Every path through this function ends in
 * `process.exit(0)`; failures are logged, never propagated.
 *
 * @returns {Promise<void>} never resolves in practice — the process exits first
 */
async function runCli() {
  const REPO_ROOT = resolve(process.cwd());
  const GRAPH_DIR = join(REPO_ROOT, 'graphify-out');

  const logInfo = (msg) => process.stdout.write(`[graphify-safe-update] ${msg}\n`);
  // Exits 0 without output unless GRAPHIFY_SAFE_DEBUG is set.
  const silentExit = (reason) => {
    if (process.env.GRAPHIFY_SAFE_DEBUG) logInfo(`silent exit: ${reason}`);
    process.exit(0);
  };

  // 1. Graph must exist
  if (!existsSync(join(GRAPH_DIR, 'graph.json'))) {
    silentExit('graphify-out/graph.json missing');
  }

  // 2. Get files changed in the LAST commit only (HEAD vs HEAD~1) — NOT
  //    `git diff HEAD~1` (1-arg), which diffs prev-commit-vs-working-tree and
  //    pollutes scope with unstaged WIP from parallel sessions. diff-tree of
  //    HEAD yields exactly the files touched by HEAD.
  //    `-z` makes git emit NUL-terminated, un-quoted paths; without it,
  //    non-ASCII or special-character names are C-quoted ("docs/\321\204.md")
  //    and would never match a scope prefix.
  let changedRaw;
  try {
    changedRaw = execFileSync(
      'git',
      ['diff-tree', '--no-commit-id', '--name-only', '-r', '-z', 'HEAD'],
      { encoding: 'utf-8', stdio: ['ignore', 'pipe', 'pipe'] },
    );
  } catch (e) {
    silentExit(`git diff-tree failed: ${e.message?.slice(0, 80)}`);
  }
  const allChanged = changedRaw.split('\0').filter(Boolean);
  if (allChanged.length === 0) silentExit('no changed files');

  // 3. Filter to allowed scopes + exclude system dirs
  const inScope = filterInScope(allChanged, ALLOWED_SCOPES, SCAN_EXCLUDE_DIRS);
  if (inScope.length === 0) {
    silentExit(`${allChanged.length} files changed, 0 in allowed scope (docs/.claude/app)`);
  }

  // 4. Partition by code vs doc/md
  const { codeFiles, docFiles } = partitionByExtension(inScope, CODE_EXTS);

  // 5. Doc/MD files → just write needs_update flag, no LLM
  if (docFiles.length > 0) {
    const flagPath = join(GRAPH_DIR, 'needs_update');
    const logPath = join(GRAPH_DIR, 'needs_update.log');
    try {
      const ts = new Date().toISOString();
      const entry = `${ts} ${docFiles.length} doc files changed:\n${docFiles.map((f) => ` ${f}`).join('\n')}\n\n`;
      writeFileSync(
        flagPath,
        `Run /graphify --update to refresh semantic graph (${docFiles.length} doc files changed).\nLast: ${ts}\n`,
      );
      // Newest entry goes first: prepend to whatever the log already holds.
      let existingLog = '';
      if (existsSync(logPath)) existingLog = readFileSync(logPath, 'utf-8');
      writeFileSync(logPath, entry + existingLog);
      logInfo(`${docFiles.length} doc/md changed → wrote needs_update flag (manual /graphify --update needed)`);
    } catch (e) {
      logInfo(`needs_update flag write failed: ${e.message?.slice(0, 80)} (non-blocking)`);
    }
  }

  // 6. Code files → AST extract on explicit list + merge into graph
  if (codeFiles.length === 0) silentExit('no code files (only docs)');
  // Files deleted by the commit still show up in the diff; only existing ones can be parsed.
  const liveCodeFiles = codeFiles.filter((f) => existsSync(join(REPO_ROOT, f)));
  if (liveCodeFiles.length === 0) silentExit('all code files deleted (no AST to extract)');

  // 7. Locate graphify Python interpreter
  const pyPathFile = join(GRAPH_DIR, '.graphify_python');
  let pyPath;
  if (existsSync(pyPathFile)) {
    // Drop a leading UTF-8 BOM (written as an escape so it survives re-encoding) and whitespace.
    pyPath = readFileSync(pyPathFile, 'utf-8').replace(/^\uFEFF/, '').trim();
  }
  if (!pyPath || !existsSync(pyPath)) {
    try {
      pyPath = execFileSync(
        'uv',
        ['tool', 'run', '--from', 'graphifyy', 'python', '-c', 'import sys; print(sys.executable)'],
        { encoding: 'utf-8', stdio: ['ignore', 'pipe', 'pipe'] },
      ).trim();
    } catch (e) {
      logInfo(`graphify python not resolvable: ${e.message?.slice(0, 80)} — manual /graphify --update needed`);
      process.exit(0);
    }
  }

  // 8. Run AST extract + merge via Python one-liner.
  //    The repo root and the file list are handed over as argv, NOT spliced
  //    into the Python source: a path containing quotes or backslashes would
  //    otherwise corrupt (or inject into) the generated script.
  //    build_merge is called with dedup=False; the rationale is recorded in the
  //    comment embedded in the Python snippet below.
  const liveListJson = JSON.stringify(liveCodeFiles);
  const pyCode = `
import sys, json
from pathlib import Path
from graphify.extract import extract
from graphify.build import build_merge
from graphify.export import to_json

repo_root = Path(sys.argv[1])
files = [repo_root / f for f in json.loads(sys.argv[2])]
ast = extract(files, cache_root=repo_root)
ast_nodes = len(ast.get('nodes', []))
ast_edges = len(ast.get('edges', []))
print(f'[graphify-safe-update] AST: {ast_nodes} nodes, {ast_edges} edges from {len(files)} code files')

# dedup=False для incremental: дефолтный dedup=True агрессивно фьюз-дедупит весь
# merged граф (483 exact + 447 fuzzy на 6305-node графе → 5356), to_json потом
# refuses overwrite смаленьшим графом. dedup=False → чистый union по ID.
graph_path = repo_root / 'graphify-out' / 'graph.json'
G = build_merge([ast], graph_path=str(graph_path), dedup=False)
to_json(G, dict(), str(graph_path))
print(f'[graphify-safe-update] merged: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges in graph.json')
`;

  try {
    const out = execFileSync(pyPath, ['-c', pyCode, REPO_ROOT, liveListJson], {
      encoding: 'utf-8',
      stdio: ['ignore', 'pipe', 'pipe'],
      cwd: REPO_ROOT,
    });
    process.stdout.write(out);
  } catch (e) {
    logInfo(`AST extract/merge failed (non-blocking): ${e.message?.slice(0, 200)}`);
    if (e.stderr) process.stderr.write(e.stderr.toString());
  }

  process.exit(0);
}