Files
brain/tools/graphify-safe-update.mjs

206 lines
8.3 KiB
JavaScript
Raw Permalink Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env node
/**
* tools/graphify-safe-update.mjs
*
* Safe post-commit graphify update wrapper.
*
* Per ADR-017 § "Стратегия обновлений" — direct `graphify update .` from
* широкого scope разнесло граф 6305 → 41586 узлов (38 МБ bloat). Этот
* wrapper:
*
* 1. Reads the files touched by the last commit via `git diff-tree --name-only -r HEAD`.
* 2. Filters to ONLY allowed scopes: docs/, .claude/, app/ — НЕ tools/,
* vendor/, node_modules/, bin/, .git/, etc.
* 3. For CODE files in app/ (PHP/Vue/TS/JS) → invokes graphify AST extract
* on EXPLICIT file list (bypasses detect_incremental + manifest-prune
* risk) + build_merge into existing graph.json.
* 4. For DOC/MD files (docs/ + .claude/ md) → writes `needs_update` flag,
* NO automatic LLM extraction (subagent dispatch too expensive for hook).
* 5. Skips silently if graphify-out/ missing (e.g., worktree без graph).
*
* Non-blocking by design — always exits 0 to не ломать commit. Log goes to
* stdout (lefthook собирает).
*
* Security Guidance #40: pure deterministic — git read + Python exec, без LLM.
*
* @author graphify-formalization 2026-05-27
* @see docs/adr/ADR-017-knowledge-graph-tooling.md § "Стратегия обновлений"
*/
import { execFileSync } from 'node:child_process';
import { existsSync, writeFileSync, readFileSync } from 'node:fs';
import { join, resolve, extname } from 'node:path';
// Repo-relative path prefixes a changed file must start with to be considered.
const ALLOWED_SCOPES = [
  'docs/',
  '.claude/',
  'app/',
];

// Lowercase extensions (dot included) that are routed to AST extraction;
// every other in-scope file is treated as a doc file.
const CODE_EXTS = new Set([
  '.php',
  '.ts',
  '.js',
  '.vue',
  '.mjs',
  '.cjs',
  '.py',
  '.go',
]);

// Directory markers that disqualify a path even when it sits inside an
// allowed scope.
const SCAN_EXCLUDE_DIRS = [
  'node_modules/',
  'vendor/',
  '__pycache__/',
  '.git/',
];
/**
 * Pure: filter a list of git-diff file paths down to those within allowed
 * scopes AND not inside excluded directories.
 *
 * Excluded entries are matched at a path-segment boundary: an entry such as
 * 'vendor/' disqualifies 'vendor/x' and 'app/vendor/x', but NOT
 * 'app/subvendor/x'. A plain substring test would silently drop files from
 * any directory whose name merely ends with an excluded name. Entries that
 * themselves start with '/' are already anchored and are matched as plain
 * substrings.
 *
 * @param {string[]} paths git-diff output lines (relative repo paths, '/'-separated)
 * @param {string[]} allowed prefixes that the path must start with
 * @param {string[]} excluded directory markers that disqualify a path even if it matches a scope
 * @returns {string[]} the paths that are in scope and not excluded, in input order
 */
export function filterInScope(paths, allowed, excluded) {
  const hitsExcluded = (p, ex) => {
    // Already anchored by the caller — keep plain substring semantics.
    if (ex.startsWith('/')) return p.includes(ex);
    // Anchor the left edge: either the very start of the path or right
    // after a '/' separator.
    return p.startsWith(ex) || p.includes(`/${ex}`);
  };

  return paths.filter((p) => {
    if (!allowed.some((scope) => p.startsWith(scope))) return false;
    return !excluded.some((ex) => hitsExcluded(p, ex));
  });
}
/**
 * Pure: split a list of paths into two buckets according to file extension.
 * The extension is lowercased before lookup, so 'A.PHP' matches '.php'.
 *
 * @param {string[]} paths
 * @param {Set<string>} codeExts lowercase extensions including the dot (e.g. '.php')
 * @returns {{ codeFiles: string[], docFiles: string[] }} paths whose extension
 *   is in `codeExts`, and all remaining paths, each in input order
 */
export function partitionByExtension(paths, codeExts) {
  const buckets = { codeFiles: [], docFiles: [] };
  for (const filePath of paths) {
    const isCode = codeExts.has(extname(filePath).toLowerCase());
    const target = isCode ? buckets.codeFiles : buckets.docFiles;
    target.push(filePath);
  }
  return buckets;
}
// ────────────────────────── CLI entry ──────────────────────────
// Skip side-effects when imported as a module (tests). Works on Windows
// (file:///C:/…) and POSIX (file:///home/…) by comparing the file URL
// pathname to the resolved argv[1].
import { fileURLToPath } from 'node:url';
// True only when this module is the script Node was launched with: the
// module's own file path must equal the resolved argv[1]. When argv[1] is
// absent the module is treated as imported and the CLI is not run.
const isMain = process.argv[1]
  ? fileURLToPath(import.meta.url) === resolve(process.argv[1])
  : false;
if (isMain) await runCli();
/**
 * CLI entry point: incrementally refresh graphify-out/graph.json from the
 * files touched by the HEAD commit.
 *
 * Flow:
 *   1. Exit quietly when graphify-out/graph.json does not exist.
 *   2. List the files changed by HEAD (`git diff-tree`).
 *   3. Keep only paths inside ALLOWED_SCOPES and outside SCAN_EXCLUDE_DIRS.
 *   4. Split them into code files and everything else.
 *   5. For non-code files: write a `needs_update` flag and prepend an entry
 *      to `needs_update.log` (no extraction is run for them).
 *   6. For code files that still exist on disk: run the graphify Python
 *      AST extract + merge on that explicit list.
 *
 * Every path through this function ends in `process.exit(0)`; failures are
 * logged and never propagate, so a calling hook is not failed by it.
 *
 * @returns {Promise<void>} never resolves in practice — the process exits.
 */
async function runCli() {
  const REPO_ROOT = resolve(process.cwd());
  const GRAPH_DIR = join(REPO_ROOT, 'graphify-out');
  const logInfo = (msg) => process.stdout.write(`[graphify-safe-update] ${msg}\n`);
  // Exit 0 without output unless GRAPHIFY_SAFE_DEBUG is set in the environment.
  const silentExit = (reason) => {
    if (process.env.GRAPHIFY_SAFE_DEBUG) logInfo(`silent exit: ${reason}`);
    process.exit(0);
  };

  // 1. Graph must exist
  if (!existsSync(join(GRAPH_DIR, 'graph.json'))) {
    silentExit('graphify-out/graph.json missing');
  }

  // 2. Get files changed in the LAST commit only (HEAD vs its parent) — NOT
  //    `git diff HEAD~1` (1-arg), which diffs prev-commit-vs-working-tree and
  //    pollutes scope with unstaged WIP from parallel sessions.
  //    `-z` makes git emit NUL-terminated, unquoted paths; without it git
  //    C-quotes names containing non-ASCII or special characters, and the
  //    quoted form matches neither the scope prefixes nor the file on disk.
  let changedRaw;
  try {
    changedRaw = execFileSync(
      'git',
      ['diff-tree', '--no-commit-id', '--name-only', '-r', '-z', 'HEAD'],
      {
        encoding: 'utf-8',
        stdio: ['ignore', 'pipe', 'pipe'],
      },
    );
  } catch (e) {
    silentExit(`git diff-tree failed: ${e.message?.slice(0, 80)}`);
  }
  const allChanged = changedRaw.split('\0').filter(Boolean);
  if (allChanged.length === 0) silentExit('no changed files');

  // 3. Filter to allowed scopes + exclude system dirs
  const inScope = filterInScope(allChanged, ALLOWED_SCOPES, SCAN_EXCLUDE_DIRS);
  if (inScope.length === 0) {
    silentExit(`${allChanged.length} files changed, 0 in allowed scope (docs/.claude/app)`);
  }

  // 4. Partition by code vs doc/md
  const { codeFiles, docFiles } = partitionByExtension(inScope, CODE_EXTS);

  // 5. Doc/MD files → just write needs_update flag, no LLM
  if (docFiles.length > 0) {
    const flagPath = join(GRAPH_DIR, 'needs_update');
    const logPath = join(GRAPH_DIR, 'needs_update.log');
    try {
      const ts = new Date().toISOString();
      const entry = `${ts} ${docFiles.length} doc files changed:\n${docFiles.map((f) => ` ${f}`).join('\n')}\n\n`;
      writeFileSync(flagPath, `Run /graphify --update to refresh semantic graph (${docFiles.length} doc files changed).\nLast: ${ts}\n`);
      // Newest entry goes first: the previous log content is appended after it.
      let existingLog = '';
      if (existsSync(logPath)) existingLog = readFileSync(logPath, 'utf-8');
      writeFileSync(logPath, entry + existingLog);
      logInfo(`${docFiles.length} doc/md changed → wrote needs_update flag (manual /graphify --update needed)`);
    } catch (e) {
      logInfo(`needs_update flag write failed: ${e.message?.slice(0, 80)} (non-blocking)`);
    }
  }

  // 6. Code files → AST extract on explicit list + merge into graph.
  //    Files deleted by the commit are dropped: there is nothing to parse.
  if (codeFiles.length === 0) silentExit('no code files (only docs)');
  const liveCodeFiles = codeFiles.filter((f) => existsSync(join(REPO_ROOT, f)));
  if (liveCodeFiles.length === 0) silentExit('all code files deleted (no AST to extract)');

  // 7. Locate graphify Python interpreter: prefer the path recorded in
  //    graphify-out/.graphify_python, otherwise ask `uv` for it.
  const pyPathFile = join(GRAPH_DIR, '.graphify_python');
  let pyPath;
  if (existsSync(pyPathFile)) {
    // Strip a leading UTF-8 byte-order mark. Written as an explicit escape so
    // the pattern does not rely on an invisible literal character.
    pyPath = readFileSync(pyPathFile, 'utf-8').replace(/^\uFEFF/, '').trim();
  }
  if (!pyPath || !existsSync(pyPath)) {
    try {
      pyPath = execFileSync('uv', ['tool', 'run', '--from', 'graphifyy', 'python', '-c', 'import sys; print(sys.executable)'], {
        encoding: 'utf-8',
        stdio: ['ignore', 'pipe', 'pipe'],
      }).trim();
    } catch (e) {
      logInfo(`graphify python not resolvable: ${e.message?.slice(0, 80)} — manual /graphify --update needed`);
      process.exit(0);
    }
  }

  // 8. Run AST extract + merge via an inline Python program.
  //    The repo root and file list are handed over as JSON on stdin rather
  //    than interpolated into the Python source: a file name containing
  //    quotes or backslashes would otherwise corrupt the program text (or
  //    inject code into it). Python reads the raw bytes so decoding does not
  //    depend on the platform's default stdin encoding.
  const payload = JSON.stringify({ root: REPO_ROOT, files: liveCodeFiles });
  const pyCode = `
import sys, json
from pathlib import Path
from graphify.extract import extract
from graphify.build import build_merge
from graphify.export import to_json
payload = json.loads(sys.stdin.buffer.read())
repo_root = Path(payload['root'])
files = [repo_root / f for f in payload['files']]
ast = extract(files, cache_root=repo_root)
ast_nodes = len(ast.get('nodes', []))
ast_edges = len(ast.get('edges', []))
print(f'[graphify-safe-update] AST: {ast_nodes} nodes, {ast_edges} edges from {len(files)} code files')
# dedup=False для incremental: дефолтный dedup=True агрессивно фьюз-дедупит весь
# merged граф (483 exact + 447 fuzzy на 6305-node графе → 5356), to_json потом
# refuses overwrite смаленьшим графом. dedup=False → чистый union по ID.
graph_path = repo_root / 'graphify-out' / 'graph.json'
G = build_merge([ast], graph_path=str(graph_path), dedup=False)
to_json(G, dict(), str(graph_path))
print(f'[graphify-safe-update] merged: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges in graph.json')
`;
  try {
    const out = execFileSync(pyPath, ['-c', pyCode], {
      encoding: 'utf-8',
      input: payload,
      stdio: ['pipe', 'pipe', 'pipe'],
      cwd: REPO_ROOT,
    });
    process.stdout.write(out);
  } catch (e) {
    // Best-effort: report the failure and the child's stderr, then fall
    // through to the normal zero exit.
    logInfo(`AST extract/merge failed (non-blocking): ${e.message?.slice(0, 200)}`);
    if (e.stderr) process.stderr.write(e.stderr.toString());
  }
  process.exit(0);
}