#!/usr/bin/env node
|
|||
|
|
/**
|
|||
|
|
* tools/graphify-safe-update.mjs
|
|||
|
|
*
|
|||
|
|
* Safe post-commit graphify update wrapper.
|
|||
|
|
*
|
|||
|
|
* Per ADR-017 § "Стратегия обновлений" — direct `graphify update .` from
|
|||
|
|
* широкого scope разнесло граф 6305 → 41586 узлов (38 МБ bloat). Этот
|
|||
|
|
* wrapper:
|
|||
|
|
*
|
|||
|
|
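 *   1. Reads the files touched by the last commit via
 *      `git diff-tree --no-commit-id --name-only -r HEAD` (not `git diff HEAD~1`,
 *      which would also pick up uncommitted working-tree changes).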
|
|||
|
|
* 2. Filters to ONLY allowed scopes: docs/, .claude/, app/ — НЕ tools/,
|
|||
|
|
* vendor/, node_modules/, bin/, .git/, etc.
|
|||
|
|
* 3. For CODE files in app/ (PHP/Vue/TS/JS) → invokes graphify AST extract
|
|||
|
|
* on EXPLICIT file list (bypasses detect_incremental + manifest-prune
|
|||
|
|
* risk) + build_merge into existing graph.json.
|
|||
|
|
* 4. For DOC/MD files (docs/ + .claude/ md) → writes `needs_update` flag,
|
|||
|
|
* NO automatic LLM extraction (subagent dispatch too expensive for hook).
|
|||
|
|
* 5. Skips silently if graphify-out/ missing (e.g., worktree без graph).
|
|||
|
|
*
|
|||
|
|
* Non-blocking by design — always exits 0 to не ломать commit. Log goes to
|
|||
|
|
* stdout (lefthook собирает).
|
|||
|
|
*
|
|||
|
|
* Security Guidance #40: pure deterministic — git read + Python exec, без LLM.
|
|||
|
|
*
|
|||
|
|
* @author graphify-formalization 2026-05-27
|
|||
|
|
* @see docs/adr/ADR-017-knowledge-graph-tooling.md § "Стратегия обновлений"
|
|||
|
|
*/
|
|||
|
|
|
|||
|
|
import { execFileSync } from 'node:child_process';
|
|||
|
|
import { existsSync, writeFileSync, readFileSync } from 'node:fs';
|
|||
|
|
import { join, resolve, extname } from 'node:path';
|
|||
|
|
|
|||
|
|
// Repo-relative path prefixes; only changes under one of these are considered.
const ALLOWED_SCOPES = [
  'docs/',
  '.claude/',
  'app/',
];

// Lowercase extensions (dot included) routed to AST extraction; every other
// in-scope file is treated as a doc file.
const CODE_EXTS = new Set([
  '.php',
  '.ts',
  '.js',
  '.vue',
  '.mjs',
  '.cjs',
  '.py',
  '.go',
]);

// Directory fragments that disqualify a path even inside an allowed scope.
const SCAN_EXCLUDE_DIRS = [
  'node_modules/',
  'vendor/',
  '__pycache__/',
  '.git/',
];
|
|||
|
|
|
|||
|
|
/**
 * Pure: keep only the paths that start with an allowed scope prefix and do
 * not lie inside an excluded directory.
 *
 * Exclusion entries ending in `/` are treated as directory names and match
 * only at a path-segment boundary, so `vendor/` rejects `app/vendor/x.php`
 * but keeps `app/myvendor/x.php`. Entries without a trailing slash keep
 * plain substring semantics.
 *
 * @param {string[]} paths     relative repo paths using `/` separators
 * @param {string[]} allowed   prefixes that the path must start with
 * @param {string[]} excluded  directory names (trailing `/`) or substrings that disqualify a path
 * @returns {string[]} the surviving paths, in their original order
 */
export function filterInScope(paths, allowed, excluded) {
  const isExcluded = (p) =>
    excluded.some((ex) =>
      ex.endsWith('/')
        ? // Directory entry: must begin a segment (start of path or right after a '/').
          p.startsWith(ex) || p.includes(`/${ex}`)
        : p.includes(ex),
    );

  return paths.filter((p) => allowed.some((scope) => p.startsWith(scope)) && !isExcluded(p));
}
|
|||
|
|
|
|||
|
|
/**
 * Pure: split a file list into code files and everything else, keyed on the
 * file extension. The extension is lowercased before lookup, so `A.PHP`
 * matches `.php`.
 *
 * @param {string[]} paths
 * @param {Set<string>} codeExts  lowercase extensions including the dot (e.g. '.php')
 * @returns {{ codeFiles: string[], docFiles: string[] }} both lists keep input order
 */
export function partitionByExtension(paths, codeExts) {
  const buckets = { codeFiles: [], docFiles: [] };

  for (const filePath of paths) {
    const isCode = codeExts.has(extname(filePath).toLowerCase());
    const target = isCode ? buckets.codeFiles : buckets.docFiles;
    target.push(filePath);
  }

  return buckets;
}
|
|||
|
|
|
|||
|
|
// ────────────────────────── CLI entry ──────────────────────────
|
|||
|
|
// Skip side-effects when imported as a module (tests). Works on Windows
|
|||
|
|
// (file:///C:/…) and POSIX (file:///home/…) by comparing the file URL
|
|||
|
|
// pathname to the resolved argv[1].
|
|||
|
|
import { fileURLToPath } from 'node:url';
|
|||
|
|
// Truthy only when Node was launched with this very file as its entry script;
// importing the module (e.g. from tests) leaves it falsy and runs nothing.
const isMain = (() => {
  const launchedScript = process.argv[1];
  return launchedScript && fileURLToPath(import.meta.url) === resolve(launchedScript);
})();

if (isMain) await runCli();
|
|||
|
|
|
|||
|
|
/**
 * CLI entry point: incrementally refreshes graphify-out/graph.json from the
 * files touched by the HEAD commit.
 *
 * Flow:
 *   1. Exit quietly when graphify-out/graph.json does not exist.
 *   2. List the files changed by HEAD and keep those inside ALLOWED_SCOPES
 *      and outside SCAN_EXCLUDE_DIRS.
 *   3. Non-code files only raise a `needs_update` flag (plus a log entry).
 *   4. Code files that still exist are AST-extracted and merged into the
 *      graph by a short Python script run with the graphify interpreter.
 *
 * Non-blocking by design: every handled path ends in `process.exit(0)`, and
 * failures are logged rather than propagated.
 *
 * Paths are resolved against `process.cwd()`, so the script is expected to be
 * started from the repository root.
 *
 * @returns {Promise<void>}
 */
async function runCli() {
  const REPO_ROOT = resolve(process.cwd());
  const GRAPH_DIR = join(REPO_ROOT, 'graphify-out');

  const logInfo = (msg) => process.stdout.write(`[graphify-safe-update] ${msg}\n`);
  // Terminates the process with status 0; the reason is printed only when
  // GRAPHIFY_SAFE_DEBUG is set.
  const silentExit = (reason) => {
    if (process.env.GRAPHIFY_SAFE_DEBUG) logInfo(`silent exit: ${reason}`);
    process.exit(0);
  };

  // 1. Graph must exist
  if (!existsSync(join(GRAPH_DIR, 'graph.json'))) {
    silentExit('graphify-out/graph.json missing');
  }

  // 2. Get files changed in the LAST commit only (HEAD vs HEAD~1) — NOT
  //    `git diff HEAD~1` (1-arg) which diffs prev-commit-vs-working-tree and
  //    pollutes scope with unstaged WIP from parallel sessions. Use
  //    diff-tree of HEAD to get exactly the files touched by HEAD.
  //    `-z` makes git emit NUL-terminated, unquoted paths; without it paths
  //    with non-ASCII or special characters arrive C-quoted ("\320\277…") and
  //    would match neither the scope filter nor the filesystem.
  let changedRaw;
  try {
    changedRaw = execFileSync(
      'git',
      ['diff-tree', '--no-commit-id', '--name-only', '-r', '-z', 'HEAD'],
      {
        encoding: 'utf-8',
        stdio: ['ignore', 'pipe', 'pipe'],
      },
    );
  } catch (e) {
    silentExit(`git diff-tree failed: ${e.message?.slice(0, 80)}`);
  }

  const allChanged = changedRaw.split('\0').filter(Boolean);
  if (allChanged.length === 0) silentExit('no changed files');

  // 3. Filter to allowed scopes + exclude system dirs
  const inScope = filterInScope(allChanged, ALLOWED_SCOPES, SCAN_EXCLUDE_DIRS);
  if (inScope.length === 0) {
    silentExit(`${allChanged.length} files changed, 0 in allowed scope (docs/.claude/app)`);
  }

  // 4. Partition by code vs doc/md
  const { codeFiles, docFiles } = partitionByExtension(inScope, CODE_EXTS);

  // 5. Doc/MD files → just write needs_update flag, no LLM
  if (docFiles.length > 0) {
    const flagPath = join(GRAPH_DIR, 'needs_update');
    const logPath = join(GRAPH_DIR, 'needs_update.log');
    try {
      const ts = new Date().toISOString();
      const entry = `${ts} ${docFiles.length} doc files changed:\n${docFiles.map((f) => ` ${f}`).join('\n')}\n\n`;
      writeFileSync(flagPath, `Run /graphify --update to refresh semantic graph (${docFiles.length} doc files changed).\nLast: ${ts}\n`);
      // Newest entry goes first: prepend to whatever the log already holds.
      let existingLog = '';
      if (existsSync(logPath)) existingLog = readFileSync(logPath, 'utf-8');
      writeFileSync(logPath, entry + existingLog);
      logInfo(`${docFiles.length} doc/md changed → wrote needs_update flag (manual /graphify --update needed)`);
    } catch (e) {
      logInfo(`needs_update flag write failed: ${e.message?.slice(0, 80)} (non-blocking)`);
    }
  }

  // 6. Code files → AST extract on explicit list + merge into graph
  if (codeFiles.length === 0) silentExit('no code files (only docs)');

  // Files deleted by the commit have nothing to parse.
  const liveCodeFiles = codeFiles.filter((f) => existsSync(join(REPO_ROOT, f)));
  if (liveCodeFiles.length === 0) silentExit('all code files deleted (no AST to extract)');

  // 7. Locate graphify Python interpreter
  const pyPathFile = join(GRAPH_DIR, '.graphify_python');
  let pyPath;
  if (existsSync(pyPathFile)) {
    // Strip a leading UTF-8 BOM (some editors add one) before using the
    // content as a filesystem path.
    pyPath = readFileSync(pyPathFile, 'utf-8').replace(/^\uFEFF/, '').trim();
  }
  if (!pyPath || !existsSync(pyPath)) {
    // Fallback: ask uv for the interpreter of the graphify tool environment.
    try {
      pyPath = execFileSync('uv', ['tool', 'run', '--from', 'graphifyy', 'python', '-c', 'import sys; print(sys.executable)'], {
        encoding: 'utf-8',
        stdio: ['ignore', 'pipe', 'pipe'],
      }).trim();
    } catch (e) {
      logInfo(`graphify python not resolvable: ${e.message?.slice(0, 80)} — manual /graphify --update needed`);
      process.exit(0);
    }
  }

  // 8. Run AST extract + merge via a short Python script.
  //    The repo root and file list travel as JSON on stdin instead of being
  //    spliced into the Python source: interpolating JSON into a '''…'''
  //    literal lets Python re-interpret the JSON backslash escapes and lets a
  //    file name containing ''' terminate the literal.
  //    dedup=False: per the note kept inside the script, the default dedup
  //    shrinks the merged graph and to_json then refuses to overwrite
  //    graph.json with the smaller result.
  const payload = JSON.stringify({ repo_root: REPO_ROOT, files: liveCodeFiles });
  const pyCode = `
import sys, json
from pathlib import Path
from graphify.extract import extract
from graphify.build import build_merge
from graphify.export import to_json

payload = json.loads(sys.stdin.buffer.read())
repo_root = Path(payload['repo_root'])
files = [repo_root / f for f in payload['files']]
ast = extract(files, cache_root=repo_root)
ast_nodes = len(ast.get('nodes', []))
ast_edges = len(ast.get('edges', []))
print(f'[graphify-safe-update] AST: {ast_nodes} nodes, {ast_edges} edges from {len(files)} code files')

# dedup=False для incremental: дефолтный dedup=True агрессивно фьюз-дедупит весь
# merged граф (483 exact + 447 fuzzy на 6305-node графе → 5356), to_json потом
# refuses overwrite смаленьшим графом. dedup=False → чистый union по ID.
graph_path = repo_root / 'graphify-out' / 'graph.json'
G = build_merge([ast], graph_path=str(graph_path), dedup=False)
to_json(G, dict(), str(graph_path))
print(f'[graphify-safe-update] merged: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges in graph.json')
`;

  try {
    const out = execFileSync(pyPath, ['-c', pyCode], {
      encoding: 'utf-8',
      input: payload,
      // stdin must be a pipe for `input` to be delivered to the child.
      stdio: ['pipe', 'pipe', 'pipe'],
      cwd: REPO_ROOT,
    });
    process.stdout.write(out);
  } catch (e) {
    logInfo(`AST extract/merge failed (non-blocking): ${e.message?.slice(0, 200)}`);
    if (e.stderr) process.stderr.write(e.stderr.toString());
  }

  process.exit(0);
}
|