397777089e
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
206 lines
8.3 KiB
JavaScript
206 lines
8.3 KiB
JavaScript
#!/usr/bin/env node
|
||
/**
|
||
* tools/graphify-safe-update.mjs
|
||
*
|
||
* Safe post-commit graphify update wrapper.
|
||
*
|
||
* Per ADR-017 § "Стратегия обновлений" — direct `graphify update .` from
|
||
* широкого scope разнесло граф 6305 → 41586 узлов (38 МБ bloat). Этот
|
||
* wrapper:
|
||
*
|
||
* 1. Reads the files touched by the last commit via `git diff-tree --name-only -r HEAD`.
|
||
* 2. Filters to ONLY allowed scopes: docs/, .claude/, app/ — НЕ tools/,
|
||
* vendor/, node_modules/, bin/, .git/, etc.
|
||
* 3. For CODE files in app/ (PHP/Vue/TS/JS) → invokes graphify AST extract
|
||
* on EXPLICIT file list (bypasses detect_incremental + manifest-prune
|
||
* risk) + build_merge into existing graph.json.
|
||
* 4. For DOC/MD files (docs/ + .claude/ md) → writes `needs_update` flag,
|
||
* NO automatic LLM extraction (subagent dispatch too expensive for hook).
|
||
* 5. Skips silently if graphify-out/ missing (e.g., worktree без graph).
|
||
*
|
||
* Non-blocking by design — always exits 0 to не ломать commit. Log goes to
|
||
* stdout (lefthook собирает).
|
||
*
|
||
* Security Guidance #40: pure deterministic — git read + Python exec, без LLM.
|
||
*
|
||
* @author graphify-formalization 2026-05-27
|
||
* @see docs/adr/ADR-017-knowledge-graph-tooling.md § "Стратегия обновлений"
|
||
*/
|
||
|
||
import { execFileSync } from 'node:child_process';
|
||
import { existsSync, writeFileSync, readFileSync } from 'node:fs';
|
||
import { join, resolve, extname } from 'node:path';
|
||
|
||
// Repo-relative path prefixes a changed file must start with to be considered.
const ALLOWED_SCOPES = [
  'docs/',
  '.claude/',
  'app/',
];

// Lowercase extensions (dot included) that are routed to AST extraction;
// anything else in scope is handled as a doc file.
const CODE_EXTS = new Set([
  '.php',
  '.ts',
  '.js',
  '.vue',
  '.mjs',
  '.cjs',
  '.py',
  '.go',
]);

// Directory markers that disqualify a path even when it sits in an allowed scope.
const SCAN_EXCLUDE_DIRS = [
  'node_modules/',
  'vendor/',
  '__pycache__/',
  '.git/',
];
|
||
|
||
/**
|
||
* Pure: filter a list of git-diff file paths down to those within allowed
|
||
* scopes AND not inside excluded directories.
|
||
*
|
||
* @param {string[]} paths git-diff output lines (relative repo paths)
|
||
* @param {string[]} allowed prefixes that the path must start with
|
||
* @param {string[]} excluded substrings that disqualify a path even if it matches a scope
|
||
* @returns {string[]}
|
||
*/
|
||
/**
 * Pure: decide whether a single exclusion entry disqualifies a path.
 *
 * Directory-style entries (trailing '/', no leading '/') only match when they
 * start at a path-segment boundary, so 'vendor/' excludes 'app/vendor/x.php'
 * but not 'app/my-vendor/x.php'. Every other entry keeps plain substring
 * semantics.
 *
 * @param {string} path      repo-relative path using forward slashes
 * @param {string} exclusion one entry of the excluded list
 * @returns {boolean}
 */
function isExcludedPath(path, exclusion) {
  const isDirectoryEntry = exclusion.endsWith('/') && !exclusion.startsWith('/');
  if (!isDirectoryEntry) return path.includes(exclusion);
  return path.startsWith(exclusion) || path.includes(`/${exclusion}`);
}

/**
 * Pure: filter a list of git-diff file paths down to those within allowed
 * scopes AND not inside excluded directories.
 *
 * @param {string[]} paths    git-diff output lines (relative repo paths)
 * @param {string[]} allowed  prefixes that the path must start with
 * @param {string[]} excluded entries that disqualify a path even if it matches
 *                            a scope; entries ending in '/' are matched as
 *                            whole directory names, others as substrings
 * @returns {string[]} the surviving paths, in their original order
 */
export function filterInScope(paths, allowed, excluded) {
  return paths.filter((p) => {
    if (!allowed.some((scope) => p.startsWith(scope))) return false;
    return !excluded.some((ex) => isExcludedPath(p, ex));
  });
}
|
||
|
||
/**
|
||
* Pure: partition file list by extension into code-files vs everything-else.
|
||
* Case-insensitive on extension.
|
||
*
|
||
* @param {string[]} paths
|
||
* @param {Set<string>} codeExts lowercase extensions including the dot (e.g. '.php')
|
||
* @returns {{ codeFiles: string[], docFiles: string[] }}
|
||
*/
|
||
/**
 * Pure: split a list of paths into code files and everything else, based on
 * the lowercased file extension. Relative order is kept within each bucket.
 *
 * @param {string[]} paths
 * @param {Set<string>} codeExts lowercase extensions including the dot (e.g. '.php')
 * @returns {{ codeFiles: string[], docFiles: string[] }}
 */
export function partitionByExtension(paths, codeExts) {
  const buckets = { codeFiles: [], docFiles: [] };
  paths.forEach((filePath) => {
    const isCode = codeExts.has(extname(filePath).toLowerCase());
    const target = isCode ? buckets.codeFiles : buckets.docFiles;
    target.push(filePath);
  });
  return buckets;
}
|
||
|
||
// ────────────────────────── CLI entry ──────────────────────────
|
||
// Skip side-effects when imported as a module (tests). Works on Windows
|
||
// (file:///C:/…) and POSIX (file:///home/…) by comparing the file URL
|
||
// pathname to the resolved argv[1].
|
||
import { fileURLToPath } from 'node:url';
|
||
// Path of the script Node was asked to run (undefined when there is none).
const invokedScript = process.argv[1];

// True only when this module is the script being executed directly: both
// sides are turned into filesystem paths before the comparison.
const isMain = invokedScript && fileURLToPath(import.meta.url) === resolve(invokedScript);

// Importing the module (e.g. from tests) must not trigger the CLI side effects.
if (isMain) await runCli();
|
||
|
||
/**
 * CLI entry point: refresh graphify-out/graph.json for the files touched by
 * the HEAD commit.
 *
 * Flow:
 *   1. Exit quietly when graphify-out/graph.json does not exist.
 *   2. List the files changed by HEAD (git diff-tree) and keep only those in
 *      ALLOWED_SCOPES that are not under SCAN_EXCLUDE_DIRS.
 *   3. Non-code files: write graphify-out/needs_update and prepend an entry to
 *      graphify-out/needs_update.log.
 *   4. Code files that still exist on disk: run the graphify Python package to
 *      extract them and merge the result into graph.json.
 *
 * Every handled failure is logged and the process exits with status 0 so the
 * calling hook is never blocked. Set GRAPHIFY_SAFE_DEBUG to see the reason for
 * otherwise silent exits.
 *
 * @returns {Promise<void>} never resolves in practice — the process exits.
 */
async function runCli() {
  const REPO_ROOT = resolve(process.cwd());
  const GRAPH_DIR = join(REPO_ROOT, 'graphify-out');

  const logInfo = (msg) => process.stdout.write(`[graphify-safe-update] ${msg}\n`);
  const silentExit = (reason) => {
    if (process.env.GRAPHIFY_SAFE_DEBUG) logInfo(`silent exit: ${reason}`);
    process.exit(0);
  };

  // 1. Graph must exist
  if (!existsSync(join(GRAPH_DIR, 'graph.json'))) {
    silentExit('graphify-out/graph.json missing');
  }

  // 2. Get files changed in the LAST commit only (HEAD vs its parent) — NOT
  //    `git diff HEAD~1` (1-arg), which diffs prev-commit-vs-working-tree and
  //    pollutes scope with unstaged WIP from parallel sessions.
  //    `-z` makes git emit NUL-separated, unquoted paths; without it, paths
  //    with non-ASCII or special characters are C-quoted ("\320\264...") and
  //    would never match a scope prefix.
  let changedRaw;
  try {
    changedRaw = execFileSync(
      'git',
      ['diff-tree', '--no-commit-id', '--name-only', '-r', '-z', 'HEAD'],
      {
        encoding: 'utf-8',
        stdio: ['ignore', 'pipe', 'pipe'],
      },
    );
  } catch (e) {
    silentExit(`git diff-tree failed: ${e.message?.slice(0, 80)}`);
  }

  // Paths are taken verbatim (no trim) so unusual file names survive intact.
  const allChanged = changedRaw.split('\0').filter(Boolean);
  if (allChanged.length === 0) silentExit('no changed files');

  // 3. Filter to allowed scopes + exclude system dirs
  const inScope = filterInScope(allChanged, ALLOWED_SCOPES, SCAN_EXCLUDE_DIRS);
  if (inScope.length === 0) {
    silentExit(`${allChanged.length} files changed, 0 in allowed scope (docs/.claude/app)`);
  }

  // 4. Partition by code vs doc/md
  const { codeFiles, docFiles } = partitionByExtension(inScope, CODE_EXTS);

  // 5. Doc/MD files → just write needs_update flag, no LLM
  if (docFiles.length > 0) {
    const flagPath = join(GRAPH_DIR, 'needs_update');
    const logPath = join(GRAPH_DIR, 'needs_update.log');
    try {
      const ts = new Date().toISOString();
      const entry = `${ts} ${docFiles.length} doc files changed:\n${docFiles.map((f) => ` ${f}`).join('\n')}\n\n`;
      writeFileSync(flagPath, `Run /graphify --update to refresh semantic graph (${docFiles.length} doc files changed).\nLast: ${ts}\n`);
      // Newest entry goes first, so the log is rewritten rather than appended.
      const existingLog = existsSync(logPath) ? readFileSync(logPath, 'utf-8') : '';
      writeFileSync(logPath, entry + existingLog);
      logInfo(`${docFiles.length} doc/md changed → wrote needs_update flag (manual /graphify --update needed)`);
    } catch (e) {
      logInfo(`needs_update flag write failed: ${e.message?.slice(0, 80)} (non-blocking)`);
    }
  }

  // 6. Code files → AST extract on explicit list + merge into graph
  if (codeFiles.length === 0) silentExit('no code files (only docs)');

  // Files deleted by the commit are still listed by git but cannot be parsed.
  const liveCodeFiles = codeFiles.filter((f) => existsSync(join(REPO_ROOT, f)));
  if (liveCodeFiles.length === 0) silentExit('all code files deleted (no AST to extract)');

  // 7. Locate graphify Python interpreter
  const pyPathFile = join(GRAPH_DIR, '.graphify_python');
  let pyPath;
  if (existsSync(pyPathFile)) {
    try {
      // Strip a leading UTF-8 byte-order mark some editors add to the file.
      pyPath = readFileSync(pyPathFile, 'utf-8').replace(/^\uFEFF/, '').trim();
    } catch (e) {
      // Unreadable pointer file: fall through to the `uv` lookup below.
      logInfo(`.graphify_python unreadable: ${e.message?.slice(0, 80)} — falling back to uv`);
    }
  }
  if (!pyPath || !existsSync(pyPath)) {
    try {
      pyPath = execFileSync('uv', ['tool', 'run', '--from', 'graphifyy', 'python', '-c', 'import sys; print(sys.executable)'], {
        encoding: 'utf-8',
        stdio: ['ignore', 'pipe', 'pipe'],
      }).trim();
    } catch (e) {
      logInfo(`graphify python not resolvable: ${e.message?.slice(0, 80)} — manual /graphify --update needed`);
      process.exit(0);
    }
  }

  // 8. Run AST extract + merge via an inline Python script.
  //    The repo root and file list travel as JSON on stdin instead of being
  //    spliced into the Python source: file names come from git and may hold
  //    quotes or backslashes that would corrupt (or escape from) a Python
  //    string literal.
  //    The script passes dedup=False to build_merge; the comment inside the
  //    script (kept verbatim) records why: the default dedup shrinks the
  //    merged graph and to_json then refuses to overwrite graph.json with a
  //    smaller graph.
  const payload = JSON.stringify({ repoRoot: REPO_ROOT, files: liveCodeFiles });
  const pyCode = `
import sys, json
from pathlib import Path
from graphify.extract import extract
from graphify.build import build_merge
from graphify.export import to_json

payload = json.loads(sys.stdin.buffer.read().decode('utf-8'))
repo_root = Path(payload['repoRoot'])
files = [repo_root / f for f in payload['files']]
ast = extract(files, cache_root=repo_root)
ast_nodes = len(ast.get('nodes', []))
ast_edges = len(ast.get('edges', []))
print(f'[graphify-safe-update] AST: {ast_nodes} nodes, {ast_edges} edges from {len(files)} code files')

# dedup=False для incremental: дефолтный dedup=True агрессивно фьюз-дедупит весь
# merged граф (483 exact + 447 fuzzy на 6305-node графе → 5356), to_json потом
# refuses overwrite смаленьшим графом. dedup=False → чистый union по ID.
graph_path = repo_root / 'graphify-out' / 'graph.json'
G = build_merge([ast], graph_path=str(graph_path), dedup=False)
to_json(G, dict(), str(graph_path))
print(f'[graphify-safe-update] merged: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges in graph.json')
`;

  try {
    const out = execFileSync(pyPath, ['-c', pyCode], {
      encoding: 'utf-8',
      input: payload,
      stdio: ['pipe', 'pipe', 'pipe'],
      cwd: REPO_ROOT,
    });
    process.stdout.write(out);
  } catch (e) {
    logInfo(`AST extract/merge failed (non-blocking): ${e.message?.slice(0, 200)}`);
    if (e.stderr) process.stderr.write(e.stderr.toString());
  }

  process.exit(0);
}
|