Files
portal/tools/enforce-parallel-session-lock.mjs
T
Дмитрий 6577c04a1f fix(router-gate): session-lock hygiene — clearer block message + stale-lock prune
Closes the remaining parallel-session-lock remarks on top of the keying fix
(7a469dc9), with NO weakening of same-worktree serialization:

- D: the block message now identifies the holder by its STABLE session_id and
  marks the recorded pid as transient ("may change between attempts"). Chasing
  the pid is what led to closing the wrong session. Decision logic is unchanged
  (text only) — existing /pid N/ triage assertion still holds.
- B: pruneStaleLocks() best-effort deletes leaked lock files that are ALREADY
  stale by the shared isStale() definition (now exported from the pure module —
  single source of truth). Active within-TTL locks are never touched, so the
  serialization guarantee is not weakened. Wired into the PreToolUse branch of
  main(), wrapped so hygiene can never break the gate (fail-open).
- C (no code): release-on-SessionEnd needs only a settings.json registration
  (owner action) — the existing !tool_name branch already releases. Documented
  in the plan. Until then, leaked locks self-heal via B + the 5-min TTL takeover.

TDD: RED -> GREEN per behavior. tools-vitest 2014 passed / 2 skipped.
Backlog items B/C/D; plan docs/superpowers/plans/2026-05-31-discipline-guard-backlog.md.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-31 17:43:03 +03:00

194 lines
7.7 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env node
/**
* enforce-parallel-session-lock — PreToolUse wrapper around the pure
* parallel-session-lock module (router-gate v4 Stream H Task 7).
*
* Prevents two Claude sessions on the same workspace from concurrently
* mutating files. When session B tries a mutating tool while session A
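* holds a fresh (non-stale) lock, B is blocked with a message naming A's
* session id (plus the recorded pid as a transient hint) for human triage.
*
* Activation: main() is live-wired — an event carrying a tool_name acquires
* or refreshes the lock, an event without one releases it. The hook only runs
* where it is registered in settings.json (registration was originally
* deferred to the Phase H-α/H-β batch step — confirm current status).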
*/
import { acquire, release, computeWorkspaceHash, isStale } from './parallel-session-lock.mjs';
import { readFileSync, writeFileSync, unlinkSync, mkdirSync, readdirSync } from 'node:fs';
import { execFileSync } from 'node:child_process';
import { join, dirname } from 'node:path';
import { readStdin, parseEventJson, exitDecision, runtimeDir } from './enforce-hook-helpers.mjs';
/**
 * Turn the outcome of a lock-acquire attempt into a block/allow verdict.
 * Pure: no I/O, no clock access.
 *
 * Anything that is not an object (including null/undefined) is treated as an
 * internal failure and allowed — this function fails open and never locks a
 * session out. An object whose `acquired` is truthy is allowed as well.
 * Otherwise the call is blocked and the reason names the holder.
 *
 * @param {object} args
 * @param {object|null|undefined} args.acquireResult - result of parallel-session-lock.acquire()
 * @param {string} args.sessionId - current session id (accepted for interface
 *   compatibility; the verdict does not read it)
 * @returns {{block: boolean, reason?: string}}
 */
export function decide({ acquireResult, sessionId }) {
  const isUsableResult = Boolean(acquireResult) && typeof acquireResult === 'object';
  if (!isUsableResult || acquireResult.acquired) {
    return { block: false };
  }

  // The message leads with the holder's session id because that is the stable
  // identifier; the recorded pid belongs to a short-lived hook process and is
  // shown only as a hint. Missing fields degrade to 'unknown' / '?'.
  const { session_id: holderSession, pid: holderPid } = acquireResult.holder || {};
  const sessionLabel = holderSession || 'unknown';
  const pidLabel = holderPid || '?';

  return {
    block: true,
    reason: `parallel session lock held by session ${sessionLabel} (current pid ${pidLabel}, may change between attempts — identify the session by its id, not pid) — wait for the 5-min TTL or close THAT session`,
  };
}
/**
 * PreToolUse wiring: attempt to take the workspace lock for this session and
 * translate the attempt into a block/allow verdict via decide().
 *
 * The lock is keyed by computeWorkspaceHash(cwd). All I/O (readLock/writeLock),
 * the clock (now) and the pid are injected so the function can be unit-tested.
 * What counts as a refresh or a stale takeover is decided inside acquire(),
 * which lives in the parallel-session-lock module.
 *
 * @param {object} args
 * @param {object} args.event - hook event; its session_id identifies the caller
 * @param {number} args.now - current time, forwarded to acquire()
 * @param {number} args.pid - pid recorded in the lock, forwarded to acquire()
 * @param {string} args.cwd - workspace path the lock is keyed on
 * @param {Function} args.readLock - lock reader, forwarded to acquire()
 * @param {Function} args.writeLock - lock writer, forwarded to acquire()
 * @returns {{block: boolean, reason?: string}}
 */
export function runAcquireDecision({ event, now, pid, cwd, readLock, writeLock }) {
  // A falsy event is passed through unchanged as the session id.
  const sessionId = event ? event.session_id : event;
  const lockRequest = {
    sessionId,
    pid,
    workspaceHash: computeWorkspaceHash(cwd),
    now,
    readLock,
    writeLock,
  };
  return decide({ acquireResult: acquire(lockRequest), sessionId });
}
/**
 * Stop wiring: ask the lock module to release this session's workspace lock.
 *
 * Ownership checking is delegated to release() in the parallel-session-lock
 * module. Its outcome is not inspected here, so the returned flag is always
 * `true` — it signals that the release was attempted, not that a lock was
 * actually removed.
 *
 * @param {object} args
 * @param {object} args.event - hook event; its session_id identifies the caller
 * @param {string} args.cwd - workspace path the lock is keyed on
 * @param {Function} args.readLock - lock reader, forwarded to release()
 * @param {Function} args.deleteLock - lock deleter, forwarded to release()
 * @returns {{released: boolean}}
 */
export function runReleaseAction({ event, cwd, readLock, deleteLock }) {
  // A falsy event is passed through unchanged as the session id.
  const sessionId = event ? event.session_id : event;
  const releaseRequest = {
    sessionId,
    workspaceHash: computeWorkspaceHash(cwd),
    readLock,
    deleteLock,
  };
  release(releaseRequest);
  return { released: true };
}
/**
 * Pick the directory the workspace lock is keyed on.
 *
 * The starting directory is the session's own `event.cwd` when that is a
 * non-empty string, otherwise `processCwd`. That directory is then handed to
 * `runGitToplevel`; a non-empty string result (the git work-tree root) wins.
 * If the lookup throws or yields anything else, the starting directory is
 * returned as-is (fail-open).
 *
 * Keying on the session's cwd rather than the hook's process cwd keeps
 * sessions in different worktrees from sharing — and blocking on — one key.
 *
 * Pure: the git lookup is injected.
 *
 * @param {object} p
 * @param {object} p.event - hook event, may carry a `cwd` string
 * @param {string} p.processCwd - fallback directory
 * @param {(dir:string)=>string} p.runGitToplevel - returns the toplevel or '' on failure
 * @returns {string}
 */
export function resolveWorkspacePath({ event, processCwd, runGitToplevel }) {
  let startDir = processCwd;
  if (event && typeof event.cwd === 'string' && event.cwd !== '') {
    startDir = event.cwd;
  }

  let toplevel;
  try {
    toplevel = runGitToplevel(startDir);
  } catch {
    // Lookup failed — key on the raw directory instead.
    return startDir;
  }

  const hasToplevel = typeof toplevel === 'string' && toplevel !== '';
  return hasToplevel ? toplevel : startDir;
}
/**
 * Disk hygiene: delete lock files whose record the injected staleness
 * predicate reports as stale. A record the predicate does not report as stale
 * is never deleted.
 *
 * Best-effort, with every per-file step isolated so one bad file cannot abort
 * the pass or surface an error to the caller:
 *  - a failed read hands `null` to the predicate (which decides whether an
 *    unreadable file counts as stale);
 *  - a predicate that throws is treated as "not stale" — when in doubt, the
 *    file is left alone rather than deleted;
 *  - a failed delete is swallowed and not counted.
 *
 * Pure: all I/O and the staleness rule are injected.
 *
 * @param {object} p
 * @param {string[]} p.files - absolute lock-file paths (nullish is treated as empty)
 * @param {(f:string)=>object|null} p.readRecord
 * @param {(f:string)=>void} p.deleteRecord
 * @param {(rec:object|null, now:number)=>boolean} p.isStaleFn
 * @param {number} p.now
 * @returns {{pruned: number}} number of files whose delete call succeeded
 */
export function pruneStaleLocks({ files, readRecord, deleteRecord, isStaleFn, now }) {
  let pruned = 0;
  for (const file of files || []) {
    let record = null;
    try {
      record = readRecord(file);
    } catch {
      record = null;
    }

    let stale = false;
    try {
      stale = Boolean(isStaleFn(record, now));
    } catch {
      // Cannot tell whether the lock is stale — keep it.
      stale = false;
    }
    if (!stale) continue;

    try {
      deleteRecord(file);
      pruned++;
    } catch {
      /* best-effort: a file that cannot be deleted is simply left behind */
    }
  }
  return { pruned };
}
/**
 * Ask git for the work-tree root that contains `dir`.
 *
 * Runs `git -C <dir> rev-parse --show-toplevel` synchronously with a 1000 ms
 * timeout, stdin and stderr discarded. Returns the trimmed stdout, or '' if
 * the call fails for any reason (git missing, not a repository, timeout,
 * invalid argument).
 *
 * @param {string} dir - directory to resolve from
 * @returns {string} work-tree root, or '' on failure
 */
function realGitToplevel(dir) {
  const gitArgs = ['-C', dir, 'rev-parse', '--show-toplevel'];
  const spawnOptions = {
    encoding: 'utf-8',
    timeout: 1000,
    stdio: ['ignore', 'pipe', 'ignore'],
  };
  try {
    const stdout = execFileSync('git', gitArgs, spawnOptions);
    return stdout.trim();
  } catch {
    return '';
  }
}
/**
 * Build the lock-file path for a workspace: a file named
 * `session-lock-<computeWorkspaceHash(cwd)>.json` inside runtimeDir().
 *
 * @param {string} cwd - workspace path the lock is keyed on
 * @returns {string} path of the lock file
 */
function lockPathFor(cwd) {
  const baseDir = runtimeDir();
  const fileName = `session-lock-${computeWorkspaceHash(cwd)}.json`;
  return join(baseDir, fileName);
}
/**
 * Read and JSON-parse the lock file at `p`.
 *
 * Any failure — missing file, unreadable file, malformed JSON — yields null
 * and never an exception.
 *
 * @param {string} p - lock-file path
 * @returns {*} the parsed JSON value, or null on any failure
 */
function realReadLock(p) {
  let parsed = null;
  try {
    const raw = readFileSync(p, 'utf-8');
    parsed = JSON.parse(raw);
  } catch {
    parsed = null;
  }
  return parsed;
}
/**
 * Persist a lock record as JSON at `p`, creating the parent directory
 * (recursively) first.
 *
 * Fail-open: any error while creating the directory, serialising the record
 * or writing the file is swallowed, so a write problem never surfaces to the
 * caller.
 *
 * @param {string} p - lock-file path
 * @param {*} rec - record to serialise with JSON.stringify
 * @returns {void}
 */
function realWriteLock(p, rec) {
  try {
    const parentDir = dirname(p);
    mkdirSync(parentDir, { recursive: true });
    const payload = JSON.stringify(rec);
    writeFileSync(p, payload);
  } catch {
    /* fail-open */
  }
}
/**
 * Remove the lock file at `p`.
 *
 * Errors are swallowed, so deleting a file that is already gone (or cannot be
 * removed) is a silent no-op.
 *
 * @param {string} p - lock-file path
 * @returns {void}
 */
function realDeleteLock(p) {
  try {
    unlinkSync(p);
  } catch {
    /* already gone */
  }
}
/**
 * Hook entry point. Reads the hook event from stdin and then either releases
 * or acquires the workspace lock:
 *
 *  - event without `tool_name` (the Stop path) → release, then allow;
 *  - event with `tool_name` (PreToolUse on a mutating tool) → best-effort
 *    prune of stale lock files, then acquire/refresh and block or allow.
 *
 * Fail-open throughout: any error on any path ends in an "allow" decision, so
 * a lock bug can never wedge the user out of their own session.
 *
 * @returns {Promise<*>} whatever exitDecision() returns
 */
async function main() {
  try {
    const event = parseEventJson(await readStdin());

    // The lock is keyed on the session's work-tree root (event.cwd → git
    // toplevel), with process.cwd() only as a fallback: the hook's own cwd is
    // volatile and would make sessions in different worktrees share one key.
    const cwd = resolveWorkspacePath({
      event,
      processCwd: process.cwd(),
      runGitToplevel: realGitToplevel,
    });
    const p = lockPathFor(cwd);
    const readLock = () => realReadLock(p);

    // No tool_name → Stop event → release path.
    if (!event.tool_name) {
      const deleteLock = () => realDeleteLock(p);
      runReleaseAction({ event, cwd, readLock, deleteLock });
      return exitDecision({ block: false });
    }

    // Hygiene pass before acquiring: remove lock files that isStale() already
    // considers stale. Kept in its own try so a failure here can never affect
    // the gate decision below.
    try {
      const dir = runtimeDir();
      const lockFilePattern = /^session-lock-.*\.json$/;
      const files = [];
      for (const entry of readdirSync(dir)) {
        if (lockFilePattern.test(entry)) files.push(join(dir, entry));
      }
      pruneStaleLocks({
        files,
        readRecord: (lockFile) => realReadLock(lockFile),
        deleteRecord: (lockFile) => realDeleteLock(lockFile),
        isStaleFn: isStale,
        now: Date.now(),
      });
    } catch {
      /* hygiene is best-effort */
    }

    // Acquire or refresh the lock, then surface the verdict.
    const verdict = runAcquireDecision({
      event,
      now: Date.now(),
      pid: process.pid,
      cwd,
      readLock,
      writeLock: (rec) => realWriteLock(p, rec),
    });
    const message = verdict.block ? `[parallel-session-lock] ${verdict.reason}` : undefined;
    return exitDecision({ block: verdict.block, message });
  } catch {
    // fail-open — never lock out
    return exitDecision({ block: false });
  }
}
// Entry-point guard: run main() only when this file is the script Node was
// launched with, so that importing the exported helpers does not trigger the
// hook. process.argv[1] is absent when Node runs without a script file (e.g.
// `node -e`), so it is defaulted to '' once, before any string method is
// called on it — otherwise merely importing this module would throw.
// The first comparison normalises Windows backslashes; the endsWith check is
// the fallback for paths whose file URL does not match that simple form.
const entryScriptPath = process.argv[1] || '';
if (import.meta.url === `file://${entryScriptPath.replace(/\\/g, '/')}` || entryScriptPath.endsWith('enforce-parallel-session-lock.mjs')) {
  // Any rejection from main() still exits 0 — the hook must fail open.
  main().catch(() => process.exit(0));
}