diff --git a/.claude/settings.json b/.claude/settings.json index b63465d0..a03e8c50 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -65,6 +65,36 @@ "timeout": 5 } ] + }, + { + "matcher": "Edit|Write|MultiEdit", + "hooks": [ + { + "type": "command", + "command": "node tools/enforce-memory-coverage.mjs", + "timeout": 5 + }, + { + "type": "command", + "command": "node tools/enforce-tdd-gate.mjs", + "timeout": 5 + } + ] + }, + { + "matcher": "Bash", + "hooks": [ + { + "type": "command", + "command": "node tools/enforce-branch-switch.mjs", + "timeout": 5 + }, + { + "type": "command", + "command": "node tools/enforce-verify-before-push.mjs", + "timeout": 5 + } + ] } ], "PostToolUse": [ @@ -85,6 +115,31 @@ "command": "node -e \"const f=process.env.CLAUDE_FILE_PATH||''; const n=f.replace(/\\\\\\\\/g,'/'); if (/(^|\\\\/)db\\\\/schema\\\\.sql$/i.test(n)) { process.stdout.write('\\n[hook] REMINDER: You modified db/schema.sql. Per CLAUDE.md §5 п.8, add a corresponding entry to db/CHANGELOG_schema.md before committing.\\n'); }\"" } ] + }, + { + "matcher": "Bash", + "hooks": [ + { + "type": "command", + "command": "node tools/enforce-verify-record.mjs", + "timeout": 5 + }, + { + "type": "command", + "command": "node tools/enforce-rationalization-audit.mjs", + "timeout": 5 + } + ] + }, + { + "matcher": "Edit|Write|MultiEdit", + "hooks": [ + { + "type": "command", + "command": "node tools/enforce-rationalization-audit.mjs", + "timeout": 5 + } + ] } ], "Stop": [ @@ -105,6 +160,24 @@ "timeout": 5 } ] + }, + { + "hooks": [ + { + "type": "command", + "command": "node tools/enforce-coverage-verify.mjs", + "timeout": 5 + } + ] + }, + { + "hooks": [ + { + "type": "command", + "command": "node tools/enforce-classifier-match.mjs", + "timeout": 5 + } + ] } ], "UserPromptSubmit": [ @@ -116,6 +189,15 @@ "timeout": 10 } ] + }, + { + "hooks": [ + { + "type": "command", + "command": "node tools/enforce-prompt-injection.mjs", + "timeout": 5 + } + ] } ], "SessionStart": [ diff 
--git a/docs/superpowers/plans/2026-05-25-enforce-hard-rules.md b/docs/superpowers/plans/2026-05-25-enforce-hard-rules.md new file mode 100644 index 00000000..c9429e86 --- /dev/null +++ b/docs/superpowers/plans/2026-05-25-enforce-hard-rules.md @@ -0,0 +1,72 @@ +# Enforce hard rules — implementation plan + +**Spec:** `docs/superpowers/specs/2026-05-25-enforce-hard-rules-design.md` +**Branch:** `feat/enforce-hard-rules` +**Estimate:** 4-8 hours autonomous (overnight) + +## Tasks (in commit order — each commit standalone testable) + +### T1 — Shared hook helpers + override vocab +**Files:** `tools/enforce-hook-helpers.mjs`, `tools/enforce-hook-helpers.test.mjs`, `tools/enforce-override-vocab.json` +**Helpers:** readStdinJson, readTranscript, getCoverageFromLastAssistant, hasOverridePhrase, loadVocab, sentinelPath, writeSentinel, readSentinel, expectedBranchPath, getExpectedBranch, setExpectedBranch, readRationalizationFlags, appendRationalizationFlag. +**Override vocab content:** initial 6 phrases per spec §9. +**Coverage:** skill:superpowers:test-driven-development + +### T2 — Rule #5 memory-sync coverage (PreToolUse) +**File:** `tools/enforce-memory-coverage.mjs` + test. +Simplest rule, easy validation. RED test: prod-code edit with TDD coverage → block. GREEN: memory edit with memory-sync coverage → allow. + +### T3 — Rule #7 branch-switch detection (PreToolUse Bash) +**File:** `tools/enforce-branch-switch.mjs` + test. +Reads expected-branch file, runs `git branch --show-current`, compares. + +### T4 — Rule #4 verify-before-push (PreToolUse + PostToolUse Bash) +**Files:** `tools/enforce-verify-before-push.mjs` (PreToolUse) + `tools/enforce-verify-record.mjs` (PostToolUse to write sentinel) + tests. +PostToolUse runs after Bash with vitest/pest pattern. If exit 0 + stdout has PASS marker → write sentinel. +PreToolUse on git commit/push checks sentinel age + exists. + +### T5 — Rule #2 coverage-verify (Stop) +**File:** `tools/enforce-coverage-verify.mjs` + test. 
+Parses last assistant message for coverage line, checks against transcript tool_use history. + +### T6 — Rule #1 mandatory re-classification injection (UserPromptSubmit) +**File:** `tools/enforce-prompt-injection.mjs` + test. +Reads classifier output from router-state-*.json, injects mandatory coverage list via stdout JSON. + +### T7 — Rule #3 + Rule #6 TDD + writing-plans gate (PreToolUse Edit/Write/MultiEdit) +**File:** `tools/enforce-tdd-gate.mjs` + test. +Path-match, transcript-scan for test-edit + vitest-fail-output, OR plan-file-exists. + +### T8 — Rule #8 classifier-mismatch (Stop) +**File:** `tools/enforce-classifier-match.mjs` + test. +Reads classifier output, checks turn for matching Skill/Task tool_use, gates on confidence threshold. + +### T9 — Rule #10 rationalization flags (PostToolUse Bash + Edit/Write) +**File:** `tools/enforce-rationalization-audit.mjs` + test. +Scan transcript for rationalization phrases / weak tests; append flag JSONL. + +### T10 — Atomic wire-up +**File:** `.claude/settings.json` — add all hooks to PreToolUse/PostToolUse/UserPromptSubmit/Stop. +**Critical:** this must be the LAST commit. Pre-wire commits keep hooks inert. + +### T11 — Smoke + push +Manual smoke each hook with synthetic stdin. Then `git push origin feat/enforce-hard-rules:main` via FF (or merge-commit if main moved). + +### T12 — Memory + state sync +Create `memory/project_enforce_hard_rules.md`, update MEMORY.md index, project_state.md, reference_github.md. + +## Risks identified, mitigations + +- **R1:** Parallel session edits `.claude/settings.json` while I'm working. **Mitigation:** Read settings.json fresh right before T10. Use `git stash` for any concurrent local changes if needed. +- **R2:** A rule blocks my own work mid-task. **Mitigation:** Rules inert until T10. If T10 wire-up succeeds and immediately blocks me on T11 push, override-vocab is in place (`recovery` phrase). +- **R3:** Hook scripts crash → all subsequent tool calls hang. 
**Mitigation:** Every hook wraps logic in try/catch, exits 0 with empty {} on internal error (fail-quiet). NEVER exit 2 unless intentional violation found. +- **R4:** Override-vocab phrase appears coincidentally in user's normal speech. **Mitigation:** Phrases are chosen to be unusual (they include «без скилов», which is unlikely in normal speech). +- **R5:** PreToolUse latency on Bash slows every command. **Mitigation:** Hooks target a delay of <100ms by reading the minimum (cached classifier-state, sentinel file, no transcript-parse unless rule triggers). + +## Acceptance criteria + +- All 10 rules implemented with unit tests +- All hooks wired in settings.json +- Manual smoke per hook: fake-stdin → expected exit code + stderr +- Push to origin/main (or PR if main is unstable) +- Memory + project_state synced diff --git a/docs/superpowers/specs/2026-05-25-enforce-hard-rules-design.md b/docs/superpowers/specs/2026-05-25-enforce-hard-rules-design.md new file mode 100644 index 00000000..d3cfb47d --- /dev/null +++ b/docs/superpowers/specs/2026-05-25-enforce-hard-rules-design.md @@ -0,0 +1,157 @@ +# Enforce hard rules — design (2026-05-25 night) + +**Status:** In progress (autonomous overnight implementation) +**Origin:** End of brain factor-analysis 4-passes session (HEAD `58784b18`). Honest retrospective showed brain-governance / observer / classifier architecture is observe-only — no enforcement. Controller (Claude) rationalized 4 skill bypasses + single coverage tag for 6 hours of varied activity without any hook blocking the behaviour. +**Goal:** Convert soft warnings to hard `exit 2` blocks at the only enforce-able layer Claude Code exposes — PreToolUse + Stop hooks. Substance-of-skill compliance translates to artifact-checks. + +## Non-goals + +- Constraining Claude's text output (impossible by architecture — LLM generation). +- Enforcing test quality (substance). Future LLM-judge epic. +- Enforcing skill content interpretation. Best-effort via artifact gates. 
+- Replacing the classifier / observer / brain-retro infrastructure. This is an enforcement layer on top. + +## Architectural premise + +Claude Code hook surface: +- **UserPromptSubmit** — can inject additional text into the next turn's context. CAN'T block. +- **PreToolUse** — `exit 2` blocks the tool call. Stderr returns to Claude. +- **PostToolUse** — observes, can write state. CAN'T block (tool already ran). +- **Stop** — `exit 2` denies turn completion. Stderr returns to Claude on next continuation. + +This proposal uses all four. Output text remains uncontrolled by design — but every consequential ACTION (tool call, turn completion) passes a gate. + +## The 10 rules (priority + risk ordered) + +### Rule #1 — Mandatory re-classification per prompt + +**Mechanism:** UserPromptSubmit hook (`tools/enforce-prompt-injection.mjs`) runs after the existing classifier, then injects a context block listing: +- Classification + confidence +- 1-3 recommended skills/nodes +- Forced `coverage:` line requirement (first line of response) + +**Effect:** Each turn starts with explicit coverage expectation visible to Claude in context. + +**Override:** User says one of the override-vocab phrases (see Rule #9). Then injection is suppressed for that prompt. + +### Rule #2 — Coverage tag verified against artifacts + +**Mechanism:** Stop hook (`tools/enforce-coverage-verify.mjs`). Reads the assistant's last response, parses `coverage: {channel}:{id}`. Then: +- `channel=skill` → check transcript for `Skill` tool_use with `input.skill === id` in this turn. If absent → `exit 2`. +- `channel=node` → check for tool_use matching the node's canonical tool (e.g., #19 frontend-design → check for matching skill or canonical command). If absent → `exit 2`. +- `channel=direct` → no artifact check, but classifier-recommendation must align with non-direct fallback (handled by Rule #8). +- No `coverage:` line at all → `exit 2`. + +**Override:** Override-vocab phrase in previous user prompt. 
+ +### Rule #3 — TDD-gate on production code + +**Mechanism:** PreToolUse hook on `Edit`/`Write`/`MultiEdit` (`tools/enforce-tdd-gate.mjs`). For paths matching production patterns: +- `tools/**/*.mjs` (not `*.test.mjs`) +- `app/app/**/*.php` (not `app/tests/**`) +- `resources/js/**` (not `**/*.spec.ts`, not `**/*.test.ts`) + +Reads transcript of current turn so far. Requires: +1. Earlier `Edit`/`Write` on a corresponding test path within the same turn, OR +2. Test artifact already exists (Bash `test -f` could verify, but we read git status) + +AND: +3. Earlier `Bash` with `vitest` / `pest` in command, AND +4. The `Bash` stdout in transcript contains a "fail" / "FAIL" marker (RED phase confirmed) + +If any check fails → `exit 2` with explanation. + +**Override:** Override-vocab phrase + sentinel file `~/.claude/runtime/tdd-bypass-{session_id}.flag` (auto-created from override). + +### Rule #4 — Git commit/push requires verification artifact + +**Mechanism:** PreToolUse hook on `Bash` (`tools/enforce-verify-before-push.mjs`). Pattern-matches command for `git commit` or `git push`. If matched: +- Check for sentinel file `~/.claude/runtime/verify-pass-{session_id}.json` +- Sentinel contains `last_full_run_at` timestamp, `result: pass|fail`, `command_run`, `tests_total`, `tests_passed` +- Sentinel must be written by the rule's companion PostToolUse hook on Bash (`tools/enforce-verify-record.mjs`), when Bash command matches vitest/pest full-run pattern AND stdout indicates success +- Sentinel age < 600s required; missing or stale → `exit 2` + +**Override:** Override-vocab phrase or `RECOVERY-INTENT:` marker in previous response. + +### Rule #5 — Memory write requires memory-sync coverage + +**Mechanism:** PreToolUse hook on `Edit`/`Write` (`tools/enforce-memory-coverage.mjs`). Path-match: +- `**/memory/*.md` +- `**/MEMORY.md` +- `C:\Users\*\.claude\projects\**\memory\*.md` + +Reads last assistant message for `coverage: direct:memory-sync` or `coverage: skill:{id}`. 
+If coverage is absent or stale (matches a non-memory channel) → `exit 2` with re-announce instruction. + +### Rule #6 — Writing-plans enforce for feature/bugfix/refactor + +**Mechanism:** PreToolUse hook on production-code `Edit`/`Write` (folded into Rule #3 hook). Before first production-code edit of a turn classified as `feature`/`bugfix`/`refactor`: +- Either invoke `superpowers:writing-plans` skill (Skill tool_use) in this turn so far, OR +- Plan file exists at `docs/superpowers/plans/{date}-{slug}.md` referenced in transcript, OR +- Override-vocab phrase + +If none → `exit 2`. + +### Rule #7 — Branch-switch detection before commit + +**Mechanism:** PreToolUse on `Bash` matching `git commit`. Hook runs `git branch --show-current`. Compares to expected branch (from `~/.claude/runtime/expected-branch-{session_id}`, written at session start or when user explicitly mentions a branch). + +If actual ≠ expected → `exit 2`: «Branch switched silently. Verify via `BRANCH-SWITCH-CONFIRMED` or `RECOVERY-INTENT:`.» + +### Rule #8 — Classifier-mismatch enforce + +**Mechanism:** Stop hook (chained after Rule #2). Reads classifier output: +- If `classifier_output.recommended_node !== null` AND +- `confidence >= 0.7` AND +- No `Skill`/`Task` tool_use matching the recommendation in this turn AND +- No `override: {reason}` line in response + +→ `exit 2`. + +### Rule #9 — Override-vocabulary hard-coded + +**Mechanism:** Config file `tools/enforce-override-vocab.json` with allowed phrases. Hooks read this once at startup. Phrases (initial): +- `без скилов` → all skill-related rules suppressed for this prompt +- `direct ok` → coverage=direct allowed without justification +- `срочно` → Rule #4 (verify) suppressed +- `быстрый коммит` → Rule #3+#4 suppressed for one commit +- `recovery` → Rule #7 suppressed +- `memory dump` → Rule #5 acknowledged auto + +Match is a case-insensitive literal substring match against the user's last prompt. Each use is logged to `~/.claude/runtime/override-usage.jsonl`. 
+ +### Rule #10 — Rationalization flags (post-fact audit) + +**Mechanism:** PostToolUse on `Bash` and `Edit`/`Write`/`MultiEdit` (`tools/enforce-rationalization-audit.mjs`). After each prod-code Edit/Write or git commit: +- Scan turn so far for indicators: weak test (≤2 expects), commit message lacking TDD evidence, "just this once" / "for now" / "пока без" / "сейчас быстрее" phrases. +- Each flag appended to `~/.claude/runtime/rationalization-flags-{session_id}.jsonl`. +- Next UserPromptSubmit hook reads this file and injects into context: «Previous turn flagged: X — adjust behavior.» + +Soft (no block), but visible to Claude on next turn. + +## Anti-self-block strategy during development + +Implementing the rules inside the very project they will enforce creates a chicken-and-egg problem. Mitigation: + +1. **Develop on feature branch `feat/enforce-hard-rules`** (already created). +2. **Hook scripts are inert until wired into `.claude/settings.json`.** None of the implementation commits trigger them. +3. **Final commit atomically wires all hooks** in settings.json. +4. **First push and test must happen ON main after wire-up commit** — by then all rules are committed AND satisfied (because each new turn after wire will start under enforced rules naturally). + +## Test strategy per rule + +Per-rule unit tests in `tools/enforce-*.test.mjs`: +- Hook receives fake stdin (event JSON) +- Hook decision verified by exit code + stderr message +- Sentinel file behavior tested with mkdtemp baseDir override +- Override-vocab integration tested by injecting phrase in prev-prompt fixture + +Target ~60-100 tests total for all hooks. 
+ +## Out of scope (deferred, may revisit morning) + +- LLM-judge on test quality +- Confidence threshold tuning (default 0.7, hand-tune via brain-retro) +- Multi-prompt session-level reasoning (each prompt evaluated standalone) +- Conflict resolution if multiple override-vocab phrases stack +- UI for override-usage retro (just JSONL file; brain-retro will read) diff --git a/tools/enforce-branch-switch.mjs b/tools/enforce-branch-switch.mjs new file mode 100644 index 00000000..8f2041f9 --- /dev/null +++ b/tools/enforce-branch-switch.mjs @@ -0,0 +1,105 @@ +#!/usr/bin/env node +/** + * Rule #7 — Branch-switch detection before commit / push. + * + * PreToolUse on Bash. Detects `git commit`, `git push`, `git cherry-pick`, + * `git reset --hard`, `git rebase`, `git branch -f/-d`. Reads expected branch + * from sentinel; if missing, defaults to "main". Compares to actual current + * branch via `git branch --show-current`. Mismatch → block unless explicit + * confirmation marker in last assistant text OR override phrase. 
+ * + * Confirmation markers in assistant response (case-sensitive substring): + * - BRANCH-SWITCH-CONFIRMED + * - RECOVERY-INTENT: + * Override phrases: "recovery" (suppresses branch-switch + git-recovery rule keys) + * + * Spec: docs/superpowers/specs/2026-05-25-enforce-hard-rules-design.md + */ + +import { + readStdin, + parseEventJson, + readTranscript, + lastUserPromptText, + lastAssistantText, + findOverride, + logOverride, + exitDecision, + detectGitCommandKind, + readGitBranch, + getExpectedBranch, +} from './enforce-hook-helpers.mjs'; + +const RULE_KEY = 'branch-switch'; + +const CONFIRMATION_MARKERS = [ + 'BRANCH-SWITCH-CONFIRMED', + 'RECOVERY-INTENT:', +]; + +export function decide({ + toolName, + command, + expectedBranch, + actualBranch, + assistantText, + override, +}) { + if (toolName !== 'Bash' || typeof command !== 'string') return { block: false }; + const kind = detectGitCommandKind(command); + if (!kind) return { block: false }; + if (override) return { block: false }; + + const exp = (expectedBranch || 'main').trim(); + const act = (actualBranch || '').trim(); + if (!act || act === exp) return { block: false }; + + for (const marker of CONFIRMATION_MARKERS) { + if (assistantText && assistantText.includes(marker)) return { block: false }; + } + + return { + block: true, + message: [ + `[enforce-branch-switch] About to run \`git ${kind}\` on branch "${act}" but expected "${exp}".`, + `Likely cause: parallel session switched HEAD silently (see Pravila §15.1).`, + ``, + `If intentional — write one of these in your next response BEFORE running the command:`, + ` BRANCH-SWITCH-CONFIRMED (you intend to commit on ${act})`, + ` RECOVERY-INTENT: (recovery operation, e.g., cherry-pick to main)`, + ``, + `Or include the override phrase "recovery" in the user's next prompt.`, + ].join('\n'), + }; +} + +async function main() { + try { + const raw = await readStdin(); + const event = parseEventJson(raw); + const toolName = event.tool_name || ''; + const 
command = (event.tool_input && event.tool_input.command) || ''; + const transcript = readTranscript(event.transcript_path); + const userPrompt = lastUserPromptText(transcript); + const override = findOverride(userPrompt, RULE_KEY); + if (override) logOverride(RULE_KEY, override, event.session_id); + + const expected = getExpectedBranch(event.session_id) || 'main'; + const actual = readGitBranch(); + const assistantText = lastAssistantText(transcript); + + const result = decide({ + toolName, command, + expectedBranch: expected, + actualBranch: actual, + assistantText, + override, + }); + exitDecision(result); + } catch { + exitDecision({ block: false }); + } +} + +const isCli = process.argv[1] && process.argv[1].replace(/\\/g, '/').endsWith('/enforce-branch-switch.mjs'); +if (isCli) main(); diff --git a/tools/enforce-branch-switch.test.mjs b/tools/enforce-branch-switch.test.mjs new file mode 100644 index 00000000..11dbd3de --- /dev/null +++ b/tools/enforce-branch-switch.test.mjs @@ -0,0 +1,92 @@ +import { describe, it, expect } from 'vitest'; +import { decide } from './enforce-branch-switch.mjs'; + +describe('enforce-branch-switch / decide', () => { + it('allows non-Bash tools', () => { + expect(decide({ toolName: 'Edit', command: '' }).block).toBe(false); + }); + + it('allows non-git Bash commands', () => { + expect(decide({ toolName: 'Bash', command: 'ls -la', actualBranch: 'feat/x', expectedBranch: 'main' }).block).toBe(false); + }); + + it('allows git status / git log (read-only)', () => { + expect(decide({ toolName: 'Bash', command: 'git status', actualBranch: 'feat/x', expectedBranch: 'main' }).block).toBe(false); + }); + + it('blocks git commit when actual != expected', () => { + const r = decide({ + toolName: 'Bash', + command: 'git commit -m "x"', + actualBranch: 'feat/supplier', + expectedBranch: 'main', + assistantText: 'some random text', + }); + expect(r.block).toBe(true); + expect(r.message).toMatch(/feat\/supplier.*main/); + }); + + it('blocks git 
push on wrong branch', () => { + const r = decide({ + toolName: 'Bash', + command: 'LEFTHOOK=0 git push origin main', + actualBranch: 'feat/other', + expectedBranch: 'main', + assistantText: '', + }); + expect(r.block).toBe(true); + }); + + it('allows when BRANCH-SWITCH-CONFIRMED marker present in assistant text', () => { + const r = decide({ + toolName: 'Bash', + command: 'git commit -m "x"', + actualBranch: 'feat/x', + expectedBranch: 'main', + assistantText: 'BRANCH-SWITCH-CONFIRMED — продолжаю на feat/x по плану', + }); + expect(r.block).toBe(false); + }); + + it('allows when RECOVERY-INTENT marker present', () => { + const r = decide({ + toolName: 'Bash', + command: 'git cherry-pick abc123', + actualBranch: 'main', + expectedBranch: 'feat/x', + assistantText: 'RECOVERY-INTENT: cherry-pick после смены ветки чужой сессией', + }); + expect(r.block).toBe(false); + }); + + it('allows when override phrase present', () => { + const r = decide({ + toolName: 'Bash', + command: 'git commit -m "x"', + actualBranch: 'feat/x', + expectedBranch: 'main', + assistantText: '', + override: { phrase: 'recovery', suppresses: ['branch-switch'] }, + }); + expect(r.block).toBe(false); + }); + + it('allows on match', () => { + const r = decide({ + toolName: 'Bash', + command: 'git commit -m "x"', + actualBranch: 'main', + expectedBranch: 'main', + }); + expect(r.block).toBe(false); + }); + + it('defaults expected to "main" if unset and matches when on main', () => { + expect(decide({ toolName: 'Bash', command: 'git commit', actualBranch: 'main', expectedBranch: '' }).block).toBe(false); + }); + + it('defaults expected to "main" if unset and blocks when on feature branch', () => { + const r = decide({ toolName: 'Bash', command: 'git commit', actualBranch: 'feat/x', expectedBranch: '' }); + expect(r.block).toBe(true); + }); +}); diff --git a/tools/enforce-classifier-match.mjs b/tools/enforce-classifier-match.mjs new file mode 100644 index 00000000..4b9551a6 --- /dev/null +++ 
b/tools/enforce-classifier-match.mjs @@ -0,0 +1,105 @@ +#!/usr/bin/env node +/** + * Rule #8 — Classifier-mismatch enforce. + * + * Stop hook. Reads classifier output from router-state. If classifier recommended + * a node with confidence >= threshold AND the turn DIDN'T invoke a matching + * skill/task — block. + * + * Override: "без скилов" / "direct ok" / explicit "override: " line in + * assistant text. + * + * Spec: docs/superpowers/specs/2026-05-25-enforce-hard-rules-design.md + */ + +import { + readStdin, + parseEventJson, + readTranscript, + lastUserPromptText, + lastAssistantText, + turnToolUses, + findOverride, + logOverride, + exitDecision, + readRouterState, +} from './enforce-hook-helpers.mjs'; + +const RULE_KEY = 'classifier-mismatch'; +const CONFIDENCE_THRESHOLD = 0.7; + +const MUTATING_TOOLS = new Set(['Edit', 'Write', 'MultiEdit', 'NotebookEdit', 'Bash']); + +/** Normalize a node id: strip "superpowers:" / "skill:" prefix; allow #ID. */ +function normalizeNode(s) { + if (typeof s !== 'string') return ''; + return s.toLowerCase().replace(/^skill:/, '').replace(/^superpowers:/, ''); +} + +function nodeMatches(recommendation, toolUse) { + if (!recommendation || !toolUse) return false; + const rec = normalizeNode(recommendation); + if (!rec) return false; + if (toolUse.name === 'Skill') { + const s = normalizeNode(String(toolUse.input && toolUse.input.skill || '')); + if (s && (s === rec || s.includes(rec) || rec.includes(s))) return true; + } + if (toolUse.name === 'Task') { + const sub = String(toolUse.input && toolUse.input.subagent_type || '').toLowerCase(); + if (sub && rec.includes(sub)) return true; + } + return false; +} + +export function decide({ toolUses, recommendation, confidence, assistantText, override }) { + // Pure conversation: skip. 
+ const hasMutating = toolUses.some((u) => MUTATING_TOOLS.has(u.name)); + if (!hasMutating) return { block: false }; + if (override) return { block: false }; + + if (!recommendation) return { block: false }; + if (typeof confidence === 'number' && confidence < CONFIDENCE_THRESHOLD) return { block: false }; + + const matched = toolUses.some((u) => nodeMatches(recommendation, u)); + if (matched) return { block: false }; + + // Allow explicit override: lines like "override: " in assistant text. + if (assistantText && /\boverride:\s+\S/i.test(assistantText)) return { block: false }; + + return { + block: true, + message: [ + `[enforce-classifier-match] Classifier recommended "${recommendation}" (confidence=${confidence ?? 'n/a'}) but turn did not invoke that skill/node.`, + `Either:`, + ` - Invoke ${recommendation} via Skill / Task tool, OR`, + ` - Add an explicit "override: " line in your response, OR`, + ` - Include "без скилов" / "direct ok" in the next user prompt.`, + ].join('\n'), + }; +} + +async function main() { + try { + const raw = await readStdin(); + const event = parseEventJson(raw); + const transcript = readTranscript(event.transcript_path); + const userPrompt = lastUserPromptText(transcript); + const override = findOverride(userPrompt, RULE_KEY); + if (override) logOverride(RULE_KEY, override, event.session_id); + + const state = readRouterState(event.session_id); + const cls = state && state.classification; + const recommendation = cls && (cls.recommended_node || cls.recommendedNode); + const confidence = cls && typeof cls.confidence === 'number' ? 
cls.confidence : null; + const toolUses = turnToolUses(transcript); + const assistantText = lastAssistantText(transcript); + + const result = decide({ toolUses, recommendation, confidence, assistantText, override }); + exitDecision(result); + } catch { + exitDecision({ block: false }); + } +} + +const isCli = process.argv[1] && process.argv[1].replace(/\\/g, '/').endsWith('/enforce-classifier-match.mjs'); +if (isCli) main(); diff --git a/tools/enforce-classifier-match.test.mjs b/tools/enforce-classifier-match.test.mjs new file mode 100644 index 00000000..ccbe5724 --- /dev/null +++ b/tools/enforce-classifier-match.test.mjs @@ -0,0 +1,94 @@ +import { describe, it, expect } from 'vitest'; +import { decide } from './enforce-classifier-match.mjs'; + +describe('enforce-classifier-match / decide', () => { + it('allows pure conversation (no mutating tools)', () => { + expect(decide({ + toolUses: [{ name: 'Read' }], + recommendation: 'superpowers:writing-plans', + confidence: 0.9, + }).block).toBe(false); + }); + + it('allows when no recommendation', () => { + expect(decide({ + toolUses: [{ name: 'Edit', input: {} }], + recommendation: null, + confidence: null, + }).block).toBe(false); + }); + + it('allows when confidence below threshold', () => { + expect(decide({ + toolUses: [{ name: 'Edit', input: {} }], + recommendation: 'superpowers:writing-plans', + confidence: 0.5, + }).block).toBe(false); + }); + + it('blocks when recommendation high-confidence + no matching tool', () => { + const r = decide({ + toolUses: [{ name: 'Edit', input: { file_path: 'x.mjs' } }], + recommendation: 'superpowers:writing-plans', + confidence: 0.9, + }); + expect(r.block).toBe(true); + expect(r.message).toMatch(/writing-plans/); + }); + + it('allows when Skill tool invoked with matching name', () => { + const r = decide({ + toolUses: [ + { name: 'Skill', input: { skill: 'superpowers:writing-plans' } }, + { name: 'Edit', input: { file_path: 'x.mjs' } }, + ], + recommendation: 
'superpowers:writing-plans', + confidence: 0.9, + }); + expect(r.block).toBe(false); + }); + + it('matches normalized name without superpowers: prefix', () => { + const r = decide({ + toolUses: [ + { name: 'Skill', input: { skill: 'writing-plans' } }, + { name: 'Edit', input: {} }, + ], + recommendation: 'superpowers:writing-plans', + confidence: 0.9, + }); + expect(r.block).toBe(false); + }); + + it('matches Task subagent', () => { + const r = decide({ + toolUses: [ + { name: 'Task', input: { subagent_type: 'rls-reviewer' } }, + { name: 'Edit', input: {} }, + ], + recommendation: 'rls-reviewer', + confidence: 0.85, + }); + expect(r.block).toBe(false); + }); + + it('allows when explicit "override:" in assistant text', () => { + const r = decide({ + toolUses: [{ name: 'Edit', input: {} }], + recommendation: 'foo:bar', + confidence: 0.9, + assistantText: 'override: simpler direct edit, foo:bar overkill here\n', + }); + expect(r.block).toBe(false); + }); + + it('allows when override phrase present', () => { + const r = decide({ + toolUses: [{ name: 'Edit', input: {} }], + recommendation: 'foo:bar', + confidence: 0.9, + override: { phrase: 'direct ok', suppresses: ['classifier-mismatch'] }, + }); + expect(r.block).toBe(false); + }); +}); diff --git a/tools/enforce-coverage-verify.mjs b/tools/enforce-coverage-verify.mjs new file mode 100644 index 00000000..6b7577fc --- /dev/null +++ b/tools/enforce-coverage-verify.mjs @@ -0,0 +1,101 @@ +#!/usr/bin/env node +/** + * Rule #2 — Coverage tag verified against artifacts (Stop hook). + * + * Reads transcript at Stop event. Parses `coverage: :` from last + * assistant text. 
Then: + * - channel=skill / id=X — require Skill tool_use with input.skill === X + * - channel=node — accept any tool_use that produced work (>= 1 mutating tool) + * - channel=direct — accept (Rule #8 handles direct-vs-classifier mismatch) + * - channel=chain / hook / agent — accept (lighter discipline) + * - missing coverage line — block + * + * Override: "без скилов" / "direct ok" suppress this rule. + * + * NB: only fires when the assistant ACTUALLY did some work (>=1 tool_use). + * Pure conversational turns (no tool calls) pass without coverage requirement. + * + * Spec: docs/superpowers/specs/2026-05-25-enforce-hard-rules-design.md + */ + +import { + readStdin, + parseEventJson, + readTranscript, + lastUserPromptText, + lastAssistantText, + parseCoverageLine, + turnToolUses, + findOverride, + logOverride, + exitDecision, +} from './enforce-hook-helpers.mjs'; + +const RULE_KEY = 'coverage-skill-match'; + +const MUTATING_TOOLS = new Set([ + 'Edit', 'Write', 'MultiEdit', 'NotebookEdit', 'Bash', +]); + +export function decide({ + toolUses, assistantText, override, +}) { + // Pure conversational turn — skip. 
+ const hasMutating = toolUses.some((u) => MUTATING_TOOLS.has(u.name)); + if (!hasMutating) return { block: false }; + if (override) return { block: false }; + + const cov = parseCoverageLine(assistantText); + if (!cov) { + return { + block: true, + message: [ + `[enforce-coverage-verify] Turn performed mutating tool calls but assistant response has no \`coverage:\` line.`, + `Add as first line of next response:`, + ` coverage: skill: (e.g., skill:superpowers:test-driven-development)`, + ` coverage: direct: (e.g., direct:memory-sync, direct:git-recovery)`, + ``, + `Override: include "без скилов" or "direct ok" in your prompt.`, + ].join('\n'), + }; + } + + if (cov.channel === 'skill') { + const found = toolUses.some((u) => u.name === 'Skill' && u.input && (u.input.skill === cov.id || u.input.skill === cov.id.replace(/^superpowers:/, ''))); + if (!found) { + return { + block: true, + message: [ + `[enforce-coverage-verify] coverage says skill:${cov.id} but the Skill tool was never invoked with that name in this turn.`, + `Either invoke the skill via Skill tool, or switch coverage to direct: with justification.`, + ].join('\n'), + }; + } + return { block: false }; + } + + // direct / node / chain / hook / agent — accepted at this layer. 
+ return { block: false }; +} + +async function main() { + try { + const raw = await readStdin(); + const event = parseEventJson(raw); + const transcript = readTranscript(event.transcript_path); + const userPrompt = lastUserPromptText(transcript); + const override = findOverride(userPrompt, RULE_KEY); + if (override) logOverride(RULE_KEY, override, event.session_id); + + const toolUses = turnToolUses(transcript); + const assistantText = lastAssistantText(transcript); + + const result = decide({ toolUses, assistantText, override }); + exitDecision(result); + } catch { + exitDecision({ block: false }); + } +} + +const isCli = process.argv[1] && process.argv[1].replace(/\\/g, '/').endsWith('/enforce-coverage-verify.mjs'); +if (isCli) main(); diff --git a/tools/enforce-coverage-verify.test.mjs b/tools/enforce-coverage-verify.test.mjs new file mode 100644 index 00000000..aeecede1 --- /dev/null +++ b/tools/enforce-coverage-verify.test.mjs @@ -0,0 +1,74 @@ +import { describe, it, expect } from 'vitest'; +import { decide } from './enforce-coverage-verify.mjs'; + +describe('enforce-coverage-verify / decide', () => { + it('allows turn with no mutating tools (pure conversational)', () => { + const r = decide({ toolUses: [{ name: 'Read', input: {} }], assistantText: 'just talking' }); + expect(r.block).toBe(false); + }); + + it('blocks mutating turn with no coverage line', () => { + const r = decide({ + toolUses: [{ name: 'Edit', input: { file_path: 'foo.mjs' } }], + assistantText: 'just did some work', + }); + expect(r.block).toBe(true); + expect(r.message).toMatch(/no.*coverage/); + }); + + it('blocks when coverage says skill but Skill tool not invoked', () => { + const r = decide({ + toolUses: [{ name: 'Edit', input: { file_path: 'foo.mjs' } }], + assistantText: 'coverage: skill:superpowers:test-driven-development\nдалее…', + }); + expect(r.block).toBe(true); + expect(r.message).toMatch(/Skill tool was never invoked/); + }); + + it('allows when coverage says skill and Skill 
tool invoked with matching name', () => { + const r = decide({ + toolUses: [ + { name: 'Skill', input: { skill: 'superpowers:test-driven-development' } }, + { name: 'Edit', input: { file_path: 'foo.mjs' } }, + ], + assistantText: 'coverage: skill:superpowers:test-driven-development\nок', + }); + expect(r.block).toBe(false); + }); + + it('allows when coverage matches without superpowers: prefix in tool input', () => { + const r = decide({ + toolUses: [ + { name: 'Skill', input: { skill: 'test-driven-development' } }, + { name: 'Edit', input: { file_path: 'foo.mjs' } }, + ], + assistantText: 'coverage: skill:superpowers:test-driven-development', + }); + expect(r.block).toBe(false); + }); + + it('allows direct coverage', () => { + const r = decide({ + toolUses: [{ name: 'Edit', input: { file_path: 'memory/foo.md' } }], + assistantText: 'coverage: direct:memory-sync', + }); + expect(r.block).toBe(false); + }); + + it('allows node coverage', () => { + const r = decide({ + toolUses: [{ name: 'Edit', input: { file_path: 'foo.vue' } }], + assistantText: 'coverage: node:#19', + }); + expect(r.block).toBe(false); + }); + + it('allows when override phrase present', () => { + const r = decide({ + toolUses: [{ name: 'Edit', input: { file_path: 'foo.mjs' } }], + assistantText: 'no coverage', + override: { phrase: 'без скилов', suppresses: ['coverage-skill-match'] }, + }); + expect(r.block).toBe(false); + }); +}); diff --git a/tools/enforce-hook-helpers.mjs b/tools/enforce-hook-helpers.mjs new file mode 100644 index 00000000..96f4d6ae --- /dev/null +++ b/tools/enforce-hook-helpers.mjs @@ -0,0 +1,370 @@ +/** + * Shared helpers for the 10-rule enforcement hook layer. + * + * Spec: docs/superpowers/specs/2026-05-25-enforce-hard-rules-design.md + * Plan: docs/superpowers/plans/2026-05-25-enforce-hard-rules.md + * + * Design contract: ALL hooks MUST fail-quiet on internal error (exit 0 with empty {}). + * Only deliberate enforcement violations exit 2. 
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

/**
 * Read a readable stream to completion as a UTF-8 string.
 *
 * Never rejects:
 *  - resolves with the collected text on `end`;
 *  - resolves with whatever was collected so far after 4500 ms;
 *  - resolves with '' on a stream `error` event or when the stream object
 *    cannot be set up at all (e.g. it has no `setEncoding`/`on`).
 *
 * @param {NodeJS.ReadableStream} [stdinStream=process.stdin] stream to drain
 * @returns {Promise<string>} collected text, '' on empty input or error
 */
export async function readStdin(stdinStream = process.stdin) {
  return new Promise((resolve) => {
    let data = '';
    let settled = false;
    let timer = null;

    // Single exit path: first outcome wins, later events are ignored.
    const finish = (value) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      resolve(value);
    };

    timer = setTimeout(() => finish(data), 4500);

    try {
      stdinStream.setEncoding('utf-8');
      stdinStream.on('data', (chunk) => { data += chunk; });
      stdinStream.on('end', () => finish(data));
      stdinStream.on('error', () => finish(''));
    } catch {
      // A synchronous failure here used to reject the promise; the documented
      // contract is "'' on error", so settle quietly (and clear the timer).
      finish('');
    }
  });
}

/**
 * Parse the hook event payload.
 *
 * @param {string|null|undefined} raw JSON text received on stdin
 * @returns {object} the parsed object; `{}` for empty input, malformed JSON, or
 *   valid JSON that is not a plain object (null, number, string, array), so
 *   callers can always read properties off the result.
 */
export function parseEventJson(raw) {
  try {
    const parsed = JSON.parse(raw || '{}');
    const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    return isPlainObject ? parsed : {};
  } catch {
    return {};
  }
}

/**
 * Runtime directory: ~/.claude/runtime/. Creation is attempted on every call
 * and failures are ignored.
 *
 * @returns {string} the directory path (returned even if creation failed)
 */
export function runtimeDir() {
  const dir = join(homedir(), '.claude', 'runtime');
  try { mkdirSync(dir, { recursive: true }); } catch { /* ignore */ }
  return dir;
}

/**
 * Path of a per-session sentinel file inside the runtime directory.
 * The session id is interpolated verbatim into the file name.
 *
 * @param {string} name sentinel name, e.g. 'verify-pass'
 * @param {string} [sessionId] session id; 'unknown' when falsy
 * @returns {string} `<runtimeDir>/<name>-<sessionId>.json`
 */
export function sentinelPath(name, sessionId) {
  return join(runtimeDir(), `${name}-${sessionId || 'unknown'}.json`);
}

/**
 * Write a sentinel file containing `data` plus a `written_at` ISO timestamp
 * (the timestamp overrides any `written_at` key present in `data`).
 *
 * @param {string} name sentinel name
 * @param {string} [sessionId] session id
 * @param {object} [data] payload to persist
 * @returns {string|null} path written, or null when writing failed
 */
export function writeSentinel(name, sessionId, data) {
  try {
    const p = sentinelPath(name, sessionId);
    const payload = { ...data, written_at: new Date().toISOString() };
    writeFileSync(p, JSON.stringify(payload, null, 2));
    return p;
  } catch {
    return null;
  }
}
/**
 * Read and parse a sentinel file.
 *
 * @param {string} name sentinel name
 * @param {string} [sessionId] session id
 * @returns {object|null} parsed content, or null when missing/unreadable
 */
export function readSentinel(name, sessionId) {
  try {
    const p = sentinelPath(name, sessionId);
    if (!existsSync(p)) return null;
    return JSON.parse(readFileSync(p, 'utf-8'));
  } catch {
    return null;
  }
}

/**
 * Age of a sentinel in whole seconds, derived from its `written_at` field.
 *
 * @param {string} name sentinel name
 * @param {string} [sessionId] session id
 * @returns {number|null} seconds since `written_at`; null when the sentinel is
 *   missing, has no `written_at`, or the timestamp does not parse
 */
export function sentinelAgeSec(name, sessionId) {
  const s = readSentinel(name, sessionId);
  if (!s || !s.written_at) return null;
  const ms = Date.now() - new Date(s.written_at).getTime();
  if (!Number.isFinite(ms)) return null;
  return Math.floor(ms / 1000);
}

/**
 * Load a JSONL transcript: one JSON value per line; blank and unparsable lines
 * are skipped.
 *
 * @param {string} transcriptPath path to the transcript file
 * @returns {Array} parsed entries in file order; [] when the path is not a
 *   string, the file does not exist, or it cannot be read
 */
export function readTranscript(transcriptPath) {
  if (!transcriptPath || typeof transcriptPath !== 'string') return [];
  if (!existsSync(transcriptPath)) return [];
  try {
    const out = [];
    for (const line of readFileSync(transcriptPath, 'utf-8').split('\n')) {
      if (!line) continue;
      try { out.push(JSON.parse(line)); } catch { /* skip malformed line */ }
    }
    return out;
  } catch {
    return [];
  }
}

/**
 * Slice the transcript down to the last turn: everything from the most recent
 * real user prompt onwards. A "real" prompt is a user message whose content is
 * a non-blank string, or an array containing a text block and no tool_result
 * block (user messages that only carry tool results belong to the turn).
 *
 * @param {Array} entries transcript entries
 * @returns {Array} entries of the last turn; the whole list when no user
 *   prompt is found; [] for empty / non-array input
 */
export function lastTurnEntries(entries) {
  if (!Array.isArray(entries) || entries.length === 0) return [];
  for (let i = entries.length - 1; i >= 0; i--) {
    const e = entries[i];
    if (!e || !e.message || e.message.role !== 'user') continue;
    const c = e.message.content;
    if (typeof c === 'string' && c.trim().length > 0) return entries.slice(i);
    if (Array.isArray(c)) {
      const hasToolResult = c.some((b) => b && b.type === 'tool_result');
      const hasText = c.some((b) => b && b.type === 'text');
      if (hasText && !hasToolResult) return entries.slice(i);
    }
  }
  return entries;
}

/**
 * Text of the first entry of the last turn (normally the user prompt).
 *
 * @param {Array} entries transcript entries
 * @returns {string} string content as-is, or the text blocks joined with '\n';
 *   '' when unavailable
 */
export function lastUserPromptText(entries) {
  const turn = lastTurnEntries(entries);
  if (!turn || turn.length === 0) return '';
  const e = turn[0];
  if (!e || !e.message) return '';
  const c = e.message.content;
  if (typeof c === 'string') return c;
  if (Array.isArray(c)) {
    return c.filter((b) => b && b.type === 'text').map((b) => b.text || '').join('\n');
  }
  return '';
}

/**
 * Concatenate all assistant text of the last turn, each piece followed by '\n'.
 *
 * Handles both content shapes, mirroring lastUserPromptText(): an array of
 * blocks (only `text` blocks contribute) and a plain string (previously
 * ignored, which made a coverage line in such a message invisible).
 *
 * @param {Array} entries transcript entries
 * @returns {string} concatenated assistant text, '' when there is none
 */
export function lastAssistantText(entries) {
  let out = '';
  for (const e of lastTurnEntries(entries)) {
    if (!e || !e.message || e.message.role !== 'assistant') continue;
    const c = e.message.content;
    if (typeof c === 'string') {
      if (c.length > 0) out += `${c}\n`;
      continue;
    }
    if (!Array.isArray(c)) continue;
    for (const b of c) {
      if (b && b.type === 'text' && typeof b.text === 'string') out += `${b.text}\n`;
    }
  }
  return out;
}
// `coverage: <channel>:<id>`; the id stops at whitespace, angle brackets,
// backticks and quotes so that markdown wrapping never leaks into it.
const COVERAGE_RE = /coverage:\s*(skill|node|chain|hook|agent|direct)\s*:\s*([^\s<>`'"]+)/i;
// Sentence / markdown punctuation that may directly follow the id.
const COVERAGE_ID_TRAILING_RE = /[.,;:!?)\]*]+$/;

/**
 * Extract the first coverage declaration from a piece of text.
 *
 * Recognised channels: skill, node, chain, hook, agent, direct (matched
 * case-insensitively, returned lower-cased). Trailing punctuation and markdown
 * emphasis after the id (e.g. "`coverage: skill:foo`", "**coverage: x:y**.")
 * are stripped; previously they became part of the id and broke comparisons
 * against the invoked skill name.
 *
 * @param {string} text text to scan
 * @returns {{channel: string, id: string}|null} declaration, or null when
 *   `text` is not a string, nothing matches, or the id is empty after cleanup
 */
export function parseCoverageLine(text) {
  if (typeof text !== 'string') return null;
  const m = COVERAGE_RE.exec(text);
  if (!m) return null;
  const id = m[2].replace(COVERAGE_ID_TRAILING_RE, '');
  if (id === '') return null;
  return { channel: m[1].toLowerCase(), id };
}

/**
 * Collect every `tool_use` block of the last turn.
 *
 * @param {Array} entries transcript entries
 * @returns {Array<{name: string, input: object}>} tool uses in transcript
 *   order; `input` defaults to `{}` when absent
 */
export function turnToolUses(entries) {
  const uses = [];
  for (const e of lastTurnEntries(entries)) {
    const c = e && e.message && e.message.content;
    if (!Array.isArray(c)) continue;
    for (const b of c) {
      if (b && b.type === 'tool_use') uses.push({ name: b.name, input: b.input || {} });
    }
  }
  return uses;
}

/**
 * Collect every `tool_result` block of the last turn.
 *
 * @param {Array} entries transcript entries
 * @returns {Array<{tool_use_id: *, is_error: boolean, content: string}>}
 *   results in transcript order; `content` is the string content as-is, or the
 *   `text` of each part joined with '\n' for array content, else ''
 */
export function turnToolResults(entries) {
  const results = [];
  for (const e of lastTurnEntries(entries)) {
    const c = e && e.message && e.message.content;
    if (!Array.isArray(c)) continue;
    for (const b of c) {
      if (!b || b.type !== 'tool_result') continue;
      let txt = '';
      if (typeof b.content === 'string') {
        txt = b.content;
      } else if (Array.isArray(b.content)) {
        txt = b.content.map((p) => (p && p.text) || '').join('\n');
      }
      results.push({ tool_use_id: b.tool_use_id, is_error: b.is_error === true, content: txt });
    }
  }
  return results;
}
// Parsed vocab files keyed by the path they were loaded from. A single shared
// slot used to be returned for every path, so a second explicit path silently
// received the first file's content.
const _vocabCache = new Map();

/**
 * Load (and cache per path) the override vocabulary.
 *
 * @param {string} [path] vocab file; defaults to enforce-override-vocab.json
 *   located next to this module
 * @returns {object} the parsed vocab object (expected shape `{ phrases: [...] }`);
 *   `{ phrases: [] }` when the file is missing, unreadable, not valid JSON, or
 *   does not contain a JSON object. Failures are not cached.
 */
export function loadOverrideVocab(path) {
  try {
    const p = path || join(__dirname, 'enforce-override-vocab.json');
    const cached = _vocabCache.get(p);
    if (cached) return cached;
    if (!existsSync(p)) return { phrases: [] };
    const parsed = JSON.parse(readFileSync(p, 'utf-8'));
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
      return { phrases: [] };
    }
    _vocabCache.set(p, parsed);
    return parsed;
  } catch {
    return { phrases: [] };
  }
}

/** Drop every cached vocabulary (used by tests). */
export function _resetVocabCache() { _vocabCache.clear(); }

/**
 * Find the first vocabulary phrase that occurs in the user prompt
 * (case-insensitive substring match) and suppresses the given rule.
 *
 * @param {string} userPrompt text of the user's last prompt
 * @param {string} ruleKey rule identifier looked up in `suppresses`
 * @param {object} [vocab] vocabulary to use; defaults to loadOverrideVocab()
 * @returns {object|null} the matching phrase entry, or null. Malformed entries
 *   (non-object, non-string/empty `phrase`, non-array `suppresses`) are skipped.
 */
export function findOverride(userPrompt, ruleKey, vocab) {
  if (!userPrompt || typeof userPrompt !== 'string') return null;
  const v = vocab || loadOverrideVocab();
  const phrases = v && Array.isArray(v.phrases) ? v.phrases : [];
  const lo = userPrompt.toLowerCase();
  for (const p of phrases) {
    if (!p || typeof p.phrase !== 'string' || p.phrase === '') continue;
    if (!Array.isArray(p.suppresses)) continue;
    if (!lo.includes(p.phrase.toLowerCase())) continue;
    if (p.suppresses.includes(ruleKey)) return p;
  }
  return null;
}

/**
 * Append one usage record to `override-usage.jsonl` in the runtime directory.
 * Best-effort: write failures are ignored.
 *
 * @param {string} ruleKey rule that was suppressed
 * @param {object} phraseObj matched vocabulary entry (its `phrase` is logged)
 * @param {string} [sessionId] session id, logged as null when falsy
 */
export function logOverride(ruleKey, phraseObj, sessionId) {
  try {
    const f = join(runtimeDir(), 'override-usage.jsonl');
    const record = {
      ts: new Date().toISOString(),
      session_id: sessionId || null,
      rule: ruleKey,
      phrase: phraseObj && phraseObj.phrase,
    };
    appendFileSync(f, `${JSON.stringify(record)}\n`);
  } catch { /* ignore */ }
}
/**
 * Name of the currently checked-out git branch.
 *
 * Runs `git branch --show-current` through execFileSync with a fixed argument
 * list (no shell, nothing user-supplied in the command line), a 1 s timeout
 * and stderr discarded.
 *
 * @param {string} [cwd] directory to run git in; defaults to process.cwd()
 * @returns {string} trimmed command output, '' when git fails for any reason
 */
export function readGitBranch(cwd) {
  try {
    const stdout = execFileSync('git', ['branch', '--show-current'], {
      cwd: cwd || process.cwd(),
      encoding: 'utf-8',
      timeout: 1000,
      stdio: ['ignore', 'pipe', 'ignore'],
    });
    return stdout.trim();
  } catch {
    return '';
  }
}

/**
 * Location of the per-session "expected branch" marker file.
 *
 * @param {string} [sessionId] session id; 'unknown' when falsy
 * @returns {string} `<runtimeDir>/expected-branch-<sessionId>`
 */
export function expectedBranchPath(sessionId) {
  const dir = runtimeDir();
  return join(dir, `expected-branch-${sessionId || 'unknown'}`);
}

/**
 * Read the branch recorded for a session.
 *
 * @param {string} [sessionId] session id
 * @returns {string} trimmed file content, '' when the marker is missing or
 *   unreadable
 */
export function getExpectedBranch(sessionId) {
  try {
    const marker = expectedBranchPath(sessionId);
    return existsSync(marker) ? readFileSync(marker, 'utf-8').trim() : '';
  } catch {
    return '';
  }
}

/**
 * Record the branch a session is expected to stay on.
 *
 * @param {string} [sessionId] session id
 * @param {string} branch branch name; stored trimmed, '' when falsy
 * @returns {boolean} true when the marker was written, false on failure
 */
export function setExpectedBranch(sessionId, branch) {
  try {
    const marker = expectedBranchPath(sessionId);
    const value = String(branch || '').trim();
    writeFileSync(marker, value);
    return true;
  } catch {
    return false;
  }
}

/**
 * Append one rationalization flag to the session's JSONL flag file.
 * Best-effort: failures are ignored.
 *
 * @param {string} [sessionId] session id; 'unknown' when falsy
 * @param {string} kind flag category
 * @param {*} evidence supporting detail; strings are cut to 240 characters,
 *   other values are stored as given
 */
export function appendRationalizationFlag(sessionId, kind, evidence) {
  try {
    const flagFile = join(runtimeDir(), `rationalization-flags-${sessionId || 'unknown'}.jsonl`);
    const clipped = typeof evidence === 'string' ? evidence.slice(0, 240) : evidence;
    const record = { ts: new Date().toISOString(), kind, evidence: clipped };
    appendFileSync(flagFile, JSON.stringify(record) + '\n');
  } catch { /* ignore */ }
}
/**
 * Read every rationalization flag recorded for a session.
 *
 * @param {string} [sessionId] session id; 'unknown' when falsy
 * @returns {Array<object>} parsed JSONL records in file order (unparsable
 *   lines dropped); [] when the file is missing or unreadable
 */
export function readRationalizationFlags(sessionId) {
  try {
    const f = join(runtimeDir(), `rationalization-flags-${sessionId || 'unknown'}.jsonl`);
    if (!existsSync(f)) return [];
    return readFileSync(f, 'utf-8')
      .split('\n')
      .filter(Boolean)
      .map((l) => {
        try { return JSON.parse(l); } catch { return null; }
      })
      .filter(Boolean);
  } catch {
    return [];
  }
}

/**
 * Read the `router-state-<sessionId>.json` file from the runtime directory.
 *
 * @param {string} [sessionId] session id; 'unknown' when falsy
 * @returns {object|null} parsed state, or null when missing/unreadable
 */
export function readRouterState(sessionId) {
  try {
    const p = join(runtimeDir(), `router-state-${sessionId || 'unknown'}.json`);
    if (!existsSync(p)) return null;
    return JSON.parse(readFileSync(p, 'utf-8'));
  } catch {
    return null;
  }
}

/**
 * Terminate the hook process with the given verdict.
 *
 * - block: writes `message` (when present) plus a newline to stderr and exits
 *   with code 2;
 * - otherwise: writes `{}` to stdout and exits with code 0.
 *
 * @param {{block?: boolean, message?: string}} [decision] verdict to report
 */
export function exitDecision({ block, message } = {}) {
  if (block) {
    if (message) process.stderr.write(message + '\n');
    process.exit(2);
    return; // only reachable when process.exit is stubbed
  }
  try { process.stdout.write('{}'); } catch { /* ignore */ }
  process.exit(0);
}

/**
 * Decide whether a path is production code for the purposes of the hooks.
 *
 * Backslashes are normalised to '/'. Test files (`*.test.*`, `*.spec.*`) and
 * anything under `test/`, `tests/` or `spec/` are never production. Production
 * is: `tools/<file>.mjs`, `app/app/**.php`, `resources/js/**.{vue,ts,tsx,js}`.
 *
 * @param {string} p file path (absolute or relative)
 * @returns {boolean} true for production code paths
 */
export function isProductionCodePath(p) {
  if (typeof p !== 'string') return false;
  const n = p.replace(/\\/g, '/');
  if (/\.(test|spec)\.[a-z0-9]+$/i.test(n)) return false;
  if (/(?:^|\/)tests?\//.test(n) || /(?:^|\/)spec\//.test(n)) return false;
  if (/(?:^|\/)tools\/[^/]+\.mjs$/.test(n)) return true;
  if (/(?:^|\/)app\/app\/.+\.php$/.test(n)) return true;
  if (/(?:^|\/)resources\/js\/.+\.(vue|ts|tsx|js)$/.test(n)) return true;
  return false;
}

/**
 * Decide whether a path points at a memory-store markdown file: a `.md` file
 * directly inside a `memory/` directory, or a file named `MEMORY.md`
 * (case-insensitive).
 *
 * The directory / file name may sit at the very start of the path, so relative
 * paths such as `memory/foo.md` and a bare `MEMORY.md` are recognised too
 * (same `(?:^|/)` anchoring as isProductionCodePath; a leading '/' used to be
 * required, letting relative paths slip past the rule).
 *
 * @param {string} p file path (absolute or relative, either slash style)
 * @returns {boolean} true for memory-store paths
 */
export function isMemoryPath(p) {
  if (typeof p !== 'string') return false;
  const n = p.replace(/\\/g, '/');
  if (/(?:^|\/)memory\/[^/]+\.md$/i.test(n)) return true;
  if (/(?:^|\/)MEMORY\.md$/i.test(n)) return true;
  return false;
}

// Checked in order; the first matching pattern decides the kind.
const GIT_COMMAND_KINDS = [
  ['push', /(^|\s|;|&&|\|\|)git\s+push\b/i],
  ['commit', /(^|\s|;|&&|\|\|)git\s+commit\b/i],
  ['cherry-pick', /(^|\s|;|&&|\|\|)git\s+cherry-pick\b/i],
  ['reset-hard', /(^|\s|;|&&|\|\|)git\s+reset\s+--hard\b/i],
  ['rebase', /(^|\s|;|&&|\|\|)git\s+rebase\b/i],
  ['branch-force', /(^|\s|;|&&|\|\|)git\s+branch\s+-[df]\b/i],
];

/**
 * Classify a shell command line by the history-changing git operation it
 * contains.
 *
 * @param {string} cmd shell command line
 * @returns {'push'|'commit'|'cherry-pick'|'reset-hard'|'rebase'|'branch-force'|null}
 *   the first kind (in the order above) whose pattern occurs anywhere in the
 *   command, or null when none does / `cmd` is not a string
 */
export function detectGitCommandKind(cmd) {
  if (typeof cmd !== 'string') return null;
  const c = cmd.trim();
  for (const [kind, pattern] of GIT_COMMAND_KINDS) {
    if (pattern.test(c)) return kind;
  }
  return null;
}
// Leading `KEY=value ` environment assignments in front of a command.
const ENV_PREFIX_RE = /^(?:[a-z_][a-z0-9_]*=\S+\s+)+/i;
// Shell utilities that can never be a test run, whatever their arguments say.
const NON_TEST_COMMAND_RE = /^(?:git|scp|ssh|curl|wget|cat|echo|grep|awk|sed|tar|gzip|bzip2|cp|mv|rm|mkdir|touch|chmod|chown|ls|cd|pwd|head|tail|find)\b/;
// An argument naming a single test file (foo.test.mjs, Bar.spec.ts, ...).
const SINGLE_TEST_FILE_RE = /\S+\.(?:test|spec)\.[a-z0-9]+\b/;
// vitest test-name filter (-t / --testNamePattern; command is lower-cased).
const TEST_NAME_FILTER_RE = /(?:^|\s)(?:-t|--testnamepattern)(?=[\s=]|$)/;

/**
 * Classify a shell command line as a FULL test-suite run.
 *
 * Only the first real command counts: the line is lower-cased and split on
 * `&&`, `||`, `;` and `|`; leading `KEY=value` prefixes are stripped and `cd`
 * segments skipped. Text inside later segments or inside the arguments of the
 * first command (commit messages, echo strings) therefore never counts. The
 * split is naive about quotes, which is acceptable because only the first
 * segment is inspected.
 *
 * A run narrowed to a single test file (any `*.test.*` / `*.spec.*` argument)
 * or, for vitest, by a test-name filter is NOT a full run. Previously only
 * `tools/*.test.mjs` was treated as narrow, so e.g. a single `.spec.ts` run
 * was recorded as full-suite verification.
 *
 * @param {string} cmd shell command line
 * @returns {'vitest-full'|'npm-test'|'pest'|null} runner kind, or null when the
 *   command is not a full test run / `cmd` is not a string
 */
export function detectFullTestRun(cmd) {
  if (typeof cmd !== 'string') return null;
  const segments = cmd.toLowerCase().split(/\s*(?:&&|\|\||;|\|)\s*/);

  let firstReal = null;
  for (const rawSegment of segments) {
    const seg = rawSegment.trim().replace(ENV_PREFIX_RE, '').trim();
    if (/^cd\b/.test(seg)) continue;
    firstReal = seg;
    break;
  }
  if (!firstReal) return null;

  // Hard guard: a first command such as git/echo/cat means the compound is not
  // a test run, regardless of what its quoted arguments contain.
  if (NON_TEST_COMMAND_RE.test(firstReal)) return null;

  const targetsSingleFile = SINGLE_TEST_FILE_RE.test(firstReal);

  if (/^(?:npx\s+)?vitest\s+run\b/.test(firstReal)) {
    if (targetsSingleFile || TEST_NAME_FILTER_RE.test(firstReal)) return null;
    return 'vitest-full';
  }
  // `npm test` is the same script as `npm run test`.
  if (/^npm\s+(?:run\s+)?test\b/.test(firstReal)) {
    return targetsSingleFile ? null : 'npm-test';
  }
  if (/^php\s+artisan\s+test\b/.test(firstReal) || /^composer\s+test\b/.test(firstReal)) return 'pest';
  if (/^(?:\.\/)?(?:vendor\/bin\/)?pest\b/.test(firstReal)) return 'pest';
  return null;
}
/**
 * Tell whether the session has a recent, passing `verify-pass` sentinel.
 *
 * The sentinel is read exactly once and both checks (`result === 'pass'` and
 * the age derived from `written_at`) are made against that single snapshot.
 * Reading it twice allowed the result and the age to come from two different
 * versions of the file when it was rewritten or removed in between.
 *
 * @param {string} [sessionId] session id
 * @param {number} [maxAgeSec=1800] maximum accepted age in whole seconds
 * @returns {boolean} true when the sentinel exists, records a pass, carries a
 *   parsable `written_at`, and is at most `maxAgeSec` seconds old
 */
export function isVerificationFresh(sessionId, maxAgeSec = 1800) {
  const s = readSentinel('verify-pass', sessionId);
  if (!s || s.result !== 'pass' || !s.written_at) return false;
  const ageMs = Date.now() - new Date(s.written_at).getTime();
  if (!Number.isFinite(ageMs)) return false;
  return Math.floor(ageMs / 1000) <= maxAgeSec;
}
expect(parseEventJson('')).toEqual({}); + expect(parseEventJson(null)).toEqual({}); + }); +}); + +describe('parseCoverageLine', () => { + it('extracts skill coverage', () => { + const t = 'экономия: 100%\n\ncoverage: skill:superpowers:test-driven-development\n\nок поехали'; + expect(parseCoverageLine(t)).toEqual({ channel: 'skill', id: 'superpowers:test-driven-development' }); + }); + it('extracts direct coverage', () => { + expect(parseCoverageLine('coverage: direct:memory-sync')).toEqual({ channel: 'direct', id: 'memory-sync' }); + }); + it('extracts node coverage', () => { + expect(parseCoverageLine('coverage: node:#19')).toEqual({ channel: 'node', id: '#19' }); + }); + it('is case-insensitive on channel keyword', () => { + expect(parseCoverageLine('Coverage: Skill:foo')).toEqual({ channel: 'skill', id: 'foo' }); + }); + it('returns null when no coverage line present', () => { + expect(parseCoverageLine('just some text')).toBeNull(); + }); + it('returns null on non-string input', () => { + expect(parseCoverageLine(null)).toBeNull(); + expect(parseCoverageLine(42)).toBeNull(); + }); +}); + +describe('lastTurnEntries / lastUserPromptText / lastAssistantText / turnToolUses', () => { + const entries = [ + { message: { role: 'user', content: 'old prompt' } }, + { message: { role: 'assistant', content: [{ type: 'text', text: 'old reply' }] } }, + { message: { role: 'user', content: 'new prompt' } }, + { message: { role: 'assistant', content: [ + { type: 'text', text: 'I will edit' }, + { type: 'tool_use', name: 'Edit', input: { file_path: 'a.mjs' } }, + ] } }, + { message: { role: 'user', content: [{ type: 'tool_result', tool_use_id: 'x', content: 'ok', is_error: false }] } }, + ]; + + it('lastTurnEntries starts from last real user prompt', () => { + const turn = lastTurnEntries(entries); + expect(turn).toHaveLength(3); // new prompt + assistant + tool_result + expect(turn[0].message.content).toBe('new prompt'); + }); + + it('lastUserPromptText returns last user 
prompt string', () => { + expect(lastUserPromptText(entries)).toBe('new prompt'); + }); + + it('lastAssistantText concatenates assistant text blocks of last turn only', () => { + expect(lastAssistantText(entries)).toContain('I will edit'); + expect(lastAssistantText(entries)).not.toContain('old reply'); + }); + + it('turnToolUses returns only tool_use blocks from last turn', () => { + const uses = turnToolUses(entries); + expect(uses).toHaveLength(1); + expect(uses[0].name).toBe('Edit'); + expect(uses[0].input.file_path).toBe('a.mjs'); + }); + + it('turnToolResults includes is_error flag and concatenated text', () => { + const results = turnToolResults(entries); + expect(results).toHaveLength(1); + expect(results[0].is_error).toBe(false); + expect(results[0].content).toBe('ok'); + }); + + it('handles array text content in user message', () => { + const eps = [ + { message: { role: 'user', content: [{ type: 'text', text: 'hello' }, { type: 'text', text: ' world' }] } }, + ]; + expect(lastUserPromptText(eps)).toBe('hello\n world'); + }); +}); + +describe('loadOverrideVocab / findOverride', () => { + let tmp; + beforeEach(() => { + tmp = mkdtempSync(join(tmpdir(), 'vocab-')); + _resetVocabCache(); + }); + afterEach(() => { + rmSync(tmp, { recursive: true, force: true }); + _resetVocabCache(); + }); + + it('loads vocab from explicit path', () => { + const p = join(tmp, 'vocab.json'); + writeFileSync(p, JSON.stringify({ + phrases: [ + { phrase: 'без скилов', suppresses: ['skill-required'] }, + ], + })); + const v = loadOverrideVocab(p); + expect(v.phrases).toHaveLength(1); + }); + + it('findOverride matches case-insensitively', () => { + const v = { phrases: [{ phrase: 'СРОЧНО', suppresses: ['verify-before-push'] }] }; + expect(findOverride('очень срочно нужно', 'verify-before-push', v)).toMatchObject({ phrase: 'СРОЧНО' }); + expect(findOverride('hello world', 'verify-before-push', v)).toBeNull(); + }); + + it('findOverride returns null if rule key not in suppresses', 
() => { + const v = { phrases: [{ phrase: 'без скилов', suppresses: ['skill-required'] }] }; + expect(findOverride('без скилов давай', 'tdd-gate', v)).toBeNull(); + expect(findOverride('без скилов давай', 'skill-required', v)).not.toBeNull(); + }); + + it('findOverride returns null on empty prompt / vocab', () => { + expect(findOverride('', 'x', { phrases: [] })).toBeNull(); + expect(findOverride(null, 'x', { phrases: [{ phrase: 'a', suppresses: ['x'] }] })).toBeNull(); + }); + + it('loads default vocab file when no path given (smoke)', () => { + _resetVocabCache(); + const v = loadOverrideVocab(); + expect(Array.isArray(v.phrases)).toBe(true); + expect(v.phrases.length).toBeGreaterThan(0); + }); +}); + +describe('isProductionCodePath', () => { + it('classifies tools/*.mjs as production', () => { + expect(isProductionCodePath('tools/router-classifier.mjs')).toBe(true); + expect(isProductionCodePath('c:/моя/проекты/портал crm/Документация/tools/foo.mjs')).toBe(true); + }); + it('excludes test files', () => { + expect(isProductionCodePath('tools/router-classifier.test.mjs')).toBe(false); + expect(isProductionCodePath('tools/foo.spec.mjs')).toBe(false); + }); + it('classifies app/app/**.php as production', () => { + expect(isProductionCodePath('app/app/Http/Controllers/X.php')).toBe(true); + }); + it('excludes app/tests/**', () => { + expect(isProductionCodePath('app/tests/Feature/X.php')).toBe(false); + }); + it('classifies resources/js/**.vue|ts|tsx|js as production', () => { + expect(isProductionCodePath('resources/js/views/Dashboard.vue')).toBe(true); + expect(isProductionCodePath('resources/js/api/admin.ts')).toBe(true); + }); + it('excludes *.spec.ts/*.test.ts', () => { + expect(isProductionCodePath('resources/js/views/Dashboard.spec.ts')).toBe(false); + expect(isProductionCodePath('resources/js/views/Dashboard.test.ts')).toBe(false); + }); + it('returns false for non-production paths', () => { + expect(isProductionCodePath('docs/x.md')).toBe(false); + 
expect(isProductionCodePath('CLAUDE.md')).toBe(false); + expect(isProductionCodePath('package.json')).toBe(false); + }); +}); + +describe('isMemoryPath', () => { + it('matches user-memory store .md files', () => { + expect(isMemoryPath('C:\\Users\\Administrator\\.claude\\projects\\proj\\memory\\reference.md')).toBe(true); + expect(isMemoryPath('/Users/x/.claude/projects/proj/memory/foo.md')).toBe(true); + }); + it('matches MEMORY.md regardless of folder', () => { + expect(isMemoryPath('C:\\Users\\x\\.claude\\projects\\proj\\memory\\MEMORY.md')).toBe(true); + expect(isMemoryPath('/foo/MEMORY.md')).toBe(true); + }); + it('returns false for normal docs', () => { + expect(isMemoryPath('docs/x.md')).toBe(false); + expect(isMemoryPath('CLAUDE.md')).toBe(false); + }); +}); + +describe('detectGitCommandKind', () => { + it('detects push', () => { + expect(detectGitCommandKind('git push origin main')).toBe('push'); + expect(detectGitCommandKind('LEFTHOOK=0 git push')).toBe('push'); + }); + it('detects commit', () => { + expect(detectGitCommandKind('git commit -m "x"')).toBe('commit'); + }); + it('detects cherry-pick', () => { + expect(detectGitCommandKind('git cherry-pick abc123')).toBe('cherry-pick'); + }); + it('detects branch -f', () => { + expect(detectGitCommandKind('git branch -f main HEAD')).toBe('branch-force'); + expect(detectGitCommandKind('git branch -d feature')).toBe('branch-force'); + }); + it('detects rebase', () => { + expect(detectGitCommandKind('git rebase main')).toBe('rebase'); + }); + it('returns null for non-git commands', () => { + expect(detectGitCommandKind('ls -la')).toBeNull(); + expect(detectGitCommandKind('git status')).toBeNull(); + }); +}); + +describe('detectFullTestRun', () => { + it('detects vitest run as full when no specific path', () => { + expect(detectFullTestRun('npx vitest run')).toBe('vitest-full'); + expect(detectFullTestRun('npx vitest run --reporter=basic')).toBe('vitest-full'); + }); + it('returns null for narrow vitest with 
specific test path', () => { + expect(detectFullTestRun('npx vitest run tools/foo.test.mjs')).toBeNull(); + }); + it('detects pest / composer test', () => { + expect(detectFullTestRun('php artisan test')).toBe('pest'); + expect(detectFullTestRun('composer test')).toBe('pest'); + expect(detectFullTestRun('./vendor/bin/pest')).toBe('pest'); + }); + it('returns null for non-test commands', () => { + expect(detectFullTestRun('git status')).toBeNull(); + }); + + it('returns null when "vitest run" appears INSIDE a git commit message (false-positive guard)', () => { + // Real bug we hit during bootstrap: commit message saying "full vitest run + // (8092/8092)" caused detectFullTestRun to match and overwrite sentinel. + expect(detectFullTestRun('git commit -m "feat: full vitest run all green"')).toBeNull(); + expect(detectFullTestRun('LEFTHOOK=0 git commit -m "ran pest"')).toBeNull(); + expect(detectFullTestRun('echo "pest passed" && ls')).toBeNull(); + expect(detectFullTestRun('cat sentinel | grep vitest')).toBeNull(); + }); + + it('still detects vitest in compound command starting with cd or having cat/echo segments', () => { + // Second bug: overly aggressive guard blocked legitimate vitest run that + // appeared in a compound command with cd / cat / echo somewhere. + // We want: ANY segment starting with `npx vitest run` (or pest) counts. 
+ expect(detectFullTestRun('cd /path && npx vitest run tools/ 2>&1 | tail -5')).toBe('vitest-full'); + expect(detectFullTestRun('LEFTHOOK=0 npx vitest run')).toBe('vitest-full'); + expect(detectFullTestRun('npx vitest run && echo done')).toBe('vitest-full'); + expect(detectFullTestRun('cd app && composer test')).toBe('pest'); + expect(detectFullTestRun('cd app && php artisan test')).toBe('pest'); + expect(detectFullTestRun('./vendor/bin/pest')).toBe('pest'); + }); + + it('returns null when git commit message itself contains a compound that looks like test run (third false-positive)', () => { + // Third bug: split-by-&& naively splits inside quoted commit messages. + // A commit message like `git commit -m "... npx vitest run ..."` would + // produce a segment `npx vitest run` from inside the quoted string. + // Fix: identify FIRST real command (after cd/env), if it's git/etc → null. + expect(detectFullTestRun('git commit -m "fix: command like cd ... && npx vitest run"')).toBeNull(); + expect(detectFullTestRun('cd /path && git commit -m "and then npx vitest run && echo done"')).toBeNull(); + expect(detectFullTestRun('git push origin main')).toBeNull(); + expect(detectFullTestRun('cd app && cp src dst')).toBeNull(); + }); +}); diff --git a/tools/enforce-memory-coverage.mjs b/tools/enforce-memory-coverage.mjs new file mode 100644 index 00000000..bc41e89d --- /dev/null +++ b/tools/enforce-memory-coverage.mjs @@ -0,0 +1,83 @@ +#!/usr/bin/env node +/** + * Rule #5 — Memory write requires memory-sync coverage. + * + * PreToolUse hook on Edit / Write / MultiEdit. If the file_path looks like a + * memory store .md (memory/*.md or MEMORY.md), require the last assistant + * message to declare `coverage: direct:memory-sync` OR `coverage: skill:*` for + * a memory-related skill. Otherwise block with a re-announce instruction. + * + * Override phrase: `memory dump` in user's last prompt suppresses this rule. 
const RULE_KEY = 'memory-sync-coverage';

/**
 * Does a parsed coverage declaration authorise a memory write?
 *
 * @param {{channel: string, id: string}|null} cov parsed coverage declaration
 * @returns {boolean} true for `direct:*memory-sync*` or any `skill:*memory*`
 */
function isMemorySyncCoverage(cov) {
  if (!cov) return false;
  if (cov.channel === 'direct' && /memory-sync/i.test(cov.id)) return true;
  if (cov.channel === 'skill' && /memory/i.test(cov.id)) return true;
  return false;
}

/**
 * Most recent coverage declaration in the assistant text of the turn.
 *
 * Lines are scanned from the end so that a re-announcement supersedes an
 * earlier declaration; the whole text is tried as a fallback for a declaration
 * that does not sit on a single line.
 *
 * @param {string} assistantText concatenated assistant text of the turn
 * @returns {{channel: string, id: string}|null} latest declaration, or null
 */
function latestCoverage(assistantText) {
  const text = typeof assistantText === 'string' ? assistantText : '';
  const lines = text.split('\n');
  for (let i = lines.length - 1; i >= 0; i--) {
    const cov = parseCoverageLine(lines[i]);
    if (cov) return cov;
  }
  return parseCoverageLine(text);
}

/**
 * Pure decision function of the memory-coverage rule.
 *
 * Blocks an Edit / Write / MultiEdit on a memory path unless the latest
 * coverage declaration of the turn is memory-sync coverage or an override is
 * present. The latest declaration is used because the block message asks for a
 * re-announcement followed by a retry; evaluating the first declaration of the
 * turn meant the stale one kept winning and the retry could never pass.
 *
 * @param {object} args
 * @param {string} args.toolName tool about to run
 * @param {string} args.filePath target path of the tool call
 * @param {Array} args.transcriptEntries parsed transcript entries
 * @param {object|null} [args.override] matched override phrase entry, if any
 * @returns {{block: boolean, message?: string}} verdict; `message` is present
 *   only when blocking
 */
export function decide({ toolName, filePath, transcriptEntries, override }) {
  if (!['Edit', 'Write', 'MultiEdit'].includes(toolName)) {
    return { block: false };
  }
  if (!isMemoryPath(filePath)) return { block: false };
  if (override) return { block: false };

  const cov = latestCoverage(lastAssistantText(transcriptEntries));
  if (isMemorySyncCoverage(cov)) return { block: false };

  return {
    block: true,
    message: [
      `[enforce-memory-coverage] Write to memory path requires memory-sync coverage tag.`,
      `Detected coverage: ${cov ? cov.channel + ':' + cov.id : 'NONE'} (stale or absent).`,
      ``,
      `Re-announce on a fresh assistant turn first:`,
      ` coverage: direct:memory-sync`,
      `Then retry the Edit/Write.`,
      ``,
      `Override: include the phrase "memory dump" in your prompt.`,
    ].join('\n'),
  };
}

/**
 * CLI entry point: reads the PreToolUse event from stdin, resolves the tool
 * name, target path, transcript and override, and exits with the verdict of
 * decide(). Any internal error results in a non-blocking exit.
 */
async function main() {
  try {
    const raw = await readStdin();
    const event = parseEventJson(raw);
    const toolName = event.tool_name || '';
    const filePath = (event.tool_input && (event.tool_input.file_path || event.tool_input.notebook_path)) || '';
    const transcript = readTranscript(event.transcript_path);
    const userPrompt = lastUserPromptText(transcript);
    const override = findOverride(userPrompt, RULE_KEY);
    if (override) logOverride(RULE_KEY, override, event.session_id);

    const result = decide({ toolName, filePath, transcriptEntries: transcript, override });
    exitDecision(result);
  } catch {
    // Fail-quiet on any internal error.
    exitDecision({ block: false });
  }
}

// Run only when executed as a script, not when imported (e.g. by tests).
const isCli = process.argv[1] && process.argv[1].replace(/\\/g, '/').endsWith('/enforce-memory-coverage.mjs');
if (isCli) main();
+ }); + + it('blocks memory path with TDD coverage (stale)', () => { + const r = decide({ + toolName: 'Edit', + filePath: 'C:\\Users\\x\\.claude\\projects\\proj\\memory\\foo.md', + transcriptEntries: entries('do', 'coverage: skill:superpowers:test-driven-development'), + }); + expect(r.block).toBe(true); + expect(r.message).toMatch(/memory-sync/); + }); + + it('blocks memory path with no coverage at all', () => { + const r = decide({ + toolName: 'Write', + filePath: '/Users/x/.claude/projects/p/memory/x.md', + transcriptEntries: entries('do', 'no coverage line here'), + }); + expect(r.block).toBe(true); + expect(r.message).toMatch(/NONE/); + }); + + it('allows memory path with direct:memory-sync coverage', () => { + const r = decide({ + toolName: 'Edit', + filePath: 'C:\\Users\\x\\.claude\\projects\\proj\\memory\\foo.md', + transcriptEntries: entries('do', 'coverage: direct:memory-sync\nок'), + }); + expect(r.block).toBe(false); + }); + + it('allows memory path with skill:memory-something coverage', () => { + const r = decide({ + toolName: 'Edit', + filePath: '/x/.claude/projects/p/memory/foo.md', + transcriptEntries: entries('do', 'coverage: skill:memory-coordinator'), + }); + expect(r.block).toBe(false); + }); + + it('allows memory path when override phrase present', () => { + const r = decide({ + toolName: 'Write', + filePath: '/x/.claude/projects/p/memory/foo.md', + transcriptEntries: entries('memory dump please', 'no coverage'), + override: { phrase: 'memory dump', suppresses: ['memory-sync-coverage'] }, + }); + expect(r.block).toBe(false); + }); + + it('skips non-Edit/Write/MultiEdit tools', () => { + const r = decide({ + toolName: 'Bash', + filePath: 'memory/x.md', + transcriptEntries: entries('do', 'no coverage'), + }); + expect(r.block).toBe(false); + }); + + it('matches MEMORY.md anywhere', () => { + const r = decide({ + toolName: 'Edit', + filePath: '/whatever/MEMORY.md', + transcriptEntries: entries('do', 'coverage: skill:tdd'), + }); + 
expect(r.block).toBe(true); + }); +}); diff --git a/tools/enforce-override-vocab.json b/tools/enforce-override-vocab.json new file mode 100644 index 00000000..4f814c12 --- /dev/null +++ b/tools/enforce-override-vocab.json @@ -0,0 +1,41 @@ +{ + "version": 1, + "comment": "Hard-coded override phrases. Substring-match (case-insensitive) against user's last prompt. Each phrase suppresses one or more rule categories for ONE prompt only.", + "phrases": [ + { + "phrase": "без скилов", + "suppresses": ["skill-required", "coverage-skill-match", "classifier-mismatch"], + "description": "Skill discipline relaxed for this one prompt" + }, + { + "phrase": "direct ok", + "suppresses": ["skill-required", "coverage-skill-match", "classifier-mismatch"], + "description": "Direct work allowed without skill invocation" + }, + { + "phrase": "срочно", + "suppresses": ["verify-before-commit", "verify-before-push", "tdd-gate"], + "description": "Urgency override: skip verification + TDD gate" + }, + { + "phrase": "быстрый коммит", + "suppresses": ["verify-before-commit", "tdd-gate", "writing-plans-required"], + "description": "Quick commit: skip TDD + verify + plans" + }, + { + "phrase": "recovery", + "suppresses": ["branch-switch", "git-recovery"], + "description": "Git recovery operation, branch-state mismatch ok" + }, + { + "phrase": "memory dump", + "suppresses": ["memory-sync-coverage", "skill-required"], + "description": "Memory write without separate coverage announcement" + }, + { + "phrase": "ремонт инфраструктуры", + "suppresses": ["tdd-gate", "verify-before-commit", "verify-before-push", "writing-plans-required", "skill-required", "memory-sync-coverage", "classifier-mismatch", "coverage-skill-match"], + "description": "Bypass all rules (full opt-out). Use only when literally fixing the enforce-infrastructure itself." 
// Rule key associated with this hook. Currently unused: an override phrase is
// only reported in the reminder, it never suppresses the injection itself.
const SUPPRESS_RULE = 'classifier-mismatch';

// Phrases listed at the bottom of the reminder when the caller does not pass
// the vocabulary it loaded from enforce-override-vocab.json.
const DEFAULT_OVERRIDE_PHRASES = [
  'без скилов',
  'direct ok',
  'срочно',
  'быстрый коммит',
  'recovery',
  'memory dump',
  'ремонт инфраструктуры',
];

// Task types for which the reminder adds the "Plan required" line.
const PLAN_REQUIRED_TASK_TYPES = /^(feature|bugfix|refactor|cleanup)$/i;

/**
 * Build the markdown reminder injected as `additionalContext`.
 *
 * @param {object} [args]
 * @param {object|null} [args.classification] - `{ task_type, confidence,
 *   recommended_node, recommended_chain }`, or null when no classification is available.
 * @param {Array<{kind: string, evidence?: string}>} [args.recentFlags] - flags
 *   recorded on earlier turns; only the last three well-formed entries are shown.
 * @param {{phrase: string, suppresses?: string[]}|null} [args.override] -
 *   override vocabulary entry detected in the user prompt, if any.
 * @param {string[]} [args.overridePhrases] - phrases to list as the override
 *   vocabulary; defaults to the built-in list.
 * @returns {string} reminder text, lines joined with "\n".
 */
export function buildReminder({ classification, recentFlags, override, overridePhrases } = {}) {
  const lines = ['## §17 Coverage / Discipline Reminder', ''];

  if (override) {
    // A malformed vocabulary entry must not throw: the caller swallows errors
    // and would then emit no reminder at all.
    const suppressed = Array.isArray(override.suppresses) ? override.suppresses : [];
    lines.push(`Override phrase detected: "${override.phrase}". The following rules are suppressed for THIS prompt only:`);
    lines.push(` ${suppressed.join(', ')}`);
    lines.push('');
  }

  lines.push('**First line of your response MUST be:**');
  lines.push(' `coverage: :`');
  lines.push('Channels: skill, node, chain, hook, agent, direct.');
  lines.push('');

  if (classification) {
    lines.push(`**Classifier output:** task_type=${classification.task_type || 'unknown'}, confidence=${classification.confidence ?? 'n/a'}`);
    if (classification.recommended_node) {
      lines.push(`**Recommended node:** ${classification.recommended_node}`);
    }
    if (classification.recommended_chain) {
      lines.push(`**Recommended chain:** ${classification.recommended_chain}`);
    }
    if (classification.task_type && PLAN_REQUIRED_TASK_TYPES.test(classification.task_type)) {
      lines.push(`**Plan required:** task type ${classification.task_type} requires either Skill(superpowers:writing-plans) invocation OR an existing plan file referenced before first production-code edit.`);
    }
    lines.push('');
  }

  // Drop null / non-object entries (e.g. a corrupt JSONL line) instead of crashing on `f.kind`.
  const flags = Array.isArray(recentFlags)
    ? recentFlags.filter((f) => f !== null && typeof f === 'object')
    : [];
  if (flags.length > 0) {
    lines.push('**Previous turn flagged:**');
    for (const f of flags.slice(-3)) {
      const evidence = typeof f.evidence === 'string' ? f.evidence.slice(0, 120) : '';
      lines.push(` - ${f.kind}: ${evidence}`);
    }
    lines.push('Adjust behaviour accordingly.');
    lines.push('');
  }

  const phrases = Array.isArray(overridePhrases) && overridePhrases.length > 0
    ? overridePhrases
    : DEFAULT_OVERRIDE_PHRASES;
  lines.push('Override vocabulary (substring-match in user prompt):');
  lines.push(` ${phrases.join(' / ')}`);
  return lines.join('\n');
}

/**
 * Return the first vocabulary entry whose phrase occurs (case-insensitively)
 * in the prompt, or null when none does. Entries without a phrase are ignored.
 */
function findOverridePhrase(prompt, phrases) {
  const haystack = prompt.toLowerCase();
  const hit = phrases.find(
    (p) => p && typeof p.phrase === 'string' && p.phrase !== '' && haystack.includes(p.phrase.toLowerCase()),
  );
  return hit ?? null;
}

/**
 * Write the hook response and exit 0 only after stdout has accepted it.
 * Calling process.exit() right after write() can drop the payload when stdout
 * is an asynchronous pipe.
 */
function writeAndExit(text) {
  process.exitCode = 0;
  process.stdout.write(text, () => process.exit(0));
}

const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/**
 * UserPromptSubmit entry point: reads the event from stdin, gathers the
 * classifier state and earlier flags, and emits the reminder as
 * `additionalContext`. Any failure results in an empty `{}` response and
 * exit code 0, so the prompt is never blocked.
 */
async function main() {
  try {
    const event = parseEventJson(await readStdin());
    const sessionId = event.session_id;
    const userPrompt = typeof event.prompt === 'string' ? event.prompt : '';

    // Override does NOT suppress this injection (it just notes the override).
    const vocab = loadOverrideVocab();
    const vocabPhrases = Array.isArray(vocab?.phrases) ? vocab.phrases : [];
    const override = findOverridePhrase(userPrompt, vocabPhrases);

    // Wait up to ~600ms for router-prehook to write state.
    let state = readRouterState(sessionId);
    for (let attempt = 0; attempt < 3 && !state; attempt += 1) {
      await sleep(200);
      state = readRouterState(sessionId);
    }
    const routed = state && state.classification;
    const classification = routed
      ? {
        task_type: routed.task_type,
        confidence: routed.confidence,
        recommended_node: routed.recommended_node || routed.recommendedNode,
        recommended_chain: routed.recommended_chain || routed.recommendedChain,
      }
      : null;

    const reminder = buildReminder({
      classification,
      recentFlags: readRationalizationFlags(sessionId),
      override,
      // List the vocabulary that was actually loaded so the reminder cannot drift from the JSON file.
      overridePhrases: vocabPhrases
        .map((p) => p && p.phrase)
        .filter((phrase) => typeof phrase === 'string' && phrase !== ''),
    });

    writeAndExit(JSON.stringify({
      hookSpecificOutput: {
        hookEventName: 'UserPromptSubmit',
        additionalContext: reminder,
      },
    }));
  } catch {
    try { writeAndExit('{}'); } catch { process.exit(0); }
  }
}

const isCli = process.argv[1] && process.argv[1].replace(/\\/g, '/').endsWith('/enforce-prompt-injection.mjs');
if (isCli) main();
'cleanup']) { + const txt = buildReminder({ + classification: { task_type: t, confidence: 0.7 }, + recentFlags: [], + }); + expect(txt).toMatch(/Plan required/); + } + }); + + it('omits plan requirement for conversation/question', () => { + const txt = buildReminder({ + classification: { task_type: 'question', confidence: 0.9 }, + recentFlags: [], + }); + expect(txt).not.toMatch(/Plan required/); + }); + + it('surfaces recent rationalization flags (up to 3)', () => { + const txt = buildReminder({ + classification: null, + recentFlags: [ + { kind: 'skipped-plan', evidence: 'too simple' }, + { kind: 'single-coverage-drift', evidence: 'TDD coverage used for memory sync' }, + { kind: 'weak-test', evidence: '1 expect' }, + { kind: 'commit-without-tests', evidence: 'production edit without test' }, + ], + }); + expect(txt).toMatch(/Previous turn flagged/); + // Last 3 should appear, first one should NOT + expect(txt).toMatch(/single-coverage-drift/); + expect(txt).toMatch(/weak-test/); + expect(txt).toMatch(/commit-without-tests/); + expect(txt).not.toMatch(/skipped-plan/); + }); + + it('notes detected override phrase + suppressed rule keys', () => { + const txt = buildReminder({ + classification: null, + recentFlags: [], + override: { phrase: 'срочно', suppresses: ['verify-before-push', 'tdd-gate'] }, + }); + expect(txt).toMatch(/Override phrase detected/); + expect(txt).toMatch(/срочно/); + expect(txt).toMatch(/verify-before-push/); + }); + + it('lists override-vocabulary phrases for user reference', () => { + const txt = buildReminder({ classification: null, recentFlags: [] }); + expect(txt).toMatch(/без скилов/); + expect(txt).toMatch(/direct ok/); + expect(txt).toMatch(/срочно/); + }); +}); diff --git a/tools/enforce-rationalization-audit.mjs b/tools/enforce-rationalization-audit.mjs new file mode 100644 index 00000000..c2c26221 --- /dev/null +++ b/tools/enforce-rationalization-audit.mjs @@ -0,0 +1,104 @@ +#!/usr/bin/env node +/** + * Rule #10 — Rationalization 
// Lower-case substrings that mark the assistant talking itself out of a rule.
const RATIONALIZATION_PHRASES = [
  'just this once',
  'пока без',
  'сейчас быстрее',
  'потом разберусь',
  'временно',
  'просто рационализация',
  "i'll come back to",
  'i will come back to',
  'we can skip',
  'rationalize',
  'без церемоний',
  'без скила сейчас',
];

const EDIT_TOOLS = ['Edit', 'Write', 'MultiEdit'];

// Commit messages shorter than this are reported as weak.
const MIN_COMMIT_MESSAGE_LENGTH = 12;

/**
 * Return every known rationalization phrase contained in `text`
 * (case-insensitive substring match), in vocabulary order.
 *
 * @param {unknown} text
 * @returns {string[]} matched phrases; empty for non-string input.
 */
export function findRationalizationPhrases(text) {
  if (typeof text !== 'string') return [];
  const lowered = text.toLowerCase();
  return RATIONALIZATION_PHRASES.filter((phrase) => lowered.includes(phrase));
}

/**
 * Find production-code edits that were not accompanied by any test-file edit.
 *
 * @param {Array<{name: string, input?: object}>} toolUses - tool calls of the turn.
 * @returns {string[]} edited production paths when no `*.test.*` / `*.spec.*` /
 *   `*Test.php` file was edited alongside them, otherwise an empty array.
 */
export function detectProdEditWithoutTest(toolUses) {
  if (!Array.isArray(toolUses)) return [];
  const prodEdits = [];
  let hasTestEdit = false;
  for (const use of toolUses) {
    if (!use || !EDIT_TOOLS.includes(use.name)) continue;
    const path = (use.input && (use.input.file_path || use.input.notebook_path)) || '';
    if (/\.(test|spec)\.[a-z0-9]+$/i.test(path) || /Test\.php$/.test(path)) {
      hasTestEdit = true;
      continue;
    }
    if (isProductionCodePath(path)) prodEdits.push(path);
  }
  return prodEdits.length > 0 && !hasTestEdit ? prodEdits : [];
}

/**
 * Extract the `-m` message of a `git commit` shell command.
 *
 * The closing quote must be the same character as the opening one, so an
 * apostrophe inside a double-quoted message does not cut it short. A message
 * passed as `"$(cat <<'EOF' ... EOF)"` is unwrapped to the heredoc body.
 *
 * @param {unknown} command
 * @returns {string|null} the message, or null when the command is not a
 *   `git commit` with a quoted `-m` argument.
 */
function extractCommitMessage(command) {
  if (typeof command !== 'string' || !/git\s+commit/.test(command)) return null;
  const quoted = command.match(/-m\s+(["'])((?:\\[\s\S]|(?!\1)[\s\S])*)\1/);
  if (!quoted) return null;
  const heredoc = quoted[2].match(/^\$\(\s*cat\s+<<-?\s*(["']?)(\w+)\1\s*\n([\s\S]*?)\n\s*\2\s*\)\s*$/);
  return heredoc ? heredoc[3].trim() : quoted[2];
}

/**
 * Inspect the latest turn and collect soft-visibility flags:
 * rationalization phrases in the assistant text, production edits without a
 * test edit, and very short commit messages.
 *
 * @param {Array<object>} transcriptEntries
 * @returns {Array<{kind: string, evidence: string}>}
 */
export function audit(transcriptEntries) {
  const flags = [];

  for (const phrase of findRationalizationPhrases(lastAssistantText(transcriptEntries))) {
    flags.push({ kind: 'rationalization-phrase', evidence: phrase });
  }

  const uses = turnToolUses(transcriptEntries);
  const toolUses = Array.isArray(uses) ? uses : [];
  for (const path of detectProdEditWithoutTest(toolUses)) {
    flags.push({ kind: 'prod-edit-without-test', evidence: path });
  }

  // Weak commit-message: git commit with very short message
  for (const use of toolUses) {
    if (!use || use.name !== 'Bash') continue;
    const message = extractCommitMessage(use.input && use.input.command);
    if (message !== null && message.length < MIN_COMMIT_MESSAGE_LENGTH) {
      flags.push({ kind: 'weak-commit-message', evidence: message });
    }
  }
  return flags;
}

/**
 * PostToolUse entry point: records every flag for the session and always
 * answers "do not block", including when anything above throws.
 */
async function main() {
  try {
    const raw = await readStdin();
    const event = parseEventJson(raw);
    const transcript = readTranscript(event.transcript_path);
    for (const flag of audit(transcript)) {
      appendRationalizationFlag(event.session_id, flag.kind, flag.evidence);
    }
    exitDecision({ block: false });
  } catch {
    exitDecision({ block: false });
  }
}

const isCli = process.argv[1] && process.argv[1].replace(/\\/g, '/').endsWith('/enforce-rationalization-audit.mjs');
if (isCli) main();
once'); + }); + it('detects "пока без" Russian', () => { + expect(findRationalizationPhrases('сделаем пока без тестов')).toContain('пока без'); + }); + it('detects multiple phrases in one text', () => { + const hits = findRationalizationPhrases('временно делаем потом разберусь'); + expect(hits.length).toBeGreaterThanOrEqual(2); + }); + it('returns empty array on clean text', () => { + expect(findRationalizationPhrases('coverage: skill:tdd')).toEqual([]); + }); +}); + +describe('detectProdEditWithoutTest', () => { + it('flags prod edit without any test edit in turn', () => { + const uses = [{ name: 'Edit', input: { file_path: 'tools/foo.mjs' } }]; + expect(detectProdEditWithoutTest(uses)).toEqual(['tools/foo.mjs']); + }); + it('does NOT flag when test also edited', () => { + const uses = [ + { name: 'Edit', input: { file_path: 'tools/foo.test.mjs' } }, + { name: 'Edit', input: { file_path: 'tools/foo.mjs' } }, + ]; + expect(detectProdEditWithoutTest(uses)).toEqual([]); + }); + it('does NOT flag for non-prod paths', () => { + expect(detectProdEditWithoutTest([{ name: 'Edit', input: { file_path: 'docs/x.md' } }])).toEqual([]); + }); +}); + +describe('audit', () => { + it('flags rationalization phrases in assistant text', () => { + const entries = [ + { message: { role: 'user', content: 'go' } }, + { message: { role: 'assistant', content: [{ type: 'text', text: 'just this once без скила' }] } }, + ]; + const flags = audit(entries); + expect(flags.find((f) => f.kind === 'rationalization-phrase')).toBeTruthy(); + }); + + it('flags prod-edit-without-test', () => { + const entries = [ + { message: { role: 'user', content: 'go' } }, + { message: { role: 'assistant', content: [ + { type: 'tool_use', id: 't1', name: 'Edit', input: { file_path: 'tools/foo.mjs' } }, + ] } }, + ]; + const flags = audit(entries); + expect(flags.find((f) => f.kind === 'prod-edit-without-test')).toBeTruthy(); + }); + + it('flags weak commit messages (<12 chars)', () => { + const entries = [ + { 
message: { role: 'user', content: 'go' } }, + { message: { role: 'assistant', content: [ + { type: 'tool_use', id: 't1', name: 'Bash', input: { command: 'git commit -m "fix"' } }, + ] } }, + ]; + const flags = audit(entries); + expect(flags.find((f) => f.kind === 'weak-commit-message')).toBeTruthy(); + }); + + it('returns no flags for clean turn', () => { + const entries = [ + { message: { role: 'user', content: 'go' } }, + { message: { role: 'assistant', content: [ + { type: 'text', text: 'coverage: skill:tdd\nworking properly' }, + { type: 'tool_use', id: 't1', name: 'Edit', input: { file_path: 'tools/foo.test.mjs' } }, + { type: 'tool_use', id: 't2', name: 'Edit', input: { file_path: 'tools/foo.mjs' } }, + ] } }, + ]; + expect(audit(entries)).toEqual([]); + }); +}); diff --git a/tools/enforce-tdd-gate.mjs b/tools/enforce-tdd-gate.mjs new file mode 100644 index 00000000..65480a67 --- /dev/null +++ b/tools/enforce-tdd-gate.mjs @@ -0,0 +1,216 @@ +#!/usr/bin/env node +/** + * Rule #3 + #6 — TDD-gate + writing-plans enforce for production code. + * + * PreToolUse on Edit / Write / MultiEdit. Pattern-matches file path against + * production-code heuristic (isProductionCodePath). When matched: + * 1. (#6) For feature/bugfix/refactor/cleanup classified tasks: require + * Skill(superpowers:writing-plans) OR existing plan-file reference in + * current turn. + * 2. (#3) Require preceding test edit + a `Bash` run of vitest/pest with + * a "fail" / "FAIL" / "Failed" indicator in its stdout (RED phase). + * + * Override: "срочно" / "быстрый коммит" / "ремонт инфраструктуры". 
const RULE_KEY_TDD = 'tdd-gate';
const RULE_KEY_PLAN = 'writing-plans-required';

const EDIT_TOOLS = ['Edit', 'Write', 'MultiEdit'];
const PLAN_TASK_TYPES = /^(feature|bugfix|refactor|cleanup)$/i;
const PLAN_DIR_IN_PATH = /docs\/superpowers\/plans\//i;
const PLAN_DIR_IN_TEXT = /docs\/superpowers\/plans\//;
const TEST_FILE = /\.(test|spec)\.[a-z0-9]+$/i;
// `composer test` is what the block messages below tell the user to run, so it must count as a test run.
const TEST_RUNNER_COMMAND = /\b(vitest|pest|phpunit)\b|\bcomposer\s+test\b/;
// "×" is matched outside the \b…\b group: it is not a word character, so a
// word boundary around it never matches in real runner output.
const FAILURE_MARKER = /\b(?:fail|FAIL|Failed)\b|×/;
// Accepts both "Tests 1 failed" and "Tests: 1 failed".
const FAILED_COUNT = /Tests:?\s+(\d+)\s+failed/;

/** Escape every regular-expression metacharacter in `text`. */
function escapeRegExp(text) {
  return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/** Coerce to string and use forward slashes only. */
function toPosixPath(value) {
  return String(value || '').replace(/\\/g, '/');
}

/** Normalised `file_path` / `notebook_path` of a tool_use block ('' when absent). */
function editedPath(block) {
  return toPosixPath(block.input && (block.input.file_path || block.input.notebook_path));
}

/** Yield every non-null content block of every entry whose message content is an array. */
function* contentBlocks(turn) {
  if (!Array.isArray(turn)) return;
  for (const entry of turn) {
    const content = entry && entry.message && entry.message.content;
    if (!Array.isArray(content)) continue;
    for (const block of content) {
      if (block) yield block;
    }
  }
}

/**
 * Map a production path to the test paths expected for it (heuristic).
 *
 * @param {string} prodPath
 * @returns {RegExp[]} suffix matchers (anchored at the end only); path parts
 *   taken from `prodPath` are escaped so they match literally.
 */
function expectedTestPathMatchers(prodPath) {
  const normalized = toPosixPath(prodPath);
  const patterns = [];

  // tools/foo.mjs → tools/foo.test.mjs / tools/foo.spec.mjs
  let m = normalized.match(/(.*\/)?([^/]+)\.mjs$/);
  if (m) {
    const stem = escapeRegExp(`${m[1] || ''}${m[2]}`);
    patterns.push(`${stem}\\.test\\.mjs`, `${stem}\\.spec\\.mjs`);
  }

  // app/app/Path/X.php → app/tests/Unit|Feature/XTest.php, or XTest.php anywhere below app/tests
  m = normalized.match(/\/app\/app\/(.+)\/([^/]+)\.php$/);
  if (m) {
    const name = escapeRegExp(m[2]);
    patterns.push(
      `/app/tests/Unit/${name}Test\\.php`,
      `/app/tests/Feature/${name}Test\\.php`,
      `/app/tests/.+${name}Test\\.php`,
    );
  }

  // resources/js/views/X.vue → X.spec.ts / X.test.ts / __tests__/X.spec.ts
  m = normalized.match(/\/resources\/js\/(.+\/)?([^/]+)\.(vue|ts|tsx|js)$/);
  if (m) {
    const dir = escapeRegExp(m[1] || '');
    const name = escapeRegExp(m[2]);
    patterns.push(
      `/resources/js/${dir}${name}\\.spec\\.ts`,
      `/resources/js/${dir}${name}\\.test\\.ts`,
      `/resources/js/${dir}__tests__/${name}\\.spec\\.ts`,
    );
  }

  return patterns.map((pattern) => new RegExp(`${pattern}$`));
}

/**
 * True when the turn contains an Edit/Write/MultiEdit of a test file that
 * plausibly belongs to `prodPath`.
 */
function hasMatchingTestEdit(turn, prodPath) {
  const matchers = expectedTestPathMatchers(prodPath);
  const basename = toPosixPath(prodPath).split('/').pop().split('.')[0];
  for (const block of contentBlocks(turn)) {
    if (block.type !== 'tool_use' || !EDIT_TOOLS.includes(block.name)) continue;
    const path = editedPath(block);
    if (!path) continue;
    // Loose check: a *.test.* / *.spec.* file whose path contains the prod basename.
    // An empty basename (dotfiles) is skipped, otherwise includes('') would accept any test edit.
    if (basename !== '' && TEST_FILE.test(path) && path.includes(basename)) return true;
    if (matchers.some((matcher) => matcher.test(path))) return true;
  }
  return false;
}

/**
 * True when the turn contains a Bash test-runner invocation whose tool_result
 * shows a failure marker or a non-zero "Tests N failed" count (RED phase).
 */
function hasFailingTestRun(turn) {
  const runIds = new Set();
  for (const block of contentBlocks(turn)) {
    if (block.type !== 'tool_use' || block.name !== 'Bash') continue;
    const command = (block.input && block.input.command) || '';
    if (TEST_RUNNER_COMMAND.test(command)) runIds.add(block.id);
  }
  if (runIds.size === 0) return false;

  for (const block of contentBlocks(turn)) {
    if (block.type !== 'tool_result' || !runIds.has(block.tool_use_id)) continue;
    let text = '';
    if (typeof block.content === 'string') {
      text = block.content;
    } else if (Array.isArray(block.content)) {
      text = block.content.map((part) => part && part.text).filter(Boolean).join('\n');
    }
    if (FAILURE_MARKER.test(text)) return true;
    const counted = text.match(FAILED_COUNT);
    if (counted && Number(counted[1]) > 0) return true;
  }
  return false;
}

/**
 * True when the turn invoked the writing-plans skill, touched or read a file
 * under docs/superpowers/plans/, or mentioned such a path in assistant text.
 */
function hasPlanIndicator(turn) {
  for (const block of contentBlocks(turn)) {
    if (block.type === 'tool_use') {
      if (block.name === 'Skill' && block.input && /writing-plans/i.test(String(block.input.skill || ''))) return true;
      // Covers Read of an existing plan as well as Edit/Write of a new one.
      // The path is normalised first so Windows-style separators also match.
      if (PLAN_DIR_IN_PATH.test(editedPath(block))) return true;
    }
    if (block.type === 'text' && PLAN_DIR_IN_TEXT.test(toPosixPath(block.text))) return true;
  }
  return false;
}

/**
 * Decide whether a production-code edit may proceed.
 *
 * @param {object} args
 * @param {string} args.toolName - tool being invoked.
 * @param {string} args.filePath - target path of the edit.
 * @param {Array<object>} args.transcriptEntries - transcript entries.
 * @param {{task_type?: string}|null} [args.classification] - classifier output.
 * @param {object|null} [args.override] - truthy when the TDD gate is overridden.
 * @param {object|null} [args.overridePlan] - truthy when the plan requirement is overridden.
 * @returns {{block: boolean, message?: string}}
 */
export function decide({
  toolName, filePath, transcriptEntries, classification, override, overridePlan,
}) {
  if (!EDIT_TOOLS.includes(toolName)) return { block: false };
  if (!isProductionCodePath(filePath)) return { block: false };

  const turn = lastTurnEntries(transcriptEntries);

  // Rule #6 — plan requirement for feature/bugfix/refactor/cleanup.
  const taskType = classification && classification.task_type;
  if (!overridePlan && taskType && PLAN_TASK_TYPES.test(taskType) && !hasPlanIndicator(turn)) {
    return {
      block: true,
      message: [
        `[enforce-tdd-gate] task_type="${taskType}" requires a plan before production-code edit.`,
        `Either invoke superpowers:writing-plans via Skill tool,`,
        `or reference an existing plan file (docs/superpowers/plans/...) in this turn first.`,
        ``,
        `Override: "быстрый коммит" / "ремонт инфраструктуры" in your prompt.`,
      ].join('\n'),
    };
  }

  // Rule #3 — TDD gate.
  if (override) return { block: false };
  if (!hasMatchingTestEdit(turn, filePath)) {
    return {
      block: true,
      message: [
        `[enforce-tdd-gate] Production code edit on "${filePath}" without preceding test edit.`,
        `Write the failing test FIRST in the corresponding *.test.mjs / *.spec.ts / *Test.php.`,
        `Then run vitest/pest to confirm RED, then return to this prod-code Edit.`,
        ``,
        `Override: "срочно" / "быстрый коммит" / "ремонт инфраструктуры".`,
      ].join('\n'),
    };
  }
  if (!hasFailingTestRun(turn)) {
    return {
      block: true,
      message: [
        `[enforce-tdd-gate] Test was edited but no vitest/pest run with RED output observed in this turn.`,
        `Run the test suite (vitest run / composer test) to confirm RED before prod-code edit.`,
        ``,
        `Override: "срочно" / "быстрый коммит" / "ремонт инфраструктуры".`,
      ].join('\n'),
    };
  }
  return { block: false };
}

/**
 * PreToolUse entry point. Any error results in "do not block".
 */
async function main() {
  try {
    const raw = await readStdin();
    const event = parseEventJson(raw);
    const toolName = event.tool_name || '';
    const filePath = (event.tool_input && (event.tool_input.file_path || event.tool_input.notebook_path)) || '';

    const transcript = readTranscript(event.transcript_path);
    const userPrompt = lastUserPromptText(transcript);
    const override = findOverride(userPrompt, RULE_KEY_TDD);
    const overridePlan = findOverride(userPrompt, RULE_KEY_PLAN);

    // Record an override only when the gate applies to this call; otherwise
    // the log would list overrides that suppressed nothing.
    const gated = EDIT_TOOLS.includes(toolName) && isProductionCodePath(filePath);
    if (gated && override) logOverride(RULE_KEY_TDD, override, event.session_id);
    if (gated && overridePlan) logOverride(RULE_KEY_PLAN, overridePlan, event.session_id);

    const state = readRouterState(event.session_id);
    const classification = state && state.classification
      ? { task_type: state.classification.task_type }
      : null;

    exitDecision(decide({
      toolName, filePath, transcriptEntries: transcript, classification, override, overridePlan,
    }));
  } catch {
    exitDecision({ block: false });
  }
}

const isCli = process.argv[1] && process.argv[1].replace(/\\/g, '/').endsWith('/enforce-tdd-gate.mjs');
if (isCli) main();
transcriptEntries: [ + userMsg('do it'), + assistantUses([{ id: 't1', name: 'Edit', input: { file_path: 'tools/foo.test.mjs' } }]), + ], + }); + expect(r.block).toBe(true); + expect(r.message).toMatch(/no vitest.*RED/); + }); + + it('allows after test edit + vitest RED', () => { + const r = decide({ + toolName: 'Edit', + filePath: 'tools/foo.mjs', + transcriptEntries: [ + userMsg('do it'), + assistantUses([ + { id: 't1', name: 'Edit', input: { file_path: 'tools/foo.test.mjs' } }, + { id: 't2', name: 'Bash', input: { command: 'npx vitest run tools/foo.test.mjs' } }, + ]), + toolResults([{ id: 't2', content: 'Tests 1 failed | 0 passed' }]), + ], + }); + expect(r.block).toBe(false); + }); + + it('allows when "fail" word in vitest stdout', () => { + const r = decide({ + toolName: 'Edit', + filePath: 'tools/foo.mjs', + transcriptEntries: [ + userMsg('do it'), + assistantUses([ + { id: 't1', name: 'Write', input: { file_path: 'tools/foo.test.mjs' } }, + { id: 't2', name: 'Bash', input: { command: 'npx vitest run tools/foo.test.mjs' } }, + ]), + toolResults([{ id: 't2', content: 'FAIL tools/foo.test.mjs' }]), + ], + }); + expect(r.block).toBe(false); + }); + + it('allows when override phrase present', () => { + const r = decide({ + toolName: 'Edit', + filePath: 'tools/foo.mjs', + transcriptEntries: [userMsg('срочно надо')], + override: { phrase: 'срочно', suppresses: ['tdd-gate'] }, + }); + expect(r.block).toBe(false); + }); + + it('blocks feature-classified prod edit without plan indicator', () => { + const r = decide({ + toolName: 'Edit', + filePath: 'tools/foo.mjs', + transcriptEntries: [ + userMsg('добавь фичу X'), + assistantUses([{ id: 't1', name: 'Edit', input: { file_path: 'tools/foo.test.mjs' } }]), + ], + classification: { task_type: 'feature' }, + }); + expect(r.block).toBe(true); + expect(r.message).toMatch(/requires a plan/); + }); + + it('allows feature edit when Skill(superpowers:writing-plans) invoked', () => { + const r = decide({ + toolName: 'Edit', + 
filePath: 'tools/foo.mjs', + transcriptEntries: [ + userMsg('добавь фичу X'), + assistantUses([ + { id: 't0', name: 'Skill', input: { skill: 'superpowers:writing-plans' } }, + { id: 't1', name: 'Edit', input: { file_path: 'tools/foo.test.mjs' } }, + { id: 't2', name: 'Bash', input: { command: 'npx vitest run tools/foo.test.mjs' } }, + ]), + toolResults([{ id: 't2', content: 'Tests 1 failed' }]), + ], + classification: { task_type: 'feature' }, + }); + expect(r.block).toBe(false); + }); + + it('allows feature edit when plan file is referenced', () => { + const r = decide({ + toolName: 'Edit', + filePath: 'tools/foo.mjs', + transcriptEntries: [ + userMsg('добавь фичу X'), + assistantUses([ + { id: 't0', name: 'Read', input: { file_path: 'docs/superpowers/plans/2026-05-26-foo.md' } }, + { id: 't1', name: 'Edit', input: { file_path: 'tools/foo.test.mjs' } }, + { id: 't2', name: 'Bash', input: { command: 'npx vitest run tools/foo.test.mjs' } }, + ]), + toolResults([{ id: 't2', content: 'Tests 1 failed' }]), + ], + classification: { task_type: 'feature' }, + }); + expect(r.block).toBe(false); + }); + + it('does NOT require plan for non-feature task types', () => { + const r = decide({ + toolName: 'Edit', + filePath: 'tools/foo.mjs', + transcriptEntries: [ + userMsg('chore'), + assistantUses([ + { id: 't1', name: 'Edit', input: { file_path: 'tools/foo.test.mjs' } }, + { id: 't2', name: 'Bash', input: { command: 'npx vitest run tools/foo.test.mjs' } }, + ]), + toolResults([{ id: 't2', content: 'Tests 1 failed' }]), + ], + classification: { task_type: 'cleanup-but-not-strictly' }, + }); + expect(r.block).toBe(false); + }); +}); diff --git a/tools/enforce-verify-before-push.mjs b/tools/enforce-verify-before-push.mjs new file mode 100644 index 00000000..35106c5c --- /dev/null +++ b/tools/enforce-verify-before-push.mjs @@ -0,0 +1,97 @@ +#!/usr/bin/env node +/** + * Rule #4 — Require fresh verification artifact before git commit / push. + * + * PreToolUse on Bash. 
const RULE_KEY_COMMIT = 'verify-before-commit';
const RULE_KEY_PUSH = 'verify-before-push';
const MAX_AGE_SEC = 30 * 60; // 30 min

// Override phrases per command kind. "быстрый коммит" is offered for commits
// only: the override vocabulary maps it to verify-before-commit, not to
// verify-before-push.
const OVERRIDE_HINT = {
  commit: `Override: "срочно" / "быстрый коммит" / "ремонт инфраструктуры" in your prompt.`,
  push: `Override: "срочно" / "ремонт инфраструктуры" in your prompt.`,
};

/** True for the git command kinds this gate applies to. */
function isGatedKind(kind) {
  return kind === 'commit' || kind === 'push';
}

/**
 * Decide whether a `git commit` / `git push` may proceed.
 *
 * @param {object} args
 * @param {string} args.toolName - tool being invoked; only 'Bash' is gated.
 * @param {string} args.command - shell command line.
 * @param {object|null} [args.sentinel] - last recorded verification result.
 * @param {number|null} [args.sentinelAge] - age of the sentinel in seconds;
 *   a missing age is not treated as stale.
 * @param {object|null} [args.override] - truthy when the rule is overridden.
 * @returns {{block: boolean, message?: string}}
 */
export function decide({ toolName, command, sentinel, sentinelAge, override }) {
  if (toolName !== 'Bash' || typeof command !== 'string') return { block: false };
  const kind = detectGitCommandKind(command);
  if (!isGatedKind(kind)) return { block: false };
  if (override) return { block: false };

  if (!sentinel) {
    return {
      block: true,
      message: [
        `[enforce-verify-before-push] No verification artifact found.`,
        `Run a full test suite first (vitest run / composer test) before \`git ${kind}\`.`,
        ``,
        OVERRIDE_HINT[kind],
      ].join('\n'),
    };
  }
  if (sentinel.result !== 'pass') {
    return {
      block: true,
      message: [
        `[enforce-verify-before-push] Last verification FAILED (result=${sentinel.result}, exit=${sentinel.exit_code}).`,
        `Tests: ${sentinel.tests_passed}/${sentinel.tests_total} passed, ${sentinel.tests_failed} failed.`,
        `Re-run the suite and address failures before \`git ${kind}\`.`,
      ].join('\n'),
    };
  }
  if (typeof sentinelAge === 'number' && sentinelAge > MAX_AGE_SEC) {
    return {
      block: true,
      message: [
        `[enforce-verify-before-push] Verification artifact is stale (age ${sentinelAge}s > ${MAX_AGE_SEC}s).`,
        `Re-run the full test suite before \`git ${kind}\`.`,
      ].join('\n'),
    };
  }
  return { block: false };
}

/**
 * PreToolUse entry point. Any error results in "do not block".
 */
async function main() {
  try {
    const raw = await readStdin();
    const event = parseEventJson(raw);
    const toolName = event.tool_name || '';
    const command = (event.tool_input && event.tool_input.command) || '';

    // Commands other than git commit/push are never gated: answer before
    // reading the transcript or the sentinel, and without recording an
    // override that suppressed nothing.
    const kind = toolName === 'Bash' ? detectGitCommandKind(command) : null;
    if (!isGatedKind(kind)) {
      exitDecision({ block: false });
      return;
    }

    const ruleKey = kind === 'commit' ? RULE_KEY_COMMIT : RULE_KEY_PUSH;
    const userPrompt = lastUserPromptText(readTranscript(event.transcript_path));
    const override = findOverride(userPrompt, ruleKey);
    if (override) logOverride(ruleKey, override, event.session_id);

    const sentinel = readSentinel('verify-pass', event.session_id);
    const sentinelAge = sentinelAgeSec('verify-pass', event.session_id);

    exitDecision(decide({ toolName, command, sentinel, sentinelAge, override }));
  } catch {
    exitDecision({ block: false });
  }
}

const isCli = process.argv[1] && process.argv[1].replace(/\\/g, '/').endsWith('/enforce-verify-before-push.mjs');
if (isCli) main();
stdout: '' })).toBeNull(); + }); + it('records PASS on full vitest run with all-passed summary', () => { + const rec = decideRecord({ + toolName: 'Bash', command: 'npx vitest run', exitCode: 0, + stdout: 'Tests 3708 passed (3708)', + }); + expect(rec.result).toBe('pass'); + expect(rec.tests_total).toBe(3708); + expect(rec.tests_passed).toBe(3708); + }); + it('records FAIL on full vitest run with failed summary', () => { + const rec = decideRecord({ + toolName: 'Bash', command: 'npx vitest run', exitCode: 1, + stdout: 'Tests 3 failed | 600 passed (603)', + }); + expect(rec.result).toBe('fail'); + expect(rec.tests_failed).toBe(3); + }); + + it('records PASS when exit=1 but tests_failed=0 (infra file-load failures)', () => { + // E.g. worktree CRLF copies of test files crash to load → exit code 1 + // but all actual tests passed. + const rec = decideRecord({ + toolName: 'Bash', command: 'npx vitest run', exitCode: 1, + stdout: 'Test Files 95 failed | 411 passed (506)\n Tests 8091 passed (8091)', + }); + expect(rec.result).toBe('pass'); + }); + it('records pest', () => { + const rec = decideRecord({ + toolName: 'Bash', command: 'composer test', exitCode: 0, + stdout: 'Tests: 742 passed (1908 assertions)', + }); + expect(rec.result).toBe('pass'); + }); +}); + +describe('enforce-verify-record / extractTestMetrics', () => { + it('parses vitest all-passed', () => { + expect(extractTestMetrics('Tests 3708 passed (3708)')).toMatchObject({ + tests_passed: 3708, tests_total: 3708, tests_failed: 0, + }); + }); + it('parses vitest mixed failure', () => { + expect(extractTestMetrics('Tests 1 failed | 631 passed (632)')).toMatchObject({ + tests_failed: 1, tests_passed: 631, tests_total: 632, + }); + }); +}); + +describe('enforce-verify-before-push / decide', () => { + it('allows non-Bash', () => { + expect(decide({ toolName: 'Edit', command: '' }).block).toBe(false); + }); + it('allows non-git Bash', () => { + expect(decide({ toolName: 'Bash', command: 'ls -la' 
}).block).toBe(false); + }); + it('blocks git commit without sentinel', () => { + const r = decide({ toolName: 'Bash', command: 'git commit -m "x"' }); + expect(r.block).toBe(true); + expect(r.message).toMatch(/No verification/); + }); + it('blocks git push without sentinel', () => { + expect(decide({ toolName: 'Bash', command: 'git push origin main' }).block).toBe(true); + }); + it('blocks when sentinel result=fail', () => { + const r = decide({ + toolName: 'Bash', command: 'git commit -m "x"', + sentinel: { result: 'fail', exit_code: 1, tests_passed: 600, tests_total: 603, tests_failed: 3 }, + sentinelAge: 60, + }); + expect(r.block).toBe(true); + expect(r.message).toMatch(/FAILED/); + }); + it('blocks when sentinel is stale', () => { + const r = decide({ + toolName: 'Bash', command: 'git commit -m "x"', + sentinel: { result: 'pass' }, + sentinelAge: 60 * 60, // 1 hour > 30 min + }); + expect(r.block).toBe(true); + expect(r.message).toMatch(/stale/); + }); + it('allows when sentinel is fresh + pass', () => { + const r = decide({ + toolName: 'Bash', command: 'git commit -m "x"', + sentinel: { result: 'pass' }, + sentinelAge: 120, + }); + expect(r.block).toBe(false); + }); + it('allows when override phrase present', () => { + const r = decide({ + toolName: 'Bash', command: 'git push', + sentinel: null, + override: { phrase: 'срочно', suppresses: ['verify-before-push'] }, + }); + expect(r.block).toBe(false); + }); +}); diff --git a/tools/enforce-verify-record.mjs b/tools/enforce-verify-record.mjs new file mode 100644 index 00000000..fc93e06d --- /dev/null +++ b/tools/enforce-verify-record.mjs @@ -0,0 +1,82 @@ +#!/usr/bin/env node +/** + * Rule #4 (companion) — Record verification artifact. + * + * PostToolUse on Bash. If the command was a full project test run AND it + * passed (exit 0 + recognisable PASS marker in stdout), write a sentinel + * `~/.claude/runtime/verify-pass-.json` consumed by the + * enforce-verify-before-push gate. 
// SGR colour sequences; stripped so a coloured reporter line still parses.
const ANSI_SGR = /\x1b\[[0-9;]*m/g;
// One summary line: "Tests  <counts> (<total>)" (vitest) or
// "Tests: <counts> (<n> assertions)" (Pest). Group 1 is the counts part up to
// the first "(", group 2 is the parenthesised value only when it is a bare number.
const TESTS_SUMMARY = /\bTests:?[ \t]+(\d[^\r\n(]*)(?:\((\d+)\))?/g;
// A single "<number> <label>" pair inside the counts part, e.g. "3 failed".
const COUNT_SEGMENT = /(\d+)\s+([A-Za-z]+)/g;

/**
 * Extract pass/fail counts from a test runner's stdout.
 *
 * The first "Tests ..." summary line that carries a `passed` or `failed`
 * count wins. Recognised shapes include:
 *   - "Tests 3708 passed (3708)"
 *   - "Tests 1 failed | 631 passed (632)"
 *   - "Tests 12 passed | 3 skipped (15)"
 *   - "Tests: 742 passed (1908 assertions)"
 *   - "Tests: 1 failed, 741 passed (1908 assertions)"
 *
 * `tests_total` is the bare number in parentheses when there is one;
 * otherwise it is the sum of all labelled counts on the line (Pest's
 * parentheses hold assertions, not tests).
 *
 * @param {unknown} stdout - Captured standard output of the test command.
 * @returns {{tests_total: number|null, tests_passed: number|null, tests_failed: number|null}}
 *   All three fields are null when the input is not a string or no summary
 *   line could be parsed; otherwise all three are numbers.
 */
export function extractTestMetrics(stdout) {
  const out = { tests_total: null, tests_passed: null, tests_failed: null };
  if (typeof stdout !== 'string') return out;

  const text = stdout.replace(ANSI_SGR, '');
  // matchAll works on a clone of the regex, so the shared /g patterns above
  // never carry lastIndex state between calls.
  for (const [, countsPart, explicitTotal] of text.matchAll(TESTS_SUMMARY)) {
    const counts = new Map();
    for (const [, amount, label] of countsPart.matchAll(COUNT_SEGMENT)) {
      counts.set(label.toLowerCase(), Number(amount));
    }
    // A line starting with "Tests <digit>" but with no verdict is not a summary.
    if (!counts.has('passed') && !counts.has('failed')) continue;

    let sum = 0;
    for (const value of counts.values()) sum += value;

    out.tests_passed = counts.get('passed') || 0;
    out.tests_failed = counts.get('failed') || 0;
    out.tests_total = explicitTotal !== undefined ? Number(explicitTotal) : sum;
    return out;
  }
  return out;
}

/**
 * Build the verification record for a finished Bash command.
 *
 * @param {object} args
 * @param {string} args.toolName - Tool name from the hook event.
 * @param {string} args.command - Command that was executed.
 * @param {number|null} [args.exitCode] - Exit code reported for the command.
 * @param {string} [args.stdout] - Captured standard output.
 * @returns {object|null} null when the tool is not Bash or
 *   `detectFullTestRun` does not recognise the command; otherwise the
 *   sentinel payload (`result` is 'pass' or 'fail').
 */
export function decideRecord({ toolName, command, exitCode, stdout }) {
  if (toolName !== 'Bash') return null;
  const kind = detectFullTestRun(command);
  if (!kind) return null;

  const metrics = extractTestMetrics(stdout || '');
  // The parsed test outcome drives the verdict, not the exit code: a run
  // counts as PASS when zero tests failed and at least one passed, even if
  // the exit code is non-zero because test FILES failed to load (infra
  // failures such as worktree CRLF copies). Output without a parsable
  // summary is recorded as FAIL regardless of exit code.
  const passed = metrics.tests_failed === 0 && metrics.tests_passed > 0;

  return {
    command_kind: kind,
    command: String(command).slice(0, 200),
    exit_code: exitCode,
    result: passed ? 'pass' : 'fail',
    tests_total: metrics.tests_total,
    tests_passed: metrics.tests_passed,
    tests_failed: metrics.tests_failed,
  };
}

/**
 * CLI entry point: read the PostToolUse event from stdin and, when the
 * command was a full test run, write the `verify-pass` sentinel for the
 * session. This hook never blocks; any error is turned into "allow".
 */
async function main() {
  try {
    const event = parseEventJson(await readStdin());
    const toolName = event.tool_name || '';
    const command = (event.tool_input && event.tool_input.command) || '';
    const resp = event.tool_response || {};

    // Accept either spelling of the exit-code field; null when neither is numeric.
    let exitCode = null;
    if (typeof resp.exitCode === 'number') exitCode = resp.exitCode;
    else if (typeof resp.exit_code === 'number') exitCode = resp.exit_code;
    const stdout = typeof resp.stdout === 'string' ? resp.stdout : '';

    const record = decideRecord({ toolName, command, exitCode, stdout });
    if (record) writeSentinel('verify-pass', event.session_id, record);
    exitDecision({ block: false });
  } catch {
    // Fail open on purpose: a recording problem must not affect the session.
    exitDecision({ block: false });
  }
}

// Run only when executed directly (path normalised so Windows separators match).
const isCli = process.argv[1] && process.argv[1].replace(/\\/g, '/').endsWith('/enforce-verify-record.mjs');
if (isCli) main();
const fakeFsImpl = { existsSync: () => true, - readFileSync: () => '{"mode":"on"}', // no "value" key + readFileSync: () => '{"mode":"on"}', }; + const result = readRuntimeFlag('self-assessment-mode', { homedir: '/fake', fsImpl: fakeFsImpl }); + expect(result).toBe('on'); + }); + + it('returns "off" when neither "mode" nor "value" present', () => { + const fakeFsImpl = { + existsSync: () => true, + readFileSync: () => '{"other":"thing"}', + }; const result = readRuntimeFlag('self-assessment-mode', { homedir: '/fake', fsImpl: fakeFsImpl }); expect(result).toBe('off'); });