From 4665c537e8d2bddbc79d4d8a20a47a2a35985823 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=94=D0=BC=D0=B8=D1=82=D1=80=D0=B8=D0=B9?= Date: Sat, 23 May 2026 13:16:42 +0300 Subject: [PATCH] =?UTF-8?q?fix(observer):=20parser=20candidates=5Fconsider?= =?UTF-8?q?ed=20=E2=80=94=20whitelist=20filter?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit extractCandidates грузила в primary_rationale.candidates_considered ЛЮБОЙ нумерованный/маркированный список из ассистентского текста — без семантического фильтра. В топе оказывались куски прозы («Hard-floor работает только для §12 Superpowers …»), шаги процедуры («1. Hard-floor check, 2. Классификация …»), фрагменты кода (regex-паттерны) — не имена узлов реестра. Фикс: при загрузке модуля собираю KNOWN_NODES из tools/observer-known-nodes.txt + ключей observer-chain-map.json + сентинела «direct». После regex-извлечения item нормализуется (срезаются **/`/_/* обвязки + хвостовая пунктуация) и проверяется по: точное имя в реестре ИЛИ #NN (Tooling ID) ИЛИ plugin:skill форма. Если после фильтра <2 элементов — return []. Opt-in тег остаётся authoritative и идёт мимо фильтра. Триггеры/границы не трогал — их regex уже узкий (Pravila §N / ADR-N / PSR_v1 RN / L-цепочки). Repro-кейсы из живого episodes-2026-05.jsonl добавлены в тесты: prose-bullets, procedure-steps, code-snippet bullets, mixed list, single survivor. --- tools/observer-transcript-parser.mjs | 84 ++++++++++++++++++++++- tools/observer-transcript-parser.test.mjs | 77 +++++++++++++++++---- 2 files changed, 146 insertions(+), 15 deletions(-) diff --git a/tools/observer-transcript-parser.mjs b/tools/observer-transcript-parser.mjs index e50113ad..9f0f411b 100644 --- a/tools/observer-transcript-parser.mjs +++ b/tools/observer-transcript-parser.mjs @@ -15,9 +15,14 @@ * Per ADR-011 §6 + spec v1.1 §5.2.1. 
*/ +import { readFileSync } from 'node:fs'; +import { fileURLToPath } from 'node:url'; +import { dirname, join } from 'node:path'; import { detectChoiceProvenance, detectAskUserQuestionChoice } from './observer-choice-detector.mjs'; import { loadChainMap, chainsFor } from './observer-chain-detector.mjs'; +const __dirname = dirname(fileURLToPath(import.meta.url)); + let CHAIN_MAP = null; try { CHAIN_MAP = loadChainMap(); @@ -25,6 +30,67 @@ try { CHAIN_MAP = new Map(); // битый/отсутствующий JSON -> chainsFor вернёт null, observer не падает } +/** + * Whitelist of router-node names. Used by extractCandidates to filter out + * free-form prose bullets (analysis text, procedure steps, code snippets) that + * the regex on its own would happily slurp into candidates_considered. + * Sources, in order: + * - tools/observer-known-nodes.txt — bare names (brainstorming, ccpm, …) + * - tools/observer-chain-map.json keys — incl. plugin:skill form + * - sentinel "direct" (no-skill marker used by node_chosen) + * Tooling IDs (#NN) and arbitrary plugin:skill forms pass via regex below. + */ +const KNOWN_NODES = (() => { + const set = new Set(['direct']); + try { + const txt = readFileSync(join(__dirname, 'observer-known-nodes.txt'), 'utf8'); + for (const line of txt.split('\n')) { + const t = line.replace(/#.*$/, '').trim(); + if (t) set.add(t); + } + } catch { + // file missing in some test sandboxes — fall back to chain-map keys only + } + if (CHAIN_MAP) for (const node of CHAIN_MAP.keys()) set.add(node); + return set; +})(); + +const TOOLING_ID_RE = /^#\d+$/; +const NAMESPACED_SKILL_RE = /^[a-z][a-z0-9-]*:[a-z][a-z0-9-]*(?::[a-z][a-z0-9-]*)?$/; + +/** + * Strip lightweight markdown wrappers (bold, italic, code, trailing punctuation) + * before testing against the whitelist. Conservative — we accept that some + * weirdly-formatted node names slip through, but free-form prose bullets do not. 
+ */ +function normalizeCandidate(s) { + let t = String(s || '').trim(); + // peel outer markdown markers: **x**, *x*, `x`, _x_ + while ( + (t.startsWith('**') && t.endsWith('**') && t.length > 4) || + (t.startsWith('`') && t.endsWith('`') && t.length > 2) || + (t.startsWith('*') && t.endsWith('*') && t.length > 2) || + (t.startsWith('_') && t.endsWith('_') && t.length > 2) + ) { + if (t.startsWith('**')) t = t.slice(2, -2).trim(); + else t = t.slice(1, -1).trim(); + } + // drop trailing punctuation (commas, periods, em-dashes) that lists often leave + t = t.replace(/[.,;:!?—–-]+$/u, '').trim(); + return t; +} + +function isKnownNode(raw) { + const t = normalizeCandidate(raw); + if (!t) return false; + if (KNOWN_NODES.has(t)) return true; + if (TOOLING_ID_RE.test(t)) return true; + // namespaced plugin:skill we haven't seen yet — accept if shape matches and + // contains no whitespace (a free-form bullet with a colon in prose won't pass). + if (NAMESPACED_SKILL_RE.test(t)) return true; + return false; +} + const SUPERPOWERS_PREFIX = 'superpowers:'; function parseLines(text) { @@ -375,13 +441,25 @@ export function extractTriggers(turn) { const CANDIDATE_NUMBERED_RE = /^\s*\d+[.\)]\s+([^\n]+)$/gm; const CANDIDATE_BULLET_RE = /^\s*[-*]\s+([^\n]+)$/gm; -/** Heuristic candidates: ≥2 numbered (preferred) or bulleted items. */ +/** + * Heuristic candidates: ≥2 numbered (preferred) or bulleted items, filtered to + * router-node identifiers (see isKnownNode). Free-form prose bullets are + * rejected — they belong in the assistant's narrative, not in + * primary_rationale.candidates_considered. The opt-in tag + * (parseReasoningTag) bypasses this filter; that channel is authoritative. 
+ */ export function extractCandidates(turn) { const text = assistantTextOfTurn(turn); const numbered = [...text.matchAll(CANDIDATE_NUMBERED_RE)].map((m) => m[1].trim()); - if (numbered.length >= 2) return numbered; + if (numbered.length >= 2) { + const filtered = numbered.map(normalizeCandidate).filter(isKnownNode); + if (filtered.length >= 2) return filtered; + } const bulleted = [...text.matchAll(CANDIDATE_BULLET_RE)].map((m) => m[1].trim()); - if (bulleted.length >= 2) return bulleted; + if (bulleted.length >= 2) { + const filtered = bulleted.map(normalizeCandidate).filter(isKnownNode); + if (filtered.length >= 2) return filtered; + } return []; } diff --git a/tools/observer-transcript-parser.test.mjs b/tools/observer-transcript-parser.test.mjs index 09ac10e1..4545e18c 100644 --- a/tools/observer-transcript-parser.test.mjs +++ b/tools/observer-transcript-parser.test.mjs @@ -1143,21 +1143,74 @@ describe('reasoning capture heuristics (Task 6)', () => { }); describe('extractCandidates', () => { - it('extracts numbered options (≥2)', () => { - const c = extractCandidates(mkTurn('1. brainstorming\n2. subagent-driven\n3. direct')); - expect(c).toContain('brainstorming'); - expect(c.length).toBeGreaterThanOrEqual(2); + // Only items that look like router-node identifiers are accepted: + // - a known node from tools/observer-known-nodes.txt + // - a key from tools/observer-chain-map.json (e.g. superpowers:brainstorming) + // - a tooling ID matching ^#\d+$ (Прил. Н) + // - the sentinel "direct" + // Free-form prose bullets / numbered procedure steps / code snippets are rejected. + it('extracts numbered options that are known node names', () => { + const c = extractCandidates( + mkTurn('1. brainstorming\n2. subagent-driven-development\n3. 
direct') + ); + expect(c).toEqual(['brainstorming', 'subagent-driven-development', 'direct']); }); - it('extracts bullets when no numbered', () => { - expect(extractCandidates(mkTurn('- A\n- B\n- C')).length).toBeGreaterThanOrEqual(2); + it('accepts namespaced plugin:skill form from the chain map', () => { + const c = extractCandidates( + mkTurn('1. superpowers:brainstorming\n2. claude-md-management:claude-md-improver') + ); + expect(c).toEqual(['superpowers:brainstorming', 'claude-md-management:claude-md-improver']); }); - it('prefers numbered over bullets', () => { - const c = extractCandidates(mkTurn('1. X\n2. Y\n- A\n- B')); - expect(c).toContain('X'); - expect(c).toContain('Y'); + it('accepts tooling IDs like #25 / #74', () => { + expect(extractCandidates(mkTurn('1. #25\n2. #74'))).toEqual(['#25', '#74']); }); - it('returns empty when single item', () => { - expect(extractCandidates(mkTurn('1. only one'))).toEqual([]); + it('strips simple markdown wrappers before checking', () => { + const c = extractCandidates( + mkTurn('1. **brainstorming**\n2. `writing-plans`\n3. discovery-interview') + ); + expect(c).toEqual(['brainstorming', 'writing-plans', 'discovery-interview']); + }); + it('drops free-form prose bullets even when ≥2 are present', () => { + // Repro from docs/observer/episodes-2026-05.jsonl: analysis bullets with + // bold-prefix sentence text were going straight into candidates_considered. + const text = + '- **Hard-floor работает только для §12 Superpowers** (14 раз). §14/§15 в журнале не оставили следов.\n' + + '- **На feature/planning я не ищу триггеры** — 0% матча.\n' + + '- **Метка `regulated` врёт** в 79% случаев — нет настоящего применения границ.'; + expect(extractCandidates(mkTurn(text))).toEqual([]); + }); + it('drops numbered procedure-step text (real episode repro)', () => { + const text = + '1. **Hard-floor check** — Pravila §12 (Superpowers) / §14 (Queen) / §15.\n' + + '2. 
**Классификация** — определяю тип задачи (feature/bugfix/planning).\n' + + '3. **Trigger-based node selection** — по реестру Tooling Прил. Н §4.X.\n' + + '4. **Canonical chain check** — смотрю L1-L15.\n' + + '5. **Execution** — иду делать.'; + expect(extractCandidates(mkTurn(text))).toEqual([]); + }); + it('drops code-snippet bullets (regex patterns etc.)', () => { + const text = + '- regex `(?:^|[\\s\\"\\\'])(tools\\/[\\w-]+\\.(?:mjs|py|sh))` → имя файла.\n' + + '- fallback `inline:` — стабильно.'; + expect(extractCandidates(mkTurn(text))).toEqual([]); + }); + it('filters a mixed list down to just the real nodes', () => { + const text = + '1. brainstorming\n' + + '2. resolver + tests\n' + + '3. discovery-interview\n' + + '4. parser extension + tests + smoke'; + expect(extractCandidates(mkTurn(text))).toEqual(['brainstorming', 'discovery-interview']); + }); + it('returns empty when only one real node survives the filter', () => { + // ≥2 raw items but only 1 known-node → not enough signal, drop. + expect(extractCandidates(mkTurn('1. brainstorming\n2. некий текст'))).toEqual([]); + }); + it('prefers numbered over bullets when both lists contain known nodes', () => { + const c = extractCandidates( + mkTurn('1. brainstorming\n2. writing-plans\n- discovery-interview\n- regression') + ); + expect(c).toEqual(['brainstorming', 'writing-plans']); }); it('returns empty for prose', () => { expect(extractCandidates(mkTurn('просто текст'))).toEqual([]);