// NOTE(review): in tools/observer-transcript-parser.mjs this hunk lands between the
// JSDoc of extractAskUserQuestionEvents and the function it documents, which detaches
// that comment from its function. Insert these helpers above that JSDoc block instead.

/**
 * Concatenate the text of every assistant text block in a turn.
 *
 * @param {Array<object>|null|undefined} turn - Transcript entries of one turn. Entries
 *   without a `message`, or whose `message.role` is not 'assistant', are ignored, as is
 *   any `message.content` that is not an array.
 * @returns {string} The `text` of each `type: 'text'` block, joined with '\n'
 *   ('' when there is none).
 */
function assistantTextOfTurn(turn) {
  const parts = [];
  for (const entry of turn || []) {
    const message = entry && entry.message;
    if (!message || message.role !== 'assistant') continue;
    if (!Array.isArray(message.content)) continue;
    for (const block of message.content) {
      if (block && block.type === 'text' && typeof block.text === 'string') {
        parts.push(block.text);
      }
    }
  }
  return parts.join('\n');
}

/** Matches the "L<n> chain" shorthand once whitespace has been collapsed. */
const CHAIN_SHORTHAND_RE = /^(L\d+) chain$/;
/** Matches the case-insensitive hard-floor / hard-rule keywords as a whole string. */
const HARD_KEYWORD_RE = /^hard-(?:floor|rule)$/i;

/**
 * Bring one raw regex match into canonical form so that dedup is meaningful:
 *  - whitespace runs (the patterns use `\s+`, which also spans newlines) become one space;
 *  - the "L<n> chain" shorthand becomes "routing-off-phase L<n>";
 *  - hard-floor / hard-rule are lower-cased, since they are matched case-insensitively.
 *
 * @param {string} raw - Text matched by one of the reference patterns.
 * @returns {string} Canonical reference.
 */
function normalizeReference(raw) {
  const compact = raw.replace(/\s+/g, ' ');
  const chain = CHAIN_SHORTHAND_RE.exec(compact);
  if (chain) return `routing-off-phase ${chain[1]}`;
  return HARD_KEYWORD_RE.test(compact) ? compact.toLowerCase() : compact;
}

/**
 * Run every pattern over the text and return the unique canonical references,
 * ordered by pattern first and by position in the text second.
 *
 * @param {string} text - Text to scan.
 * @param {RegExp[]} patterns - Global (`/g`) patterns; `matchAll` works on a clone,
 *   so the shared module-level regexes never carry `lastIndex` state between calls.
 * @returns {string[]} Deduplicated canonical references.
 */
function collectReferences(text, patterns) {
  const found = new Set();
  for (const pattern of patterns) {
    for (const match of text.matchAll(pattern)) {
      found.add(normalizeReference(match[0]));
    }
  }
  return [...found];
}

const TRIGGER_PATTERNS = [
  /\bPravila\s+§\d+(?:\.\d+)?/g,
  /\bADR-\d+/g,
  /\bPSR_v1\s+R\d+(?:\.\d+)?/g,
  /\brouting-off-phase\s+L\d+/g,
  /\bL\d+\s+chain/g,
  /\bhard-(?:floor|rule)\b/gi,
];

/**
 * Heuristic triggers from assistant text. Conservative-broad — false positives OK.
 * parseTranscript stores the result in `primary_rationale.triggers_matched`.
 *
 * @param {Array<object>|null|undefined} turn - Transcript entries of one turn.
 * @returns {string[]} Unique canonical references (e.g. 'Pravila §12.2', 'ADR-011',
 *   'routing-off-phase L12', 'hard-rule'); [] when nothing matches.
 */
export function extractTriggers(turn) {
  return collectReferences(assistantTextOfTurn(turn), TRIGGER_PATTERNS);
}

// `[^\S\r\n]` is "whitespace except line breaks": the marker and the item text must sit
// on the same line, so an empty item ("1." + newline) cannot swallow the following line.
// The capture starts with `\S`, so whitespace-only items are never reported.
const CANDIDATE_NUMBERED_RE = /^[^\S\r\n]*\d+[.)][^\S\r\n]+(\S[^\r\n]*)$/gm;
const CANDIDATE_BULLET_RE = /^[^\S\r\n]*[-*][^\S\r\n]+(\S[^\r\n]*)$/gm;

/**
 * Heuristic candidates: ≥2 numbered (preferred) or bulleted items.
 * parseTranscript stores the result in `primary_rationale.candidates_considered`.
 *
 * @param {Array<object>|null|undefined} turn - Transcript entries of one turn.
 * @returns {string[]} Trimmed item texts in document order (duplicates are kept);
 *   [] when neither list style has at least two items.
 */
export function extractCandidates(turn) {
  const text = assistantTextOfTurn(turn);
  for (const listPattern of [CANDIDATE_NUMBERED_RE, CANDIDATE_BULLET_RE]) {
    const items = Array.from(text.matchAll(listPattern), (match) => match[1].trim());
    if (items.length >= 2) return items;
  }
  return [];
}

const BOUNDARY_PATTERNS = [
  /\bADR-\d+(?:\s+§\d+(?:\.\d+)?)?/g,
  /\bPSR_v1\s+R\d+(?:\.\d+)?/g,
  /\bPravila\s+§\d+(?:\.\d+)?/g,
  /\brouting-off-phase\s+L\d+/g,
  /\bL\d+\s+chain/g,
];

/**
 * Heuristic boundaries — overlaps with triggers, dedup per-array only.
 * parseTranscript stores the result in `primary_rationale.boundaries_applied`.
 *
 * @param {Array<object>|null|undefined} turn - Transcript entries of one turn.
 * @returns {string[]} Unique canonical references (e.g. 'ADR-011 §3.1', 'PSR_v1 R16');
 *   [] when nothing matches.
 */
export function extractBoundaries(turn) {
  return collectReferences(assistantTextOfTurn(turn), BOUNDARY_PATTERNS);
}
// ES module imports are hoisted, so this import works from the end of the test file.
// NOTE(review): consider merging it into the file's top-level import from the same module.
import {
  extractTriggers,
  extractCandidates,
  extractBoundaries,
} from './observer-transcript-parser.mjs';

describe('reasoning capture heuristics (Task 6)', () => {
  // Minimal turn: a single assistant entry holding one text block.
  const mkTurn = (txt) => [{ message: { role: 'assistant', content: [{ type: 'text', text: txt }] } }];

  describe('extractTriggers', () => {
    it('finds Pravila §N references', () => {
      expect(extractTriggers(mkTurn('per Pravila §12.2 hard-rule'))).toContain('Pravila §12.2');
    });
    it('finds ADR references', () => {
      expect(extractTriggers(mkTurn('see ADR-011 anchor'))).toContain('ADR-011');
    });
    it('finds PSR_v1 R refs', () => {
      expect(extractTriggers(mkTurn('PSR_v1 R10.1 requires it'))).toContain('PSR_v1 R10.1');
    });
    it('finds routing-off-phase L refs from canonical form', () => {
      expect(extractTriggers(mkTurn('routing-off-phase L12 chain'))).toContain('routing-off-phase L12');
    });
    it('finds hard-rule / hard-floor (case-insensitive)', () => {
      const res = extractTriggers(mkTurn('this is a hard-rule per §15'));
      expect(res.some((t) => t.toLowerCase().includes('hard-rule'))).toBe(true);
    });
    it('deduplicates repeated triggers', () => {
      const res = extractTriggers(mkTurn('Pravila §16 and Pravila §16 again'));
      expect(res.filter((t) => t === 'Pravila §16')).toHaveLength(1);
    });
    it('returns empty for plain prose', () => {
      expect(extractTriggers(mkTurn('just plain text'))).toEqual([]);
    });
    it('safe on null/empty', () => {
      expect(extractTriggers(null)).toEqual([]);
      expect(extractTriggers([])).toEqual([]);
    });
  });

  describe('extractCandidates', () => {
    it('extracts numbered options (≥2)', () => {
      const c = extractCandidates(mkTurn('1. brainstorming\n2. subagent-driven\n3. direct'));
      expect(c).toContain('brainstorming');
      expect(c.length).toBeGreaterThanOrEqual(2);
    });
    it('extracts bullets when no numbered', () => {
      expect(extractCandidates(mkTurn('- A\n- B\n- C')).length).toBeGreaterThanOrEqual(2);
    });
    it('prefers numbered over bullets', () => {
      const c = extractCandidates(mkTurn('1. X\n2. Y\n- A\n- B'));
      expect(c).toContain('X');
      expect(c).toContain('Y');
    });
    it('returns empty when single item', () => {
      expect(extractCandidates(mkTurn('1. only one'))).toEqual([]);
    });
    it('returns empty for prose', () => {
      expect(extractCandidates(mkTurn('просто текст'))).toEqual([]);
    });
    it('safe on null/empty', () => {
      expect(extractCandidates(null)).toEqual([]);
    });
  });

  describe('extractBoundaries', () => {
    it('finds ADR + PSR + Pravila refs', () => {
      const b = extractBoundaries(mkTurn('per ADR-011 + PSR_v1 R16 + Pravila §16.2'));
      expect(b).toContain('ADR-011');
      expect(b.some((x) => x.includes('PSR_v1 R16'))).toBe(true);
      expect(b).toContain('Pravila §16.2');
    });
    // The previous assertion here, toEqual(expect.arrayContaining([])), accepts any
    // array and therefore verified nothing; these cases pin the actual behaviour.
    it('finds routing-off-phase L refs', () => {
      expect(extractBoundaries(mkTurn('routing-off-phase L12 applies'))).toEqual(['routing-off-phase L12']);
    });
    it('normalizes the "L<n> chain" shorthand to the canonical form', () => {
      expect(extractBoundaries(mkTurn('the L12 chain fires'))).toEqual(['routing-off-phase L12']);
    });
    it('ignores "chain L<n>" because the shorthand requires the "L<n> chain" order', () => {
      expect(extractBoundaries(mkTurn('chain L12 fires'))).toEqual([]);
    });
    it('dedups repeated boundaries', () => {
      const b = extractBoundaries(mkTurn('ADR-011 and ADR-011'));
      expect(b.filter((x) => x === 'ADR-011')).toHaveLength(1);
    });
    it('safe on null/empty', () => {
      expect(extractBoundaries(null)).toEqual([]);
    });
  });
});

describe('parseTranscript — heuristic primary_rationale (Task 6)', () => {
  it('populates triggers_matched / candidates_considered / boundaries_applied', () => {
    // JSONL transcript: session header, one user entry, one assistant entry.
    const transcript = [
      JSON.stringify({ sessionId: 's1' }),
      JSON.stringify({ type: 'user', message: { role: 'user', content: 'делай' }, uuid: 'u1', timestamp: '2026-05-20T00:00:00Z' }),
      JSON.stringify({ type: 'assistant', message: { role: 'assistant', content: [
        { type: 'text', text: 'per Pravila §12.2 hard-rule\n1. brainstorming\n2. direct\nADR-011 applies.' }
      ] }, uuid: 'u2', timestamp: '2026-05-20T00:01:00Z' }),
    ].join('\n');
    const ep = parseTranscript(transcript);
    expect(ep.primary_rationale.triggers_matched).toContain('Pravila §12.2');
    expect(ep.primary_rationale.candidates_considered).toContain('brainstorming');
    expect(ep.primary_rationale.boundaries_applied).toContain('ADR-011');
  });
});