From ea9430d8a7076f03b3cd2d36517f585c9fa01f22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=94=D0=BC=D0=B8=D1=82=D1=80=D0=B8=D0=B9?= Date: Tue, 26 May 2026 10:52:35 +0300 Subject: [PATCH] feat(observer): session-length warning in STATUS.md (retro #5 candidate B) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Brain-retro #5 surfaced a correlation: long sessions (≥50 turns) correlate with discipline drift. Reviewer pass showed regulated rate dropped 19% → 4.5% during a long session. This commit adds: • computeSessionLengthBlock(episodes, opts?) — pure function that groups today's (UTC) episodes by task_id, finds the MAX session_turn per session, and surfaces sessions with ≥threshold turns (default 50) in a markdown block. • Wire-up in renderStatus + main CLI: new "## Длинные сессии" section inserted between disciplineBlock/activeProjects and costBlock. • 7 new unit tests (36/36 total green). Behavior: • Empty or missing episodes array → "(нет данных)" placeholder. • No session today (UTC) reaches the threshold → ✅ "Ни одной сессии с >50 ходов". • One+ flagged → ⚠️ table { session_id, max turn, regulated %, last episode ts }. • Custom threshold via opts.threshold. Per memory project_enforce_hard_rules.md: this is an indicator, not a hook; no blocking, just observability. The owner can decide whether to restart when regulated % drops in a long session. 
--- docs/observer/STATUS.md | 28 ++++++------ tools/status-md-generator.mjs | 67 ++++++++++++++++++++++++++++- tools/status-md-generator.test.mjs | 69 +++++++++++++++++++++++++++++- 3 files changed, 149 insertions(+), 15 deletions(-) diff --git a/docs/observer/STATUS.md b/docs/observer/STATUS.md index 9e22c936..49b1e0bf 100644 --- a/docs/observer/STATUS.md +++ b/docs/observer/STATUS.md @@ -1,6 +1,6 @@ # Brain Status (auto-generated) -Last updated: 2026-05-26T05:20:43.980Z +Last updated: 2026-05-26T07:52:20.201Z | Контролёр | Состояние | Детали | |---|---|---| @@ -8,13 +8,13 @@ Last updated: 2026-05-26T05:20:43.980Z | C2 Cross-ref consistency | ✅ | [cross-ref-checker] OK — 0 drift in 4 files | | C3 Observer-of-observer | ✅ | [observer-of-observer] OK — last read 0 week(s) ago | | C4 Сигнальный статус | ✅ | This file (self-reference) | -| C5 Observer-coverage | ⚠️ | 444 episode(s) this month · Stop-hook + post-commit OK · 21 missed activation(s) — see /brain-retro | +| C5 Observer-coverage | ⚠️ | 464 episode(s) this month · Stop-hook + post-commit OK · 21 missed activation(s) — see /brain-retro | | C6 Chain map sync | ✅ | [chain-map-checker] OK — 16 chains in sync | ## Метрики (информационные, не алерты) -- Observer evidence: 444 episodes this month, 0 observer_error markers, 70 PII matches before filter -- Legacy v1 episodes (not in factor analysis): 305 +- Observer evidence: 464 episodes this month, 0 observer_error markers, 74 PII matches before filter +- Legacy v1 episodes (not in factor analysis): 325 - Last /brain-retro: 0 day(s) ago - Использование узлов: см. `/brain-retro` (раз в спринт). missed_activations: 21. **Неиспользованные узлы — не алерт, если профильной задачи не было** (Pravila §16.4 v1.36; capability-readiness; см. memory `feedback_brain_unused_tools_not_problem` — outside-repo memory store). 
@@ -24,17 +24,17 @@ Baseline дисциплины роутера (этап 2 router discipline overh | Тип задачи | Эпизодов | % с триггер-матчем | % через скил | |---|---|---|---| -| analysis | 19 | 42.1% | 21.1% | -| monitoring | 19 | 0.0% | 0.0% | +| monitoring | 22 | 0.0% | 0.0% | +| analysis | 20 | 40.0% | 20.0% | | feature | 14 | 14.3% | 0.0% | | planning | 11 | 18.2% | 18.2% | | bugfix | 11 | 36.4% | 45.5% | -| cleanup | 2 | 0.0% | 0.0% | +| cleanup | 4 | 0.0% | 0.0% | | refactor | 1 | 0.0% | 0.0% | -Router step distribution: 1: 180, 2: 158, 3: 54, 5: 47 +Router step distribution: 1: 187, 2: 170, 3: 54, 5: 48 -Boundaries applied (ADR / границы): 65 of 439 эпизодов (14.8%). +Boundaries applied (ADR / границы): 65 of 459 эпизодов (14.2%). ## Активные многоэтапные проекты @@ -44,6 +44,10 @@ Boundaries applied (ADR / границы): 65 of 439 эпизодов (14.8%). - Этап 3 (принуждение — хук на routing) — Phase A+B (классификатор + 3 хука: router-prehook/tool-gate/stop-gate в `.claude/settings.json`) ✅ + влит в main 2026-05-24. Гейт работает в режиме **`warn-only`** (только stderr-предупреждения, никакой блокировки). Bug-fix `bec69aa5`: `deriveRouterStep` в `tools/discipline-metrics.mjs` — шаг роутера теперь выводится из наблюдаемых признаков (был захардкоженной константой 1). **Follow-up 3 fixes 2026-05-24** (после ANTHROPIC_API_KEY + рестарта CC выявлены при инспекции state): (a) UTF-8 stdin helper `tools/router-stdin-helper.mjs` через `StringDecoder` + подключение к 3 хукам (русский в state-файл и Anthropic API без mojibake); (b) `tools/observer-state-enricher.mjs` — pure helper для чтения `router-state-.json`; (c) `parseTranscript` обогащение `primary_rationale` 4 полями (`recommended_node` override + `recommended_chain` + `chain_progress` + `chain_completed`). 538 tools-тестов GREEN. Plan: `docs/superpowers/plans/2026-05-24-router-stage3-three-fixes.md`. 
CHECKPOINT B: дать warn-only накопить реальные наблюдения с **починенным** сторожем (план говорит «минимум 24 часа»), затем Task 9 — переключение в `enforce` + 2 новых метрики (domain-hit-rate / chain-completion). Plan: `docs/superpowers/plans/2026-05-24-router-overhaul-stage-3-enforcement.md`. - Этап 4 (уборка устаревших правил, deprecation `observer-classification-map.json` → удаление) — не начат. +## Длинные сессии + +Ни одной сессии с >50 ходов сегодня (UTC). ✅ + ## Стоимость месяца | Компонент | Токены (in/out) | USD | @@ -61,13 +65,13 @@ Boundaries applied (ADR / границы): 65 of 439 эпизодов (14.8%). ## Авто-ретроспектива -Last self-retrospect: never -Episodes since last run: 0 / threshold: 10 +Last self-retrospect: never ⚠️ (202 эпизодов с последнего запуска, порог 10) +Episodes since last run: 202 / threshold: 10 ## Reviewer: субагент vs fallback -0 эпизодов проверено из 444. +0 эпизодов проверено из 464. ## Алерт-индикаторы diff --git a/tools/status-md-generator.mjs b/tools/status-md-generator.mjs index e191fb0a..b3a1b2f7 100644 --- a/tools/status-md-generator.mjs +++ b/tools/status-md-generator.mjs @@ -118,6 +118,67 @@ Last self-retrospect: never } } +/** + * Brain-retro #5 candidate B (2026-05-26): session-length warning. + * + * Long sessions correlate with discipline drift — reviewer pass on retro #5 + * showed regulated rate dropped 19% → 4.5% during a long session. + * + * Algorithm: group episodes by task_id (session id), compute MAX + * session_turn per session over the current calendar day (UTC), surface + * sessions with turn count >= threshold. + * + * Pure — takes episodes array, returns markdown string. No I/O. + */ +export function computeSessionLengthBlock(episodes, opts = {}) { + const threshold = opts.threshold ?? 50; + const now = opts.now ? 
new Date(opts.now) : new Date(); + const todayUtc = now.toISOString().slice(0, 10); + + if (!Array.isArray(episodes) || episodes.length === 0) { + return `## Длинные сессии\n\n(нет данных)`; + } + + const sessions = new Map(); + for (const e of episodes) { + if (!e || !e.task_id || !e.timestamps?.started_at) continue; + if (e.timestamps.started_at.slice(0, 10) !== todayUtc) continue; + const turn = Number(e.environment?.session_turn); + if (!Number.isFinite(turn)) continue; + const id = e.task_id; + const cur = sessions.get(id) || { maxTurn: 0, lastSeen: '', regulated: 0, total: 0 }; + if (turn > cur.maxTurn) cur.maxTurn = turn; + if (e.timestamps.started_at > cur.lastSeen) cur.lastSeen = e.timestamps.started_at; + cur.total++; + if (e.path_type === 'regulated') cur.regulated++; + sessions.set(id, cur); + } + + const longOnes = [...sessions.entries()] + .filter(([, v]) => v.maxTurn >= threshold) + .sort((a, b) => b[1].maxTurn - a[1].maxTurn); + + if (longOnes.length === 0) { + return `## Длинные сессии\n\nНи одной сессии с >${threshold} ходов сегодня (UTC). ✅`; + } + + const rows = longOnes.map(([id, v]) => { + const regPct = v.total > 0 ? ((v.regulated / v.total) * 100).toFixed(0) : '—'; + const shortId = id.slice(0, 8); + return `| \`${shortId}\` | ${v.maxTurn} | ${regPct}% | ${v.lastSeen} |`; + }).join('\n'); + + return `## Длинные сессии + +⚠️ Сегодня (${todayUtc} UTC) есть сессии с ≥${threshold} ходов — корреляция с падением дисциплины роутинга (retro #5 candidate B). + +| session_id | макс. ход | % regulated | последний эпизод | +|---|---|---|---| +${rows} + +Long sessions correlate with discipline drift. 
Если % regulated просел в текущей сессии — рассмотри перезапуск.`; +} + export function computeReviewerBlock(episodes) { const reviewed = episodes.filter(ep => ep.review?.reviewed_at !== null && ep.review?.reviewed_at !== undefined); const total = episodes.length; @@ -213,7 +274,7 @@ Last updated: ${now} - Legacy v1 episodes (not in factor analysis): ${observer.v1Episodes || 0} - Last /brain-retro: ${retroLine} - Использование узлов: см. \`/brain-retro\` (раз в спринт). missed_activations: ${missed.totalMissed}. **Неиспользованные узлы — не алерт, если профильной задачи не было** (Pravila §16.4 v1.36; capability-readiness; см. memory \`feedback_brain_unused_tools_not_problem\` — outside-repo memory store). -${disciplineBlock}${projectsBlock}${inputs.costBlock ? `\n${inputs.costBlock}\n` : ''}${inputs.anomalyBlock ? `\n${inputs.anomalyBlock}\n` : ''}${inputs.selfRetrospectBlock ? `\n${inputs.selfRetrospectBlock}\n` : ''}${inputs.reviewerBlock ? `\n${inputs.reviewerBlock}\n` : ''} +${disciplineBlock}${projectsBlock}${inputs.sessionLengthBlock ? `\n${inputs.sessionLengthBlock}\n` : ''}${inputs.costBlock ? `\n${inputs.costBlock}\n` : ''}${inputs.anomalyBlock ? `\n${inputs.anomalyBlock}\n` : ''}${inputs.selfRetrospectBlock ? `\n${inputs.selfRetrospectBlock}\n` : ''}${inputs.reviewerBlock ? 
`\n${inputs.reviewerBlock}\n` : ''} ## Алерт-индикаторы ✅ — норма ・ ⚠️ — внимание ・ 🔴 — действие требуется ・ ⚪ — не запускалось @@ -343,15 +404,17 @@ if (process.argv[1] && process.argv[1].replace(/\\/g, '/').endsWith('/status-md- }; const eps = loadCurrentMonthEpisodes(); - let costBlock = null, anomalyBlock = null, selfRetrospectBlock = null, reviewerBlock = null; + let costBlock = null, anomalyBlock = null, selfRetrospectBlock = null, reviewerBlock = null, sessionLengthBlock = null; try { costBlock = computeCostBlock(eps, PRICING); } catch (err) { console.warn('[status-md-generator] costBlock skipped:', err.message); costBlock = '(нет данных)'; } try { anomalyBlock = computeAnomalyBlock(eps); } catch (err) { console.warn('[status-md-generator] anomalyBlock skipped:', err.message); anomalyBlock = '(нет данных)'; } try { selfRetrospectBlock = computeSelfRetrospectBlock(join('docs', 'observer', '.self-retrospect-counter.json')); } catch (err) { console.warn('[status-md-generator] selfRetrospectBlock skipped:', err.message); selfRetrospectBlock = '(нет данных)'; } try { reviewerBlock = computeReviewerBlock(eps); } catch (err) { console.warn('[status-md-generator] reviewerBlock skipped:', err.message); reviewerBlock = '(нет данных)'; } + try { sessionLengthBlock = computeSessionLengthBlock(eps); } catch (err) { console.warn('[status-md-generator] sessionLengthBlock skipped:', err.message); sessionLengthBlock = '(нет данных)'; } inputs.costBlock = costBlock; inputs.anomalyBlock = anomalyBlock; inputs.selfRetrospectBlock = selfRetrospectBlock; inputs.reviewerBlock = reviewerBlock; + inputs.sessionLengthBlock = sessionLengthBlock; const md = renderStatus(inputs); writeFileSync('docs/observer/STATUS.md', md); diff --git a/tools/status-md-generator.test.mjs b/tools/status-md-generator.test.mjs index fa2eab55..818771b0 100644 --- a/tools/status-md-generator.test.mjs +++ b/tools/status-md-generator.test.mjs @@ -1,5 +1,5 @@ import { describe, it, expect } from 'vitest'; 
-import { renderStatus, computeCostBlock, computeAnomalyBlock, computeSelfRetrospectBlock, computeReviewerBlock } from './status-md-generator.mjs'; +import { renderStatus, computeCostBlock, computeAnomalyBlock, computeSelfRetrospectBlock, computeReviewerBlock, computeSessionLengthBlock } from './status-md-generator.mjs'; const baseInputs = (overrides = {}) => ({ now: '2026-05-19T10:00:00+03:00', @@ -312,3 +312,70 @@ describe('renderStatus — 4 new optional blocks integration', () => { expect(md).not.toContain('## Reviewer: субагент vs fallback'); }); }); + +// ----------------------------------------------------------------------------- +// computeSessionLengthBlock — brain-retro #5 candidate B (2026-05-26) +// Long sessions correlate with discipline drift; surface a warning when any +// session today (UTC) has ≥50 turns. +// ----------------------------------------------------------------------------- + +describe('computeSessionLengthBlock', () => { + const day = '2026-05-26'; + const ep = (turn, opts = {}) => ({ + task_id: opts.id ?? 'sess-1', + timestamps: { started_at: `${opts.day ?? day}T01:00:0${turn % 10}Z`, ended_at: `${opts.day ?? day}T01:00:0${turn % 10}Z` }, + environment: { session_turn: turn }, + path_type: opts.regulated ? 
'regulated' : 'improvised', + }); + + it('returns "no data" placeholder when episodes empty', () => { + expect(computeSessionLengthBlock([])).toContain('(нет данных)'); + }); + + it('returns OK (✅) when no session reaches threshold', () => { + const out = computeSessionLengthBlock([ep(1), ep(2), ep(10)], { now: `${day}T05:00:00Z` }); + expect(out).toContain('✅'); + expect(out).toContain('Ни одной сессии'); + }); + + it('flags a session that crossed threshold', () => { + const eps = Array.from({ length: 55 }, (_, i) => ep(i + 1)); + const out = computeSessionLengthBlock(eps, { now: `${day}T05:00:00Z` }); + expect(out).toContain('⚠️'); + expect(out).toContain('`sess-1'); + expect(out).toContain('55'); // max turn + }); + + it('respects custom threshold', () => { + const eps = Array.from({ length: 15 }, (_, i) => ep(i + 1)); + const flagged = computeSessionLengthBlock(eps, { now: `${day}T05:00:00Z`, threshold: 10 }); + const notFlagged = computeSessionLengthBlock(eps, { now: `${day}T05:00:00Z`, threshold: 20 }); + expect(flagged).toContain('⚠️'); + expect(notFlagged).toContain('✅'); + }); + + it('ignores episodes from other UTC days', () => { + const eps = Array.from({ length: 55 }, (_, i) => ep(i + 1, { day: '2026-05-25' })); + const out = computeSessionLengthBlock(eps, { now: `${day}T05:00:00Z` }); + expect(out).toContain('✅'); // yesterday's session not counted + }); + + it('computes regulated % per long session', () => { + const eps = Array.from({ length: 50 }, (_, i) => ep(i + 1, { regulated: i < 10 })); + const out = computeSessionLengthBlock(eps, { now: `${day}T05:00:00Z`, threshold: 40 }); + expect(out).toContain('⚠️'); + expect(out).toContain('20%'); // 10 regulated out of 50 = 20% + }); + + it('handles missing session_turn / task_id gracefully', () => { + const eps = [ + { task_id: 'x', timestamps: { started_at: `${day}T01:00:00Z` } }, // no session_turn + { timestamps: { started_at: `${day}T01:00:00Z` }, environment: { session_turn: 60 } }, // no task_id + 
ep(70, { id: 'real' }), + ]; + const out = computeSessionLengthBlock(eps, { now: `${day}T05:00:00Z` }); + expect(out).toContain('⚠️'); + expect(out).toContain('`real'); + expect(out).toContain('70'); + }); +});