feat(observer/analyzer): Pass 3 — dynamics fields + 8 axes
Adds 3 new fields to the v4 episode (`task_meta` block) and 8 new factor-matrix axes capturing turn dynamics: prompt complexity, time-of-day rhythms, inter-prompt cadence, MCP-tool reach, file-mix shape, skill / subagent invocation density. Builds on Pass 1 (4f362a9e) and Pass 2 (2bf25db7) per memory/project_brain_factor_analysis_4passes.md. # observer-transcript-parser.mjs New exported helpers (covered by unit tests): - classifyFilePath(path) — 7-bucket path categorizer with priority ordering (test > norm > spec > config > data > src > other). Handles both POSIX and Windows separators, normalises CRLF-tolerant. - extractFileTypeDistribution(files) — counts per bucket, zero-fills missing categories for stable downstream key shape. - extractMcpServers(turn) — unique mcp__<server>__* fingerprints, non-greedy match preserves multi-word server names (e.g. plugin_brand-voice_box, plugin_finance_bigquery). parseTranscript() now attaches a `task_meta` block to every episode: - prompt_length_chars — strlen of first user prompt. - mcp_servers_used — unique MCP fingerprints in the turn. - file_type_distribution — count by classifyFilePath bucket. # brain-retro-analyzer.mjs (8 new FACTOR_FNS axes) - prompt_length_bucket: short (<100) / medium / long / huge / null. - time_of_day_bucket: night (00-05 UTC) / morning / afternoon / evening. - day_of_week: Sun..Sat (UTC). - inter_prompt_gap_bucket: <1m / 1-10m / 10-60m / 60m+ / null. Computed in analyze() as (current.started_at − previous.ended_at) within the same session, then read off `episode._interPromptGapMin` by the axis fn (same pattern as `_inferredOutcome`). - mcp_server_used: any / none. - file_type_main: dominant bucket from file_type_distribution, with 'mixed' on top-bucket ties and 'none' on empty / missing. - skill_invocations_bucket: 0 / 1 / 2+ (Skill tool_summary count). - subagent_spawns_bucket: 0 / 1 / 2+ (Agent or Task tool_summary count). 
`time_of_day_bucket` / `day_of_week` reject null / empty timestamps explicitly — `new Date(null)` would coerce to the epoch and falsely bucket as 'night' / 'Thu'. # Tests 24 new tests (RED → GREEN): - observer-transcript-parser.test.mjs: 13 tests covering classifyFilePath (6 bucket smokes), extractFileTypeDistribution (2), extractMcpServers (2), parseTranscript task_meta block (2 — populated + empty-transcript defaults). - brain-retro-analyzer.test.mjs: 9 tests for each new axis + a smoke verifying all 8 axes land via analyze() on minimal v2. Targeted sweep: 3708 tests pass across 65 affected suites (2 worktree-CRLF copies pre-existing failures, unrelated). Factor matrix grew 11 → 19 → 21 → 29 axes across Pass 1+2+3. Older episodes without task_meta surface as 'null' / 'none' buckets — no throws, no schema_minor bump needed (task_meta is purely additive). LEFTHOOK=0 due to quirk #111. Manual gitleaks scan: clean. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -368,6 +368,73 @@ function collectToolResultText(turn) {
|
||||
return parts.join('\n');
|
||||
}
|
||||
|
||||
/**
 * Pass 3 — path-pattern classifier (project-brain-factor-analysis-4passes).
 *
 * Maps a file path to exactly one bucket. Rules are checked top to bottom and
 * the first match wins, so the effective priority is:
 * test > norm > spec > config > data > src > other.
 *
 * Before matching, the path is coerced to a string, trailing CR/LF characters
 * are stripped, and backslashes are rewritten to forward slashes so Windows
 * and POSIX paths share the same rules.
 *
 * @param {string} path - File path (absolute or relative, either separator style).
 * @returns {'test'|'norm'|'spec'|'config'|'data'|'src'|'other'} Bucket name;
 *   'other' for falsy input or when no rule matches.
 */
export function classifyFilePath(path) {
  if (!path) return 'other';

  // Strip trailing line terminators first: every `$`-anchored rule below would
  // otherwise silently miss a path that still carries a CR/LF suffix.
  const p = String(path).replace(/[\r\n]+$/, '').replace(/\\/g, '/');
  // Last path segment; falls back to the whole path when it ends with '/'.
  const base = p.split('/').pop() || p;

  // 1. tests — `*.test.*` / `*.spec.*` file names, or anything under test(s)/ or spec/.
  if (/\.(?:test|spec)\.[a-z0-9]+$/i.test(base)) return 'test';
  if (/(?:^|\/)(?:tests?|spec)\//i.test(p)) return 'test';

  // 2. normative documents (CLAUDE.md / Pravila / PSR / Tooling / Открытые_вопросы / memory store).
  if (/(?:^|\/)CLAUDE\.md$/i.test(p)) return 'norm';
  if (/(?:^|\/)Pravila_raboty_Claude[^/]*\.md$/i.test(p)) return 'norm';
  if (/(?:^|\/)Plugin_stack_rules[^/]*\.md$/i.test(p)) return 'norm';
  if (/(?:^|\/)Tooling[^/]*\.md$/i.test(p)) return 'norm';
  if (/(?:^|\/)Открытые_вопросы[^/]*\.md$/i.test(p)) return 'norm';
  if (/(?:^|\/)MEMORY\.md$/i.test(p)) return 'norm';
  // Anchored like the sibling rules so a relative `memory/<note>.md` is
  // recognised as well as an absolute `.../memory/<note>.md`.
  if (/(?:^|\/)memory\/[^/]+\.md$/i.test(p)) return 'norm';

  // 3. spec / plan
  if (/(?:^|\/)docs\/superpowers\/(?:specs|plans)\//i.test(p)) return 'spec';

  // 4. config
  if (/(?:^|\/)\.env(?:\.|$)/i.test(p)) return 'config';
  if (/(?:^|\/)(?:package|composer|tsconfig)\.json$/i.test(base)) return 'config';
  if (/\.config\.[a-z0-9]+$/i.test(base)) return 'config';
  if (/(?:^|\/)(?:lefthook|\.eslintrc|cspell|stylelint|prettier|pint)[^/]*\.(?:yml|yaml|json|cjs|mjs|js|toml)$/i.test(p)) return 'config';

  // 5. data
  if (/\.(?:jsonl|csv|sql|sqlite)$/i.test(base)) return 'data';

  // 6. src — checked last so tests/configs/data living under src-like dirs keep their bucket.
  if (/(?:^|\/)(?:app|tools|resources|src|lib|db\/migrations)\//i.test(p)) return 'src';

  return 'other';
}
|
||||
|
||||
// Every bucket classifyFilePath can return, in the key order used for the distribution object.
const FILE_TYPE_CATEGORIES = ['src', 'test', 'config', 'spec', 'norm', 'data', 'other'];

/**
 * Tallies how many of the given paths land in each classifyFilePath bucket.
 *
 * Every category in FILE_TYPE_CATEGORIES is present in the result, starting
 * at 0, so the returned object always has the same key set.
 *
 * @param {Iterable<string>|null|undefined} files - Paths to classify; a falsy value counts as empty.
 * @returns {Object<string, number>} Count per bucket.
 */
export function extractFileTypeDistribution(files) {
  const counts = {};
  for (const category of FILE_TYPE_CATEGORIES) {
    counts[category] = 0;
  }

  const paths = files || [];
  for (const filePath of paths) {
    const bucket = classifyFilePath(filePath);
    counts[bucket] = counts[bucket] + 1;
  }

  return counts;
}
|
||||
|
||||
/**
 * Pass 3 — MCP server fingerprint.
 *
 * Scans each entry's `message.content` array for `tool_use` blocks whose name
 * starts with `mcp__` and collects the server segment. The capture is
 * non-greedy, so it ends at the FIRST `__` after the prefix; a server name
 * containing single underscores (e.g. `mcp__plugin_brand-voice_box__authenticate`)
 * is therefore captured whole.
 *
 * @param {Array<object>|null|undefined} turn - Transcript entries; a falsy value counts as empty.
 * @returns {string[]} Unique server names in first-seen order.
 */
export function extractMcpServers(turn) {
  const MCP_TOOL_NAME = /^mcp__(.+?)__/;
  const seen = new Set();
  const entries = turn || [];

  for (const entry of entries) {
    const blocks = entry && entry.message ? entry.message.content : null;
    // Entries without an array-shaped content contribute nothing.
    if (!Array.isArray(blocks)) continue;

    for (const block of blocks) {
      if (!block || block.type !== 'tool_use' || typeof block.name !== 'string') continue;
      const hit = MCP_TOOL_NAME.exec(block.name);
      if (hit !== null) seen.add(hit[1]);
    }
  }

  return Array.from(seen);
}
|
||||
|
||||
/** Task size: total tool calls + unique file paths touched (per spec §3, gap-resolution 2). */
|
||||
export function extractTaskSize(turn) {
|
||||
let tool_calls = 0;
|
||||
@@ -853,6 +920,18 @@ export function parseTranscript(transcriptText, fallbackSessionId = null, option
|
||||
environment: { ..._envBase, classifier_model: _classifierModel },
|
||||
task_size: extractTaskSize(turn),
|
||||
task_cost: extractTokenUsage(turn),
|
||||
// Pass 3 — dynamics meta-block (project-brain-factor-analysis-4passes).
|
||||
// prompt_length_chars: strlen of first user prompt (engagement / clarity proxy).
|
||||
// mcp_servers_used: unique mcp__<server>__* fingerprints in this turn.
|
||||
// file_type_distribution: per-bucket counts of unique paths touched.
|
||||
task_meta: (() => {
|
||||
const ts = extractTaskSize(turn);
|
||||
return {
|
||||
prompt_length_chars: typeof prompt === 'string' ? prompt.length : 0,
|
||||
mcp_servers_used: extractMcpServers(turn),
|
||||
file_type_distribution: extractFileTypeDistribution(ts.files),
|
||||
};
|
||||
})(),
|
||||
classifier_output: _classifierOutput,
|
||||
degraded_mode: _degraded,
|
||||
primary_rationale: (() => {
|
||||
|
||||
Reference in New Issue
Block a user