feat(observer): Task 2 — extractTokenUsage + task_cost in parseTranscript

- export extractTokenUsage(turn): sums input/output/cache/iterations/web_search/web_fetch across all assistant messages in a turn
- parseTranscript now includes task_cost field (zero-filled when no usage)
- 7 new tests (5 unit + 2 integration); total 248/248 GREEN
- V2_FIELDS in observer-stop-hook.mjs NOT changed (backward compat)
2026-05-20 12:40:41 +03:00
parent 47c03a9e18
commit 8e5eaecf6a
2 changed files with 130 additions and 0 deletions
@@ -240,6 +240,52 @@ export function extractTaskSize(turn) {
  return { tool_calls, files_touched: files.size, files: [...files] };
 }

/**
 * Token-usage aggregation over every entry in the turn whose
 * `message.usage` is an object.
 *
 * DESIGN: returns a zero-filled object (NOT null) when no `usage` data was
 * captured. Consumers cannot currently distinguish "actually 0 tokens" from
 * "no usage data" — accepted trade-off because (a) every assistant message
 * in real Claude Code transcripts has `usage` (verified B1 brain-retro
 * 2026-05-20: 6265/6265 messages with usage, 0 partial-stream), and
 * (b) `task_cost` is not yet read by analyzer/STATUS.md, so the semantic
 * gap is a future-only concern. Re-evaluate when factor matrix adds cost.
 *
 * Captures: 4 base token fields + `iterations` (extended-thinking detector)
 * + `server_tool_use.{web_search,web_fetch}_requests` counts.
 * Other usage fields (cache_creation object, inference_geo, service_tier,
 * speed) — out-of-scope for current analyzer.
 *
 * Defensive:
 *  - skips entries where `usage` is missing or not an object (handles
 *    malformed transcript edge cases like `"usage": 42`);
 *  - counts a field only when it is a finite number, so a malformed value
 *    such as `"output_tokens": "7"` or `true` contributes 0 instead of
 *    turning the running total into a concatenated string;
 *  - reads `server_tool_use` only when it is itself an object.
 *
 * NOTE(review): entries are not de-duplicated. If a transcript ever repeats
 * one message's `usage` across several entries, it is summed each time —
 * confirm against the transcript format.
 *
 * @param {Array<object>|null|undefined} turn - Transcript entries of one
 *   turn; a falsy value is treated as an empty turn.
 * @returns {{
 *   input_tokens: number,
 *   output_tokens: number,
 *   cache_read_input_tokens: number,
 *   cache_creation_input_tokens: number,
 *   web_search_requests: number,
 *   web_fetch_requests: number,
 *   iterations: number
 * }} Summed counters; every field is always a number.
 */
export function extractTokenUsage(turn) {
  // Anything that is not a finite number (string, boolean, NaN, Infinity,
  // null, undefined, object) contributes nothing to a total.
  const count = (value) =>
    (typeof value === 'number' && Number.isFinite(value) ? value : 0);

  // Key order is kept stable because the object is embedded in the parsed
  // transcript record as `task_cost`.
  const totals = {
    input_tokens: 0,
    output_tokens: 0,
    cache_read_input_tokens: 0,
    cache_creation_input_tokens: 0,
    web_search_requests: 0,
    web_fetch_requests: 0,
    iterations: 0,
  };

  for (const entry of turn || []) {
    const usage = entry && entry.message && entry.message.usage;
    if (!usage || typeof usage !== 'object') continue;

    totals.input_tokens += count(usage.input_tokens);
    totals.output_tokens += count(usage.output_tokens);
    totals.cache_read_input_tokens += count(usage.cache_read_input_tokens);
    totals.cache_creation_input_tokens += count(usage.cache_creation_input_tokens);
    totals.iterations += count(usage.iterations);

    const serverTools = usage.server_tool_use;
    if (serverTools && typeof serverTools === 'object') {
      totals.web_search_requests += count(serverTools.web_search_requests);
      totals.web_fetch_requests += count(serverTools.web_fetch_requests);
    }
  }

  return totals;
}
+
 /** Classify the opening user-prompt sentiment (per spec §6 / gap-resolution 1). */
 export function classifyPromptSignal(text) {
  const t = String(text || '').toLowerCase().trim();
@@ -454,6 +500,7 @@ export function parseTranscript(transcriptText, fallbackSessionId = null) {
    decision_provenance,
    environment: extractEnvironment(entries, start),
    task_size: extractTaskSize(turn),
+    task_cost: extractTokenUsage(turn),
    primary_rationale: {
      step: 1,
      node_chosen: skills.length > 0 ? skills[0] : 'direct',