feat(observer): Task 2 — extractTokenUsage + task_cost in parseTranscript
- export extractTokenUsage(turn): sums input/output/cache/iterations/web_search/web_fetch across all assistant messages in a turn
- parseTranscript now includes a task_cost field (zero-filled when no usage data is present)
- 7 new tests (5 unit + 2 integration); total 248/248 GREEN
- V2_FIELDS in observer-stop-hook.mjs NOT changed (backward compatibility)
This commit is contained in:
@@ -240,6 +240,52 @@ export function extractTaskSize(turn) {
|
||||
return { tool_calls, files_touched: files.size, files: [...files] };
|
||||
}
|
||||
|
||||
/**
 * Token-usage aggregation across every entry of the turn that carries a
 * `message.usage` object (in practice: the assistant messages).
 *
 * DESIGN: returns a zero-filled object (NOT null) when no `usage` data was
 * captured. Consumers cannot currently distinguish "actually 0 tokens" from
 * "no usage data" — accepted trade-off because (a) every assistant message
 * in real Claude Code transcripts has `usage` (verified B1 brain-retro
 * 2026-05-20: 6265/6265 messages with usage, 0 partial-stream), and
 * (b) `task_cost` is not yet read by analyzer/STATUS.md, so the semantic
 * gap is a future-only concern. Re-evaluate when factor matrix adds cost.
 *
 * Captures: 4 base token fields + `iterations` (extended-thinking detector)
 * + `server_tool_use.{web_search,web_fetch}_requests` counts.
 * Other usage fields (cache_creation object, inference_geo, service_tier,
 * speed) — out-of-scope for current analyzer.
 *
 * Defensive:
 *  - skips entries where `usage` is not a plain object (handles malformed
 *    transcript edge cases like `"usage": 42` or `"usage": []`);
 *  - counts a field only when it is a finite number, so a malformed value
 *    such as `"input_tokens": "10"` is ignored instead of turning the running
 *    total into a concatenated string.
 *
 * @param {Array<object>|null|undefined} turn - Transcript entries of one turn;
 *   a falsy value is treated as an empty turn.
 * @returns {{
 *   input_tokens: number,
 *   output_tokens: number,
 *   cache_read_input_tokens: number,
 *   cache_creation_input_tokens: number,
 *   web_search_requests: number,
 *   web_fetch_requests: number,
 *   iterations: number
 * }} Summed counters; every field is 0 when nothing contributed.
 */
export function extractTokenUsage(turn) {
  // Only finite numbers contribute. `+=` with a string operand would silently
  // switch from addition to concatenation and corrupt the total.
  const count = (value) => (typeof value === 'number' && Number.isFinite(value) ? value : 0);

  let input = 0;
  let output = 0;
  let cache_read = 0;
  let cache_creation = 0;
  let web_search = 0;
  let web_fetch = 0;
  let iterations = 0;

  for (const entry of turn || []) {
    const usage = entry && entry.message && entry.message.usage;
    // `typeof null === 'object'`, hence the explicit falsy check first.
    if (!usage || typeof usage !== 'object' || Array.isArray(usage)) continue;

    input += count(usage.input_tokens);
    output += count(usage.output_tokens);
    cache_read += count(usage.cache_read_input_tokens);
    cache_creation += count(usage.cache_creation_input_tokens);
    iterations += count(usage.iterations);

    const serverTools = usage.server_tool_use;
    if (serverTools && typeof serverTools === 'object') {
      web_search += count(serverTools.web_search_requests);
      web_fetch += count(serverTools.web_fetch_requests);
    }
  }

  // Key order is kept stable so serialized episodes stay diff-friendly.
  return {
    input_tokens: input,
    output_tokens: output,
    cache_read_input_tokens: cache_read,
    cache_creation_input_tokens: cache_creation,
    web_search_requests: web_search,
    web_fetch_requests: web_fetch,
    iterations,
  };
}
|
||||
|
||||
/** Classify the opening user-prompt sentiment (per spec §6 / gap-resolution 1). */
|
||||
export function classifyPromptSignal(text) {
|
||||
const t = String(text || '').toLowerCase().trim();
|
||||
@@ -454,6 +500,7 @@ export function parseTranscript(transcriptText, fallbackSessionId = null) {
|
||||
decision_provenance,
|
||||
environment: extractEnvironment(entries, start),
|
||||
task_size: extractTaskSize(turn),
|
||||
task_cost: extractTokenUsage(turn),
|
||||
primary_rationale: {
|
||||
step: 1,
|
||||
node_chosen: skills.length > 0 ? skills[0] : 'direct',
|
||||
|
||||
@@ -8,6 +8,7 @@ import {
|
||||
parseRoutingTag,
|
||||
extractLastUserPromptText,
|
||||
classifyTask,
|
||||
extractTokenUsage,
|
||||
} from './observer-transcript-parser.mjs';
|
||||
|
||||
// Build a JSONL transcript string from entry objects.
|
||||
@@ -920,3 +921,85 @@ describe('classifyTask — extended dictionary (Task 1)', () => {
|
||||
expect(classifyTask('почини баг в logger')).toBe('bugfix');
|
||||
});
|
||||
});
|
||||
|
||||
// Unit tests for extractTokenUsage: summation across entries, optional
// server_tool_use / iterations fields, and tolerance of missing or malformed
// `usage` values.
describe('extractTokenUsage (Task 2)', () => {
  // Fresh copy of the all-zero result expected when nothing contributes.
  const zeroUsage = () => ({
    input_tokens: 0,
    output_tokens: 0,
    cache_read_input_tokens: 0,
    cache_creation_input_tokens: 0,
    web_search_requests: 0,
    web_fetch_requests: 0,
    iterations: 0,
  });

  // Minimal transcript-entry shape around a usage payload.
  const withUsage = (usage) => ({ message: { usage } });

  it('sums input/output/cache fields across multiple assistant messages', () => {
    const first = withUsage({
      input_tokens: 10,
      output_tokens: 5,
      cache_read_input_tokens: 100,
      cache_creation_input_tokens: 50,
    });
    const second = withUsage({
      input_tokens: 8,
      output_tokens: 3,
      cache_read_input_tokens: 80,
      cache_creation_input_tokens: 20,
    });

    const totals = extractTokenUsage([first, second]);

    expect(totals).toEqual({
      ...zeroUsage(),
      input_tokens: 18,
      output_tokens: 8,
      cache_read_input_tokens: 180,
      cache_creation_input_tokens: 70,
    });
  });

  it('captures server_tool_use bonus fields (web_search/web_fetch)', () => {
    const entry = withUsage({
      input_tokens: 5,
      output_tokens: 2,
      server_tool_use: { web_search_requests: 3, web_fetch_requests: 1 },
    });

    const { web_search_requests, web_fetch_requests } = extractTokenUsage([entry]);

    expect(web_search_requests).toBe(3);
    expect(web_fetch_requests).toBe(1);
  });

  it('captures iterations (extended-thinking detector)', () => {
    const entry = withUsage({ input_tokens: 100, output_tokens: 50, iterations: 4 });

    expect(extractTokenUsage([entry]).iterations).toBe(4);
  });

  it('returns zero-filled object when no usage present', () => {
    // One entry without a `usage` key at all, one with an explicit null.
    const turn = [{ message: {} }, withUsage(null)];

    expect(extractTokenUsage(turn)).toEqual(zeroUsage());
  });

  it('handles empty/null turn safely', () => {
    for (const emptyTurn of [[], null]) {
      expect(extractTokenUsage(emptyTurn)).toEqual(zeroUsage());
    }
  });

  it('safely skips entries where usage is a non-object primitive (defensive guard)', () => {
    const malformed = withUsage(42); // malformed — usage as primitive
    const valid = withUsage({ input_tokens: 5, output_tokens: 3 });

    const totals = extractTokenUsage([malformed, valid]);

    expect(totals.input_tokens).toBe(5);
    expect(totals.output_tokens).toBe(3);
  });
});
|
||||
|
||||
// Integration tests: parseTranscript must expose the aggregated usage as
// `task_cost`, zero-filled when the transcript carries no usage data.
describe('parseTranscript — task_cost integration (Task 2)', () => {
  // Serialize entry objects into the newline-delimited JSON the parser reads.
  const toJsonl = (entries) => entries.map((entry) => JSON.stringify(entry)).join('\n');

  // A user entry holding a single text prompt.
  const userPrompt = (text) => ({
    type: 'user',
    message: { role: 'user', content: [{ type: 'text', text }] },
  });

  it('attaches task_cost to a v2 episode', () => {
    const assistantReply = {
      type: 'assistant',
      message: {
        role: 'assistant',
        content: [{ type: 'text', text: 'done' }],
        usage: { input_tokens: 42, output_tokens: 7 },
      },
    };
    const transcript = toJsonl([userPrompt('implement feature X'), assistantReply]);

    const episode = parseTranscript(transcript);

    expect(episode).not.toBeNull();
    expect(episode.task_cost).toBeDefined();
    expect(episode.task_cost.input_tokens).toBe(42);
    expect(episode.task_cost.output_tokens).toBe(7);
  });

  it('attaches zero-filled task_cost when no usage in transcript', () => {
    const assistantReply = {
      type: 'assistant',
      message: { role: 'assistant', content: [{ type: 'text', text: 'ok' }] },
    };
    const transcript = toJsonl([userPrompt('do something'), assistantReply]);

    const episode = parseTranscript(transcript);

    expect(episode).not.toBeNull();
    expect(episode.task_cost).toEqual({
      input_tokens: 0,
      output_tokens: 0,
      cache_read_input_tokens: 0,
      cache_creation_input_tokens: 0,
      web_search_requests: 0,
      web_fetch_requests: 0,
      iterations: 0,
    });
  });
});
|
||||
|
||||
Reference in New Issue
Block a user