Files
brain/docs/observer/tmp/summary.json
T

200 lines
5.2 KiB
JSON

{
"path_type": {
"improvised": 65,
"regulated": 2
},
"node_chosen_top": [
[
"direct",
64
],
[
"superpowers:using-git-worktrees",
1
],
[
"subagent-driven-development",
1
],
[
"superpowers:brainstorming",
1
]
],
"recommended_node": [
[
"null",
60
],
[
"#37",
4
],
[
"#18",
1
],
[
"#25",
1
],
[
"#11",
1
]
],
"sources": {
"prefilter": 23,
"regex": 10,
"prefilter_inherited": 3,
"llm": 29,
"cache": 2
},
"perCls": {
"other": {
"total": 37,
"trigger_matched": 4,
"via_skill": 3
},
"release": {
"total": 7,
"trigger_matched": 0,
"via_skill": 0
},
"question": {
"total": 13,
"trigger_matched": 0,
"via_skill": 0
},
"monitoring": {
"total": 7,
"trigger_matched": 0,
"via_skill": 0
},
"planning": {
"total": 1,
"trigger_matched": 0,
"via_skill": 0
},
"bugfix": {
"total": 1,
"trigger_matched": 0,
"via_skill": 0
},
"cleanup": {
"total": 1,
"trigger_matched": 0,
"via_skill": 0
}
},
"outcomesReviewed": {
"soft_success": 33,
"success": 16,
"rework": 13,
"blocked": 5
},
"groupSummary": {
"skill_used": {
"total": 3,
"outcomes": {
"success": 1,
"blocked": 1,
"soft_success": 1
},
"rework_rate": "0.0%"
},
"direct_no_rec": {
"total": 58,
"outcomes": {
"soft_success": 29,
"success": 15,
"rework": 10,
"blocked": 4
},
"rework_rate": "17.2%"
},
"direct_ignored_rec": {
"total": 6,
"outcomes": {
"soft_success": 3,
"rework": 3
},
"rework_rate": "50.0%"
}
},
"reviewerVerdicts": {
"node_quality": {
"disputable": 31,
"correct": 25,
"wrong_node": 11
},
"self_assessment_accuracy": {
"accurate": 38,
"no_self_assessment": 29
}
},
"gap": {
"total": 6,
"rework": 3,
"cases": [
{
"time": "2026-05-27T04:05:21.242Z",
"task": "b11f6b8d",
"rec": "#37",
"outcome": "soft_success",
"node_quality": "disputable",
"reasoning": "Task was a background completion notification with trivial processing (1 Read, 1 TodoWrite). Direct handling is reasonable despite classifier recommending #37 for deploy/release, since no actual deployment work was needed. Agent's self-assessment honestly flags the unexplained divergence."
},
{
"time": "2026-05-27T04:09:31.149Z",
"task": "b11f6b8d",
"rec": "#37",
"outcome": "rework",
"node_quality": "wrong_node",
"reasoning": "Classifier recommended #37 but agent went direct without override justification. Self-assessment honestly flags this with low confidence (0.15) and identifies the missing override step. Agent should have either invoked #37 or documented an explicit override."
},
{
"time": "2026-05-27T05:32:27.040Z",
"task": "b11f6b8d",
"rec": "#18",
"outcome": "rework",
"node_quality": "wrong_node",
"reasoning": "Classifier recommended node #18 for the task-notification, but the agent went direct without invoking it, risking loss of background task result handling. The agent's self-assessment honestly acknowledges this deviation and its consequences."
},
{
"time": "2026-05-27T07:16:20.117Z",
"task": "0ade4c82",
"rec": "#25",
"outcome": "rework",
"node_quality": "wrong_node",
"reasoning": "Agent went direct on an ambiguous short prompt ('долго ждешь проверь') despite classifier recommending #25, then hit a PowerShell error during execution. Self-assessment honestly recognizes the routing mistake and need for clarification."
},
{
"time": "2026-05-27T08:14:25.441Z",
"task": "0ade4c82",
"rec": "#37",
"outcome": "soft_success",
"node_quality": "disputable",
"reasoning": "Classifier recommended #37 with low confidence (0.5) after parse failure, but agent chose direct handling for a background task notification. Self-assessment honestly flags the deviation and uncertainty. Direct response is plausible for a monitoring-type notification, though #37 may have been more appropriate."
},
{
"time": "2026-05-27T12:31:06.105Z",
"task": "0ade4c82",
"rec": "#11",
"outcome": "soft_success",
"node_quality": "disputable",
"reasoning": "Classifier recommended #11 (cleanup) with low-ish confidence and a parse_null LLM error, but agent chose 'direct' path. With only a 55-char prompt and no tool calls/files touched, a direct response was reasonable for a cleanup-type task. Self-assessment is pending so honesty cannot be evaluated."
}
]
},
"cost": {
"main_in": 1313,
"main_out": 453422,
"cache_read": 159238917,
"cache_create": 8548887,
"classifier_in": 3373,
"classifier_out": 23382,
"total_iter": 505,
"total_tool_calls": 181
}
}