brain/docs/observer/tmp/summary.json

{
  "path_type": {
    "improvised": 65,
    "regulated": 2
  },
  "node_chosen_top": [
    [
      "direct",
      64
    ],
    [
      "superpowers:using-git-worktrees",
      1
    ],
    [
      "subagent-driven-development",
      1
    ],
    [
      "superpowers:brainstorming",
      1
    ]
  ],
  "recommended_node": [
    [
      "null",
      60
    ],
    [
      "#37",
      4
    ],
    [
      "#18",
      1
    ],
    [
      "#25",
      1
    ],
    [
      "#11",
      1
    ]
  ],
  "sources": {
    "prefilter": 23,
    "regex": 10,
    "prefilter_inherited": 3,
    "llm": 29,
    "cache": 2
  },
  "perCls": {
    "other": {
      "total": 37,
      "trigger_matched": 4,
      "via_skill": 3
    },
    "release": {
      "total": 7,
      "trigger_matched": 0,
      "via_skill": 0
    },
    "question": {
      "total": 13,
      "trigger_matched": 0,
      "via_skill": 0
    },
    "monitoring": {
      "total": 7,
      "trigger_matched": 0,
      "via_skill": 0
    },
    "planning": {
      "total": 1,
      "trigger_matched": 0,
      "via_skill": 0
    },
    "bugfix": {
      "total": 1,
      "trigger_matched": 0,
      "via_skill": 0
    },
    "cleanup": {
      "total": 1,
      "trigger_matched": 0,
      "via_skill": 0
    }
  },
  "outcomesReviewed": {
    "soft_success": 33,
    "success": 16,
    "rework": 13,
    "blocked": 5
  },
  "groupSummary": {
    "skill_used": {
      "total": 3,
      "outcomes": {
        "success": 1,
        "blocked": 1,
        "soft_success": 1
      },
      "rework_rate": "0.0%"
    },
    "direct_no_rec": {
      "total": 58,
      "outcomes": {
        "soft_success": 29,
        "success": 15,
        "rework": 10,
        "blocked": 4
      },
      "rework_rate": "17.2%"
    },
    "direct_ignored_rec": {
      "total": 6,
      "outcomes": {
        "soft_success": 3,
        "rework": 3
      },
      "rework_rate": "50.0%"
    }
  },
  "reviewerVerdicts": {
    "node_quality": {
      "disputable": 31,
      "correct": 25,
      "wrong_node": 11
    },
    "self_assessment_accuracy": {
      "accurate": 38,
      "no_self_assessment": 29
    }
  },
  "gap": {
    "total": 6,
    "rework": 3,
    "cases": [
      {
        "time": "2026-05-27T04:05:21.242Z",
        "task": "b11f6b8d",
        "rec": "#37",
        "outcome": "soft_success",
        "node_quality": "disputable",
        "reasoning": "Task was a background completion notification with trivial processing (1 Read, 1 TodoWrite). Direct handling is reasonable despite classifier recommending #37 for deploy/release, since no actual deployment work was needed. Agent's self-assessment honestly flags the unexplained divergence."
      },
      {
        "time": "2026-05-27T04:09:31.149Z",
        "task": "b11f6b8d",
        "rec": "#37",
        "outcome": "rework",
        "node_quality": "wrong_node",
        "reasoning": "Classifier recommended #37 but agent went direct without override justification. Self-assessment honestly flags this with low confidence (0.15) and identifies the missing override step. Agent should have either invoked #37 or documented an explicit override."
      },
      {
        "time": "2026-05-27T05:32:27.040Z",
        "task": "b11f6b8d",
        "rec": "#18",
        "outcome": "rework",
        "node_quality": "wrong_node",
        "reasoning": "Classifier recommended node #18 for the task-notification, but the agent went direct without invoking it, risking loss of background task result handling. The agent's self-assessment honestly acknowledges this deviation and its consequences."
      },
      {
        "time": "2026-05-27T07:16:20.117Z",
        "task": "0ade4c82",
        "rec": "#25",
        "outcome": "rework",
        "node_quality": "wrong_node",
        "reasoning": "Agent went direct on an ambiguous short prompt ('долго ждешь проверь') despite classifier recommending #25, then hit a PowerShell error during execution. Self-assessment honestly recognizes the routing mistake and need for clarification."
      },
      {
        "time": "2026-05-27T08:14:25.441Z",
        "task": "0ade4c82",
        "rec": "#37",
        "outcome": "soft_success",
        "node_quality": "disputable",
        "reasoning": "Classifier recommended #37 with low confidence (0.5) after parse failure, but agent chose direct handling for a background task notification. Self-assessment honestly flags the deviation and uncertainty. Direct response is plausible for a monitoring-type notification, though #37 may have been more appropriate."
      },
      {
        "time": "2026-05-27T12:31:06.105Z",
        "task": "0ade4c82",
        "rec": "#11",
        "outcome": "soft_success",
        "node_quality": "disputable",
        "reasoning": "Classifier recommended #11 (cleanup) with low-ish confidence and a parse_null LLM error, but agent chose 'direct' path. With only a 55-char prompt and no tool calls/files touched, a direct response was reasonable for a cleanup-type task. Self-assessment is pending so honesty cannot be evaluated."
      }
    ]
  },
  "cost": {
    "main_in": 1313,
    "main_out": 453422,
    "cache_read": 159238917,
    "cache_create": 8548887,
    "classifier_in": 3373,
    "classifier_out": 23382,
    "total_iter": 505,
    "total_tool_calls": 181
  }
}