// 2026-06-15 08:06:08 +03:00
import { describe , it , expect , beforeEach , afterEach } from 'vitest' ;
import { writeFileSync , readFileSync , existsSync , mkdtempSync , rmSync , mkdirSync , readdirSync } from 'fs' ;
import { join } from 'path' ;
import { tmpdir } from 'os' ;
import { appendEpisode , buildEpisodeFromContext , buildObserverError , routingGateDecision , buildExecutionTrace , buildEpisode , buildSelfAssessment , computeEmbeddingForEpisode , derivePrompt } from './observer-stop-hook.mjs' ;
// Scratch directory for the current test; a fresh one is assigned before each
// test and removed again afterwards.
let workdir;

beforeEach(() => {
  const tempPrefix = join(tmpdir(), 'observer-test-');
  workdir = mkdtempSync(tempPrefix);

  // Pre-create docs/observer, the directory the default-path tests read from.
  const observerDir = join(workdir, 'docs', 'observer');
  mkdirSync(observerDir, { recursive: true });
});

afterEach(() => {
  // force: true — do not fail teardown if the directory is already gone.
  const removal = { recursive: true, force: true };
  rmSync(workdir, removal);
});
/**
 * Builds a `primary_rationale` fixture with neutral defaults.
 *
 * Every call returns a new object with new nested arrays, so a test may
 * mutate or spread the result without affecting other tests.
 *
 * @returns {object} rationale with step 1, node '#1', empty
 *   trigger/candidate/boundary lists, an un-invoked hard floor and the
 *   'other' task classification.
 */
const defaultRat = () => ({
  step: 1,
  node_chosen: '#1',
  triggers_matched: [],
  candidates_considered: [],
  boundaries_applied: [],
  hard_floor: { invoked: false, rules: [] },
  task_classification: 'other',
});
/**
 * Full schema-v2 episode fixture.
 *
 * @param {object} [overrides={}] top-level fields to replace. They are spread
 *   last, so an overridden key replaces the default value wholesale (shallow
 *   merge — nested objects are not merged field by field).
 * @returns {object} a new episode object on every call.
 */
const v2Episode = (overrides = {}) => ({
  schema_version: 2,
  task_id: 'abc-123',
  task_ref: 'abc-123',
  timestamps: {
    started_at: '2026-05-19T10:00:00+03:00',
    ended_at: '2026-05-19T10:05:00+03:00',
  },
  path_type: 'regulated',
  outcome: 'unknown',
  prompt_signal: 'neutral',
  decision_provenance: { kind: 'autonomous', claude_would_have_chosen: null },
  environment: {
    economy_level: 0,
    model: 'claude-opus-4-7',
    post_compaction: false,
    session_turn: 1,
    parallel_session: false,
  },
  task_size: { tool_calls: 0, files_touched: 0, files: [] },
  primary_rationale: defaultRat(),
  events: [],
  ...overrides,
});
// appendEpisode: writing, validation, PII filtering and PII counters.
describe('appendEpisode', () => {
  const MONTH = '2026-05';

  // Reads the raw monthly episodes file from <workdir>/<dirSegments...>.
  // workdir is looked up at call time because it is reassigned for every test.
  const readEpisodes = (...dirSegments) =>
    readFileSync(join(workdir, ...dirSegments, 'episodes-2026-05.jsonl'), 'utf-8');

  // Location written to when appendEpisode is called without a stateDir.
  const readDefaultEpisodes = () => readEpisodes('docs', 'observer');

  it('appends one JSONL line to the monthly file', () => {
    appendEpisode(v2Episode(), workdir, MONTH);

    const content = readDefaultEpisodes();
    expect(content).toContain('"task_id":"abc-123"');
    expect(content).toContain('"schema_version":2');
    expect(content.endsWith('\n')).toBe(true);
  });

  it('uses stateDir param for the episode dir (config-seam); дефолт docs/observer', () => {
    appendEpisode(v2Episode({ task_id: 'sd' }), workdir, MONTH, '.claude/brain-state');
    const content = readEpisodes('.claude', 'brain-state');
    expect(content).toContain('"task_id":"sd"');

    // Without the 4th argument the previous docs/observer path is used
    // (backward compatibility).
    appendEpisode(v2Episode({ task_id: 'def' }), workdir, MONTH);
    const def = readDefaultEpisodes();
    expect(def).toContain('"task_id":"def"');
  });

  it('appends to an existing file without overwrite', () => {
    appendEpisode(v2Episode({ task_id: 'a' }), workdir, MONTH);
    appendEpisode(v2Episode({ task_id: 'b', outcome: 'partial' }), workdir, MONTH);

    const lines = readDefaultEpisodes().trim().split('\n');
    expect(lines).toHaveLength(2);
    expect(JSON.parse(lines[0]).task_id).toBe('a');
    expect(JSON.parse(lines[1]).task_id).toBe('b');
  });

  it('applies the PII filter before write (including events[])', () => {
    const episode = v2Episode({
      events: [{ kind: 'error', message: 'call +79991234567 / mail x@y.com' }],
    });
    appendEpisode(episode, workdir, MONTH);

    const content = readDefaultEpisodes();
    expect(content).toContain('+7XXXXXXXXXX');
    expect(content).toContain('***@***');
    expect(content).not.toContain('79991234567');
  });

  it('throws on a missing required field', () => {
    expect(() => appendEpisode({}, workdir, MONTH)).toThrow(/required/i);
  });

  it('throws on a missing schema-v2 field', () => {
    const ep = v2Episode();
    delete ep.decision_provenance;
    expect(() => appendEpisode(ep, workdir, MONTH)).toThrow(/schema v2 field missing/i);
  });

  it('throws when prompt_signal is missing (C-7 strict validation)', () => {
    const ep = v2Episode();
    delete ep.prompt_signal;
    expect(() => appendEpisode(ep, workdir, MONTH)).toThrow(/schema v2 field missing/i);
  });

  it('throws when events is missing (C-7 strict validation)', () => {
    const ep = v2Episode();
    delete ep.events;
    expect(() => appendEpisode(ep, workdir, MONTH)).toThrow(/schema v2 field missing/i);
  });

  it('throws when schema_version is not 2, 3 or 4', () => {
    const ep = v2Episode({ schema_version: 1 });
    expect(() => appendEpisode(ep, workdir, MONTH)).toThrow(/schema_version/i);
  });

  it('throws when a primary_rationale sub-field is missing', () => {
    // Only two of the rationale fields are supplied.
    const ep = v2Episode({ primary_rationale: { step: 1, node_chosen: '#1' } });
    expect(() => appendEpisode(ep, workdir, MONTH)).toThrow(/primary_rationale field missing/i);
  });

  it('accepts a minimal observer_error marker', () => {
    const marker = {
      schema_version: 2,
      observer_error: true,
      error_message: 'parser blew up',
      timestamps: { started_at: '2026-05-19T10:00:00Z', ended_at: '2026-05-19T10:00:00Z' },
      task_id: 'err-1',
    };
    appendEpisode(marker, workdir, MONTH);

    const line = JSON.parse(readDefaultEpisodes().trim());
    expect(line.observer_error).toBe(true);
    expect(line.error_message).toBe('parser blew up');
  });

  it('throws when an observer_error marker is missing a field', () => {
    const incomplete = { schema_version: 2, observer_error: true, task_id: 'x' };
    expect(() => appendEpisode(incomplete, workdir, MONTH)).toThrow(
      /observer_error marker field missing/i
    );
  });

  it('persists PII match counts to .pii-counters.json (Task 3)', () => {
    // The phone-shaped file name is the PII match this test relies on.
    const ep = v2Episode({
      events: [{ kind: 'tool_summary', counts: { Bash: 1 } }],
      task_size: { tool_calls: 1, files_touched: 0, files: ['+71234567890.txt'] },
    });
    appendEpisode(ep, workdir, MONTH);

    const counterPath = join(workdir, 'docs', 'observer', '.pii-counters.json');
    expect(existsSync(counterPath)).toBe(true);

    const store = JSON.parse(readFileSync(counterPath, 'utf-8'));
    expect(store['2026-05']).toBeDefined();
    expect(store['2026-05'].RU_PHONE).toBeGreaterThanOrEqual(1);
  });
});
// buildEpisodeFromContext: fallback path (context only) and transcript path.
describe('buildEpisodeFromContext', () => {
  it('builds a v4 episode on the fallback path (no transcript)', () => {
    const ep = buildEpisodeFromContext({ session_id: 'sess-1', result: 'success' });

    expect(ep.schema_version).toBe(4);
    expect(ep.schema_minor).toBe(1);
    // The session id is expected in both the id and the ref.
    expect(ep.task_id).toBe('sess-1');
    expect(ep.task_ref).toBe('sess-1');
    expect(ep.outcome).toBe('success');
    expect(ep.decision_provenance).toEqual({
      kind: 'autonomous',
      claude_would_have_chosen: null,
    });
    expect(ep.environment).toEqual({
      economy_level: null,
      model: null,
      post_compaction: false,
      session_turn: 0,
      parallel_session: false,
    });
    expect(ep.task_size).toEqual({ tool_calls: 0, files_touched: 0, files: [] });
  });

  it('defaults outcome to unknown when none supplied', () => {
    const ep = buildEpisodeFromContext({ session_id: 'x' });
    expect(ep.outcome).toBe('unknown');
  });

  it('derives a v4 episode from transcriptText when provided', () => {
    // Two-entry JSONL transcript: a user message, then an assistant Skill
    // tool call.
    const entries = [
      {
        type: 'user',
        message: { role: 'user', content: 'fix the bug' },
        timestamp: '2026-05-19T10:00:00Z',
        sessionId: 'sess-t',
      },
      {
        type: 'assistant',
        message: {
          role: 'assistant',
          content: [
            {
              type: 'tool_use',
              id: 't1',
              name: 'Skill',
              input: { skill: 'superpowers:systematic-debugging' },
            },
          ],
        },
        timestamp: '2026-05-19T10:01:00Z',
        sessionId: 'sess-t',
      },
    ];
    const transcript = entries.map((entry) => JSON.stringify(entry)).join('\n');

    const ep = buildEpisodeFromContext({ session_id: 'sess-t' }, transcript);

    expect(ep.schema_version).toBe(4);
    expect(ep.task_id).toBe('sess-t');
    expect(ep.primary_rationale.node_chosen).toBe('superpowers:systematic-debugging');
  });
});
// buildExecutionTrace (chain gap reporting) and buildEpisode (inheritance and
// schema version fields).
describe('buildExecutionTrace + buildEpisode — Phase 3 Task 16 (spec §5)', () => {
  it('buildExecutionTrace builds chain_gaps when chain is incomplete', () => {
    const trace = buildExecutionTrace({ recommended_chain: ['a', 'b', 'c'], invoked: ['a'] });

    expect(trace.recommended_chain).toEqual(['a', 'b', 'c']);
    expect(trace.invoked).toEqual(['a']);
    // One of the three recommended steps was invoked.
    const [gap] = trace.chain_gaps;
    expect(gap.executed_steps).toBe(1);
    expect(gap.expected_steps).toBe(3);
  });

  it('buildExecutionTrace emits no chain_gaps when chain is complete', () => {
    const trace = buildExecutionTrace({ recommended_chain: ['a', 'b'], invoked: ['a', 'b'] });
    expect(trace.chain_gaps).toEqual([]);
  });

  it('buildExecutionTrace handles empty recommended_chain (no gap)', () => {
    const trace = buildExecutionTrace({ recommended_chain: [], invoked: ['x'] });
    expect(trace.chain_gaps).toEqual([]);
  });

  it('buildEpisode copies inheritance from state (B5)', () => {
    const inheritance = { inherited_from_task_id: 'x', inheritance_age_minutes: 7 };
    const ep = buildEpisode({ state: { inheritance } });

    expect(ep.inheritance.inherited_from_task_id).toBe('x');
    expect(ep.inheritance.inheritance_age_minutes).toBe(7);
  });

  it('buildEpisode omits inheritance when state has none', () => {
    const ep = buildEpisode({ state: {} });
    expect(ep.inheritance).toBeUndefined();
  });

  it('buildEpisode marks schema_minor=3 (Task 20 bump)', () => {
    const ep = buildEpisode({ state: {}, ctx: { session_id: 'sess-x' } });

    expect(ep.schema_version).toBe(4);
    expect(ep.schema_minor).toBe(3);
  });
});
// buildSelfAssessment: handling of a missing, valid, fenced, malformed and
// out-of-range API result.
describe('buildSelfAssessment — Phase 3 Task 17 (spec §4.5)', () => {
  it('marks self_assessment_pending=true when API skipped (apiResult null)', () => {
    const sa = buildSelfAssessment({ apiResult: null });
    expect(sa.self_assessment_pending).toBe(true);
  });

  it('parses a valid JSON apiResult into the four-field schema', () => {
    const apiResult =
      '{"summary":"chose superpowers:test-driven-development for new code","confidence_in_choice":0.8,"what_could_be_better":null,"lesson_learned":null}';
    const sa = buildSelfAssessment({ apiResult });

    expect(sa.summary).toContain('superpowers:test-driven-development');
    expect(sa.confidence_in_choice).toBe(0.8);
    expect(sa.what_could_be_better).toBeNull();
    expect(sa.lesson_learned).toBeNull();
    expect(sa.self_assessment_pending).toBe(false);
  });

  it('strips ```json fence on apiResult', () => {
    // Same JSON shape as above, wrapped in a markdown code fence.
    const apiResult =
      '```json\n{"summary":"x","confidence_in_choice":0.5,"what_could_be_better":"y","lesson_learned":"z"}\n```';
    const sa = buildSelfAssessment({ apiResult });

    expect(sa.confidence_in_choice).toBe(0.5);
    expect(sa.lesson_learned).toBe('z');
    expect(sa.self_assessment_pending).toBe(false);
  });

  it('marks pending=true with parse_error on malformed apiResult', () => {
    const sa = buildSelfAssessment({ apiResult: 'not json' });

    expect(sa.self_assessment_pending).toBe(true);
    expect(typeof sa.parse_error).toBe('string');
  });

  it('clamps confidence outside [0,1] to null (defensive)', () => {
    // confidence_in_choice is 5, i.e. above the [0,1] range.
    const apiResult =
      '{"summary":"x","confidence_in_choice":5,"what_could_be_better":null,"lesson_learned":null}';
    const sa = buildSelfAssessment({ apiResult });

    expect(sa.confidence_in_choice).toBeNull();
  });
});
// buildObserverError: shape of the marker built from a context and an Error.
describe('buildObserverError', () => {
  it('produces a minimal valid observer_error marker', () => {
    const failure = new Error('boom');
    const marker = buildObserverError({ session_id: 'sess-e' }, failure);

    expect(marker.observer_error).toBe(true);
    expect(marker.schema_version).toBe(4);
    expect(marker.task_id).toBe('sess-e');
    expect(marker.error_message).toContain('boom');
    expect(marker.timestamps.started_at).toBeTruthy();
  });
});
// routingGateDecision(episode, prompt, nodes, stopHookActive): when the gate
// blocks and when it lets the stop through.
describe('routingGateDecision', () => {
  const NODES = ['discovery-interview', 'brainstorming'];

  // Prompt that names a node ('запусти' is Russian for "run").
  const DIRECTED_PROMPT = 'запусти discovery-interview';

  // Episode with the default 'autonomous' provenance (no routing tag).
  const autonomousEp = v2Episode();

  // Episode whose provenance is tagged as a user-directed method.
  const taggedEp = v2Episode({
    decision_provenance: {
      kind: 'user_directed_method',
      claude_would_have_chosen: 'brainstorming',
    },
  });

  it('blocks when a method was directed but no routing tag is present', () => {
    const gate = routingGateDecision(autonomousEp, DIRECTED_PROMPT, NODES, false);

    expect(gate.block).toBe(true);
    expect(gate.reason).toContain('discovery-interview');
  });

  it('does not block when the routing tag is present', () => {
    const gate = routingGateDecision(taggedEp, DIRECTED_PROMPT, NODES, false);
    expect(gate.block).toBe(false);
  });

  it('does not block when no method was directed', () => {
    // Russian for "add a City column" — mentions none of NODES.
    const gate = routingGateDecision(autonomousEp, 'добавь колонку Город', NODES, false);
    expect(gate.block).toBe(false);
  });

  it('does not block when stop_hook_active is true (loop guard)', () => {
    const gate = routingGateDecision(autonomousEp, DIRECTED_PROMPT, NODES, true);
    expect(gate.block).toBe(false);
  });

  it('does not block for user_chose_from_options even when prompt mentions a node', () => {
    const choiceEp = v2Episode({
      decision_provenance: {
        kind: 'user_chose_from_options',
        node: 'brainstorming',
        options_offered: ['brainstorming', 'writing-plans'],
        claude_would_have_chosen: 'brainstorming',
      },
    });

    const gate = routingGateDecision(choiceEp, 'запусти brainstorming', NODES, false);
    expect(gate.block).toBe(false);
  });
});
// ---------------------------------------------------------------------------
// Step 3.6 embedding async wiring (Phase 4 follow-up)
// ---------------------------------------------------------------------------
describe('Step 3.6 embedding async wiring', () => {
  // Builds an episode whose primary_rationale carries the given
  // task_classification.
  const epWithClass = (cls = 'feature') =>
    v2Episode({
      primary_rationale: { ...defaultRat(), task_classification: cls },
    });

  // NOTE(review): the two titles below say "field null", but the assertions
  // expect prompt_embedding_base64 to be undefined (i.e. not set at all).
  it('embedding-mode off → embedding not computed, field null', async () => {
    const ep = epWithClass('feature');
    const embedFn = async () => new Float32Array([0.1, 0.2, 0.3]);

    await computeEmbeddingForEpisode(ep, { prompt: 'напиши тест' }, { embedMode: 'off', embedFn });

    expect(ep.prompt_embedding_base64).toBeUndefined();
    expect(ep.environment?.embedding_unavailable).toBeUndefined();
  });

  it('taskType="conversation" (exempt) → embedding skipped, field null', async () => {
    const ep = epWithClass('conversation');
    // Flag flipped by the fake so the test can prove embedFn was never called.
    let called = false;
    const embedFn = async () => {
      called = true;
      return new Float32Array([0.1]);
    };

    await computeEmbeddingForEpisode(ep, { prompt: 'спасибо' }, { embedMode: 'on', embedFn });

    expect(called).toBe(false);
    expect(ep.prompt_embedding_base64).toBeUndefined();
    expect(ep.environment?.embedding_unavailable).toBeUndefined();
  });

  it('embedding success → prompt_embedding_base64 is base64 string, environment.embedding_unavailable not set', async () => {
    const ep = epWithClass('feature');
    // Distinctive non-zero vector so encoding produces a stable, non-empty base64.
    const fakeVec = new Float32Array([0.5, -0.25, 1.0, 0.0]);
    const embedFn = async () => fakeVec;

    await computeEmbeddingForEpisode(
      ep,
      { prompt: 'напиши тест для биллинга' },
      { embedMode: 'on', embedFn }
    );

    expect(typeof ep.prompt_embedding_base64).toBe('string');
    expect(ep.prompt_embedding_base64.length).toBeGreaterThan(0);
    // Base64-only chars (no whitespace, no null prefix).
    expect(ep.prompt_embedding_base64).toMatch(/^[A-Za-z0-9+/]+=*$/);
    expect(ep.environment?.embedding_unavailable).toBeUndefined();
  });

  it('embedding timeout (2s) → field null, environment.embedding_unavailable=true', async () => {
    const ep = epWithClass('feature');
    // embedFn never resolves — timeout (overridden short for test) must win.
    const embedFn = () => new Promise(() => {});

    await computeEmbeddingForEpisode(
      ep,
      { prompt: 'долгая задача' },
      {
        embedMode: 'on',
        embedFn,
        timeoutMs: 30, // short override so the test is fast
      }
    );

    expect(ep.prompt_embedding_base64).toBeUndefined();
    // No optional chaining here: environment is part of the fixture.
    expect(ep.environment.embedding_unavailable).toBe(true);
  });
});
// -----------------------------------------------------------------------------
// derivePrompt — Bug fix 2026-05-26: ctx.prompt is never set by Claude Code Stop
// stdin (only session_id / transcript_path / stop_hook_active are sent). The
// real user prompt lives in the transcript file. Self-assessment and embedding
// both consumed ctx.prompt blindly → an empty string was passed to Sonnet
// (rendered as "(пусто)", i.e. "(empty)") and embedding was silently skipped.
// derivePrompt unifies the fallback: prefer ctx.prompt when present (e.g.
// tests), otherwise extract the last user message from transcriptText.
// -----------------------------------------------------------------------------
describe('derivePrompt — Stop-event prompt resolution', () => {
  // Builds a one-line JSONL transcript holding a single user message.
  const minimalTranscript = (text) => {
    const entry = {
      type: 'user',
      sessionId: 's1',
      timestamp: '2026-05-26T03:00:00Z',
      message: { role: 'user', content: text },
    };
    return `${JSON.stringify(entry)}\n`;
  };

  it('returns ctx.prompt when explicitly provided (test path)', () => {
    expect(derivePrompt({ prompt: 'explicit' }, null)).toBe('explicit');
  });

  it('extracts last user prompt from transcript when ctx.prompt missing (real Stop-event path)', () => {
    const transcript = minimalTranscript('реальный длинный запрос от заказчика');
    expect(derivePrompt({}, transcript)).toBe('реальный длинный запрос от заказчика');
  });

  it('returns null when both ctx.prompt and transcriptText absent', () => {
    expect(derivePrompt({}, null)).toBeNull();
    expect(derivePrompt({}, '')).toBeNull();
  });

  it('prefers ctx.prompt over transcript when both present', () => {
    const transcript = minimalTranscript('from transcript');
    expect(derivePrompt({ prompt: 'from ctx' }, transcript)).toBe('from ctx');
  });

  it('handles ctx=null/undefined gracefully', () => {
    const transcript = minimalTranscript('из транскрипта');

    expect(derivePrompt(null, transcript)).toBe('из транскрипта');
    expect(derivePrompt(undefined, transcript)).toBe('из транскрипта');
    expect(derivePrompt(null, null)).toBeNull();
  });
});