Files
portal/app/tests/Feature/Incidents/SingleLeadStormTest.php
T
Дмитрий 84620665a5 feat(incidents): single-lead-storm detection in incidents:watch-failures
Добавлен БЛОК 5 в IncidentsWatchFailures::handle() — детекция шторма от
одного supplier_lead_id. Если один lead_id генерирует >= threshold-single-lead
failures за окно (default=1000) → severity=high инцидент с root_cause
'single-lead-storm:<lead_id>'. Дедуп по dedup-window как в остальных блоках.

Новая опция: --threshold-single-lead=1000 (configurable).

Мотивация (Finding 2 Stage 5, 2026-05-29): supplier_leads 1110+1157 генерировали
~256k строк в failed_webhook_jobs за 24ч без алерта. Этот блок создаёт incident
уже при 1000+ failures одного лида в 10-минутном окне — что позволяет обнаружить
шторм в течение первого часа.

Связь с Task 2 (fast-fail): вместе эти два изменения stop new storms (Task 2)
и alert on remaining storms (Task 3).

Tests: 4 passing в SingleLeadStormTest.php
- детекция шторма (>= threshold)
- НЕ создаёт incident при распределённых failures
- default threshold=1000
- dedup (второй запуск = 0 новых инцидентов)

Task 3 plan 2026-05-29-supplier-webhook-fast-fail-and-stuck-cleanup.md.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-29 09:11:27 +03:00

150 lines
5.2 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<?php
declare(strict_types=1);
use Illuminate\Foundation\Testing\DatabaseTransactions;
use Illuminate\Support\Facades\DB;
use Tests\Concerns\SharesSupplierPdo;
/**
* Task 3 — plan 2026-05-29-supplier-webhook-fast-fail-and-stuck-cleanup.md
*
* Tests the single-lead-storm detection in incidents:watch-failures command.
* A single supplier_lead_id generating >= threshold-single-lead failures within
* the watch window should create a severity=high incident with root_cause
* containing 'single-lead-storm'.
*/
uses(DatabaseTransactions::class);
uses(SharesSupplierPdo::class);
// ---------- helpers --------------------------------------------------------
/**
 * Seed failed_webhook_jobs with $count failure rows that all belong to one supplier lead.
 *
 * Rows are written through the default DB::table() connection — the same pattern as
 * makeFailedWebhookJobExp() in IncidentsWatchFailuresExpandedTest. The SharesSupplierPdo
 * trait is what lets the command (pgsql_supplier) see this data.
 *
 * failed_at cycles deterministically through 1..9 minutes before the current time, so
 * repeated runs build identical fixtures and every row stays inside a 10-minute window.
 *
 * @param int $supplierLeadId Value stored under raw_payload.supplier_lead_id.
 * @param int $count          Number of rows to insert; values <= 0 insert nothing.
 *
 * @throws \JsonException If the payload cannot be JSON-encoded.
 */
function makeStormWebhookRows(int $supplierLeadId, int $count): void
{
    if ($count <= 0) {
        return;
    }

    // The payload is identical for every row, so encode it once; fail loudly instead of
    // silently storing `false` if encoding ever breaks.
    $payload = json_encode(['supplier_lead_id' => $supplierLeadId], JSON_THROW_ON_ERROR);

    // One shared anchor keeps all timestamps relative to the same instant.
    $anchor = now();

    $rows = [];
    for ($i = 0; $i < $count; $i++) {
        $rows[] = [
            'raw_payload' => $payload,
            'exception' => 'DomainException: B1 platform does not support SMS signals',
            'retry_count' => 3,
            // Deterministic replacement for rand(1, 9): same 1..9 minute spread, no randomness.
            'failed_at' => $anchor->copy()->subMinutes(($i % 9) + 1)->toDateTimeString(),
        ];
    }

    // Insert in chunks to stay under query size limits.
    foreach (array_chunk($rows, 200) as $chunk) {
        DB::table('failed_webhook_jobs')->insert($chunk);
    }
}
/**
 * Return the id of an active, non-deleted saas_admin_users row, inserting one when none exists.
 *
 * The command under test requires at least one such admin. Follows the ensureAdminExp()
 * pattern used in IncidentsWatchFailuresExpandedTest.
 *
 * @return int Id of the existing or freshly inserted admin row.
 */
function ensureAdminForStormTest(): int
{
    $adminId = DB::table('saas_admin_users')
        ->where('is_active', true)
        ->whereNull('deleted_at')
        ->value('id');

    // No usable admin yet: create a throwaway one for this test run.
    if ($adminId === null) {
        $adminId = DB::table('saas_admin_users')->insertGetId([
            'email' => 'storm-watch-test@liderra.ru',
            'full_name' => 'Storm Watch Test Admin',
            'password_hash' => '$2y$12$placeholder',
            'role' => 'dev_oncall',
            'is_active' => true,
            'created_at' => now(),
        ]);
    }

    return (int) $adminId;
}
// ---------- setup ----------------------------------------------------------
beforeEach(function (): void {
    // The command needs an active admin to exist before it runs.
    ensureAdminForStormTest();

    // Reset only the tables the command reads/writes. saas_admin_users is left
    // untouched because other tables may hold FK references to it.
    $failedJobs = DB::table('failed_webhook_jobs');
    $failedJobs->delete();

    // Only unresolved incidents are removed; resolved rows stay in place.
    $openIncidents = DB::table('incidents_log')->whereNull('resolved_at');
    $openIncidents->delete();
});
// ---------- tests ----------------------------------------------------------
it('detects single-lead-storm when one supplier_lead_id has >= 1000 failures in window', function (): void {
    // Arrange: a single lead with 1001 failures.
    makeStormWebhookRows(9999, 1001);

    // Act: run the watcher with the generic webhook spike effectively disabled,
    // so only the single-lead block can fire.
    $options = [
        '--threshold-single-lead' => 1000,
        '--window' => 10,
        '--threshold' => 99999,
    ];
    $this->artisan('incidents:watch-failures', $options)->assertSuccessful();

    // Assert: a high-severity storm incident naming the lead was recorded.
    $stormIncident = DB::table('incidents_log')
        ->where('root_cause', 'LIKE', '%single-lead-storm%')
        ->first();

    expect($stormIncident)->not->toBeNull('should create incident for storm');
    expect($stormIncident->severity)->toBe('high')
        ->and($stormIncident->root_cause)->toContain('9999');
});
it('does NOT create storm incident when failures are spread across many leads', function (): void {
    // The setup hook only deletes unresolved incidents, so resolved storm rows may
    // already exist. Compare against a baseline instead of assuming an absolute zero.
    $countStormIncidents = static fn (): int => DB::table('incidents_log')
        ->where('root_cause', 'LIKE', '%single-lead-storm%')
        ->count();

    $before = $countStormIncidents();

    // 100 different supplier_lead_ids × 5 failures = 500 total, none reaches threshold.
    foreach (range(1, 100) as $leadId) {
        makeStormWebhookRows($leadId, 5);
    }

    $this->artisan('incidents:watch-failures', [
        '--threshold-single-lead' => 1000,
        '--window' => 10,
        '--threshold' => 99999, // disable generic webhook spike
    ])->assertSuccessful();

    $created = $countStormIncidents() - $before;

    expect($created)->toBe(0, 'no storm when failures spread across leads');
});
it('uses default threshold of 1000 when --threshold-single-lead is not provided', function (): void {
    // Only the generic webhook spike is disabled; --threshold-single-lead is left
    // out on purpose so the command falls back to its default.
    $options = ['--threshold' => 99999];

    // Scope every lookup to this test's lead so unrelated storm rows cannot interfere.
    $stormForLead = static fn () => DB::table('incidents_log')
        ->where('root_cause', 'LIKE', '%single-lead-storm:7777%');

    // One below the documented default: must stay quiet. Without this negative check
    // the test would also pass for any smaller default.
    makeStormWebhookRows(7777, 999);
    $this->artisan('incidents:watch-failures', $options)->assertSuccessful();

    expect($stormForLead()->count())->toBe(0, 'default threshold=1000 should ignore 999 failures');

    // The 1000th failure reaches the documented ">= threshold" boundary.
    makeStormWebhookRows(7777, 1);
    $this->artisan('incidents:watch-failures', $options)->assertSuccessful();

    $incident = $stormForLead()->first();

    expect($incident)->not->toBeNull('default threshold=1000 should detect 1000 failures');
    expect($incident->severity)->toBe('high');
});
it('deduplicates: does not create duplicate storm incident within dedup window', function (): void {
    makeStormWebhookRows(8888, 1001);

    $options = [
        '--threshold-single-lead' => 1000,
        '--threshold' => 99999,
    ];

    // Two back-to-back runs over the same data: the second falls inside the dedup
    // window (default 60 min), so only the first may record an incident.
    for ($run = 0; $run < 2; $run++) {
        $this->artisan('incidents:watch-failures', $options)->assertSuccessful();
    }

    $incidentsForLead = DB::table('incidents_log')->where('root_cause', 'LIKE', '%single-lead-storm:8888%');
    $count = $incidentsForLead->count();

    expect($count)->toBe(1, 'dedup should prevent duplicate incident');
});