84620665a5
Добавлен БЛОК 5 в IncidentsWatchFailures::handle() — детекция шторма от одного supplier_lead_id. Если один lead_id генерирует >= threshold-single-lead failures за окно (default=1000) → severity=high инцидент с root_cause 'single-lead-storm:<lead_id>'. Дедуп по dedup-window как в остальных блоках. Новая опция: --threshold-single-lead=1000 (configurable). Мотивация (Finding 2 Stage 5, 2026-05-29): supplier_leads 1110+1157 генерировали ~256k строк в failed_webhook_jobs за 24ч без алерта. Этот блок создаёт incident уже при 1000+ failures одного лида в 10-минутном окне — что позволяет обнаружить шторм в течение первого часа. Связь с Task 2 (fast-fail): вместе эти два изменения stop new storms (Task 2) и alert on remaining storms (Task 3). Tests: 4 passing в SingleLeadStormTest.php - детекция шторма (>= threshold) - НЕ создаёт incident при распределённых failures - default threshold=1000 - dedup (второй запуск = 0 новых инцидентов) Task 3 plan 2026-05-29-supplier-webhook-fast-fail-and-stuck-cleanup.md. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
150 lines
5.2 KiB
PHP
150 lines
5.2 KiB
PHP
<?php
|
||
|
||
declare(strict_types=1);
|
||
|
||
use Illuminate\Foundation\Testing\DatabaseTransactions;
|
||
use Illuminate\Support\Facades\DB;
|
||
use Tests\Concerns\SharesSupplierPdo;
|
||
|
||
/**
|
||
* Task 3 — plan 2026-05-29-supplier-webhook-fast-fail-and-stuck-cleanup.md
|
||
*
|
||
* Tests the single-lead-storm detection in incidents:watch-failures command.
|
||
* A single supplier_lead_id generating >= threshold-single-lead failures within
|
||
* the watch window should create a severity=high incident with root_cause
|
||
* containing 'single-lead-storm'.
|
||
*/
|
||
// Register both test traits for this file in one call: DatabaseTransactions
// and the shared supplier PDO concern.
uses(DatabaseTransactions::class, SharesSupplierPdo::class);
|
||
|
||
// ---------- helpers --------------------------------------------------------
|
||
|
||
/**
 * Seed failed_webhook_jobs with $count failure rows that all carry the same
 * supplier_lead_id inside raw_payload.
 *
 * Each row gets a failed_at timestamp 1 to 9 minutes in the past. Rows are
 * written through the default DB::table() connection; per the original note,
 * this mirrors makeFailedWebhookJobExp() in IncidentsWatchFailuresExpandedTest
 * and relies on SharesSupplierPdo so the command (pgsql_supplier) can see them.
 *
 * @param int $supplierLeadId lead id embedded in every row's raw_payload
 * @param int $count          number of rows to insert
 */
function makeStormWebhookRows(int $supplierLeadId, int $count): void
{
    // The payload is identical for every row, so encode it a single time.
    $payload = json_encode(['supplier_lead_id' => $supplierLeadId]);

    $batch = [];
    for ($n = 0; $n < $count; $n++) {
        $failedAt = now()->subMinutes(rand(1, 9))->toDateTimeString();

        $batch[] = [
            'raw_payload' => $payload,
            'exception' => 'DomainException: B1 platform does not support SMS signals',
            'retry_count' => 3,
            'failed_at' => $failedAt,
        ];
    }

    // Write 200 rows per statement to stay under query size limits.
    $batchSize = 200;
    $total = count($batch);
    for ($offset = 0; $offset < $total; $offset += $batchSize) {
        DB::table('failed_webhook_jobs')->insert(array_slice($batch, $offset, $batchSize));
    }
}
|
||
|
||
/**
 * Return the id of an active, non-deleted saas_admin_users row, inserting a
 * dedicated test admin when none exists.
 *
 * Per the original note, the command requires at least one such admin and this
 * helper mirrors ensureAdminExp() in IncidentsWatchFailuresExpandedTest.
 *
 * @return int id of the existing or freshly inserted admin
 */
function ensureAdminForStormTest(): int
{
    $existingId = DB::table('saas_admin_users')
        ->where('is_active', true)
        ->whereNull('deleted_at')
        ->value('id');

    // `??` short-circuits, so the insert runs only when no active admin was found.
    return (int) ($existingId ?? DB::table('saas_admin_users')->insertGetId([
        'email' => 'storm-watch-test@liderra.ru',
        'full_name' => 'Storm Watch Test Admin',
        'password_hash' => '$2y$12$placeholder',
        'role' => 'dev_oncall',
        'is_active' => true,
        'created_at' => now(),
    ]));
}
|
||
|
||
// ---------- setup ----------------------------------------------------------
|
||
|
||
beforeEach(function (): void {
    // Make sure an active admin exists before each test.
    ensureAdminForStormTest();

    // Reset only the tables the command reads/writes. saas_admin_users is left
    // untouched on purpose: other tables may hold FK references to it.
    $failedJobs = DB::table('failed_webhook_jobs');
    $failedJobs->delete();

    // Only unresolved incidents are removed; resolved ones stay in place.
    $openIncidents = DB::table('incidents_log')->whereNull('resolved_at');
    $openIncidents->delete();
});
|
||
|
||
// ---------- tests ----------------------------------------------------------
|
||
|
||
it('detects single-lead-storm when one supplier_lead_id has >= 1000 failures in window', function (): void {
    // Arrange: one lead produces 1001 failures inside the watch window.
    makeStormWebhookRows(9999, 1001);

    // Act: the generic webhook-spike threshold is set very high so only the
    // single-lead check can raise an incident.
    $options = [
        '--threshold-single-lead' => 1000,
        '--window' => 10,
        '--threshold' => 99999,
    ];
    $this->artisan('incidents:watch-failures', $options)->assertSuccessful();

    // Assert: a high-severity storm incident naming the lead was recorded.
    $stormQuery = DB::table('incidents_log')->where('root_cause', 'LIKE', '%single-lead-storm%');
    $storm = $stormQuery->first();

    expect($storm)->not->toBeNull('should create incident for storm');
    expect($storm->severity)->toBe('high');
    expect($storm->root_cause)->toContain('9999');
});
|
||
|
||
it('does NOT create storm incident when failures are spread across many leads', function (): void {
    // Arrange: 100 distinct leads with 5 failures each (500 rows in total),
    // so no individual lead comes anywhere near the threshold.
    foreach (range(1, 100) as $leadId) {
        makeStormWebhookRows($leadId, 5);
    }

    // Act: generic webhook-spike detection is effectively switched off.
    $options = [
        '--threshold-single-lead' => 1000,
        '--window' => 10,
        '--threshold' => 99999,
    ];
    $this->artisan('incidents:watch-failures', $options)->assertSuccessful();

    // Assert: nothing was logged with a single-lead-storm root cause.
    $stormQuery = DB::table('incidents_log')->where('root_cause', 'LIKE', '%single-lead-storm%');
    $stormCount = $stormQuery->count();

    expect($stormCount)->toBe(0, 'no storm when failures spread across leads');
});
|
||
|
||
it('uses default threshold of 1000 when --threshold-single-lead is not provided', function (): void {
    // Straddle the documented default of 1000: a lead with 1001 failures must
    // trip it, a lead with 999 must not. Seeding only the 1001 case would also
    // pass for any smaller default, so it would not pin the value.
    makeStormWebhookRows(7777, 1001);
    makeStormWebhookRows(7776, 999);

    // --threshold-single-lead is deliberately omitted so the default applies.
    $this->artisan('incidents:watch-failures', [
        '--threshold' => 99999, // disable generic webhook spike
    ])->assertSuccessful();

    // Scope the lookup to the seeded lead so an unrelated storm row cannot
    // satisfy the assertion.
    $incident = DB::table('incidents_log')
        ->where('root_cause', 'LIKE', '%single-lead-storm:7777%')
        ->first();

    expect($incident)->not->toBeNull('default threshold=1000 should detect 1001 failures');
    expect($incident->severity)->toBe('high');

    $belowDefault = DB::table('incidents_log')
        ->where('root_cause', 'LIKE', '%single-lead-storm:7776%')
        ->count();

    expect($belowDefault)->toBe(0, 'default threshold=1000 should ignore 999 failures');
});
|
||
|
||
it('deduplicates: does not create duplicate storm incident within dedup window', function (): void {
    // Arrange: a single lead that is over the storm threshold.
    makeStormWebhookRows(8888, 1001);

    $options = [
        '--threshold-single-lead' => 1000,
        '--threshold' => 99999,
    ];

    // Act: run the watcher twice back to back. The second pass falls inside
    // the dedup window (default 60 min per the original note) and should add nothing.
    for ($run = 1; $run <= 2; $run++) {
        $this->artisan('incidents:watch-failures', $options)->assertSuccessful();
    }

    // Assert: exactly one incident exists for this lead.
    $stormQuery = DB::table('incidents_log')->where('root_cause', 'LIKE', '%single-lead-storm:8888%');
    $incidentCount = $stormQuery->count();

    expect($incidentCount)->toBe(1, 'dedup should prevent duplicate incident');
});
|