Files
portal/app/tests/Feature/Incidents/IncidentsWatchFailuresExpandedTest.php
T
Дмитрий 527f628a21 feat(ops): incidents:watch-failures расширен на failed_jobs + 3 правила (holes #3+#5)
Закрывает дыры #3 (доп. пороги) и #5 (доп. job-классы) аудита журналирования.

Что добавлено:
* СКАН failed_jobs (Laravel-standard) дополнительно к failed_webhook_jobs:
  покрывает 7 ShouldQueue-классов, которые раньше не алертились
  (SyncSupplierProject, ImportLeads, GenerateReport, CsvReconcile,
  CleanupInactiveSupplierProjects, RefreshSupplierSession, DeleteSupplierProject)
* 3 правила детекции для failed_jobs:
  - spike: ≥10 failures одного job-класса за окно 10 мин → severity=high
  - daily-total: ≥50 failures одного job-класса за 24ч → severity=medium
  - persistent: exception повторяется >3ч → severity=medium
* Группировка по (job_class, LEFT(exception, 80)) через JSON-экстракт
  `payload::json->>'displayName'`
* Дедуп переведён с LIKE %summary% на точное совпадение root_cause —
  надёжно и без false-positive
* Mailable IncidentDetectedMail (отдельный от SchedulerHeartbeatMissingMail),
  отправка ТОЛЬКО при severity=high (medium = тихий signal в incidents_log)
* warn-only при отсутствии saas_admin_users (паттерн VerifyAuditChains)

Параметры команды (новые):
  --threshold-spike=10 --threshold-daily=50 --persistent-hours=3
  (старые --window=10 --threshold=200 --dedup-window=60 сохранены)

Тесты: 11/11 passed (4 старых + 7 новых, 37 assertions, 3.6s).

Plan: docs/superpowers/plans/2026-05-23-7-holes-overview.md (#3+#5).
2026-05-23 12:01:20 +03:00

201 lines
6.7 KiB
PHP

<?php
declare(strict_types=1);
use App\Mail\IncidentDetectedMail;
use Illuminate\Foundation\Testing\DatabaseTransactions;
use Illuminate\Support\Carbon;
use Illuminate\Support\Facades\DB;
use Illuminate\Support\Facades\Mail;
use Illuminate\Support\Str;
use Tests\Concerns\SharesSupplierPdo;
uses(DatabaseTransactions::class);
uses(SharesSupplierPdo::class);
// ─── Helpers ────────────────────────────────────────────────────────────────
/**
 * Inserts a single row into the `failed_jobs` table for the given job class.
 *
 * The stored payload is a JSON object whose `displayName` and `job` keys are
 * both set to $jobClass.
 *
 * @param string      $jobClass  Job class name written into the JSON payload.
 * @param string      $exception Text stored in the `exception` column.
 * @param Carbon|null $at        Value for `failed_at`; falls back to now() when null.
 *
 * @throws JsonException When the payload cannot be encoded as JSON.
 */
function makeFailedJob(string $jobClass, string $exception, ?Carbon $at = null): void
{
    // JSON_THROW_ON_ERROR: without it json_encode() returns false on failure,
    // and that false would be inserted as the payload instead of failing loudly.
    $payload = json_encode(
        ['displayName' => $jobClass, 'job' => $jobClass],
        JSON_THROW_ON_ERROR,
    );

    DB::table('failed_jobs')->insert([
        'uuid' => (string) Str::uuid(),
        'connection' => 'redis',
        'queue' => 'default',
        'payload' => $payload,
        'exception' => $exception,
        'failed_at' => $at ?? now(),
    ]);
}
/**
 * Inserts a single row into the `failed_webhook_jobs` table.
 *
 * @param string      $exception Text stored in the `exception` column.
 * @param Carbon|null $at        Value for `failed_at`; falls back to now() when null.
 */
function makeFailedWebhookJobExp(string $exception, ?Carbon $at = null): void
{
    $failedAt = $at ?? now();

    // Empty JSON payload and a zero retry counter: only the timestamp and
    // the exception text vary between rows created by this helper.
    $row = [
        'failed_at' => $failedAt,
        'exception' => $exception,
        'raw_payload' => '{}',
        'retry_count' => 0,
    ];

    DB::table('failed_webhook_jobs')->insert($row);
}
/**
 * Returns the id of a row in `saas_admin_users`, creating one if the table
 * yields no id.
 *
 * @return int Id of the first row returned by the query, or of the freshly
 *             inserted placeholder admin.
 */
function ensureAdminExp(): int
{
    $adminId = DB::table('saas_admin_users')->value('id');

    if ($adminId === null) {
        // No admin id available — insert a placeholder account and use its id.
        $adminId = DB::table('saas_admin_users')->insertGetId([
            'email' => 'cron-expanded@liderra.ru',
            'full_name' => 'Cron Expanded',
            'password_hash' => '$2y$12$placeholder',
            'role' => 'dev_oncall',
            'is_active' => true,
            'created_at' => now(),
        ]);
    }

    return (int) $adminId;
}
// ─── Setup ──────────────────────────────────────────────────────────────────
// Runs before every test: swap the mailer for a fake so sent mailables can be
// asserted, then make sure an admin row is available.
beforeEach(function (): void {
    Mail::fake();

    ensureAdminExp();
});
// ─── Tests ──────────────────────────────────────────────────────────────────
test('failed_webhook_jobs spike still creates high incident (existing logic preserved)', function (): void {
    // Arrange: 201 webhook failures, all stamped with the same instant.
    $failedAt = Carbon::now();
    foreach (range(1, 201) as $ignored) {
        makeFailedWebhookJobExp('App\\Exceptions\\WebhookException: connection refused', $failedAt);
    }

    // Act: run the watcher with its default options.
    $this->artisan('incidents:watch-failures')->assertSuccessful();

    // Assert: exactly one incident row, and it is high severity.
    $loggedIncidents = DB::table('incidents_log')->get();
    expect($loggedIncidents)->toHaveCount(1);

    $onlyIncident = $loggedIncidents->first();
    expect($onlyIncident->severity)->toBe('high');
});
test('failed_jobs spike threshold creates incident severity=high and sends mail', function (): void {
    // Arrange: 11 failures of one job class at the same instant.
    $failedAt = Carbon::now();
    $jobClass = 'App\\Jobs\\SyncSupplierProjectsJob';
    $exceptionText = 'RuntimeException: connection timeout';

    for ($n = 1; $n <= 11; $n++) {
        makeFailedJob($jobClass, $exceptionText, $failedAt);
    }

    // Act: run the watcher with the spike threshold set to 10.
    $this->artisan('incidents:watch-failures', ['--threshold-spike' => 10])->assertSuccessful();

    // Assert: one spike incident, high severity, and exactly one mail.
    $spikeIncidents = DB::table('incidents_log')
        ->where('summary', 'like', '%spike%')
        ->get();

    expect($spikeIncidents)->toHaveCount(1);
    expect($spikeIncidents->first()->severity)->toBe('high');

    Mail::assertSent(IncidentDetectedMail::class, 1);
});
test('failed_jobs daily-total threshold creates incident severity=medium', function (): void {
    // Arrange: 51 failures of one job class, all stamped 12 hours ago.
    $twelveHoursAgo = Carbon::now()->subHours(12);
    $jobClass = 'App\\Jobs\\GenerateReportJob';
    $exceptionText = 'PDOException: SQLSTATE connection refused';

    for ($n = 1; $n <= 51; $n++) {
        makeFailedJob($jobClass, $exceptionText, $twelveHoursAgo);
    }

    // Act: run the watcher with the daily threshold set to 50.
    $this->artisan('incidents:watch-failures', ['--threshold-daily' => 50])->assertSuccessful();

    // Assert: one daily-total incident with medium severity.
    $dailyIncidents = DB::table('incidents_log')
        ->where('summary', 'like', '%daily-total%')
        ->get();

    expect($dailyIncidents)->toHaveCount(1);
    expect($dailyIncidents->first()->severity)->toBe('medium');

    // Medium — no mail
    Mail::assertNotSent(IncidentDetectedMail::class);
});
test('failed_jobs persistent exception creates incident severity=medium', function (): void {
    // Arrange: 3 failures of one job class with the same exception, stamped 4 hours ago.
    $fourHoursAgo = Carbon::now()->subHours(4);
    $jobClass = 'App\\Jobs\\CsvReconcileJob';
    $exceptionText = 'Illuminate\\Database\\QueryException: duplicate key value';

    foreach (range(1, 3) as $ignored) {
        makeFailedJob($jobClass, $exceptionText, $fourHoursAgo);
    }

    // Act: run the watcher with the persistence window set to 3 hours.
    $this->artisan('incidents:watch-failures', ['--persistent-hours' => 3])->assertSuccessful();

    // Assert: one persistent incident with medium severity.
    $persistentIncidents = DB::table('incidents_log')
        ->where('summary', 'like', '%persistent%')
        ->get();

    expect($persistentIncidents)->toHaveCount(1);
    expect($persistentIncidents->first()->severity)->toBe('medium');

    // Medium — no mail
    Mail::assertNotSent(IncidentDetectedMail::class);
});
test('dedup prevents duplicate incidents for same failed_jobs spike', function (): void {
    // Arrange: 11 failures of one job class at the same instant.
    $failedAt = Carbon::now();
    for ($n = 1; $n <= 11; $n++) {
        makeFailedJob('App\\Jobs\\ImportLeadsJob', 'RuntimeException: quota exceeded', $failedAt);
    }

    // Counts incident rows whose summary mentions "spike".
    $countSpikeIncidents = static fn (): int => DB::table('incidents_log')
        ->where('summary', 'like', '%spike%')
        ->count();

    // First run — creates incident
    $this->artisan('incidents:watch-failures', ['--threshold-spike' => 10])->assertSuccessful();
    expect($countSpikeIncidents())->toBe(1);

    // Second run — dedup kicks in, so the count must not grow
    $this->artisan('incidents:watch-failures', ['--threshold-spike' => 10])->assertSuccessful();
    expect($countSpikeIncidents())->toBe(1);
});
test('mail is sent only for high severity, not for medium', function (): void {
    $now = Carbon::now();

    // High: webhook spike (201 failures at the same instant)
    for ($i = 0; $i < 201; $i++) {
        makeFailedWebhookJobExp('App\\Exceptions\\WebhookException: ssl error', $now);
    }

    // Medium: daily-total (55 failures of one job class, stamped 12 hours ago)
    $twelveHoursAgo = Carbon::now()->subHours(12);
    for ($i = 0; $i < 55; $i++) {
        makeFailedJob(
            'App\\Jobs\\CleanupInactiveSupplierProjectsJob',
            'RuntimeException: cleanup fail',
            $twelveHoursAgo,
        );
    }

    $this->artisan('incidents:watch-failures', ['--threshold-daily' => 50])->assertSuccessful();

    // Guard against a vacuous pass: the medium incident must actually have been
    // recorded, otherwise "no mail for medium" would hold trivially.
    $mediumIncidentExists = DB::table('incidents_log')
        ->where('summary', 'like', '%daily-total%')
        ->where('severity', 'medium')
        ->exists();
    expect($mediumIncidentExists)->toBeTrue();

    // Only 1 mail for the high webhook incident
    Mail::assertSent(IncidentDetectedMail::class, 1);
});
test('warn-only when no saas_admin_users exist', function (): void {
    // Arrange: remove all admins, then add 11 failures of one job class.
    DB::table('saas_admin_users')->delete();

    $failedAt = Carbon::now();
    foreach (range(1, 11) as $ignored) {
        makeFailedJob('App\\Jobs\\SyncSupplierProjectsJob', 'RuntimeException: no admin', $failedAt);
    }

    // Act: the command must still report SUCCESS, not FAILURE.
    $command = $this->artisan('incidents:watch-failures', ['--threshold-spike' => 10]);
    $command->assertSuccessful();

    // Assert: no incidents created (no admin FK)
    $incidentTotal = DB::table('incidents_log')->count();
    expect($incidentTotal)->toBe(0);

    // No mail
    Mail::assertNotSent(IncidentDetectedMail::class);
});