527f628a21
Закрывает дыры #3 (доп. пороги) и #5 (доп. job-классы) аудита журналирования. Что добавлено: * СКАН failed_jobs (Laravel-standard) дополнительно к failed_webhook_jobs: покрывает 7 ShouldQueue классов которые раньше не алертились (SyncSupplierProject, ImportLeads, GenerateReport, CsvReconcile, CleanupInactiveSupplierProjects, RefreshSupplierSession, DeleteSupplierProject) * 3 правила детекции для failed_jobs: - spike: ≥10 failures одного job-класса за окно 10 мин → severity=high - daily-total: ≥50 failures одного job-класса за 24ч → severity=medium - persistent: exception повторяется >3ч → severity=medium * Группировка по (job_class, LEFT(exception, 80)) через JSON-экстракт `payload::json->>'displayName'` * Дедуп переведён с LIKE %summary% на точное совпадение root_cause — надёжно и без false-positive * Mailable IncidentDetectedMail (отдельный от SchedulerHeartbeatMissingMail), отправка ТОЛЬКО при severity=high (medium = тихий signal в incidents_log) * warn-only при отсутствии saas_admin_users (паттерн VerifyAuditChains) Параметры команды (новые): --threshold-spike=10 --threshold-daily=50 --persistent-hours=3 (старые --window=10 --threshold=200 --dedup-window=60 сохранены) Тесты: 11/11 passed (4 старых + 7 новых, 37 assertions, 3.6s). Plan: docs/superpowers/plans/2026-05-23-7-holes-overview.md (#3+#5).
201 lines
6.7 KiB
PHP
201 lines
6.7 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
use App\Mail\IncidentDetectedMail;
|
|
use Illuminate\Foundation\Testing\DatabaseTransactions;
|
|
use Illuminate\Support\Carbon;
|
|
use Illuminate\Support\Facades\DB;
|
|
use Illuminate\Support\Facades\Mail;
|
|
use Illuminate\Support\Str;
|
|
use Tests\Concerns\SharesSupplierPdo;
|
|
|
|
// Every test in this file runs inside a rolled-back DB transaction and uses the
// SharesSupplierPdo test concern.
uses(DatabaseTransactions::class, SharesSupplierPdo::class);
|
|
|
|
// ─── Helpers ────────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Insert a single row into `failed_jobs` for the given job class.
 *
 * The payload is a JSON object carrying the job class under both the
 * `displayName` and `job` keys.
 *
 * @param string      $jobClass  Fully-qualified job class name stored in the payload.
 * @param string      $exception Exception text stored in the `exception` column.
 * @param Carbon|null $at        Failure timestamp; defaults to the current time.
 *
 * @throws \JsonException When the payload cannot be encoded (e.g. malformed UTF-8).
 */
function makeFailedJob(string $jobClass, string $exception, ?Carbon $at = null): void
{
    // JSON_THROW_ON_ERROR: without it json_encode() returns false on failure and
    // the fixture would silently insert a non-JSON payload.
    $payload = json_encode(
        ['displayName' => $jobClass, 'job' => $jobClass],
        JSON_THROW_ON_ERROR,
    );

    DB::table('failed_jobs')->insert([
        'uuid' => (string) Str::uuid(),
        'connection' => 'redis',
        'queue' => 'default',
        'payload' => $payload,
        'exception' => $exception,
        'failed_at' => $at ?? now(),
    ]);
}
|
|
|
|
/**
 * Insert a single row into `failed_webhook_jobs`.
 *
 * The row gets an empty JSON object as `raw_payload` and a `retry_count` of 0.
 *
 * @param string      $exception Exception text stored in the `exception` column.
 * @param Carbon|null $at        Failure timestamp; defaults to the current time.
 */
function makeFailedWebhookJobExp(string $exception, ?Carbon $at = null): void
{
    $failedAt = $at ?? now();

    $row = [
        'failed_at' => $failedAt,
        'exception' => $exception,
        'raw_payload' => '{}',
        'retry_count' => 0,
    ];

    DB::table('failed_webhook_jobs')->insert($row);
}
|
|
|
|
/**
 * Return the id of a `saas_admin_users` row, creating one if the table is empty.
 *
 * An existing row is reused as-is (whichever one the query returns first);
 * only when none exists is a placeholder admin inserted.
 *
 * @return int Id of the existing or newly inserted admin row.
 */
function ensureAdminExp(): int
{
    $existingId = DB::table('saas_admin_users')->value('id');

    // The right-hand side of ?? runs only when no admin row was found.
    return (int) ($existingId ?? DB::table('saas_admin_users')->insertGetId([
        'email' => 'cron-expanded@liderra.ru',
        'full_name' => 'Cron Expanded',
        'password_hash' => '$2y$12$placeholder',
        'role' => 'dev_oncall',
        'is_active' => true,
        'created_at' => now(),
    ]));
}
|
|
|
|
// ─── Setup ──────────────────────────────────────────────────────────────────
|
|
|
|
// Shared fixture: swap the mailer for a fake and make sure an admin row exists
// before every test.
beforeEach(function (): void {
    Mail::fake();

    ensureAdminExp();
});
|
|
|
|
// ─── Tests ──────────────────────────────────────────────────────────────────
|
|
|
|
// 201 webhook failures at the same instant must yield exactly one
// high-severity row in incidents_log.
test('failed_webhook_jobs spike still creates high incident (existing logic preserved)', function (): void {
    $failedAt = Carbon::now();

    foreach (range(1, 201) as $ignored) {
        makeFailedWebhookJobExp('App\\Exceptions\\WebhookException: connection refused', $failedAt);
    }

    $this->artisan('incidents:watch-failures')->assertSuccessful();

    $rows = DB::table('incidents_log')->get();

    expect($rows)->toHaveCount(1)
        ->and($rows->first()->severity)->toBe('high');
});
|
|
|
|
// 11 failures of one job class with --threshold-spike=10 must produce a single
// high-severity "spike" incident and exactly one IncidentDetectedMail.
test('failed_jobs spike threshold creates incident severity=high and sends mail', function (): void {
    $failedAt = Carbon::now();
    $jobClass = 'App\\Jobs\\SyncSupplierProjectsJob';
    $exception = 'RuntimeException: connection timeout';

    foreach (range(1, 11) as $ignored) {
        makeFailedJob($jobClass, $exception, $failedAt);
    }

    $this->artisan('incidents:watch-failures', ['--threshold-spike' => 10])->assertSuccessful();

    $spikes = DB::table('incidents_log')->where('summary', 'like', '%spike%')->get();

    expect($spikes)->toHaveCount(1)
        ->and($spikes->first()->severity)->toBe('high');

    Mail::assertSent(IncidentDetectedMail::class, 1);
});
|
|
|
|
// 51 failures of one job class 12 hours ago with --threshold-daily=50 must
// produce a single medium-severity "daily-total" incident and no mail.
test('failed_jobs daily-total threshold creates incident severity=medium', function (): void {
    $twelveHoursAgo = Carbon::now()->subHours(12);

    foreach (range(1, 51) as $ignored) {
        makeFailedJob(
            'App\\Jobs\\GenerateReportJob',
            'PDOException: SQLSTATE connection refused',
            $twelveHoursAgo,
        );
    }

    $this->artisan('incidents:watch-failures', ['--threshold-daily' => 50])->assertSuccessful();

    $dailyTotals = DB::table('incidents_log')->where('summary', 'like', '%daily-total%')->get();

    expect($dailyTotals)->toHaveCount(1)
        ->and($dailyTotals->first()->severity)->toBe('medium');

    // Medium severity must stay silent: no mail goes out.
    Mail::assertNotSent(IncidentDetectedMail::class);
});
|
|
|
|
// Three failures with the same exception 4 hours ago and --persistent-hours=3
// must produce a single medium-severity "persistent" incident and no mail.
test('failed_jobs persistent exception creates incident severity=medium', function (): void {
    $fourHoursAgo = Carbon::now()->subHours(4);
    $remaining = 3;

    while ($remaining-- > 0) {
        makeFailedJob(
            'App\\Jobs\\CsvReconcileJob',
            'Illuminate\\Database\\QueryException: duplicate key value',
            $fourHoursAgo,
        );
    }

    $this->artisan('incidents:watch-failures', ['--persistent-hours' => 3])->assertSuccessful();

    $persistent = DB::table('incidents_log')->where('summary', 'like', '%persistent%')->get();

    expect($persistent)->toHaveCount(1)
        ->and($persistent->first()->severity)->toBe('medium');

    // Medium severity must stay silent: no mail goes out.
    Mail::assertNotSent(IncidentDetectedMail::class);
});
|
|
|
|
// Running the command twice over the same spike must leave exactly one
// "spike" incident in incidents_log.
test('dedup prevents duplicate incidents for same failed_jobs spike', function (): void {
    $failedAt = Carbon::now();

    foreach (range(1, 11) as $ignored) {
        makeFailedJob('App\\Jobs\\ImportLeadsJob', 'RuntimeException: quota exceeded', $failedAt);
    }

    // Counts the spike incidents currently recorded.
    $spikeCount = fn (): int => DB::table('incidents_log')
        ->where('summary', 'like', '%spike%')
        ->count();

    // First pass records the incident.
    $this->artisan('incidents:watch-failures', ['--threshold-spike' => 10])->assertSuccessful();
    expect($spikeCount())->toBe(1);

    // Second pass sees the same spike and must not record it again.
    $this->artisan('incidents:watch-failures', ['--threshold-spike' => 10])->assertSuccessful();
    expect($spikeCount())->toBe(1);
});
|
|
|
|
// Mixes one high-severity trigger (webhook spike) with one medium-severity
// trigger (daily-total) in a single run and checks that only the high one mails.
test('mail is sent only for high severity, not for medium', function (): void {
    $now = Carbon::now();

    // High: webhook spike
    for ($i = 0; $i < 201; $i++) {
        makeFailedWebhookJobExp('App\\Exceptions\\WebhookException: ssl error', $now);
    }

    // Medium: daily-total (failures dated 12 hours ago)
    $twelveHoursAgo = Carbon::now()->subHours(12);
    for ($i = 0; $i < 55; $i++) {
        makeFailedJob(
            'App\\Jobs\\CleanupInactiveSupplierProjectsJob',
            'RuntimeException: cleanup fail',
            $twelveHoursAgo,
        );
    }

    $this->artisan('incidents:watch-failures', ['--threshold-daily' => 50])->assertSuccessful();

    // Guard against a vacuous pass: the medium incident must really have been
    // recorded, otherwise "no mail for medium" is never exercised.
    $dailyTotals = DB::table('incidents_log')
        ->where('summary', 'like', '%daily-total%')
        ->get();
    expect($dailyTotals)->toHaveCount(1);
    expect($dailyTotals->first()->severity)->toBe('medium');

    // Only 1 mail for the high webhook incident
    Mail::assertSent(IncidentDetectedMail::class, 1);
});
|
|
|
|
// With no rows in saas_admin_users the command must still exit successfully,
// record nothing in incidents_log and send no mail.
test('warn-only when no saas_admin_users exist', function (): void {
    // Undo the beforeEach fixture: wipe every admin row.
    DB::table('saas_admin_users')->delete();

    $failedAt = Carbon::now();

    foreach (range(1, 11) as $ignored) {
        makeFailedJob('App\\Jobs\\SyncSupplierProjectsJob', 'RuntimeException: no admin', $failedAt);
    }

    // The run is expected to report SUCCESS, not FAILURE.
    $this->artisan('incidents:watch-failures', ['--threshold-spike' => 10])->assertSuccessful();

    // No incidents created (no admin FK)
    $incidentCount = DB::table('incidents_log')->count();
    expect($incidentCount)->toBe(0);

    // No mail
    Mail::assertNotSent(IncidentDetectedMail::class);
});
|