name: Disk-full recovery on liderra.ru

# Incident response: PostgreSQL is stuck in a PANIC loop because / is 100% full.
#   1) Diagnostics: where the space went (top-20 largest files in /var,
#      du of /var/log, journald usage, PG data dir, /var/www, apt cache, tmp).
#   2) Cleanup:
#        - truncate the Laravel logs, the PG main log
#          (/var/log/postgresql/postgresql-16-main.log) and /var/log/syslog
#          in place, so the inode is preserved for any process holding it open
#        - remove the Playwright browser cache under /var/www/.cache
#        - journalctl --vacuum-size=200M
#        - rotated nginx *.gz logs older than 3 days; live nginx logs >500 MB
#        - apt-get clean
#        - Laravel storage/logs *.log older than 7 days; live logs >200 MB
#        - rotated *.1/*.2/*.3/*.gz logs larger than 50 MB under /var/log
#   3) Final df check + PG probe + HTTPS probe.
#
# Trigger: gh workflow run disk-recover.yml -f confirm_apply=true

on:
  workflow_dispatch:
    inputs:
      confirm_apply:
        description: 'Подтверждаю удаление логов на проде'
        required: true
        default: false
        type: boolean

# No step uses GITHUB_TOKEN (there is no checkout), so grant it nothing.
permissions: {}

jobs:
  recover:
    runs-on: ubuntu-latest
    timeout-minutes: 10
    env:
      LIDERRA_HOST: '111.88.246.137'
      LIDERRA_USER: ubuntu
      # github.event.inputs.* is always a string ("true"/"false"), which is
      # what the Guard step compares against.
      CONFIRM: ${{ github.event.inputs.confirm_apply }}
    steps:
      # Refuse to run unless the operator explicitly opted in.
      - name: Guard
        run: |
          if [[ "$CONFIRM" != "true" ]]; then
            echo "::error::confirm_apply=true required (this workflow mutates disk on prod)"
            exit 1
          fi

      - name: Setup SSH key
        env:
          # Pass the secret through the environment instead of interpolating
          # it into the script text: quotes, `$` or backticks in the key
          # would otherwise be interpreted by the shell.
          LIDERRA_SSH_KEY: ${{ secrets.LIDERRA_SSH_KEY }}
        run: |
          mkdir -p ~/.ssh
          chmod 700 ~/.ssh
          # umask 077 so the key file is never group/world-readable, not
          # even between creation and chmod.
          ( umask 077; printf '%s\n' "$LIDERRA_SSH_KEY" > ~/.ssh/liderra_deploy )
          chmod 600 ~/.ssh/liderra_deploy
          # NOTE(review): ssh-keyscan trusts whatever key the host presents
          # at run time; pinning the host key in a secret would be stricter.
          ssh-keyscan -H "$LIDERRA_HOST" >> ~/.ssh/known_hosts 2>/dev/null

      - name: Diagnose + cleanup
        # An explicit `shell: bash` makes the runner use `bash -eo pipefail`.
        # The default (`bash -e`) has no pipefail, so a failed ssh connection
        # would be hidden by the exit status of `tee` and the job would pass.
        shell: bash
        run: |
          # BatchMode: fail instead of waiting on a prompt nobody can answer.
          ssh -i ~/.ssh/liderra_deploy \
            -o BatchMode=yes -o ConnectTimeout=15 \
            "${LIDERRA_USER}@${LIDERRA_HOST}" \
            "bash -s" <<'REMOTE' | tee /tmp/recover.log
          # Best-effort: every cleanup step must run even if an earlier one fails.
          set +e

          echo "=== A. BEFORE: df -h / ==="
          df -h / /var /var/lib/postgresql 2>&1 | head -10
          echo

          echo "=== B. Top-20 largest files in /var (>50M) ==="
          # Strip the size column and print the rest of the line, so paths
          # containing spaces are shown in full.
          sudo find /var -xdev -type f -size +50M -printf "%s %p\n" 2>/dev/null |
            sort -rn |
            head -20 |
            awk '{ size = $1; sub(/^[0-9]+ /, ""); printf "%8.1f MB %s\n", size/1024/1024, $0 }'
          echo

          echo "=== C. du /var/log/ top-15 directories ==="
          sudo du -sh /var/log/*/ 2>/dev/null | sort -rh | head -15
          echo

          echo "=== D. du /var/log/postgresql/* (individual files) ==="
          sudo du -sh /var/log/postgresql/* 2>/dev/null | sort -rh | head -10
          echo

          echo "=== E. journalctl disk usage ==="
          sudo journalctl --disk-usage 2>&1
          echo

          echo "=== F. /var/lib/postgresql/16/main top-15 subdirs ==="
          sudo du -sh /var/lib/postgresql/16/main/*/ 2>/dev/null | sort -rh | head -15
          echo

          echo "=== G. /var/www top-10 if exists ==="
          sudo du -sh /var/www/*/ 2>/dev/null | sort -rh | head -10
          # NOTE(review): this step and step 5 look under /var/www/lidpotok,
          # while step 1a uses /var/www/liderra/app - confirm which is current.
          sudo du -sh /var/www/lidpotok/storage/logs/ 2>/dev/null
          echo

          echo "=== H. apt cache + tmp ==="
          sudo du -sh /var/cache/apt/archives/ /tmp/ /var/tmp/ 2>/dev/null
          echo

          echo "=========================================="
          echo "=== STARTING CLEANUP (confirm_apply=true) ==="
          echo "=========================================="
          echo

          # In the delete pipelines below, output is capped with `sed -n '1,Np'`
          # rather than `head -N`: head exits after N lines, and find could then
          # be killed by SIGPIPE before it has deleted the remaining files.

          echo "=== 1a. PRIORITY: Truncate laravel.log (8.7 GB!) and rotated copies ==="
          for f in /var/www/liderra/app/storage/logs/laravel.log /var/www/liderra/app/storage/logs/laravel.log.1; do
            if [[ -f "$f" ]]; then
              BEFORE=$(sudo du -m "$f" | cut -f1)
              echo "BEFORE: $f = $BEFORE MB"
              # Truncate in place; fall back to truncate(1) if the redirect fails.
              sudo bash -c ": > '$f'" 2>&1 || sudo truncate -s 0 "$f"
              AFTER=$(sudo du -m "$f" | cut -f1)
              echo "AFTER: $f = $AFTER MB"
            fi
          done
          # Old laravel-* files (present when daily rotation is enabled).
          sudo find /var/www/liderra/app/storage/logs -name "laravel-*.log" -mtime +3 -print -delete 2>&1 |
            sed -n '1,10p'
          echo

          echo "=== 1b. Truncate PG audit log via sudo bash redirect (workaround) ==="
          if [[ -f /var/log/postgresql/postgresql-16-main.log ]]; then
            BEFORE=$(sudo du -m /var/log/postgresql/postgresql-16-main.log | cut -f1)
            echo "BEFORE: $BEFORE MB"
            sudo bash -c ': > /var/log/postgresql/postgresql-16-main.log' 2>&1
            AFTER=$(sudo du -m /var/log/postgresql/postgresql-16-main.log | cut -f1)
            echo "AFTER: $AFTER MB"
          fi
          # Rotated PG logs are removed regardless of age.
          sudo find /var/log/postgresql -type f \( -name "*.gz" -o -name "*.log.[0-9]*" \) -delete 2>&1
          echo

          echo "=== 1c. Truncate syslog (525M) ==="
          sudo bash -c ': > /var/log/syslog' 2>&1
          echo "syslog now: $(sudo du -m /var/log/syslog 2>/dev/null | cut -f1) MB"
          echo

          echo "=== 1d. Remove playwright dev cache (~440M, не нужен в проде) ==="
          if [[ -d /var/www/.cache/ms-playwright ]]; then
            sudo du -sh /var/www/.cache/ms-playwright 2>&1
            sudo rm -rf /var/www/.cache/ms-playwright
            echo "removed"
          fi
          echo

          echo "=== 2. journalctl vacuum --size=200M ==="
          sudo journalctl --vacuum-size=200M 2>&1 | tail -10
          echo

          echo "=== 3. nginx old rotated logs (gz files >3 days) ==="
          sudo find /var/log/nginx -name "*.gz" -mtime +3 -print -delete 2>&1 | sed -n '1,20p'
          echo
          # Live access/error logs are truncated only when larger than 500 MB.
          for f in /var/log/nginx/access.log /var/log/nginx/error.log; do
            if [[ -f "$f" ]]; then
              SIZE_MB=$(sudo du -m "$f" | cut -f1)
              # Default to 0 so an empty du result does not break the test.
              if [[ "${SIZE_MB:-0}" -gt 500 ]]; then
                echo "Truncating $f ($SIZE_MB MB)"
                sudo truncate -s 0 "$f"
              fi
            fi
          done
          echo

          echo "=== 4. apt-get clean ==="
          sudo apt-get clean 2>&1 | tail -5
          echo

          echo "=== 5. Laravel storage/logs *.log older 7 days ==="
          if [[ -d /var/www/lidpotok ]]; then
            sudo find /var/www/lidpotok -path '*/storage/logs/*.log' -mtime +7 -print -delete 2>&1 |
              sed -n '1,20p'
          fi
          # For every site under /var/www, truncate live logs larger than 200 MB.
          for d in /var/www/*/; do
            if [[ -d "$d/storage/logs" ]]; then
              for f in "$d"/storage/logs/laravel.log "$d"/storage/logs/worker.log; do
                if [[ -f "$f" ]]; then
                  SIZE_MB=$(sudo du -m "$f" | cut -f1)
                  if [[ "${SIZE_MB:-0}" -gt 200 ]]; then
                    echo "Truncating $f ($SIZE_MB MB)"
                    sudo truncate -s 0 "$f"
                  fi
                fi
              done
            fi
          done
          echo

          echo "=== 6. Old rotated *.1 *.2 *.gz logs >50M anywhere in /var/log ==="
          sudo find /var/log -type f \
            \( -name "*.1" -o -name "*.2" -o -name "*.3" -o -name "*.gz" \) \
            -size +50M -print -delete 2>&1 | sed -n '1,20p'
          echo

          echo "=========================================="
          echo "=== AFTER CLEANUP ==="
          echo "=========================================="
          echo "=== Z1. df -h / ==="
          df -h / /var /var/lib/postgresql 2>&1 | head -10
          echo

          echo "=== Z2. PG status quick check ==="
          sudo systemctl status postgresql@16-main --no-pager 2>&1 | head -10
          echo

          echo "=== Z3. PG probe ==="
          sleep 5
          sudo -u postgres psql -d liderra -c "SELECT 1 AS probe, NOW() AS ts" 2>&1
          echo

          echo "=== Z4. HTTPS probe ==="
          curl -sI -o /dev/null -w "HTTP %{http_code}\nTotal: %{time_total}s\n" https://liderra.ru/ --max-time 10
          echo
          echo "=== DONE ==="
          REMOTE

      # Publish the captured remote output on the run's summary page.
      - name: Print summary
        if: always()
        run: |
          {
            echo "## Disk recovery on liderra.ru"
            echo
            echo '```'
            cat /tmp/recover.log 2>/dev/null || echo "(no log captured)"
            echo '```'
          } >> "$GITHUB_STEP_SUMMARY"

      # Always remove the private key from the runner, even after a failure.
      - name: Cleanup SSH key
        if: always()
        run: rm -f ~/.ssh/liderra_deploy