#!/bin/bash
# Smart health check: intervene only when something is actually wrong.
# Intended to be run periodically (every 12 hours).
#
# Flow:
#   1. If the bot process is not running, restart it and exit.
#   2. Look for the Pyrogram "client started" marker in the detailed log.
#   3. Count recent "Connection lost" and AUTH_KEY errors in the error log.
#   4. Decide whether a restart is needed, perform it, and verify it.
#   5. Write a status summary to the health-check log.
#
# NOTE(review): every path below (logs/..., ./manage_bot.sh) is relative, so
# the script must be started from the bot's working directory - confirm that
# the scheduler entry does a `cd` first.

# -e / pipefail are deliberately not enabled: `grep -c` exits with status 1
# when it counts zero matches, which is a normal outcome here.
set -u

LOG_FILE="logs/smart_health_check.log"
ERROR_LOG="logs/integrated_bot_errors.log"
DETAIL_LOG="logs/integrated_bot_detailed.log"

readonly BOT_PROCESS_PATTERN="python.*integrated_bot_ai.py"
readonly STARTED_MARKER="✅ Pyrogram客户端已启动"
readonly SEPARATOR="=========================================="

# `tee -a` fails if the log directory is missing, so create it up front.
mkdir -p -- "$(dirname -- "$LOG_FILE")"

#######################################
# Print a timestamped message to stdout and append it to LOG_FILE.
# Globals:   LOG_FILE (read)
# Arguments: $1 - message text (may be empty)
#######################################
log() {
  printf '[%s] %s\n' "$(date "+%Y-%m-%d %H:%M:%S")" "${1-}" | tee -a "$LOG_FILE"
}

#######################################
# Run the external restart command, sending its output to LOG_FILE.
# Globals:   LOG_FILE (read)
# Returns:   exit status of `./manage_bot.sh restart`
#######################################
restart_bot() {
  ./manage_bot.sh restart >> "$LOG_FILE" 2>&1
}

#######################################
# Count lines among the last 200 of ERROR_LOG that contain the given text and
# also contain the current or the previous clock-hour prefix.
# Globals:   ERROR_LOG, HOUR_AGO, HOUR_NOW (read)
# Arguments: $1 - fixed string to look for
# Outputs:   the count on stdout (0 when the log is missing or unreadable)
#######################################
count_recent_errors() {
  local needle=$1
  tail -n 200 "$ERROR_LOG" 2>/dev/null \
    | grep -F -- "$needle" \
    | grep -c -F -e "$HOUR_AGO" -e "$HOUR_NOW"
}

#######################################
# Print the number of lines currently in DETAIL_LOG (0 if it does not exist).
# Globals:   DETAIL_LOG (read)
#######################################
detail_log_lines() {
  local n=0
  if [[ -f "$DETAIL_LOG" ]]; then
    n=$(wc -l < "$DETAIL_LOG")
  fi
  # Arithmetic expansion strips the padding some `wc` implementations emit.
  printf '%d\n' "$((n))"
}

log "$SEPARATOR"
log "开始健康检查..."

# 1. Is the bot process running at all?
if ! pgrep -f "$BOT_PROCESS_PATTERN" >/dev/null; then
  log "❌ 机器人进程未运行,需要启动"
  if restart_bot; then
    log "✅ 已重启机器人"
    exit 0
  fi
  log "❌ 重启命令执行失败"
  exit 1
fi

# 2. Pyrogram client state: look for the startup marker in the last 100 lines
#    of the detailed log.
PYROGRAM_RUNNING=$(tail -n 100 "$DETAIL_LOG" 2>/dev/null | grep -c -F -- "$STARTED_MARKER")
if (( PYROGRAM_RUNNING > 0 )); then
  log "✅ Pyrogram 客户端状态: 正常"
  PYROGRAM_OK=true
else
  log "⚠️ Pyrogram 客户端状态: 未知"
  PYROGRAM_OK=false
fi

# 3. "Connection lost" errors in the recent window.
#    Matching is done on "YYYY-MM-DD HH" prefixes. Both the previous and the
#    current hour are accepted; matching only the previous hour would skip
#    everything logged since the top of the current hour.
HOUR_NOW=$(date "+%Y-%m-%d %H")
# GNU date first, BSD/macOS date as fallback. If neither works, fall back to
# the current hour so an empty pattern never matches every line.
HOUR_AGO=$(date -d "1 hour ago" "+%Y-%m-%d %H" 2>/dev/null \
  || date -v-1H "+%Y-%m-%d %H" 2>/dev/null) || HOUR_AGO=$HOUR_NOW

RECENT_CONNECTION_ERRORS=$(count_recent_errors "Connection lost")
log "最近1小时 Connection lost 错误: $RECENT_CONNECTION_ERRORS 个"

# 4. AUTH_KEY errors in the same window.
RECENT_AUTH_ERRORS=$(count_recent_errors "AUTH_KEY_UNREGISTERED")
log "最近1小时 AUTH_KEY 错误: $RECENT_AUTH_ERRORS 个"

# 5. Decision logic.
NEED_RESTART=false

if (( RECENT_CONNECTION_ERRORS > 5 )); then
  log "❌ 检测到过多连接错误 ($RECENT_CONNECTION_ERRORS 个)"
  NEED_RESTART=true
fi

if (( RECENT_AUTH_ERRORS > 0 )); then
  log "❌ 检测到 AUTH_KEY 错误"
  NEED_RESTART=true
fi

if [[ "$PYROGRAM_OK" == false ]] && (( RECENT_CONNECTION_ERRORS > 2 )); then
  log "❌ Pyrogram 状态异常且有连接错误"
  NEED_RESTART=true
fi

# 6. Act on the decision.
if [[ "$NEED_RESTART" == true ]]; then
  log "$SEPARATOR"
  log "🔄 检测到问题,准备重启机器人..."
  log "$SEPARATOR"

  # Remember how long the detailed log was, so verification only looks at
  # lines written after this restart and not at an older startup marker.
  lines_before=$(detail_log_lines)

  if ! restart_bot; then
    log "❌ 重启命令执行失败"
    log "$SEPARATOR"
    exit 1
  fi

  # Give the bot time to start.
  sleep 10

  lines_after=$(detail_log_lines)
  if (( lines_after < lines_before )); then
    # The log shrank (rotated or truncated): everything in it is new.
    first_new_line=1
  else
    first_new_line=$(( lines_before + 1 ))
  fi

  if tail -n "+${first_new_line}" "$DETAIL_LOG" 2>/dev/null \
    | grep -q -F -- "$STARTED_MARKER"; then
    log "✅ 重启成功 - Pyrogram 已重新连接"
  else
    log "⚠️ 重启完成 - 状态待确认"
  fi

  log "$SEPARATOR"
else
  log "✅ 一切正常,无需干预"
  log "$SEPARATOR"
fi

# 7. Status summary.
if [[ "$PYROGRAM_OK" == true ]]; then
  pyrogram_status="正常"
else
  pyrogram_status="未知"
fi

if [[ "$NEED_RESTART" == true ]]; then
  action_taken="已重启"
else
  action_taken="无操作"
fi

log "状态摘要:"
log "  - 进程: 运行中"
log "  - Pyrogram: $pyrogram_status"
log "  - 连接错误: $RECENT_CONNECTION_ERRORS 个"
log "  - AUTH错误: $RECENT_AUTH_ERRORS 个"
log "  - 操作: $action_taken"
log ""