chore: initial commit
This commit is contained in:
95
smart_health_check.sh
Executable file
95
smart_health_check.sh
Executable file
@@ -0,0 +1,95 @@
|
||||
#!/bin/bash
# Smart health check: intervene only when a problem is detected.
# Intended to be run every 12 hours.
#
# NOTE: every path below (and ./manage_bot.sh, used later in this script) is
# relative, so the script has to be started from the directory that contains
# logs/ and manage_bot.sh.

# Log written by this script.
LOG_FILE="logs/smart_health_check.log"
# Log scanned for "Connection lost" / "AUTH_KEY_UNREGISTERED" errors.
ERROR_LOG="logs/integrated_bot_errors.log"
# Log scanned for the Pyrogram startup marker.
DETAIL_LOG="logs/integrated_bot_detailed.log"

#######################################
# Print a timestamped message to stdout and append the same line to the
# script's log file.
# Globals:   LOG_FILE (read) - file the line is appended to
# Arguments: $1 - message text
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>" on stdout and in $LOG_FILE
#######################################
log() {
  local stamp
  stamp=$(date "+%Y-%m-%d %H:%M:%S")
  printf '[%s] %s\n' "$stamp" "$1" | tee -a "$LOG_FILE"
}

log "=========================================="
log "开始健康检查..."

# 1. Check whether the bot process is running. If it is not, restart it and
#    exit without running the remaining checks.
#    The negation must be an unescaped `!`: written as `\!` the shell looks
#    for a command literally named "!", fails, and this branch never runs.
# NOTE(review): the exit status of manage_bot.sh is not checked, so the
# success message is logged even if the restart failed.
if ! pgrep -f "python.*integrated_bot_ai.py" >/dev/null; then
  log "❌ 机器人进程未运行,需要启动"
  ./manage_bot.sh restart >> "$LOG_FILE" 2>&1
  log "✅ 已重启机器人"
  exit 0
fi

# 2. Check the Pyrogram client status by counting occurrences of the startup
#    marker in the last 100 lines of the detail log. A missing or unreadable
#    log yields a count of 0, which is reported as "unknown" rather than as a
#    failure.
PYROGRAM_RUNNING=$(tail -n 100 "$DETAIL_LOG" 2>/dev/null \
  | grep -c -F "✅ Pyrogram客户端已启动")

if [ "$PYROGRAM_RUNNING" -gt 0 ]; then
  log "✅ Pyrogram 客户端状态: 正常"
  PYROGRAM_OK=true
else
  log "⚠️ Pyrogram 客户端状态: 未知"
  PYROGRAM_OK=false
fi

# 3./4. Count recent errors in the error log.
#
# Lines are selected by the "YYYY-MM-DD HH" hour stamp they contain. Both the
# previous clock hour and the current clock hour are matched: matching only
# the previous hour would ignore every error logged since the top of the
# current hour, i.e. the most recent ones.
# The first `date` form is tried first; the `-v-1H` form is the fallback for
# systems whose `date` rejects `-d`.
THIS_HOUR=$(date "+%Y-%m-%d %H")
HOUR_AGO=$(date -d "1 hour ago" "+%Y-%m-%d %H" 2>/dev/null || date -v-1H "+%Y-%m-%d %H" 2>/dev/null)
# An empty pattern would match every line, so fall back to the current hour.
if [ -z "$HOUR_AGO" ]; then
  HOUR_AGO=$THIS_HOUR
fi

#######################################
# Count lines among the last 200 of the error log that contain the given
# text together with one of the two recent hour stamps.
# Globals:   ERROR_LOG, HOUR_AGO, THIS_HOUR (read)
# Arguments: $1 - fixed string identifying the error
# Outputs:   the count on stdout (0 if the log is missing or unreadable)
#######################################
count_recent_errors() {
  tail -n 200 "$ERROR_LOG" 2>/dev/null \
    | grep -F -e "$1" \
    | grep -c -F -e "$HOUR_AGO" -e "$THIS_HOUR"
}

# 3. "Connection lost" errors.
RECENT_CONNECTION_ERRORS=$(count_recent_errors "Connection lost")
log "最近1小时 Connection lost 错误: $RECENT_CONNECTION_ERRORS 个"

# 4. AUTH_KEY errors.
RECENT_AUTH_ERRORS=$(count_recent_errors "AUTH_KEY_UNREGISTERED")
log "最近1小时 AUTH_KEY 错误: $RECENT_AUTH_ERRORS 个"

# 5. Decision logic: a restart is requested when any one of the rules below
#    matches.
NEED_RESTART=false

# Rule 1: more than 5 connection errors.
if [ "$RECENT_CONNECTION_ERRORS" -gt 5 ]; then
  log "❌ 检测到过多连接错误 ($RECENT_CONNECTION_ERRORS 个)"
  NEED_RESTART=true
fi

# Rule 2: any AUTH_KEY error at all.
if [ "$RECENT_AUTH_ERRORS" -gt 0 ]; then
  log "❌ 检测到 AUTH_KEY 错误"
  NEED_RESTART=true
fi

# Rule 3: Pyrogram status not confirmed and more than 2 connection errors
# (a lower threshold than rule 1).
if [ "$PYROGRAM_OK" = false ] && [ "$RECENT_CONNECTION_ERRORS" -gt 2 ]; then
  log "❌ Pyrogram 状态异常且有连接错误"
  NEED_RESTART=true
fi

# 6. Act on the decision.

#######################################
# Print the current number of lines in the detail log.
# Globals:   DETAIL_LOG (read)
# Outputs:   line count on stdout; 0 if the file is missing or unreadable
#######################################
detail_log_lines() {
  if [ -r "$DETAIL_LOG" ]; then
    # $(( )) strips the padding some wc implementations put around the number.
    printf '%s\n' "$(( $(wc -l < "$DETAIL_LOG") ))"
  else
    printf '0\n'
  fi
}

#######################################
# Tell whether the Pyrogram startup marker appears in the detail log after a
# given line number.
# Globals:   DETAIL_LOG (read)
# Arguments: $1 - number of leading lines to skip (0 searches the whole file)
# Returns:   0 if the marker is found in the remaining lines, non-zero otherwise
#######################################
pyrogram_started_after() {
  tail -n "+$(( $1 + 1 ))" "$DETAIL_LOG" 2>/dev/null \
    | grep -q -F "✅ Pyrogram客户端已启动"
}

if [ "$NEED_RESTART" = true ]; then
  log "=========================================="
  log "🔄 检测到问题,准备重启机器人..."
  log "=========================================="

  # Remember where the detail log ends now, so that a startup marker written
  # by an earlier run is not mistaken for proof that this restart worked.
  LINES_BEFORE=$(detail_log_lines)

  # Restart.
  ./manage_bot.sh restart >> "$LOG_FILE" 2>&1

  # Give the bot time to start.
  sleep 10

  # If the log is now shorter than before, it was truncated or replaced
  # during the restart; everything in it is new, so search all of it.
  if [ "$(detail_log_lines)" -lt "$LINES_BEFORE" ]; then
    LINES_BEFORE=0
  fi

  # Verify using only the lines written since the restart was issued.
  if pyrogram_started_after "$LINES_BEFORE"; then
    log "✅ 重启成功 - Pyrogram 已重新连接"
  else
    log "⚠️ 重启完成 - 状态待确认"
  fi

  log "=========================================="
else
  log "✅ 一切正常,无需干预"
  log "=========================================="
fi

# 7. Status summary. The process line is a fixed string: step 1 exits early
#    when the bot is not running, so this point is only reached when it is.
if [ "$PYROGRAM_OK" = true ]; then
  PYROGRAM_SUMMARY="正常"
else
  PYROGRAM_SUMMARY="未知"
fi

if [ "$NEED_RESTART" = true ]; then
  ACTION_SUMMARY="已重启"
else
  ACTION_SUMMARY="无操作"
fi

log "状态摘要:"
log " - 进程: 运行中"
log " - Pyrogram: $PYROGRAM_SUMMARY"
log " - 连接错误: $RECENT_CONNECTION_ERRORS 个"
log " - AUTH错误: $RECENT_AUTH_ERRORS 个"
log " - 操作: $ACTION_SUMMARY"
log ""
Reference in New Issue
Block a user