ci: 增强部署诊断能力
- 添加容器状态和端口占用检查
- 添加容器内部监听情况诊断
- 增加详细的健康检查日志(100行)
- 健康检查重试次数从5次增加到10次
- 第5次失败时执行深度诊断
- 添加独立的部署健康检查脚本

改进点:
1. 诊断端口冲突问题
2. 检查容器内部监听配置
3. 增加详细的错误日志输出
4. SSH回连获取实时状态
This commit is contained in:
@@ -127,15 +127,35 @@ jobs:
|
||||
echo "⏳ 等待服务启动..."
|
||||
sleep 10
|
||||
|
||||
# 检查容器状态
|
||||
# 1. 检查容器状态
|
||||
echo "📊 容器状态:"
|
||||
sudo docker-compose ps
|
||||
|
||||
# 检查容器日志
|
||||
echo "📝 容器日志:"
|
||||
sudo docker-compose logs --tail=20
|
||||
# 2. 检查端口占用情况
|
||||
echo ""
|
||||
echo "🔍 检查端口8080占用:"
|
||||
sudo lsof -i :8080 || echo "端口8080未被占用"
|
||||
|
||||
# 3. 检查容器内部监听情况
|
||||
echo ""
|
||||
echo "🔍 检查容器内部监听:"
|
||||
CONTAINER_ID=$(sudo docker-compose ps -q kt-financial 2>/dev/null || echo "")
|
||||
if [ -n "$CONTAINER_ID" ]; then
|
||||
# Match port 80 exactly: a bare ':80' pattern also matches ':8080', ':8000', etc.,
# which would report a port-80 listener that does not exist. In `ss -tlnp` output
# the local address:port column is followed by whitespace.
sudo docker exec "$CONTAINER_ID" ss -tlnp | grep -E ':80[[:space:]]' || echo "容器内无80端口监听"
|
||||
fi
|
||||
|
||||
# 4. 检查容器详细日志(增加行数)
|
||||
echo ""
|
||||
echo "📝 容器日志(最近100行):"
|
||||
sudo docker-compose logs --tail=100
|
||||
|
||||
# 5. 检查容器健康状态
|
||||
echo ""
|
||||
echo "🏥 容器健康检查:"
|
||||
sudo docker inspect --format='{{.State.Health.Status}}' $CONTAINER_ID 2>/dev/null || echo "未配置健康检查"
|
||||
|
||||
# 清理旧镜像和悬空镜像
|
||||
echo ""
|
||||
echo "🧹 清理旧镜像..."
|
||||
sudo docker image prune -f
|
||||
|
||||
@@ -146,23 +166,52 @@ jobs:
|
||||
run: |
|
||||
echo "🔍 执行健康检查..."
|
||||
|
||||
# 等待服务完全启动
|
||||
sleep 15
|
||||
# 等待服务完全启动(延长等待时间)
|
||||
sleep 20
|
||||
|
||||
# 健康检查
|
||||
for i in {1..5}; do
|
||||
echo "尝试 $i/5: 检查服务..."
|
||||
# 健康检查(增加重试次数和诊断信息)
|
||||
for i in {1..10}; do
|
||||
echo ""
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
echo "尝试 $i/10: 检查服务 ${{ env.HEALTH_CHECK_URL }}"
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
|
||||
if curl -f -s -o /dev/null -w "%{http_code}" ${{ env.HEALTH_CHECK_URL }} | grep -q "200\|301\|302"; then
|
||||
echo "✅ 服务健康检查通过!"
|
||||
# 详细的curl诊断
|
||||
# Capture ONLY the status code. curl's verbose trace (-v) is left on stderr, so it
# still shows up in the job log but no longer ends up inside HTTP_CODE, where the
# status-code grep below could match unrelated digits (headers, dates, addresses).
# '|| true' stops a refused or timed-out connection from aborting the step when the
# shell runs with errexit, so the retry loop can continue; curl still prints "000"
# for the status code in that case.
HTTP_CODE=$(curl -v -s -o /dev/null -w "%{http_code}" --connect-timeout 5 --max-time 10 "${{ env.HEALTH_CHECK_URL }}" || true)
|
||||
echo "响应: $HTTP_CODE"
|
||||
|
||||
if echo "$HTTP_CODE" | grep -q "200\|301\|302"; then
|
||||
echo "✅ 服务健康检查通过!HTTP状态码正常"
|
||||
echo ""
|
||||
echo "🎉 部署成功!服务已正常运行"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "⏳ 等待5秒后重试..."
|
||||
sleep 5
|
||||
# 如果失败,显示更多诊断信息
|
||||
if [ $i -eq 5 ]; then
|
||||
echo ""
|
||||
echo "⚠️ 第5次尝试失败,执行深度诊断..."
|
||||
echo ""
|
||||
echo "🔍 检查容器运行状态:"
|
||||
ssh -o StrictHostKeyChecking=no ${{ secrets.SERVER_USER || 'atai' }}@${{ secrets.SERVER_HOST || '172.16.74.149' }} "cd /home/atai/kt-financial-system && sudo docker-compose ps" || true
|
||||
echo ""
|
||||
echo "📝 最新容器日志:"
|
||||
ssh -o StrictHostKeyChecking=no ${{ secrets.SERVER_USER || 'atai' }}@${{ secrets.SERVER_HOST || '172.16.74.149' }} "cd /home/atai/kt-financial-system && sudo docker-compose logs --tail=50" || true
|
||||
fi
|
||||
|
||||
if [ $i -lt 10 ]; then
|
||||
echo "⏳ 等待6秒后重试..."
|
||||
sleep 6
|
||||
fi
|
||||
done
|
||||
|
||||
echo "❌ 健康检查失败,服务可能未正常启动"
|
||||
echo ""
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
echo "❌ 健康检查失败:10次尝试均未成功"
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
echo ""
|
||||
echo "🔍 最终诊断信息:"
|
||||
ssh -o StrictHostKeyChecking=no ${{ secrets.SERVER_USER || 'atai' }}@${{ secrets.SERVER_HOST || '172.16.74.149' }} "cd /home/atai/kt-financial-system && sudo docker-compose ps && echo '---' && sudo docker-compose logs --tail=100" || true
|
||||
exit 1
|
||||
|
||||
- name: Send notification on success
|
||||
|
||||
94
scripts/check-deployment.sh
Executable file
94
scripts/check-deployment.sh
Executable file
@@ -0,0 +1,94 @@
|
||||
#!/bin/bash
#
# Deployment health check for the KT financial system.
#
# Runs four checks against the deployed service, in order:
#   1. ICMP reachability of the host (ping)
#   2. TCP connectivity to the service port (nc), with retries
#   3. HTTP status code returned for "/"
#   4. Total response time for "/"
#
# Usage: scripts/check-deployment.sh
#
# Optional environment overrides (defaults are the previously hard-coded values):
#   TARGET_HOST     host to probe                      (default 172.16.74.149)
#   TARGET_PORT     TCP port of the service            (default 8080)
#   MAX_RETRIES     number of port connection attempts (default 10)
#   RETRY_INTERVAL  seconds to wait between attempts   (default 5)
#
# Exit status:
#   0  host reachable, port open and an HTTP response was received
#      (an unexpected status code or a slow response is only a warning)
#   1  host unreachable, port never opened, or no HTTP response

set -u

echo "================================================"
echo "KT财务系统部署健康检查"
echo "================================================"
echo ""

TARGET_HOST="${TARGET_HOST:-172.16.74.149}"
TARGET_PORT="${TARGET_PORT:-8080}"
MAX_RETRIES="${MAX_RETRIES:-10}"
RETRY_INTERVAL="${RETRY_INTERVAL:-5}"
BASE_URL="http://${TARGET_HOST}:${TARGET_PORT}"

echo "🔍 检查目标: ${BASE_URL}"
echo ""

# 1. Network reachability.
# NOTE(review): this fails the whole check when ICMP is filtered even if the
# service itself is reachable — confirm that ping is allowed to the target.
echo "1️⃣ 检查网络连通性..."
if ping -c 3 "$TARGET_HOST" > /dev/null 2>&1; then
  echo " ✅ 主机 $TARGET_HOST 可达"
else
  echo " ❌ 主机 $TARGET_HOST 不可达"
  exit 1
fi

# 2. Port check, retried up to MAX_RETRIES times.
echo ""
echo "2️⃣ 检查端口连接 (${MAX_RETRIES}次重试)..."

# Initialise explicitly so a PORT_OPEN inherited from the environment cannot
# make a failed check look successful.
PORT_OPEN=false
for ((i = 1; i <= MAX_RETRIES; i++)); do
  echo " 尝试 $i/$MAX_RETRIES..."

  # Use nc's exit status instead of grepping its message: the wording of the
  # success message differs between netcat implementations.
  if nc -z -w 3 "$TARGET_HOST" "$TARGET_PORT" > /dev/null 2>&1; then
    echo " ✅ 端口 $TARGET_PORT 已开放"
    PORT_OPEN=true
    break
  fi

  # No point sleeping after the final attempt.
  if ((i < MAX_RETRIES)); then
    echo " ⏳ 等待 ${RETRY_INTERVAL}秒后重试..."
    sleep "$RETRY_INTERVAL"
  fi
done

if [[ "$PORT_OPEN" != "true" ]]; then
  echo " ❌ 端口 $TARGET_PORT 无法连接"
  echo ""
  echo "⚠️ 可能的原因:"
  echo " - Docker容器未启动"
  echo " - 端口映射配置错误"
  echo " - 防火墙阻止连接"
  echo " - 服务启动失败"
  exit 1
fi

# 3. HTTP service check.
echo ""
echo "3️⃣ 检查HTTP服务..."
# --max-time bounds the whole transfer so a stalled server cannot hang the script.
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 --max-time 10 "${BASE_URL}/" 2>/dev/null)

# curl prints "000" for %{http_code} when no HTTP response was received, so an
# empty-string test alone never detects a failed request.
if [[ -z "$HTTP_CODE" || "$HTTP_CODE" == "000" ]]; then
  echo " ❌ 无法获取HTTP响应"
  exit 1
fi

echo " HTTP状态码: $HTTP_CODE"

case "$HTTP_CODE" in
  200 | 301 | 302)
    echo " ✅ HTTP服务正常"
    ;;
  *)
    # Deliberately a warning only: the service answered, just not as expected.
    echo " ⚠️ HTTP状态码异常: $HTTP_CODE"
    ;;
esac

# 4. Response time check.
echo ""
echo "4️⃣ 检查响应时间..."
RESPONSE_TIME=$(curl -s -o /dev/null -w "%{time_total}" --connect-timeout 5 --max-time 10 "${BASE_URL}/" 2>/dev/null)

if [[ -n "$RESPONSE_TIME" ]]; then
  echo " 响应时间: ${RESPONSE_TIME}秒"

  # awk does the floating-point comparison, so the script no longer depends on
  # bc being installed (a missing bc made every response look slow).
  if awk -v t="$RESPONSE_TIME" 'BEGIN { exit !((t + 0) < 3) }'; then
    echo " ✅ 响应时间正常"
  else
    echo " ⚠️ 响应时间较慢"
  fi
fi

echo ""
echo "================================================"
echo "✅ 部署健康检查完成"
echo "================================================"
echo ""
echo "🌐 访问地址: ${BASE_URL}"
|
||||
Reference in New Issue
Block a user