ci: 增强部署诊断能力
- 添加容器状态和端口占用检查
- 添加容器内部监听情况诊断
- 增加详细的健康检查日志(100行)
- 健康检查重试次数从5次增加到10次
- 第5次失败时执行深度诊断
- 添加独立的部署健康检查脚本

改进点:
1. 诊断端口冲突问题
2. 检查容器内部监听配置
3. 增加详细的错误日志输出
4. SSH回连获取实时状态
This commit is contained in:
@@ -127,15 +127,35 @@ jobs:
|
||||
echo "⏳ 等待服务启动..."
|
||||
sleep 10
|
||||
|
||||
# 检查容器状态
|
||||
# 1. 检查容器状态
|
||||
echo "📊 容器状态:"
|
||||
sudo docker-compose ps
|
||||
|
||||
# 检查容器日志
|
||||
echo "📝 容器日志:"
|
||||
sudo docker-compose logs --tail=20
|
||||
# 2. 检查端口占用情况
|
||||
echo ""
|
||||
echo "🔍 检查端口8080占用:"
|
||||
sudo lsof -i :8080 || echo "端口8080未被占用"
|
||||
|
||||
# 3. 检查容器内部监听情况
|
||||
echo ""
|
||||
echo "🔍 检查容器内部监听:"
|
||||
CONTAINER_ID=$(sudo docker-compose ps -q kt-financial 2>/dev/null || echo "")
|
||||
if [ -n "$CONTAINER_ID" ]; then
|
||||
sudo docker exec $CONTAINER_ID ss -tlnp | grep ':80' || echo "容器内无80端口监听"
|
||||
fi
|
||||
|
||||
# 4. 检查容器详细日志(增加行数)
|
||||
echo ""
|
||||
echo "📝 容器日志(最近100行):"
|
||||
sudo docker-compose logs --tail=100
|
||||
|
||||
# 5. 检查容器健康状态
|
||||
echo ""
|
||||
echo "🏥 容器健康检查:"
|
||||
sudo docker inspect --format='{{.State.Health.Status}}' $CONTAINER_ID 2>/dev/null || echo "未配置健康检查"
|
||||
|
||||
# 清理旧镜像和悬空镜像
|
||||
echo ""
|
||||
echo "🧹 清理旧镜像..."
|
||||
sudo docker image prune -f
|
||||
|
||||
@@ -146,23 +166,52 @@ jobs:
|
||||
run: |
|
||||
echo "🔍 执行健康检查..."
|
||||
|
||||
# 等待服务完全启动
|
||||
sleep 15
|
||||
# 等待服务完全启动(延长等待时间)
|
||||
sleep 20
|
||||
|
||||
# 健康检查
|
||||
for i in {1..5}; do
|
||||
echo "尝试 $i/5: 检查服务..."
|
||||
# 健康检查(增加重试次数和诊断信息)
|
||||
for i in {1..10}; do
|
||||
echo ""
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
echo "尝试 $i/10: 检查服务 ${{ env.HEALTH_CHECK_URL }}"
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
|
||||
if curl -f -s -o /dev/null -w "%{http_code}" ${{ env.HEALTH_CHECK_URL }} | grep -q "200\|301\|302"; then
|
||||
echo "✅ 服务健康检查通过!"
|
||||
# 详细的curl诊断
|
||||
HTTP_CODE=$(curl -v -s -o /dev/null -w "%{http_code}" --connect-timeout 5 --max-time 10 ${{ env.HEALTH_CHECK_URL }} 2>&1)
|
||||
echo "响应: $HTTP_CODE"
|
||||
|
||||
if echo "$HTTP_CODE" | grep -q "200\|301\|302"; then
|
||||
echo "✅ 服务健康检查通过!HTTP状态码正常"
|
||||
echo ""
|
||||
echo "🎉 部署成功!服务已正常运行"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "⏳ 等待5秒后重试..."
|
||||
sleep 5
|
||||
# 如果失败,显示更多诊断信息
|
||||
if [ $i -eq 5 ]; then
|
||||
echo ""
|
||||
echo "⚠️ 第5次尝试失败,执行深度诊断..."
|
||||
echo ""
|
||||
echo "🔍 检查容器运行状态:"
|
||||
ssh -o StrictHostKeyChecking=no ${{ secrets.SERVER_USER || 'atai' }}@${{ secrets.SERVER_HOST || '172.16.74.149' }} "cd /home/atai/kt-financial-system && sudo docker-compose ps" || true
|
||||
echo ""
|
||||
echo "📝 最新容器日志:"
|
||||
ssh -o StrictHostKeyChecking=no ${{ secrets.SERVER_USER || 'atai' }}@${{ secrets.SERVER_HOST || '172.16.74.149' }} "cd /home/atai/kt-financial-system && sudo docker-compose logs --tail=50" || true
|
||||
fi
|
||||
|
||||
if [ $i -lt 10 ]; then
|
||||
echo "⏳ 等待6秒后重试..."
|
||||
sleep 6
|
||||
fi
|
||||
done
|
||||
|
||||
echo "❌ 健康检查失败,服务可能未正常启动"
|
||||
echo ""
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
echo "❌ 健康检查失败:10次尝试均未成功"
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
echo ""
|
||||
echo "🔍 最终诊断信息:"
|
||||
ssh -o StrictHostKeyChecking=no ${{ secrets.SERVER_USER || 'atai' }}@${{ secrets.SERVER_HOST || '172.16.74.149' }} "cd /home/atai/kt-financial-system && sudo docker-compose ps && echo '---' && sudo docker-compose logs --tail=100" || true
|
||||
exit 1
|
||||
|
||||
- name: Send notification on success
|
||||
|
||||
Reference in New Issue
Block a user