From faafcf926a6afabbcfcf6114a936b09f22e02918 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=A0=E7=9A=84=E7=94=A8=E6=88=B7=E5=90=8D?= <你的邮箱> Date: Tue, 4 Nov 2025 21:23:33 +0800 Subject: [PATCH] =?UTF-8?q?ci:=20=E5=A2=9E=E5=BC=BA=E9=83=A8=E7=BD=B2?= =?UTF-8?q?=E8=AF=8A=E6=96=AD=E8=83=BD=E5=8A=9B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 添加容器状态和端口占用检查 - 添加容器内部监听情况诊断 - 增加详细的健康检查日志(100行) - 健康检查重试次数从5次增加到10次 - 第5次失败时执行深度诊断 - 添加独立的部署健康检查脚本 改进点: 1. 诊断端口冲突问题 2. 检查容器内部监听配置 3. 增加详细的错误日志输出 4. SSH回连获取实时状态 --- .gitea/workflows/deploy.yml | 77 ++++++++++++++++++++++++------ scripts/check-deployment.sh | 94 +++++++++++++++++++++++++++++++++++++ 2 files changed, 157 insertions(+), 14 deletions(-) create mode 100755 scripts/check-deployment.sh diff --git a/.gitea/workflows/deploy.yml b/.gitea/workflows/deploy.yml index ca565653..8c26e5a8 100644 --- a/.gitea/workflows/deploy.yml +++ b/.gitea/workflows/deploy.yml @@ -127,15 +127,35 @@ jobs: echo "⏳ 等待服务启动..." sleep 10 - # 检查容器状态 + # 1. 检查容器状态 echo "📊 容器状态:" sudo docker-compose ps - # 检查容器日志 - echo "📝 容器日志:" - sudo docker-compose logs --tail=20 + # 2. 检查端口占用情况 + echo "" + echo "🔍 检查端口8080占用:" + sudo lsof -i :8080 || echo "端口8080未被占用" + + # 3. 检查容器内部监听情况 + echo "" + echo "🔍 检查容器内部监听:" + CONTAINER_ID=$(sudo docker-compose ps -q kt-financial 2>/dev/null || echo "") + if [ -n "$CONTAINER_ID" ]; then + sudo docker exec $CONTAINER_ID ss -tlnp | grep ':80' || echo "容器内无80端口监听" + fi + + # 4. 检查容器详细日志(增加行数) + echo "" + echo "📝 容器日志(最近100行):" + sudo docker-compose logs --tail=100 + + # 5. 检查容器健康状态 + echo "" + echo "🏥 容器健康检查:" + sudo docker inspect --format='{{.State.Health.Status}}' $CONTAINER_ID 2>/dev/null || echo "未配置健康检查" # 清理旧镜像和悬空镜像 + echo "" echo "🧹 清理旧镜像..." sudo docker image prune -f @@ -146,23 +166,52 @@ jobs: run: | echo "🔍 执行健康检查..." - # 等待服务完全启动 - sleep 15 + # 等待服务完全启动(延长等待时间) + sleep 20 - # 健康检查 - for i in {1..5}; do - echo "尝试 $i/5: 检查服务..." 
+ # Health check (more retries plus diagnostic output) + for i in {1..10}; do + echo "" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo "尝试 $i/10: 检查服务 ${{ env.HEALTH_CHECK_URL }}" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" - if curl -f -s -o /dev/null -w "%{http_code}" ${{ env.HEALTH_CHECK_URL }} | grep -q "200\|301\|302"; then - echo "✅ 服务健康检查通过!" + # Capture only the status code from stdout; curl errors (-S) go straight to the step log, and "|| true" keeps the retry loop alive when the step shell runs with -e + HTTP_CODE=$(curl -sS -o /dev/null -w "%{http_code}" --connect-timeout 5 --max-time 10 "${{ env.HEALTH_CHECK_URL }}") || true + echo "响应: $HTTP_CODE" + + if echo "$HTTP_CODE" | grep -qx "200\|301\|302"; then + echo "✅ 服务健康检查通过!HTTP状态码正常" + echo "" + echo "🎉 部署成功!服务已正常运行" exit 0 fi - echo "⏳ 等待5秒后重试..." - sleep 5 + # On the fifth failed attempt, pull container status and recent logs over SSH + if [ "$i" -eq 5 ]; then + echo "" + echo "⚠️ 第5次尝试失败,执行深度诊断..." + echo "" + echo "🔍 检查容器运行状态:" + ssh -o StrictHostKeyChecking=no ${{ secrets.SERVER_USER || 'atai' }}@${{ secrets.SERVER_HOST || '172.16.74.149' }} "cd /home/atai/kt-financial-system && sudo docker-compose ps" || true + echo "" + echo "📝 最新容器日志:" + ssh -o StrictHostKeyChecking=no ${{ secrets.SERVER_USER || 'atai' }}@${{ secrets.SERVER_HOST || '172.16.74.149' }} "cd /home/atai/kt-financial-system && sudo docker-compose logs --tail=50" || true + fi + + if [ "$i" -lt 10 ]; then + echo "⏳ 等待6秒后重试..." 
+ sleep 6 + fi done - echo "❌ 健康检查失败,服务可能未正常启动" + echo "" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo "❌ 健康检查失败:10次尝试均未成功" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo "" + echo "🔍 最终诊断信息:" + ssh -o StrictHostKeyChecking=no ${{ secrets.SERVER_USER || 'atai' }}@${{ secrets.SERVER_HOST || '172.16.74.149' }} "cd /home/atai/kt-financial-system && sudo docker-compose ps && echo '---' && sudo docker-compose logs --tail=100" || true exit 1 - name: Send notification on success diff --git a/scripts/check-deployment.sh b/scripts/check-deployment.sh new file mode 100755 index 00000000..8581f9ef --- /dev/null +++ b/scripts/check-deployment.sh @@ -0,0 +1,94 @@ +#!/bin/bash + +# Deployment health-check script + +echo "================================================" +echo "KT财务系统部署健康检查" +echo "================================================" +echo "" + +TARGET_HOST="172.16.74.149" +TARGET_PORT="8080" +MAX_RETRIES=10 +RETRY_INTERVAL=5 + +echo "🔍 检查目标: http://${TARGET_HOST}:${TARGET_PORT}" +echo "" + +# 1. Network reachability check +echo "1️⃣ 检查网络连通性..." +if ping -c 3 "$TARGET_HOST" > /dev/null 2>&1; then + echo " ✅ 主机 $TARGET_HOST 可达" +else + echo " ❌ 主机 $TARGET_HOST 不可达" + exit 1 +fi + +# 2. Port check (relies on nc's exit status rather than its variant-specific messages) +echo "" +echo "2️⃣ 检查端口连接 (${MAX_RETRIES}次重试)..." +for ((i = 1; i <= MAX_RETRIES; i++)); do + echo " 尝试 $i/$MAX_RETRIES..." + + if nc -z -w 3 "$TARGET_HOST" "$TARGET_PORT" > /dev/null 2>&1; then + echo " ✅ 端口 $TARGET_PORT 已开放" + PORT_OPEN=true + break + fi + + if [ "$i" -lt "$MAX_RETRIES" ]; then + echo " ⏳ 等待 ${RETRY_INTERVAL}秒后重试..." + sleep "$RETRY_INTERVAL" + fi +done + +if [ "$PORT_OPEN" != "true" ]; then + echo " ❌ 端口 $TARGET_PORT 无法连接" + echo "" + echo "⚠️ 可能的原因:" + echo " - Docker容器未启动" + echo " - 端口映射配置错误" + echo " - 防火墙阻止连接" + echo " - 服务启动失败" + exit 1 +fi + +# 3. HTTP service check +echo "" +echo "3️⃣ 检查HTTP服务..." 
+HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 --max-time 10 "http://${TARGET_HOST}:${TARGET_PORT}/" 2>/dev/null) + +if [ -z "$HTTP_CODE" ] || [ "$HTTP_CODE" = "000" ]; then + echo " ❌ 无法获取HTTP响应" + exit 1 +fi + +echo " HTTP状态码: $HTTP_CODE" + +if [ "$HTTP_CODE" = "200" ] || [ "$HTTP_CODE" = "301" ] || [ "$HTTP_CODE" = "302" ]; then + echo " ✅ HTTP服务正常" +else + echo " ⚠️ HTTP状态码异常: $HTTP_CODE" +fi + +# 4. Response-time check (awk performs the float comparison, so bc need not be installed) +echo "" +echo "4️⃣ 检查响应时间..." +RESPONSE_TIME=$(curl -s -o /dev/null -w "%{time_total}" --connect-timeout 5 --max-time 10 "http://${TARGET_HOST}:${TARGET_PORT}/" 2>/dev/null) + +if [ -n "$RESPONSE_TIME" ]; then + echo " 响应时间: ${RESPONSE_TIME}秒" + + if awk -v t="$RESPONSE_TIME" 'BEGIN { exit !(t < 3) }'; then + echo " ✅ 响应时间正常" + else + echo " ⚠️ 响应时间较慢" + fi +fi + +echo "" +echo "================================================" +echo "✅ 部署健康检查完成" +echo "================================================" +echo "" +echo "🌐 访问地址: http://${TARGET_HOST}:${TARGET_PORT}"