# shellcheck shell=bash
# Sourced by pull-and-restart.sh / restart.sh after ROOT, compose_cmd and
# run_sudo have been defined.
# Unified policy: only the host Nginx is used as the entry point; the
# yh_nginx container is no longer an entry point.

# Compose file arguments. Kept as a whitespace-separated string for callers
# that may reference it; functions in this file split it explicitly.
YH_COMPOSE_FILES="-f docker-compose.yml -f docker-compose.host-nginx.yml"

# Exit the calling script with status 1 when command $1 is not on PATH.
require_cmd() {
  local c="$1"
  if ! command -v "$c" >/dev/null 2>&1; then
    echo "错误: 缺少命令 $c,请先安装后重试。" >&2
    exit 1
  fi
}

# Verify that every external tool used by the host deployment helpers exists.
ensure_host_deploy_env() {
  local tool
  for tool in systemctl ss sed awk grep curl; do
    require_cmd "$tool"
  done
}

# Return 0 when systemd reports the nginx unit as active, 1 otherwise
# (including when systemctl is not available).
host_nginx_online() {
  command -v systemctl >/dev/null 2>&1 || return 1
  local unit
  for unit in nginx nginx.service; do
    if systemctl is-active "$unit" >/dev/null 2>&1; then
      return 0
    fi
  done
  return 1
}

# Kill (SIGKILL) every process listening on TCP port $1 whose command name
# is not "nginx".
# Arguments: $1 - TCP port number
force_release_port() {
  local p="$1"
  local pid comm
  local -a pids=()
  # A single ss line may list several processes sharing the socket
  # (users:(("a",pid=1,...),("b",pid=2,...))), so extract every pid= token,
  # de-duplicated, instead of only one per line.
  mapfile -t pids < <(
    run_sudo ss -tlnp "sport = :$p" 2>/dev/null |
      awk '{
        rest = $0
        while (match(rest, /pid=[0-9]+/)) {
          id = substr(rest, RSTART + 4, RLENGTH - 4)
          if (!seen[id]++) print id
          rest = substr(rest, RSTART + RLENGTH)
        }
      }'
  )
  [ "${#pids[@]}" -eq 0 ] && return 0
  for pid in "${pids[@]}"; do
    [ -z "$pid" ] && continue
    comm="$(run_sudo sh -c "cat /proc/$pid/comm 2>/dev/null" || true)"
    # Processes named nginx are left alone.
    if [ "$comm" = "nginx" ]; then
      continue
    fi
    echo "端口 $p 被非宿主机 nginx 进程占用(pid=$pid, comm=${comm:-unknown}),强制停止..."
    run_sudo kill -9 "$pid" 2>/dev/null || true
  done
}

# Print the IDs of running containers that bind HOST port $1.
# `docker ps --filter publish=N` matches the container-side port, so a
# container mapped as 9080->80 would be selected for N=80; matching the
# ":N->" token of the Ports column selects host-side bindings only.
# Port ranges (e.g. 80-90->80-90) are not recognised here.
_yh_container_ids_on_host_port() {
  local port="$1"
  run_sudo docker ps --format '{{.ID}} {{.Ports}}' 2>/dev/null |
    awk -v needle=":${port}->" 'index($0, needle) { print $1 }'
}

# Free host ports 80 and 443: first remove containers that publish them,
# then kill any remaining non-nginx listener.
force_release_host_ports() {
  local port cid
  local -a cids
  # Containers publishing host 80/443 are the most common source of conflict.
  if command -v docker >/dev/null 2>&1; then
    for port in 80 443; do
      cids=()
      mapfile -t cids < <(_yh_container_ids_on_host_port "$port")
      [ "${#cids[@]}" -gt 0 ] || continue
      for cid in "${cids[@]}"; do
        [ -n "$cid" ] || continue
        run_sudo docker rm -f "$cid" 2>/dev/null || true
      done
    done
  fi
  # Fallback: kill whatever non-nginx process still holds the ports.
  force_release_port 80
  force_release_port 443
}

# Make sure the host Nginx service is running; exits 1 when it cannot be
# started. Ports 80/443 are released first in either case.
ensure_host_nginx_started() {
  ensure_host_deploy_env
  force_release_host_ports
  if host_nginx_online; then
    echo "宿主机 Nginx 在线,跳过启动。"
    return 0
  fi
  echo "宿主机 Nginx 未在线,尝试启动..."
  # Start failures are tolerated here so that the explicit status check
  # below reports the error even when the caller runs under `set -e`.
  run_sudo systemctl start nginx 2>/dev/null ||
    run_sudo systemctl start nginx.service || true
  run_sudo systemctl enable nginx 2>/dev/null || true
  if host_nginx_online; then
    echo "宿主机 Nginx 启动成功。"
    return 0
  fi
  echo "错误: 无法启动宿主机 Nginx,请检查 systemctl status nginx" >&2
  exit 1
}

# Run compose_cmd with the YH_COMPOSE_FILES arguments prepended.
# The string is split on whitespace explicitly so the result does not depend
# on the caller's IFS (e.g. IFS=$'\n\t' would otherwise yield one argument).
_yh_compose_with_files() {
  local -a file_args=()
  IFS=$' \t\n' read -r -a file_args <<<"$YH_COMPOSE_FILES"
  compose_cmd ${file_args[@]+"${file_args[@]}"} "$@"
}

# Stop this project's Compose stack (including a possibly left-over yh_nginx
# container); the host Nginx is not stopped. Always returns 0.
yh_compose_down() {
  if [ -f "$ROOT/docker-compose.host-nginx.yml" ]; then
    _yh_compose_with_files down --remove-orphans 2>/dev/null || true
  else
    compose_cmd down --remove-orphans 2>/dev/null || true
  fi
  return 0
}

# Start only the business containers (mongo api web admin); the yh_nginx
# container is not started. Returns the status of the compose invocation.
yh_compose_up() {
  if [ ! -f "$ROOT/docker-compose.host-nginx.yml" ]; then
    echo "未找到 docker-compose.host-nginx.yml,使用默认 compose 启动业务容器。"
    compose_cmd up -d --force-recreate mongo api web admin
    return
  fi
  _yh_compose_with_files up -d --force-recreate mongo api web admin
}

# Print an ERE matching a single-line `server_name` directive that lists
# exactly the host name $1 (regex metacharacters in $1 are escaped, and the
# name must be preceded by whitespace so longer names ending in $1 do not
# match).
_yh_server_name_regex() {
  local escaped
  escaped="$(printf '%s' "$1" | sed 's/[][\.^$*+?(){}|]/\\&/g')"
  printf '%s\n' "^[[:space:]]*server_name[[:space:]]+([^;]*[[:space:]])?${escaped}([[:space:];]|\$)"
}

# Generate the host site config from the template and, once `nginx -t`
# passes, reload the host Nginx (or start it when it is offline).
# Globals: ROOT (read), NGINX_DOMAIN (read, optional)
# Returns 0 when the template is missing (nothing to do); exits 1 when the
# config cannot be written or fails validation.
yh_install_host_nginx_site_conf() {
  local domain="${NGINX_DOMAIN:-yuheng.yuxindazhineng.com}"
  local tpl="$ROOT/nginx/yuheng.host.conf"
  local out="/etc/nginx/conf.d/${domain}.conf"
  local ts server_name_re verify_root_repl nginx_test_output
  ts="$(date +%Y%m%d%H%M%S)"

  if [ ! -f "$tpl" ]; then
    echo "未找到 $tpl,跳过宿主机站点配置生成。"
    return 0
  fi
  ensure_host_deploy_env

  # Same-domain conflict fallback: take other conf.d files declaring the same
  # server_name offline so a stale upstream cannot keep producing 502s.
  # Values are passed as positional parameters rather than interpolated into
  # the script text.
  server_name_re="$(_yh_server_name_regex "$domain")"
  run_sudo sh -c '
    out=$1
    re=$2
    ts=$3
    for f in /etc/nginx/conf.d/*.conf; do
      [ -f "$f" ] || continue
      [ "$f" = "$out" ] && continue
      if grep -Eq -- "$re" "$f"; then
        mv -f -- "$f" "${f}.disabled_by_yh_${ts}"
      fi
    done
  ' sh "$out" "$server_name_re" "$ts"

  mkdir -p "$ROOT/verify-root"
  # Escape characters that are special in the sed replacement text.
  verify_root_repl="$(printf '%s' "$ROOT/verify-root" | sed 's/[&|\]/\\&/g')"
  if ! sed "s|__VERIFY_ROOT__|${verify_root_repl}|g" "$tpl" |
    run_sudo tee "$out" >/dev/null; then
    echo "错误: 无法写入 $out" >&2
    exit 1
  fi

  # Keep nginx -t quiet on success but show its diagnostics on failure.
  if ! nginx_test_output="$(run_sudo nginx -t 2>&1)"; then
    [ -n "$nginx_test_output" ] && printf '%s\n' "$nginx_test_output" >&2
    echo "错误: 宿主机 nginx -t 失败,请检查 $out" >&2
    exit 1
  fi

  if host_nginx_online; then
    if run_sudo systemctl reload nginx 2>/dev/null; then
      echo "宿主机 Nginx 已重载($out)。"
    else
      # Best effort: a failed reload is reported but does not abort.
      echo "警告: 宿主机 Nginx 重载失败,请检查 systemctl status nginx" >&2
    fi
  else
    ensure_host_nginx_started
  fi
}

# Print the HTTP status code returned by https://$1 (certificate checks
# disabled); prints 000 when no three-digit code could be obtained.
_yh_https_status() {
  local status
  status="$(curl -k -sS -o /dev/null -w '%{http_code}' --max-time 10 "https://$1" || true)"
  case "$status" in
    [0-9][0-9][0-9]) ;;
    *) status="000" ;;
  esac
  printf '%s\n' "$status"
}

# Post-deploy health check: probes the three local upstreams, then the
# public HTTPS endpoint. Returns 0 on success, 1 on any failure.
# Globals: NGINX_DOMAIN (read, optional)
yh_post_deploy_healthcheck() {
  local domain="${NGINX_DOMAIN:-yuheng.yuxindazhineng.com}"
  local code=""

  # Check the upstream container ports first so an upstream problem is not
  # mistaken for an nginx problem.
  if ! curl -fsS --max-time 6 http://127.0.0.1:9080/ >/dev/null; then
    echo "错误: 前台上游 127.0.0.1:9080 不可用" >&2
    return 1
  fi
  if ! curl -fsS --max-time 6 http://127.0.0.1:9081/ >/dev/null; then
    echo "错误: 后台上游 127.0.0.1:9081 不可用" >&2
    return 1
  fi
  if ! curl -fsS --max-time 6 http://127.0.0.1:8088/api/health |
    grep -q '"status":"ok"'; then
    echo "错误: API 上游 127.0.0.1:8088/api/health 不可用" >&2
    return 1
  fi

  code="$(_yh_https_status "$domain")"
  if [ "$code" = "502" ] || [ "$code" = "000" ]; then
    echo "检测到 https://${domain} 返回 ${code},尝试自动重载宿主机 Nginx 后重试..."
    run_sudo systemctl reload nginx 2>/dev/null || true
    sleep 1
    code="$(_yh_https_status "$domain")"
  fi

  # `[ -ge ]` parses operands as decimal, so the "000" placeholder is safe.
  if [ "$code" -ge 500 ] || [ "$code" = "000" ]; then
    echo "错误: https://${domain} 返回 ${code},部署后健康检查失败。" >&2
    echo "==== 诊断:80/443 监听 ====" >&2
    run_sudo ss -tlnp | sed -n '1p;/\:80 \|:443 /p' >&2 || true
    echo "==== 诊断:最近 Nginx 错误日志 ====" >&2
    run_sudo sh -c 'tail -n 80 /var/log/nginx/error.log 2>/dev/null || true' >&2
    return 1
  fi
  echo "健康检查通过:https://${domain} -> ${code}"
}