# nginx高可用

# 介绍

什么是高可用

高可用HA（High Availability）是分布式系统架构设计中必须考虑的因素之一，通常是指通过设计减少系统不能提供服务的时间。如果一个系统能够一直提供服务，那么它的可用性就是百分之百；但天有不测风云，故障在所难免，所以我们只能尽可能地减少服务不可用的时间。

解决的问题

在生产环境中，很多时候是以Nginx做反向代理对外提供服务，但总有一天Nginx难免会遇到故障，如：服务器宕机。一旦Nginx宕机，所有对外提供的接口都将无法访问。

虽然我们无法保证服务器百分之百可用，但是也得想办法避免这种悲剧，今天我们使用keepalived来实现Nginx的高可用。

双机热备方案

这种方案是国内企业中最为普遍的一种高可用方案。双机热备其实就是指一台服务器正在提供服务，另一台服务器处于该服务的备用状态；当提供服务的那台服务器不可用时，备用服务器就会顶替上去。

keepalived是什么

Keepalived软件起初是专为LVS负载均衡软件设计的，用来管理并监控LVS集群系统中各个服务节点的状态，后来又加入了可以实现高可用的VRRP（Virtual Router Redundancy Protocol，虚拟路由器冗余协议）功能。因此，Keepalived除了能够管理LVS软件外，还可以作为其他服务（例如：Nginx、Haproxy、MySQL等）的高可用解决方案软件。

故障转移机制

Keepalived高可用服务之间的故障切换转移，是通过VRRP协议来实现的。在 Keepalived服务正常工作时，主 Master节点会不断地向备节点发送（多播的方式）心跳消息，用以告诉备Backup节点自己还活着，当主 Master节点发生故障时，就无法发送心跳消息，备节点也就因此无法继续检测到来自主 Master节点的心跳了，于是调用自身的接管程序，接管主Master节点的 IP资源及服务。而当主 Master节点恢复时，备Backup节点又会释放主节点故障时自身接管的IP资源及服务，恢复到原来的备用角色。

# nginx参考单机部署文档

单机版部署文档

# keepalived部署

外网环境下 yum 安装

yum -y install keepalived

修改 /etc/keepalived 下配置文件

vi /etc/keepalived/keepalived.conf

主节点配置内容：

! Configuration File for keepalived
# Master-node configuration: this instance declares state MASTER with priority 100.

global_defs {
   router_id app_router
}

# Health-check script definition; it is referenced by name from track_script below.
vrrp_script chk_http_port {
    script "/etc/keepalived/check_pid.sh" # script executed on each check; verifies that nginx is running
    interval 2                          # interval between runs of the check script, in seconds
    weight -20                            # weight attached to this check (see keepalived.conf(5) for how it adjusts priority)
}

# VRRP instance definition.
vrrp_instance VI_1 {
    state MASTER
    interface ens192
    virtual_router_id 66
    priority 100
    unicast_src_ip 本机IP  # unicast source address; the placeholder stands for this machine's IP
    unicast_peer {
        从机IP
        从机IP  # unicast destination addresses (the backup nodes); list several peers one IP per line
    }

    advert_int 1
    authentication {
        auth_type PASS
        auth_pass 1111
    }
    track_script {
        chk_http_port            # run the health-check script defined above
    }
    # The line below is a placeholder, not valid configuration: replace it with the
    # virtual IP (VIP). The placeholder text advises keeping it in the same network segment.
    virtual_ipaddress {
        虚拟IP，尽可能在一个网段
    }
}

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36

从节点配置：

! Configuration File for keepalived
# Backup-node configuration: this instance declares state BACKUP with priority 50.

global_defs {
   router_id app_router # kept identical across the nodes in this guide
}


# Health-check script definition; it is referenced by name from track_script below.
# NOTE(review): weight is 2 here, while the master configuration in this guide
# uses -20 - confirm which value is intended.
vrrp_script chk_http_port {
    script "/etc/keepalived/check_pid.sh" # script executed on each check; verifies that nginx is running
    interval 2                          # interval between runs of the check script
    weight 2                            # weight attached to this check
}

# VRRP instance definition
vrrp_instance VI_1 {
    state BACKUP                        # role of this keepalived node: MASTER for the primary, BACKUP for the standby
    interface ens192                      # network interface used for VRRP traffic (the host's NIC; check with `ip addr`)
    virtual_router_id 66                # virtual router ID; must be identical on master and backup
    priority 50                         # priority; the higher the value, the higher the precedence for handling requests
    unicast_src_ip 本机IP  # unicast source address; the placeholder stands for this machine's IP
    unicast_peer {
        其他节点IP       # unicast destination address; the placeholder stands for the other node's IP
    }
    advert_int 1                        # check interval, 1s by default (VRRP advertisement period in seconds)
    # Authentication
    authentication {
        auth_type PASS # authentication type and password; MASTER and BACKUP must use the same password to communicate
        auth_pass 1111
    }
    track_script {
        chk_http_port                   # run the health-check script defined above
    }
    virtual_ipaddress {
        虚拟IP                   # virtual IP (VIP) placeholder; several may be defined, one per line
    }
}

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37

nginx检测脚本：check_pid.sh

#!/bin/bash
# keepalived health check for nginx.
#
# When no nginx process is found, try to start nginx once. If there is still
# no nginx process afterwards, stop keepalived so that the VIP moves to the
# other node.

# Print the number of processes whose command name is "nginx".
count_nginx() {
  ps -C nginx --no-header | wc -l
}

if [ "$(count_nginx)" -eq 0 ]; then
  # Environment variables are not in effect here, so the full path is required.
  /usr/local/nginx/sbin/nginx
  if [ "$(count_nginx)" -eq 0 ]; then
    # The restart failed: stop keepalived to trigger the VIP failover.
    killall keepalived
  fi
fi

1
2
3
4
5
6
7
8
9

- 授权检测脚本：

chmod 775 check_pid.sh

nacos检测脚本: check_pid.sh

#!/bin/bash
# keepalived health check for Nacos.
#
# When no Nacos process is found, try to start Nacos once. If there is still
# no Nacos process afterwards, stop keepalived so that the VIP moves to the
# other node.
#
# The match is on the full command line only, so a process that merely runs
# under a user named "nacos" is not mistaken for the Nacos server. The name
# and path of this script must not contain "nacos", otherwise the script's
# own process would match.

# Succeed when at least one process has "nacos" in its command line.
nacos_running() {
  pgrep -f nacos >/dev/null
}

if ! nacos_running; then
  # Environment variables are not in effect here, so the full path is required.
  /home/nacos/bin/startup.sh
  if ! nacos_running; then
    # The restart failed: stop keepalived to trigger the VIP failover.
    killall keepalived
  fi
fi

1
2
3
4
5
6
7
8
9
10

- 授权检测脚本

chmod 775 check_pid.sh

mysql检测脚本：

#!/bin/bash
# keepalived health check for MySQL.
#
# 1. Ping the server with mysqladmin.
# 2. If the ping succeeds, run a trivial query. If the query fails, the server
#    is treated as hung: its processes are killed and mysqld is started again.
# 3. If the ping fails, mysqld is started.
#
# Replace the placeholder values of user and passwd with real credentials.
# NOTE(review): the password is passed on the command line and is therefore
# visible in the process list; consider an option file instead.
host=127.0.0.1
user=账号
passwd=密码

# Adjust the binary paths below to match the local installation.
# The password is double-quoted so that $passwd is actually expanded.
if /usr/bin/mysqladmin -h "$host" -u "$user" -p"$passwd" ping &>/dev/null; then
  if ! /usr/bin/mysql -h "$host" -u"$user" -p"$passwd" --connect_timeout=5 -e "show databases;"; then
    # The server answers the ping but cannot serve a query: force-kill every
    # mysql process. `xargs -r` skips kill when no PID was found.
    /usr/bin/ps aux | grep mysql | grep -v grep | awk '{print $2}' | xargs -r kill -9
    /usr/bin/systemctl start mysqld
  fi
else
  /usr/bin/systemctl start mysqld
fi

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17

启动关闭

systemctl start/restart/stop keepalived

# 问题

两台机器VIP都有，发生争抢：

https://www.cnblogs.com/xiaobaozi-95/p/11497295.html

注意检测脚本的命名：脚本文件名中不要包含被检测目标的名称。例如检测 nacos 时，脚本应命名为 check_pid.sh，而不要命名为 check_nacos.sh；否则 `ps -ef | grep nacos` 会匹配到检测脚本自身的进程，导致误判 nacos 仍在运行。

# 搭配nginx负载均衡

# nacos

# Backend group for Nacos: three servers, all on port 8848. No balancing
# method is specified, so nginx's default applies.
# NOTE(review): the 127.0.0.x addresses look like placeholders - replace them
# with the real Nacos node addresses. The server block below also listens on
# 8848, so a backend at 127.0.0.1:8848 on the same host would conflict.
upstream  nacos-servers {
          server 127.0.0.1:8848;
          server 127.0.0.2:8848;
          server 127.0.0.3:8848;
}
   
# Listens on port 8848 and forwards every request to the nacos-servers group.
server {
        listen  8848;

        #charset koi8-r;

        # All request paths are proxied to the upstream group.
        location / {
           proxy_pass http://nacos-servers/;
        }

        #access_log  logs/host.access.log  main;

        # redirect server error pages to the static page /50x.html
        #
        error_page   500 502 503 504  /50x.html;
        # /50x.html is looked up under the "html" root directory.
        location = /50x.html {
            root   html;
        }

    }

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25

# fastdfs

# Backend group for FastDFS: three servers, all on port 22123. No balancing
# method is specified, so nginx's default applies.
# NOTE(review): the 127.0.0.x addresses look like placeholders - replace them
# with the real FastDFS node addresses. The server block below also listens on
# 22123, so a backend at 127.0.0.1:22123 on the same host would conflict.
upstream  fastdfs-servers {
          server 127.0.0.1:22123;
          server 127.0.0.2:22123;
          server 127.0.0.3:22123;
}
   
# Listens on port 22123 and forwards every request to the fastdfs-servers group.
server {
        listen       22123;

        #charset koi8-r;

        # All request paths are proxied to the upstream group.
        location / {
           proxy_pass http://fastdfs-servers/;
        }

        #access_log  logs/host.access.log  main;

        # redirect server error pages to the static page /50x.html
        #
        error_page   500 502 503 504  /50x.html;
        # /50x.html is looked up under the "html" root directory.
        location = /50x.html {
            root   html;
        }

    }

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25

← Redis集群