# 031. Redis Cluster automatic slave migration: a deployment scheme for stronger high availability

Automatic slave migration: suppose there are 10 masters, each with 1 slave, and 3 extra slaves are added as redundancy. Some masters then have 2 slaves (a redundant slave appears), while the other masters still have only 1 slave.

If the slave of some master dies, Redis Cluster automatically migrates one of the redundant slaves over to that master.

The benefit: if every master has exactly one slave, and that slave dies and the master dies shortly afterwards, the slots served by that master become unavailable. Keeping a few redundant slaves and letting the cluster migrate them to orphaned masters strengthens high availability.
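
How eagerly the cluster migrates slaves is controlled by the `cluster-migration-barrier` setting: a slave only migrates to an orphaned master if its original master would still be left with at least that many working slaves (the default is 1). A minimal sketch for inspecting and tuning it at runtime, assuming the node addresses used in the test below; persist any change in redis.conf if it should survive a restart:

```
# show the current migration barrier on one node
redis-cli -h 192.168.99.170 -p 7001 config get cluster-migration-barrier

# runtime-only change: require the donor master to keep at least 2 slaves before one may migrate
redis-cli -h 192.168.99.170 -p 7001 config set cluster-migration-barrier 2
```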

Test it:

```
# check the cluster: which masters have only one slave
[root@eshop-cache01 ~]# redis-trib.rb check eshop-cache01:7001
>>> Performing Cluster Check (using node eshop-cache01:7001)
# 7001 also has only one
M: b49a4562f0164bbf9c8fc9c6059e09e420ee7abf eshop-cache01:7001
   slots:0-5460 (5461 slots) master
   1 additional replica(s)
M: 536e275ff8e2130c8f3c298e16c4dbdc9f765e76 192.168.99.172:7005
   slots:5461-6825,12288-16383 (5461 slots) master
   1 additional replica(s)
S: 728e473d6e5e36ddb051c600c7708f23733c46f7 192.168.99.172:7008
   slots: (0 slots) slave
   replicates b49a4562f0164bbf9c8fc9c6059e09e420ee7abf
S: 4051750a5828342877f31679189697741683e3c4 192.168.99.172:7006
   slots: (0 slots) slave
   replicates 536e275ff8e2130c8f3c298e16c4dbdc9f765e76
S: 50545f07b14843655452a08420316d11ecd12743 192.168.99.171:7004
   slots: (0 slots) slave
   replicates 0e6a3d40e63cb19f2606709a8b61ed4a6e511fcc
S: 45db2a3a0c71a628bde2bb8d6f5a9ebd4489e387 192.168.99.170:7002
   slots: (0 slots) slave
   replicates 0e6a3d40e63cb19f2606709a8b61ed4a6e511fcc
# 7003 has two
M: 0e6a3d40e63cb19f2606709a8b61ed4a6e511fcc 192.168.99.171:7003
   slots:6826-12287 (5462 slots) master
   2 additional replica(s)
[OK] All nodes agree about slots configuration.
>>> Check for open slots...
>>> Check slots coverage...
[OK] All 16384 slots covered.
# check which node is the slave of 7001
[root@eshop-cache01 ~]# redis-cli -h 192.168.99.170 -p 7001
192.168.99.170:7001> info replication
# Replication
role:master
connected_slaves:1
# we can see that 7008 is its slave, so let's kill 7008
slave0:ip=192.168.99.172,port=7008,state=online,offset=2745,lag=0
master_repl_offset:2745
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:2
repl_backlog_histlen:2744
192.168.99.170:7001> exit
```
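
Note: redis-trib.rb is the Redis 3.x/4.x tool; from Redis 5 onward the same check is built into redis-cli. A rough equivalent of the command above, assuming a Redis 5+ client is available:

```
redis-cli --cluster check 192.168.99.170:7001
```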

Kill the slave node on port 7008:

```
[root@eshop-cache03 ~]# ps -ef | grep redis
root      1369     1  0 12:55 ?        00:00:23 /usr/local/bin/redis-server 192.168.99.172:7005 [cluster]
root      1374     1  0 12:55 ?        00:00:08 /usr/local/bin/redis-server 192.168.99.172:7006 [cluster]
root      1418     1  0 13:10 ?        00:00:06 /usr/local/bin/redis-server 192.168.99.172:7008 [cluster]
root      1441  1427  0 13:39 pts/1    00:00:00 grep redis
[root@eshop-cache03 ~]# kill 1418
```
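
Right after the kill, you can watch the cluster converge on the failure. A quick sketch, assuming it is run from eshop-cache01; the exact flag shown (fail? before fail) depends on how quickly the masters agree:

```
# ask any live node for its view of the topology and filter for 7008
redis-cli -h 192.168.99.170 -p 7001 cluster nodes | grep 7008
# the 7008 entry should go from "slave" to "slave,fail?" and finally "slave,fail"
```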

After the kill, check the cluster status again. At first the check cannot connect to 7008 and 7001 reports 0 replicas; wait a moment and 7003, which originally had 2 slaves, has one of them automatically migrated to become 7001's slave.

```
[root@eshop-cache01 ~]# redis-trib.rb check eshop-cache01:7001
[ERR] Sorry, can't connect to node 192.168.99.172:7008
>>> Performing Cluster Check (using node eshop-cache01:7001)
M: b49a4562f0164bbf9c8fc9c6059e09e420ee7abf eshop-cache01:7001
   slots:0-5460 (5461 slots) master
   0 additional replica(s)
M: 536e275ff8e2130c8f3c298e16c4dbdc9f765e76 192.168.99.172:7005
   slots:5461-6825,12288-16383 (5461 slots) master
   1 additional replica(s)
S: 4051750a5828342877f31679189697741683e3c4 192.168.99.172:7006
   slots: (0 slots) slave
   replicates 536e275ff8e2130c8f3c298e16c4dbdc9f765e76
S: 50545f07b14843655452a08420316d11ecd12743 192.168.99.171:7004
   slots: (0 slots) slave
   replicates 0e6a3d40e63cb19f2606709a8b61ed4a6e511fcc
S: 45db2a3a0c71a628bde2bb8d6f5a9ebd4489e387 192.168.99.170:7002
   slots: (0 slots) slave
   replicates 0e6a3d40e63cb19f2606709a8b61ed4a6e511fcc
M: 0e6a3d40e63cb19f2606709a8b61ed4a6e511fcc 192.168.99.171:7003
   slots:6826-12287 (5462 slots) master
   2 additional replica(s)
[OK] All nodes agree about slots configuration.
>>> Check for open slots...
>>> Check slots coverage...
[OK] All 16384 slots covered.
[root@eshop-cache01 ~]# redis-trib.rb check eshop-cache01:7001
>>> Performing Cluster Check (using node eshop-cache01:7001)
M: b49a4562f0164bbf9c8fc9c6059e09e420ee7abf eshop-cache01:7001
   slots:0-5460 (5461 slots) master
   1 additional replica(s)
M: 536e275ff8e2130c8f3c298e16c4dbdc9f765e76 192.168.99.172:7005
   slots:5461-6825,12288-16383 (5461 slots) master
   1 additional replica(s)
S: 4051750a5828342877f31679189697741683e3c4 192.168.99.172:7006
   slots: (0 slots) slave
   replicates 536e275ff8e2130c8f3c298e16c4dbdc9f765e76
S: 50545f07b14843655452a08420316d11ecd12743 192.168.99.171:7004
   slots: (0 slots) slave
   replicates 0e6a3d40e63cb19f2606709a8b61ed4a6e511fcc
S: 45db2a3a0c71a628bde2bb8d6f5a9ebd4489e387 192.168.99.170:7002
   slots: (0 slots) slave
   replicates b49a4562f0164bbf9c8fc9c6059e09e420ee7abf
M: 0e6a3d40e63cb19f2606709a8b61ed4a6e511fcc 192.168.99.171:7003
   slots:6826-12287 (5462 slots) master
   1 additional replica(s)
[OK] All nodes agree about slots configuration.
>>> Check for open slots...
>>> Check slots coverage...
[OK] All 16384 slots covered.
```

Bring the 7008 node back up and check the cluster status again: 7001 now has 2 slave nodes.
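
The restart command itself is not shown above; a minimal sketch, assuming 7008 was launched from a per-instance config file at a path like the hypothetical one below (adjust to your own layout):

```
# on eshop-cache03, bring the 7008 instance back up with its cluster config (hypothetical path)
/usr/local/bin/redis-server /etc/redis-cluster/7008.conf
```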

```
[root@eshop-cache01 ~]# redis-trib.rb check eshop-cache01:7001
>>> Performing Cluster Check (using node eshop-cache01:7001)
M: b49a4562f0164bbf9c8fc9c6059e09e420ee7abf eshop-cache01:7001
   slots:0-5460 (5461 slots) master
   2 additional replica(s)
M: 536e275ff8e2130c8f3c298e16c4dbdc9f765e76 192.168.99.172:7005
   slots:5461-6825,12288-16383 (5461 slots) master
   1 additional replica(s)
S: 728e473d6e5e36ddb051c600c7708f23733c46f7 192.168.99.172:7008
   slots: (0 slots) slave
   replicates b49a4562f0164bbf9c8fc9c6059e09e420ee7abf
S: 4051750a5828342877f31679189697741683e3c4 192.168.99.172:7006
   slots: (0 slots) slave
   replicates 536e275ff8e2130c8f3c298e16c4dbdc9f765e76
S: 50545f07b14843655452a08420316d11ecd12743 192.168.99.171:7004
   slots: (0 slots) slave
   replicates 0e6a3d40e63cb19f2606709a8b61ed4a6e511fcc
S: 45db2a3a0c71a628bde2bb8d6f5a9ebd4489e387 192.168.99.170:7002
   slots: (0 slots) slave
   replicates b49a4562f0164bbf9c8fc9c6059e09e420ee7abf
M: 0e6a3d40e63cb19f2606709a8b61ed4a6e511fcc 192.168.99.171:7003
   slots:6826-12287 (5462 slots) master
   1 additional replica(s)
[OK] All nodes agree about slots configuration.
>>> Check for open slots...
>>> Check slots coverage...
[OK] All 16384 slots covered.
```
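
Worth noting in the final state: 7002, which had migrated to 7001, stays there, and the restarted 7008 re-attaches to its old master 7001, so 7001 ends up with two replicas while 7003 keeps one. To keep an eye on replica counts across the masters, a small sketch using the master addresses from this cluster:

```
# print each master's connected_slaves counter
for node in 192.168.99.170:7001 192.168.99.171:7003 192.168.99.172:7005; do
  echo -n "$node  "
  redis-cli -h "${node%:*}" -p "${node#*:}" info replication | grep connected_slaves
done
```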
