If you use keepalived for your HA services, and especially if you use the virtual_server options, you can get a lot out of the Substitutions capabilities of the config file.
I’ll just post 2 equivalent configurations, both working with keepalived 2.2 (which comes by default on Ubuntu 22.04), so you can judge for yourself. The configuration with Substitutions is 3 times shorter than the long configuration and IMHO is much easier to read and to manage.
I decided to post this config because I did not find the manual easy to follow, and I had tried unsuccessfully to avoid repetitions with virtual_server_group.
The 2 configs below only show the virtual_server part, and both produce the same configuration. Of course, you can use the same technique for vrrp_instance.
By reading the example and checking the manual for SEQ, LST and variables, you should be able to simplify your configuration easily.
The variables with RS in their name are for Real Servers, and the ones ending in _ID are the last octet of the IP address… It might be easier to follow by looking at the result at the end.
Short version
# Options shared by every virtual_server in this file; they are inserted
# wherever $DEFAULT_RS_OPTIONS appears. Each trailing "\" continues the
# definition onto the next line.
$DEFAULT_RS_OPTIONS= \
delay_loop 20 \
lb_algo rr \
lb_kind NAT \
persistence_timeout 180 \
protocol TCP
# Templates
# One real_server entry with a TCP health check. BASE_RS, RS_ID and PORT are
# not set at this point; they get their values further down, before the
# template is used.
$RS_TPL= \
real_server ${BASE_RS}${RS_ID} ${PORT} { \
weight 1 \
TCP_CHECK { \
connect_timeout 3 \
} \
}
# One virtual_server on $VS:${PORT} holding the shared options plus one
# $RS_TPL entry for each RS_ID from $MIN_RS to $MAX_RS.
$VS_TPL= \
virtual_server $VS ${PORT} { \
$DEFAULT_RS_OPTIONS \
~SEQ(RS_ID, $MIN_RS, $MAX_RS) $RS_TPL \
}
# Config
# Real servers 10.10.66.31 to 10.10.66.33 behind virtual IP 10.10.178.11.
$BASE_RS=10.10.66.
$MIN_RS=31
$MAX_RS=33
$VS=10.10.178.11
# One virtual_server per listed port (80, 443 and 2222).
~LST(PORT, 80, 443, 2222) $VS_TPL
# 8082 Traefik
# 9323 Docker Prometheus metrics
# Only the virtual IP changes; BASE_RS, MIN_RS and MAX_RS keep the values
# set above, so the real servers are still 10.10.66.31 to 10.10.66.33.
$VS=10.10.232.1
~LST(PORT, 8082, 9323) $VS_TPL
# 9221 Proxmox Exporter
# Real servers switch to 10.10.178.2 through 10.10.178.4; $VS is not
# reassigned and stays at 10.10.232.1.
$BASE_RS=10.10.178.
$MIN_RS=2
$MAX_RS=4
~LST(PORT, 9221) $VS_TPL
# Prometheus metrics from Ceph
# Real server template for port 9283, health-checked by running head_long.py
# against the server's own /metrics URL.
# ${_PWD} is not defined in this file; the long version further down shows
# /etc/keepalived in its place.
$CEPH_RS_TPL= \
real_server 10.10.178.${RS_ID} 9283 { \
weight 1 \
MISC_CHECK { \
misc_path "${_PWD}/scripts/head_long.py http://10.10.178.${RS_ID}:9283/metrics" \
misc_timeout 5 \
} \
}
# Written out directly instead of through $VS_TPL because the real servers
# use $CEPH_RS_TPL rather than $RS_TPL.
virtual_server 10.10.232.1 9283 {
$DEFAULT_RS_OPTIONS
~SEQ(RS_ID, 2, 4) $CEPH_RS_TPL
}
# Galera cluster (MariaDB)
# Real server template for port 3306 with two health checks: a TCP_CHECK and
# a MISC_CHECK that runs keepalived_http_check.py against port 9104 of the
# same host, with "user dyngandi" set on the script check.
$GALERA_RS= \
real_server 10.10.10.${RS_ID} 3306 { \
weight 1 \
TCP_CHECK { \
connect_timeout 3 \
} \
MISC_CHECK { \
misc_path "${_PWD}/scripts/keepalived_http_check.py http://10.10.10.${RS_ID}:9104/metrics" \
misc_timeout 5 \
user dyngandi \
} \
}
virtual_server 10.10.232.1 3306 {
$DEFAULT_RS_OPTIONS
# NOTE(review): delay_loop is already set to 20 by $DEFAULT_RS_OPTIONS;
# presumably this second value replaces it - confirm with keepalived -t.
delay_loop 15
~SEQ(RS_ID,51,53) $GALERA_RS
}
Long version
# Hand-expanded equivalent of "~LST(PORT, 80, 443, 2222) $VS_TPL" from the
# short version: three virtual servers on 10.10.178.11, each with real
# servers 10.10.66.31 to 10.10.66.33 on the same port.
# Port 80
virtual_server 10.10.178.11 80 {
delay_loop 20
lb_algo rr
lb_kind NAT
persistence_timeout 180
protocol TCP
real_server 10.10.66.31 80 {
weight 1
TCP_CHECK {
connect_timeout 3
}
}
real_server 10.10.66.32 80 {
weight 1
TCP_CHECK {
connect_timeout 3
}
}
real_server 10.10.66.33 80 {
weight 1
TCP_CHECK {
connect_timeout 3
}
}
}
# Port 443
virtual_server 10.10.178.11 443 {
delay_loop 20
lb_algo rr
lb_kind NAT
persistence_timeout 180
protocol TCP
real_server 10.10.66.31 443 {
weight 1
TCP_CHECK {
connect_timeout 3
}
}
real_server 10.10.66.32 443 {
weight 1
TCP_CHECK {
connect_timeout 3
}
}
real_server 10.10.66.33 443 {
weight 1
TCP_CHECK {
connect_timeout 3
}
}
}
# Port 2222
virtual_server 10.10.178.11 2222 {
delay_loop 20
lb_algo rr
lb_kind NAT
persistence_timeout 180
protocol TCP
real_server 10.10.66.31 2222 {
weight 1
TCP_CHECK {
connect_timeout 3
}
}
real_server 10.10.66.32 2222 {
weight 1
TCP_CHECK {
connect_timeout 3
}
}
real_server 10.10.66.33 2222 {
weight 1
TCP_CHECK {
connect_timeout 3
}
}
}
# Hand-expanded equivalent of "~LST(PORT, 8082, 9323) $VS_TPL" from the short
# version: virtual IP 10.10.232.1 in front of real servers 10.10.66.31 to
# 10.10.66.33.
# Port 8082 (Traefik in the short version's comments)
virtual_server 10.10.232.1 8082 {
delay_loop 20
lb_algo rr
lb_kind NAT
persistence_timeout 180
protocol TCP
real_server 10.10.66.31 8082 {
weight 1
TCP_CHECK {
connect_timeout 3
}
}
real_server 10.10.66.32 8082 {
weight 1
TCP_CHECK {
connect_timeout 3
}
}
real_server 10.10.66.33 8082 {
weight 1
TCP_CHECK {
connect_timeout 3
}
}
}
# Port 9323 (Docker Prometheus metrics in the short version's comments)
virtual_server 10.10.232.1 9323 {
delay_loop 20
lb_algo rr
lb_kind NAT
persistence_timeout 180
protocol TCP
real_server 10.10.66.31 9323 {
weight 1
TCP_CHECK {
connect_timeout 3
}
}
real_server 10.10.66.32 9323 {
weight 1
TCP_CHECK {
connect_timeout 3
}
}
real_server 10.10.66.33 9323 {
weight 1
TCP_CHECK {
connect_timeout 3
}
}
}
# Hand-expanded equivalent of "~LST(PORT, 9221) $VS_TPL" from the short
# version (Proxmox Exporter): real servers 10.10.178.2 to 10.10.178.4.
virtual_server 10.10.232.1 9221 {
delay_loop 20
lb_algo rr
lb_kind NAT
persistence_timeout 180
protocol TCP
real_server 10.10.178.2 9221 {
weight 1
TCP_CHECK {
connect_timeout 3
}
}
real_server 10.10.178.3 9221 {
weight 1
TCP_CHECK {
connect_timeout 3
}
}
real_server 10.10.178.4 9221 {
weight 1
TCP_CHECK {
connect_timeout 3
}
}
}
# Prometheus metrics from Ceph
# Hand-expanded form of the Ceph virtual_server: each real server
# (10.10.178.2 to 10.10.178.4) is checked by head_long.py against its own
# /metrics URL instead of a TCP_CHECK.
virtual_server 10.10.232.1 9283 {
delay_loop 20
lb_algo rr
lb_kind NAT
persistence_timeout 180
protocol TCP
real_server 10.10.178.2 9283 {
weight 1
MISC_CHECK {
misc_path "/etc/keepalived/scripts/head_long.py http://10.10.178.2:9283/metrics"
misc_timeout 5
}
}
real_server 10.10.178.3 9283 {
weight 1
MISC_CHECK {
misc_path "/etc/keepalived/scripts/head_long.py http://10.10.178.3:9283/metrics"
misc_timeout 5
}
}
real_server 10.10.178.4 9283 {
weight 1
MISC_CHECK {
misc_path "/etc/keepalived/scripts/head_long.py http://10.10.178.4:9283/metrics"
misc_timeout 5
}
}
}
# Galera cluster (MariaDB)
# Hand-expanded form of the short version's Galera virtual_server. Kept in
# step with $GALERA_RS: every real server (10.10.10.51 to 10.10.10.53) has a
# TCP_CHECK plus a MISC_CHECK that runs keepalived_http_check.py as user
# dyngandi against port 9104 of the same host.
virtual_server 10.10.232.1 3306 {
delay_loop 20
lb_algo rr
lb_kind NAT
persistence_timeout 180
protocol TCP
# Repeated on purpose: the short version sets delay_loop 15 after the shared
# defaults, so the literal expansion carries both lines.
delay_loop 15
real_server 10.10.10.51 3306 {
weight 1
TCP_CHECK {
connect_timeout 3
}
MISC_CHECK {
misc_path "/etc/keepalived/scripts/keepalived_http_check.py http://10.10.10.51:9104/metrics"
misc_timeout 5
user dyngandi
}
}
real_server 10.10.10.52 3306 {
weight 1
TCP_CHECK {
connect_timeout 3
}
MISC_CHECK {
misc_path "/etc/keepalived/scripts/keepalived_http_check.py http://10.10.10.52:9104/metrics"
misc_timeout 5
user dyngandi
}
}
real_server 10.10.10.53 3306 {
weight 1
TCP_CHECK {
connect_timeout 3
}
MISC_CHECK {
misc_path "/etc/keepalived/scripts/keepalived_http_check.py http://10.10.10.53:9104/metrics"
misc_timeout 5
user dyngandi
}
}
}
The verbose version is OK, but as the configuration gets longer, and if you have a lot of virtual servers (VS) and real servers (RS), I find it much easier to work with the short version.
A few commands to help with configuration & debugging
# Follow the keepalived service log as new entries arrive.
journalctl -u keepalived.service -f
# Same log, keeping only lines that mention Keepalived_healthcheckers.
journalctl -u keepalived.service -f | grep Keepalived_healthcheckers
# Check the configuration; ConfOK is printed only when keepalived -t succeeds.
keepalived -t && echo ConfOK
# List the IPVS virtual server table.
ipvsadm -l
(See the example output below.)
# ipvsadm -ln
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
-> RemoteAddress:Port Forward Weight ActiveConn InActConn
TCP 10.10.178.11:80 rr persistent 180
-> 10.10.66.32:80 Masq 1 1 0
TCP 10.10.178.11:443 rr persistent 180
-> 10.10.66.32:443 Masq 1 2 0
TCP 10.10.178.11:2222 rr persistent 180
-> 10.10.66.31:2222 Masq 1 0 1
-> 10.10.66.32:2222 Masq 1 0 0
-> 10.10.66.33:2222 Masq 1 0 0
TCP 10.10.232.1:3306 rr persistent 180
-> 10.10.10.51:3306 Masq 1 0 0
-> 10.10.10.52:3306 Masq 1 0 11
-> 10.10.10.53:3306 Masq 1 0 67
TCP 10.10.232.1:8082 rr persistent 180
-> 10.10.66.31:8082 Masq 1 0 0
-> 10.10.66.32:8082 Masq 1 0 0
-> 10.10.66.33:8082 Masq 1 1 0
TCP 10.10.232.1:9221 rr persistent 180
-> 10.10.178.2:9221 Masq 1 0 8
-> 10.10.178.3:9221 Masq 1 0 0
-> 10.10.178.4:9221 Masq 1 0 0
TCP 10.10.232.1:9283 rr persistent 180
-> 10.10.178.2:9283 Masq 1 1 8
TCP 10.10.232.1:9323 rr persistent 180
-> 10.10.66.31:9323 Masq 1 1 0
-> 10.10.66.32:9323 Masq 1 0 0
-> 10.10.66.33:9323 Masq 1 0 0