-
-
Save elico/492d8f75f584ec1bed98b2a054a02cbb to your computer and use it in GitHub Desktop.
#!/usr/bin/env bash
#
# Per-connection load balancing of LAN traffic over two next hops, using
# nftables connection marks together with iproute2 policy routing.
# All tunables live in this section.

# Destination network that gets an explicit main-table route via NEXT_HOP_1.
DEST_NET="192.168.111.0/24"
# Number of next hops (not referenced by the rest of this script).
NEXT_HOPS="2"
# Gateways used as the default route of the per-next-hop routing tables.
NEXT_HOP_1="192.168.126.202"
NEXT_HOP_2="192.168.126.203"
# Routing table ids; table N is selected for packets carrying fwmark N.
NEXT_HOP_1_TABLE="202"
NEXT_HOP_2_TABLE="203"
# Absolute paths of the external tools.
NFTABLES="/usr/sbin/nft"
# Not referenced by the rest of this script.
IPTABLES="/sbin/iptables"
IP="/sbin/ip"
# Interface whose new connections are balanced (matched with iifname).
LAN="eth0"
# Interface whose outgoing traffic is masqueraded.
WAN="eth1"
## Disabling Reverse path filter
# For every interface entry: print the sysctl path, print its current value,
# then write 0 to it.
# NOTE(review): presumably required because marked traffic is routed through
# per-next-hop tables rather than the main table -- confirm.
for rp_filter_file in /proc/sys/net/ipv4/conf/*/rp_filter; do
  # When the glob matches nothing the literal pattern is left in place; skip it.
  [[ -e "${rp_filter_file}" ]] || continue
  printf '%s\n' "${rp_filter_file}"
  cat -- "${rp_filter_file}"
  printf '0\n' > "${rp_filter_file}"
done
#######################################
# Rebuild one per-next-hop routing table: flush it, copy every non-default
# route of the main table into it, then add its own default route.
# Globals:   IP (read)
# Arguments: $1 - routing table id
#            $2 - gateway used as the default route of that table
#######################################
rebuild_next_hop_table() {
  local table=$1
  local gateway=$2
  local -a route_args

  "${IP}" route flush table "${table}"
  # Each line printed by `ip route show` is split into words and handed back
  # to `ip route add` as separate arguments; -r keeps backslashes literal and
  # the array avoids glob expansion of the route text.
  "${IP}" route show | grep -Ev '^default' \
    | while read -r -a route_args; do
      "${IP}" route add table "${table}" "${route_args[@]}"
    done
  "${IP}" route add default via "${gateway}" table "${table}"
}

# The main-table route to DEST_NET is removed before the copy and re-added at
# the end, presumably so that it is not duplicated into the per-next-hop
# tables. A failure here (route not present) is tolerated.
"${IP}" route del "${DEST_NET}"

DTABLE="${NEXT_HOP_1_TABLE}"
rebuild_next_hop_table "${DTABLE}" "${NEXT_HOP_1}"

DTABLE="${NEXT_HOP_2_TABLE}"
rebuild_next_hop_table "${DTABLE}" "${NEXT_HOP_2}"

"${IP}" route add "${DEST_NET}" via "${NEXT_HOP_1}"
#NAT
#######################################
# Create the ip "nat" table with a postrouting chain that masquerades
# everything leaving through the WAN interface.
# Globals:   NFTABLES (read), WAN (read)
# Arguments: none
#######################################
configure_nat() {
  # The family is spelled out ("ip" is nft's default family) so these
  # commands read the same way as the mangle commands.
  "${NFTABLES}" add table ip nat
  "${NFTABLES}" add chain ip nat postrouting '{ type nat hook postrouting priority 100; policy accept; }'
  "${NFTABLES}" add rule ip nat postrouting oif "${WAN}" masquerade
}

configure_nat
# MANGLE
#######################################
# Create the ip "mangle" table: new connections arriving on the LAN interface
# are hashed onto one of two chains (wan1 / wan2) that set conntrack mark
# 0x1 / 0x2, and the packet mark is kept in sync with the conntrack mark.
# Globals:   NFTABLES (read), LAN (read)
# Arguments: none
#######################################
configure_mangle_table() {
  # Base chains, one per hook, all at priority -150. The family is spelled
  # out ("ip" is nft's default family).
  "${NFTABLES}" add table ip mangle
  "${NFTABLES}" add chain ip mangle prerouting '{ type filter hook prerouting priority -150; policy accept; }'
  "${NFTABLES}" add chain ip mangle input '{ type filter hook input priority -150; policy accept; }'
  "${NFTABLES}" add chain ip mangle forward '{ type filter hook forward priority -150; policy accept; }'
  "${NFTABLES}" add chain ip mangle output '{ type route hook output priority -150; policy accept; }'
  "${NFTABLES}" add chain ip mangle postrouting '{ type filter hook postrouting priority -150; policy accept; }'

  # Target chains: tag the connection with the mark of the chosen uplink.
  "${NFTABLES}" add chain ip mangle wan1
  "${NFTABLES}" add rule ip mangle wan1 counter ct mark set 0x1
  "${NFTABLES}" add chain ip mangle wan2
  "${NFTABLES}" add rule ip mangle wan2 counter ct mark set 0x2

  # 5-tuple/flow/PCC LOAD Balance
  # Hash of the flow tuple modulo 2 selects wan1 or wan2. The brace
  # expressions are quoted so they reach nft as a single, unaltered argument.
  "${NFTABLES}" add chain ip mangle PCC_OUT_TCP
  "${NFTABLES}" add rule ip mangle PCC_OUT_TCP counter jhash ip saddr . tcp sport . ip daddr . tcp dport mod 2 vmap '{ 0 : jump wan1, 1 : jump wan2 }'
  "${NFTABLES}" add chain ip mangle PCC_OUT_UDP
  "${NFTABLES}" add rule ip mangle PCC_OUT_UDP counter jhash ip saddr . udp sport . ip daddr . udp dport mod 2 vmap '{ 0 : jump wan1, 1 : jump wan2 }'
  # Everything that is neither TCP nor UDP is balanced on the address pair only.
  "${NFTABLES}" add chain ip mangle PCC_OUT_OTHERS
  "${NFTABLES}" add rule ip mangle PCC_OUT_OTHERS counter ip protocol '{ tcp, udp }' return
  "${NFTABLES}" add rule ip mangle PCC_OUT_OTHERS counter jhash ip saddr . ip daddr mod 2 vmap '{ 0 : jump wan1, 1 : jump wan2 }'

  # Prerouting: copy the conntrack mark onto the packet.
  "${NFTABLES}" add rule ip mangle prerouting counter meta mark set ct mark
  # NOTE(review): this writes the packet mark back into the conntrack mark
  # right after it was copied from there; it looks like a no-op -- confirm.
  "${NFTABLES}" add rule ip mangle prerouting ct mark != 0x0 counter ct mark set mark
  # New connections from the LAN pick an uplink.
  "${NFTABLES}" add rule ip mangle prerouting iifname "${LAN}" ip protocol tcp ct state new counter jump PCC_OUT_TCP
  "${NFTABLES}" add rule ip mangle prerouting iifname "${LAN}" ip protocol udp ct state new counter jump PCC_OUT_UDP
  "${NFTABLES}" add rule ip mangle prerouting iifname "${LAN}" ct state new counter jump PCC_OUT_OTHERS
  # Translate the conntrack mark into the packet mark.
  "${NFTABLES}" add rule ip mangle prerouting ct mark 0x1 counter meta mark set 0x1
  "${NFTABLES}" add rule ip mangle prerouting ct mark 0x2 counter meta mark set 0x2
  # Postrouting: store the packet mark in the conntrack entry.
  "${NFTABLES}" add rule ip mangle postrouting counter ct mark set mark
}

configure_mangle_table
#######################################
# Add the policy rule "fwmark <mark> -> table <table>" unless `ip rule`
# already lists it, so that re-running the script does not duplicate it.
# Globals:   IP (read)
# Arguments: $1 - firewall mark (decimal)
#            $2 - routing table id
#######################################
ensure_fwmark_rule() {
  local mark=$1
  local table=$2
  local mark_hex

  # The rule listing is matched against the hexadecimal form of the mark.
  printf -v mark_hex '0x%x' "${mark}"
  if ! "${IP}" rule | grep -q "from all fwmark ${mark_hex} lookup ${table}"; then
    "${IP}" rule add fwmark "${mark}" table "${table}"
  fi
}

ensure_fwmark_rule 1 "${NEXT_HOP_1_TABLE}"
ensure_fwmark_rule 2 "${NEXT_HOP_2_TABLE}"
@cyayon it's up to you whether we continue by email or not.
I will try to update nftables-rules-dump-putput.txt in the repo later.
Since netfilter (nftables/iptables) supports jump and goto, you can partition the tables and update only specific parts of them using an nft script.
Since nft applies changes atomically (unlike iptables), you are guaranteed that changing a vmap or another part of the ruleset will not affect traffic and will not cause a disruption of service.
I don't know how you check things or how you do them, so feel free to share more by email if you would like.
The main difference can be seen at:
elico/mwan-nft-lb-example@22e6eb2#diff-68054fdcdf4d0108a2b62e83360a742fb3b8334f4bb93b6f285ca27403eca11a
For a simple ruleset of roughly 10-50 commands you can use individual nft add rule
commands or any similar single-command action.
But when you have 150 commands it takes a lot of time to run and is also prone to race conditions.
With 1000+ nft commands you must use an nft script for performance reasons.
Comparing the 100 rules/commands to the 1.2 k commands which I used, it took more than 30 seconds compared to 1-2 seconds inside an nft script.
Be in touch!
thanks.
@cyayon I updated the rules.
Take a peek at:
https://github.com/elico/mwan-nft-lb-example/blob/main/run-lab.sh
It creates a full lab in Linux Namespaces.
You might need to tune the code a bit to make it work in your environment.
The scripts create multiple routers and a client that simulates a network with 10 GWs.
thanks !
Hi,
thanks for your answer.
Could you please update your nftables-rules-dump-putput.txt with your latest rule set? I think you have only updated your .rb script... Did your final rule set change?
I have some services on the firewall/router itself (OpenVPN, for example), and I have to begin my mangle / OUTPUT chain with "ct mark != 0x0 counter meta mark set ct mark" to allow these services to work as expected.
My complete mangle / OUTPUT chain is :
# Route-type base chain on the output hook at mangle priority, default accept.
chain OUTPUT {
type route hook output priority mangle; policy accept;
# necessary for local services (ovpn)
# When the connection already carries a non-zero conntrack mark, copy it onto the packet mark.
ct mark != 0x0 counter meta mark set ct mark
# force reroute-check DHCPC RENEW skgid process via its own iface
# UDP 68 -> 67 packets from a socket owned by group $skgid_wanN jump to the matching MWANn_SL chain
# (the $skgid_* variables and MWANn_SL chains are defined outside this snippet).
udp sport 68 udp dport 67 meta skgid $skgid_wan1 counter jump MWAN1_SL comment "mwan1_dhcpc_skgid"
udp sport 68 udp dport 67 meta skgid $skgid_wan2 counter jump MWAN2_SL comment "mwan2_dhcpc_skgid"
#udp sport 68 udp dport 67 meta skgid $skgid_wan3 counter jump MWAN3_SL comment "mwan3_dhcpc_skgid"
}
Moreover, if I don't have "ct mark != 0x0 counter meta mark set ct mark" at the END of mangle / PREROUTING, packets from internet clients to services on the router/firewall itself (OpenVPN) are NOT marked as expected (but it works). I think that is not clean... Finally, my complete mangle / PREROUTING is:
# Filter-type base chain on the prerouting hook at mangle priority, default accept.
chain PREROUTING {
type filter hook prerouting priority mangle; policy accept;
# New connections arriving on a given interface jump to that interface's MWANn chain
# (the $iface_* variables and MWANn chains are defined outside this snippet).
iifname $iface_wan1 ct state new counter jump MWAN1 comment "mwan1_orange1"
iifname $iface_wan2 ct state new counter jump MWAN2 comment "mwan2_orange2"
iifname $iface_wan3 ct state new counter jump MWAN3 comment "mwan3_lte"
iifname $iface_beta ct state new counter jump MWAN5 comment "mwan5_beta"
iifname $iface_vanisher1 ct state new counter jump MWAN6 comment "mwan6_vanisher_orange1"
iifname $iface_vanisher2 ct state new counter jump MWAN7 comment "mwan7_vanisher_orange2"
iifname $iface_vanisher3 ct state new counter jump MWAN9 comment "mwan9_vanisher_lte"
# Every new connection, whatever its input interface, then jumps to MWAN.
ct state new counter jump MWAN
# Last step: copy a non-zero conntrack mark onto the packet mark.
ct mark != 0x0 counter meta mark set ct mark
}
I do not understand why it is required to begin mangle / PREROUTING with "ct mark != 0x0 counter ct mark set mark"...
Finally, I have a very (overly) complicated daemon script to monitor the nft tables; I will simplify and refactor it following your recommendations :)
Would you prefer to continue this by email?
thanks.