Skip to content

Instantly share code, notes, and snippets.

@thimslugga
Last active May 18, 2025 03:18
Show Gist options
  • Save thimslugga/5238646ca6daac37784e54f5615a24ff to your computer and use it in GitHub Desktop.
Save thimslugga/5238646ca6daac37784e54f5615a24ff to your computer and use it in GitHub Desktop.
Setup EC2 NAT Instance with Amazon Linux 2
#!/bin/bash
#
# User-data bootstrap for an EC2 NAT instance on Amazon Linux 2.
#
# References:
#   https://github.com/1debit/alternat
#   https://serverfault.com/questions/1137692/aws-nat-instance-setup
#   https://www.redhat.com/en/blog/using-iptables-nft-hybrid-linux-firewall
#   https://www.frozentux.net/iptables-tutorial/iptables-tutorial.html
#   https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/security_guide/sec-configuring_nat_using_nftables
#   https://wiki.nftables.org/wiki-nftables/index.php/Performing_Network_Address_Translation_(NAT)
#   https://home.regit.org/netfilter-en/nftables-quick-howto/
#   https://unix.stackexchange.com/questions/283275/how-to-do-masquerading-with-nftables

# From here on, stdout and stderr are copied to /var/log/user-data.log and
# handed to logger (tag "user-data"), whose stderr goes to /dev/console.
exec > >(tee /var/log/user-data.log | logger -t user-data -s 2>/dev/console) 2>&1

# Alias expansion stays enabled for the remainder of the script.
shopt -s expand_aliases
export AWS_PAGER=""

curl_cmd="curl --silent --fail"
imds_uri='http://169.254.169.254/latest'

# Ask the metadata endpoint for a session token with a 300 second TTL.
# ${curl_cmd} is left unquoted on purpose so it splits into command + flags.
token=$(${curl_cmd} -X PUT "${imds_uri}/api/token" \
  -H "X-aws-ec2-metadata-token-ttl-seconds: 300")

# Run curl with the session token header; extra arguments are passed through.
curl_cmd_with_token() {
  ${curl_cmd} -H "X-aws-ec2-metadata-token: ${token}" "$@"
}

iid_uri="${imds_uri}/meta-data/instance-id"
instance_id=$(curl_cmd_with_token "${iid_uri}")
# Update installed packages, then install the kernel-5.15 extras topic.
sudo yum update -y
sudo amazon-linux-extras install -y kernel-5.15

# Packages installed in a single yum transaction.
nat_packages=(
  yum-utils
  bzip2
  zstd
  sysstat
  ethtool
  iproute-tc
  iptables-nft
  nftables
  ipset
  tuned
  tuna
  irqbalance
  conntrack-tools
  grubby
  perf
  vim
  jq
)
sudo yum install -y "${nat_packages[@]}"
#yum install -y pcp pcp-system-tools pcp-zeroconf

# Add the C-state limits to the boot arguments of every installed kernel.
cstate_args="intel_idle.max_cstate=0 processor.max_cstate=0"
sudo grubby --update-kernel=ALL --args="${cstate_args}"
# Enable cgroupsv2 and pressure stall info
#sudo grubby --update-kernel=ALL --args="systemd.unified_cgroup_hierarchy=1 swapaccount=1 psi=1"
# Enable and start irqbalance, then TuneD.
for unit in irqbalance tuned; do
  sudo systemctl enable --now "${unit}"
done

# Select the network-throughput TuneD profile and print the active profile.
sudo tuned-adm profile network-throughput
tuned-adm active

# Google TCP BBR ships as a module; load it before selecting it below.
sudo /sbin/modprobe tcp_bbr

# Settings applied to the running kernel, one sysctl call per entry.
# AL2 defaults: pfifo_fast for the qdisc, cubic for congestion control.
runtime_sysctls=(
  "net.core.default_qdisc=fq_codel"
  "net.ipv4.tcp_congestion_control=bbr"
  "net.ipv4.ip_forward=1"
  "net.ipv4.ip_local_port_range=1024 65535"
  "net.ipv4.tcp_slow_start_after_idle=0"
  "net.ipv4.conf.eth0.send_redirects=0"
)
for setting in "${runtime_sysctls[@]}"; do
  sudo sysctl -q -w "${setting}"
done
# Read MemTotal (in kilobytes) from /proc/meminfo.
memtotal=$(awk '/^MemTotal:/ {print $2}' /proc/meminfo)

# Pick vm.min_free_kbytes from the memory size. Each table entry is
# "<upper bound in kB>:<value>"; the first bound that MemTotal is below wins.
#   < 512MB -> 12800      < 1GB -> 64000      < 2GB -> 128000
#   < 4GB   -> 256000     < 8GB -> 512000     8GB+  -> 1048576
vm_min_free_kbytes=1048576
for tier in \
  "524288:12800" \
  "1048576:64000" \
  "2097152:128000" \
  "4194304:256000" \
  "8388608:512000"; do
  if test "${memtotal}" -lt "${tier%%:*}"; then
    vm_min_free_kbytes=${tier##*:}
    break
  fi
done
echo "vm.min_free_kbytes=${vm_min_free_kbytes}"
# Write the persistent sysctl drop-in and load it with `sysctl --system`.
#
# The heredoc delimiter is unquoted so ${vm_min_free_kbytes} is expanded by
# the shell; everything else is written as-is.
#
# Values must NOT be wrapped in double quotes inside the file: the quotes are
# written out literally and passed to the kernel as part of the value, which
# then rejects the setting. That is why vm.min_free_kbytes and
# net.ipv4.ip_local_port_range are written bare.
#
# send_redirects is cleared for conf.all as well as eth0, because an interface
# keeps sending redirects while either conf/all or its own setting is enabled.
sudo tee /etc/sysctl.d/99-ec2-nat-instance.conf <<EOF
# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/enhanced-networking-ena.html
# https://github.com/amzn/amzn-drivers/blob/master/kernel/linux/ena/ENA_Linux_Best_Practices.rst
# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ena-express.html
# https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/9/html/monitoring_and_managing_system_status_and_performance/tuning-the-network-performance_monitoring-and-managing-system-status-and-performance
# Minimize console logging level for kernel printk messages
# 4 4 1 7 works too
kernel.printk=3 4 1 7
# By default, the kernel.panic tunable is set to 0 and so the host
# does not reboot automatically if the kernel panics.
#
# To ensure that the node reboots automatically after it panics,
# this tunable must be set to a non zero value.
kernel.panic=10
# Ensure that your reserved kernel memory is sufficient to sustain a
# high rate of packet buffer allocations (the default value may be too small).
#
# As a rule of thumb, you should set this value to between 1-3% of available
# system memory, and adjust this value up or down to meet the needs of your
# application.
vm.min_free_kbytes=${vm_min_free_kbytes}
vm.swappiness=10
# Suppress logging of net_ratelimit callback
net.core.message_cost=0
# Enable BPF JIT compiler
net.core.bpf_jit_enable=1
# https://access.redhat.com/solutions/1241943
# https://access.redhat.com/solutions/30453
net.core.netdev_max_backlog=1000
net.core.somaxconn=4096
#net.core.netdev_max_backlog=2000
#net.core.somaxconn=8192
#net.core.netdev_budget=600
#net.core.netdev_budget_usecs=4000
# fq can be used as a drop in replacement for pfifo_fast.
# http://man7.org/linux/man-pages/man8/tc-fq.8.html
# https://www.bufferbloat.net/projects/codel/wiki/
#
# * fq is a better choice for end hosts because of it supports tcp pacing,
# which is a requirement for the bbr congestion control algorithm.
# * fq is best for fat servers with tcp-heavy workloads and particularly at
# 10GigE speeds or above
#
# * fq-codel is a better choice for forwarding/routers which don't originate
# local traffic, hypervisors and best general purpose qdisc.
#
# Note: For queue management, sch_fq was recommended instead of fq_codel as of linux 3.12.
# Note: Required to enable BBR for congestion control
# Note: Default is pfifo_fast on AL2
net.core.default_qdisc=fq_codel
# TCP BBR
# This is not an official Google product LOL
# https://github.com/google/bbr/blob/master/README
# Note: BBR will support fq_codel after linux-4.13.
# Note: BBR must be used with fq qdisc with pacing enabled, since pacing is integral to the BBR design
# and implementation. BBR without pacing would not function properly and may incur unnecessary
# high packet loss rates.
# Note: Default is cubic on AL2
net.ipv4.tcp_congestion_control=bbr
# Negotiate TCP ECN for active and passive connections
#
# Turn on ECN as this will let AQM sort out the congestion backpressure without
# incurring packet losses and retransmissions.
#
# In order to make best used of this we really need ECN-enablement
# sysctl net.ipv4.tcp_ecn on end-hosts.
#
# https://github.com/systemd/systemd/pull/9143
# https://github.com/systemd/systemd/issues/9748
#net.ipv4.tcp_ecn=1
net.ipv4.tcp_ecn=2
net.ipv4.tcp_ecn_fallback=1
# Turn on MultiPath TCP
net.mptcp.enabled=1
# Turn on tcp window scaling
net.ipv4.tcp_window_scaling=1
# Recommended to enable this for hosts with jumbo frames
# i.e. mtu 9000+ enabled
net.ipv4.tcp_mtu_probing=1
# https://blog.cloudflare.com/optimizing-the-linux-stack-for-mobile-web-per/
# https://access.redhat.com/solutions/168483
# Avoid falling back to slow start after a connection goes idle
# keeps our cwnd large with the keep alive connections (kernel > 3.6)
net.ipv4.tcp_slow_start_after_idle=0
# Bump the local port range
net.ipv4.ip_local_port_range=1024 65535
# Turn on IPv4 forwarding
net.ipv4.ip_forward=1
#net.ipv4.conf.all.forwarding=1
#net.ipv4.conf.default.forwarding=1
# Turn on IPv6 forwading
#net.ipv6.conf.all.forwarding=1
#net.ipv6.conf.default.forwarding=1
# DO NOT send redirects as this is not our job
net.ipv4.conf.all.send_redirects=0
net.ipv4.conf.eth0.send_redirects=0
EOF
sudo sysctl --system
# udev rules
#
# Install a rule that runs ethtool -G on ENA-driven eth*/en* interfaces, then
# reload the rules and re-trigger net devices so the rule is applied.
# The quoted delimiter keeps %k and the rest of the rule literal.
sudo tee /etc/udev/rules.d/51-ec2-net-tuning.rules <<'EOR'
# Increase the rx and tx ring buffer size, use ethtool -g eth0 to verify
# https://serverfault.com/a/975746
SUBSYSTEM=="net", ACTION=="add|change", KERNEL=="eth*|en*", DRIVERS=="ena", RUN+="/usr/sbin/ethtool -G %k rx 4096 tx 1024"
EOR

sudo systemctl daemon-reload
#sudo systemctl restart systemd-udevd
#sudo udevadm test /sys/class/net/eth0

# Only trigger the devices when the rules reloaded successfully.
if sudo udevadm control --reload-rules; then
  sudo udevadm trigger --attr-match=subsystem=net
fi
# nftables
#
# Load the NAT modules for nftables.
for nft_module in nft_nat nft_chain_nat; do
  sudo /sbin/modprobe "${nft_module}"
done

# Point each legacy front-end at its -nft variant under /usr/sbin.
for xt_tool in iptables ip6tables arptables ebtables; do
  sudo alternatives --set "${xt_tool}" "/usr/sbin/${xt_tool}-nft"
done

systemctl enable --now nftables

# verify: print the iptables version string and the current ruleset
iptables --version
nft list ruleset
# Write the NAT ruleset, register it in /etc/sysconfig/nftables.conf and
# reboot.
#
# One variable holds the ruleset path so the file that is written and the
# file named by the include line cannot drift apart. Previously the ruleset
# was written as ec2-nat-instance.conf while the include referenced
# ec2-nat-instance.nft, a file that was never created.
nft_ruleset_file=/etc/nftables/ec2-nat-instance.nft
nft_service_conf=/etc/sysconfig/nftables.conf

# NOTE(review): INT_IF and EXT_IF are both defined as eth1 and neither is used
# by an active rule (the masquerade rule names "eth1" directly) - confirm the
# intended internal interface before enabling the commented-out rules.
# The quoted delimiter keeps $INT_IF, $EXT_IF etc. literal for nft.
sudo tee "${nft_ruleset_file}" <<'EOF'
#!/usr/sbin/nft -f
flush ruleset
define INT_IF = eth1
define EXT_IF = eth1
table inet firewall {
chain inbound {
#type filter hook input priority 0; policy drop;
#ct state established,related accept
#ct state invalid drop
iifname lo accept
#ip protocol icmp limit rate 4/second accept
#ip6 nexthdr ipv6-icmp limit rate 4/second accept
#ip protocol igmp limit rate 4/second accept
#iifname $INT_IF udp dport { 67, 68 } accept
#iifname $INT_IF tcp dport 22 ip saddr $INT_NET accept
#log prefix "[nftables] Inbound Denied: " flags all counter drop
}
chain forward {
#type filter hook forward priority 0; policy drop;
#ct state established,related accept
#ct state invalid drop
#ip saddr { $INT_NET, $EXT_HOST } accept
#ip daddr $WEB tcp dport 80 limit rate 10/second log prefix "[nftables] Web Server Access: " accept
#log prefix "[nftables] Forward Denied: " flags all counter drop
}
chain outbound {
#type filter hook output priority 0; policy accept;
}
}
table ip nat {
chain prerouting {
type nat hook prerouting priority -100; policy accept;
}
chain postrouting {
type nat hook postrouting priority 100; policy accept;
oifname { "eth1" } masquerade # "eth1" is our external interface
}
}
EOF

# Append the include only if it is not already present, so re-running the
# script does not add duplicate lines. grep -s stays quiet if the file is
# missing; tee -a then creates it.
nft_include_line="include \"${nft_ruleset_file}\""
if ! sudo grep -qsxF -- "${nft_include_line}" "${nft_service_conf}"; then
  printf '%s\n' "${nft_include_line}" | sudo tee -a "${nft_service_conf}"
fi

sudo systemctl reboot
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment