Last active
August 26, 2025 16:23
-
-
Save supermarsx/5efa0d01b0a1ebe23abd7db4b331ac2d to your computer and use it in GitHub Desktop.
Docker sysctl.conf master file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Docker-tailored sysctl.conf
# ------------------------------------------------------------
# These settings are tuned for Linux hosts running Docker/containers
# (bridge networks, veth pairs, NAT/masquerade, overlay/ingress in Swarm).
# Notes focus on packet bursts from containers, conntrack/NAT behavior,
# asymmetric routing across bridges, and handling spikes from L4/L7 load balancers.
#
# IMPORTANT:
# - These are host-level sysctls. Some can also be applied per-container with
#   `docker run --sysctl key=value`, but many only make sense on the host.
# - Validate against your kernel version; some legacy toggles are no-ops/unsafe.
# - Test gradually in staging before production. Monitor drops, retransmits,
#   and conntrack exhaustion (e.g., `ss -s`, `netstat -s`, `dmesg`, `conntrack -S`).
# --------------------------------------------------------------------------------------------------------
# SUMMARY TABLE (Defaults vs Tuned vs Recommended)
# --------------------------------------------------------------------------------------------------------
# PARAMETER                        DEFAULT VALUE        TUNED VALUE              RECOMMENDED SAFE VALUE
# --------------------------------------------------------------------------------------------------------
# net.ipv4.tcp_syncookies          1                    1                        1 (enable)
# net.ipv4.tcp_max_syn_backlog     128                  20480                    4096-16384 (depends on load)
# net.ipv4.tcp_synack_retries      5                    2                        3-5 (avoid too low)
# net.ipv4.ip_forward              0                    1                        1 if routing/NAT needed
# net.ipv4.tcp_timestamps          1                    1                        1 (keep enabled for RTT/PAWS)
# net.ipv4.tcp_tw_recycle          0 (removed)          (not set)                do not set (removed in 4.12)
# net.ipv4.tcp_tw_reuse            0                    (commented)              0 (leave off unless tested)
# net.ipv4.tcp_window_scaling      1                    1                        1 (needed for high throughput)
# net.core.somaxconn               128                  4096                     1024-8192 depending on app
# net.core.netdev_max_backlog      1000                 4096                     2048-8192 (NIC dependent)
# net.ipv4.tcp_max_tw_buckets      262144               400000                   262144-400000
# net.ipv4.tcp_no_metrics_save     0                    1                        1 (safe with containers)
# net.ipv4.tcp_rmem                4096 87380 6291456   4096 87380 16777216      4096 87380 16777216
# net.ipv4.tcp_syn_retries         6                    2                        3-4 (safer middle ground)
# net.ipv4.tcp_wmem                4096 16384 4194304   4096 65536 16777216      4096 65536 16777216
# net.ipv4.conf.*.rp_filter        1                    2                        2 (loose mode for Docker)
# ---------------------------------------------------------------------------------------------------------

# TCP_SYNCOOKIES (default: 1 on most modern distros)
# Defend against SYN flood bursts common behind public load balancers.
# In container environments, many short-lived connections arrive concurrently.
# SYN cookies avoid backlog exhaustion for half-open handshakes at the cost of
# disabling some TCP extensions during attack conditions.
net.ipv4.tcp_syncookies = 1

# TCP_MAX_SYN_BACKLOG (OPTIONAL) (default: 128)
# Increase queued connection requests (SYN backlog) for listeners.
# Useful if a containerized service gets spiky traffic (autoscaling, LB health checks).
# The effective accept queue is min(net.core.somaxconn, app backlog).
# NOTE: superseded by the active setting further down (20480).
# net.ipv4.tcp_max_syn_backlog = 2048

# TCP_SYNACK_RETRIES (OPTIONAL) (default: 5)
# Lower SYN+ACK retries to fail faster under attack. On Docker hosts
# serving internet clients over NAT/LB, be cautious; overly low values hurt users
# on high-latency/packet-loss networks.
# NOTE: superseded by the active setting further down (2).
# net.ipv4.tcp_synack_retries = 3

# IP_FORWARD (default: 0)
# Enable routing/NAT between interfaces (Docker bridge/overlay need this).
# Required for containers to reach external networks when using iptables MASQUERADE.
net.ipv4.ip_forward = 1

# TCP_TIMESTAMPS (default: 1)
# Keep TCP timestamps enabled: they support PAWS and RTT estimation, and
# disabling them can reduce performance on overlay/ingress networks and WAN
# paths. Set to 0 only if your traffic is mostly east-west on the same
# host/segment and you have validated throughput (minor privacy / CPU savings).
net.ipv4.tcp_timestamps = 1

# TCP_TW_RECYCLE (removed since kernel 4.12)
# tw_recycle was unsafe with NAT and broke clients behind load balancers
# (common with Docker). The key no longer exists on kernels >= 4.12, so an
# active assignment here makes `sysctl -p` / `sysctl --system` report an
# error. Leave it commented; on older kernels the default of 0 (off) is the
# only safe value anyway.
# net.ipv4.tcp_tw_recycle = 0

# TCP_TW_REUSE (OPTIONAL) (default: 0)
# TIME-WAIT reuse. Leaving it commented avoids edge-case failures
# with NATed clients and load balancers. If you enable it, test carefully.
# net.ipv4.tcp_tw_reuse = 0

# TCP_WINDOW_SCALING (default: 1)
# Keep window scaling enabled. Disabling it caps the TCP window at ~65KB and
# severely throttles services across overlays/WAN. For general Docker
# production, scaling ON is required for high throughput; set 0 only
# deliberately (constrained local links) or for troubleshooting.
net.ipv4.tcp_window_scaling = 1

# SOMAXCONN (default: 128)
# Bump accept queue length for busy containerized servers (nginx, haproxy, etc.).
# Real limit is min(this, app listen backlog). Helps absorb bursts from LBs.
net.core.somaxconn = 4096

# NETDEV_MAX_BACKLOG (default: 1000)
# Increase the device input queue for veth/bridge bursts (container fan-in).
# Reduces drops when packets arrive faster than the stack can process.
net.core.netdev_max_backlog = 4096

# TCP_MAX_SYN_BACKLOG (OVERRIDE) (default: 128)
# High SYN backlog to tolerate large spikes from autoscaling and LB health checks.
# This overrides the earlier commented example. Tune with SYN cookies enabled.
net.ipv4.tcp_max_syn_backlog = 20480

# TCP_MAX_TW_BUCKETS (default: 262144)
# Allow many TIME-WAIT sockets before GC. Microservices with short connections
# create lots of TIME-WAIT; high limits reduce premature recycling but consume RAM.
net.ipv4.tcp_max_tw_buckets = 400000

# TCP_NO_METRICS_SAVE (default: 0)
# Don't persist per-destination TCP metrics (RTT/cwnd) across ephemeral flows.
# With containers frequently moving IPs (redeploys, overlays), stale metrics can
# mislead the stack; disabling saves surprises.
net.ipv4.tcp_no_metrics_save = 1

# TCP_RMEM (default: 4096 87380 6291456)
# TCP receive buffer: min/default/max. Larger max helps high-BDP paths (cross-AZ,
# overlay), but increases memory pressure under many connections. Monitor `slab`.
net.ipv4.tcp_rmem = 4096 87380 16777216

# TCP_SYN_RETRIES (default: 6)
# Outgoing SYN retries for *active* connects from the host/containers via NAT.
# Lowering fails faster when peers are gone (rapid reschedules), but may harm
# slow/long-RTT external services. Recommended middle ground is 3-4.
net.ipv4.tcp_syn_retries = 2

# TCP_SYNACK_RETRIES (default: 5)
# Passive side (listeners) SYN+ACK retries. Lower to reduce hold time under SYN
# flood. Validate client success from mobile/geo-distributed networks.
net.ipv4.tcp_synack_retries = 2

# TCP_WMEM (default: 4096 16384 4194304)
# TCP send buffer: min/default/max. Larger ceilings allow better throughput for
# services crossing overlays/WAN; watch memory under fan-out traffic patterns.
net.ipv4.tcp_wmem = 4096 65536 16777216

# RP_FILTER (default: 1)
# Reverse-path filtering in *loose* mode. Docker bridges and asymmetric paths
# (DNAT/SNAT, multiple interfaces) need rp_filter=2 to avoid dropping valid
# replies that return on a different interface. Keep both default and all.
net.ipv4.conf.default.rp_filter = 2
net.ipv4.conf.all.rp_filter = 2

# -----------------------------------------------------------------------------
# FYI (not set here, but commonly tuned on Docker hosts):
#
# Example extra tunables (commented, with descriptions):
#
# Default ~65536. Max number of tracked connections. Raise if NAT table exhausts.
# net.netfilter.nf_conntrack_max = 262144
# Default varies (1/4 of max). Hash buckets for conntrack; increase with max.
# net.netfilter.nf_conntrack_buckets = 65536
# Default 128. Min ARP cache entries before GC starts.
# net.ipv4.neigh.default.gc_thresh1 = 128
# Default 512. ARP entries where GC becomes more aggressive.
# net.ipv4.neigh.default.gc_thresh2 = 512
# Default 1024. Max ARP cache entries before forced GC.
# net.ipv4.neigh.default.gc_thresh3 = 1024
# Default ~nr_open (varies). Max open file handles system-wide.
# fs.file-max = 2097152
# Default 8192. Max files watched per user by inotify. Raise for many containers.
# fs.inotify.max_user_watches = 524288
# Default 212992. Max socket receive buffer size.
# net.core.rmem_max = 16777216
# Default 212992. Max socket send buffer size.
# net.core.wmem_max = 16777216
#
# Always pair changes with observability: `ethtool -S`, `tc -s qdisc`, `ss -ni`,
# iptables counters, and conntrack stats to verify impact.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment