Skip to content

Instantly share code, notes, and snippets.

@upa
Created March 28, 2015 09:19
Show Gist options
  • Save upa/5a0cf8515dacaa2b0660 to your computer and use it in GitHub Desktop.
Save upa/5a0cf8515dacaa2b0660 to your computer and use it in GitHub Desktop.
a patch for kernel-3.14 (mptcp_v0.89), enable MULTIPATH_HASHONLY balancing.
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 9922093..67f6aad 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -292,6 +292,8 @@ int fib_sync_down_dev(struct net_device *dev, int force);
int fib_sync_down_addr(struct net *net, __be32 local);
int fib_sync_up(struct net_device *dev);
void fib_select_multipath(struct fib_result *res);
+void fib_select_multipath_hashonly(struct fib_result *res,
+ const struct flowi4 * fl4);
/* Exported by fib_trie.c */
void fib_trie_init(void);
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index a2936f6..74f1d90 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -90,6 +90,17 @@ config IP_ROUTE_MULTIPATH
equal "cost" and chooses one of them in a non-deterministic fashion
if a matching packet arrives.
+config IP_ROUTE_MULTIPATH_HASHONLY
+ bool "IP: ECMP hash only for flows"
+ depends on IP_ROUTE_MULTIPATH
+ help
+ Normally, IP_ROUTE_MULTIPATH load-balances a single flow across
+ multiple next hops. However, this causes packet reordering, so
+ commodity hardware routers use only a hash to decide the next hop
+ for each flow. This option emulates that behavior: the next hop for
+ a flow is chosen by a hash of the flow's 5-tuple, and it never
+ changes afterwards.
+
config IP_ROUTE_VERBOSE
bool "IP: verbose route monitoring"
depends on IP_ADVANCED_ROUTER
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 017fa5e..6fd981e 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -33,6 +33,10 @@
#include <linux/init.h>
#include <linux/slab.h>
+#ifdef CONFIG_IP_ROUTE_MULTIPATH_HASHONLY
+#include <linux/hash.h> /* hash_32() */
+#endif
+
#include <net/arp.h>
#include <net/ip.h>
#include <net/protocol.h>
@@ -1331,3 +1335,30 @@ void fib_select_multipath(struct fib_result *res)
spin_unlock_bh(&fib_multipath_lock);
}
#endif
+
+#ifdef CONFIG_IP_ROUTE_MULTIPATH_HASHONLY
+/* added by [email protected]
+ * Hash-only mode picks a flow's next hop from a hash of its 5-tuple
+ * alone; there is no weight-based balancing (like commodity hardware
+ * routers).  The fields are summed and passed to hash_32() with width 8.
+ */
+static inline unsigned long fib_calculate_hash(const struct flowi4 *fl4)
+{
+	return hash_32(fl4->saddr + fl4->daddr + fl4->flowi4_proto +
+		       fl4->fl4_sport + fl4->fl4_dport, 8);
+}
+
+/*
+ * Select the next hop for @res from the hash of flow @fl4 alone: the
+ * index is the flow hash modulo the route's next-hop count (fib_nhs).
+ */
+void fib_select_multipath_hashonly(struct fib_result *res,
+				   const struct flowi4 *fl4)
+{
+	struct fib_info *info = res->fi;
+	unsigned long hash = fib_calculate_hash(fl4);
+
+	res->nh_sel = hash % info->fib_nhs;
+}
+
+#endif
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 844323b..bda098e 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -335,6 +335,17 @@ int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl)
if (rt != NULL)
goto packet_routed;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH_HASHONLY
+ iph = ip_hdr (skb);
+ if (iph->protocol == IPPROTO_TCP) {
+ fl4->fl4_sport = tcp_hdr (skb)->source;
+ fl4->fl4_dport = tcp_hdr (skb)->dest;
+ } else if (iph->protocol == IPPROTO_UDP) {
+ fl4->fl4_sport = udp_hdr (skb)->source;
+ fl4->fl4_dport = udp_hdr (skb)->dest;
+ }
+#endif
+
/* Make sure we can route this packet. */
rt = (struct rtable *)__sk_dst_check(sk, 0);
if (rt == NULL) {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment