Linux IPVS thundering herd mitigation
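
The patch below mitigates the IPVS "thundering herd" problem: when a real server's weight is raised (typically because it has come back into rotation), the least-connection schedulers (lc, wlc, lblc, lblcr) see it as nearly idle and dump every new connection onto it at once. The change adds a per-destination slow-start handicap that temporarily inflates the server's apparent connection count in ip_vs_dest_conn_overhead() and that decays with each scheduling decision.
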
diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h
index 4deb383..3d4e7af 100644
--- a/include/linux/ip_vs.h
+++ b/include/linux/ip_vs.h
@@ -27,6 +27,18 @@
#define IP_VS_DEST_F_AVAILABLE 0x0001 /* server is available */
#define IP_VS_DEST_F_OVERLOAD 0x0002 /* server is overloaded */
+ /*
+ * Advisory flags for slow start.
+ *
+ * The absolute value size of the weight change will be stored
+ * in dest->slow_start_data. The flag and slow_start_data may
+ * be used and modified by the scheduler to effect slow start.
+ */
+#define IP_VS_DEST_F_WEIGHT_INC 0x0004 /* Weight has been increased */
+#define IP_VS_DEST_F_WEIGHT_DEC 0x0008 /* Weight has been decreased */
+#define IP_VS_DEST_F_WEIGHT_MASK \
+ (IP_VS_DEST_F_WEIGHT_INC|IP_VS_DEST_F_WEIGHT_DEC)
+
/*
* IPVS sync daemon states
*/
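
(Aside, not part of the patch: a minimal standalone C sketch of the advisory-flag protocol these bits implement, modelled on __ip_vs_update_dest() and ip_vs_conn_slow_start_dest_handicap() further down. The struct and function names in the sketch are illustrative only.)

    /* Standalone model of the advisory-flag protocol: the writer
     * records the direction and magnitude of a weight change, and
     * the scheduler consumes the hint once, then clears the mask. */
    #include <stdio.h>

    #define F_WEIGHT_INC  0x0004
    #define F_WEIGHT_DEC  0x0008
    #define F_WEIGHT_MASK (F_WEIGHT_INC | F_WEIGHT_DEC)

    struct dest { unsigned int flags, weight, slow_start_data; };

    /* Mirrors the hinting added to __ip_vs_update_dest() below. */
    static void set_weight(struct dest *d, unsigned int new_weight)
    {
            d->flags &= ~F_WEIGHT_MASK;
            if (new_weight > d->weight) {
                    d->slow_start_data = new_weight - d->weight;
                    d->flags |= F_WEIGHT_INC;
            } else if (new_weight < d->weight) {
                    d->slow_start_data = d->weight - new_weight;
                    d->flags |= F_WEIGHT_DEC;
            }
            d->weight = new_weight;
    }

    int main(void)
    {
            struct dest d = { 0, 1, 0 };

            set_weight(&d, 100);  /* server brought back up */
            printf("flags=%#x delta=%u\n", d.flags, d.slow_start_data);
            /* prints flags=0x4 delta=99; the scheduler will notice
             * F_WEIGHT_INC, set up slow start, and clear the mask */
            return 0;
    }
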
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index de527d1..894ad55 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -673,6 +673,10 @@ struct ip_vs_dest {
__be16 vport; /* virtual port number */
union nf_inet_addr vaddr; /* virtual IP address */
__u32 vfwmark; /* firewall mark of service */
+
+ /* for slow start */
+ atomic_t slow_start_data;
+ atomic_t slow_start_data2;
};
@@ -1209,6 +1213,9 @@ ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr,
__be16 dport, const union nf_inet_addr *vaddr, __be16 vport,
__u16 protocol, __u32 fwmark);
extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
+extern inline unsigned int
+ip_vs_conn_slow_start_dest_handicap(struct ip_vs_dest *dest,
+ struct ip_vs_service *svc);
/*
@@ -1407,7 +1414,7 @@ static inline void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
#endif
static inline unsigned int
-ip_vs_dest_conn_overhead(struct ip_vs_dest *dest)
+ip_vs_dest_conn_overhead(struct ip_vs_dest *dest, struct ip_vs_service *svc)
{
/*
* We think the overhead of processing active connections is 256
@@ -1416,7 +1423,8 @@ ip_vs_dest_conn_overhead(struct ip_vs_dest *dest)
* use the following formula to estimate the overhead now:
* dest->activeconns*256 + dest->inactconns
*/
- return (atomic_read(&dest->activeconns) << 8) +
+ return ((atomic_read(&dest->activeconns) +
+ ip_vs_conn_slow_start_dest_handicap(dest, svc)) << 8) +
atomic_read(&dest->inactconns);
}
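
(Aside, not part of the patch: a worked example of the modified overhead formula. With the handicap folded into the active count, a freshly re-weighted server with no connections can be made to look about as loaded as its peers, so the least-connection schedulers stop dogpiling it.)

    /* Standalone model of the modified ip_vs_dest_conn_overhead():
     * the slow-start handicap is added to the active-connection
     * count before the usual activeconns*256 + inactconns weighting. */
    #include <stdio.h>

    static unsigned int overhead(unsigned int active, unsigned int inact,
                                 unsigned int handicap)
    {
            return ((active + handicap) << 8) + inact;
    }

    int main(void)
    {
            printf("idle, no handicap: %u\n", overhead(0, 0, 0));    /* 0     */
            printf("idle, handicap 40: %u\n", overhead(0, 0, 40));   /* 10240 */
            printf("busy peer server:  %u\n", overhead(40, 100, 0)); /* 10340 */
            return 0;
    }
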
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 5c318f7..85ecc88 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -52,6 +52,8 @@
#include <net/ip_vs.h>
+EXPORT_SYMBOL_GPL(ip_vs_conn_slow_start_dest_handicap);
+
/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
static DEFINE_MUTEX(__ip_vs_mutex);
@@ -764,6 +766,21 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
{
struct netns_ipvs *ipvs = net_ipvs(svc->net);
int conn_flags;
+ int old_weight;
+
+ /* set hints for slow start */
+ dest->flags &= ~IP_VS_DEST_F_WEIGHT_MASK;
+
+ old_weight = atomic_read(&dest->weight);
+
+ if (old_weight < udest->weight) {
+ atomic_set(&dest->slow_start_data, udest->weight - old_weight);
+ dest->flags |= IP_VS_DEST_F_WEIGHT_INC;
+ }
+ else if (old_weight > udest->weight) {
+ atomic_set(&dest->slow_start_data, old_weight - udest->weight);
+ dest->flags |= IP_VS_DEST_F_WEIGHT_DEC;
+ }
/* set the weight and the flags */
atomic_set(&dest->weight, udest->weight);
@@ -3472,6 +3489,130 @@ out:
return ret;
}
+static void
+__ip_vs_conn_set_slow_start(struct ip_vs_dest *dest, struct ip_vs_service *svc)
+{
+ __u32 ss_handicap;
+ __u32 ss_shift;
+ __u32 ndest;
+ __u32 w = 0;
+ __u32 dest_w = 0;
+ struct list_head *l, *e;
+ struct ip_vs_dest *d;
+
+ /*
+ * If the weight is zero just set the slow_start hint and data to
+ * zero too as they won't be used.
+ */
+
+ if ((dest->flags & IP_VS_DEST_F_WEIGHT_DEC) ||
+ !(dest_w = atomic_read(&dest->weight))) {
+#ifdef CONFIG_IP_VS_DEBUG
+ IP_VS_DBG(1, "CONN slow_start: null\n");
+#endif
+ atomic_set(&dest->slow_start_data, 0);
+ atomic_set(&dest->slow_start_data2, 0);
+ return;
+ }
+
+ /*
+ * Calculate a weighted number of connections this server would
+ * have if all the currently active connections were redistributed
+ * limited to a maximum of 64k.
+ */
+ l = &svc->destinations;
+
+ ss_handicap = 0;
+ ndest = 0;
+
+ for (e = l->next; e != l; e = e->next) {
+ d = list_entry(e, struct ip_vs_dest, n_list);
+ w = atomic_read(&d->weight);
+
+ if (w < 1 || d == dest) {
+ continue;
+ }
+
+ ndest++;
+
+ /* Try to avoid overflowing ss_handicap */
+ ss_shift = atomic_read(&d->activeconns);
+
+ if (ss_shift & 0xffff0000)
+ ss_shift = 0xffff;
+
+ ss_shift = (ss_shift << 16) / (w & 0xffff);
+
+ if (~0L - ss_handicap < ss_shift) {
+ ss_handicap = ~0L;
+ break;
+ }
+
+ ss_handicap += ss_shift;
+ }
+
+ if (ndest)
+ ss_handicap = (ss_handicap * dest_w / ndest) >> 16;
+
+ /* ss_shift = log_2((ss_handicap & 0xfff) >> 3) */
+ if (ss_handicap) {
+ __u32 i;
+ ss_shift = ss_handicap;
+
+ for (i = 12; i > 0; i--) {
+ if (ss_shift & 0x8000)
+ break;
+
+ ss_shift <<= 1;
+ }
+
+ ss_shift = i;
+ ss_handicap <<= ss_shift;
+ }
+ else
+ ss_shift = 0;
+
+ atomic_set(&dest->slow_start_data, ss_handicap);
+ atomic_set(&dest->slow_start_data2, ss_shift);
+
+#ifdef CONFIG_IP_VS_DEBUG
+ IP_VS_DBG_BUF(1, "CONN slow_start_init: server %s:%u "
+ "handicap=%u (%u) shift=%u ndest=%u\n",
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
+ ntohs(dest->port), ss_handicap,
+ ss_handicap >> ss_shift, ss_shift, ndest);
+#endif
+}
+
+inline unsigned int
+ip_vs_conn_slow_start_dest_handicap(struct ip_vs_dest *dest,
+ struct ip_vs_service *svc)
+{
+ unsigned int handicap;
+
+ /* Set up slow_start if weight has recently changed */
+ if (unlikely(dest->flags & IP_VS_DEST_F_WEIGHT_MASK)) {
+ __ip_vs_conn_set_slow_start(dest, svc);
+ dest->flags &= ~IP_VS_DEST_F_WEIGHT_MASK;
+ }
+
+ handicap = atomic_read(&dest->slow_start_data);
+
+ if (unlikely(!handicap))
+ return 0;
+
+ handicap--;
+ atomic_set(&dest->slow_start_data, handicap);
+
+#ifdef CONFIG_IP_VS_DEBUG
+ if (unlikely(!handicap))
+ IP_VS_DBG_BUF(1, "CONN slow_start_end: server %s:%u ",
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
+ ntohs(dest->port));
+#endif
+
+ return handicap >> atomic_read(&dest->slow_start_data2);
+}
static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
{
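
(Aside, not part of the patch: a standalone model of the handicap life cycle above. __ip_vs_conn_set_slow_start() estimates the connections the re-weighted server would hold if the peers' load were redistributed, i.e. the average active connections per unit of weight in 16.16 fixed point times the new weight, then scales the result up by a small shift so that even modest handicaps decay smoothly. ip_vs_conn_slow_start_dest_handicap() then burns one raw unit per scheduling decision while reporting raw >> shift to the overhead formula.)

    /* Standalone model of the slow-start handicap decay.  A handicap
     * of h is stored as h << shift and decremented once per scheduling
     * decision, so the effective value (raw >> shift) drops by one
     * every 2^shift decisions. */
    #include <stdio.h>

    int main(void)
    {
            unsigned int handicap = 40;          /* estimated fair share */
            unsigned int probe = handicap, shift, i;

            /* Normalisation loop from __ip_vs_conn_set_slow_start():
             * find how far the handicap can be scaled up, capped at
             * 12 bits. */
            for (i = 12; i > 0; i--) {
                    if (probe & 0x8000)
                            break;
                    probe <<= 1;
            }
            shift = i;
            handicap <<= shift;   /* 40 -> raw 160, shift 2 */

            printf("raw=%u shift=%u effective=%u\n",
                   handicap, shift, handicap >> shift);

            /* One raw unit per scheduling decision: the effective
             * handicap reaches zero only after handicap << shift
             * (here 160) decisions rather than all at once. */
            for (i = 0; i < 8 && handicap; i++)
                    handicap--;
            printf("after 8 decisions: effective=%u\n", handicap >> shift);
            return 0;
    }
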
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 0f16283..bc0927d 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -413,7 +413,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
continue;
if (atomic_read(&dest->weight) > 0) {
least = dest;
- loh = ip_vs_dest_conn_overhead(least);
+ loh = ip_vs_dest_conn_overhead(least, svc);
goto nextstage;
}
}
@@ -427,7 +427,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
continue;
- doh = ip_vs_dest_conn_overhead(dest);
+ doh = ip_vs_dest_conn_overhead(dest, svc);
if (loh * atomic_read(&dest->weight) >
doh * atomic_read(&least->weight)) {
least = dest;
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index eec797f..707a32a 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -161,7 +161,8 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
}
/* get weighted least-connection node in the destination set */
-static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
+static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set,
+ struct ip_vs_service *svc)
{
register struct ip_vs_dest_set_elem *e;
struct ip_vs_dest *dest, *least;
@@ -178,7 +179,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
if ((atomic_read(&least->weight) > 0)
&& (least->flags & IP_VS_DEST_F_AVAILABLE)) {
- loh = ip_vs_dest_conn_overhead(least);
+ loh = ip_vs_dest_conn_overhead(least, svc);
goto nextstage;
}
}
@@ -191,7 +192,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
continue;
- doh = ip_vs_dest_conn_overhead(dest);
+ doh = ip_vs_dest_conn_overhead(dest, svc);
if ((loh * atomic_read(&dest->weight) >
doh * atomic_read(&least->weight))
&& (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
@@ -213,7 +214,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
/* get weighted most-connection node in the destination set */
-static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
+static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set,
+ struct ip_vs_service *svc)
{
register struct ip_vs_dest_set_elem *e;
struct ip_vs_dest *dest, *most;
@@ -226,7 +228,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
list_for_each_entry(e, &set->list, list) {
most = e->dest;
if (atomic_read(&most->weight) > 0) {
- moh = ip_vs_dest_conn_overhead(most);
+ moh = ip_vs_dest_conn_overhead(most, svc);
goto nextstage;
}
}
@@ -236,7 +238,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
nextstage:
list_for_each_entry(e, &set->list, list) {
dest = e->dest;
- doh = ip_vs_dest_conn_overhead(dest);
+ doh = ip_vs_dest_conn_overhead(dest, svc);
/* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
if ((moh * atomic_read(&dest->weight) <
doh * atomic_read(&most->weight))
@@ -583,7 +585,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
if (atomic_read(&dest->weight) > 0) {
least = dest;
- loh = ip_vs_dest_conn_overhead(least);
+ loh = ip_vs_dest_conn_overhead(least, svc);
goto nextstage;
}
}
@@ -597,7 +599,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
continue;
- doh = ip_vs_dest_conn_overhead(dest);
+ doh = ip_vs_dest_conn_overhead(dest, svc);
if (loh * atomic_read(&dest->weight) >
doh * atomic_read(&least->weight)) {
least = dest;
@@ -662,7 +664,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
/* Get the least loaded destination */
read_lock(&en->set.lock);
- dest = ip_vs_dest_set_min(&en->set);
+ dest = ip_vs_dest_set_min(&en->set, svc);
read_unlock(&en->set.lock);
/* More than one destination + enough time passed by, cleanup */
@@ -672,7 +674,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
struct ip_vs_dest *m;
write_lock(&en->set.lock);
- m = ip_vs_dest_set_max(&en->set);
+ m = ip_vs_dest_set_max(&en->set, svc);
if (m)
ip_vs_dest_set_erase(&en->set, m);
write_unlock(&en->set.lock);
diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c
index f391819..743d7b7 100644
--- a/net/netfilter/ipvs/ip_vs_lc.c
+++ b/net/netfilter/ipvs/ip_vs_lc.c
@@ -46,7 +46,7 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
if ((dest->flags & IP_VS_DEST_F_OVERLOAD) ||
atomic_read(&dest->weight) == 0)
continue;
- doh = ip_vs_dest_conn_overhead(dest);
+ doh = ip_vs_dest_conn_overhead(dest, svc);
if (!least || doh < loh) {
least = dest;
loh = doh;
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
index bc1bfc4..074e61b 100644
--- a/net/netfilter/ipvs/ip_vs_wlc.c
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -55,7 +55,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
atomic_read(&dest->weight) > 0) {
least = dest;
- loh = ip_vs_dest_conn_overhead(least);
+ loh = ip_vs_dest_conn_overhead(least, svc);
goto nextstage;
}
}
@@ -69,7 +69,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
list_for_each_entry_continue(dest, &svc->destinations, n_list) {
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
continue;
- doh = ip_vs_dest_conn_overhead(dest);
+ doh = ip_vs_dest_conn_overhead(dest, svc);
if (loh * atomic_read(&dest->weight) >
doh * atomic_read(&least->weight)) {
least = dest;
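
(Usage note, not part of the patch: the new path is exercised by any weight update on an existing destination, since both the sockopt and netlink control interfaces funnel into __ip_vs_update_dest(). For example, raising a recovered real server's weight with something like "ipvsadm -e -t 192.0.2.1:80 -r 198.51.100.7:80 -w 100" should arm the WEIGHT_INC hint, and the next lc/wlc scheduling pass will compute and begin draining the handicap. Addresses here are illustrative.)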