Last active
October 22, 2017 10:20
-
-
Save laoar/8ca7a8b1a81234482d2eb1c983605f97 to your computer and use it in GitHub Desktop.
Introduce a new sysctl knob, sysctl_tcp_rto_max, to control the maximum value of the TCP retransmission timeout (TCP_RTO_MAX), and make it per network namespace (netns).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h | |
index 9a14a08..f12c655 100644 | |
--- a/include/net/netns/ipv4.h | |
+++ b/include/net/netns/ipv4.h | |
@@ -125,6 +125,7 @@ struct netns_ipv4 { | |
int sysctl_tcp_sack; | |
int sysctl_tcp_window_scaling; | |
int sysctl_tcp_timestamps; | |
+ int sysctl_tcp_rto_max; | |
struct inet_timewait_death_row tcp_death_row; | |
int sysctl_max_syn_backlog; | |
diff --git a/include/net/tcp.h b/include/net/tcp.h | |
index 7048329..801a03a 100644 | |
--- a/include/net/tcp.h | |
+++ b/include/net/tcp.h | |
@@ -137,7 +137,7 @@ | |
#define TCP_DELACK_MIN 4U | |
#define TCP_ATO_MIN 4U | |
#endif | |
-#define TCP_RTO_MAX ((unsigned)(120*HZ)) | |
+#define TCP_RTO_MAX ((unsigned)(net->ipv4.sysctl_tcp_rto_max*HZ)) | |
#define TCP_RTO_MIN ((unsigned)(HZ/5)) | |
#define TCP_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */ | |
#define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ)) /* RFC 1122 initial RTO value, now | |
@@ -624,6 +624,8 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, | |
static inline void tcp_bound_rto(const struct sock *sk) | |
{ | |
+ struct net *net = sock_net(sk); | |
+ | |
if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX) | |
inet_csk(sk)->icsk_rto = TCP_RTO_MAX; | |
} | |
@@ -1210,6 +1212,8 @@ static inline unsigned long tcp_probe0_when(const struct sock *sk, | |
static inline void tcp_check_probe_timer(struct sock *sk) | |
{ | |
+ struct net *net = sock_net(sk); | |
+ | |
if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending) | |
inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, | |
tcp_probe0_base(sk), TCP_RTO_MAX); | |
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c | |
index 4089c01..c46d441 100644 | |
--- a/net/ipv4/inet_connection_sock.c | |
+++ b/net/ipv4/inet_connection_sock.c | |
@@ -729,6 +729,8 @@ static void reqsk_timer_handler(unsigned long data) | |
!inet_rtx_syn_ack(sk_listener, req) || | |
inet_rsk(req)->acked)) { | |
unsigned long timeo; | |
+ struct sock *sk = sk_listener; | |
+ struct net *net = sock_net(sk); | |
if (req->num_timeout++ == 0) | |
atomic_dec(&queue->young); | |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c | |
index 9bf8097..b46847f 100644 | |
--- a/net/ipv4/sysctl_net_ipv4.c | |
+++ b/net/ipv4/sysctl_net_ipv4.c | |
@@ -1141,6 +1141,13 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl, | |
.mode = 0644, | |
.proc_handler = proc_dointvec | |
}, | |
+ { | |
+ .procname = "tcp_rto_max", | |
+ .data = &init_net.ipv4.sysctl_tcp_rto_max, | |
+ .maxlen = sizeof(int), | |
+ .mode = 0644, | |
+ .proc_handler = proc_dointvec, | |
+ }, | |
{ } | |
}; | |
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c | |
index ce9c7fe..ac61aa7 100644 | |
--- a/net/ipv4/tcp_fastopen.c | |
+++ b/net/ipv4/tcp_fastopen.c | |
@@ -176,6 +176,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, | |
{ | |
struct tcp_sock *tp; | |
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; | |
+ struct net *net = sock_net(sk); | |
struct sock *child; | |
bool own_req; | |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c | |
index 2920e0c..4f7ffc5 100644 | |
--- a/net/ipv4/tcp_input.c | |
+++ b/net/ipv4/tcp_input.c | |
@@ -2030,6 +2030,7 @@ static bool tcp_check_sack_reneging(struct sock *sk, int flag) | |
{ | |
if (flag & FLAG_SACK_RENEGING) { | |
struct tcp_sock *tp = tcp_sk(sk); | |
+ struct net *net = sock_net(sk); | |
unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4), | |
msecs_to_jiffies(10)); | |
@@ -2990,6 +2991,7 @@ void tcp_rearm_rto(struct sock *sk) | |
{ | |
const struct inet_connection_sock *icsk = inet_csk(sk); | |
struct tcp_sock *tp = tcp_sk(sk); | |
+ struct net *net = sock_net(sk); | |
/* If the retrans timer is currently being used by Fast Open | |
* for SYN-ACK retrans purpose, stay put. | |
@@ -3250,6 +3252,7 @@ static void tcp_ack_probe(struct sock *sk) | |
{ | |
const struct tcp_sock *tp = tcp_sk(sk); | |
struct inet_connection_sock *icsk = inet_csk(sk); | |
+ struct net *net = sock_net(sk); | |
/* Was it a usable window open? */ | |
@@ -5655,6 +5658,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |
{ | |
struct inet_connection_sock *icsk = inet_csk(sk); | |
struct tcp_sock *tp = tcp_sk(sk); | |
+ struct net *net = sock_net(sk); | |
struct tcp_fastopen_cookie foc = { .len = -1 }; | |
int saved_clamp = tp->rx_opt.mss_clamp; | |
bool fastopen_fail; | |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c | |
index a20e7f0..db03ab6 100644 | |
--- a/net/ipv4/tcp_ipv4.c | |
+++ b/net/ipv4/tcp_ipv4.c | |
@@ -1551,6 +1551,7 @@ void tcp_v4_early_demux(struct sk_buff *skb) | |
bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) | |
{ | |
struct tcp_sock *tp = tcp_sk(sk); | |
+ struct net *net = sock_net(sk); | |
if (sysctl_tcp_low_latency || !tp->ucopy.task) | |
return false; | |
@@ -2527,6 +2528,7 @@ static int __net_init tcp_sk_init(struct net *net) | |
net->ipv4.sysctl_tcp_sack = 1; | |
net->ipv4.sysctl_tcp_window_scaling = 1; | |
net->ipv4.sysctl_tcp_timestamps = 1; | |
+ net->ipv4.sysctl_tcp_rto_max = 120; | |
return 0; | |
fail: | |
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c | |
index 0ff83c1..9f2b12d 100644 | |
--- a/net/ipv4/tcp_minisocks.c | |
+++ b/net/ipv4/tcp_minisocks.c | |
@@ -563,6 +563,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | |
__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); | |
bool paws_reject = false; | |
bool own_req; | |
+ struct net *net = sock_net(sk); | |
tmp_opt.saw_tstamp = 0; | |
if (th->doff > (sizeof(struct tcphdr)>>2)) { | |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c | |
index 2f1588b..6b2c463 100644 | |
--- a/net/ipv4/tcp_output.c | |
+++ b/net/ipv4/tcp_output.c | |
@@ -2377,6 +2377,7 @@ bool tcp_schedule_loss_probe(struct sock *sk) | |
{ | |
struct inet_connection_sock *icsk = inet_csk(sk); | |
struct tcp_sock *tp = tcp_sk(sk); | |
+ struct net *net = sock_net(sk); | |
u32 timeout, tlp_time_stamp, rto_time_stamp; | |
u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3); | |
@@ -2933,6 +2934,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) | |
{ | |
const struct inet_connection_sock *icsk = inet_csk(sk); | |
struct tcp_sock *tp = tcp_sk(sk); | |
+ struct net *net = sock_net(sk); | |
struct sk_buff *skb; | |
struct sk_buff *hole = NULL; | |
u32 max_segs; | |
@@ -3445,6 +3447,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) | |
int tcp_connect(struct sock *sk) | |
{ | |
struct tcp_sock *tp = tcp_sk(sk); | |
+ struct net *net = sock_net(sk); | |
struct sk_buff *buff; | |
int err; | |
@@ -3549,6 +3552,7 @@ void tcp_send_delayed_ack(struct sock *sk) | |
void tcp_send_ack(struct sock *sk) | |
{ | |
struct sk_buff *buff; | |
+ struct net *net = sock_net(sk); | |
/* If we have been reset, we may not send again. */ | |
if (sk->sk_state == TCP_CLOSE) | |
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c | |
index c0feeee..9f55728 100644 | |
--- a/net/ipv4/tcp_timer.c | |
+++ b/net/ipv4/tcp_timer.c | |
@@ -59,6 +59,7 @@ static void tcp_write_err(struct sock *sk) | |
static int tcp_out_of_resources(struct sock *sk, bool do_reset) | |
{ | |
struct tcp_sock *tp = tcp_sk(sk); | |
+ struct net *net = sock_net(sk); | |
int shift = 0; | |
/* If peer does not open window for long time, or did not transmit | |
@@ -151,6 +152,7 @@ static bool retransmits_timed_out(struct sock *sk, | |
{ | |
const unsigned int rto_base = TCP_RTO_MIN; | |
unsigned int linear_backoff_thresh, start_ts; | |
+ struct net *net = sock_net(sk); | |
if (!inet_csk(sk)->icsk_retransmits) | |
return false; | |
@@ -317,6 +319,7 @@ static void tcp_probe_timer(struct sock *sk) | |
{ | |
struct inet_connection_sock *icsk = inet_csk(sk); | |
struct tcp_sock *tp = tcp_sk(sk); | |
+ struct net *net = sock_net(sk); | |
int max_probes; | |
u32 start_ts; | |
@@ -367,8 +370,9 @@ static void tcp_probe_timer(struct sock *sk) | |
static void tcp_fastopen_synack_timer(struct sock *sk) | |
{ | |
struct inet_connection_sock *icsk = inet_csk(sk); | |
+ struct net *net = sock_net(sk); | |
int max_retries = icsk->icsk_syn_retries ? : | |
- sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */ | |
+ net->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */ | |
struct request_sock *req; | |
req = tcp_sk(sk)->fastopen_rsk; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment