Skip to content

Instantly share code, notes, and snippets.

@laoar
Last active October 22, 2017 10:20
Show Gist options
  • Save laoar/8ca7a8b1a81234482d2eb1c983605f97 to your computer and use it in GitHub Desktop.
Save laoar/8ca7a8b1a81234482d2eb1c983605f97 to your computer and use it in GitHub Desktop.
Introduce a new per-network-namespace sysctl knob, sysctl_tcp_rto_max, that sets the maximum TCP retransmission timeout (TCP_RTO_MAX) in seconds.
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 9a14a08..f12c655 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -125,6 +125,7 @@ struct netns_ipv4 {
int sysctl_tcp_sack;
int sysctl_tcp_window_scaling;
int sysctl_tcp_timestamps;
+ int sysctl_tcp_rto_max;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 7048329..801a03a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -137,7 +137,7 @@
#define TCP_DELACK_MIN 4U
#define TCP_ATO_MIN 4U
#endif
-#define TCP_RTO_MAX ((unsigned)(120*HZ))
+#define TCP_RTO_MAX ((unsigned)(net->ipv4.sysctl_tcp_rto_max*HZ))
#define TCP_RTO_MIN ((unsigned)(HZ/5))
#define TCP_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */
#define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ)) /* RFC 1122 initial RTO value, now
@@ -624,6 +624,8 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
static inline void tcp_bound_rto(const struct sock *sk)
{
+ struct net *net = sock_net(sk);
+
if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX)
inet_csk(sk)->icsk_rto = TCP_RTO_MAX;
}
@@ -1210,6 +1212,8 @@ static inline unsigned long tcp_probe0_when(const struct sock *sk,
static inline void tcp_check_probe_timer(struct sock *sk)
{
+ struct net *net = sock_net(sk);
+
if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending)
inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
tcp_probe0_base(sk), TCP_RTO_MAX);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 4089c01..c46d441 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -729,6 +729,8 @@ static void reqsk_timer_handler(unsigned long data)
!inet_rtx_syn_ack(sk_listener, req) ||
inet_rsk(req)->acked)) {
unsigned long timeo;
+ struct sock *sk = sk_listener;
+ struct net *net = sock_net(sk);
if (req->num_timeout++ == 0)
atomic_dec(&queue->young);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 9bf8097..b46847f 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1141,6 +1141,13 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "tcp_rto_max",
+ .data = &init_net.ipv4.sysctl_tcp_rto_max,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
{ }
};
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index ce9c7fe..ac61aa7 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -176,6 +176,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
{
struct tcp_sock *tp;
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
+ struct net *net = sock_net(sk);
struct sock *child;
bool own_req;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2920e0c..4f7ffc5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2030,6 +2030,7 @@ static bool tcp_check_sack_reneging(struct sock *sk, int flag)
{
if (flag & FLAG_SACK_RENEGING) {
struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4),
msecs_to_jiffies(10));
@@ -2990,6 +2991,7 @@ void tcp_rearm_rto(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
/* If the retrans timer is currently being used by Fast Open
* for SYN-ACK retrans purpose, stay put.
@@ -3250,6 +3252,7 @@ static void tcp_ack_probe(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct net *net = sock_net(sk);
/* Was it a usable window open? */
@@ -5655,6 +5658,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
struct tcp_fastopen_cookie foc = { .len = -1 };
int saved_clamp = tp->rx_opt.mss_clamp;
bool fastopen_fail;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a20e7f0..db03ab6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1551,6 +1551,7 @@ void tcp_v4_early_demux(struct sk_buff *skb)
bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
if (sysctl_tcp_low_latency || !tp->ucopy.task)
return false;
@@ -2527,6 +2528,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_sack = 1;
net->ipv4.sysctl_tcp_window_scaling = 1;
net->ipv4.sysctl_tcp_timestamps = 1;
+ net->ipv4.sysctl_tcp_rto_max = 120;
return 0;
fail:
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 0ff83c1..9f2b12d 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -563,6 +563,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
bool paws_reject = false;
bool own_req;
+ struct net *net = sock_net(sk);
tmp_opt.saw_tstamp = 0;
if (th->doff > (sizeof(struct tcphdr)>>2)) {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 2f1588b..6b2c463 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2377,6 +2377,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
u32 timeout, tlp_time_stamp, rto_time_stamp;
u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
@@ -2933,6 +2934,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
struct sk_buff *skb;
struct sk_buff *hole = NULL;
u32 max_segs;
@@ -3445,6 +3447,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
int tcp_connect(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
struct sk_buff *buff;
int err;
@@ -3549,6 +3552,7 @@ void tcp_send_delayed_ack(struct sock *sk)
void tcp_send_ack(struct sock *sk)
{
struct sk_buff *buff;
+ struct net *net = sock_net(sk);
/* If we have been reset, we may not send again. */
if (sk->sk_state == TCP_CLOSE)
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index c0feeee..9f55728 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -59,6 +59,7 @@ static void tcp_write_err(struct sock *sk)
static int tcp_out_of_resources(struct sock *sk, bool do_reset)
{
struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
int shift = 0;
/* If peer does not open window for long time, or did not transmit
@@ -151,6 +152,7 @@ static bool retransmits_timed_out(struct sock *sk,
{
const unsigned int rto_base = TCP_RTO_MIN;
unsigned int linear_backoff_thresh, start_ts;
+ struct net *net = sock_net(sk);
if (!inet_csk(sk)->icsk_retransmits)
return false;
@@ -317,6 +319,7 @@ static void tcp_probe_timer(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
int max_probes;
u32 start_ts;
@@ -367,8 +370,9 @@ static void tcp_probe_timer(struct sock *sk)
static void tcp_fastopen_synack_timer(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct net *net = sock_net(sk);
int max_retries = icsk->icsk_syn_retries ? :
- sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */
+ net->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */
struct request_sock *req;
req = tcp_sk(sk)->fastopen_rsk;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment