Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 65e6d90

Browse files
yydcool authored and davem330 committed
net-tcp: Disable TCP ssthresh metrics cache by default
This patch introduces a sysctl knob "net.ipv4.tcp_no_ssthresh_metrics_save" that disables the TCP ssthresh metrics cache by default. Other parts of the TCP metrics cache, e.g. rtt and cwnd, remain unchanged. As modern networks become more and more dynamic, the TCP metrics cache today often causes more harm than benefit. For example, the same IP address is often shared by different subscribers behind NAT in residential networks. Even if the IP address is not shared by different users, caching the slow-start threshold of a previous short flow using loss-based congestion control (e.g. cubic) often causes future longer flows on the same network path to exit slow-start prematurely with abysmal throughput. Caching ssthresh is very risky and can lead to terrible performance. Therefore it makes sense to disable ssthresh caching by default and let administrators opt in for specific networks. This practice has also worked well for several years of deployment with CUBIC congestion control at Google. Acked-by: Eric Dumazet <[email protected]> Acked-by: Neal Cardwell <[email protected]> Acked-by: Yuchung Cheng <[email protected]> Signed-off-by: Kevin(Yudong) Yang <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 4e7696d commit 65e6d90

File tree

5 files changed

+24
-4
lines changed

5 files changed

+24
-4
lines changed

Documentation/networking/ip-sysctl.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,10 @@ tcp_no_metrics_save - BOOLEAN
479479
degradation. If set, TCP will not cache metrics on closing
480480
connections.
481481

482+
tcp_no_ssthresh_metrics_save - BOOLEAN
483+
Controls whether TCP saves ssthresh metrics in the route cache.
484+
Default is 1, which disables both saving and reuse of cached ssthresh.
485+
482486
tcp_orphan_retries - INTEGER
483487
This value influences the timeout of a locally closed TCP connection,
484488
when RTO retransmissions remain unacknowledged.

include/net/netns/ipv4.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ struct netns_ipv4 {
154154
int sysctl_tcp_adv_win_scale;
155155
int sysctl_tcp_frto;
156156
int sysctl_tcp_nometrics_save;
157+
int sysctl_tcp_no_ssthresh_metrics_save;
157158
int sysctl_tcp_moderate_rcvbuf;
158159
int sysctl_tcp_tso_win_divisor;
159160
int sysctl_tcp_workaround_signed_windows;

net/ipv4/sysctl_net_ipv4.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1192,6 +1192,15 @@ static struct ctl_table ipv4_net_table[] = {
11921192
.mode = 0644,
11931193
.proc_handler = proc_dointvec,
11941194
},
1195+
{
1196+
.procname = "tcp_no_ssthresh_metrics_save",
1197+
.data = &init_net.ipv4.sysctl_tcp_no_ssthresh_metrics_save,
1198+
.maxlen = sizeof(int),
1199+
.mode = 0644,
1200+
.proc_handler = proc_dointvec_minmax,
1201+
.extra1 = SYSCTL_ZERO,
1202+
.extra2 = SYSCTL_ONE,
1203+
},
11951204
{
11961205
.procname = "tcp_moderate_rcvbuf",
11971206
.data = &init_net.ipv4.sysctl_tcp_moderate_rcvbuf,

net/ipv4/tcp_ipv4.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2674,6 +2674,7 @@ static int __net_init tcp_sk_init(struct net *net)
26742674
net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
26752675
net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
26762676
net->ipv4.sysctl_tcp_tw_reuse = 2;
2677+
net->ipv4.sysctl_tcp_no_ssthresh_metrics_save = 1;
26772678

26782679
cnt = tcp_hashinfo.ehash_mask + 1;
26792680
net->ipv4.tcp_death_row.sysctl_max_tw_buckets = cnt / 2;

net/ipv4/tcp_metrics.c

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -385,7 +385,8 @@ void tcp_update_metrics(struct sock *sk)
385385

386386
if (tcp_in_initial_slowstart(tp)) {
387387
/* Slow start still did not finish. */
388-
if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
388+
if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
389+
!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
389390
val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
390391
if (val && (tp->snd_cwnd >> 1) > val)
391392
tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
@@ -400,7 +401,8 @@ void tcp_update_metrics(struct sock *sk)
400401
} else if (!tcp_in_slow_start(tp) &&
401402
icsk->icsk_ca_state == TCP_CA_Open) {
402403
/* Cong. avoidance phase, cwnd is reliable. */
403-
if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
404+
if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
405+
!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
404406
tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
405407
max(tp->snd_cwnd >> 1, tp->snd_ssthresh));
406408
if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) {
@@ -416,7 +418,8 @@ void tcp_update_metrics(struct sock *sk)
416418
tcp_metric_set(tm, TCP_METRIC_CWND,
417419
(val + tp->snd_ssthresh) >> 1);
418420
}
419-
if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
421+
if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
422+
!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
420423
val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
421424
if (val && tp->snd_ssthresh > val)
422425
tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
@@ -441,6 +444,7 @@ void tcp_init_metrics(struct sock *sk)
441444
{
442445
struct dst_entry *dst = __sk_dst_get(sk);
443446
struct tcp_sock *tp = tcp_sk(sk);
447+
struct net *net = sock_net(sk);
444448
struct tcp_metrics_block *tm;
445449
u32 val, crtt = 0; /* cached RTT scaled by 8 */
446450

@@ -458,7 +462,8 @@ void tcp_init_metrics(struct sock *sk)
458462
if (tcp_metric_locked(tm, TCP_METRIC_CWND))
459463
tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND);
460464

461-
val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
465+
val = net->ipv4.sysctl_tcp_no_ssthresh_metrics_save ?
466+
0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
462467
if (val) {
463468
tp->snd_ssthresh = val;
464469
if (tp->snd_ssthresh > tp->snd_cwnd_clamp)

0 commit comments

Comments
 (0)