tcp: fix cwnd limited checking to improve congestion control
Yuchung discovered tcp_is_cwnd_limited() was returning false in the slow start phase even when the application had filled the socket write queue.

All congestion modules consult tcp_is_cwnd_limited() before increasing cwnd, so this behavior prevents slow start from probing the bandwidth at full speed.

The problem is that even if the write queue is full (i.e., we are _not_ application limited), cwnd can be under-utilized if TSO auto-defers or TCP Small Queues decides to hold packets. The in_flight count can therefore stay small, to the point where tcp_is_cwnd_limited() returns false. With TCP Small Queues and FQ/pacing, this issue is more visible.

We fix this by having tcp_cwnd_validate(), which is supposed to track such things, take into account unsent_segs: the number of segments we are not sending at the moment due to TSO or TSQ, but intend to send very soon. Then, when we are cwnd-limited, we remember this fact while we process the window of ACKs that comes back.

For example, suppose we have a brand new connection with cwnd=10; we are in slow start, and we send a flight of 9 packets. By the time we have received ACKs for all 9 packets, we want our cwnd to be 18. We implement this by setting tp->lsnd_pending to 9 and considering ourselves cwnd-limited while cwnd is less than twice tp->lsnd_pending (2*9 -> 18).

This also makes tcp_is_cwnd_limited() easier to understand, by removing the GSO/TSO kludge that tried to work around the issue.

Note: the in_flight parameter can be removed in a follow-up cleanup patch.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
commit e114a710aa
parent 4e8bbb819d
committed by David S. Miller
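The hunks below show only the net/ipv4/tcp_output.c side of the change; the reworked tcp_is_cwnd_limited() itself is not included here. A minimal sketch of the rule the message describes, assuming the slow-start test lands in the include/net/tcp.h helper (the exact guards in the tree may differ):

    static inline bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
    {
            const struct tcp_sock *tp = tcp_sk(sk);

            /* In slow start, stay "cwnd limited" until cwnd reaches twice
             * the segments (sent plus ready-but-held) we last tried to
             * send, so ACKs for a full flight can double cwnd
             * (2*9 -> 18 in the example above).
             */
            if (tp->snd_cwnd <= tp->snd_ssthresh)
                    return tp->snd_cwnd < 2 * tp->lsnd_pending;

            return tp->packets_out >= tp->snd_cwnd;
    }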
@@ -1402,12 +1402,13 @@ static void tcp_cwnd_application_limited(struct sock *sk)
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
 /* Congestion window validation. (RFC2861) */
-static void tcp_cwnd_validate(struct sock *sk)
+static void tcp_cwnd_validate(struct sock *sk, u32 unsent_segs)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (tp->packets_out >= tp->snd_cwnd) {
+	tp->lsnd_pending = tp->packets_out + unsent_segs;
+	if (tcp_is_cwnd_limited(sk, 0)) {
 		/* Network is feed fully. */
 		tp->snd_cwnd_used = 0;
 		tp->snd_cwnd_stamp = tcp_time_stamp;
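For context, every congestion control module consults this predicate before growing cwnd. Reno's cong_avoid of that era looks roughly like the following (a paraphrased sketch for illustration, not part of this diff; details may differ):

    void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
    {
            struct tcp_sock *tp = tcp_sk(sk);

            /* Do not grow a window the sender cannot even fill. */
            if (!tcp_is_cwnd_limited(sk, in_flight))
                    return;

            if (tp->snd_cwnd <= tp->snd_ssthresh)
                    tcp_slow_start(tp, acked);              /* exponential growth */
            else
                    tcp_cong_avoid_ai(tp, tp->snd_cwnd);    /* additive increase */
    }

With the old packets_out >= snd_cwnd test, a flow throttled by TSQ or TSO deferral would return early here and never grow cwnd, which is exactly the symptom described above.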
@@ -1880,7 +1881,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
-	unsigned int tso_segs, sent_pkts;
+	unsigned int tso_segs, sent_pkts, unsent_segs = 0;
 	int cwnd_quota;
 	int result;
 
@@ -1924,7 +1925,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 				break;
 		} else {
 			if (!push_one && tcp_tso_should_defer(sk, skb))
-				break;
+				goto compute_unsent_segs;
 		}
 
 		/* TCP Small Queues :
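Turning the TSO-deferral break into goto compute_unsent_segs routes that early exit through the accounting added in the next hunk for the TSQ case, so segments held back by TSO auto-defer are counted in unsent_segs instead of silently making the flow look application-limited.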
@@ -1949,8 +1950,14 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 			 * there is no smp_mb__after_set_bit() yet
 			 */
 			smp_mb__after_clear_bit();
-			if (atomic_read(&sk->sk_wmem_alloc) > limit)
+			if (atomic_read(&sk->sk_wmem_alloc) > limit) {
+				u32 unsent_bytes;
+
+compute_unsent_segs:
+				unsent_bytes = tp->write_seq - tp->snd_nxt;
+				unsent_segs = DIV_ROUND_UP(unsent_bytes, mss_now);
 				break;
+			}
 		}
 
 		limit = mss_now;
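Here unsent_bytes is the data the application has queued (write_seq) beyond what has been sent (snd_nxt), and DIV_ROUND_UP() rounds a trailing partial segment up to a whole one. For reference, the kernel macro is:

    #define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))

so with mss_now = 1448 and 3000 unsent bytes, unsent_segs = DIV_ROUND_UP(3000, 1448) = 3.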
@@ -1990,7 +1997,7 @@ repair:
 		/* Send one loss probe per tail loss episode. */
 		if (push_one != 2)
 			tcp_schedule_loss_probe(sk);
-		tcp_cwnd_validate(sk);
+		tcp_cwnd_validate(sk, unsent_segs);
 		return false;
 	}
 	return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk));
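With these hooks in place, each pass through tcp_write_xmit() leaves tp->lsnd_pending equal to the packets in flight plus the segments held back by TSO or TSQ, which is what the 2 * tp->lsnd_pending slow-start test sketched above consumes while the corresponding window of ACKs returns.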