tcp/dccp: fix hashdance race for passive sessions

Multiple cpus can process duplicates of incoming ACK messages
matching a SYN_RECV request socket. This is a rare event under
normal operations, but it definitely can happen.

Only one cpu must win the race, otherwise corruption would occur.
To fix this without adding new atomic ops, we use logic in
inet_ehash_nolisten() to detect whether the request was still present
in the same ehash bucket where we try to insert the new child.
If the request socket was not found, we have to undo the child
creation. This change also removes a spin_lock()/spin_unlock() pair
in reqsk_queue_unlink() for the fast path.

Fixes: e994b2f0fb ("tcp: do not lock listener to process SYN packets")
Fixes: 079096f103 ("tcp/dccp: install syn_recv requests into ehash table")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
commit 5e0724d027
parent 7b1311807f
committed by David S. Miller
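
Before the diff, a userspace illustration of the race being fixed. This is a
minimal, hypothetical pthread sketch, not kernel code: the mutex stands in for
the ehash bucket spinlock, del_node() stands in for
sk_nulls_del_node_init_rcu() (an unlink that reports whether the node was
still hashed), and the two threads play the role of two cpus handling
duplicate ACKs for the same SYN_RECV request. All names here are invented for
the sketch. Only the thread that actually removes the request inserts its
child; the loser undoes its child creation, which is exactly the policy the
patch installs in inet_ehash_insert().

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	const char *name;
	struct node *next;
};

static pthread_mutex_t bucket_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node request = { .name = "request", .next = NULL };
static struct node *bucket = &request;	/* one ehash chain */

/* Unlink @n; true only if it was still on the chain (race winner). */
static bool del_node(struct node *n)
{
	struct node **pp;

	for (pp = &bucket; *pp; pp = &(*pp)->next) {
		if (*pp == n) {
			*pp = n->next;
			n->next = NULL;
			return true;
		}
	}
	return false;	/* another "cpu" already removed it */
}

static void *hashdance(void *arg)
{
	struct node *child = calloc(1, sizeof(*child));
	bool ok;

	child->name = arg;
	pthread_mutex_lock(&bucket_lock);
	ok = del_node(&request);	/* like ret = sk_nulls_del_node_init_rcu(osk) */
	if (ok) {			/* insert only if we removed the request */
		child->next = bucket;
		bucket = child;
	}
	pthread_mutex_unlock(&bucket_lock);

	if (ok) {
		printf("%s: won, child hashed\n", child->name);
	} else {
		printf("%s: lost, undoing child creation\n", child->name);
		free(child);	/* the "undo the child creation" step */
	}
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;

	pthread_create(&t1, NULL, hashdance, "cpu0");
	pthread_create(&t2, NULL, hashdance, "cpu1");
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	return 0;
}

(Build with -pthread; whichever thread takes the lock first wins, the other
frees its child, mirroring the two outcomes of the patched code.)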
@@ -407,13 +407,13 @@ static u32 inet_sk_port_offset(const struct sock *sk)
 /* insert a socket into ehash, and eventually remove another one
  * (The another one can be a SYN_RECV or TIMEWAIT
  */
-int inet_ehash_insert(struct sock *sk, struct sock *osk)
+bool inet_ehash_insert(struct sock *sk, struct sock *osk)
 {
 	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
 	struct hlist_nulls_head *list;
 	struct inet_ehash_bucket *head;
 	spinlock_t *lock;
-	int ret = 0;
+	bool ret = true;
 
 	WARN_ON_ONCE(!sk_unhashed(sk));
 
@@ -423,30 +423,41 @@ int inet_ehash_insert(struct sock *sk, struct sock *osk)
 	lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
 
 	spin_lock(lock);
-	__sk_nulls_add_node_rcu(sk, list);
 	if (osk) {
-		WARN_ON(sk->sk_hash != osk->sk_hash);
-		sk_nulls_del_node_init_rcu(osk);
+		WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
+		ret = sk_nulls_del_node_init_rcu(osk);
 	}
+	if (ret)
+		__sk_nulls_add_node_rcu(sk, list);
 	spin_unlock(lock);
 	return ret;
 }
 
-void __inet_hash_nolisten(struct sock *sk, struct sock *osk)
+bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
 {
-	inet_ehash_insert(sk, osk);
-	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+	bool ok = inet_ehash_insert(sk, osk);
+
+	if (ok) {
+		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+	} else {
+		percpu_counter_inc(sk->sk_prot->orphan_count);
+		sk->sk_state = TCP_CLOSE;
+		sock_set_flag(sk, SOCK_DEAD);
+		inet_csk_destroy_sock(sk);
+	}
+	return ok;
 }
-EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
+EXPORT_SYMBOL_GPL(inet_ehash_nolisten);
 
 void __inet_hash(struct sock *sk, struct sock *osk)
 {
 	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
 	struct inet_listen_hashbucket *ilb;
 
-	if (sk->sk_state != TCP_LISTEN)
-		return __inet_hash_nolisten(sk, osk);
+	if (sk->sk_state != TCP_LISTEN) {
+		inet_ehash_nolisten(sk, osk);
+		return;
+	}
 	WARN_ON(!sk_unhashed(sk));
 	ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
 
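
A note on the new contract before the remaining call-site updates:
inet_ehash_nolisten() now returns whether the child took the request's place
in the bucket, and on failure it orphans and destroys the child itself, so
callers only need to test the return value. Below is a toy, compilable model
of that contract; the toy_* names are invented for illustration and are not
kernel API.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_sock {
	int state;	/* 0 plays the role of TCP_CLOSE */
	bool dead;	/* plays the role of SOCK_DEAD */
};

/* Stand-in for inet_ehash_insert(): the parameter simulates whether
 * the SYN_RECV request was still found in the bucket. */
static bool toy_ehash_insert(struct toy_sock *sk, bool request_found)
{
	(void)sk;
	return request_found;
}

static bool toy_ehash_nolisten(struct toy_sock *sk, bool request_found)
{
	bool ok = toy_ehash_insert(sk, request_found);

	if (ok) {
		printf("inserted: account the socket\n");	/* sock_prot_inuse_add() */
	} else {
		sk->state = 0;		/* sk->sk_state = TCP_CLOSE */
		sk->dead = true;	/* sock_set_flag(sk, SOCK_DEAD) */
		free(sk);		/* inet_csk_destroy_sock() analogue */
		printf("lost race: child destroyed internally\n");
	}
	return ok;
}

int main(void)
{
	struct toy_sock *winner = calloc(1, sizeof(*winner));
	struct toy_sock *loser = calloc(1, sizeof(*loser));

	toy_ehash_nolisten(winner, true);	/* caller keeps using the child */
	toy_ehash_nolisten(loser, false);	/* child freed inside; do not touch */
	free(winner);
	return 0;
}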
@@ -567,7 +578,7 @@ ok:
 		inet_bind_hash(sk, tb, port);
 		if (sk_unhashed(sk)) {
 			inet_sk(sk)->inet_sport = htons(port);
-			__inet_hash_nolisten(sk, (struct sock *)tw);
+			inet_ehash_nolisten(sk, (struct sock *)tw);
 		}
 		if (tw)
 			inet_twsk_bind_unhash(tw, hinfo);
@@ -584,7 +595,7 @@ ok:
 	tb = inet_csk(sk)->icsk_bind_hash;
 	spin_lock_bh(&head->lock);
 	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
-		__inet_hash_nolisten(sk, NULL);
+		inet_ehash_nolisten(sk, NULL);
 		spin_unlock_bh(&head->lock);
 		return 0;
 	} else {