tcp/dccp: install syn_recv requests into ehash table

In this patch, we insert request sockets into TCP/DCCP
regular ehash table (where ESTABLISHED and TIMEWAIT sockets
are) instead of using the per listener hash table.

ACK packets find SYN_RECV pseudo sockets without having
to find and lock the listener.

In nominal conditions, this halves pressure on listener lock.

Note that this will allow for SO_REUSEPORT refinements,
so that we can select a listener using cpu/numa affinities instead
of the prior 'consistent hash', since only SYN packets will
apply this selection logic.

We will shrink listen_sock in the following patch to ease
code review.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Ying Cai <ycai@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Eric Dumazet
2015-10-02 11:43:32 -07:00
committed by David S. Miller
parent 2feda34192
commit 079096f103
15 changed files with 163 additions and 504 deletions

View File

@@ -94,73 +94,6 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk,
}
EXPORT_SYMBOL(inet6_csk_route_req);
/*
* request_sock (formerly open request) hash tables.
*/
static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport,
const u32 rnd, const u32 synq_hsize)
{
u32 c;
c = jhash_3words((__force u32)raddr->s6_addr32[0],
(__force u32)raddr->s6_addr32[1],
(__force u32)raddr->s6_addr32[2],
rnd);
c = jhash_2words((__force u32)raddr->s6_addr32[3],
(__force u32)rport,
c);
return c & (synq_hsize - 1);
}
struct request_sock *inet6_csk_search_req(struct sock *sk,
const __be16 rport,
const struct in6_addr *raddr,
const struct in6_addr *laddr,
const int iif)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
struct request_sock *req;
u32 hash = inet6_synq_hash(raddr, rport, lopt->hash_rnd,
lopt->nr_table_entries);
spin_lock(&icsk->icsk_accept_queue.syn_wait_lock);
for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) {
const struct inet_request_sock *ireq = inet_rsk(req);
if (ireq->ir_rmt_port == rport &&
req->rsk_ops->family == AF_INET6 &&
ipv6_addr_equal(&ireq->ir_v6_rmt_addr, raddr) &&
ipv6_addr_equal(&ireq->ir_v6_loc_addr, laddr) &&
(!ireq->ir_iif || ireq->ir_iif == iif)) {
atomic_inc(&req->rsk_refcnt);
WARN_ON(req->sk != NULL);
break;
}
}
spin_unlock(&icsk->icsk_accept_queue.syn_wait_lock);
return req;
}
EXPORT_SYMBOL_GPL(inet6_csk_search_req);
void inet6_csk_reqsk_queue_hash_add(struct sock *sk,
struct request_sock *req,
const unsigned long timeout)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
const u32 h = inet6_synq_hash(&inet_rsk(req)->ir_v6_rmt_addr,
inet_rsk(req)->ir_rmt_port,
lopt->hash_rnd, lopt->nr_table_entries);
reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
inet_csk_reqsk_queue_added(sk);
}
EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add);
void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
{
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;