rxrpc: Support network namespacing

Support network namespacing in AF_RXRPC with the following changes:

 (1) All the local endpoint, peer and call lists, locks, counters, etc. are
     moved into the per-namespace record (a sketch of which follows this
     list).

 (2) All the connection tracking is moved into the per-namespace record
     with the exception of the client connection ID tree, which is kept
     global so that connection IDs are kept unique per-machine.

 (3) Each namespace gets its own epoch.  This allows each network namespace
     to pretend to be a separate client machine.

 (4) The /proc/net/rxrpc_xxx files are now called /proc/net/rxrpc/xxx and
     the contents reflect the namespace.
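
For orientation, a rough sketch of that per-namespace record.  Only the
client-connection fields that the diff below touches are shown, with types
inferred from how the code uses them; the real structure lives in
net/rxrpc/ar-internal.h and carries more fields than this:

	struct rxrpc_net {
		u32			epoch;		/* Per-namespace epoch */
		struct list_head	conn_proc_list;	/* Conns shown in proc */
		rwlock_t		conn_lock;	/* Guards conn_proc_list */
		unsigned int		nr_client_conns;
		unsigned int		nr_active_client_conns;
		bool			kill_all_client_conns;
		spinlock_t		client_conn_cache_lock;
		spinlock_t		client_conn_discard_lock;
		struct list_head	waiting_client_conns;
		struct list_head	active_client_conns;
		struct list_head	idle_client_conns;
		struct delayed_work	client_conn_reaper;
	};

The rxrpc_net() accessor used in the diff to go from a struct net to this
record is presumably the usual net_generic() lookup keyed by a pernet ID:

	static inline struct rxrpc_net *rxrpc_net(struct net *net)
	{
		return net_generic(net, rxrpc_net_id);
	}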

fs/afs/ should be okay with this patch as, for the moment, it explicitly
requires the current net namespace to be init_net before it will permit a
mount to proceed (see the illustration below).  It will, however, need
updating so that cells, IP addresses and DNS records are per-namespace
also.
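
The guard in question is of this general form (an illustration, not
necessarily the verbatim fs/afs/ check):

	if (current->nsproxy->net_ns != &init_net)
		return -EINVAL;

so mounts attempted from any other namespace are simply refused until afs
grows per-namespace state of its own.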

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
commit 2baec2c3f8
parent 878cd3ba37
Author: David Howells
Date:   2017-05-24 17:02:32 +01:00
Committed by: David S. Miller

12 changed files with 356 additions and 216 deletions

Of the changed files, only net/rxrpc/conn_client.c is shown below:

diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -31,7 +31,7 @@
  * may freely grant available channels to new calls and calls may be
  * waiting on it for channels to become available.
  *
- * The connection is on the rxrpc_active_client_conns list which is kept
+ * The connection is on the rxnet->active_client_conns list which is kept
  * in activation order for culling purposes.
  *
  * rxrpc_nr_active_client_conns is held incremented also.
@@ -46,7 +46,7 @@
  * expires, the EXPOSED flag is cleared and the connection transitions to
  * the INACTIVE state.
  *
- * The connection is on the rxrpc_idle_client_conns list which is kept in
+ * The connection is on the rxnet->idle_client_conns list which is kept in
  * order of how soon they'll expire.
  *
  * There are flags of relevance to the cache:
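
[ For context on the two comment fixes above: conn->cache_state places a
  client connection on at most one of three lists, all of which become
  per-net in this patch.  The enum itself is unchanged here; abridged from
  ar-internal.h and annotated as a reading aid:

	enum rxrpc_conn_cache_state {
		RXRPC_CONN_CLIENT_INACTIVE,	/* Not on any per-net list */
		RXRPC_CONN_CLIENT_WAITING,	/* On rxnet->waiting_client_conns */
		RXRPC_CONN_CLIENT_ACTIVE,	/* On rxnet->active_client_conns */
		RXRPC_CONN_CLIENT_IDLE,		/* On rxnet->idle_client_conns */
	};
]
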
@@ -85,27 +85,13 @@ __read_mostly unsigned int rxrpc_reap_client_connections = 900;
 __read_mostly unsigned int rxrpc_conn_idle_client_expiry = 2 * 60 * HZ;
 __read_mostly unsigned int rxrpc_conn_idle_client_fast_expiry = 2 * HZ;
 
-static unsigned int rxrpc_nr_client_conns;
-static unsigned int rxrpc_nr_active_client_conns;
-static __read_mostly bool rxrpc_kill_all_client_conns;
-
-static DEFINE_SPINLOCK(rxrpc_client_conn_cache_lock);
-static DEFINE_SPINLOCK(rxrpc_client_conn_discard_mutex);
-static LIST_HEAD(rxrpc_waiting_client_conns);
-static LIST_HEAD(rxrpc_active_client_conns);
-static LIST_HEAD(rxrpc_idle_client_conns);
-
 /*
  * We use machine-unique IDs for our client connections.
  */
 DEFINE_IDR(rxrpc_client_conn_ids);
 static DEFINE_SPINLOCK(rxrpc_conn_id_lock);
 
-static void rxrpc_cull_active_client_conns(void);
-static void rxrpc_discard_expired_client_conns(struct work_struct *);
-
-static DECLARE_DELAYED_WORK(rxrpc_client_conn_reap,
-			    rxrpc_discard_expired_client_conns);
+static void rxrpc_cull_active_client_conns(struct rxrpc_net *);
 
 /*
  * Get a connection ID and epoch for a client connection from the global pool.
@@ -116,6 +102,7 @@ static DECLARE_DELAYED_WORK(rxrpc_client_conn_reap,
 static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn,
 					  gfp_t gfp)
 {
+	struct rxrpc_net *rxnet = conn->params.local->rxnet;
 	int id;
 
 	_enter("");
@@ -131,7 +118,7 @@ static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn,
 	spin_unlock(&rxrpc_conn_id_lock);
 	idr_preload_end();
 
-	conn->proto.epoch = rxrpc_epoch;
+	conn->proto.epoch = rxnet->epoch;
 	conn->proto.cid = id << RXRPC_CIDSHIFT;
 	set_bit(RXRPC_CONN_HAS_IDR, &conn->flags);
 	_leave(" [CID %x]", conn->proto.cid);
@@ -183,6 +170,7 @@ static struct rxrpc_connection *
 rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
 {
 	struct rxrpc_connection *conn;
+	struct rxrpc_net *rxnet = cp->local->rxnet;
 	int ret;
 
 	_enter("");
@@ -213,9 +201,9 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
 	if (ret < 0)
 		goto error_2;
 
-	write_lock(&rxrpc_connection_lock);
-	list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list);
-	write_unlock(&rxrpc_connection_lock);
+	write_lock(&rxnet->conn_lock);
+	list_add_tail(&conn->proc_link, &rxnet->conn_proc_list);
+	write_unlock(&rxnet->conn_lock);
 
 	/* We steal the caller's peer ref. */
 	cp->peer = NULL;
@@ -243,12 +231,13 @@ error_0:
  */
 static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn)
 {
+	struct rxrpc_net *rxnet = conn->params.local->rxnet;
 	int id_cursor, id, distance, limit;
 
 	if (test_bit(RXRPC_CONN_DONT_REUSE, &conn->flags))
 		goto dont_reuse;
 
-	if (conn->proto.epoch != rxrpc_epoch)
+	if (conn->proto.epoch != rxnet->epoch)
 		goto mark_dont_reuse;
 
 	/* The IDR tree gets very expensive on memory if the connection IDs are
@@ -440,12 +429,13 @@ error:
 
 /*
  * Activate a connection.
  */
-static void rxrpc_activate_conn(struct rxrpc_connection *conn)
+static void rxrpc_activate_conn(struct rxrpc_net *rxnet,
+				struct rxrpc_connection *conn)
 {
 	trace_rxrpc_client(conn, -1, rxrpc_client_to_active);
 	conn->cache_state = RXRPC_CONN_CLIENT_ACTIVE;
-	rxrpc_nr_active_client_conns++;
-	list_move_tail(&conn->cache_link, &rxrpc_active_client_conns);
+	rxnet->nr_active_client_conns++;
+	list_move_tail(&conn->cache_link, &rxnet->active_client_conns);
 }
 
@@ -460,7 +450,8 @@ static void rxrpc_activate_conn(struct rxrpc_connection *conn)
  * channels if it has been culled to make space and then re-requested by a new
  * call.
  */
-static void rxrpc_animate_client_conn(struct rxrpc_connection *conn)
+static void rxrpc_animate_client_conn(struct rxrpc_net *rxnet,
+				      struct rxrpc_connection *conn)
 {
 	unsigned int nr_conns;
 
@@ -469,12 +460,12 @@ static void rxrpc_animate_client_conn(struct rxrpc_connection *conn)
 	if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE)
 		goto out;
 
-	spin_lock(&rxrpc_client_conn_cache_lock);
+	spin_lock(&rxnet->client_conn_cache_lock);
 
-	nr_conns = rxrpc_nr_client_conns;
+	nr_conns = rxnet->nr_client_conns;
 	if (!test_and_set_bit(RXRPC_CONN_COUNTED, &conn->flags)) {
 		trace_rxrpc_client(conn, -1, rxrpc_client_count);
-		rxrpc_nr_client_conns = nr_conns + 1;
+		rxnet->nr_client_conns = nr_conns + 1;
 	}
 
 	switch (conn->cache_state) {
@@ -494,21 +485,21 @@ static void rxrpc_animate_client_conn(struct rxrpc_connection *conn)
 	}
 
 out_unlock:
-	spin_unlock(&rxrpc_client_conn_cache_lock);
+	spin_unlock(&rxnet->client_conn_cache_lock);
 out:
 	_leave(" [%d]", conn->cache_state);
 	return;
 
 activate_conn:
 	_debug("activate");
-	rxrpc_activate_conn(conn);
+	rxrpc_activate_conn(rxnet, conn);
 	goto out_unlock;
 
 wait_for_capacity:
 	_debug("wait");
 	trace_rxrpc_client(conn, -1, rxrpc_client_to_waiting);
 	conn->cache_state = RXRPC_CONN_CLIENT_WAITING;
-	list_move_tail(&conn->cache_link, &rxrpc_waiting_client_conns);
+	list_move_tail(&conn->cache_link, &rxnet->waiting_client_conns);
 	goto out_unlock;
 }
 
@@ -660,18 +651,19 @@ int rxrpc_connect_call(struct rxrpc_call *call,
 		       struct sockaddr_rxrpc *srx,
 		       gfp_t gfp)
 {
+	struct rxrpc_net *rxnet = cp->local->rxnet;
 	int ret;
 
 	_enter("{%d,%lx},", call->debug_id, call->user_call_ID);
 
-	rxrpc_discard_expired_client_conns(NULL);
-	rxrpc_cull_active_client_conns();
+	rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper.work);
+	rxrpc_cull_active_client_conns(rxnet);
 
 	ret = rxrpc_get_client_conn(call, cp, srx, gfp);
 	if (ret < 0)
 		return ret;
 
-	rxrpc_animate_client_conn(call->conn);
+	rxrpc_animate_client_conn(rxnet, call->conn);
 	rxrpc_activate_channels(call->conn);
 
 	ret = rxrpc_wait_for_channel(call, gfp);
@@ -729,6 +721,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
 	unsigned int channel = call->cid & RXRPC_CHANNELMASK;
 	struct rxrpc_connection *conn = call->conn;
 	struct rxrpc_channel *chan = &conn->channels[channel];
+	struct rxrpc_net *rxnet = rxrpc_net(sock_net(&call->socket->sk));
 
 	trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect);
 	call->conn = NULL;
@@ -750,7 +743,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
 	/* We must deactivate or idle the connection if it's now
 	 * waiting for nothing.
 	 */
-	spin_lock(&rxrpc_client_conn_cache_lock);
+	spin_lock(&rxnet->client_conn_cache_lock);
 	if (conn->cache_state == RXRPC_CONN_CLIENT_WAITING &&
 	    list_empty(&conn->waiting_calls) &&
 	    !conn->active_chans)
@@ -787,14 +780,14 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
 	 * list.  It might even get moved back to the active list whilst we're
 	 * waiting for the lock.
 	 */
-	spin_lock(&rxrpc_client_conn_cache_lock);
+	spin_lock(&rxnet->client_conn_cache_lock);
 
 	switch (conn->cache_state) {
 	case RXRPC_CONN_CLIENT_ACTIVE:
 		if (list_empty(&conn->waiting_calls)) {
 			rxrpc_deactivate_one_channel(conn, channel);
 			if (!conn->active_chans) {
-				rxrpc_nr_active_client_conns--;
+				rxnet->nr_active_client_conns--;
 				goto idle_connection;
 			}
 			goto out;
@@ -820,7 +813,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
 	}
 
 out:
-	spin_unlock(&rxrpc_client_conn_cache_lock);
+	spin_unlock(&rxnet->client_conn_cache_lock);
 out_2:
 	spin_unlock(&conn->channel_lock);
 	rxrpc_put_connection(conn);
@@ -835,11 +828,11 @@ idle_connection:
 		trace_rxrpc_client(conn, channel, rxrpc_client_to_idle);
 		conn->idle_timestamp = jiffies;
 		conn->cache_state = RXRPC_CONN_CLIENT_IDLE;
-		list_move_tail(&conn->cache_link, &rxrpc_idle_client_conns);
-		if (rxrpc_idle_client_conns.next == &conn->cache_link &&
-		    !rxrpc_kill_all_client_conns)
+		list_move_tail(&conn->cache_link, &rxnet->idle_client_conns);
+		if (rxnet->idle_client_conns.next == &conn->cache_link &&
+		    !rxnet->kill_all_client_conns)
 			queue_delayed_work(rxrpc_workqueue,
-					   &rxrpc_client_conn_reap,
+					   &rxnet->client_conn_reaper,
 					   rxrpc_conn_idle_client_expiry);
 	} else {
 		trace_rxrpc_client(conn, channel, rxrpc_client_to_inactive);
@@ -857,6 +850,7 @@ rxrpc_put_one_client_conn(struct rxrpc_connection *conn)
 {
 	struct rxrpc_connection *next = NULL;
 	struct rxrpc_local *local = conn->params.local;
+	struct rxrpc_net *rxnet = local->rxnet;
 	unsigned int nr_conns;
 
 	trace_rxrpc_client(conn, -1, rxrpc_client_cleanup);
@@ -875,18 +869,18 @@ rxrpc_put_one_client_conn(struct rxrpc_connection *conn)
 
 	if (test_bit(RXRPC_CONN_COUNTED, &conn->flags)) {
 		trace_rxrpc_client(conn, -1, rxrpc_client_uncount);
-		spin_lock(&rxrpc_client_conn_cache_lock);
-		nr_conns = --rxrpc_nr_client_conns;
+		spin_lock(&rxnet->client_conn_cache_lock);
+		nr_conns = --rxnet->nr_client_conns;
 
 		if (nr_conns < rxrpc_max_client_connections &&
-		    !list_empty(&rxrpc_waiting_client_conns)) {
-			next = list_entry(rxrpc_waiting_client_conns.next,
+		    !list_empty(&rxnet->waiting_client_conns)) {
+			next = list_entry(rxnet->waiting_client_conns.next,
 					  struct rxrpc_connection, cache_link);
 			rxrpc_get_connection(next);
-			rxrpc_activate_conn(next);
+			rxrpc_activate_conn(rxnet, next);
 		}
 
-		spin_unlock(&rxrpc_client_conn_cache_lock);
+		spin_unlock(&rxnet->client_conn_cache_lock);
 	}
 
 	rxrpc_kill_connection(conn);
@@ -921,10 +915,10 @@ void rxrpc_put_client_conn(struct rxrpc_connection *conn)
 /*
  * Kill the longest-active client connections to make room for new ones.
  */
-static void rxrpc_cull_active_client_conns(void)
+static void rxrpc_cull_active_client_conns(struct rxrpc_net *rxnet)
 {
 	struct rxrpc_connection *conn;
-	unsigned int nr_conns = rxrpc_nr_client_conns;
+	unsigned int nr_conns = rxnet->nr_client_conns;
 	unsigned int nr_active, limit;
 
 	_enter("");
@@ -936,12 +930,12 @@ static void rxrpc_cull_active_client_conns(void)
 	}
 	limit = rxrpc_reap_client_connections;
 
-	spin_lock(&rxrpc_client_conn_cache_lock);
-	nr_active = rxrpc_nr_active_client_conns;
+	spin_lock(&rxnet->client_conn_cache_lock);
+	nr_active = rxnet->nr_active_client_conns;
 
 	while (nr_active > limit) {
-		ASSERT(!list_empty(&rxrpc_active_client_conns));
-		conn = list_entry(rxrpc_active_client_conns.next,
+		ASSERT(!list_empty(&rxnet->active_client_conns));
+		conn = list_entry(rxnet->active_client_conns.next,
 				  struct rxrpc_connection, cache_link);
 		ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_ACTIVE);
 
@@ -953,14 +947,14 @@ static void rxrpc_cull_active_client_conns(void)
 			trace_rxrpc_client(conn, -1, rxrpc_client_to_waiting);
 			conn->cache_state = RXRPC_CONN_CLIENT_WAITING;
 			list_move_tail(&conn->cache_link,
-				       &rxrpc_waiting_client_conns);
+				       &rxnet->waiting_client_conns);
 		}
 
 		nr_active--;
 	}
 
-	rxrpc_nr_active_client_conns = nr_active;
-	spin_unlock(&rxrpc_client_conn_cache_lock);
+	rxnet->nr_active_client_conns = nr_active;
+	spin_unlock(&rxnet->client_conn_cache_lock);
 	ASSERTCMP(nr_active, >=, 0);
 	_leave(" [culled]");
 }
@@ -972,22 +966,25 @@ static void rxrpc_cull_active_client_conns(void)
  * This may be called from conn setup or from a work item so cannot be
  * considered non-reentrant.
  */
-static void rxrpc_discard_expired_client_conns(struct work_struct *work)
+void rxrpc_discard_expired_client_conns(struct work_struct *work)
 {
 	struct rxrpc_connection *conn;
+	struct rxrpc_net *rxnet =
+		container_of(to_delayed_work(work),
+			     struct rxrpc_net, client_conn_reaper);
 	unsigned long expiry, conn_expires_at, now;
 	unsigned int nr_conns;
 	bool did_discard = false;
 
-	_enter("%c", work ? 'w' : 'n');
+	_enter("");
 
-	if (list_empty(&rxrpc_idle_client_conns)) {
+	if (list_empty(&rxnet->idle_client_conns)) {
 		_leave(" [empty]");
 		return;
 	}
 
 	/* Don't double up on the discarding */
-	if (!spin_trylock(&rxrpc_client_conn_discard_mutex)) {
+	if (!spin_trylock(&rxnet->client_conn_discard_lock)) {
 		_leave(" [already]");
 		return;
 	}
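
[ The container_of(to_delayed_work(work), ...) above is also why
  rxrpc_connect_call() now passes &rxnet->client_conn_reaper.work where it
  used to pass NULL: the work pointer is the only route back to the right
  rxrpc_net, so the function may no longer be called with NULL (hence the
  old _enter("%c", work ? 'w' : 'n') goes away).  The pattern in
  isolation, with a hypothetical struct foo standing in for rxrpc_net:

	#include <linux/workqueue.h>

	struct foo {
		struct delayed_work worker;	/* queued on a workqueue */
	};

	static void foo_work(struct work_struct *work)
	{
		/* work points into foo::worker; to_delayed_work() recovers
		 * the delayed_work, container_of() the enclosing foo.
		 */
		struct foo *foo = container_of(to_delayed_work(work),
					       struct foo, worker);
		/* ... operate on foo ... */
	}
]
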
@@ -995,19 +992,19 @@ static void rxrpc_discard_expired_client_conns(struct work_struct *work)
 	/* We keep an estimate of what the number of conns ought to be after
 	 * we've discarded some so that we don't overdo the discarding.
 	 */
-	nr_conns = rxrpc_nr_client_conns;
+	nr_conns = rxnet->nr_client_conns;
 
 next:
-	spin_lock(&rxrpc_client_conn_cache_lock);
+	spin_lock(&rxnet->client_conn_cache_lock);
 
-	if (list_empty(&rxrpc_idle_client_conns))
+	if (list_empty(&rxnet->idle_client_conns))
 		goto out;
 
-	conn = list_entry(rxrpc_idle_client_conns.next,
+	conn = list_entry(rxnet->idle_client_conns.next,
 			  struct rxrpc_connection, cache_link);
 	ASSERT(test_bit(RXRPC_CONN_EXPOSED, &conn->flags));
 
-	if (!rxrpc_kill_all_client_conns) {
+	if (!rxnet->kill_all_client_conns) {
 		/* If the number of connections is over the reap limit, we
 		 * expedite discard by reducing the expiry timeout.  We must,
 		 * however, have at least a short grace period to be able to do
@@ -1030,7 +1027,7 @@ next:
 	conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE;
 	list_del_init(&conn->cache_link);
 
-	spin_unlock(&rxrpc_client_conn_cache_lock);
+	spin_unlock(&rxnet->client_conn_cache_lock);
 
 	/* When we cleared the EXPOSED flag, we took on responsibility for the
 	 * reference that that had on the usage count.  We deal with that here.
@@ -1050,14 +1047,14 @@ not_yet_expired:
 	 * then things get messier.
 	 */
 	_debug("not yet");
-	if (!rxrpc_kill_all_client_conns)
+	if (!rxnet->kill_all_client_conns)
 		queue_delayed_work(rxrpc_workqueue,
-				   &rxrpc_client_conn_reap,
+				   &rxnet->client_conn_reaper,
 				   conn_expires_at - now);
 out:
-	spin_unlock(&rxrpc_client_conn_cache_lock);
-	spin_unlock(&rxrpc_client_conn_discard_mutex);
+	spin_unlock(&rxnet->client_conn_cache_lock);
+	spin_unlock(&rxnet->client_conn_discard_lock);
 
 	_leave("");
 }
@@ -1065,17 +1062,17 @@ out:
  * Preemptively destroy all the client connection records rather than waiting
  * for them to time out
  */
-void __exit rxrpc_destroy_all_client_connections(void)
+void rxrpc_destroy_all_client_connections(struct rxrpc_net *rxnet)
 {
 	_enter("");
 
-	spin_lock(&rxrpc_client_conn_cache_lock);
-	rxrpc_kill_all_client_conns = true;
-	spin_unlock(&rxrpc_client_conn_cache_lock);
+	spin_lock(&rxnet->client_conn_cache_lock);
+	rxnet->kill_all_client_conns = true;
+	spin_unlock(&rxnet->client_conn_cache_lock);
 
-	cancel_delayed_work(&rxrpc_client_conn_reap);
+	cancel_delayed_work(&rxnet->client_conn_reaper);
 
-	if (!queue_delayed_work(rxrpc_workqueue, &rxrpc_client_conn_reap, 0))
+	if (!queue_delayed_work(rxrpc_workqueue, &rxnet->client_conn_reaper, 0))
 		_debug("destroy: queue failed");
 
 	_leave("");