afs: Probe multiple fileservers simultaneously
Send probes to all the unprobed fileservers in a fileserver list, on all of their addresses simultaneously, in an attempt to find the fastest route without getting stuck for 20s on any server or address that does not reply.

This alleviates the problem whereby accessing a new server can take a long time because the rotation algorithm ends up rotating through all servers and addresses until it finds one that responds.

Signed-off-by: David Howells <dhowells@redhat.com>
This commit is contained in:
109
fs/afs/server.c
109
fs/afs/server.c
@@ -231,6 +231,8 @@ static struct afs_server *afs_alloc_server(struct afs_net *net,
|
||||
rwlock_init(&server->fs_lock);
|
||||
INIT_HLIST_HEAD(&server->cb_volumes);
|
||||
rwlock_init(&server->cb_break_lock);
|
||||
init_waitqueue_head(&server->probe_wq);
|
||||
spin_lock_init(&server->probe_lock);
|
||||
|
||||
afs_inc_servers_outstanding(net);
|
||||
_leave(" = %p", server);
|
||||
@@ -254,7 +256,7 @@ static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
|
||||
ret = -ERESTARTSYS;
|
||||
if (afs_begin_vlserver_operation(&vc, cell, key)) {
|
||||
while (afs_select_vlserver(&vc)) {
|
||||
if (test_bit(vc.ac.index, &vc.ac.alist->yfs))
|
||||
if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags))
|
||||
alist = afs_yfsvl_get_endpoints(&vc, uuid);
|
||||
else
|
||||
alist = afs_vl_get_addrs_u(&vc, uuid);
|
||||
@@ -365,8 +367,7 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
|
||||
struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
|
||||
struct afs_addr_cursor ac = {
|
||||
.alist = alist,
|
||||
.start = alist->index,
|
||||
.index = 0,
|
||||
.index = alist->preferred,
|
||||
.error = 0,
|
||||
};
|
||||
_enter("%p", server);
|
||||
@@ -374,6 +375,9 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
|
||||
if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
|
||||
afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
|
||||
|
||||
wait_var_event(&server->probe_outstanding,
|
||||
atomic_read(&server->probe_outstanding) == 0);
|
||||
|
||||
call_rcu(&server->rcu, afs_server_rcu);
|
||||
afs_dec_servers_outstanding(net);
|
||||
}
|
||||
@@ -506,105 +510,6 @@ void afs_purge_servers(struct afs_net *net)
|
||||
_leave("");
|
||||
}
|
||||
|
||||
/*
|
||||
* Probe a fileserver to find its capabilities.
|
||||
*
|
||||
* TODO: Try service upgrade.
|
||||
*/
|
||||
static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc)
|
||||
{
|
||||
/* Loop index used only when rewriting every address's srx_service below. */
int i;
|
||||
|
||||
_enter("");
|
||||
|
||||
/*
 * Reset the address cursor: begin at the list's current index (read once
 * via READ_ONCE), clear any previous error and mark iteration as not begun.
 */
fc->ac.start = READ_ONCE(fc->ac.alist->index);
|
||||
fc->ac.index = fc->ac.start;
|
||||
fc->ac.error = 0;
|
||||
fc->ac.begun = false;
|
||||
|
||||
/*
 * Call afs_fs_get_capabilities() once for each address that
 * afs_iterate_addresses() yields, then dispatch on fc->ac.error.
 */
while (afs_iterate_addresses(&fc->ac)) {
|
||||
afs_fs_get_capabilities(afs_v2net(fc->vnode), fc->cbi->server,
|
||||
&fc->ac, fc->key);
|
||||
switch (fc->ac.error) {
|
||||
/*
 * No error: if the server is flagged AFS_SERVER_FL_IS_YFS, switch every
 * address in the list to YFS_FS_SERVICE; then end the cursor, flag the
 * server AFS_SERVER_FL_PROBED and return true.
 */
case 0:
|
||||
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) {
|
||||
for (i = 0; i < fc->ac.alist->nr_addrs; i++)
|
||||
fc->ac.alist->addrs[i].srx_service =
|
||||
YFS_FS_SERVICE;
|
||||
}
|
||||
afs_end_cursor(&fc->ac);
|
||||
set_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags);
|
||||
return true;
|
||||
/* Abort: replace the error with afs_abort_to_error(abort_code) and stop. */
case -ECONNABORTED:
|
||||
fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
|
||||
goto error;
|
||||
/* These errors stop the probe without trying any further address. */
case -ENOMEM:
|
||||
case -ENONET:
|
||||
goto error;
|
||||
/* These errors only leave the switch, so the loop moves to the next address. */
case -ENETUNREACH:
|
||||
case -EHOSTUNREACH:
|
||||
case -ECONNREFUSED:
|
||||
case -ETIMEDOUT:
|
||||
case -ETIME:
|
||||
break;
|
||||
/* Any other error is replaced by the result of afs_io_error() and stops the probe. */
default:
|
||||
fc->ac.error = afs_io_error(NULL, afs_io_error_fs_probe_fail);
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Failure exit, also reached by falling out of the loop once
 * afs_iterate_addresses() returns false: end the cursor and return false.
 */
error:
|
||||
afs_end_cursor(&fc->ac);
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we haven't already, try probing the fileserver to get its capabilities.
|
||||
* We try not to instigate parallel probes, but it's possible that the parallel
|
||||
* probes will fail due to authentication failure when ours would succeed.
|
||||
*
|
||||
* TODO: Try sending an anonymous probe if an authenticated probe fails.
|
||||
*/
|
||||
bool afs_probe_fileserver(struct afs_fs_cursor *fc)
|
||||
{
|
||||
bool success;
|
||||
/* retries counts completed waits on AFS_SERVER_FL_PROBING. */
int ret, retries = 0;
|
||||
|
||||
_enter("");
|
||||
|
||||
retry:
|
||||
/* Nothing to do if the server is already flagged as probed. */
if (test_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags)) {
|
||||
_leave(" = t");
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * AFS_SERVER_FL_PROBING acts as a bit lock: if it was not already set, this
 * caller runs the probe itself, then clears the bit and wakes any waiters.
 * NOTE(review): the trace below prints " = t" even when success is false.
 */
if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags)) {
|
||||
success = afs_do_probe_fileserver(fc);
|
||||
clear_bit_unlock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags);
|
||||
wake_up_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING);
|
||||
_leave(" = t");
|
||||
return success;
|
||||
}
|
||||
|
||||
/*
 * The bit was already set, so wait (TASK_INTERRUPTIBLE) for it to clear.
 * A -ERESTARTSYS result is stored in fc->ac.error and fails the call.
 */
_debug("wait");
|
||||
ret = wait_on_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING,
|
||||
TASK_INTERRUPTIBLE);
|
||||
if (ret == -ERESTARTSYS) {
|
||||
fc->ac.error = ret;
|
||||
_leave(" = f [%d]", ret);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Give up with -ESTALE once four waits have completed. */
retries++;
|
||||
if (retries == 4) {
|
||||
fc->ac.error = -ESTALE;
|
||||
_leave(" = f [stale]");
|
||||
return false;
|
||||
}
|
||||
/* Otherwise go back and re-check AFS_SERVER_FL_PROBED. */
_debug("retry");
|
||||
goto retry;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get an update for a server's address list.
|
||||
*/
|
||||
|
Reference in New Issue
Block a user