Merge tag 'nfs-for-4.21-1' of git://git.linux-nfs.org/projects/anna/linux-nfs
Pull NFS client updates from Anna Schumaker:
 "Stable bugfixes:
   - xprtrdma: Yet another double DMA-unmap # v4.20

  Features:
   - Allow some /proc/sys/sunrpc entries without CONFIG_SUNRPC_DEBUG
   - Per-xprt rdma receive workqueues
   - Drop support for FMR memory registration
   - Make port= mount option optional for RDMA mounts

  Other bugfixes and cleanups:
   - Remove unused nfs4_xdev_fs_type declaration
   - Fix comments for behavior that has changed
   - Remove generic RPC credentials by switching to 'struct cred'
   - Fix crossing mountpoints with different auth flavors
   - Various xprtrdma fixes from testing and auditing the close code
   - Fixes for disconnect issues when using xprtrdma with krb5
   - Clean up and improve xprtrdma trace points
   - Fix NFS v4.2 async copy reboot recovery"

* tag 'nfs-for-4.21-1' of git://git.linux-nfs.org/projects/anna/linux-nfs: (63 commits)
  sunrpc: convert to DEFINE_SHOW_ATTRIBUTE
  sunrpc: Add xprt after nfs4_test_session_trunk()
  sunrpc: convert unnecessary GFP_ATOMIC to GFP_NOFS
  sunrpc: handle ENOMEM in rpcb_getport_async
  NFS: remove unnecessary test for IS_ERR(cred)
  xprtrdma: Prevent leak of rpcrdma_rep objects
  NFSv4.2 fix async copy reboot recovery
  xprtrdma: Don't leak freed MRs
  xprtrdma: Add documenting comment for rpcrdma_buffer_destroy
  xprtrdma: Replace outdated comment for rpcrdma_ep_post
  xprtrdma: Update comments in frwr_op_send
  SUNRPC: Fix some kernel doc complaints
  SUNRPC: Simplify defining common RPC trace events
  NFS: Fix NFSv4 symbolic trace point output
  xprtrdma: Trace mapping, alloc, and dereg failures
  xprtrdma: Add trace points for calls to transport switch methods
  xprtrdma: Relocate the xprtrdma_mr_map trace points
  xprtrdma: Clean up of xprtrdma chunk trace points
  xprtrdma: Remove unused fields from rpcrdma_ia
  xprtrdma: Cull dprintk() call sites
  ...
此提交包含在:
@@ -9,7 +9,7 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
|
||||
obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/
|
||||
|
||||
sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
|
||||
auth.o auth_null.o auth_unix.o auth_generic.o \
|
||||
auth.o auth_null.o auth_unix.o \
|
||||
svc.o svcsock.o svcauth.o svcauth_unix.o \
|
||||
addr.o rpcb_clnt.o timer.o xdr.o \
|
||||
sunrpc_syms.o cache.o rpc_pipe.o \
|
||||
|
@@ -39,6 +39,20 @@ static const struct rpc_authops __rcu *auth_flavors[RPC_AUTH_MAXFLAVOR] = {
|
||||
static LIST_HEAD(cred_unused);
|
||||
static unsigned long number_cred_unused;
|
||||
|
||||
static struct cred machine_cred = {
|
||||
.usage = ATOMIC_INIT(1),
|
||||
};
|
||||
|
||||
/*
|
||||
* Return the machine_cred pointer to be used whenever
|
||||
* a generic machine credential is needed.
|
||||
*/
|
||||
const struct cred *rpc_machine_cred(void)
|
||||
{
|
||||
return &machine_cred;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rpc_machine_cred);
|
||||
|
||||
#define MAX_HASHTABLE_BITS (14)
|
||||
static int param_set_hashtbl_sz(const char *val, const struct kernel_param *kp)
|
||||
{
|
||||
@@ -346,29 +360,6 @@ out_nocache:
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rpcauth_init_credcache);
|
||||
|
||||
/*
|
||||
* Setup a credential key lifetime timeout notification
|
||||
*/
|
||||
int
|
||||
rpcauth_key_timeout_notify(struct rpc_auth *auth, struct rpc_cred *cred)
|
||||
{
|
||||
if (!cred->cr_auth->au_ops->key_timeout)
|
||||
return 0;
|
||||
return cred->cr_auth->au_ops->key_timeout(auth, cred);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rpcauth_key_timeout_notify);
|
||||
|
||||
bool
|
||||
rpcauth_cred_key_to_expire(struct rpc_auth *auth, struct rpc_cred *cred)
|
||||
{
|
||||
if (auth->au_flags & RPCAUTH_AUTH_NO_CRKEY_TIMEOUT)
|
||||
return false;
|
||||
if (!cred->cr_ops->crkey_to_expire)
|
||||
return false;
|
||||
return cred->cr_ops->crkey_to_expire(cred);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rpcauth_cred_key_to_expire);
|
||||
|
||||
char *
|
||||
rpcauth_stringify_acceptor(struct rpc_cred *cred)
|
||||
{
|
||||
@@ -587,13 +578,6 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
|
||||
hlist_for_each_entry_rcu(entry, &cache->hashtable[nr], cr_hash) {
|
||||
if (!entry->cr_ops->crmatch(acred, entry, flags))
|
||||
continue;
|
||||
if (flags & RPCAUTH_LOOKUP_RCU) {
|
||||
if (test_bit(RPCAUTH_CRED_NEW, &entry->cr_flags) ||
|
||||
refcount_read(&entry->cr_count) == 0)
|
||||
continue;
|
||||
cred = entry;
|
||||
break;
|
||||
}
|
||||
cred = get_rpccred(entry);
|
||||
if (cred)
|
||||
break;
|
||||
@@ -603,9 +587,6 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
|
||||
if (cred != NULL)
|
||||
goto found;
|
||||
|
||||
if (flags & RPCAUTH_LOOKUP_RCU)
|
||||
return ERR_PTR(-ECHILD);
|
||||
|
||||
new = auth->au_ops->crcreate(auth, acred, flags, gfp);
|
||||
if (IS_ERR(new)) {
|
||||
cred = new;
|
||||
@@ -656,9 +637,7 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags)
|
||||
auth->au_ops->au_name);
|
||||
|
||||
memset(&acred, 0, sizeof(acred));
|
||||
acred.uid = cred->fsuid;
|
||||
acred.gid = cred->fsgid;
|
||||
acred.group_info = cred->group_info;
|
||||
acred.cred = cred;
|
||||
ret = auth->au_ops->lookup_cred(auth, &acred, flags);
|
||||
return ret;
|
||||
}
|
||||
@@ -672,32 +651,42 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
|
||||
INIT_LIST_HEAD(&cred->cr_lru);
|
||||
refcount_set(&cred->cr_count, 1);
|
||||
cred->cr_auth = auth;
|
||||
cred->cr_flags = 0;
|
||||
cred->cr_ops = ops;
|
||||
cred->cr_expire = jiffies;
|
||||
cred->cr_uid = acred->uid;
|
||||
cred->cr_cred = get_cred(acred->cred);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rpcauth_init_cred);
|
||||
|
||||
struct rpc_cred *
|
||||
rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags)
|
||||
{
|
||||
dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid,
|
||||
cred->cr_auth->au_ops->au_name, cred);
|
||||
return get_rpccred(cred);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rpcauth_generic_bind_cred);
|
||||
|
||||
static struct rpc_cred *
|
||||
rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags)
|
||||
{
|
||||
struct rpc_auth *auth = task->tk_client->cl_auth;
|
||||
struct auth_cred acred = {
|
||||
.uid = GLOBAL_ROOT_UID,
|
||||
.gid = GLOBAL_ROOT_GID,
|
||||
.cred = get_task_cred(&init_task),
|
||||
};
|
||||
struct rpc_cred *ret;
|
||||
|
||||
dprintk("RPC: %5u looking up %s cred\n",
|
||||
task->tk_pid, task->tk_client->cl_auth->au_ops->au_name);
|
||||
ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags);
|
||||
put_cred(acred.cred);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct rpc_cred *
|
||||
rpcauth_bind_machine_cred(struct rpc_task *task, int lookupflags)
|
||||
{
|
||||
struct rpc_auth *auth = task->tk_client->cl_auth;
|
||||
struct auth_cred acred = {
|
||||
.principal = task->tk_client->cl_principal,
|
||||
.cred = init_task.cred,
|
||||
};
|
||||
|
||||
if (!acred.principal)
|
||||
return NULL;
|
||||
dprintk("RPC: %5u looking up %s machine cred\n",
|
||||
task->tk_pid, task->tk_client->cl_auth->au_ops->au_name);
|
||||
return auth->au_ops->lookup_cred(auth, &acred, lookupflags);
|
||||
}
|
||||
|
||||
@@ -712,18 +701,33 @@ rpcauth_bind_new_cred(struct rpc_task *task, int lookupflags)
|
||||
}
|
||||
|
||||
static int
|
||||
rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags)
|
||||
rpcauth_bindcred(struct rpc_task *task, const struct cred *cred, int flags)
|
||||
{
|
||||
struct rpc_rqst *req = task->tk_rqstp;
|
||||
struct rpc_cred *new;
|
||||
struct rpc_cred *new = NULL;
|
||||
int lookupflags = 0;
|
||||
struct rpc_auth *auth = task->tk_client->cl_auth;
|
||||
struct auth_cred acred = {
|
||||
.cred = cred,
|
||||
};
|
||||
|
||||
if (flags & RPC_TASK_ASYNC)
|
||||
lookupflags |= RPCAUTH_LOOKUP_NEW;
|
||||
if (cred != NULL)
|
||||
new = cred->cr_ops->crbind(task, cred, lookupflags);
|
||||
else if (flags & RPC_TASK_ROOTCREDS)
|
||||
if (task->tk_op_cred)
|
||||
/* Task must use exactly this rpc_cred */
|
||||
new = get_rpccred(task->tk_op_cred);
|
||||
else if (cred != NULL && cred != &machine_cred)
|
||||
new = auth->au_ops->lookup_cred(auth, &acred, lookupflags);
|
||||
else if (cred == &machine_cred)
|
||||
new = rpcauth_bind_machine_cred(task, lookupflags);
|
||||
|
||||
/* If machine cred couldn't be bound, try a root cred */
|
||||
if (new)
|
||||
;
|
||||
else if (cred == &machine_cred || (flags & RPC_TASK_ROOTCREDS))
|
||||
new = rpcauth_bind_root_cred(task, lookupflags);
|
||||
else if (flags & RPC_TASK_NULLCREDS)
|
||||
new = authnull_ops.lookup_cred(NULL, NULL, 0);
|
||||
else
|
||||
new = rpcauth_bind_new_cred(task, lookupflags);
|
||||
if (IS_ERR(new))
|
||||
@@ -901,15 +905,10 @@ int __init rpcauth_init_module(void)
|
||||
err = rpc_init_authunix();
|
||||
if (err < 0)
|
||||
goto out1;
|
||||
err = rpc_init_generic_auth();
|
||||
if (err < 0)
|
||||
goto out2;
|
||||
err = register_shrinker(&rpc_cred_shrinker);
|
||||
if (err < 0)
|
||||
goto out3;
|
||||
goto out2;
|
||||
return 0;
|
||||
out3:
|
||||
rpc_destroy_generic_auth();
|
||||
out2:
|
||||
rpc_destroy_authunix();
|
||||
out1:
|
||||
@@ -919,6 +918,5 @@ out1:
|
||||
void rpcauth_remove_module(void)
|
||||
{
|
||||
rpc_destroy_authunix();
|
||||
rpc_destroy_generic_auth();
|
||||
unregister_shrinker(&rpc_cred_shrinker);
|
||||
}
|
||||
|
@@ -1,293 +0,0 @@
|
||||
/*
|
||||
* Generic RPC credential
|
||||
*
|
||||
* Copyright (C) 2008, Trond Myklebust <Trond.Myklebust@netapp.com>
|
||||
*/
|
||||
|
||||
#include <linux/err.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sunrpc/auth.h>
|
||||
#include <linux/sunrpc/clnt.h>
|
||||
#include <linux/sunrpc/debug.h>
|
||||
#include <linux/sunrpc/sched.h>
|
||||
|
||||
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
|
||||
# define RPCDBG_FACILITY RPCDBG_AUTH
|
||||
#endif
|
||||
|
||||
#define RPC_MACHINE_CRED_USERID GLOBAL_ROOT_UID
|
||||
#define RPC_MACHINE_CRED_GROUPID GLOBAL_ROOT_GID
|
||||
|
||||
struct generic_cred {
|
||||
struct rpc_cred gc_base;
|
||||
struct auth_cred acred;
|
||||
};
|
||||
|
||||
static struct rpc_auth generic_auth;
|
||||
static const struct rpc_credops generic_credops;
|
||||
|
||||
/*
|
||||
* Public call interface
|
||||
*/
|
||||
struct rpc_cred *rpc_lookup_cred(void)
|
||||
{
|
||||
return rpcauth_lookupcred(&generic_auth, 0);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rpc_lookup_cred);
|
||||
|
||||
struct rpc_cred *
|
||||
rpc_lookup_generic_cred(struct auth_cred *acred, int flags, gfp_t gfp)
|
||||
{
|
||||
return rpcauth_lookup_credcache(&generic_auth, acred, flags, gfp);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rpc_lookup_generic_cred);
|
||||
|
||||
struct rpc_cred *rpc_lookup_cred_nonblock(void)
|
||||
{
|
||||
return rpcauth_lookupcred(&generic_auth, RPCAUTH_LOOKUP_RCU);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rpc_lookup_cred_nonblock);
|
||||
|
||||
/*
|
||||
* Public call interface for looking up machine creds.
|
||||
*/
|
||||
struct rpc_cred *rpc_lookup_machine_cred(const char *service_name)
|
||||
{
|
||||
struct auth_cred acred = {
|
||||
.uid = RPC_MACHINE_CRED_USERID,
|
||||
.gid = RPC_MACHINE_CRED_GROUPID,
|
||||
.principal = service_name,
|
||||
.machine_cred = 1,
|
||||
};
|
||||
|
||||
dprintk("RPC: looking up machine cred for service %s\n",
|
||||
service_name);
|
||||
return generic_auth.au_ops->lookup_cred(&generic_auth, &acred, 0);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred);
|
||||
|
||||
static struct rpc_cred *generic_bind_cred(struct rpc_task *task,
|
||||
struct rpc_cred *cred, int lookupflags)
|
||||
{
|
||||
struct rpc_auth *auth = task->tk_client->cl_auth;
|
||||
struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred;
|
||||
|
||||
return auth->au_ops->lookup_cred(auth, acred, lookupflags);
|
||||
}
|
||||
|
||||
static int
|
||||
generic_hash_cred(struct auth_cred *acred, unsigned int hashbits)
|
||||
{
|
||||
return hash_64(from_kgid(&init_user_ns, acred->gid) |
|
||||
((u64)from_kuid(&init_user_ns, acred->uid) <<
|
||||
(sizeof(gid_t) * 8)), hashbits);
|
||||
}
|
||||
|
||||
/*
|
||||
* Lookup generic creds for current process
|
||||
*/
|
||||
static struct rpc_cred *
|
||||
generic_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
|
||||
{
|
||||
return rpcauth_lookup_credcache(&generic_auth, acred, flags, GFP_KERNEL);
|
||||
}
|
||||
|
||||
static struct rpc_cred *
|
||||
generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp)
|
||||
{
|
||||
struct generic_cred *gcred;
|
||||
|
||||
gcred = kmalloc(sizeof(*gcred), gfp);
|
||||
if (gcred == NULL)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
rpcauth_init_cred(&gcred->gc_base, acred, &generic_auth, &generic_credops);
|
||||
gcred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
|
||||
|
||||
gcred->acred.uid = acred->uid;
|
||||
gcred->acred.gid = acred->gid;
|
||||
gcred->acred.group_info = acred->group_info;
|
||||
gcred->acred.ac_flags = 0;
|
||||
if (gcred->acred.group_info != NULL)
|
||||
get_group_info(gcred->acred.group_info);
|
||||
gcred->acred.machine_cred = acred->machine_cred;
|
||||
gcred->acred.principal = acred->principal;
|
||||
|
||||
dprintk("RPC: allocated %s cred %p for uid %d gid %d\n",
|
||||
gcred->acred.machine_cred ? "machine" : "generic",
|
||||
gcred,
|
||||
from_kuid(&init_user_ns, acred->uid),
|
||||
from_kgid(&init_user_ns, acred->gid));
|
||||
return &gcred->gc_base;
|
||||
}
|
||||
|
||||
static void
|
||||
generic_free_cred(struct rpc_cred *cred)
|
||||
{
|
||||
struct generic_cred *gcred = container_of(cred, struct generic_cred, gc_base);
|
||||
|
||||
dprintk("RPC: generic_free_cred %p\n", gcred);
|
||||
if (gcred->acred.group_info != NULL)
|
||||
put_group_info(gcred->acred.group_info);
|
||||
kfree(gcred);
|
||||
}
|
||||
|
||||
static void
|
||||
generic_free_cred_callback(struct rcu_head *head)
|
||||
{
|
||||
struct rpc_cred *cred = container_of(head, struct rpc_cred, cr_rcu);
|
||||
generic_free_cred(cred);
|
||||
}
|
||||
|
||||
static void
|
||||
generic_destroy_cred(struct rpc_cred *cred)
|
||||
{
|
||||
call_rcu(&cred->cr_rcu, generic_free_cred_callback);
|
||||
}
|
||||
|
||||
static int
|
||||
machine_cred_match(struct auth_cred *acred, struct generic_cred *gcred, int flags)
|
||||
{
|
||||
if (!gcred->acred.machine_cred ||
|
||||
gcred->acred.principal != acred->principal ||
|
||||
!uid_eq(gcred->acred.uid, acred->uid) ||
|
||||
!gid_eq(gcred->acred.gid, acred->gid))
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Match credentials against current process creds.
|
||||
*/
|
||||
static int
|
||||
generic_match(struct auth_cred *acred, struct rpc_cred *cred, int flags)
|
||||
{
|
||||
struct generic_cred *gcred = container_of(cred, struct generic_cred, gc_base);
|
||||
int i;
|
||||
|
||||
if (acred->machine_cred)
|
||||
return machine_cred_match(acred, gcred, flags);
|
||||
|
||||
if (!uid_eq(gcred->acred.uid, acred->uid) ||
|
||||
!gid_eq(gcred->acred.gid, acred->gid) ||
|
||||
gcred->acred.machine_cred != 0)
|
||||
goto out_nomatch;
|
||||
|
||||
/* Optimisation in the case where pointers are identical... */
|
||||
if (gcred->acred.group_info == acred->group_info)
|
||||
goto out_match;
|
||||
|
||||
/* Slow path... */
|
||||
if (gcred->acred.group_info->ngroups != acred->group_info->ngroups)
|
||||
goto out_nomatch;
|
||||
for (i = 0; i < gcred->acred.group_info->ngroups; i++) {
|
||||
if (!gid_eq(gcred->acred.group_info->gid[i],
|
||||
acred->group_info->gid[i]))
|
||||
goto out_nomatch;
|
||||
}
|
||||
out_match:
|
||||
return 1;
|
||||
out_nomatch:
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __init rpc_init_generic_auth(void)
|
||||
{
|
||||
return rpcauth_init_credcache(&generic_auth);
|
||||
}
|
||||
|
||||
void rpc_destroy_generic_auth(void)
|
||||
{
|
||||
rpcauth_destroy_credcache(&generic_auth);
|
||||
}
|
||||
|
||||
/*
|
||||
* Test the current time (now) against the underlying credential key expiry
|
||||
* minus a timeout and setup notification.
|
||||
*
|
||||
* The normal case:
|
||||
* If 'now' is before the key expiry minus RPC_KEY_EXPIRE_TIMEO, set
|
||||
* the RPC_CRED_NOTIFY_TIMEOUT flag to setup the underlying credential
|
||||
* rpc_credops crmatch routine to notify this generic cred when its key
|
||||
* expiration is within RPC_KEY_EXPIRE_TIMEO, and return 0.
|
||||
*
|
||||
* The error case:
|
||||
* If the underlying cred lookup fails, return -EACCES.
|
||||
*
|
||||
* The 'almost' error case:
|
||||
* If 'now' is within key expiry minus RPC_KEY_EXPIRE_TIMEO, but not within
|
||||
* key expiry minus RPC_KEY_EXPIRE_FAIL, set the RPC_CRED_KEY_EXPIRE_SOON bit
|
||||
* on the acred ac_flags and return 0.
|
||||
*/
|
||||
static int
|
||||
generic_key_timeout(struct rpc_auth *auth, struct rpc_cred *cred)
|
||||
{
|
||||
struct auth_cred *acred = &container_of(cred, struct generic_cred,
|
||||
gc_base)->acred;
|
||||
struct rpc_cred *tcred;
|
||||
int ret = 0;
|
||||
|
||||
|
||||
/* Fast track for non crkey_timeout (no key) underlying credentials */
|
||||
if (auth->au_flags & RPCAUTH_AUTH_NO_CRKEY_TIMEOUT)
|
||||
return 0;
|
||||
|
||||
/* Fast track for the normal case */
|
||||
if (test_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags))
|
||||
return 0;
|
||||
|
||||
/* lookup_cred returns either a valid referenced rpc_cred or an ERR_PTR-encoded error */
|
||||
tcred = auth->au_ops->lookup_cred(auth, acred, 0);
|
||||
if (IS_ERR(tcred))
|
||||
return -EACCES;
|
||||
|
||||
/* Test for the almost error case */
|
||||
ret = tcred->cr_ops->crkey_timeout(tcred);
|
||||
if (ret != 0) {
|
||||
set_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
|
||||
ret = 0;
|
||||
} else {
|
||||
/* In case underlying cred key has been reset */
|
||||
if (test_and_clear_bit(RPC_CRED_KEY_EXPIRE_SOON,
|
||||
&acred->ac_flags))
|
||||
dprintk("RPC: UID %d Credential key reset\n",
|
||||
from_kuid(&init_user_ns, tcred->cr_uid));
|
||||
/* set up fasttrack for the normal case */
|
||||
set_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags);
|
||||
}
|
||||
|
||||
put_rpccred(tcred);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct rpc_authops generic_auth_ops = {
|
||||
.owner = THIS_MODULE,
|
||||
.au_name = "Generic",
|
||||
.hash_cred = generic_hash_cred,
|
||||
.lookup_cred = generic_lookup_cred,
|
||||
.crcreate = generic_create_cred,
|
||||
.key_timeout = generic_key_timeout,
|
||||
};
|
||||
|
||||
static struct rpc_auth generic_auth = {
|
||||
.au_ops = &generic_auth_ops,
|
||||
.au_count = REFCOUNT_INIT(1),
|
||||
};
|
||||
|
||||
static bool generic_key_to_expire(struct rpc_cred *cred)
|
||||
{
|
||||
struct auth_cred *acred = &container_of(cred, struct generic_cred,
|
||||
gc_base)->acred;
|
||||
return test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
|
||||
}
|
||||
|
||||
static const struct rpc_credops generic_credops = {
|
||||
.cr_name = "Generic cred",
|
||||
.crdestroy = generic_destroy_cred,
|
||||
.crbind = generic_bind_cred,
|
||||
.crmatch = generic_match,
|
||||
.crkey_to_expire = generic_key_to_expire,
|
||||
};
|
@@ -565,7 +565,7 @@ gss_setup_upcall(struct gss_auth *gss_auth, struct rpc_cred *cred)
|
||||
struct gss_cred *gss_cred = container_of(cred,
|
||||
struct gss_cred, gc_base);
|
||||
struct gss_upcall_msg *gss_new, *gss_msg;
|
||||
kuid_t uid = cred->cr_uid;
|
||||
kuid_t uid = cred->cr_cred->fsuid;
|
||||
|
||||
gss_new = gss_alloc_msg(gss_auth, uid, gss_cred->gc_principal);
|
||||
if (IS_ERR(gss_new))
|
||||
@@ -604,7 +604,7 @@ gss_refresh_upcall(struct rpc_task *task)
|
||||
int err = 0;
|
||||
|
||||
dprintk("RPC: %5u %s for uid %u\n",
|
||||
task->tk_pid, __func__, from_kuid(&init_user_ns, cred->cr_uid));
|
||||
task->tk_pid, __func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid));
|
||||
gss_msg = gss_setup_upcall(gss_auth, cred);
|
||||
if (PTR_ERR(gss_msg) == -EAGAIN) {
|
||||
/* XXX: warning on the first, under the assumption we
|
||||
@@ -637,7 +637,7 @@ gss_refresh_upcall(struct rpc_task *task)
|
||||
out:
|
||||
dprintk("RPC: %5u %s for uid %u result %d\n",
|
||||
task->tk_pid, __func__,
|
||||
from_kuid(&init_user_ns, cred->cr_uid), err);
|
||||
from_kuid(&init_user_ns, cred->cr_cred->fsuid), err);
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -653,7 +653,7 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
|
||||
int err;
|
||||
|
||||
dprintk("RPC: %s for uid %u\n",
|
||||
__func__, from_kuid(&init_user_ns, cred->cr_uid));
|
||||
__func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid));
|
||||
retry:
|
||||
err = 0;
|
||||
/* if gssd is down, just skip upcalling altogether */
|
||||
@@ -701,7 +701,7 @@ out_intr:
|
||||
gss_release_msg(gss_msg);
|
||||
out:
|
||||
dprintk("RPC: %s for uid %u result %d\n",
|
||||
__func__, from_kuid(&init_user_ns, cred->cr_uid), err);
|
||||
__func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid), err);
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -1248,7 +1248,7 @@ gss_dup_cred(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
|
||||
new = kzalloc(sizeof(*gss_cred), GFP_NOIO);
|
||||
if (new) {
|
||||
struct auth_cred acred = {
|
||||
.uid = gss_cred->gc_base.cr_uid,
|
||||
.cred = gss_cred->gc_base.cr_cred,
|
||||
};
|
||||
struct gss_cl_ctx *ctx =
|
||||
rcu_dereference_protected(gss_cred->gc_ctx, 1);
|
||||
@@ -1343,6 +1343,7 @@ gss_destroy_nullcred(struct rpc_cred *cred)
|
||||
struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1);
|
||||
|
||||
RCU_INIT_POINTER(gss_cred->gc_ctx, NULL);
|
||||
put_cred(cred->cr_cred);
|
||||
call_rcu(&cred->cr_rcu, gss_free_cred_callback);
|
||||
if (ctx)
|
||||
gss_put_ctx(ctx);
|
||||
@@ -1361,7 +1362,7 @@ gss_destroy_cred(struct rpc_cred *cred)
|
||||
static int
|
||||
gss_hash_cred(struct auth_cred *acred, unsigned int hashbits)
|
||||
{
|
||||
return hash_64(from_kuid(&init_user_ns, acred->uid), hashbits);
|
||||
return hash_64(from_kuid(&init_user_ns, acred->cred->fsuid), hashbits);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1381,7 +1382,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t
|
||||
int err = -ENOMEM;
|
||||
|
||||
dprintk("RPC: %s for uid %d, flavor %d\n",
|
||||
__func__, from_kuid(&init_user_ns, acred->uid),
|
||||
__func__, from_kuid(&init_user_ns, acred->cred->fsuid),
|
||||
auth->au_flavor);
|
||||
|
||||
if (!(cred = kzalloc(sizeof(*cred), gfp)))
|
||||
@@ -1394,9 +1395,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t
|
||||
*/
|
||||
cred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_NEW;
|
||||
cred->gc_service = gss_auth->service;
|
||||
cred->gc_principal = NULL;
|
||||
if (acred->machine_cred)
|
||||
cred->gc_principal = acred->principal;
|
||||
cred->gc_principal = acred->principal;
|
||||
kref_get(&gss_auth->kref);
|
||||
return &cred->gc_base;
|
||||
|
||||
@@ -1518,23 +1517,10 @@ out:
|
||||
if (gss_cred->gc_principal == NULL)
|
||||
return 0;
|
||||
ret = strcmp(acred->principal, gss_cred->gc_principal) == 0;
|
||||
goto check_expire;
|
||||
}
|
||||
if (gss_cred->gc_principal != NULL)
|
||||
return 0;
|
||||
ret = uid_eq(rc->cr_uid, acred->uid);
|
||||
|
||||
check_expire:
|
||||
if (ret == 0)
|
||||
return ret;
|
||||
|
||||
/* Notify acred users of GSS context expiration timeout */
|
||||
if (test_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags) &&
|
||||
(gss_key_timeout(rc) != 0)) {
|
||||
/* test will now be done from generic cred */
|
||||
test_and_clear_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags);
|
||||
/* tell NFS layer that key will expire soon */
|
||||
set_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
|
||||
} else {
|
||||
if (gss_cred->gc_principal != NULL)
|
||||
return 0;
|
||||
ret = uid_eq(rc->cr_cred->fsuid, acred->cred->fsuid);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
@@ -1607,9 +1593,8 @@ static int gss_renew_cred(struct rpc_task *task)
|
||||
gc_base);
|
||||
struct rpc_auth *auth = oldcred->cr_auth;
|
||||
struct auth_cred acred = {
|
||||
.uid = oldcred->cr_uid,
|
||||
.cred = oldcred->cr_cred,
|
||||
.principal = gss_cred->gc_principal,
|
||||
.machine_cred = (gss_cred->gc_principal != NULL ? 1 : 0),
|
||||
};
|
||||
struct rpc_cred *new;
|
||||
|
||||
@@ -2110,7 +2095,6 @@ static const struct rpc_credops gss_credops = {
|
||||
.cr_name = "AUTH_GSS",
|
||||
.crdestroy = gss_destroy_cred,
|
||||
.cr_init = gss_cred_init,
|
||||
.crbind = rpcauth_generic_bind_cred,
|
||||
.crmatch = gss_match,
|
||||
.crmarshal = gss_marshal,
|
||||
.crrefresh = gss_refresh,
|
||||
@@ -2125,7 +2109,6 @@ static const struct rpc_credops gss_credops = {
|
||||
static const struct rpc_credops gss_nullops = {
|
||||
.cr_name = "AUTH_GSS",
|
||||
.crdestroy = gss_destroy_nullcred,
|
||||
.crbind = rpcauth_generic_bind_cred,
|
||||
.crmatch = gss_match,
|
||||
.crmarshal = gss_marshal,
|
||||
.crrefresh = gss_refresh_null,
|
||||
|
@@ -244,7 +244,7 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor)
|
||||
|
||||
/**
|
||||
* gss_mech_list_pseudoflavors - Discover registered GSS pseudoflavors
|
||||
* @array: array to fill in
|
||||
* @array_ptr: array to fill in
|
||||
* @size: size of "array"
|
||||
*
|
||||
* Returns the number of array items filled in, or a negative errno.
|
||||
|
@@ -36,8 +36,6 @@ nul_destroy(struct rpc_auth *auth)
|
||||
static struct rpc_cred *
|
||||
nul_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
|
||||
{
|
||||
if (flags & RPCAUTH_LOOKUP_RCU)
|
||||
return &null_cred;
|
||||
return get_rpccred(&null_cred);
|
||||
}
|
||||
|
||||
@@ -116,7 +114,6 @@ static
|
||||
struct rpc_auth null_auth = {
|
||||
.au_cslack = NUL_CALLSLACK,
|
||||
.au_rslack = NUL_REPLYSLACK,
|
||||
.au_flags = RPCAUTH_AUTH_NO_CRKEY_TIMEOUT,
|
||||
.au_ops = &authnull_ops,
|
||||
.au_flavor = RPC_AUTH_NULL,
|
||||
.au_count = REFCOUNT_INIT(1),
|
||||
@@ -126,7 +123,6 @@ static
|
||||
const struct rpc_credops null_credops = {
|
||||
.cr_name = "AUTH_NULL",
|
||||
.crdestroy = nul_destroy_cred,
|
||||
.crbind = rpcauth_generic_bind_cred,
|
||||
.crmatch = nul_match,
|
||||
.crmarshal = nul_marshal,
|
||||
.crrefresh = nul_refresh,
|
||||
|
@@ -11,16 +11,11 @@
|
||||
#include <linux/types.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/mempool.h>
|
||||
#include <linux/sunrpc/clnt.h>
|
||||
#include <linux/sunrpc/auth.h>
|
||||
#include <linux/user_namespace.h>
|
||||
|
||||
struct unx_cred {
|
||||
struct rpc_cred uc_base;
|
||||
kgid_t uc_gid;
|
||||
kgid_t uc_gids[UNX_NGROUPS];
|
||||
};
|
||||
#define uc_uid uc_base.cr_uid
|
||||
|
||||
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
|
||||
# define RPCDBG_FACILITY RPCDBG_AUTH
|
||||
@@ -28,6 +23,7 @@ struct unx_cred {
|
||||
|
||||
static struct rpc_auth unix_auth;
|
||||
static const struct rpc_credops unix_credops;
|
||||
static mempool_t *unix_pool;
|
||||
|
||||
static struct rpc_auth *
|
||||
unx_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
|
||||
@@ -42,15 +38,6 @@ static void
|
||||
unx_destroy(struct rpc_auth *auth)
|
||||
{
|
||||
dprintk("RPC: destroying UNIX authenticator %p\n", auth);
|
||||
rpcauth_clear_credcache(auth->au_credcache);
|
||||
}
|
||||
|
||||
static int
|
||||
unx_hash_cred(struct auth_cred *acred, unsigned int hashbits)
|
||||
{
|
||||
return hash_64(from_kgid(&init_user_ns, acred->gid) |
|
||||
((u64)from_kuid(&init_user_ns, acred->uid) <<
|
||||
(sizeof(gid_t) * 8)), hashbits);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -59,52 +46,24 @@ unx_hash_cred(struct auth_cred *acred, unsigned int hashbits)
|
||||
static struct rpc_cred *
|
||||
unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
|
||||
{
|
||||
return rpcauth_lookup_credcache(auth, acred, flags, GFP_NOFS);
|
||||
}
|
||||
|
||||
static struct rpc_cred *
|
||||
unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp)
|
||||
{
|
||||
struct unx_cred *cred;
|
||||
unsigned int groups = 0;
|
||||
unsigned int i;
|
||||
struct rpc_cred *ret = mempool_alloc(unix_pool, GFP_NOFS);
|
||||
|
||||
dprintk("RPC: allocating UNIX cred for uid %d gid %d\n",
|
||||
from_kuid(&init_user_ns, acred->uid),
|
||||
from_kgid(&init_user_ns, acred->gid));
|
||||
from_kuid(&init_user_ns, acred->cred->fsuid),
|
||||
from_kgid(&init_user_ns, acred->cred->fsgid));
|
||||
|
||||
if (!(cred = kmalloc(sizeof(*cred), gfp)))
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops);
|
||||
cred->uc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
|
||||
|
||||
if (acred->group_info != NULL)
|
||||
groups = acred->group_info->ngroups;
|
||||
if (groups > UNX_NGROUPS)
|
||||
groups = UNX_NGROUPS;
|
||||
|
||||
cred->uc_gid = acred->gid;
|
||||
for (i = 0; i < groups; i++)
|
||||
cred->uc_gids[i] = acred->group_info->gid[i];
|
||||
if (i < UNX_NGROUPS)
|
||||
cred->uc_gids[i] = INVALID_GID;
|
||||
|
||||
return &cred->uc_base;
|
||||
}
|
||||
|
||||
static void
|
||||
unx_free_cred(struct unx_cred *unx_cred)
|
||||
{
|
||||
dprintk("RPC: unx_free_cred %p\n", unx_cred);
|
||||
kfree(unx_cred);
|
||||
rpcauth_init_cred(ret, acred, auth, &unix_credops);
|
||||
ret->cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void
|
||||
unx_free_cred_callback(struct rcu_head *head)
|
||||
{
|
||||
struct unx_cred *unx_cred = container_of(head, struct unx_cred, uc_base.cr_rcu);
|
||||
unx_free_cred(unx_cred);
|
||||
struct rpc_cred *rpc_cred = container_of(head, struct rpc_cred, cr_rcu);
|
||||
dprintk("RPC: unx_free_cred %p\n", rpc_cred);
|
||||
put_cred(rpc_cred->cr_cred);
|
||||
mempool_free(rpc_cred, unix_pool);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -114,30 +73,32 @@ unx_destroy_cred(struct rpc_cred *cred)
|
||||
}
|
||||
|
||||
/*
|
||||
* Match credentials against current process creds.
|
||||
* The root_override argument takes care of cases where the caller may
|
||||
* request root creds (e.g. for NFS swapping).
|
||||
* Match credentials against the current auth_cred.
|
||||
*/
|
||||
static int
|
||||
unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags)
|
||||
unx_match(struct auth_cred *acred, struct rpc_cred *cred, int flags)
|
||||
{
|
||||
struct unx_cred *cred = container_of(rcred, struct unx_cred, uc_base);
|
||||
unsigned int groups = 0;
|
||||
unsigned int i;
|
||||
|
||||
if (cred->cr_cred == acred->cred)
|
||||
return 1;
|
||||
|
||||
if (!uid_eq(cred->uc_uid, acred->uid) || !gid_eq(cred->uc_gid, acred->gid))
|
||||
if (!uid_eq(cred->cr_cred->fsuid, acred->cred->fsuid) || !gid_eq(cred->cr_cred->fsgid, acred->cred->fsgid))
|
||||
return 0;
|
||||
|
||||
if (acred->group_info != NULL)
|
||||
groups = acred->group_info->ngroups;
|
||||
if (acred->cred && acred->cred->group_info != NULL)
|
||||
groups = acred->cred->group_info->ngroups;
|
||||
if (groups > UNX_NGROUPS)
|
||||
groups = UNX_NGROUPS;
|
||||
for (i = 0; i < groups ; i++)
|
||||
if (!gid_eq(cred->uc_gids[i], acred->group_info->gid[i]))
|
||||
return 0;
|
||||
if (groups < UNX_NGROUPS && gid_valid(cred->uc_gids[groups]))
|
||||
if (cred->cr_cred->group_info == NULL)
|
||||
return groups == 0;
|
||||
if (groups != cred->cr_cred->group_info->ngroups)
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < groups ; i++)
|
||||
if (!gid_eq(cred->cr_cred->group_info->gid[i], acred->cred->group_info->gid[i]))
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -149,9 +110,10 @@ static __be32 *
|
||||
unx_marshal(struct rpc_task *task, __be32 *p)
|
||||
{
|
||||
struct rpc_clnt *clnt = task->tk_client;
|
||||
struct unx_cred *cred = container_of(task->tk_rqstp->rq_cred, struct unx_cred, uc_base);
|
||||
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
|
||||
__be32 *base, *hold;
|
||||
int i;
|
||||
struct group_info *gi = cred->cr_cred->group_info;
|
||||
|
||||
*p++ = htonl(RPC_AUTH_UNIX);
|
||||
base = p++;
|
||||
@@ -162,11 +124,12 @@ unx_marshal(struct rpc_task *task, __be32 *p)
|
||||
*/
|
||||
p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen);
|
||||
|
||||
*p++ = htonl((u32) from_kuid(&init_user_ns, cred->uc_uid));
|
||||
*p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gid));
|
||||
*p++ = htonl((u32) from_kuid(&init_user_ns, cred->cr_cred->fsuid));
|
||||
*p++ = htonl((u32) from_kgid(&init_user_ns, cred->cr_cred->fsgid));
|
||||
hold = p++;
|
||||
for (i = 0; i < UNX_NGROUPS && gid_valid(cred->uc_gids[i]); i++)
|
||||
*p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gids[i]));
|
||||
if (gi)
|
||||
for (i = 0; i < UNX_NGROUPS && i < gi->ngroups; i++)
|
||||
*p++ = htonl((u32) from_kgid(&init_user_ns, gi->gid[i]));
|
||||
*hold = htonl(p - hold - 1); /* gid array length */
|
||||
*base = htonl((p - base - 1) << 2); /* cred length */
|
||||
|
||||
@@ -213,12 +176,13 @@ unx_validate(struct rpc_task *task, __be32 *p)
|
||||
|
||||
int __init rpc_init_authunix(void)
|
||||
{
|
||||
return rpcauth_init_credcache(&unix_auth);
|
||||
unix_pool = mempool_create_kmalloc_pool(16, sizeof(struct rpc_cred));
|
||||
return unix_pool ? 0 : -ENOMEM;
|
||||
}
|
||||
|
||||
void rpc_destroy_authunix(void)
|
||||
{
|
||||
rpcauth_destroy_credcache(&unix_auth);
|
||||
mempool_destroy(unix_pool);
|
||||
}
|
||||
|
||||
const struct rpc_authops authunix_ops = {
|
||||
@@ -227,16 +191,13 @@ const struct rpc_authops authunix_ops = {
|
||||
.au_name = "UNIX",
|
||||
.create = unx_create,
|
||||
.destroy = unx_destroy,
|
||||
.hash_cred = unx_hash_cred,
|
||||
.lookup_cred = unx_lookup_cred,
|
||||
.crcreate = unx_create_cred,
|
||||
};
|
||||
|
||||
static
|
||||
struct rpc_auth unix_auth = {
|
||||
.au_cslack = UNX_CALLSLACK,
|
||||
.au_rslack = NUL_REPLYSLACK,
|
||||
.au_flags = RPCAUTH_AUTH_NO_CRKEY_TIMEOUT,
|
||||
.au_ops = &authunix_ops,
|
||||
.au_flavor = RPC_AUTH_UNIX,
|
||||
.au_count = REFCOUNT_INIT(1),
|
||||
@@ -246,7 +207,6 @@ static
|
||||
const struct rpc_credops unix_credops = {
|
||||
.cr_name = "AUTH_UNIX",
|
||||
.crdestroy = unx_destroy_cred,
|
||||
.crbind = rpcauth_generic_bind_cred,
|
||||
.crmatch = unx_match,
|
||||
.crmarshal = unx_marshal,
|
||||
.crrefresh = unx_refresh,
|
||||
|
@@ -197,7 +197,7 @@ out_free:
|
||||
/**
|
||||
* xprt_destroy_backchannel - Destroys the backchannel preallocated structures.
|
||||
* @xprt: the transport holding the preallocated structures
|
||||
* @max_reqs the maximum number of preallocated structures to destroy
|
||||
* @max_reqs: the maximum number of preallocated structures to destroy
|
||||
*
|
||||
* Since these structures may have been allocated by multiple calls
|
||||
* to xprt_setup_backchannel, we only destroy up to the maximum number
|
||||
|
@@ -627,6 +627,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
|
||||
new->cl_noretranstimeo = clnt->cl_noretranstimeo;
|
||||
new->cl_discrtry = clnt->cl_discrtry;
|
||||
new->cl_chatty = clnt->cl_chatty;
|
||||
new->cl_principal = clnt->cl_principal;
|
||||
return new;
|
||||
|
||||
out_err:
|
||||
@@ -1029,7 +1030,7 @@ rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg)
|
||||
task->tk_msg.rpc_argp = msg->rpc_argp;
|
||||
task->tk_msg.rpc_resp = msg->rpc_resp;
|
||||
if (msg->rpc_cred != NULL)
|
||||
task->tk_msg.rpc_cred = get_rpccred(msg->rpc_cred);
|
||||
task->tk_msg.rpc_cred = get_cred(msg->rpc_cred);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2521,9 +2522,8 @@ static int rpc_ping(struct rpc_clnt *clnt)
|
||||
.rpc_proc = &rpcproc_null,
|
||||
};
|
||||
int err;
|
||||
msg.rpc_cred = authnull_ops.lookup_cred(NULL, NULL, 0);
|
||||
err = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN);
|
||||
put_rpccred(msg.rpc_cred);
|
||||
err = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN |
|
||||
RPC_TASK_NULLCREDS);
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -2534,15 +2534,15 @@ struct rpc_task *rpc_call_null_helper(struct rpc_clnt *clnt,
|
||||
{
|
||||
struct rpc_message msg = {
|
||||
.rpc_proc = &rpcproc_null,
|
||||
.rpc_cred = cred,
|
||||
};
|
||||
struct rpc_task_setup task_setup_data = {
|
||||
.rpc_client = clnt,
|
||||
.rpc_xprt = xprt,
|
||||
.rpc_message = &msg,
|
||||
.rpc_op_cred = cred,
|
||||
.callback_ops = (ops != NULL) ? ops : &rpc_default_ops,
|
||||
.callback_data = data,
|
||||
.flags = flags,
|
||||
.flags = flags | RPC_TASK_NULLCREDS,
|
||||
};
|
||||
|
||||
return rpc_run_task(&task_setup_data);
|
||||
@@ -2593,7 +2593,6 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
|
||||
void *dummy)
|
||||
{
|
||||
struct rpc_cb_add_xprt_calldata *data;
|
||||
struct rpc_cred *cred;
|
||||
struct rpc_task *task;
|
||||
|
||||
data = kmalloc(sizeof(*data), GFP_NOFS);
|
||||
@@ -2602,11 +2601,9 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
|
||||
data->xps = xprt_switch_get(xps);
|
||||
data->xprt = xprt_get(xprt);
|
||||
|
||||
cred = authnull_ops.lookup_cred(NULL, NULL, 0);
|
||||
task = rpc_call_null_helper(clnt, xprt, cred,
|
||||
RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC,
|
||||
task = rpc_call_null_helper(clnt, xprt, NULL,
|
||||
RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC|RPC_TASK_NULLCREDS,
|
||||
&rpc_cb_add_xprt_call_ops, data);
|
||||
put_rpccred(cred);
|
||||
if (IS_ERR(task))
|
||||
return PTR_ERR(task);
|
||||
rpc_put_task(task);
|
||||
@@ -2637,7 +2634,6 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt,
|
||||
struct rpc_xprt *xprt,
|
||||
void *data)
|
||||
{
|
||||
struct rpc_cred *cred;
|
||||
struct rpc_task *task;
|
||||
struct rpc_add_xprt_test *xtest = (struct rpc_add_xprt_test *)data;
|
||||
int status = -EADDRINUSE;
|
||||
@@ -2649,11 +2645,9 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt,
|
||||
goto out_err;
|
||||
|
||||
/* Test the connection */
|
||||
cred = authnull_ops.lookup_cred(NULL, NULL, 0);
|
||||
task = rpc_call_null_helper(clnt, xprt, cred,
|
||||
RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
|
||||
task = rpc_call_null_helper(clnt, xprt, NULL,
|
||||
RPC_TASK_SOFT | RPC_TASK_SOFTCONN | RPC_TASK_NULLCREDS,
|
||||
NULL, NULL);
|
||||
put_rpccred(cred);
|
||||
if (IS_ERR(task)) {
|
||||
status = PTR_ERR(task);
|
||||
goto out_err;
|
||||
@@ -2667,6 +2661,9 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt,
|
||||
/* rpc_xprt_switch and rpc_xprt are dereferenced by add_xprt_test() */
|
||||
xtest->add_xprt_test(clnt, xprt, xtest->data);
|
||||
|
||||
xprt_put(xprt);
|
||||
xprt_switch_put(xps);
|
||||
|
||||
/* so that rpc_clnt_add_xprt does not call rpc_xprt_switch_add_xprt */
|
||||
return 1;
|
||||
out_err:
|
||||
|
@@ -1266,7 +1266,7 @@ static const struct rpc_pipe_ops gssd_dummy_pipe_ops = {
|
||||
* that this file will be there and have a certain format.
|
||||
*/
|
||||
static int
|
||||
rpc_show_dummy_info(struct seq_file *m, void *v)
|
||||
rpc_dummy_info_show(struct seq_file *m, void *v)
|
||||
{
|
||||
seq_printf(m, "RPC server: %s\n", utsname()->nodename);
|
||||
seq_printf(m, "service: foo (1) version 0\n");
|
||||
@@ -1275,25 +1275,12 @@ rpc_show_dummy_info(struct seq_file *m, void *v)
|
||||
seq_printf(m, "port: 0\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
rpc_dummy_info_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
return single_open(file, rpc_show_dummy_info, NULL);
|
||||
}
|
||||
|
||||
static const struct file_operations rpc_dummy_info_operations = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = rpc_dummy_info_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = single_release,
|
||||
};
|
||||
DEFINE_SHOW_ATTRIBUTE(rpc_dummy_info);
|
||||
|
||||
static const struct rpc_filelist gssd_dummy_info_file[] = {
|
||||
[0] = {
|
||||
.name = "info",
|
||||
.i_fop = &rpc_dummy_info_operations,
|
||||
.i_fop = &rpc_dummy_info_fops,
|
||||
.mode = S_IFREG | 0400,
|
||||
},
|
||||
};
|
||||
|
@@ -752,7 +752,7 @@ void rpcb_getport_async(struct rpc_task *task)
|
||||
goto bailout_nofree;
|
||||
}
|
||||
|
||||
map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC);
|
||||
map = kzalloc(sizeof(struct rpcbind_args), GFP_NOFS);
|
||||
if (!map) {
|
||||
status = -ENOMEM;
|
||||
dprintk("RPC: %5u %s: no memory available\n",
|
||||
@@ -770,7 +770,13 @@ void rpcb_getport_async(struct rpc_task *task)
|
||||
case RPCBVERS_4:
|
||||
case RPCBVERS_3:
|
||||
map->r_netid = xprt->address_strings[RPC_DISPLAY_NETID];
|
||||
map->r_addr = rpc_sockaddr2uaddr(sap, GFP_ATOMIC);
|
||||
map->r_addr = rpc_sockaddr2uaddr(sap, GFP_NOFS);
|
||||
if (!map->r_addr) {
|
||||
status = -ENOMEM;
|
||||
dprintk("RPC: %5u %s: no memory available\n",
|
||||
task->tk_pid, __func__);
|
||||
goto bailout_free_args;
|
||||
}
|
||||
map->r_owner = "";
|
||||
break;
|
||||
case RPCBVERS_2:
|
||||
@@ -793,6 +799,8 @@ void rpcb_getport_async(struct rpc_task *task)
|
||||
rpc_put_task(child);
|
||||
return;
|
||||
|
||||
bailout_free_args:
|
||||
kfree(map);
|
||||
bailout_release_client:
|
||||
rpc_release_client(rpcb_clnt);
|
||||
bailout_nofree:
|
||||
|
@@ -997,6 +997,8 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
|
||||
|
||||
task->tk_xprt = xprt_get(task_setup_data->rpc_xprt);
|
||||
|
||||
task->tk_op_cred = get_rpccred(task_setup_data->rpc_op_cred);
|
||||
|
||||
if (task->tk_ops->rpc_call_prepare != NULL)
|
||||
task->tk_action = rpc_prepare_task;
|
||||
|
||||
@@ -1054,6 +1056,7 @@ static void rpc_free_task(struct rpc_task *task)
|
||||
{
|
||||
unsigned short tk_flags = task->tk_flags;
|
||||
|
||||
put_rpccred(task->tk_op_cred);
|
||||
rpc_release_calldata(task->tk_ops, task->tk_calldata);
|
||||
|
||||
if (tk_flags & RPC_TASK_DYNAMIC) {
|
||||
@@ -1071,7 +1074,7 @@ static void rpc_release_resources_task(struct rpc_task *task)
|
||||
{
|
||||
xprt_release(task);
|
||||
if (task->tk_msg.rpc_cred) {
|
||||
put_rpccred(task->tk_msg.rpc_cred);
|
||||
put_cred(task->tk_msg.rpc_cred);
|
||||
task->tk_msg.rpc_cred = NULL;
|
||||
}
|
||||
rpc_task_release_client(task);
|
||||
|
@@ -383,7 +383,7 @@ void xprt_iter_init_listall(struct rpc_xprt_iter *xpi,
|
||||
/**
|
||||
* xprt_iter_xchg_switch - Atomically swap out the rpc_xprt_switch
|
||||
* @xpi: pointer to rpc_xprt_iter
|
||||
* @xps: pointer to a new rpc_xprt_switch or NULL
|
||||
* @newswitch: pointer to a new rpc_xprt_switch or NULL
|
||||
*
|
||||
* Swaps out the existing xpi->xpi_xpswitch with a new value.
|
||||
*/
|
||||
@@ -401,7 +401,7 @@ struct rpc_xprt_switch *xprt_iter_xchg_switch(struct rpc_xprt_iter *xpi,
|
||||
|
||||
/**
|
||||
* xprt_iter_destroy - Destroys the xprt iterator
|
||||
* @xpi pointer to rpc_xprt_iter
|
||||
* @xpi: pointer to rpc_xprt_iter
|
||||
*/
|
||||
void xprt_iter_destroy(struct rpc_xprt_iter *xpi)
|
||||
{
|
||||
|
@@ -1,8 +1,7 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
|
||||
|
||||
rpcrdma-y := transport.o rpc_rdma.o verbs.o \
|
||||
fmr_ops.o frwr_ops.o \
|
||||
rpcrdma-y := transport.o rpc_rdma.o verbs.o frwr_ops.o \
|
||||
svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \
|
||||
svc_rdma_sendto.o svc_rdma_recvfrom.o svc_rdma_rw.o \
|
||||
module.o
|
||||
|
@@ -5,7 +5,6 @@
|
||||
* Support for backward direction RPCs on RPC/RDMA.
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/sunrpc/xprt.h>
|
||||
#include <linux/sunrpc/svc.h>
|
||||
#include <linux/sunrpc/svc_xprt.h>
|
||||
@@ -20,29 +19,16 @@
|
||||
|
||||
#undef RPCRDMA_BACKCHANNEL_DEBUG
|
||||
|
||||
static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt,
|
||||
struct rpc_rqst *rqst)
|
||||
{
|
||||
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
|
||||
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
|
||||
|
||||
spin_lock(&buf->rb_reqslock);
|
||||
list_del(&req->rl_all);
|
||||
spin_unlock(&buf->rb_reqslock);
|
||||
|
||||
rpcrdma_destroy_req(req);
|
||||
}
|
||||
|
||||
static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt,
|
||||
unsigned int count)
|
||||
{
|
||||
struct rpc_xprt *xprt = &r_xprt->rx_xprt;
|
||||
struct rpcrdma_req *req;
|
||||
struct rpc_rqst *rqst;
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < (count << 1); i++) {
|
||||
struct rpcrdma_regbuf *rb;
|
||||
struct rpcrdma_req *req;
|
||||
size_t size;
|
||||
|
||||
req = rpcrdma_create_req(r_xprt);
|
||||
@@ -68,7 +54,7 @@ static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt,
|
||||
return 0;
|
||||
|
||||
out_fail:
|
||||
rpcrdma_bc_free_rqst(r_xprt, rqst);
|
||||
rpcrdma_req_destroy(req);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
@@ -101,7 +87,6 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
|
||||
goto out_free;
|
||||
|
||||
r_xprt->rx_buf.rb_bc_srv_max_requests = reqs;
|
||||
request_module("svcrdma");
|
||||
trace_xprtrdma_cb_setup(r_xprt, reqs);
|
||||
return 0;
|
||||
|
||||
@@ -173,21 +158,21 @@ static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
|
||||
*/
|
||||
int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst)
|
||||
{
|
||||
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
|
||||
struct rpc_xprt *xprt = rqst->rq_xprt;
|
||||
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
|
||||
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
|
||||
int rc;
|
||||
|
||||
if (!xprt_connected(rqst->rq_xprt))
|
||||
goto drop_connection;
|
||||
if (!xprt_connected(xprt))
|
||||
return -ENOTCONN;
|
||||
|
||||
if (!xprt_request_get_cong(rqst->rq_xprt, rqst))
|
||||
if (!xprt_request_get_cong(xprt, rqst))
|
||||
return -EBADSLT;
|
||||
|
||||
rc = rpcrdma_bc_marshal_reply(rqst);
|
||||
if (rc < 0)
|
||||
goto failed_marshal;
|
||||
|
||||
rpcrdma_post_recvs(r_xprt, true);
|
||||
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
|
||||
goto drop_connection;
|
||||
return 0;
|
||||
@@ -196,7 +181,7 @@ failed_marshal:
|
||||
if (rc != -ENOTCONN)
|
||||
return rc;
|
||||
drop_connection:
|
||||
xprt_disconnect_done(rqst->rq_xprt);
|
||||
xprt_rdma_close(xprt);
|
||||
return -ENOTCONN;
|
||||
}
|
||||
|
||||
@@ -207,7 +192,6 @@ drop_connection:
|
||||
*/
|
||||
void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs)
|
||||
{
|
||||
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
|
||||
struct rpc_rqst *rqst, *tmp;
|
||||
|
||||
spin_lock(&xprt->bc_pa_lock);
|
||||
@@ -215,7 +199,7 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs)
|
||||
list_del(&rqst->rq_bc_pa_list);
|
||||
spin_unlock(&xprt->bc_pa_lock);
|
||||
|
||||
rpcrdma_bc_free_rqst(r_xprt, rqst);
|
||||
rpcrdma_req_destroy(rpcr_to_rdmar(rqst));
|
||||
|
||||
spin_lock(&xprt->bc_pa_lock);
|
||||
}
|
||||
@@ -231,9 +215,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
|
||||
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
|
||||
struct rpc_xprt *xprt = rqst->rq_xprt;
|
||||
|
||||
dprintk("RPC: %s: freeing rqst %p (req %p)\n",
|
||||
__func__, rqst, req);
|
||||
|
||||
rpcrdma_recv_buffer_put(req->rl_reply);
|
||||
req->rl_reply = NULL;
|
||||
|
||||
@@ -319,7 +300,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
|
||||
|
||||
out_overflow:
|
||||
pr_warn("RPC/RDMA backchannel overflow\n");
|
||||
xprt_disconnect_done(xprt);
|
||||
xprt_force_disconnect(xprt);
|
||||
/* This receive buffer gets reposted automatically
|
||||
* when the connection is re-established.
|
||||
*/
|
||||
|
@@ -1,337 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (c) 2015, 2017 Oracle. All rights reserved.
|
||||
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
/* Lightweight memory registration using Fast Memory Regions (FMR).
|
||||
* Referred to sometimes as MTHCAFMR mode.
|
||||
*
|
||||
* FMR uses synchronous memory registration and deregistration.
|
||||
* FMR registration is known to be fast, but FMR deregistration
|
||||
* can take tens of usecs to complete.
|
||||
*/
|
||||
|
||||
/* Normal operation
|
||||
*
|
||||
* A Memory Region is prepared for RDMA READ or WRITE using the
|
||||
* ib_map_phys_fmr verb (fmr_op_map). When the RDMA operation is
|
||||
* finished, the Memory Region is unmapped using the ib_unmap_fmr
|
||||
* verb (fmr_op_unmap).
|
||||
*/
|
||||
|
||||
#include <linux/sunrpc/svc_rdma.h>
|
||||
|
||||
#include "xprt_rdma.h"
|
||||
#include <trace/events/rpcrdma.h>
|
||||
|
||||
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
|
||||
# define RPCDBG_FACILITY RPCDBG_TRANS
|
||||
#endif
|
||||
|
||||
/* Maximum scatter/gather per FMR */
|
||||
#define RPCRDMA_MAX_FMR_SGES (64)
|
||||
|
||||
/* Access mode of externally registered pages */
|
||||
enum {
|
||||
RPCRDMA_FMR_ACCESS_FLAGS = IB_ACCESS_REMOTE_WRITE |
|
||||
IB_ACCESS_REMOTE_READ,
|
||||
};
|
||||
|
||||
bool
|
||||
fmr_is_supported(struct rpcrdma_ia *ia)
|
||||
{
|
||||
if (!ia->ri_device->ops.alloc_fmr) {
|
||||
pr_info("rpcrdma: 'fmr' mode is not supported by device %s\n",
|
||||
ia->ri_device->name);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
__fmr_unmap(struct rpcrdma_mr *mr)
|
||||
{
|
||||
LIST_HEAD(l);
|
||||
int rc;
|
||||
|
||||
list_add(&mr->fmr.fm_mr->list, &l);
|
||||
rc = ib_unmap_fmr(&l);
|
||||
list_del(&mr->fmr.fm_mr->list);
|
||||
if (rc)
|
||||
pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n",
|
||||
mr, rc);
|
||||
}
|
||||
|
||||
/* Release an MR.
|
||||
*/
|
||||
static void
|
||||
fmr_op_release_mr(struct rpcrdma_mr *mr)
|
||||
{
|
||||
int rc;
|
||||
|
||||
kfree(mr->fmr.fm_physaddrs);
|
||||
kfree(mr->mr_sg);
|
||||
|
||||
/* In case this one was left mapped, try to unmap it
|
||||
* to prevent dealloc_fmr from failing with EBUSY
|
||||
*/
|
||||
__fmr_unmap(mr);
|
||||
|
||||
rc = ib_dealloc_fmr(mr->fmr.fm_mr);
|
||||
if (rc)
|
||||
pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n",
|
||||
mr, rc);
|
||||
|
||||
kfree(mr);
|
||||
}
|
||||
|
||||
/* MRs are dynamically allocated, so simply clean up and release the MR.
|
||||
* A replacement MR will subsequently be allocated on demand.
|
||||
*/
|
||||
static void
|
||||
fmr_mr_recycle_worker(struct work_struct *work)
|
||||
{
|
||||
struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr, mr_recycle);
|
||||
struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
|
||||
|
||||
trace_xprtrdma_mr_recycle(mr);
|
||||
|
||||
trace_xprtrdma_mr_unmap(mr);
|
||||
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
|
||||
mr->mr_sg, mr->mr_nents, mr->mr_dir);
|
||||
|
||||
spin_lock(&r_xprt->rx_buf.rb_mrlock);
|
||||
list_del(&mr->mr_all);
|
||||
r_xprt->rx_stats.mrs_recycled++;
|
||||
spin_unlock(&r_xprt->rx_buf.rb_mrlock);
|
||||
fmr_op_release_mr(mr);
|
||||
}
|
||||
|
||||
static int
|
||||
fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
|
||||
{
|
||||
static struct ib_fmr_attr fmr_attr = {
|
||||
.max_pages = RPCRDMA_MAX_FMR_SGES,
|
||||
.max_maps = 1,
|
||||
.page_shift = PAGE_SHIFT
|
||||
};
|
||||
|
||||
mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
|
||||
sizeof(u64), GFP_KERNEL);
|
||||
if (!mr->fmr.fm_physaddrs)
|
||||
goto out_free;
|
||||
|
||||
mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
|
||||
sizeof(*mr->mr_sg), GFP_KERNEL);
|
||||
if (!mr->mr_sg)
|
||||
goto out_free;
|
||||
|
||||
sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES);
|
||||
|
||||
mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
|
||||
&fmr_attr);
|
||||
if (IS_ERR(mr->fmr.fm_mr))
|
||||
goto out_fmr_err;
|
||||
|
||||
INIT_LIST_HEAD(&mr->mr_list);
|
||||
INIT_WORK(&mr->mr_recycle, fmr_mr_recycle_worker);
|
||||
return 0;
|
||||
|
||||
out_fmr_err:
|
||||
dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__,
|
||||
PTR_ERR(mr->fmr.fm_mr));
|
||||
|
||||
out_free:
|
||||
kfree(mr->mr_sg);
|
||||
kfree(mr->fmr.fm_physaddrs);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* On success, sets:
|
||||
* ep->rep_attr.cap.max_send_wr
|
||||
* ep->rep_attr.cap.max_recv_wr
|
||||
* cdata->max_requests
|
||||
* ia->ri_max_segs
|
||||
*/
|
||||
static int
|
||||
fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
|
||||
struct rpcrdma_create_data_internal *cdata)
|
||||
{
|
||||
int max_qp_wr;
|
||||
|
||||
max_qp_wr = ia->ri_device->attrs.max_qp_wr;
|
||||
max_qp_wr -= RPCRDMA_BACKWARD_WRS;
|
||||
max_qp_wr -= 1;
|
||||
if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE)
|
||||
return -ENOMEM;
|
||||
if (cdata->max_requests > max_qp_wr)
|
||||
cdata->max_requests = max_qp_wr;
|
||||
ep->rep_attr.cap.max_send_wr = cdata->max_requests;
|
||||
ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
|
||||
ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
|
||||
ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
|
||||
ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
|
||||
ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
|
||||
|
||||
ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
|
||||
RPCRDMA_MAX_FMR_SGES);
|
||||
ia->ri_max_segs += 2; /* segments for head and tail buffers */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* FMR mode conveys up to 64 pages of payload per chunk segment.
|
||||
*/
|
||||
static size_t
|
||||
fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
|
||||
{
|
||||
return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
|
||||
RPCRDMA_MAX_HDR_SEGS * RPCRDMA_MAX_FMR_SGES);
|
||||
}
|
||||
|
||||
/* Use the ib_map_phys_fmr() verb to register a memory region
|
||||
* for remote access via RDMA READ or RDMA WRITE.
|
||||
*/
|
||||
static struct rpcrdma_mr_seg *
|
||||
fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
|
||||
int nsegs, bool writing, struct rpcrdma_mr **out)
|
||||
{
|
||||
struct rpcrdma_mr_seg *seg1 = seg;
|
||||
int len, pageoff, i, rc;
|
||||
struct rpcrdma_mr *mr;
|
||||
u64 *dma_pages;
|
||||
|
||||
mr = rpcrdma_mr_get(r_xprt);
|
||||
if (!mr)
|
||||
return ERR_PTR(-EAGAIN);
|
||||
|
||||
pageoff = offset_in_page(seg1->mr_offset);
|
||||
seg1->mr_offset -= pageoff; /* start of page */
|
||||
seg1->mr_len += pageoff;
|
||||
len = -pageoff;
|
||||
if (nsegs > RPCRDMA_MAX_FMR_SGES)
|
||||
nsegs = RPCRDMA_MAX_FMR_SGES;
|
||||
for (i = 0; i < nsegs;) {
|
||||
if (seg->mr_page)
|
||||
sg_set_page(&mr->mr_sg[i],
|
||||
seg->mr_page,
|
||||
seg->mr_len,
|
||||
offset_in_page(seg->mr_offset));
|
||||
else
|
||||
sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
|
||||
seg->mr_len);
|
||||
len += seg->mr_len;
|
||||
++seg;
|
||||
++i;
|
||||
/* Check for holes */
|
||||
if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
|
||||
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
|
||||
break;
|
||||
}
|
||||
mr->mr_dir = rpcrdma_data_dir(writing);
|
||||
|
||||
mr->mr_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device,
|
||||
mr->mr_sg, i, mr->mr_dir);
|
||||
if (!mr->mr_nents)
|
||||
goto out_dmamap_err;
|
||||
trace_xprtrdma_mr_map(mr);
|
||||
|
||||
for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++)
|
||||
dma_pages[i] = sg_dma_address(&mr->mr_sg[i]);
|
||||
rc = ib_map_phys_fmr(mr->fmr.fm_mr, dma_pages, mr->mr_nents,
|
||||
dma_pages[0]);
|
||||
if (rc)
|
||||
goto out_maperr;
|
||||
|
||||
mr->mr_handle = mr->fmr.fm_mr->rkey;
|
||||
mr->mr_length = len;
|
||||
mr->mr_offset = dma_pages[0] + pageoff;
|
||||
|
||||
*out = mr;
|
||||
return seg;
|
||||
|
||||
out_dmamap_err:
|
||||
pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
|
||||
mr->mr_sg, i);
|
||||
rpcrdma_mr_put(mr);
|
||||
return ERR_PTR(-EIO);
|
||||
|
||||
out_maperr:
|
||||
pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
|
||||
len, (unsigned long long)dma_pages[0],
|
||||
pageoff, mr->mr_nents, rc);
|
||||
rpcrdma_mr_unmap_and_put(mr);
|
||||
return ERR_PTR(-EIO);
|
||||
}
|
||||
|
||||
/* Post Send WR containing the RPC Call message.
|
||||
*/
|
||||
static int
|
||||
fmr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
|
||||
{
|
||||
return ib_post_send(ia->ri_id->qp, &req->rl_sendctx->sc_wr, NULL);
|
||||
}
|
||||
|
||||
/* Invalidate all memory regions that were registered for "req".
|
||||
*
|
||||
* Sleeps until it is safe for the host CPU to access the
|
||||
* previously mapped memory regions.
|
||||
*
|
||||
* Caller ensures that @mrs is not empty before the call. This
|
||||
* function empties the list.
|
||||
*/
|
||||
static void
|
||||
fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
|
||||
{
|
||||
struct rpcrdma_mr *mr;
|
||||
LIST_HEAD(unmap_list);
|
||||
int rc;
|
||||
|
||||
/* ORDER: Invalidate all of the req's MRs first
|
||||
*
|
||||
* ib_unmap_fmr() is slow, so use a single call instead
|
||||
* of one call per mapped FMR.
|
||||
*/
|
||||
list_for_each_entry(mr, mrs, mr_list) {
|
||||
dprintk("RPC: %s: unmapping fmr %p\n",
|
||||
__func__, &mr->fmr);
|
||||
trace_xprtrdma_mr_localinv(mr);
|
||||
list_add_tail(&mr->fmr.fm_mr->list, &unmap_list);
|
||||
}
|
||||
r_xprt->rx_stats.local_inv_needed++;
|
||||
rc = ib_unmap_fmr(&unmap_list);
|
||||
if (rc)
|
||||
goto out_release;
|
||||
|
||||
/* ORDER: Now DMA unmap all of the req's MRs, and return
|
||||
* them to the free MW list.
|
||||
*/
|
||||
while (!list_empty(mrs)) {
|
||||
mr = rpcrdma_mr_pop(mrs);
|
||||
list_del(&mr->fmr.fm_mr->list);
|
||||
rpcrdma_mr_unmap_and_put(mr);
|
||||
}
|
||||
|
||||
return;
|
||||
|
||||
out_release:
|
||||
pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);
|
||||
|
||||
while (!list_empty(mrs)) {
|
||||
mr = rpcrdma_mr_pop(mrs);
|
||||
list_del(&mr->fmr.fm_mr->list);
|
||||
rpcrdma_mr_recycle(mr);
|
||||
}
|
||||
}
|
||||
|
||||
const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
|
||||
.ro_map = fmr_op_map,
|
||||
.ro_send = fmr_op_send,
|
||||
.ro_unmap_sync = fmr_op_unmap_sync,
|
||||
.ro_open = fmr_op_open,
|
||||
.ro_maxpages = fmr_op_maxpages,
|
||||
.ro_init_mr = fmr_op_init_mr,
|
||||
.ro_release_mr = fmr_op_release_mr,
|
||||
.ro_displayname = "fmr",
|
||||
.ro_send_w_inv_ok = 0,
|
||||
};
|
@@ -15,21 +15,21 @@
|
||||
/* Normal operation
|
||||
*
|
||||
* A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG
|
||||
* Work Request (frwr_op_map). When the RDMA operation is finished, this
|
||||
* Work Request (frwr_map). When the RDMA operation is finished, this
|
||||
* Memory Region is invalidated using a LOCAL_INV Work Request
|
||||
* (frwr_op_unmap_sync).
|
||||
* (frwr_unmap_sync).
|
||||
*
|
||||
* Typically these Work Requests are not signaled, and neither are RDMA
|
||||
* SEND Work Requests (with the exception of signaling occasionally to
|
||||
* prevent provider work queue overflows). This greatly reduces HCA
|
||||
* interrupt workload.
|
||||
*
|
||||
* As an optimization, frwr_op_unmap marks MRs INVALID before the
|
||||
* As an optimization, frwr_unmap marks MRs INVALID before the
|
||||
* LOCAL_INV WR is posted. If posting succeeds, the MR is placed on
|
||||
* rb_mrs immediately so that no work (like managing a linked list
|
||||
* under a spinlock) is needed in the completion upcall.
|
||||
*
|
||||
* But this means that frwr_op_map() can occasionally encounter an MR
|
||||
* But this means that frwr_map() can occasionally encounter an MR
|
||||
* that is INVALID but the LOCAL_INV WR has not completed. Work Queue
|
||||
* ordering prevents a subsequent FAST_REG WR from executing against
|
||||
* that MR while it is still being invalidated.
|
||||
@@ -57,14 +57,14 @@
|
||||
* FLUSHED_LI: The MR was being invalidated when the QP entered ERROR
|
||||
* state, and the pending WR was flushed.
|
||||
*
|
||||
* When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered
|
||||
* When frwr_map encounters FLUSHED and VALID MRs, they are recovered
|
||||
* with ib_dereg_mr and then are re-initialized. Because MR recovery
|
||||
* allocates fresh resources, it is deferred to a workqueue, and the
|
||||
* recovered MRs are placed back on the rb_mrs list when recovery is
|
||||
* complete. frwr_op_map allocates another MR for the current RPC while
|
||||
* complete. frwr_map allocates another MR for the current RPC while
|
||||
* the broken MR is reset.
|
||||
*
|
||||
* To ensure that frwr_op_map doesn't encounter an MR that is marked
|
||||
* To ensure that frwr_map doesn't encounter an MR that is marked
|
||||
* INVALID but that is about to be flushed due to a previous transport
|
||||
* disconnect, the transport connect worker attempts to drain all
|
||||
* pending send queue WRs before the transport is reconnected.
|
||||
@@ -80,8 +80,13 @@
|
||||
# define RPCDBG_FACILITY RPCDBG_TRANS
|
||||
#endif
|
||||
|
||||
bool
|
||||
frwr_is_supported(struct rpcrdma_ia *ia)
|
||||
/**
|
||||
* frwr_is_supported - Check if device supports FRWR
|
||||
* @ia: interface adapter to check
|
||||
*
|
||||
* Returns true if device supports FRWR, otherwise false
|
||||
*/
|
||||
bool frwr_is_supported(struct rpcrdma_ia *ia)
|
||||
{
|
||||
struct ib_device_attr *attrs = &ia->ri_device->attrs;
|
||||
|
||||
@@ -97,15 +102,18 @@ out_not_supported:
|
||||
return false;
|
||||
}
|
||||
|
||||
static void
|
||||
frwr_op_release_mr(struct rpcrdma_mr *mr)
|
||||
/**
|
||||
* frwr_release_mr - Destroy one MR
|
||||
* @mr: MR allocated by frwr_init_mr
|
||||
*
|
||||
*/
|
||||
void frwr_release_mr(struct rpcrdma_mr *mr)
|
||||
{
|
||||
int rc;
|
||||
|
||||
rc = ib_dereg_mr(mr->frwr.fr_mr);
|
||||
if (rc)
|
||||
pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
|
||||
mr, rc);
|
||||
trace_xprtrdma_frwr_dereg(mr, rc);
|
||||
kfree(mr->mr_sg);
|
||||
kfree(mr);
|
||||
}
|
||||
@@ -117,60 +125,78 @@ static void
|
||||
frwr_mr_recycle_worker(struct work_struct *work)
|
||||
{
|
||||
struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr, mr_recycle);
|
||||
enum rpcrdma_frwr_state state = mr->frwr.fr_state;
|
||||
struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
|
||||
|
||||
trace_xprtrdma_mr_recycle(mr);
|
||||
|
||||
if (state != FRWR_FLUSHED_LI) {
|
||||
if (mr->mr_dir != DMA_NONE) {
|
||||
trace_xprtrdma_mr_unmap(mr);
|
||||
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
|
||||
mr->mr_sg, mr->mr_nents, mr->mr_dir);
|
||||
mr->mr_dir = DMA_NONE;
|
||||
}
|
||||
|
||||
spin_lock(&r_xprt->rx_buf.rb_mrlock);
|
||||
list_del(&mr->mr_all);
|
||||
r_xprt->rx_stats.mrs_recycled++;
|
||||
spin_unlock(&r_xprt->rx_buf.rb_mrlock);
|
||||
frwr_op_release_mr(mr);
|
||||
|
||||
frwr_release_mr(mr);
|
||||
}
|
||||
|
||||
static int
|
||||
frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
|
||||
/**
|
||||
* frwr_init_mr - Initialize one MR
|
||||
* @ia: interface adapter
|
||||
* @mr: generic MR to prepare for FRWR
|
||||
*
|
||||
* Returns zero if successful. Otherwise a negative errno
|
||||
* is returned.
|
||||
*/
|
||||
int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
|
||||
{
|
||||
unsigned int depth = ia->ri_max_frwr_depth;
|
||||
struct rpcrdma_frwr *frwr = &mr->frwr;
|
||||
struct scatterlist *sg;
|
||||
struct ib_mr *frmr;
|
||||
int rc;
|
||||
|
||||
frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
|
||||
if (IS_ERR(frwr->fr_mr))
|
||||
frmr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
|
||||
if (IS_ERR(frmr))
|
||||
goto out_mr_err;
|
||||
|
||||
mr->mr_sg = kcalloc(depth, sizeof(*mr->mr_sg), GFP_KERNEL);
|
||||
if (!mr->mr_sg)
|
||||
sg = kcalloc(depth, sizeof(*sg), GFP_KERNEL);
|
||||
if (!sg)
|
||||
goto out_list_err;
|
||||
|
||||
mr->frwr.fr_mr = frmr;
|
||||
mr->frwr.fr_state = FRWR_IS_INVALID;
|
||||
mr->mr_dir = DMA_NONE;
|
||||
INIT_LIST_HEAD(&mr->mr_list);
|
||||
INIT_WORK(&mr->mr_recycle, frwr_mr_recycle_worker);
|
||||
sg_init_table(mr->mr_sg, depth);
|
||||
init_completion(&frwr->fr_linv_done);
|
||||
init_completion(&mr->frwr.fr_linv_done);
|
||||
|
||||
sg_init_table(sg, depth);
|
||||
mr->mr_sg = sg;
|
||||
return 0;
|
||||
|
||||
out_mr_err:
|
||||
rc = PTR_ERR(frwr->fr_mr);
|
||||
dprintk("RPC: %s: ib_alloc_mr status %i\n",
|
||||
__func__, rc);
|
||||
rc = PTR_ERR(frmr);
|
||||
trace_xprtrdma_frwr_alloc(mr, rc);
|
||||
return rc;
|
||||
|
||||
out_list_err:
|
||||
rc = -ENOMEM;
|
||||
dprintk("RPC: %s: sg allocation failure\n",
|
||||
__func__);
|
||||
ib_dereg_mr(frwr->fr_mr);
|
||||
return rc;
|
||||
ib_dereg_mr(frmr);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* On success, sets:
|
||||
/**
|
||||
* frwr_open - Prepare an endpoint for use with FRWR
|
||||
* @ia: interface adapter this endpoint will use
|
||||
* @ep: endpoint to prepare
|
||||
* @cdata: transport parameters
|
||||
*
|
||||
* On success, sets:
|
||||
* ep->rep_attr.cap.max_send_wr
|
||||
* ep->rep_attr.cap.max_recv_wr
|
||||
* cdata->max_requests
|
||||
@@ -179,10 +205,11 @@ out_list_err:
|
||||
* And these FRWR-related fields:
|
||||
* ia->ri_max_frwr_depth
|
||||
* ia->ri_mrtype
|
||||
*
|
||||
* On failure, a negative errno is returned.
|
||||
*/
|
||||
static int
|
||||
frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
|
||||
struct rpcrdma_create_data_internal *cdata)
|
||||
int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
|
||||
struct rpcrdma_create_data_internal *cdata)
|
||||
{
|
||||
struct ib_device_attr *attrs = &ia->ri_device->attrs;
|
||||
int max_qp_wr, depth, delta;
|
||||
@@ -191,10 +218,17 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
|
||||
if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
|
||||
ia->ri_mrtype = IB_MR_TYPE_SG_GAPS;
|
||||
|
||||
ia->ri_max_frwr_depth =
|
||||
min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
|
||||
attrs->max_fast_reg_page_list_len);
|
||||
dprintk("RPC: %s: device's max FR page list len = %u\n",
|
||||
/* Quirk: Some devices advertise a large max_fast_reg_page_list_len
|
||||
* capability, but perform optimally when the MRs are not larger
|
||||
* than a page.
|
||||
*/
|
||||
if (attrs->max_sge_rd > 1)
|
||||
ia->ri_max_frwr_depth = attrs->max_sge_rd;
|
||||
else
|
||||
ia->ri_max_frwr_depth = attrs->max_fast_reg_page_list_len;
|
||||
if (ia->ri_max_frwr_depth > RPCRDMA_MAX_DATA_SEGS)
|
||||
ia->ri_max_frwr_depth = RPCRDMA_MAX_DATA_SEGS;
|
||||
dprintk("RPC: %s: max FR page list depth = %u\n",
|
||||
__func__, ia->ri_max_frwr_depth);
|
||||
|
||||
/* Add room for frwr register and invalidate WRs.
|
||||
@@ -242,20 +276,28 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
|
||||
|
||||
ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
|
||||
ia->ri_max_frwr_depth);
|
||||
ia->ri_max_segs += 2; /* segments for head and tail buffers */
|
||||
/* Reply chunks require segments for head and tail buffers */
|
||||
ia->ri_max_segs += 2;
|
||||
if (ia->ri_max_segs > RPCRDMA_MAX_HDR_SEGS)
|
||||
ia->ri_max_segs = RPCRDMA_MAX_HDR_SEGS;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* FRWR mode conveys a list of pages per chunk segment. The
|
||||
/**
|
||||
* frwr_maxpages - Compute size of largest payload
|
||||
* @r_xprt: transport
|
||||
*
|
||||
* Returns maximum size of an RPC message, in pages.
|
||||
*
|
||||
* FRWR mode conveys a list of pages per chunk segment. The
|
||||
* maximum length of that list is the FRWR page list depth.
|
||||
*/
|
||||
static size_t
|
||||
frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
|
||||
size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt)
|
||||
{
|
||||
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
|
||||
|
||||
return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
|
||||
RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frwr_depth);
|
||||
(ia->ri_max_segs - 2) * ia->ri_max_frwr_depth);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -332,12 +374,25 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
|
||||
trace_xprtrdma_wc_li_wake(wc, frwr);
|
||||
}
|
||||
|
||||
/* Post a REG_MR Work Request to register a memory region
|
||||
/**
|
||||
* frwr_map - Register a memory region
|
||||
* @r_xprt: controlling transport
|
||||
* @seg: memory region co-ordinates
|
||||
* @nsegs: number of segments remaining
|
||||
* @writing: true when RDMA Write will be used
|
||||
* @xid: XID of RPC using the registered memory
|
||||
* @out: initialized MR
|
||||
*
|
||||
* Prepare a REG_MR Work Request to register a memory region
|
||||
* for remote access via RDMA READ or RDMA WRITE.
|
||||
*
|
||||
* Returns the next segment or a negative errno pointer.
|
||||
* On success, the prepared MR is planted in @out.
|
||||
*/
|
||||
static struct rpcrdma_mr_seg *
|
||||
frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
|
||||
int nsegs, bool writing, struct rpcrdma_mr **out)
|
||||
struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
|
||||
struct rpcrdma_mr_seg *seg,
|
||||
int nsegs, bool writing, u32 xid,
|
||||
struct rpcrdma_mr **out)
|
||||
{
|
||||
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
|
||||
bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
|
||||
@@ -384,13 +439,14 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
|
||||
mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
|
||||
if (!mr->mr_nents)
|
||||
goto out_dmamap_err;
|
||||
trace_xprtrdma_mr_map(mr);
|
||||
|
||||
ibmr = frwr->fr_mr;
|
||||
n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
|
||||
if (unlikely(n != mr->mr_nents))
|
||||
goto out_mapmr_err;
|
||||
|
||||
ibmr->iova &= 0x00000000ffffffff;
|
||||
ibmr->iova |= ((u64)cpu_to_be32(xid)) << 32;
|
||||
key = (u8)(ibmr->rkey & 0x000000FF);
|
||||
ib_update_fast_reg_key(ibmr, ++key);
|
||||
|
||||
@@ -404,32 +460,35 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
|
||||
mr->mr_handle = ibmr->rkey;
|
||||
mr->mr_length = ibmr->length;
|
||||
mr->mr_offset = ibmr->iova;
|
||||
trace_xprtrdma_mr_map(mr);
|
||||
|
||||
*out = mr;
|
||||
return seg;
|
||||
|
||||
out_dmamap_err:
|
||||
pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
|
||||
mr->mr_sg, i);
|
||||
frwr->fr_state = FRWR_IS_INVALID;
|
||||
trace_xprtrdma_frwr_sgerr(mr, i);
|
||||
rpcrdma_mr_put(mr);
|
||||
return ERR_PTR(-EIO);
|
||||
|
||||
out_mapmr_err:
|
||||
pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
|
||||
frwr->fr_mr, n, mr->mr_nents);
|
||||
trace_xprtrdma_frwr_maperr(mr, n);
|
||||
rpcrdma_mr_recycle(mr);
|
||||
return ERR_PTR(-EIO);
|
||||
}
|
||||
|
||||
/* Post Send WR containing the RPC Call message.
|
||||
/**
|
||||
* frwr_send - post Send WR containing the RPC Call message
|
||||
* @ia: interface adapter
|
||||
* @req: Prepared RPC Call
|
||||
*
|
||||
* For FRMR, chain any FastReg WRs to the Send WR. Only a
|
||||
* For FRWR, chain any FastReg WRs to the Send WR. Only a
|
||||
* single ib_post_send call is needed to register memory
|
||||
* and then post the Send WR.
|
||||
*
|
||||
* Returns the result of ib_post_send.
|
||||
*/
|
||||
static int
|
||||
frwr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
|
||||
int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
|
||||
{
|
||||
struct ib_send_wr *post_wr;
|
||||
struct rpcrdma_mr *mr;
|
||||
@@ -451,15 +510,18 @@ frwr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
|
||||
}
|
||||
|
||||
/* If ib_post_send fails, the next ->send_request for
|
||||
* @req will queue these MWs for recovery.
|
||||
* @req will queue these MRs for recovery.
|
||||
*/
|
||||
return ib_post_send(ia->ri_id->qp, post_wr, NULL);
|
||||
}
|
||||
|
||||
/* Handle a remotely invalidated mr on the @mrs list
|
||||
/**
|
||||
* frwr_reminv - handle a remotely invalidated mr on the @mrs list
|
||||
* @rep: Received reply
|
||||
* @mrs: list of MRs to check
|
||||
*
|
||||
*/
|
||||
static void
|
||||
frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
|
||||
void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
|
||||
{
|
||||
struct rpcrdma_mr *mr;
|
||||
|
||||
@@ -473,7 +535,10 @@ frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
|
||||
}
|
||||
}
|
||||
|
||||
/* Invalidate all memory regions that were registered for "req".
|
||||
/**
|
||||
* frwr_unmap_sync - invalidate memory regions that were registered for @req
|
||||
* @r_xprt: controlling transport
|
||||
* @mrs: list of MRs to process
|
||||
*
|
||||
* Sleeps until it is safe for the host CPU to access the
|
||||
* previously mapped memory regions.
|
||||
@@ -481,8 +546,7 @@ frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
|
||||
* Caller ensures that @mrs is not empty before the call. This
|
||||
* function empties the list.
|
||||
*/
|
||||
static void
|
||||
frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
|
||||
void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
|
||||
{
|
||||
struct ib_send_wr *first, **prev, *last;
|
||||
const struct ib_send_wr *bad_wr;
|
||||
@@ -561,20 +625,7 @@ out_release:
|
||||
mr = container_of(frwr, struct rpcrdma_mr, frwr);
|
||||
bad_wr = bad_wr->next;
|
||||
|
||||
list_del(&mr->mr_list);
|
||||
frwr_op_release_mr(mr);
|
||||
list_del_init(&mr->mr_list);
|
||||
rpcrdma_mr_recycle(mr);
|
||||
}
|
||||
}
|
||||
|
||||
const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
|
||||
.ro_map = frwr_op_map,
|
||||
.ro_send = frwr_op_send,
|
||||
.ro_reminv = frwr_op_reminv,
|
||||
.ro_unmap_sync = frwr_op_unmap_sync,
|
||||
.ro_open = frwr_op_open,
|
||||
.ro_maxpages = frwr_op_maxpages,
|
||||
.ro_init_mr = frwr_op_init_mr,
|
||||
.ro_release_mr = frwr_op_release_mr,
|
||||
.ro_displayname = "frwr",
|
||||
.ro_send_w_inv_ok = RPCRDMA_CMP_F_SND_W_INV_OK,
|
||||
};
|
||||
|
@@ -218,11 +218,12 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
|
||||
ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
|
||||
page_base = offset_in_page(xdrbuf->page_base);
|
||||
while (len) {
|
||||
if (unlikely(!*ppages)) {
|
||||
/* XXX: Certain upper layer operations do
|
||||
* not provide receive buffer pages.
|
||||
*/
|
||||
*ppages = alloc_page(GFP_ATOMIC);
|
||||
/* ACL likes to be lazy in allocating pages - ACLs
|
||||
* are small by default but can get huge.
|
||||
*/
|
||||
if (unlikely(xdrbuf->flags & XDRBUF_SPARSE_PAGES)) {
|
||||
if (!*ppages)
|
||||
*ppages = alloc_page(GFP_ATOMIC);
|
||||
if (!*ppages)
|
||||
return -ENOBUFS;
|
||||
}
|
||||
@@ -356,8 +357,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
|
||||
return nsegs;
|
||||
|
||||
do {
|
||||
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
|
||||
false, &mr);
|
||||
seg = frwr_map(r_xprt, seg, nsegs, false, rqst->rq_xid, &mr);
|
||||
if (IS_ERR(seg))
|
||||
return PTR_ERR(seg);
|
||||
rpcrdma_mr_push(mr, &req->rl_registered);
|
||||
@@ -365,7 +365,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
|
||||
if (encode_read_segment(xdr, mr, pos) < 0)
|
||||
return -EMSGSIZE;
|
||||
|
||||
trace_xprtrdma_read_chunk(rqst->rq_task, pos, mr, nsegs);
|
||||
trace_xprtrdma_chunk_read(rqst->rq_task, pos, mr, nsegs);
|
||||
r_xprt->rx_stats.read_chunk_count++;
|
||||
nsegs -= mr->mr_nents;
|
||||
} while (nsegs);
|
||||
@@ -414,8 +414,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
|
||||
|
||||
nchunks = 0;
|
||||
do {
|
||||
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
|
||||
true, &mr);
|
||||
seg = frwr_map(r_xprt, seg, nsegs, true, rqst->rq_xid, &mr);
|
||||
if (IS_ERR(seg))
|
||||
return PTR_ERR(seg);
|
||||
rpcrdma_mr_push(mr, &req->rl_registered);
|
||||
@@ -423,7 +422,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
|
||||
if (encode_rdma_segment(xdr, mr) < 0)
|
||||
return -EMSGSIZE;
|
||||
|
||||
trace_xprtrdma_write_chunk(rqst->rq_task, mr, nsegs);
|
||||
trace_xprtrdma_chunk_write(rqst->rq_task, mr, nsegs);
|
||||
r_xprt->rx_stats.write_chunk_count++;
|
||||
r_xprt->rx_stats.total_rdma_request += mr->mr_length;
|
||||
nchunks++;
|
||||
@@ -472,8 +471,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
|
||||
|
||||
nchunks = 0;
|
||||
do {
|
||||
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
|
||||
true, &mr);
|
||||
seg = frwr_map(r_xprt, seg, nsegs, true, rqst->rq_xid, &mr);
|
||||
if (IS_ERR(seg))
|
||||
return PTR_ERR(seg);
|
||||
rpcrdma_mr_push(mr, &req->rl_registered);
|
||||
@@ -481,7 +479,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
|
||||
if (encode_rdma_segment(xdr, mr) < 0)
|
||||
return -EMSGSIZE;
|
||||
|
||||
trace_xprtrdma_reply_chunk(rqst->rq_task, mr, nsegs);
|
||||
trace_xprtrdma_chunk_reply(rqst->rq_task, mr, nsegs);
|
||||
r_xprt->rx_stats.reply_chunk_count++;
|
||||
r_xprt->rx_stats.total_rdma_request += mr->mr_length;
|
||||
nchunks++;
|
||||
@@ -667,7 +665,7 @@ out_mapping_overflow:
|
||||
|
||||
out_mapping_err:
|
||||
rpcrdma_unmap_sendctx(sc);
|
||||
pr_err("rpcrdma: Send mapping error\n");
|
||||
trace_xprtrdma_dma_maperr(sge[sge_no].addr);
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1188,17 +1186,20 @@ rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
|
||||
p = xdr_inline_decode(xdr, 2 * sizeof(*p));
|
||||
if (!p)
|
||||
break;
|
||||
dprintk("RPC: %5u: %s: server reports version error (%u-%u)\n",
|
||||
rqst->rq_task->tk_pid, __func__,
|
||||
be32_to_cpup(p), be32_to_cpu(*(p + 1)));
|
||||
dprintk("RPC: %s: server reports "
|
||||
"version error (%u-%u), xid %08x\n", __func__,
|
||||
be32_to_cpup(p), be32_to_cpu(*(p + 1)),
|
||||
be32_to_cpu(rep->rr_xid));
|
||||
break;
|
||||
case err_chunk:
|
||||
dprintk("RPC: %5u: %s: server reports header decoding error\n",
|
||||
rqst->rq_task->tk_pid, __func__);
|
||||
dprintk("RPC: %s: server reports "
|
||||
"header decoding error, xid %08x\n", __func__,
|
||||
be32_to_cpu(rep->rr_xid));
|
||||
break;
|
||||
default:
|
||||
dprintk("RPC: %5u: %s: server reports unrecognized error %d\n",
|
||||
rqst->rq_task->tk_pid, __func__, be32_to_cpup(p));
|
||||
dprintk("RPC: %s: server reports "
|
||||
"unrecognized error %d, xid %08x\n", __func__,
|
||||
be32_to_cpup(p), be32_to_cpu(rep->rr_xid));
|
||||
}
|
||||
|
||||
r_xprt->rx_stats.bad_reply_count++;
|
||||
@@ -1248,7 +1249,6 @@ out:
|
||||
out_badheader:
|
||||
trace_xprtrdma_reply_hdr(rep);
|
||||
r_xprt->rx_stats.bad_reply_count++;
|
||||
status = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -1262,8 +1262,7 @@ void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
|
||||
* RPC has relinquished all its Send Queue entries.
|
||||
*/
|
||||
if (!list_empty(&req->rl_registered))
|
||||
r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
|
||||
&req->rl_registered);
|
||||
frwr_unmap_sync(r_xprt, &req->rl_registered);
|
||||
|
||||
/* Ensure that any DMA mapped pages associated with
|
||||
* the Send of the RPC Call have been unmapped before
|
||||
@@ -1292,7 +1291,7 @@ void rpcrdma_deferred_completion(struct work_struct *work)
|
||||
|
||||
trace_xprtrdma_defer_cmp(rep);
|
||||
if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
|
||||
r_xprt->rx_ia.ri_ops->ro_reminv(rep, &req->rl_registered);
|
||||
frwr_reminv(rep, &req->rl_registered);
|
||||
rpcrdma_release_rqst(r_xprt, req);
|
||||
rpcrdma_complete_rqst(rep);
|
||||
}
|
||||
@@ -1312,11 +1311,6 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
|
||||
u32 credits;
|
||||
__be32 *p;
|
||||
|
||||
--buf->rb_posted_receives;
|
||||
|
||||
if (rep->rr_hdrbuf.head[0].iov_len == 0)
|
||||
goto out_badstatus;
|
||||
|
||||
/* Fixed transport header fields */
|
||||
xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
|
||||
rep->rr_hdrbuf.head[0].iov_base);
|
||||
@@ -1356,36 +1350,30 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
|
||||
}
|
||||
|
||||
req = rpcr_to_rdmar(rqst);
|
||||
if (req->rl_reply) {
|
||||
trace_xprtrdma_leaked_rep(rqst, req->rl_reply);
|
||||
rpcrdma_recv_buffer_put(req->rl_reply);
|
||||
}
|
||||
req->rl_reply = rep;
|
||||
rep->rr_rqst = rqst;
|
||||
clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
|
||||
|
||||
trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
|
||||
|
||||
rpcrdma_post_recvs(r_xprt, false);
|
||||
queue_work(rpcrdma_receive_wq, &rep->rr_work);
|
||||
queue_work(buf->rb_completion_wq, &rep->rr_work);
|
||||
return;
|
||||
|
||||
out_badversion:
|
||||
trace_xprtrdma_reply_vers(rep);
|
||||
goto repost;
|
||||
goto out;
|
||||
|
||||
/* The RPC transaction has already been terminated, or the header
|
||||
* is corrupt.
|
||||
*/
|
||||
out_norqst:
|
||||
spin_unlock(&xprt->queue_lock);
|
||||
trace_xprtrdma_reply_rqst(rep);
|
||||
goto repost;
|
||||
goto out;
|
||||
|
||||
out_shortreply:
|
||||
trace_xprtrdma_reply_short(rep);
|
||||
|
||||
/* If no pending RPC transaction was matched, post a replacement
|
||||
* receive buffer before returning.
|
||||
*/
|
||||
repost:
|
||||
rpcrdma_post_recvs(r_xprt, false);
|
||||
out_badstatus:
|
||||
out:
|
||||
rpcrdma_recv_buffer_put(rep);
|
||||
}
|
||||
|
@@ -200,11 +200,10 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
|
||||
svc_rdma_send_ctxt_put(rdma, ctxt);
|
||||
goto drop_connection;
|
||||
}
|
||||
return rc;
|
||||
return 0;
|
||||
|
||||
drop_connection:
|
||||
dprintk("svcrdma: failed to send bc call\n");
|
||||
xprt_disconnect_done(xprt);
|
||||
return -ENOTCONN;
|
||||
}
|
||||
|
||||
@@ -225,8 +224,11 @@ xprt_rdma_bc_send_request(struct rpc_rqst *rqst)
|
||||
|
||||
ret = -ENOTCONN;
|
||||
rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
|
||||
if (!test_bit(XPT_DEAD, &sxprt->xpt_flags))
|
||||
if (!test_bit(XPT_DEAD, &sxprt->xpt_flags)) {
|
||||
ret = rpcrdma_bc_send_request(rdma, rqst);
|
||||
if (ret == -ENOTCONN)
|
||||
svc_close_xprt(sxprt);
|
||||
}
|
||||
|
||||
mutex_unlock(&sxprt->xpt_mutex);
|
||||
|
||||
|
@@ -268,7 +268,7 @@ xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
|
||||
{
|
||||
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
|
||||
|
||||
trace_xprtrdma_inject_dsc(r_xprt);
|
||||
trace_xprtrdma_op_inject_dsc(r_xprt);
|
||||
rdma_disconnect(r_xprt->rx_ia.ri_id);
|
||||
}
|
||||
|
||||
@@ -284,7 +284,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
|
||||
{
|
||||
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
|
||||
|
||||
trace_xprtrdma_destroy(r_xprt);
|
||||
trace_xprtrdma_op_destroy(r_xprt);
|
||||
|
||||
cancel_delayed_work_sync(&r_xprt->rx_connect_worker);
|
||||
|
||||
@@ -318,17 +318,12 @@ xprt_setup_rdma(struct xprt_create *args)
|
||||
struct sockaddr *sap;
|
||||
int rc;
|
||||
|
||||
if (args->addrlen > sizeof(xprt->addr)) {
|
||||
dprintk("RPC: %s: address too large\n", __func__);
|
||||
if (args->addrlen > sizeof(xprt->addr))
|
||||
return ERR_PTR(-EBADF);
|
||||
}
|
||||
|
||||
xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0, 0);
|
||||
if (xprt == NULL) {
|
||||
dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n",
|
||||
__func__);
|
||||
if (!xprt)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
/* 60 second timeout, no retries */
|
||||
xprt->timeout = &xprt_rdma_default_timeout;
|
||||
@@ -399,7 +394,7 @@ xprt_setup_rdma(struct xprt_create *args)
|
||||
INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
|
||||
xprt_rdma_connect_worker);
|
||||
|
||||
xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt);
|
||||
xprt->max_payload = frwr_maxpages(new_xprt);
|
||||
if (xprt->max_payload == 0)
|
||||
goto out4;
|
||||
xprt->max_payload <<= PAGE_SHIFT;
|
||||
@@ -423,7 +418,7 @@ out3:
|
||||
out2:
|
||||
rpcrdma_ia_close(&new_xprt->rx_ia);
|
||||
out1:
|
||||
trace_xprtrdma_destroy(new_xprt);
|
||||
trace_xprtrdma_op_destroy(new_xprt);
|
||||
xprt_rdma_free_addresses(xprt);
|
||||
xprt_free(xprt);
|
||||
return ERR_PTR(rc);
|
||||
@@ -433,29 +428,33 @@ out1:
|
||||
* xprt_rdma_close - close a transport connection
|
||||
* @xprt: transport context
|
||||
*
|
||||
* Called during transport shutdown, reconnect, or device removal.
|
||||
* Called during autoclose or device removal.
|
||||
*
|
||||
* Caller holds @xprt's send lock to prevent activity on this
|
||||
* transport while the connection is torn down.
|
||||
*/
|
||||
static void
|
||||
xprt_rdma_close(struct rpc_xprt *xprt)
|
||||
void xprt_rdma_close(struct rpc_xprt *xprt)
|
||||
{
|
||||
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
|
||||
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
|
||||
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
|
||||
|
||||
dprintk("RPC: %s: closing xprt %p\n", __func__, xprt);
|
||||
might_sleep();
|
||||
|
||||
trace_xprtrdma_op_close(r_xprt);
|
||||
|
||||
/* Prevent marshaling and sending of new requests */
|
||||
xprt_clear_connected(xprt);
|
||||
|
||||
if (test_and_clear_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags)) {
|
||||
xprt_clear_connected(xprt);
|
||||
rpcrdma_ia_remove(ia);
|
||||
return;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (ep->rep_connected == -ENODEV)
|
||||
return;
|
||||
if (ep->rep_connected > 0)
|
||||
xprt->reestablish_timeout = 0;
|
||||
xprt_disconnect_done(xprt);
|
||||
rpcrdma_ep_disconnect(ep, ia);
|
||||
|
||||
/* Prepare @xprt for the next connection by reinitializing
|
||||
@@ -463,6 +462,10 @@ xprt_rdma_close(struct rpc_xprt *xprt)
|
||||
*/
|
||||
r_xprt->rx_buf.rb_credits = 1;
|
||||
xprt->cwnd = RPC_CWNDSHIFT;
|
||||
|
||||
out:
|
||||
++xprt->connect_cookie;
|
||||
xprt_disconnect_done(xprt);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -525,6 +528,7 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
|
||||
{
|
||||
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
|
||||
|
||||
trace_xprtrdma_op_connect(r_xprt);
|
||||
if (r_xprt->rx_ep.rep_connected != 0) {
|
||||
/* Reconnect */
|
||||
schedule_delayed_work(&r_xprt->rx_connect_worker,
|
||||
@@ -659,11 +663,11 @@ xprt_rdma_allocate(struct rpc_task *task)
|
||||
|
||||
rqst->rq_buffer = req->rl_sendbuf->rg_base;
|
||||
rqst->rq_rbuffer = req->rl_recvbuf->rg_base;
|
||||
trace_xprtrdma_allocate(task, req);
|
||||
trace_xprtrdma_op_allocate(task, req);
|
||||
return 0;
|
||||
|
||||
out_fail:
|
||||
trace_xprtrdma_allocate(task, NULL);
|
||||
trace_xprtrdma_op_allocate(task, NULL);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
@@ -682,7 +686,7 @@ xprt_rdma_free(struct rpc_task *task)
|
||||
|
||||
if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags))
|
||||
rpcrdma_release_rqst(r_xprt, req);
|
||||
trace_xprtrdma_rpc_done(task, req);
|
||||
trace_xprtrdma_op_free(task, req);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -696,8 +700,10 @@ xprt_rdma_free(struct rpc_task *task)
|
||||
* %-ENOTCONN if the caller should reconnect and call again
|
||||
* %-EAGAIN if the caller should call again
|
||||
* %-ENOBUFS if the caller should call again after a delay
|
||||
* %-EIO if a permanent error occurred and the request was not
|
||||
* sent. Do not try to send this message again.
|
||||
* %-EMSGSIZE if encoding ran out of buffer space. The request
|
||||
* was not sent. Do not try to send this message again.
|
||||
* %-EIO if an I/O error occurred. The request was not sent.
|
||||
* Do not try to send this message again.
|
||||
*/
|
||||
static int
|
||||
xprt_rdma_send_request(struct rpc_rqst *rqst)
|
||||
@@ -713,7 +719,7 @@ xprt_rdma_send_request(struct rpc_rqst *rqst)
|
||||
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
|
||||
|
||||
if (!xprt_connected(xprt))
|
||||
goto drop_connection;
|
||||
return -ENOTCONN;
|
||||
|
||||
if (!xprt_request_get_cong(xprt, rqst))
|
||||
return -EBADSLT;
|
||||
@@ -745,8 +751,8 @@ failed_marshal:
|
||||
if (rc != -ENOTCONN)
|
||||
return rc;
|
||||
drop_connection:
|
||||
xprt_disconnect_done(xprt);
|
||||
return -ENOTCONN; /* implies disconnect */
|
||||
xprt_rdma_close(xprt);
|
||||
return -ENOTCONN;
|
||||
}
|
||||
|
||||
void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
|
||||
@@ -843,58 +849,31 @@ static struct xprt_class xprt_rdma = {
|
||||
|
||||
void xprt_rdma_cleanup(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n");
|
||||
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
|
||||
if (sunrpc_table_header) {
|
||||
unregister_sysctl_table(sunrpc_table_header);
|
||||
sunrpc_table_header = NULL;
|
||||
}
|
||||
#endif
|
||||
rc = xprt_unregister_transport(&xprt_rdma);
|
||||
if (rc)
|
||||
dprintk("RPC: %s: xprt_unregister returned %i\n",
|
||||
__func__, rc);
|
||||
|
||||
rpcrdma_destroy_wq();
|
||||
|
||||
rc = xprt_unregister_transport(&xprt_rdma_bc);
|
||||
if (rc)
|
||||
dprintk("RPC: %s: xprt_unregister(bc) returned %i\n",
|
||||
__func__, rc);
|
||||
xprt_unregister_transport(&xprt_rdma);
|
||||
xprt_unregister_transport(&xprt_rdma_bc);
|
||||
}
|
||||
|
||||
int xprt_rdma_init(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
rc = rpcrdma_alloc_wq();
|
||||
rc = xprt_register_transport(&xprt_rdma);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
rc = xprt_register_transport(&xprt_rdma);
|
||||
if (rc) {
|
||||
rpcrdma_destroy_wq();
|
||||
return rc;
|
||||
}
|
||||
|
||||
rc = xprt_register_transport(&xprt_rdma_bc);
|
||||
if (rc) {
|
||||
xprt_unregister_transport(&xprt_rdma);
|
||||
rpcrdma_destroy_wq();
|
||||
return rc;
|
||||
}
|
||||
|
||||
dprintk("RPCRDMA Module Init, register RPC RDMA transport\n");
|
||||
|
||||
dprintk("Defaults:\n");
|
||||
dprintk("\tSlots %d\n"
|
||||
"\tMaxInlineRead %d\n\tMaxInlineWrite %d\n",
|
||||
xprt_rdma_slot_table_entries,
|
||||
xprt_rdma_max_inline_read, xprt_rdma_max_inline_write);
|
||||
dprintk("\tPadding 0\n\tMemreg %d\n", xprt_rdma_memreg_strategy);
|
||||
|
||||
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
|
||||
if (!sunrpc_table_header)
|
||||
sunrpc_table_header = register_sysctl_table(sunrpc_table);
|
||||
|
@@ -78,53 +78,25 @@ static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
|
||||
static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf);
|
||||
static int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp);
|
||||
static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
|
||||
static void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp);
|
||||
|
||||
struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
|
||||
|
||||
int
|
||||
rpcrdma_alloc_wq(void)
|
||||
{
|
||||
struct workqueue_struct *recv_wq;
|
||||
|
||||
recv_wq = alloc_workqueue("xprtrdma_receive",
|
||||
WQ_MEM_RECLAIM | WQ_HIGHPRI,
|
||||
0);
|
||||
if (!recv_wq)
|
||||
return -ENOMEM;
|
||||
|
||||
rpcrdma_receive_wq = recv_wq;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
rpcrdma_destroy_wq(void)
|
||||
{
|
||||
struct workqueue_struct *wq;
|
||||
|
||||
if (rpcrdma_receive_wq) {
|
||||
wq = rpcrdma_receive_wq;
|
||||
rpcrdma_receive_wq = NULL;
|
||||
destroy_workqueue(wq);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* rpcrdma_disconnect_worker - Force a disconnect
|
||||
* @work: endpoint to be disconnected
|
||||
*
|
||||
* Provider callbacks can possibly run in an IRQ context. This function
|
||||
* is invoked in a worker thread to guarantee that disconnect wake-up
|
||||
* calls are always done in process context.
|
||||
/* Wait for outstanding transport work to finish.
|
||||
*/
|
||||
static void
|
||||
rpcrdma_disconnect_worker(struct work_struct *work)
|
||||
static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
|
||||
{
|
||||
struct rpcrdma_ep *ep = container_of(work, struct rpcrdma_ep,
|
||||
rep_disconnect_worker.work);
|
||||
struct rpcrdma_xprt *r_xprt =
|
||||
container_of(ep, struct rpcrdma_xprt, rx_ep);
|
||||
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
|
||||
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
|
||||
|
||||
xprt_force_disconnect(&r_xprt->rx_xprt);
|
||||
/* Flush Receives, then wait for deferred Reply work
|
||||
* to complete.
|
||||
*/
|
||||
ib_drain_qp(ia->ri_id->qp);
|
||||
drain_workqueue(buf->rb_completion_wq);
|
||||
|
||||
/* Deferred Reply processing might have scheduled
|
||||
* local invalidations.
|
||||
*/
|
||||
ib_drain_sq(ia->ri_id->qp);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -143,15 +115,6 @@ rpcrdma_qp_event_handler(struct ib_event *event, void *context)
|
||||
rx_ep);
|
||||
|
||||
trace_xprtrdma_qp_event(r_xprt, event);
|
||||
pr_err("rpcrdma: %s on device %s connected to %s:%s\n",
|
||||
ib_event_msg(event->event), event->device->name,
|
||||
rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt));
|
||||
|
||||
if (ep->rep_connected == 1) {
|
||||
ep->rep_connected = -EIO;
|
||||
schedule_delayed_work(&ep->rep_disconnect_worker, 0);
|
||||
wake_up_all(&ep->rep_connect_wait);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -189,11 +152,13 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
|
||||
struct ib_cqe *cqe = wc->wr_cqe;
|
||||
struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep,
|
||||
rr_cqe);
|
||||
struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
|
||||
|
||||
/* WARNING: Only wr_id and status are reliable at this point */
|
||||
/* WARNING: Only wr_cqe and status are reliable at this point */
|
||||
trace_xprtrdma_wc_receive(wc);
|
||||
--r_xprt->rx_ep.rep_receive_count;
|
||||
if (wc->status != IB_WC_SUCCESS)
|
||||
goto out_fail;
|
||||
goto out_flushed;
|
||||
|
||||
/* status == SUCCESS means all fields in wc are trustworthy */
|
||||
rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len);
|
||||
@@ -204,17 +169,16 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
|
||||
rdmab_addr(rep->rr_rdmabuf),
|
||||
wc->byte_len, DMA_FROM_DEVICE);
|
||||
|
||||
out_schedule:
|
||||
rpcrdma_post_recvs(r_xprt, false);
|
||||
rpcrdma_reply_handler(rep);
|
||||
return;
|
||||
|
||||
out_fail:
|
||||
out_flushed:
|
||||
if (wc->status != IB_WC_WR_FLUSH_ERR)
|
||||
pr_err("rpcrdma: Recv: %s (%u/0x%x)\n",
|
||||
ib_wc_status_msg(wc->status),
|
||||
wc->status, wc->vendor_err);
|
||||
rpcrdma_set_xdrlen(&rep->rr_hdrbuf, 0);
|
||||
goto out_schedule;
|
||||
rpcrdma_recv_buffer_put(rep);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -316,7 +280,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
|
||||
ep->rep_connected = -EAGAIN;
|
||||
goto disconnected;
|
||||
case RDMA_CM_EVENT_DISCONNECTED:
|
||||
++xprt->connect_cookie;
|
||||
ep->rep_connected = -ECONNABORTED;
|
||||
disconnected:
|
||||
xprt_force_disconnect(xprt);
|
||||
@@ -326,10 +289,9 @@ disconnected:
|
||||
break;
|
||||
}
|
||||
|
||||
dprintk("RPC: %s: %s:%s on %s/%s: %s\n", __func__,
|
||||
dprintk("RPC: %s: %s:%s on %s/frwr: %s\n", __func__,
|
||||
rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt),
|
||||
ia->ri_device->name, ia->ri_ops->ro_displayname,
|
||||
rdma_event_msg(event->event));
|
||||
ia->ri_device->name, rdma_event_msg(event->event));
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -347,22 +309,15 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia)
|
||||
|
||||
id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_cm_event_handler,
|
||||
xprt, RDMA_PS_TCP, IB_QPT_RC);
|
||||
if (IS_ERR(id)) {
|
||||
rc = PTR_ERR(id);
|
||||
dprintk("RPC: %s: rdma_create_id() failed %i\n",
|
||||
__func__, rc);
|
||||
if (IS_ERR(id))
|
||||
return id;
|
||||
}
|
||||
|
||||
ia->ri_async_rc = -ETIMEDOUT;
|
||||
rc = rdma_resolve_addr(id, NULL,
|
||||
(struct sockaddr *)&xprt->rx_xprt.addr,
|
||||
RDMA_RESOLVE_TIMEOUT);
|
||||
if (rc) {
|
||||
dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
|
||||
__func__, rc);
|
||||
if (rc)
|
||||
goto out;
|
||||
}
|
||||
rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
|
||||
if (rc < 0) {
|
||||
trace_xprtrdma_conn_tout(xprt);
|
||||
@@ -375,11 +330,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia)
|
||||
|
||||
ia->ri_async_rc = -ETIMEDOUT;
|
||||
rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
|
||||
if (rc) {
|
||||
dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
|
||||
__func__, rc);
|
||||
if (rc)
|
||||
goto out;
|
||||
}
|
||||
rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
|
||||
if (rc < 0) {
|
||||
trace_xprtrdma_conn_tout(xprt);
|
||||
@@ -429,16 +381,8 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt)
|
||||
|
||||
switch (xprt_rdma_memreg_strategy) {
|
||||
case RPCRDMA_FRWR:
|
||||
if (frwr_is_supported(ia)) {
|
||||
ia->ri_ops = &rpcrdma_frwr_memreg_ops;
|
||||
if (frwr_is_supported(ia))
|
||||
break;
|
||||
}
|
||||
/*FALLTHROUGH*/
|
||||
case RPCRDMA_MTHCAFMR:
|
||||
if (fmr_is_supported(ia)) {
|
||||
ia->ri_ops = &rpcrdma_fmr_memreg_ops;
|
||||
break;
|
||||
}
|
||||
/*FALLTHROUGH*/
|
||||
default:
|
||||
pr_err("rpcrdma: Device %s does not support memreg mode %d\n",
|
||||
@@ -481,7 +425,7 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
|
||||
* connection is already gone.
|
||||
*/
|
||||
if (ia->ri_id->qp) {
|
||||
ib_drain_qp(ia->ri_id->qp);
|
||||
rpcrdma_xprt_drain(r_xprt);
|
||||
rdma_destroy_qp(ia->ri_id);
|
||||
ia->ri_id->qp = NULL;
|
||||
}
|
||||
@@ -552,7 +496,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
|
||||
}
|
||||
ia->ri_max_send_sges = max_sge;
|
||||
|
||||
rc = ia->ri_ops->ro_open(ia, ep, cdata);
|
||||
rc = frwr_open(ia, ep, cdata);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
@@ -579,16 +523,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
|
||||
cdata->max_requests >> 2);
|
||||
ep->rep_send_count = ep->rep_send_batch;
|
||||
init_waitqueue_head(&ep->rep_connect_wait);
|
||||
INIT_DELAYED_WORK(&ep->rep_disconnect_worker,
|
||||
rpcrdma_disconnect_worker);
|
||||
ep->rep_receive_count = 0;
|
||||
|
||||
sendcq = ib_alloc_cq(ia->ri_device, NULL,
|
||||
ep->rep_attr.cap.max_send_wr + 1,
|
||||
1, IB_POLL_WORKQUEUE);
|
||||
if (IS_ERR(sendcq)) {
|
||||
rc = PTR_ERR(sendcq);
|
||||
dprintk("RPC: %s: failed to create send CQ: %i\n",
|
||||
__func__, rc);
|
||||
goto out1;
|
||||
}
|
||||
|
||||
@@ -597,8 +538,6 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
|
||||
0, IB_POLL_WORKQUEUE);
|
||||
if (IS_ERR(recvcq)) {
|
||||
rc = PTR_ERR(recvcq);
|
||||
dprintk("RPC: %s: failed to create recv CQ: %i\n",
|
||||
__func__, rc);
|
||||
goto out2;
|
||||
}
|
||||
|
||||
@@ -611,7 +550,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
|
||||
/* Prepare RDMA-CM private message */
|
||||
pmsg->cp_magic = rpcrdma_cmp_magic;
|
||||
pmsg->cp_version = RPCRDMA_CMP_VERSION;
|
||||
pmsg->cp_flags |= ia->ri_ops->ro_send_w_inv_ok;
|
||||
pmsg->cp_flags |= RPCRDMA_CMP_F_SND_W_INV_OK;
|
||||
pmsg->cp_send_size = rpcrdma_encode_buffer_size(cdata->inline_wsize);
|
||||
pmsg->cp_recv_size = rpcrdma_encode_buffer_size(cdata->inline_rsize);
|
||||
ep->rep_remote_cma.private_data = pmsg;
|
||||
@@ -653,8 +592,6 @@ out1:
|
||||
void
|
||||
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
|
||||
{
|
||||
cancel_delayed_work_sync(&ep->rep_disconnect_worker);
|
||||
|
||||
if (ia->ri_id && ia->ri_id->qp) {
|
||||
rpcrdma_ep_disconnect(ep, ia);
|
||||
rdma_destroy_qp(ia->ri_id);
|
||||
@@ -740,11 +677,8 @@ rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
|
||||
}
|
||||
|
||||
err = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
|
||||
if (err) {
|
||||
dprintk("RPC: %s: rdma_create_qp returned %d\n",
|
||||
__func__, err);
|
||||
if (err)
|
||||
goto out_destroy;
|
||||
}
|
||||
|
||||
/* Atomically replace the transport's ID and QP. */
|
||||
rc = 0;
|
||||
@@ -775,8 +709,6 @@ retry:
|
||||
dprintk("RPC: %s: connecting...\n", __func__);
|
||||
rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
|
||||
if (rc) {
|
||||
dprintk("RPC: %s: rdma_create_qp failed %i\n",
|
||||
__func__, rc);
|
||||
rc = -ENETUNREACH;
|
||||
goto out_noupdate;
|
||||
}
|
||||
@@ -798,11 +730,8 @@ retry:
|
||||
rpcrdma_post_recvs(r_xprt, true);
|
||||
|
||||
rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
|
||||
if (rc) {
|
||||
dprintk("RPC: %s: rdma_connect() failed with %i\n",
|
||||
__func__, rc);
|
||||
if (rc)
|
||||
goto out;
|
||||
}
|
||||
|
||||
wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
|
||||
if (ep->rep_connected <= 0) {
|
||||
@@ -822,8 +751,10 @@ out_noupdate:
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
* rpcrdma_ep_disconnect
|
||||
/**
|
||||
* rpcrdma_ep_disconnect - Disconnect underlying transport
|
||||
* @ep: endpoint to disconnect
|
||||
* @ia: associated interface adapter
|
||||
*
|
||||
* This is separate from destroy to facilitate the ability
|
||||
* to reconnect without recreating the endpoint.
|
||||
@@ -834,19 +765,20 @@ out_noupdate:
|
||||
void
|
||||
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
|
||||
{
|
||||
struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt,
|
||||
rx_ep);
|
||||
int rc;
|
||||
|
||||
/* returns without wait if ID is not connected */
|
||||
rc = rdma_disconnect(ia->ri_id);
|
||||
if (!rc)
|
||||
/* returns without wait if not connected */
|
||||
wait_event_interruptible(ep->rep_connect_wait,
|
||||
ep->rep_connected != 1);
|
||||
else
|
||||
ep->rep_connected = rc;
|
||||
trace_xprtrdma_disconnect(container_of(ep, struct rpcrdma_xprt,
|
||||
rx_ep), rc);
|
||||
trace_xprtrdma_disconnect(r_xprt, rc);
|
||||
|
||||
ib_drain_qp(ia->ri_id->qp);
|
||||
rpcrdma_xprt_drain(r_xprt);
|
||||
}
|
||||
|
||||
/* Fixed-size circular FIFO queue. This implementation is wait-free and
|
||||
@@ -1034,7 +966,7 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
|
||||
if (!mr)
|
||||
break;
|
||||
|
||||
rc = ia->ri_ops->ro_init_mr(ia, mr);
|
||||
rc = frwr_init_mr(ia, mr);
|
||||
if (rc) {
|
||||
kfree(mr);
|
||||
break;
|
||||
@@ -1089,9 +1021,9 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
|
||||
req->rl_buffer = buffer;
|
||||
INIT_LIST_HEAD(&req->rl_registered);
|
||||
|
||||
spin_lock(&buffer->rb_reqslock);
|
||||
spin_lock(&buffer->rb_lock);
|
||||
list_add(&req->rl_all, &buffer->rb_allreqs);
|
||||
spin_unlock(&buffer->rb_reqslock);
|
||||
spin_unlock(&buffer->rb_lock);
|
||||
return req;
|
||||
}
|
||||
|
||||
@@ -1134,8 +1066,6 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp)
|
||||
out_free:
|
||||
kfree(rep);
|
||||
out:
|
||||
dprintk("RPC: %s: reply buffer %d alloc failed\n",
|
||||
__func__, rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
@@ -1159,7 +1089,6 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
|
||||
|
||||
INIT_LIST_HEAD(&buf->rb_send_bufs);
|
||||
INIT_LIST_HEAD(&buf->rb_allreqs);
|
||||
spin_lock_init(&buf->rb_reqslock);
|
||||
for (i = 0; i < buf->rb_max_requests; i++) {
|
||||
struct rpcrdma_req *req;
|
||||
|
||||
@@ -1174,13 +1103,19 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
|
||||
}
|
||||
|
||||
buf->rb_credits = 1;
|
||||
buf->rb_posted_receives = 0;
|
||||
INIT_LIST_HEAD(&buf->rb_recv_bufs);
|
||||
|
||||
rc = rpcrdma_sendctxs_create(r_xprt);
|
||||
if (rc)
|
||||
goto out;
|
||||
|
||||
buf->rb_completion_wq = alloc_workqueue("rpcrdma-%s",
|
||||
WQ_MEM_RECLAIM | WQ_HIGHPRI,
|
||||
0,
|
||||
r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR]);
|
||||
if (!buf->rb_completion_wq)
|
||||
goto out;
|
||||
|
||||
return 0;
|
||||
out:
|
||||
rpcrdma_buffer_destroy(buf);
|
||||
@@ -1194,9 +1129,18 @@ rpcrdma_destroy_rep(struct rpcrdma_rep *rep)
|
||||
kfree(rep);
|
||||
}
|
||||
|
||||
/**
|
||||
* rpcrdma_req_destroy - Destroy an rpcrdma_req object
|
||||
* @req: unused object to be destroyed
|
||||
*
|
||||
* This function assumes that the caller prevents concurrent device
|
||||
* unload and transport tear-down.
|
||||
*/
|
||||
void
|
||||
rpcrdma_destroy_req(struct rpcrdma_req *req)
|
||||
rpcrdma_req_destroy(struct rpcrdma_req *req)
|
||||
{
|
||||
list_del(&req->rl_all);
|
||||
|
||||
rpcrdma_free_regbuf(req->rl_recvbuf);
|
||||
rpcrdma_free_regbuf(req->rl_sendbuf);
|
||||
rpcrdma_free_regbuf(req->rl_rdmabuf);
|
||||
@@ -1208,7 +1152,6 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
|
||||
{
|
||||
struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
|
||||
rx_buf);
|
||||
struct rpcrdma_ia *ia = rdmab_to_ia(buf);
|
||||
struct rpcrdma_mr *mr;
|
||||
unsigned int count;
|
||||
|
||||
@@ -1224,7 +1167,7 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
|
||||
if (!list_empty(&mr->mr_list))
|
||||
list_del(&mr->mr_list);
|
||||
|
||||
ia->ri_ops->ro_release_mr(mr);
|
||||
frwr_release_mr(mr);
|
||||
count++;
|
||||
spin_lock(&buf->rb_mrlock);
|
||||
}
|
||||
@@ -1234,11 +1177,24 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
|
||||
dprintk("RPC: %s: released %u MRs\n", __func__, count);
|
||||
}
|
||||
|
||||
/**
|
||||
* rpcrdma_buffer_destroy - Release all hw resources
|
||||
* @buf: root control block for resources
|
||||
*
|
||||
* ORDERING: relies on a prior ib_drain_qp :
|
||||
* - No more Send or Receive completions can occur
|
||||
* - All MRs, reps, and reqs are returned to their free lists
|
||||
*/
|
||||
void
|
||||
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
|
||||
{
|
||||
cancel_delayed_work_sync(&buf->rb_refresh_worker);
|
||||
|
||||
if (buf->rb_completion_wq) {
|
||||
destroy_workqueue(buf->rb_completion_wq);
|
||||
buf->rb_completion_wq = NULL;
|
||||
}
|
||||
|
||||
rpcrdma_sendctxs_destroy(buf);
|
||||
|
||||
while (!list_empty(&buf->rb_recv_bufs)) {
|
||||
@@ -1250,19 +1206,14 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
|
||||
rpcrdma_destroy_rep(rep);
|
||||
}
|
||||
|
||||
spin_lock(&buf->rb_reqslock);
|
||||
while (!list_empty(&buf->rb_allreqs)) {
|
||||
while (!list_empty(&buf->rb_send_bufs)) {
|
||||
struct rpcrdma_req *req;
|
||||
|
||||
req = list_first_entry(&buf->rb_allreqs,
|
||||
struct rpcrdma_req, rl_all);
|
||||
list_del(&req->rl_all);
|
||||
|
||||
spin_unlock(&buf->rb_reqslock);
|
||||
rpcrdma_destroy_req(req);
|
||||
spin_lock(&buf->rb_reqslock);
|
||||
req = list_first_entry(&buf->rb_send_bufs,
|
||||
struct rpcrdma_req, rl_list);
|
||||
list_del(&req->rl_list);
|
||||
rpcrdma_req_destroy(req);
|
||||
}
|
||||
spin_unlock(&buf->rb_reqslock);
|
||||
|
||||
rpcrdma_mrs_destroy(buf);
|
||||
}
|
||||
@@ -1329,9 +1280,12 @@ rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr)
|
||||
{
|
||||
struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
|
||||
|
||||
trace_xprtrdma_mr_unmap(mr);
|
||||
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
|
||||
mr->mr_sg, mr->mr_nents, mr->mr_dir);
|
||||
if (mr->mr_dir != DMA_NONE) {
|
||||
trace_xprtrdma_mr_unmap(mr);
|
||||
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
|
||||
mr->mr_sg, mr->mr_nents, mr->mr_dir);
|
||||
mr->mr_dir = DMA_NONE;
|
||||
}
|
||||
__rpcrdma_mr_put(&r_xprt->rx_buf, mr);
|
||||
}
|
||||
|
||||
@@ -1410,7 +1364,7 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
|
||||
*
|
||||
* xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
|
||||
* receiving the payload of RDMA RECV operations. During Long Calls
|
||||
* or Replies they may be registered externally via ro_map.
|
||||
* or Replies they may be registered externally via frwr_map.
|
||||
*/
|
||||
struct rpcrdma_regbuf *
|
||||
rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction,
|
||||
@@ -1446,8 +1400,10 @@ __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
|
||||
(void *)rb->rg_base,
|
||||
rdmab_length(rb),
|
||||
rb->rg_direction);
|
||||
if (ib_dma_mapping_error(device, rdmab_addr(rb)))
|
||||
if (ib_dma_mapping_error(device, rdmab_addr(rb))) {
|
||||
trace_xprtrdma_dma_maperr(rdmab_addr(rb));
|
||||
return false;
|
||||
}
|
||||
|
||||
rb->rg_device = device;
|
||||
rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey;
|
||||
@@ -1479,10 +1435,14 @@ rpcrdma_free_regbuf(struct rpcrdma_regbuf *rb)
|
||||
kfree(rb);
|
||||
}
|
||||
|
||||
/*
|
||||
* Prepost any receive buffer, then post send.
|
||||
/**
|
||||
* rpcrdma_ep_post - Post WRs to a transport's Send Queue
|
||||
* @ia: transport's device information
|
||||
* @ep: transport's RDMA endpoint information
|
||||
* @req: rpcrdma_req containing the Send WR to post
|
||||
*
|
||||
* Receive buffer is donated to hardware, reclaimed upon recv completion.
|
||||
* Returns 0 if the post was successful, otherwise -ENOTCONN
|
||||
* is returned.
|
||||
*/
|
||||
int
|
||||
rpcrdma_ep_post(struct rpcrdma_ia *ia,
|
||||
@@ -1501,32 +1461,27 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
|
||||
--ep->rep_send_count;
|
||||
}
|
||||
|
||||
rc = ia->ri_ops->ro_send(ia, req);
|
||||
rc = frwr_send(ia, req);
|
||||
trace_xprtrdma_post_send(req, rc);
|
||||
if (rc)
|
||||
return -ENOTCONN;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* rpcrdma_post_recvs - Maybe post some Receive buffers
|
||||
* @r_xprt: controlling transport
|
||||
* @temp: when true, allocate temp rpcrdma_rep objects
|
||||
*
|
||||
*/
|
||||
void
|
||||
static void
|
||||
rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
|
||||
{
|
||||
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
|
||||
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
|
||||
struct ib_recv_wr *wr, *bad_wr;
|
||||
int needed, count, rc;
|
||||
|
||||
rc = 0;
|
||||
count = 0;
|
||||
needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1);
|
||||
if (buf->rb_posted_receives > needed)
|
||||
if (ep->rep_receive_count > needed)
|
||||
goto out;
|
||||
needed -= buf->rb_posted_receives;
|
||||
needed -= ep->rep_receive_count;
|
||||
|
||||
count = 0;
|
||||
wr = NULL;
|
||||
@@ -1574,7 +1529,7 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
|
||||
--count;
|
||||
}
|
||||
}
|
||||
buf->rb_posted_receives += count;
|
||||
ep->rep_receive_count += count;
|
||||
out:
|
||||
trace_xprtrdma_post_recvs(r_xprt, count, rc);
|
||||
}
|
||||
|
@@ -66,7 +66,6 @@
|
||||
* Interface Adapter -- one per transport instance
|
||||
*/
|
||||
struct rpcrdma_ia {
|
||||
const struct rpcrdma_memreg_ops *ri_ops;
|
||||
struct ib_device *ri_device;
|
||||
struct rdma_cm_id *ri_id;
|
||||
struct ib_pd *ri_pd;
|
||||
@@ -81,8 +80,6 @@ struct rpcrdma_ia {
|
||||
bool ri_implicit_roundup;
|
||||
enum ib_mr_type ri_mrtype;
|
||||
unsigned long ri_flags;
|
||||
struct ib_qp_attr ri_qp_attr;
|
||||
struct ib_qp_init_attr ri_qp_init_attr;
|
||||
};
|
||||
|
||||
enum {
|
||||
@@ -101,7 +98,7 @@ struct rpcrdma_ep {
|
||||
wait_queue_head_t rep_connect_wait;
|
||||
struct rpcrdma_connect_private rep_cm_private;
|
||||
struct rdma_conn_param rep_remote_cma;
|
||||
struct delayed_work rep_disconnect_worker;
|
||||
int rep_receive_count;
|
||||
};
|
||||
|
||||
/* Pre-allocate extra Work Requests for handling backward receives
|
||||
@@ -262,20 +259,12 @@ struct rpcrdma_frwr {
|
||||
};
|
||||
};
|
||||
|
||||
struct rpcrdma_fmr {
|
||||
struct ib_fmr *fm_mr;
|
||||
u64 *fm_physaddrs;
|
||||
};
|
||||
|
||||
struct rpcrdma_mr {
|
||||
struct list_head mr_list;
|
||||
struct scatterlist *mr_sg;
|
||||
int mr_nents;
|
||||
enum dma_data_direction mr_dir;
|
||||
union {
|
||||
struct rpcrdma_fmr fmr;
|
||||
struct rpcrdma_frwr frwr;
|
||||
};
|
||||
struct rpcrdma_frwr frwr;
|
||||
struct rpcrdma_xprt *mr_xprt;
|
||||
u32 mr_handle;
|
||||
u32 mr_length;
|
||||
@@ -401,20 +390,18 @@ struct rpcrdma_buffer {
|
||||
spinlock_t rb_lock; /* protect buf lists */
|
||||
struct list_head rb_send_bufs;
|
||||
struct list_head rb_recv_bufs;
|
||||
struct list_head rb_allreqs;
|
||||
|
||||
unsigned long rb_flags;
|
||||
u32 rb_max_requests;
|
||||
u32 rb_credits; /* most recent credit grant */
|
||||
int rb_posted_receives;
|
||||
|
||||
u32 rb_bc_srv_max_requests;
|
||||
spinlock_t rb_reqslock; /* protect rb_allreqs */
|
||||
struct list_head rb_allreqs;
|
||||
|
||||
u32 rb_bc_max_requests;
|
||||
|
||||
struct workqueue_struct *rb_completion_wq;
|
||||
struct delayed_work rb_refresh_worker;
|
||||
};
|
||||
#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
|
||||
|
||||
/* rb_flags */
|
||||
enum {
|
||||
@@ -464,35 +451,6 @@ struct rpcrdma_stats {
|
||||
unsigned long bcall_count;
|
||||
};
|
||||
|
||||
/*
|
||||
* Per-registration mode operations
|
||||
*/
|
||||
struct rpcrdma_xprt;
|
||||
struct rpcrdma_memreg_ops {
|
||||
struct rpcrdma_mr_seg *
|
||||
(*ro_map)(struct rpcrdma_xprt *,
|
||||
struct rpcrdma_mr_seg *, int, bool,
|
||||
struct rpcrdma_mr **);
|
||||
int (*ro_send)(struct rpcrdma_ia *ia,
|
||||
struct rpcrdma_req *req);
|
||||
void (*ro_reminv)(struct rpcrdma_rep *rep,
|
||||
struct list_head *mrs);
|
||||
void (*ro_unmap_sync)(struct rpcrdma_xprt *,
|
||||
struct list_head *);
|
||||
int (*ro_open)(struct rpcrdma_ia *,
|
||||
struct rpcrdma_ep *,
|
||||
struct rpcrdma_create_data_internal *);
|
||||
size_t (*ro_maxpages)(struct rpcrdma_xprt *);
|
||||
int (*ro_init_mr)(struct rpcrdma_ia *,
|
||||
struct rpcrdma_mr *);
|
||||
void (*ro_release_mr)(struct rpcrdma_mr *mr);
|
||||
const char *ro_displayname;
|
||||
const int ro_send_w_inv_ok;
|
||||
};
|
||||
|
||||
extern const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops;
|
||||
extern const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops;
|
||||
|
||||
/*
|
||||
* RPCRDMA transport -- encapsulates the structures above for
|
||||
* integration with RPC.
|
||||
@@ -544,10 +502,6 @@ extern unsigned int xprt_rdma_memreg_strategy;
|
||||
int rpcrdma_ia_open(struct rpcrdma_xprt *xprt);
|
||||
void rpcrdma_ia_remove(struct rpcrdma_ia *ia);
|
||||
void rpcrdma_ia_close(struct rpcrdma_ia *);
|
||||
bool frwr_is_supported(struct rpcrdma_ia *);
|
||||
bool fmr_is_supported(struct rpcrdma_ia *);
|
||||
|
||||
extern struct workqueue_struct *rpcrdma_receive_wq;
|
||||
|
||||
/*
|
||||
* Endpoint calls - xprtrdma/verbs.c
|
||||
@@ -560,13 +514,12 @@ void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
|
||||
|
||||
int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
|
||||
struct rpcrdma_req *);
|
||||
void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp);
|
||||
|
||||
/*
|
||||
* Buffer calls - xprtrdma/verbs.c
|
||||
*/
|
||||
struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
|
||||
void rpcrdma_destroy_req(struct rpcrdma_req *);
|
||||
void rpcrdma_req_destroy(struct rpcrdma_req *req);
|
||||
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
|
||||
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
|
||||
struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
|
||||
@@ -604,9 +557,6 @@ rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
|
||||
return __rpcrdma_dma_map_regbuf(ia, rb);
|
||||
}
|
||||
|
||||
int rpcrdma_alloc_wq(void);
|
||||
void rpcrdma_destroy_wq(void);
|
||||
|
||||
/*
|
||||
* Wrappers for chunk registration, shared by read/write chunk code.
|
||||
*/
|
||||
@@ -617,6 +567,23 @@ rpcrdma_data_dir(bool writing)
|
||||
return writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
|
||||
}
|
||||
|
||||
/* Memory registration calls xprtrdma/frwr_ops.c
|
||||
*/
|
||||
bool frwr_is_supported(struct rpcrdma_ia *);
|
||||
int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
|
||||
struct rpcrdma_create_data_internal *cdata);
|
||||
int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr);
|
||||
void frwr_release_mr(struct rpcrdma_mr *mr);
|
||||
size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt);
|
||||
struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
|
||||
struct rpcrdma_mr_seg *seg,
|
||||
int nsegs, bool writing, u32 xid,
|
||||
struct rpcrdma_mr **mr);
|
||||
int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req);
|
||||
void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs);
|
||||
void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt,
|
||||
struct list_head *mrs);
|
||||
|
||||
/*
|
||||
* RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
|
||||
*/
|
||||
@@ -653,6 +620,7 @@ static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
|
||||
extern unsigned int xprt_rdma_max_inline_read;
|
||||
void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
|
||||
void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
|
||||
void xprt_rdma_close(struct rpc_xprt *xprt);
|
||||
void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq);
|
||||
int xprt_rdma_init(void);
|
||||
void xprt_rdma_cleanup(void);
|
||||
|
@@ -68,8 +68,6 @@ static unsigned int xprt_max_tcp_slot_table_entries = RPC_MAX_SLOT_TABLE;
|
||||
static unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
|
||||
static unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
|
||||
|
||||
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
|
||||
|
||||
#define XS_TCP_LINGER_TO (15U * HZ)
|
||||
static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO;
|
||||
|
||||
@@ -159,8 +157,6 @@ static struct ctl_table sunrpc_table[] = {
|
||||
{ },
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Wait duration for a reply from the RPC portmapper.
|
||||
*/
|
||||
@@ -1589,6 +1585,7 @@ static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t
|
||||
|
||||
/**
|
||||
* xs_udp_timer - called when a retransmit timeout occurs on a UDP transport
|
||||
* @xprt: controlling transport
|
||||
* @task: task that timed out
|
||||
*
|
||||
* Adjust the congestion window after a retransmit timeout has occurred.
|
||||
@@ -2246,6 +2243,7 @@ out:
|
||||
|
||||
/**
|
||||
* xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint
|
||||
* @work: queued work item
|
||||
*
|
||||
* Invoked by a work queue tasklet.
|
||||
*/
|
||||
@@ -3095,10 +3093,8 @@ static struct xprt_class xs_bc_tcp_transport = {
|
||||
*/
|
||||
int init_socket_xprt(void)
|
||||
{
|
||||
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
|
||||
if (!sunrpc_table_header)
|
||||
sunrpc_table_header = register_sysctl_table(sunrpc_table);
|
||||
#endif
|
||||
|
||||
xprt_register_transport(&xs_local_transport);
|
||||
xprt_register_transport(&xs_udp_transport);
|
||||
@@ -3114,12 +3110,10 @@ int init_socket_xprt(void)
|
||||
*/
|
||||
void cleanup_socket_xprt(void)
|
||||
{
|
||||
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
|
||||
if (sunrpc_table_header) {
|
||||
unregister_sysctl_table(sunrpc_table_header);
|
||||
sunrpc_table_header = NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
xprt_unregister_transport(&xs_local_transport);
|
||||
xprt_unregister_transport(&xs_udp_transport);
|
||||
|
新增問題並參考
封鎖使用者