Merge tag 'nfs-for-4.17-1' of git://git.linux-nfs.org/projects/anna/linux-nfs
Pull NFS client updates from Anna Schumaker: "Stable bugfixes: - xprtrdma: Fix corner cases when handling device removal # v4.12+ - xprtrdma: Fix latency regression on NUMA NFS/RDMA clients # v4.15+ Features: - New sunrpc tracepoint for RPC pings - Finer grained NFSv4 attribute checking - Don't unnecessarily return NFS v4 delegations Other bugfixes and cleanups: - Several other small NFSoRDMA cleanups - Improvements to the sunrpc RTT measurements - A few sunrpc tracepoint cleanups - Various fixes for NFS v4 lock notifications - Various sunrpc and NFS v4 XDR encoding cleanups - Switch to the ida_simple API - Fix NFSv4.1 exclusive create - Forget acl cache after setattr operation - Don't advance the nfs_entry readdir cookie if xdr decoding fails" * tag 'nfs-for-4.17-1' of git://git.linux-nfs.org/projects/anna/linux-nfs: (47 commits) NFS: advance nfs_entry cookie only after decoding completes successfully NFSv3/acl: forget acl cache after setattr NFSv4.1: Fix exclusive create NFSv4: Declare the size up to date after it was set. nfs: Use ida_simple API NFSv4: Fix the nfs_inode_set_delegation() arguments NFSv4: Clean up CB_GETATTR encoding NFSv4: Don't ask for attributes when ACCESS is protected by a delegation NFSv4: Add a helper to encode/decode struct timespec NFSv4: Clean up encode_attrs NFSv4; Clean up XDR encoding of type bitmap4 NFSv4: Allow GFP_NOIO sleeps in decode_attr_owner/decode_attr_group SUNRPC: Add a helper for encoding opaque data inline SUNRPC: Add helpers for decoding opaque and string types NFSv4: Ignore change attribute invalidations if we hold a delegation NFS: More fine grained attribute tracking NFS: Don't force unnecessary cache invalidation in nfs_update_inode() NFS: Don't redirty the attribute cache in nfs_wcc_update_inode() NFS: Don't force a revalidation of all attributes if change is missing NFS: Convert NFS_INO_INVALID flags to unsigned long ...
This commit is contained in:
@@ -1887,7 +1887,7 @@ call_connect_status(struct rpc_task *task)
|
||||
|
||||
dprint_status(task);
|
||||
|
||||
trace_rpc_connect_status(task, status);
|
||||
trace_rpc_connect_status(task);
|
||||
task->tk_status = 0;
|
||||
switch (status) {
|
||||
case -ECONNREFUSED:
|
||||
@@ -2014,6 +2014,9 @@ call_transmit_status(struct rpc_task *task)
|
||||
case -EPERM:
|
||||
if (RPC_IS_SOFTCONN(task)) {
|
||||
xprt_end_transmit(task);
|
||||
if (!task->tk_msg.rpc_proc->p_proc)
|
||||
trace_xprt_ping(task->tk_xprt,
|
||||
task->tk_status);
|
||||
rpc_exit(task, task->tk_status);
|
||||
break;
|
||||
}
|
||||
@@ -2112,6 +2115,9 @@ call_status(struct rpc_task *task)
|
||||
struct rpc_rqst *req = task->tk_rqstp;
|
||||
int status;
|
||||
|
||||
if (!task->tk_msg.rpc_proc->p_proc)
|
||||
trace_xprt_ping(task->tk_xprt, task->tk_status);
|
||||
|
||||
if (req->rq_reply_bytes_recvd > 0 && !req->rq_bytes_sent)
|
||||
task->tk_status = req->rq_reply_bytes_recvd;
|
||||
|
||||
|
@@ -276,7 +276,7 @@ static void rpc_set_active(struct rpc_task *task)
|
||||
{
|
||||
rpc_task_set_debuginfo(task);
|
||||
set_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
|
||||
trace_rpc_task_begin(task->tk_client, task, NULL);
|
||||
trace_rpc_task_begin(task, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -291,7 +291,7 @@ static int rpc_complete_task(struct rpc_task *task)
|
||||
unsigned long flags;
|
||||
int ret;
|
||||
|
||||
trace_rpc_task_complete(task->tk_client, task, NULL);
|
||||
trace_rpc_task_complete(task, NULL);
|
||||
|
||||
spin_lock_irqsave(&wq->lock, flags);
|
||||
clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
|
||||
@@ -358,7 +358,7 @@ static void __rpc_sleep_on_priority(struct rpc_wait_queue *q,
|
||||
dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n",
|
||||
task->tk_pid, rpc_qname(q), jiffies);
|
||||
|
||||
trace_rpc_task_sleep(task->tk_client, task, q);
|
||||
trace_rpc_task_sleep(task, q);
|
||||
|
||||
__rpc_add_wait_queue(q, task, queue_priority);
|
||||
|
||||
@@ -428,7 +428,7 @@ static void __rpc_do_wake_up_task_on_wq(struct workqueue_struct *wq,
|
||||
return;
|
||||
}
|
||||
|
||||
trace_rpc_task_wakeup(task->tk_client, task, queue);
|
||||
trace_rpc_task_wakeup(task, queue);
|
||||
|
||||
__rpc_remove_wait_queue(queue, task);
|
||||
|
||||
@@ -780,7 +780,7 @@ static void __rpc_execute(struct rpc_task *task)
|
||||
}
|
||||
if (!do_action)
|
||||
break;
|
||||
trace_rpc_task_run_action(task->tk_client, task, do_action);
|
||||
trace_rpc_task_run_action(task, do_action);
|
||||
do_action(task);
|
||||
|
||||
/*
|
||||
|
@@ -24,6 +24,8 @@
|
||||
#include <linux/sunrpc/metrics.h>
|
||||
#include <linux/rcupdate.h>
|
||||
|
||||
#include <trace/events/sunrpc.h>
|
||||
|
||||
#include "netns.h"
|
||||
|
||||
#define RPCDBG_FACILITY RPCDBG_MISC
|
||||
@@ -148,7 +150,7 @@ void rpc_count_iostats_metrics(const struct rpc_task *task,
|
||||
struct rpc_iostats *op_metrics)
|
||||
{
|
||||
struct rpc_rqst *req = task->tk_rqstp;
|
||||
ktime_t delta, now;
|
||||
ktime_t backlog, execute, now;
|
||||
|
||||
if (!op_metrics || !req)
|
||||
return;
|
||||
@@ -164,16 +166,20 @@ void rpc_count_iostats_metrics(const struct rpc_task *task,
|
||||
op_metrics->om_bytes_sent += req->rq_xmit_bytes_sent;
|
||||
op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd;
|
||||
|
||||
backlog = 0;
|
||||
if (ktime_to_ns(req->rq_xtime)) {
|
||||
delta = ktime_sub(req->rq_xtime, task->tk_start);
|
||||
op_metrics->om_queue = ktime_add(op_metrics->om_queue, delta);
|
||||
backlog = ktime_sub(req->rq_xtime, task->tk_start);
|
||||
op_metrics->om_queue = ktime_add(op_metrics->om_queue, backlog);
|
||||
}
|
||||
|
||||
op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, req->rq_rtt);
|
||||
|
||||
delta = ktime_sub(now, task->tk_start);
|
||||
op_metrics->om_execute = ktime_add(op_metrics->om_execute, delta);
|
||||
execute = ktime_sub(now, task->tk_start);
|
||||
op_metrics->om_execute = ktime_add(op_metrics->om_execute, execute);
|
||||
|
||||
spin_unlock(&op_metrics->om_lock);
|
||||
|
||||
trace_rpc_stats_latency(req->rq_task, backlog, req->rq_rtt, execute);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rpc_count_iostats_metrics);
|
||||
|
||||
|
@@ -37,12 +37,6 @@ struct rpc_buffer {
|
||||
char data[];
|
||||
};
|
||||
|
||||
static inline int rpc_reply_expected(struct rpc_task *task)
|
||||
{
|
||||
return (task->tk_msg.rpc_proc != NULL) &&
|
||||
(task->tk_msg.rpc_proc->p_decode != NULL);
|
||||
}
|
||||
|
||||
static inline int sock_is_loopback(struct sock *sk)
|
||||
{
|
||||
struct dst_entry *dst;
|
||||
|
@@ -1518,6 +1518,88 @@ out:
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xdr_process_buf);
|
||||
|
||||
/**
|
||||
* xdr_stream_decode_opaque - Decode variable length opaque
|
||||
* @xdr: pointer to xdr_stream
|
||||
* @ptr: location to store opaque data
|
||||
* @size: size of storage buffer @ptr
|
||||
*
|
||||
* Return values:
|
||||
* On success, returns size of object stored in *@ptr
|
||||
* %-EBADMSG on XDR buffer overflow
|
||||
* %-EMSGSIZE on overflow of storage buffer @ptr
|
||||
*/
|
||||
ssize_t xdr_stream_decode_opaque(struct xdr_stream *xdr, void *ptr, size_t size)
|
||||
{
|
||||
ssize_t ret;
|
||||
void *p;
|
||||
|
||||
ret = xdr_stream_decode_opaque_inline(xdr, &p, size);
|
||||
if (ret <= 0)
|
||||
return ret;
|
||||
memcpy(ptr, p, ret);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xdr_stream_decode_opaque);
|
||||
|
||||
/**
|
||||
* xdr_stream_decode_opaque_dup - Decode and duplicate variable length opaque
|
||||
* @xdr: pointer to xdr_stream
|
||||
* @ptr: location to store pointer to opaque data
|
||||
* @maxlen: maximum acceptable object size
|
||||
* @gfp_flags: GFP mask to use
|
||||
*
|
||||
* Return values:
|
||||
* On success, returns size of object stored in *@ptr
|
||||
* %-EBADMSG on XDR buffer overflow
|
||||
* %-EMSGSIZE if the size of the object would exceed @maxlen
|
||||
* %-ENOMEM on memory allocation failure
|
||||
*/
|
||||
ssize_t xdr_stream_decode_opaque_dup(struct xdr_stream *xdr, void **ptr,
|
||||
size_t maxlen, gfp_t gfp_flags)
|
||||
{
|
||||
ssize_t ret;
|
||||
void *p;
|
||||
|
||||
ret = xdr_stream_decode_opaque_inline(xdr, &p, maxlen);
|
||||
if (ret > 0) {
|
||||
*ptr = kmemdup(p, ret, gfp_flags);
|
||||
if (*ptr != NULL)
|
||||
return ret;
|
||||
ret = -ENOMEM;
|
||||
}
|
||||
*ptr = NULL;
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xdr_stream_decode_opaque_dup);
|
||||
|
||||
/**
|
||||
* xdr_stream_decode_string - Decode variable length string
|
||||
* @xdr: pointer to xdr_stream
|
||||
* @str: location to store string
|
||||
* @size: size of storage buffer @str
|
||||
*
|
||||
* Return values:
|
||||
* On success, returns length of NUL-terminated string stored in *@str
|
||||
* %-EBADMSG on XDR buffer overflow
|
||||
* %-EMSGSIZE on overflow of storage buffer @str
|
||||
*/
|
||||
ssize_t xdr_stream_decode_string(struct xdr_stream *xdr, char *str, size_t size)
|
||||
{
|
||||
ssize_t ret;
|
||||
void *p;
|
||||
|
||||
ret = xdr_stream_decode_opaque_inline(xdr, &p, size);
|
||||
if (ret > 0) {
|
||||
memcpy(str, p, ret);
|
||||
str[ret] = '\0';
|
||||
return strlen(str);
|
||||
}
|
||||
*str = '\0';
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xdr_stream_decode_string);
|
||||
|
||||
/**
|
||||
* xdr_stream_decode_string_dup - Decode and duplicate variable length string
|
||||
* @xdr: pointer to xdr_stream
|
||||
|
@@ -826,6 +826,7 @@ static void xprt_connect_status(struct rpc_task *task)
|
||||
* @xprt: transport on which the original request was transmitted
|
||||
* @xid: RPC XID of incoming reply
|
||||
*
|
||||
* Caller holds xprt->recv_lock.
|
||||
*/
|
||||
struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
|
||||
{
|
||||
@@ -834,6 +835,7 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
|
||||
list_for_each_entry(entry, &xprt->recv, rq_list)
|
||||
if (entry->rq_xid == xid) {
|
||||
trace_xprt_lookup_rqst(xprt, xid, 0);
|
||||
entry->rq_rtt = ktime_sub(ktime_get(), entry->rq_xtime);
|
||||
return entry;
|
||||
}
|
||||
|
||||
@@ -889,7 +891,13 @@ __must_hold(&req->rq_xprt->recv_lock)
|
||||
}
|
||||
}
|
||||
|
||||
static void xprt_update_rtt(struct rpc_task *task)
|
||||
/**
|
||||
* xprt_update_rtt - Update RPC RTT statistics
|
||||
* @task: RPC request that recently completed
|
||||
*
|
||||
* Caller holds xprt->recv_lock.
|
||||
*/
|
||||
void xprt_update_rtt(struct rpc_task *task)
|
||||
{
|
||||
struct rpc_rqst *req = task->tk_rqstp;
|
||||
struct rpc_rtt *rtt = task->tk_client->cl_rtt;
|
||||
@@ -902,13 +910,14 @@ static void xprt_update_rtt(struct rpc_task *task)
|
||||
rpc_set_timeo(rtt, timer, req->rq_ntrans - 1);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xprt_update_rtt);
|
||||
|
||||
/**
|
||||
* xprt_complete_rqst - called when reply processing is complete
|
||||
* @task: RPC request that recently completed
|
||||
* @copied: actual number of bytes received from the transport
|
||||
*
|
||||
* Caller holds transport lock.
|
||||
* Caller holds xprt->recv_lock.
|
||||
*/
|
||||
void xprt_complete_rqst(struct rpc_task *task, int copied)
|
||||
{
|
||||
@@ -920,9 +929,6 @@ void xprt_complete_rqst(struct rpc_task *task, int copied)
|
||||
trace_xprt_complete_rqst(xprt, req->rq_xid, copied);
|
||||
|
||||
xprt->stat.recvs++;
|
||||
req->rq_rtt = ktime_sub(ktime_get(), req->rq_xtime);
|
||||
if (xprt->ops->timer != NULL)
|
||||
xprt_update_rtt(task);
|
||||
|
||||
list_del_init(&req->rq_list);
|
||||
req->rq_private_buf.len = copied;
|
||||
@@ -1003,7 +1009,7 @@ void xprt_transmit(struct rpc_task *task)
|
||||
struct rpc_rqst *req = task->tk_rqstp;
|
||||
struct rpc_xprt *xprt = req->rq_xprt;
|
||||
unsigned int connect_cookie;
|
||||
int status, numreqs;
|
||||
int status;
|
||||
|
||||
dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
|
||||
|
||||
@@ -1027,7 +1033,6 @@ void xprt_transmit(struct rpc_task *task)
|
||||
return;
|
||||
|
||||
connect_cookie = xprt->connect_cookie;
|
||||
req->rq_xtime = ktime_get();
|
||||
status = xprt->ops->send_request(task);
|
||||
trace_xprt_transmit(xprt, req->rq_xid, status);
|
||||
if (status != 0) {
|
||||
@@ -1042,9 +1047,6 @@ void xprt_transmit(struct rpc_task *task)
|
||||
|
||||
xprt->ops->set_retrans_timeout(task);
|
||||
|
||||
numreqs = atomic_read(&xprt->num_reqs);
|
||||
if (numreqs > xprt->stat.max_slots)
|
||||
xprt->stat.max_slots = numreqs;
|
||||
xprt->stat.sends++;
|
||||
xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
|
||||
xprt->stat.bklog_u += xprt->backlog.qlen;
|
||||
@@ -1106,14 +1108,15 @@ static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt)
|
||||
{
|
||||
struct rpc_rqst *req = ERR_PTR(-EAGAIN);
|
||||
|
||||
if (!atomic_add_unless(&xprt->num_reqs, 1, xprt->max_reqs))
|
||||
if (xprt->num_reqs >= xprt->max_reqs)
|
||||
goto out;
|
||||
++xprt->num_reqs;
|
||||
spin_unlock(&xprt->reserve_lock);
|
||||
req = kzalloc(sizeof(struct rpc_rqst), GFP_NOFS);
|
||||
spin_lock(&xprt->reserve_lock);
|
||||
if (req != NULL)
|
||||
goto out;
|
||||
atomic_dec(&xprt->num_reqs);
|
||||
--xprt->num_reqs;
|
||||
req = ERR_PTR(-ENOMEM);
|
||||
out:
|
||||
return req;
|
||||
@@ -1121,7 +1124,8 @@ out:
|
||||
|
||||
static bool xprt_dynamic_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
|
||||
{
|
||||
if (atomic_add_unless(&xprt->num_reqs, -1, xprt->min_reqs)) {
|
||||
if (xprt->num_reqs > xprt->min_reqs) {
|
||||
--xprt->num_reqs;
|
||||
kfree(req);
|
||||
return true;
|
||||
}
|
||||
@@ -1157,6 +1161,8 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
|
||||
spin_unlock(&xprt->reserve_lock);
|
||||
return;
|
||||
out_init_req:
|
||||
xprt->stat.max_slots = max_t(unsigned int, xprt->stat.max_slots,
|
||||
xprt->num_reqs);
|
||||
task->tk_status = 0;
|
||||
task->tk_rqstp = req;
|
||||
xprt_request_init(task, xprt);
|
||||
@@ -1224,7 +1230,7 @@ struct rpc_xprt *xprt_alloc(struct net *net, size_t size,
|
||||
else
|
||||
xprt->max_reqs = num_prealloc;
|
||||
xprt->min_reqs = num_prealloc;
|
||||
atomic_set(&xprt->num_reqs, num_prealloc);
|
||||
xprt->num_reqs = num_prealloc;
|
||||
|
||||
return xprt;
|
||||
|
||||
|
@@ -44,13 +44,6 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
|
||||
if (IS_ERR(req))
|
||||
return PTR_ERR(req);
|
||||
|
||||
rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
|
||||
DMA_TO_DEVICE, GFP_KERNEL);
|
||||
if (IS_ERR(rb))
|
||||
goto out_fail;
|
||||
req->rl_rdmabuf = rb;
|
||||
xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb));
|
||||
|
||||
size = r_xprt->rx_data.inline_rsize;
|
||||
rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL);
|
||||
if (IS_ERR(rb))
|
||||
|
@@ -191,7 +191,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
|
||||
|
||||
mr = rpcrdma_mr_get(r_xprt);
|
||||
if (!mr)
|
||||
return ERR_PTR(-ENOBUFS);
|
||||
return ERR_PTR(-EAGAIN);
|
||||
|
||||
pageoff = offset_in_page(seg1->mr_offset);
|
||||
seg1->mr_offset -= pageoff; /* start of page */
|
||||
@@ -251,6 +251,16 @@ out_maperr:
|
||||
return ERR_PTR(-EIO);
|
||||
}
|
||||
|
||||
/* Post Send WR containing the RPC Call message.
|
||||
*/
|
||||
static int
|
||||
fmr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
|
||||
{
|
||||
struct ib_send_wr *bad_wr;
|
||||
|
||||
return ib_post_send(ia->ri_id->qp, &req->rl_sendctx->sc_wr, &bad_wr);
|
||||
}
|
||||
|
||||
/* Invalidate all memory regions that were registered for "req".
|
||||
*
|
||||
* Sleeps until it is safe for the host CPU to access the
|
||||
@@ -305,6 +315,7 @@ out_reset:
|
||||
|
||||
const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
|
||||
.ro_map = fmr_op_map,
|
||||
.ro_send = fmr_op_send,
|
||||
.ro_unmap_sync = fmr_op_unmap_sync,
|
||||
.ro_recover_mr = fmr_op_recover_mr,
|
||||
.ro_open = fmr_op_open,
|
||||
|
@@ -357,8 +357,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
|
||||
struct rpcrdma_mr *mr;
|
||||
struct ib_mr *ibmr;
|
||||
struct ib_reg_wr *reg_wr;
|
||||
struct ib_send_wr *bad_wr;
|
||||
int rc, i, n;
|
||||
int i, n;
|
||||
u8 key;
|
||||
|
||||
mr = NULL;
|
||||
@@ -367,7 +366,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
|
||||
rpcrdma_mr_defer_recovery(mr);
|
||||
mr = rpcrdma_mr_get(r_xprt);
|
||||
if (!mr)
|
||||
return ERR_PTR(-ENOBUFS);
|
||||
return ERR_PTR(-EAGAIN);
|
||||
} while (mr->frwr.fr_state != FRWR_IS_INVALID);
|
||||
frwr = &mr->frwr;
|
||||
frwr->fr_state = FRWR_IS_VALID;
|
||||
@@ -407,22 +406,12 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
|
||||
ib_update_fast_reg_key(ibmr, ++key);
|
||||
|
||||
reg_wr = &frwr->fr_regwr;
|
||||
reg_wr->wr.next = NULL;
|
||||
reg_wr->wr.opcode = IB_WR_REG_MR;
|
||||
frwr->fr_cqe.done = frwr_wc_fastreg;
|
||||
reg_wr->wr.wr_cqe = &frwr->fr_cqe;
|
||||
reg_wr->wr.num_sge = 0;
|
||||
reg_wr->wr.send_flags = 0;
|
||||
reg_wr->mr = ibmr;
|
||||
reg_wr->key = ibmr->rkey;
|
||||
reg_wr->access = writing ?
|
||||
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
|
||||
IB_ACCESS_REMOTE_READ;
|
||||
|
||||
rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
|
||||
if (rc)
|
||||
goto out_senderr;
|
||||
|
||||
mr->mr_handle = ibmr->rkey;
|
||||
mr->mr_length = ibmr->length;
|
||||
mr->mr_offset = ibmr->iova;
|
||||
@@ -442,11 +431,40 @@ out_mapmr_err:
|
||||
frwr->fr_mr, n, mr->mr_nents);
|
||||
rpcrdma_mr_defer_recovery(mr);
|
||||
return ERR_PTR(-EIO);
|
||||
}
|
||||
|
||||
out_senderr:
|
||||
pr_err("rpcrdma: FRWR registration ib_post_send returned %i\n", rc);
|
||||
rpcrdma_mr_defer_recovery(mr);
|
||||
return ERR_PTR(-ENOTCONN);
|
||||
/* Post Send WR containing the RPC Call message.
|
||||
*
|
||||
* For FRMR, chain any FastReg WRs to the Send WR. Only a
|
||||
* single ib_post_send call is needed to register memory
|
||||
* and then post the Send WR.
|
||||
*/
|
||||
static int
|
||||
frwr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
|
||||
{
|
||||
struct ib_send_wr *post_wr, *bad_wr;
|
||||
struct rpcrdma_mr *mr;
|
||||
|
||||
post_wr = &req->rl_sendctx->sc_wr;
|
||||
list_for_each_entry(mr, &req->rl_registered, mr_list) {
|
||||
struct rpcrdma_frwr *frwr;
|
||||
|
||||
frwr = &mr->frwr;
|
||||
|
||||
frwr->fr_cqe.done = frwr_wc_fastreg;
|
||||
frwr->fr_regwr.wr.next = post_wr;
|
||||
frwr->fr_regwr.wr.wr_cqe = &frwr->fr_cqe;
|
||||
frwr->fr_regwr.wr.num_sge = 0;
|
||||
frwr->fr_regwr.wr.opcode = IB_WR_REG_MR;
|
||||
frwr->fr_regwr.wr.send_flags = 0;
|
||||
|
||||
post_wr = &frwr->fr_regwr.wr;
|
||||
}
|
||||
|
||||
/* If ib_post_send fails, the next ->send_request for
|
||||
* @req will queue these MWs for recovery.
|
||||
*/
|
||||
return ib_post_send(ia->ri_id->qp, post_wr, &bad_wr);
|
||||
}
|
||||
|
||||
/* Handle a remotely invalidated mr on the @mrs list
|
||||
@@ -561,6 +579,7 @@ reset_mrs:
|
||||
|
||||
const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
|
||||
.ro_map = frwr_op_map,
|
||||
.ro_send = frwr_op_send,
|
||||
.ro_reminv = frwr_op_reminv,
|
||||
.ro_unmap_sync = frwr_op_unmap_sync,
|
||||
.ro_recover_mr = frwr_op_recover_mr,
|
||||
|
@@ -365,7 +365,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
|
||||
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
|
||||
false, &mr);
|
||||
if (IS_ERR(seg))
|
||||
return PTR_ERR(seg);
|
||||
goto out_maperr;
|
||||
rpcrdma_mr_push(mr, &req->rl_registered);
|
||||
|
||||
if (encode_read_segment(xdr, mr, pos) < 0)
|
||||
@@ -377,6 +377,11 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
|
||||
} while (nsegs);
|
||||
|
||||
return 0;
|
||||
|
||||
out_maperr:
|
||||
if (PTR_ERR(seg) == -EAGAIN)
|
||||
xprt_wait_for_buffer_space(rqst->rq_task, NULL);
|
||||
return PTR_ERR(seg);
|
||||
}
|
||||
|
||||
/* Register and XDR encode the Write list. Supports encoding a list
|
||||
@@ -423,7 +428,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
|
||||
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
|
||||
true, &mr);
|
||||
if (IS_ERR(seg))
|
||||
return PTR_ERR(seg);
|
||||
goto out_maperr;
|
||||
rpcrdma_mr_push(mr, &req->rl_registered);
|
||||
|
||||
if (encode_rdma_segment(xdr, mr) < 0)
|
||||
@@ -440,6 +445,11 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
|
||||
*segcount = cpu_to_be32(nchunks);
|
||||
|
||||
return 0;
|
||||
|
||||
out_maperr:
|
||||
if (PTR_ERR(seg) == -EAGAIN)
|
||||
xprt_wait_for_buffer_space(rqst->rq_task, NULL);
|
||||
return PTR_ERR(seg);
|
||||
}
|
||||
|
||||
/* Register and XDR encode the Reply chunk. Supports encoding an array
|
||||
@@ -481,7 +491,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
|
||||
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
|
||||
true, &mr);
|
||||
if (IS_ERR(seg))
|
||||
return PTR_ERR(seg);
|
||||
goto out_maperr;
|
||||
rpcrdma_mr_push(mr, &req->rl_registered);
|
||||
|
||||
if (encode_rdma_segment(xdr, mr) < 0)
|
||||
@@ -498,6 +508,11 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
|
||||
*segcount = cpu_to_be32(nchunks);
|
||||
|
||||
return 0;
|
||||
|
||||
out_maperr:
|
||||
if (PTR_ERR(seg) == -EAGAIN)
|
||||
xprt_wait_for_buffer_space(rqst->rq_task, NULL);
|
||||
return PTR_ERR(seg);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -724,8 +739,8 @@ rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
|
||||
* Returns:
|
||||
* %0 if the RPC was sent successfully,
|
||||
* %-ENOTCONN if the connection was lost,
|
||||
* %-EAGAIN if not enough pages are available for on-demand reply buffer,
|
||||
* %-ENOBUFS if no MRs are available to register chunks,
|
||||
* %-EAGAIN if the caller should call again with the same arguments,
|
||||
* %-ENOBUFS if the caller should call again after a delay,
|
||||
* %-EMSGSIZE if the transport header is too small,
|
||||
* %-EIO if a permanent problem occurred while marshaling.
|
||||
*/
|
||||
@@ -868,10 +883,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
|
||||
return 0;
|
||||
|
||||
out_err:
|
||||
if (ret != -ENOBUFS) {
|
||||
pr_err("rpcrdma: header marshaling failed (%d)\n", ret);
|
||||
r_xprt->rx_stats.failed_marshal_count++;
|
||||
}
|
||||
r_xprt->rx_stats.failed_marshal_count++;
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -1366,7 +1378,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
|
||||
|
||||
trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
|
||||
|
||||
queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work);
|
||||
queue_work(rpcrdma_receive_wq, &rep->rr_work);
|
||||
return;
|
||||
|
||||
out_badstatus:
|
||||
|
@@ -52,7 +52,6 @@
|
||||
#include <linux/slab.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/sunrpc/addr.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
#include "xprt_rdma.h"
|
||||
|
||||
@@ -237,8 +236,6 @@ rpcrdma_connect_worker(struct work_struct *work)
|
||||
struct rpc_xprt *xprt = &r_xprt->rx_xprt;
|
||||
|
||||
spin_lock_bh(&xprt->transport_lock);
|
||||
if (++xprt->connect_cookie == 0) /* maintain a reserved value */
|
||||
++xprt->connect_cookie;
|
||||
if (ep->rep_connected > 0) {
|
||||
if (!xprt_test_and_set_connected(xprt))
|
||||
xprt_wake_pending_tasks(xprt, 0);
|
||||
@@ -540,29 +537,6 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
|
||||
}
|
||||
}
|
||||
|
||||
/* Allocate a fixed-size buffer in which to construct and send the
|
||||
* RPC-over-RDMA header for this request.
|
||||
*/
|
||||
static bool
|
||||
rpcrdma_get_rdmabuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
|
||||
gfp_t flags)
|
||||
{
|
||||
size_t size = RPCRDMA_HDRBUF_SIZE;
|
||||
struct rpcrdma_regbuf *rb;
|
||||
|
||||
if (req->rl_rdmabuf)
|
||||
return true;
|
||||
|
||||
rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags);
|
||||
if (IS_ERR(rb))
|
||||
return false;
|
||||
|
||||
r_xprt->rx_stats.hardway_register_count += size;
|
||||
req->rl_rdmabuf = rb;
|
||||
xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb));
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
|
||||
size_t size, gfp_t flags)
|
||||
@@ -644,15 +618,11 @@ xprt_rdma_allocate(struct rpc_task *task)
|
||||
if (RPC_IS_SWAPPER(task))
|
||||
flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
|
||||
|
||||
if (!rpcrdma_get_rdmabuf(r_xprt, req, flags))
|
||||
goto out_fail;
|
||||
if (!rpcrdma_get_sendbuf(r_xprt, req, rqst->rq_callsize, flags))
|
||||
goto out_fail;
|
||||
if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags))
|
||||
goto out_fail;
|
||||
|
||||
req->rl_cpu = smp_processor_id();
|
||||
req->rl_connect_cookie = 0; /* our reserved value */
|
||||
rpcrdma_set_xprtdata(rqst, req);
|
||||
rqst->rq_buffer = req->rl_sendbuf->rg_base;
|
||||
rqst->rq_rbuffer = req->rl_recvbuf->rg_base;
|
||||
@@ -694,7 +664,8 @@ xprt_rdma_free(struct rpc_task *task)
|
||||
* Returns:
|
||||
* %0 if the RPC message has been sent
|
||||
* %-ENOTCONN if the caller should reconnect and call again
|
||||
* %-ENOBUFS if the caller should call again later
|
||||
* %-EAGAIN if the caller should call again
|
||||
* %-ENOBUFS if the caller should call again after a delay
|
||||
* %-EIO if a permanent error occurred and the request was not
|
||||
* sent. Do not try to send this message again.
|
||||
*/
|
||||
@@ -723,9 +694,9 @@ xprt_rdma_send_request(struct rpc_task *task)
|
||||
rpcrdma_recv_buffer_get(req);
|
||||
|
||||
/* Must suppress retransmit to maintain credits */
|
||||
if (req->rl_connect_cookie == xprt->connect_cookie)
|
||||
if (rqst->rq_connect_cookie == xprt->connect_cookie)
|
||||
goto drop_connection;
|
||||
req->rl_connect_cookie = xprt->connect_cookie;
|
||||
rqst->rq_xtime = ktime_get();
|
||||
|
||||
__set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
|
||||
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
|
||||
@@ -733,6 +704,12 @@ xprt_rdma_send_request(struct rpc_task *task)
|
||||
|
||||
rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len;
|
||||
rqst->rq_bytes_sent = 0;
|
||||
|
||||
/* An RPC with no reply will throw off credit accounting,
|
||||
* so drop the connection to reset the credit grant.
|
||||
*/
|
||||
if (!rpc_reply_expected(task))
|
||||
goto drop_connection;
|
||||
return 0;
|
||||
|
||||
failed_marshal:
|
||||
|
@@ -250,11 +250,11 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
|
||||
wait_for_completion(&ia->ri_remove_done);
|
||||
|
||||
ia->ri_id = NULL;
|
||||
ia->ri_pd = NULL;
|
||||
ia->ri_device = NULL;
|
||||
/* Return 1 to ensure the core destroys the id. */
|
||||
return 1;
|
||||
case RDMA_CM_EVENT_ESTABLISHED:
|
||||
++xprt->rx_xprt.connect_cookie;
|
||||
connstate = 1;
|
||||
rpcrdma_update_connect_private(xprt, &event->param.conn);
|
||||
goto connected;
|
||||
@@ -273,6 +273,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
|
||||
connstate = -EAGAIN;
|
||||
goto connected;
|
||||
case RDMA_CM_EVENT_DISCONNECTED:
|
||||
++xprt->rx_xprt.connect_cookie;
|
||||
connstate = -ECONNABORTED;
|
||||
connected:
|
||||
xprt->rx_buf.rb_credits = 1;
|
||||
@@ -445,7 +446,9 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
|
||||
ia->ri_id->qp = NULL;
|
||||
}
|
||||
ib_free_cq(ep->rep_attr.recv_cq);
|
||||
ep->rep_attr.recv_cq = NULL;
|
||||
ib_free_cq(ep->rep_attr.send_cq);
|
||||
ep->rep_attr.send_cq = NULL;
|
||||
|
||||
/* The ULP is responsible for ensuring all DMA
|
||||
* mappings and MRs are gone.
|
||||
@@ -458,6 +461,8 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
|
||||
rpcrdma_dma_unmap_regbuf(req->rl_recvbuf);
|
||||
}
|
||||
rpcrdma_mrs_destroy(buf);
|
||||
ib_dealloc_pd(ia->ri_pd);
|
||||
ia->ri_pd = NULL;
|
||||
|
||||
/* Allow waiters to continue */
|
||||
complete(&ia->ri_remove_done);
|
||||
@@ -589,11 +594,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
|
||||
|
||||
/* Client offers RDMA Read but does not initiate */
|
||||
ep->rep_remote_cma.initiator_depth = 0;
|
||||
if (ia->ri_device->attrs.max_qp_rd_atom > 32) /* arbitrary but <= 255 */
|
||||
ep->rep_remote_cma.responder_resources = 32;
|
||||
else
|
||||
ep->rep_remote_cma.responder_resources =
|
||||
ia->ri_device->attrs.max_qp_rd_atom;
|
||||
ep->rep_remote_cma.responder_resources =
|
||||
min_t(int, U8_MAX, ia->ri_device->attrs.max_qp_rd_atom);
|
||||
|
||||
/* Limit transport retries so client can detect server
|
||||
* GID changes quickly. RPC layer handles re-establishing
|
||||
@@ -628,14 +630,16 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
|
||||
{
|
||||
cancel_delayed_work_sync(&ep->rep_connect_worker);
|
||||
|
||||
if (ia->ri_id->qp) {
|
||||
if (ia->ri_id && ia->ri_id->qp) {
|
||||
rpcrdma_ep_disconnect(ep, ia);
|
||||
rdma_destroy_qp(ia->ri_id);
|
||||
ia->ri_id->qp = NULL;
|
||||
}
|
||||
|
||||
ib_free_cq(ep->rep_attr.recv_cq);
|
||||
ib_free_cq(ep->rep_attr.send_cq);
|
||||
if (ep->rep_attr.recv_cq)
|
||||
ib_free_cq(ep->rep_attr.recv_cq);
|
||||
if (ep->rep_attr.send_cq)
|
||||
ib_free_cq(ep->rep_attr.send_cq);
|
||||
}
|
||||
|
||||
/* Re-establish a connection after a device removal event.
|
||||
@@ -1024,7 +1028,7 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
|
||||
LIST_HEAD(free);
|
||||
LIST_HEAD(all);
|
||||
|
||||
for (count = 0; count < 32; count++) {
|
||||
for (count = 0; count < 3; count++) {
|
||||
struct rpcrdma_mr *mr;
|
||||
int rc;
|
||||
|
||||
@@ -1049,8 +1053,9 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
|
||||
list_splice(&all, &buf->rb_all);
|
||||
r_xprt->rx_stats.mrs_allocated += count;
|
||||
spin_unlock(&buf->rb_mrlock);
|
||||
|
||||
trace_xprtrdma_createmrs(r_xprt, count);
|
||||
|
||||
xprt_write_space(&r_xprt->rx_xprt);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -1068,17 +1073,27 @@ struct rpcrdma_req *
|
||||
rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
|
||||
{
|
||||
struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
|
||||
struct rpcrdma_regbuf *rb;
|
||||
struct rpcrdma_req *req;
|
||||
|
||||
req = kzalloc(sizeof(*req), GFP_KERNEL);
|
||||
if (req == NULL)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
|
||||
DMA_TO_DEVICE, GFP_KERNEL);
|
||||
if (IS_ERR(rb)) {
|
||||
kfree(req);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
req->rl_rdmabuf = rb;
|
||||
xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb));
|
||||
req->rl_buffer = buffer;
|
||||
INIT_LIST_HEAD(&req->rl_registered);
|
||||
|
||||
spin_lock(&buffer->rb_reqslock);
|
||||
list_add(&req->rl_all, &buffer->rb_allreqs);
|
||||
spin_unlock(&buffer->rb_reqslock);
|
||||
req->rl_buffer = &r_xprt->rx_buf;
|
||||
INIT_LIST_HEAD(&req->rl_registered);
|
||||
return req;
|
||||
}
|
||||
|
||||
@@ -1535,7 +1550,6 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
|
||||
struct rpcrdma_req *req)
|
||||
{
|
||||
struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr;
|
||||
struct ib_send_wr *send_wr_fail;
|
||||
int rc;
|
||||
|
||||
if (req->rl_reply) {
|
||||
@@ -1554,7 +1568,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
|
||||
--ep->rep_send_count;
|
||||
}
|
||||
|
||||
rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail);
|
||||
rc = ia->ri_ops->ro_send(ia, req);
|
||||
trace_xprtrdma_post_send(req, rc);
|
||||
if (rc)
|
||||
return -ENOTCONN;
|
||||
|
@@ -334,8 +334,6 @@ enum {
|
||||
struct rpcrdma_buffer;
|
||||
struct rpcrdma_req {
|
||||
struct list_head rl_list;
|
||||
int rl_cpu;
|
||||
unsigned int rl_connect_cookie;
|
||||
struct rpcrdma_buffer *rl_buffer;
|
||||
struct rpcrdma_rep *rl_reply;
|
||||
struct xdr_stream rl_stream;
|
||||
@@ -474,6 +472,8 @@ struct rpcrdma_memreg_ops {
|
||||
(*ro_map)(struct rpcrdma_xprt *,
|
||||
struct rpcrdma_mr_seg *, int, bool,
|
||||
struct rpcrdma_mr **);
|
||||
int (*ro_send)(struct rpcrdma_ia *ia,
|
||||
struct rpcrdma_req *req);
|
||||
void (*ro_reminv)(struct rpcrdma_rep *rep,
|
||||
struct list_head *mrs);
|
||||
void (*ro_unmap_sync)(struct rpcrdma_xprt *,
|
||||
|
@@ -527,6 +527,7 @@ static int xs_local_send_request(struct rpc_task *task)
|
||||
xs_pktdump("packet data:",
|
||||
req->rq_svec->iov_base, req->rq_svec->iov_len);
|
||||
|
||||
req->rq_xtime = ktime_get();
|
||||
status = xs_sendpages(transport->sock, NULL, 0, xdr, req->rq_bytes_sent,
|
||||
true, &sent);
|
||||
dprintk("RPC: %s(%u) = %d\n",
|
||||
@@ -589,6 +590,7 @@ static int xs_udp_send_request(struct rpc_task *task)
|
||||
|
||||
if (!xprt_bound(xprt))
|
||||
return -ENOTCONN;
|
||||
req->rq_xtime = ktime_get();
|
||||
status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen,
|
||||
xdr, req->rq_bytes_sent, true, &sent);
|
||||
|
||||
@@ -678,6 +680,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
|
||||
/* Continue transmitting the packet/record. We must be careful
|
||||
* to cope with writespace callbacks arriving _after_ we have
|
||||
* called sendmsg(). */
|
||||
req->rq_xtime = ktime_get();
|
||||
while (1) {
|
||||
sent = 0;
|
||||
status = xs_sendpages(transport->sock, NULL, 0, xdr,
|
||||
@@ -1060,6 +1063,7 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
|
||||
if (!rovr)
|
||||
goto out_unlock;
|
||||
xprt_pin_rqst(rovr);
|
||||
xprt_update_rtt(rovr->rq_task);
|
||||
spin_unlock(&xprt->recv_lock);
|
||||
task = rovr->rq_task;
|
||||
|
||||
|
Reference in New Issue
Block a user