Merge tag 'nfs-for-3.16-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

Pull NFS client updates from Trond Myklebust:
 "Highlights include:

   - massive cleanup of the NFS read/write code by Anna and Dros
   - support multiple NFS read/write requests per page in order to deal
     with non-page aligned pNFS striping.  Also cleans up the r/wsize <
     page size code nicely.
   - stable fix for ensuring inode is declared uptodate only after all
     the attributes have been checked.
   - stable fix for a kernel Oops when remounting
   - NFS over RDMA client fixes
   - move the pNFS files layout driver into its own subdirectory"

* tag 'nfs-for-3.16-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (79 commits)
  NFS: populate ->net in mount data when remounting
  pnfs: fix lockup caused by pnfs_generic_pg_test
  NFSv4.1: Fix typo in dprintk
  NFSv4.1: Comment is now wrong and redundant to code
  NFS: Use raw_write_seqcount_begin/end int nfs4_reclaim_open_state
  xprtrdma: Disconnect on registration failure
  xprtrdma: Remove BUG_ON() call sites
  xprtrdma: Avoid deadlock when credit window is reset
  SUNRPC: Move congestion window constants to header file
  xprtrdma: Reset connection timeout after successful reconnect
  xprtrdma: Use macros for reconnection timeout constants
  xprtrdma: Allocate missing pagelist
  xprtrdma: Remove Tavor MTU setting
  xprtrdma: Ensure ia->ri_id->qp is not NULL when reconnecting
  xprtrdma: Reduce the number of hardway buffer allocations
  xprtrdma: Limit work done by completion handler
  xprtrmda: Reduce calls to ib_poll_cq() in completion handlers
  xprtrmda: Reduce lock contention in completion handlers
  xprtrdma: Split the completion queue
  xprtrdma: Make rpcrdma_ep_destroy() return void
  ...
This commit is contained in:
Linus Torvalds
2014-06-10 15:02:42 -07:00
41 changed files with 1817 additions and 1909 deletions

View File

@@ -42,10 +42,10 @@
* Local function declarations
*/
static void nfs_redirty_request(struct nfs_page *req);
static const struct rpc_call_ops nfs_write_common_ops;
static const struct rpc_call_ops nfs_commit_ops;
static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
static const struct nfs_rw_ops nfs_rw_write_ops;
static struct kmem_cache *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
@@ -70,76 +70,19 @@ void nfs_commit_free(struct nfs_commit_data *p)
}
EXPORT_SYMBOL_GPL(nfs_commit_free);
struct nfs_write_header *nfs_writehdr_alloc(void)
static struct nfs_rw_header *nfs_writehdr_alloc(void)
{
struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
if (p) {
struct nfs_pgio_header *hdr = &p->header;
struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
if (p)
memset(p, 0, sizeof(*p));
INIT_LIST_HEAD(&hdr->pages);
INIT_LIST_HEAD(&hdr->rpc_list);
spin_lock_init(&hdr->lock);
atomic_set(&hdr->refcnt, 0);
hdr->verf = &p->verf;
}
return p;
}
EXPORT_SYMBOL_GPL(nfs_writehdr_alloc);
static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
unsigned int pagecount)
static void nfs_writehdr_free(struct nfs_rw_header *whdr)
{
struct nfs_write_data *data, *prealloc;
prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data;
if (prealloc->header == NULL)
data = prealloc;
else
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
goto out;
if (nfs_pgarray_set(&data->pages, pagecount)) {
data->header = hdr;
atomic_inc(&hdr->refcnt);
} else {
if (data != prealloc)
kfree(data);
data = NULL;
}
out:
return data;
}
void nfs_writehdr_free(struct nfs_pgio_header *hdr)
{
struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
mempool_free(whdr, nfs_wdata_mempool);
}
EXPORT_SYMBOL_GPL(nfs_writehdr_free);
void nfs_writedata_release(struct nfs_write_data *wdata)
{
struct nfs_pgio_header *hdr = wdata->header;
struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header);
put_nfs_open_context(wdata->args.context);
if (wdata->pages.pagevec != wdata->pages.page_array)
kfree(wdata->pages.pagevec);
if (wdata == &write_header->rpc_data) {
wdata->header = NULL;
wdata = NULL;
}
if (atomic_dec_and_test(&hdr->refcnt))
hdr->completion_ops->completion(hdr);
/* Note: we only free the rpc_task after callbacks are done.
* See the comment in rpc_free_task() for why
*/
kfree(wdata);
}
EXPORT_SYMBOL_GPL(nfs_writedata_release);
static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
{
@@ -211,18 +154,78 @@ static void nfs_set_pageerror(struct page *page)
nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page));
}
/*
* nfs_page_group_search_locked
* @head - head request of page group
* @page_offset - offset into page
*
* Search page group with head @head to find a request that contains the
* page offset @page_offset.
*
* Returns a pointer to the first matching nfs request, or NULL if no
* match is found.
*
* Must be called with the page group lock held
*/
static struct nfs_page *
nfs_page_group_search_locked(struct nfs_page *head, unsigned int page_offset)
{
struct nfs_page *req;
WARN_ON_ONCE(head != head->wb_head);
WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_head->wb_flags));
req = head;
do {
if (page_offset >= req->wb_pgbase &&
page_offset < (req->wb_pgbase + req->wb_bytes))
return req;
req = req->wb_this_page;
} while (req != head);
return NULL;
}
/*
* nfs_page_group_covers_page
* @head - head request of page group
*
* Return true if the page group with head @head covers the whole page,
* returns false otherwise
*/
static bool nfs_page_group_covers_page(struct nfs_page *req)
{
struct nfs_page *tmp;
unsigned int pos = 0;
unsigned int len = nfs_page_length(req->wb_page);
nfs_page_group_lock(req);
do {
tmp = nfs_page_group_search_locked(req->wb_head, pos);
if (tmp) {
/* no way this should happen */
WARN_ON_ONCE(tmp->wb_pgbase != pos);
pos += tmp->wb_bytes - (pos - tmp->wb_pgbase);
}
} while (tmp && pos < len);
nfs_page_group_unlock(req);
WARN_ON_ONCE(pos > len);
return pos == len;
}
/* We can set the PG_uptodate flag if we see that a write request
* covers the full page.
*/
static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count)
static void nfs_mark_uptodate(struct nfs_page *req)
{
if (PageUptodate(page))
if (PageUptodate(req->wb_page))
return;
if (base != 0)
if (!nfs_page_group_covers_page(req))
return;
if (count != nfs_page_length(page))
return;
SetPageUptodate(page);
SetPageUptodate(req->wb_page);
}
static int wb_priority(struct writeback_control *wbc)
@@ -258,12 +261,15 @@ static void nfs_set_page_writeback(struct page *page)
}
}
static void nfs_end_page_writeback(struct page *page)
static void nfs_end_page_writeback(struct nfs_page *req)
{
struct inode *inode = page_file_mapping(page)->host;
struct inode *inode = page_file_mapping(req->wb_page)->host;
struct nfs_server *nfss = NFS_SERVER(inode);
end_page_writeback(page);
if (!nfs_page_group_sync_on_bit(req, PG_WB_END))
return;
end_page_writeback(req->wb_page);
if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
}
@@ -354,10 +360,8 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
struct nfs_pageio_descriptor pgio;
int err;
NFS_PROTO(page_file_mapping(page)->host)->write_pageio_init(&pgio,
page->mapping->host,
wb_priority(wbc),
&nfs_async_write_completion_ops);
nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc),
false, &nfs_async_write_completion_ops);
err = nfs_do_writepage(page, wbc, &pgio);
nfs_pageio_complete(&pgio);
if (err < 0)
@@ -400,7 +404,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
NFS_PROTO(inode)->write_pageio_init(&pgio, inode, wb_priority(wbc), &nfs_async_write_completion_ops);
nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false,
&nfs_async_write_completion_ops);
err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
nfs_pageio_complete(&pgio);
@@ -425,6 +430,8 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
{
struct nfs_inode *nfsi = NFS_I(inode);
WARN_ON_ONCE(req->wb_this_page != req);
/* Lock the request! */
nfs_lock_request(req);
@@ -441,6 +448,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
set_page_private(req->wb_page, (unsigned long)req);
}
nfsi->npages++;
set_bit(PG_INODE_REF, &req->wb_flags);
kref_get(&req->wb_kref);
spin_unlock(&inode->i_lock);
}
@@ -452,15 +460,20 @@ static void nfs_inode_remove_request(struct nfs_page *req)
{
struct inode *inode = req->wb_context->dentry->d_inode;
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_page *head;
spin_lock(&inode->i_lock);
if (likely(!PageSwapCache(req->wb_page))) {
set_page_private(req->wb_page, 0);
ClearPagePrivate(req->wb_page);
clear_bit(PG_MAPPED, &req->wb_flags);
if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
head = req->wb_head;
spin_lock(&inode->i_lock);
if (likely(!PageSwapCache(head->wb_page))) {
set_page_private(head->wb_page, 0);
ClearPagePrivate(head->wb_page);
clear_bit(PG_MAPPED, &head->wb_flags);
}
nfsi->npages--;
spin_unlock(&inode->i_lock);
}
nfsi->npages--;
spin_unlock(&inode->i_lock);
nfs_release_request(req);
}
@@ -583,7 +596,7 @@ nfs_clear_request_commit(struct nfs_page *req)
}
static inline
int nfs_write_need_commit(struct nfs_write_data *data)
int nfs_write_need_commit(struct nfs_pgio_data *data)
{
if (data->verf.committed == NFS_DATA_SYNC)
return data->header->lseg == NULL;
@@ -614,7 +627,7 @@ nfs_clear_request_commit(struct nfs_page *req)
}
static inline
int nfs_write_need_commit(struct nfs_write_data *data)
int nfs_write_need_commit(struct nfs_pgio_data *data)
{
return 0;
}
@@ -625,6 +638,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
{
struct nfs_commit_info cinfo;
unsigned long bytes = 0;
bool do_destroy;
if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
goto out;
@@ -645,7 +659,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
goto next;
}
if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
memcpy(&req->wb_verf, &hdr->verf->verifier, sizeof(req->wb_verf));
memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
nfs_mark_request_commit(req, hdr->lseg, &cinfo);
goto next;
}
@@ -653,7 +667,8 @@ remove_req:
nfs_inode_remove_request(req);
next:
nfs_unlock_request(req);
nfs_end_page_writeback(req->wb_page);
nfs_end_page_writeback(req);
do_destroy = !test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags);
nfs_release_request(req);
}
out:
@@ -661,7 +676,7 @@ out:
}
#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
static unsigned long
unsigned long
nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
{
return cinfo->mds->ncommit;
@@ -718,7 +733,7 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst,
}
#else
static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
{
return 0;
}
@@ -758,6 +773,10 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
if (req == NULL)
goto out_unlock;
/* should be handled by nfs_flush_incompatible */
WARN_ON_ONCE(req->wb_head != req);
WARN_ON_ONCE(req->wb_this_page != req);
rqend = req->wb_offset + req->wb_bytes;
/*
* Tell the caller to flush out the request if
@@ -819,7 +838,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
req = nfs_try_to_update_request(inode, page, offset, bytes);
if (req != NULL)
goto out;
req = nfs_create_request(ctx, inode, page, offset, bytes);
req = nfs_create_request(ctx, page, NULL, offset, bytes);
if (IS_ERR(req))
goto out;
nfs_inode_add_request(inode, req);
@@ -837,7 +856,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
return PTR_ERR(req);
/* Update file length */
nfs_grow_file(page, offset, count);
nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
nfs_mark_uptodate(req);
nfs_mark_request_dirty(req);
nfs_unlock_and_release_request(req);
return 0;
@@ -863,6 +882,8 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
return 0;
l_ctx = req->wb_lock_context;
do_flush = req->wb_page != page || req->wb_context != ctx;
/* for now, flush if more than 1 request in page_group */
do_flush |= req->wb_this_page != req;
if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) {
do_flush |= l_ctx->lockowner.l_owner != current->files
|| l_ctx->lockowner.l_pid != current->tgid;
@@ -990,126 +1011,17 @@ static int flush_task_priority(int how)
return RPC_PRIORITY_NORMAL;
}
int nfs_initiate_write(struct rpc_clnt *clnt,
struct nfs_write_data *data,
const struct rpc_call_ops *call_ops,
int how, int flags)
static void nfs_initiate_write(struct nfs_pgio_data *data, struct rpc_message *msg,
struct rpc_task_setup *task_setup_data, int how)
{
struct inode *inode = data->header->inode;
int priority = flush_task_priority(how);
struct rpc_task *task;
struct rpc_message msg = {
.rpc_argp = &data->args,
.rpc_resp = &data->res,
.rpc_cred = data->header->cred,
};
struct rpc_task_setup task_setup_data = {
.rpc_client = clnt,
.task = &data->task,
.rpc_message = &msg,
.callback_ops = call_ops,
.callback_data = data,
.workqueue = nfsiod_workqueue,
.flags = RPC_TASK_ASYNC | flags,
.priority = priority,
};
int ret = 0;
/* Set up the initial task struct. */
NFS_PROTO(inode)->write_setup(data, &msg);
dprintk("NFS: %5u initiated write call "
"(req %s/%llu, %u bytes @ offset %llu)\n",
data->task.tk_pid,
inode->i_sb->s_id,
(unsigned long long)NFS_FILEID(inode),
data->args.count,
(unsigned long long)data->args.offset);
task_setup_data->priority = priority;
NFS_PROTO(inode)->write_setup(data, msg);
nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client,
&task_setup_data.rpc_client, &msg, data);
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task)) {
ret = PTR_ERR(task);
goto out;
}
if (how & FLUSH_SYNC) {
ret = rpc_wait_for_completion_task(task);
if (ret == 0)
ret = task->tk_status;
}
rpc_put_task(task);
out:
return ret;
}
EXPORT_SYMBOL_GPL(nfs_initiate_write);
/*
* Set up the argument/result storage required for the RPC call.
*/
static void nfs_write_rpcsetup(struct nfs_write_data *data,
unsigned int count, unsigned int offset,
int how, struct nfs_commit_info *cinfo)
{
struct nfs_page *req = data->header->req;
/* Set up the RPC argument and reply structs
* NB: take care not to mess about with data->commit et al. */
data->args.fh = NFS_FH(data->header->inode);
data->args.offset = req_offset(req) + offset;
/* pnfs_set_layoutcommit needs this */
data->mds_offset = data->args.offset;
data->args.pgbase = req->wb_pgbase + offset;
data->args.pages = data->pages.pagevec;
data->args.count = count;
data->args.context = get_nfs_open_context(req->wb_context);
data->args.lock_context = req->wb_lock_context;
data->args.stable = NFS_UNSTABLE;
switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
case 0:
break;
case FLUSH_COND_STABLE:
if (nfs_reqs_to_commit(cinfo))
break;
default:
data->args.stable = NFS_FILE_SYNC;
}
data->res.fattr = &data->fattr;
data->res.count = count;
data->res.verf = &data->verf;
nfs_fattr_init(&data->fattr);
}
static int nfs_do_write(struct nfs_write_data *data,
const struct rpc_call_ops *call_ops,
int how)
{
struct inode *inode = data->header->inode;
return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how, 0);
}
static int nfs_do_multiple_writes(struct list_head *head,
const struct rpc_call_ops *call_ops,
int how)
{
struct nfs_write_data *data;
int ret = 0;
while (!list_empty(head)) {
int ret2;
data = list_first_entry(head, struct nfs_write_data, list);
list_del_init(&data->list);
ret2 = nfs_do_write(data, call_ops, how);
if (ret == 0)
ret = ret2;
}
return ret;
&task_setup_data->rpc_client, msg, data);
}
/* If a nfs_flush_* function fails, it should remove reqs from @head and
@@ -1120,7 +1032,7 @@ static void nfs_redirty_request(struct nfs_page *req)
{
nfs_mark_request_dirty(req);
nfs_unlock_request(req);
nfs_end_page_writeback(req->wb_page);
nfs_end_page_writeback(req);
nfs_release_request(req);
}
@@ -1140,173 +1052,30 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
.completion = nfs_write_completion,
};
static void nfs_flush_error(struct nfs_pageio_descriptor *desc,
struct nfs_pgio_header *hdr)
{
set_bit(NFS_IOHDR_REDO, &hdr->flags);
while (!list_empty(&hdr->rpc_list)) {
struct nfs_write_data *data = list_first_entry(&hdr->rpc_list,
struct nfs_write_data, list);
list_del(&data->list);
nfs_writedata_release(data);
}
desc->pg_completion_ops->error_cleanup(&desc->pg_list);
}
/*
* Generate multiple small requests to write out a single
* contiguous dirty area on one page.
*/
static int nfs_flush_multi(struct nfs_pageio_descriptor *desc,
struct nfs_pgio_header *hdr)
{
struct nfs_page *req = hdr->req;
struct page *page = req->wb_page;
struct nfs_write_data *data;
size_t wsize = desc->pg_bsize, nbytes;
unsigned int offset;
int requests = 0;
struct nfs_commit_info cinfo;
nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
(desc->pg_moreio || nfs_reqs_to_commit(&cinfo) ||
desc->pg_count > wsize))
desc->pg_ioflags &= ~FLUSH_COND_STABLE;
offset = 0;
nbytes = desc->pg_count;
do {
size_t len = min(nbytes, wsize);
data = nfs_writedata_alloc(hdr, 1);
if (!data) {
nfs_flush_error(desc, hdr);
return -ENOMEM;
}
data->pages.pagevec[0] = page;
nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo);
list_add(&data->list, &hdr->rpc_list);
requests++;
nbytes -= len;
offset += len;
} while (nbytes != 0);
nfs_list_remove_request(req);
nfs_list_add_request(req, &hdr->pages);
desc->pg_rpc_callops = &nfs_write_common_ops;
return 0;
}
/*
* Create an RPC task for the given write request and kick it.
* The page must have been locked by the caller.
*
* It may happen that the page we're passed is not marked dirty.
* This is the case if nfs_updatepage detects a conflicting request
* that has been written but not committed.
*/
static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
struct nfs_pgio_header *hdr)
{
struct nfs_page *req;
struct page **pages;
struct nfs_write_data *data;
struct list_head *head = &desc->pg_list;
struct nfs_commit_info cinfo;
data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base,
desc->pg_count));
if (!data) {
nfs_flush_error(desc, hdr);
return -ENOMEM;
}
nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
pages = data->pages.pagevec;
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
nfs_list_add_request(req, &hdr->pages);
*pages++ = req->wb_page;
}
if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
(desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
desc->pg_ioflags &= ~FLUSH_COND_STABLE;
/* Set up the argument struct */
nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
list_add(&data->list, &hdr->rpc_list);
desc->pg_rpc_callops = &nfs_write_common_ops;
return 0;
}
int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
struct nfs_pgio_header *hdr)
{
if (desc->pg_bsize < PAGE_CACHE_SIZE)
return nfs_flush_multi(desc, hdr);
return nfs_flush_one(desc, hdr);
}
EXPORT_SYMBOL_GPL(nfs_generic_flush);
static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
{
struct nfs_write_header *whdr;
struct nfs_pgio_header *hdr;
int ret;
whdr = nfs_writehdr_alloc();
if (!whdr) {
desc->pg_completion_ops->error_cleanup(&desc->pg_list);
return -ENOMEM;
}
hdr = &whdr->header;
nfs_pgheader_init(desc, hdr, nfs_writehdr_free);
atomic_inc(&hdr->refcnt);
ret = nfs_generic_flush(desc, hdr);
if (ret == 0)
ret = nfs_do_multiple_writes(&hdr->rpc_list,
desc->pg_rpc_callops,
desc->pg_ioflags);
if (atomic_dec_and_test(&hdr->refcnt))
hdr->completion_ops->completion(hdr);
return ret;
}
static const struct nfs_pageio_ops nfs_pageio_write_ops = {
.pg_test = nfs_generic_pg_test,
.pg_doio = nfs_generic_pg_writepages,
};
void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
struct inode *inode, int ioflags,
struct inode *inode, int ioflags, bool force_mds,
const struct nfs_pgio_completion_ops *compl_ops)
{
nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops,
NFS_SERVER(inode)->wsize, ioflags);
struct nfs_server *server = NFS_SERVER(inode);
const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops;
#ifdef CONFIG_NFS_V4_1
if (server->pnfs_curr_ld && !force_mds)
pg_ops = server->pnfs_curr_ld->pg_write_ops;
#endif
nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops,
server->wsize, ioflags);
}
EXPORT_SYMBOL_GPL(nfs_pageio_init_write);
void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
{
pgio->pg_ops = &nfs_pageio_write_ops;
pgio->pg_ops = &nfs_pgio_rw_ops;
pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize;
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
void nfs_write_prepare(struct rpc_task *task, void *calldata)
{
struct nfs_write_data *data = calldata;
int err;
err = NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data);
if (err)
rpc_exit(task, err);
}
void nfs_commit_prepare(struct rpc_task *task, void *calldata)
{
struct nfs_commit_data *data = calldata;
@@ -1314,23 +1083,8 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata)
NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
}
/*
* Handle a write reply that flushes a whole page.
*
* FIXME: There is an inherent race with invalidate_inode_pages and
* writebacks since the page->count is kept > 1 for as long
* as the page has a write request pending.
*/
static void nfs_writeback_done_common(struct rpc_task *task, void *calldata)
static void nfs_writeback_release_common(struct nfs_pgio_data *data)
{
struct nfs_write_data *data = calldata;
nfs_writeback_done(task, data);
}
static void nfs_writeback_release_common(void *calldata)
{
struct nfs_write_data *data = calldata;
struct nfs_pgio_header *hdr = data->header;
int status = data->task.tk_status;
@@ -1339,34 +1093,46 @@ static void nfs_writeback_release_common(void *calldata)
if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
; /* Do nothing */
else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
memcpy(hdr->verf, &data->verf, sizeof(*hdr->verf));
else if (memcmp(hdr->verf, &data->verf, sizeof(*hdr->verf)))
memcpy(&hdr->verf, &data->verf, sizeof(hdr->verf));
else if (memcmp(&hdr->verf, &data->verf, sizeof(hdr->verf)))
set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
spin_unlock(&hdr->lock);
}
nfs_writedata_release(data);
}
static const struct rpc_call_ops nfs_write_common_ops = {
.rpc_call_prepare = nfs_write_prepare,
.rpc_call_done = nfs_writeback_done_common,
.rpc_release = nfs_writeback_release_common,
};
/*
* Special version of should_remove_suid() that ignores capabilities.
*/
static int nfs_should_remove_suid(const struct inode *inode)
{
umode_t mode = inode->i_mode;
int kill = 0;
/* suid always must be killed */
if (unlikely(mode & S_ISUID))
kill = ATTR_KILL_SUID;
/*
* sgid without any exec bits is just a mandatory locking mark; leave
* it alone. If some exec bits are set, it's a real sgid; kill it.
*/
if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
kill |= ATTR_KILL_SGID;
if (unlikely(kill && S_ISREG(mode)))
return kill;
return 0;
}
/*
* This function is called when the WRITE call is complete.
*/
void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
struct inode *inode)
{
struct nfs_writeargs *argp = &data->args;
struct nfs_writeres *resp = &data->res;
struct inode *inode = data->header->inode;
int status;
dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
task->tk_pid, task->tk_status);
/*
* ->write_done will attempt to use post-op attributes to detect
* conflicting writes by other clients. A strict interpretation
@@ -1376,11 +1142,11 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
*/
status = NFS_PROTO(inode)->write_done(task, data);
if (status != 0)
return;
nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
return status;
nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, data->res.count);
#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
if (data->res.verf->committed < data->args.stable && task->tk_status >= 0) {
/* We tried a write call, but the server did not
* commit data to stable storage even though we
* requested it.
@@ -1396,18 +1162,31 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
dprintk("NFS: faulty NFS server %s:"
" (committed = %d) != (stable = %d)\n",
NFS_SERVER(inode)->nfs_client->cl_hostname,
resp->verf->committed, argp->stable);
data->res.verf->committed, data->args.stable);
complain = jiffies + 300 * HZ;
}
}
#endif
if (task->tk_status < 0)
nfs_set_pgio_error(data->header, task->tk_status, argp->offset);
else if (resp->count < argp->count) {
/* Deal with the suid/sgid bit corner case */
if (nfs_should_remove_suid(inode))
nfs_mark_for_revalidate(inode);
return 0;
}
/*
* This function is called when the WRITE call is complete.
*/
static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *data)
{
struct nfs_pgio_args *argp = &data->args;
struct nfs_pgio_res *resp = &data->res;
if (resp->count < argp->count) {
static unsigned long complain;
/* This a short write! */
nfs_inc_stats(inode, NFSIOS_SHORTWRITE);
nfs_inc_stats(data->header->inode, NFSIOS_SHORTWRITE);
/* Has the server at least made some progress? */
if (resp->count == 0) {
@@ -1874,7 +1653,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
int __init nfs_init_writepagecache(void)
{
nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
sizeof(struct nfs_write_header),
sizeof(struct nfs_rw_header),
0, SLAB_HWCACHE_ALIGN,
NULL);
if (nfs_wdata_cachep == NULL)
@@ -1936,3 +1715,12 @@ void nfs_destroy_writepagecache(void)
kmem_cache_destroy(nfs_wdata_cachep);
}
static const struct nfs_rw_ops nfs_rw_write_ops = {
.rw_mode = FMODE_WRITE,
.rw_alloc_header = nfs_writehdr_alloc,
.rw_free_header = nfs_writehdr_free,
.rw_release = nfs_writeback_release_common,
.rw_done = nfs_writeback_done,
.rw_result = nfs_writeback_result,
.rw_initiate = nfs_initiate_write,
};