nfs: enable swap on NFS

Implement the new swapfile a_ops for NFS and hook up ->direct_IO.  This
will set the NFS socket to SOCK_MEMALLOC and run socket reconnect under
PF_MEMALLOC as well as reset SOCK_MEMALLOC before engaging the protocol
->connect() method.

PF_MEMALLOC should allow the allocation of struct socket and related
objects and the early (re)setting of SOCK_MEMALLOC should allow us to
receive the packets required for the TCP connection buildup.

[jlayton@redhat.com: Restore PF_MEMALLOC task flags in all cases]
[dfeng@redhat.com: Fix handling of multiple swap files]
[a.p.zijlstra@chello.nl: Original patch]
Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Eric B Munson <emunson@mgebm.net>
Cc: Eric Paris <eparis@redhat.com>
Cc: James Morris <jmorris@namei.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Neil Brown <neilb@suse.de>
Cc: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Xiaotian Feng <dfeng@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
此提交包含在:
Mel Gorman
2012-07-31 16:45:12 -07:00
提交者 Linus Torvalds
父節點 29418aa4bd
當前提交 a564b8f039
共有 9 個檔案被更改,包括 149 行新增34 行删除

查看文件

@@ -86,6 +86,14 @@ config NFS_V4
If unsure, say Y.
config NFS_SWAP
bool "Provide swap over NFS support"
default n
depends on NFS_FS
select SUNRPC_SWAP
help
This option enables swapon to work on files located on NFS mounts.
config NFS_V4_1
bool "NFS client support for NFSv4.1 (EXPERIMENTAL)"
depends on NFS_V4 && EXPERIMENTAL

查看文件

@@ -115,17 +115,28 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
* @nr_segs: size of iovec array
*
* The presence of this routine in the address space ops vector means
* the NFS client supports direct I/O. However, we shunt off direct
* read and write requests before the VFS gets them, so this method
* should never be called.
* the NFS client supports direct I/O. However, for most direct IO, we
* shunt off direct read and write requests before the VFS gets them,
* so this method is only ever called for swap.
*/
ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
{
#ifndef CONFIG_NFS_SWAP
dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n",
iocb->ki_filp->f_path.dentry->d_name.name,
(long long) pos, nr_segs);
return -EINVAL;
#else
VM_BUG_ON(iocb->ki_left != PAGE_SIZE);
VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
if (rw == READ || rw == KERNEL_READ)
return nfs_file_direct_read(iocb, iov, nr_segs, pos,
rw == READ ? true : false);
return nfs_file_direct_write(iocb, iov, nr_segs, pos,
rw == WRITE ? true : false);
#endif /* CONFIG_NFS_SWAP */
}
static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
@@ -303,7 +314,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
*/
static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
const struct iovec *iov,
loff_t pos)
loff_t pos, bool uio)
{
struct nfs_direct_req *dreq = desc->pg_dreq;
struct nfs_open_context *ctx = dreq->ctx;
@@ -331,12 +342,20 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
GFP_KERNEL);
if (!pagevec)
break;
down_read(&current->mm->mmap_sem);
result = get_user_pages(current, current->mm, user_addr,
if (uio) {
down_read(&current->mm->mmap_sem);
result = get_user_pages(current, current->mm, user_addr,
npages, 1, 0, pagevec, NULL);
up_read(&current->mm->mmap_sem);
if (result < 0)
break;
up_read(&current->mm->mmap_sem);
if (result < 0)
break;
} else {
WARN_ON(npages != 1);
result = get_kernel_page(user_addr, 1, pagevec);
if (WARN_ON(result != 1))
break;
}
if ((unsigned)result < npages) {
bytes = result * PAGE_SIZE;
if (bytes <= pgbase) {
@@ -386,7 +405,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
const struct iovec *iov,
unsigned long nr_segs,
loff_t pos)
loff_t pos, bool uio)
{
struct nfs_pageio_descriptor desc;
ssize_t result = -EINVAL;
@@ -400,7 +419,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
for (seg = 0; seg < nr_segs; seg++) {
const struct iovec *vec = &iov[seg];
result = nfs_direct_read_schedule_segment(&desc, vec, pos);
result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio);
if (result < 0)
break;
requested_bytes += result;
@@ -426,7 +445,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
}
static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
unsigned long nr_segs, loff_t pos, bool uio)
{
ssize_t result = -ENOMEM;
struct inode *inode = iocb->ki_filp->f_mapping->host;
@@ -444,7 +463,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
if (!is_sync_kiocb(iocb))
dreq->iocb = iocb;
result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos);
result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio);
if (!result)
result = nfs_direct_wait(dreq);
NFS_I(inode)->read_io += result;
@@ -610,7 +629,7 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
*/
static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,
const struct iovec *iov,
loff_t pos)
loff_t pos, bool uio)
{
struct nfs_direct_req *dreq = desc->pg_dreq;
struct nfs_open_context *ctx = dreq->ctx;
@@ -638,12 +657,19 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d
if (!pagevec)
break;
down_read(&current->mm->mmap_sem);
result = get_user_pages(current, current->mm, user_addr,
npages, 0, 0, pagevec, NULL);
up_read(&current->mm->mmap_sem);
if (result < 0)
break;
if (uio) {
down_read(&current->mm->mmap_sem);
result = get_user_pages(current, current->mm, user_addr,
npages, 0, 0, pagevec, NULL);
up_read(&current->mm->mmap_sem);
if (result < 0)
break;
} else {
WARN_ON(npages != 1);
result = get_kernel_page(user_addr, 0, pagevec);
if (WARN_ON(result != 1))
break;
}
if ((unsigned)result < npages) {
bytes = result * PAGE_SIZE;
@@ -774,7 +800,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
const struct iovec *iov,
unsigned long nr_segs,
loff_t pos)
loff_t pos, bool uio)
{
struct nfs_pageio_descriptor desc;
struct inode *inode = dreq->inode;
@@ -790,7 +816,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
for (seg = 0; seg < nr_segs; seg++) {
const struct iovec *vec = &iov[seg];
result = nfs_direct_write_schedule_segment(&desc, vec, pos);
result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio);
if (result < 0)
break;
requested_bytes += result;
@@ -818,7 +844,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos,
size_t count)
size_t count, bool uio)
{
ssize_t result = -ENOMEM;
struct inode *inode = iocb->ki_filp->f_mapping->host;
@@ -836,7 +862,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
if (!is_sync_kiocb(iocb))
dreq->iocb = iocb;
result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos);
result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio);
if (!result)
result = nfs_direct_wait(dreq);
out_release:
@@ -867,7 +893,7 @@ out:
* cache.
*/
ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
unsigned long nr_segs, loff_t pos, bool uio)
{
ssize_t retval = -EINVAL;
struct file *file = iocb->ki_filp;
@@ -892,7 +918,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
task_io_account_read(count);
retval = nfs_direct_read(iocb, iov, nr_segs, pos);
retval = nfs_direct_read(iocb, iov, nr_segs, pos, uio);
if (retval > 0)
iocb->ki_pos = pos + retval;
@@ -923,7 +949,7 @@ out:
* is no atomic O_APPEND write facility in the NFS protocol.
*/
ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
unsigned long nr_segs, loff_t pos, bool uio)
{
ssize_t retval = -EINVAL;
struct file *file = iocb->ki_filp;
@@ -955,7 +981,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
task_io_account_write(count);
retval = nfs_direct_write(iocb, iov, nr_segs, pos, count);
retval = nfs_direct_write(iocb, iov, nr_segs, pos, count, uio);
if (retval > 0) {
struct inode *inode = mapping->host;

查看文件

@@ -175,7 +175,7 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
ssize_t result;
if (iocb->ki_filp->f_flags & O_DIRECT)
return nfs_file_direct_read(iocb, iov, nr_segs, pos);
return nfs_file_direct_read(iocb, iov, nr_segs, pos, true);
dprintk("NFS: read(%s/%s, %lu@%lu)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
@@ -482,6 +482,20 @@ static int nfs_launder_page(struct page *page)
return nfs_wb_page(inode, page);
}
#ifdef CONFIG_NFS_SWAP
static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
sector_t *span)
{
*span = sis->pages;
return xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 1);
}
static void nfs_swap_deactivate(struct file *file)
{
xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 0);
}
#endif
const struct address_space_operations nfs_file_aops = {
.readpage = nfs_readpage,
.readpages = nfs_readpages,
@@ -496,6 +510,10 @@ const struct address_space_operations nfs_file_aops = {
.migratepage = nfs_migrate_page,
.launder_page = nfs_launder_page,
.error_remove_page = generic_error_remove_page,
#ifdef CONFIG_NFS_SWAP
.swap_activate = nfs_swap_activate,
.swap_deactivate = nfs_swap_deactivate,
#endif
};
/*
@@ -570,7 +588,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
size_t count = iov_length(iov, nr_segs);
if (iocb->ki_filp->f_flags & O_DIRECT)
return nfs_file_direct_write(iocb, iov, nr_segs, pos);
return nfs_file_direct_write(iocb, iov, nr_segs, pos, true);
dprintk("NFS: write(%s/%s, %lu@%Ld)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,