nfs: enable swap on NFS
Implement the new swapfile a_ops for NFS and hook up ->direct_IO. This will set the NFS socket to SOCK_MEMALLOC and run socket reconnect under PF_MEMALLOC as well as reset SOCK_MEMALLOC before engaging the protocol ->connect() method. PF_MEMALLOC should allow the allocation of struct socket and related objects and the early (re)setting of SOCK_MEMALLOC should allow us to receive the packets required for the TCP connection buildup. [jlayton@redhat.com: Restore PF_MEMALLOC task flags in all cases] [dfeng@redhat.com: Fix handling of multiple swap files] [a.p.zijlstra@chello.nl: Original patch] Signed-off-by: Mel Gorman <mgorman@suse.de> Acked-by: Rik van Riel <riel@redhat.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: David S. Miller <davem@davemloft.net> Cc: Eric B Munson <emunson@mgebm.net> Cc: Eric Paris <eparis@redhat.com> Cc: James Morris <jmorris@namei.org> Cc: Mel Gorman <mgorman@suse.de> Cc: Mike Christie <michaelc@cs.wisc.edu> Cc: Neil Brown <neilb@suse.de> Cc: Sebastian Andrzej Siewior <sebastian@breakpoint.cc> Cc: Trond Myklebust <Trond.Myklebust@netapp.com> Cc: Xiaotian Feng <dfeng@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
此提交包含在:
@@ -86,6 +86,14 @@ config NFS_V4
|
||||
|
||||
If unsure, say Y.
|
||||
|
||||
config NFS_SWAP
|
||||
bool "Provide swap over NFS support"
|
||||
default n
|
||||
depends on NFS_FS
|
||||
select SUNRPC_SWAP
|
||||
help
|
||||
This option enables swapon to work on files located on NFS mounts.
|
||||
|
||||
config NFS_V4_1
|
||||
bool "NFS client support for NFSv4.1 (EXPERIMENTAL)"
|
||||
depends on NFS_V4 && EXPERIMENTAL
|
||||
|
@@ -115,17 +115,28 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
|
||||
* @nr_segs: size of iovec array
|
||||
*
|
||||
* The presence of this routine in the address space ops vector means
|
||||
* the NFS client supports direct I/O. However, we shunt off direct
|
||||
* read and write requests before the VFS gets them, so this method
|
||||
* should never be called.
|
||||
* the NFS client supports direct I/O. However, for most direct IO, we
|
||||
* shunt off direct read and write requests before the VFS gets them,
|
||||
* so this method is only ever called for swap.
|
||||
*/
|
||||
ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
|
||||
{
|
||||
#ifndef CONFIG_NFS_SWAP
|
||||
dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n",
|
||||
iocb->ki_filp->f_path.dentry->d_name.name,
|
||||
(long long) pos, nr_segs);
|
||||
|
||||
return -EINVAL;
|
||||
#else
|
||||
VM_BUG_ON(iocb->ki_left != PAGE_SIZE);
|
||||
VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
|
||||
|
||||
if (rw == READ || rw == KERNEL_READ)
|
||||
return nfs_file_direct_read(iocb, iov, nr_segs, pos,
|
||||
rw == READ ? true : false);
|
||||
return nfs_file_direct_write(iocb, iov, nr_segs, pos,
|
||||
rw == WRITE ? true : false);
|
||||
#endif /* CONFIG_NFS_SWAP */
|
||||
}
|
||||
|
||||
static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
|
||||
@@ -303,7 +314,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
|
||||
*/
|
||||
static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
|
||||
const struct iovec *iov,
|
||||
loff_t pos)
|
||||
loff_t pos, bool uio)
|
||||
{
|
||||
struct nfs_direct_req *dreq = desc->pg_dreq;
|
||||
struct nfs_open_context *ctx = dreq->ctx;
|
||||
@@ -331,12 +342,20 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
|
||||
GFP_KERNEL);
|
||||
if (!pagevec)
|
||||
break;
|
||||
down_read(&current->mm->mmap_sem);
|
||||
result = get_user_pages(current, current->mm, user_addr,
|
||||
if (uio) {
|
||||
down_read(&current->mm->mmap_sem);
|
||||
result = get_user_pages(current, current->mm, user_addr,
|
||||
npages, 1, 0, pagevec, NULL);
|
||||
up_read(&current->mm->mmap_sem);
|
||||
if (result < 0)
|
||||
break;
|
||||
up_read(&current->mm->mmap_sem);
|
||||
if (result < 0)
|
||||
break;
|
||||
} else {
|
||||
WARN_ON(npages != 1);
|
||||
result = get_kernel_page(user_addr, 1, pagevec);
|
||||
if (WARN_ON(result != 1))
|
||||
break;
|
||||
}
|
||||
|
||||
if ((unsigned)result < npages) {
|
||||
bytes = result * PAGE_SIZE;
|
||||
if (bytes <= pgbase) {
|
||||
@@ -386,7 +405,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
|
||||
static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
|
||||
const struct iovec *iov,
|
||||
unsigned long nr_segs,
|
||||
loff_t pos)
|
||||
loff_t pos, bool uio)
|
||||
{
|
||||
struct nfs_pageio_descriptor desc;
|
||||
ssize_t result = -EINVAL;
|
||||
@@ -400,7 +419,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
|
||||
|
||||
for (seg = 0; seg < nr_segs; seg++) {
|
||||
const struct iovec *vec = &iov[seg];
|
||||
result = nfs_direct_read_schedule_segment(&desc, vec, pos);
|
||||
result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio);
|
||||
if (result < 0)
|
||||
break;
|
||||
requested_bytes += result;
|
||||
@@ -426,7 +445,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
|
||||
}
|
||||
|
||||
static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
|
||||
unsigned long nr_segs, loff_t pos)
|
||||
unsigned long nr_segs, loff_t pos, bool uio)
|
||||
{
|
||||
ssize_t result = -ENOMEM;
|
||||
struct inode *inode = iocb->ki_filp->f_mapping->host;
|
||||
@@ -444,7 +463,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
|
||||
if (!is_sync_kiocb(iocb))
|
||||
dreq->iocb = iocb;
|
||||
|
||||
result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos);
|
||||
result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio);
|
||||
if (!result)
|
||||
result = nfs_direct_wait(dreq);
|
||||
NFS_I(inode)->read_io += result;
|
||||
@@ -610,7 +629,7 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
|
||||
*/
|
||||
static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,
|
||||
const struct iovec *iov,
|
||||
loff_t pos)
|
||||
loff_t pos, bool uio)
|
||||
{
|
||||
struct nfs_direct_req *dreq = desc->pg_dreq;
|
||||
struct nfs_open_context *ctx = dreq->ctx;
|
||||
@@ -638,12 +657,19 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d
|
||||
if (!pagevec)
|
||||
break;
|
||||
|
||||
down_read(&current->mm->mmap_sem);
|
||||
result = get_user_pages(current, current->mm, user_addr,
|
||||
npages, 0, 0, pagevec, NULL);
|
||||
up_read(&current->mm->mmap_sem);
|
||||
if (result < 0)
|
||||
break;
|
||||
if (uio) {
|
||||
down_read(&current->mm->mmap_sem);
|
||||
result = get_user_pages(current, current->mm, user_addr,
|
||||
npages, 0, 0, pagevec, NULL);
|
||||
up_read(&current->mm->mmap_sem);
|
||||
if (result < 0)
|
||||
break;
|
||||
} else {
|
||||
WARN_ON(npages != 1);
|
||||
result = get_kernel_page(user_addr, 0, pagevec);
|
||||
if (WARN_ON(result != 1))
|
||||
break;
|
||||
}
|
||||
|
||||
if ((unsigned)result < npages) {
|
||||
bytes = result * PAGE_SIZE;
|
||||
@@ -774,7 +800,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
|
||||
static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
|
||||
const struct iovec *iov,
|
||||
unsigned long nr_segs,
|
||||
loff_t pos)
|
||||
loff_t pos, bool uio)
|
||||
{
|
||||
struct nfs_pageio_descriptor desc;
|
||||
struct inode *inode = dreq->inode;
|
||||
@@ -790,7 +816,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
|
||||
|
||||
for (seg = 0; seg < nr_segs; seg++) {
|
||||
const struct iovec *vec = &iov[seg];
|
||||
result = nfs_direct_write_schedule_segment(&desc, vec, pos);
|
||||
result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio);
|
||||
if (result < 0)
|
||||
break;
|
||||
requested_bytes += result;
|
||||
@@ -818,7 +844,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
|
||||
|
||||
static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
|
||||
unsigned long nr_segs, loff_t pos,
|
||||
size_t count)
|
||||
size_t count, bool uio)
|
||||
{
|
||||
ssize_t result = -ENOMEM;
|
||||
struct inode *inode = iocb->ki_filp->f_mapping->host;
|
||||
@@ -836,7 +862,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
|
||||
if (!is_sync_kiocb(iocb))
|
||||
dreq->iocb = iocb;
|
||||
|
||||
result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos);
|
||||
result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio);
|
||||
if (!result)
|
||||
result = nfs_direct_wait(dreq);
|
||||
out_release:
|
||||
@@ -867,7 +893,7 @@ out:
|
||||
* cache.
|
||||
*/
|
||||
ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
|
||||
unsigned long nr_segs, loff_t pos)
|
||||
unsigned long nr_segs, loff_t pos, bool uio)
|
||||
{
|
||||
ssize_t retval = -EINVAL;
|
||||
struct file *file = iocb->ki_filp;
|
||||
@@ -892,7 +918,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
|
||||
|
||||
task_io_account_read(count);
|
||||
|
||||
retval = nfs_direct_read(iocb, iov, nr_segs, pos);
|
||||
retval = nfs_direct_read(iocb, iov, nr_segs, pos, uio);
|
||||
if (retval > 0)
|
||||
iocb->ki_pos = pos + retval;
|
||||
|
||||
@@ -923,7 +949,7 @@ out:
|
||||
* is no atomic O_APPEND write facility in the NFS protocol.
|
||||
*/
|
||||
ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
|
||||
unsigned long nr_segs, loff_t pos)
|
||||
unsigned long nr_segs, loff_t pos, bool uio)
|
||||
{
|
||||
ssize_t retval = -EINVAL;
|
||||
struct file *file = iocb->ki_filp;
|
||||
@@ -955,7 +981,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
|
||||
|
||||
task_io_account_write(count);
|
||||
|
||||
retval = nfs_direct_write(iocb, iov, nr_segs, pos, count);
|
||||
retval = nfs_direct_write(iocb, iov, nr_segs, pos, count, uio);
|
||||
if (retval > 0) {
|
||||
struct inode *inode = mapping->host;
|
||||
|
||||
|
@@ -175,7 +175,7 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
|
||||
ssize_t result;
|
||||
|
||||
if (iocb->ki_filp->f_flags & O_DIRECT)
|
||||
return nfs_file_direct_read(iocb, iov, nr_segs, pos);
|
||||
return nfs_file_direct_read(iocb, iov, nr_segs, pos, true);
|
||||
|
||||
dprintk("NFS: read(%s/%s, %lu@%lu)\n",
|
||||
dentry->d_parent->d_name.name, dentry->d_name.name,
|
||||
@@ -482,6 +482,20 @@ static int nfs_launder_page(struct page *page)
|
||||
return nfs_wb_page(inode, page);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NFS_SWAP
|
||||
static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
|
||||
sector_t *span)
|
||||
{
|
||||
*span = sis->pages;
|
||||
return xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 1);
|
||||
}
|
||||
|
||||
static void nfs_swap_deactivate(struct file *file)
|
||||
{
|
||||
xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
const struct address_space_operations nfs_file_aops = {
|
||||
.readpage = nfs_readpage,
|
||||
.readpages = nfs_readpages,
|
||||
@@ -496,6 +510,10 @@ const struct address_space_operations nfs_file_aops = {
|
||||
.migratepage = nfs_migrate_page,
|
||||
.launder_page = nfs_launder_page,
|
||||
.error_remove_page = generic_error_remove_page,
|
||||
#ifdef CONFIG_NFS_SWAP
|
||||
.swap_activate = nfs_swap_activate,
|
||||
.swap_deactivate = nfs_swap_deactivate,
|
||||
#endif
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -570,7 +588,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
|
||||
size_t count = iov_length(iov, nr_segs);
|
||||
|
||||
if (iocb->ki_filp->f_flags & O_DIRECT)
|
||||
return nfs_file_direct_write(iocb, iov, nr_segs, pos);
|
||||
return nfs_file_direct_write(iocb, iov, nr_segs, pos, true);
|
||||
|
||||
dprintk("NFS: write(%s/%s, %lu@%Ld)\n",
|
||||
dentry->d_parent->d_name.name, dentry->d_name.name,
|
||||
|
新增問題並參考
封鎖使用者